From c8d687b05c803e3b358014e7f729a5700a003552 Mon Sep 17 00:00:00 2001
From: Matthew Poremba <matthew.poremba@amd.com>
Date: Mon, 7 Nov 2022 16:28:15 -0800
Subject: [PATCH 001/492] dev-amdgpu: Fix SDMA ring buffer wrap around

The current SDMA wrap around handling only considers the ring buffer
location as seen by the GPU. Eventually when the end of the SDMA ring
buffer is reached, the driver waits until the rptr written back to the
host catches up to what the driver sees before wrapping around back to
the beginning of the buffer. This writeback currently does not happen at
all, causing hangs for applications with a lot of SDMA commands.

This changeset first fixes the sizes of the queues, especially RLC
queues, so that the wrap around occurs in the correct place. Second, we
now store the rptr writeback address and the absoluate (unwrapped) rptr
value in each SDMA queue. The absolulte rptr is what the driver sends to
the device and what it expects to be written back.

This was tested with an application which basically does a few hundred
thousand hipMemcpy() calls in a loop. It should also fix the issue with
pannotia BC in fullsystem mode.

Change-Id: I53ebdcc6b02fb4eb4da435c9a509544066a97069
Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/65351
Maintainer: Jason Lowe-Power <power.jg@gmail.com>
Tested-by: kokoro <noreply+kokoro@google.com>
Reviewed-by: Jason Lowe-Power <power.jg@gmail.com>
Reviewed-by: Matt Sinclair <mattdsinclair@gmail.com>
Maintainer: Matt Sinclair <mattdsinclair@gmail.com>
---
 src/dev/amdgpu/pm4_packet_processor.cc | 14 ++++++++----
 src/dev/amdgpu/sdma_engine.cc          | 30 ++++++++++++++++++++------
 src/dev/amdgpu/sdma_engine.hh          | 18 ++++++++++++----
 3 files changed, 47 insertions(+), 15 deletions(-)

diff --git a/src/dev/amdgpu/pm4_packet_processor.cc b/src/dev/amdgpu/pm4_packet_processor.cc
index 404beab16c..4f98f18d16 100644
--- a/src/dev/amdgpu/pm4_packet_processor.cc
+++ b/src/dev/amdgpu/pm4_packet_processor.cc
@@ -441,12 +441,17 @@ void
 PM4PacketProcessor::processSDMAMQD(PM4MapQueues *pkt, PM4Queue *q, Addr addr,
     SDMAQueueDesc *mqd, uint16_t vmid)
 {
+    uint32_t rlc_size = 4UL << bits(mqd->sdmax_rlcx_rb_cntl, 6, 1);
+    Addr rptr_wb_addr = mqd->sdmax_rlcx_rb_rptr_addr_hi;
+    rptr_wb_addr <<= 32;
+    rptr_wb_addr |= mqd->sdmax_rlcx_rb_rptr_addr_lo;
+
     DPRINTF(PM4PacketProcessor, "SDMAMQD: rb base: %#lx rptr: %#x/%#x wptr: "
-            "%#x/%#x ib: %#x/%#x size: %d ctrl: %#x\n", mqd->rb_base,
-            mqd->sdmax_rlcx_rb_rptr, mqd->sdmax_rlcx_rb_rptr_hi,
+            "%#x/%#x ib: %#x/%#x size: %d ctrl: %#x rptr wb addr: %#lx\n",
+            mqd->rb_base, mqd->sdmax_rlcx_rb_rptr, mqd->sdmax_rlcx_rb_rptr_hi,
             mqd->sdmax_rlcx_rb_wptr, mqd->sdmax_rlcx_rb_wptr_hi,
             mqd->sdmax_rlcx_ib_base_lo, mqd->sdmax_rlcx_ib_base_hi,
-            mqd->sdmax_rlcx_ib_size, mqd->sdmax_rlcx_rb_cntl);
+            rlc_size, mqd->sdmax_rlcx_rb_cntl, rptr_wb_addr);
 
     // Engine 2 points to SDMA0 while engine 3 points to SDMA1
     assert(pkt->engineSel == 2 || pkt->engineSel == 3);
@@ -454,7 +459,8 @@ PM4PacketProcessor::processSDMAMQD(PM4MapQueues *pkt, PM4Queue *q, Addr addr,
 
     // Register RLC queue with SDMA
     sdma_eng->registerRLCQueue(pkt->doorbellOffset << 2,
-                               mqd->rb_base << 8);
+                               mqd->rb_base << 8, rlc_size,
+                               rptr_wb_addr);
 
     // Register doorbell with GPU device
     gpuDevice->setSDMAEngine(pkt->doorbellOffset << 2, sdma_eng);
diff --git a/src/dev/amdgpu/sdma_engine.cc b/src/dev/amdgpu/sdma_engine.cc
index e9a4c176d8..59c5027c85 100644
--- a/src/dev/amdgpu/sdma_engine.cc
+++ b/src/dev/amdgpu/sdma_engine.cc
@@ -161,7 +161,8 @@ SDMAEngine::translate(Addr vaddr, Addr size)
 }
 
 void
-SDMAEngine::registerRLCQueue(Addr doorbell, Addr rb_base)
+SDMAEngine::registerRLCQueue(Addr doorbell, Addr rb_base, uint32_t size,
+                             Addr rptr_wb_addr)
 {
     // Get first free RLC
     if (!rlc0.valid()) {
@@ -171,19 +172,19 @@ SDMAEngine::registerRLCQueue(Addr doorbell, Addr rb_base)
         rlc0.base(rb_base);
         rlc0.rptr(0);
         rlc0.wptr(0);
+        rlc0.rptrWbAddr(rptr_wb_addr);
         rlc0.processing(false);
-        // TODO: size - I think pull from MQD 2^rb_cntrl[6:1]-1
-        rlc0.size(1024*1024);
+        rlc0.size(size);
     } else if (!rlc1.valid()) {
         DPRINTF(SDMAEngine, "Doorbell %lx mapped to RLC1\n", doorbell);
         rlcInfo[1] = doorbell;
         rlc1.valid(true);
         rlc1.base(rb_base);
-        rlc1.rptr(1);
-        rlc1.wptr(1);
+        rlc1.rptr(0);
+        rlc1.wptr(0);
+        rlc1.rptrWbAddr(rptr_wb_addr);
         rlc1.processing(false);
-        // TODO: size - I think pull from MQD 2^rb_cntrl[6:1]-1
-        rlc1.size(1024*1024);
+        rlc1.size(size);
     } else {
         panic("No free RLCs. Check they are properly unmapped.");
     }
@@ -291,6 +292,17 @@ SDMAEngine::decodeNext(SDMAQueue *q)
                 { decodeHeader(q, header); });
         dmaReadVirt(q->rptr(), sizeof(uint32_t), cb, &cb->dmaBuffer);
     } else {
+        // The driver expects the rptr to be written back to host memory
+        // periodically. In simulation, we writeback rptr after each burst of
+        // packets from a doorbell, rather than using the cycle count which
+        // is not accurate in all simulation settings (e.g., KVM).
+        DPRINTF(SDMAEngine, "Writing rptr %#lx back to host addr %#lx\n",
+                q->globalRptr(), q->rptrWbAddr());
+        if (q->rptrWbAddr()) {
+            auto cb = new DmaVirtCallback<uint64_t>(
+                [ = ](const uint64_t &) { }, q->globalRptr());
+            dmaWriteVirt(q->rptrWbAddr(), sizeof(Addr), cb, &cb->dmaBuffer);
+        }
         q->processing(false);
         if (q->parent()) {
             DPRINTF(SDMAEngine, "SDMA switching queues\n");
@@ -1158,6 +1170,7 @@ SDMAEngine::setGfxRptrLo(uint32_t data)
 {
     gfxRptr = insertBits(gfxRptr, 31, 0, 0);
     gfxRptr |= data;
+    gfx.rptrWbAddr(getGARTAddr(gfxRptr));
 }
 
 void
@@ -1165,6 +1178,7 @@ SDMAEngine::setGfxRptrHi(uint32_t data)
 {
     gfxRptr = insertBits(gfxRptr, 63, 32, 0);
     gfxRptr |= ((uint64_t)data) << 32;
+    gfx.rptrWbAddr(getGARTAddr(gfxRptr));
 }
 
 void
@@ -1236,6 +1250,7 @@ SDMAEngine::setPageRptrLo(uint32_t data)
 {
     pageRptr = insertBits(pageRptr, 31, 0, 0);
     pageRptr |= data;
+    page.rptrWbAddr(getGARTAddr(pageRptr));
 }
 
 void
@@ -1243,6 +1258,7 @@ SDMAEngine::setPageRptrHi(uint32_t data)
 {
     pageRptr = insertBits(pageRptr, 63, 32, 0);
     pageRptr |= ((uint64_t)data) << 32;
+    page.rptrWbAddr(getGARTAddr(pageRptr));
 }
 
 void
diff --git a/src/dev/amdgpu/sdma_engine.hh b/src/dev/amdgpu/sdma_engine.hh
index 6fe7a8e565..d0afaf7a4a 100644
--- a/src/dev/amdgpu/sdma_engine.hh
+++ b/src/dev/amdgpu/sdma_engine.hh
@@ -58,6 +58,8 @@ class SDMAEngine : public DmaVirtDevice
         Addr _rptr;
         Addr _wptr;
         Addr _size;
+        Addr _rptr_wb_addr = 0;
+        Addr _global_rptr = 0;
         bool _valid;
         bool _processing;
         SDMAQueue *_parent;
@@ -72,6 +74,8 @@ class SDMAEngine : public DmaVirtDevice
         Addr wptr() { return _base + _wptr; }
         Addr getWptr() { return _wptr; }
         Addr size() { return _size; }
+        Addr rptrWbAddr() { return _rptr_wb_addr; }
+        Addr globalRptr() { return _global_rptr; }
         bool valid() { return _valid; }
         bool processing() { return _processing; }
         SDMAQueue* parent() { return _parent; }
@@ -82,22 +86,27 @@ class SDMAEngine : public DmaVirtDevice
         void
         incRptr(uint32_t value)
         {
-            //assert((_rptr + value) <= (_size << 1));
             _rptr = (_rptr + value) % _size;
+            _global_rptr += value;
         }
 
-        void rptr(Addr value) { _rptr = value; }
+        void
+        rptr(Addr value)
+        {
+            _rptr = value;
+            _global_rptr = value;
+        }
 
         void
         setWptr(Addr value)
         {
-            //assert(value <= (_size << 1));
             _wptr = value % _size;
         }
 
         void wptr(Addr value) { _wptr = value; }
 
         void size(Addr value) { _size = value; }
+        void rptrWbAddr(Addr value) { _rptr_wb_addr = value; }
         void valid(bool v) { _valid = v; }
         void processing(bool value) { _processing = value; }
         void parent(SDMAQueue* q) { _parent = q; }
@@ -268,7 +277,8 @@ class SDMAEngine : public DmaVirtDevice
     /**
      * Methods for RLC queues
      */
-    void registerRLCQueue(Addr doorbell, Addr rb_base);
+    void registerRLCQueue(Addr doorbell, Addr rb_base, uint32_t size,
+                          Addr rptr_wb_addr);
     void unregisterRLCQueue(Addr doorbell);
     void deallocateRLCQueues();
 

From 90046bae6f14b4bf94fc450efc617614b9214f92 Mon Sep 17 00:00:00 2001
From: handsomeliu <handsomeliu@google.com>
Date: Tue, 8 Nov 2022 15:39:28 +0800
Subject: [PATCH 002/492] systemc: Add the stream id entry and its conversion
 in control extension

stream id and substream id are properties of gem5 Request. This CL adds
the information into gem5 ControlExtension to manipulate them in SystemC
level, and adds the conversion between ControlExtension and Packet.

Change-Id: Id13d181561ba496c2012f7237eb800f0a9786d05
Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/65371
Maintainer: Bobby Bruce <bbruce@ucdavis.edu>
Tested-by: kokoro <noreply+kokoro@google.com>
Reviewed-by: Yu-hsin Wang <yuhsingw@google.com>
---
 src/systemc/tlm_bridge/sc_ext.cc | 54 +++++++++++++++++++++++++++++++-
 src/systemc/tlm_bridge/sc_ext.hh | 13 ++++++++
 2 files changed, 66 insertions(+), 1 deletion(-)

diff --git a/src/systemc/tlm_bridge/sc_ext.cc b/src/systemc/tlm_bridge/sc_ext.cc
index 4d12fb3d9f..6e3cf113d3 100644
--- a/src/systemc/tlm_bridge/sc_ext.cc
+++ b/src/systemc/tlm_bridge/sc_ext.cc
@@ -33,6 +33,8 @@
 
 #include "systemc/tlm_bridge/sc_ext.hh"
 
+#include <optional>
+
 #include "systemc/ext/utils/sc_report_handler.hh"
 #include "systemc/tlm_bridge/gem5_to_tlm.hh"
 #include "systemc/tlm_bridge/tlm_to_gem5.hh"
@@ -76,6 +78,14 @@ struct ControlConversionRegister
                 }
 
                 pkt->qosValue(control_ex->getQos());
+
+                if (control_ex->hasStreamId()) {
+                    pkt->req->setStreamId(control_ex->getStreamId().value());
+                }
+                if (control_ex->hasSubstreamId()) {
+                    pkt->req->setSubstreamId(
+                        control_ex->getSubstreamId().value());
+                }
             });
         sc_gem5::addPacketToPayloadConversionStep(
             [] (PacketPtr pkt, tlm::tlm_generic_payload &trans)
@@ -90,6 +100,12 @@ struct ControlConversionRegister
                 control_ex->setSecure(pkt->req->isSecure());
                 control_ex->setInstruction(pkt->req->isInstFetch());
                 control_ex->setQos(pkt->qosValue());
+                if (pkt->req->hasStreamId()) {
+                    control_ex->setStreamId(pkt->req->streamId());
+                }
+                if (pkt->req->hasSubstreamId()) {
+                    control_ex->setSubstreamId(pkt->req->substreamId());
+                }
             });
     }
 };
@@ -263,4 +279,40 @@ ControlExtension::setQos(uint8_t q)
     qos = q;
 }
 
-} // namespace Gem5SystemC
+bool
+ControlExtension::hasStreamId() const
+{
+    return stream_id.has_value();
+}
+
+std::optional<uint32_t>
+ControlExtension::getStreamId() const
+{
+    return stream_id;
+}
+
+void
+ControlExtension::setStreamId(std::optional<uint32_t> s)
+{
+    stream_id = std::move(s);
+}
+
+bool
+ControlExtension::hasSubstreamId() const
+{
+    return substream_id.has_value();
+}
+
+std::optional<uint32_t>
+ControlExtension::getSubstreamId() const
+{
+    return substream_id;
+}
+
+void
+ControlExtension::setSubstreamId(std::optional<uint32_t> s)
+{
+    substream_id = std::move(s);
+}
+
+}  // namespace Gem5SystemC
diff --git a/src/systemc/tlm_bridge/sc_ext.hh b/src/systemc/tlm_bridge/sc_ext.hh
index bb676761ce..f23f3fa54d 100644
--- a/src/systemc/tlm_bridge/sc_ext.hh
+++ b/src/systemc/tlm_bridge/sc_ext.hh
@@ -36,6 +36,7 @@
 
 #include <cstdint>
 #include <memory>
+#include <optional>
 
 #include "base/amo.hh"
 #include "mem/packet.hh"
@@ -115,6 +116,14 @@ class ControlExtension : public tlm::tlm_extension<ControlExtension>
     uint8_t getQos() const;
     void setQos(uint8_t q);
 
+    /* Stream ID and Substream ID */
+    bool hasStreamId() const;
+    std::optional<uint32_t> getStreamId() const;
+    void setStreamId(std::optional<uint32_t> s);
+    bool hasSubstreamId() const;
+    std::optional<uint32_t> getSubstreamId() const;
+    void setSubstreamId(std::optional<uint32_t> s);
+
   private:
     /* Secure and privileged access */
     bool privileged;
@@ -123,6 +132,10 @@ class ControlExtension : public tlm::tlm_extension<ControlExtension>
 
     /* Quality of Service (AXI4) */
     uint8_t qos;
+
+    /* Stream ID and Substream ID */
+    std::optional<uint32_t> stream_id;
+    std::optional<uint32_t> substream_id;
 };
 
 } // namespace Gem5SystemC

From 623e2d3dac3e75c67b4e1b8f6a7113f0ab376960 Mon Sep 17 00:00:00 2001
From: Matthew Poremba <matthew.poremba@amd.com>
Date: Tue, 8 Nov 2022 14:24:32 -0800
Subject: [PATCH 003/492] dev-amdgpu: Handle ring buffer wrap for PM4 queue

Change-Id: I27bc274327838add709423b072d437c4e727a714
Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/65431
Maintainer: Matt Sinclair <mattdsinclair@gmail.com>
Tested-by: kokoro <noreply+kokoro@google.com>
Reviewed-by: Matt Sinclair <mattdsinclair@gmail.com>
---
 src/dev/amdgpu/pm4_mmio.hh             |  1 +
 src/dev/amdgpu/pm4_packet_processor.cc | 13 +++++++++++--
 src/dev/amdgpu/pm4_packet_processor.hh |  1 +
 src/dev/amdgpu/pm4_queues.hh           |  7 +++++--
 4 files changed, 18 insertions(+), 4 deletions(-)

diff --git a/src/dev/amdgpu/pm4_mmio.hh b/src/dev/amdgpu/pm4_mmio.hh
index a3ce5f14e5..3801223175 100644
--- a/src/dev/amdgpu/pm4_mmio.hh
+++ b/src/dev/amdgpu/pm4_mmio.hh
@@ -60,6 +60,7 @@ namespace gem5
 #define mmCP_HQD_PQ_RPTR_REPORT_ADDR_HI                               0x1251
 #define mmCP_HQD_PQ_WPTR_POLL_ADDR                                    0x1252
 #define mmCP_HQD_PQ_WPTR_POLL_ADDR_HI                                 0x1253
+#define mmCP_HQD_PQ_CONTROL                                           0x1256
 #define mmCP_HQD_IB_CONTROL                                           0x125a
 #define mmCP_HQD_PQ_WPTR_LO                                           0x127b
 #define mmCP_HQD_PQ_WPTR_HI                                           0x127c
diff --git a/src/dev/amdgpu/pm4_packet_processor.cc b/src/dev/amdgpu/pm4_packet_processor.cc
index 4f98f18d16..f78f8333a6 100644
--- a/src/dev/amdgpu/pm4_packet_processor.cc
+++ b/src/dev/amdgpu/pm4_packet_processor.cc
@@ -147,8 +147,8 @@ PM4PacketProcessor::newQueue(QueueDesc *mqd, Addr offset,
     gpuDevice->setDoorbellType(offset, qt);
 
     DPRINTF(PM4PacketProcessor, "New PM4 queue %d, base: %p offset: %p, me: "
-            "%d, pipe %d queue: %d\n", id, q->base(), q->offset(), q->me(),
-            q->pipe(), q->queue());
+            "%d, pipe %d queue: %d size: %d\n", id, q->base(), q->offset(),
+            q->me(), q->pipe(), q->queue(), q->size());
 }
 
 void
@@ -790,6 +790,9 @@ PM4PacketProcessor::writeMMIO(PacketPtr pkt, Addr mmio_offset)
       case mmCP_HQD_PQ_WPTR_POLL_ADDR_HI:
         setHqdPqWptrPollAddrHi(pkt->getLE<uint32_t>());
         break;
+      case mmCP_HQD_PQ_CONTROL:
+        setHqdPqControl(pkt->getLE<uint32_t>());
+        break;
       case mmCP_HQD_IB_CONTROL:
         setHqdIbCtrl(pkt->getLE<uint32_t>());
         break;
@@ -911,6 +914,12 @@ PM4PacketProcessor::setHqdPqWptrPollAddrHi(uint32_t data)
     kiq.hqd_pq_wptr_poll_addr_hi = data;
 }
 
+void
+PM4PacketProcessor::setHqdPqControl(uint32_t data)
+{
+    kiq.hqd_pq_control = data;
+}
+
 void
 PM4PacketProcessor::setHqdIbCtrl(uint32_t data)
 {
diff --git a/src/dev/amdgpu/pm4_packet_processor.hh b/src/dev/amdgpu/pm4_packet_processor.hh
index 48066713a5..4617a21a06 100644
--- a/src/dev/amdgpu/pm4_packet_processor.hh
+++ b/src/dev/amdgpu/pm4_packet_processor.hh
@@ -171,6 +171,7 @@ class PM4PacketProcessor : public DmaVirtDevice
     void setHqdPqRptrReportAddrHi(uint32_t data);
     void setHqdPqWptrPollAddr(uint32_t data);
     void setHqdPqWptrPollAddrHi(uint32_t data);
+    void setHqdPqControl(uint32_t data);
     void setHqdIbCtrl(uint32_t data);
     void setRbVmid(uint32_t data);
     void setRbCntl(uint32_t data);
diff --git a/src/dev/amdgpu/pm4_queues.hh b/src/dev/amdgpu/pm4_queues.hh
index 19973b113e..8b6626d176 100644
--- a/src/dev/amdgpu/pm4_queues.hh
+++ b/src/dev/amdgpu/pm4_queues.hh
@@ -396,14 +396,14 @@ class PM4Queue
     rptr()
     {
         if (ib()) return q->ibBase + q->ibRptr;
-        else return q->base + q->rptr;
+        else return q->base + (q->rptr % size());
     }
 
     Addr
     wptr()
     {
         if (ib()) return q->ibBase + _ibWptr;
-        else return q->base + _wptr;
+        else return q->base + (_wptr % size());
     }
 
     Addr
@@ -470,6 +470,9 @@ class PM4Queue
     uint32_t pipe() { return _pkt.pipe; }
     uint32_t queue() { return _pkt.queueSlot; }
     bool privileged() { return _pkt.queueSel == 0 ? 1 : 0; }
+
+    // Same computation as processMQD. See comment there for details.
+    uint64_t size() { return 4UL << ((q->hqd_pq_control & 0x3f) + 1); }
 };
 
 } // namespace gem5

From 8693d725e202002893aafc4ac814bfa87c86ae76 Mon Sep 17 00:00:00 2001
From: Matthew Poremba <matthew.poremba@amd.com>
Date: Tue, 8 Nov 2022 19:58:07 -0800
Subject: [PATCH 004/492] arch-vega: Fix SOPK instruction sign extends

See: https://gem5-review.googlesource.com/c/public/gem5/+/37495

Same patch but for vega. This fixes issues with lulesh and probably
rodinia - heartwall as well in fullsystem.

Change-Id: I3af36bb9b60d32dc96cc3b439bb1167be1b0945d
Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/65432
Reviewed-by: Matt Sinclair <mattdsinclair@gmail.com>
Maintainer: Matt Sinclair <mattdsinclair@gmail.com>
Tested-by: kokoro <noreply+kokoro@google.com>
---
 src/arch/amdgpu/vega/insts/instructions.cc | 20 ++++++++++----------
 1 file changed, 10 insertions(+), 10 deletions(-)

diff --git a/src/arch/amdgpu/vega/insts/instructions.cc b/src/arch/amdgpu/vega/insts/instructions.cc
index 76bb8aad49..f5b08b7ce1 100644
--- a/src/arch/amdgpu/vega/insts/instructions.cc
+++ b/src/arch/amdgpu/vega/insts/instructions.cc
@@ -1553,7 +1553,7 @@ namespace VegaISA
     void
     Inst_SOPK__S_MOVK_I32::execute(GPUDynInstPtr gpuDynInst)
     {
-        ScalarRegI32 simm16 = (ScalarRegI32)instData.SIMM16;
+        ScalarRegI32 simm16 = (ScalarRegI32)sext<16>(instData.SIMM16);
         ScalarOperandI32 sdst(gpuDynInst, instData.SDST);
 
         sdst = simm16;
@@ -1579,7 +1579,7 @@ namespace VegaISA
     void
     Inst_SOPK__S_CMOVK_I32::execute(GPUDynInstPtr gpuDynInst)
     {
-        ScalarRegI32 simm16 = (ScalarRegI32)instData.SIMM16;
+        ScalarRegI32 simm16 = (ScalarRegI32)sext<16>(instData.SIMM16);
         ScalarOperandI32 sdst(gpuDynInst, instData.SDST);
         ConstScalarOperandU32 scc(gpuDynInst, REG_SCC);
 
@@ -1607,7 +1607,7 @@ namespace VegaISA
     void
     Inst_SOPK__S_CMPK_EQ_I32::execute(GPUDynInstPtr gpuDynInst)
     {
-        ScalarRegI32 simm16 = (ScalarRegI32)instData.SIMM16;
+        ScalarRegI32 simm16 = (ScalarRegI32)sext<16>(instData.SIMM16);
         ConstScalarOperandI32 src(gpuDynInst, instData.SDST);
         ScalarOperandU32 scc(gpuDynInst, REG_SCC);
 
@@ -1634,7 +1634,7 @@ namespace VegaISA
     void
     Inst_SOPK__S_CMPK_LG_I32::execute(GPUDynInstPtr gpuDynInst)
     {
-        ScalarRegI32 simm16 = (ScalarRegI32)instData.SIMM16;
+        ScalarRegI32 simm16 = (ScalarRegI32)sext<16>(instData.SIMM16);
         ConstScalarOperandI32 src(gpuDynInst, instData.SDST);
         ScalarOperandU32 scc(gpuDynInst, REG_SCC);
 
@@ -1661,7 +1661,7 @@ namespace VegaISA
     void
     Inst_SOPK__S_CMPK_GT_I32::execute(GPUDynInstPtr gpuDynInst)
     {
-        ScalarRegI32 simm16 = (ScalarRegI32)instData.SIMM16;
+        ScalarRegI32 simm16 = (ScalarRegI32)sext<16>(instData.SIMM16);
         ConstScalarOperandI32 src(gpuDynInst, instData.SDST);
         ScalarOperandU32 scc(gpuDynInst, REG_SCC);
 
@@ -1688,7 +1688,7 @@ namespace VegaISA
     void
     Inst_SOPK__S_CMPK_GE_I32::execute(GPUDynInstPtr gpuDynInst)
     {
-        ScalarRegI32 simm16 = (ScalarRegI32)instData.SIMM16;
+        ScalarRegI32 simm16 = (ScalarRegI32)sext<16>(instData.SIMM16);
         ConstScalarOperandI32 src(gpuDynInst, instData.SDST);
         ScalarOperandU32 scc(gpuDynInst, REG_SCC);
 
@@ -1715,7 +1715,7 @@ namespace VegaISA
     void
     Inst_SOPK__S_CMPK_LT_I32::execute(GPUDynInstPtr gpuDynInst)
     {
-        ScalarRegI32 simm16 = (ScalarRegI32)instData.SIMM16;
+        ScalarRegI32 simm16 = (ScalarRegI32)sext<16>(instData.SIMM16);
         ConstScalarOperandI32 src(gpuDynInst, instData.SDST);
         ScalarOperandU32 scc(gpuDynInst, REG_SCC);
 
@@ -1742,7 +1742,7 @@ namespace VegaISA
     void
     Inst_SOPK__S_CMPK_LE_I32::execute(GPUDynInstPtr gpuDynInst)
     {
-        ScalarRegI32 simm16 = (ScalarRegI32)instData.SIMM16;
+        ScalarRegI32 simm16 = (ScalarRegI32)sext<16>(instData.SIMM16);
         ConstScalarOperandI32 src(gpuDynInst, instData.SDST);
         ScalarOperandU32 scc(gpuDynInst, REG_SCC);
 
@@ -1939,7 +1939,7 @@ namespace VegaISA
 
         src.read();
 
-        sdst = src.rawData() + (ScalarRegI32)simm16;
+        sdst = src.rawData() + (ScalarRegI32)sext<16>(simm16);
         scc = (bits(src.rawData(), 31) == bits(simm16, 15)
             && bits(src.rawData(), 31) != bits(sdst.rawData(), 31)) ? 1 : 0;
 
@@ -1969,7 +1969,7 @@ namespace VegaISA
 
         src.read();
 
-        sdst = src.rawData() * (ScalarRegI32)simm16;
+        sdst = src.rawData() * (ScalarRegI32)sext<16>(simm16);
 
         sdst.write();
     } // execute

From 78b978686c5195fa9b4574c1285649872ea34a3e Mon Sep 17 00:00:00 2001
From: Jasjeet Rangi <jasrangi@ucdavis.edu>
Date: Mon, 7 Nov 2022 15:09:24 -0800
Subject: [PATCH 005/492] stdlib: Fix get_isa_from_str() exception behavior in
 isas.py

When given an input string that does not match any valid ISA, the
get_isa_from_str() function should call get_isas_str_set() to to print
the valid ISA strings in the exception. The current behavior is to
recursively call get_isa_from_str() with no input, which prevents
the correct exception from being raised. This change causes the
correct exception to be raised for invalid inputs.

Change-Id: I92bfe862bbd99ce0b63bfc124e539fab3b175e0c
Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/65311
Maintainer: Bobby Bruce <bbruce@ucdavis.edu>
Reviewed-by: Bobby Bruce <bbruce@ucdavis.edu>
Tested-by: kokoro <noreply+kokoro@google.com>
---
 src/python/gem5/isas.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/python/gem5/isas.py b/src/python/gem5/isas.py
index c904c9d227..84f02b87e7 100644
--- a/src/python/gem5/isas.py
+++ b/src/python/gem5/isas.py
@@ -81,7 +81,7 @@ def get_isa_from_str(input: str) -> ISA:
             return isa
 
     valid_isas_str_list = str()
-    for isa_str in get_isa_from_str():
+    for isa_str in get_isas_str_set():
         valid_isas_str_list += f"{os.linesep}{isa_str}"
 
     raise Exception(

From dff879cf21ee609cca3662073cd89cb9322146be Mon Sep 17 00:00:00 2001
From: vramadas95 <vramadas@wisc.edu>
Date: Thu, 10 Nov 2022 20:42:25 -0600
Subject: [PATCH 006/492] configs, gpu-compute: Add configurable L1 scalar
 latencies

Previously the scalar cache path used the same latency parameter as the
vector cache path for memory requests. This commit adds new parameters
for the scalar cache path latencies. This commit also modifies the model
to use the new latency parameter to set the memory request latency in
the scalar cache. The new paramters are '--scalar-mem-req-latency' and
'--scalar-mem-resp-latency' and are set to default values of 50 and 0
respectively

Change-Id: I7483f780f2fc0cfbc320ed1fd0c2ee3e2dfc7af2
Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/65511
Reviewed-by: Matt Sinclair <mattdsinclair@gmail.com>
Maintainer: Jason Lowe-Power <power.jg@gmail.com>
Reviewed-by: Jason Lowe-Power <power.jg@gmail.com>
Tested-by: kokoro <noreply+kokoro@google.com>
Maintainer: Matt Sinclair <mattdsinclair@gmail.com>
---
 configs/example/apu_se.py       | 17 +++++++++++++++++
 src/gpu-compute/GPU.py          | 13 +++++++++++++
 src/gpu-compute/compute_unit.cc |  6 +++++-
 src/gpu-compute/compute_unit.hh |  2 ++
 4 files changed, 37 insertions(+), 1 deletion(-)

diff --git a/configs/example/apu_se.py b/configs/example/apu_se.py
index b33daa5b39..39def024fc 100644
--- a/configs/example/apu_se.py
+++ b/configs/example/apu_se.py
@@ -275,6 +275,21 @@ parser.add_argument(
     default=50,
     help="Latency for responses from ruby to the cu.",
 )
+parser.add_argument(
+    "--scalar-mem-req-latency",
+    type=int,
+    default=50,
+    help="Latency for scalar requests from the cu to ruby.",
+)
+parser.add_argument(
+    "--scalar-mem-resp-latency",
+    type=int,
+    # Set to 0 as the scalar cache response path does not model
+    # response latency yet and this parameter is currently not used
+    default=0,
+    help="Latency for scalar responses from ruby to the cu.",
+)
+
 parser.add_argument(
     "--TLB-prefetch", type=int, help="prefetch depth for" "TLBs"
 )
@@ -463,6 +478,8 @@ for i in range(n_cu):
             vrf_lm_bus_latency=args.vrf_lm_bus_latency,
             mem_req_latency=args.mem_req_latency,
             mem_resp_latency=args.mem_resp_latency,
+            scalar_mem_req_latency=args.scalar_mem_req_latency,
+            scalar_mem_resp_latency=args.scalar_mem_resp_latency,
             localDataStore=LdsState(
                 banks=args.numLdsBanks,
                 bankConflictPenalty=args.ldsBankConflictPenalty,
diff --git a/src/gpu-compute/GPU.py b/src/gpu-compute/GPU.py
index 517d1801c0..0fdc0b75a7 100644
--- a/src/gpu-compute/GPU.py
+++ b/src/gpu-compute/GPU.py
@@ -178,6 +178,19 @@ class ComputeUnit(ClockedObject):
         "TCP and cu as well as TCP data array "
         "access. Specified in GPU clock cycles",
     )
+    scalar_mem_req_latency = Param.Int(
+        50,
+        "Latency for scalar requests from the cu to ruby. "
+        "Represents the pipeline to reach the TCP "
+        "and specified in GPU clock cycles",
+    )
+    scalar_mem_resp_latency = Param.Int(
+        50,
+        "Latency for scalar responses from ruby to the "
+        "cu. Represents the pipeline between the "
+        "TCP and cu as well as TCP data array "
+        "access. Specified in GPU clock cycles",
+    )
     system = Param.System(Parent.any, "system object")
     cu_id = Param.Int("CU id")
     vrf_to_coalescer_bus_width = Param.Int(
diff --git a/src/gpu-compute/compute_unit.cc b/src/gpu-compute/compute_unit.cc
index 8498ea475e..62cfbf94cf 100644
--- a/src/gpu-compute/compute_unit.cc
+++ b/src/gpu-compute/compute_unit.cc
@@ -98,6 +98,10 @@ ComputeUnit::ComputeUnit(const Params &p) : ClockedObject(p),
     countPages(p.countPages),
     req_tick_latency(p.mem_req_latency * p.clk_domain->clockPeriod()),
     resp_tick_latency(p.mem_resp_latency * p.clk_domain->clockPeriod()),
+    scalar_req_tick_latency(
+            p.scalar_mem_req_latency * p.clk_domain->clockPeriod()),
+    scalar_resp_tick_latency(
+            p.scalar_mem_resp_latency * p.clk_domain->clockPeriod()),
     _requestorId(p.system->getRequestorId(this, "ComputeUnit")),
     lds(*p.localDataStore), gmTokenPort(name() + ".gmTokenPort", this),
     ldsPort(csprintf("%s-port", name()), this),
@@ -1786,7 +1790,7 @@ ComputeUnit::ScalarDTLBPort::recvTimingResp(PacketPtr pkt)
             = new ComputeUnit::ScalarDataPort::MemReqEvent
                 (computeUnit->scalarDataPort, req_pkt);
     computeUnit->schedule(scalar_mem_req_event, curTick() +
-                          computeUnit->req_tick_latency);
+                          computeUnit->scalar_req_tick_latency);
 
     return true;
 }
diff --git a/src/gpu-compute/compute_unit.hh b/src/gpu-compute/compute_unit.hh
index a080e3dc1a..fcc4468ec1 100644
--- a/src/gpu-compute/compute_unit.hh
+++ b/src/gpu-compute/compute_unit.hh
@@ -354,6 +354,8 @@ class ComputeUnit : public ClockedObject
 
     Tick req_tick_latency;
     Tick resp_tick_latency;
+    Tick scalar_req_tick_latency;
+    Tick scalar_resp_tick_latency;
 
     /**
      * Number of WFs to schedule to each SIMD. This vector is populated

From a49cba948049b7b8a3a30a586160c8198292ff51 Mon Sep 17 00:00:00 2001
From: "Bobby R. Bruce" <bbruce@ucdavis.edu>
Date: Thu, 10 Nov 2022 17:07:11 -0800
Subject: [PATCH 007/492] arch-x86: X86ISA default vector_string to
 HygonGenuine

This commit,
https://gem5-review.googlesource.com/c/public/gem5/+/64831, changed the
default 'vendor_string' for the 'X86ISA' SimObject from 'M5 Simulator'
and 'AuthenticAMD'. Unforunately due to an issue highlighted here:
https://gem5.atlassian.net/browse/GEM5-1300 we cannot use the
'AuthenticAMD'. Therefore, this change updates the default vector_string
to HygonGenuine.

The HygonGenuine is simple but works.

Change-Id: I21421da8ae73e76d9daaf2fdd0b3238d5b309172
Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/65492
Tested-by: kokoro <noreply+kokoro@google.com>
Maintainer: Bobby Bruce <bbruce@ucdavis.edu>
Reviewed-by: Bobby Bruce <bbruce@ucdavis.edu>
---
 src/arch/x86/X86ISA.py | 11 ++++++++++-
 1 file changed, 10 insertions(+), 1 deletion(-)

diff --git a/src/arch/x86/X86ISA.py b/src/arch/x86/X86ISA.py
index 2760b7672d..bb72c415e9 100644
--- a/src/arch/x86/X86ISA.py
+++ b/src/arch/x86/X86ISA.py
@@ -42,6 +42,15 @@ class X86ISA(BaseISA):
     cxx_class = "gem5::X86ISA::ISA"
     cxx_header = "arch/x86/isa.hh"
 
+    # Here we set the default vector string to "HygonGenuine". Previously this
+    # "M5 Simulator" but due to stricter checks in newer versions of GLIBC,
+    # the CPUID is checked for the required features. As "M5 Simulator" is not
+    # genuine CPUID, an error is returned. This change
+    # https://gem5-review.googlesource.com/c/public/gem5/+/64831 changed this
+    # to "GenuineAMD" but due to issues with booting the Linux Kernel using
+    # this vector string (highlighted here:
+    # https://gem5.atlassian.net/browse/GEM5-1300) we opted to use
+    # "HygonGenuine" instead.
     vendor_string = Param.String(
-        "AuthenticAMD", "Vendor string for CPUID instruction"
+        "HygonGenuine", "Vendor string for CPUID instruction"
     )

From 6651329cc57862ef02ad48dcded762fd8ee43604 Mon Sep 17 00:00:00 2001
From: Quentin Forcioli <quentin.forcioli@telecom-paris.fr>
Date: Thu, 25 Aug 2022 14:52:09 +0200
Subject: [PATCH 008/492] base: query now works the same way normal command
 worked

Query can now return true or false like normal command, to interrupt
execution, it might be needed if a query need to wait for another event.

Change-Id: Ic463287ecd88e6b63a53f2cb9a46c83d3419618c
Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/63537
Reviewed-by: Bobby Bruce <bbruce@ucdavis.edu>
Tested-by: kokoro <noreply+kokoro@google.com>
Maintainer: Bobby Bruce <bbruce@ucdavis.edu>
---
 src/base/remote_gdb.cc | 29 +++++++++++++++++------------
 src/base/remote_gdb.hh | 16 ++++++++--------
 2 files changed, 25 insertions(+), 20 deletions(-)

diff --git a/src/base/remote_gdb.cc b/src/base/remote_gdb.cc
index da3f113f38..c19dede748 100644
--- a/src/base/remote_gdb.cc
+++ b/src/base/remote_gdb.cc
@@ -1333,13 +1333,14 @@ std::map<std::string, BaseRemoteGDB::QuerySetCommand>
     { "sThreadInfo", { &BaseRemoteGDB::querySThreadInfo } },
 };
 
-void
+bool
 BaseRemoteGDB::queryC(QuerySetCommand::Context &ctx)
 {
     send("QC%x", encodeThreadId(tc->contextId()));
+    return true;
 }
 
-void
+bool
 BaseRemoteGDB::querySupported(QuerySetCommand::Context &ctx)
 {
     std::ostringstream oss;
@@ -1350,9 +1351,10 @@ BaseRemoteGDB::querySupported(QuerySetCommand::Context &ctx)
     for (const auto& feature : availableFeatures())
         oss << ';' << feature;
     send(oss.str());
+    return true;
 }
 
-void
+bool
 BaseRemoteGDB::queryXfer(QuerySetCommand::Context &ctx)
 {
     auto split = splitAt(ctx.args.at(0), ":");
@@ -1391,15 +1393,16 @@ BaseRemoteGDB::queryXfer(QuerySetCommand::Context &ctx)
     std::string encoded;
     encodeXferResponse(content, encoded, offset, length);
     send(encoded);
+    return true;
 }
-void
+bool
 BaseRemoteGDB::querySymbol(QuerySetCommand::Context &ctx)
 {
     //The target does not need to look up any (more) symbols.
     send("OK");
+    return true;
 }
-
-void
+bool
 BaseRemoteGDB::queryAttached(QuerySetCommand::Context &ctx)
 {
     std::string pid="";
@@ -1409,17 +1412,19 @@ BaseRemoteGDB::queryAttached(QuerySetCommand::Context &ctx)
     DPRINTF(GDBMisc, "QAttached : pid=%s\n",pid);
     //The remote server is attached to an existing process.
     send("1");
+    return true;
 }
 
 
-void
+bool
 BaseRemoteGDB::queryFThreadInfo(QuerySetCommand::Context &ctx)
 {
     threadInfoIdx = 0;
     querySThreadInfo(ctx);
+    return true;
 }
 
-void
+bool
 BaseRemoteGDB::querySThreadInfo(QuerySetCommand::Context &ctx)
 {
     if (threadInfoIdx >= threads.size()) {
@@ -1430,6 +1435,7 @@ BaseRemoteGDB::querySThreadInfo(QuerySetCommand::Context &ctx)
         std::advance(it, threadInfoIdx++);
         send("m%x", encodeThreadId(it->second->contextId()));
     }
+    return true;
 }
 
 bool
@@ -1461,10 +1467,9 @@ BaseRemoteGDB::cmdQueryVar(GdbCommand::Context &ctx)
             remaining = std::move(arg_split.second);
         }
     }
-
-    (this->*(query.func))(qctx);
-
-    return true;
+    //returning true if the query want to pursue GDB command processing
+    //false means that the command processing stop until it's trigger again.
+    return (this->*(query.func))(qctx);
 }
 
 std::vector<std::string>
diff --git a/src/base/remote_gdb.hh b/src/base/remote_gdb.hh
index ad64bc721c..4da1dcc0c8 100644
--- a/src/base/remote_gdb.hh
+++ b/src/base/remote_gdb.hh
@@ -416,7 +416,7 @@ class BaseRemoteGDB
             Context(const std::string &_name) : name(_name) {}
         };
 
-        using Func = void (BaseRemoteGDB::*)(Context &ctx);
+        using Func = bool (BaseRemoteGDB::*)(Context &ctx);
 
         const char * const argSep;
         const Func func;
@@ -428,15 +428,15 @@ class BaseRemoteGDB
 
     static std::map<std::string, QuerySetCommand> queryMap;
 
-    void queryC(QuerySetCommand::Context &ctx);
-    void querySupported(QuerySetCommand::Context &ctx);
-    void queryXfer(QuerySetCommand::Context &ctx);
-    void querySymbol(QuerySetCommand::Context &ctx);
-    void queryAttached(QuerySetCommand::Context &ctx);
+    bool queryC(QuerySetCommand::Context &ctx);
+    bool querySupported(QuerySetCommand::Context &ctx);
+    bool queryXfer(QuerySetCommand::Context &ctx);
+    bool querySymbol(QuerySetCommand::Context &ctx);
+    bool queryAttached(QuerySetCommand::Context &ctx);
 
     size_t threadInfoIdx = 0;
-    void queryFThreadInfo(QuerySetCommand::Context &ctx);
-    void querySThreadInfo(QuerySetCommand::Context &ctx);
+    bool queryFThreadInfo(QuerySetCommand::Context &ctx);
+    bool querySThreadInfo(QuerySetCommand::Context &ctx);
 
   protected:
     ThreadContext *context() { return tc; }

From 33a36d35dea1ac9ed9e2b45d85ed78f6c5aae600 Mon Sep 17 00:00:00 2001
From: Matthew Poremba <matthew.poremba@amd.com>
Date: Thu, 17 Nov 2022 08:54:43 -0800
Subject: [PATCH 009/492] dev-amdgpu: Store SDMA queue type, use for ring ID

Currently the SDMA queue type is guessed in the trap method by looking
at which queue in the engine is processing packets. It is possible for
both queues to be processing (e.g., one queue sent a DMA and is waiting
then switch to another queue), triggering an assert.

Instead store the queue type in the queue itself and use that type in
trap to determine which ring ID to use for the interrupt packet.

Change-Id: If91c458e60a03f2013c0dc42bab0b1673e3dbd84
Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/65691
Maintainer: Jason Lowe-Power <power.jg@gmail.com>
Reviewed-by: Jason Lowe-Power <power.jg@gmail.com>
Tested-by: kokoro <noreply+kokoro@google.com>
---
 src/dev/amdgpu/sdma_engine.cc | 10 +++++-----
 src/dev/amdgpu/sdma_engine.hh |  5 ++++-
 2 files changed, 9 insertions(+), 6 deletions(-)

diff --git a/src/dev/amdgpu/sdma_engine.cc b/src/dev/amdgpu/sdma_engine.cc
index 59c5027c85..02203c8178 100644
--- a/src/dev/amdgpu/sdma_engine.cc
+++ b/src/dev/amdgpu/sdma_engine.cc
@@ -55,11 +55,15 @@ SDMAEngine::SDMAEngine(const SDMAEngineParams &p)
     gfxIb.parent(&gfx);
     gfx.valid(true);
     gfxIb.valid(true);
+    gfx.queueType(SDMAGfx);
+    gfxIb.queueType(SDMAGfx);
 
     page.ib(&pageIb);
     pageIb.parent(&page);
     page.valid(true);
     pageIb.valid(true);
+    page.queueType(SDMAPage);
+    pageIb.queueType(SDMAPage);
 
     rlc0.ib(&rlc0Ib);
     rlc0Ib.parent(&rlc0);
@@ -727,11 +731,7 @@ SDMAEngine::trap(SDMAQueue *q, sdmaTrap *pkt)
 
     DPRINTF(SDMAEngine, "Trap contextId: %p\n", pkt->intrContext);
 
-    uint32_t ring_id = 0;
-    assert(page.processing() ^ gfx.processing());
-    if (page.processing()) {
-        ring_id = 3;
-    }
+    uint32_t ring_id = (q->queueType() == SDMAPage) ? 3 : 0;
 
     gpuDevice->getIH()->prepareInterruptCookie(pkt->intrContext, ring_id,
                                                getIHClientId(), TRAP_ID);
diff --git a/src/dev/amdgpu/sdma_engine.hh b/src/dev/amdgpu/sdma_engine.hh
index d0afaf7a4a..0bfee126c9 100644
--- a/src/dev/amdgpu/sdma_engine.hh
+++ b/src/dev/amdgpu/sdma_engine.hh
@@ -64,9 +64,10 @@ class SDMAEngine : public DmaVirtDevice
         bool _processing;
         SDMAQueue *_parent;
         SDMAQueue *_ib;
+        SDMAType _type;
       public:
         SDMAQueue() : _rptr(0), _wptr(0), _valid(false), _processing(false),
-            _parent(nullptr), _ib(nullptr) {}
+            _parent(nullptr), _ib(nullptr), _type(SDMAGfx) {}
 
         Addr base() { return _base; }
         Addr rptr() { return _base + _rptr; }
@@ -80,6 +81,7 @@ class SDMAEngine : public DmaVirtDevice
         bool processing() { return _processing; }
         SDMAQueue* parent() { return _parent; }
         SDMAQueue* ib() { return _ib; }
+        SDMAType queueType() { return _type; }
 
         void base(Addr value) { _base = value; }
 
@@ -111,6 +113,7 @@ class SDMAEngine : public DmaVirtDevice
         void processing(bool value) { _processing = value; }
         void parent(SDMAQueue* q) { _parent = q; }
         void ib(SDMAQueue* ib) { _ib = ib; }
+        void queueType(SDMAType type) { _type = type; }
     };
 
     /* SDMA Engine ID */

From ec75787aef56665e893d70293bf3a0f93c33bb6a Mon Sep 17 00:00:00 2001
From: "Bobby R. Bruce" <bbruce@ucdavis.edu>
Date: Thu, 17 Nov 2022 15:48:34 -0800
Subject: [PATCH 010/492] arch-arm: Revert 'Setup TC/ISA at construction
 time..'
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Reverts:

dd2f1fb2f8520849f10fc25fc5eab5beaa90a7d4
https://gem5-review.googlesource.com/c/public/gem5/+/65174

and

47bd56ee71ba1d684138365e7123aa779989ba1d
https://gem5-review.googlesource.com/c/public/gem5/+/65291

The 47bd56ee change resulted in the
`SuiteUID:tests/gem5/fs/linux/arm/test.py:realview-switcheroo-noncaching-timing-ALL-x86_64-opt`
nightly test stalling. This behavior can be reproduced with:

```
./build/ALL/gem5.opt tests/gem5/fs/linux/arm/run.py tests/gem5/configs/realview-switcheroo-noncaching-timing.py tests/gem5/resources/arm “$(pwd)”
```

The subsequent change, dd2f1fb2, must be reverted for this change to be
reverted.

Change-Id: I6fed74f33d013f321b93cf1a73eee404cb87ce18
Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/65732
Reviewed-by: Jason Lowe-Power <power.jg@gmail.com>
Maintainer: Bobby Bruce <bbruce@ucdavis.edu>
Tested-by: kokoro <noreply+kokoro@google.com>
---
 src/arch/arm/isa.cc                 | 20 +++++++++++---------
 src/dev/arm/gic_v3.cc               |  2 +-
 src/dev/arm/gic_v3_cpu_interface.cc | 17 +++++++++++------
 src/dev/arm/gic_v3_cpu_interface.hh |  9 +++++----
 4 files changed, 28 insertions(+), 20 deletions(-)

diff --git a/src/arch/arm/isa.cc b/src/arch/arm/isa.cc
index fd19f721b2..a30fd94596 100644
--- a/src/arch/arm/isa.cc
+++ b/src/arch/arm/isa.cc
@@ -523,6 +523,16 @@ ISA::setupThreadContext()
         return;
 
     selfDebug->init(tc);
+
+    Gicv3 *gicv3 = dynamic_cast<Gicv3 *>(system->getGIC());
+    if (!gicv3)
+        return;
+
+    if (!gicv3CpuInterface)
+        gicv3CpuInterface.reset(gicv3->getCPUInterface(tc->contextId()));
+
+    gicv3CpuInterface->setISA(this);
+    gicv3CpuInterface->setThreadContext(tc);
 }
 
 void
@@ -1998,15 +2008,7 @@ ISA::getGenericTimer()
 BaseISADevice &
 ISA::getGICv3CPUInterface()
 {
-    if (gicv3CpuInterface)
-        return *gicv3CpuInterface.get();
-
-    assert(system);
-    Gicv3 *gicv3 = dynamic_cast<Gicv3 *>(system->getGIC());
-    panic_if(!gicv3, "The system does not have a GICv3 irq controller\n");
-
-    gicv3CpuInterface.reset(gicv3->getCPUInterface(tc->contextId()));
-
+    panic_if(!gicv3CpuInterface, "GICV3 cpu interface is not registered!");
     return *gicv3CpuInterface.get();
 }
 
diff --git a/src/dev/arm/gic_v3.cc b/src/dev/arm/gic_v3.cc
index e14d1f2bef..dde3818b07 100644
--- a/src/dev/arm/gic_v3.cc
+++ b/src/dev/arm/gic_v3.cc
@@ -147,7 +147,7 @@ Gicv3::init()
 
     for (int i = 0; i < threads; i++) {
         redistributors[i] = new Gicv3Redistributor(this, i);
-        cpuInterfaces[i] = new Gicv3CPUInterface(this, sys->threads[i]);
+        cpuInterfaces[i] = new Gicv3CPUInterface(this, i);
     }
 
     distRange = RangeSize(params().dist_addr,
diff --git a/src/dev/arm/gic_v3_cpu_interface.cc b/src/dev/arm/gic_v3_cpu_interface.cc
index a11dd9b8ed..0e1dbaa04b 100644
--- a/src/dev/arm/gic_v3_cpu_interface.cc
+++ b/src/dev/arm/gic_v3_cpu_interface.cc
@@ -55,19 +55,15 @@ using namespace ArmISA;
 const uint8_t Gicv3CPUInterface::GIC_MIN_BPR;
 const uint8_t Gicv3CPUInterface::GIC_MIN_BPR_NS;
 
-Gicv3CPUInterface::Gicv3CPUInterface(Gicv3 * gic, ThreadContext *_tc)
+Gicv3CPUInterface::Gicv3CPUInterface(Gicv3 * gic, uint32_t cpu_id)
     : BaseISADevice(),
       gic(gic),
       redistributor(nullptr),
       distributor(nullptr),
-      tc(_tc),
-      maintenanceInterrupt(gic->params().maint_int->get(tc)),
-      cpuId(tc->contextId())
+      cpuId(cpu_id)
 {
     hppi.prio = 0xff;
     hppi.intid = Gicv3::INTID_SPURIOUS;
-
-    setISA(static_cast<ISA*>(tc->getIsaPtr()));
 }
 
 void
@@ -84,6 +80,15 @@ Gicv3CPUInterface::resetHppi(uint32_t intid)
         hppi.prio = 0xff;
 }
 
+void
+Gicv3CPUInterface::setThreadContext(ThreadContext *_tc)
+{
+    tc = _tc;
+    maintenanceInterrupt = gic->params().maint_int->get(tc);
+    fatal_if(maintenanceInterrupt->num() >= redistributor->irqPending.size(),
+        "Invalid maintenance interrupt number\n");
+}
+
 bool
 Gicv3CPUInterface::getHCREL2FMO() const
 {
diff --git a/src/dev/arm/gic_v3_cpu_interface.hh b/src/dev/arm/gic_v3_cpu_interface.hh
index c39fab7647..e860373fb5 100644
--- a/src/dev/arm/gic_v3_cpu_interface.hh
+++ b/src/dev/arm/gic_v3_cpu_interface.hh
@@ -68,11 +68,11 @@ class Gicv3CPUInterface : public ArmISA::BaseISADevice, public Serializable
     Gicv3 * gic;
     Gicv3Redistributor * redistributor;
     Gicv3Distributor * distributor;
-
-    ThreadContext *tc;
-    ArmInterruptPin *maintenanceInterrupt;
     uint32_t cpuId;
 
+    ArmInterruptPin *maintenanceInterrupt;
+    ThreadContext *tc;
+
     BitUnion64(ICC_CTLR_EL1)
         Bitfield<63, 20> res0_3;
         Bitfield<19>     ExtRange;
@@ -359,7 +359,7 @@ class Gicv3CPUInterface : public ArmISA::BaseISADevice, public Serializable
     void setBankedMiscReg(ArmISA::MiscRegIndex misc_reg, RegVal val) const;
   public:
 
-    Gicv3CPUInterface(Gicv3 * gic, ThreadContext *tc);
+    Gicv3CPUInterface(Gicv3 * gic, uint32_t cpu_id);
 
     void init();
 
@@ -369,6 +369,7 @@ class Gicv3CPUInterface : public ArmISA::BaseISADevice, public Serializable
   public: // BaseISADevice
     RegVal readMiscReg(int misc_reg) override;
     void setMiscReg(int misc_reg, RegVal val) override;
+    void setThreadContext(ThreadContext *tc) override;
 };
 
 } // namespace gem5

From 5eb73551bda1ecdc632cf50f27eb45ff2dbf1bfa Mon Sep 17 00:00:00 2001
From: Yu-hsin Wang <yuhsingw@google.com>
Date: Mon, 14 Nov 2022 16:11:57 +0800
Subject: [PATCH 011/492] fastmodel: CortexR52 export standbywfi signal

Change-Id: Ic9ed9a3e35f068e151725d36e7fff391013ff5d1
Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/65534
Reviewed-by: Gabe Black <gabe.black@gmail.com>
Tested-by: kokoro <noreply+kokoro@google.com>
Maintainer: Gabe Black <gabe.black@gmail.com>
---
 .../fastmodel/CortexR52/FastModelCortexR52.py |  5 ++-
 .../arm/fastmodel/CortexR52/cortex_r52.cc     |  4 ++
 src/arch/arm/fastmodel/CortexR52/evs.cc       | 10 +++++
 src/arch/arm/fastmodel/CortexR52/evs.hh       |  1 +
 src/arch/arm/fastmodel/CortexR52/x1/x1.lisa   |  4 ++
 src/arch/arm/fastmodel/CortexR52/x2/x2.lisa   |  4 ++
 src/arch/arm/fastmodel/CortexR52/x3/x3.lisa   |  4 ++
 src/arch/arm/fastmodel/CortexR52/x4/x4.lisa   |  4 ++
 .../arm/fastmodel/common/signal_receiver.hh   | 37 +++++++++++++++++++
 9 files changed, 72 insertions(+), 1 deletion(-)

diff --git a/src/arch/arm/fastmodel/CortexR52/FastModelCortexR52.py b/src/arch/arm/fastmodel/CortexR52/FastModelCortexR52.py
index 4970ae2ed4..1e267f028f 100644
--- a/src/arch/arm/fastmodel/CortexR52/FastModelCortexR52.py
+++ b/src/arch/arm/fastmodel/CortexR52/FastModelCortexR52.py
@@ -31,7 +31,7 @@ from m5.objects.ArmInterrupts import ArmInterrupts
 from m5.objects.ArmISA import ArmISA
 from m5.objects.FastModel import AmbaInitiatorSocket, AmbaTargetSocket
 from m5.objects.ResetPort import ResetResponsePort
-from m5.objects.IntPin import IntSinkPin, VectorIntSinkPin
+from m5.objects.IntPin import IntSourcePin, IntSinkPin, VectorIntSinkPin
 from m5.objects.Iris import IrisBaseCPU
 from m5.objects.SystemC import SystemC_ScModule
 
@@ -56,6 +56,9 @@ class FastModelCortexR52(IrisBaseCPU):
         "processor logic, including debug logic."
     )
     halt = IntSinkPin("Raising this signal will put the core into halt mode.")
+    standbywfi = IntSourcePin(
+        "This signal indicates if a core is in WFI state."
+    )
 
     CFGEND = Param.Bool(
         False,
diff --git a/src/arch/arm/fastmodel/CortexR52/cortex_r52.cc b/src/arch/arm/fastmodel/CortexR52/cortex_r52.cc
index be83082d16..9dfe7a5158 100644
--- a/src/arch/arm/fastmodel/CortexR52/cortex_r52.cc
+++ b/src/arch/arm/fastmodel/CortexR52/cortex_r52.cc
@@ -92,6 +92,10 @@ CortexR52::getPort(const std::string &if_name, PortID idx)
         // Since PPIs are indexed both by core and by number, modify the name
         // to hold the core number.
         return evs->gem5_getPort(csprintf("%s_%d", if_name, num), idx);
+    } else if (if_name == "standbywfi") {
+        // Since standbywfi is indexed by fanout, modify the name to hold the
+        // core number.
+        return evs->gem5_getPort(csprintf("%s_%d", if_name, num), idx);
     } else if (if_name == "amba" || if_name == "llpp" || if_name == "flash" ||
                if_name == "core_reset" || if_name == "poweron_reset" ||
                if_name == "halt") {
diff --git a/src/arch/arm/fastmodel/CortexR52/evs.cc b/src/arch/arm/fastmodel/CortexR52/evs.cc
index 6887c6ced9..734323e026 100644
--- a/src/arch/arm/fastmodel/CortexR52/evs.cc
+++ b/src/arch/arm/fastmodel/CortexR52/evs.cc
@@ -79,6 +79,7 @@ ScxEvsCortexR52<Types>::CorePins::CorePins(Evs *_evs, int _cpu) :
     core_reset(name + ".core_reset", 0),
     poweron_reset(name + ".poweron_reset", 0),
     halt(name + ".halt", 0),
+    standbywfi(name + ".standbywfi"),
     cfgvectable((name + "cfgvectable").c_str())
 {
     for (int i = 0; i < Evs::PpiCount; i++) {
@@ -88,6 +89,7 @@ ScxEvsCortexR52<Types>::CorePins::CorePins(Evs *_evs, int _cpu) :
     core_reset.signal_out.bind(evs->core_reset[cpu]);
     poweron_reset.signal_out.bind(evs->poweron_reset[cpu]);
     halt.signal_out.bind(evs->halt[cpu]);
+    evs->standbywfi[cpu].bind(standbywfi.signal_in);
     cfgvectable.bind(evs->cfgvectable[cpu]);
 }
 
@@ -161,6 +163,14 @@ ScxEvsCortexR52<Types>::gem5_getPort(const std::string &if_name, int idx)
             panic("Couldn't find CPU number in %s.", if_name);
         }
         return *this->corePins.at(cpu)->ppis.at(idx);
+    } else if (if_name.substr(0, 10) == "standbywfi") {
+        int cpu;
+        try {
+            cpu = std::stoi(if_name.substr(11));
+        } catch (const std::invalid_argument &a) {
+            panic("Couldn't find CPU number in %s.", if_name);
+        }
+        return this->corePins.at(cpu)->standbywfi.getSignalOut(idx);
     } else {
         return Base::gem5_getPort(if_name, idx);
     }
diff --git a/src/arch/arm/fastmodel/CortexR52/evs.hh b/src/arch/arm/fastmodel/CortexR52/evs.hh
index 535d678c34..02ef1ae257 100644
--- a/src/arch/arm/fastmodel/CortexR52/evs.hh
+++ b/src/arch/arm/fastmodel/CortexR52/evs.hh
@@ -110,6 +110,7 @@ class ScxEvsCortexR52 : public Types::Base, public Iris::BaseCpuEvs
         SignalSender core_reset;
         SignalSender poweron_reset;
         SignalSender halt;
+        SignalReceiverInt standbywfi;
 
         SignalInitiator<uint64_t> cfgvectable;
     };
diff --git a/src/arch/arm/fastmodel/CortexR52/x1/x1.lisa b/src/arch/arm/fastmodel/CortexR52/x1/x1.lisa
index 2a7299d77e..2738ba23f9 100644
--- a/src/arch/arm/fastmodel/CortexR52/x1/x1.lisa
+++ b/src/arch/arm/fastmodel/CortexR52/x1/x1.lisa
@@ -53,6 +53,9 @@ component CortexR52x1
         self.dbg_reset => core.presetdbg;
         self.halt => core.cpuhalt;
 
+        // Status signals.
+        core.standbywfi => self.standbywfi;
+
         // Clocks.
         clock1Hz.clk_out => clockDiv.clk_in;
         clock1Hz.clk_out => clockDivPeriph.clk_in;
@@ -79,6 +82,7 @@ component CortexR52x1
     slave port<Signal> core_reset[1];
     slave port<Signal> poweron_reset[1];
     slave port<Signal> halt[1];
+    master port<Signal> standbywfi[1];
     slave port<Signal> top_reset;
     slave port<Signal> dbg_reset;
     slave port<Value_64> cfgvectable[1];
diff --git a/src/arch/arm/fastmodel/CortexR52/x2/x2.lisa b/src/arch/arm/fastmodel/CortexR52/x2/x2.lisa
index 9100a5bcc2..485ffee983 100644
--- a/src/arch/arm/fastmodel/CortexR52/x2/x2.lisa
+++ b/src/arch/arm/fastmodel/CortexR52/x2/x2.lisa
@@ -53,6 +53,9 @@ component CortexR52x2
         self.dbg_reset => core.presetdbg;
         self.halt => core.cpuhalt;
 
+        // Status signals.
+        core.standbywfi => self.standbywfi;
+
         // Clocks.
         clock1Hz.clk_out => clockDiv.clk_in;
         clock1Hz.clk_out => clockDivPeriph.clk_in;
@@ -80,6 +83,7 @@ component CortexR52x2
     slave port<Signal> core_reset[2];
     slave port<Signal> poweron_reset[2];
     slave port<Signal> halt[2];
+    master port<Signal> standbywfi[2];
     slave port<Signal> top_reset;
     slave port<Signal> dbg_reset;
     slave port<Value_64> cfgvectable[2];
diff --git a/src/arch/arm/fastmodel/CortexR52/x3/x3.lisa b/src/arch/arm/fastmodel/CortexR52/x3/x3.lisa
index bb8d153f44..1e526d9958 100644
--- a/src/arch/arm/fastmodel/CortexR52/x3/x3.lisa
+++ b/src/arch/arm/fastmodel/CortexR52/x3/x3.lisa
@@ -53,6 +53,9 @@ component CortexR52x3
         self.dbg_reset => core.presetdbg;
         self.halt => core.cpuhalt;
 
+        // Status signals.
+        core.standbywfi => self.standbywfi;
+
         // Clocks.
         clock1Hz.clk_out => clockDiv.clk_in;
         clock1Hz.clk_out => clockDivPeriph.clk_in;
@@ -81,6 +84,7 @@ component CortexR52x3
     slave port<Signal> core_reset[3];
     slave port<Signal> poweron_reset[3];
     slave port<Signal> halt[3];
+    master port<Signal> standbywfi[3];
     slave port<Signal> top_reset;
     slave port<Signal> dbg_reset;
     slave port<Value_64> cfgvectable[3];
diff --git a/src/arch/arm/fastmodel/CortexR52/x4/x4.lisa b/src/arch/arm/fastmodel/CortexR52/x4/x4.lisa
index 5b278ddb41..df23bf17b4 100644
--- a/src/arch/arm/fastmodel/CortexR52/x4/x4.lisa
+++ b/src/arch/arm/fastmodel/CortexR52/x4/x4.lisa
@@ -53,6 +53,9 @@ component CortexR52x4
         self.dbg_reset => core.presetdbg;
         self.halt => core.cpuhalt;
 
+        // Status signals.
+        core.standbywfi => self.standbywfi;
+
         // Clocks.
         clock1Hz.clk_out => clockDiv.clk_in;
         clock1Hz.clk_out => clockDivPeriph.clk_in;
@@ -82,6 +85,7 @@ component CortexR52x4
     slave port<Signal> core_reset[4];
     slave port<Signal> poweron_reset[4];
     slave port<Signal> halt[4];
+    master port<Signal> standbywfi[4];
     slave port<Signal> top_reset;
     slave port<Signal> dbg_reset;
     slave port<Value_64> cfgvectable[4];
diff --git a/src/arch/arm/fastmodel/common/signal_receiver.hh b/src/arch/arm/fastmodel/common/signal_receiver.hh
index 0025e39173..990787743b 100644
--- a/src/arch/arm/fastmodel/common/signal_receiver.hh
+++ b/src/arch/arm/fastmodel/common/signal_receiver.hh
@@ -34,8 +34,12 @@
 #pragma GCC diagnostic pop
 
 #include <functional>
+#include <vector>
 
 #include "base/compiler.hh"
+#include "base/cprintf.hh"
+#include "base/types.hh"
+#include "dev/intpin.hh"
 
 namespace gem5
 {
@@ -80,6 +84,39 @@ class SignalReceiver : public amba_pv::signal_slave_base<bool>
     }
 };
 
+class SignalReceiverInt : public SignalReceiver
+{
+  public:
+    using IntPin = IntSourcePin<SignalReceiverInt>;
+
+    explicit SignalReceiverInt(const std::string &name)
+        : SignalReceiver(name)
+    {
+        onChange([this](bool status) {
+            for (auto &signal : signalOut) {
+                if (signal && signal->isConnected())
+                    status ? signal->raise() : signal->lower();
+            }
+        });
+    }
+
+    IntPin &
+    getSignalOut(int idx)
+    {
+        if (signalOut.size() <= idx) {
+            signalOut.resize(idx + 1);
+        }
+        if (!signalOut[idx]) {
+            signalOut[idx] = std::make_unique<IntPin>(
+                csprintf("%s.signalOut[%d]", get_name(), idx), idx, this);
+        }
+        return *signalOut[idx];
+    }
+
+  private:
+    std::vector<std::unique_ptr<IntPin>> signalOut;
+};
+
 } // namespace fastmodel
 } // namespace gem5
 

From ff16ca3dafcb2228843502d20a5288fdd64a9538 Mon Sep 17 00:00:00 2001
From: Gabe Black <gabeblack@google.com>
Date: Tue, 27 Sep 2022 02:52:42 -0700
Subject: [PATCH 012/492] mem: Add a class to describe a back door request.

In cases where a back door is not being requested alongside a packet
or request, there needs to be a structure which describes the address
range to use, and what type of access the back door should support. It
would be possible to make a Packet/Request to carry that information,
but those types are actually pretty big, and have a lot of extra
overhead which would be overkill for this purpose.

Change-Id: I3638361ffa758ee959cb3bc57f7c35f2aa34a36c
Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/65751
Reviewed-by: Jason Lowe-Power <power.jg@gmail.com>
Maintainer: Gabe Black <gabeblack@google.com>
Tested-by: kokoro <noreply+kokoro@google.com>
---
 src/mem/backdoor.hh | 19 +++++++++++++++++++
 1 file changed, 19 insertions(+)

diff --git a/src/mem/backdoor.hh b/src/mem/backdoor.hh
index 73e667017d..54fe4acbd1 100644
--- a/src/mem/backdoor.hh
+++ b/src/mem/backdoor.hh
@@ -126,6 +126,25 @@ class MemBackdoor
 
 typedef MemBackdoor *MemBackdoorPtr;
 
+class MemBackdoorReq
+{
+  private:
+    AddrRange _range;
+    MemBackdoor::Flags _flags;
+
+  public:
+    MemBackdoorReq(AddrRange r, MemBackdoor::Flags new_flags) :
+        _range(r), _flags(new_flags)
+    {}
+
+    const AddrRange &range() const { return _range; }
+
+    bool readable() const { return _flags & MemBackdoor::Readable; }
+    bool writeable() const { return _flags & MemBackdoor::Writeable; }
+
+    MemBackdoor::Flags flags() const { return _flags; }
+};
+
 } // namespace gem5
 
 #endif  //__MEM_BACKDOOR_HH__

From 842a3a935fe0773ae204d0b5eb2b3eac0995b6ed Mon Sep 17 00:00:00 2001
From: Gabe Black <gabeblack@google.com>
Date: Tue, 27 Sep 2022 02:56:10 -0700
Subject: [PATCH 013/492] mem: Add an API for requesting a back door without a
 Packet/Request.

Make this part of the Functional protocol, since it should always
return immediately, can be shared by the atomic and timing protocols,
and thematically fits with that protocol.

The default implementation on the receiving end just ignores the
request and leaves the back door pointer set to null, effectively
making back doors default "off" which matches their behavior in the
atomic protocol.

This mechamism helps fix a bug in the TLM gem5 bridges which need to
translate to/from the DMI and back door mechanisms, where there can be
an explicit request for a back door which does not have a transaction
associated with it. It is also necessary for bridging DMI requests in
timing mode, since the DMI requests must be instant, and the timing
protocol does not send/receive packets instantly.

Change-Id: I905f13b9bc83c3fa7877b05ce932e17c308125e2
Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/65752
Tested-by: kokoro <noreply+kokoro@google.com>
Reviewed-by: Jason Lowe-Power <power.jg@gmail.com>
Maintainer: Gabe Black <gabeblack@google.com>
---
 src/mem/port.cc                | 16 ++++++++++++++++
 src/mem/port.hh                | 29 +++++++++++++++++++++++++++++
 src/mem/protocol/functional.cc |  8 ++++++++
 src/mem/protocol/functional.hh | 21 +++++++++++++++++++++
 4 files changed, 74 insertions(+)

diff --git a/src/mem/port.cc b/src/mem/port.cc
index 00f7ce6efa..18793d487b 100644
--- a/src/mem/port.cc
+++ b/src/mem/port.cc
@@ -102,6 +102,11 @@ class DefaultResponsePort : public ResponsePort
 
     // Functional protocol.
     void recvFunctional(PacketPtr) override { blowUp(); }
+    void
+    recvMemBackdoorReq(const MemBackdoorReq &, MemBackdoorPtr &) override
+    {
+        blowUp();
+    }
 
     // General.
     AddrRangeList getAddrRanges() const override { return AddrRangeList(); }
@@ -205,4 +210,15 @@ ResponsePort::recvAtomicBackdoor(PacketPtr pkt, MemBackdoorPtr &backdoor)
     return recvAtomic(pkt);
 }
 
+void
+ResponsePort::recvMemBackdoorReq(const MemBackdoorReq &req,
+        MemBackdoorPtr &backdoor)
+{
+    if (!defaultBackdoorWarned) {
+        DPRINTF(ResponsePort,
+                "Port %s doesn't support requesting a back door.", name());
+        defaultBackdoorWarned = true;
+    }
+}
+
 } // namespace gem5
diff --git a/src/mem/port.hh b/src/mem/port.hh
index 33ff117cf2..fb0f4b8812 100644
--- a/src/mem/port.hh
+++ b/src/mem/port.hh
@@ -161,6 +161,21 @@ class RequestPort: public Port, public AtomicRequestProtocol,
      */
     void sendFunctional(PacketPtr pkt) const;
 
+    /**
+     * Send a request for a back door to a range of memory.
+     *
+     * @param req An object which describes what back door is being requested.
+     * @param backdoor Can be set to a back door pointer by the target to let
+     *        caller have direct access to the requested range. The original
+     *        caller should initialize this pointer to nullptr. If a receiver
+     *        does not want to provide a back door, they should leave this
+     *        value. If an intermediary wants to support a back door across it,
+     *        it should pass this pointer through, or if not, return without
+     *        passing the request further downstream.
+     */
+    void sendMemBackdoorReq(const MemBackdoorReq &req,
+            MemBackdoorPtr &backdoor);
+
   public:
     /* The timing protocol. */
 
@@ -438,6 +453,8 @@ class ResponsePort : public Port, public AtomicResponseProtocol,
      * Default implementations.
      */
     Tick recvAtomicBackdoor(PacketPtr pkt, MemBackdoorPtr &backdoor) override;
+    void recvMemBackdoorReq(const MemBackdoorReq &req,
+            MemBackdoorPtr &backdoor) override;
 
     bool
     tryTiming(PacketPtr pkt) override
@@ -491,6 +508,18 @@ RequestPort::sendFunctional(PacketPtr pkt) const
     }
 }
 
+inline void
+RequestPort::sendMemBackdoorReq(const MemBackdoorReq &req,
+        MemBackdoorPtr &backdoor)
+{
+    try {
+        return FunctionalRequestProtocol::sendMemBackdoorReq(
+                _responsePort, req, backdoor);
+    } catch (UnboundPortException) {
+        reportUnbound();
+    }
+}
+
 inline bool
 RequestPort::sendTimingReq(PacketPtr pkt)
 {
diff --git a/src/mem/protocol/functional.cc b/src/mem/protocol/functional.cc
index 0f54d92a76..29cec23bc3 100644
--- a/src/mem/protocol/functional.cc
+++ b/src/mem/protocol/functional.cc
@@ -53,6 +53,14 @@ FunctionalRequestProtocol::send(
     return peer->recvFunctional(pkt);
 }
 
+void
+FunctionalRequestProtocol::sendMemBackdoorReq(
+        FunctionalResponseProtocol *peer,
+        const MemBackdoorReq &req, MemBackdoorPtr &backdoor)
+{
+    return peer->recvMemBackdoorReq(req, backdoor);
+}
+
 /* The response protocol. */
 
 void
diff --git a/src/mem/protocol/functional.hh b/src/mem/protocol/functional.hh
index 27db171b2d..4f330b4788 100644
--- a/src/mem/protocol/functional.hh
+++ b/src/mem/protocol/functional.hh
@@ -41,6 +41,7 @@
 #ifndef __MEM_GEM5_PROTOCOL_FUNCTIONAL_HH__
 #define __MEM_GEM5_PROTOCOL_FUNCTIONAL_HH__
 
+#include "mem/backdoor.hh"
 #include "mem/packet.hh"
 
 namespace gem5
@@ -66,6 +67,16 @@ class FunctionalRequestProtocol
      * Receive a functional snoop request packet from the peer.
      */
     virtual void recvFunctionalSnoop(PacketPtr pkt) = 0;
+
+    /**
+     * Send a request for a back door to a range of memory.
+     *
+     * @param req An object which describes what back door is being requested.
+     * @param backdoor Can be set to a back door pointer by the target to let
+     *        caller have direct access to the requested range.
+     */
+    void sendMemBackdoorReq(FunctionalResponseProtocol *peer,
+            const MemBackdoorReq &req, MemBackdoorPtr &backdoor);
 };
 
 class FunctionalResponseProtocol
@@ -86,6 +97,16 @@ class FunctionalResponseProtocol
      * Receive a functional request packet from the peer.
      */
     virtual void recvFunctional(PacketPtr pkt) = 0;
+
+    /**
+     * Receive a request for a back door to a range of memory.
+     *
+     * @param req An object which describes what back door is being requested.
+     * @param backdoor Can be set to a back door pointer by the target to let
+     *        caller have direct access to the requested range.
+     */
+    virtual void recvMemBackdoorReq(const MemBackdoorReq &req,
+            MemBackdoorPtr &backdoor) = 0;
 };
 
 } // namespace gem5

From d7b3020324782bd0382ff800fb27b165cd3c65e3 Mon Sep 17 00:00:00 2001
From: Gabe Black <gabeblack@google.com>
Date: Sat, 1 Oct 2022 03:18:20 -0700
Subject: [PATCH 014/492] dev,mem,systemc: Implement and use the
 recvMemBackdoorReq func.

Change-Id: If6e12d4fcef0c31131a9768099a72542a8f62ab1
Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/65753
Tested-by: kokoro <noreply+kokoro@google.com>
Reviewed-by: Jason Lowe-Power <power.jg@gmail.com>
Maintainer: Gabe Black <gabeblack@google.com>
Reviewed-by: Jui-min Lee <fcrh@google.com>
---
 src/mem/cfi_mem.cc                    | 15 ++++++++++++
 src/mem/cfi_mem.hh                    |  4 +++
 src/mem/coherent_xbar.cc              |  8 ++++++
 src/mem/coherent_xbar.hh              | 12 +++++++++
 src/mem/mem_ctrl.cc                   | 18 ++++++++++++++
 src/mem/mem_ctrl.hh                   |  4 +++
 src/mem/noncoherent_xbar.cc           |  8 ++++++
 src/mem/noncoherent_xbar.hh           |  9 +++++++
 src/mem/simple_mem.cc                 | 14 +++++++++++
 src/mem/simple_mem.hh                 |  4 +++
 src/mem/sys_bridge.hh                 |  7 ++++++
 src/systemc/tlm_bridge/gem5_to_tlm.cc | 25 +++++++++++++++++++
 src/systemc/tlm_bridge/gem5_to_tlm.hh |  9 +++++++
 src/systemc/tlm_bridge/tlm_to_gem5.cc | 35 +++++++++++++++------------
 14 files changed, 156 insertions(+), 16 deletions(-)

diff --git a/src/mem/cfi_mem.cc b/src/mem/cfi_mem.cc
index 70dc43fca8..f8c1084700 100644
--- a/src/mem/cfi_mem.cc
+++ b/src/mem/cfi_mem.cc
@@ -275,6 +275,14 @@ CfiMemory::recvFunctional(PacketPtr pkt)
     pkt->popLabel();
 }
 
+void
+CfiMemory::recvMemBackdoorReq(const MemBackdoorReq &req,
+        MemBackdoorPtr &_backdoor)
+{
+    if (backdoor.ptr())
+        _backdoor = &backdoor;
+}
+
 bool
 CfiMemory::recvTimingReq(PacketPtr pkt)
 {
@@ -486,6 +494,13 @@ CfiMemory::MemoryPort::recvFunctional(PacketPtr pkt)
     mem.recvFunctional(pkt);
 }
 
+void
+CfiMemory::MemoryPort::recvMemBackdoorReq(const MemBackdoorReq &req,
+        MemBackdoorPtr &_backdoor)
+{
+    mem.recvMemBackdoorReq(req, _backdoor);
+}
+
 bool
 CfiMemory::MemoryPort::recvTimingReq(PacketPtr pkt)
 {
diff --git a/src/mem/cfi_mem.hh b/src/mem/cfi_mem.hh
index 5a7a1c57aa..4a0226736a 100644
--- a/src/mem/cfi_mem.hh
+++ b/src/mem/cfi_mem.hh
@@ -248,6 +248,8 @@ class CfiMemory : public AbstractMemory
         Tick recvAtomicBackdoor(
                 PacketPtr pkt, MemBackdoorPtr &_backdoor) override;
         void recvFunctional(PacketPtr pkt) override;
+        void recvMemBackdoorReq(const MemBackdoorReq &req,
+                MemBackdoorPtr &_backdoor) override;
         bool recvTimingReq(PacketPtr pkt) override;
         void recvRespRetry() override;
         AddrRangeList getAddrRanges() const override;
@@ -361,6 +363,8 @@ class CfiMemory : public AbstractMemory
     Tick recvAtomic(PacketPtr pkt);
     Tick recvAtomicBackdoor(PacketPtr pkt, MemBackdoorPtr &_backdoor);
     void recvFunctional(PacketPtr pkt);
+    void recvMemBackdoorReq(const MemBackdoorReq &req,
+            MemBackdoorPtr &_backdoor);
     bool recvTimingReq(PacketPtr pkt);
     void recvRespRetry();
 
diff --git a/src/mem/coherent_xbar.cc b/src/mem/coherent_xbar.cc
index 7d1cd5d57d..8163299a09 100644
--- a/src/mem/coherent_xbar.cc
+++ b/src/mem/coherent_xbar.cc
@@ -997,6 +997,14 @@ CoherentXBar::forwardAtomic(PacketPtr pkt, PortID exclude_cpu_side_port_id,
     return std::make_pair(snoop_response_cmd, snoop_response_latency);
 }
 
+void
+CoherentXBar::recvMemBackdoorReq(const MemBackdoorReq &req,
+        MemBackdoorPtr &backdoor)
+{
+    PortID dest_id = findPort(req.range());
+    memSidePorts[dest_id]->sendMemBackdoorReq(req, backdoor);
+}
+
 void
 CoherentXBar::recvFunctional(PacketPtr pkt, PortID cpu_side_port_id)
 {
diff --git a/src/mem/coherent_xbar.hh b/src/mem/coherent_xbar.hh
index 1c55cc00c8..9693d9225e 100644
--- a/src/mem/coherent_xbar.hh
+++ b/src/mem/coherent_xbar.hh
@@ -136,6 +136,13 @@ class CoherentXBar : public BaseXBar
             xbar.recvFunctional(pkt, id);
         }
 
+        void
+        recvMemBackdoorReq(const MemBackdoorReq &req,
+                MemBackdoorPtr &backdoor) override
+        {
+            xbar.recvMemBackdoorReq(req, backdoor);
+        }
+
         AddrRangeList
         getAddrRanges() const override
         {
@@ -374,6 +381,11 @@ class CoherentXBar : public BaseXBar
         transaction.*/
     void recvFunctional(PacketPtr pkt, PortID cpu_side_port_id);
 
+    /** Function called by the port when the crossbar receives a request for
+        a memory backdoor.*/
+    void recvMemBackdoorReq(const MemBackdoorReq &req,
+            MemBackdoorPtr &backdoor);
+
     /** Function called by the port when the crossbar is receiving a functional
         snoop transaction.*/
     void recvFunctionalSnoop(PacketPtr pkt, PortID mem_side_port_id);
diff --git a/src/mem/mem_ctrl.cc b/src/mem/mem_ctrl.cc
index c65d68a5a7..beaace1cbf 100644
--- a/src/mem/mem_ctrl.cc
+++ b/src/mem/mem_ctrl.cc
@@ -1364,6 +1364,17 @@ MemCtrl::recvFunctional(PacketPtr pkt)
              pkt->print());
 }
 
+void
+MemCtrl::recvMemBackdoorReq(const MemBackdoorReq &req,
+        MemBackdoorPtr &backdoor)
+{
+    panic_if(!dram->getAddrRange().contains(req.range().start()),
+            "Can't handle address range for backdoor %s.",
+            req.range().to_string());
+
+    dram->getBackdoor(backdoor);
+}
+
 bool
 MemCtrl::recvFunctionalLogic(PacketPtr pkt, MemInterface* mem_intr)
 {
@@ -1474,6 +1485,13 @@ MemCtrl::MemoryPort::recvFunctional(PacketPtr pkt)
     pkt->popLabel();
 }
 
+void
+MemCtrl::MemoryPort::recvMemBackdoorReq(const MemBackdoorReq &req,
+        MemBackdoorPtr &backdoor)
+{
+    ctrl.recvMemBackdoorReq(req, backdoor);
+}
+
 Tick
 MemCtrl::MemoryPort::recvAtomic(PacketPtr pkt)
 {
diff --git a/src/mem/mem_ctrl.hh b/src/mem/mem_ctrl.hh
index fe5d478280..2819fb4caa 100644
--- a/src/mem/mem_ctrl.hh
+++ b/src/mem/mem_ctrl.hh
@@ -267,6 +267,8 @@ class MemCtrl : public qos::MemCtrl
                 PacketPtr pkt, MemBackdoorPtr &backdoor) override;
 
         void recvFunctional(PacketPtr pkt) override;
+        void recvMemBackdoorReq(const MemBackdoorReq &req,
+                MemBackdoorPtr &backdoor) override;
 
         bool recvTimingReq(PacketPtr) override;
 
@@ -784,6 +786,8 @@ class MemCtrl : public qos::MemCtrl
     virtual Tick recvAtomic(PacketPtr pkt);
     virtual Tick recvAtomicBackdoor(PacketPtr pkt, MemBackdoorPtr &backdoor);
     virtual void recvFunctional(PacketPtr pkt);
+    virtual void recvMemBackdoorReq(const MemBackdoorReq &req,
+            MemBackdoorPtr &backdoor);
     virtual bool recvTimingReq(PacketPtr pkt);
 
     bool recvFunctionalLogic(PacketPtr pkt, MemInterface* mem_intr);
diff --git a/src/mem/noncoherent_xbar.cc b/src/mem/noncoherent_xbar.cc
index 67efdba84a..0a378e2c63 100644
--- a/src/mem/noncoherent_xbar.cc
+++ b/src/mem/noncoherent_xbar.cc
@@ -284,6 +284,14 @@ NoncoherentXBar::recvAtomicBackdoor(PacketPtr pkt, PortID cpu_side_port_id,
     return response_latency;
 }
 
+void
+NoncoherentXBar::recvMemBackdoorReq(const MemBackdoorReq &req,
+        MemBackdoorPtr &backdoor)
+{
+    PortID dest_id = findPort(req.range());
+    memSidePorts[dest_id]->sendMemBackdoorReq(req, backdoor);
+}
+
 void
 NoncoherentXBar::recvFunctional(PacketPtr pkt, PortID cpu_side_port_id)
 {
diff --git a/src/mem/noncoherent_xbar.hh b/src/mem/noncoherent_xbar.hh
index ab833148b5..03f751b77d 100644
--- a/src/mem/noncoherent_xbar.hh
+++ b/src/mem/noncoherent_xbar.hh
@@ -126,6 +126,13 @@ class NoncoherentXBar : public BaseXBar
             xbar.recvFunctional(pkt, id);
         }
 
+        void
+        recvMemBackdoorReq(const MemBackdoorReq &req,
+                MemBackdoorPtr &backdoor) override
+        {
+            xbar.recvMemBackdoorReq(req, backdoor);
+        }
+
         AddrRangeList
         getAddrRanges() const override
         {
@@ -179,6 +186,8 @@ class NoncoherentXBar : public BaseXBar
     Tick recvAtomicBackdoor(PacketPtr pkt, PortID cpu_side_port_id,
                             MemBackdoorPtr *backdoor=nullptr);
     void recvFunctional(PacketPtr pkt, PortID cpu_side_port_id);
+    void recvMemBackdoorReq(const MemBackdoorReq &req,
+            MemBackdoorPtr &backdoor);
 
   public:
 
diff --git a/src/mem/simple_mem.cc b/src/mem/simple_mem.cc
index ced3a38cf4..27fcac1183 100644
--- a/src/mem/simple_mem.cc
+++ b/src/mem/simple_mem.cc
@@ -108,6 +108,13 @@ SimpleMemory::recvFunctional(PacketPtr pkt)
     pkt->popLabel();
 }
 
+void
+SimpleMemory::recvMemBackdoorReq(const MemBackdoorReq &req,
+        MemBackdoorPtr &_backdoor)
+{
+    getBackdoor(_backdoor);
+}
+
 bool
 SimpleMemory::recvTimingReq(PacketPtr pkt)
 {
@@ -294,6 +301,13 @@ SimpleMemory::MemoryPort::recvFunctional(PacketPtr pkt)
     mem.recvFunctional(pkt);
 }
 
+void
+SimpleMemory::MemoryPort::recvMemBackdoorReq(const MemBackdoorReq &req,
+        MemBackdoorPtr &backdoor)
+{
+    mem.recvMemBackdoorReq(req, backdoor);
+}
+
 bool
 SimpleMemory::MemoryPort::recvTimingReq(PacketPtr pkt)
 {
diff --git a/src/mem/simple_mem.hh b/src/mem/simple_mem.hh
index fc6d6849d5..75a03fbe0e 100644
--- a/src/mem/simple_mem.hh
+++ b/src/mem/simple_mem.hh
@@ -98,6 +98,8 @@ class SimpleMemory : public AbstractMemory
         Tick recvAtomicBackdoor(
                 PacketPtr pkt, MemBackdoorPtr &_backdoor) override;
         void recvFunctional(PacketPtr pkt) override;
+        void recvMemBackdoorReq(const MemBackdoorReq &req,
+                MemBackdoorPtr &backdoor) override;
         bool recvTimingReq(PacketPtr pkt) override;
         void recvRespRetry() override;
         AddrRangeList getAddrRanges() const override;
@@ -191,6 +193,8 @@ class SimpleMemory : public AbstractMemory
     Tick recvAtomic(PacketPtr pkt);
     Tick recvAtomicBackdoor(PacketPtr pkt, MemBackdoorPtr &_backdoor);
     void recvFunctional(PacketPtr pkt);
+    void recvMemBackdoorReq(const MemBackdoorReq &req,
+            MemBackdoorPtr &backdoor);
     bool recvTimingReq(PacketPtr pkt);
     void recvRespRetry();
 };
diff --git a/src/mem/sys_bridge.hh b/src/mem/sys_bridge.hh
index 8fa3131f25..15a3fc8270 100644
--- a/src/mem/sys_bridge.hh
+++ b/src/mem/sys_bridge.hh
@@ -331,6 +331,13 @@ class SysBridge : public SimObject
                     pkt->requestorId());
         }
 
+        void
+        recvMemBackdoorReq(const MemBackdoorReq &req,
+                MemBackdoorPtr &backdoor) override
+        {
+            targetPort->sendMemBackdoorReq(req, backdoor);
+        }
+
         AddrRangeList
         getAddrRanges() const override
         {
diff --git a/src/systemc/tlm_bridge/gem5_to_tlm.cc b/src/systemc/tlm_bridge/gem5_to_tlm.cc
index 10f7d1a9c7..a5eb9df27e 100644
--- a/src/systemc/tlm_bridge/gem5_to_tlm.cc
+++ b/src/systemc/tlm_bridge/gem5_to_tlm.cc
@@ -509,6 +509,31 @@ Gem5ToTlmBridge<BITWIDTH>::recvFunctional(PacketPtr packet)
     trans->release();
 }
 
+template <unsigned int BITWIDTH>
+void
+Gem5ToTlmBridge<BITWIDTH>::recvMemBackdoorReq(const MemBackdoorReq &req,
+        MemBackdoorPtr &backdoor)
+{
+    // Create a transaction to send along to TLM's get_direct_mem_ptr.
+    tlm::tlm_generic_payload *trans = mm.allocate();
+    trans->acquire();
+    trans->set_address(req.range().start());
+    trans->set_data_length(req.range().size());
+    trans->set_streaming_width(req.range().size());
+    trans->set_data_ptr(nullptr);
+
+    if (req.writeable())
+        trans->set_command(tlm::TLM_WRITE_COMMAND);
+    else if (req.readable())
+        trans->set_command(tlm::TLM_READ_COMMAND);
+    else
+        trans->set_command(tlm::TLM_IGNORE_COMMAND);
+
+    backdoor = getBackdoor(*trans);
+
+    trans->release();
+}
+
 template <unsigned int BITWIDTH>
 tlm::tlm_sync_enum
 Gem5ToTlmBridge<BITWIDTH>::nb_transport_bw(tlm::tlm_generic_payload &trans,
diff --git a/src/systemc/tlm_bridge/gem5_to_tlm.hh b/src/systemc/tlm_bridge/gem5_to_tlm.hh
index 0cb925ee55..23415b843b 100644
--- a/src/systemc/tlm_bridge/gem5_to_tlm.hh
+++ b/src/systemc/tlm_bridge/gem5_to_tlm.hh
@@ -62,6 +62,7 @@
 #include <functional>
 #include <string>
 
+#include "mem/backdoor.hh"
 #include "mem/port.hh"
 #include "params/Gem5ToTlmBridgeBase.hh"
 #include "sim/system.hh"
@@ -117,6 +118,12 @@ class Gem5ToTlmBridge : public Gem5ToTlmBridgeBase
         {
             return bridge.recvFunctional(pkt);
         }
+        void
+        recvMemBackdoorReq(const gem5::MemBackdoorReq &req,
+                gem5::MemBackdoorPtr &backdoor) override
+        {
+            bridge.recvMemBackdoorReq(req, backdoor);
+        }
         bool
         recvTimingReq(gem5::PacketPtr pkt) override
         {
@@ -179,6 +186,8 @@ class Gem5ToTlmBridge : public Gem5ToTlmBridgeBase
     gem5::Tick recvAtomicBackdoor(gem5::PacketPtr pkt,
         gem5::MemBackdoorPtr &backdoor);
     void recvFunctional(gem5::PacketPtr packet);
+    void recvMemBackdoorReq(const gem5::MemBackdoorReq &req,
+            gem5::MemBackdoorPtr &backdoor);
     bool recvTimingReq(gem5::PacketPtr packet);
     bool tryTiming(gem5::PacketPtr packet);
     bool recvTimingSnoopResp(gem5::PacketPtr packet);
diff --git a/src/systemc/tlm_bridge/tlm_to_gem5.cc b/src/systemc/tlm_bridge/tlm_to_gem5.cc
index 703e118dee..468ea83f37 100644
--- a/src/systemc/tlm_bridge/tlm_to_gem5.cc
+++ b/src/systemc/tlm_bridge/tlm_to_gem5.cc
@@ -401,13 +401,26 @@ bool
 TlmToGem5Bridge<BITWIDTH>::get_direct_mem_ptr(tlm::tlm_generic_payload &trans,
                                               tlm::tlm_dmi &dmi_data)
 {
-    auto [pkt, pkt_created] = payload2packet(_id, trans);
-    pkt->pushSenderState(new Gem5SystemC::TlmSenderState(trans));
-    if (pkt_created)
-        pkt->req->setFlags(Request::NO_ACCESS);
+    MemBackdoor::Flags flags;
+    switch (trans.get_command()) {
+      case tlm::TLM_READ_COMMAND:
+        flags = MemBackdoor::Readable;
+        break;
+      case tlm::TLM_WRITE_COMMAND:
+        flags = MemBackdoor::Writeable;
+        break;
+      default:
+        panic("TlmToGem5Bridge: "
+                "received transaction with unsupported command");
+    }
+    Addr start_addr = trans.get_address();
+    Addr length = trans.get_data_length();
 
+    MemBackdoorReq req({start_addr, start_addr + length}, flags);
     MemBackdoorPtr backdoor = nullptr;
-    bmp.sendAtomicBackdoor(pkt, backdoor);
+
+    bmp.sendMemBackdoorReq(req, backdoor);
+
     if (backdoor) {
         trans.set_dmi_allowed(true);
         dmi_data.set_dmi_ptr(backdoor->ptr());
@@ -434,17 +447,7 @@ TlmToGem5Bridge<BITWIDTH>::get_direct_mem_ptr(tlm::tlm_generic_payload &trans,
         }
     }
 
-    gem5::Packet::SenderState *senderState = pkt->popSenderState();
-    sc_assert(
-        nullptr != dynamic_cast<Gem5SystemC::TlmSenderState*>(senderState));
-
-    // clean up
-    delete senderState;
-
-    setPayloadResponse(trans, pkt);
-
-    if (pkt_created)
-        destroyPacket(pkt);
+    trans.set_response_status(tlm::TLM_OK_RESPONSE);
 
     return backdoor != nullptr;
 }

From 00c2f09bd966988c164a6e6f0b2667f2d4571064 Mon Sep 17 00:00:00 2001
From: "Bobby R. Bruce" <bbruce@ucdavis.edu>
Date: Mon, 21 Nov 2022 11:52:57 -0800
Subject: [PATCH 015/492] stdlib,configs: Update riscvmatched-fs example
 docstring

This documentation string provided in the
"config/example/gem5_library/riscvmatched-fs.py" was minimal. This patch
adds more detail.

Change-Id: I0f203ea6952fc72a078594d7c30853bd426017ff
Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/65851
Tested-by: kokoro <noreply+kokoro@google.com>
Reviewed-by: Bobby Bruce <bbruce@ucdavis.edu>
Maintainer: Bobby Bruce <bbruce@ucdavis.edu>
---
 configs/example/gem5_library/riscvmatched-fs.py | 7 +++----
 1 file changed, 3 insertions(+), 4 deletions(-)

diff --git a/configs/example/gem5_library/riscvmatched-fs.py b/configs/example/gem5_library/riscvmatched-fs.py
index da47a4be6c..1ed78e81a3 100644
--- a/configs/example/gem5_library/riscvmatched-fs.py
+++ b/configs/example/gem5_library/riscvmatched-fs.py
@@ -25,8 +25,8 @@
 # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 
 """
-This gem5 configuration script runs a full system Ubuntu image, Linux
-kernel and calls m5 exit after the kernel is loaded.
+This gem5 configuration script runs the RISCVMatchedBoard in FS mode with a
+an Ubuntu 20.04 image and calls m5 exit after the simulation has booted the OS.
 
 Usage
 ---
@@ -34,8 +34,7 @@ Usage
 ```
 scons build/RISCV/gem5.opt
 
-./build/RISCV/gem5.opt \
-    configs/example/gem5_library/riscvmatched-fs.py
+./build/RISCV/gem5.opt configs/example/gem5_library/riscvmatched-fs.py
 ```
 """
 

From 36f2964d1900f6cafb5596e3014625a38042aada Mon Sep 17 00:00:00 2001
From: "Bobby R. Bruce" <bbruce@ucdavis.edu>
Date: Mon, 21 Nov 2022 11:56:23 -0800
Subject: [PATCH 016/492] configs,stdlib: Fix import in riscvmatched-fs.py

Change-Id: I2ff4139457d32336f40c6655231064a12c4d8694
Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/65852
Tested-by: kokoro <noreply+kokoro@google.com>
Maintainer: Bobby Bruce <bbruce@ucdavis.edu>
Reviewed-by: Bobby Bruce <bbruce@ucdavis.edu>
---
 configs/example/gem5_library/riscvmatched-fs.py | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

diff --git a/configs/example/gem5_library/riscvmatched-fs.py b/configs/example/gem5_library/riscvmatched-fs.py
index 1ed78e81a3..8cf20d9da2 100644
--- a/configs/example/gem5_library/riscvmatched-fs.py
+++ b/configs/example/gem5_library/riscvmatched-fs.py
@@ -38,9 +38,7 @@ scons build/RISCV/gem5.opt
 ```
 """
 
-from python.gem5.prebuilt.riscvmatched.riscvmatched_board import (
-    RISCVMatchedBoard,
-)
+from gem5.prebuilt.riscvmatched.riscvmatched_board import RISCVMatchedBoard
 from gem5.utils.requires import requires
 from gem5.isas import ISA
 from gem5.simulate.simulator import Simulator

From 5794643e445ca49eec55b567c061f6f5fc3cc2bf Mon Sep 17 00:00:00 2001
From: "Bobby R. Bruce" <bbruce@ucdavis.edu>
Date: Mon, 21 Nov 2022 11:57:40 -0800
Subject: [PATCH 017/492] configs,stdlib,tests: Update riscvmatched-fs.py
 to-init

The "test-gem5-library-example-riscvmatched-fs" test, which runs
"configs/example/gem5_library/riscvmatched-fs.py", was running the
script in full. This takes a very long time. Given we already have boot
tests for RISCV, it's better to just run this configuration to just the
end of the Linux boot (significantly faster than a full OS boot). This
patch adds this feature to the config script and modifies the test to
utilize it.

Change-Id: I1e37a26aab5e9a127ebd64590be79fbc16fe53aa
Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/65853
Reviewed-by: Bobby Bruce <bbruce@ucdavis.edu>
Maintainer: Bobby Bruce <bbruce@ucdavis.edu>
Tested-by: kokoro <noreply+kokoro@google.com>
---
 .../example/gem5_library/riscvmatched-fs.py   | 26 ++++++++++++++++++-
 .../test_gem5_library_examples.py             |  2 +-
 2 files changed, 26 insertions(+), 2 deletions(-)

diff --git a/configs/example/gem5_library/riscvmatched-fs.py b/configs/example/gem5_library/riscvmatched-fs.py
index 8cf20d9da2..3e84b8c1ea 100644
--- a/configs/example/gem5_library/riscvmatched-fs.py
+++ b/configs/example/gem5_library/riscvmatched-fs.py
@@ -44,8 +44,23 @@ from gem5.isas import ISA
 from gem5.simulate.simulator import Simulator
 from gem5.resources.workload import Workload
 
+import argparse
+
 requires(isa_required=ISA.RISCV)
 
+parser = argparse.ArgumentParser(
+    description="A script which uses the RISCVMatchedBoard in FS mode."
+)
+
+parser.add_argument(
+    "-i",
+    "--to-init",
+    action="store_true",
+    help="Exit the simulation after the Linux Kernel boot.",
+)
+
+args = parser.parse_args()
+
 # instantiate the riscv matched board with default parameters
 board = RISCVMatchedBoard(
     clk_freq="1.2GHz",
@@ -57,7 +72,16 @@ board = RISCVMatchedBoard(
 # Ubuntu 20.04. Once the system successfully boots it encounters an `m5_exit`
 # instruction which stops the simulation. When the simulation has ended you may
 # inspect `m5out/system.pc.com_1.device` to see the stdout.
-board.set_workload(Workload("riscv-ubuntu-20.04-boot"))
+#
+# In the case where the `-i` flag is passed, we add the kernel argument
+# `init=/root/exit.sh`. This means the simulation will exit after the Linux
+# Kernel has booted.
+workload = Workload("riscv-ubuntu-20.04-boot")
+kernel_args = board.get_default_kernel_args()
+if args.to_init:
+    kernel_args.append("init=/root/exit.sh")
+workload.set_parameter("kernel_args", kernel_args)
+board.set_workload(workload)
 
 simulator = Simulator(board=board)
 simulator.run()
diff --git a/tests/gem5/gem5_library_example_tests/test_gem5_library_examples.py b/tests/gem5/gem5_library_example_tests/test_gem5_library_examples.py
index 28a10b588b..254b15cd4b 100644
--- a/tests/gem5/gem5_library_example_tests/test_gem5_library_examples.py
+++ b/tests/gem5/gem5_library_example_tests/test_gem5_library_examples.py
@@ -319,7 +319,7 @@ gem5_verify_config(
         "gem5_library",
         "riscvmatched-fs.py",
     ),
-    config_args=[],
+    config_args=["--to-init"],
     valid_isas=(constants.riscv_tag,),
     valid_hosts=constants.supported_hosts,
     length=constants.very_long_tag,

From db35dfb9426b01a2900b9c248834dd3e554622a9 Mon Sep 17 00:00:00 2001
From: "Bobby R. Bruce" <bbruce@ucdavis.edu>
Date: Mon, 21 Nov 2022 12:05:29 -0800
Subject: [PATCH 018/492] tests: Update riscvmatched tests to use ALL/gem5.opt

Where possible we are trying to use the ALL/gem5.opt compilation of
gem5. This change updates the riscvmatched tests to this.

Change-Id: I1c5f1d86cdf5cf29b8964f8a894a3476a7cb290a
Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/65854
Tested-by: kokoro <noreply+kokoro@google.com>
Reviewed-by: Bobby Bruce <bbruce@ucdavis.edu>
Maintainer: Bobby Bruce <bbruce@ucdavis.edu>
---
 .../gem5_library_example_tests/test_gem5_library_examples.py  | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/tests/gem5/gem5_library_example_tests/test_gem5_library_examples.py b/tests/gem5/gem5_library_example_tests/test_gem5_library_examples.py
index 254b15cd4b..9b5c2c67ff 100644
--- a/tests/gem5/gem5_library_example_tests/test_gem5_library_examples.py
+++ b/tests/gem5/gem5_library_example_tests/test_gem5_library_examples.py
@@ -303,7 +303,7 @@ gem5_verify_config(
         "riscvmatched-hello.py",
     ),
     config_args=[],
-    valid_isas=(constants.riscv_tag,),
+    valid_isas=(constants.all_compiled_tag,),
     valid_hosts=constants.supported_hosts,
     length=constants.long_tag,
 )
@@ -320,7 +320,7 @@ gem5_verify_config(
         "riscvmatched-fs.py",
     ),
     config_args=["--to-init"],
-    valid_isas=(constants.riscv_tag,),
+    valid_isas=(constants.all_compiled_tag,),
     valid_hosts=constants.supported_hosts,
     length=constants.very_long_tag,
 )

From da12e9650729e4411c5dbfc612f8842988751483 Mon Sep 17 00:00:00 2001
From: "Bobby R. Bruce" <bbruce@ucdavis.edu>
Date: Mon, 21 Nov 2022 13:25:57 -0800
Subject: [PATCH 019/492] configs: Add missing `_pre_instantiate` call in
 "run_lupv.py"

As of this change:
https://gem5-review.googlesource.com/c/public/gem5/+/65051, the
`_pre_instantiate` function must be called prior to `m5.instantiate`
when using the stdlib without the Simulator module.

Change-Id: Id5cec3b643d556b0f742719596abb53533b84cbd
Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/65871
Reviewed-by: Bobby Bruce <bbruce@ucdavis.edu>
Tested-by: kokoro <noreply+kokoro@google.com>
Reviewed-by: Jason Lowe-Power <power.jg@gmail.com>
Maintainer: Bobby Bruce <bbruce@ucdavis.edu>
---
 configs/example/lupv/run_lupv.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/configs/example/lupv/run_lupv.py b/configs/example/lupv/run_lupv.py
index f472f53c69..0056cf8bb4 100644
--- a/configs/example/lupv/run_lupv.py
+++ b/configs/example/lupv/run_lupv.py
@@ -107,6 +107,7 @@ board.set_kernel_disk_workload(
 print("Running with ISA: " + processor.get_isa().name)
 print()
 root = Root(full_system=True, system=board)
+board._pre_instantiate()
 m5.instantiate()
 print("Beginning simulation!")
 

From 7230a3e7f0f23621d9d09df3f7420c08a08cc118 Mon Sep 17 00:00:00 2001
From: Quentin Forcioli <quentin.forcioli@telecom-paris.fr>
Date: Thu, 18 Aug 2022 12:26:12 +0200
Subject: [PATCH 020/492] base,sim,ext: Adding GDB signals definition

GDB proposes a signal definition that is not necessarily identical
to the kernel's. To not lost GDB, we need to add this definition
 (in ext/remotegdb/signals.hh) and replace the linux signals everywhere
where they where used to interact with GDB.
 (otherwise it doesn't recognize some trap reasons).

Change-Id: I2bbfee36313cc766549000cf197c23c2561ea5f9
Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/63534
Reviewed-by: Jason Lowe-Power <power.jg@gmail.com>
Maintainer: Andreas Sandberg <andreas.sandberg@arm.com>
Reviewed-by: Andreas Sandberg <andreas.sandberg@arm.com>
Maintainer: Jason Lowe-Power <power.jg@gmail.com>
Reviewed-by: Quentin Forcioli <quentin.forcioli@telecom-paris.fr>
Tested-by: kokoro <noreply+kokoro@google.com>
---
 ext/gdbremote/signals.hh | 181 +++++++++++++++++++++++++++++++++++++++
 src/arch/power/faults.cc |   6 +-
 src/base/remote_gdb.cc   |  35 ++++----
 src/base/remote_gdb.hh   |  21 ++---
 src/sim/faults.cc        |   4 +-
 src/sim/system.cc        |   2 +-
 src/sim/system.hh        |   2 +-
 src/sim/workload.cc      |   2 +-
 src/sim/workload.hh      |   3 +-
 9 files changed, 220 insertions(+), 36 deletions(-)
 create mode 100644 ext/gdbremote/signals.hh

diff --git a/ext/gdbremote/signals.hh b/ext/gdbremote/signals.hh
new file mode 100644
index 0000000000..11835e6f5a
--- /dev/null
+++ b/ext/gdbremote/signals.hh
@@ -0,0 +1,181 @@
+//===-- Generated From GDBRemoteSignals.cpp ------------------------===//
+//
+// Part of the LLVM Project,
+// under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===---------------------------------------------------------------===//
+
+#include <stdint.h>
+
+#ifndef __BASE_GDB_SIGNALS_HH__
+#define __BASE_GDB_SIGNALS_HH__
+
+/*
+These signals definitions are produced from LLVM's
+  lldb/source/Plugins/Process/Utility/GDBRemoteSignals.cpp
+*/
+namespace gem5{
+  enum class GDBSignal : uint8_t
+  {
+    ZERO = 0, //Signal 0
+    HUP = 1, //hangup
+    INT = 2, //interrupt
+    QUIT = 3, //quit
+    ILL = 4, //illegal instruction
+    TRAP = 5, //trace trap (not reset when caught)
+    ABRT = 6, //SIGIOT
+    EMT = 7, //emulation trap
+    FPE = 8, //floating point exception
+    KILL = 9, //kill
+    BUS = 10, //bus error
+    SEGV = 11, //segmentation violation
+    SYS = 12, //invalid system call
+    PIPE = 13, //write to pipe with reading end closed
+    ALRM = 14, //alarm
+    TERM = 15, //termination requested
+    URG = 16, //urgent data on socket
+    STOP = 17, //process stop
+    TSTP = 18, //tty stop
+    CONT = 19, //process continue
+    CHLD = 20, //SIGCLD
+    TTIN = 21, //background tty read
+    TTOU = 22, //background tty write
+    IO = 23, //input/output ready/Pollable event
+    XCPU = 24, //CPU resource exceeded
+    XFSZ = 25, //file size limit exceeded
+    VTALRM = 26, //virtual time alarm
+    PROF = 27, //profiling time alarm
+    WINCH = 28, //window size changes
+    LOST = 29, //resource lost
+    USR1 = 30, //user defined signal 1
+    USR2 = 31, //user defined signal 2
+    PWR = 32, //power failure
+    POLL = 33, //pollable event
+    WIND = 34, //SIGWIND
+    PHONE = 35, //SIGPHONE
+    WAITING = 36, //process's LWPs are blocked
+    LWP = 37, //signal LWP
+    DANGER = 38, //swap space dangerously low
+    GRANT = 39, //monitor mode granted
+    RETRACT = 40, //need to relinquish monitor mode
+    MSG = 41, //monitor mode data available
+    SOUND = 42, //sound completed
+    SAK = 43, //secure attention
+    PRIO = 44, //SIGPRIO
+
+    SIG33 = 45, //real-time event 33
+    SIG34 = 46, //real-time event 34
+    SIG35 = 47, //real-time event 35
+    SIG36 = 48, //real-time event 36
+    SIG37 = 49, //real-time event 37
+    SIG38 = 50, //real-time event 38
+    SIG39 = 51, //real-time event 39
+    SIG40 = 52, //real-time event 40
+    SIG41 = 53, //real-time event 41
+    SIG42 = 54, //real-time event 42
+    SIG43 = 55, //real-time event 43
+    SIG44 = 56, //real-time event 44
+    SIG45 = 57, //real-time event 45
+    SIG46 = 58, //real-time event 46
+    SIG47 = 59, //real-time event 47
+    SIG48 = 60, //real-time event 48
+    SIG49 = 61, //real-time event 49
+    SIG50 = 62, //real-time event 50
+    SIG51 = 63, //real-time event 51
+    SIG52 = 64, //real-time event 52
+    SIG53 = 65, //real-time event 53
+    SIG54 = 66, //real-time event 54
+    SIG55 = 67, //real-time event 55
+    SIG56 = 68, //real-time event 56
+    SIG57 = 69, //real-time event 57
+    SIG58 = 70, //real-time event 58
+    SIG59 = 71, //real-time event 59
+    SIG60 = 72, //real-time event 60
+    SIG61 = 73, //real-time event 61
+    SIG62 = 74, //real-time event 62
+    SIG63 = 75, //real-time event 63
+
+    CANCEL = 76, //LWP internal signal
+
+    SIG32 = 77, //real-time event 32
+    SIG64 = 78, //real-time event 64
+    SIG65 = 79, //real-time event 65
+    SIG66 = 80, //real-time event 66
+    SIG67 = 81, //real-time event 67
+    SIG68 = 82, //real-time event 68
+    SIG69 = 83, //real-time event 69
+    SIG70 = 84, //real-time event 70
+    SIG71 = 85, //real-time event 71
+    SIG72 = 86, //real-time event 72
+    SIG73 = 87, //real-time event 73
+    SIG74 = 88, //real-time event 74
+    SIG75 = 89, //real-time event 75
+    SIG76 = 90, //real-time event 76
+    SIG77 = 91, //real-time event 77
+    SIG78 = 92, //real-time event 78
+    SIG79 = 93, //real-time event 79
+    SIG80 = 94, //real-time event 80
+    SIG81 = 95, //real-time event 81
+    SIG82 = 96, //real-time event 82
+    SIG83 = 97, //real-time event 83
+    SIG84 = 98, //real-time event 84
+    SIG85 = 99, //real-time event 85
+    SIG86 = 100, //real-time event 86
+    SIG87 = 101, //real-time event 87
+    SIG88 = 102, //real-time event 88
+    SIG89 = 103, //real-time event 89
+    SIG90 = 104, //real-time event 90
+    SIG91 = 105, //real-time event 91
+    SIG92 = 106, //real-time event 92
+    SIG93 = 107, //real-time event 93
+    SIG94 = 108, //real-time event 94
+    SIG95 = 109, //real-time event 95
+    SIG96 = 110, //real-time event 96
+    SIG97 = 111, //real-time event 97
+    SIG98 = 112, //real-time event 98
+    SIG99 = 113, //real-time event 99
+    SIG100 = 114, //real-time event 100
+    SIG101 = 115, //real-time event 101
+    SIG102 = 116, //real-time event 102
+    SIG103 = 117, //real-time event 103
+    SIG104 = 118, //real-time event 104
+    SIG105 = 119, //real-time event 105
+    SIG106 = 120, //real-time event 106
+    SIG107 = 121, //real-time event 107
+    SIG108 = 122, //real-time event 108
+    SIG109 = 123, //real-time event 109
+    SIG110 = 124, //real-time event 110
+    SIG111 = 125, //real-time event 111
+    SIG112 = 126, //real-time event 112
+    SIG113 = 127, //real-time event 113
+    SIG114 = 128, //real-time event 114
+    SIG115 = 129, //real-time event 115
+    SIG116 = 130, //real-time event 116
+    SIG117 = 131, //real-time event 117
+    SIG118 = 132, //real-time event 118
+    SIG119 = 133, //real-time event 119
+    SIG120 = 134, //real-time event 120
+    SIG121 = 135, //real-time event 121
+    SIG122 = 136, //real-time event 122
+    SIG123 = 137, //real-time event 123
+    SIG124 = 138, //real-time event 124
+    SIG125 = 139, //real-time event 125
+    SIG126 = 140, //real-time event 126
+    SIG127 = 141, //real-time event 127
+
+    INFO = 142, //information request
+    unknown = 143, //unknown signal
+
+    EXC_BAD_ACCESS = 145, //could not access memory
+    EXC_BAD_INSTRUCTION = 146, //illegal instruction/operand
+    EXC_ARITHMETIC = 147, //arithmetic exception
+    EXC_EMULATION = 148, //emulation instruction
+    EXC_SOFTWARE = 149, //software generated exception
+    EXC_BREAKPOINT = 150, //breakpoint
+
+    LIBRT = 151, //librt internal signal
+  };
+}
+#endif /* __BASE_GDB_SIGNALS_HH__ */
diff --git a/src/arch/power/faults.cc b/src/arch/power/faults.cc
index be1796e14a..0d8f2ddd68 100644
--- a/src/arch/power/faults.cc
+++ b/src/arch/power/faults.cc
@@ -42,7 +42,7 @@ namespace PowerISA
 void
 UnimplementedOpcodeFault::invoke(ThreadContext *tc, const StaticInstPtr &inst)
 {
-    panic_if(tc->getSystemPtr()->trapToGdb(SIGILL, tc->contextId()),
+    panic_if(tc->getSystemPtr()->trapToGdb(GDBSignal::ILL, tc->contextId()),
              "Unimplemented opcode encountered at virtual address %#x\n",
              tc->pcState().instAddr());
 }
@@ -50,14 +50,14 @@ UnimplementedOpcodeFault::invoke(ThreadContext *tc, const StaticInstPtr &inst)
 void
 AlignmentFault::invoke(ThreadContext *tc, const StaticInstPtr &inst)
 {
-    panic_if(!tc->getSystemPtr()->trapToGdb(SIGBUS, tc->contextId()),
+    panic_if(!tc->getSystemPtr()->trapToGdb(GDBSignal::BUS, tc->contextId()),
              "Alignment fault when accessing virtual address %#x\n", vaddr);
 }
 
 void
 TrapFault::invoke(ThreadContext *tc, const StaticInstPtr &inst)
 {
-    panic_if(tc->getSystemPtr()->trapToGdb(SIGTRAP, tc->contextId()),
+    panic_if(tc->getSystemPtr()->trapToGdb(GDBSignal::TRAP, tc->contextId()),
              "Trap encountered at virtual address %#x\n",
              tc->pcState().instAddr());
 }
diff --git a/src/base/remote_gdb.cc b/src/base/remote_gdb.cc
index c19dede748..47fae75cbb 100644
--- a/src/base/remote_gdb.cc
+++ b/src/base/remote_gdb.cc
@@ -130,7 +130,6 @@
 #include "base/remote_gdb.hh"
 
 #include <sys/select.h>
-#include <sys/signal.h>
 #include <sys/time.h>
 #include <unistd.h>
 
@@ -192,7 +191,7 @@ class HardBreakpoint : public PCEvent
         DPRINTF(GDBMisc, "handling hardware breakpoint at %#x\n", pc());
 
         if (tc == gdb->tc)
-            gdb->trap(tc->contextId(), SIGTRAP,"");
+            gdb->trap(tc->contextId(), GDBSignal::TRAP,"");
     }
 };
 
@@ -549,7 +548,7 @@ BaseRemoteGDB::selectThreadContext(ContextID id)
 // makes sense to use POSIX errno values, because that is what the
 // gdb/remote.c functions want to return.
 void
-BaseRemoteGDB::trap(ContextID id, int signum,const std::string& stopReason)
+BaseRemoteGDB::trap(ContextID id, GDBSignal sig,const std::string& stopReason)
 {
     if (!attached)
         return;
@@ -575,10 +574,10 @@ BaseRemoteGDB::trap(ContextID id, int signum,const std::string& stopReason)
         send("OK");
     } else {
         // Tell remote host that an exception has occurred.
-        sendTPacket(signum,id,stopReason);
+        sendTPacket(sig,id,stopReason);
     }
 
-    processCommands(signum);
+    processCommands(sig);
 }
 
 bool
@@ -613,7 +612,7 @@ BaseRemoteGDB::incomingData(int revent)
     }
 
     if (revent & POLLIN) {
-        scheduleTrapEvent(tc->contextId(),SIGILL,0,"");
+        scheduleTrapEvent(tc->contextId(),GDBSignal::ILL,0,"");
     } else if (revent & POLLNVAL) {
         descheduleInstCommitEvent(&trapEvent);
         scheduleInstCommitEvent(&disconnectEvent, 0);
@@ -766,14 +765,14 @@ BaseRemoteGDB::send(const char *bp)
 }
 
 void
-BaseRemoteGDB::processCommands(int signum)
+BaseRemoteGDB::processCommands(GDBSignal sig)
 {
     // Stick frame regs into our reg cache.
     regCachePtr = gdbRegs();
     regCachePtr->getRegs(tc);
 
     GdbCommand::Context cmd_ctx;
-    cmd_ctx.type = signum;
+    cmd_ctx.type = sig;
     std::vector<char> data;
 
     for (;;) {
@@ -882,7 +881,7 @@ BaseRemoteGDB::singleStep()
 {
     if (!singleStepEvent.scheduled())
         scheduleInstCommitEvent(&singleStepEvent, 1);
-    trap(tc->contextId(), SIGTRAP);
+    trap(tc->contextId(), GDBSignal::TRAP);
 }
 
 void
@@ -951,18 +950,20 @@ BaseRemoteGDB::removeHardBreak(Addr addr, size_t kind)
 }
 
 void
-BaseRemoteGDB::sendTPacket(int errnum, ContextID id,
+BaseRemoteGDB::sendTPacket(GDBSignal sig, ContextID id,
     const std::string& stopReason)
 {
     if (!stopReason.empty()){
-        send("T%02xcore:%x;thread:%x;%s;",errnum,id + 1,id + 1,stopReason);
+        send("T%02xcore:%x;thread:%x;%s;",
+            (uint8_t)sig,id + 1,id + 1,stopReason);
     }else{
-        send("T%02xcore:%x;thread:%x;",errnum,id + 1,id + 1);
+        send("T%02xcore:%x;thread:%x;",
+            (uint8_t)sig,id + 1,id + 1);
     }
 }
 void
-BaseRemoteGDB::sendSPacket(int errnum){
-       send("S%02x",errnum);
+BaseRemoteGDB::sendSPacket(GDBSignal sig){
+       send("S%02x",(uint8_t)sig);
 }
 void
 BaseRemoteGDB::sendOPacket(const std::string message){
@@ -970,12 +971,12 @@ BaseRemoteGDB::sendOPacket(const std::string message){
 }
 
 void
-BaseRemoteGDB::scheduleTrapEvent(ContextID id,int type,int delta,
+BaseRemoteGDB::scheduleTrapEvent(ContextID id,GDBSignal sig,int delta,
     std::string stopReason){
     ThreadContext* _tc = threads[id];
     panic_if(_tc == nullptr, "Unknown context id :%i",id);
     trapEvent.id(id);
-    trapEvent.type(type);
+    trapEvent.type(sig);
     trapEvent.stopReason(stopReason);
     if (!trapEvent.scheduled())
         scheduleInstCommitEvent(&trapEvent,delta,_tc);
@@ -1171,7 +1172,7 @@ BaseRemoteGDB::cmdSetThread(GdbCommand::Context &ctx)
                 throw CmdError("E04");
             // Line up on an instruction boundary in the new thread.
             threadSwitching = true;
-            scheduleTrapEvent(tid,0,0,"");
+            scheduleTrapEvent(tid,GDBSignal::ZERO,0,"");
             return false;
         }
     } else {
diff --git a/src/base/remote_gdb.hh b/src/base/remote_gdb.hh
index 4da1dcc0c8..c23b4ac95e 100644
--- a/src/base/remote_gdb.hh
+++ b/src/base/remote_gdb.hh
@@ -42,7 +42,6 @@
 #ifndef __REMOTE_GDB_HH__
 #define __REMOTE_GDB_HH__
 
-#include <sys/signal.h>
 
 #include <cstdint>
 #include <exception>
@@ -56,6 +55,7 @@
 #include "base/socket.hh"
 #include "base/types.hh"
 #include "cpu/pc_event.hh"
+#include "gdbremote/signals.hh"
 #include "sim/debug.hh"
 #include "sim/eventq.hh"
 
@@ -171,10 +171,10 @@ class BaseRemoteGDB
     void replaceThreadContext(ThreadContext *_tc);
     bool selectThreadContext(ContextID id);
 
-    void trap(ContextID id, int signum,const std::string& stopReason="");
+    void trap(ContextID id, GDBSignal sig,const std::string& stopReason="");
     bool sendMessage(std::string message);
     //schedule a trap event with these properties
-    void scheduleTrapEvent(ContextID id,int type, int delta,
+    void scheduleTrapEvent(ContextID id,GDBSignal type, int delta,
       std::string stopReason);
     /** @} */ // end of api_remote_gdb
 
@@ -259,7 +259,7 @@ class BaseRemoteGDB
      * or SW trap), 'signum' is the signal value reported back to GDB
      * in "S" packet (this is done in trap()).
      */
-    void processCommands(int signum=0);
+    void processCommands(GDBSignal sig=GDBSignal::ZERO);
 
     /*
      * Simulator side debugger state.
@@ -280,7 +280,7 @@ class BaseRemoteGDB
     class TrapEvent : public Event
     {
       protected:
-        int _type;
+        GDBSignal _type;
         ContextID _id;
         std::string _stopReason;
         BaseRemoteGDB *gdb;
@@ -289,7 +289,7 @@ class BaseRemoteGDB
         TrapEvent(BaseRemoteGDB *g) : gdb(g)
         {}
 
-        void type(int t) { _type = t; }
+        void type(GDBSignal t) { _type = t; }
         void stopReason(std::string s) {_stopReason = s; }
         void id(ContextID id) { _id = id; }
          void process() { gdb->trap(_id, _type,_stopReason); }
@@ -327,8 +327,9 @@ class BaseRemoteGDB
     void insertHardBreak(Addr addr, size_t kind);
     void removeHardBreak(Addr addr, size_t kind);
 
-    void sendTPacket(int errnum, ContextID id,const std::string& stopReason);
-    void sendSPacket(int errnum);
+    void sendTPacket(GDBSignal sig, ContextID id,
+      const std::string& stopReason);
+    void sendSPacket(GDBSignal sig);
     //The OPacket allow to send string to be displayed by the remote GDB
     void sendOPacket(const std::string message);
     /*
@@ -341,7 +342,7 @@ class BaseRemoteGDB
         {
             const GdbCommand *cmd;
             char cmdByte;
-            int type;
+            GDBSignal type;
             char *data;
             int len;
         };
@@ -363,7 +364,7 @@ class BaseRemoteGDB
         {
             const GdbMultiLetterCommand *cmd;
             std::string cmdTxt;
-            int type;
+            GDBSignal type;
             char *data;
             int len;
         };
diff --git a/src/sim/faults.cc b/src/sim/faults.cc
index c0a7d76eaa..3049b3be42 100644
--- a/src/sim/faults.cc
+++ b/src/sim/faults.cc
@@ -100,14 +100,14 @@ GenericPageTableFault::invoke(ThreadContext *tc, const StaticInstPtr &inst)
         handled = p->fixupFault(vaddr);
     }
     panic_if(!handled &&
-                 !tc->getSystemPtr()->trapToGdb(SIGSEGV, tc->contextId()),
+            !tc->getSystemPtr()->trapToGdb(GDBSignal::SEGV, tc->contextId()),
              "Page table fault when accessing virtual address %#x\n", vaddr);
 }
 
 void
 GenericAlignmentFault::invoke(ThreadContext *tc, const StaticInstPtr &inst)
 {
-    panic_if(!tc->getSystemPtr()->trapToGdb(SIGSEGV, tc->contextId()),
+    panic_if(!tc->getSystemPtr()->trapToGdb(GDBSignal::SEGV, tc->contextId()),
              "Alignment fault when accessing virtual address %#x\n", vaddr);
 }
 
diff --git a/src/sim/system.cc b/src/sim/system.cc
index 5f67c4d2b8..ee6c70a5d3 100644
--- a/src/sim/system.cc
+++ b/src/sim/system.cc
@@ -391,7 +391,7 @@ System::workItemEnd(uint32_t tid, uint32_t workid)
 }
 
 bool
-System::trapToGdb(int signal, ContextID ctx_id) const
+System::trapToGdb(GDBSignal signal, ContextID ctx_id) const
 {
     return workload->trapToGdb(signal, ctx_id);
 }
diff --git a/src/sim/system.hh b/src/sim/system.hh
index 7738d561c3..d691fb8bf8 100644
--- a/src/sim/system.hh
+++ b/src/sim/system.hh
@@ -554,7 +554,7 @@ class System : public SimObject, public PCEventScope
     void workItemEnd(uint32_t tid, uint32_t workid);
 
     /* Returns whether we successfully trapped into GDB. */
-    bool trapToGdb(int signal, ContextID ctx_id) const;
+    bool trapToGdb(GDBSignal signal, ContextID ctx_id) const;
 
   protected:
     /**
diff --git a/src/sim/workload.cc b/src/sim/workload.cc
index ca51bbdb73..ceb1029f77 100644
--- a/src/sim/workload.cc
+++ b/src/sim/workload.cc
@@ -72,7 +72,7 @@ Workload::replaceThreadContext(ThreadContext *tc)
 }
 
 bool
-Workload::trapToGdb(int signal, ContextID ctx_id)
+Workload::trapToGdb(GDBSignal signal, ContextID ctx_id)
 {
     if (gdb && gdb->isAttached()) {
         gdb->trap(ctx_id, signal);
diff --git a/src/sim/workload.hh b/src/sim/workload.hh
index f9bb8dba3c..10129379e0 100644
--- a/src/sim/workload.hh
+++ b/src/sim/workload.hh
@@ -34,6 +34,7 @@
 #include "base/loader/object_file.hh"
 #include "base/loader/symtab.hh"
 #include "enums/ByteOrder.hh"
+#include "gdbremote/signals.hh"
 #include "params/StubWorkload.hh"
 #include "params/Workload.hh"
 #include "sim/sim_object.hh"
@@ -91,7 +92,7 @@ class Workload : public SimObject
 
     // Once trapping into GDB is no longer a special case routed through the
     // system object, this helper can be removed.
-    bool trapToGdb(int signal, ContextID ctx_id);
+    bool trapToGdb(GDBSignal sig, ContextID ctx_id);
     bool sendToGdb(std::string msg);
 
     virtual void registerThreadContext(ThreadContext *tc);

From d401b1fbadff4a69059e6a2132ad500f34cf2c65 Mon Sep 17 00:00:00 2001
From: Quentin Forcioli <quentin.forcioli@telecom-paris.fr>
Date: Tue, 16 Aug 2022 17:43:31 +0200
Subject: [PATCH 021/492] base,sim: Adding monitor function to GDB

The remote protocol provides a monitor query. This query allows to
provide a implementation defined behavior in the stub.

I proposed to use this command as a way to quit simulation with a
message provided by the GDB client.

Thus calling "monitor my_message" in the client will exit the
simulation with the exit message "GDB_MONITOR:my_message".

This is implemented through a derived class based on
GlobalSimLoopExitEvent and a small addition to the based class that adds
a clean method that will be called when returning siumation after the
Event.

Change-Id: Ib5fda569edcf6733cbcc6240ef6d2ec4dc6502ec
Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/63538
Maintainer: Bobby Bruce <bbruce@ucdavis.edu>
Reviewed-by: Bobby Bruce <bbruce@ucdavis.edu>
Tested-by: kokoro <noreply+kokoro@google.com>
---
 src/base/remote_gdb.cc | 46 +++++++++++++++++++++++++++++++++++++-----
 src/base/remote_gdb.hh |  1 +
 src/sim/eventq.hh      |  1 +
 src/sim/sim_events.hh  |  7 +++++--
 src/sim/simulate.cc    |  5 ++++-
 5 files changed, 52 insertions(+), 8 deletions(-)

diff --git a/src/base/remote_gdb.cc b/src/base/remote_gdb.cc
index 47fae75cbb..43f53d1247 100644
--- a/src/base/remote_gdb.cc
+++ b/src/base/remote_gdb.cc
@@ -157,6 +157,7 @@
 #include "mem/translating_port_proxy.hh"
 #include "sim/full_system.hh"
 #include "sim/process.hh"
+#include "sim/sim_events.hh"
 #include "sim/system.hh"
 
 namespace gem5
@@ -241,7 +242,7 @@ hex2c(char c0,char c1)
 
 //this function will be used in a future patch
 //convert a encoded string to a string
-[[maybe_unused]] std::string
+std::string
 hexS2string(std::string hex_in)
 {
    std::string out="";
@@ -554,7 +555,6 @@ BaseRemoteGDB::trap(ContextID id, GDBSignal sig,const std::string& stopReason)
         return;
 
     if (tc->contextId() != id) {
-
         //prevent thread switch when single stepping
         if (singleStepEvent.scheduled()){
             return;
@@ -564,11 +564,14 @@ BaseRemoteGDB::trap(ContextID id, GDBSignal sig,const std::string& stopReason)
             return;
     }
 
+
     DPRINTF(GDBMisc, "trap: PC=%s\n", tc->pcState());
 
     clearSingleStep();
-
-    if (threadSwitching) {
+    if (stopReason=="monitor_return"){
+        //should wnot send any Tpacket here
+        send("OK");
+    }else if (threadSwitching) {
         threadSwitching = false;
         // Tell GDB the thread switch has completed.
         send("OK");
@@ -1326,6 +1329,7 @@ splitAt(std::string str, const char * const delim)
 std::map<std::string, BaseRemoteGDB::QuerySetCommand>
         BaseRemoteGDB::queryMap = {
     { "C", { &BaseRemoteGDB::queryC } },
+    { "Rcmd", { &BaseRemoteGDB::queryRcmd} },
     { "Attached", { &BaseRemoteGDB::queryAttached} },
     { "Supported", { &BaseRemoteGDB::querySupported, ";" } },
     { "Xfer", { &BaseRemoteGDB::queryXfer } },
@@ -1416,6 +1420,38 @@ BaseRemoteGDB::queryAttached(QuerySetCommand::Context &ctx)
     return true;
 }
 
+class MonitorCallEvent : public GlobalSimLoopExitEvent
+{
+    BaseRemoteGDB& gdb;
+    ContextID id;
+    public:
+    MonitorCallEvent(BaseRemoteGDB& gdb,ContextID id,const std::string &_cause,
+                  int code):
+                  GlobalSimLoopExitEvent(_cause,code), gdb(gdb),id(id)
+                  {};
+    void process() override{
+        GlobalSimLoopExitEvent::process();
+    }
+    void clean() override{
+        //trapping now
+        //this is the only point in time when we can call trap
+        //before any breakpoint triggers
+        gdb.trap(id,GDBSignal::ZERO,"monitor_return");
+        delete this;
+    }
+    ~MonitorCallEvent(){
+        DPRINTF(Event,"MonitorCallEvent destructed\n");;
+    }
+};
+
+bool
+BaseRemoteGDB::queryRcmd(QuerySetCommand::Context &ctx){
+    std::string message=hexS2string(ctx.args[0]);
+    DPRINTF(GDBMisc, "Rcmd Query: %s => %s\n", ctx.args[0],message);
+    //Tick when = curTick();
+    new MonitorCallEvent(*this,tc->contextId(),"GDB_MONITOR:"+ message, 0);
+    return false;
+}
 
 bool
 BaseRemoteGDB::queryFThreadInfo(QuerySetCommand::Context &ctx)
@@ -1444,7 +1480,7 @@ BaseRemoteGDB::cmdQueryVar(GdbCommand::Context &ctx)
 {
     // The query command goes until the first ':', or the end of the string.
     std::string s(ctx.data, ctx.len);
-    auto query_split = splitAt({ ctx.data, (size_t)ctx.len }, ":");
+    auto query_split = splitAt({ ctx.data, (size_t)ctx.len }, ":,");
     const auto &query_str = query_split.first;
 
     // Look up the query command, and report if it isn't found.
diff --git a/src/base/remote_gdb.hh b/src/base/remote_gdb.hh
index c23b4ac95e..1c5cd9c7af 100644
--- a/src/base/remote_gdb.hh
+++ b/src/base/remote_gdb.hh
@@ -433,6 +433,7 @@ class BaseRemoteGDB
     bool querySupported(QuerySetCommand::Context &ctx);
     bool queryXfer(QuerySetCommand::Context &ctx);
     bool querySymbol(QuerySetCommand::Context &ctx);
+    bool queryRcmd(QuerySetCommand::Context &ctx);
     bool queryAttached(QuerySetCommand::Context &ctx);
 
     size_t threadInfoIdx = 0;
diff --git a/src/sim/eventq.hh b/src/sim/eventq.hh
index a7226ffead..cd5d285f93 100644
--- a/src/sim/eventq.hh
+++ b/src/sim/eventq.hh
@@ -46,6 +46,7 @@
 
 #include "base/debug.hh"
 #include "base/flags.hh"
+#include "base/trace.hh"
 #include "base/types.hh"
 #include "base/uncontended_mutex.hh"
 #include "debug/Event.hh"
diff --git a/src/sim/sim_events.hh b/src/sim/sim_events.hh
index 06a8e6548d..a1ffc7b34a 100644
--- a/src/sim/sim_events.hh
+++ b/src/sim/sim_events.hh
@@ -68,8 +68,11 @@ class GlobalSimLoopExitEvent : public GlobalEvent
     const std::string getCause() const { return cause; }
     int getCode() const { return code; }
 
-    void process();     // process event
-
+    virtual void process();// process event
+    virtual void clean(){};//cleaning event
+    ~GlobalSimLoopExitEvent (){
+      DPRINTF(Event,"GlobalSimLoopExitEvent destructed\n");
+    };
     virtual const char *description() const;
 };
 
diff --git a/src/sim/simulate.cc b/src/sim/simulate.cc
index c5d07942ef..0c30f10570 100644
--- a/src/sim/simulate.cc
+++ b/src/sim/simulate.cc
@@ -184,9 +184,12 @@ struct DescheduleDeleter
  * terminate the loop.  Exported to Python.
  * @return The SimLoopExitEvent that caused the loop to exit.
  */
+GlobalSimLoopExitEvent *global_exit_event= nullptr;
 GlobalSimLoopExitEvent *
 simulate(Tick num_cycles)
 {
+    if (global_exit_event)//cleaning last global exit event
+        global_exit_event->clean();
     std::unique_ptr<GlobalSyncEvent, DescheduleDeleter> quantum_event;
     const Tick exit_tick = num_cycles < MaxTick - curTick() ?
                                         curTick() + num_cycles : MaxTick;
@@ -224,7 +227,7 @@ simulate(Tick num_cycles)
     BaseGlobalEvent *global_event = local_event->globalEvent();
     assert(global_event);
 
-    GlobalSimLoopExitEvent *global_exit_event =
+    global_exit_event =
         dynamic_cast<GlobalSimLoopExitEvent *>(global_event);
     assert(global_exit_event);
 

From 4054565b853cb8b22ce30b9e0bbed944060d313b Mon Sep 17 00:00:00 2001
From: "Bobby R. Bruce" <bbruce@ucdavis.edu>
Date: Tue, 22 Nov 2022 14:30:35 -0800
Subject: [PATCH 022/492] tests: Delete build directory before running KVM in
 nightly

The nightly tests failed here:
https://jenkins.gem5.org/job/nightly/430/. What seems to have happened
is the ALL/gem5.opt us compiled within the Docker container but then,
for the KVM tests, there is an attempt to recompile on the host, which
causes compilation problems. The safest strategy here is delete the
build directory prior to running the KVM tests.

In latest versions of our test infrastructure, the KVM tests should be
run completely separately (i.e., in different Jenkin's jobs) to avoid
this.

Change-Id: Id7d18c0504dd324f7a0e5e9a7809463520969dda
Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/65911
Maintainer: Bobby Bruce <bbruce@ucdavis.edu>
Tested-by: kokoro <noreply+kokoro@google.com>
Reviewed-by: Bobby Bruce <bbruce@ucdavis.edu>
---
 tests/nightly.sh | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/tests/nightly.sh b/tests/nightly.sh
index a082158e01..ec15f91a78 100755
--- a/tests/nightly.sh
+++ b/tests/nightly.sh
@@ -89,6 +89,10 @@ docker run -u $UID:$GID --volume "${gem5_root}":"${gem5_root}" -w \
 # removes all those part of the 'very-long' (weekly) tests, or for compilation
 # to '.debug' or '.fast'. We also remove ARM targets as our Jenkins is an X86
 # system. Users wishing to run this script elsewhere should be aware of this.
+# Note: we delete the build directory here. It was build in the
+# "ubuntu-22.04_all-dependencies" docker image which may not be compatible with
+# the host environment.
+rm -rf "${gem5_root}/build"
 cd "${gem5_root}/tests"
 ./main.py run -j${compile_threads} -vv \
     --exclude-tags ".*" --include-tags kvm --exclude-tags very\-long \

From 92027a68ceea09624b4ce17da81d05ede883c484 Mon Sep 17 00:00:00 2001
From: Matthew Poremba <matthew.poremba@amd.com>
Date: Wed, 23 Nov 2022 14:20:14 -0800
Subject: [PATCH 023/492] configs: Set CPU vendor to M5 Simulator in apu_se.py

Other vendor strings causes, for some reason, bad addresses to be
computed when running the GPU model. This change reverts back to M5
Simulator only for apu_se.py.

Change-Id: I5992b4e31569f5c0e5e49e523908c8fa0602f845
Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/65991
Tested-by: kokoro <noreply+kokoro@google.com>
Reviewed-by: Matt Sinclair <mattdsinclair@gmail.com>
Maintainer: Matt Sinclair <mattdsinclair@gmail.com>
Reviewed-by: Jason Lowe-Power <power.jg@gmail.com>
---
 configs/example/apu_se.py | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/configs/example/apu_se.py b/configs/example/apu_se.py
index 39def024fc..8e8bc60fe8 100644
--- a/configs/example/apu_se.py
+++ b/configs/example/apu_se.py
@@ -757,6 +757,11 @@ if fast_forward:
         (cpu_list[i], future_cpu_list[i]) for i in range(args.num_cpus)
     ]
 
+# Other CPU strings cause bad addresses in ROCm. Revert back to M5 Simulator.
+for (i, cpu) in enumerate(cpu_list):
+    for j in range(len(cpu)):
+        cpu.isa[j].vendor_string = "M5 Simulator"
+
 # Full list of processing cores in the system.
 cpu_list = cpu_list + [shader] + cp_list
 

From 770b84c2ee098efdc7a0fa768334d45ab9720de6 Mon Sep 17 00:00:00 2001
From: "Bobby R. Bruce" <bbruce@ucdavis.edu>
Date: Mon, 28 Nov 2022 13:02:49 -0800
Subject: [PATCH 024/492] sim: Add missing virtual destructor to
 GlobalSyncEvent

This missing destructor in GlobalSyncEvent was causing a compilation
error in gcc-12, thus causing the compiler-tests to fail:
https://jenkins.gem5.org/job/compiler-checks/436/

In addition a destructor was added to BaseGlobalEventTemplate. This does
not directly fix the aforementioned bug provides some additional
security.

Change-Id: Iab86d3f6d55064ba3b6a8a7cb01fb14533cce4b9
Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/66152
Maintainer: Jason Lowe-Power <power.jg@gmail.com>
Tested-by: kokoro <noreply+kokoro@google.com>
Maintainer: Bobby Bruce <bbruce@ucdavis.edu>
Reviewed-by: Jason Lowe-Power <power.jg@gmail.com>
---
 src/sim/global_event.hh | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/src/sim/global_event.hh b/src/sim/global_event.hh
index 05ae6ca9ad..36332043c6 100644
--- a/src/sim/global_event.hh
+++ b/src/sim/global_event.hh
@@ -162,6 +162,8 @@ class BaseGlobalEventTemplate : public BaseGlobalEvent
         for (int i = 0; i < numMainEventQueues; ++i)
             barrierEvent[i] = new typename Derived::BarrierEvent(this, p, f);
     }
+
+    virtual ~BaseGlobalEventTemplate(){}
 };
 
 
@@ -229,6 +231,8 @@ class GlobalSyncEvent : public BaseGlobalEventTemplate<GlobalSyncEvent>
         schedule(when);
     }
 
+    virtual ~GlobalSyncEvent (){}
+
     void process();
 
     const char *description() const;

From 8391f47bc9293ed4b13740c747ebc1894cf15f2c Mon Sep 17 00:00:00 2001
From: Hoa Nguyen <hoanguyen@ucdavis.edu>
Date: Sat, 26 Nov 2022 00:48:18 +0000
Subject: [PATCH 025/492] stdlib: More helpful message for the filelock error

Change-Id: Ib8e3bc9fc145a9604670e8288209ac62bfbd7932
Signed-off-by: Hoa Nguyen <hoanguyen@ucdavis.edu>
Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/66091
Maintainer: Bobby Bruce <bbruce@ucdavis.edu>
Reviewed-by: Bobby Bruce <bbruce@ucdavis.edu>
Tested-by: kokoro <noreply+kokoro@google.com>
---
 src/python/gem5/utils/filelock.py | 14 ++++++++++++--
 1 file changed, 12 insertions(+), 2 deletions(-)

diff --git a/src/python/gem5/utils/filelock.py b/src/python/gem5/utils/filelock.py
index 82e1122bf9..a6798e9f53 100644
--- a/src/python/gem5/utils/filelock.py
+++ b/src/python/gem5/utils/filelock.py
@@ -69,12 +69,22 @@ class FileLock(object):
             except OSError as e:
                 if e.errno != errno.EEXIST:
                     raise
+                solution_message = (
+                    "This is likely due to the existence"
+                    " of the lock file '{}'. If there's no other process"
+                    " the lock file, you can manually delete the lock file and"
+                    " rerun the script.".format(self.lockfile)
+                )
                 if self.timeout is None:
                     raise FileLockException(
-                        "Could not acquire lock on {}".format(self.file_name)
+                        "Could not acquire lock on {}. {}".format(
+                            self.file_name, solution_message
+                        )
                     )
                 if (time.time() - start_time) >= self.timeout:
-                    raise FileLockException("Timeout occured.")
+                    raise FileLockException(
+                        "Timeout occured. {}".format(solution_message)
+                    )
                 time.sleep(self.delay)
 
     #        self.is_locked = True

From c8949f085fdfcc590be501b85fe3c0cc9bdb80dc Mon Sep 17 00:00:00 2001
From: Hoa Nguyen <hoanguyen@ucdavis.edu>
Date: Mon, 14 Nov 2022 03:36:52 +0000
Subject: [PATCH 026/492] stdlib: Change #virtual_networks of mesi_two_level to
 3

A grep of "virtual_network" in src/mem/ruby/protocol shows that
files in MESI_Two_Level.slicc utilizes at 3 virtual networks.

Change-Id: I3e8dd09dd82b9c802fdf91145c6d998bc6db541b
Signed-off-by: Hoa Nguyen <hoanguyen@ucdavis.edu>
Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/65592
Maintainer: Jason Lowe-Power <power.jg@gmail.com>
Tested-by: kokoro <noreply+kokoro@google.com>
Reviewed-by: Jason Lowe-Power <power.jg@gmail.com>
---
 .../cachehierarchies/ruby/mesi_two_level_cache_hierarchy.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/src/python/gem5/components/cachehierarchies/ruby/mesi_two_level_cache_hierarchy.py b/src/python/gem5/components/cachehierarchies/ruby/mesi_two_level_cache_hierarchy.py
index 82089a5bdc..79c8b0ada3 100644
--- a/src/python/gem5/components/cachehierarchies/ruby/mesi_two_level_cache_hierarchy.py
+++ b/src/python/gem5/components/cachehierarchies/ruby/mesi_two_level_cache_hierarchy.py
@@ -83,11 +83,11 @@ class MESITwoLevelCacheHierarchy(
 
         self.ruby_system = RubySystem()
 
-        # MESI_Two_Level needs 5 virtual networks
-        self.ruby_system.number_of_virtual_networks = 5
+        # MESI_Two_Level needs 3 virtual networks
+        self.ruby_system.number_of_virtual_networks = 3
 
         self.ruby_system.network = SimplePt2Pt(self.ruby_system)
-        self.ruby_system.network.number_of_virtual_networks = 5
+        self.ruby_system.network.number_of_virtual_networks = 3
 
         self._l1_controllers = []
         for i, core in enumerate(board.get_processor().get_cores()):

From f99947059d4bd22cf066f5261b10be4e8e333fc5 Mon Sep 17 00:00:00 2001
From: Hoa Nguyen <hoanguyen@ucdavis.edu>
Date: Sat, 12 Nov 2022 08:00:49 +0000
Subject: [PATCH 027/492] stdlib: Clean up Ruby cache directory

- Fix typos.
- Fix type inconsistencies.

Change-Id: I98d82ec7e62130abb09295c5ec6cde86b1f7fa27
Signed-off-by: Hoa Nguyen <hoanguyen@ucdavis.edu>
Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/65571
Reviewed-by: Jason Lowe-Power <power.jg@gmail.com>
Maintainer: Jason Lowe-Power <power.jg@gmail.com>
Tested-by: kokoro <noreply+kokoro@google.com>
---
 .../cachehierarchies/abstract_two_level_cache_hierarchy.py  | 6 ++----
 .../cachehierarchies/ruby/caches/mesi_two_level/l2_cache.py | 2 +-
 2 files changed, 3 insertions(+), 5 deletions(-)

diff --git a/src/python/gem5/components/cachehierarchies/abstract_two_level_cache_hierarchy.py b/src/python/gem5/components/cachehierarchies/abstract_two_level_cache_hierarchy.py
index 17cae4aba8..d6a035f2cb 100644
--- a/src/python/gem5/components/cachehierarchies/abstract_two_level_cache_hierarchy.py
+++ b/src/python/gem5/components/cachehierarchies/abstract_two_level_cache_hierarchy.py
@@ -24,8 +24,6 @@
 # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 
-# from .abstract_cache_hierarchy import AbstractCacheHierarchy
-
 
 class AbstractTwoLevelCacheHierarchy:
     """
@@ -51,9 +49,9 @@ class AbstractTwoLevelCacheHierarchy:
 
         :type l1i_assoc: int
 
-        :param l1dsize: The size of the LL1 Data cache (e.g. "32kB").
+        :param l1d_size: The size of the L1 Data cache (e.g. "32kB").
 
-        :type l1dsize: str
+        :type l1d_size: str
 
         :param l1d_assoc:
 
diff --git a/src/python/gem5/components/cachehierarchies/ruby/caches/mesi_two_level/l2_cache.py b/src/python/gem5/components/cachehierarchies/ruby/caches/mesi_two_level/l2_cache.py
index 56cb5b2ec1..81ef4dbe90 100644
--- a/src/python/gem5/components/cachehierarchies/ruby/caches/mesi_two_level/l2_cache.py
+++ b/src/python/gem5/components/cachehierarchies/ruby/caches/mesi_two_level/l2_cache.py
@@ -45,7 +45,7 @@ class L2Cache(AbstractL2Cache):
             start_index_bit=self.getIndexBit(num_l2Caches),
         )
 
-        self.transitions_per_cycle = "4"
+        self.transitions_per_cycle = 4
 
     def getIndexBit(self, num_l2caches):
         l2_bits = int(math.log(num_l2caches, 2))

From eac06ad681ad48a08f946dda60b42adadfada3bf Mon Sep 17 00:00:00 2001
From: Hoa Nguyen <hoanguyen@ucdavis.edu>
Date: Mon, 28 Nov 2022 03:26:32 +0000
Subject: [PATCH 028/492] python: Fix multiline quotes in a single line

An example case,
```python
mem_side_port = RequestPort(
    "This port sends requests and " "receives responses"
)
```

This is the residue of running the python formatter.
This is done by finding all tokens matching the regex `"\s"(?![.;"])`
and manually replacing them by empty strings.

Change-Id: Icf223bbe889e5fa5749a81ef77aa6e721f38b549
Signed-off-by: Hoa Nguyen <hoanguyen@ucdavis.edu>
Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/66111
Reviewed-by: Bobby Bruce <bbruce@ucdavis.edu>
Maintainer: Bobby Bruce <bbruce@ucdavis.edu>
Tested-by: kokoro <noreply+kokoro@google.com>
---
 build_tools/cxx_config_cc.py                  |   4 +-
 build_tools/enum_cc.py                        |   2 +-
 configs/common/Options.py                     |   4 +-
 configs/example/apu_se.py                     |  18 ++-
 configs/example/arm/baremetal.py              |   8 +-
 configs/example/arm/dist_bigLITTLE.py         |   6 +-
 configs/example/gpufs/amd/AmdGPUOptions.py    |  16 +--
 configs/example/gpufs/runfs.py                |   4 +-
 src/arch/arm/ArmFsWorkload.py                 |   4 +-
 src/arch/arm/ArmPMU.py                        |   2 +-
 .../fastmodel/CortexA76/FastModelCortexA76.py |   2 +-
 .../fastmodel/CortexR52/FastModelCortexR52.py |  42 +++----
 src/arch/arm/fastmodel/GIC/FastModelGIC.py    | 114 +++++++++---------
 .../fastmodel/PL330_DMAC/FastModelPL330.py    |   2 +-
 src/arch/isa_parser/isa_parser.py             |   4 +-
 src/cpu/BaseCPU.py                            |   2 +-
 src/cpu/minor/BaseMinorCPU.py                 |  14 +--
 src/cpu/o3/BaseO3CPU.py                       |  32 ++---
 src/cpu/o3/probe/ElasticTrace.py              |   6 +-
 src/cpu/testers/memtest/MemTest.py            |   4 +-
 src/cpu/testers/rubytest/RubyTester.py        |   2 +-
 src/cpu/testers/traffic_gen/BaseTrafficGen.py |   2 +-
 src/cpu/testers/traffic_gen/GUPSGen.py        |   4 +-
 src/cpu/trace/TraceCPU.py                     |   6 +-
 src/dev/amdgpu/AMDGPU.py                      |   2 +-
 src/dev/arm/RealView.py                       |  12 +-
 src/dev/arm/SMMUv3.py                         |   4 +-
 src/dev/net/Ethernet.py                       |   2 +-
 src/dev/pci/PciHost.py                        |   2 +-
 src/dev/serial/Uart.py                        |   2 +-
 src/gpu-compute/GPU.py                        |  42 +++----
 src/gpu-compute/LdsState.py                   |   2 +-
 src/learning_gem5/part2/HelloObject.py        |   4 +-
 src/mem/AddrMapper.py                         |   4 +-
 src/mem/Bridge.py                             |   4 +-
 src/mem/CommMonitor.py                        |  16 +--
 src/mem/DRAMInterface.py                      |   4 +-
 src/mem/DRAMsim3.py                           |   4 +-
 src/mem/ExternalSlave.py                      |   2 +-
 src/mem/MemChecker.py                         |   8 +-
 src/mem/MemCtrl.py                            |   6 +-
 src/mem/MemDelay.py                           |   4 +-
 src/mem/MemInterface.py                       |   6 +-
 src/mem/PortTerminator.py                     |   4 +-
 src/mem/SerialLink.py                         |   6 +-
 src/mem/XBar.py                               |  10 +-
 src/mem/cache/Cache.py                        |   6 +-
 src/mem/cache/compressors/Compressors.py      |   2 +-
 src/mem/cache/tags/Tags.py                    |   2 +-
 src/mem/probes/StackDistProbe.py              |   2 +-
 src/mem/ruby/slicc_interface/Controller.py    |   2 +-
 src/mem/ruby/system/GPUCoalescer.py           |   2 +-
 src/mem/slicc/symbols/StateMachine.py         |   2 +-
 .../gem5/components/memory/dramsim_3.py       |   2 +-
 src/python/m5/SimObject.py                    |   4 +-
 src/python/m5/params.py                       |   2 +-
 src/sim/System.py                             |   4 +-
 src/systemc/tests/verify.py                   |   4 +-
 tests/configs/gpu-randomtest-ruby.py          |   4 +-
 tests/configs/gpu-ruby.py                     |  12 +-
 tests/gem5/fixture.py                         |   4 +-
 util/cpt_upgraders/isa-is-simobject.py        |   2 +-
 util/gerrit-bot/extract_gitcookies.py         |   2 +-
 util/maint/list_changes.py                    |   6 +-
 util/maint/show_changes_by_file.py            |   4 +-
 util/plot_dram/dram_sweep_plot.py             |   2 +-
 66 files changed, 256 insertions(+), 268 deletions(-)

diff --git a/build_tools/cxx_config_cc.py b/build_tools/cxx_config_cc.py
index a908aa8c17..33d3bba864 100644
--- a/build_tools/cxx_config_cc.py
+++ b/build_tools/cxx_config_cc.py
@@ -255,9 +255,7 @@ for param in sim_object._params.values():
         code('} else if (name == "${{param.name}}") {')
         code.indent()
         code("${{param.name}}.clear();")
-        code(
-            "for (auto i = values.begin(); " "ret && i != values.end(); i ++)"
-        )
+        code("for (auto i = values.begin(); ret && i != values.end(); i ++)")
         code("{")
         code.indent()
         code("${{param.ptype.cxx_type}} elem;")
diff --git a/build_tools/enum_cc.py b/build_tools/enum_cc.py
index 476e49d750..cd192c56fb 100644
--- a/build_tools/enum_cc.py
+++ b/build_tools/enum_cc.py
@@ -87,7 +87,7 @@ namespace gem5
 )
 
 if enum.wrapper_is_struct:
-    code("const char *${wrapper_name}::${name}Strings" "[Num_${name}] =")
+    code("const char *${wrapper_name}::${name}Strings[Num_${name}] =")
 else:
     if enum.is_class:
         code(
diff --git a/configs/common/Options.py b/configs/common/Options.py
index 81d7791285..5585a75b80 100644
--- a/configs/common/Options.py
+++ b/configs/common/Options.py
@@ -217,7 +217,7 @@ def addNoISAOptions(parser):
         "--maxtime",
         type=float,
         default=None,
-        help="Run to the specified absolute simulated time in " "seconds",
+        help="Run to the specified absolute simulated time in seconds",
     )
     parser.add_argument(
         "-P",
@@ -691,7 +691,7 @@ def addSEOptions(parser):
         "-o",
         "--options",
         default="",
-        help="""The options to pass to the binary, use " "
+        help="""The options to pass to the binary, use
                               around the entire string""",
     )
     parser.add_argument(
diff --git a/configs/example/apu_se.py b/configs/example/apu_se.py
index 8e8bc60fe8..c2b97fd82e 100644
--- a/configs/example/apu_se.py
+++ b/configs/example/apu_se.py
@@ -85,7 +85,7 @@ parser.add_argument(
     "--cu-per-sqc",
     type=int,
     default=4,
-    help="number of CUs" "sharing an SQC (icache, and thus icache TLB)",
+    help="number of CUssharing an SQC (icache, and thus icache TLB)",
 )
 parser.add_argument(
     "--cu-per-scalar-cache",
@@ -94,7 +94,7 @@ parser.add_argument(
     help="Number of CUs sharing a scalar cache",
 )
 parser.add_argument(
-    "--simds-per-cu", type=int, default=4, help="SIMD units" "per CU"
+    "--simds-per-cu", type=int, default=4, help="SIMD unitsper CU"
 )
 parser.add_argument(
     "--cu-per-sa",
@@ -140,13 +140,13 @@ parser.add_argument(
     "--glbmem-wr-bus-width",
     type=int,
     default=32,
-    help="VGPR to Coalescer (Global Memory) data bus width " "in bytes",
+    help="VGPR to Coalescer (Global Memory) data bus width in bytes",
 )
 parser.add_argument(
     "--glbmem-rd-bus-width",
     type=int,
     default=32,
-    help="Coalescer to VGPR (Global Memory) data bus width in " "bytes",
+    help="Coalescer to VGPR (Global Memory) data bus width in bytes",
 )
 # Currently we only support 1 local memory pipe
 parser.add_argument(
@@ -166,7 +166,7 @@ parser.add_argument(
     "--wfs-per-simd",
     type=int,
     default=10,
-    help="Number of " "WF slots per SIMD",
+    help="Number of WF slots per SIMD",
 )
 
 parser.add_argument(
@@ -290,13 +290,11 @@ parser.add_argument(
     help="Latency for scalar responses from ruby to the cu.",
 )
 
-parser.add_argument(
-    "--TLB-prefetch", type=int, help="prefetch depth for" "TLBs"
-)
+parser.add_argument("--TLB-prefetch", type=int, help="prefetch depth for TLBs")
 parser.add_argument(
     "--pf-type",
     type=str,
-    help="type of prefetch: " "PF_CU, PF_WF, PF_PHASE, PF_STRIDE",
+    help="type of prefetch: PF_CU, PF_WF, PF_PHASE, PF_STRIDE",
 )
 parser.add_argument("--pf-stride", type=int, help="set prefetch stride")
 parser.add_argument(
@@ -369,7 +367,7 @@ parser.add_argument(
     type=str,
     default="gfx801",
     choices=GfxVersion.vals,
-    help="Gfx version for gpu" "Note: gfx902 is not fully supported by ROCm",
+    help="Gfx version for gpuNote: gfx902 is not fully supported by ROCm",
 )
 
 Ruby.define_options(parser)
diff --git a/configs/example/arm/baremetal.py b/configs/example/arm/baremetal.py
index 9eeba37ff7..a8db6bacd4 100644
--- a/configs/example/arm/baremetal.py
+++ b/configs/example/arm/baremetal.py
@@ -242,25 +242,25 @@ def main():
         "--semi-stdin",
         type=str,
         default="stdin",
-        help="Standard input for semihosting " "(default: gem5's stdin)",
+        help="Standard input for semihosting (default: gem5's stdin)",
     )
     parser.add_argument(
         "--semi-stdout",
         type=str,
         default="stdout",
-        help="Standard output for semihosting " "(default: gem5's stdout)",
+        help="Standard output for semihosting (default: gem5's stdout)",
     )
     parser.add_argument(
         "--semi-stderr",
         type=str,
         default="stderr",
-        help="Standard error for semihosting " "(default: gem5's stderr)",
+        help="Standard error for semihosting (default: gem5's stderr)",
     )
     parser.add_argument(
         "--semi-path",
         type=str,
         default="",
-        help=("Search path for files to be loaded through " "Arm Semihosting"),
+        help=("Search path for files to be loaded through Arm Semihosting"),
     )
     parser.add_argument(
         "args",
diff --git a/configs/example/arm/dist_bigLITTLE.py b/configs/example/arm/dist_bigLITTLE.py
index a3f3ede4eb..2884a5efd5 100644
--- a/configs/example/arm/dist_bigLITTLE.py
+++ b/configs/example/arm/dist_bigLITTLE.py
@@ -51,7 +51,7 @@ import sw
 def addOptions(parser):
     # Options for distributed simulation (i.e. dist-gem5)
     parser.add_argument(
-        "--dist", action="store_true", help="Distributed gem5" " simulation."
+        "--dist", action="store_true", help="Distributed gem5 simulation."
     )
     parser.add_argument(
         "--is-switch",
@@ -71,14 +71,14 @@ def addOptions(parser):
         default=0,
         action="store",
         type=int,
-        help="Number of gem5 processes within the dist gem5" " run.",
+        help="Number of gem5 processes within the dist gem5 run.",
     )
     parser.add_argument(
         "--dist-server-name",
         default="127.0.0.1",
         action="store",
         type=str,
-        help="Name of the message server host\nDEFAULT:" " localhost",
+        help="Name of the message server host\nDEFAULT: localhost",
     )
     parser.add_argument(
         "--dist-server-port",
diff --git a/configs/example/gpufs/amd/AmdGPUOptions.py b/configs/example/gpufs/amd/AmdGPUOptions.py
index 531249ee84..3d6a8cc48e 100644
--- a/configs/example/gpufs/amd/AmdGPUOptions.py
+++ b/configs/example/gpufs/amd/AmdGPUOptions.py
@@ -49,7 +49,7 @@ def addAmdGPUOptions(parser):
         "--cu-per-sqc",
         type=int,
         default=4,
-        help="number of CUs sharing an SQC" " (icache, and thus icache TLB)",
+        help="number of CUs sharing an SQC (icache, and thus icache TLB)",
     )
     parser.add_argument(
         "--cu-per-scalar-cache",
@@ -102,19 +102,19 @@ def addAmdGPUOptions(parser):
         "--issue-period",
         type=int,
         default=4,
-        help="Number of cycles per vector instruction issue" " period",
+        help="Number of cycles per vector instruction issue period",
     )
     parser.add_argument(
         "--glbmem-wr-bus-width",
         type=int,
         default=32,
-        help="VGPR to Coalescer (Global Memory) data bus width" " in bytes",
+        help="VGPR to Coalescer (Global Memory) data bus width in bytes",
     )
     parser.add_argument(
         "--glbmem-rd-bus-width",
         type=int,
         default=32,
-        help="Coalescer to VGPR (Global Memory) data bus width" " in bytes",
+        help="Coalescer to VGPR (Global Memory) data bus width in bytes",
     )
     # Currently we only support 1 local memory pipe
     parser.add_argument(
@@ -204,20 +204,20 @@ def addAmdGPUOptions(parser):
     parser.add_argument(
         "--LocalMemBarrier",
         action="store_true",
-        help="Barrier does not wait for writethroughs to " " complete",
+        help="Barrier does not wait for writethroughs to complete",
     )
     parser.add_argument(
         "--countPages",
         action="store_true",
-        help="Count Page Accesses and output in " " per-CU output files",
+        help="Count Page Accesses and output in per-CU output files",
     )
     parser.add_argument(
-        "--TLB-prefetch", type=int, help="prefetch depth for" "TLBs"
+        "--TLB-prefetch", type=int, help="prefetch depth for TLBs"
     )
     parser.add_argument(
         "--pf-type",
         type=str,
-        help="type of prefetch: " "PF_CU, PF_WF, PF_PHASE, PF_STRIDE",
+        help="type of prefetch: PF_CU, PF_WF, PF_PHASE, PF_STRIDE",
     )
     parser.add_argument("--pf-stride", type=int, help="set prefetch stride")
     parser.add_argument(
diff --git a/configs/example/gpufs/runfs.py b/configs/example/gpufs/runfs.py
index 781ce8e27c..86b91034b0 100644
--- a/configs/example/gpufs/runfs.py
+++ b/configs/example/gpufs/runfs.py
@@ -110,13 +110,13 @@ def addRunFSOptions(parser):
         action="store",
         type=str,
         default="16GB",
-        help="Specify the dGPU physical memory" "  size",
+        help="Specify the dGPU physical memory size",
     )
     parser.add_argument(
         "--dgpu-num-dirs",
         type=int,
         default=1,
-        help="Set " "the number of dGPU directories (memory controllers",
+        help="Set the number of dGPU directories (memory controllers",
     )
     parser.add_argument(
         "--dgpu-mem-type",
diff --git a/src/arch/arm/ArmFsWorkload.py b/src/arch/arm/ArmFsWorkload.py
index d0dcde749a..a9474fe119 100644
--- a/src/arch/arm/ArmFsWorkload.py
+++ b/src/arch/arm/ArmFsWorkload.py
@@ -78,10 +78,10 @@ class ArmFsWorkload(KernelWorkload):
     )
 
     panic_on_panic = Param.Bool(
-        False, "Trigger a gem5 panic if the " "guest kernel panics"
+        False, "Trigger a gem5 panic if the guest kernel panics"
     )
     panic_on_oops = Param.Bool(
-        False, "Trigger a gem5 panic if the " "guest kernel oopses"
+        False, "Trigger a gem5 panic if the guest kernel oopses"
     )
 
 
diff --git a/src/arch/arm/ArmPMU.py b/src/arch/arm/ArmPMU.py
index 80288ded57..f21aaff634 100644
--- a/src/arch/arm/ArmPMU.py
+++ b/src/arch/arm/ArmPMU.py
@@ -88,7 +88,7 @@ class ArmPMU(SimObject):
             or isinstance(newObject, SoftwareIncrement)
         ):
             raise TypeError(
-                "argument must be of ProbeEvent or " "SoftwareIncrement type"
+                "argument must be of ProbeEvent or SoftwareIncrement type"
             )
 
         if not self._events:
diff --git a/src/arch/arm/fastmodel/CortexA76/FastModelCortexA76.py b/src/arch/arm/fastmodel/CortexA76/FastModelCortexA76.py
index 577fd535d8..f690fb5097 100644
--- a/src/arch/arm/fastmodel/CortexA76/FastModelCortexA76.py
+++ b/src/arch/arm/fastmodel/CortexA76/FastModelCortexA76.py
@@ -50,7 +50,7 @@ class FastModelCortexA76(IrisBaseCPU):
 
     redistributor = Gicv3CommsTargetSocket("GIC communication target")
     core_reset = IntSinkPin(
-        "Raising this signal will put the core into " "reset mode."
+        "Raising this signal will put the core into reset mode."
     )
     poweron_reset = IntSinkPin(
         "Power on reset. Initializes all the "
diff --git a/src/arch/arm/fastmodel/CortexR52/FastModelCortexR52.py b/src/arch/arm/fastmodel/CortexR52/FastModelCortexR52.py
index 1e267f028f..fe81e72bd0 100644
--- a/src/arch/arm/fastmodel/CortexR52/FastModelCortexR52.py
+++ b/src/arch/arm/fastmodel/CortexR52/FastModelCortexR52.py
@@ -49,7 +49,7 @@ class FastModelCortexR52(IrisBaseCPU):
     flash = AmbaInitiatorSocket(64, "Flash")
     amba = AmbaInitiatorSocket(64, "AMBA initiator socket")
     core_reset = IntSinkPin(
-        "Raising this signal will put the core into " "reset mode."
+        "Raising this signal will put the core into reset mode."
     )
     poweron_reset = IntSinkPin(
         "Power on reset. Initializes all the "
@@ -68,7 +68,7 @@ class FastModelCortexR52(IrisBaseCPU):
     CFGTE = Param.Bool(False, "Equivalent to CFGTHUMBEXCEPTIONS")
     RVBARADDR = Param.UInt32(0, "Equivalent to CFGVECTABLE")
     ase_present = Param.Bool(
-        True, "Set whether the model has been built " "with NEON support"
+        True, "Set whether the model has been built with NEON support"
     )
     dcache_size = Param.UInt16(0x8000, "L1 D-Cache size in bytes")
     flash_enable = Param.Bool(False, "Equivalent to CFGFLASHEN")
@@ -88,33 +88,33 @@ class FastModelCortexR52(IrisBaseCPU):
         "(0=off=default,1=syncState,2=postInsnIO,3=postInsnAll)",
     )
     semihosting_A32_HLT = Param.UInt16(
-        0xF000, "A32 HLT number for " "semihosting calls."
+        0xF000, "A32 HLT number for semihosting calls."
     )
     semihosting_ARM_SVC = Param.UInt32(
-        0x123456, "A32 SVC number for " "semihosting calls."
+        0x123456, "A32 SVC number for semihosting calls."
     )
     semihosting_T32_HLT = Param.UInt8(
-        60, "T32 HLT number for semihosting " "calls."
+        60, "T32 HLT number for semihosting calls."
     )
     semihosting_Thumb_SVC = Param.UInt8(
-        171, "T32 SVC number for " "semihosting calls."
+        171, "T32 SVC number for semihosting calls."
     )
     semihosting_cmd_line = Param.String(
-        "", "Command line available to " "semihosting calls."
+        "", "Command line available to semihosting calls."
     )
     semihosting_cwd = Param.String(
-        "", "Base directory for semihosting " "file access."
+        "", "Base directory for semihosting file access."
     )
     semihosting_enable = Param.Bool(True, "Enable semihosting SVC/HLT traps.")
     semihosting_heap_base = Param.UInt32(0, "Virtual address of heap base.")
     semihosting_heap_limit = Param.UInt32(
-        0xF000000, "Virtual address of " "top of heap."
+        0xF000000, "Virtual address of top of heap."
     )
     semihosting_stack_base = Param.UInt32(
-        0x10000000, "Virtual address of " "base of descending stack."
+        0x10000000, "Virtual address of base of descending stack."
     )
     semihosting_stack_limit = Param.UInt32(
-        0xF000000, "Virtual address of " "stack limit."
+        0xF000000, "Virtual address of stack limit."
     )
     tcm_a_enable = Param.Bool(False, "Equivalent to CFGTCMBOOT")
     tcm_a_size = Param.UInt32(0x4000, "Sets the size of the ATCM(in bytes)")
@@ -122,7 +122,7 @@ class FastModelCortexR52(IrisBaseCPU):
     tcm_c_size = Param.UInt32(0x2000, "Sets the size of the CTCM(in bytes)")
     vfp_dp_present = Param.Bool(
         True,
-        "Whether double-precision floating " "point feature is implemented",
+        "Whether double-precision floating point feature is implemented",
     )
     vfp_enable_at_reset = Param.Bool(
         False,
@@ -171,10 +171,10 @@ class FastModelCortexR52Cluster(SimObject):
     PERIPHBASE = Param.UInt32(0x13080000, "Equivalent to CFGPERIPHBASE")
     cluster_utid = Param.UInt8(0, "Equivalent to CFGCLUSTERUTID")
     cpi_div = Param.UInt32(
-        1, "Divider for calculating CPI " "(Cycles Per Instruction)"
+        1, "Divider for calculating CPI (Cycles Per Instruction)"
     )
     cpi_mul = Param.UInt32(
-        1, "Multiplier for calculating CPI " "(Cycles Per Instruction)"
+        1, "Multiplier for calculating CPI (Cycles Per Instruction)"
     )
     dcache_prefetch_enabled = Param.Bool(
         False,
@@ -194,7 +194,7 @@ class FastModelCortexR52Cluster(SimObject):
         "upstream bus, this is only used when dcache-state_modelled=true.",
     )
     dcache_state_modelled = Param.Bool(
-        False, "Set whether D-cache has " "stateful implementation"
+        False, "Set whether D-cache has stateful implementation"
     )
     dcache_write_access_latency = Param.UInt64(
         0,
@@ -206,7 +206,7 @@ class FastModelCortexR52Cluster(SimObject):
         "is only used when dcache-state_modelled=true.",
     )
     flash_protection_enable_at_reset = Param.Bool(
-        False, "Equivalent to " "CFGFLASHPROTEN"
+        False, "Equivalent to CFGFLASHPROTEN"
     )
     has_flash_protection = Param.Bool(True, "Equivalent to CFGFLASHPROTIMP")
     icache_prefetch_enabled = Param.Bool(
@@ -227,7 +227,7 @@ class FastModelCortexR52Cluster(SimObject):
         "upstream bus, this is only used when icache-state_modelled=true.",
     )
     icache_state_modelled = Param.Bool(
-        False, "Set whether I-cache has " "stateful implementation"
+        False, "Set whether I-cache has stateful implementation"
     )
     memory_ext_slave_base = Param.UInt32(0, "Equivalent to CFGAXISTCMBASEADDR")
     memory_flash_base = Param.UInt32(0, "Equivalent to CFGFLASHBASEADDR")
@@ -237,16 +237,16 @@ class FastModelCortexR52Cluster(SimObject):
         "memory.flash_size = 0 => CFGFLASHIMP = false",
     )
     num_protection_regions_s1 = Param.UInt8(
-        16, "Number of v8-R stage1 " "protection regions"
+        16, "Number of v8-R stage1 protection regions"
     )
     num_protection_regions_s2 = Param.UInt8(
-        16, "Number of v8-R hyp " "protection regions"
+        16, "Number of v8-R hyp protection regions"
     )
     num_spi = Param.UInt16(
-        960, "Number of interrupts (SPI) into the " "internal GIC controller"
+        960, "Number of interrupts (SPI) into the internal GIC controller"
     )
     ram_protection_enable_at_reset = Param.Bool(
-        False, "Equivalent to " "CFGRAMPROTEN"
+        False, "Equivalent to CFGRAMPROTEN"
     )
     has_export_m_port = Param.Bool(
         True,
diff --git a/src/arch/arm/fastmodel/GIC/FastModelGIC.py b/src/arch/arm/fastmodel/GIC/FastModelGIC.py
index 1ad5a979cd..ce0a8c5fb4 100644
--- a/src/arch/arm/fastmodel/GIC/FastModelGIC.py
+++ b/src/arch/arm/fastmodel/GIC/FastModelGIC.py
@@ -90,10 +90,10 @@ class SCFastModelGIC(SystemC_ScModule):
     vPEID_bits = Param.Unsigned(16, "Number of bits of vPEID with GICv4.1.")
     print_mmap = Param.Bool(False, "Print memory map to stdout")
     monolithic = Param.Bool(
-        False, "Indicate that the implementation is not " "distributed"
+        False, "Indicate that the implementation is not distributed"
     )
     direct_lpi_support = Param.Bool(
-        False, "Enable support for LPI " "operations through GICR registers"
+        False, "Enable support for LPI operations through GICR registers"
     )
     cpu_affinities = Param.String(
         "",
@@ -124,42 +124,42 @@ class SCFastModelGIC(SystemC_ScModule):
         "SPI signalling register aliases(0:Disabled)",
     )
     has_two_security_states = Param.Bool(
-        True, "If true, has two security " "states"
+        True, "If true, has two security states"
     )
     DS_fixed_to_zero = Param.Bool(
-        False, "Enable/disable support of single " "security state"
+        False, "Enable/disable support of single security state"
     )
     IIDR = Param.UInt32(0x0, "GICD_IIDR and GICR_IIDR value")
     gicv2_only = Param.Bool(
         False,
-        "If true, when using the GICv3 model, " "pretend to be a GICv2 system",
+        "If true, when using the GICv3 model, pretend to be a GICv2 system",
     )
     STATUSR_implemented = Param.Bool(
-        True, "Determines whether the " "GICR_STATUSR register is implemented."
+        True, "Determines whether the GICR_STATUSR register is implemented."
     )
     priority_bits_implemented = Param.Unsigned(
-        5, "Number of implemented " "priority bits"
+        5, "Number of implemented priority bits"
     )
     itargets_razwi = Param.Bool(
-        False, "If true, the GICD_ITARGETS registers " "are RAZ/WI"
+        False, "If true, the GICD_ITARGETS registers are RAZ/WI"
     )
     icfgr_sgi_mask = Param.UInt32(
-        0x0, "Mask for writes to ICFGR registers " "that configure SGIs"
+        0x0, "Mask for writes to ICFGR registers that configure SGIs"
     )
     icfgr_ppi_mask = Param.UInt32(
-        0xAAAAAAAA, "Mask for writes to ICFGR " "registers that configure PPIs"
+        0xAAAAAAAA, "Mask for writes to ICFGR registers that configure PPIs"
     )
     icfgr_spi_mask = Param.UInt32(
-        0xAAAAAAAA, "Mask for writes to ICFGR " "registers that configure SPIs"
+        0xAAAAAAAA, "Mask for writes to ICFGR registers that configure SPIs"
     )
     icfgr_sgi_reset = Param.UInt32(
-        0xAAAAAAAA, "Reset value for ICFGR " "registers that configure SGIs"
+        0xAAAAAAAA, "Reset value for ICFGR registers that configure SGIs"
     )
     icfgr_ppi_reset = Param.UInt32(
-        0x0, "Reset value for ICFGR regesters " "that configure PPIs"
+        0x0, "Reset value for ICFGR regesters that configure PPIs"
     )
     icfgr_spi_reset = Param.UInt32(
-        0x0, "Reset value for ICFGR regesters " "that configure SPIs"
+        0x0, "Reset value for ICFGR regesters that configure SPIs"
     )
     icfgr_ppi_rsvd_bit = Param.Bool(
         False,
@@ -167,16 +167,16 @@ class SCFastModelGIC(SystemC_ScModule):
         "bits i.e. bit 0,2,4..30 of ICFGRn for n>0",
     )
     igroup_sgi_mask = Param.UInt16(
-        0xFFFF, "Mask for writes to SGI bits in " "IGROUP registers"
+        0xFFFF, "Mask for writes to SGI bits in IGROUP registers"
     )
     igroup_ppi_mask = Param.UInt16(
-        0xFFFF, "Mask for writes to PPI bits in " "IGROUP registers"
+        0xFFFF, "Mask for writes to PPI bits in IGROUP registers"
     )
     igroup_sgi_reset = Param.UInt16(
-        0x0, "Reset value for SGI bits in IGROUP " "registers"
+        0x0, "Reset value for SGI bits in IGROUP registers"
     )
     igroup_ppi_reset = Param.UInt16(
-        0x0, "Reset value for SGI bits in IGROUP " "registers"
+        0x0, "Reset value for SGI bits in IGROUP registers"
     )
     ppi_implemented_mask = Param.UInt16(
         0xFFFF,
@@ -220,16 +220,16 @@ class SCFastModelGIC(SystemC_ScModule):
         "to be instantiated (0=none)",
     )
     its0_base = Param.Addr(
-        0, "Register base address for ITS0 " "(automatic if 0)."
+        0, "Register base address for ITS0 (automatic if 0)."
     )
     its1_base = Param.Addr(
-        0, "Register base address for ITS1 " "(automatic if 0)."
+        0, "Register base address for ITS1 (automatic if 0)."
     )
     its2_base = Param.Addr(
-        0, "Register base address for ITS2 " "(automatic if 0)."
+        0, "Register base address for ITS2 (automatic if 0)."
     )
     its3_base = Param.Addr(
-        0, "Register base address for ITS3 " "(automatic if 0)."
+        0, "Register base address for ITS3 (automatic if 0)."
     )
     gits_pidr = Param.UInt64(
         0x0,
@@ -286,52 +286,52 @@ class SCFastModelGIC(SystemC_ScModule):
         "2 = Virtual Processors; 3 = Physical Processors; 4 = Collections",
     )
     gits_baser0_entry_bytes = Param.Unsigned(
-        8, "Number of bytes required per " "entry for GITS_BASER0 register."
+        8, "Number of bytes required per entry for GITS_BASER0 register."
     )
     gits_baser1_entry_bytes = Param.Unsigned(
-        8, "Number of bytes required per " "entry for GITS_BASER1 register."
+        8, "Number of bytes required per entry for GITS_BASER1 register."
     )
     gits_baser2_entry_bytes = Param.Unsigned(
-        8, "Number of bytes required per " "entry for GITS_BASER2 register."
+        8, "Number of bytes required per entry for GITS_BASER2 register."
     )
     gits_baser3_entry_bytes = Param.Unsigned(
-        8, "Number of bytes required per " "entry for GITS_BASER3 register."
+        8, "Number of bytes required per entry for GITS_BASER3 register."
     )
     gits_baser4_entry_bytes = Param.Unsigned(
-        8, "Number of bytes required per " "entry for GITS_BASER4 register."
+        8, "Number of bytes required per entry for GITS_BASER4 register."
     )
     gits_baser5_entry_bytes = Param.Unsigned(
-        8, "Number of bytes required per " "entry for GITS_BASER5 register."
+        8, "Number of bytes required per entry for GITS_BASER5 register."
     )
     gits_baser6_entry_bytes = Param.Unsigned(
-        8, "Number of bytes required per " "entry for GITS_BASER6 register."
+        8, "Number of bytes required per entry for GITS_BASER6 register."
     )
     gits_baser7_entry_bytes = Param.Unsigned(
-        8, "Number of bytes required per " "entry for GITS_BASER7 register."
+        8, "Number of bytes required per entry for GITS_BASER7 register."
     )
     gits_baser0_indirect_raz = Param.Bool(
-        False, "Indirect field for " "GITS_BASER0 register is RAZ/WI."
+        False, "Indirect field for GITS_BASER0 register is RAZ/WI."
     )
     gits_baser1_indirect_raz = Param.Bool(
-        False, "Indirect field for " "GITS_BASER1 register is RAZ/WI."
+        False, "Indirect field for GITS_BASER1 register is RAZ/WI."
     )
     gits_baser2_indirect_raz = Param.Bool(
-        False, "Indirect field for " "GITS_BASER2 register is RAZ/WI."
+        False, "Indirect field for GITS_BASER2 register is RAZ/WI."
     )
     gits_baser3_indirect_raz = Param.Bool(
-        False, "Indirect field for " "GITS_BASER3 register is RAZ/WI."
+        False, "Indirect field for GITS_BASER3 register is RAZ/WI."
     )
     gits_baser4_indirect_raz = Param.Bool(
-        False, "Indirect field for " "GITS_BASER4 register is RAZ/WI."
+        False, "Indirect field for GITS_BASER4 register is RAZ/WI."
     )
     gits_baser5_indirect_raz = Param.Bool(
-        False, "Indirect field for " "GITS_BASER5 register is RAZ/WI."
+        False, "Indirect field for GITS_BASER5 register is RAZ/WI."
     )
     gits_baser6_indirect_raz = Param.Bool(
-        False, "Indirect field for " "GITS_BASER6 register is RAZ/WI."
+        False, "Indirect field for GITS_BASER6 register is RAZ/WI."
     )
     gits_baser7_indirect_raz = Param.Bool(
-        False, "Indirect field for " "GITS_BASER7 register is RAZ/WI."
+        False, "Indirect field for GITS_BASER7 register is RAZ/WI."
     )
     its_baser_force_page_alignement = Param.Bool(
         True,
@@ -352,25 +352,25 @@ class SCFastModelGIC(SystemC_ScModule):
         "port rather than an AXI4 port).",
     )
     a3_affinity_supported = Param.Bool(
-        False, "Device supports affinity " "level 3 values that are non-zero."
+        False, "Device supports affinity level 3 values that are non-zero."
     )
     SGI_RSS_support = Param.Bool(
-        False, "Device has support for the Range " "Selector feature for SGI"
+        False, "Device has support for the Range Selector feature for SGI"
     )
     gicr_propbaser_read_only = Param.Bool(
-        False, "GICR_PROPBASER register is " "read-only."
+        False, "GICR_PROPBASER register is read-only."
     )
     gicr_propbaser_reset = Param.UInt64(
-        0x0, "Value of GICR_PROPBASER on " "reset."
+        0x0, "Value of GICR_PROPBASER on reset."
     )
     its_device_bits = Param.Unsigned(
-        16, "Number of bits supported for ITS " "device IDs."
+        16, "Number of bits supported for ITS device IDs."
     )
     its_entry_size = Param.Unsigned(
-        8, "Number of bytes required to store " "each entry in the ITT tables."
+        8, "Number of bytes required to store each entry in the ITT tables."
     )
     its_id_bits = Param.Unsigned(
-        16, "Number of interrupt bits supported by " "ITS."
+        16, "Number of interrupt bits supported by ITS."
     )
     its_collection_id_bits = Param.Unsigned(
         0,
@@ -386,7 +386,7 @@ class SCFastModelGIC(SystemC_ScModule):
         "the number supported in memory only. Irrelevant when HCC=0",
     )
     delay_ITS_accesses = Param.Bool(
-        True, "Delay accesses from the ITS until " "GICR_SYNCR is read."
+        True, "Delay accesses from the ITS until GICR_SYNCR is read."
     )
     local_SEIs = Param.Bool(False, "Generate SEI to signal internal issues")
     local_VSEIs = Param.Bool(False, "Generate VSEI to signal internal issues")
@@ -397,10 +397,10 @@ class SCFastModelGIC(SystemC_ScModule):
         "for distributed implementations",
     )
     ITS_hardware_collection_count = Param.Unsigned(
-        0, "Number of hardware " "collections held exclusively in the ITS"
+        0, "Number of hardware collections held exclusively in the ITS"
     )
     ITS_MOVALL_update_collections = Param.Bool(
-        False, "Whether MOVALL command " "updates the collection entires"
+        False, "Whether MOVALL command updates the collection entires"
     )
     ITS_TRANSLATE64R = Param.Bool(
         False,
@@ -409,7 +409,7 @@ class SCFastModelGIC(SystemC_ScModule):
         "interupt[31:0])",
     )
     enable_protocol_checking = Param.Bool(
-        False, "Enable/disable protocol " "checking at cpu interface"
+        False, "Enable/disable protocol checking at cpu interface"
     )
     fixed_routed_spis = Param.String(
         "",
@@ -470,13 +470,13 @@ class SCFastModelGIC(SystemC_ScModule):
     )
     virtual_lpi_support = Param.Bool(
         False,
-        "GICv4 Virtual LPIs and Direct " "injection of Virtual LPIs supported",
+        "GICv4 Virtual LPIs and Direct injection of Virtual LPIs supported",
     )
     virtual_priority_bits = Param.Unsigned(
-        5, "Number of implemented virtual " "priority bits"
+        5, "Number of implemented virtual priority bits"
     )
     LPI_cache_type = Param.Unsigned(
-        1, "Cache type for LPIs, 0:No caching, " "1:Full caching"
+        1, "Cache type for LPIs, 0:No caching, 1:Full caching"
     )
     LPI_cache_check_data = Param.Bool(
         False,
@@ -490,7 +490,7 @@ class SCFastModelGIC(SystemC_ScModule):
     )
     DPG_ARE_only = Param.Bool(
         False,
-        "Limit application of DPG bits to " "interrupt groups for which ARE=1",
+        "Limit application of DPG bits to interrupt groups for which ARE=1",
     )
     ARE_fixed_to_one = Param.Bool(
         False,
@@ -498,7 +498,7 @@ class SCFastModelGIC(SystemC_ScModule):
         "supported and GICD_CTLR.ARE_* is always one",
     )
     legacy_sgi_enable_rao = Param.Bool(
-        False, "Enables for SGI associated " "with an ARE=0 regime are RAO/WI"
+        False, "Enables for SGI associated with an ARE=0 regime are RAO/WI"
     )
     pa_size = Param.Unsigned(48, "Number of valid bits in physical address")
     MSI_IIDR = Param.UInt32(0x0, "Value returned in MSI_IIDR registers.")
@@ -763,7 +763,7 @@ class SCFastModelGIC(SystemC_ScModule):
         "the core fast model.",
     )
     SPI_MBIS = Param.Bool(
-        True, "Distributor supports meassage based " "signaling of SPI"
+        True, "Distributor supports meassage based signaling of SPI"
     )
     SPI_unimplemented = Param.String(
         "",
@@ -772,7 +772,7 @@ class SCFastModelGIC(SystemC_ScModule):
         "'35, 39-42, 73)'",
     )
     irm_razwi = Param.Bool(
-        False, "GICD_IROUTERn.InterruptRoutingMode is " "RAZ/WI"
+        False, "GICD_IROUTERn.InterruptRoutingMode is RAZ/WI"
     )
     common_LPI_configuration = Param.Unsigned(
         0,
@@ -810,7 +810,7 @@ class SCFastModelGIC(SystemC_ScModule):
         "transient loading state when valid=1",
     )
     allow_LPIEN_clear = Param.Bool(
-        False, "Allow RW behaviour on " "GICR_CTLR.LPIEN isntead of set once"
+        False, "Allow RW behaviour on GICR_CTLR.LPIEN isntead of set once"
     )
     GICD_legacy_reg_reserved = Param.Bool(
         False,
diff --git a/src/arch/arm/fastmodel/PL330_DMAC/FastModelPL330.py b/src/arch/arm/fastmodel/PL330_DMAC/FastModelPL330.py
index d6c515cf44..ad43fed237 100644
--- a/src/arch/arm/fastmodel/PL330_DMAC/FastModelPL330.py
+++ b/src/arch/arm/fastmodel/PL330_DMAC/FastModelPL330.py
@@ -87,7 +87,7 @@ class FastModelPL330(SystemC_ScModule):
     cache_lines = Param.UInt32(1, "number of cache lines")
     max_channels = Param.UInt32(8, "virtual channels")
     controller_nsecure = Param.Bool(
-        False, "Controller non-secure at reset " "(boot_manager_ns)"
+        False, "Controller non-secure at reset (boot_manager_ns)"
     )
     irq_nsecure = Param.UInt32(0, "Interrupts non-secure at reset")
     periph_nsecure = Param.Bool(False, "Peripherals non-secure at reset")
diff --git a/src/arch/isa_parser/isa_parser.py b/src/arch/isa_parser/isa_parser.py
index 62f33828a1..aff3c9f63c 100755
--- a/src/arch/isa_parser/isa_parser.py
+++ b/src/arch/isa_parser/isa_parser.py
@@ -133,7 +133,7 @@ class Template(object):
             """
 
             pcstate_decl = (
-                f"{self.parser.namespace}::PCState " "__parserAutoPCState;\n"
+                f"{self.parser.namespace}::PCState __parserAutoPCState;\n"
             )
             myDict["op_decl"] = operands.concatAttrStrings("op_decl")
             if operands.readPC or operands.setPC:
@@ -1098,7 +1098,7 @@ del wrap
     # 'def [signed] bitfield <ID> [<first>:<last>]'
     # This generates a preprocessor macro in the output file.
     def p_def_bitfield_0(self, t):
-        "def_bitfield : DEF opt_signed " "BITFIELD ID LESS INTLIT COLON INTLIT GREATER SEMI"
+        "def_bitfield : DEF opt_signed BITFIELD ID LESS INTLIT COLON INTLIT GREATER SEMI"
         expr = "bits(machInst, %2d, %2d)" % (t[6], t[8])
         if t[2] == "signed":
             expr = "sext<%d>(%s)" % (t[6] - t[8] + 1, expr)
diff --git a/src/cpu/BaseCPU.py b/src/cpu/BaseCPU.py
index 00374d8e54..438d4f45df 100644
--- a/src/cpu/BaseCPU.py
+++ b/src/cpu/BaseCPU.py
@@ -228,7 +228,7 @@ class BaseCPU(ClockedObject):
         else:
             if len(self.isa) != int(self.numThreads):
                 raise RuntimeError(
-                    "Number of ISA instances doesn't " "match thread count"
+                    "Number of ISA instances doesn't match thread count"
                 )
         if len(self.decoder) != 0:
             raise RuntimeError("Decoders should not be set up manually")
diff --git a/src/cpu/minor/BaseMinorCPU.py b/src/cpu/minor/BaseMinorCPU.py
index 7d15ec4cd4..bcdab1bad5 100644
--- a/src/cpu/minor/BaseMinorCPU.py
+++ b/src/cpu/minor/BaseMinorCPU.py
@@ -67,7 +67,7 @@ class MinorOpClassSet(SimObject):
     cxx_class = "gem5::MinorOpClassSet"
 
     opClasses = VectorParam.MinorOpClass(
-        [], "op classes to be matched." "  An empty list means any class"
+        [], "op classes to be matched.  An empty list means any class"
     )
 
 
@@ -83,13 +83,13 @@ class MinorFUTiming(SimObject):
         " (ext_mach_inst & mask) == match",
     )
     suppress = Param.Bool(
-        False, "if true, this inst. is not executed by" " this FU"
+        False, "if true, this inst. is not executed by this FU"
     )
     extraCommitLat = Param.Cycles(
-        0, "extra cycles to stall commit for" " this inst."
+        0, "extra cycles to stall commit for this inst."
     )
     extraCommitLatExpr = Param.TimingExpr(
-        NULL, "extra cycles as a" " run-time evaluated expression"
+        NULL, "extra cycles as a run-time evaluated expression"
     )
     extraAssumedLat = Param.Cycles(
         0,
@@ -109,7 +109,7 @@ class MinorFUTiming(SimObject):
         " class",
     )
     description = Param.String(
-        "", "description string of the decoding/inst." " class"
+        "", "description string of the decoding/inst class"
     )
 
 
@@ -129,11 +129,11 @@ class MinorFU(SimObject):
 
     opClasses = Param.MinorOpClassSet(
         MinorOpClassSet(),
-        "type of operations" " allowed on this functional unit",
+        "type of operations allowed on this functional unit",
     )
     opLat = Param.Cycles(1, "latency in cycles")
     issueLat = Param.Cycles(
-        1, "cycles until another instruction can be" " issued"
+        1, "cycles until another instruction can be issued"
     )
     timings = VectorParam.MinorFUTiming([], "extra decoding rules")
 
diff --git a/src/cpu/o3/BaseO3CPU.py b/src/cpu/o3/BaseO3CPU.py
index 79bd884b87..07d9df6b7f 100644
--- a/src/cpu/o3/BaseO3CPU.py
+++ b/src/cpu/o3/BaseO3CPU.py
@@ -79,47 +79,43 @@ class BaseO3CPU(BaseCPU):
     activity = Param.Unsigned(0, "Initial count")
 
     cacheStorePorts = Param.Unsigned(
-        200, "Cache Ports. " "Constrains stores only."
-    )
-    cacheLoadPorts = Param.Unsigned(
-        200, "Cache Ports. " "Constrains loads only."
+        200, "Cache Ports. Constrains stores only."
     )
+    cacheLoadPorts = Param.Unsigned(200, "Cache Ports. Constrains loads only.")
 
     decodeToFetchDelay = Param.Cycles(1, "Decode to fetch delay")
     renameToFetchDelay = Param.Cycles(1, "Rename to fetch delay")
-    iewToFetchDelay = Param.Cycles(
-        1, "Issue/Execute/Writeback to fetch " "delay"
-    )
+    iewToFetchDelay = Param.Cycles(1, "Issue/Execute/Writeback to fetch delay")
     commitToFetchDelay = Param.Cycles(1, "Commit to fetch delay")
     fetchWidth = Param.Unsigned(8, "Fetch width")
     fetchBufferSize = Param.Unsigned(64, "Fetch buffer size in bytes")
     fetchQueueSize = Param.Unsigned(
-        32, "Fetch queue size in micro-ops " "per-thread"
+        32, "Fetch queue size in micro-ops per-thread"
     )
 
     renameToDecodeDelay = Param.Cycles(1, "Rename to decode delay")
     iewToDecodeDelay = Param.Cycles(
-        1, "Issue/Execute/Writeback to decode " "delay"
+        1, "Issue/Execute/Writeback to decode delay"
     )
     commitToDecodeDelay = Param.Cycles(1, "Commit to decode delay")
     fetchToDecodeDelay = Param.Cycles(1, "Fetch to decode delay")
     decodeWidth = Param.Unsigned(8, "Decode width")
 
     iewToRenameDelay = Param.Cycles(
-        1, "Issue/Execute/Writeback to rename " "delay"
+        1, "Issue/Execute/Writeback to rename delay"
     )
     commitToRenameDelay = Param.Cycles(1, "Commit to rename delay")
     decodeToRenameDelay = Param.Cycles(1, "Decode to rename delay")
     renameWidth = Param.Unsigned(8, "Rename width")
 
     commitToIEWDelay = Param.Cycles(
-        1, "Commit to " "Issue/Execute/Writeback delay"
+        1, "Commit to Issue/Execute/Writeback delay"
     )
     renameToIEWDelay = Param.Cycles(
-        2, "Rename to " "Issue/Execute/Writeback delay"
+        2, "Rename to Issue/Execute/Writeback delay"
     )
     issueToExecuteDelay = Param.Cycles(
-        1, "Issue to execute delay (internal " "to the IEW stage)"
+        1, "Issue to execute delay (internal to the IEW stage)"
     )
     dispatchWidth = Param.Unsigned(8, "Dispatch width")
     issueWidth = Param.Unsigned(8, "Issue width")
@@ -127,7 +123,7 @@ class BaseO3CPU(BaseCPU):
     fuPool = Param.FUPool(DefaultFUPool(), "Functional Unit pool")
 
     iewToCommitDelay = Param.Cycles(
-        1, "Issue/Execute/Writeback to commit " "delay"
+        1, "Issue/Execute/Writeback to commit delay"
     )
     renameToROBDelay = Param.Cycles(1, "Rename to reorder buffer delay")
     commitWidth = Param.Unsigned(8, "Commit width")
@@ -166,13 +162,11 @@ class BaseO3CPU(BaseCPU):
         256, "Number of physical integer registers"
     )
     numPhysFloatRegs = Param.Unsigned(
-        256, "Number of physical floating point " "registers"
-    )
-    numPhysVecRegs = Param.Unsigned(
-        256, "Number of physical vector " "registers"
+        256, "Number of physical floating point registers"
     )
+    numPhysVecRegs = Param.Unsigned(256, "Number of physical vector registers")
     numPhysVecPredRegs = Param.Unsigned(
-        32, "Number of physical predicate " "registers"
+        32, "Number of physical predicate registers"
     )
     # most ISAs don't use condition-code regs, so default is 0
     numPhysCCRegs = Param.Unsigned(0, "Number of physical cc registers")
diff --git a/src/cpu/o3/probe/ElasticTrace.py b/src/cpu/o3/probe/ElasticTrace.py
index ca4fa4ec46..73e632f647 100644
--- a/src/cpu/o3/probe/ElasticTrace.py
+++ b/src/cpu/o3/probe/ElasticTrace.py
@@ -44,10 +44,10 @@ class ElasticTrace(ProbeListenerObject):
     # Trace files for the following params are created in the output directory.
     # User is forced to provide these when an instance of this class is created.
     instFetchTraceFile = Param.String(
-        desc="Protobuf trace file name for " "instruction fetch tracing"
+        desc="Protobuf trace file name for instruction fetch tracing"
     )
     dataDepTraceFile = Param.String(
-        desc="Protobuf trace file name for " "data dependency tracing"
+        desc="Protobuf trace file name for data dependency tracing"
     )
     # The dependency window size param must be equal to or greater than the
     # number of entries in the O3CPU ROB, a typical value is 3 times ROB size
@@ -66,5 +66,5 @@ class ElasticTrace(ProbeListenerObject):
     )
     # Whether to trace virtual addresses for memory accesses
     traceVirtAddr = Param.Bool(
-        False, "Set to true if virtual addresses are " "to be traced."
+        False, "Set to true if virtual addresses are to be traced."
     )
diff --git a/src/cpu/testers/memtest/MemTest.py b/src/cpu/testers/memtest/MemTest.py
index e8492b5402..24bd974804 100644
--- a/src/cpu/testers/memtest/MemTest.py
+++ b/src/cpu/testers/memtest/MemTest.py
@@ -70,7 +70,7 @@ class MemTest(ClockedObject):
         1000000, "Progress report interval (in accesses)"
     )
     progress_check = Param.Cycles(
-        5000000, "Cycles before exiting " "due to lack of progress"
+        5000000, "Cycles before exiting due to lack of progress"
     )
 
     port = RequestPort("Port to the memory system")
@@ -79,5 +79,5 @@ class MemTest(ClockedObject):
     # Add the ability to supress error responses on functional
     # accesses as Ruby needs this
     suppress_func_errors = Param.Bool(
-        False, "Suppress panic when " "functional accesses fail."
+        False, "Suppress panic when functional accesses fail."
     )
diff --git a/src/cpu/testers/rubytest/RubyTester.py b/src/cpu/testers/rubytest/RubyTester.py
index 3fabece8c4..a90cfe1f82 100644
--- a/src/cpu/testers/rubytest/RubyTester.py
+++ b/src/cpu/testers/rubytest/RubyTester.py
@@ -38,7 +38,7 @@ class RubyTester(ClockedObject):
 
     num_cpus = Param.Int("number of cpus / RubyPorts")
     cpuInstDataPort = VectorRequestPort(
-        "cpu combo ports to inst & " "data caches"
+        "cpu combo ports to inst & data caches"
     )
     cpuInstPort = VectorRequestPort("cpu ports to only inst caches")
     cpuDataPort = VectorRequestPort("cpu ports to only data caches")
diff --git a/src/cpu/testers/traffic_gen/BaseTrafficGen.py b/src/cpu/testers/traffic_gen/BaseTrafficGen.py
index b8de198f9a..0d9146756d 100644
--- a/src/cpu/testers/traffic_gen/BaseTrafficGen.py
+++ b/src/cpu/testers/traffic_gen/BaseTrafficGen.py
@@ -82,7 +82,7 @@ class BaseTrafficGen(ClockedObject):
     # progress for a long period of time. The default value is
     # somewhat arbitrary and may well have to be tuned.
     progress_check = Param.Latency(
-        "1ms", "Time before exiting " "due to lack of progress"
+        "1ms", "Time before exiting due to lack of progress"
     )
 
     # Generator type used for applying Stream and/or Substream IDs to requests
diff --git a/src/cpu/testers/traffic_gen/GUPSGen.py b/src/cpu/testers/traffic_gen/GUPSGen.py
index 31b5ed3e10..6b8b3f72df 100644
--- a/src/cpu/testers/traffic_gen/GUPSGen.py
+++ b/src/cpu/testers/traffic_gen/GUPSGen.py
@@ -58,11 +58,11 @@ class GUPSGen(ClockedObject):
     )
 
     update_limit = Param.Int(
-        0, "The number of updates to issue before the" " simulation is over"
+        0, "The number of updates to issue before the simulation is over"
     )
 
     request_queue_size = Param.Int(
-        1024, "Maximum number of parallel" " outstanding requests"
+        1024, "Maximum number of parallel outstanding requests"
     )
 
     init_memory = Param.Bool(
diff --git a/src/cpu/trace/TraceCPU.py b/src/cpu/trace/TraceCPU.py
index e2dc1db6c5..1be16518d7 100644
--- a/src/cpu/trace/TraceCPU.py
+++ b/src/cpu/trace/TraceCPU.py
@@ -64,7 +64,7 @@ class TraceCPU(BaseCPU):
     instTraceFile = Param.String("", "Instruction trace file")
     dataTraceFile = Param.String("", "Data dependency trace file")
     sizeStoreBuffer = Param.Unsigned(
-        16, "Number of entries in the store " "buffer"
+        16, "Number of entries in the store buffer"
     )
     sizeLoadBuffer = Param.Unsigned(16, "Number of entries in the load buffer")
     sizeROB = Param.Unsigned(40, "Number of entries in the re-order buffer")
@@ -74,13 +74,13 @@ class TraceCPU(BaseCPU):
     # changed when frequency is scaled. A default value of 1.0 means the same
     # frequency as was used for generating the traces.
     freqMultiplier = Param.Float(
-        1.0, "Multiplier scale the Trace CPU " "frequency up or down"
+        1.0, "Multiplier scale the Trace CPU frequency up or down"
     )
 
     # Enable exiting when any one Trace CPU completes execution which is set to
     # false by default
     enableEarlyExit = Param.Bool(
-        False, "Exit when any one Trace CPU " "completes execution"
+        False, "Exit when any one Trace CPU completes execution"
     )
 
     # If progress msg interval is set to a non-zero value, it is treated as
diff --git a/src/dev/amdgpu/AMDGPU.py b/src/dev/amdgpu/AMDGPU.py
index c834d3be92..f9d953fc57 100644
--- a/src/dev/amdgpu/AMDGPU.py
+++ b/src/dev/amdgpu/AMDGPU.py
@@ -73,7 +73,7 @@ class AMDGPUDevice(PciDevice):
     rom_binary = Param.String("ROM binary dumped from hardware")
     trace_file = Param.String("MMIO trace collected on hardware")
     checkpoint_before_mmios = Param.Bool(
-        False, "Take a checkpoint before the" " device begins sending MMIOs"
+        False, "Take a checkpoint before the device begins sending MMIOs"
     )
 
     # Specific to Vega10: Vega10 has two SDMA engines these do not have any
diff --git a/src/dev/arm/RealView.py b/src/dev/arm/RealView.py
index 41e1bcfbb4..0009842771 100644
--- a/src/dev/arm/RealView.py
+++ b/src/dev/arm/RealView.py
@@ -117,7 +117,7 @@ class AmbaDmaDevice(DmaDevice):
     pio_addr = Param.Addr("Address for AMBA responder interface")
     pio_latency = Param.Latency(
         "10ns",
-        "Time between action and write/read" "result by AMBA DMA Device",
+        "Time between action and write/readresult by AMBA DMA Device",
     )
     interrupt = Param.ArmInterruptPin("Interrupt that connects to GIC")
     amba_id = Param.UInt32("ID of AMBA device for kernel detection")
@@ -634,17 +634,17 @@ class HDLcd(AmbaDmaDevice):
     cxx_header = "dev/arm/hdlcd.hh"
     cxx_class = "gem5::HDLcd"
     vnc = Param.VncInput(
-        Parent.any, "Vnc server for remote frame buffer " "display"
+        Parent.any, "Vnc server for remote frame buffer display"
     )
     amba_id = 0x00141000
     workaround_swap_rb = Param.Bool(
-        False, "Workaround incorrect color " "selector order in some kernels"
+        False, "Workaround incorrect color selector order in some kernels"
     )
     workaround_dma_line_count = Param.Bool(
-        True, "Workaround incorrect " "DMA line count (off by 1)"
+        True, "Workaround incorrect DMA line count (off by 1)"
     )
     enable_capture = Param.Bool(
-        True, "capture frame to " "system.framebuffer.{extension}"
+        True, "capture frame to system.framebuffer.{extension}"
     )
     frame_format = Param.ImageFormat(
         "Auto", "image format of the captured frame"
@@ -655,7 +655,7 @@ class HDLcd(AmbaDmaDevice):
     pxl_clk = Param.ClockDomain("Pixel clock source")
     pixel_chunk = Param.Unsigned(32, "Number of pixels to handle in one batch")
     virt_refresh_rate = Param.Frequency(
-        "20Hz", "Frame refresh rate " "in KVM mode"
+        "20Hz", "Frame refresh rate in KVM mode"
     )
     _status = "ok"
 
diff --git a/src/dev/arm/SMMUv3.py b/src/dev/arm/SMMUv3.py
index 415eccd742..a1992ecd63 100644
--- a/src/dev/arm/SMMUv3.py
+++ b/src/dev/arm/SMMUv3.py
@@ -48,13 +48,13 @@ class SMMUv3DeviceInterface(ClockedObject):
     device_port = ResponsePort("Device port")
     slave = DeprecatedParam(device_port, "`slave` is now called `device_port`")
     ats_mem_side_port = RequestPort(
-        "ATS mem side port," "sends requests and receives responses"
+        "ATS mem side port,sends requests and receives responses"
     )
     ats_master = DeprecatedParam(
         ats_mem_side_port, "`ats_master` is now called `ats_mem_side_port`"
     )
     ats_dev_side_port = ResponsePort(
-        "ATS dev_side_port," "sends responses and receives requests"
+        "ATS dev_side_port,sends responses and receives requests"
     )
     ats_slave = DeprecatedParam(
         ats_dev_side_port, "`ats_slave` is now called `ats_dev_side_port`"
diff --git a/src/dev/net/Ethernet.py b/src/dev/net/Ethernet.py
index 97da54c118..608f25b617 100644
--- a/src/dev/net/Ethernet.py
+++ b/src/dev/net/Ethernet.py
@@ -107,7 +107,7 @@ class EtherSwitch(SimObject):
 
     dump = Param.EtherDump(NULL, "dump object")
     fabric_speed = Param.NetworkBandwidth(
-        "10Gbps", "switch fabric speed in " "bits per second"
+        "10Gbps", "switch fabric speed in bits per second"
     )
     interface = VectorEtherInt("Ethernet Interface")
     output_buffer_size = Param.MemorySize(
diff --git a/src/dev/pci/PciHost.py b/src/dev/pci/PciHost.py
index ef8a5ab1f5..007b17a30c 100644
--- a/src/dev/pci/PciHost.py
+++ b/src/dev/pci/PciHost.py
@@ -57,7 +57,7 @@ class GenericPciHost(PciHost):
     conf_base = Param.Addr("Config space base address")
     conf_size = Param.Addr("Config space base address")
     conf_device_bits = Param.UInt8(
-        8, "Number of bits used to as an " "offset a devices address space"
+        8, "Number of bits used to as an offset a devices address space"
     )
 
     pci_pio_base = Param.Addr(0, "Base address for PCI IO accesses")
diff --git a/src/dev/serial/Uart.py b/src/dev/serial/Uart.py
index f3348d6775..2ca68b8f12 100644
--- a/src/dev/serial/Uart.py
+++ b/src/dev/serial/Uart.py
@@ -61,7 +61,7 @@ class SimpleUart(Uart):
     byte_order = Param.ByteOrder("little", "Device byte order")
     pio_size = Param.Addr(0x4, "Size of address range")
     end_on_eot = Param.Bool(
-        False, "End the simulation when a EOT is " "received on the UART"
+        False, "End the simulation when a EOT is received on the UART"
     )
 
 
diff --git a/src/gpu-compute/GPU.py b/src/gpu-compute/GPU.py
index 0fdc0b75a7..3a87186a30 100644
--- a/src/gpu-compute/GPU.py
+++ b/src/gpu-compute/GPU.py
@@ -115,7 +115,7 @@ class Wavefront(SimObject):
     wf_size = Param.Int(64, "Wavefront size (in work items)")
     max_ib_size = Param.Int(
         13,
-        "Maximum size (in number of insts) of the " "instruction buffer (IB).",
+        "Maximum size (in number of insts) of the instruction buffer (IB).",
     )
 
 
@@ -134,32 +134,32 @@ class ComputeUnit(ClockedObject):
     num_SIMDs = Param.Int(4, "number of SIMD units per CU")
     num_scalar_cores = Param.Int(1, "number of Scalar cores per CU")
     num_scalar_mem_pipes = Param.Int(
-        1, "number of Scalar memory pipelines " "per CU"
+        1, "number of Scalar memory pipelines per CU"
     )
     simd_width = Param.Int(16, "width (number of lanes) per SIMD unit")
 
     operand_network_length = Param.Int(
-        1, "number of pipe stages of operand " "network"
+        1, "number of pipe stages of operand network"
     )
 
     spbypass_pipe_length = Param.Int(
-        4, "vector ALU Single Precision bypass " "latency"
+        4, "vector ALU Single Precision bypass latency"
     )
 
     dpbypass_pipe_length = Param.Int(
-        4, "vector ALU Double Precision bypass " "latency"
+        4, "vector ALU Double Precision bypass latency"
     )
     scalar_pipe_length = Param.Int(1, "number of pipe stages per scalar ALU")
     issue_period = Param.Int(4, "number of cycles per issue period")
 
     vrf_gm_bus_latency = Param.Int(
-        1, "number of cycles per use of VRF to " "GM bus"
+        1, "number of cycles per use of VRF to GM bus"
     )
     srf_scm_bus_latency = Param.Int(
-        1, "number of cycles per use of SRF " "to Scalar Mem bus"
+        1, "number of cycles per use of SRF to Scalar Mem bus"
     )
     vrf_lm_bus_latency = Param.Int(
-        1, "number of cycles per use of VRF to " "LM bus"
+        1, "number of cycles per use of VRF to LM bus"
     )
 
     num_global_mem_pipes = Param.Int(1, "number of global memory pipes per CU")
@@ -194,10 +194,10 @@ class ComputeUnit(ClockedObject):
     system = Param.System(Parent.any, "system object")
     cu_id = Param.Int("CU id")
     vrf_to_coalescer_bus_width = Param.Int(
-        64, "VRF->Coalescer data bus " "width in bytes"
+        64, "VRF->Coalescer data bus width in bytes"
     )
     coalescer_to_vrf_bus_width = Param.Int(
-        64, "Coalescer->VRF data bus " "width  in bytes"
+        64, "Coalescer->VRF data bus width  in bytes"
     )
 
     memory_port = VectorRequestPort("Port to the memory system")
@@ -211,7 +211,7 @@ class ComputeUnit(ClockedObject):
     perLaneTLB = Param.Bool(False, "enable per-lane TLB")
     prefetch_depth = Param.Int(
         0,
-        "Number of prefetches triggered at a time" "(0 turns off prefetching)",
+        "Number of prefetches triggered at a time(0 turns off prefetching)",
     )
     prefetch_stride = Param.Int(1, "Fixed Prefetch Stride (1 means next-page)")
     prefetch_prev_type = Param.PrefetchType(
@@ -225,24 +225,24 @@ class ComputeUnit(ClockedObject):
     functionalTLB = Param.Bool(False, "Assume TLB causes no delay")
 
     localMemBarrier = Param.Bool(
-        False, "Assume Barriers do not wait on " "kernel end"
+        False, "Assume Barriers do not wait on kernel end"
     )
 
     countPages = Param.Bool(
         False,
-        "Generate per-CU file of all pages " "touched and how many times",
+        "Generate per-CU file of all pages touched and how many times",
     )
     scalar_mem_queue_size = Param.Int(
-        32, "Number of entries in scalar " "memory pipeline's queues"
+        32, "Number of entries in scalar memory pipeline's queues"
     )
     global_mem_queue_size = Param.Int(
-        256, "Number of entries in the global " "memory pipeline's queues"
+        256, "Number of entries in the global memory pipeline's queues"
     )
     local_mem_queue_size = Param.Int(
-        256, "Number of entries in the local " "memory pipeline's queues"
+        256, "Number of entries in the local memory pipeline's queues"
     )
     max_wave_requests = Param.Int(
-        64, "number of pending vector memory " "requests per wavefront"
+        64, "number of pending vector memory requests per wavefront"
     )
     max_cu_tokens = Param.Int(
         4,
@@ -254,18 +254,18 @@ class ComputeUnit(ClockedObject):
     localDataStore = Param.LdsState("the LDS for this CU")
 
     vector_register_file = VectorParam.VectorRegisterFile(
-        "Vector register " "file"
+        "Vector register file"
     )
 
     scalar_register_file = VectorParam.ScalarRegisterFile(
-        "Scalar register " "file"
+        "Scalar register file"
     )
     out_of_order_data_delivery = Param.Bool(
-        False, "enable OoO data delivery" " in the GM pipeline"
+        False, "enable OoO data delivery in the GM pipeline"
     )
     register_manager = Param.RegisterManager("Register Manager")
     fetch_depth = Param.Int(
-        2, "number of i-cache lines that may be " "buffered in the fetch unit."
+        2, "number of i-cache lines that may be buffered in the fetch unit."
     )
 
 
diff --git a/src/gpu-compute/LdsState.py b/src/gpu-compute/LdsState.py
index 637cf11264..c81859331c 100644
--- a/src/gpu-compute/LdsState.py
+++ b/src/gpu-compute/LdsState.py
@@ -41,7 +41,7 @@ class LdsState(ClockedObject):
     size = Param.Int(65536, "the size of the LDS")
     range = Param.AddrRange("64kB", "address space of the LDS")
     bankConflictPenalty = Param.Int(
-        1, "penalty per LDS bank conflict when " "accessing data"
+        1, "penalty per LDS bank conflict when accessing data"
     )
     banks = Param.Int(32, "Number of LDS banks")
     cuPort = ResponsePort("port that goes to the compute unit")
diff --git a/src/learning_gem5/part2/HelloObject.py b/src/learning_gem5/part2/HelloObject.py
index 07ffd01c06..6b9aa8f811 100644
--- a/src/learning_gem5/part2/HelloObject.py
+++ b/src/learning_gem5/part2/HelloObject.py
@@ -36,7 +36,7 @@ class HelloObject(SimObject):
 
     time_to_wait = Param.Latency("Time before firing the event")
     number_of_fires = Param.Int(
-        1, "Number of times to fire the event before " "goodbye"
+        1, "Number of times to fire the event before goodbye"
     )
 
     goodbye_object = Param.GoodbyeObject("A goodbye object")
@@ -51,5 +51,5 @@ class GoodbyeObject(SimObject):
         "1kB", "Size of buffer to fill with goodbye"
     )
     write_bandwidth = Param.MemoryBandwidth(
-        "100MB/s", "Bandwidth to fill " "the buffer"
+        "100MB/s", "Bandwidth to fill the buffer"
     )
diff --git a/src/mem/AddrMapper.py b/src/mem/AddrMapper.py
index 1897236526..932fbf14e1 100644
--- a/src/mem/AddrMapper.py
+++ b/src/mem/AddrMapper.py
@@ -50,13 +50,13 @@ class AddrMapper(SimObject):
 
     # one port in each direction
     mem_side_port = RequestPort(
-        "This port sends requests and " "receives responses"
+        "This port sends requests and receives responses"
     )
     master = DeprecatedParam(
         mem_side_port, "`master` is now called `mem_side_port`"
     )
     cpu_side_port = ResponsePort(
-        "This port receives requests and " "sends responses"
+        "This port receives requests and sends responses"
     )
     slave = DeprecatedParam(
         cpu_side_port, "`slave` is now called `cpu_side_port`"
diff --git a/src/mem/Bridge.py b/src/mem/Bridge.py
index a82f410d56..8131d62ef8 100644
--- a/src/mem/Bridge.py
+++ b/src/mem/Bridge.py
@@ -46,13 +46,13 @@ class Bridge(ClockedObject):
     cxx_class = "gem5::Bridge"
 
     mem_side_port = RequestPort(
-        "This port sends requests and " "receives responses"
+        "This port sends requests and receives responses"
     )
     master = DeprecatedParam(
         mem_side_port, "`master` is now called `mem_side_port`"
     )
     cpu_side_port = ResponsePort(
-        "This port receives requests and " "sends responses"
+        "This port receives requests and sends responses"
     )
     slave = DeprecatedParam(
         cpu_side_port, "`slave` is now called `cpu_side_port`"
diff --git a/src/mem/CommMonitor.py b/src/mem/CommMonitor.py
index 288aeb5a07..ab946f1e91 100644
--- a/src/mem/CommMonitor.py
+++ b/src/mem/CommMonitor.py
@@ -49,13 +49,13 @@ class CommMonitor(SimObject):
 
     # one port in each direction
     mem_side_port = RequestPort(
-        "This port sends requests and " "receives responses"
+        "This port sends requests and receives responses"
     )
     master = DeprecatedParam(
         mem_side_port, "`master` is now called `mem_side_port`"
     )
     cpu_side_port = ResponsePort(
-        "This port receives requests and " "sends responses"
+        "This port receives requests and sends responses"
     )
     slave = DeprecatedParam(
         cpu_side_port, "`slave` is now called `cpu_side_port`"
@@ -70,10 +70,10 @@ class CommMonitor(SimObject):
 
     # histogram of burst length of packets (not using sample period)
     burst_length_bins = Param.Unsigned(
-        "20", "# bins in burst length " "histograms"
+        "20", "# bins in burst length histograms"
     )
     disable_burst_length_hists = Param.Bool(
-        False, "Disable burst length " "histograms"
+        False, "Disable burst length histograms"
     )
 
     # bandwidth per sample period
@@ -95,18 +95,18 @@ class CommMonitor(SimObject):
     # outstanding requests (that did not yet get a response) per
     # sample period
     outstanding_bins = Param.Unsigned(
-        "20", "# bins in outstanding " "requests histograms"
+        "20", "# bins in outstanding requests histograms"
     )
     disable_outstanding_hists = Param.Bool(
-        False, "Disable outstanding " "requests histograms"
+        False, "Disable outstanding requests histograms"
     )
 
     # transactions (requests) observed per sample period
     transaction_bins = Param.Unsigned(
-        "20", "# bins in transaction " "count histograms"
+        "20", "# bins in transaction count histograms"
     )
     disable_transaction_hists = Param.Bool(
-        False, "Disable transaction count " "histograms"
+        False, "Disable transaction count histograms"
     )
 
     # address distributions (heatmaps) with associated address masks
diff --git a/src/mem/DRAMInterface.py b/src/mem/DRAMInterface.py
index 87bc11b94f..9c041e5838 100644
--- a/src/mem/DRAMInterface.py
+++ b/src/mem/DRAMInterface.py
@@ -57,7 +57,7 @@ class DRAMInterface(MemInterface):
 
     # enforce a limit on the number of accesses per row
     max_accesses_per_row = Param.Unsigned(
-        16, "Max accesses per row before " "closing"
+        16, "Max accesses per row before closing"
     )
 
     # default to 0 bank groups per rank, indicating bank group architecture
@@ -139,7 +139,7 @@ class DRAMInterface(MemInterface):
     # write-to-read, same rank turnaround penalty for same bank group
     tWTR_L = Param.Latency(
         Self.tWTR,
-        "Write to read, same rank switching " "time, same bank group",
+        "Write to read, same rank switching time, same bank group",
     )
 
     # minimum precharge to precharge delay time
diff --git a/src/mem/DRAMsim3.py b/src/mem/DRAMsim3.py
index 0da9c1067b..40f61608d8 100644
--- a/src/mem/DRAMsim3.py
+++ b/src/mem/DRAMsim3.py
@@ -44,11 +44,11 @@ class DRAMsim3(AbstractMemory):
 
     # A single port for now
     port = ResponsePort(
-        "port for receiving requests from" "the CPU or other requestor"
+        "port for receiving requests fromthe CPU or other requestor"
     )
 
     configFile = Param.String(
-        "ext/dramsim3/DRAMsim3/configs/" "DDR4_8Gb_x8_2400.ini",
+        "ext/dramsim3/DRAMsim3/configs/DDR4_8Gb_x8_2400.ini",
         "The configuration file to use with DRAMSim3",
     )
     filePath = Param.String(
diff --git a/src/mem/ExternalSlave.py b/src/mem/ExternalSlave.py
index ce2d5b8b36..b7cd9b21f3 100644
--- a/src/mem/ExternalSlave.py
+++ b/src/mem/ExternalSlave.py
@@ -45,7 +45,7 @@ class ExternalSlave(SimObject):
     port = SlavePort("Slave port")
 
     addr_ranges = VectorParam.AddrRange(
-        [], "Addresses served by" " this port's external agent"
+        [], "Addresses served by this port's external agent"
     )
 
     port_type = Param.String(
diff --git a/src/mem/MemChecker.py b/src/mem/MemChecker.py
index 9fc0d7c59c..fcee653265 100644
--- a/src/mem/MemChecker.py
+++ b/src/mem/MemChecker.py
@@ -51,16 +51,16 @@ class MemCheckerMonitor(SimObject):
 
     # one port in each direction
     mem_side_port = RequestPort(
-        "This port sends requests and receives " "responses"
+        "This port sends requests and receives responses"
     )
     master = DeprecatedParam(
-        mem_side_port, "`master` is now called " "`mem_side_port`"
+        mem_side_port, "`master` is now called `mem_side_port`"
     )
     cpu_side_port = ResponsePort(
-        "This port receives requests and sends " "responses"
+        "This port receives requests and sends responses"
     )
     slave = DeprecatedParam(
-        cpu_side_port, "`slave` is now called " "`cpu_side_port`"
+        cpu_side_port, "`slave` is now called `cpu_side_port`"
     )
     warn_only = Param.Bool(False, "Warn about violations only")
     memchecker = Param.MemChecker("Instance shared with other monitors")
diff --git a/src/mem/MemCtrl.py b/src/mem/MemCtrl.py
index c8acd22ed9..549616ccba 100644
--- a/src/mem/MemCtrl.py
+++ b/src/mem/MemCtrl.py
@@ -63,7 +63,7 @@ class MemCtrl(QoSMemCtrl):
 
     # Interface to memory media
     dram = Param.MemInterface(
-        "Memory interface, can be a DRAM" "or an NVM interface "
+        "Memory interface, can be a DRAMor an NVM interface "
     )
 
     # read and write buffer depths are set in the interface
@@ -79,12 +79,12 @@ class MemCtrl(QoSMemCtrl):
 
     # minimum write bursts to schedule before switching back to reads
     min_writes_per_switch = Param.Unsigned(
-        16, "Minimum write bursts before " "switching to reads"
+        16, "Minimum write bursts before switching to reads"
     )
 
     # minimum read bursts to schedule before switching back to writes
     min_reads_per_switch = Param.Unsigned(
-        16, "Minimum read bursts before " "switching to writes"
+        16, "Minimum read bursts before switching to writes"
     )
 
     # scheduler, address map and page policy
diff --git a/src/mem/MemDelay.py b/src/mem/MemDelay.py
index 430ffb77f5..eb4aaa7bf1 100644
--- a/src/mem/MemDelay.py
+++ b/src/mem/MemDelay.py
@@ -44,13 +44,13 @@ class MemDelay(ClockedObject):
     abstract = True
 
     mem_side_port = RequestPort(
-        "This port sends requests and " "receives responses"
+        "This port sends requests and receives responses"
     )
     master = DeprecatedParam(
         mem_side_port, "`master` is now called `mem_side_port`"
     )
     cpu_side_port = ResponsePort(
-        "This port receives requests and " "sends responses"
+        "This port receives requests and sends responses"
     )
     slave = DeprecatedParam(
         cpu_side_port, "`slave` is now called `cpu_side_port`"
diff --git a/src/mem/MemInterface.py b/src/mem/MemInterface.py
index a32a3b5ec9..60bf99bf47 100644
--- a/src/mem/MemInterface.py
+++ b/src/mem/MemInterface.py
@@ -73,11 +73,11 @@ class MemInterface(AbstractMemory):
     device_size = Param.MemorySize("Size of memory device")
     # the physical organisation of the memory
     device_bus_width = Param.Unsigned(
-        "data bus width in bits for each " "memory device/chip"
+        "data bus width in bits for each memory device/chip"
     )
     burst_length = Param.Unsigned("Burst lenght (BL) in beats")
     device_rowbuffer_size = Param.MemorySize(
-        "Page (row buffer) size per " "device/chip"
+        "Page (row buffer) size per device/chip"
     )
     devices_per_rank = Param.Unsigned("Number of devices/chips per rank")
     ranks_per_channel = Param.Unsigned("Number of ranks per channel")
@@ -96,7 +96,7 @@ class MemInterface(AbstractMemory):
     # Read/Write requests with data size larger than one full burst are broken
     # down into multiple requests in the controller
     tBURST = Param.Latency(
-        "Burst duration " "(typically burst length / 2 cycles)"
+        "Burst duration (typically burst length / 2 cycles)"
     )
 
     # write-to-read, same rank turnaround penalty
diff --git a/src/mem/PortTerminator.py b/src/mem/PortTerminator.py
index 05fdd1177d..85d0af0771 100644
--- a/src/mem/PortTerminator.py
+++ b/src/mem/PortTerminator.py
@@ -35,8 +35,8 @@ class PortTerminator(SimObject):
     cxx_class = "gem5::PortTerminator"
 
     req_ports = VectorRequestPort(
-        "Vector port for connecting terminating " "response ports."
+        "Vector port for connecting terminating response ports."
     )
     resp_ports = VectorResponsePort(
-        "Vector port for terminating " "request ports."
+        "Vector port for terminating request ports."
     )
diff --git a/src/mem/SerialLink.py b/src/mem/SerialLink.py
index a40b714258..6b767050d6 100644
--- a/src/mem/SerialLink.py
+++ b/src/mem/SerialLink.py
@@ -50,13 +50,13 @@ class SerialLink(ClockedObject):
     cxx_class = "gem5::SerialLink"
 
     mem_side_port = RequestPort(
-        "This port sends requests and " "receives responses"
+        "This port sends requests and receives responses"
     )
     master = DeprecatedParam(
         mem_side_port, "`master` is now called `mem_side_port`"
     )
     cpu_side_port = ResponsePort(
-        "This port receives requests and " "sends responses"
+        "This port receives requests and sends responses"
     )
     slave = DeprecatedParam(
         cpu_side_port, "`slave` is now called `cpu_side_port`"
@@ -71,7 +71,7 @@ class SerialLink(ClockedObject):
     #  link belongs to and the number of lanes:
     num_lanes = Param.Unsigned(
         1,
-        "Number of parallel lanes inside the serial" "link. (aka. lane width)",
+        "Number of parallel lanes inside the seriallink. (aka. lane width)",
     )
     link_speed = Param.UInt64(
         1,
diff --git a/src/mem/XBar.py b/src/mem/XBar.py
index dbadccb861..d0becc22a8 100644
--- a/src/mem/XBar.py
+++ b/src/mem/XBar.py
@@ -51,13 +51,13 @@ class BaseXBar(ClockedObject):
     cxx_class = "gem5::BaseXBar"
 
     cpu_side_ports = VectorResponsePort(
-        "Vector port for connecting " "mem side ports"
+        "Vector port for connecting mem side ports"
     )
     slave = DeprecatedParam(
         cpu_side_ports, "`slave` is now called `cpu_side_ports`"
     )
     mem_side_ports = VectorRequestPort(
-        "Vector port for connecting " "cpu side ports"
+        "Vector port for connecting cpu side ports"
     )
     master = DeprecatedParam(
         mem_side_ports, "`master` is now called `mem_side_ports`"
@@ -98,7 +98,7 @@ class BaseXBar(ClockedObject):
     # a two-level hierarchical lookup. This is useful e.g. for the PCI
     # xbar configuration.
     use_default_range = Param.Bool(
-        False, "Perform address mapping for " "the default port"
+        False, "Perform address mapping for the default port"
     )
 
 
@@ -130,12 +130,12 @@ class CoherentXBar(BaseXBar):
     # already committed to responding, by establishing if the crossbar
     # is the point of coherency or not.
     point_of_coherency = Param.Bool(
-        False, "Consider this crossbar the " "point of coherency"
+        False, "Consider this crossbar the point of coherency"
     )
 
     # Specify whether this crossbar is the point of unification.
     point_of_unification = Param.Bool(
-        False, "Consider this crossbar the " "point of unification"
+        False, "Consider this crossbar the point of unification"
     )
 
     system = Param.System(Parent.any, "System that the crossbar belongs to.")
diff --git a/src/mem/cache/Cache.py b/src/mem/cache/Cache.py
index 1dfab1957f..49665dde91 100644
--- a/src/mem/cache/Cache.py
+++ b/src/mem/cache/Cache.py
@@ -61,10 +61,10 @@ class WriteAllocator(SimObject):
     # allow whole-line write coalescing, and eventually switches to a
     # write-no-allocate policy.
     coalesce_limit = Param.Unsigned(
-        2, "Consecutive lines written before " "delaying for coalescing"
+        2, "Consecutive lines written before delaying for coalescing"
     )
     no_allocate_limit = Param.Unsigned(
-        12, "Consecutive lines written before" " skipping allocation"
+        12, "Consecutive lines written before skipping allocation"
     )
 
     delay_threshold = Param.Unsigned(
@@ -129,7 +129,7 @@ class BaseCache(ClockedObject):
     # co-allocatable with another existing entry of the same superblock,
     # so try move the block to co-allocate it
     move_contractions = Param.Bool(
-        True, "Try to co-allocate blocks that " "contract"
+        True, "Try to co-allocate blocks that contract"
     )
 
     sequential_access = Param.Bool(
diff --git a/src/mem/cache/compressors/Compressors.py b/src/mem/cache/compressors/Compressors.py
index c8f82c55a1..eef5f77a18 100644
--- a/src/mem/cache/compressors/Compressors.py
+++ b/src/mem/cache/compressors/Compressors.py
@@ -232,7 +232,7 @@ class FrequentValuesCompressor(BaseCacheCompressor):
     )
     check_saturation = Param.Bool(
         False,
-        "Whether the counters should be " "manipulated in case of saturation.",
+        "Whether the counters should be manipulated in case of saturation.",
     )
 
     vft_assoc = Param.Int(16, "Associativity of the VFT.")
diff --git a/src/mem/cache/tags/Tags.py b/src/mem/cache/tags/Tags.py
index 4e7f632bfb..ade187fa39 100644
--- a/src/mem/cache/tags/Tags.py
+++ b/src/mem/cache/tags/Tags.py
@@ -139,7 +139,7 @@ class FALRU(BaseTags):
     cxx_class = "gem5::FALRU"
 
     min_tracked_cache_size = Param.MemorySize(
-        "128KiB", "Minimum cache size" " for which we track statistics"
+        "128KiB", "Minimum cache size for which we track statistics"
     )
 
     # This tag uses its own embedded indexing
diff --git a/src/mem/probes/StackDistProbe.py b/src/mem/probes/StackDistProbe.py
index 2a9550bea5..5b44d9d333 100644
--- a/src/mem/probes/StackDistProbe.py
+++ b/src/mem/probes/StackDistProbe.py
@@ -44,7 +44,7 @@ class StackDistProbe(BaseMemProbe):
     cxx_class = "gem5::StackDistProbe"
 
     system = Param.System(
-        Parent.any, "System to use when determining system cache " "line size"
+        Parent.any, "System to use when determining system cache line size"
     )
 
     line_size = Param.Unsigned(
diff --git a/src/mem/ruby/slicc_interface/Controller.py b/src/mem/ruby/slicc_interface/Controller.py
index 185812a044..42447f1cca 100644
--- a/src/mem/ruby/slicc_interface/Controller.py
+++ b/src/mem/ruby/slicc_interface/Controller.py
@@ -49,7 +49,7 @@ class RubyController(ClockedObject):
 
     version = Param.Int("")
     addr_ranges = VectorParam.AddrRange(
-        [AllMemory], "Address range this " "controller responds to"
+        [AllMemory], "Address range this controller responds to"
     )
     cluster_id = Param.UInt32(0, "Id of this controller's cluster")
 
diff --git a/src/mem/ruby/system/GPUCoalescer.py b/src/mem/ruby/system/GPUCoalescer.py
index da459de133..fcf49e38b7 100644
--- a/src/mem/ruby/system/GPUCoalescer.py
+++ b/src/mem/ruby/system/GPUCoalescer.py
@@ -44,7 +44,7 @@ class RubyGPUCoalescer(RubyPort):
         40 * 64, "max requests (incl. prefetches) outstanding"
     )
     max_coalesces_per_cycle = Param.Int(
-        1, "max instructions that can be " "coalesced in a single cycle"
+        1, "max instructions that can be coalesced in a single cycle"
     )
 
     icache = Param.RubyCache("")
diff --git a/src/mem/slicc/symbols/StateMachine.py b/src/mem/slicc/symbols/StateMachine.py
index 55ee527c41..b5af9ca8ed 100644
--- a/src/mem/slicc/symbols/StateMachine.py
+++ b/src/mem/slicc/symbols/StateMachine.py
@@ -184,7 +184,7 @@ class StateMachine(Symbol):
         if type_ident == "%s_TBE" % self.ident:
             if self.TBEType != None:
                 self.error(
-                    "Multiple Transaction Buffer types in a " "single machine."
+                    "Multiple Transaction Buffer types in a single machine."
                 )
             self.TBEType = type
 
diff --git a/src/python/gem5/components/memory/dramsim_3.py b/src/python/gem5/components/memory/dramsim_3.py
index b7eba919fc..e5c1877fb5 100644
--- a/src/python/gem5/components/memory/dramsim_3.py
+++ b/src/python/gem5/components/memory/dramsim_3.py
@@ -53,7 +53,7 @@ def config_ds3(mem_type: str, num_chnls: int) -> Tuple[str, str]:
         )
     elif os.path.isfile(input_file):
         raise Exception(
-            "The configuration file '" + input_file + "' cannot " " be found."
+            "The configuration file '" + input_file + "' cannot  be found."
         )
 
     output_file = "/tmp/" + mem_type + "_chnls" + str(num_chnls) + ".ini"
diff --git a/src/python/m5/SimObject.py b/src/python/m5/SimObject.py
index 91cbbc59bd..b5dfca9752 100644
--- a/src/python/m5/SimObject.py
+++ b/src/python/m5/SimObject.py
@@ -478,7 +478,7 @@ def cxxMethod(*args, **kwargs):
         args, varargs, keywords, defaults = inspect.getargspec(func)
         if varargs or keywords:
             raise ValueError(
-                "Wrapped methods must not contain variable " "arguments"
+                "Wrapped methods must not contain variable arguments"
             )
 
         # Create tuples of (argument, default)
@@ -571,7 +571,7 @@ class SimObjectCliWrapper(object):
                     setattr(sim_object, key, val)
                 else:
                     raise SimObjectCliWrapperException(
-                        "tried to set or unsettable" "object parameter: " + key
+                        "tried to set or unsettableobject parameter: " + key
                     )
             else:
                 raise SimObjectCliWrapperException(
diff --git a/src/python/m5/params.py b/src/python/m5/params.py
index 8e96f4668e..e76380bc40 100644
--- a/src/python/m5/params.py
+++ b/src/python/m5/params.py
@@ -1457,7 +1457,7 @@ class MetaEnum(MetaParamValue):
                 cls.map[val] = idx
         else:
             raise TypeError(
-                "Enum-derived class must define " "attribute 'map' or 'vals'"
+                "Enum-derived class must define attribute 'map' or 'vals'"
             )
 
         if cls.is_class:
diff --git a/src/sim/System.py b/src/sim/System.py
index a5722e80fd..eb1280f248 100644
--- a/src/sim/System.py
+++ b/src/sim/System.py
@@ -77,7 +77,7 @@ class System(SimObject):
     # mmap). By enabling this flag, we accommodate cases where a large
     # (but sparse) memory is simulated.
     mmap_using_noreserve = Param.Bool(
-        False, "mmap the backing store " "without reserving swap"
+        False, "mmap the backing store without reserving swap"
     )
 
     # The memory ranges are to be populated when creating the system
@@ -89,7 +89,7 @@ class System(SimObject):
 
     # The ranges backed by a shadowed ROM
     shadow_rom_ranges = VectorParam.AddrRange(
-        [], "Ranges  backed by a " "shadowed ROM"
+        [], "Ranges  backed by a shadowed ROM"
     )
 
     shared_backstore = Param.String(
diff --git a/src/systemc/tests/verify.py b/src/systemc/tests/verify.py
index 5191062e50..0d8ce3cf75 100755
--- a/src/systemc/tests/verify.py
+++ b/src/systemc/tests/verify.py
@@ -148,7 +148,7 @@ class RunPhase(TestPhaseBase):
             "--timeout",
             type=int,
             metavar="SECONDS",
-            help="Time limit for each run in seconds, " "0 to disable.",
+            help="Time limit for each run in seconds, 0 to disable.",
             default=60,
         )
         parser.add_argument(
@@ -604,7 +604,7 @@ filter_opts = parser.add_mutually_exclusive_group()
 filter_opts.add_argument(
     "--filter",
     default="True",
-    help="Python expression which filters tests based " "on their properties",
+    help="Python expression which filters tests based on their properties",
 )
 filter_opts.add_argument(
     "--filter-file",
diff --git a/tests/configs/gpu-randomtest-ruby.py b/tests/configs/gpu-randomtest-ruby.py
index ceede7f500..cfc65526e5 100644
--- a/tests/configs/gpu-randomtest-ruby.py
+++ b/tests/configs/gpu-randomtest-ruby.py
@@ -57,7 +57,7 @@ parser.add_argument(
     help="Number of GPU Command Processors (CP)",
 )
 parser.add_argument(
-    "--simds-per-cu", type=int, default=4, help="SIMD units" "per CU"
+    "--simds-per-cu", type=int, default=4, help="SIMD unitsper CU"
 )
 parser.add_argument(
     "--wf-size", type=int, default=64, help="Wavefront size(in workitems)"
@@ -66,7 +66,7 @@ parser.add_argument(
     "--wfs-per-simd",
     type=int,
     default=10,
-    help="Number of " "WF slots per SIMD",
+    help="Number of WF slots per SIMD",
 )
 
 # Add the ruby specific and protocol specific options
diff --git a/tests/configs/gpu-ruby.py b/tests/configs/gpu-ruby.py
index e45c446373..7606168a98 100644
--- a/tests/configs/gpu-ruby.py
+++ b/tests/configs/gpu-ruby.py
@@ -79,13 +79,13 @@ parser.add_argument(
     help="Number of GPU Command Processors (CP)",
 )
 parser.add_argument(
-    "--simds-per-cu", type=int, default=4, help="SIMD units" "per CU"
+    "--simds-per-cu", type=int, default=4, help="SIMD unitsper CU"
 )
 parser.add_argument(
     "--cu-per-sqc",
     type=int,
     default=4,
-    help="number of CUs" "sharing an SQC (icache, and thus icache TLB)",
+    help="number of CUssharing an SQC (icache, and thus icache TLB)",
 )
 parser.add_argument(
     "--wf-size", type=int, default=64, help="Wavefront size(in workitems)"
@@ -94,7 +94,7 @@ parser.add_argument(
     "--wfs-per-simd",
     type=int,
     default=8,
-    help="Number of " "WF slots per SIMD",
+    help="Number of WF slots per SIMD",
 )
 parser.add_argument(
     "--sp-bypass-path-length",
@@ -194,13 +194,11 @@ parser.add_argument(
     action="store_true",
     help="Count Page Accesses and output in per-CU output files",
 )
-parser.add_argument(
-    "--TLB-prefetch", type=int, help="prefetch depth for" "TLBs"
-)
+parser.add_argument("--TLB-prefetch", type=int, help="prefetch depth forTLBs")
 parser.add_argument(
     "--pf-type",
     type=str,
-    help="type of prefetch: " "PF_CU, PF_WF, PF_PHASE, PF_STRIDE",
+    help="type of prefetch: PF_CU, PF_WF, PF_PHASE, PF_STRIDE",
 )
 parser.add_argument("--pf-stride", type=int, help="set prefetch stride")
 parser.add_argument(
diff --git a/tests/gem5/fixture.py b/tests/gem5/fixture.py
index 65b5454cae..c8bc79ff64 100644
--- a/tests/gem5/fixture.py
+++ b/tests/gem5/fixture.py
@@ -170,7 +170,7 @@ class SConsFixture(UniqueFixture):
             )
         else:
             log.test_log.message(
-                "Building the following targets." " This may take a while."
+                "Building the following targets. This may take a while."
             )
             log.test_log.message("%s" % (", ".join(self.targets)))
             log.test_log.message(
@@ -391,7 +391,7 @@ class DownloadedArchive(DownloadedProgram):
             except (urllib.error.URLError, socket.timeout):
                 # Problem checking the server, use the old files.
                 log.test_log.debug(
-                    "Could not contact server. " "Binaries may be old."
+                    "Could not contact server. Binaries may be old."
                 )
                 return
             # If the server version is more recent, download it
diff --git a/util/cpt_upgraders/isa-is-simobject.py b/util/cpt_upgraders/isa-is-simobject.py
index 3f0132ce36..077d4d98bf 100644
--- a/util/cpt_upgraders/isa-is-simobject.py
+++ b/util/cpt_upgraders/isa-is-simobject.py
@@ -94,7 +94,7 @@ def upgrader(cpt):
         else:
             if cpt.items(sec):
                 raise ValueError(
-                    "Unexpected populated ISA section in old " "checkpoint"
+                    "Unexpected populated ISA section in old checkpoint"
                 )
 
         for (key, value) in options:
diff --git a/util/gerrit-bot/extract_gitcookies.py b/util/gerrit-bot/extract_gitcookies.py
index 24f2ca0afa..ef17be10de 100755
--- a/util/gerrit-bot/extract_gitcookies.py
+++ b/util/gerrit-bot/extract_gitcookies.py
@@ -62,7 +62,7 @@ if __name__ == "__main__":
     )
     parser.add_argument(
         "input",
-        help=("Path to a .gitcookies file or a file with " "a similar format"),
+        help=("Path to a .gitcookies file or a file with a similar format"),
     )
     parser.add_argument("output", help="Path to the output file")
     args = parser.parse_args()
diff --git a/util/maint/list_changes.py b/util/maint/list_changes.py
index 9ada2b52f0..465ae1abb0 100755
--- a/util/maint/list_changes.py
+++ b/util/maint/list_changes.py
@@ -179,14 +179,14 @@ def _main():
         "-u",
         type=str,
         default="origin/master",
-        help="Upstream branch for comparison. " "Default: %(default)s",
+        help="Upstream branch for comparison. Default: %(default)s",
     )
     parser.add_argument(
         "--feature",
         "-f",
         type=str,
         default="HEAD",
-        help="Feature branch for comparison. " "Default: %(default)s",
+        help="Feature branch for comparison. Default: %(default)s",
     )
     parser.add_argument(
         "--show-unknown",
@@ -199,7 +199,7 @@ def _main():
     parser.add_argument(
         "--deep-search",
         action="store_true",
-        help="Use a deep search to find incorrectly " "rebased changes",
+        help="Use a deep search to find incorrectly rebased changes",
     )
     parser.add_argument(
         "paths",
diff --git a/util/maint/show_changes_by_file.py b/util/maint/show_changes_by_file.py
index be222620a0..ea739f78fe 100755
--- a/util/maint/show_changes_by_file.py
+++ b/util/maint/show_changes_by_file.py
@@ -95,14 +95,14 @@ def _main():
         "-u",
         type=str,
         default="origin/master",
-        help="Upstream branch for comparison. " "Default: %(default)s",
+        help="Upstream branch for comparison. Default: %(default)s",
     )
     parser.add_argument(
         "--feature",
         "-f",
         type=str,
         default="HEAD",
-        help="Feature branch for comparison. " "Default: %(default)s",
+        help="Feature branch for comparison. Default: %(default)s",
     )
     parser.add_argument(
         "paths",
diff --git a/util/plot_dram/dram_sweep_plot.py b/util/plot_dram/dram_sweep_plot.py
index 8acb6ab681..ad7bc5e3b6 100755
--- a/util/plot_dram/dram_sweep_plot.py
+++ b/util/plot_dram/dram_sweep_plot.py
@@ -90,7 +90,7 @@ def main():
 
     for line in simout:
         match = re.match(
-            "DRAM sweep with " "burst: (\d+), banks: (\d+), max stride: (\d+)",
+            "DRAM sweep with burst: (\d+), banks: (\d+), max stride: (\d+)",
             line,
         )
         if match:

From aeb617868f805dc058752d4ae3fd832a27941594 Mon Sep 17 00:00:00 2001
From: Hoa Nguyen <hoanguyen@ucdavis.edu>
Date: Mon, 14 Nov 2022 03:13:35 +0000
Subject: [PATCH 029/492] stdlib: Add MESI Three Level cache hierarchy

Change-Id: Ibea6b71d62b71f7817f6860bbceed9e1915bb002
Signed-off-by: Hoa Nguyen <hoanguyen@ucdavis.edu>
Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/65591
Reviewed-by: Jason Lowe-Power <power.jg@gmail.com>
Tested-by: kokoro <noreply+kokoro@google.com>
Maintainer: Jason Lowe-Power <power.jg@gmail.com>
---
 src/python/SConscript                         |  23 ++
 .../abstract_three_level_cache_hierarchy.py   |  52 ++++
 .../ruby/caches/mesi_three_level/__init__.py  |   0
 .../ruby/caches/mesi_three_level/directory.py |  51 ++++
 .../caches/mesi_three_level/dma_controller.py |  43 ++++
 .../ruby/caches/mesi_three_level/l1_cache.py  | 110 +++++++++
 .../ruby/caches/mesi_three_level/l2_cache.py  | 113 +++++++++
 .../ruby/caches/mesi_three_level/l3_cache.py  |  89 +++++++
 .../ruby/mesi_three_level_cache_hierarchy.py  | 225 ++++++++++++++++++
 9 files changed, 706 insertions(+)
 create mode 100644 src/python/gem5/components/cachehierarchies/abstract_three_level_cache_hierarchy.py
 create mode 100644 src/python/gem5/components/cachehierarchies/ruby/caches/mesi_three_level/__init__.py
 create mode 100644 src/python/gem5/components/cachehierarchies/ruby/caches/mesi_three_level/directory.py
 create mode 100644 src/python/gem5/components/cachehierarchies/ruby/caches/mesi_three_level/dma_controller.py
 create mode 100644 src/python/gem5/components/cachehierarchies/ruby/caches/mesi_three_level/l1_cache.py
 create mode 100644 src/python/gem5/components/cachehierarchies/ruby/caches/mesi_three_level/l2_cache.py
 create mode 100644 src/python/gem5/components/cachehierarchies/ruby/caches/mesi_three_level/l3_cache.py
 create mode 100644 src/python/gem5/components/cachehierarchies/ruby/mesi_three_level_cache_hierarchy.py

diff --git a/src/python/SConscript b/src/python/SConscript
index e7e464e2df..aeeb8925a3 100644
--- a/src/python/SConscript
+++ b/src/python/SConscript
@@ -61,6 +61,8 @@ PySource('gem5.components.cachehierarchies',
     'gem5/components/cachehierarchies/abstract_cache_hierarchy.py')
 PySource('gem5.components.cachehierarchies',
     'gem5/components/cachehierarchies/abstract_two_level_cache_hierarchy.py')
+PySource('gem5.components.cachehierarchies',
+    'gem5/components/cachehierarchies/abstract_three_level_cache_hierarchy.py')
 PySource('gem5.components.cachehierarchies.chi',
     'gem5/components/cachehierarchies/chi/__init__.py')
 PySource('gem5.components.cachehierarchies.chi',
@@ -108,6 +110,9 @@ PySource('gem5.components.cachehierarchies.ruby',
     'gem5/components/cachehierarchies/ruby/abstract_ruby_cache_hierarchy.py')
 PySource('gem5.components.cachehierarchies.ruby',
     'gem5/components/cachehierarchies/ruby/mesi_two_level_cache_hierarchy.py')
+PySource('gem5.components.cachehierarchies.ruby',
+    'gem5/components/cachehierarchies/ruby/'
+    'mesi_three_level_cache_hierarchy.py')
 PySource('gem5.components.cachehierarchies.ruby',
     'gem5/components/cachehierarchies/ruby/mi_example_cache_hierarchy.py')
 PySource('gem5.components.cachehierarchies.ruby.caches',
@@ -131,6 +136,24 @@ PySource('gem5.components.cachehierarchies.ruby.caches.mesi_two_level',
     'gem5/components/cachehierarchies/ruby/caches/mesi_two_level/l1_cache.py')
 PySource('gem5.components.cachehierarchies.ruby.caches.mesi_two_level',
     'gem5/components/cachehierarchies/ruby/caches/mesi_two_level/l2_cache.py')
+PySource('gem5.components.cachehierarchies.ruby.caches.mesi_three_level',
+    'gem5/components/cachehierarchies/ruby/caches/mesi_three_level/'
+    '__init__.py')
+PySource('gem5.components.cachehierarchies.ruby.caches.mesi_three_level',
+    'gem5/components/cachehierarchies/ruby/caches/mesi_three_level/'
+    'directory.py')
+PySource('gem5.components.cachehierarchies.ruby.caches.mesi_three_level',
+    'gem5/components/cachehierarchies/ruby/caches/mesi_three_level/'
+    'dma_controller.py')
+PySource('gem5.components.cachehierarchies.ruby.caches.mesi_three_level',
+    'gem5/components/cachehierarchies/ruby/caches/mesi_three_level/'
+    'l1_cache.py')
+PySource('gem5.components.cachehierarchies.ruby.caches.mesi_three_level',
+    'gem5/components/cachehierarchies/ruby/caches/mesi_three_level/'
+    'l2_cache.py')
+PySource('gem5.components.cachehierarchies.ruby.caches.mesi_three_level',
+    'gem5/components/cachehierarchies/ruby/caches/mesi_three_level/'
+    'l3_cache.py')
 PySource('gem5.components.cachehierarchies.ruby.caches.mi_example',
     'gem5/components/cachehierarchies/ruby/caches/mi_example/__init__.py')
 PySource('gem5.components.cachehierarchies.ruby.caches.mi_example',
diff --git a/src/python/gem5/components/cachehierarchies/abstract_three_level_cache_hierarchy.py b/src/python/gem5/components/cachehierarchies/abstract_three_level_cache_hierarchy.py
new file mode 100644
index 0000000000..4d2f21abdc
--- /dev/null
+++ b/src/python/gem5/components/cachehierarchies/abstract_three_level_cache_hierarchy.py
@@ -0,0 +1,52 @@
+# Copyright (c) 2022 The Regents of the University of California
+# All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are
+# met: redistributions of source code must retain the above copyright
+# notice, this list of conditions and the following disclaimer;
+# redistributions in binary form must reproduce the above copyright
+# notice, this list of conditions and the following disclaimer in the
+# documentation and/or other materials provided with the distribution;
+# neither the name of the copyright holders nor the names of its
+# contributors may be used to endorse or promote products derived from
+# this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+
+class AbstractThreeLevelCacheHierarchy:
+    """
+    An abstract three-level hierarchy with configurable size and associativity
+    for each of L1, L2, and L3 caches.
+    """
+
+    def __init__(
+        self,
+        l1i_size: str,
+        l1i_assoc: int,
+        l1d_size: str,
+        l1d_assoc: int,
+        l2_size: str,
+        l2_assoc: int,
+        l3_size: str,
+        l3_assoc: int,
+    ):
+        self._l1i_size = l1i_size
+        self._l1i_assoc = l1i_assoc
+        self._l1d_size = l1d_size
+        self._l1d_assoc = l1d_assoc
+        self._l2_size = l2_size
+        self._l2_assoc = l2_assoc
+        self._l3_size = l3_size
+        self._l3_assoc = l3_assoc
diff --git a/src/python/gem5/components/cachehierarchies/ruby/caches/mesi_three_level/__init__.py b/src/python/gem5/components/cachehierarchies/ruby/caches/mesi_three_level/__init__.py
new file mode 100644
index 0000000000..e69de29bb2
diff --git a/src/python/gem5/components/cachehierarchies/ruby/caches/mesi_three_level/directory.py b/src/python/gem5/components/cachehierarchies/ruby/caches/mesi_three_level/directory.py
new file mode 100644
index 0000000000..cd4f166fed
--- /dev/null
+++ b/src/python/gem5/components/cachehierarchies/ruby/caches/mesi_three_level/directory.py
@@ -0,0 +1,51 @@
+# Copyright (c) 2021 The Regents of the University of California
+# All Rights Reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are
+# met: redistributions of source code must retain the above copyright
+# notice, this list of conditions and the following disclaimer;
+# redistributions in binary form must reproduce the above copyright
+# notice, this list of conditions and the following disclaimer in the
+# documentation and/or other materials provided with the distribution;
+# neither the name of the copyright holders nor the names of its
+# contributors may be used to endorse or promote products derived from
+# this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+from ......utils.override import overrides
+from ..abstract_directory import AbstractDirectory
+
+from m5.objects import MessageBuffer, RubyDirectoryMemory
+
+
+class Directory(AbstractDirectory):
+    def __init__(self, network, cache_line_size, mem_range, port):
+
+        super().__init__(network, cache_line_size)
+        self.addr_ranges = [mem_range]
+        self.directory = RubyDirectoryMemory()
+        # Connect this directory to the memory side.
+        self.memory_out_port = port
+
+    @overrides(AbstractDirectory)
+    def connectQueues(self, network):
+        self.requestToDir = MessageBuffer()
+        self.requestToDir.in_port = network.out_port
+        self.responseToDir = MessageBuffer()
+        self.responseToDir.in_port = network.out_port
+        self.responseFromDir = MessageBuffer()
+        self.responseFromDir.out_port = network.in_port
+        self.requestToMemory = MessageBuffer()
+        self.responseFromMemory = MessageBuffer()
diff --git a/src/python/gem5/components/cachehierarchies/ruby/caches/mesi_three_level/dma_controller.py b/src/python/gem5/components/cachehierarchies/ruby/caches/mesi_three_level/dma_controller.py
new file mode 100644
index 0000000000..ab76d4cb5e
--- /dev/null
+++ b/src/python/gem5/components/cachehierarchies/ruby/caches/mesi_three_level/dma_controller.py
@@ -0,0 +1,43 @@
+# Copyright (c) 2021 The Regents of the University of California
+# All Rights Reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are
+# met: redistributions of source code must retain the above copyright
+# notice, this list of conditions and the following disclaimer;
+# redistributions in binary form must reproduce the above copyright
+# notice, this list of conditions and the following disclaimer in the
+# documentation and/or other materials provided with the distribution;
+# neither the name of the copyright holders nor the names of its
+# contributors may be used to endorse or promote products derived from
+# this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+from ......utils.override import overrides
+from ..abstract_dma_controller import AbstractDMAController
+
+from m5.objects import MessageBuffer
+
+
+class DMAController(AbstractDMAController):
+    def __init__(self, network, cache_line_size):
+        super().__init__(network, cache_line_size)
+
+    @overrides(AbstractDMAController)
+    def connectQueues(self, network):
+        self.mandatoryQueue = MessageBuffer()
+        self.responseFromDir = MessageBuffer(ordered=True)
+        self.responseFromDir.in_port = network.out_port
+        self.requestToDir = MessageBuffer()
+        self.requestToDir.out_port = network.in_port
diff --git a/src/python/gem5/components/cachehierarchies/ruby/caches/mesi_three_level/l1_cache.py b/src/python/gem5/components/cachehierarchies/ruby/caches/mesi_three_level/l1_cache.py
new file mode 100644
index 0000000000..2ce13d3b08
--- /dev/null
+++ b/src/python/gem5/components/cachehierarchies/ruby/caches/mesi_three_level/l1_cache.py
@@ -0,0 +1,110 @@
+# Copyright (c) 2022 The Regents of the University of California
+# All Rights Reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are
+# met: redistributions of source code must retain the above copyright
+# notice, this list of conditions and the following disclaimer;
+# redistributions in binary form must reproduce the above copyright
+# notice, this list of conditions and the following disclaimer in the
+# documentation and/or other materials provided with the distribution;
+# neither the name of the copyright holders nor the names of its
+# contributors may be used to endorse or promote products derived from
+# this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+from .....processors.abstract_core import AbstractCore
+from ......isas import ISA
+from ......utils.override import *
+
+from m5.objects import (
+    MessageBuffer,
+    RubyPrefetcher,
+    RubyCache,
+    ClockDomain,
+    LRURP,
+    L0Cache_Controller,
+)
+
+import math
+
+# L0Cache_Controller is the ruby backend's terminology corresponding to
+# L1 cache in stdlib terms.
+class L1Cache(L0Cache_Controller):
+
+    _version = 0
+
+    @classmethod
+    def versionCount(cls):
+        cls._version += 1
+        return cls._version - 1
+
+    def __init__(
+        self,
+        l1i_size,
+        l1i_assoc,
+        l1d_size,
+        l1d_assoc,
+        network,
+        core: AbstractCore,
+        cache_line_size,
+        target_isa: ISA,
+        clk_domain: ClockDomain,
+    ):
+        super().__init__()
+
+        # This is the cache memory object that stores the cache data and tags
+        self.Icache = RubyCache(
+            size=l1i_size,
+            assoc=l1i_assoc,
+            start_index_bit=self.getBlockSizeBits(),
+            is_icache=True,
+            replacement_policy=LRURP(),
+        )
+        self.Dcache = RubyCache(
+            size=l1d_size,
+            assoc=l1d_assoc,
+            start_index_bit=self.getBlockSizeBits(),
+            is_icache=False,
+            replacement_policy=LRURP(),
+        )
+        self.clk_domain = clk_domain
+        self.prefetcher = RubyPrefetcher()
+        self.send_evictions = core.requires_send_evicts()
+        self.transitions_per_cycle = 32
+        self.enable_prefetch = False
+        self.request_latency = 2
+        self.response_latency = 2
+
+        self.version = self.versionCount()
+        self._cache_line_size = cache_line_size
+        self.connectQueues(network)
+
+    def getBlockSizeBits(self):
+        bits = int(math.log(self._cache_line_size, 2))
+        if 2**bits != self._cache_line_size.value:
+            raise Exception("Cache line size is not a power of 2!")
+        return bits
+
+    def connectQueues(self, network):
+        self.prefetchQueue = MessageBuffer()
+        self.mandatoryQueue = MessageBuffer()
+        self.optionalQueue = MessageBuffer()
+
+        # bufferToL1 and bufferFromL1 are ruby backend terminology.
+        # In stdlib terms, they are bufferToL2 and bufferFromL2 respectively.
+        # These buffers are connections between L1 cache and L2 cache.
+        # Later on, we'll need to connect those buffers to L2.
+        self.bufferToL1 = MessageBuffer(ordered=True)
+        self.bufferFromL1 = MessageBuffer(ordered=True)
diff --git a/src/python/gem5/components/cachehierarchies/ruby/caches/mesi_three_level/l2_cache.py b/src/python/gem5/components/cachehierarchies/ruby/caches/mesi_three_level/l2_cache.py
new file mode 100644
index 0000000000..e29f566191
--- /dev/null
+++ b/src/python/gem5/components/cachehierarchies/ruby/caches/mesi_three_level/l2_cache.py
@@ -0,0 +1,113 @@
+# Copyright (c) 2022 The Regents of the University of California
+# All Rights Reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are
+# met: redistributions of source code must retain the above copyright
+# notice, this list of conditions and the following disclaimer;
+# redistributions in binary form must reproduce the above copyright
+# notice, this list of conditions and the following disclaimer in the
+# documentation and/or other materials provided with the distribution;
+# neither the name of the copyright holders nor the names of its
+# contributors may be used to endorse or promote products derived from
+# this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+from .....processors.abstract_core import AbstractCore
+from ......isas import ISA
+from ......utils.override import *
+
+from m5.objects import (
+    MessageBuffer,
+    RubyPrefetcher,
+    RubyCache,
+    ClockDomain,
+    L1Cache_Controller,
+)
+
+import math
+
+# L1Cache_Controller is ruby backend's terminology corresponding to
+# L2Cache in stdlib's terms
+class L2Cache(L1Cache_Controller):
+
+    _version = 0
+
+    @classmethod
+    def versionCount(cls):
+        cls._version += 1
+        return cls._version - 1
+
+    def __init__(
+        self,
+        l2_size,
+        l2_assoc,
+        network,
+        core: AbstractCore,
+        num_l3Caches,
+        cache_line_size,
+        cluster_id,
+        target_isa: ISA,
+        clk_domain: ClockDomain,
+    ):
+        super().__init__()
+
+        # This is the cache memory object that stores the cache data and tags
+        self.cache = RubyCache(
+            size=l2_size,
+            assoc=l2_assoc,
+            start_index_bit=self.getBlockSizeBits(),
+            is_icache=False,
+        )
+        # l2_select_num_bits is ruby backend terminology.
+        # In stdlib terms, it is number of bits for selecting L3 cache.
+        self.l2_select_num_bits = int(math.log(num_l3Caches, 2))
+        self.cluster_id = cluster_id
+        self.clk_domain = clk_domain
+        self.prefetcher = RubyPrefetcher()
+        self.transitions_per_cycle = 32
+        # l1_request_latency, l1_response_latency, to_l2_latency are
+        # ruby backend terminology.
+        # In stdlib terms, they are L2 cache request latency, L2 response
+        # latency, and to L3 cache latency respectively.
+        self.l1_request_latency = 2
+        self.l1_response_latency = 2
+        self.to_l2_latency = 1
+
+        self.version = self.versionCount()
+        self._cache_line_size = cache_line_size
+        self.connectQueues(network)
+
+    def connectQueues(self, network):
+        self.mandatoryQueue = MessageBuffer()
+        self.optionalQueue = MessageBuffer()
+
+        # In the below terms, L2 are ruby backend terminology.
+        # They are L3 in stdlib.
+
+        # Request from/to L2 buffers
+        self.requestFromL2 = MessageBuffer()
+        self.requestFromL2.in_port = network.out_port
+        self.requestToL2 = MessageBuffer()
+        self.requestToL2.out_port = network.in_port
+
+        # Response from/to L2 buffers
+        self.responseFromL2 = MessageBuffer()
+        self.responseFromL2.in_port = network.out_port
+        self.responseToL2 = MessageBuffer()
+        self.responseToL2.out_port = network.in_port
+
+        # Unblock to L2 buffer
+        self.unblockToL2 = MessageBuffer()
+        self.unblockToL2.out_port = network.in_port
diff --git a/src/python/gem5/components/cachehierarchies/ruby/caches/mesi_three_level/l3_cache.py b/src/python/gem5/components/cachehierarchies/ruby/caches/mesi_three_level/l3_cache.py
new file mode 100644
index 0000000000..6d46d1fdf0
--- /dev/null
+++ b/src/python/gem5/components/cachehierarchies/ruby/caches/mesi_three_level/l3_cache.py
@@ -0,0 +1,89 @@
+# Copyright (c) 2022 The Regents of the University of California
+# All Rights Reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are
+# met: redistributions of source code must retain the above copyright
+# notice, this list of conditions and the following disclaimer;
+# redistributions in binary form must reproduce the above copyright
+# notice, this list of conditions and the following disclaimer in the
+# documentation and/or other materials provided with the distribution;
+# neither the name of the copyright holders nor the names of its
+# contributors may be used to endorse or promote products derived from
+# this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+from m5.objects import MessageBuffer, RubyCache, L2Cache_Controller
+
+import math
+
+# L2Cache_Controller is ruby backend's terminology corresponding to
+# L3 cache in stdlib.
+class L3Cache(L2Cache_Controller):
+
+    _version = 0
+
+    @classmethod
+    def versionCount(cls):
+        cls._version += 1
+        return cls._version - 1
+
+    def __init__(
+        self,
+        l3_size,
+        l3_assoc,
+        network,
+        num_l3Caches,
+        cache_line_size,
+        cluster_id,
+    ):
+        super().__init__()
+
+        # This is the cache memory object that stores the cache data and tags
+        self.L2cache = RubyCache(
+            size=l3_size,
+            assoc=l3_assoc,
+            start_index_bit=self.getIndexBit(num_l3Caches),
+        )
+
+        self.transitions_per_cycle = 4
+        self.cluster_id = cluster_id
+        self.l2_request_latency = 2
+        self.l2_response_latency = 2
+        self.to_l1_latency = 1
+
+        self.version = self.versionCount()
+        self._cache_line_size = cache_line_size
+        self.connectQueues(network)
+
+    def getIndexBit(self, num_l3caches):
+        l3_bits = int(math.log(num_l3caches, 2))
+        bits = int(math.log(self._cache_line_size, 2)) + l3_bits
+        return bits
+
+    def connectQueues(self, network):
+        # In the below terms, L1 and L2 are ruby backend terminology.
+        # In stdlib, they are L2 and L3 caches respectively.
+        self.DirRequestFromL2Cache = MessageBuffer()
+        self.DirRequestFromL2Cache.out_port = network.in_port
+        self.L1RequestFromL2Cache = MessageBuffer()
+        self.L1RequestFromL2Cache.out_port = network.in_port
+        self.responseFromL2Cache = MessageBuffer()
+        self.responseFromL2Cache.out_port = network.in_port
+        self.unblockToL2Cache = MessageBuffer()
+        self.unblockToL2Cache.in_port = network.out_port
+        self.L1RequestToL2Cache = MessageBuffer()
+        self.L1RequestToL2Cache.in_port = network.out_port
+        self.responseToL2Cache = MessageBuffer()
+        self.responseToL2Cache.in_port = network.out_port
diff --git a/src/python/gem5/components/cachehierarchies/ruby/mesi_three_level_cache_hierarchy.py b/src/python/gem5/components/cachehierarchies/ruby/mesi_three_level_cache_hierarchy.py
new file mode 100644
index 0000000000..89b6b21177
--- /dev/null
+++ b/src/python/gem5/components/cachehierarchies/ruby/mesi_three_level_cache_hierarchy.py
@@ -0,0 +1,225 @@
+# Copyright (c) 2022 The Regents of the University of California
+# All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are
+# met: redistributions of source code must retain the above copyright
+# notice, this list of conditions and the following disclaimer;
+# redistributions in binary form must reproduce the above copyright
+# notice, this list of conditions and the following disclaimer in the
+# documentation and/or other materials provided with the distribution;
+# neither the name of the copyright holders nor the names of its
+# contributors may be used to endorse or promote products derived from
+# this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+
+from .abstract_ruby_cache_hierarchy import AbstractRubyCacheHierarchy
+from ..abstract_three_level_cache_hierarchy import (
+    AbstractThreeLevelCacheHierarchy,
+)
+from ....coherence_protocol import CoherenceProtocol
+from ....isas import ISA
+from ...boards.abstract_board import AbstractBoard
+from ....utils.requires import requires
+
+from .topologies.simple_pt2pt import SimplePt2Pt
+from .caches.mesi_three_level.l1_cache import L1Cache
+from .caches.mesi_three_level.l2_cache import L2Cache
+from .caches.mesi_three_level.l3_cache import L3Cache
+from .caches.mesi_three_level.directory import Directory
+from .caches.mesi_three_level.dma_controller import DMAController
+
+from m5.objects import RubySystem, RubySequencer, DMASequencer, RubyPortProxy
+
+
+class MESIThreeLevelCacheHierarchy(
+    AbstractRubyCacheHierarchy, AbstractThreeLevelCacheHierarchy
+):
+    """A three-level private-L1-private-L2-shared-L3 MESI hierarchy.
+
+    The on-chip network is a point-to-point all-to-all simple network.
+    """
+
+    def __init__(
+        self,
+        l1i_size: str,
+        l1i_assoc: str,
+        l1d_size: str,
+        l1d_assoc: str,
+        l2_size: str,
+        l2_assoc: str,
+        l3_size: str,
+        l3_assoc: str,
+        num_l3_banks: int,
+    ):
+        AbstractRubyCacheHierarchy.__init__(self=self)
+        AbstractThreeLevelCacheHierarchy.__init__(
+            self,
+            l1i_size=l1i_size,
+            l1i_assoc=l1i_assoc,
+            l1d_size=l1d_size,
+            l1d_assoc=l1d_assoc,
+            l2_size=l2_size,
+            l2_assoc=l2_assoc,
+            l3_size=l3_size,
+            l3_assoc=l3_assoc,
+        )
+
+        self._num_l3_banks = num_l3_banks
+
+    def incorporate_cache(self, board: AbstractBoard) -> None:
+
+        requires(
+            coherence_protocol_required=CoherenceProtocol.MESI_THREE_LEVEL
+        )
+
+        cache_line_size = board.get_cache_line_size()
+
+        self.ruby_system = RubySystem()
+
+        # MESI_Three_Level needs 3 virtual networks
+        self.ruby_system.number_of_virtual_networks = 3
+
+        self.ruby_system.network = SimplePt2Pt(self.ruby_system)
+        self.ruby_system.network.number_of_virtual_networks = 3
+
+        self._l1_controllers = []
+        self._l2_controllers = []
+        self._l3_controllers = []
+        cores = board.get_processor().get_cores()
+        for core_idx, core in enumerate(cores):
+            l1_cache = L1Cache(
+                l1i_size=self._l1i_size,
+                l1i_assoc=self._l1i_assoc,
+                l1d_size=self._l1d_size,
+                l1d_assoc=self._l1d_assoc,
+                network=self.ruby_system.network,
+                core=core,
+                cache_line_size=cache_line_size,
+                target_isa=board.processor.get_isa(),
+                clk_domain=board.get_clock_domain(),
+            )
+
+            l1_cache.sequencer = RubySequencer(
+                version=core_idx,
+                dcache=l1_cache.Dcache,
+                clk_domain=l1_cache.clk_domain,
+            )
+
+            if board.has_io_bus():
+                l1_cache.sequencer.connectIOPorts(board.get_io_bus())
+
+            l1_cache.ruby_system = self.ruby_system
+
+            core.connect_icache(l1_cache.sequencer.in_ports)
+            core.connect_dcache(l1_cache.sequencer.in_ports)
+
+            core.connect_walker_ports(
+                l1_cache.sequencer.in_ports, l1_cache.sequencer.in_ports
+            )
+
+            # Connect the interrupt ports
+            if board.get_processor().get_isa() == ISA.X86:
+                int_req_port = l1_cache.sequencer.interrupt_out_port
+                int_resp_port = l1_cache.sequencer.in_ports
+                core.connect_interrupt(int_req_port, int_resp_port)
+            else:
+                core.connect_interrupt()
+
+            self._l1_controllers.append(l1_cache)
+
+            # For testing purpose, we use point-to-point topology. So, the
+            # assigned cluster ID is ignored by ruby.
+            # Thus, we set cluster_id to 0.
+            l2_cache = L2Cache(
+                l2_size=self._l2_size,
+                l2_assoc=self._l2_assoc,
+                network=self.ruby_system.network,
+                core=core,
+                num_l3Caches=self._num_l3_banks,
+                cache_line_size=cache_line_size,
+                cluster_id=0,
+                target_isa=board.processor.get_isa(),
+                clk_domain=board.get_clock_domain(),
+            )
+
+            l2_cache.ruby_system = self.ruby_system
+            # L0Cache in the ruby backend is l1 cache in stdlib
+            # L1Cache in the ruby backend is l2 cache in stdlib
+            l2_cache.bufferFromL0 = l1_cache.bufferToL1
+            l2_cache.bufferToL0 = l1_cache.bufferFromL1
+
+            self._l2_controllers.append(l2_cache)
+
+        for _ in range(self._num_l3_banks):
+            l3_cache = L3Cache(
+                l3_size=self._l3_size,
+                l3_assoc=self._l3_assoc,
+                network=self.ruby_system.network,
+                num_l3Caches=self._num_l3_banks,
+                cache_line_size=cache_line_size,
+                cluster_id=0,  # cluster_id is ignored in point-to-point topology
+            )
+            l3_cache.ruby_system = self.ruby_system
+            self._l3_controllers.append(l3_cache)
+
+        # TODO: Make this prettier: The problem is not being able to proxy
+        # the ruby system correctly
+        for cache in self._l3_controllers:
+            cache.ruby_system = self.ruby_system
+
+        self._directory_controllers = [
+            Directory(self.ruby_system.network, cache_line_size, range, port)
+            for range, port in board.get_mem_ports()
+        ]
+        # TODO: Make this prettier: The problem is not being able to proxy
+        # the ruby system correctly
+        for dir in self._directory_controllers:
+            dir.ruby_system = self.ruby_system
+
+        self._dma_controllers = []
+        if board.has_dma_ports():
+            dma_ports = board.get_dma_ports()
+            for i, port in enumerate(dma_ports):
+                ctrl = DMAController(self.ruby_system.network, cache_line_size)
+                ctrl.dma_sequencer = DMASequencer(version=i, in_ports=port)
+                self._dma_controllers.append(ctrl)
+                ctrl.ruby_system = self.ruby_system
+
+        self.ruby_system.num_of_sequencers = len(self._l1_controllers) + len(
+            self._dma_controllers
+        )
+        self.ruby_system.l1_controllers = self._l1_controllers
+        self.ruby_system.l2_controllers = self._l2_controllers
+        self.ruby_system.l3_controllers = self._l3_controllers
+        self.ruby_system.directory_controllers = self._directory_controllers
+
+        if len(self._dma_controllers) != 0:
+            self.ruby_system.dma_controllers = self._dma_controllers
+
+        # Create the network and connect the controllers.
+        self.ruby_system.network.connectControllers(
+            self._l1_controllers
+            + self._l2_controllers
+            + self._l3_controllers
+            + self._directory_controllers
+            + self._dma_controllers
+        )
+        self.ruby_system.network.setup_buffers()
+
+        # Set up a proxy port for the system_port. Used for load binaries and
+        # other functional-only things.
+        self.ruby_system.sys_port_proxy = RubyPortProxy()
+        board.connect_system_port(self.ruby_system.sys_port_proxy.in_ports)

From d89d77f1c29a80d75e26c2683265e8891f5ac33e Mon Sep 17 00:00:00 2001
From: Yu-hsin Wang <yuhsingw@google.com>
Date: Wed, 12 Oct 2022 11:20:44 +0800
Subject: [PATCH 030/492] fastmodel: correct the Iris namespace for FastModel
 11.19

Change-Id: I3f899699ce27ffdc5bbed311fec9f38c62027a80
Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/66071
Reviewed-by: Earl Ou <shunhsingou@google.com>
Tested-by: kokoro <noreply+kokoro@google.com>
Maintainer: Bobby Bruce <bbruce@ucdavis.edu>
---
 src/arch/arm/fastmodel/iris/thread_context.cc | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/src/arch/arm/fastmodel/iris/thread_context.cc b/src/arch/arm/fastmodel/iris/thread_context.cc
index b12536dbdd..45e020d832 100644
--- a/src/arch/arm/fastmodel/iris/thread_context.cc
+++ b/src/arch/arm/fastmodel/iris/thread_context.cc
@@ -441,9 +441,9 @@ void
 ThreadContext::readMem(
     iris::MemorySpaceId space, Addr addr, void *p, size_t size)
 {
-    iris::r0master::MemoryReadResult r;
+    iris::MemoryReadResult r;
     auto err = call().memory_read(_instId, r, space, addr, 1, size);
-    panic_if(err != iris::r0master::E_ok, "readMem failed.");
+    panic_if(err != iris::E_ok, "readMem failed.");
     std::memcpy(p, r.data.data(), size);
 }
 
@@ -455,7 +455,7 @@ ThreadContext::writeMem(
     std::memcpy(data.data(), p, size);
     iris::MemoryWriteResult r;
     auto err = call().memory_write(_instId, r, space, addr, 1, size, data);
-    panic_if(err != iris::r0master::E_ok, "writeMem failed.");
+    panic_if(err != iris::E_ok, "writeMem failed.");
 }
 
 bool

From c0d67cba3a4c2a3ad30b0c4e6c098bd20b56a91f Mon Sep 17 00:00:00 2001
From: Yu-hsin Wang <yuhsingw@google.com>
Date: Wed, 23 Nov 2022 11:00:23 +0800
Subject: [PATCH 031/492] systemc: fix extension not found TlmToGem5 bridge
 response path

The gem5 packet has two ways to associate to the TLM payload. If the
request is initiated from gem5, they would be associated by TLM
extension. If the request is initiated from systemc, they would be
associated by SenderState. So current implementation apparently only
took care the request initiated from gem5 only. We need to update the
logic to take care both.

This change moves the response sync out of beginSendResp and sync it
before calling the function.

Change-Id: If415fbe33249b75e549086d9ca36eda3c20f7ec2
Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/66053
Reviewed-by: Earl Ou <shunhsingou@google.com>
Maintainer: Bobby Bruce <bbruce@ucdavis.edu>
Tested-by: kokoro <noreply+kokoro@google.com>
---
 src/systemc/tlm_bridge/tlm_to_gem5.cc | 14 ++++----------
 1 file changed, 4 insertions(+), 10 deletions(-)

diff --git a/src/systemc/tlm_bridge/tlm_to_gem5.cc b/src/systemc/tlm_bridge/tlm_to_gem5.cc
index 468ea83f37..47a2fba08e 100644
--- a/src/systemc/tlm_bridge/tlm_to_gem5.cc
+++ b/src/systemc/tlm_bridge/tlm_to_gem5.cc
@@ -207,14 +207,6 @@ void
 TlmToGem5Bridge<BITWIDTH>::sendBeginResp(tlm::tlm_generic_payload &trans,
                                          sc_core::sc_time &delay)
 {
-    Gem5SystemC::Gem5Extension *extension = nullptr;
-    trans.get_extension(extension);
-    panic_if(extension == nullptr,
-             "Missing gem5 extension when sending BEGIN_RESP");
-    auto pkt = extension->getPacket();
-
-    setPayloadResponse(trans, pkt);
-
     tlm::tlm_phase phase = tlm::BEGIN_RESP;
 
     auto status = socket->nb_transport_bw(trans, phase, delay);
@@ -252,6 +244,7 @@ TlmToGem5Bridge<BITWIDTH>::handleBeginReq(tlm::tlm_generic_payload &trans)
         sendEndReq(trans);
         if (!needsResponse) {
             auto delay = sc_core::SC_ZERO_TIME;
+            setPayloadResponse(trans, pkt);
             sendBeginResp(trans, delay);
         }
         trans.release();
@@ -481,6 +474,8 @@ TlmToGem5Bridge<BITWIDTH>::recvTimingResp(PacketPtr pkt)
     sc_assert(tlmSenderState != nullptr);
 
     auto &trans = tlmSenderState->trans;
+    setPayloadResponse(trans, pkt);
+    sendBeginResp(trans, delay);
 
     Gem5SystemC::Gem5Extension *extension = nullptr;
     trans.get_extension(extension);
@@ -493,7 +488,6 @@ TlmToGem5Bridge<BITWIDTH>::recvTimingResp(PacketPtr pkt)
     if (extension == nullptr)
         destroyPacket(pkt);
 
-    sendBeginResp(trans, delay);
     trans.release();
 
     return true;
@@ -512,12 +506,12 @@ TlmToGem5Bridge<BITWIDTH>::recvReqRetry()
     bool needsResponse = pendingPacket->needsResponse();
     if (bmp.sendTimingReq(pendingPacket)) {
         waitForRetry = false;
-        pendingPacket = nullptr;
 
         auto &trans = *pendingRequest;
         sendEndReq(trans);
         if (!needsResponse) {
             auto delay = sc_core::SC_ZERO_TIME;
+            setPayloadResponse(trans, pendingPacket);
             sendBeginResp(trans, delay);
         }
         trans.release();

From eee42275eeea15e4814b1f9df6709b3f69e87b22 Mon Sep 17 00:00:00 2001
From: Matthew Poremba <matthew.poremba@amd.com>
Date: Fri, 18 Nov 2022 16:47:50 -0800
Subject: [PATCH 032/492] dev-amdgpu: Writeback RLC queue MQD when unmapped

Currently when RLC queues (user mode queues) are mapped, the read/write
pointers of the ring buffer are set to zero. However, these queues could
be unmapped and then remapped later. In that situation the read/write
pointers should be the previous value before unmapping occurred. Since
the read pointer gets reset to zero, the queue begins reading from the
start of the ring, which usually contains older packets. There is a 99%
chance those packets contain addresses which are no longer in the page
tables which will cause a page fault.

To fix this we update the MQD with the current read/write pointer values
and then writeback the MQD to memory when the queue is unmapped. This
requires adding a pointer to the MQD and the host address of the MQD
where it should be written back to. The interface for registering RLC
queue is also simplified. Since we need to pass the MQD anyway, we can
get values from it as well.

Fixes b+tree and streamcluster from rodinia (when using RLC queues).

Change-Id: Ie5dad4d7d90ea240c3e9f0cddf3e844a3cd34c4f
Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/65791
Tested-by: kokoro <noreply+kokoro@google.com>
Maintainer: Matt Sinclair <mattdsinclair@gmail.com>
Reviewed-by: Matt Sinclair <mattdsinclair@gmail.com>
---
 src/dev/amdgpu/pm4_packet_processor.cc |  4 +-
 src/dev/amdgpu/pm4_queues.hh           | 24 +++++++++--
 src/dev/amdgpu/sdma_engine.cc          | 58 ++++++++++++++++++++++----
 src/dev/amdgpu/sdma_engine.hh          | 12 ++++--
 4 files changed, 79 insertions(+), 19 deletions(-)

diff --git a/src/dev/amdgpu/pm4_packet_processor.cc b/src/dev/amdgpu/pm4_packet_processor.cc
index f78f8333a6..152fd4da73 100644
--- a/src/dev/amdgpu/pm4_packet_processor.cc
+++ b/src/dev/amdgpu/pm4_packet_processor.cc
@@ -458,9 +458,7 @@ PM4PacketProcessor::processSDMAMQD(PM4MapQueues *pkt, PM4Queue *q, Addr addr,
     SDMAEngine *sdma_eng = gpuDevice->getSDMAById(pkt->engineSel - 2);
 
     // Register RLC queue with SDMA
-    sdma_eng->registerRLCQueue(pkt->doorbellOffset << 2,
-                               mqd->rb_base << 8, rlc_size,
-                               rptr_wb_addr);
+    sdma_eng->registerRLCQueue(pkt->doorbellOffset << 2, addr, mqd);
 
     // Register doorbell with GPU device
     gpuDevice->setSDMAEngine(pkt->doorbellOffset << 2, sdma_eng);
diff --git a/src/dev/amdgpu/pm4_queues.hh b/src/dev/amdgpu/pm4_queues.hh
index 8b6626d176..ddadd6543b 100644
--- a/src/dev/amdgpu/pm4_queues.hh
+++ b/src/dev/amdgpu/pm4_queues.hh
@@ -33,6 +33,8 @@
 #ifndef __DEV_AMDGPU_PM4_QUEUES_HH__
 #define __DEV_AMDGPU_PM4_QUEUES_HH__
 
+#include "dev/amdgpu/pm4_defines.hh"
+
 namespace gem5
 {
 
@@ -201,10 +203,24 @@ typedef struct GEM5_PACKED
         };
         uint64_t rb_base;
     };
-    uint32_t sdmax_rlcx_rb_rptr;
-    uint32_t sdmax_rlcx_rb_rptr_hi;
-    uint32_t sdmax_rlcx_rb_wptr;
-    uint32_t sdmax_rlcx_rb_wptr_hi;
+    union
+    {
+        struct
+        {
+            uint32_t sdmax_rlcx_rb_rptr;
+            uint32_t sdmax_rlcx_rb_rptr_hi;
+        };
+        uint64_t rptr;
+    };
+    union
+    {
+        struct
+        {
+            uint32_t sdmax_rlcx_rb_wptr;
+            uint32_t sdmax_rlcx_rb_wptr_hi;
+        };
+        uint64_t wptr;
+    };
     uint32_t sdmax_rlcx_rb_wptr_poll_cntl;
     uint32_t sdmax_rlcx_rb_rptr_addr_hi;
     uint32_t sdmax_rlcx_rb_rptr_addr_lo;
diff --git a/src/dev/amdgpu/sdma_engine.cc b/src/dev/amdgpu/sdma_engine.cc
index 02203c8178..4c03bf57b2 100644
--- a/src/dev/amdgpu/sdma_engine.cc
+++ b/src/dev/amdgpu/sdma_engine.cc
@@ -165,30 +165,40 @@ SDMAEngine::translate(Addr vaddr, Addr size)
 }
 
 void
-SDMAEngine::registerRLCQueue(Addr doorbell, Addr rb_base, uint32_t size,
-                             Addr rptr_wb_addr)
+SDMAEngine::registerRLCQueue(Addr doorbell, Addr mqdAddr, SDMAQueueDesc *mqd)
 {
+    uint32_t rlc_size = 4UL << bits(mqd->sdmax_rlcx_rb_cntl, 6, 1);
+    Addr rptr_wb_addr = mqd->sdmax_rlcx_rb_rptr_addr_hi;
+    rptr_wb_addr <<= 32;
+    rptr_wb_addr |= mqd->sdmax_rlcx_rb_rptr_addr_lo;
+
     // Get first free RLC
     if (!rlc0.valid()) {
         DPRINTF(SDMAEngine, "Doorbell %lx mapped to RLC0\n", doorbell);
         rlcInfo[0] = doorbell;
         rlc0.valid(true);
-        rlc0.base(rb_base);
+        rlc0.base(mqd->rb_base << 8);
+        rlc0.size(rlc_size);
         rlc0.rptr(0);
-        rlc0.wptr(0);
+        rlc0.incRptr(mqd->rptr);
+        rlc0.setWptr(mqd->wptr);
         rlc0.rptrWbAddr(rptr_wb_addr);
         rlc0.processing(false);
-        rlc0.size(size);
+        rlc0.setMQD(mqd);
+        rlc0.setMQDAddr(mqdAddr);
     } else if (!rlc1.valid()) {
         DPRINTF(SDMAEngine, "Doorbell %lx mapped to RLC1\n", doorbell);
         rlcInfo[1] = doorbell;
         rlc1.valid(true);
-        rlc1.base(rb_base);
+        rlc1.base(mqd->rb_base << 8);
+        rlc1.size(rlc_size);
         rlc1.rptr(0);
-        rlc1.wptr(0);
+        rlc1.incRptr(mqd->rptr);
+        rlc1.setWptr(mqd->wptr);
         rlc1.rptrWbAddr(rptr_wb_addr);
         rlc1.processing(false);
-        rlc1.size(size);
+        rlc1.setMQD(mqd);
+        rlc1.setMQDAddr(mqdAddr);
     } else {
         panic("No free RLCs. Check they are properly unmapped.");
     }
@@ -199,9 +209,37 @@ SDMAEngine::unregisterRLCQueue(Addr doorbell)
 {
     DPRINTF(SDMAEngine, "Unregistering RLC queue at %#lx\n", doorbell);
     if (rlcInfo[0] == doorbell) {
+        SDMAQueueDesc *mqd = rlc0.getMQD();
+        if (mqd) {
+            DPRINTF(SDMAEngine, "Writing RLC0 SDMAMQD back to %#lx\n",
+                    rlc0.getMQDAddr());
+
+            mqd->rptr = rlc0.globalRptr();
+            mqd->wptr = rlc0.getWptr();
+
+            auto cb = new DmaVirtCallback<uint32_t>(
+                [ = ] (const uint32_t &) { });
+            dmaWriteVirt(rlc0.getMQDAddr(), sizeof(SDMAQueueDesc), cb, mqd);
+        } else {
+            warn("RLC0 SDMAMQD address invalid\n");
+        }
         rlc0.valid(false);
         rlcInfo[0] = 0;
     } else if (rlcInfo[1] == doorbell) {
+        SDMAQueueDesc *mqd = rlc1.getMQD();
+        if (mqd) {
+            DPRINTF(SDMAEngine, "Writing RLC1 SDMAMQD back to %#lx\n",
+                    rlc1.getMQDAddr());
+
+            mqd->rptr = rlc1.globalRptr();
+            mqd->wptr = rlc1.getWptr();
+
+            auto cb = new DmaVirtCallback<uint32_t>(
+                [ = ] (const uint32_t &) { });
+            dmaWriteVirt(rlc1.getMQDAddr(), sizeof(SDMAQueueDesc), cb, mqd);
+        } else {
+            warn("RLC1 SDMAMQD address invalid\n");
+        }
         rlc1.valid(false);
         rlcInfo[1] = 0;
     } else {
@@ -213,7 +251,9 @@ void
 SDMAEngine::deallocateRLCQueues()
 {
     for (auto doorbell: rlcInfo) {
-        unregisterRLCQueue(doorbell);
+        if (doorbell) {
+            unregisterRLCQueue(doorbell);
+        }
     }
 }
 
diff --git a/src/dev/amdgpu/sdma_engine.hh b/src/dev/amdgpu/sdma_engine.hh
index 0bfee126c9..27c169193b 100644
--- a/src/dev/amdgpu/sdma_engine.hh
+++ b/src/dev/amdgpu/sdma_engine.hh
@@ -34,6 +34,7 @@
 
 #include "base/bitunion.hh"
 #include "dev/amdgpu/amdgpu_device.hh"
+#include "dev/amdgpu/pm4_queues.hh"
 #include "dev/amdgpu/sdma_packets.hh"
 #include "dev/dma_virt_device.hh"
 #include "params/SDMAEngine.hh"
@@ -65,9 +66,11 @@ class SDMAEngine : public DmaVirtDevice
         SDMAQueue *_parent;
         SDMAQueue *_ib;
         SDMAType _type;
+        SDMAQueueDesc *_mqd;
+        Addr _mqd_addr = 0;
       public:
         SDMAQueue() : _rptr(0), _wptr(0), _valid(false), _processing(false),
-            _parent(nullptr), _ib(nullptr), _type(SDMAGfx) {}
+            _parent(nullptr), _ib(nullptr), _type(SDMAGfx), _mqd(nullptr) {}
 
         Addr base() { return _base; }
         Addr rptr() { return _base + _rptr; }
@@ -82,6 +85,8 @@ class SDMAEngine : public DmaVirtDevice
         SDMAQueue* parent() { return _parent; }
         SDMAQueue* ib() { return _ib; }
         SDMAType queueType() { return _type; }
+        SDMAQueueDesc* getMQD() { return _mqd; }
+        Addr getMQDAddr() { return _mqd_addr; }
 
         void base(Addr value) { _base = value; }
 
@@ -114,6 +119,8 @@ class SDMAEngine : public DmaVirtDevice
         void parent(SDMAQueue* q) { _parent = q; }
         void ib(SDMAQueue* ib) { _ib = ib; }
         void queueType(SDMAType type) { _type = type; }
+        void setMQD(SDMAQueueDesc *mqd) { _mqd = mqd; }
+        void setMQDAddr(Addr mqdAddr) { _mqd_addr = mqdAddr; }
     };
 
     /* SDMA Engine ID */
@@ -280,8 +287,7 @@ class SDMAEngine : public DmaVirtDevice
     /**
      * Methods for RLC queues
      */
-    void registerRLCQueue(Addr doorbell, Addr rb_base, uint32_t size,
-                          Addr rptr_wb_addr);
+    void registerRLCQueue(Addr doorbell, Addr mqdAddr, SDMAQueueDesc *mqd);
     void unregisterRLCQueue(Addr doorbell);
     void deallocateRLCQueues();
 

From 8479a691aa57c8f5763b71d26d579697ae96d007 Mon Sep 17 00:00:00 2001
From: "Bobby R. Bruce" <bbruce@ucdavis.edu>
Date: Wed, 30 Nov 2022 15:02:05 -0800
Subject: [PATCH 033/492] stdlib,python: Allow setting of to tick exits via m5

This commit adds the following functions to the `m5` python module:

- setMaxTick(tick) -> None
- getMaxTick() -> int
- getTicksUntilMax() -> int
- scheduleTickExitFromCurrent(tick, exit_string) -> None
- scheduleTickExitAbsolute(tick, exit_string) -> None

Until this patch the only way to set an exit at a particular tick was
via `simulate.run` which would reschedule the maximum tick. This
functionality has been explicity exposed via the new `setMaxTick`
function. However, as this is only rescheduling the maximum tick, it
stops scheduling exits at multiple different ticks.

To get around this problem the `scheduleTickExit` functions have been
added. These allow a user to schedule multiple exit events. The
functions contain a `exit_string` parameter that provides the string
the simulator is to return when the specified tick is met. By default
this string is "Tick exit reached" which is used by the stdlib
Simulator module to declare a new `SCHEDULED_TICK` exit event (Note:
this has been deliberatly kept seperate from the `MAX_TICK` exit event.
This commit serves as an attempt to decouple these are two concepts).

Tests are provided in this patch to ensure these new functions work as
intended.

Additional notes:
- The `simulate` function has been fixed to match the documentation. If
  the `num_cycles` is -1 then the maximum ticks is set to MaxTicks.
  Otherwise the max ticks is set to `curTicks() + num_cycles`. The
  functionality of this function will remain unchanged to the end-user.
- Full integration into the Simulator module is not complete as of this
  patch. Users must us the m5 python module to set these exit events.

Change-Id: I6c92b31dd409dc866152224600ea8166cfcba38b
Issue-on: https://gem5.atlassian.net/browse/GEM5-1131
Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/66231
Reviewed-by: Jason Lowe-Power <power.jg@gmail.com>
Tested-by: kokoro <noreply+kokoro@google.com>
Maintainer: Jason Lowe-Power <power.jg@gmail.com>
---
 src/python/gem5/simulate/exit_event.py        |   3 +
 src/python/gem5/simulate/simulator.py         |   2 +
 src/python/m5/simulate.py                     |  59 +++++-
 src/python/pybind11/event.cc                  |   4 +
 src/sim/simulate.cc                           |  53 +++++-
 src/sim/simulate.hh                           |  32 +++-
 tests/gem5/to_tick/configs/tick-exit.py       | 100 ++++++++++
 tests/gem5/to_tick/configs/tick-to-max.py     | 123 +++++++++++++
 .../to_tick/ref/tick-exit-10-20-30-40.txt     |   6 +
 tests/gem5/to_tick/ref/tick-exit-100.txt      |   3 +
 .../ref/tick-to-max-at-execution-100.txt      |   4 +
 ...-to-max-at-execution-and-after-100-200.txt |   4 +
 .../ref/tick-to-max-before-execution-250.txt  |   4 +
 tests/gem5/to_tick/test_to_tick.py            | 174 ++++++++++++++++++
 14 files changed, 561 insertions(+), 10 deletions(-)
 create mode 100644 tests/gem5/to_tick/configs/tick-exit.py
 create mode 100644 tests/gem5/to_tick/configs/tick-to-max.py
 create mode 100644 tests/gem5/to_tick/ref/tick-exit-10-20-30-40.txt
 create mode 100644 tests/gem5/to_tick/ref/tick-exit-100.txt
 create mode 100644 tests/gem5/to_tick/ref/tick-to-max-at-execution-100.txt
 create mode 100644 tests/gem5/to_tick/ref/tick-to-max-at-execution-and-after-100-200.txt
 create mode 100644 tests/gem5/to_tick/ref/tick-to-max-before-execution-250.txt
 create mode 100644 tests/gem5/to_tick/test_to_tick.py

diff --git a/src/python/gem5/simulate/exit_event.py b/src/python/gem5/simulate/exit_event.py
index 089017806b..1e14fdd11a 100644
--- a/src/python/gem5/simulate/exit_event.py
+++ b/src/python/gem5/simulate/exit_event.py
@@ -42,6 +42,7 @@ class ExitEvent(Enum):
     SWITCHCPU = "switchcpu"  # An exit needed to switch CPU cores.
     FAIL = "fail"  # An exit because the simulation has failed.
     CHECKPOINT = "checkpoint"  # An exit to load a checkpoint.
+    SCHEDULED_TICK = "scheduled tick exit"
     MAX_TICK = "max tick"  # An exit due to a maximum tick value being met.
     USER_INTERRUPT = (  # An exit due to a user interrupt (e.g., cntr + c)
         "user interupt"
@@ -75,6 +76,8 @@ class ExitEvent(Enum):
             return ExitEvent.EXIT
         elif exit_string == "simulate() limit reached":
             return ExitEvent.MAX_TICK
+        elif exit_string == "Tick exit reached":
+            return ExitEvent.SCHEDULED_TICK
         elif exit_string == "switchcpu":
             return ExitEvent.SWITCHCPU
         elif exit_string == "m5_fail instruction encountered":
diff --git a/src/python/gem5/simulate/simulator.py b/src/python/gem5/simulate/simulator.py
index 1d0d3ecc66..e27679a996 100644
--- a/src/python/gem5/simulate/simulator.py
+++ b/src/python/gem5/simulate/simulator.py
@@ -157,6 +157,7 @@ class Simulator:
             * ExitEvent.WORKEND: exit simulation
             * ExitEvent.USER_INTERRUPT: exit simulation
             * ExitEvent.MAX_TICK: exit simulation
+            * ExitEvent.SCHEDULED_TICK: exit simulation
             * ExitEvent.SIMPOINT_BEGIN: reset stats
             * ExitEvent.MAX_INSTS: exit simulation
 
@@ -197,6 +198,7 @@ class Simulator:
             )(),
             ExitEvent.USER_INTERRUPT: exit_generator(),
             ExitEvent.MAX_TICK: exit_generator(),
+            ExitEvent.SCHEDULED_TICK: exit_generator(),
             ExitEvent.SIMPOINT_BEGIN: warn_default_decorator(
                 reset_stats_generator,
                 "simpoint begin",
diff --git a/src/python/m5/simulate.py b/src/python/m5/simulate.py
index a47d4cacd6..744d95f9f6 100644
--- a/src/python/m5/simulate.py
+++ b/src/python/m5/simulate.py
@@ -54,7 +54,7 @@ from . import params
 from m5.util.dot_writer import do_dot, do_dvfs_dot
 from m5.util.dot_writer_ruby import do_ruby_dot
 
-from .util import fatal
+from .util import fatal, warn
 from .util import attrdict
 
 # define a MaxTick parameter, unsigned 64 bit
@@ -205,6 +205,63 @@ def simulate(*args, **kwargs):
     return sim_out
 
 
+def setMaxTick(tick: int) -> None:
+    """Sets the maximum tick the simulation may run to. When when using the
+    stdlib simulator module, reaching this max tick triggers a
+    `ExitEvent.MAX_TICK` exit event.
+
+    :param tick: the maximum tick (absolute, not relative to the current tick).
+    """
+    if tick <= curTick():
+        warn("Max tick scheduled for the past. This will not be triggered.")
+    _m5.event.setMaxTick(tick=tick)
+
+
+def getMaxTick() -> int:
+    """Returns the current maximum tick."""
+    return _m5.event.getMaxTick()
+
+
+def getTicksUntilMax() -> int:
+    """Returns the current number of ticks until the maximum tick."""
+    return getMaxTick() - curTick()
+
+
+def scheduleTickExitFromCurrent(
+    ticks: int, exit_string: str = "Tick exit reached"
+) -> None:
+    """Schedules a tick exit event from the current tick. I.e., if ticks == 100
+    then an exit event will be scheduled at tick `curTick() + 100`.
+
+    The default `exit_string` value is used by the stdlib Simulator module to
+    declare this exit event as `ExitEvent.SCHEDULED_TICK`.
+
+    :param ticks: The simulation ticks, from `curTick()` to schedule the exit
+    event.
+    :param exit_string: The exit string to return when the exit event is
+    triggered.
+    """
+    scheduleTickExitAbsolute(tick=ticks + curTick(), exit_string=exit_string)
+
+
+def scheduleTickExitAbsolute(
+    tick: int, exit_string: str = "Tick exit reached"
+) -> None:
+    """Schedules a tick exit event using absolute ticks. I.e., if tick == 100
+    then an exit event will be scheduled at tick 100.
+
+    The default `exit_string` value is used by the stdlib Simulator module to
+    declare this exit event as `ExitEvent.SCHEDULED_TICK`.
+
+    :param tick: The absolute simulation tick to schedule the exit event.
+    :param exit_string: The exit string to return when the exit event is
+    triggered.
+    """
+    if tick <= curTick():
+        warn("Tick exit scheduled for the past. This will not be triggered.")
+    _m5.event.scheduleTickExit(tick=tick, exit_string=exit_string)
+
+
 def drain():
     """Drain the simulator in preparation of a checkpoint or memory mode
     switch.
diff --git a/src/python/pybind11/event.cc b/src/python/pybind11/event.cc
index 7a02221611..827768f52f 100644
--- a/src/python/pybind11/event.cc
+++ b/src/python/pybind11/event.cc
@@ -107,6 +107,10 @@ pybind_init_event(py::module_ &m_native)
 
     m.def("simulate", &simulate,
           py::arg("ticks") = MaxTick);
+    m.def("setMaxTick", &set_max_tick, py::arg("tick"));
+    m.def("getMaxTick", &get_max_tick, py::return_value_policy::copy);
+    m.def("scheduleTickExit", &schedule_tick_exit, py::arg("tick"),
+        py::arg("exit_string"));
     m.def("terminateEventQueueThreads", &terminateEventQueueThreads);
     m.def("exitSimLoop", &exitSimLoop);
     m.def("getEventQueue", []() { return curEventQueue(); },
diff --git a/src/sim/simulate.cc b/src/sim/simulate.cc
index 0c30f10570..f147b3ec77 100644
--- a/src/sim/simulate.cc
+++ b/src/sim/simulate.cc
@@ -180,8 +180,8 @@ struct DescheduleDeleter
 };
 
 /** Simulate for num_cycles additional cycles.  If num_cycles is -1
- * (the default), do not limit simulation; some other event must
- * terminate the loop.  Exported to Python.
+ * (the default), we simulate to MAX_TICKS unless the max ticks has been set
+ * via the 'set_max_tick' function prior. This function is exported to Python.
  * @return The SimLoopExitEvent that caused the loop to exit.
  */
 GlobalSimLoopExitEvent *global_exit_event= nullptr;
@@ -191,8 +191,6 @@ simulate(Tick num_cycles)
     if (global_exit_event)//cleaning last global exit event
         global_exit_event->clean();
     std::unique_ptr<GlobalSyncEvent, DescheduleDeleter> quantum_event;
-    const Tick exit_tick = num_cycles < MaxTick - curTick() ?
-                                        curTick() + num_cycles : MaxTick;
 
     inform("Entering event queue @ %d.  Starting simulation...\n", curTick());
 
@@ -200,11 +198,22 @@ simulate(Tick num_cycles)
         simulatorThreads.reset(new SimulatorThreads(numMainEventQueues));
 
     if (!simulate_limit_event) {
-        simulate_limit_event = new GlobalSimLoopExitEvent(
-            mainEventQueue[0]->getCurTick(),
-            "simulate() limit reached", 0);
+        // If the simulate_limit_event is not set, we set it to MaxTick.
+        set_max_tick(MaxTick);
+    }
+
+    if (num_cycles != -1) {
+        // If the user has specified an exit event after X cycles, do so here.
+        // Note: This will override any prior set max_tick behaviour (such as
+        // that above when it is set to MAxTick).
+        const Tick max_tick = num_cycles < MaxTick - curTick() ?
+                                    curTick() + num_cycles : MaxTick;
+
+        // This is kept to `set_max_tick` instead of `schedule_tick_exit` to
+        // preserve backwards functionality. It may be better to deprecate this
+        // behaviour at some point in favor of `schedule_tick_exit`.
+        set_max_tick(max_tick);
     }
-    simulate_limit_event->reschedule(exit_tick);
 
     if (numMainEventQueues > 1) {
         fatal_if(simQuantum == 0,
@@ -234,6 +243,34 @@ simulate(Tick num_cycles)
     return global_exit_event;
 }
 
+void set_max_tick(Tick tick)
+{
+    if (!simulate_limit_event) {
+        simulate_limit_event = new GlobalSimLoopExitEvent(
+            mainEventQueue[0]->getCurTick(),
+            "simulate() limit reached", 0);
+    }
+    simulate_limit_event->reschedule(tick);
+}
+
+
+Tick get_max_tick()
+{
+    if (!simulate_limit_event) {
+        /* If the GlobalSimLoopExitEvent has not been setup, the maximum tick
+         * is `MaxTick` as declared in "src/base/types.hh".
+         */
+        return MaxTick;
+    }
+
+    return simulate_limit_event->when();
+}
+
+void schedule_tick_exit(Tick tick, std::string exit_string)
+{
+    new GlobalSimLoopExitEvent(tick, exit_string, 0);
+}
+
 void
 terminateEventQueueThreads()
 {
diff --git a/src/sim/simulate.hh b/src/sim/simulate.hh
index 5ef499541f..e7c4fa640c 100644
--- a/src/sim/simulate.hh
+++ b/src/sim/simulate.hh
@@ -45,7 +45,37 @@ namespace gem5
 
 class GlobalSimLoopExitEvent;
 
-GlobalSimLoopExitEvent *simulate(Tick num_cycles = MaxTick);
+GlobalSimLoopExitEvent *simulate(Tick num_cycles = -1);
+
+/**
+ * @brief Set the maximum tick.
+ *
+ * This function will schedule, or reschedule, the maximum tick for the
+ * simulation.
+ *
+ * This will setup the GlobalSimLoopExitEvent if it does not already exist.
+ *
+ * @param tick The maximum tick.
+ */
+void set_max_tick(Tick tick);
+
+/**
+ * @brief Get the maximum simulation tick.
+ *
+ *
+ * @returns The maximum simulation tick.
+ */
+Tick get_max_tick();
+
+/**
+ * @brief Schedule an exit event at a particular tick.
+ *
+ * Schedule a tick with a particular exit string.
+ *
+ * @param tick The tick at which the simulation loop should exit.
+ * @param exit_string The exit string explaining the exit.
+ */
+void schedule_tick_exit(Tick tick, std::string exit_string);
 
 /**
  * Terminate helper threads when running in parallel mode.
diff --git a/tests/gem5/to_tick/configs/tick-exit.py b/tests/gem5/to_tick/configs/tick-exit.py
new file mode 100644
index 0000000000..9b412cbfb6
--- /dev/null
+++ b/tests/gem5/to_tick/configs/tick-exit.py
@@ -0,0 +1,100 @@
+# Copyright (c) 2022 The Regents of the University of California
+# All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are
+# met: redistributions of source code must retain the above copyright
+# notice, this list of conditions and the following disclaimer;
+# redistributions in binary form must reproduce the above copyright
+# notice, this list of conditions and the following disclaimer in the
+# documentation and/or other materials provided with the distribution;
+# neither the name of the copyright holders nor the names of its
+# contributors may be used to endorse or promote products derived from
+# this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+"""
+
+"""
+
+from gem5.resources.resource import Resource
+from gem5.isas import ISA
+from gem5.components.memory import SingleChannelDDR3_1600
+from gem5.components.boards.simple_board import SimpleBoard
+from gem5.components.cachehierarchies.classic.no_cache import NoCache
+from gem5.components.processors.simple_processor import SimpleProcessor
+from gem5.components.processors.cpu_types import CPUTypes
+from gem5.simulate.simulator import Simulator
+from gem5.simulate.exit_event import ExitEvent
+
+import m5
+
+import argparse
+
+parser = argparse.ArgumentParser()
+
+parser.add_argument(
+    "-t",
+    "--tick-exits",
+    type=int,
+    nargs="+",
+    required=True,
+    help="Set the tick exits to exit.",
+)
+
+parser.add_argument(
+    "-r",
+    "--resource-directory",
+    type=str,
+    required=False,
+    help="The directory in which resources will be downloaded or exist.",
+)
+
+args = parser.parse_args()
+
+# Setup the system.
+motherboard = SimpleBoard(
+    clk_freq="3GHz",
+    processor=SimpleProcessor(
+        cpu_type=CPUTypes.TIMING,
+        isa=ISA.X86,
+        num_cores=1,
+    ),
+    memory=SingleChannelDDR3_1600(),
+    cache_hierarchy=NoCache(),
+)
+
+# Set the workload
+binary = Resource(
+    "x86-hello64-static", resource_directory=args.resource_directory
+)
+motherboard.set_se_binary_workload(binary)
+
+
+def scheduled_tick_generator():
+    while True:
+        print(f"Exiting at: {m5.curTick()}")
+        yield False
+
+
+# Run the simulation
+simulator = Simulator(
+    board=motherboard,
+    on_exit_event={ExitEvent.SCHEDULED_TICK: scheduled_tick_generator()},
+)
+
+for tick in args.tick_exits:
+    m5.scheduleTickExitFromCurrent(tick)
+
+simulator.run()
diff --git a/tests/gem5/to_tick/configs/tick-to-max.py b/tests/gem5/to_tick/configs/tick-to-max.py
new file mode 100644
index 0000000000..2b679df412
--- /dev/null
+++ b/tests/gem5/to_tick/configs/tick-to-max.py
@@ -0,0 +1,123 @@
+# Copyright (c) 2022 The Regents of the University of California
+# All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are
+# met: redistributions of source code must retain the above copyright
+# notice, this list of conditions and the following disclaimer;
+# redistributions in binary form must reproduce the above copyright
+# notice, this list of conditions and the following disclaimer in the
+# documentation and/or other materials provided with the distribution;
+# neither the name of the copyright holders nor the names of its
+# contributors may be used to endorse or promote products derived from
+# this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+"""
+This configuration script is used to test running a simulation to a specified
+maximum tick. This script was setup to test setting the number of ticks to
+run before, at, or after the running of `simulator.run`.
+
+**Note:** There can only ever be one MAX_TICK exit event scheduled at any one
+time.
+"""
+
+from gem5.resources.resource import Resource
+from gem5.isas import ISA
+from gem5.components.memory import SingleChannelDDR3_1600
+from gem5.components.boards.simple_board import SimpleBoard
+from gem5.components.cachehierarchies.classic.no_cache import NoCache
+from gem5.components.processors.simple_processor import SimpleProcessor
+from gem5.components.processors.cpu_types import CPUTypes
+from gem5.simulate.simulator import Simulator
+
+import m5
+
+import argparse
+
+parser = argparse.ArgumentParser()
+
+parser.add_argument(
+    "-b",
+    "--set-ticks-before",
+    type=int,
+    required=False,
+    help="Set the number of ticks to run to prior to executing "
+    "`simulator.run`.",
+)
+
+parser.add_argument(
+    "-e",
+    "--set-ticks-at-execution",
+    type=int,
+    required=False,
+    help="Set the number of ticks to run via `simulator.run`.",
+)
+
+parser.add_argument(
+    "-a",
+    "--set-ticks-after",
+    type=int,
+    required=False,
+    help="Set the number of ticks to run after `simulator.run` has ceased "
+    "execution.",
+)
+
+parser.add_argument(
+    "-r",
+    "--resource-directory",
+    type=str,
+    required=False,
+    help="The directory in which resources will be downloaded or exist.",
+)
+
+args = parser.parse_args()
+
+# Setup the system.
+motherboard = SimpleBoard(
+    clk_freq="3GHz",
+    processor=SimpleProcessor(
+        cpu_type=CPUTypes.TIMING,
+        isa=ISA.X86,
+        num_cores=1,
+    ),
+    memory=SingleChannelDDR3_1600(),
+    cache_hierarchy=NoCache(),
+)
+
+# Set the workload
+binary = Resource(
+    "x86-hello64-static", resource_directory=args.resource_directory
+)
+motherboard.set_se_binary_workload(binary)
+
+# Set the max ticks before setting up the simulation, if applicable.
+if args.set_ticks_before:
+    m5.setMaxTick(args.set_ticks_before)
+
+# Run the simulation
+simulator = Simulator(board=motherboard)
+
+if args.set_ticks_at_execution:
+    simulator.run(max_ticks=args.set_ticks_at_execution)
+else:
+    simulator.run()
+
+# Set the max ticks after the simulator run.
+if args.set_ticks_after:
+    m5.setMaxTick(args.set_ticks_after)
+
+print(f"Current Tick: {m5.curTick()}")
+print(f"Current Max Tick: {m5.getMaxTick()}")
+print(f"Ticks until max: {m5.getTicksUntilMax()}")
diff --git a/tests/gem5/to_tick/ref/tick-exit-10-20-30-40.txt b/tests/gem5/to_tick/ref/tick-exit-10-20-30-40.txt
new file mode 100644
index 0000000000..05f8159065
--- /dev/null
+++ b/tests/gem5/to_tick/ref/tick-exit-10-20-30-40.txt
@@ -0,0 +1,6 @@
+Global frequency set at 1000000000000 ticks per second
+Exiting at: 10
+Exiting at: 20
+Exiting at: 30
+Exiting at: 40
+Hello world!
diff --git a/tests/gem5/to_tick/ref/tick-exit-100.txt b/tests/gem5/to_tick/ref/tick-exit-100.txt
new file mode 100644
index 0000000000..62f9330e13
--- /dev/null
+++ b/tests/gem5/to_tick/ref/tick-exit-100.txt
@@ -0,0 +1,3 @@
+Global frequency set at 1000000000000 ticks per second
+Exiting at: 100
+Hello world!
diff --git a/tests/gem5/to_tick/ref/tick-to-max-at-execution-100.txt b/tests/gem5/to_tick/ref/tick-to-max-at-execution-100.txt
new file mode 100644
index 0000000000..1507716e42
--- /dev/null
+++ b/tests/gem5/to_tick/ref/tick-to-max-at-execution-100.txt
@@ -0,0 +1,4 @@
+Global frequency set at 1000000000000 ticks per second
+Current Tick: 100
+Current Max Tick: 100
+Ticks until max: 0
diff --git a/tests/gem5/to_tick/ref/tick-to-max-at-execution-and-after-100-200.txt b/tests/gem5/to_tick/ref/tick-to-max-at-execution-and-after-100-200.txt
new file mode 100644
index 0000000000..b1cde8ae4c
--- /dev/null
+++ b/tests/gem5/to_tick/ref/tick-to-max-at-execution-and-after-100-200.txt
@@ -0,0 +1,4 @@
+Global frequency set at 1000000000000 ticks per second
+Current Tick: 100
+Current Max Tick: 200
+Ticks until max: 100
diff --git a/tests/gem5/to_tick/ref/tick-to-max-before-execution-250.txt b/tests/gem5/to_tick/ref/tick-to-max-before-execution-250.txt
new file mode 100644
index 0000000000..b26e9ebee2
--- /dev/null
+++ b/tests/gem5/to_tick/ref/tick-to-max-before-execution-250.txt
@@ -0,0 +1,4 @@
+Global frequency set at 1000000000000 ticks per second
+Current Tick: 250
+Current Max Tick: 250
+Ticks until max: 0
diff --git a/tests/gem5/to_tick/test_to_tick.py b/tests/gem5/to_tick/test_to_tick.py
new file mode 100644
index 0000000000..ba5bcbf9b9
--- /dev/null
+++ b/tests/gem5/to_tick/test_to_tick.py
@@ -0,0 +1,174 @@
+# Copyright (c) 2022 The Regents of the University of California
+# All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are
+# met: redistributions of source code must retain the above copyright
+# notice, this list of conditions and the following disclaimer;
+# redistributions in binary form must reproduce the above copyright
+# notice, this list of conditions and the following disclaimer in the
+# documentation and/or other materials provided with the distribution;
+# neither the name of the copyright holders nor the names of its
+# contributors may be used to endorse or promote products derived from
+# this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+from testlib import *
+
+if config.bin_path:
+    resource_path = config.bin_path
+else:
+    resource_path = joinpath(absdirpath(__file__), "..", "resources")
+
+# This test sets the tick to max tick via the `simulator.run` function. This is
+# set to 100. Therefore, at the end of the execution the expected current tick
+# should be 100, with the max tick still 100. The number of expected ticks to
+# max is therefore 0.
+gem5_verify_config(
+    name="test-to-max-tick-at-execution-100",
+    verifiers=[
+        verifier.MatchStdoutNoPerf(
+            joinpath(getcwd(), "ref", "tick-to-max-at-execution-100.txt")
+        )
+    ],
+    fixtures=(),
+    config=joinpath(
+        config.base_dir,
+        "tests",
+        "gem5",
+        "to_tick",
+        "configs",
+        "tick-to-max.py",
+    ),
+    config_args=[
+        "--resource-directory",
+        resource_path,
+        "--set-ticks-at-execution",
+        "100",
+    ],
+    valid_isas=(constants.all_compiled_tag,),
+    valid_hosts=constants.supported_hosts,
+    length=constants.quick_tag,
+)
+
+# This test sets the max tick  via the `simulator.run` function at tick 100.
+# The `m5.setMaxTick` function is then called after, passing the value 200 .
+# This means at the end of execution the current tick is 100, and the max tick
+# is 200. The number of expected ticks to max is therefore 100.
+gem5_verify_config(
+    name="test-to-max-tick-at-execution-and-after-100-200",
+    verifiers=[
+        verifier.MatchStdoutNoPerf(
+            joinpath(
+                getcwd(),
+                "ref",
+                "tick-to-max-at-execution-and-after-100-200.txt",
+            )
+        )
+    ],
+    fixtures=(),
+    config=joinpath(
+        config.base_dir,
+        "tests",
+        "gem5",
+        "to_tick",
+        "configs",
+        "tick-to-max.py",
+    ),
+    config_args=[
+        "--resource-directory",
+        resource_path,
+        "--set-ticks-at-execution",
+        "100",
+        "--set-ticks-after",
+        "200",
+    ],
+    valid_isas=(constants.all_compiled_tag,),
+    valid_hosts=constants.supported_hosts,
+    length=constants.quick_tag,
+)
+
+# This test sets the max tick to 250 via the `m5.setMaxTick` prior to running
+# `simulator.run`. This means at the end of execution the current tick is 250
+# and the max tick is 250. The expected number of ticks to max is therefore 0.
+gem5_verify_config(
+    name="test-to-max-tick-before-execution-250",
+    verifiers=[
+        verifier.MatchStdoutNoPerf(
+            joinpath(getcwd(), "ref", "tick-to-max-before-execution-250.txt")
+        )
+    ],
+    fixtures=(),
+    config=joinpath(
+        config.base_dir,
+        "tests",
+        "gem5",
+        "to_tick",
+        "configs",
+        "tick-to-max.py",
+    ),
+    config_args=[
+        "--resource-directory",
+        resource_path,
+        "--set-ticks-before",
+        "250",
+    ],
+    valid_isas=(constants.all_compiled_tag,),
+    valid_hosts=constants.supported_hosts,
+    length=constants.quick_tag,
+)
+
+# Tests the scheduling of a tick exit event at tick 100.
+gem5_verify_config(
+    name="test-to-tick-exit-100",
+    verifiers=[
+        verifier.MatchStdoutNoPerf(
+            joinpath(getcwd(), "ref", "tick-exit-100.txt")
+        )
+    ],
+    fixtures=(),
+    config=joinpath(
+        config.base_dir, "tests", "gem5", "to_tick", "configs", "tick-exit.py"
+    ),
+    config_args=["--resource-directory", resource_path, "--tick-exits", "100"],
+    valid_isas=(constants.all_compiled_tag,),
+    valid_hosts=constants.supported_hosts,
+    length=constants.quick_tag,
+)
+
+# Tests the scheduling of a tick exit event at tick 10, 20, 30, and 40.
+gem5_verify_config(
+    name="test-to-tick-exit-10-20-30-40",
+    verifiers=[
+        verifier.MatchStdoutNoPerf(
+            joinpath(getcwd(), "ref", "tick-exit-10-20-30-40.txt")
+        )
+    ],
+    fixtures=(),
+    config=joinpath(
+        config.base_dir, "tests", "gem5", "to_tick", "configs", "tick-exit.py"
+    ),
+    config_args=[
+        "--resource-directory",
+        resource_path,
+        "--tick-exits",
+        "10",
+        "20",
+        "30",
+        "40",
+    ],
+    valid_isas=(constants.all_compiled_tag,),
+    valid_hosts=constants.supported_hosts,
+    length=constants.quick_tag,
+)

From da83764f9438ad0d81f0934567931d4a595f26dd Mon Sep 17 00:00:00 2001
From: Melissa Jost <melissakjost@gmail.com>
Date: Tue, 18 Oct 2022 16:04:18 -0700
Subject: [PATCH 034/492] stdlib, configs: Updating
 configs/example/gem5_library

This commit updates all of the older tests in this directory to
use the Simulator to run instead of m5.simulate()

Change-Id: I2a81d5c2f27c89e8c03abb0203ca3e58a6688672
Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/64791
Reviewed-by: Bobby Bruce <bbruce@ucdavis.edu>
Tested-by: kokoro <noreply+kokoro@google.com>
Maintainer: Bobby Bruce <bbruce@ucdavis.edu>
---
 .../gem5_library/x86-gapbs-benchmarks.py      | 101 +++---------
 .../gem5_library/x86-npb-benchmarks.py        | 145 ++++++------------
 .../gem5_library/x86-parsec-benchmarks.py     | 110 +++----------
 .../x86-spec-cpu2006-benchmarks.py            | 117 +++-----------
 .../x86-spec-cpu2017-benchmarks.py            | 109 +++----------
 5 files changed, 144 insertions(+), 438 deletions(-)

diff --git a/configs/example/gem5_library/x86-gapbs-benchmarks.py b/configs/example/gem5_library/x86-gapbs-benchmarks.py
index bdc0d9427d..638d34b599 100644
--- a/configs/example/gem5_library/x86-gapbs-benchmarks.py
+++ b/configs/example/gem5_library/x86-gapbs-benchmarks.py
@@ -64,8 +64,8 @@ from gem5.components.processors.cpu_types import CPUTypes
 from gem5.isas import ISA
 from gem5.coherence_protocol import CoherenceProtocol
 from gem5.resources.resource import Resource
-
-from m5.stats.gem5stats import get_simstat
+from gem5.simulate.simulator import Simulator
+from gem5.simulate.exit_event import ExitEvent
 
 requires(
     isa_required=ISA.X86,
@@ -210,14 +210,26 @@ board.set_kernel_disk_workload(
     readfile_contents=command,
 )
 
-root = Root(full_system=True, system=board)
 
-# sim_quantum must be set when KVM cores are used.
+def handle_exit():
+    print("Done booting Linux")
+    print("Resetting stats at the start of ROI!")
+    m5.stats.reset()
+    global start_tick
+    start_tick = m5.curTick()
+    processor.switch()
+    yield False  # E.g., continue the simulation.
+    print("Dump stats at the end of the ROI!")
+    m5.stats.dump()
+    yield True  # Stop the simulation. We're done.
 
-root.sim_quantum = int(1e9)
 
-board._pre_instantiate()
-m5.instantiate()
+simulator = Simulator(
+    board=board,
+    on_exit_event={
+        ExitEvent.EXIT: handle_exit(),
+    },
+)
 
 # We maintain the wall clock time.
 
@@ -232,74 +244,8 @@ print("Using KVM cpu")
 # the first ROI annotation in details. The X86Board currently does not support
 #  `work items started count reached`.
 
-exit_event = m5.simulate()
-
-# The first exit_event ends with a `workbegin` cause. This means that the
-# system started successfully and the execution on the program started. The
-# ROI begin is encountered.
-
-if exit_event.getCause() == "workbegin":
-
-    print("Done booting Linux")
-    print("Resetting stats at the start of ROI!")
-
-    m5.stats.reset()
-    start_tick = m5.curTick()
-
-    # We have completed up to this step using KVM cpu. Now we switch to timing
-    # cpu for detailed simulation.
-
-    processor.switch()
-else:
-    print("Unexpected termination of simulation before ROI was reached!")
-    print(
-        "Exiting @ tick {} because {}.".format(
-            m5.curTick(), exit_event.getCause()
-        )
-    )
-    exit(-1)
-
-# The next exit_event is to simulate the ROI. It should be exited with a cause
-# marked by `workend`. This implies that the first annotation is successfully
-# completed.
-
-exit_event = m5.simulate()
-
-# Reached the end of first ROI.
-# We dump the stats here.
-
-# We exepect that ROI ends with `workend`. Otherwise the simulation ended
-# unexpectedly.
-if exit_event.getCause() == "workend":
-    print("Dump stats at the end of the ROI!")
-
-    m5.stats.dump()
-    end_tick = m5.curTick()
-else:
-    print("Unexpected termination of simulation while ROI was being executed!")
-    print(
-        "Exiting @ tick {} because {}.".format(
-            m5.curTick(), exit_event.getCause()
-        )
-    )
-    exit(-1)
-
-# We get simInsts using get_simstat and output it in the final print statement.
-
-gem5stats = get_simstat(root)
-
-# We get the number of committed instructions from the timing cores. We then
-# sum and print them at the end.
-
-roi_insts = float(
-    gem5stats.to_json()["system"]["processor"]["switch0"]["core"][
-        "exec_context.thread_0"
-    ]["numInsts"]["value"]
-) + float(
-    gem5stats.to_json()["system"]["processor"]["switch1"]["core"][
-        "exec_context.thread_0"
-    ]["numInsts"]["value"]
-)
+simulator.run()
+end_tick = m5.curTick()
 # Since we simulated the ROI in details, therefore, simulation is over at this
 # point.
 
@@ -313,8 +259,9 @@ print()
 print("Performance statistics:")
 
 print("Simulated time in ROI: %.2fs" % ((end_tick - start_tick) / 1e12))
-print("Instructions executed in ROI: %d" % ((roi_insts)))
-print("Ran a total of", m5.curTick() / 1e12, "simulated seconds")
+print(
+    "Ran a total of", simulator.get_current_tick() / 1e12, "simulated seconds"
+)
 print(
     "Total wallclock time: %.2fs, %.2f min"
     % (time.time() - globalStart, (time.time() - globalStart) / 60)
diff --git a/configs/example/gem5_library/x86-npb-benchmarks.py b/configs/example/gem5_library/x86-npb-benchmarks.py
index 385760c7a7..2cb314303f 100644
--- a/configs/example/gem5_library/x86-npb-benchmarks.py
+++ b/configs/example/gem5_library/x86-npb-benchmarks.py
@@ -61,6 +61,8 @@ from gem5.components.processors.cpu_types import CPUTypes
 from gem5.isas import ISA
 from gem5.coherence_protocol import CoherenceProtocol
 from gem5.resources.resource import Resource
+from gem5.simulate.simulator import Simulator
+from gem5.simulate.simulator import ExitEvent
 
 from m5.stats.gem5stats import get_simstat
 from m5.util import warn
@@ -209,17 +211,47 @@ board.set_kernel_disk_workload(
     readfile_contents=command,
 )
 
-# We need this for long running processes.
-m5.disableAllListeners()
+# The first exit_event ends with a `workbegin` cause. This means that the
+# system started successfully and the execution on the program started.
+def handle_workbegin():
+    print("Done booting Linux")
+    print("Resetting stats at the start of ROI!")
 
-root = Root(full_system=True, system=board)
+    m5.stats.reset()
 
-# sim_quantum must be set when KVM cores are used.
+    # We have completed up to this step using KVM cpu. Now we switch to timing
+    # cpu for detailed simulation.
 
-root.sim_quantum = int(1e9)
+    # # Next, we need to check if the user passed a value for --ticks. If yes,
+    # then we limit out execution to this number of ticks during the ROI.
+    # Otherwise, we simulate until the ROI ends.
+    processor.switch()
+    if args.ticks:
+        # schedule an exit event for this amount of ticks in the future.
+        # The simulation will then continue.
+        m5.scheduleTickExitFromCurrent(args.ticks)
+    yield False
 
-board._pre_instantiate()
-m5.instantiate()
+
+# The next exit_event is to simulate the ROI. It should be exited with a cause
+# marked by `workend`.
+
+# We exepect that ROI ends with `workend` or `simulate() limit reached`.
+# Otherwise the simulation ended unexpectedly.
+def handle_workend():
+    print("Dump stats at the end of the ROI!")
+
+    m5.stats.dump()
+    yield False
+
+
+simulator = Simulator(
+    board=board,
+    on_exit_event={
+        ExitEvent.WORKBEGIN: handle_workbegin(),
+        ExitEvent.WORKEND: handle_workend(),
+    },
+)
 
 # We maintain the wall clock time.
 
@@ -229,96 +261,12 @@ print("Running the simulation")
 print("Using KVM cpu")
 
 # We start the simulation.
-
-exit_event = m5.simulate()
-
-# The first exit_event ends with a `workbegin` cause. This means that the
-# system started successfully and the execution on the program started.
-
-if exit_event.getCause() == "workbegin":
-
-    print("Done booting Linux")
-    print("Resetting stats at the start of ROI!")
-
-    m5.stats.reset()
-    start_tick = m5.curTick()
-
-    # We have completed up to this step using KVM cpu. Now we switch to timing
-    # cpu for detailed simulation.
-
-    processor.switch()
-else:
-    # `workbegin` call was never encountered.
-
-    print("Unexpected termination of simulation before ROI was reached!")
-    print(
-        "Exiting @ tick {} because {}.".format(
-            m5.curTick(), exit_event.getCause()
-        )
-    )
-    exit(-1)
-
-# The next exit_event is to simulate the ROI. It should be exited with a cause
-# marked by `workend`.
-
-# Next, we need to check if the user passed a value for --ticks. If yes,
-# then we limit out execution to this number of ticks during the ROI.
-# Otherwise, we simulate until the ROI ends.
-if args.ticks:
-    exit_event = m5.simulate(args.ticks)
-else:
-    exit_event = m5.simulate()
-
-
-# Reached the end of ROI.
-# We dump the stats here.
-
-# We exepect that ROI ends with `workend` or `simulate() limit reached`.
-# Otherwise the simulation ended unexpectedly.
-if exit_event.getCause() == "workend":
-    print("Dump stats at the end of the ROI!")
-
-    m5.stats.dump()
-    end_tick = m5.curTick()
-elif (
-    exit_event.getCause() == "simulate() limit reached"
-    and args.ticks is not None
-):
-    print("Dump stats at the end of {} ticks in the ROI".format(args.ticks))
-
-    m5.stats.dump()
-    end_tick = m5.curTick()
-else:
-    print("Unexpected termination of simulation while ROI was being executed!")
-    print(
-        "Exiting @ tick {} because {}.".format(
-            m5.curTick(), exit_event.getCause()
-        )
-    )
-    exit(-1)
+simulator.run()
 
 # We need to note that the benchmark is not executed completely till this
 # point, but, the ROI has. We collect the essential statistics here before
 # resuming the simulation again.
 
-# We get simInsts using get_simstat and output it in the final
-# print statement.
-
-gem5stats = get_simstat(root)
-
-# We get the number of committed instructions from the timing
-# cores. We then sum and print them at the end.
-
-roi_insts = float(
-    gem5stats.to_json()["system"]["processor"]["switch0"]["core"][
-        "exec_context.thread_0"
-    ]["numInsts"]["value"]
-) + float(
-    gem5stats.to_json()["system"]["processor"]["switch1"]["core"][
-        "exec_context.thread_0"
-    ]["numInsts"]["value"]
-)
-
 # Simulation is over at this point. We acknowledge that all the simulation
 # events were successful.
 print("All simulation events were successful.")
@@ -328,9 +276,16 @@ print("Done with the simulation")
 print()
 print("Performance statistics:")
 
-print("Simulated time in ROI: %.2fs" % ((end_tick - start_tick) / 1e12))
-print("Instructions executed in ROI: %d" % ((roi_insts)))
-print("Ran a total of", m5.curTick() / 1e12, "simulated seconds")
+# manually calculate ROI time if ticks arg is used in case the
+# entire ROI wasn't simulated
+if args.ticks:
+    print(f"Simulated time in ROI (to tick): {args.ticks/ 1e12}s")
+else:
+    print(f"Simulated time in ROI: {simulator.get_roi_ticks()[0] / 1e12}s")
+
+print(
+    f"Ran a total of {simulator.get_current_tick() / 1e12} simulated seconds"
+)
 print(
     "Total wallclock time: %.2fs, %.2f min"
     % (time.time() - globalStart, (time.time() - globalStart) / 60)
diff --git a/configs/example/gem5_library/x86-parsec-benchmarks.py b/configs/example/gem5_library/x86-parsec-benchmarks.py
index 82183802c7..190c0a0980 100644
--- a/configs/example/gem5_library/x86-parsec-benchmarks.py
+++ b/configs/example/gem5_library/x86-parsec-benchmarks.py
@@ -60,8 +60,8 @@ from gem5.components.processors.cpu_types import CPUTypes
 from gem5.isas import ISA
 from gem5.coherence_protocol import CoherenceProtocol
 from gem5.resources.resource import Resource
-
-from m5.stats.gem5stats import get_simstat
+from gem5.simulate.simulator import Simulator
+from gem5.simulate.exit_event import ExitEvent
 
 # We check for the required gem5 build.
 
@@ -195,17 +195,28 @@ board.set_kernel_disk_workload(
     readfile_contents=command,
 )
 
-# We need this for long running processes.
-m5.disableAllListeners()
+# functions to handle different exit events during the simuation
+def handle_workbegin():
+    print("Done booting Linux")
+    print("Resetting stats at the start of ROI!")
+    m5.stats.reset()
+    processor.switch()
+    yield False
 
-root = Root(full_system=True, system=board)
 
-# sim_quantum must be set if KVM cores are used.
+def handle_workend():
+    print("Dump stats at the end of the ROI!")
+    m5.stats.dump()
+    yield True
 
-root.sim_quantum = int(1e9)
 
-board._pre_instantiate()
-m5.instantiate()
+simulator = Simulator(
+    board=board,
+    on_exit_event={
+        ExitEvent.WORKBEGIN: handle_workbegin(),
+        ExitEvent.WORKEND: handle_workend(),
+    },
+)
 
 # We maintain the wall clock time.
 
@@ -214,83 +225,11 @@ globalStart = time.time()
 print("Running the simulation")
 print("Using KVM cpu")
 
-start_tick = m5.curTick()
-end_tick = m5.curTick()
 m5.stats.reset()
 
 # We start the simulation
+simulator.run()
 
-exit_event = m5.simulate()
-
-# The first exit_event ends with a `workbegin` cause. This means that the
-# system booted successfully and the execution on the program started.
-
-if exit_event.getCause() == "workbegin":
-
-    print("Done booting Linux")
-    print("Resetting stats at the start of ROI!")
-
-    m5.stats.reset()
-    start_tick = m5.curTick()
-
-    # We have completed up to this step using KVM cpu. Now we switch to timing
-    # cpu for detailed simulation.
-
-    processor.switch()
-else:
-    # `workbegin` call was never encountered.
-
-    print("Unexpected termination of simulation before ROI was reached!")
-    print(
-        "Exiting @ tick {} because {}.".format(
-            m5.curTick(), exit_event.getCause()
-        )
-    )
-    exit(-1)
-
-# The next exit_event is to simulate the ROI. It should be exited with a cause
-# marked by `workend`.
-
-exit_event = m5.simulate()
-
-# Reached the end of ROI.
-# We dump the stats here.
-
-# We exepect that ROI ends with `workend`. Otherwise the simulation ended
-# unexpectedly.
-if exit_event.getCause() == "workend":
-    print("Dump stats at the end of the ROI!")
-
-    m5.stats.dump()
-    end_tick = m5.curTick()
-else:
-    print("Unexpected termination of simulation while ROI was being executed!")
-    print(
-        "Exiting @ tick {} because {}.".format(
-            m5.curTick(), exit_event.getCause()
-        )
-    )
-    exit(-1)
-
-# ROI has ended here, and we get `simInsts` using get_simstat and print it in
-# the final print statement.
-
-gem5stats = get_simstat(root)
-
-# We get the number of committed instructions from the timing
-# cores. We then sum and print them at the end.
-roi_insts = float(
-    gem5stats.to_json()["system"]["processor"]["switch0"]["core"][
-        "exec_context.thread_0"
-    ]["numInsts"]["value"]
-) + float(
-    gem5stats.to_json()["system"]["processor"]["switch1"]["core"][
-        "exec_context.thread_0"
-    ]["numInsts"]["value"]
-)
-
-# Simulation is over at this point. We acknowledge that all the simulation
-# events were successful.
 print("All simulation events were successful.")
 
 # We print the final simulation statistics.
@@ -299,9 +238,10 @@ print("Done with the simulation")
 print()
 print("Performance statistics:")
 
-print("Simulated time in ROI: %.2fs" % ((end_tick - start_tick) / 1e12))
-print("Instructions executed in ROI: %d" % ((roi_insts)))
-print("Ran a total of", m5.curTick() / 1e12, "simulated seconds")
+print("Simulated time in ROI: " + ((str(simulator.get_roi_ticks()[0]))))
+print(
+    "Ran a total of", simulator.get_current_tick() / 1e12, "simulated seconds"
+)
 print(
     "Total wallclock time: %.2fs, %.2f min"
     % (time.time() - globalStart, (time.time() - globalStart) / 60)
diff --git a/configs/example/gem5_library/x86-spec-cpu2006-benchmarks.py b/configs/example/gem5_library/x86-spec-cpu2006-benchmarks.py
index d656e61145..8f39f49e2e 100644
--- a/configs/example/gem5_library/x86-spec-cpu2006-benchmarks.py
+++ b/configs/example/gem5_library/x86-spec-cpu2006-benchmarks.py
@@ -66,6 +66,8 @@ from gem5.components.processors.cpu_types import CPUTypes
 from gem5.isas import ISA
 from gem5.coherence_protocol import CoherenceProtocol
 from gem5.resources.resource import Resource, CustomDiskImageResource
+from gem5.simulate.simulator import Simulator
+from gem5.simulate.exit_event import ExitEvent
 
 from m5.stats.gem5stats import get_simstat
 from m5.util import warn
@@ -265,17 +267,23 @@ board.set_kernel_disk_workload(
     readfile_contents=command,
 )
 
-# We need this for long running processes.
-m5.disableAllListeners()
 
-root = Root(full_system=True, system=board)
+def handle_exit():
+    print("Done bootling Linux")
+    print("Resetting stats at the start of ROI!")
+    m5.stats.reset()
+    yield False  # E.g., continue the simulation.
+    print("Dump stats at the end of the ROI!")
+    m5.stats.dump()
+    yield True  # Stop the simulation. We're done.
 
-# sim_quantum must be set when KVM cores are used.
 
-root.sim_quantum = int(1e9)
-
-board._pre_instantiate()
-m5.instantiate()
+simulator = Simulator(
+    board=board,
+    on_exit_event={
+        ExitEvent.EXIT: handle_exit(),
+    },
+)
 
 # We maintain the wall clock time.
 
@@ -284,92 +292,10 @@ globalStart = time.time()
 print("Running the simulation")
 print("Using KVM cpu")
 
-start_tick = m5.curTick()
-end_tick = m5.curTick()
 m5.stats.reset()
 
-exit_event = m5.simulate()
-
-if exit_event.getCause() == "m5_exit instruction encountered":
-    # We have completed booting the OS using KVM cpu
-    # Reached the start of ROI
-
-    print("Done booting Linux")
-    print("Resetting stats at the start of ROI!")
-
-    m5.stats.reset()
-    start_tick = m5.curTick()
-
-    # We switch to timing cpu for detailed simulation.
-
-    processor.switch()
-else:
-    # `m5_exit instruction encountered` was never reached
-
-    print("Unexpected termination of simulation before ROI was reached!")
-    print(
-        "Exiting @ tick {} because {}.".format(
-            m5.curTick(), exit_event.getCause()
-        )
-    )
-    exit(-1)
-
-# Simulate the ROI
-exit_event = m5.simulate()
-
-# Reached the end of ROI
-gem5stats = get_simstat(root)
-
-# We get the number of committed instructions from the timing
-# cores. We then sum and print them at the end.
-
-roi_insts = float(
-    json.loads(gem5stats.dumps())["system"]["processor"]["cores2"]["core"][
-        "exec_context.thread_0"
-    ]["numInsts"]["value"]
-) + float(
-    json.loads(gem5stats.dumps())["system"]["processor"]["cores3"]["core"][
-        "exec_context.thread_0"
-    ]["numInsts"]["value"]
-)
-
-if exit_event.getCause() == "m5_exit instruction encountered":
-    print("Dump stats at the end of the ROI!")
-    m5.stats.dump()
-    end_tick = m5.curTick()
-    m5.stats.reset()
-
-else:
-    # `m5_exit instruction encountered` was never reached
-
-    print("Unexpected termination of simulation while ROI was being executed!")
-    print(
-        "Exiting @ tick {} because {}.".format(
-            m5.curTick(), exit_event.getCause()
-        )
-    )
-    exit(-1)
-
-# We need to copy back the contents of the `speclogs' directory to
-# m5.options.outdir
-
-exit_event = m5.simulate()
-
-if exit_event.getCause() == "m5_exit instruction encountered":
-    print("Output logs copied!")
-
-else:
-    print("Unexpected termination of simulation while copying speclogs!")
-    print(
-        "Exiting @ tick {} because {}.".format(
-            m5.curTick(), exit_event.getCause()
-        )
-    )
-    exit(-1)
-
-m5.stats.dump()
-end_tick = m5.curTick()
-m5.stats.reset()
+# We start the simulation
+simulator.run()
 
 # Simulation is over at this point. We acknowledge that all the simulation
 # events were successful.
@@ -378,9 +304,10 @@ print("All simulation events were successful.")
 
 print("Performance statistics:")
 
-print("Simulated time: %.2fs" % ((end_tick - start_tick) / 1e12))
-print("Instructions executed: %d" % ((roi_insts)))
-print("Ran a total of", m5.curTick() / 1e12, "simulated seconds")
+print("Simulated time: " + ((str(simulator.get_roi_ticks()[0]))))
+print(
+    "Ran a total of", simulator.get_current_tick() / 1e12, "simulated seconds"
+)
 print(
     "Total wallclock time: %.2fs, %.2f min"
     % (time.time() - globalStart, (time.time() - globalStart) / 60)
diff --git a/configs/example/gem5_library/x86-spec-cpu2017-benchmarks.py b/configs/example/gem5_library/x86-spec-cpu2017-benchmarks.py
index 2bc948aea1..c4af7f5dd9 100644
--- a/configs/example/gem5_library/x86-spec-cpu2017-benchmarks.py
+++ b/configs/example/gem5_library/x86-spec-cpu2017-benchmarks.py
@@ -64,6 +64,8 @@ from gem5.components.processors.cpu_types import CPUTypes
 from gem5.isas import ISA
 from gem5.coherence_protocol import CoherenceProtocol
 from gem5.resources.resource import Resource, CustomDiskImageResource
+from gem5.simulate.simulator import Simulator
+from gem5.simulate.exit_event import ExitEvent
 
 from m5.stats.gem5stats import get_simstat
 from m5.util import warn
@@ -281,17 +283,23 @@ board.set_kernel_disk_workload(
     readfile_contents=command,
 )
 
-# We need this for long running processes.
-m5.disableAllListeners()
 
-root = Root(full_system=True, system=board)
+def handle_exit():
+    print("Done bootling Linux")
+    print("Resetting stats at the start of ROI!")
+    m5.stats.reset()
+    yield False  # E.g., continue the simulation.
+    print("Dump stats at the end of the ROI!")
+    m5.stats.dump()
+    yield True  # Stop the simulation. We're done.
 
-# sim_quantum must be set when KVM cores are used.
 
-root.sim_quantum = int(1e9)
-
-board._pre_instantiate()
-m5.instantiate()
+simulator = Simulator(
+    board=board,
+    on_exit_event={
+        ExitEvent.EXIT: handle_exit(),
+    },
+)
 
 # We maintain the wall clock time.
 
@@ -300,92 +308,21 @@ globalStart = time.time()
 print("Running the simulation")
 print("Using KVM cpu")
 
-start_tick = m5.curTick()
-end_tick = m5.curTick()
 m5.stats.reset()
 
-exit_event = m5.simulate()
+# We start the simulation
+simulator.run()
 
-if exit_event.getCause() == "m5_exit instruction encountered":
-    # We have completed booting the OS using KVM cpu
-    # Reached the start of ROI
-
-    print("Done booting Linux")
-    print("Resetting stats at the start of ROI!")
-
-    m5.stats.reset()
-    start_tick = m5.curTick()
-
-    # We switch to timing cpu for detailed simulation.
-
-    processor.switch()
-else:
-    print("Unexpected termination of simulation before ROI was reached!")
-    print(
-        "Exiting @ tick {} because {}.".format(
-            m5.curTick(), exit_event.getCause()
-        )
-    )
-    exit(-1)
-
-# Simulate the ROI
-exit_event = m5.simulate()
-
-# Reached the end of ROI
-gem5stats = get_simstat(root)
-
-# We get the number of committed instructions from the timing
-# cores. We then sum and print them at the end.
-
-roi_insts = float(
-    json.loads(gem5stats.dumps())["system"]["processor"]["cores2"]["core"][
-        "exec_context.thread_0"
-    ]["numInsts"]["value"]
-) + float(
-    json.loads(gem5stats.dumps())["system"]["processor"]["cores3"]["core"][
-        "exec_context.thread_0"
-    ]["numInsts"]["value"]
-)
-
-if exit_event.getCause() == "m5_exit instruction encountered":
-    print("Dump stats at the end of the ROI!")
-    m5.stats.dump()
-    end_tick = m5.curTick()
-    m5.stats.reset()
-
-else:
-    print("Unexpected termination of simulation while ROI was being executed!")
-    print(
-        "Exiting @ tick {} because {}.".format(
-            m5.curTick(), exit_event.getCause()
-        )
-    )
-    exit(-1)
-
-# We need to copy back the contents of the `speclogs' directory to
-# m5.options.outdir
-
-exit_event = m5.simulate()
-
-if exit_event.getCause() == "m5_exit instruction encountered":
-    print("Output logs copied!")
-
-else:
-    print("Unexpected termination of simulation while copying speclogs!")
-    print(
-        "Exiting @ tick {} because {}.".format(
-            m5.curTick(), exit_event.getCause()
-        )
-    )
-    exit(-1)
+# We print the final simulation statistics.
 
 print("Done with the simulation")
 print()
 print("Performance statistics:")
 
-print("Simulated time in ROI: %.2fs" % ((end_tick - start_tick) / 1e12))
-print("Instructions executed in ROI: %d" % ((roi_insts)))
-print("Ran a total of", m5.curTick() / 1e12, "simulated seconds")
+print("Simulated time in ROI: " + ((str(simulator.get_roi_ticks()[0]))))
+print(
+    "Ran a total of", simulator.get_current_tick() / 1e12, "simulated seconds"
+)
 print(
     "Total wallclock time: %.2fs, %.2f min"
     % (time.time() - globalStart, (time.time() - globalStart) / 60)

From 749c4779f4644b6660ba19636ba69ab081e33222 Mon Sep 17 00:00:00 2001
From: Roger Chang <rogerycchang@google.com>
Date: Mon, 28 Nov 2022 07:21:14 +0000
Subject: [PATCH 035/492] arch-riscv: Add basic features toward rv32 support

Various changes to support rv32:
1. Add riscv_bits field into RiscvISA to switch rv_type
2. Add rv_type field into ExtMachInst
3. Split various constants into rv32/rv64 version
4. Fix mcause/mstatus/misa setting per rv_type
5. Split RiscvCPU into rv32/rv64
6. Fix how reset/branch create new pc so rv_type is preserved
7. Tag gdb-xml only for rv64

TODO:
Add rv32 gdb-xml
Add rv32 implementation into decoder

Currently there're three places where we store the rv_type information
(1) ISA (2) PCState (3) ExtMachInst. In theory, the ISA should be the
source of truth, and propagates information into PCState, then Inst.

However, there is an API on RiscvProcess that let users modify the
rv_type in PCState, so there's a chance to get inconsistent rv_type. We
should either modify the structure so such kind of usage is well
supported, or just prohibit people from setting a different rv_type.

Change-Id: If5685ae60f8d18f4f2e18137e235989e63156404
Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/63091
Reviewed-by: Yu-hsin Wang <yuhsingw@google.com>
Reviewed-by: Jason Lowe-Power <power.jg@gmail.com>
Maintainer: Jason Lowe-Power <power.jg@gmail.com>
Tested-by: kokoro <noreply+kokoro@google.com>
---
 src/arch/riscv/RiscvCPU.py                    |  29 ++++
 src/arch/riscv/RiscvISA.py                    |   6 +
 src/arch/riscv/SConscript                     |   3 +-
 src/arch/riscv/decoder.cc                     |   1 +
 src/arch/riscv/faults.cc                      |   9 +-
 src/arch/riscv/gdb-xml/SConscript             |   8 +-
 .../gdb-xml/{riscv.xml => riscv-64bit.xml}    |   0
 src/arch/riscv/insts/standard.hh              |   4 +-
 src/arch/riscv/insts/unknown.hh               |   2 +-
 src/arch/riscv/isa.cc                         |  44 ++++--
 src/arch/riscv/isa.hh                         |   9 +-
 src/arch/riscv/isa/bitfields.isa              |   1 +
 src/arch/riscv/isa/formats/basic.isa          |   4 +-
 src/arch/riscv/isa/formats/compressed.isa     |   2 +-
 src/arch/riscv/isa/formats/standard.isa       |  14 +-
 src/arch/riscv/pcstate.hh                     |  20 ++-
 src/arch/riscv/process.cc                     |   2 +-
 src/arch/riscv/regs/misc.hh                   | 126 ++++++++++++------
 src/arch/riscv/remote_gdb.cc                  |  89 +++++++------
 src/arch/riscv/remote_gdb.hh                  |   4 +-
 src/arch/riscv/types.hh                       |   9 +-
 21 files changed, 265 insertions(+), 121 deletions(-)
 rename src/arch/riscv/gdb-xml/{riscv.xml => riscv-64bit.xml} (100%)

diff --git a/src/arch/riscv/RiscvCPU.py b/src/arch/riscv/RiscvCPU.py
index 1c77045c67..678c3295c6 100644
--- a/src/arch/riscv/RiscvCPU.py
+++ b/src/arch/riscv/RiscvCPU.py
@@ -23,6 +23,8 @@
 # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 
+import functools
+
 from m5.objects.BaseAtomicSimpleCPU import BaseAtomicSimpleCPU
 from m5.objects.BaseNonCachingSimpleCPU import BaseNonCachingSimpleCPU
 from m5.objects.BaseTimingSimpleCPU import BaseTimingSimpleCPU
@@ -41,6 +43,13 @@ class RiscvCPU:
     ArchISA = RiscvISA
 
 
+class Riscv32CPU:
+    ArchDecoder = RiscvDecoder
+    ArchMMU = RiscvMMU
+    ArchInterrupts = RiscvInterrupts
+    ArchISA = functools.partial(RiscvISA, riscv_type="RV32")
+
+
 class RiscvAtomicSimpleCPU(BaseAtomicSimpleCPU, RiscvCPU):
     mmu = RiscvMMU()
 
@@ -59,3 +68,23 @@ class RiscvO3CPU(BaseO3CPU, RiscvCPU):
 
 class RiscvMinorCPU(BaseMinorCPU, RiscvCPU):
     mmu = RiscvMMU()
+
+
+class Riscv32AtomicSimpleCPU(BaseAtomicSimpleCPU, Riscv32CPU):
+    mmu = RiscvMMU()
+
+
+class Riscv32NonCachingSimpleCPU(BaseNonCachingSimpleCPU, Riscv32CPU):
+    mmu = RiscvMMU()
+
+
+class Riscv32TimingSimpleCPU(BaseTimingSimpleCPU, Riscv32CPU):
+    mmu = RiscvMMU()
+
+
+class Riscv32O3CPU(BaseO3CPU, Riscv32CPU):
+    mmu = RiscvMMU()
+
+
+class Riscv32MinorCPU(BaseMinorCPU, Riscv32CPU):
+    mmu = RiscvMMU()
diff --git a/src/arch/riscv/RiscvISA.py b/src/arch/riscv/RiscvISA.py
index ee98a5b95d..e2381fd158 100644
--- a/src/arch/riscv/RiscvISA.py
+++ b/src/arch/riscv/RiscvISA.py
@@ -38,10 +38,15 @@
 # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 
+from m5.params import Enum
 from m5.params import Param
 from m5.objects.BaseISA import BaseISA
 
 
+class RiscvType(Enum):
+    vals = ["RV32", "RV64"]
+
+
 class RiscvISA(BaseISA):
     type = "RiscvISA"
     cxx_class = "gem5::RiscvISA::ISA"
@@ -50,3 +55,4 @@ class RiscvISA(BaseISA):
     check_alignment = Param.Bool(
         False, "whether to check memory access alignment"
     )
+    riscv_type = Param.RiscvType("RV64", "RV32 or RV64")
diff --git a/src/arch/riscv/SConscript b/src/arch/riscv/SConscript
index dd4e9aed17..924bba5915 100644
--- a/src/arch/riscv/SConscript
+++ b/src/arch/riscv/SConscript
@@ -70,7 +70,8 @@ SimObject('RiscvFsWorkload.py', sim_objects=['RiscvBareMetal', 'RiscvLinux'],
     tags='riscv isa')
 SimObject('RiscvInterrupts.py', sim_objects=['RiscvInterrupts'],
     tags='riscv isa')
-SimObject('RiscvISA.py', sim_objects=['RiscvISA'], tags='riscv isa')
+SimObject('RiscvISA.py', sim_objects=['RiscvISA'],
+    enums=['RiscvType'], tags='riscv isa')
 SimObject('RiscvMMU.py', sim_objects=['RiscvMMU'], tags='riscv isa')
 SimObject('RiscvSeWorkload.py', sim_objects=[
     'RiscvSEWorkload', 'RiscvEmuLinux'], tags='riscv isa')
diff --git a/src/arch/riscv/decoder.cc b/src/arch/riscv/decoder.cc
index a02415fae0..b816c17b21 100644
--- a/src/arch/riscv/decoder.cc
+++ b/src/arch/riscv/decoder.cc
@@ -111,6 +111,7 @@ Decoder::decode(PCStateBase &_next_pc)
         next_pc.compressed(false);
     }
 
+    emi.rv_type = static_cast<int>(next_pc.rvType());
     return decode(emi, next_pc.instAddr());
 }
 
diff --git a/src/arch/riscv/faults.cc b/src/arch/riscv/faults.cc
index e609222b07..3469c71252 100644
--- a/src/arch/riscv/faults.cc
+++ b/src/arch/riscv/faults.cc
@@ -135,10 +135,9 @@ RiscvFault::invoke(ThreadContext *tc, const StaticInstPtr &inst)
         }
 
         // Set fault cause, privilege, and return PC
-        // Interrupt is indicated on the MSB of cause (bit 63 in RV64)
         uint64_t _cause = _code;
         if (isInterrupt()) {
-           _cause |= (1L << 63);
+           _cause |= CAUSE_INTERRUPT_MASKS[pc_state.rvType()];
         }
         tc->setMiscReg(cause, _cause);
         tc->setMiscReg(epc, tc->pcState().instAddr());
@@ -177,8 +176,10 @@ Reset::invoke(ThreadContext *tc, const StaticInstPtr &inst)
 
     // Advance the PC to the implementation-defined reset vector
     auto workload = dynamic_cast<Workload *>(tc->getSystemPtr()->workload);
-    PCState pc(workload->getEntry());
-    tc->pcState(pc);
+    std::unique_ptr<PCState> new_pc(dynamic_cast<PCState *>(
+        tc->getIsaPtr()->newPCState(workload->getEntry())));
+    panic_if(!new_pc, "Failed create new PCState from ISA pointer");
+    tc->pcState(*new_pc);
 }
 
 void
diff --git a/src/arch/riscv/gdb-xml/SConscript b/src/arch/riscv/gdb-xml/SConscript
index a733b1eb0e..722137408b 100644
--- a/src/arch/riscv/gdb-xml/SConscript
+++ b/src/arch/riscv/gdb-xml/SConscript
@@ -43,7 +43,7 @@
 
 Import('*')
 
-GdbXml('riscv.xml', 'gdb_xml_riscv_target', tags='riscv isa')
-GdbXml('riscv-64bit-cpu.xml', 'gdb_xml_riscv_cpu', tags='riscv isa')
-GdbXml('riscv-64bit-fpu.xml', 'gdb_xml_riscv_fpu', tags='riscv isa')
-GdbXml('riscv-64bit-csr.xml', 'gdb_xml_riscv_csr', tags='riscv isa')
+GdbXml('riscv-64bit.xml', 'gdb_xml_riscv_64bit_target', tags='riscv isa')
+GdbXml('riscv-64bit-cpu.xml', 'gdb_xml_riscv_64bit_cpu', tags='riscv isa')
+GdbXml('riscv-64bit-fpu.xml', 'gdb_xml_riscv_64bit_fpu', tags='riscv isa')
+GdbXml('riscv-64bit-csr.xml', 'gdb_xml_riscv_64bit_csr', tags='riscv isa')
diff --git a/src/arch/riscv/gdb-xml/riscv.xml b/src/arch/riscv/gdb-xml/riscv-64bit.xml
similarity index 100%
rename from src/arch/riscv/gdb-xml/riscv.xml
rename to src/arch/riscv/gdb-xml/riscv-64bit.xml
diff --git a/src/arch/riscv/insts/standard.hh b/src/arch/riscv/insts/standard.hh
index be3470fda4..5b0e8c2c22 100644
--- a/src/arch/riscv/insts/standard.hh
+++ b/src/arch/riscv/insts/standard.hh
@@ -66,7 +66,7 @@ class ImmOp : public RiscvStaticInst
   protected:
     I imm;
 
-    ImmOp(const char *mnem, MachInst _machInst, OpClass __opClass)
+    ImmOp(const char *mnem, ExtMachInst _machInst, OpClass __opClass)
         : RiscvStaticInst(mnem, _machInst, __opClass), imm(0)
     {}
 };
@@ -93,7 +93,7 @@ class CSROp : public RiscvStaticInst
     uint64_t uimm;
 
     /// Constructor
-    CSROp(const char *mnem, MachInst _machInst, OpClass __opClass)
+    CSROp(const char *mnem, ExtMachInst _machInst, OpClass __opClass)
         : RiscvStaticInst(mnem, _machInst, __opClass),
             csr(FUNCT12), uimm(CSRIMM)
     {
diff --git a/src/arch/riscv/insts/unknown.hh b/src/arch/riscv/insts/unknown.hh
index a271eb98b0..0c2f75e1e9 100644
--- a/src/arch/riscv/insts/unknown.hh
+++ b/src/arch/riscv/insts/unknown.hh
@@ -53,7 +53,7 @@ namespace RiscvISA
 class Unknown : public RiscvStaticInst
 {
   public:
-    Unknown(MachInst _machInst)
+    Unknown(ExtMachInst _machInst)
         : RiscvStaticInst("unknown", _machInst, No_OpClass)
     {}
 
diff --git a/src/arch/riscv/isa.cc b/src/arch/riscv/isa.cc
index c76bb2bdf3..c8eabd44ad 100644
--- a/src/arch/riscv/isa.cc
+++ b/src/arch/riscv/isa.cc
@@ -204,7 +204,7 @@ RegClass ccRegClass(CCRegClass, CCRegClassName, 0, debug::IntRegs);
 } // anonymous namespace
 
 ISA::ISA(const Params &p) :
-    BaseISA(p), checkAlignment(p.check_alignment)
+    BaseISA(p), rv_type(p.riscv_type), checkAlignment(p.check_alignment)
 {
     _regClasses.push_back(&intRegClass);
     _regClasses.push_back(&floatRegClass);
@@ -243,12 +243,24 @@ void ISA::clear()
     std::fill(miscRegFile.begin(), miscRegFile.end(), 0);
 
     miscRegFile[MISCREG_PRV] = PRV_M;
-    miscRegFile[MISCREG_ISA] = (2ULL << MXL_OFFSET) | 0x14112D;
     miscRegFile[MISCREG_VENDORID] = 0;
     miscRegFile[MISCREG_ARCHID] = 0;
     miscRegFile[MISCREG_IMPID] = 0;
-    miscRegFile[MISCREG_STATUS] = (2ULL << UXL_OFFSET) | (2ULL << SXL_OFFSET) |
-                                  (1ULL << FS_OFFSET);
+    // rv_type dependent init.
+    switch (rv_type) {
+        case RV32:
+            miscRegFile[MISCREG_ISA] = (1ULL << MXL_OFFSETS[RV32]) | 0x14112D;
+            miscRegFile[MISCREG_STATUS] = (1ULL << FS_OFFSET);
+            break;
+        case RV64:
+            miscRegFile[MISCREG_ISA] = (2ULL << MXL_OFFSETS[RV64]) | 0x14112D;
+            miscRegFile[MISCREG_STATUS] = (2ULL << UXL_OFFSET) |
+                                          (2ULL << SXL_OFFSET) |
+                                          (1ULL << FS_OFFSET);
+            break;
+        default:
+            panic("%s: Unknown rv_type: %d", name(), (int)rv_type);
+    }
     miscRegFile[MISCREG_MCOUNTEREN] = 0x7;
     miscRegFile[MISCREG_SCOUNTEREN] = 0x7;
     // don't set it to zero; software may try to determine the supported
@@ -365,8 +377,18 @@ ISA::readMiscReg(RegIndex idx)
             STATUS status = readMiscRegNoEffect(idx);
             uint64_t sd_bit = \
                 (status.xs == 3) || (status.fs == 3) || (status.vs == 3);
-            // We assume RV64 here, updating the SD bit at index 63.
-            status.sd = sd_bit;
+            // For RV32, the SD bit is at index 31
+            // For RV64, the SD bit is at index 63.
+            switch (rv_type) {
+                case RV32:
+                    status.rv32_sd = sd_bit;
+                    break;
+                case RV64:
+                    status.rv64_sd = sd_bit;
+                    break;
+                default:
+                    panic("%s: Unknown rv_type: %d", name(), (int)rv_type);
+            }
             setMiscRegNoEffect(idx, status);
 
             return readMiscRegNoEffect(idx);
@@ -506,10 +528,12 @@ ISA::setMiscReg(RegIndex idx, RegVal val)
             break;
           case MISCREG_STATUS:
             {
-                // SXL and UXL are hard-wired to 64 bit
-                auto cur = readMiscRegNoEffect(idx);
-                val &= ~(STATUS_SXL_MASK | STATUS_UXL_MASK);
-                val |= cur & (STATUS_SXL_MASK | STATUS_UXL_MASK);
+                if (rv_type != RV32) {
+                    // SXL and UXL are hard-wired to 64 bit
+                    auto cur = readMiscRegNoEffect(idx);
+                    val &= ~(STATUS_SXL_MASK | STATUS_UXL_MASK);
+                    val |= cur & (STATUS_SXL_MASK | STATUS_UXL_MASK);
+                }
                 setMiscRegNoEffect(idx, val);
             }
             break;
diff --git a/src/arch/riscv/isa.hh b/src/arch/riscv/isa.hh
index 97a05814fe..e332956972 100644
--- a/src/arch/riscv/isa.hh
+++ b/src/arch/riscv/isa.hh
@@ -70,6 +70,7 @@ enum FPUStatus
 class ISA : public BaseISA
 {
   protected:
+    RiscvType rv_type;
     std::vector<RegVal> miscRegFile;
     bool checkAlignment;
 
@@ -80,10 +81,10 @@ class ISA : public BaseISA
 
     void clear() override;
 
-    PCStateBase *
+    PCStateBase*
     newPCState(Addr new_inst_addr=0) const override
     {
-        return new PCState(new_inst_addr);
+        return new PCState(new_inst_addr, rv_type);
     }
 
   public:
@@ -104,7 +105,7 @@ class ISA : public BaseISA
     virtual const std::unordered_map<int, RegVal>&
     getCSRMaskMap() const
     {
-        return CSRMasks;
+        return CSRMasks[rv_type];
     }
 
     bool alignmentCheckEnabled() const { return checkAlignment; }
@@ -125,6 +126,8 @@ class ISA : public BaseISA
     void handleLockedSnoop(PacketPtr pkt, Addr cacheBlockMask) override;
 
     void globalClearExclusive() override;
+
+    RiscvType rvType() const { return rv_type; }
 };
 
 } // namespace RiscvISA
diff --git a/src/arch/riscv/isa/bitfields.isa b/src/arch/riscv/isa/bitfields.isa
index 60636c68f8..41935c5b0f 100644
--- a/src/arch/riscv/isa/bitfields.isa
+++ b/src/arch/riscv/isa/bitfields.isa
@@ -32,6 +32,7 @@
 //
 // Bitfield definitions.
 //
+def bitfield RVTYPE rv_type;
 
 def bitfield QUADRANT <1:0>;
 def bitfield OPCODE <6:2>;
diff --git a/src/arch/riscv/isa/formats/basic.isa b/src/arch/riscv/isa/formats/basic.isa
index 6dfeea851c..6bae0a6f86 100644
--- a/src/arch/riscv/isa/formats/basic.isa
+++ b/src/arch/riscv/isa/formats/basic.isa
@@ -39,7 +39,7 @@ def template BasicDeclare {{
 
       public:
         /// Constructor.
-        %(class_name)s(MachInst machInst);
+        %(class_name)s(ExtMachInst machInst);
         Fault execute(ExecContext *, trace::InstRecord *) const override;
         using %(base_class)s::generateDisassembly;
     };
@@ -47,7 +47,7 @@ def template BasicDeclare {{
 
 // Basic instruction class constructor template.
 def template BasicConstructor {{
-    %(class_name)s::%(class_name)s(MachInst machInst)
+    %(class_name)s::%(class_name)s(ExtMachInst machInst)
         : %(base_class)s("%(mnemonic)s", machInst, %(op_class)s)
     {
         %(set_reg_idx_arr)s;
diff --git a/src/arch/riscv/isa/formats/compressed.isa b/src/arch/riscv/isa/formats/compressed.isa
index 6fe899b881..d09865803e 100644
--- a/src/arch/riscv/isa/formats/compressed.isa
+++ b/src/arch/riscv/isa/formats/compressed.isa
@@ -125,7 +125,7 @@ def template CBasicDeclare {{
 
       public:
         /// Constructor.
-        %(class_name)s(MachInst machInst);
+        %(class_name)s(ExtMachInst machInst);
         Fault execute(ExecContext *, trace::InstRecord *) const override;
         std::string generateDisassembly(
                 Addr pc, const loader::SymbolTable *symtab) const override;
diff --git a/src/arch/riscv/isa/formats/standard.isa b/src/arch/riscv/isa/formats/standard.isa
index 3cad5ed0c9..5390164f10 100644
--- a/src/arch/riscv/isa/formats/standard.isa
+++ b/src/arch/riscv/isa/formats/standard.isa
@@ -44,7 +44,7 @@ def template ImmDeclare {{
 
       public:
         /// Constructor.
-        %(class_name)s(MachInst machInst);
+        %(class_name)s(ExtMachInst machInst);
         Fault execute(ExecContext *, trace::InstRecord *) const override;
         std::string generateDisassembly(Addr pc,
             const loader::SymbolTable *symtab) const override;
@@ -52,7 +52,7 @@ def template ImmDeclare {{
 }};
 
 def template ImmConstructor {{
-    %(class_name)s::%(class_name)s(MachInst machInst)
+    %(class_name)s::%(class_name)s(ExtMachInst machInst)
         : %(base_class)s("%(mnemonic)s", machInst, %(op_class)s)
     {
         %(set_reg_idx_arr)s;
@@ -168,7 +168,7 @@ def template BranchDeclare {{
 
       public:
         /// Constructor.
-        %(class_name)s(MachInst machInst);
+        %(class_name)s(ExtMachInst machInst);
         Fault execute(ExecContext *, trace::InstRecord *) const override;
 
         std::string
@@ -198,7 +198,9 @@ def template BranchExecute {{
     %(class_name)s::branchTarget(const PCStateBase &branch_pc) const
     {
         auto &rpc = branch_pc.as<RiscvISA::PCState>();
-        return std::make_unique<PCState>(rpc.pc() + imm);
+        std::unique_ptr<PCState> npc(dynamic_cast<PCState*>(rpc.clone()));
+        npc->set(rpc.pc() + imm);
+        return npc;
     }
 
     std::string
@@ -226,7 +228,7 @@ def template JumpDeclare {{
 
       public:
         /// Constructor.
-        %(class_name)s(MachInst machInst);
+        %(class_name)s(ExtMachInst machInst);
         Fault execute(ExecContext *, trace::InstRecord *) const override;
 
         std::string
@@ -241,7 +243,7 @@ def template JumpDeclare {{
 }};
 
 def template JumpConstructor {{
-    %(class_name)s::%(class_name)s(MachInst machInst)
+    %(class_name)s::%(class_name)s(ExtMachInst machInst)
         : %(base_class)s("%(mnemonic)s", machInst, %(op_class)s)
     {
         %(set_reg_idx_arr)s;
diff --git a/src/arch/riscv/pcstate.hh b/src/arch/riscv/pcstate.hh
index 0125507f96..de07145dc3 100644
--- a/src/arch/riscv/pcstate.hh
+++ b/src/arch/riscv/pcstate.hh
@@ -43,21 +43,29 @@
 #define __ARCH_RISCV_PCSTATE_HH__
 
 #include "arch/generic/pcstate.hh"
+#include "enums/RiscvType.hh"
 
 namespace gem5
 {
-
 namespace RiscvISA
 {
 
+using RiscvType = enums::RiscvType;
+constexpr enums::RiscvType RV32 = enums::RV32;
+constexpr enums::RiscvType RV64 = enums::RV64;
+
 class PCState : public GenericISA::UPCState<4>
 {
   private:
     bool _compressed = false;
-    bool _rv32 = false;
+    RiscvType _rv_type = RV64;
 
   public:
-    using GenericISA::UPCState<4>::UPCState;
+    PCState() = default;
+    PCState(const PCState &other) = default;
+    PCState(Addr addr, RiscvType rv_type) : UPCState(addr), _rv_type(rv_type)
+    {
+    }
 
     PCStateBase *clone() const override { return new PCState(*this); }
 
@@ -67,14 +75,14 @@ class PCState : public GenericISA::UPCState<4>
         Base::update(other);
         auto &pcstate = other.as<PCState>();
         _compressed = pcstate._compressed;
-        _rv32 = pcstate._rv32;
+        _rv_type = pcstate._rv_type;
     }
 
     void compressed(bool c) { _compressed = c; }
     bool compressed() const { return _compressed; }
 
-    void rv32(bool val) { _rv32 = val; }
-    bool rv32() const { return _rv32; }
+    void rvType(RiscvType rv_type) { _rv_type = rv_type; }
+    RiscvType rvType() const { return _rv_type; }
 
     bool
     branching() const override
diff --git a/src/arch/riscv/process.cc b/src/arch/riscv/process.cc
index 7c91b92217..dc7abae790 100644
--- a/src/arch/riscv/process.cc
+++ b/src/arch/riscv/process.cc
@@ -115,7 +115,7 @@ RiscvProcess32::initState()
         auto *tc = system->threads[ctx];
         tc->setMiscRegNoEffect(MISCREG_PRV, PRV_U);
         PCState pc = tc->pcState().as<PCState>();
-        pc.rv32(true);
+        pc.rvType(RV32);
         tc->pcState(pc);
     }
 }
diff --git a/src/arch/riscv/regs/misc.hh b/src/arch/riscv/regs/misc.hh
index 5f074475c9..7f6fff4e00 100644
--- a/src/arch/riscv/regs/misc.hh
+++ b/src/arch/riscv/regs/misc.hh
@@ -51,10 +51,12 @@
 
 #include "arch/generic/vec_pred_reg.hh"
 #include "arch/generic/vec_reg.hh"
+#include "arch/riscv/types.hh"
 #include "base/bitunion.hh"
 #include "base/types.hh"
 #include "cpu/reg_class.hh"
 #include "debug/MiscRegs.hh"
+#include "enums/RiscvType.hh"
 
 namespace gem5
 {
@@ -550,9 +552,10 @@ const std::unordered_map<int, CSRMetadata> CSRData = {
  * the fields for higher privileges.
  */
 BitUnion64(STATUS)
-    Bitfield<63> sd;
+    Bitfield<63> rv64_sd;
     Bitfield<35, 34> sxl;
     Bitfield<33, 32> uxl;
+    Bitfield<31> rv32_sd;
     Bitfield<22> tsr;
     Bitfield<21> tw;
     Bitfield<20> tvm;
@@ -590,20 +593,34 @@ BitUnion64(INTERRUPT)
     Bitfield<0> usi;
 EndBitUnion(INTERRUPT)
 
-const off_t MXL_OFFSET = (sizeof(uint64_t) * 8 - 2);
+const off_t MXL_OFFSETS[enums::Num_RiscvType] = {
+    [RV32] = (sizeof(uint32_t) * 8 - 2),
+    [RV64] = (sizeof(uint64_t) * 8 - 2),
+};
 const off_t SXL_OFFSET = 34;
 const off_t UXL_OFFSET = 32;
 const off_t FS_OFFSET = 13;
 const off_t FRM_OFFSET = 5;
 
-const RegVal ISA_MXL_MASK = 3ULL << MXL_OFFSET;
+const RegVal ISA_MXL_MASKS[enums::Num_RiscvType] = {
+    [RV32] = 3ULL << MXL_OFFSETS[RV32],
+    [RV64] = 3ULL << MXL_OFFSETS[RV64],
+};
 const RegVal ISA_EXT_MASK = mask(26);
 const RegVal ISA_EXT_C_MASK = 1UL << ('c' - 'a');
-const RegVal MISA_MASK = ISA_MXL_MASK | ISA_EXT_MASK;
+const RegVal MISA_MASKS[enums::Num_RiscvType] = {
+    [RV32] = ISA_MXL_MASKS[RV32] | ISA_EXT_MASK,
+    [RV64] = ISA_MXL_MASKS[RV64] | ISA_EXT_MASK,
+};
 
-const RegVal STATUS_SD_MASK = 1ULL << ((sizeof(uint64_t) * 8) - 1);
+
+const RegVal STATUS_SD_MASKS[enums::Num_RiscvType] = {
+    [RV32] = 1ULL << ((sizeof(uint32_t) * 8) - 1),
+    [RV64] = 1ULL << ((sizeof(uint64_t) * 8) - 1),
+};
 const RegVal STATUS_SXL_MASK = 3ULL << SXL_OFFSET;
 const RegVal STATUS_UXL_MASK = 3ULL << UXL_OFFSET;
+
 const RegVal STATUS_TSR_MASK = 1ULL << 22;
 const RegVal STATUS_TW_MASK = 1ULL << 21;
 const RegVal STATUS_TVM_MASK = 1ULL << 20;
@@ -621,26 +638,39 @@ const RegVal STATUS_UPIE_MASK = 1ULL << 4;
 const RegVal STATUS_MIE_MASK = 1ULL << 3;
 const RegVal STATUS_SIE_MASK = 1ULL << 1;
 const RegVal STATUS_UIE_MASK = 1ULL << 0;
-const RegVal MSTATUS_MASK = STATUS_SD_MASK | STATUS_SXL_MASK |
-                            STATUS_UXL_MASK | STATUS_TSR_MASK |
-                            STATUS_TW_MASK | STATUS_TVM_MASK |
-                            STATUS_MXR_MASK | STATUS_SUM_MASK |
-                            STATUS_MPRV_MASK | STATUS_XS_MASK |
-                            STATUS_FS_MASK | STATUS_VS_MASK |
-                            STATUS_MPP_MASK | STATUS_SPP_MASK |
-                            STATUS_MPIE_MASK | STATUS_SPIE_MASK |
-                            STATUS_UPIE_MASK | STATUS_MIE_MASK |
-                            STATUS_SIE_MASK | STATUS_UIE_MASK;
-const RegVal SSTATUS_MASK = STATUS_SD_MASK | STATUS_UXL_MASK |
-                            STATUS_MXR_MASK | STATUS_SUM_MASK |
-                            STATUS_XS_MASK | STATUS_FS_MASK |
-                            STATUS_VS_MASK | STATUS_SPP_MASK |
-                            STATUS_SPIE_MASK | STATUS_UPIE_MASK |
-                            STATUS_SIE_MASK | STATUS_UIE_MASK;
-const RegVal USTATUS_MASK = STATUS_SD_MASK | STATUS_MXR_MASK |
-                            STATUS_SUM_MASK | STATUS_XS_MASK |
-                            STATUS_FS_MASK | STATUS_VS_MASK |
-                            STATUS_UPIE_MASK | STATUS_UIE_MASK;
+const RegVal MSTATUS_MASKS[enums::Num_RiscvType] = {
+    [RV32] = STATUS_SD_MASKS[RV32] | STATUS_TSR_MASK | STATUS_TW_MASK |
+             STATUS_TVM_MASK | STATUS_MXR_MASK | STATUS_SUM_MASK |
+             STATUS_MPRV_MASK | STATUS_XS_MASK | STATUS_FS_MASK |
+             STATUS_VS_MASK | STATUS_MPP_MASK | STATUS_SPP_MASK |
+             STATUS_MPIE_MASK | STATUS_SPIE_MASK | STATUS_UPIE_MASK |
+             STATUS_MIE_MASK | STATUS_SIE_MASK | STATUS_UIE_MASK,
+    [RV64] = STATUS_SD_MASKS[RV64] | STATUS_SXL_MASK | STATUS_UXL_MASK |
+             STATUS_TSR_MASK | STATUS_TW_MASK | STATUS_TVM_MASK |
+             STATUS_MXR_MASK | STATUS_SUM_MASK | STATUS_MPRV_MASK |
+             STATUS_XS_MASK | STATUS_FS_MASK | STATUS_VS_MASK|
+             STATUS_MPP_MASK | STATUS_SPP_MASK | STATUS_MPIE_MASK |
+             STATUS_SPIE_MASK | STATUS_UPIE_MASK | STATUS_MIE_MASK |
+             STATUS_SIE_MASK | STATUS_UIE_MASK,
+};
+const RegVal SSTATUS_MASKS[enums::Num_RiscvType] = {
+    [RV32] = STATUS_SD_MASKS[RV32] | STATUS_MXR_MASK | STATUS_SUM_MASK |
+             STATUS_XS_MASK | STATUS_FS_MASK | STATUS_VS_MASK |
+             STATUS_SPP_MASK | STATUS_SPIE_MASK | STATUS_UPIE_MASK |
+             STATUS_SIE_MASK | STATUS_UIE_MASK,
+    [RV64] = STATUS_SD_MASKS[RV64] | STATUS_UXL_MASK | STATUS_MXR_MASK |
+             STATUS_SUM_MASK | STATUS_XS_MASK | STATUS_FS_MASK |
+             STATUS_VS_MASK | STATUS_SPP_MASK | STATUS_SPIE_MASK |
+             STATUS_UPIE_MASK | STATUS_SIE_MASK | STATUS_UIE_MASK,
+};
+const RegVal USTATUS_MASKS[enums::Num_RiscvType] = {
+    [RV32] = STATUS_SD_MASKS[RV32] | STATUS_MXR_MASK | STATUS_SUM_MASK |
+             STATUS_XS_MASK | STATUS_FS_MASK | STATUS_VS_MASK |
+             STATUS_UPIE_MASK | STATUS_UIE_MASK,
+    [RV64] = STATUS_SD_MASKS[RV64] | STATUS_MXR_MASK | STATUS_SUM_MASK |
+             STATUS_XS_MASK | STATUS_FS_MASK | STATUS_VS_MASK |
+             STATUS_UPIE_MASK | STATUS_UIE_MASK,
+};
 
 const RegVal MEI_MASK = 1ULL << 11;
 const RegVal SEI_MASK = 1ULL << 9;
@@ -661,20 +691,38 @@ const RegVal UI_MASK = UEI_MASK | UTI_MASK | USI_MASK;
 const RegVal FFLAGS_MASK = (1 << FRM_OFFSET) - 1;
 const RegVal FRM_MASK = 0x7;
 
-const std::unordered_map<int, RegVal> CSRMasks = {
-    {CSR_USTATUS, USTATUS_MASK},
-    {CSR_UIE, UI_MASK},
-    {CSR_UIP, UI_MASK},
-    {CSR_FFLAGS, FFLAGS_MASK},
-    {CSR_FRM, FRM_MASK},
-    {CSR_FCSR, FFLAGS_MASK | (FRM_MASK << FRM_OFFSET)},
-    {CSR_SSTATUS, SSTATUS_MASK},
-    {CSR_SIE, SI_MASK},
-    {CSR_SIP, SI_MASK},
-    {CSR_MSTATUS, MSTATUS_MASK},
-    {CSR_MISA, MISA_MASK},
-    {CSR_MIE, MI_MASK},
-    {CSR_MIP, MI_MASK}
+const RegVal CAUSE_INTERRUPT_MASKS[enums::Num_RiscvType] = {
+    [RV32] = (1ULL << 31),
+    [RV64] = (1ULL << 63),
+};
+
+const std::unordered_map<int, RegVal> CSRMasks[enums::Num_RiscvType] = {
+    [RV32] = {{CSR_USTATUS, USTATUS_MASKS[RV32]},
+              {CSR_UIE, UI_MASK},
+              {CSR_UIP, UI_MASK},
+              {CSR_FFLAGS, FFLAGS_MASK},
+              {CSR_FRM, FRM_MASK},
+              {CSR_FCSR, FFLAGS_MASK | (FRM_MASK << FRM_OFFSET)},
+              {CSR_SSTATUS, SSTATUS_MASKS[RV32]},
+              {CSR_SIE, SI_MASK},
+              {CSR_SIP, SI_MASK},
+              {CSR_MSTATUS, MSTATUS_MASKS[RV32]},
+              {CSR_MISA, MISA_MASKS[RV32]},
+              {CSR_MIE, MI_MASK},
+              {CSR_MIP, MI_MASK}},
+    [RV64] = {{CSR_USTATUS, USTATUS_MASKS[RV64]},
+              {CSR_UIE, UI_MASK},
+              {CSR_UIP, UI_MASK},
+              {CSR_FFLAGS, FFLAGS_MASK},
+              {CSR_FRM, FRM_MASK},
+              {CSR_FCSR, FFLAGS_MASK | (FRM_MASK << FRM_OFFSET)},
+              {CSR_SSTATUS, SSTATUS_MASKS[RV64]},
+              {CSR_SIE, SI_MASK},
+              {CSR_SIP, SI_MASK},
+              {CSR_MSTATUS, MSTATUS_MASKS[RV64]},
+              {CSR_MISA, MISA_MASKS[RV64]},
+              {CSR_MIE, MI_MASK},
+              {CSR_MIP, MI_MASK}},
 };
 
 } // namespace RiscvISA
diff --git a/src/arch/riscv/remote_gdb.cc b/src/arch/riscv/remote_gdb.cc
index ed700bbf8d..4bdd88fde6 100644
--- a/src/arch/riscv/remote_gdb.cc
+++ b/src/arch/riscv/remote_gdb.cc
@@ -135,10 +135,10 @@
 
 #include <string>
 
-#include "arch/riscv/gdb-xml/gdb_xml_riscv_cpu.hh"
-#include "arch/riscv/gdb-xml/gdb_xml_riscv_csr.hh"
-#include "arch/riscv/gdb-xml/gdb_xml_riscv_fpu.hh"
-#include "arch/riscv/gdb-xml/gdb_xml_riscv_target.hh"
+#include "arch/riscv/gdb-xml/gdb_xml_riscv_64bit_cpu.hh"
+#include "arch/riscv/gdb-xml/gdb_xml_riscv_64bit_csr.hh"
+#include "arch/riscv/gdb-xml/gdb_xml_riscv_64bit_fpu.hh"
+#include "arch/riscv/gdb-xml/gdb_xml_riscv_64bit_target.hh"
 #include "arch/riscv/mmu.hh"
 #include "arch/riscv/pagetable_walker.hh"
 #include "arch/riscv/regs/float.hh"
@@ -155,8 +155,16 @@ namespace gem5
 
 using namespace RiscvISA;
 
+static RiscvType
+getRvType(ThreadContext* tc)
+{
+    auto isa = dynamic_cast<ISA*>(tc->getIsaPtr());
+    panic_if(!isa, "Cannot derive rv_type from non-riscv isa");
+    return isa->rvType();
+}
+
 RemoteGDB::RemoteGDB(System *_system, int _port)
-    : BaseRemoteGDB(_system, _port), regCache(this)
+    : BaseRemoteGDB(_system, _port), regCache64(this)
 {
 }
 
@@ -186,9 +194,10 @@ RemoteGDB::acc(Addr va, size_t len)
 }
 
 void
-RemoteGDB::RiscvGdbRegCache::getRegs(ThreadContext *context)
+RemoteGDB::Riscv64GdbRegCache::getRegs(ThreadContext *context)
 {
     DPRINTF(GDBAcc, "getregs in remotegdb, size %lu\n", size());
+    auto& RVxCSRMasks = CSRMasks[RV64];
 
     // General registers
     for (int i = 0; i < int_reg::NumArchRegs; i++) {
@@ -200,11 +209,11 @@ RemoteGDB::RiscvGdbRegCache::getRegs(ThreadContext *context)
     for (int i = 0; i < float_reg::NumRegs; i++)
         r.fpu[i] = context->getReg(floatRegClass[i]);
     r.fflags = context->readMiscRegNoEffect(
-        CSRData.at(CSR_FFLAGS).physIndex) & CSRMasks.at(CSR_FFLAGS);
+        CSRData.at(CSR_FFLAGS).physIndex) & RVxCSRMasks.at(CSR_FFLAGS);
     r.frm = context->readMiscRegNoEffect(
-        CSRData.at(CSR_FRM).physIndex) & CSRMasks.at(CSR_FRM);
+        CSRData.at(CSR_FRM).physIndex) & RVxCSRMasks.at(CSR_FRM);
     r.fcsr = context->readMiscRegNoEffect(
-        CSRData.at(CSR_FCSR).physIndex) & CSRMasks.at(CSR_FCSR);
+        CSRData.at(CSR_FCSR).physIndex) & RVxCSRMasks.at(CSR_FCSR);
 
     // CSR registers
     r.cycle = context->readMiscRegNoEffect(
@@ -214,9 +223,9 @@ RemoteGDB::RiscvGdbRegCache::getRegs(ThreadContext *context)
 
     // U mode CSR
     r.ustatus = context->readMiscRegNoEffect(
-        CSRData.at(CSR_USTATUS).physIndex) & CSRMasks.at(CSR_USTATUS);
+        CSRData.at(CSR_USTATUS).physIndex) & RVxCSRMasks.at(CSR_USTATUS);
     r.uie = context->readMiscReg(
-        CSRData.at(CSR_UIE).physIndex) & CSRMasks.at(CSR_UIE);
+        CSRData.at(CSR_UIE).physIndex) & RVxCSRMasks.at(CSR_UIE);
     r.utvec = context->readMiscRegNoEffect(
         CSRData.at(CSR_UTVEC).physIndex);
     r.uscratch = context->readMiscRegNoEffect(
@@ -228,17 +237,17 @@ RemoteGDB::RiscvGdbRegCache::getRegs(ThreadContext *context)
     r.utval = context->readMiscRegNoEffect(
         CSRData.at(CSR_UTVAL).physIndex);
     r.uip = context->readMiscReg(
-        CSRData.at(CSR_UIP).physIndex) & CSRMasks.at(CSR_UIP);
+        CSRData.at(CSR_UIP).physIndex) & RVxCSRMasks.at(CSR_UIP);
 
     // S mode CSR
     r.sstatus = context->readMiscRegNoEffect(
-        CSRData.at(CSR_SSTATUS).physIndex) & CSRMasks.at(CSR_SSTATUS);
+        CSRData.at(CSR_SSTATUS).physIndex) & RVxCSRMasks.at(CSR_SSTATUS);
     r.sedeleg = context->readMiscRegNoEffect(
         CSRData.at(CSR_SEDELEG).physIndex);
     r.sideleg = context->readMiscRegNoEffect(
         CSRData.at(CSR_SIDELEG).physIndex);
     r.sie = context->readMiscReg(
-        CSRData.at(CSR_SIE).physIndex) & CSRMasks.at(CSR_SIE);
+        CSRData.at(CSR_SIE).physIndex) & RVxCSRMasks.at(CSR_SIE);
     r.stvec = context->readMiscRegNoEffect(
         CSRData.at(CSR_STVEC).physIndex);
     r.scounteren = context->readMiscRegNoEffect(
@@ -252,7 +261,7 @@ RemoteGDB::RiscvGdbRegCache::getRegs(ThreadContext *context)
     r.stval = context->readMiscRegNoEffect(
         CSRData.at(CSR_STVAL).physIndex);
     r.sip = context->readMiscReg(
-        CSRData.at(CSR_SIP).physIndex) & CSRMasks.at(CSR_SIP);
+        CSRData.at(CSR_SIP).physIndex) & RVxCSRMasks.at(CSR_SIP);
     r.satp = context->readMiscRegNoEffect(
         CSRData.at(CSR_SATP).physIndex);
 
@@ -266,15 +275,15 @@ RemoteGDB::RiscvGdbRegCache::getRegs(ThreadContext *context)
     r.mhartid = context->readMiscRegNoEffect(
         CSRData.at(CSR_MHARTID).physIndex);
     r.mstatus = context->readMiscRegNoEffect(
-        CSRData.at(CSR_MSTATUS).physIndex) & CSRMasks.at(CSR_MSTATUS);
+        CSRData.at(CSR_MSTATUS).physIndex) & RVxCSRMasks.at(CSR_MSTATUS);
     r.misa = context->readMiscRegNoEffect(
-        CSRData.at(CSR_MISA).physIndex) & CSRMasks.at(CSR_MISA);
+        CSRData.at(CSR_MISA).physIndex) & RVxCSRMasks.at(CSR_MISA);
     r.medeleg = context->readMiscRegNoEffect(
         CSRData.at(CSR_MEDELEG).physIndex);
     r.mideleg = context->readMiscRegNoEffect(
         CSRData.at(CSR_MIDELEG).physIndex);
     r.mie = context->readMiscReg(
-        CSRData.at(CSR_MIE).physIndex) & CSRMasks.at(CSR_MIE);
+        CSRData.at(CSR_MIE).physIndex) & RVxCSRMasks.at(CSR_MIE);
     r.mtvec = context->readMiscRegNoEffect(
         CSRData.at(CSR_MTVEC).physIndex);
     r.mcounteren = context->readMiscRegNoEffect(
@@ -288,13 +297,13 @@ RemoteGDB::RiscvGdbRegCache::getRegs(ThreadContext *context)
     r.mtval = context->readMiscRegNoEffect(
         CSRData.at(CSR_MTVAL).physIndex);
     r.mip = context->readMiscReg(
-        CSRData.at(CSR_MIP).physIndex) & CSRMasks.at(CSR_MIP);
+        CSRData.at(CSR_MIP).physIndex) & RVxCSRMasks.at(CSR_MIP);
 
     // H mode CSR (to be implemented)
 }
 
 void
-RemoteGDB::RiscvGdbRegCache::setRegs(ThreadContext *context) const
+RemoteGDB::Riscv64GdbRegCache::setRegs(ThreadContext *context) const
 {
     // NOTE: no error will be reported for attempting to set masked bits.
     RegVal oldVal;
@@ -310,23 +319,25 @@ RemoteGDB::RiscvGdbRegCache::setRegs(ThreadContext *context) const
     for (int i = 0; i < float_reg::NumRegs; i++)
         context->setReg(floatRegClass[i], r.fpu[i]);
 
+    auto& RVxCSRMasks = CSRMasks[RV64];
+
     oldVal = context->readMiscRegNoEffect(
         CSRData.at(CSR_FFLAGS).physIndex);
-    mask = CSRMasks.at(CSR_FFLAGS);
+    mask = RVxCSRMasks.at(CSR_FFLAGS);
     newVal = (oldVal & ~mask) | (r.fflags & mask);
     context->setMiscRegNoEffect(
         CSRData.at(CSR_FFLAGS).physIndex, newVal);
 
     oldVal = context->readMiscRegNoEffect(
         CSRData.at(CSR_FRM).physIndex);
-    mask = CSRMasks.at(CSR_FRM);
+    mask = RVxCSRMasks.at(CSR_FRM);
     newVal = (oldVal & ~mask) | (r.frm & mask);
     context->setMiscRegNoEffect(
         CSRData.at(CSR_FRM).physIndex, newVal);
 
     oldVal = context->readMiscRegNoEffect(
         CSRData.at(CSR_FCSR).physIndex);
-    mask = CSRMasks.at(CSR_FCSR);
+    mask = RVxCSRMasks.at(CSR_FCSR);
     newVal = (oldVal & ~mask) | (r.fcsr & mask);
     context->setMiscRegNoEffect(
         CSRData.at(CSR_FCSR).physIndex, newVal);
@@ -340,13 +351,13 @@ RemoteGDB::RiscvGdbRegCache::setRegs(ThreadContext *context) const
     // U mode CSR
     oldVal = context->readMiscRegNoEffect(
         CSRData.at(CSR_USTATUS).physIndex);
-    mask = CSRMasks.at(CSR_USTATUS);
+    mask = RVxCSRMasks.at(CSR_USTATUS);
     newVal = (oldVal & ~mask) | (r.ustatus & mask);
     context->setMiscRegNoEffect(
         CSRData.at(CSR_USTATUS).physIndex, newVal);
     oldVal = context->readMiscReg(
         CSRData.at(CSR_UIE).physIndex);
-    mask = CSRMasks.at(CSR_UIE);
+    mask = RVxCSRMasks.at(CSR_UIE);
     newVal = (oldVal & ~mask) | (r.uie & mask);
     context->setMiscReg(
         CSRData.at(CSR_UIE).physIndex, newVal);
@@ -362,7 +373,7 @@ RemoteGDB::RiscvGdbRegCache::setRegs(ThreadContext *context) const
         CSRData.at(CSR_UTVAL).physIndex, r.utval);
     oldVal = context->readMiscReg(
         CSRData.at(CSR_UIP).physIndex);
-    mask = CSRMasks.at(CSR_UIP);
+    mask = RVxCSRMasks.at(CSR_UIP);
     newVal = (oldVal & ~mask) | (r.uip & mask);
     context->setMiscReg(
         CSRData.at(CSR_UIP).physIndex, newVal);
@@ -370,7 +381,7 @@ RemoteGDB::RiscvGdbRegCache::setRegs(ThreadContext *context) const
     // S mode CSR
     oldVal = context->readMiscRegNoEffect(
         CSRData.at(CSR_SSTATUS).physIndex);
-    mask = CSRMasks.at(CSR_SSTATUS);
+    mask = RVxCSRMasks.at(CSR_SSTATUS);
     newVal = (oldVal & ~mask) | (r.sstatus & mask);
     context->setMiscRegNoEffect(
         CSRData.at(CSR_SSTATUS).physIndex, newVal);
@@ -380,7 +391,7 @@ RemoteGDB::RiscvGdbRegCache::setRegs(ThreadContext *context) const
         CSRData.at(CSR_SIDELEG).physIndex, r.sideleg);
     oldVal = context->readMiscReg(
         CSRData.at(CSR_SIE).physIndex);
-    mask = CSRMasks.at(CSR_SIE);
+    mask = RVxCSRMasks.at(CSR_SIE);
     newVal = (oldVal & ~mask) | (r.sie & mask);
     context->setMiscReg(
         CSRData.at(CSR_SIE).physIndex, newVal);
@@ -398,7 +409,7 @@ RemoteGDB::RiscvGdbRegCache::setRegs(ThreadContext *context) const
         CSRData.at(CSR_STVAL).physIndex, r.stval);
     oldVal = context->readMiscReg(
         CSRData.at(CSR_SIP).physIndex);
-    mask = CSRMasks.at(CSR_SIP);
+    mask = RVxCSRMasks.at(CSR_SIP);
     newVal = (oldVal & ~mask) | (r.sip & mask);
     context->setMiscReg(
         CSRData.at(CSR_SIP).physIndex, newVal);
@@ -416,13 +427,13 @@ RemoteGDB::RiscvGdbRegCache::setRegs(ThreadContext *context) const
         CSRData.at(CSR_MHARTID).physIndex, r.mhartid);
     oldVal = context->readMiscRegNoEffect(
         CSRData.at(CSR_MSTATUS).physIndex);
-    mask = CSRMasks.at(CSR_MSTATUS);
+    mask = RVxCSRMasks.at(CSR_MSTATUS);
     newVal = (oldVal & ~mask) | (r.mstatus & mask);
     context->setMiscRegNoEffect(
         CSRData.at(CSR_MSTATUS).physIndex, newVal);
     oldVal = context->readMiscRegNoEffect(
         CSRData.at(CSR_MISA).physIndex);
-    mask = CSRMasks.at(CSR_MISA);
+    mask = RVxCSRMasks.at(CSR_MISA);
     newVal = (oldVal & ~mask) | (r.misa & mask);
     context->setMiscRegNoEffect(
         CSRData.at(CSR_MISA).physIndex, newVal);
@@ -432,7 +443,7 @@ RemoteGDB::RiscvGdbRegCache::setRegs(ThreadContext *context) const
         CSRData.at(CSR_MIDELEG).physIndex, r.mideleg);
     oldVal = context->readMiscReg(
         CSRData.at(CSR_MIE).physIndex);
-    mask = CSRMasks.at(CSR_MIE);
+    mask = RVxCSRMasks.at(CSR_MIE);
     newVal = (oldVal & ~mask) | (r.mie & mask);
     context->setMiscReg(
         CSRData.at(CSR_MIE).physIndex, newVal);
@@ -450,7 +461,7 @@ RemoteGDB::RiscvGdbRegCache::setRegs(ThreadContext *context) const
         CSRData.at(CSR_MTVAL).physIndex, r.mtval);
     oldVal = context->readMiscReg(
         CSRData.at(CSR_MIP).physIndex);
-    mask = CSRMasks.at(CSR_MIP);
+    mask = RVxCSRMasks.at(CSR_MIP);
     newVal = (oldVal & ~mask) | (r.mip & mask);
     context->setMiscReg(
         CSRData.at(CSR_MIP).physIndex, newVal);
@@ -473,11 +484,13 @@ RemoteGDB::getXferFeaturesRead(const std::string &annex, std::string &output)
                        Blobs::s##_len)                           \
     }
     static const std::map<std::string, std::string> annexMap{
-        GDB_XML("target.xml", gdb_xml_riscv_target),
-        GDB_XML("riscv-64bit-cpu.xml", gdb_xml_riscv_cpu),
-        GDB_XML("riscv-64bit-fpu.xml", gdb_xml_riscv_fpu),
-        GDB_XML("riscv-64bit-csr.xml", gdb_xml_riscv_csr)};
+        GDB_XML("riscv-64bit.xml", gdb_xml_riscv_64bit_target),
+        GDB_XML("riscv-64bit-cpu.xml", gdb_xml_riscv_64bit_cpu),
+        GDB_XML("riscv-64bit-fpu.xml", gdb_xml_riscv_64bit_fpu),
+        GDB_XML("riscv-64bit-csr.xml", gdb_xml_riscv_64bit_csr)};
 #undef GDB_XML
+    if (getRvType(context()) == RV32)
+        return false;
     auto it = annexMap.find(annex);
     if (it == annexMap.end())
         return false;
@@ -488,7 +501,7 @@ RemoteGDB::getXferFeaturesRead(const std::string &annex, std::string &output)
 BaseGdbRegCache *
 RemoteGDB::gdbRegs()
 {
-    return &regCache;
+    return &regCache64;
 }
 
 } // namespace gem5
diff --git a/src/arch/riscv/remote_gdb.hh b/src/arch/riscv/remote_gdb.hh
index f87481ece0..a8262a6a2a 100644
--- a/src/arch/riscv/remote_gdb.hh
+++ b/src/arch/riscv/remote_gdb.hh
@@ -58,7 +58,7 @@ class RemoteGDB : public BaseRemoteGDB
     // A breakpoint will be 2 bytes if it is compressed and 4 if not
     bool checkBpKind(size_t kind) override { return kind == 2 || kind == 4; }
 
-    class RiscvGdbRegCache : public BaseGdbRegCache
+    class Riscv64GdbRegCache : public BaseGdbRegCache
     {
       using BaseGdbRegCache::BaseGdbRegCache;
       private:
@@ -142,7 +142,7 @@ class RemoteGDB : public BaseRemoteGDB
         }
     };
 
-    RiscvGdbRegCache regCache;
+    Riscv64GdbRegCache regCache64;
 
   public:
     RemoteGDB(System *_system, int _port);
diff --git a/src/arch/riscv/types.hh b/src/arch/riscv/types.hh
index f06fe3eaa4..4aae1a027b 100644
--- a/src/arch/riscv/types.hh
+++ b/src/arch/riscv/types.hh
@@ -43,6 +43,7 @@
 #define __ARCH_RISCV_TYPES_HH__
 
 #include "arch/riscv/pcstate.hh"
+#include "base/bitunion.hh"
 
 namespace gem5
 {
@@ -51,7 +52,13 @@ namespace RiscvISA
 {
 
 typedef uint32_t MachInst;
-typedef uint64_t ExtMachInst;
+
+// This should be further extend someday when we start to support 64b+ inst.
+// For now, we should be safe using the msbs to store extra information.
+BitUnion64(ExtMachInst)
+    // Decoder state
+    Bitfield<63, 62> rv_type;
+EndBitUnion(ExtMachInst)
 
 } // namespace RiscvISA
 } // namespace gem5

From 0df37a33f602fdce8a2697655c318a702f79ba28 Mon Sep 17 00:00:00 2001
From: Giacomo Travaglini <giacomo.travaglini@arm.com>
Date: Wed, 23 Nov 2022 08:35:50 +0000
Subject: [PATCH 036/492] arch-arm: Setup TC/ISA at construction time 2nd
 attempt

This partly reverts commit ec75787aef56665e893d70293bf3a0f93c33bb6a
by fixing the original problem noted by Bobby (long regressions):

setupThreadContext has to be implemented otherswise the GICv3 cpu interface
will end up holding old references when switching TC/ISAs.

This new implementation is still setting up the cpu interface reference
in the ISA only when it is required, but it is storing the
TC/ISA reference within the interface every time the ISA::setupThreadContext
gets called.

Change-Id: I2f54f95761d63655162c253e887b872f3718c764
Signed-off-by: Giacomo Travaglini <giacomo.travaglini@arm.com>
Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/65931
Reviewed-by: Jason Lowe-Power <power.jg@gmail.com>
Tested-by: kokoro <noreply+kokoro@google.com>
Maintainer: Bobby Bruce <bbruce@ucdavis.edu>
---
 src/arch/arm/isa.cc                 | 33 ++++++++++++++++++++---------
 src/arch/arm/isa.hh                 |  1 +
 src/dev/arm/gic_v3.cc               |  2 +-
 src/dev/arm/gic_v3_cpu_interface.cc |  8 +++++--
 src/dev/arm/gic_v3_cpu_interface.hh |  6 +++---
 5 files changed, 34 insertions(+), 16 deletions(-)

diff --git a/src/arch/arm/isa.cc b/src/arch/arm/isa.cc
index a30fd94596..543e0eba7b 100644
--- a/src/arch/arm/isa.cc
+++ b/src/arch/arm/isa.cc
@@ -524,15 +524,10 @@ ISA::setupThreadContext()
 
     selfDebug->init(tc);
 
-    Gicv3 *gicv3 = dynamic_cast<Gicv3 *>(system->getGIC());
-    if (!gicv3)
-        return;
-
-    if (!gicv3CpuInterface)
-        gicv3CpuInterface.reset(gicv3->getCPUInterface(tc->contextId()));
-
-    gicv3CpuInterface->setISA(this);
-    gicv3CpuInterface->setThreadContext(tc);
+    if (auto gicv3_ifc = getGICv3CPUInterface(tc); gicv3_ifc) {
+        gicv3_ifc->setISA(this);
+        gicv3_ifc->setThreadContext(tc);
+    }
 }
 
 void
@@ -2008,10 +2003,28 @@ ISA::getGenericTimer()
 BaseISADevice &
 ISA::getGICv3CPUInterface()
 {
-    panic_if(!gicv3CpuInterface, "GICV3 cpu interface is not registered!");
+    if (gicv3CpuInterface)
+        return *gicv3CpuInterface.get();
+
+    auto gicv3_ifc = getGICv3CPUInterface(tc);
+    panic_if(!gicv3_ifc, "The system does not have a GICv3 irq controller\n");
+    gicv3CpuInterface.reset(gicv3_ifc);
+
     return *gicv3CpuInterface.get();
 }
 
+BaseISADevice*
+ISA::getGICv3CPUInterface(ThreadContext *tc)
+{
+    assert(system);
+    Gicv3 *gicv3 = dynamic_cast<Gicv3 *>(system->getGIC());
+    if (gicv3) {
+        return gicv3->getCPUInterface(tc->contextId());
+    } else {
+        return nullptr;
+    }
+}
+
 bool
 ISA::inSecureState() const
 {
diff --git a/src/arch/arm/isa.hh b/src/arch/arm/isa.hh
index 1f7a7561a7..9e1afa714b 100644
--- a/src/arch/arm/isa.hh
+++ b/src/arch/arm/isa.hh
@@ -116,6 +116,7 @@ namespace ArmISA
 
         BaseISADevice &getGenericTimer();
         BaseISADevice &getGICv3CPUInterface();
+        BaseISADevice *getGICv3CPUInterface(ThreadContext *tc);
 
         RegVal miscRegs[NUM_MISCREGS];
         const RegId *intRegMap;
diff --git a/src/dev/arm/gic_v3.cc b/src/dev/arm/gic_v3.cc
index dde3818b07..e14d1f2bef 100644
--- a/src/dev/arm/gic_v3.cc
+++ b/src/dev/arm/gic_v3.cc
@@ -147,7 +147,7 @@ Gicv3::init()
 
     for (int i = 0; i < threads; i++) {
         redistributors[i] = new Gicv3Redistributor(this, i);
-        cpuInterfaces[i] = new Gicv3CPUInterface(this, i);
+        cpuInterfaces[i] = new Gicv3CPUInterface(this, sys->threads[i]);
     }
 
     distRange = RangeSize(params().dist_addr,
diff --git a/src/dev/arm/gic_v3_cpu_interface.cc b/src/dev/arm/gic_v3_cpu_interface.cc
index 0e1dbaa04b..28a173943d 100644
--- a/src/dev/arm/gic_v3_cpu_interface.cc
+++ b/src/dev/arm/gic_v3_cpu_interface.cc
@@ -55,15 +55,19 @@ using namespace ArmISA;
 const uint8_t Gicv3CPUInterface::GIC_MIN_BPR;
 const uint8_t Gicv3CPUInterface::GIC_MIN_BPR_NS;
 
-Gicv3CPUInterface::Gicv3CPUInterface(Gicv3 * gic, uint32_t cpu_id)
+Gicv3CPUInterface::Gicv3CPUInterface(Gicv3 * gic, ThreadContext *_tc)
     : BaseISADevice(),
       gic(gic),
       redistributor(nullptr),
       distributor(nullptr),
-      cpuId(cpu_id)
+      tc(_tc),
+      maintenanceInterrupt(gic->params().maint_int->get(tc)),
+      cpuId(tc->contextId())
 {
     hppi.prio = 0xff;
     hppi.intid = Gicv3::INTID_SPURIOUS;
+
+    setISA(static_cast<ISA*>(tc->getIsaPtr()));
 }
 
 void
diff --git a/src/dev/arm/gic_v3_cpu_interface.hh b/src/dev/arm/gic_v3_cpu_interface.hh
index e860373fb5..ff476bc3c6 100644
--- a/src/dev/arm/gic_v3_cpu_interface.hh
+++ b/src/dev/arm/gic_v3_cpu_interface.hh
@@ -68,10 +68,10 @@ class Gicv3CPUInterface : public ArmISA::BaseISADevice, public Serializable
     Gicv3 * gic;
     Gicv3Redistributor * redistributor;
     Gicv3Distributor * distributor;
-    uint32_t cpuId;
 
-    ArmInterruptPin *maintenanceInterrupt;
     ThreadContext *tc;
+    ArmInterruptPin *maintenanceInterrupt;
+    uint32_t cpuId;
 
     BitUnion64(ICC_CTLR_EL1)
         Bitfield<63, 20> res0_3;
@@ -359,7 +359,7 @@ class Gicv3CPUInterface : public ArmISA::BaseISADevice, public Serializable
     void setBankedMiscReg(ArmISA::MiscRegIndex misc_reg, RegVal val) const;
   public:
 
-    Gicv3CPUInterface(Gicv3 * gic, uint32_t cpu_id);
+    Gicv3CPUInterface(Gicv3 * gic, ThreadContext *tc);
 
     void init();
 

From ed6cf2ecedb277a46e205d00613578918ea37e12 Mon Sep 17 00:00:00 2001
From: Giacomo Travaglini <giacomo.travaglini@arm.com>
Date: Tue, 1 Nov 2022 17:43:35 +0000
Subject: [PATCH 037/492] dev-arm: Allow GICv3 to be externally(publicly)
 updated

Change-Id: Ifa7b745ea11e74c17024c22ae993b6103eecb744
Signed-off-by: Giacomo Travaglini <giacomo.travaglini@arm.com>
Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/66271
Reviewed-by: Jason Lowe-Power <power.jg@gmail.com>
Maintainer: Jason Lowe-Power <power.jg@gmail.com>
Tested-by: kokoro <noreply+kokoro@google.com>
---
 src/dev/arm/gic_v3.cc             | 6 ++++++
 src/dev/arm/gic_v3.hh             | 2 ++
 src/dev/arm/gic_v3_distributor.hh | 2 +-
 3 files changed, 9 insertions(+), 1 deletion(-)

diff --git a/src/dev/arm/gic_v3.cc b/src/dev/arm/gic_v3.cc
index e14d1f2bef..cb462f7340 100644
--- a/src/dev/arm/gic_v3.cc
+++ b/src/dev/arm/gic_v3.cc
@@ -286,6 +286,12 @@ Gicv3::postInt(uint32_t cpu, ArmISA::InterruptTypes int_type)
     ArmSystem::callClearStandByWfi(tc);
 }
 
+void
+Gicv3::update()
+{
+    distributor->update();
+}
+
 bool
 Gicv3::supportsVersion(GicVersion version)
 {
diff --git a/src/dev/arm/gic_v3.hh b/src/dev/arm/gic_v3.hh
index 120b0390a6..2ea6a98b3b 100644
--- a/src/dev/arm/gic_v3.hh
+++ b/src/dev/arm/gic_v3.hh
@@ -206,6 +206,8 @@ class Gicv3 : public BaseGic, public Gicv3Registers
 
     void postInt(uint32_t cpu, ArmISA::InterruptTypes int_type);
 
+    void update();
+
   protected: // GIC state transfer
     void copyGicState(Gicv3Registers* from, Gicv3Registers* to);
 
diff --git a/src/dev/arm/gic_v3_distributor.hh b/src/dev/arm/gic_v3_distributor.hh
index 9960e91593..f80800fb0a 100644
--- a/src/dev/arm/gic_v3_distributor.hh
+++ b/src/dev/arm/gic_v3_distributor.hh
@@ -257,7 +257,6 @@ class Gicv3Distributor : public Serializable
 
     void serialize(CheckpointOut & cp) const override;
     void unserialize(CheckpointIn & cp) override;
-    void update();
     Gicv3CPUInterface* route(uint32_t int_id);
 
   public:
@@ -274,6 +273,7 @@ class Gicv3Distributor : public Serializable
                bool is_secure_access);
 
     void copy(Gicv3Registers *from, Gicv3Registers *to);
+    void update();
 };
 
 } // namespace gem5

From 596da56b6108e648875e1c22b23ff0153c0bcaf4 Mon Sep 17 00:00:00 2001
From: Giacomo Travaglini <giacomo.travaglini@arm.com>
Date: Thu, 1 Dec 2022 11:37:14 +0000
Subject: [PATCH 038/492] arch-arm: Remove deprecated Armv7 debug Vector Catch

This was part of Armv7 self hosted debug and has been officially
deprecated in Armv8

Change-Id: I6ad240ac7dfc389f7de32d4b5b44d9da238c6e46
Signed-off-by: Giacomo Travaglini <giacomo.travaglini@arm.com>
Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/66251
Maintainer: Jason Lowe-Power <power.jg@gmail.com>
Reviewed-by: Jason Lowe-Power <power.jg@gmail.com>
Tested-by: kokoro <noreply+kokoro@google.com>
Maintainer: Andreas Sandberg <andreas.sandberg@arm.com>
Reviewed-by: Andreas Sandberg <andreas.sandberg@arm.com>
---
 src/arch/arm/faults.cc     |  17 -----
 src/arch/arm/self_debug.cc | 126 +------------------------------------
 src/arch/arm/self_debug.hh |  47 +-------------
 3 files changed, 2 insertions(+), 188 deletions(-)

diff --git a/src/arch/arm/faults.cc b/src/arch/arm/faults.cc
index b4ef199201..379e761f98 100644
--- a/src/arch/arm/faults.cc
+++ b/src/arch/arm/faults.cc
@@ -503,9 +503,6 @@ ArmFault::invoke(ThreadContext *tc, const StaticInstPtr &inst)
 void
 ArmFault::invoke32(ThreadContext *tc, const StaticInstPtr &inst)
 {
-    if (vectorCatch(tc, inst))
-        return;
-
     // ARMv7 (ARM ARM issue C B1.9)
     bool have_security = ArmSystem::haveEL(tc, EL3);
 
@@ -729,20 +726,6 @@ ArmFault::invoke64(ThreadContext *tc, const StaticInstPtr &inst)
         setSyndrome(tc, getSyndromeReg64());
 }
 
-bool
-ArmFault::vectorCatch(ThreadContext *tc, const StaticInstPtr &inst)
-{
-    SelfDebug *sd = ArmISA::ISA::getSelfDebug(tc);
-    VectorCatch* vc = sd->getVectorCatch(tc);
-    if (vc && !vc->isVCMatch()) {
-        Fault fault = sd->testVectorCatch(tc, 0x0, this);
-        if (fault != NoFault)
-            fault->invoke(tc, inst);
-        return true;
-    }
-    return false;
-}
-
 ArmStaticInst *
 ArmFault::instrAnnotate(const StaticInstPtr &inst)
 {
diff --git a/src/arch/arm/self_debug.cc b/src/arch/arm/self_debug.cc
index 27064cd7be..a4e685fce8 100644
--- a/src/arch/arm/self_debug.cc
+++ b/src/arch/arm/self_debug.cc
@@ -56,9 +56,7 @@ SelfDebug::testDebug(ThreadContext *tc, const RequestPtr &req,
     if (mode == BaseMMU::Execute) {
         const bool d_step = softStep->advanceSS(tc);
         if (!d_step) {
-            fault = testVectorCatch(tc, req->getVaddr(), nullptr);
-            if (fault == NoFault)
-                fault = testBreakPoints(tc, req->getVaddr());
+            fault = testBreakPoints(tc, req->getVaddr());
         }
     } else if (!req->isCacheMaintenance() ||
              (req->isCacheInvalidate() && !req->isCacheClean())) {
@@ -368,10 +366,6 @@ SelfDebug::init(ThreadContext *tc)
     const HCR hcr = tc->readMiscReg(MISCREG_HCR_EL2);
     const HDCR mdcr  = tc->readMiscRegNoEffect(MISCREG_MDCR_EL2);
     setenableTDETGE(hcr, mdcr);
-
-    // Enable Vector Catch Exceptions
-    const DEVID dvid = tc->readMiscReg(MISCREG_DBGDEVID0);
-    vcExcpt = new VectorCatch(dvid.vectorcatch==0x0, this);
 }
 
 bool
@@ -706,122 +700,4 @@ SoftwareStep::advanceSS(ThreadContext * tc)
     return res;
 }
 
-Fault
-SelfDebug::testVectorCatch(ThreadContext *tc, Addr addr,
-                           ArmFault *fault)
-{
-
-    setAArch32(tc);
-    to32 = targetAArch32(tc);
-    if (!isDebugEnabled(tc) || !mde || !aarch32)
-        return NoFault;
-
-    ExceptionLevel el = (ExceptionLevel) currEL(tc);
-    bool do_debug;
-    if (fault == nullptr)
-        do_debug = vcExcpt->addressMatching(tc, addr, el);
-    else
-        do_debug = vcExcpt->exceptionTrapping(tc, el, fault);
-    if (do_debug) {
-        if (enableTdeTge) {
-            return std::make_shared<HypervisorTrap>(0, 0x22,
-                                        ExceptionClass::PREFETCH_ABORT_TO_HYP);
-        } else {
-            return std::make_shared<PrefetchAbort>(addr,
-                                       ArmFault::DebugEvent, false,
-                                       ArmFault::UnknownTran,
-                                       ArmFault::VECTORCATCH);
-        }
-    }
-
-    return NoFault;
-}
-
-bool
-VectorCatch::addressMatching(ThreadContext *tc, Addr addr, ExceptionLevel el)
-{
-    // Each bit position in this string corresponds to a bit in DBGVCR
-    // and an exception vector.
-    bool enabled;
-    if (conf->isAArch32() && ELIs32(tc, EL1) &&
-        (addr & 0x3) == 0 && el != EL2 ) {
-
-        DBGVCR match_word = 0x0;
-
-        Addr vbase = getVectorBase(tc, false);
-        Addr vaddress = addr & ~ 0x1f;
-        Addr low_addr = bits(addr, 5, 2);
-        if (vaddress == vbase) {
-            if (ArmSystem::haveEL(tc, EL3) && !isSecure(tc)) {
-                uint32_t bmask = 1UL << (low_addr + 24);
-                match_word = match_word | (DBGVCR) bmask;
-                // Non-secure vectors
-            } else {
-                uint32_t bmask = 1UL << (low_addr);
-                match_word = match_word | (DBGVCR) bmask;
-                // Secure vectors (or no EL3)
-            }
-        }
-        uint32_t mvbase = getVectorBase(tc, true);
-        if (ArmSystem::haveEL(tc, EL3) && ELIs32(tc, EL3) &&
-            isSecure(tc) && (vaddress == mvbase)) {
-            uint32_t bmask = 1UL << (low_addr + 8);
-            match_word = match_word | (DBGVCR) bmask;
-            // Monitor vectors
-        }
-
-        DBGVCR mask;
-
-        // Mask out bits not corresponding to vectors.
-        if (!ArmSystem::haveEL(tc, EL3)) {
-            mask = (DBGVCR) 0xDE;
-        } else if (!ELIs32(tc, EL3)) {
-            mask = (DBGVCR) 0xDE0000DE;
-        } else {
-            mask = (DBGVCR) 0xDE00DEDE;
-        }
-        DBGVCR dbgvcr = tc->readMiscReg(MISCREG_DBGVCR);
-        match_word = match_word & dbgvcr & mask;
-        enabled = match_word != 0x0;
-        // Check for UNPREDICTABLE case - match on Prefetch Abort and
-        // Data Abort vectors
-        ExceptionLevel ELd = debugTargetFrom(tc, isSecure(tc));
-        if (((match_word & 0x18001818) != 0x0) && ELd == el) {
-            enabled = false;
-        }
-    } else {
-        enabled = false;
-    }
-    return enabled;
-}
-
-bool
-VectorCatch::exceptionTrapping(ThreadContext *tc, ExceptionLevel el,
-                               ArmFault* fault)
-{
-    if (conf->isAArch32() && ELIs32(tc, EL1) && el != EL2) {
-
-        DBGVCR dbgvcr = tc->readMiscReg(MISCREG_DBGVCR);
-        DBGVCR match_type = fault->vectorCatchFlag();
-        DBGVCR mask;
-
-        if (!ArmSystem::haveEL(tc, EL3)) {
-            mask = (DBGVCR) 0xDE;
-        } else if (ELIs32(tc, EL3) && fault->getToMode() == MODE_MON) {
-            mask = (DBGVCR) 0x0000DE00;
-        } else {
-            if (isSecure(tc))
-                mask = (DBGVCR) 0x000000DE;
-            else
-                mask = (DBGVCR) 0xDE000000;
-        }
-        match_type = match_type & mask & dbgvcr;
-
-        if (match_type != 0x0) {
-            return true;
-        }
-    }
-    return false;
-}
-
 } // namespace gem5
diff --git a/src/arch/arm/self_debug.hh b/src/arch/arm/self_debug.hh
index 5ad0d0991f..069df24066 100644
--- a/src/arch/arm/self_debug.hh
+++ b/src/arch/arm/self_debug.hh
@@ -239,48 +239,12 @@ class SoftwareStep
     }
 };
 
-class VectorCatch
-{
-  private:
-    bool vcmatch;
-    SelfDebug *conf;
-    std::vector<Fault *> vectorTypes();
-
-  public:
-    VectorCatch(bool _vcmatch, SelfDebug* s) : vcmatch(_vcmatch), conf(s)
-    {}
-
-    bool addressMatching(ThreadContext *tc, Addr addr, ExceptionLevel el);
-    bool exceptionTrapping(ThreadContext *tc, ExceptionLevel el,
-                           ArmFault* fault);
-
-    bool isVCMatch() const { return vcmatch; }
-
-  private:
-    Addr
-    getVectorBase(ThreadContext *tc, bool monitor)
-    {
-        if (monitor) {
-            return tc->readMiscReg(MISCREG_MVBAR) & ~0x1F;
-        }
-        SCTLR sctlr = tc->readMiscReg(MISCREG_SCTLR_EL1);
-        if (sctlr.v) {
-            return (Addr) 0xFFFF0000;
-        } else {
-            Addr vbar = tc->readMiscReg(MISCREG_VBAR) & ~0x1F;
-            return vbar;
-        }
-    }
-
-};
-
 class SelfDebug
 {
   private:
     std::vector<BrkPoint> arBrkPoints;
     std::vector<WatchPoint> arWatchPoints;
     SoftwareStep * softStep;
-    VectorCatch * vcExcpt;
 
     bool enableTdeTge; // MDCR_EL2.TDE || HCR_EL2.TGE
 
@@ -294,7 +258,7 @@ class SelfDebug
 
   public:
     SelfDebug()
-      : softStep(nullptr), vcExcpt(nullptr), enableTdeTge(false),
+      : softStep(nullptr), enableTdeTge(false),
         mde(false), sdd(false), kde(false), oslk(false)
     {
         softStep = new SoftwareStep(this);
@@ -303,7 +267,6 @@ class SelfDebug
     ~SelfDebug()
     {
         delete softStep;
-        delete vcExcpt;
     }
 
     Fault testDebug(ThreadContext *tc, const RequestPtr &req,
@@ -318,8 +281,6 @@ class SelfDebug
     Fault triggerWatchpointException(ThreadContext *tc, Addr vaddr,
                                      bool write, bool cm);
   public:
-    Fault testVectorCatch(ThreadContext *tc, Addr addr, ArmFault* flt);
-
     bool enabled() const { return mde || softStep->bSS; };
 
     inline BrkPoint*
@@ -445,12 +406,6 @@ class SelfDebug
         return softStep;
     }
 
-    VectorCatch*
-    getVectorCatch(ThreadContext *tc)
-    {
-        return vcExcpt;
-    }
-
     bool
     targetAArch32(ThreadContext *tc)
     {

From 4fc690f6b7f98c8bc846b970a024879cd91fec79 Mon Sep 17 00:00:00 2001
From: Jarvis Jia <jia44@wisc.edu>
Date: Wed, 23 Nov 2022 15:11:16 -0600
Subject: [PATCH 039/492] mem-cache: Fix FIFO replacement

Change FIFO from using curTicks() to using timeTicks counter to
avoid issues where multiple lines are considered to have entered
the cache at the same tick.

Change-Id: I5e0b894eb9bec4f0f8bc8f48ec2766a0fc5079c6
Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/65952
Reviewed-by: Daniel Carvalho <odanrc@yahoo.com.br>
Reviewed-by: Matt Sinclair <mattdsinclair@gmail.com>
Maintainer: Daniel Carvalho <odanrc@yahoo.com.br>
Tested-by: kokoro <noreply+kokoro@google.com>
Maintainer: Matt Sinclair <mattdsinclair@gmail.com>
---
 src/mem/cache/replacement_policies/fifo_rp.cc | 6 ++----
 src/mem/cache/replacement_policies/fifo_rp.hh | 8 +++++++-
 2 files changed, 9 insertions(+), 5 deletions(-)

diff --git a/src/mem/cache/replacement_policies/fifo_rp.cc b/src/mem/cache/replacement_policies/fifo_rp.cc
index 9655c96fa7..bc0680bc8a 100644
--- a/src/mem/cache/replacement_policies/fifo_rp.cc
+++ b/src/mem/cache/replacement_policies/fifo_rp.cc
@@ -36,11 +36,9 @@
 
 namespace gem5
 {
-
 GEM5_DEPRECATED_NAMESPACE(ReplacementPolicy, replacement_policy);
 namespace replacement_policy
 {
-
 FIFO::FIFO(const Params &p)
   : Base(p)
 {
@@ -51,7 +49,7 @@ FIFO::invalidate(const std::shared_ptr<ReplacementData>& replacement_data)
 {
     // Reset insertion tick
     std::static_pointer_cast<FIFOReplData>(
-        replacement_data)->tickInserted = Tick(0);
+        replacement_data)->tickInserted = ++timeTicks;
 }
 
 void
@@ -65,7 +63,7 @@ FIFO::reset(const std::shared_ptr<ReplacementData>& replacement_data) const
 {
     // Set insertion tick
     std::static_pointer_cast<FIFOReplData>(
-        replacement_data)->tickInserted = curTick();
+        replacement_data)->tickInserted = ++timeTicks;
 }
 
 ReplaceableEntry*
diff --git a/src/mem/cache/replacement_policies/fifo_rp.hh b/src/mem/cache/replacement_policies/fifo_rp.hh
index f4703d1299..4b62fd220a 100644
--- a/src/mem/cache/replacement_policies/fifo_rp.hh
+++ b/src/mem/cache/replacement_policies/fifo_rp.hh
@@ -56,13 +56,19 @@ class FIFO : public Base
     {
         /** Tick on which the entry was inserted. */
         Tick tickInserted;
-
         /**
          * Default constructor. Invalidate data.
          */
         FIFOReplData() : tickInserted(0) {}
     };
 
+  private:
+    /**
+     * A counter that tracks the number of
+     * ticks since being created to avoid a tie
+     */
+    mutable Tick timeTicks;
+
   public:
     typedef FIFORPParams Params;
     FIFO(const Params &p);

From b9c0851120b780523c77a9210db3e5d85e9e0fb9 Mon Sep 17 00:00:00 2001
From: Yu-hsin Wang <yuhsingw@google.com>
Date: Wed, 23 Nov 2022 17:13:49 +0800
Subject: [PATCH 040/492] systemc: fix the payload and packet association in
 Gem5ToTlm bridge

If a request is initiated by systemc, passed through TlmToGem5 bridge
and Gem5ToTlm bridge, it wouldn't have the systemc extension about the
association. This feature is also used in TlmToGem5 bridge to detect if
the packet is allocated in the current instance in async interface. In
that case, we would lose the association in the Gem5ToTlm bridge async
interface. For not making wide change, we need an extra way to support
the association in Gem5ToTlm bridge async interface.

This change adds another map to record the association and clears when
the TLM transaction is completed.

Change-Id: I486441e813236ea2cabd1bd6cbb085b08d75ec8f
Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/66054
Reviewed-by: Gabe Black <gabe.black@gmail.com>
Maintainer: Gabe Black <gabe.black@gmail.com>
Tested-by: kokoro <noreply+kokoro@google.com>
---
 src/systemc/tlm_bridge/gem5_to_tlm.cc | 12 ++++++++----
 src/systemc/tlm_bridge/gem5_to_tlm.hh |  7 +++++++
 2 files changed, 15 insertions(+), 4 deletions(-)

diff --git a/src/systemc/tlm_bridge/gem5_to_tlm.cc b/src/systemc/tlm_bridge/gem5_to_tlm.cc
index a5eb9df27e..515975224e 100644
--- a/src/systemc/tlm_bridge/gem5_to_tlm.cc
+++ b/src/systemc/tlm_bridge/gem5_to_tlm.cc
@@ -231,10 +231,10 @@ Gem5ToTlmBridge<BITWIDTH>::pec(
         }
     }
     if (phase == tlm::BEGIN_RESP) {
-        auto &extension = Gem5SystemC::Gem5Extension::getExtension(trans);
-        auto packet = extension.getPacket();
+        PacketPtr packet = packetMap[&trans];
 
         sc_assert(!blockingResponse);
+        sc_assert(packet);
 
         bool need_retry = false;
 
@@ -258,6 +258,7 @@ Gem5ToTlmBridge<BITWIDTH>::pec(
                 sc_core::sc_time delay = sc_core::SC_ZERO_TIME;
                 socket->nb_transport_fw(trans, fw_phase, delay);
                 // Release the transaction with all the extensions.
+                packetMap.erase(&trans);
                 trans.release();
             }
         }
@@ -433,11 +434,13 @@ Gem5ToTlmBridge<BITWIDTH>::recvTimingReq(PacketPtr packet)
         sc_assert(phase == tlm::BEGIN_REQ);
         // Accepted but is now blocking until END_REQ (exclusion rule).
         blockingRequest = trans;
+        packetMap.emplace(trans, packet);
     } else if (status == tlm::TLM_UPDATED) {
         // The Timing annotation must be honored:
         sc_assert(phase == tlm::END_REQ || phase == tlm::BEGIN_RESP);
         // Accepted but is now blocking until END_REQ (exclusion rule).
         blockingRequest = trans;
+        packetMap.emplace(trans, packet);
         auto cb = [this, trans, phase]() { pec(*trans, phase); };
         auto event = new EventFunctionWrapper(
                 cb, "pec", true, getPriorityOfTlmPhase(phase));
@@ -477,8 +480,8 @@ Gem5ToTlmBridge<BITWIDTH>::recvRespRetry()
 
     tlm::tlm_generic_payload *trans = blockingResponse;
     blockingResponse = nullptr;
-    PacketPtr packet =
-        Gem5SystemC::Gem5Extension::getExtension(trans).getPacket();
+    PacketPtr packet = packetMap[blockingResponse];
+    sc_assert(packet);
 
     bool need_retry = !bridgeResponsePort.sendTimingResp(packet);
 
@@ -488,6 +491,7 @@ Gem5ToTlmBridge<BITWIDTH>::recvRespRetry()
     tlm::tlm_phase phase = tlm::END_RESP;
     socket->nb_transport_fw(*trans, phase, delay);
     // Release transaction with all the extensions
+    packetMap.erase(trans);
     trans->release();
 }
 
diff --git a/src/systemc/tlm_bridge/gem5_to_tlm.hh b/src/systemc/tlm_bridge/gem5_to_tlm.hh
index 23415b843b..35d6ba3b4d 100644
--- a/src/systemc/tlm_bridge/gem5_to_tlm.hh
+++ b/src/systemc/tlm_bridge/gem5_to_tlm.hh
@@ -61,6 +61,7 @@
 
 #include <functional>
 #include <string>
+#include <unordered_map>
 
 #include "mem/backdoor.hh"
 #include "mem/port.hh"
@@ -173,6 +174,12 @@ class Gem5ToTlmBridge : public Gem5ToTlmBridgeBase
      */
     tlm::tlm_generic_payload *blockingResponse;
 
+    /**
+     * A map to record the association between payload and packet. This helps us
+     * could get the correct packet when handling nonblocking interfaces.
+     */
+    std::unordered_map<tlm::tlm_generic_payload *, gem5::PacketPtr> packetMap;
+
     gem5::AddrRangeList addrRanges;
 
   protected:

From 9d1cc1bcc91290aa32253462ea3bc6df1a9d83c5 Mon Sep 17 00:00:00 2001
From: Gabe Black <gabe.black@gmail.com>
Date: Mon, 5 Dec 2022 05:03:53 -0800
Subject: [PATCH 041/492] dev: Add an offset checking mechanism to
 RegisterBank.

When adding a long list of registers, it can be easy to miss one which
will offset all the registers after it. It can be hard to find those
sorts of problems, and tedious and error prone to fix them.

This change adds a mechanism to simply annotate what offset a register
should have. That should also make the register list more self
documenting, since you'll be able to easily see what offset a register
has from the source without having to count up everything in front of it.

Change-Id: Ia7e419ffb062a64a10106305f875cec6f9fe9a80
Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/66431
Reviewed-by: Yu-hsin Wang <yuhsingw@google.com>
Maintainer: Gabe Black <gabe.black@gmail.com>
Tested-by: kokoro <noreply+kokoro@google.com>
---
 src/dev/reg_bank.hh      | 70 +++++++++++++++++++++++++++++++++-------
 src/dev/reg_bank.test.cc | 39 ++++++++++++++++++++++
 2 files changed, 98 insertions(+), 11 deletions(-)

diff --git a/src/dev/reg_bank.hh b/src/dev/reg_bank.hh
index 42af7bce89..31c0ce5b66 100644
--- a/src/dev/reg_bank.hh
+++ b/src/dev/reg_bank.hh
@@ -37,6 +37,7 @@
 #include <initializer_list>
 #include <iostream>
 #include <map>
+#include <optional>
 #include <sstream>
 #include <utility>
 
@@ -84,6 +85,9 @@
  * entire device, with the address from accesses passed into read or write
  * unmodified.
  *
+ * The base(), size() and name() methods can be used to access each of those
+ * read only properties of the RegisterBank instance.
+ *
  * To add actual registers to the RegisterBank (discussed below), you can use
  * either the addRegister method which adds a single register, or addRegisters
  * which adds an initializer list of them all at once. The register will be
@@ -91,8 +95,19 @@
  * existing registers. The size of the bank is automatically accumulated as
  * registers are added.
  *
- * The base(), size() and name() methods can be used to access each of those
- * read only properties of the RegisterBank instance.
+ * When adding a lot of registers, you might accidentally add an extra,
+ * or accidentally skip one in a long list. Because the offset is handled
+ * automatically, some of your registers might end up shifted higher or lower
+ * than you expect. To help mitigate this, you can set what offset you expect
+ * a register to have by specifying it as an offset, register pair.
+ *
+ * addRegisters({{0x1000, reg0}, reg1, reg2});
+ *
+ * If the register would end up at a different offset, gem5 will panic. You
+ * can also leave off the register if you want to just check the offset, for
+ * instance between groups of registers.
+ *
+ * addRegisters({reg0, reg1, reg2, 0x100c})
  *
  * While the RegisterBank itself doesn't have any data in it directly and so
  * has no endianness, it's very likely all the registers within it will have
@@ -805,19 +820,52 @@ class RegisterBank : public RegisterBankBase
 
     virtual ~RegisterBank() {}
 
-    void
-    addRegisters(
-            std::initializer_list<std::reference_wrapper<RegisterBase>> regs)
+    class RegisterAdder
     {
-        panic_if(regs.size() == 0, "Adding an empty list of registers to %s?",
-                 name());
-        for (auto &reg: regs) {
-            _offsetMap.emplace(_base + _size, reg);
-            _size += reg.get().size();
+      private:
+        std::optional<Addr> offset;
+        std::optional<RegisterBase *> reg;
+
+      public:
+        // Nothing special to do for this register.
+        RegisterAdder(RegisterBase &new_reg) : reg(&new_reg) {}
+        // Ensure that this register is added at a particular offset.
+        RegisterAdder(Addr new_offset, RegisterBase &new_reg) :
+            offset(new_offset), reg(&new_reg)
+        {}
+        // No register, just check that the offset is what we expect.
+        RegisterAdder(Addr new_offset) : offset(new_offset) {}
+
+        friend class RegisterBank;
+    };
+
+    void
+    addRegisters(std::initializer_list<RegisterAdder> adders)
+    {
+        panic_if(std::empty(adders),
+                "Adding an empty list of registers to %s?", name());
+        for (auto &adder: adders) {
+            const Addr offset = _base + _size;
+
+            if (adder.reg) {
+                auto *reg = adder.reg.value();
+                if (adder.offset && adder.offset.value() != offset) {
+                    panic(
+                        "Expected offset of register %s.%s to be %#x, is %#x.",
+                        name(), reg->name(), adder.offset.value(), offset);
+                }
+                _offsetMap.emplace(offset, *reg);
+                _size += reg->size();
+            } else if (adder.offset) {
+                if (adder.offset.value() != offset) {
+                    panic("Expected current offset of %s to be %#x, is %#x.",
+                        name(), adder.offset.value(), offset);
+                }
+            }
         }
     }
 
-    void addRegister(RegisterBase &reg) { addRegisters({reg}); }
+    void addRegister(RegisterAdder reg) { addRegisters({reg}); }
 
     Addr base() const { return _base; }
     Addr size() const { return _size; }
diff --git a/src/dev/reg_bank.test.cc b/src/dev/reg_bank.test.cc
index 534f86295b..b4bc969724 100644
--- a/src/dev/reg_bank.test.cc
+++ b/src/dev/reg_bank.test.cc
@@ -55,6 +55,7 @@
 
 #include <vector>
 
+#include "base/gtest/logging.hh"
 #include "dev/reg_bank.hh"
 
 using namespace gem5;
@@ -64,6 +65,9 @@ using testing::ElementsAre;
 // This version is needed with enough elements, empirically more than 10.
 using testing::ElementsAreArray;
 
+using testing::AllOf;
+using testing::HasSubstr;
+
 
 /*
  * The RegisterRaz (read as zero) type.
@@ -1011,6 +1015,41 @@ TEST_F(RegisterBankTest, AddRegistersSize)
     EXPECT_EQ(emptyBank.size(), 12);
 }
 
+TEST_F(RegisterBankTest, AddRegistersWithOffsetChecks)
+{
+    emptyBank.addRegister({0x12345});
+    EXPECT_EQ(emptyBank.size(), 0);
+    emptyBank.addRegister({0x12345, reg0});
+    EXPECT_EQ(emptyBank.size(), 4);
+    emptyBank.addRegister({0x12349});
+    EXPECT_EQ(emptyBank.size(), 4);
+
+    emptyBank.addRegisters({{0x12349, reg1}, {0x1234d}, {0x1234d, reg2}});
+    EXPECT_EQ(emptyBank.size(), 12);
+}
+
+TEST_F(RegisterBankTest, BadRegisterOffsetDeath)
+{
+    gtestLogOutput.str("");
+    EXPECT_ANY_THROW(emptyBank.addRegisters({{0xabcd, reg0}, reg1}));
+
+    std::string actual = gtestLogOutput.str();
+    EXPECT_THAT(actual, HasSubstr("empty.reg0"));
+    EXPECT_THAT(actual, HasSubstr("to be 0xabcd"));
+    EXPECT_THAT(actual, HasSubstr("is 0x12345"));
+}
+
+TEST_F(RegisterBankTest, BadBankOffsetDeath)
+{
+    gtestLogOutput.str("");
+    EXPECT_ANY_THROW(emptyBank.addRegisters({{0xabcd}, reg0}));
+
+    std::string actual = gtestLogOutput.str();
+    EXPECT_THAT(actual, HasSubstr("empty "));
+    EXPECT_THAT(actual, HasSubstr("to be 0xabcd"));
+    EXPECT_THAT(actual, HasSubstr("is 0x12345"));
+}
+
 // Reads.
 
 TEST_F(RegisterBankTest, ReadOneAlignedFirst)

From ae20719576533e7d049cecccb0c938d39a2e5f58 Mon Sep 17 00:00:00 2001
From: "Bobby R. Bruce" <bbruce@ucdavis.edu>
Date: Mon, 5 Dec 2022 15:28:00 -0800
Subject: [PATCH 042/492] python: Remove 'scheduleTickExit' in favor of
 'exitSimLoop'

The commit https://gem5-review.googlesource.com/c/public/gem5/+/66231
added an API to m5 for scheduling to-tick exit events. This added the
function `schedule_tick_exit`. It was later pointed out that this
`schedule_tick_exit` event is redundant given the existance of
`exitSimLoop`. This patch therefore removes `schedule_tick_exit` in
favor of `exitSimLoop`.

Change-Id: Ibecf00b98256a5da2868427d766bdc93f03c3f97
Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/66451
Reviewed-by: Jason Lowe-Power <power.jg@gmail.com>
Maintainer: Jason Lowe-Power <power.jg@gmail.com>
Reviewed-by: Andreas Sandberg <andreas.sandberg@arm.com>
Tested-by: kokoro <noreply+kokoro@google.com>
Maintainer: Andreas Sandberg <andreas.sandberg@arm.com>
---
 src/python/m5/simulate.py    |  2 +-
 src/python/pybind11/event.cc |  2 --
 src/sim/simulate.cc          |  5 -----
 src/sim/simulate.hh          | 10 ----------
 4 files changed, 1 insertion(+), 18 deletions(-)

diff --git a/src/python/m5/simulate.py b/src/python/m5/simulate.py
index 744d95f9f6..18fb1d6cd4 100644
--- a/src/python/m5/simulate.py
+++ b/src/python/m5/simulate.py
@@ -259,7 +259,7 @@ def scheduleTickExitAbsolute(
     """
     if tick <= curTick():
         warn("Tick exit scheduled for the past. This will not be triggered.")
-    _m5.event.scheduleTickExit(tick=tick, exit_string=exit_string)
+    _m5.event.exitSimLoop(exit_string, 0, tick, 0, False)
 
 
 def drain():
diff --git a/src/python/pybind11/event.cc b/src/python/pybind11/event.cc
index 827768f52f..95e6ddb844 100644
--- a/src/python/pybind11/event.cc
+++ b/src/python/pybind11/event.cc
@@ -109,8 +109,6 @@ pybind_init_event(py::module_ &m_native)
           py::arg("ticks") = MaxTick);
     m.def("setMaxTick", &set_max_tick, py::arg("tick"));
     m.def("getMaxTick", &get_max_tick, py::return_value_policy::copy);
-    m.def("scheduleTickExit", &schedule_tick_exit, py::arg("tick"),
-        py::arg("exit_string"));
     m.def("terminateEventQueueThreads", &terminateEventQueueThreads);
     m.def("exitSimLoop", &exitSimLoop);
     m.def("getEventQueue", []() { return curEventQueue(); },
diff --git a/src/sim/simulate.cc b/src/sim/simulate.cc
index f147b3ec77..86d516d39a 100644
--- a/src/sim/simulate.cc
+++ b/src/sim/simulate.cc
@@ -266,11 +266,6 @@ Tick get_max_tick()
     return simulate_limit_event->when();
 }
 
-void schedule_tick_exit(Tick tick, std::string exit_string)
-{
-    new GlobalSimLoopExitEvent(tick, exit_string, 0);
-}
-
 void
 terminateEventQueueThreads()
 {
diff --git a/src/sim/simulate.hh b/src/sim/simulate.hh
index e7c4fa640c..eacf67cec2 100644
--- a/src/sim/simulate.hh
+++ b/src/sim/simulate.hh
@@ -67,16 +67,6 @@ void set_max_tick(Tick tick);
  */
 Tick get_max_tick();
 
-/**
- * @brief Schedule an exit event at a particular tick.
- *
- * Schedule a tick with a particular exit string.
- *
- * @param tick The tick at which the simulation loop should exit.
- * @param exit_string The exit string explaining the exit.
- */
-void schedule_tick_exit(Tick tick, std::string exit_string);
-
 /**
  * Terminate helper threads when running in parallel mode.
  *

From 985d9c641f7e72d319a115676c280780e04ed8da Mon Sep 17 00:00:00 2001
From: Yu-hsin Wang <yuhsingw@google.com>
Date: Fri, 2 Dec 2022 16:27:16 +0800
Subject: [PATCH 043/492] systemc: replace the deprecated std::iterator

std::iterator is deprecated in c++17. We can just declare the
required types for iterator traits directly without the helper.

Change-Id: I789e2c2b13e56cc391527686109df8b779474d09
Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/66351
Reviewed-by: Gabe Black <gabeblack@google.com>
Maintainer: Gabe Black <gabeblack@google.com>
Tested-by: kokoro <noreply+kokoro@google.com>
---
 src/systemc/ext/utils/sc_vector.hh | 16 +++++++---------
 1 file changed, 7 insertions(+), 9 deletions(-)

diff --git a/src/systemc/ext/utils/sc_vector.hh b/src/systemc/ext/utils/sc_vector.hh
index ef0d7dc24a..c1d9ded0ce 100644
--- a/src/systemc/ext/utils/sc_vector.hh
+++ b/src/systemc/ext/utils/sc_vector.hh
@@ -49,6 +49,7 @@
 
 #include <stdint.h>
 
+#include <cstddef>
 #include <exception>
 #include <iterator>
 #include <vector>
@@ -259,10 +260,7 @@ class sc_member_access
 
 template <typename Element,
           typename AccessPolicy=sc_direct_access<Element> >
-class sc_vector_iter :
-        public std::iterator<std::random_access_iterator_tag,
-                             typename AccessPolicy::Type>,
-        private AccessPolicy
+class sc_vector_iter : private AccessPolicy
 {
   private:
     typedef Element ElementType;
@@ -282,8 +280,6 @@ class sc_vector_iter :
     template <typename, typename>
     friend class sc_vector_iter;
 
-    typedef std::iterator<std::random_access_iterator_tag, AccessType>
-        BaseType;
     typedef sc_vector_iter ThisType;
     typedef sc_vector<PlainType> VectorType;
     typedef std::vector<void *> StorageType;
@@ -315,9 +311,11 @@ class sc_vector_iter :
     // Conforms to Random Access Iterator category.
     // See ISO/IEC 14882:2003(E), 24.1 [lib.iterator.requirements]
 
-    typedef typename BaseType::difference_type difference_type;
-    typedef typename BaseType::reference reference;
-    typedef typename BaseType::pointer pointer;
+    using difference_type = std::ptrdiff_t;
+    using value_type = typename AccessPolicy::Type;
+    using reference = typename AccessPolicy::Type &;
+    using pointer = typename AccessPolicy::Type *;
+    using iterator_category = std::random_access_iterator_tag;
 
     sc_vector_iter() : Policy(), it_() {}
 

From 00a893ad4e2195450090810ee6d3de1c58b12194 Mon Sep 17 00:00:00 2001
From: Gabe Black <gabeblack@google.com>
Date: Tue, 4 Oct 2022 05:36:22 -0700
Subject: [PATCH 044/492] systemc: Enable DMI in the non-blocking/timing mode
 bridge.

Change-Id: Ia618081e2dbf8b49f62480ac5dc29f87100cd4f1
Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/65754
Maintainer: Gabe Black <gabeblack@google.com>
Tested-by: kokoro <noreply+kokoro@google.com>
Reviewed-by: Gabe Black <gabeblack@google.com>
---
 src/systemc/tlm_bridge/tlm_to_gem5.cc | 27 +++++++++++++++++++++++++--
 1 file changed, 25 insertions(+), 2 deletions(-)

diff --git a/src/systemc/tlm_bridge/tlm_to_gem5.cc b/src/systemc/tlm_bridge/tlm_to_gem5.cc
index 47a2fba08e..c02efe7437 100644
--- a/src/systemc/tlm_bridge/tlm_to_gem5.cc
+++ b/src/systemc/tlm_bridge/tlm_to_gem5.cc
@@ -207,6 +207,29 @@ void
 TlmToGem5Bridge<BITWIDTH>::sendBeginResp(tlm::tlm_generic_payload &trans,
                                          sc_core::sc_time &delay)
 {
+    MemBackdoor::Flags flags;
+    switch (trans.get_command()) {
+      case tlm::TLM_READ_COMMAND:
+        flags = MemBackdoor::Readable;
+        break;
+      case tlm::TLM_WRITE_COMMAND:
+        flags = MemBackdoor::Writeable;
+        break;
+      default:
+        panic("TlmToGem5Bridge: "
+                "received transaction with unsupported command");
+    }
+    Addr start_addr = trans.get_address();
+    Addr length = trans.get_data_length();
+
+    MemBackdoorReq req({start_addr, start_addr + length}, flags);
+    MemBackdoorPtr backdoor = nullptr;
+
+    bmp.sendMemBackdoorReq(req, backdoor);
+
+    if (backdoor)
+        trans.set_dmi_allowed(true);
+
     tlm::tlm_phase phase = tlm::BEGIN_RESP;
 
     auto status = socket->nb_transport_bw(trans, phase, delay);
@@ -574,12 +597,12 @@ TlmToGem5Bridge<BITWIDTH>::before_end_of_elaboration()
         DPRINTF(TlmBridge, "register blocking interface");
         socket.register_b_transport(
                 this, &TlmToGem5Bridge<BITWIDTH>::b_transport);
-        socket.register_get_direct_mem_ptr(
-                this, &TlmToGem5Bridge<BITWIDTH>::get_direct_mem_ptr);
     } else {
         panic("gem5 operates neither in Timing nor in Atomic mode");
     }
 
+    socket.register_get_direct_mem_ptr(
+            this, &TlmToGem5Bridge<BITWIDTH>::get_direct_mem_ptr);
     socket.register_transport_dbg(
             this, &TlmToGem5Bridge<BITWIDTH>::transport_dbg);
 

From a23641e01fc085fa59528b2fa1c404915bc485bc Mon Sep 17 00:00:00 2001
From: "Bobby R. Bruce" <bbruce@ucdavis.edu>
Date: Tue, 6 Dec 2022 10:39:18 -0800
Subject: [PATCH 045/492] configs: Fix x86-gapbs-benchmarks.py example

With https://gem5-review.googlesource.com/c/public/gem5/+/64791 we
updated the configs/example/gem5_library to utilize the `m5.simulate`
module. The GAPBS benchmark example uses the "WORKBEGIN" and "WORKEND"
exit events to specify the ROI. The patch incorrectly assumed an "EXIT"
exit event were used.

As such, the
"test-gem5-library-example-x86-gapbs-benchmarks-ALL-x86_64-opt-MESI_Two_Level"
test was not properly running, causing the Nightly test to fail:
https://jenkins.gem5.org/job/nightly/444. This patch fixes this error.

Change-Id: I207fe3563c8d9c59bcb79428fe62d2d2bbccd013
Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/66512
Maintainer: Bobby Bruce <bbruce@ucdavis.edu>
Tested-by: kokoro <noreply+kokoro@google.com>
Reviewed-by: Jason Lowe-Power <power.jg@gmail.com>
---
 configs/example/gem5_library/x86-gapbs-benchmarks.py | 8 ++++++--
 1 file changed, 6 insertions(+), 2 deletions(-)

diff --git a/configs/example/gem5_library/x86-gapbs-benchmarks.py b/configs/example/gem5_library/x86-gapbs-benchmarks.py
index 638d34b599..6ab37479f9 100644
--- a/configs/example/gem5_library/x86-gapbs-benchmarks.py
+++ b/configs/example/gem5_library/x86-gapbs-benchmarks.py
@@ -211,7 +211,7 @@ board.set_kernel_disk_workload(
 )
 
 
-def handle_exit():
+def handle_workbegin():
     print("Done booting Linux")
     print("Resetting stats at the start of ROI!")
     m5.stats.reset()
@@ -219,6 +219,9 @@ def handle_exit():
     start_tick = m5.curTick()
     processor.switch()
     yield False  # E.g., continue the simulation.
+
+
+def handle_workend():
     print("Dump stats at the end of the ROI!")
     m5.stats.dump()
     yield True  # Stop the simulation. We're done.
@@ -227,7 +230,8 @@ def handle_exit():
 simulator = Simulator(
     board=board,
     on_exit_event={
-        ExitEvent.EXIT: handle_exit(),
+        ExitEvent.WORKBEGIN: handle_workbegin(),
+        ExitEvent.WORKEND: handle_workend(),
     },
 )
 

From e81aa1cd860fc4a0c899c3601d098e4ec3eb8464 Mon Sep 17 00:00:00 2001
From: "Bobby R. Bruce" <bbruce@ucdavis.edu>
Date: Tue, 6 Dec 2022 10:48:48 -0800
Subject: [PATCH 046/492] configs: Alter x86-npb-benchmarks.py to exit after
 WORKEND

While the config script will still function without exiting the SimLoop
after the "WORKEND" exit event, there's no need for the simulation to
continue beyond this point.

Change-Id: I60691215e9516fa1eeb8b8502f2bc5a09de2969b
Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/66513
Tested-by: kokoro <noreply+kokoro@google.com>
Reviewed-by: Jason Lowe-Power <power.jg@gmail.com>
Maintainer: Bobby Bruce <bbruce@ucdavis.edu>
---
 configs/example/gem5_library/x86-npb-benchmarks.py | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/configs/example/gem5_library/x86-npb-benchmarks.py b/configs/example/gem5_library/x86-npb-benchmarks.py
index 2cb314303f..ff363e449c 100644
--- a/configs/example/gem5_library/x86-npb-benchmarks.py
+++ b/configs/example/gem5_library/x86-npb-benchmarks.py
@@ -237,12 +237,11 @@ def handle_workbegin():
 # marked by `workend`.
 
 # We exepect that ROI ends with `workend` or `simulate() limit reached`.
-# Otherwise the simulation ended unexpectedly.
 def handle_workend():
     print("Dump stats at the end of the ROI!")
 
     m5.stats.dump()
-    yield False
+    yield True
 
 
 simulator = Simulator(

From bd319560605f1e3eebf828efd7e06206874d6515 Mon Sep 17 00:00:00 2001
From: Jarvis <jia44@wisc.edu>
Date: Wed, 7 Dec 2022 11:18:30 -0600
Subject: [PATCH 047/492] tests: Add replacement policy tests

Add tests to test the correctness of replacement policies using
TrafficGen.
Enable debug-flags to print the hit and miss messages so that you
can compare the results with the comments in the tests. Even though
the tests are targeting specific replacement policies, they can be
reused to test all replacement policies.

Change-Id: I3a8013fbcb19adae25b0818ac9e4b0be60be0689
Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/60389
Tested-by: kokoro <noreply+kokoro@google.com>
Maintainer: Daniel Carvalho <odanrc@yahoo.com.br>
Reviewed-by: Daniel Carvalho <odanrc@yahoo.com.br>
---
 tests/gem5/replacement-policies/README        |  29 +++++
 .../replacement-policies/cache_hierarchies.py |  49 ++++++++
 .../replacement-policies/ref/fifo_test1_ld    |  13 +++
 .../replacement-policies/ref/fifo_test1_st    |  13 +++
 .../replacement-policies/ref/fifo_test2_ld    |  13 +++
 .../replacement-policies/ref/fifo_test2_st    |  13 +++
 .../replacement-policies/ref/lfu_test1_ld     |  10 ++
 .../replacement-policies/ref/lfu_test1_st     |  10 ++
 .../replacement-policies/ref/lfu_test2_ld     |  13 +++
 .../replacement-policies/ref/lfu_test2_st     |  13 +++
 .../replacement-policies/ref/lfu_test3_ld     |  28 +++++
 .../replacement-policies/ref/lfu_test3_st     |  28 +++++
 .../replacement-policies/ref/lip_test1_ld     |  18 +++
 .../replacement-policies/ref/lip_test1_st     |  18 +++
 .../replacement-policies/ref/lru_test1_ld     |  19 +++
 .../replacement-policies/ref/lru_test1_st     |  19 +++
 .../replacement-policies/ref/lru_test2_ld     |  17 +++
 .../replacement-policies/ref/lru_test2_st     |  17 +++
 .../replacement-policies/ref/lru_test3_ld     |  13 +++
 .../replacement-policies/ref/lru_test3_st     |  13 +++
 .../replacement-policies/ref/lru_test4_ld     |  13 +++
 .../replacement-policies/ref/lru_test4_st     |  13 +++
 .../replacement-policies/ref/mru_test1_ld     |  13 +++
 .../replacement-policies/ref/mru_test1_st     |  13 +++
 .../replacement-policies/ref/mru_test2_ld     |  14 +++
 .../replacement-policies/ref/mru_test2_st     |  14 +++
 .../replacement-policies/ref/nru_test1_ld     |  14 +++
 .../replacement-policies/ref/nru_test1_st     |  14 +++
 .../replacement-policies/ref/rrip_test1_ld    |  13 +++
 .../replacement-policies/ref/rrip_test1_st    |  13 +++
 .../replacement-policies/ref/rrip_test2_ld    |  16 +++
 .../replacement-policies/ref/rrip_test2_st    |  16 +++
 .../ref/second_chance_test1_ld                |  13 +++
 .../ref/second_chance_test1_st                |  13 +++
 .../ref/second_chance_test2_ld                |  16 +++
 .../ref/second_chance_test2_st                |  16 +++
 .../ref/second_chance_test3_ld                |  18 +++
 .../ref/second_chance_test3_st                |  18 +++
 .../ref/tree_plru_test1_ld                    |  13 +++
 .../ref/tree_plru_test1_st                    |  13 +++
 .../ref/tree_plru_test2_ld                    |  11 ++
 .../ref/tree_plru_test2_st                    |  11 ++
 .../ref/tree_plru_test3_ld                    |  14 +++
 .../ref/tree_plru_test3_st                    |  14 +++
 .../run_replacement_policy_test.py            |  95 +++++++++++++++
 .../test_replacement_policies.py              | 110 ++++++++++++++++++
 .../traces/fifo_test1_ld.py                   |  70 +++++++++++
 .../traces/fifo_test1_st.py                   |  70 +++++++++++
 .../traces/fifo_test2_ld.py                   |  71 +++++++++++
 .../traces/fifo_test2_st.py                   |  71 +++++++++++
 .../traces/lfu_test1_ld.py                    |  62 ++++++++++
 .../traces/lfu_test1_st.py                    |  62 ++++++++++
 .../traces/lfu_test2_ld.py                    |  66 +++++++++++
 .../traces/lfu_test2_st.py                    |  66 +++++++++++
 .../traces/lfu_test3_ld.py                    |  87 ++++++++++++++
 .../traces/lfu_test3_st.py                    |  87 ++++++++++++++
 .../traces/lip_test1_ld.py                    |  80 +++++++++++++
 .../traces/lip_test1_st.py                    |  80 +++++++++++++
 .../traces/lru_test1_ld.py                    |  76 ++++++++++++
 .../traces/lru_test1_st.py                    |  76 ++++++++++++
 .../traces/lru_test2_ld.py                    |  71 +++++++++++
 .../traces/lru_test2_st.py                    |  71 +++++++++++
 .../traces/lru_test3_ld.py                    |  62 ++++++++++
 .../traces/lru_test3_st.py                    |  62 ++++++++++
 .../traces/lru_test4_ld.py                    |  63 ++++++++++
 .../traces/lru_test4_st.py                    |  63 ++++++++++
 .../traces/mru_test1_ld.py                    |  63 ++++++++++
 .../traces/mru_test1_st.py                    |  63 ++++++++++
 .../traces/mru_test2_ld.py                    |  66 +++++++++++
 .../traces/mru_test2_st.py                    |  66 +++++++++++
 .../traces/nru_test1_ld.py                    |  71 +++++++++++
 .../traces/nru_test1_st.py                    |  71 +++++++++++
 .../traces/rrip_test1_ld.py                   |  67 +++++++++++
 .../traces/rrip_test1_st.py                   |  67 +++++++++++
 .../traces/rrip_test2_ld.py                   |  71 +++++++++++
 .../traces/rrip_test2_st.py                   |  71 +++++++++++
 .../traces/second_chance_test1_ld.py          |  65 +++++++++++
 .../traces/second_chance_test1_st.py          |  65 +++++++++++
 .../traces/second_chance_test2_ld.py          |  71 +++++++++++
 .../traces/second_chance_test2_st.py          |  71 +++++++++++
 .../traces/second_chance_test3_ld.py          |  75 ++++++++++++
 .../traces/second_chance_test3_st.py          |  75 ++++++++++++
 .../traces/tree_plru_test1_ld.py              |  65 +++++++++++
 .../traces/tree_plru_test1_st.py              |  65 +++++++++++
 .../traces/tree_plru_test2_ld.py              |  61 ++++++++++
 .../traces/tree_plru_test2_st.py              |  61 ++++++++++
 .../traces/tree_plru_test3_ld.py              |  67 +++++++++++
 .../traces/tree_plru_test3_st.py              |  67 +++++++++++
 88 files changed, 3807 insertions(+)
 create mode 100644 tests/gem5/replacement-policies/README
 create mode 100644 tests/gem5/replacement-policies/cache_hierarchies.py
 create mode 100644 tests/gem5/replacement-policies/ref/fifo_test1_ld
 create mode 100644 tests/gem5/replacement-policies/ref/fifo_test1_st
 create mode 100644 tests/gem5/replacement-policies/ref/fifo_test2_ld
 create mode 100644 tests/gem5/replacement-policies/ref/fifo_test2_st
 create mode 100644 tests/gem5/replacement-policies/ref/lfu_test1_ld
 create mode 100644 tests/gem5/replacement-policies/ref/lfu_test1_st
 create mode 100644 tests/gem5/replacement-policies/ref/lfu_test2_ld
 create mode 100644 tests/gem5/replacement-policies/ref/lfu_test2_st
 create mode 100644 tests/gem5/replacement-policies/ref/lfu_test3_ld
 create mode 100644 tests/gem5/replacement-policies/ref/lfu_test3_st
 create mode 100644 tests/gem5/replacement-policies/ref/lip_test1_ld
 create mode 100644 tests/gem5/replacement-policies/ref/lip_test1_st
 create mode 100644 tests/gem5/replacement-policies/ref/lru_test1_ld
 create mode 100644 tests/gem5/replacement-policies/ref/lru_test1_st
 create mode 100644 tests/gem5/replacement-policies/ref/lru_test2_ld
 create mode 100644 tests/gem5/replacement-policies/ref/lru_test2_st
 create mode 100644 tests/gem5/replacement-policies/ref/lru_test3_ld
 create mode 100644 tests/gem5/replacement-policies/ref/lru_test3_st
 create mode 100644 tests/gem5/replacement-policies/ref/lru_test4_ld
 create mode 100644 tests/gem5/replacement-policies/ref/lru_test4_st
 create mode 100644 tests/gem5/replacement-policies/ref/mru_test1_ld
 create mode 100644 tests/gem5/replacement-policies/ref/mru_test1_st
 create mode 100644 tests/gem5/replacement-policies/ref/mru_test2_ld
 create mode 100644 tests/gem5/replacement-policies/ref/mru_test2_st
 create mode 100644 tests/gem5/replacement-policies/ref/nru_test1_ld
 create mode 100644 tests/gem5/replacement-policies/ref/nru_test1_st
 create mode 100644 tests/gem5/replacement-policies/ref/rrip_test1_ld
 create mode 100644 tests/gem5/replacement-policies/ref/rrip_test1_st
 create mode 100644 tests/gem5/replacement-policies/ref/rrip_test2_ld
 create mode 100644 tests/gem5/replacement-policies/ref/rrip_test2_st
 create mode 100644 tests/gem5/replacement-policies/ref/second_chance_test1_ld
 create mode 100644 tests/gem5/replacement-policies/ref/second_chance_test1_st
 create mode 100644 tests/gem5/replacement-policies/ref/second_chance_test2_ld
 create mode 100644 tests/gem5/replacement-policies/ref/second_chance_test2_st
 create mode 100644 tests/gem5/replacement-policies/ref/second_chance_test3_ld
 create mode 100644 tests/gem5/replacement-policies/ref/second_chance_test3_st
 create mode 100644 tests/gem5/replacement-policies/ref/tree_plru_test1_ld
 create mode 100644 tests/gem5/replacement-policies/ref/tree_plru_test1_st
 create mode 100644 tests/gem5/replacement-policies/ref/tree_plru_test2_ld
 create mode 100644 tests/gem5/replacement-policies/ref/tree_plru_test2_st
 create mode 100644 tests/gem5/replacement-policies/ref/tree_plru_test3_ld
 create mode 100644 tests/gem5/replacement-policies/ref/tree_plru_test3_st
 create mode 100644 tests/gem5/replacement-policies/run_replacement_policy_test.py
 create mode 100644 tests/gem5/replacement-policies/test_replacement_policies.py
 create mode 100644 tests/gem5/replacement-policies/traces/fifo_test1_ld.py
 create mode 100644 tests/gem5/replacement-policies/traces/fifo_test1_st.py
 create mode 100644 tests/gem5/replacement-policies/traces/fifo_test2_ld.py
 create mode 100644 tests/gem5/replacement-policies/traces/fifo_test2_st.py
 create mode 100644 tests/gem5/replacement-policies/traces/lfu_test1_ld.py
 create mode 100644 tests/gem5/replacement-policies/traces/lfu_test1_st.py
 create mode 100644 tests/gem5/replacement-policies/traces/lfu_test2_ld.py
 create mode 100644 tests/gem5/replacement-policies/traces/lfu_test2_st.py
 create mode 100644 tests/gem5/replacement-policies/traces/lfu_test3_ld.py
 create mode 100644 tests/gem5/replacement-policies/traces/lfu_test3_st.py
 create mode 100644 tests/gem5/replacement-policies/traces/lip_test1_ld.py
 create mode 100644 tests/gem5/replacement-policies/traces/lip_test1_st.py
 create mode 100644 tests/gem5/replacement-policies/traces/lru_test1_ld.py
 create mode 100644 tests/gem5/replacement-policies/traces/lru_test1_st.py
 create mode 100644 tests/gem5/replacement-policies/traces/lru_test2_ld.py
 create mode 100644 tests/gem5/replacement-policies/traces/lru_test2_st.py
 create mode 100644 tests/gem5/replacement-policies/traces/lru_test3_ld.py
 create mode 100644 tests/gem5/replacement-policies/traces/lru_test3_st.py
 create mode 100644 tests/gem5/replacement-policies/traces/lru_test4_ld.py
 create mode 100644 tests/gem5/replacement-policies/traces/lru_test4_st.py
 create mode 100644 tests/gem5/replacement-policies/traces/mru_test1_ld.py
 create mode 100644 tests/gem5/replacement-policies/traces/mru_test1_st.py
 create mode 100644 tests/gem5/replacement-policies/traces/mru_test2_ld.py
 create mode 100644 tests/gem5/replacement-policies/traces/mru_test2_st.py
 create mode 100644 tests/gem5/replacement-policies/traces/nru_test1_ld.py
 create mode 100644 tests/gem5/replacement-policies/traces/nru_test1_st.py
 create mode 100644 tests/gem5/replacement-policies/traces/rrip_test1_ld.py
 create mode 100644 tests/gem5/replacement-policies/traces/rrip_test1_st.py
 create mode 100644 tests/gem5/replacement-policies/traces/rrip_test2_ld.py
 create mode 100644 tests/gem5/replacement-policies/traces/rrip_test2_st.py
 create mode 100644 tests/gem5/replacement-policies/traces/second_chance_test1_ld.py
 create mode 100644 tests/gem5/replacement-policies/traces/second_chance_test1_st.py
 create mode 100644 tests/gem5/replacement-policies/traces/second_chance_test2_ld.py
 create mode 100644 tests/gem5/replacement-policies/traces/second_chance_test2_st.py
 create mode 100644 tests/gem5/replacement-policies/traces/second_chance_test3_ld.py
 create mode 100644 tests/gem5/replacement-policies/traces/second_chance_test3_st.py
 create mode 100644 tests/gem5/replacement-policies/traces/tree_plru_test1_ld.py
 create mode 100644 tests/gem5/replacement-policies/traces/tree_plru_test1_st.py
 create mode 100644 tests/gem5/replacement-policies/traces/tree_plru_test2_ld.py
 create mode 100644 tests/gem5/replacement-policies/traces/tree_plru_test2_st.py
 create mode 100644 tests/gem5/replacement-policies/traces/tree_plru_test3_ld.py
 create mode 100644 tests/gem5/replacement-policies/traces/tree_plru_test3_st.py

diff --git a/tests/gem5/replacement-policies/README b/tests/gem5/replacement-policies/README
new file mode 100644
index 0000000000..626d42e9c9
--- /dev/null
+++ b/tests/gem5/replacement-policies/README
@@ -0,0 +1,29 @@
+There are two seperate files targeting loads and stores for each test.
+In each test file, each letter in the comments represents a 64-bit address
+range. For example, A represents the address from 0 to 63, B represents
+the address from 64 to 127, C represents the address from 128 to 191, and so on.
+If you enable debug flags to print the hits and misses information, you
+can compare the results with your expectation. This test can be used to
+test the correctness of the replacement policy. The first block will
+always get an eviction.
+
+The format of test files should be using traffic generator
+
+To emulate 4-way 1-set cache with implementation of 4-way 2-set cache,
+we will use A, C, E, G, I, K, M, O instead of A, B, C, D, E, F, G, H,
+so they will never get to the second set
+
+The linear traffic generator has
+<duration (ticks)> <start addr> <end addr> <access size (bytes)>
+<min period (ticks)> <max period (ticks)> <percent reads> <data limit (bytes)>
+
+Addresses are expressed as decimal numbers. The period in the linear
+and random state is from a uniform random distribution over the
+interval. If a specific value is desired, then the min and max can
+be set to the same value.
+
+The duration(in ticks) is calculated by the time needed for accessing a cache
+line * (the number of cache lines accessed in this state + 1). For example, to
+access address 0 to address 1023, the duration should be 510000 ticks, which is
+calcualted by 510000 = 30000 * (1024/64 + 1). Please note that 30000 is assumed
+to be the number of ticks in one period here.
diff --git a/tests/gem5/replacement-policies/cache_hierarchies.py b/tests/gem5/replacement-policies/cache_hierarchies.py
new file mode 100644
index 0000000000..6177dd4ac9
--- /dev/null
+++ b/tests/gem5/replacement-policies/cache_hierarchies.py
@@ -0,0 +1,49 @@
+# Copyright (c) 2022 Jarvis Jia, Jing Qu, Matt Sinclair, & Mingyuan Xiang
+# Copyright (c) 2022 The Regents of the University of California
+# All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are
+# met: redistributions of source code must retain the above copyright
+# notice, this list of conditions and the following disclaimer;
+# redistributions in binary form must reproduce the above copyright
+# notice, this list of conditions and the following disclaimer in the
+# documentation and/or other materials provided with the distribution;
+# neither the name of the copyright holders nor the names of its
+# contributors may be used to endorse or promote products derived from
+# this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+from typing import Type
+
+from gem5.utils.override import overrides
+from gem5.components.cachehierarchies.ruby.mi_example_cache_hierarchy import (
+    MIExampleCacheHierarchy,
+)
+from gem5.components.boards.abstract_board import AbstractBoard
+from m5.objects.ReplacementPolicies import BaseReplacementPolicy
+
+
+class ModMIExampleCacheHierarchy(MIExampleCacheHierarchy):
+    def __init__(self, replacement_policy_class: Type[BaseReplacementPolicy]):
+        super().__init__(size="512B", assoc="4")
+        self._replacement_policy_class = replacement_policy_class
+
+    @overrides(MIExampleCacheHierarchy)
+    def incorporate_cache(self, board: AbstractBoard) -> None:
+        super().incorporate_cache(board)
+        for controller in self._controllers:
+            controller.cacheMemory.replacement_policy = (
+                self._replacement_policy_class()
+            )
diff --git a/tests/gem5/replacement-policies/ref/fifo_test1_ld b/tests/gem5/replacement-policies/ref/fifo_test1_ld
new file mode 100644
index 0000000000..264fcf6008
--- /dev/null
+++ b/tests/gem5/replacement-policies/ref/fifo_test1_ld
@@ -0,0 +1,13 @@
+Global frequency set at 1000000000000 ticks per second
+Beginning simulation!
+  74000: system.cache_hierarchy.ruby_system.controllers.sequencer: Cache miss at [0x0, line 0x0]
+ 134000: system.cache_hierarchy.ruby_system.controllers.sequencer: Cache miss at [0x80, line 0x80]
+ 194000: system.cache_hierarchy.ruby_system.controllers.sequencer: Cache miss at [0x100, line 0x100]
+ 254000: system.cache_hierarchy.ruby_system.controllers.sequencer: Cache miss at [0x180, line 0x180]
+ 319000: system.cache_hierarchy.ruby_system.controllers.sequencer: Cache miss at [0x200, line 0x200]
+ 379000: system.cache_hierarchy.ruby_system.controllers.sequencer: Cache miss at [0x280, line 0x280]
+ 439000: system.cache_hierarchy.ruby_system.controllers.sequencer: Cache miss at [0x0, line 0x0]
+ 499000: system.cache_hierarchy.ruby_system.controllers.sequencer: Cache miss at [0x80, line 0x80]
+ 559000: system.cache_hierarchy.ruby_system.controllers.sequencer: Cache miss at [0x100, line 0x100]
+ 619000: system.cache_hierarchy.ruby_system.controllers.sequencer: Cache miss at [0x180, line 0x180]
+.
diff --git a/tests/gem5/replacement-policies/ref/fifo_test1_st b/tests/gem5/replacement-policies/ref/fifo_test1_st
new file mode 100644
index 0000000000..264fcf6008
--- /dev/null
+++ b/tests/gem5/replacement-policies/ref/fifo_test1_st
@@ -0,0 +1,13 @@
+Global frequency set at 1000000000000 ticks per second
+Beginning simulation!
+  74000: system.cache_hierarchy.ruby_system.controllers.sequencer: Cache miss at [0x0, line 0x0]
+ 134000: system.cache_hierarchy.ruby_system.controllers.sequencer: Cache miss at [0x80, line 0x80]
+ 194000: system.cache_hierarchy.ruby_system.controllers.sequencer: Cache miss at [0x100, line 0x100]
+ 254000: system.cache_hierarchy.ruby_system.controllers.sequencer: Cache miss at [0x180, line 0x180]
+ 319000: system.cache_hierarchy.ruby_system.controllers.sequencer: Cache miss at [0x200, line 0x200]
+ 379000: system.cache_hierarchy.ruby_system.controllers.sequencer: Cache miss at [0x280, line 0x280]
+ 439000: system.cache_hierarchy.ruby_system.controllers.sequencer: Cache miss at [0x0, line 0x0]
+ 499000: system.cache_hierarchy.ruby_system.controllers.sequencer: Cache miss at [0x80, line 0x80]
+ 559000: system.cache_hierarchy.ruby_system.controllers.sequencer: Cache miss at [0x100, line 0x100]
+ 619000: system.cache_hierarchy.ruby_system.controllers.sequencer: Cache miss at [0x180, line 0x180]
+.
diff --git a/tests/gem5/replacement-policies/ref/fifo_test2_ld b/tests/gem5/replacement-policies/ref/fifo_test2_ld
new file mode 100644
index 0000000000..08a91b696e
--- /dev/null
+++ b/tests/gem5/replacement-policies/ref/fifo_test2_ld
@@ -0,0 +1,13 @@
+Global frequency set at 1000000000000 ticks per second
+Beginning simulation!
+  74000: system.cache_hierarchy.ruby_system.controllers.sequencer: Cache miss at [0x0, line 0x0]
+ 134000: system.cache_hierarchy.ruby_system.controllers.sequencer: Cache miss at [0x80, line 0x80]
+ 194000: system.cache_hierarchy.ruby_system.controllers.sequencer: Cache miss at [0x100, line 0x100]
+ 254000: system.cache_hierarchy.ruby_system.controllers.sequencer: Cache miss at [0x180, line 0x180]
+ 271000: system.cache_hierarchy.ruby_system.controllers.sequencer: Cache hit at [0x0, line 0x0]
+ 331000: system.cache_hierarchy.ruby_system.controllers.sequencer: Cache hit at [0x80, line 0x80]
+ 439000: system.cache_hierarchy.ruby_system.controllers.sequencer: Cache miss at [0x200, line 0x200]
+ 499000: system.cache_hierarchy.ruby_system.controllers.sequencer: Cache miss at [0x280, line 0x280]
+ 511000: system.cache_hierarchy.ruby_system.controllers.sequencer: Cache hit at [0x100, line 0x100]
+ 571000: system.cache_hierarchy.ruby_system.controllers.sequencer: Cache hit at [0x180, line 0x180]
+.
diff --git a/tests/gem5/replacement-policies/ref/fifo_test2_st b/tests/gem5/replacement-policies/ref/fifo_test2_st
new file mode 100644
index 0000000000..08a91b696e
--- /dev/null
+++ b/tests/gem5/replacement-policies/ref/fifo_test2_st
@@ -0,0 +1,13 @@
+Global frequency set at 1000000000000 ticks per second
+Beginning simulation!
+  74000: system.cache_hierarchy.ruby_system.controllers.sequencer: Cache miss at [0x0, line 0x0]
+ 134000: system.cache_hierarchy.ruby_system.controllers.sequencer: Cache miss at [0x80, line 0x80]
+ 194000: system.cache_hierarchy.ruby_system.controllers.sequencer: Cache miss at [0x100, line 0x100]
+ 254000: system.cache_hierarchy.ruby_system.controllers.sequencer: Cache miss at [0x180, line 0x180]
+ 271000: system.cache_hierarchy.ruby_system.controllers.sequencer: Cache hit at [0x0, line 0x0]
+ 331000: system.cache_hierarchy.ruby_system.controllers.sequencer: Cache hit at [0x80, line 0x80]
+ 439000: system.cache_hierarchy.ruby_system.controllers.sequencer: Cache miss at [0x200, line 0x200]
+ 499000: system.cache_hierarchy.ruby_system.controllers.sequencer: Cache miss at [0x280, line 0x280]
+ 511000: system.cache_hierarchy.ruby_system.controllers.sequencer: Cache hit at [0x100, line 0x100]
+ 571000: system.cache_hierarchy.ruby_system.controllers.sequencer: Cache hit at [0x180, line 0x180]
+.
diff --git a/tests/gem5/replacement-policies/ref/lfu_test1_ld b/tests/gem5/replacement-policies/ref/lfu_test1_ld
new file mode 100644
index 0000000000..edc9399c9d
--- /dev/null
+++ b/tests/gem5/replacement-policies/ref/lfu_test1_ld
@@ -0,0 +1,10 @@
+Global frequency set at 1000000000000 ticks per second
+Beginning simulation!
+  74000: system.cache_hierarchy.ruby_system.controllers.sequencer: Cache miss at [0x0, line 0x0]
+ 134000: system.cache_hierarchy.ruby_system.controllers.sequencer: Cache miss at [0x80, line 0x80]
+ 194000: system.cache_hierarchy.ruby_system.controllers.sequencer: Cache miss at [0x100, line 0x100]
+ 254000: system.cache_hierarchy.ruby_system.controllers.sequencer: Cache miss at [0x180, line 0x180]
+ 319000: system.cache_hierarchy.ruby_system.controllers.sequencer: Cache miss at [0x200, line 0x200]
+ 379000: system.cache_hierarchy.ruby_system.controllers.sequencer: Cache miss at [0x0, line 0x0]
+ 439000: system.cache_hierarchy.ruby_system.controllers.sequencer: Cache miss at [0x200, line 0x200]
+.
diff --git a/tests/gem5/replacement-policies/ref/lfu_test1_st b/tests/gem5/replacement-policies/ref/lfu_test1_st
new file mode 100644
index 0000000000..edc9399c9d
--- /dev/null
+++ b/tests/gem5/replacement-policies/ref/lfu_test1_st
@@ -0,0 +1,10 @@
+Global frequency set at 1000000000000 ticks per second
+Beginning simulation!
+  74000: system.cache_hierarchy.ruby_system.controllers.sequencer: Cache miss at [0x0, line 0x0]
+ 134000: system.cache_hierarchy.ruby_system.controllers.sequencer: Cache miss at [0x80, line 0x80]
+ 194000: system.cache_hierarchy.ruby_system.controllers.sequencer: Cache miss at [0x100, line 0x100]
+ 254000: system.cache_hierarchy.ruby_system.controllers.sequencer: Cache miss at [0x180, line 0x180]
+ 319000: system.cache_hierarchy.ruby_system.controllers.sequencer: Cache miss at [0x200, line 0x200]
+ 379000: system.cache_hierarchy.ruby_system.controllers.sequencer: Cache miss at [0x0, line 0x0]
+ 439000: system.cache_hierarchy.ruby_system.controllers.sequencer: Cache miss at [0x200, line 0x200]
+.
diff --git a/tests/gem5/replacement-policies/ref/lfu_test2_ld b/tests/gem5/replacement-policies/ref/lfu_test2_ld
new file mode 100644
index 0000000000..5a3618ce82
--- /dev/null
+++ b/tests/gem5/replacement-policies/ref/lfu_test2_ld
@@ -0,0 +1,13 @@
+Global frequency set at 1000000000000 ticks per second
+Beginning simulation!
+  74000: system.cache_hierarchy.ruby_system.controllers.sequencer: Cache miss at [0x0, line 0x0]
+ 134000: system.cache_hierarchy.ruby_system.controllers.sequencer: Cache miss at [0x80, line 0x80]
+ 194000: system.cache_hierarchy.ruby_system.controllers.sequencer: Cache miss at [0x100, line 0x100]
+ 254000: system.cache_hierarchy.ruby_system.controllers.sequencer: Cache miss at [0x180, line 0x180]
+ 271000: system.cache_hierarchy.ruby_system.controllers.sequencer: Cache hit at [0x0, line 0x0]
+ 379000: system.cache_hierarchy.ruby_system.controllers.sequencer: Cache miss at [0x200, line 0x200]
+ 439000: system.cache_hierarchy.ruby_system.controllers.sequencer: Cache miss at [0x280, line 0x280]
+ 499000: system.cache_hierarchy.ruby_system.controllers.sequencer: Cache miss at [0x300, line 0x300]
+ 559000: system.cache_hierarchy.ruby_system.controllers.sequencer: Cache miss at [0x380, line 0x380]
+ 571000: system.cache_hierarchy.ruby_system.controllers.sequencer: Cache hit at [0x0, line 0x0]
+.
diff --git a/tests/gem5/replacement-policies/ref/lfu_test2_st b/tests/gem5/replacement-policies/ref/lfu_test2_st
new file mode 100644
index 0000000000..5a3618ce82
--- /dev/null
+++ b/tests/gem5/replacement-policies/ref/lfu_test2_st
@@ -0,0 +1,13 @@
+Global frequency set at 1000000000000 ticks per second
+Beginning simulation!
+  74000: system.cache_hierarchy.ruby_system.controllers.sequencer: Cache miss at [0x0, line 0x0]
+ 134000: system.cache_hierarchy.ruby_system.controllers.sequencer: Cache miss at [0x80, line 0x80]
+ 194000: system.cache_hierarchy.ruby_system.controllers.sequencer: Cache miss at [0x100, line 0x100]
+ 254000: system.cache_hierarchy.ruby_system.controllers.sequencer: Cache miss at [0x180, line 0x180]
+ 271000: system.cache_hierarchy.ruby_system.controllers.sequencer: Cache hit at [0x0, line 0x0]
+ 379000: system.cache_hierarchy.ruby_system.controllers.sequencer: Cache miss at [0x200, line 0x200]
+ 439000: system.cache_hierarchy.ruby_system.controllers.sequencer: Cache miss at [0x280, line 0x280]
+ 499000: system.cache_hierarchy.ruby_system.controllers.sequencer: Cache miss at [0x300, line 0x300]
+ 559000: system.cache_hierarchy.ruby_system.controllers.sequencer: Cache miss at [0x380, line 0x380]
+ 571000: system.cache_hierarchy.ruby_system.controllers.sequencer: Cache hit at [0x0, line 0x0]
+.
diff --git a/tests/gem5/replacement-policies/ref/lfu_test3_ld b/tests/gem5/replacement-policies/ref/lfu_test3_ld
new file mode 100644
index 0000000000..24cdc80257
--- /dev/null
+++ b/tests/gem5/replacement-policies/ref/lfu_test3_ld
@@ -0,0 +1,28 @@
+Global frequency set at 1000000000000 ticks per second
+Beginning simulation!
+  74000: system.cache_hierarchy.ruby_system.controllers.sequencer: Cache miss at [0x0, line 0x0]
+  91000: system.cache_hierarchy.ruby_system.controllers.sequencer: Cache hit at [0x0, line 0x0]
+ 194000: system.cache_hierarchy.ruby_system.controllers.sequencer: Cache miss at [0x80, line 0x80]
+ 211000: system.cache_hierarchy.ruby_system.controllers.sequencer: Cache hit at [0x80, line 0x80]
+ 271000: system.cache_hierarchy.ruby_system.controllers.sequencer: Cache hit at [0x80, line 0x80]
+ 374000: system.cache_hierarchy.ruby_system.controllers.sequencer: Cache miss at [0x100, line 0x100]
+ 434000: system.cache_hierarchy.ruby_system.controllers.sequencer: Cache miss at [0x180, line 0x180]
+ 451000: system.cache_hierarchy.ruby_system.controllers.sequencer: Cache hit at [0x180, line 0x180]
+ 559000: system.cache_hierarchy.ruby_system.controllers.sequencer: Cache miss at [0x200, line 0x200]
+ 619000: system.cache_hierarchy.ruby_system.controllers.sequencer: Cache miss at [0x100, line 0x100]
+ 631000: system.cache_hierarchy.ruby_system.controllers.sequencer: Cache hit at [0x100, line 0x100]
+ 691000: system.cache_hierarchy.ruby_system.controllers.sequencer: Cache hit at [0x100, line 0x100]
+ 751000: system.cache_hierarchy.ruby_system.controllers.sequencer: Cache hit at [0x100, line 0x100]
+ 859000: system.cache_hierarchy.ruby_system.controllers.sequencer: Cache miss at [0x280, line 0x280]
+ 919000: system.cache_hierarchy.ruby_system.controllers.sequencer: Cache miss at [0x0, line 0x0]
+ 931000: system.cache_hierarchy.ruby_system.controllers.sequencer: Cache hit at [0x0, line 0x0]
+ 991000: system.cache_hierarchy.ruby_system.controllers.sequencer: Cache hit at [0x0, line 0x0]
+1051000: system.cache_hierarchy.ruby_system.controllers.sequencer: Cache hit at [0x0, line 0x0]
+1159000: system.cache_hierarchy.ruby_system.controllers.sequencer: Cache miss at [0x300, line 0x300]
+1219000: system.cache_hierarchy.ruby_system.controllers.sequencer: Cache miss at [0x180, line 0x180]
+1231000: system.cache_hierarchy.ruby_system.controllers.sequencer: Cache hit at [0x180, line 0x180]
+1291000: system.cache_hierarchy.ruby_system.controllers.sequencer: Cache hit at [0x180, line 0x180]
+1351000: system.cache_hierarchy.ruby_system.controllers.sequencer: Cache hit at [0x180, line 0x180]
+1459000: system.cache_hierarchy.ruby_system.controllers.sequencer: Cache miss at [0x380, line 0x380]
+1519000: system.cache_hierarchy.ruby_system.controllers.sequencer: Cache miss at [0x80, line 0x80]
+.
diff --git a/tests/gem5/replacement-policies/ref/lfu_test3_st b/tests/gem5/replacement-policies/ref/lfu_test3_st
new file mode 100644
index 0000000000..24cdc80257
--- /dev/null
+++ b/tests/gem5/replacement-policies/ref/lfu_test3_st
@@ -0,0 +1,28 @@
+Global frequency set at 1000000000000 ticks per second
+Beginning simulation!
+  74000: system.cache_hierarchy.ruby_system.controllers.sequencer: Cache miss at [0x0, line 0x0]
+  91000: system.cache_hierarchy.ruby_system.controllers.sequencer: Cache hit at [0x0, line 0x0]
+ 194000: system.cache_hierarchy.ruby_system.controllers.sequencer: Cache miss at [0x80, line 0x80]
+ 211000: system.cache_hierarchy.ruby_system.controllers.sequencer: Cache hit at [0x80, line 0x80]
+ 271000: system.cache_hierarchy.ruby_system.controllers.sequencer: Cache hit at [0x80, line 0x80]
+ 374000: system.cache_hierarchy.ruby_system.controllers.sequencer: Cache miss at [0x100, line 0x100]
+ 434000: system.cache_hierarchy.ruby_system.controllers.sequencer: Cache miss at [0x180, line 0x180]
+ 451000: system.cache_hierarchy.ruby_system.controllers.sequencer: Cache hit at [0x180, line 0x180]
+ 559000: system.cache_hierarchy.ruby_system.controllers.sequencer: Cache miss at [0x200, line 0x200]
+ 619000: system.cache_hierarchy.ruby_system.controllers.sequencer: Cache miss at [0x100, line 0x100]
+ 631000: system.cache_hierarchy.ruby_system.controllers.sequencer: Cache hit at [0x100, line 0x100]
+ 691000: system.cache_hierarchy.ruby_system.controllers.sequencer: Cache hit at [0x100, line 0x100]
+ 751000: system.cache_hierarchy.ruby_system.controllers.sequencer: Cache hit at [0x100, line 0x100]
+ 859000: system.cache_hierarchy.ruby_system.controllers.sequencer: Cache miss at [0x280, line 0x280]
+ 919000: system.cache_hierarchy.ruby_system.controllers.sequencer: Cache miss at [0x0, line 0x0]
+ 931000: system.cache_hierarchy.ruby_system.controllers.sequencer: Cache hit at [0x0, line 0x0]
+ 991000: system.cache_hierarchy.ruby_system.controllers.sequencer: Cache hit at [0x0, line 0x0]
+1051000: system.cache_hierarchy.ruby_system.controllers.sequencer: Cache hit at [0x0, line 0x0]
+1159000: system.cache_hierarchy.ruby_system.controllers.sequencer: Cache miss at [0x300, line 0x300]
+1219000: system.cache_hierarchy.ruby_system.controllers.sequencer: Cache miss at [0x180, line 0x180]
+1231000: system.cache_hierarchy.ruby_system.controllers.sequencer: Cache hit at [0x180, line 0x180]
+1291000: system.cache_hierarchy.ruby_system.controllers.sequencer: Cache hit at [0x180, line 0x180]
+1351000: system.cache_hierarchy.ruby_system.controllers.sequencer: Cache hit at [0x180, line 0x180]
+1459000: system.cache_hierarchy.ruby_system.controllers.sequencer: Cache miss at [0x380, line 0x380]
+1519000: system.cache_hierarchy.ruby_system.controllers.sequencer: Cache miss at [0x80, line 0x80]
+.
diff --git a/tests/gem5/replacement-policies/ref/lip_test1_ld b/tests/gem5/replacement-policies/ref/lip_test1_ld
new file mode 100644
index 0000000000..2b895a2476
--- /dev/null
+++ b/tests/gem5/replacement-policies/ref/lip_test1_ld
@@ -0,0 +1,18 @@
+Global frequency set at 1000000000000 ticks per second
+Beginning simulation!
+  74000: system.cache_hierarchy.ruby_system.controllers.sequencer: Cache miss at [0x0, line 0x0]
+ 134000: system.cache_hierarchy.ruby_system.controllers.sequencer: Cache miss at [0x80, line 0x80]
+ 194000: system.cache_hierarchy.ruby_system.controllers.sequencer: Cache miss at [0x100, line 0x100]
+ 254000: system.cache_hierarchy.ruby_system.controllers.sequencer: Cache miss at [0x180, line 0x180]
+ 319000: system.cache_hierarchy.ruby_system.controllers.sequencer: Cache miss at [0x200, line 0x200]
+ 379000: system.cache_hierarchy.ruby_system.controllers.sequencer: Cache miss at [0x280, line 0x280]
+ 439000: system.cache_hierarchy.ruby_system.controllers.sequencer: Cache miss at [0x300, line 0x300]
+ 499000: system.cache_hierarchy.ruby_system.controllers.sequencer: Cache miss at [0x380, line 0x380]
+ 559000: system.cache_hierarchy.ruby_system.controllers.sequencer: Cache miss at [0x0, line 0x0]
+ 571000: system.cache_hierarchy.ruby_system.controllers.sequencer: Cache hit at [0x80, line 0x80]
+ 631000: system.cache_hierarchy.ruby_system.controllers.sequencer: Cache hit at [0x100, line 0x100]
+ 691000: system.cache_hierarchy.ruby_system.controllers.sequencer: Cache hit at [0x180, line 0x180]
+ 751000: system.cache_hierarchy.ruby_system.controllers.sequencer: Cache hit at [0x0, line 0x0]
+ 859000: system.cache_hierarchy.ruby_system.controllers.sequencer: Cache miss at [0x200, line 0x200]
+ 919000: system.cache_hierarchy.ruby_system.controllers.sequencer: Cache miss at [0x80, line 0x80]
+.
diff --git a/tests/gem5/replacement-policies/ref/lip_test1_st b/tests/gem5/replacement-policies/ref/lip_test1_st
new file mode 100644
index 0000000000..2b895a2476
--- /dev/null
+++ b/tests/gem5/replacement-policies/ref/lip_test1_st
@@ -0,0 +1,18 @@
+Global frequency set at 1000000000000 ticks per second
+Beginning simulation!
+  74000: system.cache_hierarchy.ruby_system.controllers.sequencer: Cache miss at [0x0, line 0x0]
+ 134000: system.cache_hierarchy.ruby_system.controllers.sequencer: Cache miss at [0x80, line 0x80]
+ 194000: system.cache_hierarchy.ruby_system.controllers.sequencer: Cache miss at [0x100, line 0x100]
+ 254000: system.cache_hierarchy.ruby_system.controllers.sequencer: Cache miss at [0x180, line 0x180]
+ 319000: system.cache_hierarchy.ruby_system.controllers.sequencer: Cache miss at [0x200, line 0x200]
+ 379000: system.cache_hierarchy.ruby_system.controllers.sequencer: Cache miss at [0x280, line 0x280]
+ 439000: system.cache_hierarchy.ruby_system.controllers.sequencer: Cache miss at [0x300, line 0x300]
+ 499000: system.cache_hierarchy.ruby_system.controllers.sequencer: Cache miss at [0x380, line 0x380]
+ 559000: system.cache_hierarchy.ruby_system.controllers.sequencer: Cache miss at [0x0, line 0x0]
+ 571000: system.cache_hierarchy.ruby_system.controllers.sequencer: Cache hit at [0x80, line 0x80]
+ 631000: system.cache_hierarchy.ruby_system.controllers.sequencer: Cache hit at [0x100, line 0x100]
+ 691000: system.cache_hierarchy.ruby_system.controllers.sequencer: Cache hit at [0x180, line 0x180]
+ 751000: system.cache_hierarchy.ruby_system.controllers.sequencer: Cache hit at [0x0, line 0x0]
+ 859000: system.cache_hierarchy.ruby_system.controllers.sequencer: Cache miss at [0x200, line 0x200]
+ 919000: system.cache_hierarchy.ruby_system.controllers.sequencer: Cache miss at [0x80, line 0x80]
+.
diff --git a/tests/gem5/replacement-policies/ref/lru_test1_ld b/tests/gem5/replacement-policies/ref/lru_test1_ld
new file mode 100644
index 0000000000..57688e4b03
--- /dev/null
+++ b/tests/gem5/replacement-policies/ref/lru_test1_ld
@@ -0,0 +1,19 @@
+Global frequency set at 1000000000000 ticks per second
+Beginning simulation!
+  74000: system.cache_hierarchy.ruby_system.controllers.sequencer: Cache miss at [0x0, line 0x0]
+ 134000: system.cache_hierarchy.ruby_system.controllers.sequencer: Cache miss at [0x80, line 0x80]
+ 194000: system.cache_hierarchy.ruby_system.controllers.sequencer: Cache miss at [0x100, line 0x100]
+ 254000: system.cache_hierarchy.ruby_system.controllers.sequencer: Cache miss at [0x180, line 0x180]
+ 271000: system.cache_hierarchy.ruby_system.controllers.sequencer: Cache hit at [0x0, line 0x0]
+ 331000: system.cache_hierarchy.ruby_system.controllers.sequencer: Cache hit at [0x80, line 0x80]
+ 439000: system.cache_hierarchy.ruby_system.controllers.sequencer: Cache miss at [0x200, line 0x200]
+ 499000: system.cache_hierarchy.ruby_system.controllers.sequencer: Cache miss at [0x280, line 0x280]
+ 559000: system.cache_hierarchy.ruby_system.controllers.sequencer: Cache miss at [0x300, line 0x300]
+ 619000: system.cache_hierarchy.ruby_system.controllers.sequencer: Cache miss at [0x380, line 0x380]
+ 679000: system.cache_hierarchy.ruby_system.controllers.sequencer: Cache miss at [0x0, line 0x0]
+ 739000: system.cache_hierarchy.ruby_system.controllers.sequencer: Cache miss at [0x80, line 0x80]
+ 799000: system.cache_hierarchy.ruby_system.controllers.sequencer: Cache miss at [0x200, line 0x200]
+ 859000: system.cache_hierarchy.ruby_system.controllers.sequencer: Cache miss at [0x280, line 0x280]
+ 919000: system.cache_hierarchy.ruby_system.controllers.sequencer: Cache miss at [0x300, line 0x300]
+ 979000: system.cache_hierarchy.ruby_system.controllers.sequencer: Cache miss at [0x380, line 0x380]
+.
diff --git a/tests/gem5/replacement-policies/ref/lru_test1_st b/tests/gem5/replacement-policies/ref/lru_test1_st
new file mode 100644
index 0000000000..57688e4b03
--- /dev/null
+++ b/tests/gem5/replacement-policies/ref/lru_test1_st
@@ -0,0 +1,19 @@
+Global frequency set at 1000000000000 ticks per second
+Beginning simulation!
+  74000: system.cache_hierarchy.ruby_system.controllers.sequencer: Cache miss at [0x0, line 0x0]
+ 134000: system.cache_hierarchy.ruby_system.controllers.sequencer: Cache miss at [0x80, line 0x80]
+ 194000: system.cache_hierarchy.ruby_system.controllers.sequencer: Cache miss at [0x100, line 0x100]
+ 254000: system.cache_hierarchy.ruby_system.controllers.sequencer: Cache miss at [0x180, line 0x180]
+ 271000: system.cache_hierarchy.ruby_system.controllers.sequencer: Cache hit at [0x0, line 0x0]
+ 331000: system.cache_hierarchy.ruby_system.controllers.sequencer: Cache hit at [0x80, line 0x80]
+ 439000: system.cache_hierarchy.ruby_system.controllers.sequencer: Cache miss at [0x200, line 0x200]
+ 499000: system.cache_hierarchy.ruby_system.controllers.sequencer: Cache miss at [0x280, line 0x280]
+ 559000: system.cache_hierarchy.ruby_system.controllers.sequencer: Cache miss at [0x300, line 0x300]
+ 619000: system.cache_hierarchy.ruby_system.controllers.sequencer: Cache miss at [0x380, line 0x380]
+ 679000: system.cache_hierarchy.ruby_system.controllers.sequencer: Cache miss at [0x0, line 0x0]
+ 739000: system.cache_hierarchy.ruby_system.controllers.sequencer: Cache miss at [0x80, line 0x80]
+ 799000: system.cache_hierarchy.ruby_system.controllers.sequencer: Cache miss at [0x200, line 0x200]
+ 859000: system.cache_hierarchy.ruby_system.controllers.sequencer: Cache miss at [0x280, line 0x280]
+ 919000: system.cache_hierarchy.ruby_system.controllers.sequencer: Cache miss at [0x300, line 0x300]
+ 979000: system.cache_hierarchy.ruby_system.controllers.sequencer: Cache miss at [0x380, line 0x380]
+.
diff --git a/tests/gem5/replacement-policies/ref/lru_test2_ld b/tests/gem5/replacement-policies/ref/lru_test2_ld
new file mode 100644
index 0000000000..d077d93a3b
--- /dev/null
+++ b/tests/gem5/replacement-policies/ref/lru_test2_ld
@@ -0,0 +1,17 @@
+Global frequency set at 1000000000000 ticks per second
+Beginning simulation!
+  74000: system.cache_hierarchy.ruby_system.controllers.sequencer: Cache miss at [0x0, line 0x0]
+ 134000: system.cache_hierarchy.ruby_system.controllers.sequencer: Cache miss at [0x80, line 0x80]
+ 194000: system.cache_hierarchy.ruby_system.controllers.sequencer: Cache miss at [0x100, line 0x100]
+ 254000: system.cache_hierarchy.ruby_system.controllers.sequencer: Cache miss at [0x180, line 0x180]
+ 271000: system.cache_hierarchy.ruby_system.controllers.sequencer: Cache hit at [0x0, line 0x0]
+ 331000: system.cache_hierarchy.ruby_system.controllers.sequencer: Cache hit at [0x80, line 0x80]
+ 439000: system.cache_hierarchy.ruby_system.controllers.sequencer: Cache miss at [0x200, line 0x200]
+ 499000: system.cache_hierarchy.ruby_system.controllers.sequencer: Cache miss at [0x280, line 0x280]
+ 559000: system.cache_hierarchy.ruby_system.controllers.sequencer: Cache miss at [0x300, line 0x300]
+ 619000: system.cache_hierarchy.ruby_system.controllers.sequencer: Cache miss at [0x380, line 0x380]
+ 679000: system.cache_hierarchy.ruby_system.controllers.sequencer: Cache miss at [0x100, line 0x100]
+ 739000: system.cache_hierarchy.ruby_system.controllers.sequencer: Cache miss at [0x180, line 0x180]
+ 751000: system.cache_hierarchy.ruby_system.controllers.sequencer: Cache hit at [0x100, line 0x100]
+ 811000: system.cache_hierarchy.ruby_system.controllers.sequencer: Cache hit at [0x180, line 0x180]
+.
diff --git a/tests/gem5/replacement-policies/ref/lru_test2_st b/tests/gem5/replacement-policies/ref/lru_test2_st
new file mode 100644
index 0000000000..d077d93a3b
--- /dev/null
+++ b/tests/gem5/replacement-policies/ref/lru_test2_st
@@ -0,0 +1,17 @@
+Global frequency set at 1000000000000 ticks per second
+Beginning simulation!
+  74000: system.cache_hierarchy.ruby_system.controllers.sequencer: Cache miss at [0x0, line 0x0]
+ 134000: system.cache_hierarchy.ruby_system.controllers.sequencer: Cache miss at [0x80, line 0x80]
+ 194000: system.cache_hierarchy.ruby_system.controllers.sequencer: Cache miss at [0x100, line 0x100]
+ 254000: system.cache_hierarchy.ruby_system.controllers.sequencer: Cache miss at [0x180, line 0x180]
+ 271000: system.cache_hierarchy.ruby_system.controllers.sequencer: Cache hit at [0x0, line 0x0]
+ 331000: system.cache_hierarchy.ruby_system.controllers.sequencer: Cache hit at [0x80, line 0x80]
+ 439000: system.cache_hierarchy.ruby_system.controllers.sequencer: Cache miss at [0x200, line 0x200]
+ 499000: system.cache_hierarchy.ruby_system.controllers.sequencer: Cache miss at [0x280, line 0x280]
+ 559000: system.cache_hierarchy.ruby_system.controllers.sequencer: Cache miss at [0x300, line 0x300]
+ 619000: system.cache_hierarchy.ruby_system.controllers.sequencer: Cache miss at [0x380, line 0x380]
+ 679000: system.cache_hierarchy.ruby_system.controllers.sequencer: Cache miss at [0x100, line 0x100]
+ 739000: system.cache_hierarchy.ruby_system.controllers.sequencer: Cache miss at [0x180, line 0x180]
+ 751000: system.cache_hierarchy.ruby_system.controllers.sequencer: Cache hit at [0x100, line 0x100]
+ 811000: system.cache_hierarchy.ruby_system.controllers.sequencer: Cache hit at [0x180, line 0x180]
+.
diff --git a/tests/gem5/replacement-policies/ref/lru_test3_ld b/tests/gem5/replacement-policies/ref/lru_test3_ld
new file mode 100644
index 0000000000..4a5252f612
--- /dev/null
+++ b/tests/gem5/replacement-policies/ref/lru_test3_ld
@@ -0,0 +1,13 @@
+Global frequency set at 1000000000000 ticks per second
+Beginning simulation!
+  74000: system.cache_hierarchy.ruby_system.controllers.sequencer: Cache miss at [0x0, line 0x0]
+ 134000: system.cache_hierarchy.ruby_system.controllers.sequencer: Cache miss at [0x80, line 0x80]
+ 194000: system.cache_hierarchy.ruby_system.controllers.sequencer: Cache miss at [0x100, line 0x100]
+ 254000: system.cache_hierarchy.ruby_system.controllers.sequencer: Cache miss at [0x180, line 0x180]
+ 271000: system.cache_hierarchy.ruby_system.controllers.sequencer: Cache hit at [0x0, line 0x0]
+ 331000: system.cache_hierarchy.ruby_system.controllers.sequencer: Cache hit at [0x80, line 0x80]
+ 391000: system.cache_hierarchy.ruby_system.controllers.sequencer: Cache hit at [0x100, line 0x100]
+ 451000: system.cache_hierarchy.ruby_system.controllers.sequencer: Cache hit at [0x180, line 0x180]
+ 559000: system.cache_hierarchy.ruby_system.controllers.sequencer: Cache miss at [0x200, line 0x200]
+ 619000: system.cache_hierarchy.ruby_system.controllers.sequencer: Cache miss at [0x0, line 0x0]
+.
diff --git a/tests/gem5/replacement-policies/ref/lru_test3_st b/tests/gem5/replacement-policies/ref/lru_test3_st
new file mode 100644
index 0000000000..4a5252f612
--- /dev/null
+++ b/tests/gem5/replacement-policies/ref/lru_test3_st
@@ -0,0 +1,13 @@
+Global frequency set at 1000000000000 ticks per second
+Beginning simulation!
+  74000: system.cache_hierarchy.ruby_system.controllers.sequencer: Cache miss at [0x0, line 0x0]
+ 134000: system.cache_hierarchy.ruby_system.controllers.sequencer: Cache miss at [0x80, line 0x80]
+ 194000: system.cache_hierarchy.ruby_system.controllers.sequencer: Cache miss at [0x100, line 0x100]
+ 254000: system.cache_hierarchy.ruby_system.controllers.sequencer: Cache miss at [0x180, line 0x180]
+ 271000: system.cache_hierarchy.ruby_system.controllers.sequencer: Cache hit at [0x0, line 0x0]
+ 331000: system.cache_hierarchy.ruby_system.controllers.sequencer: Cache hit at [0x80, line 0x80]
+ 391000: system.cache_hierarchy.ruby_system.controllers.sequencer: Cache hit at [0x100, line 0x100]
+ 451000: system.cache_hierarchy.ruby_system.controllers.sequencer: Cache hit at [0x180, line 0x180]
+ 559000: system.cache_hierarchy.ruby_system.controllers.sequencer: Cache miss at [0x200, line 0x200]
+ 619000: system.cache_hierarchy.ruby_system.controllers.sequencer: Cache miss at [0x0, line 0x0]
+.
diff --git a/tests/gem5/replacement-policies/ref/lru_test4_ld b/tests/gem5/replacement-policies/ref/lru_test4_ld
new file mode 100644
index 0000000000..93509a9c11
--- /dev/null
+++ b/tests/gem5/replacement-policies/ref/lru_test4_ld
@@ -0,0 +1,13 @@
+Global frequency set at 1000000000000 ticks per second
+Beginning simulation!
+  74000: system.cache_hierarchy.ruby_system.controllers.sequencer: Cache miss at [0x0, line 0x0]
+ 134000: system.cache_hierarchy.ruby_system.controllers.sequencer: Cache miss at [0x80, line 0x80]
+ 194000: system.cache_hierarchy.ruby_system.controllers.sequencer: Cache miss at [0x100, line 0x100]
+ 254000: system.cache_hierarchy.ruby_system.controllers.sequencer: Cache miss at [0x180, line 0x180]
+ 271000: system.cache_hierarchy.ruby_system.controllers.sequencer: Cache hit at [0x100, line 0x100]
+ 331000: system.cache_hierarchy.ruby_system.controllers.sequencer: Cache hit at [0x180, line 0x180]
+ 391000: system.cache_hierarchy.ruby_system.controllers.sequencer: Cache hit at [0x0, line 0x0]
+ 451000: system.cache_hierarchy.ruby_system.controllers.sequencer: Cache hit at [0x80, line 0x80]
+ 559000: system.cache_hierarchy.ruby_system.controllers.sequencer: Cache miss at [0x200, line 0x200]
+ 619000: system.cache_hierarchy.ruby_system.controllers.sequencer: Cache miss at [0x100, line 0x100]
+.
diff --git a/tests/gem5/replacement-policies/ref/lru_test4_st b/tests/gem5/replacement-policies/ref/lru_test4_st
new file mode 100644
index 0000000000..93509a9c11
--- /dev/null
+++ b/tests/gem5/replacement-policies/ref/lru_test4_st
@@ -0,0 +1,13 @@
+Global frequency set at 1000000000000 ticks per second
+Beginning simulation!
+  74000: system.cache_hierarchy.ruby_system.controllers.sequencer: Cache miss at [0x0, line 0x0]
+ 134000: system.cache_hierarchy.ruby_system.controllers.sequencer: Cache miss at [0x80, line 0x80]
+ 194000: system.cache_hierarchy.ruby_system.controllers.sequencer: Cache miss at [0x100, line 0x100]
+ 254000: system.cache_hierarchy.ruby_system.controllers.sequencer: Cache miss at [0x180, line 0x180]
+ 271000: system.cache_hierarchy.ruby_system.controllers.sequencer: Cache hit at [0x100, line 0x100]
+ 331000: system.cache_hierarchy.ruby_system.controllers.sequencer: Cache hit at [0x180, line 0x180]
+ 391000: system.cache_hierarchy.ruby_system.controllers.sequencer: Cache hit at [0x0, line 0x0]
+ 451000: system.cache_hierarchy.ruby_system.controllers.sequencer: Cache hit at [0x80, line 0x80]
+ 559000: system.cache_hierarchy.ruby_system.controllers.sequencer: Cache miss at [0x200, line 0x200]
+ 619000: system.cache_hierarchy.ruby_system.controllers.sequencer: Cache miss at [0x100, line 0x100]
+.
diff --git a/tests/gem5/replacement-policies/ref/mru_test1_ld b/tests/gem5/replacement-policies/ref/mru_test1_ld
new file mode 100644
index 0000000000..ff596b6627
--- /dev/null
+++ b/tests/gem5/replacement-policies/ref/mru_test1_ld
@@ -0,0 +1,13 @@
+Global frequency set at 1000000000000 ticks per second
+Beginning simulation!
+  74000: system.cache_hierarchy.ruby_system.controllers.sequencer: Cache miss at [0x0, line 0x0]
+ 134000: system.cache_hierarchy.ruby_system.controllers.sequencer: Cache miss at [0x80, line 0x80]
+ 194000: system.cache_hierarchy.ruby_system.controllers.sequencer: Cache miss at [0x100, line 0x100]
+ 254000: system.cache_hierarchy.ruby_system.controllers.sequencer: Cache miss at [0x180, line 0x180]
+ 319000: system.cache_hierarchy.ruby_system.controllers.sequencer: Cache miss at [0x200, line 0x200]
+ 379000: system.cache_hierarchy.ruby_system.controllers.sequencer: Cache miss at [0x280, line 0x280]
+ 439000: system.cache_hierarchy.ruby_system.controllers.sequencer: Cache miss at [0x300, line 0x300]
+ 499000: system.cache_hierarchy.ruby_system.controllers.sequencer: Cache miss at [0x380, line 0x380]
+ 511000: system.cache_hierarchy.ruby_system.controllers.sequencer: Cache hit at [0x0, line 0x0]
+ 571000: system.cache_hierarchy.ruby_system.controllers.sequencer: Cache hit at [0x80, line 0x80]
+.
diff --git a/tests/gem5/replacement-policies/ref/mru_test1_st b/tests/gem5/replacement-policies/ref/mru_test1_st
new file mode 100644
index 0000000000..ff596b6627
--- /dev/null
+++ b/tests/gem5/replacement-policies/ref/mru_test1_st
@@ -0,0 +1,13 @@
+Global frequency set at 1000000000000 ticks per second
+Beginning simulation!
+  74000: system.cache_hierarchy.ruby_system.controllers.sequencer: Cache miss at [0x0, line 0x0]
+ 134000: system.cache_hierarchy.ruby_system.controllers.sequencer: Cache miss at [0x80, line 0x80]
+ 194000: system.cache_hierarchy.ruby_system.controllers.sequencer: Cache miss at [0x100, line 0x100]
+ 254000: system.cache_hierarchy.ruby_system.controllers.sequencer: Cache miss at [0x180, line 0x180]
+ 319000: system.cache_hierarchy.ruby_system.controllers.sequencer: Cache miss at [0x200, line 0x200]
+ 379000: system.cache_hierarchy.ruby_system.controllers.sequencer: Cache miss at [0x280, line 0x280]
+ 439000: system.cache_hierarchy.ruby_system.controllers.sequencer: Cache miss at [0x300, line 0x300]
+ 499000: system.cache_hierarchy.ruby_system.controllers.sequencer: Cache miss at [0x380, line 0x380]
+ 511000: system.cache_hierarchy.ruby_system.controllers.sequencer: Cache hit at [0x0, line 0x0]
+ 571000: system.cache_hierarchy.ruby_system.controllers.sequencer: Cache hit at [0x80, line 0x80]
+.
diff --git a/tests/gem5/replacement-policies/ref/mru_test2_ld b/tests/gem5/replacement-policies/ref/mru_test2_ld
new file mode 100644
index 0000000000..f1d0fa1b63
--- /dev/null
+++ b/tests/gem5/replacement-policies/ref/mru_test2_ld
@@ -0,0 +1,14 @@
+Global frequency set at 1000000000000 ticks per second
+Beginning simulation!
+  74000: system.cache_hierarchy.ruby_system.controllers.sequencer: Cache miss at [0x0, line 0x0]
+ 134000: system.cache_hierarchy.ruby_system.controllers.sequencer: Cache miss at [0x80, line 0x80]
+ 194000: system.cache_hierarchy.ruby_system.controllers.sequencer: Cache miss at [0x100, line 0x100]
+ 254000: system.cache_hierarchy.ruby_system.controllers.sequencer: Cache miss at [0x180, line 0x180]
+ 271000: system.cache_hierarchy.ruby_system.controllers.sequencer: Cache hit at [0x0, line 0x0]
+ 379000: system.cache_hierarchy.ruby_system.controllers.sequencer: Cache miss at [0x200, line 0x200]
+ 439000: system.cache_hierarchy.ruby_system.controllers.sequencer: Cache miss at [0x280, line 0x280]
+ 499000: system.cache_hierarchy.ruby_system.controllers.sequencer: Cache miss at [0x300, line 0x300]
+ 559000: system.cache_hierarchy.ruby_system.controllers.sequencer: Cache miss at [0x380, line 0x380]
+ 619000: system.cache_hierarchy.ruby_system.controllers.sequencer: Cache miss at [0x0, line 0x0]
+ 631000: system.cache_hierarchy.ruby_system.controllers.sequencer: Cache hit at [0x180, line 0x180]
+.
diff --git a/tests/gem5/replacement-policies/ref/mru_test2_st b/tests/gem5/replacement-policies/ref/mru_test2_st
new file mode 100644
index 0000000000..f1d0fa1b63
--- /dev/null
+++ b/tests/gem5/replacement-policies/ref/mru_test2_st
@@ -0,0 +1,14 @@
+Global frequency set at 1000000000000 ticks per second
+Beginning simulation!
+  74000: system.cache_hierarchy.ruby_system.controllers.sequencer: Cache miss at [0x0, line 0x0]
+ 134000: system.cache_hierarchy.ruby_system.controllers.sequencer: Cache miss at [0x80, line 0x80]
+ 194000: system.cache_hierarchy.ruby_system.controllers.sequencer: Cache miss at [0x100, line 0x100]
+ 254000: system.cache_hierarchy.ruby_system.controllers.sequencer: Cache miss at [0x180, line 0x180]
+ 271000: system.cache_hierarchy.ruby_system.controllers.sequencer: Cache hit at [0x0, line 0x0]
+ 379000: system.cache_hierarchy.ruby_system.controllers.sequencer: Cache miss at [0x200, line 0x200]
+ 439000: system.cache_hierarchy.ruby_system.controllers.sequencer: Cache miss at [0x280, line 0x280]
+ 499000: system.cache_hierarchy.ruby_system.controllers.sequencer: Cache miss at [0x300, line 0x300]
+ 559000: system.cache_hierarchy.ruby_system.controllers.sequencer: Cache miss at [0x380, line 0x380]
+ 619000: system.cache_hierarchy.ruby_system.controllers.sequencer: Cache miss at [0x0, line 0x0]
+ 631000: system.cache_hierarchy.ruby_system.controllers.sequencer: Cache hit at [0x180, line 0x180]
+.
diff --git a/tests/gem5/replacement-policies/ref/nru_test1_ld b/tests/gem5/replacement-policies/ref/nru_test1_ld
new file mode 100644
index 0000000000..7245bc30c9
--- /dev/null
+++ b/tests/gem5/replacement-policies/ref/nru_test1_ld
@@ -0,0 +1,14 @@
+Global frequency set at 1000000000000 ticks per second
+Beginning simulation!
+  74000: system.cache_hierarchy.ruby_system.controllers.sequencer: Cache miss at [0x0, line 0x0]
+ 134000: system.cache_hierarchy.ruby_system.controllers.sequencer: Cache miss at [0x80, line 0x80]
+ 194000: system.cache_hierarchy.ruby_system.controllers.sequencer: Cache miss at [0x100, line 0x100]
+ 254000: system.cache_hierarchy.ruby_system.controllers.sequencer: Cache miss at [0x180, line 0x180]
+ 271000: system.cache_hierarchy.ruby_system.controllers.sequencer: Cache hit at [0x0, line 0x0]
+ 379000: system.cache_hierarchy.ruby_system.controllers.sequencer: Cache miss at [0x200, line 0x200]
+ 439000: system.cache_hierarchy.ruby_system.controllers.sequencer: Cache miss at [0x0, line 0x0]
+ 451000: system.cache_hierarchy.ruby_system.controllers.sequencer: Cache hit at [0x100, line 0x100]
+ 559000: system.cache_hierarchy.ruby_system.controllers.sequencer: Cache miss at [0x280, line 0x280]
+ 571000: system.cache_hierarchy.ruby_system.controllers.sequencer: Cache hit at [0x100, line 0x100]
+ 679000: system.cache_hierarchy.ruby_system.controllers.sequencer: Cache miss at [0x180, line 0x180]
+.
diff --git a/tests/gem5/replacement-policies/ref/nru_test1_st b/tests/gem5/replacement-policies/ref/nru_test1_st
new file mode 100644
index 0000000000..7245bc30c9
--- /dev/null
+++ b/tests/gem5/replacement-policies/ref/nru_test1_st
@@ -0,0 +1,14 @@
+Global frequency set at 1000000000000 ticks per second
+Beginning simulation!
+  74000: system.cache_hierarchy.ruby_system.controllers.sequencer: Cache miss at [0x0, line 0x0]
+ 134000: system.cache_hierarchy.ruby_system.controllers.sequencer: Cache miss at [0x80, line 0x80]
+ 194000: system.cache_hierarchy.ruby_system.controllers.sequencer: Cache miss at [0x100, line 0x100]
+ 254000: system.cache_hierarchy.ruby_system.controllers.sequencer: Cache miss at [0x180, line 0x180]
+ 271000: system.cache_hierarchy.ruby_system.controllers.sequencer: Cache hit at [0x0, line 0x0]
+ 379000: system.cache_hierarchy.ruby_system.controllers.sequencer: Cache miss at [0x200, line 0x200]
+ 439000: system.cache_hierarchy.ruby_system.controllers.sequencer: Cache miss at [0x0, line 0x0]
+ 451000: system.cache_hierarchy.ruby_system.controllers.sequencer: Cache hit at [0x100, line 0x100]
+ 559000: system.cache_hierarchy.ruby_system.controllers.sequencer: Cache miss at [0x280, line 0x280]
+ 571000: system.cache_hierarchy.ruby_system.controllers.sequencer: Cache hit at [0x100, line 0x100]
+ 679000: system.cache_hierarchy.ruby_system.controllers.sequencer: Cache miss at [0x180, line 0x180]
+.
diff --git a/tests/gem5/replacement-policies/ref/rrip_test1_ld b/tests/gem5/replacement-policies/ref/rrip_test1_ld
new file mode 100644
index 0000000000..c8d4b70917
--- /dev/null
+++ b/tests/gem5/replacement-policies/ref/rrip_test1_ld
@@ -0,0 +1,13 @@
+Global frequency set at 1000000000000 ticks per second
+Beginning simulation!
+  74000: system.cache_hierarchy.ruby_system.controllers.sequencer: Cache miss at [0x0, line 0x0]
+ 134000: system.cache_hierarchy.ruby_system.controllers.sequencer: Cache miss at [0x80, line 0x80]
+ 194000: system.cache_hierarchy.ruby_system.controllers.sequencer: Cache miss at [0x100, line 0x100]
+ 254000: system.cache_hierarchy.ruby_system.controllers.sequencer: Cache miss at [0x180, line 0x180]
+ 271000: system.cache_hierarchy.ruby_system.controllers.sequencer: Cache hit at [0x0, line 0x0]
+ 379000: system.cache_hierarchy.ruby_system.controllers.sequencer: Cache miss at [0x200, line 0x200]
+ 439000: system.cache_hierarchy.ruby_system.controllers.sequencer: Cache miss at [0x280, line 0x280]
+ 499000: system.cache_hierarchy.ruby_system.controllers.sequencer: Cache miss at [0x300, line 0x300]
+ 559000: system.cache_hierarchy.ruby_system.controllers.sequencer: Cache miss at [0x80, line 0x80]
+ 619000: system.cache_hierarchy.ruby_system.controllers.sequencer: Cache miss at [0x0, line 0x0]
+.
diff --git a/tests/gem5/replacement-policies/ref/rrip_test1_st b/tests/gem5/replacement-policies/ref/rrip_test1_st
new file mode 100644
index 0000000000..c8d4b70917
--- /dev/null
+++ b/tests/gem5/replacement-policies/ref/rrip_test1_st
@@ -0,0 +1,13 @@
+Global frequency set at 1000000000000 ticks per second
+Beginning simulation!
+  74000: system.cache_hierarchy.ruby_system.controllers.sequencer: Cache miss at [0x0, line 0x0]
+ 134000: system.cache_hierarchy.ruby_system.controllers.sequencer: Cache miss at [0x80, line 0x80]
+ 194000: system.cache_hierarchy.ruby_system.controllers.sequencer: Cache miss at [0x100, line 0x100]
+ 254000: system.cache_hierarchy.ruby_system.controllers.sequencer: Cache miss at [0x180, line 0x180]
+ 271000: system.cache_hierarchy.ruby_system.controllers.sequencer: Cache hit at [0x0, line 0x0]
+ 379000: system.cache_hierarchy.ruby_system.controllers.sequencer: Cache miss at [0x200, line 0x200]
+ 439000: system.cache_hierarchy.ruby_system.controllers.sequencer: Cache miss at [0x280, line 0x280]
+ 499000: system.cache_hierarchy.ruby_system.controllers.sequencer: Cache miss at [0x300, line 0x300]
+ 559000: system.cache_hierarchy.ruby_system.controllers.sequencer: Cache miss at [0x80, line 0x80]
+ 619000: system.cache_hierarchy.ruby_system.controllers.sequencer: Cache miss at [0x0, line 0x0]
+.
diff --git a/tests/gem5/replacement-policies/ref/rrip_test2_ld b/tests/gem5/replacement-policies/ref/rrip_test2_ld
new file mode 100644
index 0000000000..e2c8c83028
--- /dev/null
+++ b/tests/gem5/replacement-policies/ref/rrip_test2_ld
@@ -0,0 +1,16 @@
+Global frequency set at 1000000000000 ticks per second
+Beginning simulation!
+  74000: system.cache_hierarchy.ruby_system.controllers.sequencer: Cache miss at [0x0, line 0x0]
+  91000: system.cache_hierarchy.ruby_system.controllers.sequencer: Cache hit at [0x0, line 0x0]
+ 151000: system.cache_hierarchy.ruby_system.controllers.sequencer: Cache hit at [0x0, line 0x0]
+ 254000: system.cache_hierarchy.ruby_system.controllers.sequencer: Cache miss at [0x80, line 0x80]
+ 271000: system.cache_hierarchy.ruby_system.controllers.sequencer: Cache hit at [0x80, line 0x80]
+ 374000: system.cache_hierarchy.ruby_system.controllers.sequencer: Cache miss at [0x100, line 0x100]
+ 391000: system.cache_hierarchy.ruby_system.controllers.sequencer: Cache hit at [0x100, line 0x100]
+ 494000: system.cache_hierarchy.ruby_system.controllers.sequencer: Cache miss at [0x180, line 0x180]
+ 559000: system.cache_hierarchy.ruby_system.controllers.sequencer: Cache miss at [0x200, line 0x200]
+ 619000: system.cache_hierarchy.ruby_system.controllers.sequencer: Cache miss at [0x280, line 0x280]
+ 679000: system.cache_hierarchy.ruby_system.controllers.sequencer: Cache miss at [0x300, line 0x300]
+ 739000: system.cache_hierarchy.ruby_system.controllers.sequencer: Cache miss at [0x380, line 0x380]
+ 751000: system.cache_hierarchy.ruby_system.controllers.sequencer: Cache hit at [0x0, line 0x0]
+.
diff --git a/tests/gem5/replacement-policies/ref/rrip_test2_st b/tests/gem5/replacement-policies/ref/rrip_test2_st
new file mode 100644
index 0000000000..e2c8c83028
--- /dev/null
+++ b/tests/gem5/replacement-policies/ref/rrip_test2_st
@@ -0,0 +1,16 @@
+Global frequency set at 1000000000000 ticks per second
+Beginning simulation!
+  74000: system.cache_hierarchy.ruby_system.controllers.sequencer: Cache miss at [0x0, line 0x0]
+  91000: system.cache_hierarchy.ruby_system.controllers.sequencer: Cache hit at [0x0, line 0x0]
+ 151000: system.cache_hierarchy.ruby_system.controllers.sequencer: Cache hit at [0x0, line 0x0]
+ 254000: system.cache_hierarchy.ruby_system.controllers.sequencer: Cache miss at [0x80, line 0x80]
+ 271000: system.cache_hierarchy.ruby_system.controllers.sequencer: Cache hit at [0x80, line 0x80]
+ 374000: system.cache_hierarchy.ruby_system.controllers.sequencer: Cache miss at [0x100, line 0x100]
+ 391000: system.cache_hierarchy.ruby_system.controllers.sequencer: Cache hit at [0x100, line 0x100]
+ 494000: system.cache_hierarchy.ruby_system.controllers.sequencer: Cache miss at [0x180, line 0x180]
+ 559000: system.cache_hierarchy.ruby_system.controllers.sequencer: Cache miss at [0x200, line 0x200]
+ 619000: system.cache_hierarchy.ruby_system.controllers.sequencer: Cache miss at [0x280, line 0x280]
+ 679000: system.cache_hierarchy.ruby_system.controllers.sequencer: Cache miss at [0x300, line 0x300]
+ 739000: system.cache_hierarchy.ruby_system.controllers.sequencer: Cache miss at [0x380, line 0x380]
+ 751000: system.cache_hierarchy.ruby_system.controllers.sequencer: Cache hit at [0x0, line 0x0]
+.
diff --git a/tests/gem5/replacement-policies/ref/second_chance_test1_ld b/tests/gem5/replacement-policies/ref/second_chance_test1_ld
new file mode 100644
index 0000000000..96d56df962
--- /dev/null
+++ b/tests/gem5/replacement-policies/ref/second_chance_test1_ld
@@ -0,0 +1,13 @@
+Global frequency set at 1000000000000 ticks per second
+Beginning simulation!
+  74000: system.cache_hierarchy.ruby_system.controllers.sequencer: Cache miss at [0x0, line 0x0]
+ 134000: system.cache_hierarchy.ruby_system.controllers.sequencer: Cache miss at [0x80, line 0x80]
+ 194000: system.cache_hierarchy.ruby_system.controllers.sequencer: Cache miss at [0x100, line 0x100]
+ 254000: system.cache_hierarchy.ruby_system.controllers.sequencer: Cache miss at [0x180, line 0x180]
+ 271000: system.cache_hierarchy.ruby_system.controllers.sequencer: Cache hit at [0x0, line 0x0]
+ 331000: system.cache_hierarchy.ruby_system.controllers.sequencer: Cache hit at [0x80, line 0x80]
+ 439000: system.cache_hierarchy.ruby_system.controllers.sequencer: Cache miss at [0x200, line 0x200]
+ 499000: system.cache_hierarchy.ruby_system.controllers.sequencer: Cache miss at [0x280, line 0x280]
+ 511000: system.cache_hierarchy.ruby_system.controllers.sequencer: Cache hit at [0x0, line 0x0]
+ 571000: system.cache_hierarchy.ruby_system.controllers.sequencer: Cache hit at [0x80, line 0x80]
+.
diff --git a/tests/gem5/replacement-policies/ref/second_chance_test1_st b/tests/gem5/replacement-policies/ref/second_chance_test1_st
new file mode 100644
index 0000000000..96d56df962
--- /dev/null
+++ b/tests/gem5/replacement-policies/ref/second_chance_test1_st
@@ -0,0 +1,13 @@
+Global frequency set at 1000000000000 ticks per second
+Beginning simulation!
+  74000: system.cache_hierarchy.ruby_system.controllers.sequencer: Cache miss at [0x0, line 0x0]
+ 134000: system.cache_hierarchy.ruby_system.controllers.sequencer: Cache miss at [0x80, line 0x80]
+ 194000: system.cache_hierarchy.ruby_system.controllers.sequencer: Cache miss at [0x100, line 0x100]
+ 254000: system.cache_hierarchy.ruby_system.controllers.sequencer: Cache miss at [0x180, line 0x180]
+ 271000: system.cache_hierarchy.ruby_system.controllers.sequencer: Cache hit at [0x0, line 0x0]
+ 331000: system.cache_hierarchy.ruby_system.controllers.sequencer: Cache hit at [0x80, line 0x80]
+ 439000: system.cache_hierarchy.ruby_system.controllers.sequencer: Cache miss at [0x200, line 0x200]
+ 499000: system.cache_hierarchy.ruby_system.controllers.sequencer: Cache miss at [0x280, line 0x280]
+ 511000: system.cache_hierarchy.ruby_system.controllers.sequencer: Cache hit at [0x0, line 0x0]
+ 571000: system.cache_hierarchy.ruby_system.controllers.sequencer: Cache hit at [0x80, line 0x80]
+.
diff --git a/tests/gem5/replacement-policies/ref/second_chance_test2_ld b/tests/gem5/replacement-policies/ref/second_chance_test2_ld
new file mode 100644
index 0000000000..c9ab1abd81
--- /dev/null
+++ b/tests/gem5/replacement-policies/ref/second_chance_test2_ld
@@ -0,0 +1,16 @@
+Global frequency set at 1000000000000 ticks per second
+Beginning simulation!
+  74000: system.cache_hierarchy.ruby_system.controllers.sequencer: Cache miss at [0x0, line 0x0]
+ 134000: system.cache_hierarchy.ruby_system.controllers.sequencer: Cache miss at [0x80, line 0x80]
+ 194000: system.cache_hierarchy.ruby_system.controllers.sequencer: Cache miss at [0x100, line 0x100]
+ 254000: system.cache_hierarchy.ruby_system.controllers.sequencer: Cache miss at [0x180, line 0x180]
+ 271000: system.cache_hierarchy.ruby_system.controllers.sequencer: Cache hit at [0x0, line 0x0]
+ 331000: system.cache_hierarchy.ruby_system.controllers.sequencer: Cache hit at [0x80, line 0x80]
+ 391000: system.cache_hierarchy.ruby_system.controllers.sequencer: Cache hit at [0x100, line 0x100]
+ 451000: system.cache_hierarchy.ruby_system.controllers.sequencer: Cache hit at [0x180, line 0x180]
+ 559000: system.cache_hierarchy.ruby_system.controllers.sequencer: Cache miss at [0x200, line 0x200]
+ 619000: system.cache_hierarchy.ruby_system.controllers.sequencer: Cache miss at [0x0, line 0x0]
+ 679000: system.cache_hierarchy.ruby_system.controllers.sequencer: Cache miss at [0x80, line 0x80]
+ 739000: system.cache_hierarchy.ruby_system.controllers.sequencer: Cache miss at [0x100, line 0x100]
+ 799000: system.cache_hierarchy.ruby_system.controllers.sequencer: Cache miss at [0x180, line 0x180]
+.
diff --git a/tests/gem5/replacement-policies/ref/second_chance_test2_st b/tests/gem5/replacement-policies/ref/second_chance_test2_st
new file mode 100644
index 0000000000..c9ab1abd81
--- /dev/null
+++ b/tests/gem5/replacement-policies/ref/second_chance_test2_st
@@ -0,0 +1,16 @@
+Global frequency set at 1000000000000 ticks per second
+Beginning simulation!
+  74000: system.cache_hierarchy.ruby_system.controllers.sequencer: Cache miss at [0x0, line 0x0]
+ 134000: system.cache_hierarchy.ruby_system.controllers.sequencer: Cache miss at [0x80, line 0x80]
+ 194000: system.cache_hierarchy.ruby_system.controllers.sequencer: Cache miss at [0x100, line 0x100]
+ 254000: system.cache_hierarchy.ruby_system.controllers.sequencer: Cache miss at [0x180, line 0x180]
+ 271000: system.cache_hierarchy.ruby_system.controllers.sequencer: Cache hit at [0x0, line 0x0]
+ 331000: system.cache_hierarchy.ruby_system.controllers.sequencer: Cache hit at [0x80, line 0x80]
+ 391000: system.cache_hierarchy.ruby_system.controllers.sequencer: Cache hit at [0x100, line 0x100]
+ 451000: system.cache_hierarchy.ruby_system.controllers.sequencer: Cache hit at [0x180, line 0x180]
+ 559000: system.cache_hierarchy.ruby_system.controllers.sequencer: Cache miss at [0x200, line 0x200]
+ 619000: system.cache_hierarchy.ruby_system.controllers.sequencer: Cache miss at [0x0, line 0x0]
+ 679000: system.cache_hierarchy.ruby_system.controllers.sequencer: Cache miss at [0x80, line 0x80]
+ 739000: system.cache_hierarchy.ruby_system.controllers.sequencer: Cache miss at [0x100, line 0x100]
+ 799000: system.cache_hierarchy.ruby_system.controllers.sequencer: Cache miss at [0x180, line 0x180]
+.
diff --git a/tests/gem5/replacement-policies/ref/second_chance_test3_ld b/tests/gem5/replacement-policies/ref/second_chance_test3_ld
new file mode 100644
index 0000000000..f860d5cd14
--- /dev/null
+++ b/tests/gem5/replacement-policies/ref/second_chance_test3_ld
@@ -0,0 +1,18 @@
+Global frequency set at 1000000000000 ticks per second
+Beginning simulation!
+  74000: system.cache_hierarchy.ruby_system.controllers.sequencer: Cache miss at [0x0, line 0x0]
+ 134000: system.cache_hierarchy.ruby_system.controllers.sequencer: Cache miss at [0x80, line 0x80]
+ 194000: system.cache_hierarchy.ruby_system.controllers.sequencer: Cache miss at [0x100, line 0x100]
+ 254000: system.cache_hierarchy.ruby_system.controllers.sequencer: Cache miss at [0x180, line 0x180]
+ 271000: system.cache_hierarchy.ruby_system.controllers.sequencer: Cache hit at [0x0, line 0x0]
+ 331000: system.cache_hierarchy.ruby_system.controllers.sequencer: Cache hit at [0x80, line 0x80]
+ 391000: system.cache_hierarchy.ruby_system.controllers.sequencer: Cache hit at [0x100, line 0x100]
+ 451000: system.cache_hierarchy.ruby_system.controllers.sequencer: Cache hit at [0x180, line 0x180]
+ 511000: system.cache_hierarchy.ruby_system.controllers.sequencer: Cache hit at [0x100, line 0x100]
+ 619000: system.cache_hierarchy.ruby_system.controllers.sequencer: Cache miss at [0x200, line 0x200]
+ 679000: system.cache_hierarchy.ruby_system.controllers.sequencer: Cache miss at [0x0, line 0x0]
+ 739000: system.cache_hierarchy.ruby_system.controllers.sequencer: Cache miss at [0x80, line 0x80]
+ 751000: system.cache_hierarchy.ruby_system.controllers.sequencer: Cache hit at [0x180, line 0x180]
+ 859000: system.cache_hierarchy.ruby_system.controllers.sequencer: Cache miss at [0x100, line 0x100]
+ 871000: system.cache_hierarchy.ruby_system.controllers.sequencer: Cache hit at [0x180, line 0x180]
+.
diff --git a/tests/gem5/replacement-policies/ref/second_chance_test3_st b/tests/gem5/replacement-policies/ref/second_chance_test3_st
new file mode 100644
index 0000000000..f860d5cd14
--- /dev/null
+++ b/tests/gem5/replacement-policies/ref/second_chance_test3_st
@@ -0,0 +1,18 @@
+Global frequency set at 1000000000000 ticks per second
+Beginning simulation!
+  74000: system.cache_hierarchy.ruby_system.controllers.sequencer: Cache miss at [0x0, line 0x0]
+ 134000: system.cache_hierarchy.ruby_system.controllers.sequencer: Cache miss at [0x80, line 0x80]
+ 194000: system.cache_hierarchy.ruby_system.controllers.sequencer: Cache miss at [0x100, line 0x100]
+ 254000: system.cache_hierarchy.ruby_system.controllers.sequencer: Cache miss at [0x180, line 0x180]
+ 271000: system.cache_hierarchy.ruby_system.controllers.sequencer: Cache hit at [0x0, line 0x0]
+ 331000: system.cache_hierarchy.ruby_system.controllers.sequencer: Cache hit at [0x80, line 0x80]
+ 391000: system.cache_hierarchy.ruby_system.controllers.sequencer: Cache hit at [0x100, line 0x100]
+ 451000: system.cache_hierarchy.ruby_system.controllers.sequencer: Cache hit at [0x180, line 0x180]
+ 511000: system.cache_hierarchy.ruby_system.controllers.sequencer: Cache hit at [0x100, line 0x100]
+ 619000: system.cache_hierarchy.ruby_system.controllers.sequencer: Cache miss at [0x200, line 0x200]
+ 679000: system.cache_hierarchy.ruby_system.controllers.sequencer: Cache miss at [0x0, line 0x0]
+ 739000: system.cache_hierarchy.ruby_system.controllers.sequencer: Cache miss at [0x80, line 0x80]
+ 751000: system.cache_hierarchy.ruby_system.controllers.sequencer: Cache hit at [0x180, line 0x180]
+ 859000: system.cache_hierarchy.ruby_system.controllers.sequencer: Cache miss at [0x100, line 0x100]
+ 871000: system.cache_hierarchy.ruby_system.controllers.sequencer: Cache hit at [0x180, line 0x180]
+.
diff --git a/tests/gem5/replacement-policies/ref/tree_plru_test1_ld b/tests/gem5/replacement-policies/ref/tree_plru_test1_ld
new file mode 100644
index 0000000000..6a94bac187
--- /dev/null
+++ b/tests/gem5/replacement-policies/ref/tree_plru_test1_ld
@@ -0,0 +1,13 @@
+Global frequency set at 1000000000000 ticks per second
+Beginning simulation!
+  74000: system.cache_hierarchy.ruby_system.controllers.sequencer: Cache miss at [0x0, line 0x0]
+ 134000: system.cache_hierarchy.ruby_system.controllers.sequencer: Cache miss at [0x80, line 0x80]
+ 194000: system.cache_hierarchy.ruby_system.controllers.sequencer: Cache miss at [0x100, line 0x100]
+ 254000: system.cache_hierarchy.ruby_system.controllers.sequencer: Cache miss at [0x180, line 0x180]
+ 271000: system.cache_hierarchy.ruby_system.controllers.sequencer: Cache hit at [0x0, line 0x0]
+ 379000: system.cache_hierarchy.ruby_system.controllers.sequencer: Cache miss at [0x200, line 0x200]
+ 439000: system.cache_hierarchy.ruby_system.controllers.sequencer: Cache miss at [0x280, line 0x280]
+ 499000: system.cache_hierarchy.ruby_system.controllers.sequencer: Cache miss at [0x300, line 0x300]
+ 559000: system.cache_hierarchy.ruby_system.controllers.sequencer: Cache miss at [0x380, line 0x380]
+ 619000: system.cache_hierarchy.ruby_system.controllers.sequencer: Cache miss at [0x0, line 0x0]
+.
diff --git a/tests/gem5/replacement-policies/ref/tree_plru_test1_st b/tests/gem5/replacement-policies/ref/tree_plru_test1_st
new file mode 100644
index 0000000000..6a94bac187
--- /dev/null
+++ b/tests/gem5/replacement-policies/ref/tree_plru_test1_st
@@ -0,0 +1,13 @@
+Global frequency set at 1000000000000 ticks per second
+Beginning simulation!
+  74000: system.cache_hierarchy.ruby_system.controllers.sequencer: Cache miss at [0x0, line 0x0]
+ 134000: system.cache_hierarchy.ruby_system.controllers.sequencer: Cache miss at [0x80, line 0x80]
+ 194000: system.cache_hierarchy.ruby_system.controllers.sequencer: Cache miss at [0x100, line 0x100]
+ 254000: system.cache_hierarchy.ruby_system.controllers.sequencer: Cache miss at [0x180, line 0x180]
+ 271000: system.cache_hierarchy.ruby_system.controllers.sequencer: Cache hit at [0x0, line 0x0]
+ 379000: system.cache_hierarchy.ruby_system.controllers.sequencer: Cache miss at [0x200, line 0x200]
+ 439000: system.cache_hierarchy.ruby_system.controllers.sequencer: Cache miss at [0x280, line 0x280]
+ 499000: system.cache_hierarchy.ruby_system.controllers.sequencer: Cache miss at [0x300, line 0x300]
+ 559000: system.cache_hierarchy.ruby_system.controllers.sequencer: Cache miss at [0x380, line 0x380]
+ 619000: system.cache_hierarchy.ruby_system.controllers.sequencer: Cache miss at [0x0, line 0x0]
+.
diff --git a/tests/gem5/replacement-policies/ref/tree_plru_test2_ld b/tests/gem5/replacement-policies/ref/tree_plru_test2_ld
new file mode 100644
index 0000000000..5ac1f3506c
--- /dev/null
+++ b/tests/gem5/replacement-policies/ref/tree_plru_test2_ld
@@ -0,0 +1,11 @@
+Global frequency set at 1000000000000 ticks per second
+Beginning simulation!
+  74000: system.cache_hierarchy.ruby_system.controllers.sequencer: Cache miss at [0x0, line 0x0]
+ 134000: system.cache_hierarchy.ruby_system.controllers.sequencer: Cache miss at [0x80, line 0x80]
+ 194000: system.cache_hierarchy.ruby_system.controllers.sequencer: Cache miss at [0x100, line 0x100]
+ 254000: system.cache_hierarchy.ruby_system.controllers.sequencer: Cache miss at [0x180, line 0x180]
+ 271000: system.cache_hierarchy.ruby_system.controllers.sequencer: Cache hit at [0x100, line 0x100]
+ 379000: system.cache_hierarchy.ruby_system.controllers.sequencer: Cache miss at [0x200, line 0x200]
+ 439000: system.cache_hierarchy.ruby_system.controllers.sequencer: Cache miss at [0x280, line 0x280]
+ 451000: system.cache_hierarchy.ruby_system.controllers.sequencer: Cache hit at [0x100, line 0x100]
+.
diff --git a/tests/gem5/replacement-policies/ref/tree_plru_test2_st b/tests/gem5/replacement-policies/ref/tree_plru_test2_st
new file mode 100644
index 0000000000..5ac1f3506c
--- /dev/null
+++ b/tests/gem5/replacement-policies/ref/tree_plru_test2_st
@@ -0,0 +1,11 @@
+Global frequency set at 1000000000000 ticks per second
+Beginning simulation!
+  74000: system.cache_hierarchy.ruby_system.controllers.sequencer: Cache miss at [0x0, line 0x0]
+ 134000: system.cache_hierarchy.ruby_system.controllers.sequencer: Cache miss at [0x80, line 0x80]
+ 194000: system.cache_hierarchy.ruby_system.controllers.sequencer: Cache miss at [0x100, line 0x100]
+ 254000: system.cache_hierarchy.ruby_system.controllers.sequencer: Cache miss at [0x180, line 0x180]
+ 271000: system.cache_hierarchy.ruby_system.controllers.sequencer: Cache hit at [0x100, line 0x100]
+ 379000: system.cache_hierarchy.ruby_system.controllers.sequencer: Cache miss at [0x200, line 0x200]
+ 439000: system.cache_hierarchy.ruby_system.controllers.sequencer: Cache miss at [0x280, line 0x280]
+ 451000: system.cache_hierarchy.ruby_system.controllers.sequencer: Cache hit at [0x100, line 0x100]
+.
diff --git a/tests/gem5/replacement-policies/ref/tree_plru_test3_ld b/tests/gem5/replacement-policies/ref/tree_plru_test3_ld
new file mode 100644
index 0000000000..83662e51ea
--- /dev/null
+++ b/tests/gem5/replacement-policies/ref/tree_plru_test3_ld
@@ -0,0 +1,14 @@
+Global frequency set at 1000000000000 ticks per second
+Beginning simulation!
+  74000: system.cache_hierarchy.ruby_system.controllers.sequencer: Cache miss at [0x0, line 0x0]
+ 134000: system.cache_hierarchy.ruby_system.controllers.sequencer: Cache miss at [0x80, line 0x80]
+ 194000: system.cache_hierarchy.ruby_system.controllers.sequencer: Cache miss at [0x100, line 0x100]
+ 254000: system.cache_hierarchy.ruby_system.controllers.sequencer: Cache miss at [0x180, line 0x180]
+ 271000: system.cache_hierarchy.ruby_system.controllers.sequencer: Cache hit at [0x100, line 0x100]
+ 379000: system.cache_hierarchy.ruby_system.controllers.sequencer: Cache miss at [0x200, line 0x200]
+ 439000: system.cache_hierarchy.ruby_system.controllers.sequencer: Cache miss at [0x280, line 0x280]
+ 451000: system.cache_hierarchy.ruby_system.controllers.sequencer: Cache hit at [0x80, line 0x80]
+ 511000: system.cache_hierarchy.ruby_system.controllers.sequencer: Cache hit at [0x100, line 0x100]
+ 619000: system.cache_hierarchy.ruby_system.controllers.sequencer: Cache miss at [0x0, line 0x0]
+ 631000: system.cache_hierarchy.ruby_system.controllers.sequencer: Cache hit at [0x80, line 0x80]
+.
diff --git a/tests/gem5/replacement-policies/ref/tree_plru_test3_st b/tests/gem5/replacement-policies/ref/tree_plru_test3_st
new file mode 100644
index 0000000000..83662e51ea
--- /dev/null
+++ b/tests/gem5/replacement-policies/ref/tree_plru_test3_st
@@ -0,0 +1,14 @@
+Global frequency set at 1000000000000 ticks per second
+Beginning simulation!
+  74000: system.cache_hierarchy.ruby_system.controllers.sequencer: Cache miss at [0x0, line 0x0]
+ 134000: system.cache_hierarchy.ruby_system.controllers.sequencer: Cache miss at [0x80, line 0x80]
+ 194000: system.cache_hierarchy.ruby_system.controllers.sequencer: Cache miss at [0x100, line 0x100]
+ 254000: system.cache_hierarchy.ruby_system.controllers.sequencer: Cache miss at [0x180, line 0x180]
+ 271000: system.cache_hierarchy.ruby_system.controllers.sequencer: Cache hit at [0x100, line 0x100]
+ 379000: system.cache_hierarchy.ruby_system.controllers.sequencer: Cache miss at [0x200, line 0x200]
+ 439000: system.cache_hierarchy.ruby_system.controllers.sequencer: Cache miss at [0x280, line 0x280]
+ 451000: system.cache_hierarchy.ruby_system.controllers.sequencer: Cache hit at [0x80, line 0x80]
+ 511000: system.cache_hierarchy.ruby_system.controllers.sequencer: Cache hit at [0x100, line 0x100]
+ 619000: system.cache_hierarchy.ruby_system.controllers.sequencer: Cache miss at [0x0, line 0x0]
+ 631000: system.cache_hierarchy.ruby_system.controllers.sequencer: Cache hit at [0x80, line 0x80]
+.
diff --git a/tests/gem5/replacement-policies/run_replacement_policy_test.py b/tests/gem5/replacement-policies/run_replacement_policy_test.py
new file mode 100644
index 0000000000..10061094b4
--- /dev/null
+++ b/tests/gem5/replacement-policies/run_replacement_policy_test.py
@@ -0,0 +1,95 @@
+# Copyright (c) 2022 The Regents of the University of California
+# All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are
+# met: redistributions of source code must retain the above copyright
+# notice, this list of conditions and the following disclaimer;
+# redistributions in binary form must reproduce the above copyright
+# notice, this list of conditions and the following disclaimer in the
+# documentation and/or other materials provided with the distribution;
+# neither the name of the copyright holders nor the names of its
+# contributors may be used to endorse or promote products derived from
+# this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+import argparse
+from importlib.machinery import SourceFileLoader
+
+from cache_hierarchies import ModMIExampleCacheHierarchy
+
+import m5
+
+from m5.debug import flags
+from m5.objects import Root
+from gem5.components.boards.test_board import TestBoard
+from gem5.components.memory.simple import SingleChannelSimpleMemory
+from gem5.components.processors.complex_generator import ComplexGenerator
+
+argparser = argparse.ArgumentParser()
+
+argparser.add_argument(
+    "config_name",
+    type=str,
+    help="Name of the python file "
+    "including the defintion of a python generator and "
+    "importing the right replacement policy. The python "
+    "generator should only assume one positional argument "
+    "and be named python_generator. The replacement policy"
+    " should be imported as rp.",
+)
+argparser.add_argument(
+    "config_path",
+    type=str,
+    help="Path to the python file" "specified by config_name.",
+)
+
+args = argparser.parse_args()
+
+module = SourceFileLoader(args.config_name, args.config_path).load_module()
+python_generator = module.python_generator
+rp_class = module.rp
+
+flags["RubyHitMiss"].enable()
+
+cache_hierarchy = ModMIExampleCacheHierarchy(rp_class)
+
+memory = SingleChannelSimpleMemory(
+    latency="30ns",
+    latency_var="0ns",
+    bandwidth="12.8GiB/s",
+    size="512MiB",
+)
+
+generator = ComplexGenerator()
+generator.set_traffic_from_python_generator(python_generator)
+
+# We use the Test Board. This is a special board to run traffic generation
+# tasks
+motherboard = TestBoard(
+    clk_freq="1GHz",
+    generator=generator,  # We pass the traffic generator as the processor.
+    memory=memory,
+    cache_hierarchy=cache_hierarchy,
+)
+root = Root(full_system=False, system=motherboard)
+
+m5.instantiate()
+
+generator.start_traffic()
+print("Beginning simulation!")
+exit_event = m5.simulate()
+print(
+    "Exiting @ tick {} because {}.".format(m5.curTick(), exit_event.getCause())
+)
diff --git a/tests/gem5/replacement-policies/test_replacement_policies.py b/tests/gem5/replacement-policies/test_replacement_policies.py
new file mode 100644
index 0000000000..3a30c0a070
--- /dev/null
+++ b/tests/gem5/replacement-policies/test_replacement_policies.py
@@ -0,0 +1,110 @@
+# Copyright (c) 2022 Jarvis Jia, Jing Qu, Matt Sinclair, & Mingyuan Xiang
+# Copyright (c) 2022 The Regents of the University of California
+# All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are
+# met: redistributions of source code must retain the above copyright
+# notice, this list of conditions and the following disclaimer;
+# redistributions in binary form must reproduce the above copyright
+# notice, this list of conditions and the following disclaimer in the
+# documentation and/or other materials provided with the distribution;
+# neither the name of the copyright holders nor the names of its
+# contributors may be used to endorse or promote products derived from
+# this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+import os
+
+from testlib import *
+
+
+def test_replacement_policy(config_name: str, config_path: str) -> None:
+    name = f"test-replacement-policy-{config_name}"
+
+    verifiers = (
+        verifier.MatchStdoutNoPerf(joinpath(getcwd(), "ref", config_name[7:])),
+    )
+    gem5_verify_config(
+        name=name,
+        fixtures=(),
+        verifiers=verifiers,
+        config=joinpath(
+            config.base_dir,
+            "tests",
+            "gem5",
+            "replacement-policies",
+            "run_replacement_policy_test.py",
+        ),
+        config_args=[config_name, config_path],
+        valid_isas=(constants.null_tag,),
+        protocol="MI_example",
+        valid_hosts=constants.supported_hosts,
+        length=constants.quick_tag,
+    )
+
+
+def create_replacement_policy_tests(traces):
+    this_dir = os.path.dirname(__file__)
+    for trace in traces:
+        config_name = trace.split(".")[0]
+        config_path = os.path.join(this_dir, trace)
+        test_replacement_policy(config_name, config_path)
+
+
+traces = [
+    "traces/fifo_test1_ld.py",
+    "traces/fifo_test2_ld.py",
+    "traces/lru_test3_ld.py",
+    "traces/lru_test4_ld.py",
+    "traces/lfu_test1_ld.py",
+    "traces/lfu_test2_ld.py",
+    "traces/lfu_test3_ld.py",
+    "traces/lip_test1_ld.py",
+    "traces/lru_test1_ld.py",
+    "traces/lru_test2_ld.py",
+    "traces/mru_test1_ld.py",
+    "traces/mru_test2_ld.py",
+    "traces/nru_test1_ld.py",
+    "traces/rrip_test1_ld.py",
+    "traces/rrip_test2_ld.py",
+    "traces/second_chance_test1_ld.py",
+    "traces/second_chance_test2_ld.py",
+    "traces/second_chance_test3_ld.py",
+    "traces/tree_plru_test1_ld.py",
+    "traces/tree_plru_test2_ld.py",
+    "traces/tree_plru_test3_ld.py",
+    "traces/fifo_test1_st.py",
+    "traces/fifo_test2_st.py",
+    "traces/lru_test3_st.py",
+    "traces/lru_test4_st.py",
+    "traces/lfu_test1_st.py",
+    "traces/lfu_test2_st.py",
+    "traces/lfu_test3_st.py",
+    "traces/lip_test1_st.py",
+    "traces/lru_test1_st.py",
+    "traces/lru_test2_st.py",
+    "traces/mru_test1_st.py",
+    "traces/mru_test2_st.py",
+    "traces/nru_test1_st.py",
+    "traces/rrip_test1_st.py",
+    "traces/rrip_test2_st.py",
+    "traces/second_chance_test1_st.py",
+    "traces/second_chance_test2_st.py",
+    "traces/second_chance_test3_st.py",
+    "traces/tree_plru_test1_st.py",
+    "traces/tree_plru_test2_st.py",
+    "traces/tree_plru_test3_st.py",
+]
+create_replacement_policy_tests(traces)
diff --git a/tests/gem5/replacement-policies/traces/fifo_test1_ld.py b/tests/gem5/replacement-policies/traces/fifo_test1_ld.py
new file mode 100644
index 0000000000..80e573fb0f
--- /dev/null
+++ b/tests/gem5/replacement-policies/traces/fifo_test1_ld.py
@@ -0,0 +1,70 @@
+# Copyright (c) 2022 Jarvis Jia, Jing Qu, Matt Sinclair, & Mingyuan Xiang
+# All Rights Reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are
+# met: redistributions of source code must retain the above copyright
+# notice, this list of conditions and the following disclaimer;
+# redistributions in binary form must reproduce the above copyright
+# notice, this list of conditions and the following disclaimer in the
+# documentation and/or other materials provided with the distribution;
+# neither the name of the copyright holders nor the names of its
+# contributors may be used to endorse or promote products derived from
+# this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+# This test is targeting loads.
+# Access pattern: A, C, E, G, I, K, A, C, E, G
+# Each letter represents a 64-byte address range.
+
+# The [] indicate two different sets, and each set has four ways.
+# [set0way0, set0way1, set0way2, set0way3],
+# [set1way0, set1way1, set1way2, set1way3],
+# If you have a 512B cache with 4-way associativity, and each cache
+# line is 64B with FIFO replacement policy, you will observe:
+# m, m, m, m, m, m, m, m, m, m, where 'm' means miss, and 'h' means hit.
+
+# Explanation of this result:
+# A, C, E, G are misses, now the cache stores
+# ([A*, C, E, G],[ , , ,]), A marked * as the next entry to be evicted.
+# I misses, searches for a victim and selects A.
+# Now the cache stores ([I, C*, E, G],[ , , ,]).
+# K misses, searches for a victim and selects C.
+# Now the cache stores ([I, K, E*, G],[ , , ,]).
+# A misses, searches for a victim and selects E.
+# Now the cache stores ([I, K, A, G*],[ , , ,]).
+# C misses, searches for a victim and selects G.
+# Now the cache stores ([I*, K, A, C],[ , , ,]).
+# E misses, searches for a victim and selects I.
+# Now the cache stores ([E, K*, A, C],[ , , ,]).
+# G misses, searches for a victim and selects K.
+# Now the cache stores ([E, G, A*, C],[ , , ,]).
+
+from m5.objects.ReplacementPolicies import FIFORP as rp
+
+
+def python_generator(generator):
+    yield generator.createLinear(60000, 0, 63, 64, 30000, 30000, 100, 0)
+    yield generator.createLinear(60000, 128, 191, 64, 30000, 30000, 100, 0)
+    yield generator.createLinear(60000, 256, 319, 64, 30000, 30000, 100, 0)
+    yield generator.createLinear(60000, 384, 447, 64, 30000, 30000, 100, 0)
+    yield generator.createLinear(60000, 512, 575, 64, 30000, 30000, 100, 0)
+    yield generator.createLinear(60000, 640, 703, 64, 30000, 30000, 100, 0)
+    yield generator.createLinear(60000, 0, 63, 64, 30000, 30000, 100, 0)
+    yield generator.createLinear(60000, 128, 191, 64, 30000, 30000, 100, 0)
+    yield generator.createLinear(60000, 256, 319, 64, 30000, 30000, 100, 0)
+    yield generator.createLinear(60000, 384, 447, 64, 30000, 30000, 100, 0)
+    yield generator.createLinear(30000, 0, 0, 0, 30000, 30000, 100, 0)
+
+    yield generator.createExit(0)
diff --git a/tests/gem5/replacement-policies/traces/fifo_test1_st.py b/tests/gem5/replacement-policies/traces/fifo_test1_st.py
new file mode 100644
index 0000000000..7fdb63dd37
--- /dev/null
+++ b/tests/gem5/replacement-policies/traces/fifo_test1_st.py
@@ -0,0 +1,70 @@
+# Copyright (c) 2022 Jarvis Jia, Jing Qu, Matt Sinclair, & Mingyuan Xiang
+# All Rights Reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are
+# met: redistributions of source code must retain the above copyright
+# notice, this list of conditions and the following disclaimer;
+# redistributions in binary form must reproduce the above copyright
+# notice, this list of conditions and the following disclaimer in the
+# documentation and/or other materials provided with the distribution;
+# neither the name of the copyright holders nor the names of its
+# contributors may be used to endorse or promote products derived from
+# this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+# This test is targeting stores
+# Access pattern: A, C, E, G, I, K, A, C, E, G
+# Each letter represents a 64-byte address range.
+
+# The [] indicate two different sets, and each set has four ways.
+# [set0way0, set0way1, set0way2, set0way3],
+# [set1way0, set1way1, set1way2, set1way3],
+# If you have a 512B cache with 4-way associativity, and each cache
+# line is 64B with FIFO replacement policy, you will observe:
+# m, m, m, m, m, m, m, m, m, m, where 'm' means miss, and 'h' means hit.
+
+# Explanation of this result:
+# A, C, E, G are misses, now the cache stores
+# ([A*, C, E, G],[ , , ,]), A marked * as the next entry to be evicted.
+# I misses, searches for a victim and selects A.
+# Now the cache stores ([I, C*, E, G],[ , , ,]).
+# K misses, searches for a victim and selects C.
+# Now the cache stores ([I, K, E*, G],[ , , ,]).
+# A misses, searches for a victim and selects E.
+# Now the cache stores ([I, K, A, G*],[ , , ,]).
+# C misses, searches for a victim and selects G.
+# Now the cache stores ([I*, K, A, C],[ , , ,]).
+# E misses, searches for a victim and selects I.
+# Now the cache stores ([E, K*, A, C],[ , , ,]).
+# G misses, searches for a victim and selects K.
+# Now the cache stores ([E, G, A*, C],[ , , ,]).
+
+from m5.objects.ReplacementPolicies import FIFORP as rp
+
+
+def python_generator(generator):
+    yield generator.createLinear(60000, 0, 63, 64, 30000, 30000, 0, 0)
+    yield generator.createLinear(60000, 128, 191, 64, 30000, 30000, 0, 0)
+    yield generator.createLinear(60000, 256, 319, 64, 30000, 30000, 0, 0)
+    yield generator.createLinear(60000, 384, 447, 64, 30000, 30000, 0, 0)
+    yield generator.createLinear(60000, 512, 575, 64, 30000, 30000, 0, 0)
+    yield generator.createLinear(60000, 640, 703, 64, 30000, 30000, 0, 0)
+    yield generator.createLinear(60000, 0, 63, 64, 30000, 30000, 0, 0)
+    yield generator.createLinear(60000, 128, 191, 64, 30000, 30000, 0, 0)
+    yield generator.createLinear(60000, 256, 319, 64, 30000, 30000, 0, 0)
+    yield generator.createLinear(60000, 384, 447, 64, 30000, 30000, 0, 0)
+    yield generator.createLinear(30000, 0, 0, 0, 30000, 30000, 0, 0)
+
+    yield generator.createExit(0)
diff --git a/tests/gem5/replacement-policies/traces/fifo_test2_ld.py b/tests/gem5/replacement-policies/traces/fifo_test2_ld.py
new file mode 100644
index 0000000000..5f95ad7814
--- /dev/null
+++ b/tests/gem5/replacement-policies/traces/fifo_test2_ld.py
@@ -0,0 +1,71 @@
+# Copyright (c) 2022 Jarvis Jia, Jing Qu, Matt Sinclair, & Mingyuan Xiang
+# All Rights Reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are
+# met: redistributions of source code must retain the above copyright
+# notice, this list of conditions and the following disclaimer;
+# redistributions in binary form must reproduce the above copyright
+# notice, this list of conditions and the following disclaimer in the
+# documentation and/or other materials provided with the distribution;
+# neither the name of the copyright holders nor the names of its
+# contributors may be used to endorse or promote products derived from
+# this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+# This test is targeting loads.
+# Access pattern: A, C, E, G, A, C, I, K, E, G
+# Each letter represents a 64-byte address range.
+
+# The [] indicate two different sets, and each set has four ways.
+# [set0way0, set0way1, set0way2, set0way3],
+# [set1way0, set1way1, set1way2, set1way3],
+# If you have a 512B cache with 4-way associativity, and each cache
+# line is 64B with FIFO replacement policy, you will observe:
+# m, m, m, m, h, h, m, m, h, h
+# where 'm' means miss, and 'h' means hit.
+
+# Explanation of this result:
+# A, C, E, G are misses, now the cache stores
+# ([A*, C, E, G],[ , , ,]), A marked * as the next entry to be evicted.
+# A hits.
+# Now the cache stores ([A*, C, E, G],[ , , ,]).
+# C hits.
+# Now the cache stores ([A*, C, E, G],[ , , ,]).
+# I misses, searches for a victim and selects A.
+# Now the cache stores ([I, C*, E, G],[ , , ,]).
+# K misses, searches for a victim and selects C.
+# Now the cache stores ([I, K, E*, G],[ , , ,]).
+# E hits.
+# Now the cache stores ([I, K, E*, G],[ , , ,]).
+# G hits
+# Now the cache stores ([I, K, E*, G],[ , , ,]).
+
+from m5.objects.ReplacementPolicies import FIFORP as rp
+
+
+def python_generator(generator):
+    yield generator.createLinear(60000, 0, 63, 64, 30000, 30000, 100, 0)
+    yield generator.createLinear(60000, 128, 191, 64, 30000, 30000, 100, 0)
+    yield generator.createLinear(60000, 256, 319, 64, 30000, 30000, 100, 0)
+    yield generator.createLinear(60000, 384, 447, 64, 30000, 30000, 100, 0)
+    yield generator.createLinear(60000, 0, 63, 64, 30000, 30000, 100, 0)
+    yield generator.createLinear(60000, 128, 191, 64, 30000, 30000, 100, 0)
+    yield generator.createLinear(60000, 512, 575, 64, 30000, 30000, 100, 0)
+    yield generator.createLinear(60000, 640, 703, 64, 30000, 30000, 100, 0)
+    yield generator.createLinear(60000, 256, 319, 64, 30000, 30000, 100, 0)
+    yield generator.createLinear(60000, 384, 447, 64, 30000, 30000, 100, 0)
+    yield generator.createLinear(30000, 0, 0, 0, 30000, 30000, 100, 0)
+
+    yield generator.createExit(0)
diff --git a/tests/gem5/replacement-policies/traces/fifo_test2_st.py b/tests/gem5/replacement-policies/traces/fifo_test2_st.py
new file mode 100644
index 0000000000..71866d890f
--- /dev/null
+++ b/tests/gem5/replacement-policies/traces/fifo_test2_st.py
@@ -0,0 +1,71 @@
+# Copyright (c) 2022 Jarvis Jia, Jing Qu, Matt Sinclair, & Mingyuan Xiang
+# All Rights Reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are
+# met: redistributions of source code must retain the above copyright
+# notice, this list of conditions and the following disclaimer;
+# redistributions in binary form must reproduce the above copyright
+# notice, this list of conditions and the following disclaimer in the
+# documentation and/or other materials provided with the distribution;
+# neither the name of the copyright holders nor the names of its
+# contributors may be used to endorse or promote products derived from
+# this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+# This test is targeting stores.
+# Access pattern: A, C, E, G, A, C, I, K, E, G
+# Each letter represents a 64-byte address range.
+
+# The [] indicate two different sets, and each set has four ways.
+# [set0way0, set0way1, set0way2, set0way3],
+# [set1way0, set1way1, set1way2, set1way3],
+# If you have a 512B cache with 4-way associativity, and each cache
+# line is 64B with FIFO replacement policy, you will observe:
+# m, m, m, m, h, h, m, m, h, h
+# where 'm' means miss, and 'h' means hit.
+
+# Explanation of this result:
+# A, C, E, G are misses, now the cache stores
+# ([A*, C, E, G],[ , , ,]), A marked * as the next entry to be evicted.
+# A hits.
+# Now the cache stores ([A*, C, E, G],[ , , ,]).
+# C hits.
+# Now the cache stores ([A*, C, E, G],[ , , ,]).
+# I misses, searches for a victim and selects A.
+# Now the cache stores ([I, C*, E, G],[ , , ,]).
+# K misses, searches for a victim and selects C.
+# Now the cache stores ([I, K, E*, G],[ , , ,]).
+# E hits.
+# Now the cache stores ([I, K, E*, G],[ , , ,]).
+# G hits
+# Now the cache stores ([I, K, E*, G],[ , , ,]).
+
+from m5.objects.ReplacementPolicies import FIFORP as rp
+
+
+def python_generator(generator):
+    yield generator.createLinear(60000, 0, 63, 64, 30000, 30000, 0, 0)
+    yield generator.createLinear(60000, 128, 191, 64, 30000, 30000, 0, 0)
+    yield generator.createLinear(60000, 256, 319, 64, 30000, 30000, 0, 0)
+    yield generator.createLinear(60000, 384, 447, 64, 30000, 30000, 0, 0)
+    yield generator.createLinear(60000, 0, 63, 64, 30000, 30000, 0, 0)
+    yield generator.createLinear(60000, 128, 191, 64, 30000, 30000, 0, 0)
+    yield generator.createLinear(60000, 512, 575, 64, 30000, 30000, 0, 0)
+    yield generator.createLinear(60000, 640, 703, 64, 30000, 30000, 0, 0)
+    yield generator.createLinear(60000, 256, 319, 64, 30000, 30000, 0, 0)
+    yield generator.createLinear(60000, 384, 447, 64, 30000, 30000, 0, 0)
+    yield generator.createLinear(30000, 0, 0, 0, 30000, 30000, 0, 0)
+
+    yield generator.createExit(0)
diff --git a/tests/gem5/replacement-policies/traces/lfu_test1_ld.py b/tests/gem5/replacement-policies/traces/lfu_test1_ld.py
new file mode 100644
index 0000000000..2a88ad3182
--- /dev/null
+++ b/tests/gem5/replacement-policies/traces/lfu_test1_ld.py
@@ -0,0 +1,62 @@
+# Copyright (c) 2022 Jarvis Jia, Jing Qu, Matt Sinclair, & Mingyuan Xiang
+# All Rights Reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are
+# met: redistributions of source code must retain the above copyright
+# notice, this list of conditions and the following disclaimer;
+# redistributions in binary form must reproduce the above copyright
+# notice, this list of conditions and the following disclaimer in the
+# documentation and/or other materials provided with the distribution;
+# neither the name of the copyright holders nor the names of its
+# contributors may be used to endorse or promote products derived from
+# this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+# This test is targeting loads.
+# Access pattern: A, C, E, G, I, A, I
+# Each letter represents a 64-byte address range.
+
+# The [] indicate two different sets, and each set has four ways.
+# [set0way0, set0way1, set0way2, set0way3],
+# [set1way0, set1way1, set1way2, set1way3],
+# If you have a 512B cache with 4-way associativity, and each cache
+# line is 64B, with LFU replacement policy, you will observe:
+# m, m, m, m, m, m, m,  where 'm' means miss, and 'h' means hit.
+
+# Explanation of this result:
+# A, C, E, G are misses, now the cache stores
+# ([A1, C1, E1, G1],[ , , ,]).The number after each letter
+# is the count for the  accesses for the address range.
+# I searches for a victim and selects A since it has the least count.
+# Now the cache stores ([I1, C1, E1, G1],[ , , ,]).
+# A searches for a victim and selects I since it has the least count.
+# Now the cache stores ([A1, C1, E1, G1],[ , , ,]).
+# I searches for a victim and selects A since it has the least count.
+# Now the cache stores ([I1, C1, E1, G1],[ , , ,]).
+
+from m5.objects.ReplacementPolicies import LFURP as rp
+
+
+def python_generator(generator):
+    yield generator.createLinear(60000, 0, 63, 64, 30000, 30000, 100, 0)
+    yield generator.createLinear(60000, 128, 191, 64, 30000, 30000, 100, 0)
+    yield generator.createLinear(60000, 256, 319, 64, 30000, 30000, 100, 0)
+    yield generator.createLinear(60000, 384, 447, 64, 30000, 30000, 100, 0)
+    yield generator.createLinear(60000, 512, 575, 64, 30000, 30000, 100, 0)
+    yield generator.createLinear(60000, 0, 63, 64, 30000, 30000, 100, 0)
+    yield generator.createLinear(60000, 512, 575, 64, 30000, 30000, 100, 0)
+    yield generator.createLinear(30000, 0, 0, 0, 30000, 30000, 100, 0)
+
+    yield generator.createExit(0)
diff --git a/tests/gem5/replacement-policies/traces/lfu_test1_st.py b/tests/gem5/replacement-policies/traces/lfu_test1_st.py
new file mode 100644
index 0000000000..a2c945eee2
--- /dev/null
+++ b/tests/gem5/replacement-policies/traces/lfu_test1_st.py
@@ -0,0 +1,62 @@
+# Copyright (c) 2022 Jarvis Jia, Jing Qu, Matt Sinclair, & Mingyuan Xiang
+# All Rights Reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are
+# met: redistributions of source code must retain the above copyright
+# notice, this list of conditions and the following disclaimer;
+# redistributions in binary form must reproduce the above copyright
+# notice, this list of conditions and the following disclaimer in the
+# documentation and/or other materials provided with the distribution;
+# neither the name of the copyright holders nor the names of its
+# contributors may be used to endorse or promote products derived from
+# this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+# This test is targeting stores
+# Access pattern: A, C, E, G, I, A, I
+# Each letter represents a 64-byte address range.
+
+# The [] indicate two different sets, and each set has four ways.
+# [set0way0, set0way1, set0way2, set0way3],
+# [set1way0, set1way1, set1way2, set1way3],
+# If you have a 512B cache with 4-way associativity, and each cache
+# line is 64B, with LFU replacement policy, you will observe:
+# m, m, m, m, m, m, m,  where 'm' means miss, and 'h' means hit.
+
+# Explanation of this result:
+# A, C, E, G are misses, now the cache stores
+# ([A1, C1, E1, G1],[ , , ,]).The number after each letter
+# is the count for the  accesses for the address range.
+# I searches for a victim and selects A since it has the least count.
+# Now the cache stores ([I1, C1, E1, G1],[ , , ,]).
+# A searches for a victim and selects I since it has the least count.
+# Now the cache stores ([A1, C1, E1, G1],[ , , ,]).
+# I searches for a victim and selects A since it has the least count.
+# Now the cache stores ([I1, C1, E1, G1],[ , , ,]).
+
+from m5.objects.ReplacementPolicies import LFURP as rp
+
+
+def python_generator(generator):
+    yield generator.createLinear(60000, 0, 63, 64, 30000, 30000, 0, 0)
+    yield generator.createLinear(60000, 128, 191, 64, 30000, 30000, 0, 0)
+    yield generator.createLinear(60000, 256, 319, 64, 30000, 30000, 0, 0)
+    yield generator.createLinear(60000, 384, 447, 64, 30000, 30000, 0, 0)
+    yield generator.createLinear(60000, 512, 575, 64, 30000, 30000, 0, 0)
+    yield generator.createLinear(60000, 0, 63, 64, 30000, 30000, 0, 0)
+    yield generator.createLinear(60000, 512, 575, 64, 30000, 30000, 0, 0)
+    yield generator.createLinear(30000, 0, 0, 0, 30000, 30000, 0, 0)
+
+    yield generator.createExit(0)
diff --git a/tests/gem5/replacement-policies/traces/lfu_test2_ld.py b/tests/gem5/replacement-policies/traces/lfu_test2_ld.py
new file mode 100644
index 0000000000..901d813199
--- /dev/null
+++ b/tests/gem5/replacement-policies/traces/lfu_test2_ld.py
@@ -0,0 +1,66 @@
+# Copyright (c) 2022 Jarvis Jia, Jing Qu, Matt Sinclair, & Mingyuan Xiang
+# All Rights Reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are
+# met: redistributions of source code must retain the above copyright
+# notice, this list of conditions and the following disclaimer;
+# redistributions in binary form must reproduce the above copyright
+# notice, this list of conditions and the following disclaimer in the
+# documentation and/or other materials provided with the distribution;
+# neither the name of the copyright holders nor the names of its
+# contributors may be used to endorse or promote products derived from
+# this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+# This test is targeting loads.
+# Access pattern: A, C, E, G, A, I, K, M, O, A
+# Each letter represents a 64-byte address range.
+
+# The [] indicate two different sets, and each set has four ways.
+# [set0way0, set0way1, set0way2, set0way3],
+# [set1way0, set1way1, set1way2, set1way3],
+# If you have a 512B cache with 4-way associativity, and each cache
+# line is 64B, with LFU replacement policy, you will observe:
+# m, m, m, m, h, m, m, m, m, h  where 'm' means miss, and 'h' means
+# hit.
+
+# Explanation of this result:
+# A, C, E, G are misses, now the cache stores
+# ([A1, C1, E1, G1],[ , , ,]).The number after each letter
+# is the count for the  accesses for the address range.
+# A is a hit, and the cache now stores ([A2, C1, E1, G1],[ , , ,]).
+# I searches a victim and selects C. Now the cache stores ([A2, I1, E1, G1],[ , , ,]).
+# K searches a victim and selects I. Now the cache stores ([A2, K1, E1, G1],[ , , ,]).
+# M searches a victim and selects K. Now the cache stores ([A2, M1, E1, G1],[ , , ,]).
+# O searches a victim and selects M. Now the cache stores ([A2, O1, E1, G1],[ , , ,]).
+# A hits.
+
+from m5.objects.ReplacementPolicies import LFURP as rp
+
+
+def python_generator(generator):
+    yield generator.createLinear(60000, 0, 63, 64, 30000, 30000, 100, 0)
+    yield generator.createLinear(60000, 128, 191, 64, 30000, 30000, 100, 0)
+    yield generator.createLinear(60000, 256, 319, 64, 30000, 30000, 100, 0)
+    yield generator.createLinear(60000, 384, 447, 64, 30000, 30000, 100, 0)
+    yield generator.createLinear(60000, 0, 63, 64, 30000, 30000, 100, 0)
+    yield generator.createLinear(60000, 512, 575, 64, 30000, 30000, 100, 0)
+    yield generator.createLinear(60000, 640, 703, 64, 30000, 30000, 100, 0)
+    yield generator.createLinear(60000, 768, 831, 64, 30000, 30000, 100, 0)
+    yield generator.createLinear(60000, 896, 959, 64, 30000, 30000, 100, 0)
+    yield generator.createLinear(60000, 0, 63, 64, 30000, 30000, 100, 0)
+    yield generator.createLinear(30000, 0, 0, 0, 30000, 30000, 100, 0)
+
+    yield generator.createExit(0)
diff --git a/tests/gem5/replacement-policies/traces/lfu_test2_st.py b/tests/gem5/replacement-policies/traces/lfu_test2_st.py
new file mode 100644
index 0000000000..415362614b
--- /dev/null
+++ b/tests/gem5/replacement-policies/traces/lfu_test2_st.py
@@ -0,0 +1,66 @@
+# Copyright (c) 2022 Jarvis Jia, Jing Qu, Matt Sinclair, & Mingyuan Xiang
+# All Rights Reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are
+# met: redistributions of source code must retain the above copyright
+# notice, this list of conditions and the following disclaimer;
+# redistributions in binary form must reproduce the above copyright
+# notice, this list of conditions and the following disclaimer in the
+# documentation and/or other materials provided with the distribution;
+# neither the name of the copyright holders nor the names of its
+# contributors may be used to endorse or promote products derived from
+# this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+# This test is targeting stores
+# Access pattern: A, C, E, G, A, I, K, M, O, A
+# Each letter represents a 64-byte address range.
+
+# The [] indicate two different sets, and each set has four ways.
+# [set0way0, set0way1, set0way2, set0way3],
+# [set1way0, set1way1, set1way2, set1way3],
+# If you have a 512B cache with 4-way associativity, and each cache
+# line is 64B, with LFU replacement policy, you will observe:
+# m, m, m, m, h, m, m, m, m, h  where 'm' means miss, and 'h' means
+# hit.
+
+# Explanation of this result:
+# A, C, E, G are misses, now the cache stores
+# ([A1, C1, E1, G1],[ , , ,]).The number after each letter
+# is the count for the  accesses for the address range.
+# A is a hit, and the cache now stores ([A2, C1, E1, G1],[ , , ,]).
+# I searches a victim and selects C. Now the cache stores ([A2, I1, E1, G1],[ , , ,]).
+# K searches a victim and selects I. Now the cache stores ([A2, K1, E1, G1],[ , , ,]).
+# M searches a victim and selects K. Now the cache stores ([A2, M1, E1, G1],[ , , ,]).
+# O searches a victim and selects M. Now the cache stores ([A2, O1, E1, G1],[ , , ,]).
+# A hits.
+
+from m5.objects.ReplacementPolicies import LFURP as rp
+
+
+def python_generator(generator):
+    yield generator.createLinear(60000, 0, 63, 64, 30000, 30000, 0, 0)
+    yield generator.createLinear(60000, 128, 191, 64, 30000, 30000, 0, 0)
+    yield generator.createLinear(60000, 256, 319, 64, 30000, 30000, 0, 0)
+    yield generator.createLinear(60000, 384, 447, 64, 30000, 30000, 0, 0)
+    yield generator.createLinear(60000, 0, 63, 64, 30000, 30000, 0, 0)
+    yield generator.createLinear(60000, 512, 575, 64, 30000, 30000, 0, 0)
+    yield generator.createLinear(60000, 640, 703, 64, 30000, 30000, 0, 0)
+    yield generator.createLinear(60000, 768, 831, 64, 30000, 30000, 0, 0)
+    yield generator.createLinear(60000, 896, 959, 64, 30000, 30000, 0, 0)
+    yield generator.createLinear(60000, 0, 63, 64, 30000, 30000, 0, 0)
+    yield generator.createLinear(30000, 0, 0, 0, 30000, 30000, 0, 0)
+
+    yield generator.createExit(0)
diff --git a/tests/gem5/replacement-policies/traces/lfu_test3_ld.py b/tests/gem5/replacement-policies/traces/lfu_test3_ld.py
new file mode 100644
index 0000000000..a4a59ade64
--- /dev/null
+++ b/tests/gem5/replacement-policies/traces/lfu_test3_ld.py
@@ -0,0 +1,87 @@
+# Copyright (c) 2022 Jarvis Jia, Jing Qu, Matt Sinclair, & Mingyuan Xiang
+# All Rights Reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are
+# met: redistributions of source code must retain the above copyright
+# notice, this list of conditions and the following disclaimer;
+# redistributions in binary form must reproduce the above copyright
+# notice, this list of conditions and the following disclaimer in the
+# documentation and/or other materials provided with the distribution;
+# neither the name of the copyright holders nor the names of its
+# contributors may be used to endorse or promote products derived from
+# this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+# This test is targeting loads.
+# Access pattern: A, A, C, C, C, E, G, G, I, E, E, E, E, K,
+# A, A, A, A, M, G, G, G, G, O, C
+# Each letter represents a 64-byte address range.
+
+# The [] indicate two different sets, and each set has four ways.
+# [set0way0, set0way1, set0way2, set0way3],
+# [set1way0, set1way1, set1way2, set1way3],
+# If you have a 512B cache with 4-way associativity, and each cache
+# line is 64B, with LFU replacement policy, you will observe:
+# m, h, m, h, h, m, m, h, m, m, h, h, h, m, m, h, h, h, m, m, h, h,
+# h, m, m, where 'm' means miss, and 'h' means hit.
+
+# Explanation of this result:
+# After two A accesses, three C accesses, one E access and two G accesses,
+# the cache stores ([A2,C3,E1,G2],[ , , ,]). The numbers after each letter are the
+# counts of accesses to that address range.
+# I searches a victim and selects E. Now the cache stores ([A2,C3,I1,G2],[ , , ,]).
+# E searches a victim and selects I. Now the cache stores ([A2,C3,E1,G2],[ , , ,]).
+# Three E accesses are hits. Now the cache stores ([A2,C3,E4,G2],[ , , ,]).
+# K searches a victim and selects A. Now the cache stores ([K1,C3,E4,G2],[ , , ,]).
+# A searches a victim and selects K. Now the cache stores ([A1,C3,E4,G2],[ , , ,]).
+# Three A accesses are hits. Now the cache stores ([A4,C3,E4,G2],[ , , ,]).
+# M searches a victim and selects G. Now the cache stores ([A4,C3,E4,M1],[ , , ,]).
+# G searches a victim and selects M. Now the cache stores ([A4,C3,E4,G1],[ , , ,]).
+# Three G accesses are hits. Now the cache stores ([A4,C3,E4,G4],[ , , ,]).
+# O searches a victim and selects C. Now the cache stores ([A4,O1,E4,G4],[ , , ,]).
+# C searches a victim and selects O. Now the cache stores ([A4,C1,E4,G4],[ , , ,]).
+
+from m5.objects.ReplacementPolicies import LFURP as rp
+
+
+def python_generator(generator):
+    yield generator.createLinear(60000, 0, 63, 64, 30000, 30000, 100, 0)
+    yield generator.createLinear(60000, 0, 63, 64, 30000, 30000, 100, 0)
+    yield generator.createLinear(60000, 128, 191, 64, 30000, 30000, 100, 0)
+    yield generator.createLinear(60000, 128, 191, 64, 30000, 30000, 100, 0)
+    yield generator.createLinear(60000, 128, 191, 64, 30000, 30000, 100, 0)
+    yield generator.createLinear(60000, 256, 319, 64, 30000, 30000, 100, 0)
+    yield generator.createLinear(60000, 384, 447, 64, 30000, 30000, 100, 0)
+    yield generator.createLinear(60000, 384, 447, 64, 30000, 30000, 100, 0)
+    yield generator.createLinear(60000, 512, 575, 64, 30000, 30000, 100, 0)
+    yield generator.createLinear(60000, 256, 319, 64, 30000, 30000, 100, 0)
+    yield generator.createLinear(60000, 256, 319, 64, 30000, 30000, 100, 0)
+    yield generator.createLinear(60000, 256, 319, 64, 30000, 30000, 100, 0)
+    yield generator.createLinear(60000, 256, 319, 64, 30000, 30000, 100, 0)
+    yield generator.createLinear(60000, 640, 703, 64, 30000, 30000, 100, 0)
+    yield generator.createLinear(60000, 0, 63, 64, 30000, 30000, 100, 0)
+    yield generator.createLinear(60000, 0, 63, 64, 30000, 30000, 100, 0)
+    yield generator.createLinear(60000, 0, 63, 64, 30000, 30000, 100, 0)
+    yield generator.createLinear(60000, 0, 63, 64, 30000, 30000, 100, 0)
+    yield generator.createLinear(60000, 768, 831, 64, 30000, 30000, 100, 0)
+    yield generator.createLinear(60000, 384, 447, 64, 30000, 30000, 100, 0)
+    yield generator.createLinear(60000, 384, 447, 64, 30000, 30000, 100, 0)
+    yield generator.createLinear(60000, 384, 447, 64, 30000, 30000, 100, 0)
+    yield generator.createLinear(60000, 384, 447, 64, 30000, 30000, 100, 0)
+    yield generator.createLinear(60000, 896, 959, 64, 30000, 30000, 100, 0)
+    yield generator.createLinear(60000, 128, 191, 64, 30000, 30000, 100, 0)
+    yield generator.createLinear(30000, 0, 0, 0, 30000, 30000, 100, 0)
+
+    yield generator.createExit(0)
diff --git a/tests/gem5/replacement-policies/traces/lfu_test3_st.py b/tests/gem5/replacement-policies/traces/lfu_test3_st.py
new file mode 100644
index 0000000000..45e2ee2bcd
--- /dev/null
+++ b/tests/gem5/replacement-policies/traces/lfu_test3_st.py
@@ -0,0 +1,87 @@
+# Copyright (c) 2022 Jarvis Jia, Jing Qu, Matt Sinclair, & Mingyuan Xiang
+# All Rights Reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are
+# met: redistributions of source code must retain the above copyright
+# notice, this list of conditions and the following disclaimer;
+# redistributions in binary form must reproduce the above copyright
+# notice, this list of conditions and the following disclaimer in the
+# documentation and/or other materials provided with the distribution;
+# neither the name of the copyright holders nor the names of its
+# contributors may be used to endorse or promote products derived from
+# this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+# This test is targeting stores
+# Access pattern: A, A, C, C, C, E, G, G, I, E, E, E, E, K,
+# A, A, A, A, M, G, G, G, G, O, C
+# Each letter represents a 64-byte address range.
+
+# The [] indicate two different sets, and each set has four ways.
+# [set0way0, set0way1, set0way2, set0way3],
+# [set1way0, set1way1, set1way2, set1way3],
+# If you have a 512B cache with 4-way associativity, and each cache
+# line is 64B, with LFU replacement policy, you will observe:
+# m, h, m, h, h, m, m, h, m, m, h, h, h, m, m, h, h, h, m, m, h, h,
+# h, m, m, where 'm' means miss, and 'h' means hit.
+
+# Explanation of this result:
+# After two A accesses, three C accesses, one E access and two G accesses,
+# the cache stores ([A2,C3,E1,G2],[ , , ,]). The numbers after each letter are the
+# counts of accesses to that address range.
+# I searches a victim and selects E. Now the cache stores ([A2,C3,I1,G2],[ , , ,]).
+# E searches a victim and selects I. Now the cache stores ([A2,C3,E1,G2],[ , , ,]).
+# Three E accesses are hits. Now the cache stores ([A2,C3,E4,G2],[ , , ,]).
+# K searches a victim and selects A. Now the cache stores ([K1,C3,E4,G2],[ , , ,]).
+# A searches a victim and selects K. Now the cache stores ([A1,C3,E4,G2],[ , , ,]).
+# Three A accesses are hits. Now the cache stores ([A4,C3,E4,G2],[ , , ,]).
+# M searches a victim and selects G. Now the cache stores ([A4,C3,E4,M1],[ , , ,]).
+# G searches a victim and selects M. Now the cache stores ([A4,C3,E4,G1],[ , , ,]).
+# Three G accesses are hits. Now the cache stores ([A4,C3,E4,G4],[ , , ,]).
+# O searches a victim and selects C. Now the cache stores ([A4,O1,E4,G4],[ , , ,]).
+# C searches a victim and selects O. Now the cache stores ([A4,C1,E4,G4],[ , , ,]).
+
+from m5.objects.ReplacementPolicies import LFURP as rp
+
+
+def python_generator(generator):
+    yield generator.createLinear(60000, 0, 63, 64, 30000, 30000, 0, 0)
+    yield generator.createLinear(60000, 0, 63, 64, 30000, 30000, 0, 0)
+    yield generator.createLinear(60000, 128, 191, 64, 30000, 30000, 0, 0)
+    yield generator.createLinear(60000, 128, 191, 64, 30000, 30000, 0, 0)
+    yield generator.createLinear(60000, 128, 191, 64, 30000, 30000, 0, 0)
+    yield generator.createLinear(60000, 256, 319, 64, 30000, 30000, 0, 0)
+    yield generator.createLinear(60000, 384, 447, 64, 30000, 30000, 0, 0)
+    yield generator.createLinear(60000, 384, 447, 64, 30000, 30000, 0, 0)
+    yield generator.createLinear(60000, 512, 575, 64, 30000, 30000, 0, 0)
+    yield generator.createLinear(60000, 256, 319, 64, 30000, 30000, 0, 0)
+    yield generator.createLinear(60000, 256, 319, 64, 30000, 30000, 0, 0)
+    yield generator.createLinear(60000, 256, 319, 64, 30000, 30000, 0, 0)
+    yield generator.createLinear(60000, 256, 319, 64, 30000, 30000, 0, 0)
+    yield generator.createLinear(60000, 640, 703, 64, 30000, 30000, 0, 0)
+    yield generator.createLinear(60000, 0, 63, 64, 30000, 30000, 0, 0)
+    yield generator.createLinear(60000, 0, 63, 64, 30000, 30000, 0, 0)
+    yield generator.createLinear(60000, 0, 63, 64, 30000, 30000, 0, 0)
+    yield generator.createLinear(60000, 0, 63, 64, 30000, 30000, 0, 0)
+    yield generator.createLinear(60000, 768, 831, 64, 30000, 30000, 0, 0)
+    yield generator.createLinear(60000, 384, 447, 64, 30000, 30000, 0, 0)
+    yield generator.createLinear(60000, 384, 447, 64, 30000, 30000, 0, 0)
+    yield generator.createLinear(60000, 384, 447, 64, 30000, 30000, 0, 0)
+    yield generator.createLinear(60000, 384, 447, 64, 30000, 30000, 0, 0)
+    yield generator.createLinear(60000, 896, 959, 64, 30000, 30000, 0, 0)
+    yield generator.createLinear(60000, 128, 191, 64, 30000, 30000, 0, 0)
+    yield generator.createLinear(30000, 0, 0, 0, 30000, 30000, 0, 0)
+
+    yield generator.createExit(0)
diff --git a/tests/gem5/replacement-policies/traces/lip_test1_ld.py b/tests/gem5/replacement-policies/traces/lip_test1_ld.py
new file mode 100644
index 0000000000..c7d70a93f1
--- /dev/null
+++ b/tests/gem5/replacement-policies/traces/lip_test1_ld.py
@@ -0,0 +1,80 @@
+# Copyright (c) 2022 Jarvis Jia, Jing Qu, Matt Sinclair, & Mingyuan Xiang
+# All Rights Reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are
+# met: redistributions of source code must retain the above copyright
+# notice, this list of conditions and the following disclaimer;
+# redistributions in binary form must reproduce the above copyright
+# notice, this list of conditions and the following disclaimer in the
+# documentation and/or other materials provided with the distribution;
+# neither the name of the copyright holders nor the names of its
+# contributors may be used to endorse or promote products derived from
+# this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+# This test is targeting loads.
+# Access pattern: A, C, E, G, I, K, M, O, A, C, E, G, A, I, C
+# Each letter represents a 64-byte address range.
+
+# The [] indicate two different sets, and each set has four ways.
+# [set0way0, set0way1, set0way2, set0way3],
+# [set1way0, set1way1, set1way2, set1way3],
+# If you have a 512B cache with 4-way associativity,
+# and each cache line is 64B, with LIP replacement policy,
+# you will observe: m,m,m,m,m,m,m,m,m,h,h,h,h,m,m
+# where 'h' means hit and 'm' means miss.
+
+# Explanation of this result:
+# A,C,E,G are misses, now the cache stores ([A, C, E, G],[ , , ,])
+# I searches for a victim and selects A.
+# Now the cache stores ([I, C, E, G],[ , , ,])
+# K searches for a victim and selects I.
+# Now the cache stores ([K, C, E, G],[ , , ,])
+# M searches for a victim and selects K.
+# Now the cache stores ([M, C, E, G],[ , , ,])
+# O searches for a victim and selects M.
+# Now the cache stores ([O, C, E, G],[ , , ,])
+# A searches for a victim and selects O.
+# Now the cache stores ([A, C, E, G],[ , , ,])
+# C hits --> C now MRU
+# E hits --> E now MRU
+# G hits --> G now MRU
+# A hits --> A now MRU
+# I searches for a victim and selects C since A is MRU.
+# Now the cache stores ([A, I, E, G],[ , , ,]).
+# C misses
+
+from m5.objects.ReplacementPolicies import LIPRP as rp
+
+
+def python_generator(generator):
+    yield generator.createLinear(60000, 0, 63, 64, 30000, 30000, 100, 0)
+    yield generator.createLinear(60000, 128, 191, 64, 30000, 30000, 100, 0)
+    yield generator.createLinear(60000, 256, 319, 64, 30000, 30000, 100, 0)
+    yield generator.createLinear(60000, 384, 447, 64, 30000, 30000, 100, 0)
+    yield generator.createLinear(60000, 512, 575, 64, 30000, 30000, 100, 0)
+    yield generator.createLinear(60000, 640, 703, 64, 30000, 30000, 100, 0)
+    yield generator.createLinear(60000, 768, 831, 64, 30000, 30000, 100, 0)
+    yield generator.createLinear(60000, 896, 959, 64, 30000, 30000, 100, 0)
+    yield generator.createLinear(60000, 0, 63, 64, 30000, 30000, 100, 0)
+    yield generator.createLinear(60000, 128, 191, 64, 30000, 30000, 100, 0)
+    yield generator.createLinear(60000, 256, 319, 64, 30000, 30000, 100, 0)
+    yield generator.createLinear(60000, 384, 447, 64, 30000, 30000, 100, 0)
+    yield generator.createLinear(60000, 0, 63, 64, 30000, 30000, 100, 0)
+    yield generator.createLinear(60000, 512, 575, 64, 30000, 30000, 100, 0)
+    yield generator.createLinear(60000, 128, 191, 64, 30000, 30000, 100, 0)
+    yield generator.createLinear(30000, 0, 0, 0, 30000, 30000, 100, 0)
+
+    yield generator.createExit(0)
diff --git a/tests/gem5/replacement-policies/traces/lip_test1_st.py b/tests/gem5/replacement-policies/traces/lip_test1_st.py
new file mode 100644
index 0000000000..e64db8f12e
--- /dev/null
+++ b/tests/gem5/replacement-policies/traces/lip_test1_st.py
@@ -0,0 +1,80 @@
+# Copyright (c) 2022 Jarvis Jia, Jing Qu, Matt Sinclair, & Mingyuan Xiang
+# All Rights Reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are
+# met: redistributions of source code must retain the above copyright
+# notice, this list of conditions and the following disclaimer;
+# redistributions in binary form must reproduce the above copyright
+# notice, this list of conditions and the following disclaimer in the
+# documentation and/or other materials provided with the distribution;
+# neither the name of the copyright holders nor the names of its
+# contributors may be used to endorse or promote products derived from
+# this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+# This test is targeting stores
+# Access pattern: A, C, E, G, I, K, M, O, A, C, E, G, A, I, C
+# Each letter represents a 64-byte address range.
+
+# The [] indicate two different sets, and each set has four ways.
+# [set0way0, set0way1, set0way2, set0way3],
+# [set1way0, set1way1, set1way2, set1way3],
+# If you have a 512B cache with 4-way associativity,
+# and each cache line is 64B, with LIP replacement policy,
+# you will observe: m,m,m,m,m,m,m,m,m,h,h,h,h,m,m
+# where 'h' means hit and 'm' means miss.
+
+# Explanation of this result:
+# A,C,E,G are misses, now the cache stores ([A, C, E, G],[ , , ,])
+# I searches for a victim and selects A.
+# Now the cache stores ([I, C, E, G],[ , , ,])
+# K searches for a victim and selects I.
+# Now the cache stores ([K, C, E, G],[ , , ,])
+# M searches for a victim and selects K.
+# Now the cache stores ([M, C, E, G],[ , , ,])
+# O searches for a victim and selects M.
+# Now the cache stores ([O, C, E, G],[ , , ,])
+# A searches for a victim and selects O.
+# Now the cache stores ([A, C, E, G],[ , , ,])
+# C hits --> C now MRU
+# E hits --> E now MRU
+# G hits --> G now MRU
+# A hits --> A now MRU
+# I searches for a victim and selects C since A is MRU.
+# Now the cache stores ([A, I, E, G],[ , , ,]).
+# C misses
+
+from m5.objects.ReplacementPolicies import LIPRP as rp
+
+
+def python_generator(generator):
+    yield generator.createLinear(60000, 0, 63, 64, 30000, 30000, 0, 0)
+    yield generator.createLinear(60000, 128, 191, 64, 30000, 30000, 0, 0)
+    yield generator.createLinear(60000, 256, 319, 64, 30000, 30000, 0, 0)
+    yield generator.createLinear(60000, 384, 447, 64, 30000, 30000, 0, 0)
+    yield generator.createLinear(60000, 512, 575, 64, 30000, 30000, 0, 0)
+    yield generator.createLinear(60000, 640, 703, 64, 30000, 30000, 0, 0)
+    yield generator.createLinear(60000, 768, 831, 64, 30000, 30000, 0, 0)
+    yield generator.createLinear(60000, 896, 959, 64, 30000, 30000, 0, 0)
+    yield generator.createLinear(60000, 0, 63, 64, 30000, 30000, 0, 0)
+    yield generator.createLinear(60000, 128, 191, 64, 30000, 30000, 0, 0)
+    yield generator.createLinear(60000, 256, 319, 64, 30000, 30000, 0, 0)
+    yield generator.createLinear(60000, 384, 447, 64, 30000, 30000, 0, 0)
+    yield generator.createLinear(60000, 0, 63, 64, 30000, 30000, 0, 0)
+    yield generator.createLinear(60000, 512, 575, 64, 30000, 30000, 0, 0)
+    yield generator.createLinear(60000, 128, 191, 64, 30000, 30000, 0, 0)
+    yield generator.createLinear(30000, 0, 0, 0, 30000, 30000, 0, 0)
+
+    yield generator.createExit(0)
diff --git a/tests/gem5/replacement-policies/traces/lru_test1_ld.py b/tests/gem5/replacement-policies/traces/lru_test1_ld.py
new file mode 100644
index 0000000000..41b874805f
--- /dev/null
+++ b/tests/gem5/replacement-policies/traces/lru_test1_ld.py
@@ -0,0 +1,76 @@
+# Copyright (c) 2022 Jarvis Jia, Jing Qu, Matt Sinclair, & Mingyuan Xiang
+# All Rights Reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are
+# met: redistributions of source code must retain the above copyright
+# notice, this list of conditions and the following disclaimer;
+# redistributions in binary form must reproduce the above copyright
+# notice, this list of conditions and the following disclaimer in the
+# documentation and/or other materials provided with the distribution;
+# neither the name of the copyright holders nor the names of its
+# contributors may be used to endorse or promote products derived from
+# this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+# This test is targeting loads.
+# Access pattern: A, C, E, G, A, C, I, K, M, O, A, C, I, K, M, O
+# Each letter represents a 64-byte address range.
+
+# The [] indicate two different sets, and each set has four ways.
+# [set0way0, set0way1, set0way2, set0way3],
+# [set1way0, set1way1, set1way2, set1way3],
+# If you have a 512B cache with 4-way associativity,
+# and each cache line is 64B. With LRU replacement policy, you will observe:
+# m, m, m, m, h, h, m, m, m, m, m, m, m, m, m, m, where 'h' means hit and
+# 'm' means miss.
+
+# Explanation of this result:
+# A, C, E, G are misses, now the cache stores ([A*, C, E, G],[ , , ,]).
+# A is marked as the LRU address range.
+# A, C are hits, and the cache stores ([A, C, E*, G],[ , , ,]).
+# I searches for a victim and selects E. Now the cache stores ([A, C, I, G*],[ , , ,]).
+# K searches for a victim and selects G. Now the cache stores ([A*, C, I, K],[ , , ,]).
+# M searches for a victim and selects A. Now the cache stores ([M, C*, I, K],[ , , ,]).
+# O searches for a victim and selects C. Now the cache stores ([M, O, I*, K],[ , , ,]).
+# A searches for a victim and selects I. Now the cache stores ([M, O, A, K*],[ , , ,]).
+# C searches for a victim and selects K. Now the cache stores ([M*, O, A, C],[ , , ,]).
+# I searches for a victim and selects M. Now the cache stores ([I, O*, A, C],[ , , ,]).
+# K searches for a victim and selects O. Now the cache stores ([I, K, A*, C],[ , , ,]).
+# M searches for a victim and selects A. Now the cache stores ([I, K, M, C*],[ , , ,]).
+# O searches for a victim and selects C. Now the cache stores ([I*, K, M, O],[ , , ,]).
+
+from m5.objects.ReplacementPolicies import LRURP as rp
+
+
+def python_generator(generator):
+    yield generator.createLinear(60000, 0, 63, 64, 30000, 30000, 100, 0)
+    yield generator.createLinear(60000, 128, 191, 64, 30000, 30000, 100, 0)
+    yield generator.createLinear(60000, 256, 319, 64, 30000, 30000, 100, 0)
+    yield generator.createLinear(60000, 384, 447, 64, 30000, 30000, 100, 0)
+    yield generator.createLinear(60000, 0, 63, 64, 30000, 30000, 100, 0)
+    yield generator.createLinear(60000, 128, 191, 64, 30000, 30000, 100, 0)
+    yield generator.createLinear(60000, 512, 575, 64, 30000, 30000, 100, 0)
+    yield generator.createLinear(60000, 640, 703, 64, 30000, 30000, 100, 0)
+    yield generator.createLinear(60000, 768, 831, 64, 30000, 30000, 100, 0)
+    yield generator.createLinear(60000, 896, 959, 64, 30000, 30000, 100, 0)
+    yield generator.createLinear(60000, 0, 63, 64, 30000, 30000, 100, 0)
+    yield generator.createLinear(60000, 128, 191, 64, 30000, 30000, 100, 0)
+    yield generator.createLinear(60000, 512, 575, 64, 30000, 30000, 100, 0)
+    yield generator.createLinear(60000, 640, 703, 64, 30000, 30000, 100, 0)
+    yield generator.createLinear(60000, 768, 831, 64, 30000, 30000, 100, 0)
+    yield generator.createLinear(60000, 896, 959, 64, 30000, 30000, 100, 0)
+    yield generator.createLinear(30000, 0, 0, 0, 30000, 30000, 100, 0)
+
+    yield generator.createExit(0)
diff --git a/tests/gem5/replacement-policies/traces/lru_test1_st.py b/tests/gem5/replacement-policies/traces/lru_test1_st.py
new file mode 100644
index 0000000000..20a3594102
--- /dev/null
+++ b/tests/gem5/replacement-policies/traces/lru_test1_st.py
@@ -0,0 +1,76 @@
+# Copyright (c) 2022 Jarvis Jia, Jing Qu, Matt Sinclair, & Mingyuan Xiang
+# All Rights Reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are
+# met: redistributions of source code must retain the above copyright
+# notice, this list of conditions and the following disclaimer;
+# redistributions in binary form must reproduce the above copyright
+# notice, this list of conditions and the following disclaimer in the
+# documentation and/or other materials provided with the distribution;
+# neither the name of the copyright holders nor the names of its
+# contributors may be used to endorse or promote products derived from
+# this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+# This test is targeting stores
+# Access pattern: A, C, E, G, A, C, I, K, M, O, A, C, I, K, M, O
+# Each letter represents a 64-byte address range.
+
+# The [] indicate two different sets, and each set has four ways.
+# [set0way0, set0way1, set0way2, set0way3],
+# [set1way0, set1way1, set1way2, set1way3],
+# If you have a 512B cache with 4-way associativity,
+# and each cache line is 64B. With LRU replacement policy, you will observe:
+# m, m, m, m, h, h, m, m, m, m, m, m, m, m, m, m, where 'h' means hit and
+# 'm' means miss.
+
+# Explanation of this result:
+# A, C, E, G are misses, now the cache stores ([A*, C, E, G],[ , , ,]).
+# A is marked as the LRU address range.
+# A, C are hits, and the cache stores ([A, C, E*, G],[ , , ,]).
+# I searches for a victim and selects E. Now the cache stores ([A, C, I, G*],[ , , ,]).
+# K searches for a victim and selects G. Now the cache stores ([A*, C, I, K],[ , , ,]).
+# M searches for a victim and selects A. Now the cache stores ([M, C*, I, K],[ , , ,]).
+# O searches for a victim and selects C. Now the cache stores ([M, O, I*, K],[ , , ,]).
+# A searches for a victim and selects I. Now the cache stores ([M, O, A, K*],[ , , ,]).
+# C searches for a victim and selects K. Now the cache stores ([M*, O, A, C],[ , , ,]).
+# I searches for a victim and selects M. Now the cache stores ([I, O*, A, C],[ , , ,]).
+# K searches for a victim and selects O. Now the cache stores ([I, K, A*, C],[ , , ,]).
+# M searches for a victim and selects A. Now the cache stores ([I, K, M, C*],[ , , ,]).
+# O searches for a victim and selects C. Now the cache stores ([I*, K, M, O],[ , , ,]).
+
+from m5.objects.ReplacementPolicies import LRURP as rp
+
+
+def python_generator(generator):
+    yield generator.createLinear(60000, 0, 63, 64, 30000, 30000, 0, 0)
+    yield generator.createLinear(60000, 128, 191, 64, 30000, 30000, 0, 0)
+    yield generator.createLinear(60000, 256, 319, 64, 30000, 30000, 0, 0)
+    yield generator.createLinear(60000, 384, 447, 64, 30000, 30000, 0, 0)
+    yield generator.createLinear(60000, 0, 63, 64, 30000, 30000, 0, 0)
+    yield generator.createLinear(60000, 128, 191, 64, 30000, 30000, 0, 0)
+    yield generator.createLinear(60000, 512, 575, 64, 30000, 30000, 0, 0)
+    yield generator.createLinear(60000, 640, 703, 64, 30000, 30000, 0, 0)
+    yield generator.createLinear(60000, 768, 831, 64, 30000, 30000, 0, 0)
+    yield generator.createLinear(60000, 896, 959, 64, 30000, 30000, 0, 0)
+    yield generator.createLinear(60000, 0, 63, 64, 30000, 30000, 0, 0)
+    yield generator.createLinear(60000, 128, 191, 64, 30000, 30000, 0, 0)
+    yield generator.createLinear(60000, 512, 575, 64, 30000, 30000, 0, 0)
+    yield generator.createLinear(60000, 640, 703, 64, 30000, 30000, 0, 0)
+    yield generator.createLinear(60000, 768, 831, 64, 30000, 30000, 0, 0)
+    yield generator.createLinear(60000, 896, 959, 64, 30000, 30000, 0, 0)
+    yield generator.createLinear(30000, 0, 0, 0, 30000, 30000, 0, 0)
+
+    yield generator.createExit(0)
diff --git a/tests/gem5/replacement-policies/traces/lru_test2_ld.py b/tests/gem5/replacement-policies/traces/lru_test2_ld.py
new file mode 100644
index 0000000000..24c8a54010
--- /dev/null
+++ b/tests/gem5/replacement-policies/traces/lru_test2_ld.py
@@ -0,0 +1,71 @@
+# Copyright (c) 2022 Jarvis Jia, Jing Qu, Matt Sinclair, & Mingyuan Xiang
+# All Rights Reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are
+# met: redistributions of source code must retain the above copyright
+# notice, this list of conditions and the following disclaimer;
+# redistributions in binary form must reproduce the above copyright
+# notice, this list of conditions and the following disclaimer in the
+# documentation and/or other materials provided with the distribution;
+# neither the name of the copyright holders nor the names of its
+# contributors may be used to endorse or promote products derived from
+# this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+# This test is targeting loads.
+# Access pattern: A, C, E, G, A, C, I, K, M, O, E, G, E, G
+# Each letter represents a 64-byte address range.
+
+# The [] indicate two different sets, and each set has four ways.
+# [set0way0, set0way1, set0way2, set0way3],
+# [set1way0, set1way1, set1way2, set1way3],
+# If you have a 512B cache with 4-way associativity,
+# and each cache line is 64B. With LRU replacement policy, you will observe:
+# m, m, m, m, h, h, m, m, m, m, m, m, h, h, where 'h' means hit and 'm'
+# means miss.
+
+# Explanation of this result:
+# A, C, E, G are misses, now the cache stores ([A*, C, E, G],[ , , ,]).
+# A is marked as the LRU address range.
+# A, C are hits, and the cache stores ([A, C, E*, G],[ , , ,]).
+# I searches for a victim and selects E. Now the cache stores ([A, C, I, G*],[ , , ,]).
+# K searches for a victim and selects G. Now the cache stores (([A*, C, I, K],[ , , ,]).
+# M searches for a victim and selects A. Now the cache stores (([M, C*, I, K],[ , , ,]).
+# O searches for a victim and selects C. Now the cache stores (([M, O, I*, K],[ , , ,]).
+# E searches for a victim and selects I. Now the cache stores (([M, O, E, K*],[ , , ,]).
+# G searches for a victim and selects K. Now the cache stores (([M*, O, E, G],[ , , ,]).
+# E,G are hits.
+
+from m5.objects.ReplacementPolicies import LRURP as rp
+
+
+def python_generator(generator):
+    yield generator.createLinear(60000, 0, 63, 64, 30000, 30000, 100, 0)
+    yield generator.createLinear(60000, 128, 191, 64, 30000, 30000, 100, 0)
+    yield generator.createLinear(60000, 256, 319, 64, 30000, 30000, 100, 0)
+    yield generator.createLinear(60000, 384, 447, 64, 30000, 30000, 100, 0)
+    yield generator.createLinear(60000, 0, 63, 64, 30000, 30000, 100, 0)
+    yield generator.createLinear(60000, 128, 191, 64, 30000, 30000, 100, 0)
+    yield generator.createLinear(60000, 512, 575, 64, 30000, 30000, 100, 0)
+    yield generator.createLinear(60000, 640, 703, 64, 30000, 30000, 100, 0)
+    yield generator.createLinear(60000, 768, 831, 64, 30000, 30000, 100, 0)
+    yield generator.createLinear(60000, 896, 959, 64, 30000, 30000, 100, 0)
+    yield generator.createLinear(60000, 256, 319, 64, 30000, 30000, 100, 0)
+    yield generator.createLinear(60000, 384, 447, 64, 30000, 30000, 100, 0)
+    yield generator.createLinear(60000, 256, 319, 64, 30000, 30000, 100, 0)
+    yield generator.createLinear(60000, 384, 447, 64, 30000, 30000, 100, 0)
+    yield generator.createLinear(30000, 0, 0, 0, 30000, 30000, 100, 0)
+
+    yield generator.createExit(0)
diff --git a/tests/gem5/replacement-policies/traces/lru_test2_st.py b/tests/gem5/replacement-policies/traces/lru_test2_st.py
new file mode 100644
index 0000000000..49dfdfb5db
--- /dev/null
+++ b/tests/gem5/replacement-policies/traces/lru_test2_st.py
@@ -0,0 +1,71 @@
+# Copyright (c) 2022 Jarvis Jia, Jing Qu, Matt Sinclair, & Mingyuan Xiang
+# All Rights Reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are
+# met: redistributions of source code must retain the above copyright
+# notice, this list of conditions and the following disclaimer;
+# redistributions in binary form must reproduce the above copyright
+# notice, this list of conditions and the following disclaimer in the
+# documentation and/or other materials provided with the distribution;
+# neither the name of the copyright holders nor the names of its
+# contributors may be used to endorse or promote products derived from
+# this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+# This test is targeting stores
+# Access pattern: A, C, E, G, A, C, I, K, M, O, E, G, E, G
+# Each letter represents a 64-byte address range.
+
+# The [] indicate two different sets, and each set has four ways.
+# [set0way0, set0way1, set0way2, set0way3],
+# [set1way0, set1way1, set1way2, set1way3],
+# If you have a 512B cache with 4-way associativity,
+# and each cache line is 64B. With LRU replacement policy, you will observe:
+# m, m, m, m, h, h, m, m, m, m, m, m, h, h, where 'h' means hit and 'm'
+# means miss.
+
+# Explanation of this result:
+# A, C, E, G are misses, now the cache stores ([A*, C, E, G],[ , , ,]).
+# A is marked as the LRU address range.
+# A, C are hits, and the cache stores ([A, C, E*, G],[ , , ,]).
+# I searches for a victim and selects E. Now the cache stores ([A, C, I, G*],[ , , ,]).
+# K searches for a victim and selects G. Now the cache stores (([A*, C, I, K],[ , , ,]).
+# M searches for a victim and selects A. Now the cache stores (([M, C*, I, K],[ , , ,]).
+# O searches for a victim and selects C. Now the cache stores (([M, O, I*, K],[ , , ,]).
+# E searches for a victim and selects I. Now the cache stores (([M, O, E, K*],[ , , ,]).
+# G searches for a victim and selects K. Now the cache stores (([M*, O, E, G],[ , , ,]).
+# E,G are hits.
+
+from m5.objects.ReplacementPolicies import LRURP as rp
+
+
+def python_generator(generator):
+    yield generator.createLinear(60000, 0, 63, 64, 30000, 30000, 0, 0)
+    yield generator.createLinear(60000, 128, 191, 64, 30000, 30000, 0, 0)
+    yield generator.createLinear(60000, 256, 319, 64, 30000, 30000, 0, 0)
+    yield generator.createLinear(60000, 384, 447, 64, 30000, 30000, 0, 0)
+    yield generator.createLinear(60000, 0, 63, 64, 30000, 30000, 0, 0)
+    yield generator.createLinear(60000, 128, 191, 64, 30000, 30000, 0, 0)
+    yield generator.createLinear(60000, 512, 575, 64, 30000, 30000, 0, 0)
+    yield generator.createLinear(60000, 640, 703, 64, 30000, 30000, 0, 0)
+    yield generator.createLinear(60000, 768, 831, 64, 30000, 30000, 0, 0)
+    yield generator.createLinear(60000, 896, 959, 64, 30000, 30000, 0, 0)
+    yield generator.createLinear(60000, 256, 319, 64, 30000, 30000, 0, 0)
+    yield generator.createLinear(60000, 384, 447, 64, 30000, 30000, 0, 0)
+    yield generator.createLinear(60000, 256, 319, 64, 30000, 30000, 0, 0)
+    yield generator.createLinear(60000, 384, 447, 64, 30000, 30000, 0, 0)
+    yield generator.createLinear(30000, 0, 0, 0, 30000, 30000, 0, 0)
+
+    yield generator.createExit(0)
diff --git a/tests/gem5/replacement-policies/traces/lru_test3_ld.py b/tests/gem5/replacement-policies/traces/lru_test3_ld.py
new file mode 100644
index 0000000000..da817e0272
--- /dev/null
+++ b/tests/gem5/replacement-policies/traces/lru_test3_ld.py
@@ -0,0 +1,62 @@
+# Copyright (c) 2022 Jarvis Jia, Jing Qu, Matt Sinclair, & Mingyuan Xiang
+# All Rights Reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are
+# met: redistributions of source code must retain the above copyright
+# notice, this list of conditions and the following disclaimer;
+# redistributions in binary form must reproduce the above copyright
+# notice, this list of conditions and the following disclaimer in the
+# documentation and/or other materials provided with the distribution;
+# neither the name of the copyright holders nor the names of its
+# contributors may be used to endorse or promote products derived from
+# this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+# This test is targeting loads.
+# Access pattern: A, C, E, G, A, C, E, G, I, A
+# Each letter represents a 64-bit address range.
+
+# The [] indicate two different sets, and each set has four ways.
+# [set0way0, set0way1, set0way2, set0way3],
+# [set1way0, set1way1, set1way2, set1way3],
+# If you have a 512B cache with 4-way associativity, and each cache
+# line is 64B with LRU replacement policy, you will observe:
+# m, m, m, m, h, h, h, h, m, m, where 'h' means
+# hit and 'm' means miss.
+
+# Explanation of this result:
+# A, C, E, G are misses, now the cache stores ([A*, C, E, G],[ , , ,]).
+# A is marked as the LRU address range.
+# A, C, E, G then hits, and the cache stores ([A*, C, E, G],[ , , ,]).
+# I searches for a victim and selects A. Now the cache stores ([E, C*, I, G],[ , , ,]).
+# A searches for a victim and selects C. Now the cache stores ([E, A, I*, G],[ , , ,]).
+
+from m5.objects.ReplacementPolicies import LRURP as rp
+
+
+def python_generator(generator):
+    yield generator.createLinear(60000, 0, 63, 64, 30000, 30000, 100, 0)
+    yield generator.createLinear(60000, 128, 191, 64, 30000, 30000, 100, 0)
+    yield generator.createLinear(60000, 256, 319, 64, 30000, 30000, 100, 0)
+    yield generator.createLinear(60000, 384, 447, 64, 30000, 30000, 100, 0)
+    yield generator.createLinear(60000, 0, 63, 64, 30000, 30000, 100, 0)
+    yield generator.createLinear(60000, 128, 191, 64, 30000, 30000, 100, 0)
+    yield generator.createLinear(60000, 256, 319, 64, 30000, 30000, 100, 0)
+    yield generator.createLinear(60000, 384, 447, 64, 30000, 30000, 100, 0)
+    yield generator.createLinear(60000, 512, 575, 64, 30000, 30000, 100, 0)
+    yield generator.createLinear(60000, 0, 63, 64, 30000, 30000, 100, 0)
+    yield generator.createLinear(30000, 0, 0, 0, 30000, 30000, 100, 0)
+
+    yield generator.createExit(0)
diff --git a/tests/gem5/replacement-policies/traces/lru_test3_st.py b/tests/gem5/replacement-policies/traces/lru_test3_st.py
new file mode 100644
index 0000000000..57c7fbe020
--- /dev/null
+++ b/tests/gem5/replacement-policies/traces/lru_test3_st.py
@@ -0,0 +1,62 @@
+# Copyright (c) 2022 Jarvis Jia, Jing Qu, Matt Sinclair, & Mingyuan Xiang
+# All Rights Reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are
+# met: redistributions of source code must retain the above copyright
+# notice, this list of conditions and the following disclaimer;
+# redistributions in binary form must reproduce the above copyright
+# notice, this list of conditions and the following disclaimer in the
+# documentation and/or other materials provided with the distribution;
+# neither the name of the copyright holders nor the names of its
+# contributors may be used to endorse or promote products derived from
+# this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+# This test is targeting stores
+# Access pattern: A, C, E, G, A, C, E, G, I, A
+# Each letter represents a 64-bit address range.
+
+# The [] indicate two different sets, and each set has four ways.
+# [set0way0, set0way1, set0way2, set0way3],
+# [set1way0, set1way1, set1way2, set1way3],
+# If you have a 512B cache with 4-way associativity, and each cache
+# line is 64B with LRU replacement policy, you will observe:
+# m, m, m, m, h, h, h, h, m, m, where 'h' means
+# hit and 'm' means miss.
+
+# Explanation of this result:
+# A, C, E, G are misses, now the cache stores ([A*, C, E, G],[ , , ,]).
+# A is marked as the LRU address range.
+# A, C, E, G then hits, and the cache stores ([A*, C, E, G],[ , , ,]).
+# I searches for a victim and selects A. Now the cache stores ([E, C*, I, G],[ , , ,]).
+# A searches for a victim and selects C. Now the cache stores ([E, A, I*, G],[ , , ,]).
+
+from m5.objects.ReplacementPolicies import LRURP as rp
+
+
+def python_generator(generator):
+    yield generator.createLinear(60000, 0, 63, 64, 30000, 30000, 0, 0)
+    yield generator.createLinear(60000, 128, 191, 64, 30000, 30000, 0, 0)
+    yield generator.createLinear(60000, 256, 319, 64, 30000, 30000, 0, 0)
+    yield generator.createLinear(60000, 384, 447, 64, 30000, 30000, 0, 0)
+    yield generator.createLinear(60000, 0, 63, 64, 30000, 30000, 0, 0)
+    yield generator.createLinear(60000, 128, 191, 64, 30000, 30000, 0, 0)
+    yield generator.createLinear(60000, 256, 319, 64, 30000, 30000, 0, 0)
+    yield generator.createLinear(60000, 384, 447, 64, 30000, 30000, 0, 0)
+    yield generator.createLinear(60000, 512, 575, 64, 30000, 30000, 0, 0)
+    yield generator.createLinear(60000, 0, 63, 64, 30000, 30000, 0, 0)
+    yield generator.createLinear(30000, 0, 0, 0, 30000, 30000, 0, 0)
+
+    yield generator.createExit(0)
diff --git a/tests/gem5/replacement-policies/traces/lru_test4_ld.py b/tests/gem5/replacement-policies/traces/lru_test4_ld.py
new file mode 100644
index 0000000000..c6624ed47d
--- /dev/null
+++ b/tests/gem5/replacement-policies/traces/lru_test4_ld.py
@@ -0,0 +1,63 @@
+# Copyright (c) 2022 Jarvis Jia, Jing Qu, Matt Sinclair, & Mingyuan Xiang
+# All Rights Reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are
+# met: redistributions of source code must retain the above copyright
+# notice, this list of conditions and the following disclaimer;
+# redistributions in binary form must reproduce the above copyright
+# notice, this list of conditions and the following disclaimer in the
+# documentation and/or other materials provided with the distribution;
+# neither the name of the copyright holders nor the names of its
+# contributors may be used to endorse or promote products derived from
+# this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+# This test is targeting loads.
+# Access pattern: A, C, E, G, E, G, A, C, I, E
+# Each letter represents a 64-byte address range.
+
+# The [] indicate two different sets, and each set has four ways.
+# [set0way0, set0way1, set0way2, set0way3],
+# [set1way0, set1way1, set1way2, set1way3],
+# If you have a 512B cache with 4-way associativity, and each cache
+# line is 64B with LRU replacement policy, you will observe:
+# m, m, m, m, h, h, h, h, m, m, where 'h' means
+# hit and 'm' means miss.
+
+# Explanation of this result:
+# A, C, E, G are misses, now the cache stores ([A*, C, E, G],[ , , ,]).
+# A is marked as the LRU address range.
+# E and G then hits, and the cache stores ([A*, C, E, G],[ , , ,]).
+# A and C then hits, and the cache stores ([A, C, E*, G],[ , , ,]).
+# I searches for a victim and selects E. Now the cache stores ([A, C, I, G*],[ , , ,]).
+# E searches for a victim and selects G. Now the cache stores ([A*, C, I, E],[ , , ,]).
+
+from m5.objects.ReplacementPolicies import LRURP as rp
+
+
+def python_generator(generator):
+    yield generator.createLinear(60000, 0, 63, 64, 30000, 30000, 100, 0)
+    yield generator.createLinear(60000, 128, 191, 64, 30000, 30000, 100, 0)
+    yield generator.createLinear(60000, 256, 319, 64, 30000, 30000, 100, 0)
+    yield generator.createLinear(60000, 384, 447, 64, 30000, 30000, 100, 0)
+    yield generator.createLinear(60000, 256, 319, 64, 30000, 30000, 100, 0)
+    yield generator.createLinear(60000, 384, 447, 64, 30000, 30000, 100, 0)
+    yield generator.createLinear(60000, 0, 63, 64, 30000, 30000, 100, 0)
+    yield generator.createLinear(60000, 128, 191, 64, 30000, 30000, 100, 0)
+    yield generator.createLinear(60000, 512, 575, 64, 30000, 30000, 100, 0)
+    yield generator.createLinear(60000, 256, 319, 64, 30000, 30000, 100, 0)
+    yield generator.createLinear(30000, 0, 0, 0, 30000, 30000, 100, 0)
+
+    yield generator.createExit(0)
diff --git a/tests/gem5/replacement-policies/traces/lru_test4_st.py b/tests/gem5/replacement-policies/traces/lru_test4_st.py
new file mode 100644
index 0000000000..3b3c26b803
--- /dev/null
+++ b/tests/gem5/replacement-policies/traces/lru_test4_st.py
@@ -0,0 +1,63 @@
+# Copyright (c) 2022 Jarvis Jia, Jing Qu, Matt Sinclair, & Mingyuan Xiang
+# All Rights Reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are
+# met: redistributions of source code must retain the above copyright
+# notice, this list of conditions and the following disclaimer;
+# redistributions in binary form must reproduce the above copyright
+# notice, this list of conditions and the following disclaimer in the
+# documentation and/or other materials provided with the distribution;
+# neither the name of the copyright holders nor the names of its
+# contributors may be used to endorse or promote products derived from
+# this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+# This test is targeting stores
+# Access pattern: A, C, E, G, E, G, A, C, I, E
+# Each letter represents a 64-byte address range.
+
+# The [] indicate two different sets, and each set has four ways.
+# [set0way0, set0way1, set0way2, set0way3],
+# [set1way0, set1way1, set1way2, set1way3],
+# If you have a 512B cache with 4-way associativity, and each cache
+# line is 64B with LRU replacement policy, you will observe:
+# m, m, m, m, h, h, h, h, m, m, where 'h' means
+# hit and 'm' means miss.
+
+# Explanation of this result:
+# A, C, E, G are misses, now the cache stores ([A*, C, E, G],[ , , ,]).
+# A is marked as the LRU address range.
+# E and G then hits, and the cache stores ([A*, C, E, G],[ , , ,]).
+# A and C then hits, and the cache stores ([A, C, E*, G],[ , , ,]).
+# I searches for a victim and selects E. Now the cache stores ([A, C, I, G*],[ , , ,]).
+# E searches for a victim and selects G. Now the cache stores ([A*, C, I, E],[ , , ,]).
+
+from m5.objects.ReplacementPolicies import LRURP as rp
+
+
+def python_generator(generator):
+    yield generator.createLinear(60000, 0, 63, 64, 30000, 30000, 0, 0)
+    yield generator.createLinear(60000, 128, 191, 64, 30000, 30000, 0, 0)
+    yield generator.createLinear(60000, 256, 319, 64, 30000, 30000, 0, 0)
+    yield generator.createLinear(60000, 384, 447, 64, 30000, 30000, 0, 0)
+    yield generator.createLinear(60000, 256, 319, 64, 30000, 30000, 0, 0)
+    yield generator.createLinear(60000, 384, 447, 64, 30000, 30000, 0, 0)
+    yield generator.createLinear(60000, 0, 63, 64, 30000, 30000, 0, 0)
+    yield generator.createLinear(60000, 128, 191, 64, 30000, 30000, 0, 0)
+    yield generator.createLinear(60000, 512, 575, 64, 30000, 30000, 0, 0)
+    yield generator.createLinear(60000, 256, 319, 64, 30000, 30000, 0, 0)
+    yield generator.createLinear(30000, 0, 0, 0, 30000, 30000, 0, 0)
+
+    yield generator.createExit(0)
diff --git a/tests/gem5/replacement-policies/traces/mru_test1_ld.py b/tests/gem5/replacement-policies/traces/mru_test1_ld.py
new file mode 100644
index 0000000000..d93695a3c1
--- /dev/null
+++ b/tests/gem5/replacement-policies/traces/mru_test1_ld.py
@@ -0,0 +1,63 @@
+# Copyright (c) 2022 Jarvis Jia, Jing Qu, Matt Sinclair, & Mingyuan Xiang
+# All Rights Reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are
+# met: redistributions of source code must retain the above copyright
+# notice, this list of conditions and the following disclaimer;
+# redistributions in binary form must reproduce the above copyright
+# notice, this list of conditions and the following disclaimer in the
+# documentation and/or other materials provided with the distribution;
+# neither the name of the copyright holders nor the names of its
+# contributors may be used to endorse or promote products derived from
+# this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+# This test is targeting loads.
+# Access pattern: A, C, E, G, I, K, M, O, A, C,
+# Each letter represents a 64-byte address range.
+
+# The [] indicate two different sets, and each set has four ways.
+# [set0way0, set0way1, set0way2, set0way3],
+# [set1way0, set1way1, set1way2, set1way3],
+# If you have a 512B cache with 4-way associativity,
+# and each cache line is 64B. With MRU replacement policy, you will observe:
+# m, m, m, m, m, m, m, m, h, h, where 'h' means hit and 'm' means miss.
+
+# Explanation of this result:
+# A, C, E, G are misses, now the cache stores ([A, C, E, G*],[ , , ,]).
+# G is marked as the MRU address range.
+# I searches for a victim and selects G. Now the cache stores ([A, C, E, I*],[ , , ,]).
+# K searches for a victim and selects I. Now the cache stores ([A, C, E, K*],[ , , ,]).
+# M searches for a victim and selects K. Now the cache stores ([A, C, E, M*],[ , , ,]).
+# O searches for a victim and selects M. Now the cache stores ([A, C, E, O*],[ , , ,]).
+# A,C are hits.
+
+from m5.objects.ReplacementPolicies import MRURP as rp
+
+
+def python_generator(generator):
+    yield generator.createLinear(60000, 0, 63, 64, 30000, 30000, 100, 0)
+    yield generator.createLinear(60000, 128, 191, 64, 30000, 30000, 100, 0)
+    yield generator.createLinear(60000, 256, 319, 64, 30000, 30000, 100, 0)
+    yield generator.createLinear(60000, 384, 447, 64, 30000, 30000, 100, 0)
+    yield generator.createLinear(60000, 512, 575, 64, 30000, 30000, 100, 0)
+    yield generator.createLinear(60000, 640, 703, 64, 30000, 30000, 100, 0)
+    yield generator.createLinear(60000, 768, 831, 64, 30000, 30000, 100, 0)
+    yield generator.createLinear(60000, 896, 959, 64, 30000, 30000, 100, 0)
+    yield generator.createLinear(60000, 0, 63, 64, 30000, 30000, 100, 0)
+    yield generator.createLinear(60000, 128, 191, 64, 30000, 30000, 100, 0)
+    yield generator.createLinear(30000, 0, 0, 0, 30000, 30000, 100, 0)
+
+    yield generator.createExit(0)
diff --git a/tests/gem5/replacement-policies/traces/mru_test1_st.py b/tests/gem5/replacement-policies/traces/mru_test1_st.py
new file mode 100644
index 0000000000..1b48ff6024
--- /dev/null
+++ b/tests/gem5/replacement-policies/traces/mru_test1_st.py
@@ -0,0 +1,63 @@
+# Copyright (c) 2022 Jarvis Jia, Jing Qu, Matt Sinclair, & Mingyuan Xiang
+# All Rights Reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are
+# met: redistributions of source code must retain the above copyright
+# notice, this list of conditions and the following disclaimer;
+# redistributions in binary form must reproduce the above copyright
+# notice, this list of conditions and the following disclaimer in the
+# documentation and/or other materials provided with the distribution;
+# neither the name of the copyright holders nor the names of its
+# contributors may be used to endorse or promote products derived from
+# this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+# This test is targeting stores
+# Access pattern: A, C, E, G, I, K, M, O, A, C,
+# Each letter represents a 64-byte address range.
+
+# The [] indicate two different sets, and each set has four ways.
+# [set0way0, set0way1, set0way2, set0way3],
+# [set1way0, set1way1, set1way2, set1way3],
+# If you have a 512B cache with 4-way associativity,
+# and each cache line is 64B. With MRU replacement policy, you will observe:
+# m, m, m, m, m, m, m, m, h, h, where 'h' means hit and 'm' means miss.
+
+# Explanation of this result:
+# A, C, E, G are misses, now the cache stores ([A, C, E, G*],[ , , ,]).
+# G is marked as the MRU address range.
+# I searches for a victim and selects G. Now the cache stores ([A, C, E, I*],[ , , ,]).
+# K searches for a victim and selects I. Now the cache stores ([A, C, E, K*],[ , , ,]).
+# M searches for a victim and selects K. Now the cache stores ([A, C, E, M*],[ , , ,]).
+# O searches for a victim and selects M. Now the cache stores ([A, C, E, O*],[ , , ,]).
+# A,C are hits.
+
+from m5.objects.ReplacementPolicies import MRURP as rp
+
+
+def python_generator(generator):
+    yield generator.createLinear(60000, 0, 63, 64, 30000, 30000, 0, 0)
+    yield generator.createLinear(60000, 128, 191, 64, 30000, 30000, 0, 0)
+    yield generator.createLinear(60000, 256, 319, 64, 30000, 30000, 0, 0)
+    yield generator.createLinear(60000, 384, 447, 64, 30000, 30000, 0, 0)
+    yield generator.createLinear(60000, 512, 575, 64, 30000, 30000, 0, 0)
+    yield generator.createLinear(60000, 640, 703, 64, 30000, 30000, 0, 0)
+    yield generator.createLinear(60000, 768, 831, 64, 30000, 30000, 0, 0)
+    yield generator.createLinear(60000, 896, 959, 64, 30000, 30000, 0, 0)
+    yield generator.createLinear(60000, 0, 63, 64, 30000, 30000, 0, 0)
+    yield generator.createLinear(60000, 128, 191, 64, 30000, 30000, 0, 0)
+    yield generator.createLinear(30000, 0, 0, 0, 30000, 30000, 0, 0)
+
+    yield generator.createExit(0)
diff --git a/tests/gem5/replacement-policies/traces/mru_test2_ld.py b/tests/gem5/replacement-policies/traces/mru_test2_ld.py
new file mode 100644
index 0000000000..4f5bcbcbd5
--- /dev/null
+++ b/tests/gem5/replacement-policies/traces/mru_test2_ld.py
@@ -0,0 +1,66 @@
+# Copyright (c) 2022 Jarvis Jia, Jing Qu, Matt Sinclair, & Mingyuan Xiang
+# All Rights Reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are
+# met: redistributions of source code must retain the above copyright
+# notice, this list of conditions and the following disclaimer;
+# redistributions in binary form must reproduce the above copyright
+# notice, this list of conditions and the following disclaimer in the
+# documentation and/or other materials provided with the distribution;
+# neither the name of the copyright holders nor the names of its
+# contributors may be used to endorse or promote products derived from
+# this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+# This test is targeting loads.
+# Access pattern:  A, C, E, G, A, I, K, M, O, A, G
+# Each letter represents a 64-byte address range.
+
+# The [] indicate two different sets, and each set has four ways.
+# [set0way0, set0way1, set0way2, set0way3],
+# [set1way0, set1way1, set1way2, set1way3],
+# If you have a 512B cache with 4-way associativity,
+# and each cache line is 64B. With MRU replacement policy, you will observe:
+# m, m, m, m, h, m, m, m, m, m, h where 'h' means hit and 'm' means miss.
+
+# Explanation of this result:
+# A, C, E, G are misses, now the cache stores ([A, C, E, G*],[ , , ,]).
+# G is marked as the MRU address range.
+# A is a hit, now the cache stores ([A*, C, E, G],[ , , ,]).
+# I searches for a victim and selects A. Now the cache stores ([I*, C, E, G],[ , , ,]).
+# K searches for a victim and selects I. Now the cache stores ([K*, C, E, G],[ , , ,]).
+# M searches for a victim and selects K. Now the cache stores ([M*, C, E, G],[ , , ,]).
+# O searches for a victim and selects M. Now the cache stores ([O*, C, E, G],[ , , ,]).
+# A searches for a victim and selects O. Now the cache stores ([A*, C, E, G],[ , , ,]).
+# G is a hit.
+
+from m5.objects.ReplacementPolicies import MRURP as rp
+
+
+def python_generator(generator):
+    yield generator.createLinear(60000, 0, 63, 64, 30000, 30000, 100, 0)
+    yield generator.createLinear(60000, 128, 191, 64, 30000, 30000, 100, 0)
+    yield generator.createLinear(60000, 256, 319, 64, 30000, 30000, 100, 0)
+    yield generator.createLinear(60000, 384, 447, 64, 30000, 30000, 100, 0)
+    yield generator.createLinear(60000, 0, 63, 64, 30000, 30000, 100, 0)
+    yield generator.createLinear(60000, 512, 575, 64, 30000, 30000, 100, 0)
+    yield generator.createLinear(60000, 640, 703, 64, 30000, 30000, 100, 0)
+    yield generator.createLinear(60000, 768, 831, 64, 30000, 30000, 100, 0)
+    yield generator.createLinear(60000, 896, 959, 64, 30000, 30000, 100, 0)
+    yield generator.createLinear(60000, 0, 63, 64, 30000, 30000, 100, 0)
+    yield generator.createLinear(60000, 384, 447, 64, 30000, 30000, 100, 0)
+    yield generator.createLinear(30000, 0, 0, 0, 30000, 30000, 100, 0)
+
+    yield generator.createExit(0)
diff --git a/tests/gem5/replacement-policies/traces/mru_test2_st.py b/tests/gem5/replacement-policies/traces/mru_test2_st.py
new file mode 100644
index 0000000000..7ffbde09e2
--- /dev/null
+++ b/tests/gem5/replacement-policies/traces/mru_test2_st.py
@@ -0,0 +1,66 @@
+# Copyright (c) 2022 Jarvis Jia, Jing Qu, Matt Sinclair, & Mingyuan Xiang
+# All Rights Reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are
+# met: redistributions of source code must retain the above copyright
+# notice, this list of conditions and the following disclaimer;
+# redistributions in binary form must reproduce the above copyright
+# notice, this list of conditions and the following disclaimer in the
+# documentation and/or other materials provided with the distribution;
+# neither the name of the copyright holders nor the names of its
+# contributors may be used to endorse or promote products derived from
+# this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+# This test is targeting stores.
+# Access pattern:  A, C, E, G, A, I, K, M, O, A, G
+# Each letter represents a 64-byte address range.
+
+# The [] indicate two different sets, and each set has four ways.
+# [set0way0, set0way1, set0way2, set0way3],
+# [set1way0, set1way1, set1way2, set1way3],
+# If you have a 512B cache with 4-way associativity,
+# and each cache line is 64B. With MRU replacement policy, you will observe:
+# m, m, m, m, h, m, m, m, m, m, h where 'h' means hit and 'm' means miss.
+
+# Explanation of this result:
+# A, C, E, G are misses, now the cache stores ([A, C, E, G*],[ , , ,]).
+# G is marked as the MRU address range.
+# A is a hit, now the cache stores ([A*, C, E, G],[ , , ,]).
+# I searches for a victim and selects A. Now the cache stores ([I*, C, E, G],[ , , ,]).
+# K searches for a victim and selects I. Now the cache stores ([K*, C, E, G],[ , , ,]).
+# M searches for a victim and selects K. Now the cache stores ([M*, C, E, G],[ , , ,]).
+# O searches for a victim and selects M. Now the cache stores ([O*, C, E, G],[ , , ,]).
+# A searches for a victim and selects O. Now the cache stores ([A*, C, E, G],[ , , ,]).
+# G is a hit.
+
+from m5.objects.ReplacementPolicies import MRURP as rp
+
+
+def python_generator(generator):
+    yield generator.createLinear(60000, 0, 63, 64, 30000, 30000, 0, 0)
+    yield generator.createLinear(60000, 128, 191, 64, 30000, 30000, 0, 0)
+    yield generator.createLinear(60000, 256, 319, 64, 30000, 30000, 0, 0)
+    yield generator.createLinear(60000, 384, 447, 64, 30000, 30000, 0, 0)
+    yield generator.createLinear(60000, 0, 63, 64, 30000, 30000, 0, 0)
+    yield generator.createLinear(60000, 512, 575, 64, 30000, 30000, 0, 0)
+    yield generator.createLinear(60000, 640, 703, 64, 30000, 30000, 0, 0)
+    yield generator.createLinear(60000, 768, 831, 64, 30000, 30000, 0, 0)
+    yield generator.createLinear(60000, 896, 959, 64, 30000, 30000, 0, 0)
+    yield generator.createLinear(60000, 0, 63, 64, 30000, 30000, 0, 0)
+    yield generator.createLinear(60000, 384, 447, 64, 30000, 30000, 0, 0)
+    yield generator.createLinear(30000, 0, 0, 0, 30000, 30000, 0, 0)
+
+    yield generator.createExit(0)
diff --git a/tests/gem5/replacement-policies/traces/nru_test1_ld.py b/tests/gem5/replacement-policies/traces/nru_test1_ld.py
new file mode 100644
index 0000000000..f3b89fcc7b
--- /dev/null
+++ b/tests/gem5/replacement-policies/traces/nru_test1_ld.py
@@ -0,0 +1,71 @@
+# Copyright (c) 2022 Jarvis Jia, Jing Qu, Matt Sinclair, & Mingyuan Xiang
+# All Rights Reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are
+# met: redistributions of source code must retain the above copyright
+# notice, this list of conditions and the following disclaimer;
+# redistributions in binary form must reproduce the above copyright
+# notice, this list of conditions and the following disclaimer in the
+# documentation and/or other materials provided with the distribution;
+# neither the name of the copyright holders nor the names of its
+# contributors may be used to endorse or promote products derived from
+# this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+# This test is targeting loads.
+# Access pattern: A, C, E, G, A, I, A, E, K, E, G
+# Each letter represents a 64-byte address range.
+
+# The [] indicate two different sets, and each set has four ways.
+# [set0way0, set0way1, set0way2, set0way3],
+# [set1way0, set1way1, set1way2, set1way3],
+# If you have a 512B cache with 4-way associativity,
+# and each cache line is 64B.
+# This test can be used to test the correctness of NRU
+# replacement policy. The NRU replacement policy will always find
+# the victim block from the left side of a cache. More specifically,
+# with NRU replacement policy, you will observe:
+# m, m, m, m, h, m, m, h, m, h, m, where 'm' means miss, and 'h' means
+# hit.
+
+# Explanation of this result:
+# A, C, E, G are misses, now the cache stores ([A0, C0, E0, G0],[ , , ,]).
+# The number following each letter is the NRU bit for the address range.
+# A is a hit. Now the cache stores ([A0, C0, E0, G0],[ , , ,]).
+# I searches a victim and selects A. Now the cache stores ([I0, C1, E1, G1],[ , , ,]).
+# A searches a victim and selects C. Now the cache stores ([I0, A0, E1, G1],[ , , ,]).
+# E hits. Now the cache stores ([I0, A0, E0, G1],[ , , ,]).
+# K searches a victim and selects G. Now the cache stores ([I0, A0, E0, K0],[ , , ,]).
+# E hits. Now the cache stores ([I0, A0, E0, K0],[ , , ,]).
+# G searches a victim and selects I. Now the cache stores ([G0, A1, E1, K1],[ , , ,]).
+
+from m5.objects.ReplacementPolicies import NRURP as rp
+
+
+def python_generator(generator):
+    yield generator.createLinear(60000, 0, 63, 64, 30000, 30000, 100, 0)
+    yield generator.createLinear(60000, 128, 191, 64, 30000, 30000, 100, 0)
+    yield generator.createLinear(60000, 256, 319, 64, 30000, 30000, 100, 0)
+    yield generator.createLinear(60000, 384, 447, 64, 30000, 30000, 100, 0)
+    yield generator.createLinear(60000, 0, 63, 64, 30000, 30000, 100, 0)
+    yield generator.createLinear(60000, 512, 575, 64, 30000, 30000, 100, 0)
+    yield generator.createLinear(60000, 0, 63, 64, 30000, 30000, 100, 0)
+    yield generator.createLinear(60000, 256, 319, 64, 30000, 30000, 100, 0)
+    yield generator.createLinear(60000, 640, 703, 64, 30000, 30000, 100, 0)
+    yield generator.createLinear(60000, 256, 319, 64, 30000, 30000, 100, 0)
+    yield generator.createLinear(60000, 384, 447, 64, 30000, 30000, 100, 0)
+    yield generator.createLinear(30000, 0, 0, 0, 30000, 30000, 100, 0)
+
+    yield generator.createExit(0)
diff --git a/tests/gem5/replacement-policies/traces/nru_test1_st.py b/tests/gem5/replacement-policies/traces/nru_test1_st.py
new file mode 100644
index 0000000000..c5b8738c83
--- /dev/null
+++ b/tests/gem5/replacement-policies/traces/nru_test1_st.py
@@ -0,0 +1,71 @@
+# Copyright (c) 2022 Jarvis Jia, Jing Qu, Matt Sinclair, & Mingyuan Xiang
+# All Rights Reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are
+# met: redistributions of source code must retain the above copyright
+# notice, this list of conditions and the following disclaimer;
+# redistributions in binary form must reproduce the above copyright
+# notice, this list of conditions and the following disclaimer in the
+# documentation and/or other materials provided with the distribution;
+# neither the name of the copyright holders nor the names of its
+# contributors may be used to endorse or promote products derived from
+# this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+# This test is targeting stores
+# Access pattern: A, C, E, G, A, I, A, E, K, E, G
+# Each letter represents a 64-byte address range.
+
+# The [] indicate two different sets, and each set has four ways.
+# [set0way0, set0way1, set0way2, set0way3],
+# [set1way0, set1way1, set1way2, set1way3],
+# If you have a 512B cache with 4-way associativity,
+# and each cache line is 64B.
+# This test can be used to test the correctness of NRU
+# replacement policy. The NRU replacement policy will always find
+# the victim block from the left side of a cache. More specifically,
+# with NRU replacement policy, you will observe:
+# m, m, m, m, h, m, m, h, m, h, m, where 'm' means miss, and 'h' means
+# hit.
+
+# Explanation of this result:
+# A, C, E, G are misses, now the cache stores ([A0, C0, E0, G0],[ , , ,]).
+# The number following each letter is the NRU bit for the address range.
+# A is a hit. Now the cache stores ([A0, C0, E0, G0],[ , , ,]).
+# I searches a victim and selects A. Now the cache stores ([I0, C1, E1, G1],[ , , ,]).
+# A searches a victim and selects C. Now the cache stores ([I0, A0, E1, G1],[ , , ,]).
+# E hits. Now the cache stores ([I0, A0, E0, G1],[ , , ,]).
+# K searches a victim and selects G. Now the cache stores ([I0, A0, E0, K0],[ , , ,]).
+# E hits. Now the cache stores ([I0, A0, E0, K0],[ , , ,]).
+# G searches a victim and selects I. Now the cache stores ([G0, A1, E1, K1],[ , , ,]).
+
+from m5.objects.ReplacementPolicies import NRURP as rp
+
+
+def python_generator(generator):
+    yield generator.createLinear(60000, 0, 63, 64, 30000, 30000, 0, 0)
+    yield generator.createLinear(60000, 128, 191, 64, 30000, 30000, 0, 0)
+    yield generator.createLinear(60000, 256, 319, 64, 30000, 30000, 0, 0)
+    yield generator.createLinear(60000, 384, 447, 64, 30000, 30000, 0, 0)
+    yield generator.createLinear(60000, 0, 63, 64, 30000, 30000, 0, 0)
+    yield generator.createLinear(60000, 512, 575, 64, 30000, 30000, 0, 0)
+    yield generator.createLinear(60000, 0, 63, 64, 30000, 30000, 0, 0)
+    yield generator.createLinear(60000, 256, 319, 64, 30000, 30000, 0, 0)
+    yield generator.createLinear(60000, 640, 703, 64, 30000, 30000, 0, 0)
+    yield generator.createLinear(60000, 256, 319, 64, 30000, 30000, 0, 0)
+    yield generator.createLinear(60000, 384, 447, 64, 30000, 30000, 0, 0)
+    yield generator.createLinear(30000, 0, 0, 0, 30000, 30000, 0, 0)
+
+    yield generator.createExit(0)
diff --git a/tests/gem5/replacement-policies/traces/rrip_test1_ld.py b/tests/gem5/replacement-policies/traces/rrip_test1_ld.py
new file mode 100644
index 0000000000..e6a674c6d8
--- /dev/null
+++ b/tests/gem5/replacement-policies/traces/rrip_test1_ld.py
@@ -0,0 +1,67 @@
+# Copyright (c) 2022 Jarvis Jia, Jing Qu, Matt Sinclair, & Mingyuan Xiang
+# All Rights Reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are
+# met: redistributions of source code must retain the above copyright
+# notice, this list of conditions and the following disclaimer;
+# redistributions in binary form must reproduce the above copyright
+# notice, this list of conditions and the following disclaimer in the
+# documentation and/or other materials provided with the distribution;
+# neither the name of the copyright holders nor the names of its
+# contributors may be used to endorse or promote products derived from
+# this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+# This test is targeting loads.
+# Access pattern: A, C, E, G, A, I, K, M, C, A
+# Each letter represents a 64-byte address range.
+
+# The [] indicate two different sets, and each set has four ways.
+# [set0way0, set0way1, set0way2, set0way3],
+# [set1way0, set1way1, set1way2, set1way3],
+# If you have a 512B cache with 4-way associativity,
+# and each cache line is 64B. This test can be used to test the correctness of RRIP
+# replacement policy. you will observe: # m, m, m, m, h, m, m, m, m, m.
+# where 'm' means miss, and 'h' means hit.
+
+# Explanation of this result:
+# A, C, E, G are misses, now the cache stores ([A2, C2, E2, G2],[ , , ,]).
+# The number following each letter is the RRPV for that address range.
+# A is a hit, now the cache stores ([A1, C2, E2, G2],[ , , ,]).
+# I searches for a victim and selects the highest RRPV C, since C
+# is not saturated, A, E and G will be increased by 1,
+# which it stores ([A2, I2, E3, G3],[ , , ,]).
+# K searches for a victim and selects E. Now it stores ([A2, I2, K2, G3],[ , , ,]).
+# M searches for a victim and selects G. Now it stores ([A2, I2, K2, M2],[ , , ,]).
+# C searches for a victim and selects A. Now it stores ([C2, I3, K3, M3],[ , , ,]).
+# A searches for a victim and selects I. Now it stores ([C2, A2, K3, M3],[ , , ,]).
+
+from m5.objects.ReplacementPolicies import RRIPRP as rp
+
+
+def python_generator(generator):
+    yield generator.createLinear(60000, 0, 63, 64, 30000, 30000, 100, 0)
+    yield generator.createLinear(60000, 128, 191, 64, 30000, 30000, 100, 0)
+    yield generator.createLinear(60000, 256, 319, 64, 30000, 30000, 100, 0)
+    yield generator.createLinear(60000, 384, 447, 64, 30000, 30000, 100, 0)
+    yield generator.createLinear(60000, 0, 63, 64, 30000, 30000, 100, 0)
+    yield generator.createLinear(60000, 512, 575, 64, 30000, 30000, 100, 0)
+    yield generator.createLinear(60000, 640, 703, 64, 30000, 30000, 100, 0)
+    yield generator.createLinear(60000, 768, 831, 64, 30000, 30000, 100, 0)
+    yield generator.createLinear(60000, 128, 191, 64, 30000, 30000, 100, 0)
+    yield generator.createLinear(60000, 0, 63, 64, 30000, 30000, 100, 0)
+    yield generator.createLinear(30000, 0, 0, 0, 30000, 30000, 100, 0)
+
+    yield generator.createExit(0)
diff --git a/tests/gem5/replacement-policies/traces/rrip_test1_st.py b/tests/gem5/replacement-policies/traces/rrip_test1_st.py
new file mode 100644
index 0000000000..702e2a1bc7
--- /dev/null
+++ b/tests/gem5/replacement-policies/traces/rrip_test1_st.py
@@ -0,0 +1,67 @@
+# Copyright (c) 2022 Jarvis Jia, Jing Qu, Matt Sinclair, & Mingyuan Xiang
+# All Rights Reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are
+# met: redistributions of source code must retain the above copyright
+# notice, this list of conditions and the following disclaimer;
+# redistributions in binary form must reproduce the above copyright
+# notice, this list of conditions and the following disclaimer in the
+# documentation and/or other materials provided with the distribution;
+# neither the name of the copyright holders nor the names of its
+# contributors may be used to endorse or promote products derived from
+# this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+# This test is targeting stores
+# Access pattern: A, C, E, G, A, I, K, M, C, A
+# Each letter represents a 64-byte address range.
+
+# The [] indicate two different sets, and each set has four ways.
+# [set0way0, set0way1, set0way2, set0way3],
+# [set1way0, set1way1, set1way2, set1way3],
+# If you have a 512B cache with 4-way associativity,
+# and each cache line is 64B. This test can be used to test the correctness of RRIP
+# replacement policy. you will observe: # m, m, m, m, h, m, m, m, m, m.
+# where 'm' means miss, and 'h' means hit.
+
+# Explanation of this result:
+# A, C, E, G are misses, now the cache stores ([A2, C2, E2, G2],[ , , ,]).
+# The number following each letter is the RRPV for that address range.
+# A is a hit, now the cache stores ([A1, C2, E2, G2],[ , , ,]).
+# I searches for a victim and selects the highest RRPV C, since C
+# is not saturated, A, E and G will be increased by 1,
+# which it stores ([A2, I2, E3, G3],[ , , ,]).
+# K searches for a victim and selects E. Now it stores ([A2, I2, K2, G3],[ , , ,]).
+# M searches for a victim and selects G. Now it stores ([A2, I2, K2, M2],[ , , ,]).
+# C searches for a victim and selects A. Now it stores ([C2, I3, K3, M3],[ , , ,]).
+# A searches for a victim and selects I. Now it stores ([C2, A2, K3, M3],[ , , ,]).
+
+from m5.objects.ReplacementPolicies import RRIPRP as rp
+
+
+def python_generator(generator):
+    yield generator.createLinear(60000, 0, 63, 64, 30000, 30000, 0, 0)
+    yield generator.createLinear(60000, 128, 191, 64, 30000, 30000, 0, 0)
+    yield generator.createLinear(60000, 256, 319, 64, 30000, 30000, 0, 0)
+    yield generator.createLinear(60000, 384, 447, 64, 30000, 30000, 0, 0)
+    yield generator.createLinear(60000, 0, 63, 64, 30000, 30000, 0, 0)
+    yield generator.createLinear(60000, 512, 575, 64, 30000, 30000, 0, 0)
+    yield generator.createLinear(60000, 640, 703, 64, 30000, 30000, 0, 0)
+    yield generator.createLinear(60000, 768, 831, 64, 30000, 30000, 0, 0)
+    yield generator.createLinear(60000, 128, 191, 64, 30000, 30000, 0, 0)
+    yield generator.createLinear(60000, 0, 63, 64, 30000, 30000, 0, 0)
+    yield generator.createLinear(30000, 0, 0, 0, 30000, 30000, 0, 0)
+
+    yield generator.createExit(0)
diff --git a/tests/gem5/replacement-policies/traces/rrip_test2_ld.py b/tests/gem5/replacement-policies/traces/rrip_test2_ld.py
new file mode 100644
index 0000000000..b9f2ee026e
--- /dev/null
+++ b/tests/gem5/replacement-policies/traces/rrip_test2_ld.py
@@ -0,0 +1,71 @@
+# Copyright (c) 2022 Jarvis Jia, Jing Qu, Matt Sinclair, & Mingyuan Xiang
+# All Rights Reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are
+# met: redistributions of source code must retain the above copyright
+# notice, this list of conditions and the following disclaimer;
+# redistributions in binary form must reproduce the above copyright
+# notice, this list of conditions and the following disclaimer in the
+# documentation and/or other materials provided with the distribution;
+# neither the name of the copyright holders nor the names of its
+# contributors may be used to endorse or promote products derived from
+# this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+
+# This test is targeting loads.
+# Access pattern: A, A, A, C, C, E, E, G, I, K, M, O, A
+# Each letter represents a 64-byte address range.
+
+# The [] indicate two different sets, and each set has four ways.
+# [set0way0, set0way1, set0way2, set0way3],
+# [set1way0, set1way1, set1way2, set1way3],
+# If you have a 512B cache with 4-way associativity,
+# and each cache line is 64B. This test can be used to test the correctness
+# of RRIP replacement policy. More specifically, with RRIP replacement
+# policy, you will observe: m, h, h, m, h, m, h, m, m, m, m, m, h
+# where 'm' means miss, and 'h' means hit.
+
+# Explanation of this result:
+# After three A access, two C accesses, 2 E accesses and one G access,
+# the cache stores ([A0, C1, E1, G2],[ , , ,]).
+# The number following each letter is the RRPV for that address range.
+# I searches a victim and selects G. Now the cache stores ([A1, C2, E2, I2],[ , , ,]).
+# K searches a victim and selects C. Now the cache stores ([A2, K2, E3, I3],[ , , ,]).
+# M searches a victim and selects E. Now the cache stores ([A2, K2, M2, I3],[ , , ,]).
+# O searches a victim and selects I. NOW the cache stores ([A2, K2, M2, O2],[ , , ,]).
+# A hits.
+
+from m5.objects.ReplacementPolicies import RRIPRP as rp
+
+
+def python_generator(generator):
+
+    yield generator.createLinear(60000, 0, 63, 64, 30000, 30000, 100, 0)
+    yield generator.createLinear(60000, 0, 63, 64, 30000, 30000, 100, 0)
+    yield generator.createLinear(60000, 0, 63, 64, 30000, 30000, 100, 0)
+    yield generator.createLinear(60000, 128, 191, 64, 30000, 30000, 100, 0)
+    yield generator.createLinear(60000, 128, 191, 64, 30000, 30000, 100, 0)
+    yield generator.createLinear(60000, 256, 319, 64, 30000, 30000, 100, 0)
+    yield generator.createLinear(60000, 256, 319, 64, 30000, 30000, 100, 0)
+    yield generator.createLinear(60000, 384, 447, 64, 30000, 30000, 100, 0)
+    yield generator.createLinear(60000, 512, 575, 64, 30000, 30000, 100, 0)
+    yield generator.createLinear(60000, 640, 703, 64, 30000, 30000, 100, 0)
+    yield generator.createLinear(60000, 768, 831, 64, 30000, 30000, 100, 0)
+    yield generator.createLinear(60000, 896, 959, 64, 30000, 30000, 100, 0)
+    yield generator.createLinear(60000, 0, 63, 64, 30000, 30000, 100, 0)
+    yield generator.createLinear(30000, 0, 0, 0, 30000, 30000, 100, 0)
+
+    yield generator.createExit(0)
diff --git a/tests/gem5/replacement-policies/traces/rrip_test2_st.py b/tests/gem5/replacement-policies/traces/rrip_test2_st.py
new file mode 100644
index 0000000000..be23756a95
--- /dev/null
+++ b/tests/gem5/replacement-policies/traces/rrip_test2_st.py
@@ -0,0 +1,71 @@
+# Copyright (c) 2022 Jarvis Jia, Jing Qu, Matt Sinclair, & Mingyuan Xiang
+# All Rights Reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are
+# met: redistributions of source code must retain the above copyright
+# notice, this list of conditions and the following disclaimer;
+# redistributions in binary form must reproduce the above copyright
+# notice, this list of conditions and the following disclaimer in the
+# documentation and/or other materials provided with the distribution;
+# neither the name of the copyright holders nor the names of its
+# contributors may be used to endorse or promote products derived from
+# this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+
+# This test is targeting stores
+# Access pattern: A, A, A, C, C, E, E, G, I, K, M, O, A
+# Each letter represents a 64-byte address range.
+
+# The [] indicate two different sets, and each set has four ways.
+# [set0way0, set0way1, set0way2, set0way3],
+# [set1way0, set1way1, set1way2, set1way3],
+# If you have a 512B cache with 4-way associativity,
+# and each cache line is 64B. This test can be used to test the correctness
+# of RRIP replacement policy. More specifically, with RRIP replacement
+# policy, you will observe: m, h, h, m, h, m, h, m, m, m, m, m, h
+# where 'm' means miss, and 'h' means hit.
+
+# Explanation of this result:
+# After three A access, two C accesses, 2 E accesses and one G access,
+# the cache stores ([A0, C1, E1, G2],[ , , ,]).
+# The number following each letter is the RRPV for that address range.
+# I searches a victim and selects G. Now the cache stores ([A1, C2, E2, I2],[ , , ,]).
+# K searches a victim and selects C. Now the cache stores ([A2, K2, E3, I3],[ , , ,]).
+# M searches a victim and selects E. Now the cache stores ([A2, K2, M2, I3],[ , , ,]).
+# O searches a victim and selects I. NOW the cache stores ([A2, K2, M2, O2],[ , , ,]).
+# A hits.
+
+from m5.objects.ReplacementPolicies import RRIPRP as rp
+
+
+def python_generator(generator):
+
+    yield generator.createLinear(60000, 0, 63, 64, 30000, 30000, 0, 0)
+    yield generator.createLinear(60000, 0, 63, 64, 30000, 30000, 0, 0)
+    yield generator.createLinear(60000, 0, 63, 64, 30000, 30000, 0, 0)
+    yield generator.createLinear(60000, 128, 191, 64, 30000, 30000, 0, 0)
+    yield generator.createLinear(60000, 128, 191, 64, 30000, 30000, 0, 0)
+    yield generator.createLinear(60000, 256, 319, 64, 30000, 30000, 0, 0)
+    yield generator.createLinear(60000, 256, 319, 64, 30000, 30000, 0, 0)
+    yield generator.createLinear(60000, 384, 447, 64, 30000, 30000, 0, 0)
+    yield generator.createLinear(60000, 512, 575, 64, 30000, 30000, 0, 0)
+    yield generator.createLinear(60000, 640, 703, 64, 30000, 30000, 0, 0)
+    yield generator.createLinear(60000, 768, 831, 64, 30000, 30000, 0, 0)
+    yield generator.createLinear(60000, 896, 959, 64, 30000, 30000, 0, 0)
+    yield generator.createLinear(60000, 0, 63, 64, 30000, 30000, 0, 0)
+    yield generator.createLinear(30000, 0, 0, 0, 30000, 30000, 0, 0)
+
+    yield generator.createExit(0)
diff --git a/tests/gem5/replacement-policies/traces/second_chance_test1_ld.py b/tests/gem5/replacement-policies/traces/second_chance_test1_ld.py
new file mode 100644
index 0000000000..2c527d84c6
--- /dev/null
+++ b/tests/gem5/replacement-policies/traces/second_chance_test1_ld.py
@@ -0,0 +1,65 @@
+# Copyright (c) 2022 Jarvis Jia, Jing Qu, Matt Sinclair, & Mingyuan Xiang
+# All Rights Reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are
+# met: redistributions of source code must retain the above copyright
+# notice, this list of conditions and the following disclaimer;
+# redistributions in binary form must reproduce the above copyright
+# notice, this list of conditions and the following disclaimer in the
+# documentation and/or other materials provided with the distribution;
+# neither the name of the copyright holders nor the names of its
+# contributors may be used to endorse or promote products derived from
+# this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+# This test is targeting loads.
+# Access pattern: A, C, E, G, A, C, I, K, A, C
+# Each letter represents a 64-byte address range.
+
+# The [] indicate two different sets, and each set has four ways.
+# [set0way0, set0way1, set0way2, set0way3],
+# [set1way0, set1way1, set1way2, set1way3],
+# If you have a 512B cache with 4-way associativity,
+# and each cache line is 64B. This test can be used to test the correctness of Second Chance
+# replacement policy. The Second Chance replacement policy will keep the block
+# 'A' and 'C' in the cache because of the second chance bit. More specifically,
+# with Second Chance replacement policy, you will observe:
+# m, m, m, m, h, h, m, m, h, h, where 'm' means miss, and 'h' means hit.
+
+# Explanation of the result:
+# The number after each letter is the second chance bit, which would be set after a re-reference.
+# A, C, E, G are misses. The cache stores ([A0, C0, E0, G0],[ , , ,]).
+# A, C are hit. Now the cache stores ([A1, C1, E0, G0],[ , , ,]).
+# I searches a victim and selects E. Now the cache stores ([A0, C0, I0, G0],[ , , ,]).
+# K searches a victim and selects G. Now the cache stores ([A0, C0, I0, K0],[ , , ,]).
+# A, C are hits.
+
+from m5.objects.ReplacementPolicies import SecondChanceRP as rp
+
+
+def python_generator(generator):
+    yield generator.createLinear(60000, 0, 63, 64, 30000, 30000, 100, 0)
+    yield generator.createLinear(60000, 128, 191, 64, 30000, 30000, 100, 0)
+    yield generator.createLinear(60000, 256, 319, 64, 30000, 30000, 100, 0)
+    yield generator.createLinear(60000, 384, 447, 64, 30000, 30000, 100, 0)
+    yield generator.createLinear(60000, 0, 63, 64, 30000, 30000, 100, 0)
+    yield generator.createLinear(60000, 128, 191, 64, 30000, 30000, 100, 0)
+    yield generator.createLinear(60000, 512, 575, 64, 30000, 30000, 100, 0)
+    yield generator.createLinear(60000, 640, 703, 64, 30000, 30000, 100, 0)
+    yield generator.createLinear(60000, 0, 63, 64, 30000, 30000, 100, 0)
+    yield generator.createLinear(60000, 128, 191, 64, 30000, 30000, 100, 0)
+    yield generator.createLinear(30000, 0, 0, 0, 30000, 30000, 100, 0)
+
+    yield generator.createExit(0)
diff --git a/tests/gem5/replacement-policies/traces/second_chance_test1_st.py b/tests/gem5/replacement-policies/traces/second_chance_test1_st.py
new file mode 100644
index 0000000000..bf04697be5
--- /dev/null
+++ b/tests/gem5/replacement-policies/traces/second_chance_test1_st.py
@@ -0,0 +1,65 @@
+# Copyright (c) 2022 Jarvis Jia, Jing Qu, Matt Sinclair, & Mingyuan Xiang
+# All Rights Reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are
+# met: redistributions of source code must retain the above copyright
+# notice, this list of conditions and the following disclaimer;
+# redistributions in binary form must reproduce the above copyright
+# notice, this list of conditions and the following disclaimer in the
+# documentation and/or other materials provided with the distribution;
+# neither the name of the copyright holders nor the names of its
+# contributors may be used to endorse or promote products derived from
+# this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+# This test is targeting stores
+# Access pattern: A, C, E, G, A, C, I, K, A, C
+# Each letter represents a 64-byte address range.
+
+# The [] indicate two different sets, and each set has four ways.
+# [set0way0, set0way1, set0way2, set0way3],
+# [set1way0, set1way1, set1way2, set1way3],
+# If you have a 512B cache with 4-way associativity,
+# and each cache line is 64B. This test can be used to test the correctness of Second Chance
+# replacement policy. The Second Chance replacement policy will keep the block
+# 'A' and 'C'in the cache because of the second chance bit. More specifically,
+# with Second Chance replacement policy, you will observe:
+# m, m, m, m, h, h, m, m, h, h, where 'm' means miss, and 'h' means hit.
+
+# Explanation of the result:
+# The number after each letter is the second chance bit, which would be set after a re-reference.
+# A, C, E, G are misses. The cache stores ([A0, C0, E0, G0],[ , , ,]).
+# A, C are hit. Now the cache stores ([A1, C1, E0, G0],[ , , ,]).
+# I searches a victim and selects E. Now the cache stores ([A0, C0, I0, G0],[ , , ,]).
+# K searches a victim and selects G. Now the cache stores ([A0, C0, I0, K0],[ , , ,]).
+# A, C are hits.
+
+from m5.objects.ReplacementPolicies import SecondChanceRP as rp
+
+
+def python_generator(generator):
+    yield generator.createLinear(60000, 0, 63, 64, 30000, 30000, 0, 0)
+    yield generator.createLinear(60000, 128, 191, 64, 30000, 30000, 0, 0)
+    yield generator.createLinear(60000, 256, 319, 64, 30000, 30000, 0, 0)
+    yield generator.createLinear(60000, 384, 447, 64, 30000, 30000, 0, 0)
+    yield generator.createLinear(60000, 0, 63, 64, 30000, 30000, 0, 0)
+    yield generator.createLinear(60000, 128, 191, 64, 30000, 30000, 0, 0)
+    yield generator.createLinear(60000, 512, 575, 64, 30000, 30000, 0, 0)
+    yield generator.createLinear(60000, 640, 703, 64, 30000, 30000, 0, 0)
+    yield generator.createLinear(60000, 0, 63, 64, 30000, 30000, 0, 0)
+    yield generator.createLinear(60000, 128, 191, 64, 30000, 30000, 0, 0)
+    yield generator.createLinear(30000, 0, 0, 0, 30000, 30000, 0, 0)
+
+    yield generator.createExit(0)
diff --git a/tests/gem5/replacement-policies/traces/second_chance_test2_ld.py b/tests/gem5/replacement-policies/traces/second_chance_test2_ld.py
new file mode 100644
index 0000000000..d187cbec3f
--- /dev/null
+++ b/tests/gem5/replacement-policies/traces/second_chance_test2_ld.py
@@ -0,0 +1,71 @@
+# Copyright (c) 2022 Jarvis Jia, Jing Qu, Matt Sinclair, & Mingyuan Xiang
+# All Rights Reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are
+# met: redistributions of source code must retain the above copyright
+# notice, this list of conditions and the following disclaimer;
+# redistributions in binary form must reproduce the above copyright
+# notice, this list of conditions and the following disclaimer in the
+# documentation and/or other materials provided with the distribution;
+# neither the name of the copyright holders nor the names of its
+# contributors may be used to endorse or promote products derived from
+# this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+# This test is targeting loads.
+# access pattern: A, C, E, G, A, C, E, G, I, A, C, E, G
+# Each letter represents a 64-byte address range.
+
+# The [] indicate two different sets, and each set has four ways.
+# [set0way0, set0way1, set0way2, set0way3],
+# [set1way0, set1way1, set1way2, set1way3],
+# If you have a 512B cache with 4-way associativity,
+# and each cache line is 64B. This test can be used to test the correctness of Second Chance
+# replacement policy. The Second Chance replacement policy will keep the block
+# 'A' and 'C' in the cache because of the second chance bit. More specifically,
+# with Second Chance replacement policy, you will observe:
+# m, m, m, m, h, h, h, h, m, m, m, m, m where 'm' means miss, and 'h' means hit.
+
+# Explanation of the result:
+# A, C, E, G are misses. The cache stores ([A0, C0, E0, G0],[ , , ,]).
+# The number after each letter is the second chance bit, which would be set after a re-reference.
+# A, C, E, G are hit. Now the cache stores ([A1, C1, E1, G1],[ , , ,]).
+# I searches a victim and selects A. Now the cache stores ([I0, C0, E0, G0],[ , , ,]).
+# A searches a victim and selects C. Now the cache stores ([I0, A0, E0, G0],[ , , ,]).
+# C searches a victim and selects E. Now the cache stores ([I0, A0, C0, G0],[ , , ,]).
+# E searches a victim and selects G. Now the cache stores ([I0, A0, C0, E0],[ , , ,]).
+# G searches a victim and selects I. Now the cache stores ([G0, A0, C0, E0],[ , , ,]).
+
+from m5.objects.ReplacementPolicies import SecondChanceRP as rp
+
+
+def python_generator(generator):
+
+    yield generator.createLinear(60000, 0, 63, 64, 30000, 30000, 100, 0)
+    yield generator.createLinear(60000, 128, 191, 64, 30000, 30000, 100, 0)
+    yield generator.createLinear(60000, 256, 319, 64, 30000, 30000, 100, 0)
+    yield generator.createLinear(60000, 384, 447, 64, 30000, 30000, 100, 0)
+    yield generator.createLinear(60000, 0, 63, 64, 30000, 30000, 100, 0)
+    yield generator.createLinear(60000, 128, 191, 64, 30000, 30000, 100, 0)
+    yield generator.createLinear(60000, 256, 319, 64, 30000, 30000, 100, 0)
+    yield generator.createLinear(60000, 384, 447, 64, 30000, 30000, 100, 0)
+    yield generator.createLinear(60000, 512, 575, 64, 30000, 30000, 100, 0)
+    yield generator.createLinear(60000, 0, 63, 64, 30000, 30000, 100, 0)
+    yield generator.createLinear(60000, 128, 191, 64, 30000, 30000, 100, 0)
+    yield generator.createLinear(60000, 256, 319, 64, 30000, 30000, 100, 0)
+    yield generator.createLinear(60000, 384, 447, 64, 30000, 30000, 100, 0)
+    yield generator.createLinear(30000, 0, 0, 0, 30000, 30000, 100, 0)
+
+    yield generator.createExit(0)
diff --git a/tests/gem5/replacement-policies/traces/second_chance_test2_st.py b/tests/gem5/replacement-policies/traces/second_chance_test2_st.py
new file mode 100644
index 0000000000..477f31d43c
--- /dev/null
+++ b/tests/gem5/replacement-policies/traces/second_chance_test2_st.py
@@ -0,0 +1,71 @@
+# Copyright (c) 2022 Jarvis Jia, Jing Qu, Matt Sinclair, & Mingyuan Xiang
+# All Rights Reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are
+# met: redistributions of source code must retain the above copyright
+# notice, this list of conditions and the following disclaimer;
+# redistributions in binary form must reproduce the above copyright
+# notice, this list of conditions and the following disclaimer in the
+# documentation and/or other materials provided with the distribution;
+# neither the name of the copyright holders nor the names of its
+# contributors may be used to endorse or promote products derived from
+# this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+# This test is targeting stores.
+# access pattern: A, C, E, G, A, C, E, G, I, A, C, E, G
+# Each letter represents a 64-byte address range.
+
+# The [] indicate two different sets, and each set has four ways.
+# [set0way0, set0way1, set0way2, set0way3],
+# [set1way0, set1way1, set1way2, set1way3],
+# If you have a 512B cache with 4-way associativity,
+# and each cache line is 64B. This test can be used to test the correctness of Second Chance
+# replacement policy. The Second Chance replacement policy will keep the block
+# 'A' and 'C' in the cache because of the second chance bit. More specifically,
+# with Second Chance replacement policy, you will observe:
+# m, m, m, m, h, h, h, h, m, m, m, m, m where 'm' means miss, and 'h' means hit.
+
+# Explanation of the result:
+# A, C, E, G are misses. The cache stores ([A0, C0, E0, G0],[ , , ,]).
+# The number after each letter is the second chance bit, which would be set after a re-reference.
+# A, C, E, G are hit. Now the cache stores ([A1, C1, E1, G1],[ , , ,]).
+# I searches a victim and selects A. Now the cache stores ([I0, C0, E0, G0],[ , , ,]).
+# A searches a victim and selects C. Now the cache stores ([I0, A0, E0, G0],[ , , ,]).
+# C searches a victim and selects E. Now the cache stores ([I0, A0, C0, G0],[ , , ,]).
+# E searches a victim and selects G. Now the cache stores ([I0, A0, C0, E0],[ , , ,]).
+# G searches a victim and selects I. Now the cache stores ([G0, A0, C0, E0],[ , , ,]).
+
+
+from m5.objects.ReplacementPolicies import SecondChanceRP as rp
+
+
+def python_generator(generator):
+    yield generator.createLinear(60000, 0, 63, 64, 30000, 30000, 0, 0)
+    yield generator.createLinear(60000, 128, 191, 64, 30000, 30000, 0, 0)
+    yield generator.createLinear(60000, 256, 319, 64, 30000, 30000, 0, 0)
+    yield generator.createLinear(60000, 384, 447, 64, 30000, 30000, 0, 0)
+    yield generator.createLinear(60000, 0, 63, 64, 30000, 30000, 0, 0)
+    yield generator.createLinear(60000, 128, 191, 64, 30000, 30000, 0, 0)
+    yield generator.createLinear(60000, 256, 319, 64, 30000, 30000, 0, 0)
+    yield generator.createLinear(60000, 384, 447, 64, 30000, 30000, 0, 0)
+    yield generator.createLinear(60000, 512, 575, 64, 30000, 30000, 0, 0)
+    yield generator.createLinear(60000, 0, 63, 64, 30000, 30000, 0, 0)
+    yield generator.createLinear(60000, 128, 191, 64, 30000, 30000, 0, 0)
+    yield generator.createLinear(60000, 256, 319, 64, 30000, 30000, 0, 0)
+    yield generator.createLinear(60000, 384, 447, 64, 30000, 30000, 0, 0)
+    yield generator.createLinear(30000, 0, 0, 0, 30000, 30000, 0, 0)
+
+    yield generator.createExit(0)
diff --git a/tests/gem5/replacement-policies/traces/second_chance_test3_ld.py b/tests/gem5/replacement-policies/traces/second_chance_test3_ld.py
new file mode 100644
index 0000000000..ee50de6747
--- /dev/null
+++ b/tests/gem5/replacement-policies/traces/second_chance_test3_ld.py
@@ -0,0 +1,75 @@
+# Copyright (c) 2022 Jarvis Jia, Jing Qu, Matt Sinclair, & Mingyuan Xiang
+# All Rights Reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are
+# met: redistributions of source code must retain the above copyright
+# notice, this list of conditions and the following disclaimer;
+# redistributions in binary form must reproduce the above copyright
+# notice, this list of conditions and the following disclaimer in the
+# documentation and/or other materials provided with the distribution;
+# neither the name of the copyright holders nor the names of its
+# contributors may be used to endorse or promote products derived from
+# this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+# This test is targeting loads.
+# access pattern: A, C, E, G, A, C, E, G, E, I, A, C, G, E
+# Each letter represents a 64-byte address range.
+
+# The [] indicate two different sets, and each set has four ways.
+# [set0way0, set0way1, set0way2, set0way3],
+# [set1way0, set1way1, set1way2, set1way3],
+# If you have a 512B cache with 4-way associativity,
+# and each cache line is 64B. This test can be used to test the correctness of Second Chance
+# replacement policy. The Second Chance replacement policy will keep the block
+# 'A' and 'C' in the cache because of the second chance bit. More specifically,
+# with Second Chance replacement policy, you will observe:
+# m, m, m, m, h, h, h, h, h, m, m, m, h, m, h where 'm' means miss, and 'h' means hit.
+
+# Explanation of the result:
+# A, C, E, G are misses. The cache stores ([A0, C0, E0, G0],[ , , ,]).
+# The number after each letter is the second chance bit, which would be set after a re-reference.
+# A, C, E, G are hit. Now the cache stores ([A1, C1, E1, G1],[ , , ,]).
+# E hits.
+# I searches a victim and selects A. Now the cache stores ([I0, C0, E0, G0],[ , , ,]).
+# A searches a victim and selects C. Now the cache stores ([I0, A0, E0, G0],[ , , ,]).
+# C searches a victim and selects E. Now the cache stores ([I0, A0, C0, G0],[ , , ,]).
+# G hits.
+# E searches a victim and selects I. Now the cache stores ([E0, A0, C0, G0],[ , , ,]).
+# G hits
+
+
+from m5.objects.ReplacementPolicies import SecondChanceRP as rp
+
+
+def python_generator(generator):
+    yield generator.createLinear(60000, 0, 63, 64, 30000, 30000, 100, 0)
+    yield generator.createLinear(60000, 128, 191, 64, 30000, 30000, 100, 0)
+    yield generator.createLinear(60000, 256, 319, 64, 30000, 30000, 100, 0)
+    yield generator.createLinear(60000, 384, 447, 64, 30000, 30000, 100, 0)
+    yield generator.createLinear(60000, 0, 63, 64, 30000, 30000, 100, 0)
+    yield generator.createLinear(60000, 128, 191, 64, 30000, 30000, 100, 0)
+    yield generator.createLinear(60000, 256, 319, 64, 30000, 30000, 100, 0)
+    yield generator.createLinear(60000, 384, 447, 64, 30000, 30000, 100, 0)
+    yield generator.createLinear(60000, 256, 319, 64, 30000, 30000, 100, 0)
+    yield generator.createLinear(60000, 512, 575, 64, 30000, 30000, 100, 0)
+    yield generator.createLinear(60000, 0, 63, 64, 30000, 30000, 100, 0)
+    yield generator.createLinear(60000, 128, 191, 64, 30000, 30000, 100, 0)
+    yield generator.createLinear(60000, 384, 447, 64, 30000, 30000, 100, 0)
+    yield generator.createLinear(60000, 256, 319, 64, 30000, 30000, 100, 0)
+    yield generator.createLinear(60000, 384, 447, 64, 30000, 30000, 100, 0)
+    yield generator.createLinear(30000, 0, 0, 0, 30000, 30000, 100, 0)
+
+    yield generator.createExit(0)
diff --git a/tests/gem5/replacement-policies/traces/second_chance_test3_st.py b/tests/gem5/replacement-policies/traces/second_chance_test3_st.py
new file mode 100644
index 0000000000..53dcbffe89
--- /dev/null
+++ b/tests/gem5/replacement-policies/traces/second_chance_test3_st.py
@@ -0,0 +1,75 @@
+# Copyright (c) 2022 Jarvis Jia, Jing Qu, Matt Sinclair, & Mingyuan Xiang
+# All Rights Reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are
+# met: redistributions of source code must retain the above copyright
+# notice, this list of conditions and the following disclaimer;
+# redistributions in binary form must reproduce the above copyright
+# notice, this list of conditions and the following disclaimer in the
+# documentation and/or other materials provided with the distribution;
+# neither the name of the copyright holders nor the names of its
+# contributors may be used to endorse or promote products derived from
+# this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+# This test is targeting stores.
+# access pattern: A, C, E, G, A, C, E, G, E, I, A, C, G, E
+# Each letter represents a 64-byte address range.
+
+# The [] indicate two different sets, and each set has four ways.
+# [set0way0, set0way1, set0way2, set0way3],
+# [set1way0, set1way1, set1way2, set1way3],
+# If you have a 512B cache with 4-way associativity,
+# and each cache line is 64B. This test can be used to test the correctness of Second Chance
+# replacement policy. The Second Chance replacement policy will keep the block
+# 'A' and 'C' in the cache because of the second chance bit. More specifically,
+# with Second Chance replacement policy, you will observe:
+# m, m, m, m, h, h, h, h, h, m, m, m, h, m, h where 'm' means miss, and 'h' means hit.
+
+# Explanation of the result:
+# A, C, E, G are misses. The cache stores ([A0, C0, E0, G0],[ , , ,]).
+# The number after each letter is the second chance bit, which would be set after a re-reference.
+# A, C, E, G are hit. Now the cache stores ([A1, C1, E1, G1],[ , , ,]).
+# E hits.
+# I searches a victim and selects A. Now the cache stores ([I0, C0, E0, G0],[ , , ,]).
+# A searches a victim and selects C. Now the cache stores ([I0, A0, E0, G0],[ , , ,]).
+# C searches a victim and selects E. Now the cache stores ([I0, A0, C0, G0],[ , , ,]).
+# G hits.
+# E searches a victim and selects I. Now the cache stores ([E0, A0, C0, G0],[ , , ,]).
+# G hits
+
+from m5.objects.ReplacementPolicies import SecondChanceRP as rp
+
+
+def python_generator(generator):
+
+    yield generator.createLinear(60000, 0, 63, 64, 30000, 30000, 0, 0)
+    yield generator.createLinear(60000, 128, 191, 64, 30000, 30000, 0, 0)
+    yield generator.createLinear(60000, 256, 319, 64, 30000, 30000, 0, 0)
+    yield generator.createLinear(60000, 384, 447, 64, 30000, 30000, 0, 0)
+    yield generator.createLinear(60000, 0, 63, 64, 30000, 30000, 0, 0)
+    yield generator.createLinear(60000, 128, 191, 64, 30000, 30000, 0, 0)
+    yield generator.createLinear(60000, 256, 319, 64, 30000, 30000, 0, 0)
+    yield generator.createLinear(60000, 384, 447, 64, 30000, 30000, 0, 0)
+    yield generator.createLinear(60000, 256, 319, 64, 30000, 30000, 0, 0)
+    yield generator.createLinear(60000, 512, 575, 64, 30000, 30000, 0, 0)
+    yield generator.createLinear(60000, 0, 63, 64, 30000, 30000, 0, 0)
+    yield generator.createLinear(60000, 128, 191, 64, 30000, 30000, 0, 0)
+    yield generator.createLinear(60000, 384, 447, 64, 30000, 30000, 0, 0)
+    yield generator.createLinear(60000, 256, 319, 64, 30000, 30000, 0, 0)
+    yield generator.createLinear(60000, 384, 447, 64, 30000, 30000, 0, 0)
+    yield generator.createLinear(30000, 0, 0, 0, 30000, 30000, 0, 0)
+
+    yield generator.createExit(0)
diff --git a/tests/gem5/replacement-policies/traces/tree_plru_test1_ld.py b/tests/gem5/replacement-policies/traces/tree_plru_test1_ld.py
new file mode 100644
index 0000000000..419ce019fb
--- /dev/null
+++ b/tests/gem5/replacement-policies/traces/tree_plru_test1_ld.py
@@ -0,0 +1,65 @@
+# Copyright (c) 2022 Jarvis Jia, Jing Qu, Matt Sinclair, & Mingyuan Xiang
+# All Rights Reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are
+# met: redistributions of source code must retain the above copyright
+# notice, this list of conditions and the following disclaimer;
+# redistributions in binary form must reproduce the above copyright
+# notice, this list of conditions and the following disclaimer in the
+# documentation and/or other materials provided with the distribution;
+# neither the name of the copyright holders nor the names of its
+# contributors may be used to endorse or promote products derived from
+# this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+# This test is targeting loads.
+# Access pattern: A, C, E, G, A, I, K, M, O, A
+# Each letter represents a 64-byte address range.
+
+# The [] indicate two different sets, and each set has four ways.
+# [set0way0, set0way1, set0way2, set0way3],
+# [set1way0, set1way1, set1way2, set1way3],
+# If you have a 512B cache with 4-way associativity,
+# and each cache line is 64B with TreePLRU replacement policy,
+# you will observe: m, m, m, m, h, m, m, m, m, m,
+# where 'm' means miss, and 'h' means hit.
+
+# Explanation of this result:
+# A, C, E, G are misses, now the cache stores ([A*, C, E, G],[ , , ,]).
+# and A is the next one to get replaced.
+# A hits
+# I searches for a victim and selects E. Now the cache stores ([A, C*, I, G],[ , , ,]).
+# K searches for a victim and selects C. Now the cache stores ([A, K, I, G*],[ , , ,]).
+# M searches for a victim and selects G. Now the cache stores ([A*, K, I, M],[ , , ,]).
+# O searches for a victim and selects A. Now the cache stores ([O, K, I*, M],[ , , ,]).
+# A searches for a victim and selects I. Now the cache stores ([O, K*, A, M],[ , , ,]).
+
+from m5.objects.ReplacementPolicies import TreePLRURP as rp
+
+
+def python_generator(generator):
+    yield generator.createLinear(60000, 0, 63, 64, 30000, 30000, 100, 0)
+    yield generator.createLinear(60000, 128, 191, 64, 30000, 30000, 100, 0)
+    yield generator.createLinear(60000, 256, 319, 64, 30000, 30000, 100, 0)
+    yield generator.createLinear(60000, 384, 447, 64, 30000, 30000, 100, 0)
+    yield generator.createLinear(60000, 0, 63, 64, 30000, 30000, 100, 0)
+    yield generator.createLinear(60000, 512, 575, 64, 30000, 30000, 100, 0)
+    yield generator.createLinear(60000, 640, 703, 64, 30000, 30000, 100, 0)
+    yield generator.createLinear(60000, 768, 831, 64, 30000, 30000, 100, 0)
+    yield generator.createLinear(60000, 896, 959, 64, 30000, 30000, 100, 0)
+    yield generator.createLinear(60000, 0, 63, 64, 30000, 30000, 100, 0)
+    yield generator.createLinear(30000, 0, 0, 0, 30000, 30000, 100, 0)
+
+    yield generator.createExit(0)
diff --git a/tests/gem5/replacement-policies/traces/tree_plru_test1_st.py b/tests/gem5/replacement-policies/traces/tree_plru_test1_st.py
new file mode 100644
index 0000000000..8f677bef0a
--- /dev/null
+++ b/tests/gem5/replacement-policies/traces/tree_plru_test1_st.py
@@ -0,0 +1,65 @@
+# Copyright (c) 2022 Jarvis Jia, Jing Qu, Matt Sinclair, & Mingyuan Xiang
+# All Rights Reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are
+# met: redistributions of source code must retain the above copyright
+# notice, this list of conditions and the following disclaimer;
+# redistributions in binary form must reproduce the above copyright
+# notice, this list of conditions and the following disclaimer in the
+# documentation and/or other materials provided with the distribution;
+# neither the name of the copyright holders nor the names of its
+# contributors may be used to endorse or promote products derived from
+# this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+# This test is targeting stores
+# Access pattern: A, C, E, G, A, I, K, M, O, A
+# Each letter represents a 64-byte address range.
+
+# The [] indicate two different sets, and each set has four ways.
+# [set0way0, set0way1, set0way2, set0way3],
+# [set1way0, set1way1, set1way2, set1way3],
+# If you have a 512B cache with 4-way associativity,
+# and each cache line is 64B with TreePLRU replacement policy,
+# you will observe: m, m, m, m, h, m, m, m, m, m,
+# where 'm' means miss, and 'h' means hit.
+
+# Explanation of this result:
+# A, C, E, G are misses, now the cache stores ([A*, C, E, G],[ , , ,]).
+# and A is the next one to get replaced.
+# A hits
+# I searches for a victim and selects E. Now the cache stores ([A, C*, I, G],[ , , ,]).
+# K searches for a victim and selects C. Now the cache stores ([A, K, I, G*],[ , , ,]).
+# M searches for a victim and selects G. Now the cache stores ([A*, K, I, M],[ , , ,]).
+# O searches for a victim and selects A. Now the cache stores ([O, K, I*, M],[ , , ,]).
+# A searches for a victim and selects I. Now the cache stores ([O, K*, A, M],[ , , ,]).
+
+from m5.objects.ReplacementPolicies import TreePLRURP as rp
+
+
+def python_generator(generator):
+    yield generator.createLinear(60000, 0, 63, 64, 30000, 30000, 0, 0)
+    yield generator.createLinear(60000, 128, 191, 64, 30000, 30000, 0, 0)
+    yield generator.createLinear(60000, 256, 319, 64, 30000, 30000, 0, 0)
+    yield generator.createLinear(60000, 384, 447, 64, 30000, 30000, 0, 0)
+    yield generator.createLinear(60000, 0, 63, 64, 30000, 30000, 0, 0)
+    yield generator.createLinear(60000, 512, 575, 64, 30000, 30000, 0, 0)
+    yield generator.createLinear(60000, 640, 703, 64, 30000, 30000, 0, 0)
+    yield generator.createLinear(60000, 768, 831, 64, 30000, 30000, 0, 0)
+    yield generator.createLinear(60000, 896, 959, 64, 30000, 30000, 0, 0)
+    yield generator.createLinear(60000, 0, 63, 64, 30000, 30000, 0, 0)
+    yield generator.createLinear(30000, 0, 0, 0, 30000, 30000, 0, 0)
+
+    yield generator.createExit(0)
diff --git a/tests/gem5/replacement-policies/traces/tree_plru_test2_ld.py b/tests/gem5/replacement-policies/traces/tree_plru_test2_ld.py
new file mode 100644
index 0000000000..6793cbee9d
--- /dev/null
+++ b/tests/gem5/replacement-policies/traces/tree_plru_test2_ld.py
@@ -0,0 +1,61 @@
+# Copyright (c) 2022 Jarvis Jia, Jing Qu, Matt Sinclair, & Mingyuan Xiang
+# All Rights Reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are
+# met: redistributions of source code must retain the above copyright
+# notice, this list of conditions and the following disclaimer;
+# redistributions in binary form must reproduce the above copyright
+# notice, this list of conditions and the following disclaimer in the
+# documentation and/or other materials provided with the distribution;
+# neither the name of the copyright holders nor the names of its
+# contributors may be used to endorse or promote products derived from
+# this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+# This test is targeting loads.
+# Access pattern: A, C, E, G, E, I, K, E
+# Each letter represents a 64-byte address range.
+
+# The [] indicate two different sets, and each set has four ways.
+# [set0way0, set0way1, set0way2, set0way3],
+# [set1way0, set1way1, set1way2, set1way3],
+# If you have a 512B cache with 4-way associativity,
+# and each cache line is 64B with TreePLRU replacement policy,
+# you will observe: m, m, m, m, h, m, m, h,
+# where 'm' means miss, and 'h' means hit.
+
+# Explanation of this result:
+# A, C, E, G are misses, now the cache stores ([A*, C, E, G],[ , , ,]).
+# and A is the next one to get replaced.
+# E hits
+# I searches for a victim and selects A. Now the cache stores ([I, C, E, G*],[ , , ,]).
+# K searches for a victim and selects G. Now the cache stores ([I, C*, E, K],[ , , ,]).
+# E hits
+
+from m5.objects.ReplacementPolicies import TreePLRURP as rp
+
+
+def python_generator(generator):
+    yield generator.createLinear(60000, 0, 63, 64, 30000, 30000, 100, 0)
+    yield generator.createLinear(60000, 128, 191, 64, 30000, 30000, 100, 0)
+    yield generator.createLinear(60000, 256, 319, 64, 30000, 30000, 100, 0)
+    yield generator.createLinear(60000, 384, 447, 64, 30000, 30000, 100, 0)
+    yield generator.createLinear(60000, 256, 319, 64, 30000, 30000, 100, 0)
+    yield generator.createLinear(60000, 512, 575, 64, 30000, 30000, 100, 0)
+    yield generator.createLinear(60000, 640, 703, 64, 30000, 30000, 100, 0)
+    yield generator.createLinear(60000, 256, 319, 64, 30000, 30000, 100, 0)
+    yield generator.createLinear(30000, 0, 0, 0, 30000, 30000, 100, 0)
+
+    yield generator.createExit(0)
diff --git a/tests/gem5/replacement-policies/traces/tree_plru_test2_st.py b/tests/gem5/replacement-policies/traces/tree_plru_test2_st.py
new file mode 100644
index 0000000000..ea4332897c
--- /dev/null
+++ b/tests/gem5/replacement-policies/traces/tree_plru_test2_st.py
@@ -0,0 +1,61 @@
+# Copyright (c) 2022 Jarvis Jia, Jing Qu, Matt Sinclair, & Mingyuan Xiang
+# All Rights Reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are
+# met: redistributions of source code must retain the above copyright
+# notice, this list of conditions and the following disclaimer;
+# redistributions in binary form must reproduce the above copyright
+# notice, this list of conditions and the following disclaimer in the
+# documentation and/or other materials provided with the distribution;
+# neither the name of the copyright holders nor the names of its
+# contributors may be used to endorse or promote products derived from
+# this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+# This test is targeting stores
+# Access pattern: A, C, E, G, E, I, K, E
+# Each letter represents a 64-byte address range.
+
+# The [] indicate two different sets, and each set has four ways.
+# [set0way0, set0way1, set0way2, set0way3],
+# [set1way0, set1way1, set1way2, set1way3],
+# If you have a 512B cache with 4-way associativity,
+# and each cache line is 64B with TreePLRU replacement policy,
+# you will observe: m, m, m, m, h, m, m, h,
+# where 'm' means miss, and 'h' means hit.
+
+# Explanation of this result:
+# A, C, E, G are misses, now the cache stores ([A*, C, E, G],[ , , ,]).
+# and A is the next one to get replaced.
+# E hits
+# I searches for a victim and selects A. Now the cache stores ([I, C, E, G*],[ , , ,]).
+# K searches for a victim and selects G. Now the cache stores ([I, C*, E, K],[ , , ,]).
+# E hits
+
+from m5.objects.ReplacementPolicies import TreePLRURP as rp
+
+
+def python_generator(generator):
+    yield generator.createLinear(60000, 0, 63, 64, 30000, 30000, 0, 0)
+    yield generator.createLinear(60000, 128, 191, 64, 30000, 30000, 0, 0)
+    yield generator.createLinear(60000, 256, 319, 64, 30000, 30000, 0, 0)
+    yield generator.createLinear(60000, 384, 447, 64, 30000, 30000, 0, 0)
+    yield generator.createLinear(60000, 256, 319, 64, 30000, 30000, 0, 0)
+    yield generator.createLinear(60000, 512, 575, 64, 30000, 30000, 0, 0)
+    yield generator.createLinear(60000, 640, 703, 64, 30000, 30000, 0, 0)
+    yield generator.createLinear(60000, 256, 319, 64, 30000, 30000, 0, 0)
+    yield generator.createLinear(30000, 0, 0, 0, 30000, 30000, 0, 0)
+
+    yield generator.createExit(0)
diff --git a/tests/gem5/replacement-policies/traces/tree_plru_test3_ld.py b/tests/gem5/replacement-policies/traces/tree_plru_test3_ld.py
new file mode 100644
index 0000000000..f358bb0ce7
--- /dev/null
+++ b/tests/gem5/replacement-policies/traces/tree_plru_test3_ld.py
@@ -0,0 +1,67 @@
+# Copyright (c) 2022 Jarvis Jia, Jing Qu, Matt Sinclair, & Mingyuan Xiang
+# All Rights Reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are
+# met: redistributions of source code must retain the above copyright
+# notice, this list of conditions and the following disclaimer;
+# redistributions in binary form must reproduce the above copyright
+# notice, this list of conditions and the following disclaimer in the
+# documentation and/or other materials provided with the distribution;
+# neither the name of the copyright holders nor the names of its
+# contributors may be used to endorse or promote products derived from
+# this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+# This test is targeting loads.
+# Access pattern: A, C, E, G, E, I, K, C, E, A, C
+# Each letter represents a 64-byte address range.
+
+# The [] indicate two different sets, and each set has four ways.
+# [set0way0, set0way1, set0way2, set0way3],
+# [set1way0, set1way1, set1way2, set1way3],
+# If you have a 512B cache with 4-way associativity,
+# and each cache line is 64B with TreePLRU replacement policy,
+# you will observe: m, m, m, m, h, m, m, h, h
+# where 'm' means miss, and 'h' means hit.
+
+# Explanation of this result:
+# A, C, E, G are misses, now the cache stores ([A*, C, E, G],[ , , ,]).
+# and A is the next one to get replaced.
+# E hits
+# I searches for a victim and selects A. Now the cache stores ([I, C, E, G*],[ , , ,]).
+# K searches for a victim and selects G. Now the cache stores ([I, C*, E, K],[ , , ,]).
+# C hits. Now the cache stores ([I, C, E*, K],[ , , ,]).
+# E hits. Now the cache stores ([I*, C, E, K],[ , , ,]).
+# A searches for a victim and selects I. Now the cache stores ([A, C, E, K*],[ , , ,]).
+# C hits
+
+from m5.objects.ReplacementPolicies import TreePLRURP as rp
+
+
+def python_generator(generator):
+    yield generator.createLinear(60000, 0, 63, 64, 30000, 30000, 100, 0)
+    yield generator.createLinear(60000, 128, 191, 64, 30000, 30000, 100, 0)
+    yield generator.createLinear(60000, 256, 319, 64, 30000, 30000, 100, 0)
+    yield generator.createLinear(60000, 384, 447, 64, 30000, 30000, 100, 0)
+    yield generator.createLinear(60000, 256, 319, 64, 30000, 30000, 100, 0)
+    yield generator.createLinear(60000, 512, 575, 64, 30000, 30000, 100, 0)
+    yield generator.createLinear(60000, 640, 703, 64, 30000, 30000, 100, 0)
+    yield generator.createLinear(60000, 128, 191, 64, 30000, 30000, 100, 0)
+    yield generator.createLinear(60000, 256, 319, 64, 30000, 30000, 100, 0)
+    yield generator.createLinear(60000, 0, 63, 64, 30000, 30000, 100, 0)
+    yield generator.createLinear(60000, 128, 191, 64, 30000, 30000, 100, 0)
+    yield generator.createLinear(30000, 0, 0, 0, 30000, 30000, 100, 0)
+
+    yield generator.createExit(0)
diff --git a/tests/gem5/replacement-policies/traces/tree_plru_test3_st.py b/tests/gem5/replacement-policies/traces/tree_plru_test3_st.py
new file mode 100644
index 0000000000..0b689af492
--- /dev/null
+++ b/tests/gem5/replacement-policies/traces/tree_plru_test3_st.py
@@ -0,0 +1,67 @@
+# Copyright (c) 2022 Jarvis Jia, Jing Qu, Matt Sinclair, & Mingyuan Xiang
+# All Rights Reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are
+# met: redistributions of source code must retain the above copyright
+# notice, this list of conditions and the following disclaimer;
+# redistributions in binary form must reproduce the above copyright
+# notice, this list of conditions and the following disclaimer in the
+# documentation and/or other materials provided with the distribution;
+# neither the name of the copyright holders nor the names of its
+# contributors may be used to endorse or promote products derived from
+# this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+# This test is targeting stores
+# Access pattern: A, C, E, G, E, I, K, C, E, A, C
+# Each letter represents a 64-byte address range.
+
+# The [] indicate two different sets, and each set has four ways.
+# [set0way0, set0way1, set0way2, set0way3],
+# [set1way0, set1way1, set1way2, set1way3],
+# If you have a 512B cache with 4-way associativity,
+# and each cache line is 64B with TreePLRU replacement policy,
+# you will observe: m, m, m, m, h, m, m, h, h
+# where 'm' means miss, and 'h' means hit.
+
+# Explanation of this result:
+# A, C, E, G are misses, now the cache stores ([A*, C, E, G],[ , , ,]).
+# and A is the next one to get replaced.
+# E hits
+# I searches for a victim and selects A. Now the cache stores ([I, C, E, G*],[ , , ,]).
+# K searches for a victim and selects G. Now the cache stores ([I, C*, E, K],[ , , ,]).
+# C hits. Now the cache stores ([I, C, E*, K],[ , , ,]).
+# E hits. Now the cache stores ([I*, C, E, K],[ , , ,]).
+# A searches for a victim and selects I. Now the cache stores ([A, C, E, K*],[ , , ,]).
+# C hits
+
+from m5.objects.ReplacementPolicies import TreePLRURP as rp
+
+
+def python_generator(generator):
+    yield generator.createLinear(60000, 0, 63, 64, 30000, 30000, 0, 0)
+    yield generator.createLinear(60000, 128, 191, 64, 30000, 30000, 0, 0)
+    yield generator.createLinear(60000, 256, 319, 64, 30000, 30000, 0, 0)
+    yield generator.createLinear(60000, 384, 447, 64, 30000, 30000, 0, 0)
+    yield generator.createLinear(60000, 256, 319, 64, 30000, 30000, 0, 0)
+    yield generator.createLinear(60000, 512, 575, 64, 30000, 30000, 0, 0)
+    yield generator.createLinear(60000, 640, 703, 64, 30000, 30000, 0, 0)
+    yield generator.createLinear(60000, 128, 191, 64, 30000, 30000, 0, 0)
+    yield generator.createLinear(60000, 256, 319, 64, 30000, 30000, 0, 0)
+    yield generator.createLinear(60000, 0, 63, 64, 30000, 30000, 0, 0)
+    yield generator.createLinear(60000, 128, 191, 64, 30000, 30000, 0, 0)
+    yield generator.createLinear(30000, 0, 0, 0, 30000, 30000, 0, 0)
+
+    yield generator.createExit(0)

From 1b2252cbc0dbd2473f0e106775419dd8b87992f9 Mon Sep 17 00:00:00 2001
From: "Bobby R. Bruce" <bbruce@ucdavis.edu>
Date: Tue, 6 Dec 2022 14:13:40 -0800
Subject: [PATCH 048/492] misc: Update .mailmap

This commit updates the mailmap since the initial commit in mid-July
2020: https://gem5-review.googlesource.com/c/public/gem5/+/29672.

`sort -u` has been run on this file so some previous entries have been
moved.

Change-Id: I46df1e9675f6f7057b680ca2abbcebdffd50462a
Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/66517
Tested-by: kokoro <noreply+kokoro@google.com>
Maintainer: Bobby Bruce <bbruce@ucdavis.edu>
Reviewed-by: Jason Lowe-Power <power.jg@gmail.com>
---
 .mailmap | 221 ++++++++++++++++++++++++++++++++++++++++---------------
 1 file changed, 160 insertions(+), 61 deletions(-)

diff --git a/.mailmap b/.mailmap
index 5125666bbc..49c438d3eb 100644
--- a/.mailmap
+++ b/.mailmap
@@ -1,37 +1,43 @@
-ARM gem5 Developers <none@none>
 Abdul Mutaal Ahmad <abdul.mutaal@gmail.com>
+adarshpatil <adarshpatil123@gmail.com>
+Adrià Armejach <adria.armejach@bsc.es> Adrià Armejach <adria.armejach@gmail.com>
 Adrian Herrera <adrian.herrera@arm.com>
 Adrien Pesle <adrien.pesle@arm.com>
-Adrià Armejach <adria.armejach@bsc.es> Adrià Armejach <adria.armejach@gmail.com>
 Akash Bagdia <akash.bagdia@ARM.com> Akash Bagdia <akash.bagdia@arm.com>
 Alec Roelke <alec.roelke@gmail.com> Alec Roelke <ar4jc@virginia.edu>
+Alexander Klimov <Alexander.Klimov@arm.com>
 Alexandru Dutu <alexandru.dutu@amd.com> Alexandru <alexandru.dutu@amd.com>
+Alex Richardson <alexrichardson@google.com>
 Ali Jafri <ali.jafri@arm.com>
-Ali Saidi <Ali.Saidi@arm.com> Ali Saidi <Ali.Saidi@ARM.com>
 Ali Saidi <Ali.Saidi@arm.com> Ali Saidi <ali.saidi@arm.com>
+Ali Saidi <Ali.Saidi@arm.com> Ali Saidi <Ali.Saidi@ARM.com>
 Ali Saidi <Ali.Saidi@arm.com> Ali Saidi <saidi@eecs.umich.edu>
+Alistair Delva <adelva@google.com>
 Amin Farmahini <aminfar@gmail.com>
 Anders Handler <s052838@student.dtu.dk>
-Andrea Mondelli <andrea.mondelli@ucf.edu> Andrea Mondelli <Andrea.Mondelli@ucf.edu>
+Andrea Mondelli <andrea.mondelli@huawei.com> Andrea Mondelli <andrea.mondelli@ucf.edu>
+Andrea Mondelli <andrea.mondelli@huawei.com> Andrea Mondelli <Andrea.Mondelli@ucf.edu>
 Andrea Pellegrini <andrea.pellegrini@gmail.com>
-Andreas Hansson <andreas.hanson@arm.com> Andreas Hansson <Andreas.Hansson@ARM.com>
 Andreas Hansson <andreas.hanson@arm.com> Andreas Hansson <andreas.hansson>
 Andreas Hansson <andreas.hanson@arm.com> Andreas Hansson <andreas.hansson@arm.com>
+Andreas Hansson <andreas.hanson@arm.com> Andreas Hansson <Andreas.Hansson@ARM.com>
 Andreas Hansson <andreas.hanson@arm.com> Andreas Hansson <andreas.hansson@armm.com>
-Andreas Sandberg <Andreas.Sandberg@arm.com> Andreas Sandberg <Andreas.Sandberg@ARM.com>
 Andreas Sandberg <Andreas.Sandberg@arm.com> Andreas Sandberg <andreas.sandberg@arm.com>
+Andreas Sandberg <Andreas.Sandberg@arm.com> Andreas Sandberg <Andreas.Sandberg@ARM.com>
 Andreas Sandberg <Andreas.Sandberg@arm.com> Andreas Sandberg <andreas@sandberg.pp.se>
 Andrew Bardsley <Andrew.Bardsley@arm.com> Andrew Bardsley <Andreas.Bardsley@arm.com>
 Andrew Lukefahr <lukefahr@umich.edu>
 Andrew Schultz <alschult@umich.edu>
 Andriani Mappoura <andriani.mappoura@arm.com>
-Ani Udipi <ani.udipi@arm.com>
+Angie Lee <peiyinglee@google.com>
 Anis Peysieux <anis.peysieux@inria.fr>
+Ani Udipi <ani.udipi@arm.com>
 Anouk Van Laer <anouk.vanlaer@arm.com>
-Arthur Perais <arthur.perais@inria.fr>
+ARM gem5 Developers <none@none>
+Arthur Perais <Arthur.Perais@univ-grenoble-alpes.fr> Arthur Perais <arthur.perais@inria.fr>
+Arun Rodrigues <afrodri@gmail.com>
 Ashkan Tousi <ashkan.tousimojarad@arm.com>
-Austin Harris <austinharris@utexas.edu>
-Richard D. Strong <r.d.strong@gmail.com>
+Austin Harris <austinharris@utexas.edu> Austin Harris <mail@austin-harris.com>
 Avishai Tvila <avishai.tvila@gmail.com>
 Ayaz Akram <yazakram@ucdavis.edu>
 Bagus Hanindhito <hanindhito@bagus.my.id>
@@ -41,80 +47,108 @@ Binh Pham <binhpham@cs.rutgers.edu>
 Bjoern A. Zeeb <baz21@cam.ac.uk>
 Blake Hechtman <bah13@duke.edu> Blake Hechtman <blake.hechtman@amd.com>
 Blake Hechtman <bah13@duke.edu> Blake Hechtman ext:(%2C%20Nilay%20Vaish%20%3Cnilay%40cs.wisc.edu%3E) <bah13@duke.edu>
-Bobby R. Bruce <bbruce@ucdavis.edu>
+Bobby R. Bruce <bbruce@ucdavis.edu> Bobby Bruce <bbruce@amarillo.cs.ucdavis.edu>
 Boris Shingarov <shingarov@gmail.com> Boris Shingarov <shingarov@labware.com>
 Brad Beckmann <brad.beckmann@amd.com> Brad Beckmann <Brad.Beckmann@amd.com>
 Brad Beckmann <brad.beckmann@amd.com> Brad Beckmann ext:(%2C%20Nilay%20Vaish%20%3Cnilay%40cs.wisc.edu%3E) <Brad.Beckmann@amd.com>
 Brad Danofsky <bradley.danofsky@amd.com>
 Bradley Wang <radwang@ucdavis.edu> Bradley <animalvgamer@gmail.com>
+Brandon Potter <brandon.potter@amd.com> BKP <brandon.potter@amd.com>
 Brandon Potter <brandon.potter@amd.com> bpotter <brandon.potter@amd.com>
 Brandon Potter <brandon.potter@amd.com> Brandon Potter <Brandon.Potter@amd.com>
-Brandon Potter <brandon.potter@amd.com> BKP <brandon.potter@amd.com>
 Brian Grayson <b.grayson@samsung.com>
 Cagdas Dirik <cdirik@micron.com> cdirik <cdirik@micron.com>
+Carlos Falquez <c.falquez@fz-juelich.de>
 Chander Sudanthi <chander.sudanthi@arm.com> Chander Sudanthi <Chander.Sudanthi@arm.com>
 Chander Sudanthi <chander.sudanthi@arm.com> Chander Sudanthi <Chander.Sudanthi@ARM.com>
+Charles Jamieson <cjamieson2@wisc.edu>
+CHEN Meng <tundriolaxy@gmail.com>
 Chen Zou <chenzou@uchicago.edu>
+Chia-You Chen <hortune@google.com>
+Chow, Marcus <marcus.chow@amd.com>
 Chris Adeniyi-Jones <Chris.Adeniyi-Jones@arm.com>
-Chris Emmons <chris.emmons@arm.com> Chris Emmons <Chris.Emmons@ARM.com>
 Chris Emmons <chris.emmons@arm.com> Chris Emmons <Chris.Emmons@arm.com>
+Chris Emmons <chris.emmons@arm.com> Chris Emmons <Chris.Emmons@ARM.com>
+Chris January <chris.january@arm.com>
 Christian Menard <christian.menard@tu-dresden.de> Christian Menard <Christian.Menard@tu-dresden.de>
-Christoph Pfister <pfistchr@student.ethz.ch>
 Christopher Torng <clt67@cornell.edu>
+Christoph Pfister <pfistchr@student.ethz.ch>
 Chuan Zhu <chuan.zhu@arm.com>
 Chun-Chen Hsu <chunchenhsu@google.com> Chun-Chen TK Hsu <chunchenhsu@google.com>
 Ciro Santilli <ciro.santilli@arm.com>
 Clint Smullen <cws3k@cs.virginia.edu>
+Cui Jin <cuijinbird@gmail.com> Cui Jin <cuijin7@huawei.com>
 Curtis Dunham <Curtis.Dunham@arm.com>
+Daecheol You <daecheol.you@samsung.com>
 Dam Sunwoo <dam.sunwoo@arm.com>
 Dan Gibson <gibson@cs.wisc.edu>
 Daniel Carvalho <odanrc@yahoo.com.br> Daniel <odanrc@yahoo.com.br>
 Daniel Carvalho <odanrc@yahoo.com.br> Daniel R. Carvalho <odanrc@yahoo.com.br>
+Daniel Gerzhoy <daniel.gerzhoy@gmail.com>
 Daniel Johnson <daniel.johnson@arm.com>
 Daniel Sanchez <sanchezd@stanford.edu>
+Davide Basilio Bartolini <davide.basilio.bartolini@huawei.com>
 David Guillen-Fandos <david.guillen@arm.com> David Guillen <david.guillen@arm.com>
 David Guillen-Fandos <david.guillen@arm.com> David Guillen Fandos <david.guillen@arm.com>
 David Hashe <david.hashe@amd.com> David Hashe <david.j.hashe@gmail.com>
 David Oehmke <doehmke@umich.edu>
+David Schall <david.schall2@arm.com>
+Derek Christ <dchrist@rhrk.uni-kl.de>
 Derek Hower <drh5@cs.wisc.edu>
-Deyaun Guo <guodeyuan@tsinghua.org.cn> Deyuan Guo <guodeyuan@tsinghua.org.cn>
 Deyaun Guo <guodeyuan@tsinghua.org.cn> Deyuan Guo ext:(%2C%20Nilay%20Vaish%20%3Cnilay%40cs.wisc.edu%3E) <guodeyuan@tsinghua.org.cn>
+Deyaun Guo <guodeyuan@tsinghua.org.cn> Deyuan Guo <guodeyuan@tsinghua.org.cn>
 Dibakar Gope <gope@wisc.edu> Dibakar Gope ext:(%2C%20Nilay%20Vaish%20%3Cnilay%40cs.wisc.edu%3E) <gope@wisc.edu>
+Dimitrios Chasapis <k4s4s.heavener@gmail.com>
 Djordje Kovacevic <djordje.kovacevic@arm.com> Djordje Kovacevic <Djordje.Kovacevic@arm.com>
-Dongxue Zhang <elta.era@gmail.com>
 Doğukan Korkmaztürk <d.korkmazturk@gmail.com>
+Dongxue Zhang <elta.era@gmail.com>
 Dylan Johnson <Dylan.Johnson@ARM.com>
 Earl Ou <shunhsingou@google.com>
+eavivi <eavivi@ucdavis.edu>
+Éder F. Zulian <zulian@eit.uni-kl.de>
 Edmund Grimley Evans <Edmund.Grimley-Evans@arm.com>
+Eduardo José Gómez Hernández <eduardojose.gomez@um.es>
+Eliot Moss <moss@cs.umass.edu>
 Emilio Castillo <castilloe@unican.es> Emilio Castillo <ecastill@bsc.es>
 Emilio Castillo <castilloe@unican.es> Emilio Castillo ext:(%2C%20Nilay%20Vaish%20%3Cnilay%40cs.wisc.edu%3E) <castilloe@unican.es>
+Emily Brickey <esbrickey@ucdavis.edu>
 Erfan Azarkhish <erfan.azarkhish@unibo.it>
+Erhu <fengerhu.ipads@gmail.com>
 Eric Van Hensbergen <eric.vanhensbergen@arm.com> Eric Van Hensbergen <Eric.VanHensbergen@ARM.com>
+Eric Ye <ericye@google.com>
 Erik Hallnor <ehallnor@umich.edu>
 Erik Tomusk <E.Tomusk@sms.ed.ac.uk>
 Faissal Sleiman <Faissal.Sleiman@arm.com> Faissal Sleiman <sleimanf@umich.edu>
 Fernando Endo <fernando.endo2@gmail.com>
+Franklin He <franklinh@google.com>
 Gabe Black <gabe.black@gmail.com> Gabe Black <gabeblack@google.com>
 Gabe Black <gabe.black@gmail.com> Gabe Black <gblack@eecs.umich.edu>
+Gabe Loh <gabriel.loh@amd.com> gloh <none@none>
 Gabor Dozsa <gabor.dozsa@arm.com>
+Gabriel Busnot <gabriel.busnot@arteris.com>
+gauravjain14 <gjain6@wisc.edu>
 Gedare Bloom <gedare@rtems.org> Gedare Bloom <gedare@gwmail.gwu.edu>
 Gene Wu <gene.wu@arm.com> Gene WU <gene.wu@arm.com>
 Gene WU <gene.wu@arm.com> Gene Wu <Gene.Wu@arm.com>
-Geoffrey Blake <geoffrey.blake@arm.com> Geoffrey Blake <Geoffrey.Blake@arm.com>
 Geoffrey Blake <geoffrey.blake@arm.com> Geoffrey Blake <blakeg@umich.edu>
+Geoffrey Blake <geoffrey.blake@arm.com> Geoffrey Blake <Geoffrey.Blake@arm.com>
 Georg Kotheimer <georg.kotheimer@mailbox.tu-dresden.de>
 Giacomo Gabrielli <giacomo.gabrielli@arm.com> Giacomo Gabrielli <Giacomo.Gabrielli@arm.com>
 Giacomo Travaglini <giacomo.travaglini@arm.com>
 Glenn Bergmans <glenn.bergmans@arm.com>
+GWDx <gwdx@mail.ustc.edu.cn>
 Hamid Reza Khaleghzadeh <khaleghzadeh@gmail.com> Hamid Reza Khaleghzadeh ext:(%2C%20Lluc%20Alvarez%20%3Clluc.alvarez%40bsc.es%3E%2C%20Nilay%20Vaish%20%3Cnilay%40cs.wisc.edu%3E) <khaleghzadeh@gmail.com>
+handsomeliu <handsomeliu@google.com>
 Hanhwi Jang <jang.hanhwi@gmail.com>
 Hoa Nguyen <hoanguyen@ucdavis.edu>
 Hongil Yoon <ongal@cs.wisc.edu>
 Hsuan Hsu <hsuan.hsu@mediatek.com>
+huangjs <jiasen.hjs@alibaba-inc.com>
 Hussein Elnawawy <hussein.elnawawy@gmail.com>
 Ian Jiang <ianjiang.ict@gmail.com>
 IanJiangICT <ianjiang.ict@gmail.com>
 Ilias Vougioukas <Ilias.Vougioukas@ARM.com>
+Iru Cai <mytbk920423@gmail.com>
 Isaac Richter <isaac.richter@rochester.edu>
 Isaac Sánchez Barrera <isaac.sanchez@bsc.es>
 Ivan Pizarro <ivan.pizarro@metempsy.com>
@@ -123,104 +157,152 @@ Jairo Balart <jairo.balart@metempsy.com>
 Jakub Jermar <jakub@jermar.eu>
 James Clarkson <james.clarkson@arm.com>
 Jan-Peter Larsson <jan-peter.larsson@arm.com>
-Jason Lowe-Power <jason@lowepower.com> Jason Lowe-Power <power.jg@gmail.com>
+Jan Vrany <jan.vrany@labware.com>
+Jarvis Jia <jia44@wisc.edu>
+Jasjeet Rangi <jasrangi@ucdavis.edu>
 Jason Lowe-Power <jason@lowepower.com> Jason Lowe-Power <powerjg@cs.wisc.edu>
-Jason Lowe-Power <jason@lowepower.com> Jason Power <power.jg@gmail.com>
-Jason Lowe-Power <jason@lowepower.com> Jason Power <powerjg@cs.wisc.edu>
+Jason Lowe-Power <jason@lowepower.com> Jason Lowe-Power <power.jg@gmail.com>
 Jason Lowe-Power <jason@lowepower.com> Jason Power ext:(%2C%20Joel%20Hestness%20%3Chestness%40cs.wisc.edu%3E) <power.jg@gmail.com>
+Jason Lowe-Power <jason@lowepower.com> Jason Power <powerjg@cs.wisc.edu>
+Jason Lowe-Power <jason@lowepower.com> Jason Power <power.jg@gmail.com>
+Jason Yu <yuzhijingcheng1996@hotmail.com>
 Javier Bueno Hedo <javier.bueno@metempsy.com> Javier Bueno <javier.bueno@metempsy.com>
 Javier Cano-Cano <javier.cano555@gmail.com>
+Javier Garcia Hernandez <avefenixavefenix@gmail.com>
 Javier Setoain <javier.setoain@arm.com>
 Jayneel Gandhi <jayneel@cs.wisc.edu>
 Jennifer Treichler <jtreichl@umich.edu>
-Jieming Yin <jieming.yin@amd.com>
+Jerin Joy <joy@rivosinc.com>
+Jiajie Chen <c@jia.je>
+Jiasen Huang <jiasen.hjs@alibaba-inc.com>
+Jiasen <jiasen.hjs@alibaba-inc.com>
+Jiayi Huang <jyhuang91@gmail.com>
+jiegec <noc@jiegec.ac.cn>
+Jieming Yin <jieming.yin@amd.com> jiemingyin <bjm419@gmail.com>
 Jing Qu <jqu32@wisc.edu> JingQuJQ <jqu32@wisc.edu>
 Jiuyue Ma <majiuyue@ncic.ac.cn>
 Joe Gross <joe.gross@amd.com> Joe Gross <joseph.gross@amd.com>
+Joel Hestness <jthestness@gmail.com> Joel Hestness ext:(%2C%20Nilay%20Vaish%20%3Cnilay%40cs.wisc.edu%3E) <jthestness@gmail.com>
 Joel Hestness <jthestness@gmail.com> Joel Hestness <hestness@cs.utexas.edu>
 Joel Hestness <jthestness@gmail.com> Joel Hestness <hestness@cs.wisc.edu>
-Joel Hestness <jthestness@gmail.com> Joel Hestness ext:(%2C%20Nilay%20Vaish%20%3Cnilay%40cs.wisc.edu%3E) <jthestness@gmail.com>
+Joël Porquet-Lupine <joel@porquet.org>
 John Alsop <johnathan.alsop@amd.com>
 John Kalamatianos <john.kalamatianos@amd.com> jkalamat <john.kalamatianos@amd.com>
 Jordi Vaquero <jordi.vaquero@metempsy.com>
 Jose Marinho <jose.marinho@arm.com>
+Juan M. Cebrian <jm.cebriangonzalez@gmail.com>
 Jui-min Lee <fcrh@google.com>
+kai.ren <kai.ren@streamcomputing.com> Kai Ren <binarystar2006@outlook.com>
 Kanishk Sugand <kanishk.sugand@arm.com>
 Karthik Sangaiah <karthik.sangaiah@arm.com>
+Kaustav Goswami <kggoswami@ucdavis.edu>
+Kelly Nguyen <klynguyen@ucdavis.edu>
 Ke Meng <mengke97@hotmail.com>
 Kevin Brodsky <kevin.brodsky@arm.com>
 Kevin Lim <ktlim@umich.edu>
+Kevin Loughlin <kevlough@umich.edu>
 Khalique <khalique913@gmail.com>
 Koan-Sin Tan <koansin.tan@gmail.com>
 Korey Sewell <ksewell@umich.edu>
 Krishnendra Nathella <Krishnendra.Nathella@arm.com> Krishnendra Nathella <krinat01@arm.com>
+ksco <numbksco@gmail.com>
+kunpai <kunpai@ucdavis.edu>
+Kyle Roarty <kyleroarty1716@gmail.com> Kyle Roarty <Kyle.Roarty@amd.com>
+Laura Hinman <llhinman@ucdavis.edu>
 Lena Olson <leolson@google.com> Lena Olson <lena@cs.wisc,edu>
 Lena Olson <leolson@google.com> Lena Olson <lena@cs.wisc.edu>
 Lisa Hsu <Lisa.Hsu@amd.com> Lisa Hsu <hsul@eecs.umich.edu>
 Lluc Alvarez <lluc.alvarez@bsc.es>
 Lluís Vilanova <vilanova@ac.upc.edu> Lluis Vilanova <vilanova@ac.upc.edu>
+Lukas Steiner <lsteiner@rhrk.uni-kl.de>
+Luming Wang <wlm199558@126.com>
+m5test <m5test@zizzer>
 Mahyar Samani <msamani@ucdavis.edu>
+Majid Jalili <majid0jalili@gmail.com>
 Malek Musleh <malek.musleh@gmail.com> Nilay Vaish ext:(%2C%20Malek%20Musleh%20%3Cmalek.musleh%40gmail.com%3E) <nilay@cs.wisc.edu>
 Marc Mari Barcelo <marc.maribarcelo@arm.com>
-Marc Orr <marc.orr@gmail.com> Marc Orr <morr@cs.wisc.edu>
 Marco Balboni <Marco.Balboni@ARM.com>
 Marco Elver <Marco.Elver@ARM.com> Marco Elver <marco.elver@ed.ac.uk>
+Marc Orr <marc.orr@gmail.com> Marc Orr <morr@cs.wisc.edu>
+Marjan Fariborz <mfariborz@ucdavis.edu> marjanfariborz <mfariborz@ucdavis.edu>
+Mark Hildebrand <mhildebrand@ucdavis.edu>
+Marton Erdos <marton.erdos@arm.com>
+Maryam Babaie <mbabaie@ucdavis.edu>
 Matt DeVuyst <mdevuyst@gmail.com>
-Matt Evans <matt.evans@arm.com> Matt Evans <Matt.Evans@arm.com>
-Matt Horsnell <matt.horsnell@arm.com>Matt Horsnell <Matt.Horsnell@ARM.com>
-Matt Horsnell <matt.horsnell@arm.com> Matt Horsnell <Matt.Horsnell@arm.com>
-Matt Horsnell <matt.horsnell@arm.com> Matt Horsnell <matt.horsnell@ARM.com>
-Matt Poremba <matthew.poremba@amd.com> Matt Poremba <Matthew.Poremba@amd.com>
 Matteo Andreozzi <matteo.andreozzi@arm.com> Matteo Andreozzi <Matteo.Andreozzi@arm.com>
 Matteo M. Fusi <matteo.fusi@bsc.es>
+Matt Evans <matt.evans@arm.com> Matt Evans <Matt.Evans@arm.com>
 Matthew Poremba <matthew.poremba@amd.com> Matthew Poremba <Matthew.Poremba@amd.com>
-Matt Sinclair <mattdsinclair@gmail.com> Matthew Sinclair <matthew.sinclair@amd.com>
 Matthias Hille <matthiashille8@gmail.com>
 Matthias Jung <jungma@eit.uni-kl.de>
+Matthias Jung <matthias.jung@iese.fraunhofer.de>
+Matt Horsnell <matt.horsnell@arm.com> Matt Horsnell <matt.horsnell@ARM.com>
+Matt Horsnell <matt.horsnell@arm.com> Matt Horsnell <Matt.Horsnell@arm.com>
+Matt Horsnell <matt.horsnell@arm.com>Matt Horsnell <Matt.Horsnell@ARM.com>
+Matt Poremba <matthew.poremba@amd.com> Matt Poremba <Matthew.Poremba@amd.com>
+Matt Sinclair <mattdsinclair@gmail.com> Matthew Sinclair <matthew.sinclair@amd.com>
+Matt Sinclair <mattdsinclair.wisc@gmail.com> Matt Sinclair <Matthew.Sinclair@amd.com>
 Maurice Becker <madnaurice@googlemail.com>
 Maxime Martinasso <maxime.cscs@gmail.com>
-Maximilian Stein <maximilian.stein@tu-dresden.de>
+Maximilian Stein <maximilian.stein@tu-dresden.de>Maximilian Stein <m@steiny.biz>
 Maximilien Breughe <maximilien.breughe@elis.ugent.be> Maximilien Breughe <Maximilien.Breughe@elis.ugent.be>
+Melissa Jost <melissakjost@gmail.com>
 Michael Adler <Michael.Adler@intel.com>
+Michael Boyer <Michael.Boyer@amd.com>
 Michael LeBeane <michael.lebeane@amd.com> Michael LeBeane <Michael.Lebeane@amd.com>
 Michael LeBeane <michael.lebeane@amd.com> mlebeane <michael.lebeane@amd.com>
 Michael Levenhagen <mjleven@sandia.gov>
-Michiel Van Tol <michiel.vantol@arm.com> Michiel W. van Tol <Michiel.VanTol@arm.com>
 Michiel Van Tol <michiel.vantol@arm.com> Michiel van Tol <Michiel.VanTol@arm.com>
+Michiel Van Tol <michiel.vantol@arm.com> Michiel W. van Tol <Michiel.VanTol@arm.com>
 Miguel Serrano <mserrano@umich.edu>
+Mike Upton <michaelupton@gmail.com>
 Miles Kaufmann <milesck@eecs.umich.edu>
-Min Kyu Jeong <minkyu.jeong@arm.com> Min Kyu Jeong <MinKyu.Jeong@arm.com>
 Mingyuan <xiang_my@outlook.com>
-Mitch Hayenga <mitch.hayenga@arm.com> Mitch Hayenga <Mitch.Hayenga@ARM.com>
-Mitch Hayenga <mitch.hayenga@arm.com> Mitch Hayenga <Mitch.Hayenga@arm.com>
-Mitch Hayenga <mitch.hayenga@arm.com> Mitch Hayenga <mitch.hayenga+gem5@gmail.com>
-Mitch Hayenga <mitch.hayenga@arm.com> Mitch Hayenga ext:(%2C%20Amin%20Farmahini%20%3Caminfar%40gmail.com%3E) <mitch.hayenga+gem5@gmail.com>
+Min Kyu Jeong <minkyu.jeong@arm.com> Min Kyu Jeong <MinKyu.Jeong@arm.com>
 Mitch Hayenga <mitch.hayenga@arm.com> Mitchell Hayenga <Mitchell.Hayenga@ARM.com>
+Mitch Hayenga <mitch.hayenga@arm.com> Mitch Hayenga ext:(%2C%20Amin%20Farmahini%20%3Caminfar%40gmail.com%3E) <mitch.hayenga+gem5@gmail.com>
+Mitch Hayenga <mitch.hayenga@arm.com> Mitch Hayenga <Mitch.Hayenga@arm.com>
+Mitch Hayenga <mitch.hayenga@arm.com> Mitch Hayenga <Mitch.Hayenga@ARM.com>
+Mitch Hayenga <mitch.hayenga@arm.com> Mitch Hayenga <mitch.hayenga+gem5@gmail.com>
 Mohammad Alian <m.alian1369@gmail.com>
 Monir Mozumder <monir.mozumder@amd.com>
 Moyang Wang <mw828@cornell.edu>
 Mrinmoy Ghosh <mrinmoy.ghosh@arm.com> Mrinmoy Ghosh <Mrinmoy.Ghosh@arm.com>
-Nathan Binkert <nate@binkert.org> Nathan Binkert <binkertn@umich.edu>
+Muhammad Sarmad Saeed <mssaeed@ucdavis.edu>
+Nadia Etemadi <netemadi@ucdavis.edu>
 Nathanael Premillieu <nathanael.premillieu@arm.com> Nathanael Premillieu <Nathanael.Premillieu@arm.com>
+Nathanael Premillieu <nathanael.premillieu@arm.com> Nathanael Premillieu <nathanael.premillieu@huawei.com>
 Nathanael Premillieu <nathanael.premillieu@arm.com> Nathanael Premillieu <nathanael.premillieu@irisa.fr>
 Nathanael Premillieu <nathanael.premillieu@arm.com> Nathanael Premillieu <nathananel.premillieu@arm.com>
 Nathanael Premillieu <nathanael.premillieu@arm.com> Nathanael Premillieu <npremill@irisa.fr>
+Nathan Binkert <nate@binkert.org> Nathan Binkert <binkertn@umich.edu>
 Nayan Deshmukh <nayan26deshmukh@gmail.com>
 Neha Agarwal <neha.agarwal@arm.com>
+Neil Natekar <nanatekar@ucdavis.edu>
 Nicholas Lindsay <nicholas.lindsay@arm.com>
+Nicolas Boichat <drinkcat@google.com>
 Nicolas Derumigny <nderumigny@gmail.com>
 Nicolas Zea <nicolas.zea@gmail.com>
 Nikos Nikoleris <nikos.nikoleris@arm.com> Nikos Nikoleris <nikos.nikoleris@gmail.com>
+Nilay Vaish ext:(%2C%20Timothy%20Jones%20%3Ctimothy.jones%40cl.cam.ac.uk%3E) <nilay@cs.wisc.edu>
 Nils Asmussen <nils.asmussen@barkhauseninstitut.org> Nils Asmussen <nilsasmussen7@gmail.com>
+Noah Katz <nkatz@rivosinc.com>
+ntampouratzis <ntampouratzis@isc.tuc.gr>
 Nuwan Jayasena <Nuwan.Jayasena@amd.com>
 Ola Jeppsson <ola.jeppsson@gmail.com>
 Omar Naji <Omar.Naji@arm.com>
+Onur Kayiran <onur.kayiran@amd.com>
 Pablo Prieto <pablo.prieto@unican.es>
+paikunal <kunpai@ucdavis.edu>
 Palle Lyckegaard <palle@lyckegaard.dk>
 Pau Cabre <pau.cabre@metempsy.com>
 Paul Rosenfeld <prosenfeld@micron.com> Paul Rosenfeld <dramninjas@gmail.com>
 Paul Rosenfeld <prosenfeld@micron.com> Paul Rosenfeld <prosenfeld@micon.com>
 Peter Enns <Peter.Enns@arm.com> Pierre-Yves Péneau <pierre-yves.peneau@lirmm.fr>
+Peter <petery.hin@huawei.com>
+Peter Yuen <ppeetteerrsx@gmail.com>
+Philip Metzler <cpmetz@google.com>
+Pierre Ayoub <pierre.ayoub.pro@tutanota.com>
 Pin-Yen Lin <treapking@google.com>
 Po-Hao Su <supohaosu@gmail.com>
 Polina Dudnik <pdudnik@cs.wisc.edu> Polina Dudnik <pdudnik@gmail.com>
@@ -229,23 +311,26 @@ Pouya Fotouhi <pfotouhi@ucdavis.edu> Pouya Fotouhi <Pouya.Fotouhi@amd.com>
 Prakash Ramrakhyani <prakash.ramrakhyani@arm.com> Prakash Ramrakhani <Prakash.Ramrakhani@arm.com>
 Prakash Ramrakhyani <prakash.ramrakhyani@arm.com> Prakash Ramrakhyani <Prakash.Ramrakhyani@arm.com>
 Pritha Ghoshal <pritha9987@tamu.edu>
+Quentin Forcioli <quentin.forcioli@telecom-paris.fr>
 Radhika Jagtap <radhika.jagtap@arm.com> Radhika Jagtap <radhika.jagtap@ARM.com>
 Rahul Thakur <rjthakur@google.com>
 Reiley Jeapaul <Reiley.Jeyapaul@arm.com>
-Rekai Gonzalez-Alberquilla <rekai.gonzalezalberquilla@arm.com> Rekai <Rekai.GonzalezAlberquilla@arm.com>
-Rekai Gonzalez-Alberquilla <rekai.gonzalezalberquilla@arm.com> Rekai Gonzalez Alberquilla <Rekai.GonzalezAlberquilla@arm.com>
 Rekai Gonzalez-Alberquilla <rekai.gonzalezalberquilla@arm.com> Rekai Gonzalez Alberquilla <rekai.gonzalezalberquilla@arm.com>
+Rekai Gonzalez-Alberquilla <rekai.gonzalezalberquilla@arm.com> Rekai Gonzalez Alberquilla <Rekai.GonzalezAlberquilla@arm.com>
 Rekai Gonzalez-Alberquilla <rekai.gonzalezalberquilla@arm.com> Rekai Gonzalez-Alberquilla <Rekai.GonzalezAlberquilla@arm.com>
+Rekai Gonzalez-Alberquilla <rekai.gonzalezalberquilla@arm.com> Rekai <Rekai.GonzalezAlberquilla@arm.com>
 Rene de Jong <rene.dejong@arm.com>
 Ricardo Alves <ricardo.alves@arm.com>
+Richard Cooper <richard.cooper@arm.com>
+Richard D. Strong <r.d.strong@gmail.com>
 Richard Strong <rstrong@hp.com> Richard Strong <r.d.strong@gmail.com>
 Richard Strong <rstrong@hp.com> Richard Strong <rstrong@cs.ucsd.edu>
 Richard Strong <rstrong@hp.com> Rick Strong <rstrong@cs.ucsd.edu>
 Rico Amslinger <rico.amslinger@informatik.uni-augsburg.de>
 Riken Gohil <Riken.Gohil@arm.com>
 Rizwana Begum <rb639@drexel.edu>
-Robert Scheffel <robert.scheffel1@tu-dresden.de> Robert <robert.scheffel1@tu-dresden.de>
 Robert Kovacsics <rmk35@cl.cam.ac.uk>
+Robert Scheffel <robert.scheffel1@tu-dresden.de> Robert <robert.scheffel1@tu-dresden.de>
 Rohit Kurup <rohit.kurup@arm.com>
 Ron Dreslinski <rdreslin@umich.edu> Ronald Dreslinski <rdreslin@umich.edu>
 Ruben Ayrapetyan <ruben.ayrapetyan@arm.com>
@@ -253,20 +338,27 @@ Rune Holm <rune.holm@arm.com>
 Ruslan Bukin <br@bsdpad.com> Ruslan Bukin ext:(%2C%20Zhang%20Guoye) <br@bsdpad.com>
 Rutuja Oza <roza@ucdavis.edu>
 Ryan Gambord <gambordr@oregonstate.edu>
+sacak32 <byrakocalan99@gmail.com>
+Sampad Mohapatra <sampad.mohapatra@gmail.com>
 Samuel Grayson <sam@samgrayson.me>
-Sandipan Das <sandipan@linux.ibm.com>
+Samuel Stark <samuel.stark2@arm.com>
+Sandipan Das <31861871+sandip4n@users.noreply.github.com>
+Sandipan Das <sandipan@linux.ibm.com> Sandipan Das <31861871+sandip4n@users.noreply.github.com>
 Santi Galan <santi.galan@metempsy.com>
-Sascha Bischoff <sascha.bischoff@arm.com> Sascha Bischoff <Sascha.Bischoff@ARM.com>
 Sascha Bischoff <sascha.bischoff@arm.com> Sascha Bischoff <sascha.bischoff@ARM.com>
+Sascha Bischoff <sascha.bischoff@arm.com> Sascha Bischoff <Sascha.Bischoff@ARM.com>
 Sean McGoogan <Sean.McGoogan@arm.com>
 Sean Wilson <spwilson2@wisc.edu>
 Sergei Trofimov <sergei.trofimov@arm.com>
 Severin Wischmann <wiseveri@student.ethz.ch> Severin Wischmann ext:(%2C%20Ioannis%20Ilkos%20%3Cioannis.ilkos09%40imperial.ac.uk%3E) <wiseveri@student.ethz.ch>
 Shawn Rosti <shawn.rosti@gmail.com>
 Sherif Elhabbal <elhabbalsherif@gmail.com>
+Shivani Parekh <shparekh@ucdavis.edu>
+Shivani <shparekh@ucdavis.edu>
 Siddhesh Poyarekar <siddhesh.poyarekar@gmail.com>
 Somayeh Sardashti <somayeh@cs.wisc.edu>
 Sooraj Puthoor <puthoorsooraj@gmail.com>
+Sooraj Puthoor <Sooraj.Puthoor@amd.com>
 Sophiane Senni <sophiane.senni@gmail.com>
 Soumyaroop Roy <sroy@cse.usf.edu>
 Srikant Bharadwaj <srikant.bharadwaj@amd.com>
@@ -275,13 +367,14 @@ Stanislaw Czerniawski <stacze01@arm.com>
 Stephan Diestelhorst <stephan.diestelhorst@arm.com> Stephan Diestelhorst <stephan.diestelhorst@ARM.com>
 Stephen Hines <hines@cs.fsu.edu>
 Steve Raasch <sraasch@umich.edu>
-Steve Reinhardt <stever@gmail.com> Steve Reinhardt <Steve.Reinhardt@amd.com>
-Steve Reinhardt <stever@gmail.com> Steve Reinhardt <steve.reinhardt@amd.com>
-Steve Reinhardt <stever@gmail.com> Steve Reinhardt <stever@eecs.umich.edu>
 Steve Reinhardt <stever@gmail.com> Steve Reinhardt ext:(%2C%20Nilay%20Vaish%20%3Cnilay%40cs.wisc.edu%3E%2C%20Ali%20Saidi%20%3CAli.Saidi%40ARM.com%3E) <stever@gmail.com>
+Steve Reinhardt <stever@gmail.com> Steve Reinhardt <stever@eecs.umich.edu>
+Steve Reinhardt <stever@gmail.com> Steve Reinhardt <steve.reinhardt@amd.com>
+Steve Reinhardt <stever@gmail.com> Steve Reinhardt <Steve.Reinhardt@amd.com>
 Stian Hvatum <stian@dream-web.no>
 Sudhanshu Jha <sudhanshu.jha@arm.com>
 Sujay Phadke <electronicsguy123@gmail.com>
+Sungkeun Kim <ksungkeun84@tamu.edu>
 Swapnil Haria <swapnilster@gmail.com> Swapnil Haria <swapnilh@cs.wisc.edu>
 Taeho Kgil <tkgil@umich.edu>
 Tao Zhang <tao.zhang.0924@gmail.com>
@@ -290,44 +383,50 @@ Tiago Mück <tiago.muck@arm.com> Tiago Muck <tiago.muck@arm.com>
 Tim Harris <tharris@microsoft.com>
 Timothy Hayes <timothy.hayes@arm.com>
 Timothy M. Jones <timothy.jones@arm.com> Timothy Jones <timothy.jones@cl.cam.ac.uk>
-Timothy M. Jones <timothy.jones@arm.com> Nilay Vaish ext:(%2C%20Timothy%20Jones%20%3Ctimothy.jones%40cl.cam.ac.uk%3E) <nilay@cs.wisc.edu>
 Timothy M. Jones <timothy.jones@arm.com> Timothy M. Jones <timothy.jones@cl.cam.ac.uk>
 Timothy M. Jones <timothy.jones@arm.com> Timothy M. Jones <tjones1@inf.ed.ac.uk>
 Tom Jablin <tjablin@gmail.com>
 Tommaso Marinelli <tommarin@ucm.es>
+Tom Rollet <tom.rollet@huawei.com>
+Tong Shen <endlessroad@google.com>
 Tony Gutierrez <anthony.gutierrez@amd.com> Anthony Gutierrez <atgutier@umich.edu>
-Tuan Ta <qtt2@cornell.edu> Tuan Ta <taquangtuan1992@gmail.com>
-Tushar Krishna <tushar@ece.gatech.edu> Tushar Krishna <Tushar.Krishna@amd.com>
+Travis Boraten <travis.boraten@amd.com>
+Trivikram Reddy <tvreddy@ucdavis.edu> tv-reddy <tvreddy@ucdavis.edu>
+Tuan Ta <qtt2@cornell.edu> Tuan Ta <taquangtuan1992@gmail.com> Tuan Ta <tuan.ta@amd.com>
 Tushar Krishna <tushar@ece.gatech.edu> Tushar Krishna <tushar@csail.mit.edu>
+Tushar Krishna <tushar@ece.gatech.edu> Tushar Krishna <Tushar.Krishna@amd.com>
 Umesh Bhaskar <umesh.b2006@gmail.com>
 Uri Wiener <uri.wiener@arm.com>
 Victor Garcia <victor.garcia@arm.com>
 Vilas Sridharan <vilas.sridharan@gmail.com>
-Vince Weaver <vince@csl.cornell.edu>
 Vincentius Robby <acolyte@umich.edu>
+Vince Weaver <vince@csl.cornell.edu>
+vramadas95 <vramadas@wisc.edu>
+vsoria <victor.soria@bsc.es>
 Wade Walker <wade.walker@arm.com>
+Wei-Han Chen <weihanchen@google.com>
 Weiping Liao <weipingliao@google.com>
+Wende Tan <twd2@163.com>
 Wendy Elsasser <wendy.elsasser@arm.com>
-William Wang <william.wang@arm.com> William Wang <William.Wang@ARM.com>
 William Wang <william.wang@arm.com> William Wang <William.Wang@arm.com>
+William Wang <william.wang@arm.com> William Wang <William.Wang@ARM.com>
 Willy Wolff <willy.mh.wolff.ml@gmail.com>
+Wing Li <wingers@google.com>
 Xiangyu Dong <rioshering@gmail.com>
-Xianwei Zhang <xianwei.zhang@amd.com>
+Xianwei Zhang <xianwei.zhang.@amd.com> Xianwei Zhang <xianwei.zhang@amd.com>
 Xiaoyu Ma <xiaoyuma@google.com>
 Xin Ouyang <xin.ouyang@streamcomputing.com>
+Xiongfei <xiongfei.liao@gmail.com>
 Yasuko Eckert <yasuko.eckert@amd.com>
-Yi Xiang <yix@colostate.edu>
+Yen-lin Lai <yenlinlai@google.com>
 Yifei Liu <liu.ad2039@gmail.com>
-Yu-hsin Wang <yuhsingw@google.com>
+yiwkd2 <yiwkd2@gmail.com>
+Yi Xiang <yix@colostate.edu>
 Yuan Yao <yuanyao@seas.harvard.edu>
 Yuetsu Kodama <yuetsu.kodama@riken.jp> yuetsu.kodama <yuetsu.kodama@riken.jp>
+Yu-hsin Wang <yuhsingw@google.com>
 Zhang Zheng <perise@gmail.com>
+Zhantong Qiu <ztqiu@ucdavis.edu>
+Zhengrong Wang <seanzw@ucla.edu> seanzw <seanyukigeek@gmail.com>
+zhongchengyong <zhongcy93@gmail.com>
 Zicong Wang <wangzicong@nudt.edu.cn>
-Éder F. Zulian <zulian@eit.uni-kl.de>
-Gabe Loh <gabriel.loh@amd.com> gloh <none@none>
-jiegec <noc@jiegec.ac.cn>
-m5test <m5test@zizzer>
-Marjan Fariborz <mfariborz@ucdavis.edu> marjanfariborz <mfariborz@ucdavis.edu>
-Mike Upton <michaelupton@gmail.com>
-seanzw <seanyukigeek@gmail.com>
-Trivikram Reddy <tvreddy@ucdavis.edu> tv-reddy <tvreddy@ucdavis.edu>

From 91f8f2b276bf0a4ef03963822dbcbadd6a657d2e Mon Sep 17 00:00:00 2001
From: "Bobby R. Bruce" <bbruce@ucdavis.edu>
Date: Fri, 2 Dec 2022 11:15:45 -0800
Subject: [PATCH 049/492] tests: Add missing `_pre_instantiate()`

As of this change:
https://gem5-review.googlesource.com/c/public/gem5/+/65051, the
`_pre_instantiate` function must be called prior to `m5.instantiate`
when using the stdlib without the Simulator module. The
"test/gem5/replacement-policies/run_replacement_policy_test.py" lacked
this and was causing errors when running replacement policy tests.

In addition we also fix the incorrect type of size in`createArtifact'.
This was causing problems with the Kokoro build system.
The typing here was `int` but had a default value of `None`. The correct
type is therefore `Optional[int]`.

Change-Id: Ibaf63151196b15f68e643fa5c1b290439d6618c8
Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/66371
Maintainer: Bobby Bruce <bbruce@ucdavis.edu>
Tested-by: kokoro <noreply+kokoro@google.com>
Reviewed-by: Bobby Bruce <bbruce@ucdavis.edu>
(cherry picked from commit a3fd9631cc209914fad2e2c1fb24006d6d5adc2d)
Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/66613
---
 tests/gem5/replacement-policies/run_replacement_policy_test.py | 1 +
 util/gem5art/artifact/gem5art/artifact/artifact.py             | 2 +-
 2 files changed, 2 insertions(+), 1 deletion(-)

diff --git a/tests/gem5/replacement-policies/run_replacement_policy_test.py b/tests/gem5/replacement-policies/run_replacement_policy_test.py
index 10061094b4..31076c6d99 100644
--- a/tests/gem5/replacement-policies/run_replacement_policy_test.py
+++ b/tests/gem5/replacement-policies/run_replacement_policy_test.py
@@ -85,6 +85,7 @@ motherboard = TestBoard(
 )
 root = Root(full_system=False, system=motherboard)
 
+motherboard._pre_instantiate()
 m5.instantiate()
 
 generator.start_traffic()
diff --git a/util/gem5art/artifact/gem5art/artifact/artifact.py b/util/gem5art/artifact/gem5art/artifact/artifact.py
index 91ffc64e50..46664e82fb 100644
--- a/util/gem5art/artifact/gem5art/artifact/artifact.py
+++ b/util/gem5art/artifact/gem5art/artifact/artifact.py
@@ -158,7 +158,7 @@ class Artifact:
         documentation: str,
         inputs: List["Artifact"] = [],
         architecture: str = "",
-        size: int = None,
+        size: Optional[int] = None,
         is_zipped: bool = False,
         md5sum: str = "",
         url: str = "",

From ce03482a394fd4309104ca15d002d3070fac3aef Mon Sep 17 00:00:00 2001
From: Yu-hsin Wang <yuhsingw@google.com>
Date: Thu, 8 Dec 2022 17:03:49 +0800
Subject: [PATCH 050/492] mem: Implement and use the recvMemBackdoorReq func.

In the previous change, we miss some model supporting atomic backdoor.
We should also implement the recvMemBackdoorReq to them.

Change-Id: I4706d215aa4a5d18fe4306b2387f9c8750cb4b4a
Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/66551
Reviewed-by: Gabe Black <gabe.black@gmail.com>
Maintainer: Gabe Black <gabe.black@gmail.com>
Tested-by: kokoro <noreply+kokoro@google.com>
---
 src/mem/hbm_ctrl.cc      | 15 +++++++++++++++
 src/mem/hbm_ctrl.hh      |  2 ++
 src/mem/thread_bridge.cc |  8 ++++++++
 src/mem/thread_bridge.hh |  2 ++
 4 files changed, 27 insertions(+)

diff --git a/src/mem/hbm_ctrl.cc b/src/mem/hbm_ctrl.cc
index 99618c4b5f..747e714f57 100644
--- a/src/mem/hbm_ctrl.cc
+++ b/src/mem/hbm_ctrl.cc
@@ -150,6 +150,21 @@ HBMCtrl::recvAtomicBackdoor(PacketPtr pkt, MemBackdoorPtr &backdoor)
     return latency;
 }
 
+void
+HBMCtrl::recvMemBackdoorReq(const MemBackdoorReq &req,
+        MemBackdoorPtr &backdoor)
+{
+    auto &range = req.range();
+    if (pc0Int && pc0Int->getAddrRange().isSubset(range)) {
+        pc0Int->getBackdoor(backdoor);
+    } else if (pc1Int && pc1Int->getAddrRange().isSubset(range)) {
+        pc1Int->getBackdoor(backdoor);
+    }
+    else {
+        panic("Can't handle address range for range %s\n", range.to_string());
+    }
+}
+
 bool
 HBMCtrl::writeQueueFullPC0(unsigned int neededEntries) const
 {
diff --git a/src/mem/hbm_ctrl.hh b/src/mem/hbm_ctrl.hh
index c9045f0ae7..a6ecf6c589 100644
--- a/src/mem/hbm_ctrl.hh
+++ b/src/mem/hbm_ctrl.hh
@@ -259,6 +259,8 @@ class HBMCtrl : public MemCtrl
     Tick recvAtomic(PacketPtr pkt) override;
     Tick recvAtomicBackdoor(PacketPtr pkt, MemBackdoorPtr &backdoor) override;
     void recvFunctional(PacketPtr pkt) override;
+    void recvMemBackdoorReq(const MemBackdoorReq &req,
+            MemBackdoorPtr &_backdoor) override;
     bool recvTimingReq(PacketPtr pkt) override;
 
 };
diff --git a/src/mem/thread_bridge.cc b/src/mem/thread_bridge.cc
index 3f76ef49b3..efaf19a0e2 100644
--- a/src/mem/thread_bridge.cc
+++ b/src/mem/thread_bridge.cc
@@ -84,6 +84,14 @@ ThreadBridge::IncomingPort::recvFunctional(PacketPtr pkt)
     device_.out_port_.sendFunctional(pkt);
 }
 
+void
+ThreadBridge::IncomingPort::recvMemBackdoorReq(const MemBackdoorReq &req,
+                                               MemBackdoorPtr &backdoor)
+{
+    EventQueue::ScopedMigration migrate(device_.eventQueue());
+    device_.out_port_.sendMemBackdoorReq(req, backdoor);
+}
+
 ThreadBridge::OutgoingPort::OutgoingPort(const std::string &name,
                                          ThreadBridge &device)
     : RequestPort(name, &device), device_(device)
diff --git a/src/mem/thread_bridge.hh b/src/mem/thread_bridge.hh
index 28c959193c..92cb078dd1 100644
--- a/src/mem/thread_bridge.hh
+++ b/src/mem/thread_bridge.hh
@@ -61,6 +61,8 @@ class ThreadBridge : public SimObject
 
         // FunctionalResponseProtocol
         void recvFunctional(PacketPtr pkt) override;
+        void recvMemBackdoorReq(const MemBackdoorReq &req,
+                                MemBackdoorPtr &backdoor) override;
 
       private:
         ThreadBridge &device_;

From d65173d596cee8f62fa25b41c78ab07dcf18cf72 Mon Sep 17 00:00:00 2001
From: "Bobby R. Bruce" <bbruce@ucdavis.edu>
Date: Sat, 10 Dec 2022 15:56:02 -0800
Subject: [PATCH 051/492] tests: Move replacement policy tests to long/Nightly

These tests require the compilation of NULL with the MI_Example cache
coherence protocol. This is a large overhead for these tests. They are
therefore better off being run nightly rather than as a
pre-commit/kokoro/quick test.

Change-Id: I87b25afa313ecca65c738e3a8692a9bf72b06620
Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/66615
Reviewed-by: Matt Sinclair <mattdsinclair@gmail.com>
Tested-by: kokoro <noreply+kokoro@google.com>
Maintainer: Matt Sinclair <mattdsinclair@gmail.com>
---
 tests/gem5/replacement-policies/test_replacement_policies.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tests/gem5/replacement-policies/test_replacement_policies.py b/tests/gem5/replacement-policies/test_replacement_policies.py
index 3a30c0a070..7b00e10429 100644
--- a/tests/gem5/replacement-policies/test_replacement_policies.py
+++ b/tests/gem5/replacement-policies/test_replacement_policies.py
@@ -51,7 +51,7 @@ def test_replacement_policy(config_name: str, config_path: str) -> None:
         valid_isas=(constants.null_tag,),
         protocol="MI_example",
         valid_hosts=constants.supported_hosts,
-        length=constants.quick_tag,
+        length=constants.long_tag,
     )
 
 

From fa34ebc8535c682717f6dc55649d41b0f16b9762 Mon Sep 17 00:00:00 2001
From: Roger Chang <rogerycchang@google.com>
Date: Tue, 29 Nov 2022 16:18:22 +0800
Subject: [PATCH 052/492] arch-riscv: Fork ACDFIMU_Zfh instructions into
 rv32/rv64

1. Add rvSelect for rv32 and rv64.
2. Add rvZext and rvSext for rv32 handle sign extension
3. Fork the following instructions into rv32/rv64 version

A extensions:
SC.W
LR.D
SC.D
AMOADD.D
AMOSWAP.D
AMOXOR.D
AMOOR.D
AMOAND.D
AMOMIN.D
AMOMAX.D
AMOMINU.D
AMOMAXU.D

C extensions:
C.ADDI4SPN
C.FLD
C.LW
C.FLW
C.LD
C.FSD
C.SW
C.FSW
C.SD
C.ADDI
C.JAL
C.ADDIW
C.ADDI16SP
C.SRLI
C.SRAI
C.ANDI
C.SUB
C.XOR
C.OR
C.AND
C.SUBW
C.ADDW
C.J
C.BEQZ
C.BNEZ
C.SLLI
C.FLDSP
C.LWSP
C.FLWSP
C.LDSP
C.JR
C.MV
C.EBREAK
C.JALR
C.ADD
C.FSDSP
C.SWSP
C.FSWSP
C.SDSPF

D extensions:
FCVT.L.D
FCVT.LU.D
FCVT.D.L
FCVT.D.LU
FMV.X.D
FCLASS.D
FMV.D.X

F extensions:
FSW
FCVT.L.S
FCVT.LU.S
FCVT.S.W
FCVT.S.WU
FCVT.S.L
FCVT.S.LU
FMV.X.W
FCLASS.S
FMV.W.X

I extensions:
LD
LWU
SLLI
ADDI
SLTI
SLTIU
XORI
SRLI
SRAI
ORI
ANDI
AUIPC
ADDIW
SLLIW
SRLIW
SRAIW
SD
ADD
SUB
SLL
SLT
SLTU
XOR
SRL
SRA
OR
AND
LUI
BEQ
BNE
BLT
BGE
BLTU
BGEU
JALR
JAL
ADDW
SUBW
SLLW
SRLW
SRAW

M extensions:
MUL
MULH
MULHSU
MULHU
DIV
DIVU
REM
REMU
MULW
DIVW
DIVUW
REMW
REMUW

ZFH extensions:
FSH
FCVT.L.H
FCVT.LU.H
FCVT.H.L
FCVT.H.LU

Change-Id: I8604324eadb700591db028aa3b013b060ba37de5
Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/65111
Reviewed-by: Jason Lowe-Power <power.jg@gmail.com>
Reviewed-by: Gabe Black <gabe.black@gmail.com>
Maintainer: Gabe Black <gabe.black@gmail.com>
Tested-by: kokoro <noreply+kokoro@google.com>
---
 src/arch/riscv/insts/static_inst.hh |   12 +
 src/arch/riscv/isa/bitfields.isa    |    2 +
 src/arch/riscv/isa/decoder.isa      | 1084 ++++++++++++++++-----------
 src/arch/riscv/isa/formats/amo.isa  |   10 +-
 src/arch/riscv/isa/formats/mem.isa  |    4 +-
 5 files changed, 681 insertions(+), 431 deletions(-)

diff --git a/src/arch/riscv/insts/static_inst.hh b/src/arch/riscv/insts/static_inst.hh
index bccecf2e2f..f835713505 100644
--- a/src/arch/riscv/insts/static_inst.hh
+++ b/src/arch/riscv/insts/static_inst.hh
@@ -58,6 +58,18 @@ class RiscvStaticInst : public StaticInst
 
     bool alignmentOk(ExecContext* xc, Addr addr, Addr size) const;
 
+    template <typename T>
+    T
+    rvSelect(T v32, T v64) const
+    {
+        return (machInst.rv_type == RV32) ? v32 : v64;
+    }
+
+    template <typename T32, typename T64>
+    T64 rvExt(T64 x) const { return rvSelect((T64)(T32)x, x); }
+    uint64_t rvZext(uint64_t x) const { return rvExt<uint32_t, uint64_t>(x); }
+    int64_t rvSext(int64_t x) const { return rvExt<int32_t, int64_t>(x); }
+
   public:
     ExtMachInst machInst;
 
diff --git a/src/arch/riscv/isa/bitfields.isa b/src/arch/riscv/isa/bitfields.isa
index 41935c5b0f..863982cfec 100644
--- a/src/arch/riscv/isa/bitfields.isa
+++ b/src/arch/riscv/isa/bitfields.isa
@@ -3,6 +3,7 @@
 // Copyright (c) 2015 RISC-V Foundation
 // Copyright (c) 2016 The University of Virginia
 // Copyright (c) 2020 Barkhausen Institut
+// Copyright (c) 2022 Google LLC
 // All rights reserved.
 //
 // Redistribution and use in source and binary forms, with or without
@@ -49,6 +50,7 @@ def bitfield FUNCT7 <31:25>;
 def bitfield SRTYPE <30>;
 def bitfield SHAMT5 <24:20>;
 def bitfield SHAMT6 <25:20>;
+def bitfield SHAMT6BIT5 <25>;
 
 // I-Type
 def bitfield IMM12  <31:20>;
diff --git a/src/arch/riscv/isa/decoder.isa b/src/arch/riscv/isa/decoder.isa
index c6b74ff44f..d442002bd5 100644
--- a/src/arch/riscv/isa/decoder.isa
+++ b/src/arch/riscv/isa/decoder.isa
@@ -4,6 +4,7 @@
 // Copyright (c) 2017 The University of Virginia
 // Copyright (c) 2020 Barkhausen Institut
 // Copyright (c) 2021 StreamComputing Corp
+// Copyright (c) 2022 Google LLC
 // All rights reserved.
 //
 // Redistribution and use in source and binary forms, with or without
@@ -34,6 +35,10 @@
 // The RISC-V ISA decoder
 //
 
+// In theory, all registers should be sign extended if not operating in the
+// full MXLEN register, but that will cause memory address out of range as it is
+// always regarded as uint64. So we'll zero extend PC related registers and
+// memory address, and sign extend others.
 decode QUADRANT default Unknown::unknown() {
     0x0: decode COPCODE {
         0x0: CIAddi4spnOp::c_addi4spn({{
@@ -45,7 +50,7 @@ decode QUADRANT default Unknown::unknown() {
             if (machInst == 0)
                 return std::make_shared<IllegalInstFault>("zero instruction",
                                                            machInst);
-            Rp2 = sp + imm;
+            Rp2 = rvSext(sp + imm);
         }}, uint64_t);
         format CompressedLoad {
             0x1: c_fld({{
@@ -58,7 +63,7 @@ decode QUADRANT default Unknown::unknown() {
 
                 Fp2_bits = Mem;
             }}, {{
-                EA = Rp1 + offset;
+                EA = rvZext(Rp1 + offset);
             }});
             0x2: c_lw({{
                 offset = CIMM2<1:1> << 2 |
@@ -67,15 +72,32 @@ decode QUADRANT default Unknown::unknown() {
             }}, {{
                 Rp2_sd = Mem_sw;
             }}, {{
-                EA = Rp1 + offset;
-            }});
-            0x3: c_ld({{
-                offset = CIMM3 << 3 | CIMM2 << 6;
-            }}, {{
-                Rp2_sd = Mem_sd;
-            }}, {{
-                EA = Rp1 + offset;
+                EA = rvZext(Rp1 + offset);
             }});
+            0x3: decode RVTYPE {
+                0x0: c_flw({{
+                    offset = CIMM2<1:1> << 2 |
+                             CIMM3 << 3 |
+                             CIMM2<0:0> << 6;
+                }}, {{
+                    STATUS status = xc->readMiscReg(MISCREG_STATUS);
+                    if (status.fs == FPUStatus::OFF)
+                        return std::make_shared<IllegalInstFault>("FPU is off",
+                                                                   machInst);
+
+                    freg_t fd = freg(f32(Mem_uw));
+                    Fp2_bits = fd.v;
+                }}, {{
+                    EA = (uint32_t)(Rp1_uw + offset);
+                }});
+                0x1: c_ld({{
+                    offset = CIMM3 << 3 | CIMM2 << 6;
+                }}, {{
+                    Rp2_sd = Mem_sd;
+                }}, {{
+                    EA = Rp1 + offset;
+                }});
+            }
         }
         format CompressedStore {
             0x5: c_fsd({{
@@ -88,7 +110,7 @@ decode QUADRANT default Unknown::unknown() {
 
                 Mem = Fp2_bits;
             }}, {{
-                EA = Rp1 + offset;
+                EA = rvZext(Rp1 + offset);
             }});
             0x6: c_sw({{
                 offset = CIMM2<1:1> << 2 |
@@ -97,15 +119,31 @@ decode QUADRANT default Unknown::unknown() {
             }}, {{
                 Mem_uw = Rp2_uw;
             }}, ea_code={{
-                EA = Rp1 + offset;
+                EA = rvZext(Rp1 + offset);
             }});
-            0x7: c_sd({{
-                offset = CIMM3 << 3 | CIMM2 << 6;
-            }}, {{
+            0x7: decode RVTYPE {
+                0x0: c_fsw({{
+                    offset = CIMM2<1:1> << 2 |
+                             CIMM3 << 3 |
+                             CIMM2<0:0> << 6;
+                }}, {{
+                    STATUS status = xc->readMiscReg(MISCREG_STATUS);
+                    if (status.fs == FPUStatus::OFF)
+                        return std::make_shared<IllegalInstFault>("FPU is off",
+                                                                   machInst);
+
+                    Mem_uw = unboxF32(boxF32(Fs2_bits));
+                }}, {{
+                    EA = (uint32_t)(Rp1_uw + offset);
+                }});
+                0x1: c_sd({{
+                    offset = CIMM3 << 3 | CIMM2 << 6;
+                }}, {{
                     Mem_ud = Rp2_ud;
-            }}, {{
-                EA = Rp1 + offset;
-            }});
+                }}, {{
+                    EA = Rp1 + offset;
+                }});
+            }
         }
     }
     0x1: decode COPCODE {
@@ -124,19 +162,34 @@ decode QUADRANT default Unknown::unknown() {
                                 "immediate = 0", machInst);
                     }
                 }
-                Rc1_sd = Rc1_sd + imm;
-            }});
-            0x1: c_addiw({{
-                imm = CIMM5;
-                if (CIMM1 > 0)
-                    imm |= ~((uint64_t)0x1F);
-            }}, {{
-                if (RC1 == 0) {
-                    return std::make_shared<IllegalInstFault>(
-                            "source reg x0", machInst);
-                }
-                Rc1_sw = (int32_t)(Rc1_sw + imm);
+                Rc1_sd = rvSext(Rc1_sd + imm);
             }});
+            0x1: decode RVTYPE {
+                0x0: c_jal({{
+                    imm = sext<12>((CJUMPIMM3TO1 << 1) |
+                                   (CJUMPIMM4TO4 << 4) |
+                                   (CJUMPIMM5TO5 << 5) |
+                                   (CJUMPIMM6TO6 << 6) |
+                                   (CJUMPIMM7TO7 << 7) |
+                                   (CJUMPIMM9TO8 << 8) |
+                                   (CJUMPIMM10TO10 << 10) |
+                                   (CJUMPIMMSIGN << 11));
+                }}, {{
+                    ra_sw = NPC_uw;
+                    NPC_uw = PC_uw + imm;
+                }});
+                0x1: c_addiw({{
+                    imm = CIMM5;
+                    if (CIMM1 > 0)
+                        imm |= ~((uint64_t)0x1F);
+                }}, {{
+                    if (RC1 == 0) {
+                        return std::make_shared<IllegalInstFault>(
+                                "source reg x0", machInst);
+                    }
+                    Rc1_sw = (int32_t)(Rc1_sw + imm);
+                }});
+            }
             0x2: c_li({{
                 imm = CIMM5;
                 if (CIMM1 > 0)
@@ -161,7 +214,7 @@ decode QUADRANT default Unknown::unknown() {
                         return std::make_shared<IllegalInstFault>(
                                 "immediate = 0", machInst);
                     }
-                    sp_sd = sp_sd + imm;
+                    sp_sd = rvSext(sp_sd + imm);
                 }});
                 default: c_lui({{
                     imm = CIMM5 << 12;
@@ -185,69 +238,80 @@ decode QUADRANT default Unknown::unknown() {
                 0x0: c_srli({{
                     imm = CIMM5 | (CIMM1 << 5);
                 }}, {{
+                    if (rvSelect((bool)CIMM1, false)) {
+                        return std::make_shared<IllegalInstFault>(
+                                "shmat[5] != 0", machInst);
+                    }
                     if (imm == 0) {
                         return std::make_shared<IllegalInstFault>(
                                 "immediate = 0", machInst);
                     }
-                    Rp1 = Rp1 >> imm;
+                    // The MSB can never be 1, hence no need to sign ext.
+                    Rp1 = rvZext(Rp1) >> imm;
                 }}, uint64_t);
                 0x1: c_srai({{
                     imm = CIMM5 | (CIMM1 << 5);
                 }}, {{
+                    if (rvSelect((bool)CIMM1, false)) {
+                        return std::make_shared<IllegalInstFault>(
+                                "shmat[5] != 0", machInst);
+                    }
                     if (imm == 0) {
                         return std::make_shared<IllegalInstFault>(
                                 "immediate = 0", machInst);
                     }
-                    Rp1_sd = Rp1_sd >> imm;
+                    Rp1_sd = rvSext(Rp1_sd) >> imm;
                 }}, uint64_t);
                 0x2: c_andi({{
                     imm = CIMM5;
                     if (CIMM1 > 0)
                         imm |= ~((uint64_t)0x1F);
                 }}, {{
-                    Rp1 = Rp1 & imm;
+                    Rp1 = rvSext(Rp1 & imm);
                 }}, uint64_t);
             }
             format CompressedROp {
                 0x3: decode CFUNCT1 {
                     0x0: decode CFUNCT2LOW {
                         0x0: c_sub({{
-                            Rp1 = Rp1 - Rp2;
+                            Rp1 = rvSext(Rp1 - Rp2);
                         }});
                         0x1: c_xor({{
-                            Rp1 = Rp1 ^ Rp2;
+                            Rp1 = rvSext(Rp1 ^ Rp2);
                         }});
                         0x2: c_or({{
-                            Rp1 = Rp1 | Rp2;
+                            Rp1 = rvSext(Rp1 | Rp2);
                         }});
                         0x3: c_and({{
-                            Rp1 = Rp1 & Rp2;
+                            Rp1 = rvSext(Rp1 & Rp2);
                         }});
                     }
-                    0x1: decode CFUNCT2LOW {
-                        0x0: c_subw({{
-                            Rp1_sd = (int32_t)Rp1_sd - Rp2_sw;
-                        }});
-                        0x1: c_addw({{
-                            Rp1_sd = (int32_t)Rp1_sd + Rp2_sw;
-                        }});
+                    0x1: decode RVTYPE {
+                        0x1: decode CFUNCT2LOW {
+                            0x0: c_subw({{
+                                Rp1_sd = (int32_t)Rp1_sd - Rp2_sw;
+                            }});
+                            0x1: c_addw({{
+                                Rp1_sd = (int32_t)Rp1_sd + Rp2_sw;
+                            }});
+                        }
                     }
                 }
             }
         }
         0x5: CJOp::c_j({{
-            NPC = PC + imm;
+            NPC = rvZext(PC + imm);
         }}, IsDirectControl, IsUncondControl);
         format CBOp {
             0x6: c_beqz({{
-                if (Rp1 == 0)
-                    NPC = PC + imm;
+                if (rvSext(Rp1) == 0)
+                    NPC = rvZext(PC + imm);
                 else
                     NPC = NPC;
             }}, IsDirectControl, IsCondControl);
             0x7: c_bnez({{
-                if (Rp1 != 0)
-                    NPC = PC + imm;
+                if (rvSext(Rp1) != 0)
+                    NPC = rvZext(PC + imm);
                 else
                     NPC = NPC;
             }}, IsDirectControl, IsCondControl);
@@ -257,6 +321,10 @@ decode QUADRANT default Unknown::unknown() {
         0x0: CIOp::c_slli({{
             imm = CIMM5 | (CIMM1 << 5);
         }}, {{
+            if (rvSelect((bool)CIMM1, false)) {
+                return std::make_shared<IllegalInstFault>(
+                        "shmat[5] != 0", machInst);
+            }
             if (imm == 0) {
                 return std::make_shared<IllegalInstFault>(
                         "immediate = 0", machInst);
@@ -265,7 +333,7 @@ decode QUADRANT default Unknown::unknown() {
                 return std::make_shared<IllegalInstFault>(
                         "source reg x0", machInst);
             }
-            Rc1 = Rc1 << imm;
+            Rc1 = rvSext(Rc1 << imm);
         }}, uint64_t);
         format CompressedLoad {
             0x1: c_fldsp({{
@@ -275,7 +343,7 @@ decode QUADRANT default Unknown::unknown() {
             }}, {{
                 Fc1_bits = Mem;
             }}, {{
-                EA = sp + offset;
+                EA = rvZext(sp + offset);
             }});
             0x2: c_lwsp({{
                 offset = CIMM5<4:2> << 2 |
@@ -286,23 +354,36 @@ decode QUADRANT default Unknown::unknown() {
                     return std::make_shared<IllegalInstFault>(
                             "source reg x0", machInst);
                 }
-                Rc1_sd = Mem_sw;
+                Rc1_sw = Mem_sw;
             }}, {{
-                EA = sp + offset;
-            }});
-            0x3: c_ldsp({{
-                offset = CIMM5<4:3> << 3 |
-                         CIMM1 << 5 |
-                         CIMM5<2:0> << 6;
-            }}, {{
-                if (RC1 == 0) {
-                    return std::make_shared<IllegalInstFault>(
-                            "source reg x0", machInst);
-                }
-                Rc1_sd = Mem_sd;
-            }}, {{
-                EA = sp + offset;
+                EA = rvZext(sp + offset);
             }});
+            0x3: decode RVTYPE {
+                0x0: c_flwsp({{
+                    offset = CIMM5<4:2> << 2 |
+                             CIMM1 << 5 |
+                             CIMM5<1:0> << 6;
+                }}, {{
+                    freg_t fd;
+                    fd = freg(f32(Mem_uw));
+                    Fd_bits = fd.v;
+                }}, {{
+                    EA = (uint32_t)(sp_uw + offset);
+                }});
+                0x1: c_ldsp({{
+                    offset = CIMM5<4:3> << 3 |
+                             CIMM1 << 5 |
+                             CIMM5<2:0> << 6;
+                }}, {{
+                    if (RC1 == 0) {
+                        return std::make_shared<IllegalInstFault>(
+                                "source reg x0", machInst);
+                    }
+                    Rc1_sd = Mem_sd;
+                }}, {{
+                    EA = sp + offset;
+                }});
+            }
         }
         0x4: decode CFUNCT1 {
             0x0: decode RC2 {
@@ -311,14 +392,14 @@ decode QUADRANT default Unknown::unknown() {
                         return std::make_shared<IllegalInstFault>(
                                 "source reg x0", machInst);
                     }
-                    NPC = Rc1;
+                    NPC = rvZext(Rc1);
                 }}, IsIndirectControl, IsUncondControl);
                 default: CROp::c_mv({{
                     if (RC1 == 0) {
                         return std::make_shared<IllegalInstFault>(
                                 "source reg x0", machInst);
                     }
-                    Rc1 = Rc2;
+                    Rc1 = rvSext(Rc2);
                 }});
             }
             0x1: decode RC1 {
@@ -335,11 +416,11 @@ decode QUADRANT default Unknown::unknown() {
                             return std::make_shared<IllegalInstFault>(
                                     "source reg x0", machInst);
                         }
-                        ra = NPC;
-                        NPC = Rc1;
-                    }}, IsIndirectControl, IsUncondControl, IsCall);
+                        ra = rvSext(NPC);
+                        NPC = rvZext(Rc1);
+                    }}, IsIndirectControl, IsUncondControl);
                     default: CompressedROp::c_add({{
-                        Rc1_sd = Rc1_sd + Rc2_sd;
+                        Rc1_sd = rvSext(Rc1_sd + Rc2_sd);
                     }});
                 }
             }
@@ -351,7 +432,7 @@ decode QUADRANT default Unknown::unknown() {
             }}, {{
                 Mem_ud = Fc2_bits;
             }}, {{
-                EA = sp + offset;
+                EA = rvZext(sp + offset);
             }});
             0x6: c_swsp({{
                 offset = CIMM6<5:2> << 2 |
@@ -359,16 +440,26 @@ decode QUADRANT default Unknown::unknown() {
             }}, {{
                 Mem_uw = Rc2_uw;
             }}, {{
-                EA = sp + offset;
-            }});
-            0x7: c_sdsp({{
-                offset = CIMM6<5:3> << 3 |
-                         CIMM6<2:0> << 6;
-            }}, {{
-                Mem = Rc2;
-            }}, {{
-                EA = sp + offset;
+                EA = rvZext(sp + offset);
             }});
+            0x7: decode RVTYPE {
+                0x0: c_fswsp({{
+                    offset = CIMM6<5:2> << 2 |
+                             CIMM6<1:0> << 6;
+                }}, {{
+                    Mem_uw = unboxF32(boxF32(Fs2_bits));
+                }}, {{
+                    EA = (uint32_t)(sp_uw + offset);
+                }});
+                0x1: c_sdsp({{
+                    offset = CIMM6<5:3> << 3 |
+                             CIMM6<2:0> << 6;
+                }}, {{
+                    Mem = Rc2;
+                }}, {{
+                    EA = sp + offset;
+                }});
+            }
         }
     }
     0x3: decode OPCODE {
@@ -383,18 +474,22 @@ decode QUADRANT default Unknown::unknown() {
                 0x2: lw({{
                     Rd_sd = Mem_sw;
                 }});
-                0x3: ld({{
-                    Rd_sd = Mem_sd;
-                }});
+                0x3: decode RVTYPE {
+                    0x1: ld({{
+                        Rd_sd = Mem_sd;
+                    }});
+                }
                 0x4: lbu({{
                     Rd = Mem_ub;
                 }});
                 0x5: lhu({{
                     Rd = Mem_uh;
                 }});
-                0x6: lwu({{
-                    Rd = Mem_uw;
-                }});
+                0x6: decode RVTYPE {
+                    0x1: lwu({{
+                        Rd = Mem_uw;
+                    }});
+                }
             }
         }
 
@@ -443,7 +538,11 @@ decode QUADRANT default Unknown::unknown() {
             0x1: decode FS3 {
                 format IOp {
                     0x00: slli({{
-                        Rd = Rs1 << imm;
+                        if (rvSelect((bool)SHAMT6BIT5, false)) {
+                            return std::make_shared<IllegalInstFault>(
+                                    "shmat[5] != 0", machInst);
+                        }
+                        Rd = rvSext(Rs1 << imm);
                     }}, imm_type = uint64_t, imm_code = {{ imm = SHAMT6; }});
                     0x02: decode FS2 {
                         0x0: sha256sum0({{
@@ -521,20 +620,24 @@ decode QUADRANT default Unknown::unknown() {
 
             format IOp {
                 0x0: addi({{
-                    Rd_sd = Rs1_sd + imm;
+                    Rd_sd = rvSext(Rs1_sd + imm);
                 }});
                 0x2: slti({{
-                    Rd = (Rs1_sd < imm) ? 1 : 0;
+                    Rd = (rvSext(Rs1_sd) < imm) ? 1 : 0;
                 }});
                 0x3: sltiu({{
-                    Rd = (Rs1 < imm) ? 1 : 0;
-                }}, uint64_t);
+                    Rd = (rvZext(Rs1) < imm) ? 1 : 0;
+                }}, uint64_t, imm_code = {{ imm = rvZext(sext<12>(IMM12)); }});
                 0x4: xori({{
-                    Rd = Rs1 ^ imm;
+                    Rd = rvSext(Rs1 ^ imm);
                 }}, uint64_t);
                 0x5: decode FS3 {
                     0x0: srli({{
-                        Rd = Rs1 >> imm;
+                        if (rvSelect((bool)SHAMT6BIT5, false)) {
+                            return std::make_shared<IllegalInstFault>(
+                                    "shmat[5] != 0", machInst);
+                        }
+                        Rd = rvSext(rvZext(Rs1) >> imm);
                     }}, imm_type = uint64_t, imm_code = {{ imm = SHAMT6; }});
                     0x5: orc_b({{
                         uint64_t result = 0;
@@ -549,7 +652,11 @@ decode QUADRANT default Unknown::unknown() {
                         Rd = result;
                     }}, imm_type = uint64_t, imm_code = {{ imm = SHAMT6; }});
                     0x8: srai({{
-                        Rd_sd = Rs1_sd >> imm;
+                        if (rvSelect((bool)SHAMT6BIT5, false)) {
+                            return std::make_shared<IllegalInstFault>(
+                                    "shmat[5] != 0", machInst);
+                        }
+                        Rd_sd = rvSext(Rs1_sd) >> imm;
                     }}, imm_type = uint64_t, imm_code = {{ imm = SHAMT6; }});
                     0x9: bexti({{
                         uint64_t index = imm & (64 - 1);
@@ -579,27 +686,31 @@ decode QUADRANT default Unknown::unknown() {
                     }
                 }
                 0x6: ori({{
-                    Rd = Rs1 | imm;
+                    Rd = rvSext(Rs1 | imm);
                 }}, uint64_t);
                 0x7: andi({{
-                    Rd = Rs1 & imm;
+                    Rd = rvSext(Rs1 & imm);
                 }}, uint64_t);
             }
         }
 
         0x05: UOp::auipc({{
-            Rd = PC + (sext<20>(imm) << 12);
+            Rd = rvSext(PC + (sext<20>(imm) << 12));
         }});
 
         0x06: decode FUNCT3 {
             format IOp {
-                0x0: addiw({{
-                    Rd_sw = (int32_t)(Rs1_sw + imm);
-                }}, int32_t);
+                0x0: decode RVTYPE {
+                    0x1: addiw({{
+                        Rd_sw = (int32_t)(Rs1_sw + imm);
+                    }}, int32_t);
+                }
                 0x1: decode FS3 {
-                    0x0: slliw({{
-                        Rd_sd = Rs1_sw << imm;
-                    }}, imm_type = uint64_t, imm_code = {{ imm = SHAMT5; }});
+                    0x0: decode RVTYPE {
+                        0x1: slliw({{
+                            Rd_sd = Rs1_sw << imm;
+                        }}, imm_type = uint64_t, imm_code = {{ imm = SHAMT5; }});
+                    }
                     0x1: slli_uw({{
                         Rd = ((uint64_t)(Rs1_uw)) << imm;
                     }}, imm_type = uint64_t, imm_code = {{ imm = SHAMT5; }});
@@ -616,12 +727,16 @@ decode QUADRANT default Unknown::unknown() {
                     }
                 }
                 0x5: decode FS3 {
-                    0x0: srliw({{
-                        Rd_sd = (int32_t)(Rs1_uw >> imm);
-                    }}, imm_type = uint64_t, imm_code = {{ imm = SHAMT5; }});
-                    0x8: sraiw({{
-                        Rd_sd = Rs1_sw >> imm;
-                    }}, imm_type = uint64_t, imm_code = {{ imm = SHAMT5; }});
+                    0x0: decode RVTYPE {
+                        0x1: srliw({{
+                            Rd_sd = (int32_t)(Rs1_uw >> imm);
+                        }}, imm_type = uint64_t, imm_code = {{ imm = SHAMT5; }});
+                    }
+                    0x8: decode RVTYPE {
+                        0x1: sraiw({{
+                            Rd_sd = Rs1_sw >> imm;
+                        }}, imm_type = uint64_t, imm_code = {{ imm = SHAMT5; }});
+                    }
                     0xc: roriw({{
                         Rd = (int32_t) ((Rs1_uw >> imm) | (Rs1_uw << ((32 - imm) & (32 - 1))));
                     }}, imm_type = uint64_t, imm_code = {{ imm = SHAMT5; }});
@@ -640,9 +755,11 @@ decode QUADRANT default Unknown::unknown() {
                 0x2: sw({{
                     Mem_uw = Rs2_uw;
                 }});
-                0x3: sd({{
-                    Mem_ud = Rs2_ud;
-                }});
+                0x3: decode RVTYPE {
+                    0x1: sd({{
+                        Mem_ud = Rs2_ud;
+                    }});
+                }
             }
         }
 
@@ -654,7 +771,7 @@ decode QUADRANT default Unknown::unknown() {
                         return std::make_shared<IllegalInstFault>(
                                 "FPU is off", machInst);
 
-                    Mem_uh = (uint16_t)Fs2_bits;
+                    Mem_uh = unboxF16(boxF16(Fs2_bits));
                 }}, inst_flags=FloatMemWriteOp);
                 0x2: fsw({{
                     STATUS status = xc->readMiscReg(MISCREG_STATUS);
@@ -662,7 +779,7 @@ decode QUADRANT default Unknown::unknown() {
                         return std::make_shared<IllegalInstFault>(
                                 "FPU is off", machInst);
 
-                    Mem_uw = (uint32_t)Fs2_bits;
+                    Mem_uw = unboxF32(boxF32(Fs2_bits));
                 }}, inst_flags=FloatMemWriteOp);
                 0x3: fsd({{
                     STATUS status = xc->readMiscReg(MISCREG_STATUS);
@@ -683,7 +800,7 @@ decode QUADRANT default Unknown::unknown() {
                 0x3: StoreCond::sc_w({{
                     Mem_uw = Rs2_uw;
                 }}, {{
-                    Rd = result;
+                    Rd = rvSext(result);
                 }}, inst_flags=IsStoreConditional, mem_flags=LLSC);
                 0x0: AtomicMemOp::amoadd_w({{
                     Rd_sd = Mem_sw;
@@ -749,78 +866,84 @@ decode QUADRANT default Unknown::unknown() {
                         [](uint32_t* b, uint32_t a){ if (a > *b) *b = a; });
                 }}, mem_flags=ATOMIC_RETURN_OP);
             }
-            0x3: decode AMOFUNCT {
-                0x2: LoadReserved::lr_d({{
-                    Rd_sd = Mem_sd;
-                }}, mem_flags=LLSC);
-                0x3: StoreCond::sc_d({{
-                    Mem = Rs2;
-                }}, {{
-                    Rd = result;
-                }}, mem_flags=LLSC, inst_flags=IsStoreConditional);
-                0x0: AtomicMemOp::amoadd_d({{
-                    Rd_sd = Mem_sd;
-                }}, {{
-                    TypedAtomicOpFunctor<int64_t> *amo_op =
+            0x3: decode RVTYPE {
+                0x1: decode AMOFUNCT {
+                    0x2: LoadReserved::lr_d({{
+                        Rd_sd = Mem_sd;
+                    }}, mem_flags=LLSC);
+                    0x3: StoreCond::sc_d({{
+                        Mem = Rs2;
+                    }}, {{
+                        Rd = result;
+                    }}, mem_flags=LLSC, inst_flags=IsStoreConditional);
+                    0x0: AtomicMemOp::amoadd_d({{
+                        Rd_sd = Mem_sd;
+                    }}, {{
+                        TypedAtomicOpFunctor<int64_t> *amo_op =
+                              new AtomicGenericOp<int64_t>(Rs2_sd,
+                                      [](int64_t* b, int64_t a){ *b += a; });
+                    }}, mem_flags=ATOMIC_RETURN_OP);
+                    0x1: AtomicMemOp::amoswap_d({{
+                        Rd_sd = Mem_sd;
+                    }}, {{
+                        TypedAtomicOpFunctor<uint64_t> *amo_op =
+                              new AtomicGenericOp<uint64_t>(Rs2_ud,
+                                      [](uint64_t* b, uint64_t a){ *b = a; });
+                    }}, mem_flags=ATOMIC_RETURN_OP);
+                    0x4: AtomicMemOp::amoxor_d({{
+                        Rd_sd = Mem_sd;
+                    }}, {{
+                        TypedAtomicOpFunctor<uint64_t> *amo_op =
+                              new AtomicGenericOp<uint64_t>(Rs2_ud,
+                                     [](uint64_t* b, uint64_t a){ *b ^= a; });
+                    }}, mem_flags=ATOMIC_RETURN_OP);
+                    0x8: AtomicMemOp::amoor_d({{
+                        Rd_sd = Mem_sd;
+                    }}, {{
+                        TypedAtomicOpFunctor<uint64_t> *amo_op =
+                              new AtomicGenericOp<uint64_t>(Rs2_ud,
+                                     [](uint64_t* b, uint64_t a){ *b |= a; });
+                    }}, mem_flags=ATOMIC_RETURN_OP);
+                    0xc: AtomicMemOp::amoand_d({{
+                        Rd_sd = Mem_sd;
+                    }}, {{
+                        TypedAtomicOpFunctor<uint64_t> *amo_op =
+                              new AtomicGenericOp<uint64_t>(Rs2_ud,
+                                     [](uint64_t* b, uint64_t a){ *b &= a; });
+                    }}, mem_flags=ATOMIC_RETURN_OP);
+                    0x10: AtomicMemOp::amomin_d({{
+                        Rd_sd = Mem_sd;
+                    }}, {{
+                        TypedAtomicOpFunctor<int64_t> *amo_op =
                           new AtomicGenericOp<int64_t>(Rs2_sd,
-                                  [](int64_t* b, int64_t a){ *b += a; });
-                }}, mem_flags=ATOMIC_RETURN_OP);
-                0x1: AtomicMemOp::amoswap_d({{
-                    Rd_sd = Mem_sd;
-                }}, {{
-                    TypedAtomicOpFunctor<uint64_t> *amo_op =
+                            [](int64_t* b, int64_t a){ if (a < *b) *b = a; });
+                    }}, mem_flags=ATOMIC_RETURN_OP);
+                    0x14: AtomicMemOp::amomax_d({{
+                        Rd_sd = Mem_sd;
+                    }}, {{
+                        TypedAtomicOpFunctor<int64_t> *amo_op =
+                          new AtomicGenericOp<int64_t>(Rs2_sd,
+                            [](int64_t* b, int64_t a){ if (a > *b) *b = a; });
+                    }}, mem_flags=ATOMIC_RETURN_OP);
+                    0x18: AtomicMemOp::amominu_d({{
+                        Rd_sd = Mem_sd;
+                    }}, {{
+                        TypedAtomicOpFunctor<uint64_t> *amo_op =
                           new AtomicGenericOp<uint64_t>(Rs2_ud,
-                                  [](uint64_t* b, uint64_t a){ *b = a; });
-                }}, mem_flags=ATOMIC_RETURN_OP);
-                0x4: AtomicMemOp::amoxor_d({{
-                    Rd_sd = Mem_sd;
-                }}, {{
-                    TypedAtomicOpFunctor<uint64_t> *amo_op =
+                            [](uint64_t* b, uint64_t a){
+                              if (a < *b) *b = a;
+                            });
+                    }}, mem_flags=ATOMIC_RETURN_OP);
+                    0x1c: AtomicMemOp::amomaxu_d({{
+                        Rd_sd = Mem_sd;
+                    }}, {{
+                        TypedAtomicOpFunctor<uint64_t> *amo_op =
                           new AtomicGenericOp<uint64_t>(Rs2_ud,
-                                 [](uint64_t* b, uint64_t a){ *b ^= a; });
-                }}, mem_flags=ATOMIC_RETURN_OP);
-                0x8: AtomicMemOp::amoor_d({{
-                    Rd_sd = Mem_sd;
-                }}, {{
-                    TypedAtomicOpFunctor<uint64_t> *amo_op =
-                          new AtomicGenericOp<uint64_t>(Rs2_ud,
-                                 [](uint64_t* b, uint64_t a){ *b |= a; });
-                }}, mem_flags=ATOMIC_RETURN_OP);
-                0xc: AtomicMemOp::amoand_d({{
-                    Rd_sd = Mem_sd;
-                }}, {{
-                    TypedAtomicOpFunctor<uint64_t> *amo_op =
-                          new AtomicGenericOp<uint64_t>(Rs2_ud,
-                                 [](uint64_t* b, uint64_t a){ *b &= a; });
-                }}, mem_flags=ATOMIC_RETURN_OP);
-                0x10: AtomicMemOp::amomin_d({{
-                    Rd_sd = Mem_sd;
-                }}, {{
-                    TypedAtomicOpFunctor<int64_t> *amo_op =
-                      new AtomicGenericOp<int64_t>(Rs2_sd,
-                        [](int64_t* b, int64_t a){ if (a < *b) *b = a; });
-                }}, mem_flags=ATOMIC_RETURN_OP);
-                0x14: AtomicMemOp::amomax_d({{
-                    Rd_sd = Mem_sd;
-                }}, {{
-                    TypedAtomicOpFunctor<int64_t> *amo_op =
-                      new AtomicGenericOp<int64_t>(Rs2_sd,
-                        [](int64_t* b, int64_t a){ if (a > *b) *b = a; });
-                }}, mem_flags=ATOMIC_RETURN_OP);
-                0x18: AtomicMemOp::amominu_d({{
-                    Rd_sd = Mem_sd;
-                }}, {{
-                    TypedAtomicOpFunctor<uint64_t> *amo_op =
-                      new AtomicGenericOp<uint64_t>(Rs2_ud,
-                        [](uint64_t* b, uint64_t a){ if (a < *b) *b = a; });
-                }}, mem_flags=ATOMIC_RETURN_OP);
-                0x1c: AtomicMemOp::amomaxu_d({{
-                    Rd_sd = Mem_sd;
-                }}, {{
-                    TypedAtomicOpFunctor<uint64_t> *amo_op =
-                      new AtomicGenericOp<uint64_t>(Rs2_ud,
-                        [](uint64_t* b, uint64_t a){ if (a > *b) *b = a; });
-                }}, mem_flags=ATOMIC_RETURN_OP);
+                            [](uint64_t* b, uint64_t a){
+                              if (a > *b) *b = a;
+                            });
+                    }}, mem_flags=ATOMIC_RETURN_OP);
+                }
             }
         }
         0x0c: decode FUNCT3 {
@@ -828,15 +951,15 @@ decode QUADRANT default Unknown::unknown() {
                 0x0: decode KFUNCT5 {
                     0x00: decode BS {
                         0x0: add({{
-                            Rd = Rs1_sd + Rs2_sd;
+                            Rd = rvSext(Rs1_sd + Rs2_sd);
                         }});
                         0x1: sub({{
-                            Rd = Rs1_sd - Rs2_sd;
+                            Rd = rvSext(Rs1_sd - Rs2_sd);
                         }});
                     }
                     0x01: decode BS {
                         0x0: mul({{
-                            Rd = Rs1_sd * Rs2_sd;
+                            Rd = rvSext(Rs1_sd * Rs2_sd);
                         }}, IntMultOp);
                     }
                     0x18: sm4ed({{
@@ -871,30 +994,36 @@ decode QUADRANT default Unknown::unknown() {
                 }
                 0x1: decode FUNCT7 {
                     0x0: sll({{
-                        Rd = Rs1 << Rs2<5:0>;
+                        Rd = rvSext(Rs1 << rvSelect(Rs2<4:0>, Rs2<5:0>));
                     }});
-                    0x1: mulh({{
-                        bool negate = (Rs1_sd < 0) != (Rs2_sd < 0);
+                    0x1: decode RVTYPE {
+                        0x0: rv32_mulh({{
+                            Rd_sw = ((int64_t)Rs1_sw * Rs2_sw) >> 32;
+                        }}, IntMultOp);
+                        0x1: mulh({{
+                            bool negate = (Rs1_sd < 0) != (Rs2_sd < 0);
 
-                        uint64_t Rs1_lo = (uint32_t)std::abs(Rs1_sd);
-                        uint64_t Rs1_hi = (uint64_t)std::abs(Rs1_sd) >> 32;
-                        uint64_t Rs2_lo = (uint32_t)std::abs(Rs2_sd);
-                        uint64_t Rs2_hi = (uint64_t)std::abs(Rs2_sd) >> 32;
+                            uint64_t Rs1_lo = (uint32_t)std::abs(Rs1_sd);
+                            uint64_t Rs1_hi = (uint64_t)std::abs(Rs1_sd) >> 32;
+                            uint64_t Rs2_lo = (uint32_t)std::abs(Rs2_sd);
+                            uint64_t Rs2_hi = (uint64_t)std::abs(Rs2_sd) >> 32;
 
-                        uint64_t hi = Rs1_hi*Rs2_hi;
-                        uint64_t mid1 = Rs1_hi*Rs2_lo;
-                        uint64_t mid2 = Rs1_lo*Rs2_hi;
-                        uint64_t lo = Rs2_lo*Rs1_lo;
-                        uint64_t carry = ((uint64_t)(uint32_t)mid1
-                                + (uint64_t)(uint32_t)mid2 + (lo >> 32)) >> 32;
+                            uint64_t hi = Rs1_hi*Rs2_hi;
+                            uint64_t mid1 = Rs1_hi*Rs2_lo;
+                            uint64_t mid2 = Rs1_lo*Rs2_hi;
+                            uint64_t lo = Rs2_lo*Rs1_lo;
+                            uint64_t carry = ((uint64_t)(uint32_t)mid1
+                                    + (uint64_t)(uint32_t)mid2
+                                    + (lo >> 32)) >> 32;
 
-                        uint64_t res = hi +
-                                       (mid1 >> 32) +
-                                       (mid2 >> 32) +
-                                       carry;
-                        Rd = negate ? ~res + (Rs1_sd*Rs2_sd == 0 ? 1 : 0)
-                                    : res;
-                    }}, IntMultOp);
+                            uint64_t res = hi +
+                                          (mid1 >> 32) +
+                                          (mid2 >> 32) +
+                                          carry;
+                            Rd = negate ? ~res + (Rs1_sd*Rs2_sd == 0 ? 1 : 0)
+                                        : res;
+                        }}, IntMultOp);
+                    }
                     0x5: clmul({{
                         uint64_t result = 0;
                         for (int i = 0; i < 64; i++) {
@@ -923,28 +1052,34 @@ decode QUADRANT default Unknown::unknown() {
                 }
                 0x2: decode FUNCT7 {
                     0x0: slt({{
-                        Rd = (Rs1_sd < Rs2_sd) ? 1 : 0;
+                        Rd = (rvSext(Rs1_sd) < rvSext(Rs2_sd)) ? 1 : 0;
                     }});
-                    0x1: mulhsu({{
-                        bool negate = Rs1_sd < 0;
-                        uint64_t Rs1_lo = (uint32_t)std::abs(Rs1_sd);
-                        uint64_t Rs1_hi = (uint64_t)std::abs(Rs1_sd) >> 32;
-                        uint64_t Rs2_lo = (uint32_t)Rs2;
-                        uint64_t Rs2_hi = Rs2 >> 32;
+                    0x1: decode RVTYPE {
+                        0x0: rv32_mulhsu({{
+                            Rd_sw = ((int64_t)Rs1_sw * Rs2_uw) >> 32;
+                        }}, IntMultOp);
+                        0x1: mulhsu({{
+                            bool negate = Rs1_sd < 0;
+                            uint64_t Rs1_lo = (uint32_t)std::abs(Rs1_sd);
+                            uint64_t Rs1_hi = (uint64_t)std::abs(Rs1_sd) >> 32;
+                            uint64_t Rs2_lo = (uint32_t)Rs2;
+                            uint64_t Rs2_hi = Rs2 >> 32;
 
-                        uint64_t hi = Rs1_hi*Rs2_hi;
-                        uint64_t mid1 = Rs1_hi*Rs2_lo;
-                        uint64_t mid2 = Rs1_lo*Rs2_hi;
-                        uint64_t lo = Rs1_lo*Rs2_lo;
-                        uint64_t carry = ((uint64_t)(uint32_t)mid1
-                                + (uint64_t)(uint32_t)mid2 + (lo >> 32)) >> 32;
+                            uint64_t hi = Rs1_hi*Rs2_hi;
+                            uint64_t mid1 = Rs1_hi*Rs2_lo;
+                            uint64_t mid2 = Rs1_lo*Rs2_hi;
+                            uint64_t lo = Rs1_lo*Rs2_lo;
+                            uint64_t carry = ((uint64_t)(uint32_t)mid1
+                                    + (uint64_t)(uint32_t)mid2
+                                    + (lo >> 32)) >> 32;
 
-                        uint64_t res = hi +
-                                       (mid1 >> 32) +
-                                       (mid2 >> 32) +
-                                       carry;
-                        Rd = negate ? ~res + (Rs1_sd*Rs2 == 0 ? 1 : 0) : res;
-                    }}, IntMultOp);
+                            uint64_t res = hi +
+                                          (mid1 >> 32) +
+                                          (mid2 >> 32) +
+                                          carry;
+                            Rd = negate ? ~res + (Rs1_sd*Rs2 == 0 ? 1 : 0) : res;
+                        }}, IntMultOp);
+                    }
                     0x5: clmulr({{
                         uint64_t result = 0;
                         for (int i = 0; i < 64; i++) {
@@ -963,23 +1098,29 @@ decode QUADRANT default Unknown::unknown() {
                 }
                 0x3: decode FUNCT7 {
                     0x0: sltu({{
-                        Rd = (Rs1 < Rs2) ? 1 : 0;
+                        Rd = (rvZext(Rs1) < rvZext(Rs2)) ? 1 : 0;
                     }});
-                    0x1: mulhu({{
-                        uint64_t Rs1_lo = (uint32_t)Rs1;
-                        uint64_t Rs1_hi = Rs1 >> 32;
-                        uint64_t Rs2_lo = (uint32_t)Rs2;
-                        uint64_t Rs2_hi = Rs2 >> 32;
+                    0x1: decode RVTYPE {
+                        0x0: rv32_mulhu({{
+                            Rd_sw = ((uint64_t)Rs1_uw * Rs2_uw) >> 32;
+                        }}, IntMultOp);
+                        0x1: mulhu({{
+                            uint64_t Rs1_lo = (uint32_t)Rs1;
+                            uint64_t Rs1_hi = Rs1 >> 32;
+                            uint64_t Rs2_lo = (uint32_t)Rs2;
+                            uint64_t Rs2_hi = Rs2 >> 32;
 
-                        uint64_t hi = Rs1_hi*Rs2_hi;
-                        uint64_t mid1 = Rs1_hi*Rs2_lo;
-                        uint64_t mid2 = Rs1_lo*Rs2_hi;
-                        uint64_t lo = Rs1_lo*Rs2_lo;
-                        uint64_t carry = ((uint64_t)(uint32_t)mid1
-                                + (uint64_t)(uint32_t)mid2 + (lo >> 32)) >> 32;
+                            uint64_t hi = Rs1_hi*Rs2_hi;
+                            uint64_t mid1 = Rs1_hi*Rs2_lo;
+                            uint64_t mid2 = Rs1_lo*Rs2_hi;
+                            uint64_t lo = Rs1_lo*Rs2_lo;
+                            uint64_t carry = ((uint64_t)(uint32_t)mid1
+                                    + (uint64_t)(uint32_t)mid2
+                                    + (lo >> 32)) >> 32;
 
-                        Rd = hi + (mid1 >> 32) + (mid2 >> 32) + carry;
-                    }}, IntMultOp);
+                            Rd = hi + (mid1 >> 32) + (mid2 >> 32) + carry;
+                        }}, IntMultOp);
+                    }
                     0x5: clmulh({{
                         uint64_t result = 0;
                         for (int i = 1; i < 64; i++) {
@@ -992,19 +1133,32 @@ decode QUADRANT default Unknown::unknown() {
                 }
                 0x4: decode FUNCT7 {
                     0x0: xor({{
-                        Rd = Rs1 ^ Rs2;
+                        Rd = rvSext(Rs1 ^ Rs2);
                     }});
-                    0x1: div({{
-                        if (Rs2_sd == 0) {
-                            Rd_sd = -1;
-                        } else if (
-                                Rs1_sd == std::numeric_limits<int64_t>::min()
-                                && Rs2_sd == -1) {
-                            Rd_sd = std::numeric_limits<int64_t>::min();
-                        } else {
-                            Rd_sd = Rs1_sd/Rs2_sd;
-                        }
-                    }}, IntDivOp);
+                    0x1: decode RVTYPE {
+                        0x0: rv32_div({{
+                            constexpr int32_t kRsMin = \
+                                std::numeric_limits<int32_t>::min();
+                            if (Rs2_sw == 0) {
+                                Rd_sw = -1;
+                            } else if (Rs1_sw == kRsMin && Rs2_sw == -1) {
+                                Rd_sw = kRsMin;
+                            } else {
+                                Rd_sw = Rs1_sw/Rs2_sw;
+                            }
+                        }}, IntDivOp);
+                        0x1: div({{
+                            constexpr int64_t kRsMin = \
+                                std::numeric_limits<int64_t>::min();
+                            if (Rs2_sd == 0) {
+                                Rd_sd = -1;
+                            } else if (Rs1_sd == kRsMin && Rs2_sd == -1) {
+                                Rd_sd = kRsMin;
+                            } else {
+                                Rd_sd = Rs1_sd/Rs2_sd;
+                            }
+                        }}, IntDivOp);
+                    }
                     0x5: min({{
                         Rd = (((int64_t) Rs1) < ((int64_t) Rs2)) ? Rs1 : Rs2;
                     }});
@@ -1020,17 +1174,27 @@ decode QUADRANT default Unknown::unknown() {
                 }
                 0x5: decode FUNCT7 {
                     0x0: srl({{
-                        Rd = Rs1 >> Rs2<5:0>;
+                        Rd = rvSext(rvZext(Rs1) >>
+                                    rvSelect(Rs2<4:0>, Rs2<5:0>));
                     }});
-                    0x1: divu({{
-                        if (Rs2 == 0) {
-                            Rd = std::numeric_limits<uint64_t>::max();
-                        } else {
-                            Rd = Rs1/Rs2;
-                        }
-                    }}, IntDivOp);
+                    0x1: decode RVTYPE {
+                        0x0: rv32_divu({{
+                            if (Rs2_uw == 0) {
+                                Rd_sw = std::numeric_limits<uint32_t>::max();
+                            } else {
+                                Rd_sw = Rs1_uw/Rs2_uw;
+                            }
+                        }}, IntDivOp);
+                        0x1: divu({{
+                            if (Rs2 == 0) {
+                                Rd = std::numeric_limits<uint64_t>::max();
+                            } else {
+                                Rd = Rs1/Rs2;
+                            }
+                        }}, IntDivOp);
+                    }
                     0x20: sra({{
-                        Rd_sd = Rs1_sd >> Rs2<5:0>;
+                        Rd = rvSext(Rs1_sd) >> rvSelect(Rs2<4:0>, Rs2<5:0>);
                     }});
                     0x5: minu({{
                         Rd = Rs1 < Rs2 ? Rs1 : Rs2;
@@ -1046,19 +1210,32 @@ decode QUADRANT default Unknown::unknown() {
                 }
                 0x6: decode FUNCT7 {
                     0x0: or({{
-                        Rd = Rs1 | Rs2;
+                        Rd = rvSext(Rs1 | Rs2);
                     }});
-                    0x1: rem({{
-                        if (Rs2_sd == 0) {
-                            Rd = Rs1_sd;
-                        } else if (
-                                Rs1_sd == std::numeric_limits<int64_t>::min()
-                                && Rs2_sd == -1) {
-                            Rd = 0;
-                        } else {
-                            Rd = Rs1_sd%Rs2_sd;
-                        }
-                    }}, IntDivOp);
+                    0x1: decode RVTYPE {
+                        0x0: rv32_rem({{
+                            constexpr int32_t kRsMin = \
+                                std::numeric_limits<int32_t>::min();
+                            if (Rs2_sw == 0) {
+                                Rd_sw = Rs1_sw;
+                            } else if (Rs1_sw == kRsMin && Rs2_sw == -1) {
+                                Rd_sw = 0;
+                            } else {
+                                Rd_sw = Rs1_sw%Rs2_sw;
+                            }
+                        }}, IntDivOp);
+                        0x1: rem({{
+                            constexpr int64_t kRsMin = \
+                                std::numeric_limits<int64_t>::min();
+                            if (Rs2_sd == 0) {
+                                Rd = Rs1_sd;
+                            } else if (Rs1_sd == kRsMin && Rs2_sd == -1) {
+                                Rd = 0;
+                            } else {
+                                Rd = Rs1_sd%Rs2_sd;
+                            }
+                        }}, IntDivOp);
+                    }
                     0x5: max({{
                         Rd = (((int64_t) Rs1) > ((int64_t) Rs2)) ? Rs1 : Rs2;
                     }});
@@ -1071,15 +1248,24 @@ decode QUADRANT default Unknown::unknown() {
                 }
                 0x7: decode FUNCT7 {
                     0x0: and({{
-                        Rd = Rs1 & Rs2;
+                        Rd = rvSext(Rs1 & Rs2);
                     }});
-                    0x1: remu({{
-                        if (Rs2 == 0) {
-                            Rd = Rs1;
-                        } else {
-                            Rd = Rs1%Rs2;
-                        }
-                    }}, IntDivOp);
+                    0x1: decode RVTYPE {
+                        0x0: rv32_remu({{
+                            if (Rs2_uw == 0) {
+                                Rd_sw = Rs1_uw;
+                            } else {
+                                Rd_sw = Rs1_uw%Rs2_uw;
+                            }
+                        }}, IntDivOp);
+                        0x1: remu({{
+                            if (Rs2 == 0) {
+                                Rd = Rs1;
+                            } else {
+                                Rd = Rs1%Rs2;
+                            }
+                        }}, IntDivOp);
+                    }
                     0x5: maxu({{
                         Rd = Rs1 > Rs2 ? Rs1 : Rs2;
                     }});
@@ -1091,29 +1277,37 @@ decode QUADRANT default Unknown::unknown() {
         }
 
         0x0d: UOp::lui({{
-            Rd = (uint64_t)(sext<20>(imm) << 12);
+            Rd = (sext<20>(imm) << 12);
         }});
 
         0x0e: decode FUNCT3 {
             format ROp {
                 0x0: decode FUNCT7 {
-                    0x0: addw({{
-                        Rd_sd = Rs1_sw + Rs2_sw;
-                    }});
-                    0x1: mulw({{
-                        Rd_sd = (int32_t)(Rs1_sw*Rs2_sw);
-                    }}, IntMultOp);
+                    0x0: decode RVTYPE {
+                        0x1: addw({{
+                            Rd_sd = Rs1_sw + Rs2_sw;
+                        }});
+                    }
+                    0x1: decode RVTYPE {
+                        0x1: mulw({{
+                            Rd_sd = (int32_t)(Rs1_sw*Rs2_sw);
+                        }}, IntMultOp);
+                    }
                     0x4: add_uw({{
                         Rd = Rs1_uw + Rs2;
                     }});
-                    0x20: subw({{
-                        Rd_sd = Rs1_sw - Rs2_sw;
-                    }});
+                    0x20: decode RVTYPE {
+                        0x1: subw({{
+                            Rd_sd = Rs1_sw - Rs2_sw;
+                        }});
+                    }
                 }
                 0x1: decode FUNCT7 {
-                    0x0: sllw({{
-                        Rd_sd = Rs1_sw << Rs2<4:0>;
-                    }});
+                    0x0: decode RVTYPE {
+                        0x1: sllw({{
+                            Rd_sd = Rs1_sw << Rs2<4:0>;
+                        }});
+                    }
                     0x30: rolw({{
                         int shamt = Rs2 & (32 - 1);
                         Rd = (int32_t) ((Rs1_uw << shamt) | (Rs1_uw >> ((32 - shamt) & (32 - 1))));
@@ -1125,16 +1319,19 @@ decode QUADRANT default Unknown::unknown() {
                     }});
                 }
                 0x4: decode FUNCT7 {
-                    0x1: divw({{
-                        if (Rs2_sw == 0) {
-                            Rd_sd = -1;
-                        } else if (Rs1_sw == std::numeric_limits<int32_t>::min()
-                                && Rs2_sw == -1) {
-                            Rd_sd = std::numeric_limits<int32_t>::min();
-                        } else {
-                            Rd_sd = Rs1_sw/Rs2_sw;
-                        }
-                    }}, IntDivOp);
+                    0x1: decode RVTYPE {
+                        0x1: divw({{
+                            constexpr int32_t kRsMin = \
+                                std::numeric_limits<int32_t>::min();
+                            if (Rs2_sw == 0) {
+                                Rd_sd = -1;
+                            } else if (Rs1_sw == kRsMin && Rs2_sw == -1) {
+                                Rd_sd = kRsMin;
+                            } else {
+                                Rd_sd = Rs1_sw/Rs2_sw;
+                            }
+                        }}, IntDivOp);
+                    }
                     0x4: zext_h({{
                         Rd = Rs1_uh;
                     }});
@@ -1143,46 +1340,57 @@ decode QUADRANT default Unknown::unknown() {
                     }});
                 }
                 0x5: decode FUNCT7 {
-                    0x0: srlw({{
-                        Rd_sd = (int32_t)(Rs1_uw >> Rs2<4:0>);
-                    }});
-                    0x1: divuw({{
-                        if (Rs2_uw == 0) {
-                            Rd_sd = std::numeric_limits<uint64_t>::max();
-                        } else {
-                            Rd_sd = (int32_t)(Rs1_uw/Rs2_uw);
-                        }
-                    }}, IntDivOp);
-                    0x20: sraw({{
-                        Rd_sd = Rs1_sw >> Rs2<4:0>;
-                    }});
+                    0x0: decode RVTYPE {
+                        0x1: srlw({{
+                            Rd_sd = (int32_t)(Rs1_uw >> Rs2<4:0>);
+                        }});
+                    }
+                    0x1: decode RVTYPE {
+                        0x1: divuw({{
+                            if (Rs2_uw == 0) {
+                                Rd_sd = std::numeric_limits<uint64_t>::max();
+                            } else {
+                                Rd_sd = (int32_t)(Rs1_uw/Rs2_uw);
+                            }
+                        }}, IntDivOp);
+                    }
+                    0x20: decode RVTYPE {
+                        0x1: sraw({{
+                            Rd_sd = Rs1_sw >> Rs2<4:0>;
+                        }});
+                    }
                     0x30: rorw({{
                         int shamt = Rs2 & (32 - 1);
                         Rd = (int32_t) ((Rs1_uw >> shamt) | (Rs1_uw << ((32 - shamt) & (32 - 1))));
                     }});
                 }
                 0x6:  decode FUNCT7 {
-                    0x1: remw({{
-                        if (Rs2_sw == 0) {
-                            Rd_sd = Rs1_sw;
-                        } else if (Rs1_sw == std::numeric_limits<int32_t>::min()
-                                && Rs2_sw == -1) {
-                            Rd_sd = 0;
-                        } else {
-                            Rd_sd = Rs1_sw%Rs2_sw;
-                        }
-                    }}, IntDivOp);
+                    0x1: decode RVTYPE {
+                        0x1: remw({{
+                            constexpr int32_t kRsMin = \
+                                std::numeric_limits<int32_t>::min();
+                            if (Rs2_sw == 0) {
+                                Rd_sd = Rs1_sw;
+                            } else if (Rs1_sw == kRsMin && Rs2_sw == -1) {
+                                Rd_sd = 0;
+                            } else {
+                                Rd_sd = Rs1_sw%Rs2_sw;
+                            }
+                        }}, IntDivOp);
+                    }
                     0x10: sh3add_uw({{
                         Rd = (((uint64_t)Rs1_uw) << 3) + Rs2;
                     }});
                 }
-                0x7: remuw({{
-                    if (Rs2_uw == 0) {
-                        Rd_sd = (int32_t)Rs1_uw;
-                    } else {
-                        Rd_sd = (int32_t)(Rs1_uw%Rs2_uw);
-                    }
-                }}, IntDivOp);
+                0x7: decode RVTYPE {
+                    0x1: remuw({{
+                        if (Rs2_uw == 0) {
+                            Rd_sd = (int32_t)Rs1_uw;
+                        } else {
+                            Rd_sd = (int32_t)(Rs1_uw%Rs2_uw);
+                        }
+                    }}, IntDivOp);
+                }
             }
         }
 
@@ -1643,14 +1851,18 @@ decode QUADRANT default Unknown::unknown() {
                         Rd = sext<32>(f32_to_ui32(f32(freg(Fs1_bits)), rm,
                                                   true));
                     }}, FloatCvtOp);
-                    0x2: fcvt_l_s({{
-                        RM_REQUIRED;
-                        Rd_sd = f32_to_i64(f32(freg(Fs1_bits)), rm, true);
-                    }}, FloatCvtOp);
-                    0x3: fcvt_lu_s({{
-                        RM_REQUIRED;
-                        Rd = f32_to_ui64(f32(freg(Fs1_bits)), rm, true);
-                    }}, FloatCvtOp);
+                    0x2: decode RVTYPE {
+                        0x1: fcvt_l_s({{
+                            RM_REQUIRED;
+                            Rd_sd = f32_to_i64(f32(freg(Fs1_bits)), rm, true);
+                        }}, FloatCvtOp);
+                    }
+                    0x3: decode RVTYPE {
+                        0x1: fcvt_lu_s({{
+                            RM_REQUIRED;
+                            Rd = f32_to_ui64(f32(freg(Fs1_bits)), rm, true);
+                        }}, FloatCvtOp);
+                    }
                 }
                 0x61: decode CONV_SGN {
                     0x0: fcvt_w_d({{
@@ -1663,14 +1875,18 @@ decode QUADRANT default Unknown::unknown() {
                         Rd = sext<32>(f64_to_ui32(f64(freg(Fs1_bits)), rm,
                                                   true));
                     }}, FloatCvtOp);
-                    0x2: fcvt_l_d({{
-                        RM_REQUIRED;
-                        Rd_sd = f64_to_i64(f64(freg(Fs1_bits)), rm, true);
-                    }}, FloatCvtOp);
-                    0x3: fcvt_lu_d({{
-                        RM_REQUIRED;
-                        Rd = f64_to_ui64(f64(freg(Fs1_bits)), rm, true);
-                    }}, FloatCvtOp);
+                    0x2: decode RVTYPE {
+                        0x1: fcvt_l_d({{
+                            RM_REQUIRED;
+                            Rd_sd = f64_to_i64(f64(freg(Fs1_bits)), rm, true);
+                        }}, FloatCvtOp);
+                    }
+                    0x3: decode RVTYPE {
+                        0x1: fcvt_lu_d({{
+                            RM_REQUIRED;
+                            Rd = f64_to_ui64(f64(freg(Fs1_bits)), rm, true);
+                        }}, FloatCvtOp);
+                    }
                 }
                 0x62: decode CONV_SGN {
                     0x0: fcvt_w_h({{
@@ -1683,40 +1899,48 @@ decode QUADRANT default Unknown::unknown() {
                         Rd = sext<32>(f16_to_ui32(f16(freg(Fs1_bits)), rm,
                                                   true));
                     }}, FloatCvtOp);
-                    0x2: fcvt_l_h({{
-                        RM_REQUIRED;
-                        Rd_sd = f16_to_i64(f16(freg(Fs1_bits)), rm, true);
-                    }}, FloatCvtOp);
-                    0x3: fcvt_lu_h({{
-                        RM_REQUIRED;
-                        Rd = f16_to_ui64(f16(freg(Fs1_bits)), rm, true);
-                    }}, FloatCvtOp);
+                    0x2: decode RVTYPE {
+                        0x1: fcvt_l_h({{
+                            RM_REQUIRED;
+                            Rd_sd = f16_to_i64(f16(freg(Fs1_bits)), rm, true);
+                        }}, FloatCvtOp);
+                    }
+                    0x3: decode RVTYPE {
+                        0x1: fcvt_lu_h({{
+                            RM_REQUIRED;
+                            Rd = f16_to_ui64(f16(freg(Fs1_bits)), rm, true);
+                        }}, FloatCvtOp);
+                    }
                 }
                 0x68: decode CONV_SGN {
                     0x0: fcvt_s_w({{
                         RM_REQUIRED;
                         freg_t fd;
-                        fd = freg(i32_to_f32((int32_t)Rs1_sw));
+                        fd = freg(i32_to_f32(Rs1_sw));
                         Fd_bits = fd.v;
                         }}, FloatCvtOp);
                     0x1: fcvt_s_wu({{
                         RM_REQUIRED;
                         freg_t fd;
-                        fd = freg(ui32_to_f32((uint32_t)Rs1_uw));
+                        fd = freg(ui32_to_f32(Rs1_uw));
                         Fd_bits = fd.v;
                         }}, FloatCvtOp);
-                    0x2: fcvt_s_l({{
-                        RM_REQUIRED;
-                        freg_t fd;
-                        fd = freg(i64_to_f32(Rs1_ud));
-                        Fd_bits = fd.v;
+                    0x2: decode RVTYPE {
+                        0x1: fcvt_s_l({{
+                            RM_REQUIRED;
+                            freg_t fd;
+                            fd = freg(i64_to_f32(Rs1_ud));
+                            Fd_bits = fd.v;
                         }}, FloatCvtOp);
-                    0x3: fcvt_s_lu({{
-                        RM_REQUIRED;
-                        freg_t fd;
-                        fd = freg(ui64_to_f32(Rs1));
-                        Fd_bits = fd.v;
+                    }
+                    0x3: decode RVTYPE {
+                        0x1: fcvt_s_lu({{
+                            RM_REQUIRED;
+                            freg_t fd;
+                            fd = freg(ui64_to_f32(Rs1));
+                            Fd_bits = fd.v;
                         }}, FloatCvtOp);
+                    }
                 }
                 0x69: decode CONV_SGN {
                     0x0: fcvt_d_w({{
@@ -1727,14 +1951,18 @@ decode QUADRANT default Unknown::unknown() {
                         RM_REQUIRED;
                         Fd = (double)Rs1_uw;
                     }}, FloatCvtOp);
-                    0x2: fcvt_d_l({{
-                        RM_REQUIRED;
-                        Fd = (double)Rs1_sd;
-                    }}, FloatCvtOp);
-                    0x3: fcvt_d_lu({{
-                        RM_REQUIRED;
-                        Fd = (double)Rs1;
-                    }}, FloatCvtOp);
+                    0x2: decode RVTYPE {
+                        0x1: fcvt_d_l({{
+                            RM_REQUIRED;
+                            Fd = (double)Rs1_sd;
+                        }}, FloatCvtOp);
+                    }
+                    0x3: decode RVTYPE {
+                        0x1: fcvt_d_lu({{
+                            RM_REQUIRED;
+                            Fd = (double)Rs1;
+                        }}, FloatCvtOp);
+                    }
                 }
                 0x6a: decode CONV_SGN {
                     0x0: fcvt_h_w({{
@@ -1749,18 +1977,22 @@ decode QUADRANT default Unknown::unknown() {
                         fd = freg(ui32_to_f16((uint32_t)Rs1_uw));
                         Fd_bits = fd.v;
                         }}, FloatCvtOp);
-                    0x2: fcvt_h_l({{
-                        RM_REQUIRED;
-                        freg_t fd;
-                        fd = freg(i64_to_f16(Rs1_ud));
-                        Fd_bits = fd.v;
-                        }}, FloatCvtOp);
-                    0x3: fcvt_h_lu({{
-                        RM_REQUIRED;
-                        freg_t fd;
-                        fd = freg(ui64_to_f16(Rs1));
-                        Fd_bits = fd.v;
-                        }}, FloatCvtOp);
+                    0x2: decode RVTYPE {
+                        0x1: fcvt_h_l({{
+                            RM_REQUIRED;
+                            freg_t fd;
+                            fd = freg(i64_to_f16(Rs1_ud));
+                            Fd_bits = fd.v;
+                            }}, FloatCvtOp);
+                    }
+                    0x3: decode RVTYPE {
+                        0x1: fcvt_h_lu({{
+                            RM_REQUIRED;
+                            freg_t fd;
+                            fd = freg(ui64_to_f16(Rs1));
+                            Fd_bits = fd.v;
+                            }}, FloatCvtOp);
+                    }
                 }
                 0x70: decode ROUND_MODE {
                     0x0: fmv_x_w({{
@@ -1771,13 +2003,15 @@ decode QUADRANT default Unknown::unknown() {
                         Rd = result;
                     }}, FloatCvtOp);
                     0x1: fclass_s({{
-                        Rd = f32_classify(f32(freg(Fs1_bits)));
+                        Rd = rvSext(f32_classify(f32(freg(Fs1_bits))));
                     }}, FloatMiscOp);
                 }
                 0x71: decode ROUND_MODE {
-                    0x0: fmv_x_d({{
-                        Rd = freg(Fs1_bits).v;
-                    }}, FloatCvtOp);
+                    0x0: decode RVTYPE {
+                        0x1: fmv_x_d({{
+                            Rd = freg(Fs1_bits).v;
+                        }}, FloatCvtOp);
+                    }
                     0x1: fclass_d({{
                         Rd = f64_classify(f64(freg(Fs1_bits)));
                     }}, FloatMiscOp);
@@ -1799,11 +2033,13 @@ decode QUADRANT default Unknown::unknown() {
                     fd = freg(f32(Rs1_uw));
                     Fd_bits = fd.v;
                 }}, FloatCvtOp);
-                0x79: fmv_d_x({{
-                    freg_t fd;
-                    fd = freg(f64(Rs1));
-                    Fd_bits = fd.v;
-                }}, FloatCvtOp);
+                0x79: decode RVTYPE {
+                    0x1: fmv_d_x({{
+                        freg_t fd;
+                        fd = freg(f64(Rs1));
+                        Fd_bits = fd.v;
+                    }}, FloatCvtOp);
+                }
                 0x7a: fmv_h_x({{
                     freg_t fd;
                     fd = freg(f16(Rs1_uh));
@@ -1815,45 +2051,45 @@ decode QUADRANT default Unknown::unknown() {
         0x18: decode FUNCT3 {
             format BOp {
                 0x0: beq({{
-                    if (Rs1 == Rs2) {
-                        NPC = PC + imm;
+                    if (rvSext(Rs1) == rvSext(Rs2)) {
+                        NPC = rvZext(PC + imm);
                     } else {
-                        NPC = NPC;
+                        NPC = rvZext(NPC);
                     }
                 }}, IsDirectControl, IsCondControl);
                 0x1: bne({{
-                    if (Rs1 != Rs2) {
-                        NPC = PC + imm;
+                    if (rvSext(Rs1) != rvSext(Rs2)) {
+                        NPC = rvZext(PC + imm);
                     } else {
-                        NPC = NPC;
+                        NPC = rvZext(NPC);
                     }
                 }}, IsDirectControl, IsCondControl);
                 0x4: blt({{
-                    if (Rs1_sd < Rs2_sd) {
-                        NPC = PC + imm;
+                    if (rvSext(Rs1_sd) < rvSext(Rs2_sd)) {
+                        NPC = rvZext(PC + imm);
                     } else {
-                        NPC = NPC;
+                        NPC = rvZext(NPC);
                     }
                 }}, IsDirectControl, IsCondControl);
                 0x5: bge({{
-                    if (Rs1_sd >= Rs2_sd) {
-                        NPC = PC + imm;
+                    if (rvSext(Rs1_sd) >= rvSext(Rs2_sd)) {
+                        NPC = rvZext(PC + imm);
                     } else {
-                        NPC = NPC;
+                        NPC = rvZext(NPC);
                     }
                 }}, IsDirectControl, IsCondControl);
                 0x6: bltu({{
-                    if (Rs1 < Rs2) {
-                        NPC = PC + imm;
+                    if (rvZext(Rs1) < rvZext(Rs2)) {
+                        NPC = rvZext(PC + imm);
                     } else {
-                        NPC = NPC;
+                        NPC = rvZext(NPC);
                     }
                 }}, IsDirectControl, IsCondControl);
                 0x7: bgeu({{
-                    if (Rs1 >= Rs2) {
-                        NPC = PC + imm;
+                    if (rvZext(Rs1) >= rvZext(Rs2)) {
+                        NPC = rvZext(PC + imm);
                     } else {
-                        NPC = NPC;
+                        NPC = rvZext(NPC);
                     }
                 }}, IsDirectControl, IsCondControl);
             }
@@ -1861,14 +2097,14 @@ decode QUADRANT default Unknown::unknown() {
 
         0x19: decode FUNCT3 {
             0x0: Jump::jalr({{
-                Rd = NPC;
-                NPC = (imm + Rs1) & (~0x1);
+                Rd = rvSext(NPC);
+                NPC = rvZext((imm + Rs1) & (~0x1));
             }}, IsIndirectControl, IsUncondControl);
         }
 
         0x1b: JOp::jal({{
-            Rd = NPC;
-            NPC = PC + imm;
+            Rd = rvSext(NPC);
+            NPC = rvZext(PC + imm);
         }}, IsDirectControl, IsUncondControl);
 
         0x1c: decode FUNCT3 {
diff --git a/src/arch/riscv/isa/formats/amo.isa b/src/arch/riscv/isa/formats/amo.isa
index 6b22e8f439..fe497536cc 100644
--- a/src/arch/riscv/isa/formats/amo.isa
+++ b/src/arch/riscv/isa/formats/amo.isa
@@ -446,8 +446,8 @@ def template AtomicMemOpRMWCompleteAcc {{
 
 // LR/SC/AMO decode formats
 
-def format LoadReserved(memacc_code, postacc_code={{ }}, ea_code={{EA = Rs1;}},
-        mem_flags=[], inst_flags=[]) {{
+def format LoadReserved(memacc_code, postacc_code={{ }},
+        ea_code={{EA = rvZext(Rs1);}}, mem_flags=[], inst_flags=[]) {{
     macro_ea_code = ''
     macro_inst_flags = []
     macro_iop = InstObjParams(name, Name, 'LoadReserved', macro_ea_code,
@@ -473,8 +473,8 @@ def format LoadReserved(memacc_code, postacc_code={{ }}, ea_code={{EA = Rs1;}},
         + LoadReservedCompleteAcc.subst(iop)
 }};
 
-def format StoreCond(memacc_code, postacc_code={{ }}, ea_code={{EA = Rs1;}},
-        mem_flags=[], inst_flags=[]) {{
+def format StoreCond(memacc_code, postacc_code={{ }},
+        ea_code={{EA = rvZext(Rs1);}}, mem_flags=[], inst_flags=[]) {{
     macro_ea_code = ''
     macro_inst_flags = []
     macro_iop = InstObjParams(name, Name, 'StoreCond', macro_ea_code,
@@ -501,7 +501,7 @@ def format StoreCond(memacc_code, postacc_code={{ }}, ea_code={{EA = Rs1;}},
 }};
 
 def format AtomicMemOp(memacc_code, amoop_code, postacc_code={{ }},
-        ea_code={{EA = Rs1;}}, mem_flags=[], inst_flags=[]) {{
+        ea_code={{EA = rvZext(Rs1);}}, mem_flags=[], inst_flags=[]) {{
     macro_ea_code = ''
     macro_inst_flags = []
     macro_iop = InstObjParams(name, Name, 'AtomicMemOp', macro_ea_code,
diff --git a/src/arch/riscv/isa/formats/mem.isa b/src/arch/riscv/isa/formats/mem.isa
index fa334585a7..0d80260a25 100644
--- a/src/arch/riscv/isa/formats/mem.isa
+++ b/src/arch/riscv/isa/formats/mem.isa
@@ -228,7 +228,7 @@ def template StoreCompleteAcc {{
     }
 }};
 
-def format Load(memacc_code, ea_code = {{EA = Rs1 + offset;}},
+def format Load(memacc_code, ea_code = {{EA = rvZext(Rs1 + offset);}},
         offset_code={{offset = sext<12>(IMM12);}},
         mem_flags=[], inst_flags=[]) {{
     (header_output, decoder_output, decode_block, exec_output) = \
@@ -236,7 +236,7 @@ def format Load(memacc_code, ea_code = {{EA = Rs1 + offset;}},
         inst_flags, 'Load', exec_template_base='Load')
 }};
 
-def format Store(memacc_code, ea_code={{EA = Rs1 + offset;}},
+def format Store(memacc_code, ea_code={{EA = rvZext(Rs1 + offset);}},
         offset_code={{offset = sext<12>(IMM5 | (IMM7 << 5));}},
         mem_flags=[], inst_flags=[]) {{
     (header_output, decoder_output, decode_block, exec_output) = \

From dd04e7044503046d19b89361edd9a48e14e66ab8 Mon Sep 17 00:00:00 2001
From: Roger Chang <rogerycchang@google.com>
Date: Mon, 28 Nov 2022 08:27:48 +0000
Subject: [PATCH 053/492] arch-riscv: Implement rv32 zicsr extension

1. Add misc register mstatush, cycleh, timeh, instreth,
   hpmcounter03...hpmcounter31, pmpcfg1, pmpcfg3
2. Implement handling RV32 only registers
3. Implement methods of set time CSR

Change-Id: I5c55c18a0da91977d6e23da24ea3cbcba9f0509b
Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/65733
Reviewed-by: Jason Lowe-Power <power.jg@gmail.com>
Maintainer: Jason Lowe-Power <power.jg@gmail.com>
Tested-by: kokoro <noreply+kokoro@google.com>
---
 src/arch/riscv/isa.cc                   | 110 ++++-
 src/arch/riscv/isa/decoder.isa          |  24 +-
 src/arch/riscv/isa/formats/standard.isa |   9 +
 src/arch/riscv/regs/misc.hh             | 586 ++++++++++++++++--------
 src/dev/riscv/clint.cc                  |   7 +-
 5 files changed, 518 insertions(+), 218 deletions(-)

diff --git a/src/arch/riscv/isa.cc b/src/arch/riscv/isa.cc
index c8eabd44ad..3b4f378afa 100644
--- a/src/arch/riscv/isa.cc
+++ b/src/arch/riscv/isa.cc
@@ -148,9 +148,9 @@ namespace RiscvISA
     [MISCREG_MCAUSE]        = "MCAUSE",
     [MISCREG_MTVAL]         = "MTVAL",
     [MISCREG_PMPCFG0]       = "PMPCFG0",
-    // pmpcfg1 rv32 only
+    [MISCREG_PMPCFG1]       = "PMPCFG1",   // pmpcfg1 is rv32 only
     [MISCREG_PMPCFG2]       = "PMPCFG2",
-    // pmpcfg3 rv32 only
+    [MISCREG_PMPCFG3]       = "PMPCFG3",   // pmpcfg3 is rv32 only
     [MISCREG_PMPADDR00]     = "PMPADDR00",
     [MISCREG_PMPADDR01]     = "PMPADDR01",
     [MISCREG_PMPADDR02]     = "PMPADDR02",
@@ -189,6 +189,42 @@ namespace RiscvISA
     [MISCREG_NMIVEC]        = "NMIVEC",
     [MISCREG_NMIE]          = "NMIE",
     [MISCREG_NMIP]          = "NMIP",
+
+    // following are rv32 only registers
+    [MISCREG_MSTATUSH]      = "MSTATUSH",
+
+    [MISCREG_CYCLEH]         = "CYCLEH",
+    [MISCREG_TIMEH]          = "TIMEH",
+    [MISCREG_INSTRETH]       = "INSTRETH",
+    [MISCREG_HPMCOUNTER03H]  = "HPMCOUNTER03H",
+    [MISCREG_HPMCOUNTER04H]  = "HPMCOUNTER04H",
+    [MISCREG_HPMCOUNTER05H]  = "HPMCOUNTER05H",
+    [MISCREG_HPMCOUNTER06H]  = "HPMCOUNTER06H",
+    [MISCREG_HPMCOUNTER07H]  = "HPMCOUNTER07H",
+    [MISCREG_HPMCOUNTER08H]  = "HPMCOUNTER08H",
+    [MISCREG_HPMCOUNTER09H]  = "HPMCOUNTER09H",
+    [MISCREG_HPMCOUNTER10H]  = "HPMCOUNTER10H",
+    [MISCREG_HPMCOUNTER11H]  = "HPMCOUNTER11H",
+    [MISCREG_HPMCOUNTER12H]  = "HPMCOUNTER12H",
+    [MISCREG_HPMCOUNTER13H]  = "HPMCOUNTER13H",
+    [MISCREG_HPMCOUNTER14H]  = "HPMCOUNTER14H",
+    [MISCREG_HPMCOUNTER15H]  = "HPMCOUNTER15H",
+    [MISCREG_HPMCOUNTER16H]  = "HPMCOUNTER16H",
+    [MISCREG_HPMCOUNTER17H]  = "HPMCOUNTER17H",
+    [MISCREG_HPMCOUNTER18H]  = "HPMCOUNTER18H",
+    [MISCREG_HPMCOUNTER19H]  = "HPMCOUNTER19H",
+    [MISCREG_HPMCOUNTER20H]  = "HPMCOUNTER20H",
+    [MISCREG_HPMCOUNTER21H]  = "HPMCOUNTER21H",
+    [MISCREG_HPMCOUNTER22H]  = "HPMCOUNTER22H",
+    [MISCREG_HPMCOUNTER23H]  = "HPMCOUNTER23H",
+    [MISCREG_HPMCOUNTER24H]  = "HPMCOUNTER24H",
+    [MISCREG_HPMCOUNTER25H]  = "HPMCOUNTER25H",
+    [MISCREG_HPMCOUNTER26H]  = "HPMCOUNTER26H",
+    [MISCREG_HPMCOUNTER27H]  = "HPMCOUNTER27H",
+    [MISCREG_HPMCOUNTER28H]  = "HPMCOUNTER28H",
+    [MISCREG_HPMCOUNTER29H]  = "HPMCOUNTER29H",
+    [MISCREG_HPMCOUNTER30H]  = "HPMCOUNTER30H",
+    [MISCREG_HPMCOUNTER31H]  = "HPMCOUNTER31H",
 }};
 
 namespace
@@ -273,7 +309,13 @@ void ISA::clear()
 bool
 ISA::hpmCounterEnabled(int misc_reg) const
 {
-    int hpmcounter = misc_reg - MISCREG_CYCLE;
+    int hpmcounter = 0;
+    if (misc_reg >= MISCREG_CYCLEH) {
+        hpmcounter = misc_reg - MISCREG_CYCLEH;
+    } else {
+        hpmcounter = misc_reg - MISCREG_CYCLE;
+    }
+
     if (hpmcounter < 0 || hpmcounter > 31)
         panic("Illegal HPM counter %d\n", hpmcounter);
     int counteren;
@@ -313,7 +355,16 @@ ISA::readMiscReg(RegIndex idx)
         if (hpmCounterEnabled(MISCREG_CYCLE)) {
             DPRINTF(RiscvMisc, "Cycle counter at: %llu.\n",
                     tc->getCpuPtr()->curCycle());
-            return tc->getCpuPtr()->curCycle();
+            return static_cast<RegVal>(tc->getCpuPtr()->curCycle());
+        } else {
+            warn("Cycle counter disabled.\n");
+            return 0;
+        }
+      case MISCREG_CYCLEH:
+        if (hpmCounterEnabled(MISCREG_CYCLEH)) {
+            DPRINTF(RiscvMisc, "Cycle counter at: %llu.\n",
+                    tc->getCpuPtr()->curCycle());
+            return bits<RegVal>(tc->getCpuPtr()->curCycle(), 63, 32);
         } else {
             warn("Cycle counter disabled.\n");
             return 0;
@@ -327,11 +378,29 @@ ISA::readMiscReg(RegIndex idx)
             warn("Wall clock disabled.\n");
             return 0;
         }
+      case MISCREG_TIMEH:
+        if (hpmCounterEnabled(MISCREG_TIMEH)) {
+            DPRINTF(RiscvMisc, "Wall-clock counter at: %llu.\n",
+                    std::time(nullptr));
+            return readMiscRegNoEffect(MISCREG_TIMEH);
+        } else {
+            warn("Wall clock disabled.\n");
+            return 0;
+        }
       case MISCREG_INSTRET:
         if (hpmCounterEnabled(MISCREG_INSTRET)) {
             DPRINTF(RiscvMisc, "Instruction counter at: %llu.\n",
                     tc->getCpuPtr()->totalInsts());
-            return tc->getCpuPtr()->totalInsts();
+            return static_cast<RegVal>(tc->getCpuPtr()->totalInsts());
+        } else {
+            warn("Instruction counter disabled.\n");
+            return 0;
+        }
+      case MISCREG_INSTRETH:
+        if (hpmCounterEnabled(MISCREG_INSTRETH)) {
+            DPRINTF(RiscvMisc, "Instruction counter at: %llu.\n",
+                    tc->getCpuPtr()->totalInsts());
+            return bits<RegVal>(tc->getCpuPtr()->totalInsts(), 63, 32);
         } else {
             warn("Instruction counter disabled.\n");
             return 0;
@@ -406,6 +475,16 @@ ISA::readMiscReg(RegIndex idx)
                 warn("HPM counter %d disabled.\n", idx - MISCREG_CYCLE);
                 return 0;
             }
+        } else if (idx >= MISCREG_HPMCOUNTER03H &&
+                idx <= MISCREG_HPMCOUNTER31H) {
+            if (hpmCounterEnabled(idx)) {
+                DPRINTF(RiscvMisc, "HPM counter %d: %llu.\n",
+                        idx - MISCREG_CYCLE, tc->getCpuPtr()->curCycle());
+                return bits<RegVal>(tc->getCpuPtr()->curCycle(), 63, 32);
+            } else {
+                warn("HPM counter %d disabled.\n", idx - MISCREG_CYCLE);
+                return 0;
+            }
         }
         return readMiscRegNoEffect(idx);
     }
@@ -437,18 +516,32 @@ ISA::setMiscReg(RegIndex idx, RegVal val)
           // for 8 PMP entries.
 
           case MISCREG_PMPCFG0:
+          case MISCREG_PMPCFG1:
           case MISCREG_PMPCFG2:
+          case MISCREG_PMPCFG3:
             {
                 // PMP registers should only be modified in M mode
                 assert(readMiscRegNoEffect(MISCREG_PRV) == PRV_M);
 
+                int regSize = 0;
+                switch (rv_type) {
+                    case RV32:
+                        regSize = 4;
+                    break;
+                    case RV64:
+                        regSize = 8;
+                    break;
+                    default:
+                        panic("%s: Unknown rv_type: %d", name(), (int)rv_type);
+                }
+
                 // Specs do not seem to mention what should be
                 // configured first, cfg or address regs!
                 // qemu seems to update the tables when
                 // pmp addr regs are written (with the assumption
                 // that cfg regs are already written)
 
-                for (int i=0; i < sizeof(val); i++) {
+                for (int i=0; i < regSize; i++) {
 
                     uint8_t cfg_val = (val >> (8*i)) & 0xff;
                     auto mmu = dynamic_cast<RiscvISA::MMU *>
@@ -456,10 +549,7 @@ ISA::setMiscReg(RegIndex idx, RegVal val)
 
                     // Form pmp_index using the index i and
                     // PMPCFG register number
-                    // Note: MISCREG_PMPCFG2 - MISCREG_PMPCFG0 = 1
-                    // 8*(idx-MISCREG_PMPCFG0) will be useful
-                    // if a system contains more than 16 PMP entries
-                    uint32_t pmp_index = i+(8*(idx-MISCREG_PMPCFG0));
+                    uint32_t pmp_index = i+(4*(idx-MISCREG_PMPCFG0));
                     mmu->getPMP()->pmpUpdateCfg(pmp_index,cfg_val);
                 }
 
diff --git a/src/arch/riscv/isa/decoder.isa b/src/arch/riscv/isa/decoder.isa
index d442002bd5..885794032a 100644
--- a/src/arch/riscv/isa/decoder.isa
+++ b/src/arch/riscv/isa/decoder.isa
@@ -2201,28 +2201,28 @@ decode QUADRANT default Unknown::unknown() {
             }
             format CSROp {
                 0x1: csrrw({{
-                    Rd = data;
-                    data = Rs1;
+                    Rd = rvSext(data);
+                    data = rvZext(Rs1);
                 }}, IsSerializeAfter, IsNonSpeculative, No_OpClass);
                 0x2: csrrs({{
-                    Rd = data;
-                    data |= Rs1;
+                    Rd = rvSext(data);
+                    data = rvZext(data | Rs1);
                 }}, IsSerializeAfter, IsNonSpeculative, No_OpClass);
                 0x3: csrrc({{
-                    Rd = data;
-                    data &= ~Rs1;
+                    Rd = rvSext(data);
+                    data = rvZext(data & ~Rs1);
                 }}, IsSerializeAfter, IsNonSpeculative, No_OpClass);
                 0x5: csrrwi({{
-                    Rd = data;
-                    data = uimm;
+                    Rd = rvSext(data);
+                    data = rvZext(uimm);
                 }}, IsSerializeAfter, IsNonSpeculative, No_OpClass);
                 0x6: csrrsi({{
-                    Rd = data;
-                    data |= uimm;
+                    Rd = rvSext(data);
+                    data = rvZext(data | uimm);
                 }}, IsSerializeAfter, IsNonSpeculative, No_OpClass);
                 0x7: csrrci({{
-                    Rd = data;
-                    data &= ~uimm;
+                    Rd = rvSext(data);
+                    data = rvZext(data & ~uimm);
                 }}, IsSerializeAfter, IsNonSpeculative, No_OpClass);
             }
         }
diff --git a/src/arch/riscv/isa/formats/standard.isa b/src/arch/riscv/isa/formats/standard.isa
index 5390164f10..6be281fa56 100644
--- a/src/arch/riscv/isa/formats/standard.isa
+++ b/src/arch/riscv/isa/formats/standard.isa
@@ -338,6 +338,14 @@ def template CSRExecute {{
 
         RegIndex midx = csr_data_it->second.physIndex;
         const std::string& csrName = csr_data_it->second.name;
+        if ((csr_data_it->second.rvTypes & (1 << machInst.rv_type)) == 0) {
+            return std::make_shared<IllegalInstFault>(
+                    csprintf("%s is not support in mode %d\n",
+                             csrName,
+                             machInst.rv_type),
+                    machInst);
+        }
+
         auto mask_it = csr_masks.find(csr);
         RegVal maskVal = (mask_it == csr_masks.end()) ? mask(64)
                                                       : mask_it->second;
@@ -373,6 +381,7 @@ def template CSRExecute {{
         } else {
             olddata = xc->readMiscReg(midx);
         }
+        olddata = rvZext(olddata);
         auto olddata_all = olddata;
 
         olddata &= maskVal;
diff --git a/src/arch/riscv/regs/misc.hh b/src/arch/riscv/regs/misc.hh
index 7f6fff4e00..8cb4ca0f91 100644
--- a/src/arch/riscv/regs/misc.hh
+++ b/src/arch/riscv/regs/misc.hh
@@ -153,9 +153,9 @@ enum MiscRegIndex
     MISCREG_MCAUSE,
     MISCREG_MTVAL,
     MISCREG_PMPCFG0,
-    // pmpcfg1 rv32 only
+    MISCREG_PMPCFG1,     // pmpcfg1 is rv32 only
     MISCREG_PMPCFG2,
-    // pmpcfg3 rv32 only
+    MISCREG_PMPCFG3,     // pmpcfg3 is rv32 only
     MISCREG_PMPADDR00,
     MISCREG_PMPADDR01,
     MISCREG_PMPADDR02,
@@ -201,6 +201,42 @@ enum MiscRegIndex
     // non-maskable-interrupt-pending: NMI version of xIP
     MISCREG_NMIP,
 
+    // the following MicsRegIndex are RV32 only
+    MISCREG_MSTATUSH,
+
+    MISCREG_CYCLEH,
+    MISCREG_TIMEH,
+    MISCREG_INSTRETH,
+    MISCREG_HPMCOUNTER03H,
+    MISCREG_HPMCOUNTER04H,
+    MISCREG_HPMCOUNTER05H,
+    MISCREG_HPMCOUNTER06H,
+    MISCREG_HPMCOUNTER07H,
+    MISCREG_HPMCOUNTER08H,
+    MISCREG_HPMCOUNTER09H,
+    MISCREG_HPMCOUNTER10H,
+    MISCREG_HPMCOUNTER11H,
+    MISCREG_HPMCOUNTER12H,
+    MISCREG_HPMCOUNTER13H,
+    MISCREG_HPMCOUNTER14H,
+    MISCREG_HPMCOUNTER15H,
+    MISCREG_HPMCOUNTER16H,
+    MISCREG_HPMCOUNTER17H,
+    MISCREG_HPMCOUNTER18H,
+    MISCREG_HPMCOUNTER19H,
+    MISCREG_HPMCOUNTER20H,
+    MISCREG_HPMCOUNTER21H,
+    MISCREG_HPMCOUNTER22H,
+    MISCREG_HPMCOUNTER23H,
+    MISCREG_HPMCOUNTER24H,
+    MISCREG_HPMCOUNTER25H,
+    MISCREG_HPMCOUNTER26H,
+    MISCREG_HPMCOUNTER27H,
+    MISCREG_HPMCOUNTER28H,
+    MISCREG_HPMCOUNTER29H,
+    MISCREG_HPMCOUNTER30H,
+    MISCREG_HPMCOUNTER31H,
+
     NUM_MISCREGS
 };
 
@@ -252,7 +288,41 @@ enum CSRIndex
     CSR_HPMCOUNTER29 = 0xC1D,
     CSR_HPMCOUNTER30 = 0xC1E,
     CSR_HPMCOUNTER31 = 0xC1F,
-    // HPMCOUNTERH rv32 only
+
+    // rv32 only csr register begin
+    CSR_CYCLEH = 0xC80,
+    CSR_TIMEH = 0xC81,
+    CSR_INSTRETH = 0xC82,
+    CSR_HPMCOUNTER03H = 0xC83,
+    CSR_HPMCOUNTER04H = 0xC84,
+    CSR_HPMCOUNTER05H = 0xC85,
+    CSR_HPMCOUNTER06H = 0xC86,
+    CSR_HPMCOUNTER07H = 0xC87,
+    CSR_HPMCOUNTER08H = 0xC88,
+    CSR_HPMCOUNTER09H = 0xC89,
+    CSR_HPMCOUNTER10H = 0xC8A,
+    CSR_HPMCOUNTER11H = 0xC8B,
+    CSR_HPMCOUNTER12H = 0xC8C,
+    CSR_HPMCOUNTER13H = 0xC8D,
+    CSR_HPMCOUNTER14H = 0xC8E,
+    CSR_HPMCOUNTER15H = 0xC8F,
+    CSR_HPMCOUNTER16H = 0xC90,
+    CSR_HPMCOUNTER17H = 0xC91,
+    CSR_HPMCOUNTER18H = 0xC92,
+    CSR_HPMCOUNTER19H = 0xC93,
+    CSR_HPMCOUNTER20H = 0xC94,
+    CSR_HPMCOUNTER21H = 0xC95,
+    CSR_HPMCOUNTER22H = 0xC96,
+    CSR_HPMCOUNTER23H = 0xC97,
+    CSR_HPMCOUNTER24H = 0xC98,
+    CSR_HPMCOUNTER25H = 0xC99,
+    CSR_HPMCOUNTER26H = 0xC9A,
+    CSR_HPMCOUNTER27H = 0xC9B,
+    CSR_HPMCOUNTER28H = 0xC9C,
+    CSR_HPMCOUNTER29H = 0xC9D,
+    CSR_HPMCOUNTER30H = 0xC9E,
+    CSR_HPMCOUNTER31H = 0xC9F,
+    // rv32 only csr register end
 
     CSR_SSTATUS = 0x100,
     CSR_SEDELEG = 0x102,
@@ -278,15 +348,16 @@ enum CSRIndex
     CSR_MIE = 0x304,
     CSR_MTVEC = 0x305,
     CSR_MCOUNTEREN = 0x306,
+    CSR_MSTATUSH = 0x310, // rv32 only
     CSR_MSCRATCH = 0x340,
     CSR_MEPC = 0x341,
     CSR_MCAUSE = 0x342,
     CSR_MTVAL = 0x343,
     CSR_MIP = 0x344,
     CSR_PMPCFG0 = 0x3A0,
-    // pmpcfg1 rv32 only
+    CSR_PMPCFG1 = 0x3A1, // pmpcfg1 rv32 only
     CSR_PMPCFG2 = 0x3A2,
-    // pmpcfg3 rv32 only
+    CSR_PMPCFG3 = 0x3A3,// pmpcfg3 rv32 only
     CSR_PMPADDR00 = 0x3B0,
     CSR_PMPADDR01 = 0x3B1,
     CSR_PMPADDR02 = 0x3B2,
@@ -305,36 +376,70 @@ enum CSRIndex
     CSR_PMPADDR15 = 0x3BF,
     CSR_MCYCLE = 0xB00,
     CSR_MINSTRET = 0xB02,
-    CSR_MHPMCOUNTER03 = 0xC03,
-    CSR_MHPMCOUNTER04 = 0xC04,
-    CSR_MHPMCOUNTER05 = 0xC05,
-    CSR_MHPMCOUNTER06 = 0xC06,
-    CSR_MHPMCOUNTER07 = 0xC07,
-    CSR_MHPMCOUNTER08 = 0xC08,
-    CSR_MHPMCOUNTER09 = 0xC09,
-    CSR_MHPMCOUNTER10 = 0xC0A,
-    CSR_MHPMCOUNTER11 = 0xC0B,
-    CSR_MHPMCOUNTER12 = 0xC0C,
-    CSR_MHPMCOUNTER13 = 0xC0D,
-    CSR_MHPMCOUNTER14 = 0xC0E,
-    CSR_MHPMCOUNTER15 = 0xC0F,
-    CSR_MHPMCOUNTER16 = 0xC10,
-    CSR_MHPMCOUNTER17 = 0xC11,
-    CSR_MHPMCOUNTER18 = 0xC12,
-    CSR_MHPMCOUNTER19 = 0xC13,
-    CSR_MHPMCOUNTER20 = 0xC14,
-    CSR_MHPMCOUNTER21 = 0xC15,
-    CSR_MHPMCOUNTER22 = 0xC16,
-    CSR_MHPMCOUNTER23 = 0xC17,
-    CSR_MHPMCOUNTER24 = 0xC18,
-    CSR_MHPMCOUNTER25 = 0xC19,
-    CSR_MHPMCOUNTER26 = 0xC1A,
-    CSR_MHPMCOUNTER27 = 0xC1B,
-    CSR_MHPMCOUNTER28 = 0xC1C,
-    CSR_MHPMCOUNTER29 = 0xC1D,
-    CSR_MHPMCOUNTER30 = 0xC1E,
-    CSR_MHPMCOUNTER31 = 0xC1F,
-    // MHPMCOUNTERH rv32 only
+    CSR_MHPMCOUNTER03 = 0xB03,
+    CSR_MHPMCOUNTER04 = 0xB04,
+    CSR_MHPMCOUNTER05 = 0xB05,
+    CSR_MHPMCOUNTER06 = 0xB06,
+    CSR_MHPMCOUNTER07 = 0xB07,
+    CSR_MHPMCOUNTER08 = 0xB08,
+    CSR_MHPMCOUNTER09 = 0xB09,
+    CSR_MHPMCOUNTER10 = 0xB0A,
+    CSR_MHPMCOUNTER11 = 0xB0B,
+    CSR_MHPMCOUNTER12 = 0xB0C,
+    CSR_MHPMCOUNTER13 = 0xB0D,
+    CSR_MHPMCOUNTER14 = 0xB0E,
+    CSR_MHPMCOUNTER15 = 0xB0F,
+    CSR_MHPMCOUNTER16 = 0xB10,
+    CSR_MHPMCOUNTER17 = 0xB11,
+    CSR_MHPMCOUNTER18 = 0xB12,
+    CSR_MHPMCOUNTER19 = 0xB13,
+    CSR_MHPMCOUNTER20 = 0xB14,
+    CSR_MHPMCOUNTER21 = 0xB15,
+    CSR_MHPMCOUNTER22 = 0xB16,
+    CSR_MHPMCOUNTER23 = 0xB17,
+    CSR_MHPMCOUNTER24 = 0xB18,
+    CSR_MHPMCOUNTER25 = 0xB19,
+    CSR_MHPMCOUNTER26 = 0xB1A,
+    CSR_MHPMCOUNTER27 = 0xB1B,
+    CSR_MHPMCOUNTER28 = 0xB1C,
+    CSR_MHPMCOUNTER29 = 0xB1D,
+    CSR_MHPMCOUNTER30 = 0xB1E,
+    CSR_MHPMCOUNTER31 = 0xB1F,
+
+    // rv32 only csr register begin
+    CSR_MCYCLEH = 0xB80,
+    CSR_MINSTRETH = 0xB82,
+    CSR_MHPMCOUNTER03H = 0xB83,
+    CSR_MHPMCOUNTER04H = 0xB84,
+    CSR_MHPMCOUNTER05H = 0xB85,
+    CSR_MHPMCOUNTER06H = 0xB86,
+    CSR_MHPMCOUNTER07H = 0xB87,
+    CSR_MHPMCOUNTER08H = 0xB88,
+    CSR_MHPMCOUNTER09H = 0xB89,
+    CSR_MHPMCOUNTER10H = 0xB8A,
+    CSR_MHPMCOUNTER11H = 0xB8B,
+    CSR_MHPMCOUNTER12H = 0xB8C,
+    CSR_MHPMCOUNTER13H = 0xB8D,
+    CSR_MHPMCOUNTER14H = 0xB8E,
+    CSR_MHPMCOUNTER15H = 0xB8F,
+    CSR_MHPMCOUNTER16H = 0xB90,
+    CSR_MHPMCOUNTER17H = 0xB91,
+    CSR_MHPMCOUNTER18H = 0xB92,
+    CSR_MHPMCOUNTER19H = 0xB93,
+    CSR_MHPMCOUNTER20H = 0xB94,
+    CSR_MHPMCOUNTER21H = 0xB95,
+    CSR_MHPMCOUNTER22H = 0xB96,
+    CSR_MHPMCOUNTER23H = 0xB97,
+    CSR_MHPMCOUNTER24H = 0xB98,
+    CSR_MHPMCOUNTER25H = 0xB99,
+    CSR_MHPMCOUNTER26H = 0xB9A,
+    CSR_MHPMCOUNTER27H = 0xB9B,
+    CSR_MHPMCOUNTER28H = 0xB9C,
+    CSR_MHPMCOUNTER29H = 0xB9D,
+    CSR_MHPMCOUNTER30H = 0xB9E,
+    CSR_MHPMCOUNTER31H = 0xB9F,
+    // rv32 only csr register end
+
     CSR_MHPMEVENT03 = 0x323,
     CSR_MHPMEVENT04 = 0x324,
     CSR_MHPMEVENT05 = 0x325,
@@ -378,170 +483,242 @@ struct CSRMetadata
 {
     const std::string name;
     const int physIndex;
+    const uint64_t rvTypes;
 };
 
+template <typename... T>
+constexpr uint64_t rvTypeFlags(T... args) {
+    return ((1 << args) | ...);
+}
+
 const std::unordered_map<int, CSRMetadata> CSRData = {
-    {CSR_USTATUS, {"ustatus", MISCREG_STATUS}},
-    {CSR_UIE, {"uie", MISCREG_IE}},
-    {CSR_UTVEC, {"utvec", MISCREG_UTVEC}},
-    {CSR_USCRATCH, {"uscratch", MISCREG_USCRATCH}},
-    {CSR_UEPC, {"uepc", MISCREG_UEPC}},
-    {CSR_UCAUSE, {"ucause", MISCREG_UCAUSE}},
-    {CSR_UTVAL, {"utval", MISCREG_UTVAL}},
-    {CSR_UIP, {"uip", MISCREG_IP}},
-    {CSR_FFLAGS, {"fflags", MISCREG_FFLAGS}},
-    {CSR_FRM, {"frm", MISCREG_FRM}},
-    {CSR_FCSR, {"fcsr", MISCREG_FFLAGS}}, // Actually FRM << 5 | FFLAGS
-    {CSR_CYCLE, {"cycle", MISCREG_CYCLE}},
-    {CSR_TIME, {"time", MISCREG_TIME}},
-    {CSR_INSTRET, {"instret", MISCREG_INSTRET}},
-    {CSR_HPMCOUNTER03, {"hpmcounter03", MISCREG_HPMCOUNTER03}},
-    {CSR_HPMCOUNTER04, {"hpmcounter04", MISCREG_HPMCOUNTER04}},
-    {CSR_HPMCOUNTER05, {"hpmcounter05", MISCREG_HPMCOUNTER05}},
-    {CSR_HPMCOUNTER06, {"hpmcounter06", MISCREG_HPMCOUNTER06}},
-    {CSR_HPMCOUNTER07, {"hpmcounter07", MISCREG_HPMCOUNTER07}},
-    {CSR_HPMCOUNTER08, {"hpmcounter08", MISCREG_HPMCOUNTER08}},
-    {CSR_HPMCOUNTER09, {"hpmcounter09", MISCREG_HPMCOUNTER09}},
-    {CSR_HPMCOUNTER10, {"hpmcounter10", MISCREG_HPMCOUNTER10}},
-    {CSR_HPMCOUNTER11, {"hpmcounter11", MISCREG_HPMCOUNTER11}},
-    {CSR_HPMCOUNTER12, {"hpmcounter12", MISCREG_HPMCOUNTER12}},
-    {CSR_HPMCOUNTER13, {"hpmcounter13", MISCREG_HPMCOUNTER13}},
-    {CSR_HPMCOUNTER14, {"hpmcounter14", MISCREG_HPMCOUNTER14}},
-    {CSR_HPMCOUNTER15, {"hpmcounter15", MISCREG_HPMCOUNTER15}},
-    {CSR_HPMCOUNTER16, {"hpmcounter16", MISCREG_HPMCOUNTER16}},
-    {CSR_HPMCOUNTER17, {"hpmcounter17", MISCREG_HPMCOUNTER17}},
-    {CSR_HPMCOUNTER18, {"hpmcounter18", MISCREG_HPMCOUNTER18}},
-    {CSR_HPMCOUNTER19, {"hpmcounter19", MISCREG_HPMCOUNTER19}},
-    {CSR_HPMCOUNTER20, {"hpmcounter20", MISCREG_HPMCOUNTER20}},
-    {CSR_HPMCOUNTER21, {"hpmcounter21", MISCREG_HPMCOUNTER21}},
-    {CSR_HPMCOUNTER22, {"hpmcounter22", MISCREG_HPMCOUNTER22}},
-    {CSR_HPMCOUNTER23, {"hpmcounter23", MISCREG_HPMCOUNTER23}},
-    {CSR_HPMCOUNTER24, {"hpmcounter24", MISCREG_HPMCOUNTER24}},
-    {CSR_HPMCOUNTER25, {"hpmcounter25", MISCREG_HPMCOUNTER25}},
-    {CSR_HPMCOUNTER26, {"hpmcounter26", MISCREG_HPMCOUNTER26}},
-    {CSR_HPMCOUNTER27, {"hpmcounter27", MISCREG_HPMCOUNTER27}},
-    {CSR_HPMCOUNTER28, {"hpmcounter28", MISCREG_HPMCOUNTER28}},
-    {CSR_HPMCOUNTER29, {"hpmcounter29", MISCREG_HPMCOUNTER29}},
-    {CSR_HPMCOUNTER30, {"hpmcounter30", MISCREG_HPMCOUNTER30}},
-    {CSR_HPMCOUNTER31, {"hpmcounter31", MISCREG_HPMCOUNTER31}},
+    {CSR_USTATUS, {"ustatus", MISCREG_STATUS, rvTypeFlags(RV64, RV32)}},
+    {CSR_UIE, {"uie", MISCREG_IE, rvTypeFlags(RV64, RV32)}},
+    {CSR_UTVEC, {"utvec", MISCREG_UTVEC, rvTypeFlags(RV64, RV32)}},
+    {CSR_USCRATCH, {"uscratch", MISCREG_USCRATCH, rvTypeFlags(RV64, RV32)}},
+    {CSR_UEPC, {"uepc", MISCREG_UEPC, rvTypeFlags(RV64, RV32)}},
+    {CSR_UCAUSE, {"ucause", MISCREG_UCAUSE, rvTypeFlags(RV64, RV32)}},
+    {CSR_UTVAL, {"utval", MISCREG_UTVAL, rvTypeFlags(RV64, RV32)}},
+    {CSR_UIP, {"uip", MISCREG_IP, rvTypeFlags(RV64, RV32)}},
+    {CSR_FFLAGS, {"fflags", MISCREG_FFLAGS, rvTypeFlags(RV64, RV32)}},
+    {CSR_FRM, {"frm", MISCREG_FRM, rvTypeFlags(RV64, RV32)}},
+    {CSR_FCSR, {"fcsr", MISCREG_FFLAGS, rvTypeFlags(RV64, RV32)}}, // Actually FRM << 5 | FFLAGS
+    {CSR_CYCLE, {"cycle", MISCREG_CYCLE, rvTypeFlags(RV64, RV32)}},
+    {CSR_TIME, {"time", MISCREG_TIME, rvTypeFlags(RV64, RV32)}},
+    {CSR_INSTRET, {"instret", MISCREG_INSTRET, rvTypeFlags(RV64, RV32)}},
+    {CSR_HPMCOUNTER03, {"hpmcounter03", MISCREG_HPMCOUNTER03, rvTypeFlags(RV64, RV32)}},
+    {CSR_HPMCOUNTER04, {"hpmcounter04", MISCREG_HPMCOUNTER04, rvTypeFlags(RV64, RV32)}},
+    {CSR_HPMCOUNTER05, {"hpmcounter05", MISCREG_HPMCOUNTER05, rvTypeFlags(RV64, RV32)}},
+    {CSR_HPMCOUNTER06, {"hpmcounter06", MISCREG_HPMCOUNTER06, rvTypeFlags(RV64, RV32)}},
+    {CSR_HPMCOUNTER07, {"hpmcounter07", MISCREG_HPMCOUNTER07, rvTypeFlags(RV64, RV32)}},
+    {CSR_HPMCOUNTER08, {"hpmcounter08", MISCREG_HPMCOUNTER08, rvTypeFlags(RV64, RV32)}},
+    {CSR_HPMCOUNTER09, {"hpmcounter09", MISCREG_HPMCOUNTER09, rvTypeFlags(RV64, RV32)}},
+    {CSR_HPMCOUNTER10, {"hpmcounter10", MISCREG_HPMCOUNTER10, rvTypeFlags(RV64, RV32)}},
+    {CSR_HPMCOUNTER11, {"hpmcounter11", MISCREG_HPMCOUNTER11, rvTypeFlags(RV64, RV32)}},
+    {CSR_HPMCOUNTER12, {"hpmcounter12", MISCREG_HPMCOUNTER12, rvTypeFlags(RV64, RV32)}},
+    {CSR_HPMCOUNTER13, {"hpmcounter13", MISCREG_HPMCOUNTER13, rvTypeFlags(RV64, RV32)}},
+    {CSR_HPMCOUNTER14, {"hpmcounter14", MISCREG_HPMCOUNTER14, rvTypeFlags(RV64, RV32)}},
+    {CSR_HPMCOUNTER15, {"hpmcounter15", MISCREG_HPMCOUNTER15, rvTypeFlags(RV64, RV32)}},
+    {CSR_HPMCOUNTER16, {"hpmcounter16", MISCREG_HPMCOUNTER16, rvTypeFlags(RV64, RV32)}},
+    {CSR_HPMCOUNTER17, {"hpmcounter17", MISCREG_HPMCOUNTER17, rvTypeFlags(RV64, RV32)}},
+    {CSR_HPMCOUNTER18, {"hpmcounter18", MISCREG_HPMCOUNTER18, rvTypeFlags(RV64, RV32)}},
+    {CSR_HPMCOUNTER19, {"hpmcounter19", MISCREG_HPMCOUNTER19, rvTypeFlags(RV64, RV32)}},
+    {CSR_HPMCOUNTER20, {"hpmcounter20", MISCREG_HPMCOUNTER20, rvTypeFlags(RV64, RV32)}},
+    {CSR_HPMCOUNTER21, {"hpmcounter21", MISCREG_HPMCOUNTER21, rvTypeFlags(RV64, RV32)}},
+    {CSR_HPMCOUNTER22, {"hpmcounter22", MISCREG_HPMCOUNTER22, rvTypeFlags(RV64, RV32)}},
+    {CSR_HPMCOUNTER23, {"hpmcounter23", MISCREG_HPMCOUNTER23, rvTypeFlags(RV64, RV32)}},
+    {CSR_HPMCOUNTER24, {"hpmcounter24", MISCREG_HPMCOUNTER24, rvTypeFlags(RV64, RV32)}},
+    {CSR_HPMCOUNTER25, {"hpmcounter25", MISCREG_HPMCOUNTER25, rvTypeFlags(RV64, RV32)}},
+    {CSR_HPMCOUNTER26, {"hpmcounter26", MISCREG_HPMCOUNTER26, rvTypeFlags(RV64, RV32)}},
+    {CSR_HPMCOUNTER27, {"hpmcounter27", MISCREG_HPMCOUNTER27, rvTypeFlags(RV64, RV32)}},
+    {CSR_HPMCOUNTER28, {"hpmcounter28", MISCREG_HPMCOUNTER28, rvTypeFlags(RV64, RV32)}},
+    {CSR_HPMCOUNTER29, {"hpmcounter29", MISCREG_HPMCOUNTER29, rvTypeFlags(RV64, RV32)}},
+    {CSR_HPMCOUNTER30, {"hpmcounter30", MISCREG_HPMCOUNTER30, rvTypeFlags(RV64, RV32)}},
+    {CSR_HPMCOUNTER31, {"hpmcounter31", MISCREG_HPMCOUNTER31, rvTypeFlags(RV64, RV32)}},
+    {CSR_CYCLEH, {"cycleh", MISCREG_CYCLEH, rvTypeFlags(RV32)}},
+    {CSR_TIMEH, {"timeh", MISCREG_TIMEH, rvTypeFlags(RV32)}},
+    {CSR_INSTRETH, {"instreth", MISCREG_INSTRETH, rvTypeFlags(RV32)}},
+    {CSR_HPMCOUNTER03H, {"hpmcounter03h", MISCREG_HPMCOUNTER03H, rvTypeFlags(RV32)}},
+    {CSR_HPMCOUNTER04H, {"hpmcounter04h", MISCREG_HPMCOUNTER04H, rvTypeFlags(RV32)}},
+    {CSR_HPMCOUNTER05H, {"hpmcounter05h", MISCREG_HPMCOUNTER05H, rvTypeFlags(RV32)}},
+    {CSR_HPMCOUNTER06H, {"hpmcounter06h", MISCREG_HPMCOUNTER06H, rvTypeFlags(RV32)}},
+    {CSR_HPMCOUNTER07H, {"hpmcounter07h", MISCREG_HPMCOUNTER07H, rvTypeFlags(RV32)}},
+    {CSR_HPMCOUNTER08H, {"hpmcounter08h", MISCREG_HPMCOUNTER08H, rvTypeFlags(RV32)}},
+    {CSR_HPMCOUNTER09H, {"hpmcounter09h", MISCREG_HPMCOUNTER09H, rvTypeFlags(RV32)}},
+    {CSR_HPMCOUNTER10H, {"hpmcounter10h", MISCREG_HPMCOUNTER10H, rvTypeFlags(RV32)}},
+    {CSR_HPMCOUNTER11H, {"hpmcounter11h", MISCREG_HPMCOUNTER11H, rvTypeFlags(RV32)}},
+    {CSR_HPMCOUNTER12H, {"hpmcounter12h", MISCREG_HPMCOUNTER12H, rvTypeFlags(RV32)}},
+    {CSR_HPMCOUNTER13H, {"hpmcounter13h", MISCREG_HPMCOUNTER13H, rvTypeFlags(RV32)}},
+    {CSR_HPMCOUNTER14H, {"hpmcounter14h", MISCREG_HPMCOUNTER14H, rvTypeFlags(RV32)}},
+    {CSR_HPMCOUNTER15H, {"hpmcounter15h", MISCREG_HPMCOUNTER15H, rvTypeFlags(RV32)}},
+    {CSR_HPMCOUNTER16H, {"hpmcounter16h", MISCREG_HPMCOUNTER16H, rvTypeFlags(RV32)}},
+    {CSR_HPMCOUNTER17H, {"hpmcounter17h", MISCREG_HPMCOUNTER17H, rvTypeFlags(RV32)}},
+    {CSR_HPMCOUNTER18H, {"hpmcounter18h", MISCREG_HPMCOUNTER18H, rvTypeFlags(RV32)}},
+    {CSR_HPMCOUNTER19H, {"hpmcounter19h", MISCREG_HPMCOUNTER19H, rvTypeFlags(RV32)}},
+    {CSR_HPMCOUNTER20H, {"hpmcounter20h", MISCREG_HPMCOUNTER20H, rvTypeFlags(RV32)}},
+    {CSR_HPMCOUNTER21H, {"hpmcounter21h", MISCREG_HPMCOUNTER21H, rvTypeFlags(RV32)}},
+    {CSR_HPMCOUNTER22H, {"hpmcounter22h", MISCREG_HPMCOUNTER22H, rvTypeFlags(RV32)}},
+    {CSR_HPMCOUNTER23H, {"hpmcounter23h", MISCREG_HPMCOUNTER23H, rvTypeFlags(RV32)}},
+    {CSR_HPMCOUNTER24H, {"hpmcounter24h", MISCREG_HPMCOUNTER24H, rvTypeFlags(RV32)}},
+    {CSR_HPMCOUNTER25H, {"hpmcounter25h", MISCREG_HPMCOUNTER25H, rvTypeFlags(RV32)}},
+    {CSR_HPMCOUNTER26H, {"hpmcounter26h", MISCREG_HPMCOUNTER26H, rvTypeFlags(RV32)}},
+    {CSR_HPMCOUNTER27H, {"hpmcounter27h", MISCREG_HPMCOUNTER27H, rvTypeFlags(RV32)}},
+    {CSR_HPMCOUNTER28H, {"hpmcounter28h", MISCREG_HPMCOUNTER28H, rvTypeFlags(RV32)}},
+    {CSR_HPMCOUNTER29H, {"hpmcounter29h", MISCREG_HPMCOUNTER29H, rvTypeFlags(RV32)}},
+    {CSR_HPMCOUNTER30H, {"hpmcounter30h", MISCREG_HPMCOUNTER30H, rvTypeFlags(RV32)}},
+    {CSR_HPMCOUNTER31H, {"hpmcounter31h", MISCREG_HPMCOUNTER31H, rvTypeFlags(RV32)}},
 
-    {CSR_SSTATUS, {"sstatus", MISCREG_STATUS}},
-    {CSR_SEDELEG, {"sedeleg", MISCREG_SEDELEG}},
-    {CSR_SIDELEG, {"sideleg", MISCREG_SIDELEG}},
-    {CSR_SIE, {"sie", MISCREG_IE}},
-    {CSR_STVEC, {"stvec", MISCREG_STVEC}},
-    {CSR_SCOUNTEREN, {"scounteren", MISCREG_SCOUNTEREN}},
-    {CSR_SSCRATCH, {"sscratch", MISCREG_SSCRATCH}},
-    {CSR_SEPC, {"sepc", MISCREG_SEPC}},
-    {CSR_SCAUSE, {"scause", MISCREG_SCAUSE}},
-    {CSR_STVAL, {"stval", MISCREG_STVAL}},
-    {CSR_SIP, {"sip", MISCREG_IP}},
-    {CSR_SATP, {"satp", MISCREG_SATP}},
+    {CSR_SSTATUS, {"sstatus", MISCREG_STATUS, rvTypeFlags(RV64, RV32)}},
+    {CSR_SEDELEG, {"sedeleg", MISCREG_SEDELEG, rvTypeFlags(RV64, RV32)}},
+    {CSR_SIDELEG, {"sideleg", MISCREG_SIDELEG, rvTypeFlags(RV64, RV32)}},
+    {CSR_SIE, {"sie", MISCREG_IE, rvTypeFlags(RV64, RV32)}},
+    {CSR_STVEC, {"stvec", MISCREG_STVEC, rvTypeFlags(RV64, RV32)}},
+    {CSR_SCOUNTEREN, {"scounteren", MISCREG_SCOUNTEREN, rvTypeFlags(RV64, RV32)}},
+    {CSR_SSCRATCH, {"sscratch", MISCREG_SSCRATCH, rvTypeFlags(RV64, RV32)}},
+    {CSR_SEPC, {"sepc", MISCREG_SEPC, rvTypeFlags(RV64, RV32)}},
+    {CSR_SCAUSE, {"scause", MISCREG_SCAUSE, rvTypeFlags(RV64, RV32)}},
+    {CSR_STVAL, {"stval", MISCREG_STVAL, rvTypeFlags(RV64, RV32)}},
+    {CSR_SIP, {"sip", MISCREG_IP, rvTypeFlags(RV64, RV32)}},
+    {CSR_SATP, {"satp", MISCREG_SATP, rvTypeFlags(RV64, RV32)}},
 
-    {CSR_MVENDORID, {"mvendorid", MISCREG_VENDORID}},
-    {CSR_MARCHID, {"marchid", MISCREG_ARCHID}},
-    {CSR_MIMPID, {"mimpid", MISCREG_IMPID}},
-    {CSR_MHARTID, {"mhartid", MISCREG_HARTID}},
-    {CSR_MSTATUS, {"mstatus", MISCREG_STATUS}},
-    {CSR_MISA, {"misa", MISCREG_ISA}},
-    {CSR_MEDELEG, {"medeleg", MISCREG_MEDELEG}},
-    {CSR_MIDELEG, {"mideleg", MISCREG_MIDELEG}},
-    {CSR_MIE, {"mie", MISCREG_IE}},
-    {CSR_MTVEC, {"mtvec", MISCREG_MTVEC}},
-    {CSR_MCOUNTEREN, {"mcounteren", MISCREG_MCOUNTEREN}},
-    {CSR_MSCRATCH, {"mscratch", MISCREG_MSCRATCH}},
-    {CSR_MEPC, {"mepc", MISCREG_MEPC}},
-    {CSR_MCAUSE, {"mcause", MISCREG_MCAUSE}},
-    {CSR_MTVAL, {"mtval", MISCREG_MTVAL}},
-    {CSR_MIP, {"mip", MISCREG_IP}},
-    {CSR_PMPCFG0, {"pmpcfg0", MISCREG_PMPCFG0}},
-    // pmpcfg1 rv32 only
-    {CSR_PMPCFG2, {"pmpcfg2", MISCREG_PMPCFG2}},
-    // pmpcfg3 rv32 only
-    {CSR_PMPADDR00, {"pmpaddr0", MISCREG_PMPADDR00}},
-    {CSR_PMPADDR01, {"pmpaddr1", MISCREG_PMPADDR01}},
-    {CSR_PMPADDR02, {"pmpaddr2", MISCREG_PMPADDR02}},
-    {CSR_PMPADDR03, {"pmpaddr3", MISCREG_PMPADDR03}},
-    {CSR_PMPADDR04, {"pmpaddr4", MISCREG_PMPADDR04}},
-    {CSR_PMPADDR05, {"pmpaddr5", MISCREG_PMPADDR05}},
-    {CSR_PMPADDR06, {"pmpaddr6", MISCREG_PMPADDR06}},
-    {CSR_PMPADDR07, {"pmpaddr7", MISCREG_PMPADDR07}},
-    {CSR_PMPADDR08, {"pmpaddr8", MISCREG_PMPADDR08}},
-    {CSR_PMPADDR09, {"pmpaddr9", MISCREG_PMPADDR09}},
-    {CSR_PMPADDR10, {"pmpaddr10", MISCREG_PMPADDR10}},
-    {CSR_PMPADDR11, {"pmpaddr11", MISCREG_PMPADDR11}},
-    {CSR_PMPADDR12, {"pmpaddr12", MISCREG_PMPADDR12}},
-    {CSR_PMPADDR13, {"pmpaddr13", MISCREG_PMPADDR13}},
-    {CSR_PMPADDR14, {"pmpaddr14", MISCREG_PMPADDR14}},
-    {CSR_PMPADDR15, {"pmpaddr15", MISCREG_PMPADDR15}},
-    {CSR_MCYCLE, {"mcycle", MISCREG_CYCLE}},
-    {CSR_MINSTRET, {"minstret", MISCREG_INSTRET}},
-    {CSR_MHPMCOUNTER03, {"mhpmcounter03", MISCREG_HPMCOUNTER03}},
-    {CSR_MHPMCOUNTER04, {"mhpmcounter04", MISCREG_HPMCOUNTER04}},
-    {CSR_MHPMCOUNTER05, {"mhpmcounter05", MISCREG_HPMCOUNTER05}},
-    {CSR_MHPMCOUNTER06, {"mhpmcounter06", MISCREG_HPMCOUNTER06}},
-    {CSR_MHPMCOUNTER07, {"mhpmcounter07", MISCREG_HPMCOUNTER07}},
-    {CSR_MHPMCOUNTER08, {"mhpmcounter08", MISCREG_HPMCOUNTER08}},
-    {CSR_MHPMCOUNTER09, {"mhpmcounter09", MISCREG_HPMCOUNTER09}},
-    {CSR_MHPMCOUNTER10, {"mhpmcounter10", MISCREG_HPMCOUNTER10}},
-    {CSR_MHPMCOUNTER11, {"mhpmcounter11", MISCREG_HPMCOUNTER11}},
-    {CSR_MHPMCOUNTER12, {"mhpmcounter12", MISCREG_HPMCOUNTER12}},
-    {CSR_MHPMCOUNTER13, {"mhpmcounter13", MISCREG_HPMCOUNTER13}},
-    {CSR_MHPMCOUNTER14, {"mhpmcounter14", MISCREG_HPMCOUNTER14}},
-    {CSR_MHPMCOUNTER15, {"mhpmcounter15", MISCREG_HPMCOUNTER15}},
-    {CSR_MHPMCOUNTER16, {"mhpmcounter16", MISCREG_HPMCOUNTER16}},
-    {CSR_MHPMCOUNTER17, {"mhpmcounter17", MISCREG_HPMCOUNTER17}},
-    {CSR_MHPMCOUNTER18, {"mhpmcounter18", MISCREG_HPMCOUNTER18}},
-    {CSR_MHPMCOUNTER19, {"mhpmcounter19", MISCREG_HPMCOUNTER19}},
-    {CSR_MHPMCOUNTER20, {"mhpmcounter20", MISCREG_HPMCOUNTER20}},
-    {CSR_MHPMCOUNTER21, {"mhpmcounter21", MISCREG_HPMCOUNTER21}},
-    {CSR_MHPMCOUNTER22, {"mhpmcounter22", MISCREG_HPMCOUNTER22}},
-    {CSR_MHPMCOUNTER23, {"mhpmcounter23", MISCREG_HPMCOUNTER23}},
-    {CSR_MHPMCOUNTER24, {"mhpmcounter24", MISCREG_HPMCOUNTER24}},
-    {CSR_MHPMCOUNTER25, {"mhpmcounter25", MISCREG_HPMCOUNTER25}},
-    {CSR_MHPMCOUNTER26, {"mhpmcounter26", MISCREG_HPMCOUNTER26}},
-    {CSR_MHPMCOUNTER27, {"mhpmcounter27", MISCREG_HPMCOUNTER27}},
-    {CSR_MHPMCOUNTER28, {"mhpmcounter28", MISCREG_HPMCOUNTER28}},
-    {CSR_MHPMCOUNTER29, {"mhpmcounter29", MISCREG_HPMCOUNTER29}},
-    {CSR_MHPMCOUNTER30, {"mhpmcounter30", MISCREG_HPMCOUNTER30}},
-    {CSR_MHPMCOUNTER31, {"mhpmcounter31", MISCREG_HPMCOUNTER31}},
-    {CSR_MHPMEVENT03, {"mhpmevent03", MISCREG_HPMEVENT03}},
-    {CSR_MHPMEVENT04, {"mhpmevent04", MISCREG_HPMEVENT04}},
-    {CSR_MHPMEVENT05, {"mhpmevent05", MISCREG_HPMEVENT05}},
-    {CSR_MHPMEVENT06, {"mhpmevent06", MISCREG_HPMEVENT06}},
-    {CSR_MHPMEVENT07, {"mhpmevent07", MISCREG_HPMEVENT07}},
-    {CSR_MHPMEVENT08, {"mhpmevent08", MISCREG_HPMEVENT08}},
-    {CSR_MHPMEVENT09, {"mhpmevent09", MISCREG_HPMEVENT09}},
-    {CSR_MHPMEVENT10, {"mhpmevent10", MISCREG_HPMEVENT10}},
-    {CSR_MHPMEVENT11, {"mhpmevent11", MISCREG_HPMEVENT11}},
-    {CSR_MHPMEVENT12, {"mhpmevent12", MISCREG_HPMEVENT12}},
-    {CSR_MHPMEVENT13, {"mhpmevent13", MISCREG_HPMEVENT13}},
-    {CSR_MHPMEVENT14, {"mhpmevent14", MISCREG_HPMEVENT14}},
-    {CSR_MHPMEVENT15, {"mhpmevent15", MISCREG_HPMEVENT15}},
-    {CSR_MHPMEVENT16, {"mhpmevent16", MISCREG_HPMEVENT16}},
-    {CSR_MHPMEVENT17, {"mhpmevent17", MISCREG_HPMEVENT17}},
-    {CSR_MHPMEVENT18, {"mhpmevent18", MISCREG_HPMEVENT18}},
-    {CSR_MHPMEVENT19, {"mhpmevent19", MISCREG_HPMEVENT19}},
-    {CSR_MHPMEVENT20, {"mhpmevent20", MISCREG_HPMEVENT20}},
-    {CSR_MHPMEVENT21, {"mhpmevent21", MISCREG_HPMEVENT21}},
-    {CSR_MHPMEVENT22, {"mhpmevent22", MISCREG_HPMEVENT22}},
-    {CSR_MHPMEVENT23, {"mhpmevent23", MISCREG_HPMEVENT23}},
-    {CSR_MHPMEVENT24, {"mhpmevent24", MISCREG_HPMEVENT24}},
-    {CSR_MHPMEVENT25, {"mhpmevent25", MISCREG_HPMEVENT25}},
-    {CSR_MHPMEVENT26, {"mhpmevent26", MISCREG_HPMEVENT26}},
-    {CSR_MHPMEVENT27, {"mhpmevent27", MISCREG_HPMEVENT27}},
-    {CSR_MHPMEVENT28, {"mhpmevent28", MISCREG_HPMEVENT28}},
-    {CSR_MHPMEVENT29, {"mhpmevent29", MISCREG_HPMEVENT29}},
-    {CSR_MHPMEVENT30, {"mhpmevent30", MISCREG_HPMEVENT30}},
-    {CSR_MHPMEVENT31, {"mhpmevent31", MISCREG_HPMEVENT31}},
+    {CSR_MVENDORID, {"mvendorid", MISCREG_VENDORID, rvTypeFlags(RV64, RV32)}},
+    {CSR_MARCHID, {"marchid", MISCREG_ARCHID, rvTypeFlags(RV64, RV32)}},
+    {CSR_MIMPID, {"mimpid", MISCREG_IMPID, rvTypeFlags(RV64, RV32)}},
+    {CSR_MHARTID, {"mhartid", MISCREG_HARTID, rvTypeFlags(RV64, RV32)}},
+    {CSR_MSTATUS, {"mstatus", MISCREG_STATUS, rvTypeFlags(RV64, RV32)}},
+    {CSR_MISA, {"misa", MISCREG_ISA, rvTypeFlags(RV64, RV32)}},
+    {CSR_MEDELEG, {"medeleg", MISCREG_MEDELEG, rvTypeFlags(RV64, RV32)}},
+    {CSR_MIDELEG, {"mideleg", MISCREG_MIDELEG, rvTypeFlags(RV64, RV32)}},
+    {CSR_MIE, {"mie", MISCREG_IE, rvTypeFlags(RV64, RV32)}},
+    {CSR_MTVEC, {"mtvec", MISCREG_MTVEC, rvTypeFlags(RV64, RV32)}},
+    {CSR_MCOUNTEREN, {"mcounteren", MISCREG_MCOUNTEREN, rvTypeFlags(RV64, RV32)}},
+    {CSR_MSTATUSH, {"mstatush", MISCREG_MSTATUSH, rvTypeFlags(RV32)}},
+    {CSR_MSCRATCH, {"mscratch", MISCREG_MSCRATCH, rvTypeFlags(RV64, RV32)}},
+    {CSR_MEPC, {"mepc", MISCREG_MEPC, rvTypeFlags(RV64, RV32)}},
+    {CSR_MCAUSE, {"mcause", MISCREG_MCAUSE, rvTypeFlags(RV64, RV32)}},
+    {CSR_MTVAL, {"mtval", MISCREG_MTVAL, rvTypeFlags(RV64, RV32)}},
+    {CSR_MIP, {"mip", MISCREG_IP, rvTypeFlags(RV64, RV32)}},
+    {CSR_PMPCFG0, {"pmpcfg0", MISCREG_PMPCFG0, rvTypeFlags(RV64, RV32)}},
+    {CSR_PMPCFG1, {"pmpcfg1", MISCREG_PMPCFG1, rvTypeFlags(RV32)}},  // pmpcfg1 rv32 only
+    {CSR_PMPCFG2, {"pmpcfg2", MISCREG_PMPCFG2, rvTypeFlags(RV64, RV32)}},
+    {CSR_PMPCFG3, {"pmpcfg3", MISCREG_PMPCFG3, rvTypeFlags(RV32)}},  // pmpcfg3 rv32 only
+    {CSR_PMPADDR00, {"pmpaddr0", MISCREG_PMPADDR00, rvTypeFlags(RV64, RV32)}},
+    {CSR_PMPADDR01, {"pmpaddr1", MISCREG_PMPADDR01, rvTypeFlags(RV64, RV32)}},
+    {CSR_PMPADDR02, {"pmpaddr2", MISCREG_PMPADDR02, rvTypeFlags(RV64, RV32)}},
+    {CSR_PMPADDR03, {"pmpaddr3", MISCREG_PMPADDR03, rvTypeFlags(RV64, RV32)}},
+    {CSR_PMPADDR04, {"pmpaddr4", MISCREG_PMPADDR04, rvTypeFlags(RV64, RV32)}},
+    {CSR_PMPADDR05, {"pmpaddr5", MISCREG_PMPADDR05, rvTypeFlags(RV64, RV32)}},
+    {CSR_PMPADDR06, {"pmpaddr6", MISCREG_PMPADDR06, rvTypeFlags(RV64, RV32)}},
+    {CSR_PMPADDR07, {"pmpaddr7", MISCREG_PMPADDR07, rvTypeFlags(RV64, RV32)}},
+    {CSR_PMPADDR08, {"pmpaddr8", MISCREG_PMPADDR08, rvTypeFlags(RV64, RV32)}},
+    {CSR_PMPADDR09, {"pmpaddr9", MISCREG_PMPADDR09, rvTypeFlags(RV64, RV32)}},
+    {CSR_PMPADDR10, {"pmpaddr10", MISCREG_PMPADDR10, rvTypeFlags(RV64, RV32)}},
+    {CSR_PMPADDR11, {"pmpaddr11", MISCREG_PMPADDR11, rvTypeFlags(RV64, RV32)}},
+    {CSR_PMPADDR12, {"pmpaddr12", MISCREG_PMPADDR12, rvTypeFlags(RV64, RV32)}},
+    {CSR_PMPADDR13, {"pmpaddr13", MISCREG_PMPADDR13, rvTypeFlags(RV64, RV32)}},
+    {CSR_PMPADDR14, {"pmpaddr14", MISCREG_PMPADDR14, rvTypeFlags(RV64, RV32)}},
+    {CSR_PMPADDR15, {"pmpaddr15", MISCREG_PMPADDR15, rvTypeFlags(RV64, RV32)}},
+    {CSR_MCYCLE, {"mcycle", MISCREG_CYCLE, rvTypeFlags(RV64, RV32)}},
+    {CSR_MINSTRET, {"minstret", MISCREG_INSTRET, rvTypeFlags(RV64, RV32)}},
+    {CSR_MHPMCOUNTER03, {"mhpmcounter03", MISCREG_HPMCOUNTER03, rvTypeFlags(RV64, RV32)}},
+    {CSR_MHPMCOUNTER04, {"mhpmcounter04", MISCREG_HPMCOUNTER04, rvTypeFlags(RV64, RV32)}},
+    {CSR_MHPMCOUNTER05, {"mhpmcounter05", MISCREG_HPMCOUNTER05, rvTypeFlags(RV64, RV32)}},
+    {CSR_MHPMCOUNTER06, {"mhpmcounter06", MISCREG_HPMCOUNTER06, rvTypeFlags(RV64, RV32)}},
+    {CSR_MHPMCOUNTER07, {"mhpmcounter07", MISCREG_HPMCOUNTER07, rvTypeFlags(RV64, RV32)}},
+    {CSR_MHPMCOUNTER08, {"mhpmcounter08", MISCREG_HPMCOUNTER08, rvTypeFlags(RV64, RV32)}},
+    {CSR_MHPMCOUNTER09, {"mhpmcounter09", MISCREG_HPMCOUNTER09, rvTypeFlags(RV64, RV32)}},
+    {CSR_MHPMCOUNTER10, {"mhpmcounter10", MISCREG_HPMCOUNTER10, rvTypeFlags(RV64, RV32)}},
+    {CSR_MHPMCOUNTER11, {"mhpmcounter11", MISCREG_HPMCOUNTER11, rvTypeFlags(RV64, RV32)}},
+    {CSR_MHPMCOUNTER12, {"mhpmcounter12", MISCREG_HPMCOUNTER12, rvTypeFlags(RV64, RV32)}},
+    {CSR_MHPMCOUNTER13, {"mhpmcounter13", MISCREG_HPMCOUNTER13, rvTypeFlags(RV64, RV32)}},
+    {CSR_MHPMCOUNTER14, {"mhpmcounter14", MISCREG_HPMCOUNTER14, rvTypeFlags(RV64, RV32)}},
+    {CSR_MHPMCOUNTER15, {"mhpmcounter15", MISCREG_HPMCOUNTER15, rvTypeFlags(RV64, RV32)}},
+    {CSR_MHPMCOUNTER16, {"mhpmcounter16", MISCREG_HPMCOUNTER16, rvTypeFlags(RV64, RV32)}},
+    {CSR_MHPMCOUNTER17, {"mhpmcounter17", MISCREG_HPMCOUNTER17, rvTypeFlags(RV64, RV32)}},
+    {CSR_MHPMCOUNTER18, {"mhpmcounter18", MISCREG_HPMCOUNTER18, rvTypeFlags(RV64, RV32)}},
+    {CSR_MHPMCOUNTER19, {"mhpmcounter19", MISCREG_HPMCOUNTER19, rvTypeFlags(RV64, RV32)}},
+    {CSR_MHPMCOUNTER20, {"mhpmcounter20", MISCREG_HPMCOUNTER20, rvTypeFlags(RV64, RV32)}},
+    {CSR_MHPMCOUNTER21, {"mhpmcounter21", MISCREG_HPMCOUNTER21, rvTypeFlags(RV64, RV32)}},
+    {CSR_MHPMCOUNTER22, {"mhpmcounter22", MISCREG_HPMCOUNTER22, rvTypeFlags(RV64, RV32)}},
+    {CSR_MHPMCOUNTER23, {"mhpmcounter23", MISCREG_HPMCOUNTER23, rvTypeFlags(RV64, RV32)}},
+    {CSR_MHPMCOUNTER24, {"mhpmcounter24", MISCREG_HPMCOUNTER24, rvTypeFlags(RV64, RV32)}},
+    {CSR_MHPMCOUNTER25, {"mhpmcounter25", MISCREG_HPMCOUNTER25, rvTypeFlags(RV64, RV32)}},
+    {CSR_MHPMCOUNTER26, {"mhpmcounter26", MISCREG_HPMCOUNTER26, rvTypeFlags(RV64, RV32)}},
+    {CSR_MHPMCOUNTER27, {"mhpmcounter27", MISCREG_HPMCOUNTER27, rvTypeFlags(RV64, RV32)}},
+    {CSR_MHPMCOUNTER28, {"mhpmcounter28", MISCREG_HPMCOUNTER28, rvTypeFlags(RV64, RV32)}},
+    {CSR_MHPMCOUNTER29, {"mhpmcounter29", MISCREG_HPMCOUNTER29, rvTypeFlags(RV64, RV32)}},
+    {CSR_MHPMCOUNTER30, {"mhpmcounter30", MISCREG_HPMCOUNTER30, rvTypeFlags(RV64, RV32)}},
+    {CSR_MHPMCOUNTER31, {"mhpmcounter31", MISCREG_HPMCOUNTER31, rvTypeFlags(RV64, RV32)}},
 
-    {CSR_TSELECT, {"tselect", MISCREG_TSELECT}},
-    {CSR_TDATA1, {"tdata1", MISCREG_TDATA1}},
-    {CSR_TDATA2, {"tdata2", MISCREG_TDATA2}},
-    {CSR_TDATA3, {"tdata3", MISCREG_TDATA3}},
-    {CSR_DCSR, {"dcsr", MISCREG_DCSR}},
-    {CSR_DPC, {"dpc", MISCREG_DPC}},
-    {CSR_DSCRATCH, {"dscratch", MISCREG_DSCRATCH}}
+    {CSR_MCYCLEH, {"mcycleh", MISCREG_CYCLEH, rvTypeFlags(RV32)}},
+    {CSR_MINSTRETH, {"minstreth", MISCREG_INSTRETH, rvTypeFlags(RV32)}},
+    {CSR_MHPMCOUNTER03H, {"mhpmcounter03h", MISCREG_HPMCOUNTER03H, rvTypeFlags(RV32)}},
+    {CSR_MHPMCOUNTER04H, {"mhpmcounter04h", MISCREG_HPMCOUNTER04H, rvTypeFlags(RV32)}},
+    {CSR_MHPMCOUNTER05H, {"mhpmcounter05h", MISCREG_HPMCOUNTER05H, rvTypeFlags(RV32)}},
+    {CSR_MHPMCOUNTER06H, {"mhpmcounter06h", MISCREG_HPMCOUNTER06H, rvTypeFlags(RV32)}},
+    {CSR_MHPMCOUNTER07H, {"mhpmcounter07h", MISCREG_HPMCOUNTER07H, rvTypeFlags(RV32)}},
+    {CSR_MHPMCOUNTER08H, {"mhpmcounter08h", MISCREG_HPMCOUNTER08H, rvTypeFlags(RV32)}},
+    {CSR_MHPMCOUNTER09H, {"mhpmcounter09h", MISCREG_HPMCOUNTER09H, rvTypeFlags(RV32)}},
+    {CSR_MHPMCOUNTER10H, {"mhpmcounter10h", MISCREG_HPMCOUNTER10H, rvTypeFlags(RV32)}},
+    {CSR_MHPMCOUNTER11H, {"mhpmcounter11h", MISCREG_HPMCOUNTER11H, rvTypeFlags(RV32)}},
+    {CSR_MHPMCOUNTER12H, {"mhpmcounter12h", MISCREG_HPMCOUNTER12H, rvTypeFlags(RV32)}},
+    {CSR_MHPMCOUNTER13H, {"mhpmcounter13h", MISCREG_HPMCOUNTER13H, rvTypeFlags(RV32)}},
+    {CSR_MHPMCOUNTER14H, {"mhpmcounter14h", MISCREG_HPMCOUNTER14H, rvTypeFlags(RV32)}},
+    {CSR_MHPMCOUNTER15H, {"mhpmcounter15h", MISCREG_HPMCOUNTER15H, rvTypeFlags(RV32)}},
+    {CSR_MHPMCOUNTER16H, {"mhpmcounter16h", MISCREG_HPMCOUNTER16H, rvTypeFlags(RV32)}},
+    {CSR_MHPMCOUNTER17H, {"mhpmcounter17h", MISCREG_HPMCOUNTER17H, rvTypeFlags(RV32)}},
+    {CSR_MHPMCOUNTER18H, {"mhpmcounter18h", MISCREG_HPMCOUNTER18H, rvTypeFlags(RV32)}},
+    {CSR_MHPMCOUNTER19H, {"mhpmcounter19h", MISCREG_HPMCOUNTER19H, rvTypeFlags(RV32)}},
+    {CSR_MHPMCOUNTER20H, {"mhpmcounter20h", MISCREG_HPMCOUNTER20H, rvTypeFlags(RV32)}},
+    {CSR_MHPMCOUNTER21H, {"mhpmcounter21h", MISCREG_HPMCOUNTER21H, rvTypeFlags(RV32)}},
+    {CSR_MHPMCOUNTER22H, {"mhpmcounter22h", MISCREG_HPMCOUNTER22H, rvTypeFlags(RV32)}},
+    {CSR_MHPMCOUNTER23H, {"mhpmcounter23h", MISCREG_HPMCOUNTER23H, rvTypeFlags(RV32)}},
+    {CSR_MHPMCOUNTER24H, {"mhpmcounter24h", MISCREG_HPMCOUNTER24H, rvTypeFlags(RV32)}},
+    {CSR_MHPMCOUNTER25H, {"mhpmcounter25h", MISCREG_HPMCOUNTER25H, rvTypeFlags(RV32)}},
+    {CSR_MHPMCOUNTER26H, {"mhpmcounter26h", MISCREG_HPMCOUNTER26H, rvTypeFlags(RV32)}},
+    {CSR_MHPMCOUNTER27H, {"mhpmcounter27h", MISCREG_HPMCOUNTER27H, rvTypeFlags(RV32)}},
+    {CSR_MHPMCOUNTER28H, {"mhpmcounter28h", MISCREG_HPMCOUNTER28H, rvTypeFlags(RV32)}},
+    {CSR_MHPMCOUNTER29H, {"mhpmcounter29h", MISCREG_HPMCOUNTER29H, rvTypeFlags(RV32)}},
+    {CSR_MHPMCOUNTER30H, {"mhpmcounter30h", MISCREG_HPMCOUNTER30H, rvTypeFlags(RV32)}},
+    {CSR_MHPMCOUNTER31H, {"mhpmcounter31h", MISCREG_HPMCOUNTER31H, rvTypeFlags(RV32)}},
+
+    {CSR_MHPMEVENT03, {"mhpmevent03", MISCREG_HPMEVENT03, rvTypeFlags(RV64, RV32)}},
+    {CSR_MHPMEVENT04, {"mhpmevent04", MISCREG_HPMEVENT04, rvTypeFlags(RV64, RV32)}},
+    {CSR_MHPMEVENT05, {"mhpmevent05", MISCREG_HPMEVENT05, rvTypeFlags(RV64, RV32)}},
+    {CSR_MHPMEVENT06, {"mhpmevent06", MISCREG_HPMEVENT06, rvTypeFlags(RV64, RV32)}},
+    {CSR_MHPMEVENT07, {"mhpmevent07", MISCREG_HPMEVENT07, rvTypeFlags(RV64, RV32)}},
+    {CSR_MHPMEVENT08, {"mhpmevent08", MISCREG_HPMEVENT08, rvTypeFlags(RV64, RV32)}},
+    {CSR_MHPMEVENT09, {"mhpmevent09", MISCREG_HPMEVENT09, rvTypeFlags(RV64, RV32)}},
+    {CSR_MHPMEVENT10, {"mhpmevent10", MISCREG_HPMEVENT10, rvTypeFlags(RV64, RV32)}},
+    {CSR_MHPMEVENT11, {"mhpmevent11", MISCREG_HPMEVENT11, rvTypeFlags(RV64, RV32)}},
+    {CSR_MHPMEVENT12, {"mhpmevent12", MISCREG_HPMEVENT12, rvTypeFlags(RV64, RV32)}},
+    {CSR_MHPMEVENT13, {"mhpmevent13", MISCREG_HPMEVENT13, rvTypeFlags(RV64, RV32)}},
+    {CSR_MHPMEVENT14, {"mhpmevent14", MISCREG_HPMEVENT14, rvTypeFlags(RV64, RV32)}},
+    {CSR_MHPMEVENT15, {"mhpmevent15", MISCREG_HPMEVENT15, rvTypeFlags(RV64, RV32)}},
+    {CSR_MHPMEVENT16, {"mhpmevent16", MISCREG_HPMEVENT16, rvTypeFlags(RV64, RV32)}},
+    {CSR_MHPMEVENT17, {"mhpmevent17", MISCREG_HPMEVENT17, rvTypeFlags(RV64, RV32)}},
+    {CSR_MHPMEVENT18, {"mhpmevent18", MISCREG_HPMEVENT18, rvTypeFlags(RV64, RV32)}},
+    {CSR_MHPMEVENT19, {"mhpmevent19", MISCREG_HPMEVENT19, rvTypeFlags(RV64, RV32)}},
+    {CSR_MHPMEVENT20, {"mhpmevent20", MISCREG_HPMEVENT20, rvTypeFlags(RV64, RV32)}},
+    {CSR_MHPMEVENT21, {"mhpmevent21", MISCREG_HPMEVENT21, rvTypeFlags(RV64, RV32)}},
+    {CSR_MHPMEVENT22, {"mhpmevent22", MISCREG_HPMEVENT22, rvTypeFlags(RV64, RV32)}},
+    {CSR_MHPMEVENT23, {"mhpmevent23", MISCREG_HPMEVENT23, rvTypeFlags(RV64, RV32)}},
+    {CSR_MHPMEVENT24, {"mhpmevent24", MISCREG_HPMEVENT24, rvTypeFlags(RV64, RV32)}},
+    {CSR_MHPMEVENT25, {"mhpmevent25", MISCREG_HPMEVENT25, rvTypeFlags(RV64, RV32)}},
+    {CSR_MHPMEVENT26, {"mhpmevent26", MISCREG_HPMEVENT26, rvTypeFlags(RV64, RV32)}},
+    {CSR_MHPMEVENT27, {"mhpmevent27", MISCREG_HPMEVENT27, rvTypeFlags(RV64, RV32)}},
+    {CSR_MHPMEVENT28, {"mhpmevent28", MISCREG_HPMEVENT28, rvTypeFlags(RV64, RV32)}},
+    {CSR_MHPMEVENT29, {"mhpmevent29", MISCREG_HPMEVENT29, rvTypeFlags(RV64, RV32)}},
+    {CSR_MHPMEVENT30, {"mhpmevent30", MISCREG_HPMEVENT30, rvTypeFlags(RV64, RV32)}},
+    {CSR_MHPMEVENT31, {"mhpmevent31", MISCREG_HPMEVENT31, rvTypeFlags(RV64, RV32)}},
+
+    {CSR_TSELECT, {"tselect", MISCREG_TSELECT, rvTypeFlags(RV64, RV32)}},
+    {CSR_TDATA1, {"tdata1", MISCREG_TDATA1, rvTypeFlags(RV64, RV32)}},
+    {CSR_TDATA2, {"tdata2", MISCREG_TDATA2, rvTypeFlags(RV64, RV32)}},
+    {CSR_TDATA3, {"tdata3", MISCREG_TDATA3, rvTypeFlags(RV64, RV32)}},
+    {CSR_DCSR, {"dcsr", MISCREG_DCSR, rvTypeFlags(RV64, RV32)}},
+    {CSR_DPC, {"dpc", MISCREG_DPC, rvTypeFlags(RV64, RV32)}},
+    {CSR_DSCRATCH, {"dscratch", MISCREG_DSCRATCH, rvTypeFlags(RV64, RV32)}}
 };
 
 /**
@@ -597,6 +774,14 @@ const off_t MXL_OFFSETS[enums::Num_RiscvType] = {
     [RV32] = (sizeof(uint32_t) * 8 - 2),
     [RV64] = (sizeof(uint64_t) * 8 - 2),
 };
+const off_t MBE_OFFSET[enums::Num_RiscvType] = {
+    [RV32] = 5,
+    [RV64] = 37,
+};
+const off_t SBE_OFFSET[enums::Num_RiscvType] = {
+    [RV32] = 4,
+    [RV64] = 36,
+};
 const off_t SXL_OFFSET = 34;
 const off_t UXL_OFFSET = 32;
 const off_t FS_OFFSET = 13;
@@ -618,9 +803,16 @@ const RegVal STATUS_SD_MASKS[enums::Num_RiscvType] = {
     [RV32] = 1ULL << ((sizeof(uint32_t) * 8) - 1),
     [RV64] = 1ULL << ((sizeof(uint64_t) * 8) - 1),
 };
+const RegVal STATUS_MBE_MASK[enums::Num_RiscvType] = {
+    [RV32] = 1ULL << MBE_OFFSET[RV32],
+    [RV64] = 1ULL << MBE_OFFSET[RV64],
+};
+const RegVal STATUS_SBE_MASK[enums::Num_RiscvType] = {
+    [RV32] = 1ULL << SBE_OFFSET[RV32],
+    [RV64] = 1ULL << SBE_OFFSET[RV64],
+};
 const RegVal STATUS_SXL_MASK = 3ULL << SXL_OFFSET;
 const RegVal STATUS_UXL_MASK = 3ULL << UXL_OFFSET;
-
 const RegVal STATUS_TSR_MASK = 1ULL << 22;
 const RegVal STATUS_TW_MASK = 1ULL << 21;
 const RegVal STATUS_TVM_MASK = 1ULL << 20;
@@ -645,7 +837,8 @@ const RegVal MSTATUS_MASKS[enums::Num_RiscvType] = {
              STATUS_VS_MASK | STATUS_MPP_MASK | STATUS_SPP_MASK |
              STATUS_MPIE_MASK | STATUS_SPIE_MASK | STATUS_UPIE_MASK |
              STATUS_MIE_MASK | STATUS_SIE_MASK | STATUS_UIE_MASK,
-    [RV64] = STATUS_SD_MASKS[RV64] | STATUS_SXL_MASK | STATUS_UXL_MASK |
+    [RV64] = STATUS_SD_MASKS[RV64] | STATUS_MBE_MASK[RV64] |
+             STATUS_SBE_MASK[RV64] | STATUS_SXL_MASK | STATUS_UXL_MASK |
              STATUS_TSR_MASK | STATUS_TW_MASK | STATUS_TVM_MASK |
              STATUS_MXR_MASK | STATUS_SUM_MASK | STATUS_MPRV_MASK |
              STATUS_XS_MASK | STATUS_FS_MASK | STATUS_VS_MASK|
@@ -653,6 +846,8 @@ const RegVal MSTATUS_MASKS[enums::Num_RiscvType] = {
              STATUS_SPIE_MASK | STATUS_UPIE_MASK | STATUS_MIE_MASK |
              STATUS_SIE_MASK | STATUS_UIE_MASK,
 };
+// rv32 only
+const RegVal MSTATUSH_MASKS = STATUS_MBE_MASK[RV32] | STATUS_SBE_MASK[RV32];
 const RegVal SSTATUS_MASKS[enums::Num_RiscvType] = {
     [RV32] = STATUS_SD_MASKS[RV32] | STATUS_MXR_MASK | STATUS_SUM_MASK |
              STATUS_XS_MASK | STATUS_FS_MASK | STATUS_VS_MASK |
@@ -709,6 +904,7 @@ const std::unordered_map<int, RegVal> CSRMasks[enums::Num_RiscvType] = {
               {CSR_MSTATUS, MSTATUS_MASKS[RV32]},
               {CSR_MISA, MISA_MASKS[RV32]},
               {CSR_MIE, MI_MASK},
+              {CSR_MSTATUSH, MSTATUSH_MASKS},
               {CSR_MIP, MI_MASK}},
     [RV64] = {{CSR_USTATUS, USTATUS_MASKS[RV64]},
               {CSR_UIE, UI_MASK},
diff --git a/src/dev/riscv/clint.cc b/src/dev/riscv/clint.cc
index b27b9bf61d..209f656d3c 100644
--- a/src/dev/riscv/clint.cc
+++ b/src/dev/riscv/clint.cc
@@ -71,7 +71,12 @@ Clint::raiseInterruptPin(int id)
 
         // Update misc reg file
         ISA* isa = dynamic_cast<ISA*>(tc->getIsaPtr());
-        isa->setMiscRegNoEffect(MISCREG_TIME, mtime);
+        if (isa->rvType() == RV32) {
+            isa->setMiscRegNoEffect(MISCREG_TIME, bits(mtime, 31, 0));
+            isa->setMiscRegNoEffect(MISCREG_TIMEH, bits(mtime, 63, 32));
+        } else {
+            isa->setMiscRegNoEffect(MISCREG_TIME, mtime);
+        }
 
         // Post timer interrupt
         uint64_t mtimecmp = registers.mtimecmp[context_id].get();

From ad107116a1d01aca2535e4c2f191278249177083 Mon Sep 17 00:00:00 2001
From: Roger Chang <rogerycchang@google.com>
Date: Mon, 28 Nov 2022 08:28:12 +0000
Subject: [PATCH 054/492] arch-riscv: Support RV32 to remote gdb

support rv32 cpu, fpu, csr registers to remote gdb.

Change-Id: Ib821a35ff2e95f6f47569b1f4cb35cd98fcca77d
Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/66131
Tested-by: kokoro <noreply+kokoro@google.com>
Maintainer: Gabe Black <gabeblack@google.com>
Reviewed-by: Gabe Black <gabeblack@google.com>
Reviewed-by: Jason Lowe-Power <power.jg@gmail.com>
---
 src/arch/riscv/gdb-xml/SConscript          |   4 +
 src/arch/riscv/gdb-xml/riscv-32bit-cpu.xml |  48 +++
 src/arch/riscv/gdb-xml/riscv-32bit-csr.xml | 249 ++++++++++++++
 src/arch/riscv/gdb-xml/riscv-32bit-fpu.xml |  58 ++++
 src/arch/riscv/gdb-xml/riscv-32bit.xml     |  13 +
 src/arch/riscv/remote_gdb.cc               | 376 ++++++++++++++-------
 src/arch/riscv/remote_gdb.hh               |  89 ++++-
 7 files changed, 719 insertions(+), 118 deletions(-)
 create mode 100644 src/arch/riscv/gdb-xml/riscv-32bit-cpu.xml
 create mode 100644 src/arch/riscv/gdb-xml/riscv-32bit-csr.xml
 create mode 100644 src/arch/riscv/gdb-xml/riscv-32bit-fpu.xml
 create mode 100644 src/arch/riscv/gdb-xml/riscv-32bit.xml

diff --git a/src/arch/riscv/gdb-xml/SConscript b/src/arch/riscv/gdb-xml/SConscript
index 722137408b..bafea174d2 100644
--- a/src/arch/riscv/gdb-xml/SConscript
+++ b/src/arch/riscv/gdb-xml/SConscript
@@ -43,6 +43,10 @@
 
 Import('*')
 
+GdbXml('riscv-32bit.xml', 'gdb_xml_riscv_32bit_target', tags='riscv isa')
+GdbXml('riscv-32bit-cpu.xml', 'gdb_xml_riscv_32bit_cpu', tags='riscv isa')
+GdbXml('riscv-32bit-fpu.xml', 'gdb_xml_riscv_32bit_fpu', tags='riscv isa')
+GdbXml('riscv-32bit-csr.xml', 'gdb_xml_riscv_32bit_csr', tags='riscv isa')
 GdbXml('riscv-64bit.xml', 'gdb_xml_riscv_64bit_target', tags='riscv isa')
 GdbXml('riscv-64bit-cpu.xml', 'gdb_xml_riscv_64bit_cpu', tags='riscv isa')
 GdbXml('riscv-64bit-fpu.xml', 'gdb_xml_riscv_64bit_fpu', tags='riscv isa')
diff --git a/src/arch/riscv/gdb-xml/riscv-32bit-cpu.xml b/src/arch/riscv/gdb-xml/riscv-32bit-cpu.xml
new file mode 100644
index 0000000000..c48f770ded
--- /dev/null
+++ b/src/arch/riscv/gdb-xml/riscv-32bit-cpu.xml
@@ -0,0 +1,48 @@
+<?xml version="1.0"?>
+<!-- Copyright (C) 2018-2019 Free Software Foundation, Inc.
+     Contributed by Huawei International
+
+     Copying and distribution of this file, with or without modification,
+     are permitted in any medium without royalty provided the copyright
+     notice and this notice are preserved.  -->
+
+<!-- Register numbers are hard-coded in order to maintain backward
+     compatibility with older versions of tools that didn't use xml
+     register descriptions.  -->
+
+<!DOCTYPE feature SYSTEM "gdb-target.dtd">
+<feature name="org.gnu.gdb.riscv.cpu">
+  <reg name="zero" bitsize="32" type="int" regnum="0"/>
+  <reg name="ra" bitsize="32" type="code_ptr"/>
+  <reg name="sp" bitsize="32" type="data_ptr"/>
+  <reg name="gp" bitsize="32" type="data_ptr"/>
+  <reg name="tp" bitsize="32" type="data_ptr"/>
+  <reg name="t0" bitsize="32" type="int"/>
+  <reg name="t1" bitsize="32" type="int"/>
+  <reg name="t2" bitsize="32" type="int"/>
+  <reg name="fp" bitsize="32" type="data_ptr"/>
+  <reg name="s1" bitsize="32" type="int"/>
+  <reg name="a0" bitsize="32" type="int"/>
+  <reg name="a1" bitsize="32" type="int"/>
+  <reg name="a2" bitsize="32" type="int"/>
+  <reg name="a3" bitsize="32" type="int"/>
+  <reg name="a4" bitsize="32" type="int"/>
+  <reg name="a5" bitsize="32" type="int"/>
+  <reg name="a6" bitsize="32" type="int"/>
+  <reg name="a7" bitsize="32" type="int"/>
+  <reg name="s2" bitsize="32" type="int"/>
+  <reg name="s3" bitsize="32" type="int"/>
+  <reg name="s4" bitsize="32" type="int"/>
+  <reg name="s5" bitsize="32" type="int"/>
+  <reg name="s6" bitsize="32" type="int"/>
+  <reg name="s7" bitsize="32" type="int"/>
+  <reg name="s8" bitsize="32" type="int"/>
+  <reg name="s9" bitsize="32" type="int"/>
+  <reg name="s10" bitsize="32" type="int"/>
+  <reg name="s11" bitsize="32" type="int"/>
+  <reg name="t3" bitsize="32" type="int"/>
+  <reg name="t4" bitsize="32" type="int"/>
+  <reg name="t5" bitsize="32" type="int"/>
+  <reg name="t6" bitsize="32" type="int"/>
+  <reg name="pc" bitsize="32" type="code_ptr"/>
+</feature>
diff --git a/src/arch/riscv/gdb-xml/riscv-32bit-csr.xml b/src/arch/riscv/gdb-xml/riscv-32bit-csr.xml
new file mode 100644
index 0000000000..7cf7bc05b4
--- /dev/null
+++ b/src/arch/riscv/gdb-xml/riscv-32bit-csr.xml
@@ -0,0 +1,249 @@
+<?xml version="1.0"?>
+<!-- Copyright (C) 2018-2019 Free Software Foundation, Inc.
+     Contributed by Huawei International
+
+     Copying and distribution of this file, with or without modification,
+     are permitted in any medium without royalty provided the copyright
+     notice and this notice are preserved.  -->
+
+<!DOCTYPE feature SYSTEM "gdb-target.dtd">
+<feature name="org.gnu.gdb.riscv.csr">
+  <reg name="cycle" bitsize="32"/>
+  <reg name="time" bitsize="32"/>
+  <reg name="cycleh" bitsize="32"/>
+  <reg name="timeh" bitsize="32"/>
+  <reg name="ustatus" bitsize="32"/>
+  <reg name="uie" bitsize="32"/>
+  <reg name="utvec" bitsize="32"/>
+  <reg name="uscratch" bitsize="32"/>
+  <reg name="uepc" bitsize="32"/>
+  <reg name="ucause" bitsize="32"/>
+  <reg name="utval" bitsize="32"/>
+  <reg name="uip" bitsize="32"/>
+  <reg name="sstatus" bitsize="32"/>
+  <reg name="sedeleg" bitsize="32"/>
+  <reg name="sideleg" bitsize="32"/>
+  <reg name="sie" bitsize="32"/>
+  <reg name="stvec" bitsize="32"/>
+  <reg name="scounteren" bitsize="32"/>
+  <reg name="sscratch" bitsize="32"/>
+  <reg name="sepc" bitsize="32"/>
+  <reg name="scause" bitsize="32"/>
+  <reg name="stval" bitsize="32"/>
+  <reg name="sip" bitsize="32"/>
+  <reg name="satp" bitsize="32"/>
+  <reg name="mvendorid" bitsize="32"/>
+  <reg name="marchid" bitsize="32"/>
+  <reg name="mimpid" bitsize="32"/>
+  <reg name="mhartid" bitsize="32"/>
+  <reg name="mstatus" bitsize="32"/>
+  <reg name="misa" bitsize="32"/>
+  <reg name="medeleg" bitsize="32"/>
+  <reg name="mideleg" bitsize="32"/>
+  <reg name="mie" bitsize="32"/>
+  <reg name="mtvec" bitsize="32"/>
+  <reg name="mcounteren" bitsize="32"/>
+  <reg name="mstatush" bitsize="32"/>
+  <reg name="mscratch" bitsize="32"/>
+  <reg name="mepc" bitsize="32"/>
+  <reg name="mcause" bitsize="32"/>
+  <reg name="mtval" bitsize="32"/>
+  <reg name="mip" bitsize="32"/>
+  <reg name="hstatus" bitsize="32"/>
+  <reg name="hedeleg" bitsize="32"/>
+  <reg name="hideleg" bitsize="32"/>
+  <reg name="hie" bitsize="32"/>
+  <reg name="htvec" bitsize="32"/>
+  <reg name="hscratch" bitsize="32"/>
+  <reg name="hepc" bitsize="32"/>
+  <reg name="hcause" bitsize="32"/>
+  <reg name="hbadaddr" bitsize="32"/>
+  <reg name="hip" bitsize="32"/>
+  <!-- <reg name="instret" bitsize="32"/>
+  <reg name="hpmcounter3" bitsize="32"/>
+  <reg name="hpmcounter4" bitsize="32"/>
+  <reg name="hpmcounter5" bitsize="32"/>
+  <reg name="hpmcounter6" bitsize="32"/>
+  <reg name="hpmcounter7" bitsize="32"/>
+  <reg name="hpmcounter8" bitsize="32"/>
+  <reg name="hpmcounter9" bitsize="32"/>
+  <reg name="hpmcounter10" bitsize="32"/>
+  <reg name="hpmcounter11" bitsize="32"/>
+  <reg name="hpmcounter12" bitsize="32"/>
+  <reg name="hpmcounter13" bitsize="32"/>
+  <reg name="hpmcounter14" bitsize="32"/>
+  <reg name="hpmcounter15" bitsize="32"/>
+  <reg name="hpmcounter16" bitsize="32"/>
+  <reg name="hpmcounter17" bitsize="32"/>
+  <reg name="hpmcounter18" bitsize="32"/>
+  <reg name="hpmcounter19" bitsize="32"/>
+  <reg name="hpmcounter20" bitsize="32"/>
+  <reg name="hpmcounter21" bitsize="32"/>
+  <reg name="hpmcounter22" bitsize="32"/>
+  <reg name="hpmcounter23" bitsize="32"/>
+  <reg name="hpmcounter24" bitsize="32"/>
+  <reg name="hpmcounter25" bitsize="32"/>
+  <reg name="hpmcounter26" bitsize="32"/>
+  <reg name="hpmcounter27" bitsize="32"/>
+  <reg name="hpmcounter28" bitsize="32"/>
+  <reg name="hpmcounter29" bitsize="32"/>
+  <reg name="hpmcounter30" bitsize="32"/>
+  <reg name="hpmcounter31" bitsize="32"/>
+  <reg name="instreth" bitsize="32"/>
+  <reg name="hpmcounter3h" bitsize="32"/>
+  <reg name="hpmcounter4h" bitsize="32"/>
+  <reg name="hpmcounter5h" bitsize="32"/>
+  <reg name="hpmcounter6h" bitsize="32"/>
+  <reg name="hpmcounter7h" bitsize="32"/>
+  <reg name="hpmcounter8h" bitsize="32"/>
+  <reg name="hpmcounter9h" bitsize="32"/>
+  <reg name="hpmcounter10h" bitsize="32"/>
+  <reg name="hpmcounter11h" bitsize="32"/>
+  <reg name="hpmcounter12h" bitsize="32"/>
+  <reg name="hpmcounter13h" bitsize="32"/>
+  <reg name="hpmcounter14h" bitsize="32"/>
+  <reg name="hpmcounter15h" bitsize="32"/>
+  <reg name="hpmcounter16h" bitsize="32"/>
+  <reg name="hpmcounter17h" bitsize="32"/>
+  <reg name="hpmcounter18h" bitsize="32"/>
+  <reg name="hpmcounter19h" bitsize="32"/>
+  <reg name="hpmcounter20h" bitsize="32"/>
+  <reg name="hpmcounter21h" bitsize="32"/>
+  <reg name="hpmcounter22h" bitsize="32"/>
+  <reg name="hpmcounter23h" bitsize="32"/>
+  <reg name="hpmcounter24h" bitsize="32"/>
+  <reg name="hpmcounter25h" bitsize="32"/>
+  <reg name="hpmcounter26h" bitsize="32"/>
+  <reg name="hpmcounter27h" bitsize="32"/>
+  <reg name="hpmcounter28h" bitsize="32"/>
+  <reg name="hpmcounter29h" bitsize="32"/>
+  <reg name="hpmcounter30h" bitsize="32"/>
+  <reg name="hpmcounter31h" bitsize="32"/>
+  <reg name="pmpcfg0" bitsize="32"/>
+  <reg name="pmpcfg1" bitsize="32"/>
+  <reg name="pmpcfg2" bitsize="32"/>
+  <reg name="pmpcfg3" bitsize="32"/>
+  <reg name="pmpaddr0" bitsize="32"/>
+  <reg name="pmpaddr1" bitsize="32"/>
+  <reg name="pmpaddr2" bitsize="32"/>
+  <reg name="pmpaddr3" bitsize="32"/>
+  <reg name="pmpaddr4" bitsize="32"/>
+  <reg name="pmpaddr5" bitsize="32"/>
+  <reg name="pmpaddr6" bitsize="32"/>
+  <reg name="pmpaddr7" bitsize="32"/>
+  <reg name="pmpaddr8" bitsize="32"/>
+  <reg name="pmpaddr9" bitsize="32"/>
+  <reg name="pmpaddr10" bitsize="32"/>
+  <reg name="pmpaddr11" bitsize="32"/>
+  <reg name="pmpaddr12" bitsize="32"/>
+  <reg name="pmpaddr13" bitsize="32"/>
+  <reg name="pmpaddr14" bitsize="32"/>
+  <reg name="pmpaddr15" bitsize="32"/>
+  <reg name="mcycle" bitsize="32"/>
+  <reg name="minstret" bitsize="32"/>
+  <reg name="mhpmcounter3" bitsize="32"/>
+  <reg name="mhpmcounter4" bitsize="32"/>
+  <reg name="mhpmcounter5" bitsize="32"/>
+  <reg name="mhpmcounter6" bitsize="32"/>
+  <reg name="mhpmcounter7" bitsize="32"/>
+  <reg name="mhpmcounter8" bitsize="32"/>
+  <reg name="mhpmcounter9" bitsize="32"/>
+  <reg name="mhpmcounter10" bitsize="32"/>
+  <reg name="mhpmcounter11" bitsize="32"/>
+  <reg name="mhpmcounter12" bitsize="32"/>
+  <reg name="mhpmcounter13" bitsize="32"/>
+  <reg name="mhpmcounter14" bitsize="32"/>
+  <reg name="mhpmcounter15" bitsize="32"/>
+  <reg name="mhpmcounter16" bitsize="32"/>
+  <reg name="mhpmcounter17" bitsize="32"/>
+  <reg name="mhpmcounter18" bitsize="32"/>
+  <reg name="mhpmcounter19" bitsize="32"/>
+  <reg name="mhpmcounter20" bitsize="32"/>
+  <reg name="mhpmcounter21" bitsize="32"/>
+  <reg name="mhpmcounter22" bitsize="32"/>
+  <reg name="mhpmcounter23" bitsize="32"/>
+  <reg name="mhpmcounter24" bitsize="32"/>
+  <reg name="mhpmcounter25" bitsize="32"/>
+  <reg name="mhpmcounter26" bitsize="32"/>
+  <reg name="mhpmcounter27" bitsize="32"/>
+  <reg name="mhpmcounter28" bitsize="32"/>
+  <reg name="mhpmcounter29" bitsize="32"/>
+  <reg name="mhpmcounter30" bitsize="32"/>
+  <reg name="mhpmcounter31" bitsize="32"/>
+  <reg name="mcycleh" bitsize="32"/>
+  <reg name="minstreth" bitsize="32"/>
+  <reg name="mhpmcounter3h" bitsize="32"/>
+  <reg name="mhpmcounter4h" bitsize="32"/>
+  <reg name="mhpmcounter5h" bitsize="32"/>
+  <reg name="mhpmcounter6h" bitsize="32"/>
+  <reg name="mhpmcounter7h" bitsize="32"/>
+  <reg name="mhpmcounter8h" bitsize="32"/>
+  <reg name="mhpmcounter9h" bitsize="32"/>
+  <reg name="mhpmcounter10h" bitsize="32"/>
+  <reg name="mhpmcounter11h" bitsize="32"/>
+  <reg name="mhpmcounter12h" bitsize="32"/>
+  <reg name="mhpmcounter13h" bitsize="32"/>
+  <reg name="mhpmcounter14h" bitsize="32"/>
+  <reg name="mhpmcounter15h" bitsize="32"/>
+  <reg name="mhpmcounter16h" bitsize="32"/>
+  <reg name="mhpmcounter17h" bitsize="32"/>
+  <reg name="mhpmcounter18h" bitsize="32"/>
+  <reg name="mhpmcounter19h" bitsize="32"/>
+  <reg name="mhpmcounter20h" bitsize="32"/>
+  <reg name="mhpmcounter21h" bitsize="32"/>
+  <reg name="mhpmcounter22h" bitsize="32"/>
+  <reg name="mhpmcounter23h" bitsize="32"/>
+  <reg name="mhpmcounter24h" bitsize="32"/>
+  <reg name="mhpmcounter25h" bitsize="32"/>
+  <reg name="mhpmcounter26h" bitsize="32"/>
+  <reg name="mhpmcounter27h" bitsize="32"/>
+  <reg name="mhpmcounter28h" bitsize="32"/>
+  <reg name="mhpmcounter29h" bitsize="32"/>
+  <reg name="mhpmcounter30h" bitsize="32"/>
+  <reg name="mhpmcounter31h" bitsize="32"/>
+  <reg name="mhpmevent3" bitsize="32"/>
+  <reg name="mhpmevent4" bitsize="32"/>
+  <reg name="mhpmevent5" bitsize="32"/>
+  <reg name="mhpmevent6" bitsize="32"/>
+  <reg name="mhpmevent7" bitsize="32"/>
+  <reg name="mhpmevent8" bitsize="32"/>
+  <reg name="mhpmevent9" bitsize="32"/>
+  <reg name="mhpmevent10" bitsize="32"/>
+  <reg name="mhpmevent11" bitsize="32"/>
+  <reg name="mhpmevent12" bitsize="32"/>
+  <reg name="mhpmevent13" bitsize="32"/>
+  <reg name="mhpmevent14" bitsize="32"/>
+  <reg name="mhpmevent15" bitsize="32"/>
+  <reg name="mhpmevent16" bitsize="32"/>
+  <reg name="mhpmevent17" bitsize="32"/>
+  <reg name="mhpmevent18" bitsize="32"/>
+  <reg name="mhpmevent19" bitsize="32"/>
+  <reg name="mhpmevent20" bitsize="32"/>
+  <reg name="mhpmevent21" bitsize="32"/>
+  <reg name="mhpmevent22" bitsize="32"/>
+  <reg name="mhpmevent23" bitsize="32"/>
+  <reg name="mhpmevent24" bitsize="32"/>
+  <reg name="mhpmevent25" bitsize="32"/>
+  <reg name="mhpmevent26" bitsize="32"/>
+  <reg name="mhpmevent27" bitsize="32"/>
+  <reg name="mhpmevent28" bitsize="32"/>
+  <reg name="mhpmevent29" bitsize="32"/>
+  <reg name="mhpmevent30" bitsize="32"/>
+  <reg name="mhpmevent31" bitsize="32"/>
+  <reg name="tselect" bitsize="32"/>
+  <reg name="tdata1" bitsize="32"/>
+  <reg name="tdata2" bitsize="32"/>
+  <reg name="tdata3" bitsize="32"/>
+  <reg name="dcsr" bitsize="32"/>
+  <reg name="dpc" bitsize="32"/>
+  <reg name="dscratch" bitsize="32"/>
+  <reg name="mbase" bitsize="32"/>
+  <reg name="mbound" bitsize="32"/>
+  <reg name="mibase" bitsize="32"/>
+  <reg name="mibound" bitsize="32"/>
+  <reg name="mdbase" bitsize="32"/>
+  <reg name="mdbound" bitsize="32"/>
+  <reg name="mucounteren" bitsize="32"/>
+  <reg name="mscounteren" bitsize="32"/>
+  <reg name="mhcounteren" bitsize="32"/> -->
+</feature>
diff --git a/src/arch/riscv/gdb-xml/riscv-32bit-fpu.xml b/src/arch/riscv/gdb-xml/riscv-32bit-fpu.xml
new file mode 100644
index 0000000000..9661b0e004
--- /dev/null
+++ b/src/arch/riscv/gdb-xml/riscv-32bit-fpu.xml
@@ -0,0 +1,58 @@
+<?xml version="1.0"?>
+<!-- Copyright (C) 2018-2019 Free Software Foundation, Inc.
+     Contributed by Huawei International
+
+     Copying and distribution of this file, with or without modification,
+     are permitted in any medium without royalty provided the copyright
+     notice and this notice are preserved.  -->
+
+<!-- Register numbers are hard-coded in order to maintain backward
+     compatibility with older versions of tools that didn't use xml
+     register descriptions.  -->
+
+<!DOCTYPE feature SYSTEM "gdb-target.dtd">
+<feature name="org.gnu.gdb.riscv.fpu">
+
+  <union id="riscv_double">
+    <field name="float" type="ieee_single"/>
+    <field name="double" type="ieee_double"/>
+  </union>
+
+  <reg name="ft0" bitsize="64" type="riscv_double" regnum="33"/>
+  <reg name="ft1" bitsize="64" type="riscv_double"/>
+  <reg name="ft2" bitsize="64" type="riscv_double"/>
+  <reg name="ft3" bitsize="64" type="riscv_double"/>
+  <reg name="ft4" bitsize="64" type="riscv_double"/>
+  <reg name="ft5" bitsize="64" type="riscv_double"/>
+  <reg name="ft6" bitsize="64" type="riscv_double"/>
+  <reg name="ft7" bitsize="64" type="riscv_double"/>
+  <reg name="fs0" bitsize="64" type="riscv_double"/>
+  <reg name="fs1" bitsize="64" type="riscv_double"/>
+  <reg name="fa0" bitsize="64" type="riscv_double"/>
+  <reg name="fa1" bitsize="64" type="riscv_double"/>
+  <reg name="fa2" bitsize="64" type="riscv_double"/>
+  <reg name="fa3" bitsize="64" type="riscv_double"/>
+  <reg name="fa4" bitsize="64" type="riscv_double"/>
+  <reg name="fa5" bitsize="64" type="riscv_double"/>
+  <reg name="fa6" bitsize="64" type="riscv_double"/>
+  <reg name="fa7" bitsize="64" type="riscv_double"/>
+  <reg name="fs2" bitsize="64" type="riscv_double"/>
+  <reg name="fs3" bitsize="64" type="riscv_double"/>
+  <reg name="fs4" bitsize="64" type="riscv_double"/>
+  <reg name="fs5" bitsize="64" type="riscv_double"/>
+  <reg name="fs6" bitsize="64" type="riscv_double"/>
+  <reg name="fs7" bitsize="64" type="riscv_double"/>
+  <reg name="fs8" bitsize="64" type="riscv_double"/>
+  <reg name="fs9" bitsize="64" type="riscv_double"/>
+  <reg name="fs10" bitsize="64" type="riscv_double"/>
+  <reg name="fs11" bitsize="64" type="riscv_double"/>
+  <reg name="ft8" bitsize="64" type="riscv_double"/>
+  <reg name="ft9" bitsize="64" type="riscv_double"/>
+  <reg name="ft10" bitsize="64" type="riscv_double"/>
+  <reg name="ft11" bitsize="64" type="riscv_double"/>
+
+  <reg name="fflags" bitsize="32" type="int" regnum="66"/>
+  <reg name="frm" bitsize="32" type="int" regnum="67"/>
+  <reg name="fcsr" bitsize="32" type="int" regnum="68"/>
+  <reg name="placeholder" bitsize="32" type="int" regnum="69"/>
+</feature>
diff --git a/src/arch/riscv/gdb-xml/riscv-32bit.xml b/src/arch/riscv/gdb-xml/riscv-32bit.xml
new file mode 100644
index 0000000000..982e6b0674
--- /dev/null
+++ b/src/arch/riscv/gdb-xml/riscv-32bit.xml
@@ -0,0 +1,13 @@
+<?xml version="1.0"?>
+<!-- Copyright (C) 2009-2013 Free Software Foundation, Inc.
+     Contributed by Huawei International
+     Copying and distribution of this file, with or without modification,
+     are permitted in any medium without royalty provided the copyright
+     notice and this notice are preserved.  -->
+<!DOCTYPE target SYSTEM "gdb-target.dtd">
+<target>
+  <architecture>riscv</architecture>
+  <xi:include href="riscv-32bit-cpu.xml"/>
+  <xi:include href="riscv-32bit-fpu.xml"/>
+  <xi:include href="riscv-32bit-csr.xml"/>
+</target>
diff --git a/src/arch/riscv/remote_gdb.cc b/src/arch/riscv/remote_gdb.cc
index 4bdd88fde6..54ecde061f 100644
--- a/src/arch/riscv/remote_gdb.cc
+++ b/src/arch/riscv/remote_gdb.cc
@@ -135,6 +135,10 @@
 
 #include <string>
 
+#include "arch/riscv/gdb-xml/gdb_xml_riscv_32bit_cpu.hh"
+#include "arch/riscv/gdb-xml/gdb_xml_riscv_32bit_csr.hh"
+#include "arch/riscv/gdb-xml/gdb_xml_riscv_32bit_fpu.hh"
+#include "arch/riscv/gdb-xml/gdb_xml_riscv_32bit_target.hh"
 #include "arch/riscv/gdb-xml/gdb_xml_riscv_64bit_cpu.hh"
 #include "arch/riscv/gdb-xml/gdb_xml_riscv_64bit_csr.hh"
 #include "arch/riscv/gdb-xml/gdb_xml_riscv_64bit_fpu.hh"
@@ -163,8 +167,31 @@ getRvType(ThreadContext* tc)
     return isa->rvType();
 }
 
+template <typename xint>
+static void
+setRegNoEffectWithMask(
+        ThreadContext *context, RiscvType type, CSRIndex idx, xint val)
+{
+    RegVal oldVal, newVal;
+    RegVal mask = CSRMasks[type].at(idx);
+    oldVal = context->readMiscRegNoEffect(CSRData.at(idx).physIndex);
+    newVal = (oldVal & ~mask) | (val & mask);
+    context->setMiscRegNoEffect(CSRData.at(idx).physIndex, newVal);
+}
+
+template <typename xint>
+static void
+setRegWithMask(ThreadContext *context, RiscvType type, CSRIndex idx, xint val)
+{
+    RegVal oldVal, newVal;
+    RegVal mask = CSRMasks[type].at(idx);
+    oldVal = context->readMiscReg(CSRData.at(idx).physIndex);
+    newVal = (oldVal & ~mask) | (val & mask);
+    context->setMiscReg(CSRData.at(idx).physIndex, newVal);
+}
+
 RemoteGDB::RemoteGDB(System *_system, int _port)
-    : BaseRemoteGDB(_system, _port), regCache64(this)
+    : BaseRemoteGDB(_system, _port), regCache32(this), regCache64(this)
 {
 }
 
@@ -193,6 +220,201 @@ RemoteGDB::acc(Addr va, size_t len)
     return context()->getProcessPtr()->pTable->lookup(va) != nullptr;
 }
 
+void
+RemoteGDB::Riscv32GdbRegCache::getRegs(ThreadContext *context)
+{
+    DPRINTF(GDBAcc, "getregs in remotegdb, size %lu\n", size());
+    auto& RVxCSRMasks = CSRMasks[RV32];
+
+    // General registers
+    for (int i = 0; i < int_reg::NumArchRegs; i++) {
+        r.gpr[i] = context->getReg(intRegClass[i]);
+    }
+    r.pc = context->pcState().instAddr();
+
+    // Floating point registers
+    for (int i = 0; i < float_reg::NumRegs; i++)
+        r.fpu[i] = context->getReg(floatRegClass[i]);
+    r.fflags = context->readMiscRegNoEffect(
+        CSRData.at(CSR_FFLAGS).physIndex) & RVxCSRMasks.at(CSR_FFLAGS);
+    r.frm = context->readMiscRegNoEffect(
+        CSRData.at(CSR_FRM).physIndex) & RVxCSRMasks.at(CSR_FRM);
+    r.fcsr = context->readMiscRegNoEffect(
+        CSRData.at(CSR_FCSR).physIndex) & RVxCSRMasks.at(CSR_FCSR);
+
+    // CSR registers
+    r.cycle = context->readMiscRegNoEffect(
+        CSRData.at(CSR_CYCLE).physIndex);
+    r.cycleh = context->readMiscRegNoEffect(
+        CSRData.at(CSR_CYCLEH).physIndex);
+    r.time = context->readMiscRegNoEffect(
+        CSRData.at(CSR_TIME).physIndex);
+    r.timeh = context->readMiscRegNoEffect(
+        CSRData.at(CSR_TIMEH).physIndex);
+
+    // U mode CSR
+    r.ustatus = context->readMiscReg(
+        CSRData.at(CSR_USTATUS).physIndex) & RVxCSRMasks.at(CSR_USTATUS);
+    r.uie = context->readMiscReg(
+        CSRData.at(CSR_UIE).physIndex) & RVxCSRMasks.at(CSR_UIE);
+    r.utvec = context->readMiscRegNoEffect(
+        CSRData.at(CSR_UTVEC).physIndex);
+    r.uscratch = context->readMiscRegNoEffect(
+        CSRData.at(CSR_USCRATCH).physIndex);
+    r.uepc = context->readMiscRegNoEffect(
+        CSRData.at(CSR_UEPC).physIndex);
+    r.ucause = context->readMiscRegNoEffect(
+        CSRData.at(CSR_UCAUSE).physIndex);
+    r.utval = context->readMiscRegNoEffect(
+        CSRData.at(CSR_UTVAL).physIndex);
+    r.uip = context->readMiscReg(
+        CSRData.at(CSR_UIP).physIndex) & RVxCSRMasks.at(CSR_UIP);
+
+    // S mode CSR
+    r.sstatus = context->readMiscReg(
+        CSRData.at(CSR_SSTATUS).physIndex) & RVxCSRMasks.at(CSR_SSTATUS);
+    r.sedeleg = context->readMiscRegNoEffect(
+        CSRData.at(CSR_SEDELEG).physIndex);
+    r.sideleg = context->readMiscRegNoEffect(
+        CSRData.at(CSR_SIDELEG).physIndex);
+    r.sie = context->readMiscReg(
+        CSRData.at(CSR_SIE).physIndex) & RVxCSRMasks.at(CSR_SIE);
+    r.stvec = context->readMiscRegNoEffect(
+        CSRData.at(CSR_STVEC).physIndex);
+    r.scounteren = context->readMiscRegNoEffect(
+        CSRData.at(CSR_SCOUNTEREN).physIndex);
+    r.sscratch = context->readMiscRegNoEffect(
+        CSRData.at(CSR_SSCRATCH).physIndex);
+    r.sepc = context->readMiscReg(
+        CSRData.at(CSR_SEPC).physIndex);
+    r.scause = context->readMiscRegNoEffect(
+        CSRData.at(CSR_SCAUSE).physIndex);
+    r.stval = context->readMiscRegNoEffect(
+        CSRData.at(CSR_STVAL).physIndex);
+    r.sip = context->readMiscReg(
+        CSRData.at(CSR_SIP).physIndex) & RVxCSRMasks.at(CSR_SIP);
+    r.satp = context->readMiscRegNoEffect(
+        CSRData.at(CSR_SATP).physIndex);
+
+    // M mode CSR
+    r.mvendorid = context->readMiscRegNoEffect(
+        CSRData.at(CSR_MVENDORID).physIndex);
+    r.marchid = context->readMiscRegNoEffect(
+        CSRData.at(CSR_MARCHID).physIndex);
+    r.mimpid = context->readMiscRegNoEffect(
+        CSRData.at(CSR_MIMPID).physIndex);
+    r.mhartid = context->contextId();
+    r.mstatus = context->readMiscReg(
+        CSRData.at(CSR_MSTATUS).physIndex) & RVxCSRMasks.at(CSR_MSTATUS);
+    r.misa = context->readMiscRegNoEffect(
+        CSRData.at(CSR_MISA).physIndex) & RVxCSRMasks.at(CSR_MISA);
+    r.medeleg = context->readMiscRegNoEffect(
+        CSRData.at(CSR_MEDELEG).physIndex);
+    r.mideleg = context->readMiscRegNoEffect(
+        CSRData.at(CSR_MIDELEG).physIndex);
+    r.mie = context->readMiscReg(
+        CSRData.at(CSR_MIE).physIndex) & RVxCSRMasks.at(CSR_MIE);
+    r.mtvec = context->readMiscRegNoEffect(
+        CSRData.at(CSR_MTVEC).physIndex);
+    r.mcounteren = context->readMiscRegNoEffect(
+        CSRData.at(CSR_MCOUNTEREN).physIndex);
+    r.mstatush = context->readMiscReg(
+        CSRData.at(CSR_MSTATUSH).physIndex) & RVxCSRMasks.at(CSR_MSTATUSH);
+    r.mscratch = context->readMiscRegNoEffect(
+        CSRData.at(CSR_MSCRATCH).physIndex);
+    r.mepc = context->readMiscReg(
+        CSRData.at(CSR_MEPC).physIndex);
+    r.mcause = context->readMiscRegNoEffect(
+        CSRData.at(CSR_MCAUSE).physIndex);
+    r.mtval = context->readMiscRegNoEffect(
+        CSRData.at(CSR_MTVAL).physIndex);
+    r.mip = context->readMiscReg(
+        CSRData.at(CSR_MIP).physIndex) & RVxCSRMasks.at(CSR_MIP);
+
+    // H mode CSR (to be implemented)
+}
+
+void
+RemoteGDB::Riscv32GdbRegCache::setRegs(ThreadContext *context) const
+{
+    DPRINTF(GDBAcc, "setregs in remotegdb \n");
+    for (int i = 0; i < int_reg::NumArchRegs; i++)
+        context->setReg(intRegClass[i], r.gpr[i]);
+    context->pcState(r.pc);
+
+    // Floating point registers
+    for (int i = 0; i < float_reg::NumRegs; i++)
+        context->setReg(floatRegClass[i], r.fpu[i]);
+
+    setRegNoEffectWithMask(context, RV32, CSR_FFLAGS, r.fflags);
+    setRegNoEffectWithMask(context, RV32, CSR_FRM, r.frm);
+    setRegNoEffectWithMask(context, RV32, CSR_FCSR, r.fcsr);
+
+    // TODO: implement CSR counter registers for mcycle(h), minstret(h)
+
+    // U mode CSR
+    setRegNoEffectWithMask(context, RV32, CSR_USTATUS, r.ustatus);
+    setRegWithMask(context, RV32, CSR_UIE, r.uie);
+    setRegWithMask(context, RV32, CSR_UIP, r.uip);
+    context->setMiscRegNoEffect(
+        CSRData.at(CSR_UTVEC).physIndex, r.utvec);
+    context->setMiscRegNoEffect(
+        CSRData.at(CSR_USCRATCH).physIndex, r.uscratch);
+    context->setMiscRegNoEffect(
+        CSRData.at(CSR_UEPC).physIndex, r.uepc);
+    context->setMiscRegNoEffect(
+        CSRData.at(CSR_UCAUSE).physIndex, r.ucause);
+    context->setMiscRegNoEffect(
+        CSRData.at(CSR_UTVAL).physIndex, r.utval);
+
+    // S mode CSR
+    setRegNoEffectWithMask(context, RV32, CSR_SSTATUS, r.sstatus);
+    setRegWithMask(context, RV32, CSR_SIE, r.sie);
+    setRegWithMask(context, RV32, CSR_SIP, r.sip);
+    context->setMiscRegNoEffect(
+        CSRData.at(CSR_SEDELEG).physIndex, r.sedeleg);
+    context->setMiscRegNoEffect(
+        CSRData.at(CSR_SIDELEG).physIndex, r.sideleg);
+    context->setMiscRegNoEffect(
+        CSRData.at(CSR_STVEC).physIndex, r.stvec);
+    context->setMiscRegNoEffect(
+        CSRData.at(CSR_SCOUNTEREN).physIndex, r.scounteren);
+    context->setMiscRegNoEffect(
+        CSRData.at(CSR_SSCRATCH).physIndex, r.sscratch);
+    context->setMiscRegNoEffect(
+        CSRData.at(CSR_SEPC).physIndex, r.sepc);
+    context->setMiscRegNoEffect(
+        CSRData.at(CSR_SCAUSE).physIndex, r.scause);
+    context->setMiscRegNoEffect(
+        CSRData.at(CSR_STVAL).physIndex, r.stval);
+    context->setMiscRegNoEffect(
+        CSRData.at(CSR_SATP).physIndex, r.satp);
+
+    // M mode CSR
+    setRegNoEffectWithMask(context, RV32, CSR_MSTATUS, r.mstatus);
+    setRegNoEffectWithMask(context, RV32, CSR_MISA, r.misa);
+    setRegWithMask(context, RV32, CSR_MIE, r.mie);
+    setRegWithMask(context, RV32, CSR_MIP, r.mip);
+    context->setMiscRegNoEffect(
+        CSRData.at(CSR_MEDELEG).physIndex, r.medeleg);
+    context->setMiscRegNoEffect(
+        CSRData.at(CSR_MIDELEG).physIndex, r.mideleg);
+    context->setMiscRegNoEffect(
+        CSRData.at(CSR_MTVEC).physIndex, r.mtvec);
+    context->setMiscRegNoEffect(
+        CSRData.at(CSR_MCOUNTEREN).physIndex, r.mcounteren);
+    context->setMiscRegNoEffect(
+        CSRData.at(CSR_MSCRATCH).physIndex, r.mscratch);
+    context->setMiscRegNoEffect(
+        CSRData.at(CSR_MEPC).physIndex, r.mepc);
+    context->setMiscRegNoEffect(
+        CSRData.at(CSR_MCAUSE).physIndex, r.mcause);
+    context->setMiscRegNoEffect(
+        CSRData.at(CSR_MTVAL).physIndex, r.mtval);
+
+    // H mode CSR (to be implemented)
+}
+
 void
 RemoteGDB::Riscv64GdbRegCache::getRegs(ThreadContext *context)
 {
@@ -222,7 +444,7 @@ RemoteGDB::Riscv64GdbRegCache::getRegs(ThreadContext *context)
         CSRData.at(CSR_TIME).physIndex);
 
     // U mode CSR
-    r.ustatus = context->readMiscRegNoEffect(
+    r.ustatus = context->readMiscReg(
         CSRData.at(CSR_USTATUS).physIndex) & RVxCSRMasks.at(CSR_USTATUS);
     r.uie = context->readMiscReg(
         CSRData.at(CSR_UIE).physIndex) & RVxCSRMasks.at(CSR_UIE);
@@ -240,7 +462,7 @@ RemoteGDB::Riscv64GdbRegCache::getRegs(ThreadContext *context)
         CSRData.at(CSR_UIP).physIndex) & RVxCSRMasks.at(CSR_UIP);
 
     // S mode CSR
-    r.sstatus = context->readMiscRegNoEffect(
+    r.sstatus = context->readMiscReg(
         CSRData.at(CSR_SSTATUS).physIndex) & RVxCSRMasks.at(CSR_SSTATUS);
     r.sedeleg = context->readMiscRegNoEffect(
         CSRData.at(CSR_SEDELEG).physIndex);
@@ -254,7 +476,7 @@ RemoteGDB::Riscv64GdbRegCache::getRegs(ThreadContext *context)
         CSRData.at(CSR_SCOUNTEREN).physIndex);
     r.sscratch = context->readMiscRegNoEffect(
         CSRData.at(CSR_SSCRATCH).physIndex);
-    r.sepc = context->readMiscRegNoEffect(
+    r.sepc = context->readMiscReg(
         CSRData.at(CSR_SEPC).physIndex);
     r.scause = context->readMiscRegNoEffect(
         CSRData.at(CSR_SCAUSE).physIndex);
@@ -272,9 +494,8 @@ RemoteGDB::Riscv64GdbRegCache::getRegs(ThreadContext *context)
         CSRData.at(CSR_MARCHID).physIndex);
     r.mimpid = context->readMiscRegNoEffect(
         CSRData.at(CSR_MIMPID).physIndex);
-    r.mhartid = context->readMiscRegNoEffect(
-        CSRData.at(CSR_MHARTID).physIndex);
-    r.mstatus = context->readMiscRegNoEffect(
+    r.mhartid = context->contextId();
+    r.mstatus = context->readMiscReg(
         CSRData.at(CSR_MSTATUS).physIndex) & RVxCSRMasks.at(CSR_MSTATUS);
     r.misa = context->readMiscRegNoEffect(
         CSRData.at(CSR_MISA).physIndex) & RVxCSRMasks.at(CSR_MISA);
@@ -290,7 +511,7 @@ RemoteGDB::Riscv64GdbRegCache::getRegs(ThreadContext *context)
         CSRData.at(CSR_MCOUNTEREN).physIndex);
     r.mscratch = context->readMiscRegNoEffect(
         CSRData.at(CSR_MSCRATCH).physIndex);
-    r.mepc = context->readMiscRegNoEffect(
+    r.mepc = context->readMiscReg(
         CSRData.at(CSR_MEPC).physIndex);
     r.mcause = context->readMiscRegNoEffect(
         CSRData.at(CSR_MCAUSE).physIndex);
@@ -305,11 +526,6 @@ RemoteGDB::Riscv64GdbRegCache::getRegs(ThreadContext *context)
 void
 RemoteGDB::Riscv64GdbRegCache::setRegs(ThreadContext *context) const
 {
-    // NOTE: no error will be reported for attempting to set masked bits.
-    RegVal oldVal;
-    int mask;
-    RegVal newVal;
-
     DPRINTF(GDBAcc, "setregs in remotegdb \n");
     for (int i = 0; i < int_reg::NumArchRegs; i++)
         context->setReg(intRegClass[i], r.gpr[i]);
@@ -319,48 +535,16 @@ RemoteGDB::Riscv64GdbRegCache::setRegs(ThreadContext *context) const
     for (int i = 0; i < float_reg::NumRegs; i++)
         context->setReg(floatRegClass[i], r.fpu[i]);
 
-    auto& RVxCSRMasks = CSRMasks[RV64];
+    setRegNoEffectWithMask(context, RV64, CSR_FFLAGS, r.fflags);
+    setRegNoEffectWithMask(context, RV64, CSR_FRM, r.frm);
+    setRegNoEffectWithMask(context, RV64, CSR_FCSR, r.fcsr);
 
-    oldVal = context->readMiscRegNoEffect(
-        CSRData.at(CSR_FFLAGS).physIndex);
-    mask = RVxCSRMasks.at(CSR_FFLAGS);
-    newVal = (oldVal & ~mask) | (r.fflags & mask);
-    context->setMiscRegNoEffect(
-        CSRData.at(CSR_FFLAGS).physIndex, newVal);
-
-    oldVal = context->readMiscRegNoEffect(
-        CSRData.at(CSR_FRM).physIndex);
-    mask = RVxCSRMasks.at(CSR_FRM);
-    newVal = (oldVal & ~mask) | (r.frm & mask);
-    context->setMiscRegNoEffect(
-        CSRData.at(CSR_FRM).physIndex, newVal);
-
-    oldVal = context->readMiscRegNoEffect(
-        CSRData.at(CSR_FCSR).physIndex);
-    mask = RVxCSRMasks.at(CSR_FCSR);
-    newVal = (oldVal & ~mask) | (r.fcsr & mask);
-    context->setMiscRegNoEffect(
-        CSRData.at(CSR_FCSR).physIndex, newVal);
-
-    // CSR registers
-    context->setMiscRegNoEffect(
-        CSRData.at(CSR_CYCLE).physIndex, r.cycle);
-    context->setMiscRegNoEffect(
-        CSRData.at(CSR_TIME).physIndex, r.time);
+    // TODO: implement CSR counter registers for mcycle, minstret
 
     // U mode CSR
-    oldVal = context->readMiscRegNoEffect(
-        CSRData.at(CSR_USTATUS).physIndex);
-    mask = RVxCSRMasks.at(CSR_USTATUS);
-    newVal = (oldVal & ~mask) | (r.ustatus & mask);
-    context->setMiscRegNoEffect(
-        CSRData.at(CSR_USTATUS).physIndex, newVal);
-    oldVal = context->readMiscReg(
-        CSRData.at(CSR_UIE).physIndex);
-    mask = RVxCSRMasks.at(CSR_UIE);
-    newVal = (oldVal & ~mask) | (r.uie & mask);
-    context->setMiscReg(
-        CSRData.at(CSR_UIE).physIndex, newVal);
+    setRegNoEffectWithMask(context, RV64, CSR_USTATUS, r.ustatus);
+    setRegWithMask(context, RV64, CSR_UIE, r.uie);
+    setRegWithMask(context, RV64, CSR_UIP, r.uip);
     context->setMiscRegNoEffect(
         CSRData.at(CSR_UTVEC).physIndex, r.utvec);
     context->setMiscRegNoEffect(
@@ -371,30 +555,15 @@ RemoteGDB::Riscv64GdbRegCache::setRegs(ThreadContext *context) const
         CSRData.at(CSR_UCAUSE).physIndex, r.ucause);
     context->setMiscRegNoEffect(
         CSRData.at(CSR_UTVAL).physIndex, r.utval);
-    oldVal = context->readMiscReg(
-        CSRData.at(CSR_UIP).physIndex);
-    mask = RVxCSRMasks.at(CSR_UIP);
-    newVal = (oldVal & ~mask) | (r.uip & mask);
-    context->setMiscReg(
-        CSRData.at(CSR_UIP).physIndex, newVal);
 
     // S mode CSR
-    oldVal = context->readMiscRegNoEffect(
-        CSRData.at(CSR_SSTATUS).physIndex);
-    mask = RVxCSRMasks.at(CSR_SSTATUS);
-    newVal = (oldVal & ~mask) | (r.sstatus & mask);
-    context->setMiscRegNoEffect(
-        CSRData.at(CSR_SSTATUS).physIndex, newVal);
+    setRegNoEffectWithMask(context, RV64, CSR_SSTATUS, r.sstatus);
+    setRegWithMask(context, RV64, CSR_SIE, r.sie);
+    setRegWithMask(context, RV64, CSR_SIP, r.sip);
     context->setMiscRegNoEffect(
         CSRData.at(CSR_SEDELEG).physIndex, r.sedeleg);
     context->setMiscRegNoEffect(
         CSRData.at(CSR_SIDELEG).physIndex, r.sideleg);
-    oldVal = context->readMiscReg(
-        CSRData.at(CSR_SIE).physIndex);
-    mask = RVxCSRMasks.at(CSR_SIE);
-    newVal = (oldVal & ~mask) | (r.sie & mask);
-    context->setMiscReg(
-        CSRData.at(CSR_SIE).physIndex, newVal);
     context->setMiscRegNoEffect(
         CSRData.at(CSR_STVEC).physIndex, r.stvec);
     context->setMiscRegNoEffect(
@@ -407,46 +576,18 @@ RemoteGDB::Riscv64GdbRegCache::setRegs(ThreadContext *context) const
         CSRData.at(CSR_SCAUSE).physIndex, r.scause);
     context->setMiscRegNoEffect(
         CSRData.at(CSR_STVAL).physIndex, r.stval);
-    oldVal = context->readMiscReg(
-        CSRData.at(CSR_SIP).physIndex);
-    mask = RVxCSRMasks.at(CSR_SIP);
-    newVal = (oldVal & ~mask) | (r.sip & mask);
-    context->setMiscReg(
-        CSRData.at(CSR_SIP).physIndex, newVal);
     context->setMiscRegNoEffect(
         CSRData.at(CSR_SATP).physIndex, r.satp);
 
     // M mode CSR
-    context->setMiscRegNoEffect(
-        CSRData.at(CSR_MVENDORID).physIndex, r.mvendorid);
-    context->setMiscRegNoEffect(
-        CSRData.at(CSR_MARCHID).physIndex, r.marchid);
-    context->setMiscRegNoEffect(
-        CSRData.at(CSR_MIMPID).physIndex, r.mimpid);
-    context->setMiscRegNoEffect(
-        CSRData.at(CSR_MHARTID).physIndex, r.mhartid);
-    oldVal = context->readMiscRegNoEffect(
-        CSRData.at(CSR_MSTATUS).physIndex);
-    mask = RVxCSRMasks.at(CSR_MSTATUS);
-    newVal = (oldVal & ~mask) | (r.mstatus & mask);
-    context->setMiscRegNoEffect(
-        CSRData.at(CSR_MSTATUS).physIndex, newVal);
-    oldVal = context->readMiscRegNoEffect(
-        CSRData.at(CSR_MISA).physIndex);
-    mask = RVxCSRMasks.at(CSR_MISA);
-    newVal = (oldVal & ~mask) | (r.misa & mask);
-    context->setMiscRegNoEffect(
-        CSRData.at(CSR_MISA).physIndex, newVal);
+    setRegNoEffectWithMask(context, RV64, CSR_MSTATUS, r.mstatus);
+    setRegNoEffectWithMask(context, RV64, CSR_MISA, r.misa);
+    setRegWithMask(context, RV64, CSR_MIE, r.mie);
+    setRegWithMask(context, RV64, CSR_MIP, r.mip);
     context->setMiscRegNoEffect(
         CSRData.at(CSR_MEDELEG).physIndex, r.medeleg);
     context->setMiscRegNoEffect(
         CSRData.at(CSR_MIDELEG).physIndex, r.mideleg);
-    oldVal = context->readMiscReg(
-        CSRData.at(CSR_MIE).physIndex);
-    mask = RVxCSRMasks.at(CSR_MIE);
-    newVal = (oldVal & ~mask) | (r.mie & mask);
-    context->setMiscReg(
-        CSRData.at(CSR_MIE).physIndex, newVal);
     context->setMiscRegNoEffect(
         CSRData.at(CSR_MTVEC).physIndex, r.mtvec);
     context->setMiscRegNoEffect(
@@ -459,12 +600,6 @@ RemoteGDB::Riscv64GdbRegCache::setRegs(ThreadContext *context) const
         CSRData.at(CSR_MCAUSE).physIndex, r.mcause);
     context->setMiscRegNoEffect(
         CSRData.at(CSR_MTVAL).physIndex, r.mtval);
-    oldVal = context->readMiscReg(
-        CSRData.at(CSR_MIP).physIndex);
-    mask = RVxCSRMasks.at(CSR_MIP);
-    newVal = (oldVal & ~mask) | (r.mip & mask);
-    context->setMiscReg(
-        CSRData.at(CSR_MIP).physIndex, newVal);
 
     // H mode CSR (to be implemented)
 }
@@ -483,14 +618,17 @@ RemoteGDB::getXferFeaturesRead(const std::string &annex, std::string &output)
         x, std::string(reinterpret_cast<const char *>(Blobs::s), \
                        Blobs::s##_len)                           \
     }
-    static const std::map<std::string, std::string> annexMap{
-        GDB_XML("riscv-64bit.xml", gdb_xml_riscv_64bit_target),
-        GDB_XML("riscv-64bit-cpu.xml", gdb_xml_riscv_64bit_cpu),
-        GDB_XML("riscv-64bit-fpu.xml", gdb_xml_riscv_64bit_fpu),
-        GDB_XML("riscv-64bit-csr.xml", gdb_xml_riscv_64bit_csr)};
-#undef GDB_XML
-    if (getRvType(context()) == RV32)
-        return false;
+    static const std::map<std::string, std::string> annexMaps[enums::Num_RiscvType] = {
+        [RV32] = {GDB_XML("target.xml", gdb_xml_riscv_32bit_target),
+                  GDB_XML("riscv-32bit-cpu.xml", gdb_xml_riscv_32bit_cpu),
+                  GDB_XML("riscv-32bit-fpu.xml", gdb_xml_riscv_32bit_fpu),
+                  GDB_XML("riscv-32bit-csr.xml", gdb_xml_riscv_32bit_csr)},
+        [RV64] = {GDB_XML("target.xml", gdb_xml_riscv_64bit_target),
+                  GDB_XML("riscv-64bit-cpu.xml", gdb_xml_riscv_64bit_cpu),
+                  GDB_XML("riscv-64bit-fpu.xml", gdb_xml_riscv_64bit_fpu),
+                  GDB_XML("riscv-64bit-csr.xml", gdb_xml_riscv_64bit_csr)},
+    };
+    auto& annexMap = annexMaps[getRvType(context())];
     auto it = annexMap.find(annex);
     if (it == annexMap.end())
         return false;
@@ -501,7 +639,11 @@ RemoteGDB::getXferFeaturesRead(const std::string &annex, std::string &output)
 BaseGdbRegCache *
 RemoteGDB::gdbRegs()
 {
-    return &regCache64;
+    BaseGdbRegCache* regs[enums::Num_RiscvType] = {
+        [RV32] = &regCache32,
+        [RV64] = &regCache64,
+    };
+    return regs[getRvType(context())];
 }
 
 } // namespace gem5
diff --git a/src/arch/riscv/remote_gdb.hh b/src/arch/riscv/remote_gdb.hh
index a8262a6a2a..8f8abb0587 100644
--- a/src/arch/riscv/remote_gdb.hh
+++ b/src/arch/riscv/remote_gdb.hh
@@ -58,6 +58,92 @@ class RemoteGDB : public BaseRemoteGDB
     // A breakpoint will be 2 bytes if it is compressed and 4 if not
     bool checkBpKind(size_t kind) override { return kind == 2 || kind == 4; }
 
+    class Riscv32GdbRegCache : public BaseGdbRegCache
+    {
+      using BaseGdbRegCache::BaseGdbRegCache;
+      private:
+        /**
+         * RISC-V Register Cache
+         * Order and sizes of registers found in ext/gdb-xml/riscv.xml
+         * To add support for more CSRs:
+         * 1. Uncomment relevant lines in ext/gdb-xml/riscv-32bit-csr.xml
+         * 2. Add register to struct below
+         * 3. Modify RiscvGdbRegCache::getRegs and setRegs
+         */
+        struct GEM5_PACKED
+        {
+            uint32_t gpr[int_reg::NumArchRegs];
+            uint32_t pc;
+            uint64_t fpu[float_reg::NumRegs];
+            uint32_t fflags;
+            uint32_t frm;
+            uint32_t fcsr;
+            // Placeholder for byte alignment
+            uint32_t placeholder;
+            uint32_t cycle;
+            uint32_t time;
+            uint32_t cycleh;
+            uint32_t timeh;
+            uint32_t ustatus;
+            uint32_t uie;
+            uint32_t utvec;
+            uint32_t uscratch;
+            uint32_t uepc;
+            uint32_t ucause;
+            uint32_t utval;
+            uint32_t uip;
+            uint32_t sstatus;
+            uint32_t sedeleg;
+            uint32_t sideleg;
+            uint32_t sie;
+            uint32_t stvec;
+            uint32_t scounteren;
+            uint32_t sscratch;
+            uint32_t sepc;
+            uint32_t scause;
+            uint32_t stval;
+            uint32_t sip;
+            uint32_t satp;
+            uint32_t mvendorid;
+            uint32_t marchid;
+            uint32_t mimpid;
+            uint32_t mhartid;
+            uint32_t mstatus;
+            uint32_t misa;
+            uint32_t medeleg;
+            uint32_t mideleg;
+            uint32_t mie;
+            uint32_t mtvec;
+            uint32_t mcounteren;
+            uint32_t mstatush;
+            uint32_t mscratch;
+            uint32_t mepc;
+            uint32_t mcause;
+            uint32_t mtval;
+            uint32_t mip;
+            uint32_t hstatus;
+            uint32_t hedeleg;
+            uint32_t hideleg;
+            uint32_t hie;
+            uint32_t htvec;
+            uint32_t hscratch;
+            uint32_t hepc;
+            uint32_t hcause;
+            uint32_t hbadaddr;
+            uint32_t hip;
+        } r;
+      public:
+        char *data() const { return (char *)&r; }
+        size_t size() const { return sizeof(r); }
+        void getRegs(ThreadContext*);
+        void setRegs(ThreadContext*) const;
+
+        const std::string
+        name() const
+        {
+            return gdb->name() + ".RiscvGdbRegCache";
+        }
+    };
     class Riscv64GdbRegCache : public BaseGdbRegCache
     {
       using BaseGdbRegCache::BaseGdbRegCache;
@@ -70,7 +156,7 @@ class RemoteGDB : public BaseRemoteGDB
          * 2. Add register to struct below
          * 3. Modify RiscvGdbRegCache::getRegs and setRegs
          */
-        struct
+        struct GEM5_PACKED
         {
             uint64_t gpr[int_reg::NumArchRegs];
             uint64_t pc;
@@ -142,6 +228,7 @@ class RemoteGDB : public BaseRemoteGDB
         }
     };
 
+    Riscv32GdbRegCache regCache32;
     Riscv64GdbRegCache regCache64;
 
   public:

From 2ed43238994e3fcb938c238f1b132385abfb4d7e Mon Sep 17 00:00:00 2001
From: "Bobby R. Bruce" <bbruce@ucdavis.edu>
Date: Mon, 12 Dec 2022 13:52:22 -0800
Subject: [PATCH 055/492] tests: Fix compiler-tests.sh for no build args passed
 case

When a user ran "tests/compiler-tests.sh" without passing any arguments,
the compiler tests would fail with:

```
scons: Reading SConscript files ...
Error: No existing build directory and no variant for /gem5
```

However, when passed with arguments, such as:

```
./tests/compiler-tests.sh -j6
```

the tests passed.

The fix for this is to merge the "$build_out" and "$build_args" into a
single string when executing the docker. I do not know exactly why this
works, but it does fix the error.

Change-Id: Ibcd316668b60fb7706f0ee05ab6dadf56228319d
Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/66631
Maintainer: Jason Lowe-Power <power.jg@gmail.com>
Reviewed-by: Jason Lowe-Power <power.jg@gmail.com>
Tested-by: kokoro <noreply+kokoro@google.com>
---
 tests/compiler-tests.sh | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tests/compiler-tests.sh b/tests/compiler-tests.sh
index e01d9de347..f16e8e5fdb 100755
--- a/tests/compiler-tests.sh
+++ b/tests/compiler-tests.sh
@@ -135,7 +135,7 @@ for compiler in ${images[@]}; do
                 docker run --rm -v "${gem5_root}":"/gem5" -u $UID:$GID \
                     -w /gem5 --memory="${docker_mem_limit}" $repo_name \
                     /usr/bin/env python3 /usr/bin/scons --ignore-style \
-                    "${build_out}" "${build_args}"
+                    "${build_out} ${build_args}"
             }>"${build_stdout}" 2>"${build_stderr}"
             result=$?
 

From f7d0808a5c87a7ca3349bc3e68bb33a4fe912d3c Mon Sep 17 00:00:00 2001
From: Roger Chang <rogerycchang@google.com>
Date: Wed, 30 Nov 2022 16:36:05 +0800
Subject: [PATCH 056/492] arch-riscv: Fork Zba, Zbb, Zbc, Zbs instructions into
 rv32 / rv64

The following instructions will be supported for both rv32 and rv64

Zba extensions:
SLLI.UW
SH1ADD
SH2ADD
SH3ADD
ADD.UW
SH1ADD.UW
SH2ADD.UW
SH3ADD.UW

Zbb extensions:
CLZ
CTZ
CPOP
SEXT.B
SEXT.H
ORC.B
RORI
REV8
CLZW
CTZW
CPOPW
RORIW
ROL
MIN
XNOR
MINU
ROR
MAX
ORN
MAXU
ANDN
ROLW
ZEXT.H
RORW

Zbc extensions:
CLMUL
CLMULR
CLMULH

Zbs extensions:
BSETI
BCLRI
BINVI
BEXTI
BSET
BCLR
BINV
BEXT

Change-Id: I3f489a3a1bab8799e2d95218740e495313b9961d
Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/66211
Maintainer: Jason Lowe-Power <power.jg@gmail.com>
Reviewed-by: Jason Lowe-Power <power.jg@gmail.com>
Tested-by: kokoro <noreply+kokoro@google.com>
---
 src/arch/riscv/isa/bitfields.isa |   1 +
 src/arch/riscv/isa/decoder.isa   | 320 +++++++++++++++++--------------
 2 files changed, 174 insertions(+), 147 deletions(-)

diff --git a/src/arch/riscv/isa/bitfields.isa b/src/arch/riscv/isa/bitfields.isa
index 863982cfec..4f58416237 100644
--- a/src/arch/riscv/isa/bitfields.isa
+++ b/src/arch/riscv/isa/bitfields.isa
@@ -129,6 +129,7 @@ def bitfield M5FUNC <31:25>;
 
 // Cryptography instructions
 def bitfield BIT24         <24>;
+def bitfield BIT25         <25>;
 def bitfield RNUM       <23:20>;
 def bitfield KFUNCT5    <29:25>;
 def bitfield BS         <31:30>;
diff --git a/src/arch/riscv/isa/decoder.isa b/src/arch/riscv/isa/decoder.isa
index 885794032a..458327e5ec 100644
--- a/src/arch/riscv/isa/decoder.isa
+++ b/src/arch/riscv/isa/decoder.isa
@@ -577,8 +577,12 @@ decode QUADRANT default Unknown::unknown() {
                         }});
                     }
                     0x05: bseti({{
-                        uint64_t index = imm & (64 - 1);
-                        Rd = Rs1 | (UINT64_C(1) << index);
+                        if (rvSelect((bool)SHAMT6BIT5, false)) {
+                            return std::make_shared<IllegalInstFault>(
+                                    "shmat[5] != 0", machInst);
+                        }
+                        uint64_t index = imm & rvSelect(32 - 1, 64 - 1);
+                        Rd = rvSext(Rs1 | (UINT64_C(1) << index));
                     }}, imm_type = uint64_t, imm_code = {{ imm = SHAMT6; }});
                     0x06: decode BIT24 {
                         0x0: aes64im({{
@@ -589,24 +593,32 @@ decode QUADRANT default Unknown::unknown() {
                         }}, imm_type = int32_t, imm_code={{ imm = RNUM; }});
                     }
                     0x09: bclri({{
-                        uint64_t index = imm & (64 - 1);
-                        Rd = Rs1 & (~(UINT64_C(1) << index));
+                        if (rvSelect((bool)SHAMT6BIT5, false)) {
+                            return std::make_shared<IllegalInstFault>(
+                                    "shmat[5] != 0", machInst);
+                        }
+                        uint64_t index = imm & rvSelect(32 - 1, 64 - 1);
+                        Rd = rvSext(Rs1 & (~(UINT64_C(1) << index)));
                     }}, imm_type = uint64_t, imm_code = {{ imm = SHAMT6; }});
                     0x0d: binvi({{
-                        uint64_t index = imm & (64 - 1);
-                        Rd = Rs1 ^ (UINT64_C(1) << index);
+                        if (rvSelect((bool)SHAMT6BIT5, false)) {
+                            return std::make_shared<IllegalInstFault>(
+                                    "shmat[5] != 0", machInst);
+                        }
+                        uint64_t index = imm & rvSelect(32 - 1, 64 - 1);
+                        Rd = rvSext(Rs1 ^ (UINT64_C(1) << index));
                     }}, imm_type = uint64_t, imm_code = {{ imm = SHAMT6; }});
                 }
                 format ROp {
                     0x0c: decode RS2 {
                         0x00: clz({{
-                            Rd = clz64(Rs1);
+                            Rd = (machInst.rv_type == RV32) ? clz32(Rs1) : clz64(Rs1);
                         }});
                         0x01: ctz({{
-                            Rd = ctz64(Rs1);
+                            Rd = (machInst.rv_type == RV32) ? ctz32(Rs1) : ctz64(Rs1);
                         }});
                         0x02: cpop({{
-                            Rd = popCount(Rs1);
+                            Rd = (machInst.rv_type == RV32) ? popCount(Rs1<31:0>) : popCount(Rs1);
                         }});
                         0x04: sext_b({{
                             Rd = sext<8>(Rs1_ub);
@@ -649,7 +661,7 @@ decode QUADRANT default Unknown::unknown() {
                         result |= (Rs1<47:40> ? UINT64_C(0xff) : 0x0) << 40;
                         result |= (Rs1<55:48> ? UINT64_C(0xff) : 0x0) << 48;
                         result |= (Rs1<63:56> ? UINT64_C(0xff) : 0x0) << 56;
-                        Rd = result;
+                        Rd = rvSext(result);
                     }}, imm_type = uint64_t, imm_code = {{ imm = SHAMT6; }});
                     0x8: srai({{
                         if (rvSelect((bool)SHAMT6BIT5, false)) {
@@ -659,30 +671,53 @@ decode QUADRANT default Unknown::unknown() {
                         Rd_sd = rvSext(Rs1_sd) >> imm;
                     }}, imm_type = uint64_t, imm_code = {{ imm = SHAMT6; }});
                     0x9: bexti({{
-                        uint64_t index = imm & (64 - 1);
+                        if (rvSelect((bool)SHAMT6BIT5, false)) {
+                            return std::make_shared<IllegalInstFault>(
+                                    "shmat[5] != 0", machInst);
+                        }
+                        uint64_t index = imm & rvSelect(32 - 1, 64 - 1);
                         Rd = (Rs1 >> index) & 0x1;
                     }}, imm_type = uint64_t, imm_code = {{ imm = SHAMT6; }});
                     0xc: rori({{
-                        Rd = (Rs1 >> imm) | (Rs1 << ((64 - imm) & (64 - 1)));
+                        if (rvSelect((bool)SHAMT6BIT5, false)) {
+                            return std::make_shared<IllegalInstFault>(
+                                    "shmat[5] != 0", machInst);
+                        }
+                        uint64_t xlen = rvSelect(32, 64);
+                        Rd = rvSext((rvZext(Rs1) >> imm)
+                            | (Rs1 << ((xlen - imm) & (xlen - 1))));
                     }}, imm_type = uint64_t, imm_code = {{ imm = SHAMT6; }});
                     0xd: decode RS2 {
-                        0x18: rev8({{
-                            uint64_t result = 0;
-                            result |=
-                                ((Rs1 & 0xffULL) << 56)
-                                | (((Rs1 >> 56) & 0xffULL));
-                            result |=
-                                (((Rs1 >> 8) & 0xffULL) << 48)
-                                | (((Rs1 >> 48) & 0xffULL) << 8);
-                            result |=
-                                (((Rs1 >> 16) & 0xffULL) << 40)
-                                | (((Rs1 >> 40) & 0xffULL) << 16);
-                            result |=
-                                (((Rs1 >> 24) & 0xffULL) << 32)
-                                | (((Rs1 >> 32) & 0xffULL) << 24);
-                            Rd = result;
-                        }},
-                        imm_type = uint64_t, imm_code = {{ imm = SHAMT6; }});
+                        0x18: decode BIT25 {
+                            0x0: rv32_rev8({{
+                                uint32_t result = 0;
+                                result |=
+                                    ((Rs1_uw & 0xffUL) << 24)
+                                    | (((Rs1_uw >> 24) & 0xffUL));
+                                result |=
+                                    (((Rs1_uw >> 8) & 0xffUL) << 16)
+                                    | (((Rs1_uw >> 16) & 0xffUL) << 8);
+                                Rd = rvSext(result);
+                            }},
+                            imm_type = uint64_t, imm_code = {{ imm = SHAMT5; }});
+                            0x1: rev8({{
+                                uint64_t result = 0;
+                                result |=
+                                    ((Rs1 & 0xffULL) << 56)
+                                    | (((Rs1 >> 56) & 0xffULL));
+                                result |=
+                                    (((Rs1 >> 8) & 0xffULL) << 48)
+                                    | (((Rs1 >> 48) & 0xffULL) << 8);
+                                result |=
+                                    (((Rs1 >> 16) & 0xffULL) << 40)
+                                    | (((Rs1 >> 40) & 0xffULL) << 16);
+                                result |=
+                                    (((Rs1 >> 24) & 0xffULL) << 32)
+                                    | (((Rs1 >> 32) & 0xffULL) << 24);
+                                Rd = result;
+                            }},
+                            imm_type = uint64_t, imm_code = {{ imm = SHAMT6; }});
+                        }
                     }
                 }
                 0x6: ori({{
@@ -698,48 +733,42 @@ decode QUADRANT default Unknown::unknown() {
             Rd = rvSext(PC + (sext<20>(imm) << 12));
         }});
 
-        0x06: decode FUNCT3 {
-            format IOp {
-                0x0: decode RVTYPE {
-                    0x1: addiw({{
+        0x06: decode RVTYPE {
+            0x1: decode FUNCT3 {
+                format IOp {
+                    0x0: addiw({{
                         Rd_sw = (int32_t)(Rs1_sw + imm);
                     }}, int32_t);
-                }
-                0x1: decode FS3 {
-                    0x0: decode RVTYPE {
-                        0x1: slliw({{
+                    0x1: decode FS3 {
+                        0x0: slliw({{
                             Rd_sd = Rs1_sw << imm;
                         }}, imm_type = uint64_t, imm_code = {{ imm = SHAMT5; }});
+                        0x1: slli_uw({{
+                            Rd = ((uint64_t)(Rs1_uw)) << imm;
+                        }}, imm_type = uint64_t, imm_code = {{ imm = SHAMT6; }});
+                        0xc: decode FS2 {
+                            0x0: clzw({{
+                                Rd = clz32(Rs1);
+                            }});
+                            0x1: ctzw({{
+                                Rd = ctz32(Rs1);
+                            }});
+                            0x2: cpopw({{
+                                Rd = popCount(Rs1<31:0>);
+                            }});
+                        }
                     }
-                    0x1: slli_uw({{
-                        Rd = ((uint64_t)(Rs1_uw)) << imm;
-                    }}, imm_type = uint64_t, imm_code = {{ imm = SHAMT5; }});
-                    0xc: decode FS2 {
-                        0x0: clzw({{
-                            Rd = clz32(Rs1);
-                        }});
-                        0x1: ctzw({{
-                            Rd = ctz32(Rs1);
-                        }});
-                        0x2: cpopw({{
-                            Rd = popCount(Rs1<31:0>);
-                        }});
-                    }
-                }
-                0x5: decode FS3 {
-                    0x0: decode RVTYPE {
-                        0x1: srliw({{
+                    0x5: decode FS3 {
+                        0x0: srliw({{
                             Rd_sd = (int32_t)(Rs1_uw >> imm);
                         }}, imm_type = uint64_t, imm_code = {{ imm = SHAMT5; }});
-                    }
-                    0x8: decode RVTYPE {
-                        0x1: sraiw({{
+                        0x8: sraiw({{
                             Rd_sd = Rs1_sw >> imm;
                         }}, imm_type = uint64_t, imm_code = {{ imm = SHAMT5; }});
+                        0xc: roriw({{
+                            Rd = (int32_t) ((Rs1_uw >> imm) | (Rs1_uw << ((32 - imm) & (32 - 1))));
+                        }}, imm_type = uint64_t, imm_code = {{ imm = SHAMT5; }});
                     }
-                    0xc: roriw({{
-                        Rd = (int32_t) ((Rs1_uw >> imm) | (Rs1_uw << ((32 - imm) & (32 - 1))));
-                    }}, imm_type = uint64_t, imm_code = {{ imm = SHAMT5; }});
                 }
             }
         }
@@ -1026,28 +1055,30 @@ decode QUADRANT default Unknown::unknown() {
                     }
                     0x5: clmul({{
                         uint64_t result = 0;
-                        for (int i = 0; i < 64; i++) {
+                        for (int i = 0; i < rvSelect(32, 64); i++) {
                             if ((Rs2 >> i) & 1) {
                                 result ^= Rs1 << i;
                             }
                         }
-                        Rd = result;
+                        Rd = rvSext(result);
                     }});
                     0x14: bset({{
-                        Rs2 &= (64 - 1);
-                        Rd = Rs1 | (UINT64_C(1) << Rs2);
+                        Rs2 &= rvSelect(32 - 1, 64 - 1);
+                        Rd = rvSext(Rs1 | (UINT64_C(1) << Rs2));
                     }});
                     0x24: bclr({{
-                        Rs2 &= (64 - 1);
-                        Rd = Rs1 & (~(UINT64_C(1) << Rs2));
+                        Rs2 &= rvSelect(32 - 1, 64 - 1);
+                        Rd = rvSext(Rs1 & (~(UINT64_C(1) << Rs2)));
                     }});
                     0x30: rol({{
-                        int shamt = Rs2 & (64 - 1);
-                        Rd = (Rs1 << shamt) | (Rs1 >> ((64 - shamt) & (64 - 1)));
+                        uint64_t xlen = rvSelect(32, 64);
+                        int shamt = Rs2 & (xlen - 1);
+                        Rd = rvSext((Rs1 << shamt)
+                            | (rvZext(Rs1) >> ((xlen - shamt) & (xlen - 1))));
                     }});
                     0x34: binv({{
-                        Rs2 &= (64 - 1);
-                        Rd = Rs1 ^ (UINT64_C(1) << Rs2);
+                        Rs2 &= rvSelect(32 - 1, 64 - 1);
+                        Rd = rvSext(Rs1 ^ (UINT64_C(1) << Rs2));
                     }});
                 }
                 0x2: decode FUNCT7 {
@@ -1082,15 +1113,17 @@ decode QUADRANT default Unknown::unknown() {
                     }
                     0x5: clmulr({{
                         uint64_t result = 0;
-                        for (int i = 0; i < 64; i++) {
+                        uint64_t xlen = rvSelect(32, 64);
+                        uint64_t zextRs1 = rvZext(Rs1);
+                        for (int i = 0; i < xlen; i++) {
                             if ((Rs2 >> i) & 1) {
-                                result ^= Rs1 >> (64-i-1);
+                                result ^= zextRs1 >> (xlen-i-1);
                             }
                         }
-                        Rd = result;
+                        Rd = rvSext(result);
                     }});
                     0x10: sh1add({{
-                        Rd = (Rs1 << 1) + Rs2;
+                        Rd = rvSext((Rs1 << 1) + Rs2);
                     }});
                     0x14: xperm4({{
                         Rd_sd = _rvk_emu_xperm4_64(Rs1_sd, Rs2_sd);
@@ -1123,11 +1156,14 @@ decode QUADRANT default Unknown::unknown() {
                     }
                     0x5: clmulh({{
                         uint64_t result = 0;
-                        for (int i = 1; i < 64; i++) {
+                        uint64_t xlen = rvSelect(32, 64);
+                        uint64_t zextRs1 = rvZext(Rs1);
+                        for (int i = 1; i < xlen; i++) {
                             if ((Rs2 >> i) & 1) {
-                                result ^= (Rs1 >> (64-i));
+                                result ^= zextRs1 >> (xlen-i);
                             }
                         }
+                        // The MSB can never be 1, no need to sign extend.
                         Rd = result;
                     }});
                 }
@@ -1159,17 +1195,22 @@ decode QUADRANT default Unknown::unknown() {
                             }
                         }}, IntDivOp);
                     }
+                    0x4: decode RVTYPE {
+                        0x0: rv32_zext_h({{
+                            Rd = Rs1_uh;
+                        }});
+                    }
                     0x5: min({{
-                        Rd = (((int64_t) Rs1) < ((int64_t) Rs2)) ? Rs1 : Rs2;
+                        Rd_sd = std::min(rvSext(Rs1_sd), rvSext(Rs2_sd));
                     }});
                     0x10: sh2add({{
-                        Rd = (Rs1 << 2) + Rs2;
+                        Rd = rvSext((Rs1 << 2) + Rs2);
                     }});
                     0x14: xperm8({{
                         Rd_sd = _rvk_emu_xperm8_64(Rs1_sd, Rs2_sd);
                     }});
                     0x20: xnor({{
-                        Rd = ~(Rs1 ^ Rs2);
+                        Rd = rvSext(~(Rs1 ^ Rs2));
                     }});
                 }
                 0x5: decode FUNCT7 {
@@ -1197,15 +1238,18 @@ decode QUADRANT default Unknown::unknown() {
                         Rd = rvSext(Rs1_sd) >> rvSelect(Rs2<4:0>, Rs2<5:0>);
                     }});
                     0x5: minu({{
-                        Rd = Rs1 < Rs2 ? Rs1 : Rs2;
+                        Rd = rvSext(std::min(rvZext(Rs1), rvZext(Rs2)));
                     }});
                     0x24: bext({{
-                        Rs2 &= (64 - 1);
+                        Rs2 &= (rvSelect(32, 64) - 1);
+                        // It doesn't need to sign ext because MSB is always 0
                         Rd = (Rs1 >> Rs2) & 0x1;
                     }});
                     0x30: ror({{
-                        int shamt = Rs2 & (64 - 1);
-                        Rd = (Rs1 >> shamt) | (Rs1 << ((64 - shamt) & (64 - 1)));
+                        uint64_t xlen = rvSelect(32, 64);
+                        int shamt = Rs2 & (xlen - 1);
+                        Rd = rvSext((rvZext(Rs1) >> shamt)
+                            | (Rs1 << ((xlen - shamt) & (xlen - 1))));
                     }});
                 }
                 0x6: decode FUNCT7 {
@@ -1237,13 +1281,13 @@ decode QUADRANT default Unknown::unknown() {
                         }}, IntDivOp);
                     }
                     0x5: max({{
-                        Rd = (((int64_t) Rs1) > ((int64_t) Rs2)) ? Rs1 : Rs2;
+                        Rd_sd = std::max(rvSext(Rs1_sd), rvSext(Rs2_sd));
                     }});
                     0x10: sh3add({{
-                        Rd = (Rs1 << 3) + Rs2;
+                        Rd = rvSext((Rs1 << 3) + Rs2);
                     }});
                     0x20: orn({{
-                        Rd = Rs1 | (~Rs2);
+                        Rd = rvSext(Rs1 | (~Rs2));
                     }});
                 }
                 0x7: decode FUNCT7 {
@@ -1267,10 +1311,10 @@ decode QUADRANT default Unknown::unknown() {
                         }}, IntDivOp);
                     }
                     0x5: maxu({{
-                        Rd = Rs1 > Rs2 ? Rs1 : Rs2;
+                        Rd = rvSext(std::max(rvZext(Rs1), rvZext(Rs2)));
                     }});
                     0x20: andn({{
-                        Rd = Rs1 & (~Rs2);
+                        Rd = rvSext(Rs1 & (~Rs2));
                     }});
                 }
             }
@@ -1280,46 +1324,38 @@ decode QUADRANT default Unknown::unknown() {
             Rd = (sext<20>(imm) << 12);
         }});
 
-        0x0e: decode FUNCT3 {
-            format ROp {
-                0x0: decode FUNCT7 {
-                    0x0: decode RVTYPE {
-                        0x1: addw({{
+        0x0e: decode RVTYPE {
+            0x1: decode FUNCT3 {
+                format ROp {
+                    0x0: decode FUNCT7 {
+                        0x0: addw({{
                             Rd_sd = Rs1_sw + Rs2_sw;
                         }});
-                    }
-                    0x1: decode RVTYPE {
                         0x1: mulw({{
                             Rd_sd = (int32_t)(Rs1_sw*Rs2_sw);
                         }}, IntMultOp);
-                    }
-                    0x4: add_uw({{
-                        Rd = Rs1_uw + Rs2;
-                    }});
-                    0x20: decode RVTYPE {
-                        0x1: subw({{
+                        0x4: add_uw({{
+                            Rd = Rs1_uw + Rs2;
+                        }});
+                        0x20: subw({{
                             Rd_sd = Rs1_sw - Rs2_sw;
                         }});
                     }
-                }
-                0x1: decode FUNCT7 {
-                    0x0: decode RVTYPE {
-                        0x1: sllw({{
+                    0x1: decode FUNCT7 {
+                        0x0: sllw({{
                             Rd_sd = Rs1_sw << Rs2<4:0>;
                         }});
+                        0x30: rolw({{
+                            int shamt = Rs2 & (32 - 1);
+                            Rd = (int32_t) ((Rs1_uw << shamt) | (Rs1_uw >> ((32 - shamt) & (32 - 1))));
+                        }});
                     }
-                    0x30: rolw({{
-                        int shamt = Rs2 & (32 - 1);
-                        Rd = (int32_t) ((Rs1_uw << shamt) | (Rs1_uw >> ((32 - shamt) & (32 - 1))));
-                    }});
-                }
-                0x2: decode FUNCT7 {
-                    0x10: sh1add_uw({{
-                        Rd = (((uint64_t)Rs1_uw) << 1) + Rs2;
-                    }});
-                }
-                0x4: decode FUNCT7 {
-                    0x1: decode RVTYPE {
+                    0x2: decode FUNCT7 {
+                        0x10: sh1add_uw({{
+                            Rd = (((uint64_t)Rs1_uw) << 1) + Rs2;
+                        }});
+                    }
+                    0x4: decode FUNCT7 {
                         0x1: divw({{
                             constexpr int32_t kRsMin = \
                                 std::numeric_limits<int32_t>::min();
@@ -1331,21 +1367,17 @@ decode QUADRANT default Unknown::unknown() {
                                 Rd_sd = Rs1_sw/Rs2_sw;
                             }
                         }}, IntDivOp);
-                    }
-                    0x4: zext_h({{
-                        Rd = Rs1_uh;
-                    }});
-                    0x10: sh2add_uw({{
-                        Rd = (((uint64_t)Rs1_uw) << 2) + Rs2;
-                    }});
-                }
-                0x5: decode FUNCT7 {
-                    0x0: decode RVTYPE {
-                        0x1: srlw({{
-                            Rd_sd = (int32_t)(Rs1_uw >> Rs2<4:0>);
+                        0x4: zext_h({{
+                            Rd = Rs1_uh;
+                        }});
+                        0x10: sh2add_uw({{
+                            Rd = (((uint64_t)Rs1_uw) << 2) + Rs2;
                         }});
                     }
-                    0x1: decode RVTYPE {
+                    0x5: decode FUNCT7 {
+                        0x0: srlw({{
+                            Rd_sd = (int32_t)(Rs1_uw >> Rs2<4:0>);
+                        }});
                         0x1: divuw({{
                             if (Rs2_uw == 0) {
                                 Rd_sd = std::numeric_limits<uint64_t>::max();
@@ -1353,19 +1385,15 @@ decode QUADRANT default Unknown::unknown() {
                                 Rd_sd = (int32_t)(Rs1_uw/Rs2_uw);
                             }
                         }}, IntDivOp);
-                    }
-                    0x20: decode RVTYPE {
-                        0x1: sraw({{
+                        0x20: sraw({{
                             Rd_sd = Rs1_sw >> Rs2<4:0>;
                         }});
+                        0x30: rorw({{
+                            int shamt = Rs2 & (32 - 1);
+                            Rd = (int32_t) ((Rs1_uw >> shamt) | (Rs1_uw << ((32 - shamt) & (32 - 1))));
+                        }});
                     }
-                    0x30: rorw({{
-                        int shamt = Rs2 & (32 - 1);
-                        Rd = (int32_t) ((Rs1_uw >> shamt) | (Rs1_uw << ((32 - shamt) & (32 - 1))));
-                    }});
-                }
-                0x6:  decode FUNCT7 {
-                    0x1: decode RVTYPE {
+                    0x6:  decode FUNCT7 {
                         0x1: remw({{
                             constexpr int32_t kRsMin = \
                                 std::numeric_limits<int32_t>::min();
@@ -1377,13 +1405,11 @@ decode QUADRANT default Unknown::unknown() {
                                 Rd_sd = Rs1_sw%Rs2_sw;
                             }
                         }}, IntDivOp);
+                        0x10: sh3add_uw({{
+                            Rd = (((uint64_t)Rs1_uw) << 3) + Rs2;
+                        }});
                     }
-                    0x10: sh3add_uw({{
-                        Rd = (((uint64_t)Rs1_uw) << 3) + Rs2;
-                    }});
-                }
-                0x7: decode RVTYPE {
-                    0x1: remuw({{
+                    0x7: remuw({{
                         if (Rs2_uw == 0) {
                             Rd_sd = (int32_t)Rs1_uw;
                         } else {

From f96513fd042c3a1843eb4a3131d08b0fe0aa947f Mon Sep 17 00:00:00 2001
From: Emin Gadzhiev <e.gadzhiev.mhk@gmail.com>
Date: Fri, 2 Dec 2022 02:18:44 +0300
Subject: [PATCH 057/492] sim,sim-se: Fix restoring of VMAs of memory-mapped
 files

This patch fixes a problem that occurs when restoring from
a checkpoint where Mapped File Buffers are not restored. This
causes errors and unexpected behavior during further execution.
Since the checkpoint already has the size of the
area (address range) and the file name, only the offset is
missing to restore the Mapped File Buffer. Having the offset
value, it's possible to open those files for which an offset is
specified and create a VMA with a Mapped File Buffer.

Change-Id: Ib9dfa174cda6348b966b892184c36daeaba80e81
Signed-off-by: Emin Gadzhiev <e.gadzhiev.mhk@gmail.com>
Issue-On: https://gem5.atlassian.net/browse/GEM5-1302
Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/66311
Reviewed-by: Jason Lowe-Power <power.jg@gmail.com>
Tested-by: kokoro <noreply+kokoro@google.com>
Maintainer: Jason Lowe-Power <power.jg@gmail.com>
---
 src/sim/mem_state.hh | 18 +++++++++++++++++-
 src/sim/vma.cc       |  2 +-
 src/sim/vma.hh       |  6 ++++++
 3 files changed, 24 insertions(+), 2 deletions(-)

diff --git a/src/sim/mem_state.hh b/src/sim/mem_state.hh
index 05f2239f96..b2b50d0760 100644
--- a/src/sim/mem_state.hh
+++ b/src/sim/mem_state.hh
@@ -29,6 +29,9 @@
 #ifndef SRC_SIM_MEM_STATE_HH
 #define SRC_SIM_MEM_STATE_HH
 
+#include <fcntl.h>
+#include <unistd.h>
+
 #include <list>
 #include <memory>
 #include <string>
@@ -199,6 +202,9 @@ class MemState : public Serializable
         for (auto vma : _vmaList) {
             ScopedCheckpointSection sec(cp, csprintf("Vma%d", count++));
             paramOut(cp, "name", vma.getName());
+            if (vma.hasHostBuf()) {
+                paramOut(cp, "fileOffset", vma.getFileMappingOffset());
+            }
             paramOut(cp, "addrRangeStart", vma.start());
             paramOut(cp, "addrRangeEnd", vma.end());
         }
@@ -223,10 +229,20 @@ class MemState : public Serializable
             std::string name;
             Addr start;
             Addr end;
+            off_t offset = 0;
+            int host_fd = -1;
             paramIn(cp, "name", name);
+            if (optParamIn(cp, "fileOffset", offset, false)) {
+                host_fd = open(name.c_str(), O_RDONLY);
+                fatal_if(host_fd < 0,
+                         "Failed to open %s file "
+                         "while unserializing file-backed VMA\n", name);
+            }
             paramIn(cp, "addrRangeStart", start);
             paramIn(cp, "addrRangeEnd", end);
-            _vmaList.emplace_back(AddrRange(start, end), _pageBytes, name);
+            _vmaList.emplace_back(AddrRange(start, end), _pageBytes, name,
+                                  host_fd, offset);
+            close(host_fd);
         }
     }
 
diff --git a/src/sim/vma.cc b/src/sim/vma.cc
index 7e5ed1c491..ff5a4fe3cc 100644
--- a/src/sim/vma.cc
+++ b/src/sim/vma.cc
@@ -120,7 +120,7 @@ VMA::sanityCheck()
 
 VMA::MappedFileBuffer::MappedFileBuffer(int fd, size_t length,
                                         off_t offset)
-    : _buffer(nullptr), _length(length)
+    : _buffer(nullptr), _length(length), _offset(offset)
 {
     panic_if(_length == 0, "Tried to mmap file of length zero");
 
diff --git a/src/sim/vma.hh b/src/sim/vma.hh
index b238a2e416..8f2a77f36c 100644
--- a/src/sim/vma.hh
+++ b/src/sim/vma.hh
@@ -105,6 +105,10 @@ class VMA
     void sliceRegionLeft(Addr slice_addr);
 
     const std::string& getName() { return _vmaName; }
+    off_t getFileMappingOffset() const
+    {
+        return hasHostBuf() ? _origHostBuf->getOffset() : 0;
+    }
 
     /**
      * Defer AddrRange related calls to the AddrRange.
@@ -191,10 +195,12 @@ class VMA
 
         void *getBuffer() const { return _buffer; }
         uint64_t getLength() const { return _length; }
+        off_t getOffset() const { return _offset; }
 
       private:
         void *_buffer;       // Host buffer ptr
         size_t _length;       // Length of host ptr
+        off_t _offset;       // Offset in file at which mapping starts
     };
 };
 

From 8b1688da340e5573a0c6f00c835f08ac5f73f963 Mon Sep 17 00:00:00 2001
From: Gabe Black <gabeblack@google.com>
Date: Sat, 10 Dec 2022 02:29:43 -0800
Subject: [PATCH 058/492] dev: Introduce a reset() method on RegisterBank and
 Register classes.

This will make it much easier to implement reset behaviors on devices
which have RegisterBanks in them.

Change-Id: I73fe9874fcb69feed33611a320dcca85c0de2d0e
Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/66671
Tested-by: kokoro <noreply+kokoro@google.com>
Maintainer: Gabe Black <gabeblack@google.com>
Reviewed-by: Yu-hsin Wang <yuhsingw@google.com>
Reviewed-by: Jui-min Lee <fcrh@google.com>
---
 src/dev/reg_bank.hh        | 41 ++++++++++++++++++++++++++++++++++++--
 src/dev/serial/uart8250.hh |  7 +++++++
 2 files changed, 46 insertions(+), 2 deletions(-)

diff --git a/src/dev/reg_bank.hh b/src/dev/reg_bank.hh
index 31c0ce5b66..66d668b338 100644
--- a/src/dev/reg_bank.hh
+++ b/src/dev/reg_bank.hh
@@ -117,6 +117,11 @@
  * RegisterBankLE and RegisterBankBE aliases to make it a little easier to
  * refer to one or the other version.
  *
+ * A RegisterBank also has a reset() method which will (by default) call the
+ * reset() method on each register within it. This method is virtual, and so
+ * can be overridden if something additional or different needs to be done to
+ * reset the hardware model.
+ *
  *
  * == Register interface ==
  *
@@ -145,6 +150,12 @@
  * it still has to implement these methods, but they don't have to actually do
  * anything.
  *
+ * Each register also has a "reset" method, which will reset the register as
+ * if its containing device is being reset. By default, this will just restore
+ * the initial value of the register, but can be overridden to implement
+ * additional behavior like resetting other aspects of the device which are
+ * controlled by the value of the register.
+ *
  *
  * == Basic Register types ==
  *
@@ -360,6 +371,9 @@ class RegisterBank : public RegisterBankBase
         // Methods for implementing serialization for checkpoints.
         virtual void serialize(std::ostream &os) const = 0;
         virtual bool unserialize(const std::string &s) = 0;
+
+        // Reset the register.
+        virtual void reset() = 0;
     };
 
     // Filler registers which return a fixed pattern.
@@ -388,6 +402,9 @@ class RegisterBank : public RegisterBankBase
 
         void serialize(std::ostream &os) const override {}
         bool unserialize(const std::string &s) override { return true; }
+
+        // Resetting a read only register doesn't need to do anything.
+        void reset() override {}
     };
 
     // Register which reads as all zeroes.
@@ -453,6 +470,10 @@ class RegisterBank : public RegisterBankBase
         void serialize(std::ostream &os) const override {}
         bool unserialize(const std::string &s) override { return true; }
 
+        // Assume since the buffer is managed externally, it will be reset
+        // externally.
+        void reset() override {}
+
       protected:
         /**
          * This method exists so that derived classes that need to initialize
@@ -516,6 +537,8 @@ class RegisterBank : public RegisterBankBase
 
             return true;
         }
+
+        void reset() override { buffer = std::array<uint8_t, BufBytes>{}; }
     };
 
     template <typename Data, ByteOrder RegByteOrder=BankByteOrder>
@@ -534,6 +557,7 @@ class RegisterBank : public RegisterBankBase
 
       private:
         Data _data = {};
+        Data _resetData = {};
         Data _writeMask = mask(sizeof(Data) * 8);
 
         ReadFunc _reader = defaultReader;
@@ -602,11 +626,13 @@ class RegisterBank : public RegisterBankBase
 
         // Constructor and move constructor with an initial data value.
         constexpr Register(const std::string &new_name, const Data &new_data) :
-            RegisterBase(new_name, sizeof(Data)), _data(new_data)
+            RegisterBase(new_name, sizeof(Data)), _data(new_data),
+            _resetData(new_data)
         {}
         constexpr Register(const std::string &new_name,
                            const Data &&new_data) :
-            RegisterBase(new_name, sizeof(Data)), _data(new_data)
+            RegisterBase(new_name, sizeof(Data)), _data(new_data),
+            _resetData(new_data)
         {}
 
         // Set which bits of the register are writeable.
@@ -789,6 +815,9 @@ class RegisterBank : public RegisterBankBase
         {
             return ParseParam<Data>::parse(s, get());
         }
+
+        // Reset our data to its initial value.
+        void reset() override { get() = _resetData; }
     };
 
   private:
@@ -984,6 +1013,14 @@ class RegisterBank : public RegisterBankBase
             }
         }
     }
+
+    // By default, reset all the registers in the bank.
+    virtual void
+    reset()
+    {
+        for (auto &[offset, reg]: _offsetMap)
+            reg.get().reset();
+    }
 };
 
 using RegisterBankLE = RegisterBank<ByteOrder::little>;
diff --git a/src/dev/serial/uart8250.hh b/src/dev/serial/uart8250.hh
index c55d889c82..5774f78aab 100644
--- a/src/dev/serial/uart8250.hh
+++ b/src/dev/serial/uart8250.hh
@@ -113,6 +113,13 @@ class Uart8250 : public Uart
 
             void serialize(std::ostream &os) const override {}
             bool unserialize(const std::string &s) override { return true; }
+
+            void
+            reset() override
+            {
+                _reg1.reset();
+                _reg2.reset();
+            }
         };
 
         class BankedRegister : public PairedRegister

From 7a21ecf15cddb2dcd545574bfeb530d2d7dcee13 Mon Sep 17 00:00:00 2001
From: Gabe Black <gabeblack@google.com>
Date: Mon, 12 Dec 2022 23:51:14 -0800
Subject: [PATCH 059/492] dev: Implement a "Signal" port which has a templated
 State type.

This port type transmits a value of the templated State type. When the
value changes, the sink port will call the registered callback with
the new value.

Change-Id: I72eaf74658a2c63bece95e48c1a72694874eaad8
Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/66672
Maintainer: Gabe Black <gabeblack@google.com>
Reviewed-by: Jui-min Lee <fcrh@google.com>
Tested-by: kokoro <noreply+kokoro@google.com>
Reviewed-by: Yu-hsin Wang <yuhsingw@google.com>
---
 src/sim/signal.hh | 131 ++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 131 insertions(+)
 create mode 100644 src/sim/signal.hh

diff --git a/src/sim/signal.hh b/src/sim/signal.hh
new file mode 100644
index 0000000000..3cb3f62c0d
--- /dev/null
+++ b/src/sim/signal.hh
@@ -0,0 +1,131 @@
+/*
+ * Copyright 2022 Google, Inc.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are
+ * met: redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer;
+ * redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution;
+ * neither the name of the copyright holders nor the names of its
+ * contributors may be used to endorse or promote products derived from
+ * this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef __SIM_SIGNAL_HH__
+#define __SIM_SIGNAL_HH__
+
+#include <functional>
+
+#include "base/logging.hh"
+#include "sim/port.hh"
+
+namespace gem5
+{
+
+template <typename State>
+class SignalSourcePort;
+
+template <typename State>
+class SignalSinkPort : public Port
+{
+  public:
+    using OnChangeFunc = std::function<void(const State &new_val)>;
+
+  private:
+    friend SignalSourcePort<State>;
+
+    SignalSourcePort<State> *_source = nullptr;
+
+    State _state = {};
+    OnChangeFunc _onChange;
+
+  protected:
+    void
+    set(const State &new_state)
+    {
+        if (new_state == _state)
+            return;
+
+        _state = new_state;
+        if (_onChange)
+            _onChange(_state);
+    }
+
+  public:
+    SignalSinkPort(const std::string &_name, PortID _id=InvalidPortID) :
+        Port(_name, _id)
+    {}
+
+    const State &state() const { return _state; }
+    void onChange(OnChangeFunc func) { _onChange = std::move(func); }
+
+    void
+    bind(Port &peer) override
+    {
+        _source = dynamic_cast<SignalSourcePort<State> *>(&peer);
+        fatal_if(!_source, "Attempt to bind signal pin %s to "
+                "incompatible pin %s", name(), peer.name());
+        Port::bind(peer);
+    }
+    void
+    unbind() override
+    {
+        _source = nullptr;
+        Port::unbind();
+    }
+};
+
+template <typename State>
+class SignalSourcePort : public Port
+{
+  private:
+    SignalSinkPort<State> *sink = nullptr;
+    State _state = {};
+
+  public:
+    SignalSourcePort(const std::string &_name, PortID _id=InvalidPortID) :
+        Port(_name, _id)
+    {}
+
+    void
+    set(const State &new_state)
+    {
+        _state = new_state;
+        sink->set(new_state);
+    }
+
+    const State &state() const { return _state; }
+
+    void
+    bind(Port &peer) override
+    {
+        sink = dynamic_cast<SignalSinkPort<State> *>(&peer);
+        fatal_if(!sink, "Attempt to bind signal pin %s to "
+                "incompatible pin %s", name(), peer.name());
+        Port::bind(peer);
+    }
+    void
+    unbind() override
+    {
+        sink = nullptr;
+        Port::unbind();
+    }
+};
+
+} // namespace gem5
+
+#endif //__SIM_SIGNAL_HH__

From 89d5bfca7ce79bbce1f01471c578978d029b8f22 Mon Sep 17 00:00:00 2001
From: Gabe Black <gabeblack@google.com>
Date: Mon, 12 Dec 2022 23:59:21 -0800
Subject: [PATCH 060/492] fastmodel,dev: Rework the Int*Pin classes with
 Signal*Port.

These are largely compatibility wrappers around the Signal*Port
classes. The python versions of these types enforce more specific
compatibility, but on the c++ side the Signal*Port<bool> classes can
be used directly instead.

Change-Id: I1325074d0ed1c8fc6dfece5ac1ee33872cc4f5e3
Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/66673
Maintainer: Gabe Black <gabeblack@google.com>
Reviewed-by: Yu-hsin Wang <yuhsingw@google.com>
Tested-by: kokoro <noreply+kokoro@google.com>
---
 .../arm/fastmodel/common/signal_sender.hh     |  10 +-
 src/dev/SConscript                            |   1 -
 src/dev/intpin.cc                             |  67 -----------
 src/dev/intpin.hh                             | 109 +++++++-----------
 4 files changed, 48 insertions(+), 139 deletions(-)
 delete mode 100644 src/dev/intpin.cc

diff --git a/src/arch/arm/fastmodel/common/signal_sender.hh b/src/arch/arm/fastmodel/common/signal_sender.hh
index f4772cfad6..812a1b95c8 100644
--- a/src/arch/arm/fastmodel/common/signal_sender.hh
+++ b/src/arch/arm/fastmodel/common/signal_sender.hh
@@ -50,11 +50,11 @@ class SignalSender : public IntSinkPinBase
     SignalSender(const std::string &_name, PortID _id) :
         IntSinkPinBase(_name, _id, 0),
         signal_out((_name + ".sig").c_str())
-    {}
-
-  private:
-    void raiseOnDevice() override { signal_out.set_state(id, true); }
-    void lowerOnDevice() override { signal_out.set_state(id, false); }
+    {
+        onChange([this](const bool &new_val) {
+            signal_out.set_state(id, new_val);
+        });
+    }
 };
 
 } // namespace fastmodel
diff --git a/src/dev/SConscript b/src/dev/SConscript
index 6a6ce40dcc..d991ed53a9 100644
--- a/src/dev/SConscript
+++ b/src/dev/SConscript
@@ -36,7 +36,6 @@ Source('dma_device.cc')
 Source('dma_virt_device.cc')
 
 SimObject('IntPin.py', sim_objects=[])
-Source('intpin.cc')
 
 SimObject('ResetPort.py', sim_objects=[])
 Source('reset_port.cc')
diff --git a/src/dev/intpin.cc b/src/dev/intpin.cc
deleted file mode 100644
index c378337d4a..0000000000
--- a/src/dev/intpin.cc
+++ /dev/null
@@ -1,67 +0,0 @@
-/*
- * Copyright 2019 Google, Inc.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are
- * met: redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer;
- * redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution;
- * neither the name of the copyright holders nor the names of its
- * contributors may be used to endorse or promote products derived from
- * this software without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- */
-
-#include "dev/intpin.hh"
-
-#include "base/logging.hh"
-
-namespace gem5
-{
-
-void
-IntSinkPinBase::bind(Port &peer)
-{
-    source = dynamic_cast<IntSourcePinBase *>(&peer);
-    fatal_if(!source, "Attempt to bind interrupt sink pin %s to "
-            "incompatible port %s.", name(), peer.name());
-    Port::bind(peer);
-}
-
-void
-IntSinkPinBase::unbind()
-{
-    source = nullptr;
-    Port::unbind();
-}
-
-void
-IntSourcePinBase::bind(Port &peer)
-{
-    sink = dynamic_cast<IntSinkPinBase *>(&peer);
-    fatal_if(!sink, "Attempt to bind interrupt source pin %s to "
-            "incompatible port %s.", name(), peer.name());
-    Port::bind(peer);
-}
-
-void
-IntSourcePinBase::unbind()
-{
-    sink = nullptr;
-    Port::unbind();
-}
-
-} // namespace gem5
diff --git a/src/dev/intpin.hh b/src/dev/intpin.hh
index 2704887f35..578dd90d9c 100644
--- a/src/dev/intpin.hh
+++ b/src/dev/intpin.hh
@@ -28,94 +28,71 @@
 #ifndef __DEV_INTPIN_HH__
 #define __DEV_INTPIN_HH__
 
-#include "sim/port.hh"
+#include <type_traits>
+
+#include "sim/signal.hh"
 
 namespace gem5
 {
 
-class IntSourcePinBase;
-
-class IntSinkPinBase : public Port
+class IntSinkPinBase : public SignalSinkPort<bool>
 {
-  protected:
-    friend IntSourcePinBase;
+  private:
+    const int _number = 0;
 
-    IntSourcePinBase *source = nullptr;
+  public:
 
-    int _number = 0;
-    bool _state = false;
+    template <class Device>
+    IntSinkPinBase(const std::string &_name, PortID _id, Device *dev,
+            int num) :
+        SignalSinkPort(_name, _id), _number(num)
+    {
+        onChange([dev, num](const bool &new_val) {
+            if (new_val)
+                dev->raiseInterruptPin(num);
+            else
+                dev->lowerInterruptPin(num);
+        });
+    }
+
+    template <class Device>
+    IntSinkPinBase(const std::string &_name, PortID _id, Device *dev) :
+        IntSinkPinBase(_name, _id, dev, _id)
+    {}
 
     IntSinkPinBase(const std::string &_name, PortID _id, int num) :
-        Port(_name, _id), _number(num)
+        SignalSinkPort(_name, _id), _number(num)
     {}
 
-    virtual void raiseOnDevice() = 0;
-    virtual void lowerOnDevice() = 0;
+    IntSinkPinBase(const std::string &_name, PortID _id) :
+        IntSinkPinBase(_name, _id, _id)
+    {}
 
-    void
-    raise()
-    {
-        _state = true;
-        raiseOnDevice();
-    }
-
-    void
-    lower()
-    {
-        _state = false;
-        lowerOnDevice();
-    }
-
-  public:
     int number() { return _number; }
-    bool state() { return _state; }
-
-    void bind(Port &peer) override;
-    void unbind() override;
 };
 
-template <class Device>
-class IntSinkPin : public IntSinkPinBase
+template <class Compat>
+using IntSinkPin = IntSinkPinBase;
+
+class IntSourcePinBase : public SignalSourcePort<bool>
 {
-  private:
-    Device *device = nullptr;
-
-    void raiseOnDevice() override { device->raiseInterruptPin(number()); }
-    void lowerOnDevice() override { device->lowerInterruptPin(number()); }
-
   public:
-    IntSinkPin(const std::string &_name, PortID _id, Device *dev, int num) :
-        IntSinkPinBase(_name, _id, num), device(dev) {}
-    IntSinkPin(const std::string &_name, PortID _id, Device *dev) :
-        IntSinkPin(_name, _id, dev, _id) {}
-};
-
-class IntSourcePinBase : public Port
-{
-  private:
-    IntSinkPinBase *sink = nullptr;
-
-  public:
-    IntSourcePinBase(const std::string &_name, PortID _id):
-        Port(_name, _id)
+    template <class Device>
+    IntSourcePinBase(const std::string &_name, PortID _id, Device *owner) :
+        SignalSourcePort(_name, _id)
     {}
 
-    void raise() { sink->raise(); }
-    void lower() { sink->lower(); }
-
-    void bind(Port &peer) override;
-    void unbind() override;
-};
-
-template <class Device>
-class IntSourcePin : public IntSourcePinBase
-{
-  public:
-    IntSourcePin(const std::string &_name, PortID _id, Device *owner) :
-        IntSourcePinBase(_name, _id)
+    IntSourcePinBase(const std::string &_name, PortID _id) :
+        SignalSourcePort(_name, _id)
     {}
+
+    void raise() { set(true); }
+    void lower() { set(false); }
 };
 
+template <class Compat>
+using IntSourcePin = IntSourcePinBase;
+
 } // namespace gem5
 
 #endif //__DEV_INTPIN_HH__

From 0aaaa6b4ae61c7a57c1a44d1c615dcafc057e0a8 Mon Sep 17 00:00:00 2001
From: Gabe Black <gabeblack@google.com>
Date: Tue, 13 Dec 2022 00:32:08 -0800
Subject: [PATCH 061/492] fastmodel: Change the Signal proxies to use
 Signal*Port<bool>.

Change-Id: Ia1aa32d5ea50ff4cc47d1d72a9c25dabd6c30de9
Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/66674
Maintainer: Gabe Black <gabeblack@google.com>
Tested-by: kokoro <noreply+kokoro@google.com>
Reviewed-by: Yu-hsin Wang <yuhsingw@google.com>
---
 src/arch/arm/fastmodel/common/signal_receiver.hh | 6 +++---
 src/arch/arm/fastmodel/common/signal_sender.hh   | 7 +++----
 2 files changed, 6 insertions(+), 7 deletions(-)

diff --git a/src/arch/arm/fastmodel/common/signal_receiver.hh b/src/arch/arm/fastmodel/common/signal_receiver.hh
index 990787743b..9ec760e32b 100644
--- a/src/arch/arm/fastmodel/common/signal_receiver.hh
+++ b/src/arch/arm/fastmodel/common/signal_receiver.hh
@@ -87,7 +87,7 @@ class SignalReceiver : public amba_pv::signal_slave_base<bool>
 class SignalReceiverInt : public SignalReceiver
 {
   public:
-    using IntPin = IntSourcePin<SignalReceiverInt>;
+    using IntPin = SignalSourcePort<bool>;
 
     explicit SignalReceiverInt(const std::string &name)
         : SignalReceiver(name)
@@ -95,7 +95,7 @@ class SignalReceiverInt : public SignalReceiver
         onChange([this](bool status) {
             for (auto &signal : signalOut) {
                 if (signal && signal->isConnected())
-                    status ? signal->raise() : signal->lower();
+                    signal->set(status);
             }
         });
     }
@@ -108,7 +108,7 @@ class SignalReceiverInt : public SignalReceiver
         }
         if (!signalOut[idx]) {
             signalOut[idx] = std::make_unique<IntPin>(
-                csprintf("%s.signalOut[%d]", get_name(), idx), idx, this);
+                csprintf("%s.signalOut[%d]", get_name(), idx), idx);
         }
         return *signalOut[idx];
     }
diff --git a/src/arch/arm/fastmodel/common/signal_sender.hh b/src/arch/arm/fastmodel/common/signal_sender.hh
index 812a1b95c8..c596ed108e 100644
--- a/src/arch/arm/fastmodel/common/signal_sender.hh
+++ b/src/arch/arm/fastmodel/common/signal_sender.hh
@@ -33,7 +33,7 @@
 #include <amba_pv.h>
 #pragma GCC diagnostic pop
 
-#include "dev/intpin.hh"
+#include "sim/signal.hh"
 
 namespace gem5
 {
@@ -42,14 +42,13 @@ GEM5_DEPRECATED_NAMESPACE(FastModel, fastmodel);
 namespace fastmodel
 {
 
-class SignalSender : public IntSinkPinBase
+class SignalSender : public SignalSinkPort<bool>
 {
   public:
     amba_pv::signal_master_port<bool> signal_out;
 
     SignalSender(const std::string &_name, PortID _id) :
-        IntSinkPinBase(_name, _id, 0),
-        signal_out((_name + ".sig").c_str())
+        SignalSinkPort(_name, _id), signal_out((_name + ".sig").c_str())
     {
         onChange([this](const bool &new_val) {
             signal_out.set_state(id, new_val);

From fbd0722de4f4929f2d26f98f92427e33ef4fd775 Mon Sep 17 00:00:00 2001
From: Gabe Black <gabeblack@google.com>
Date: Tue, 13 Dec 2022 02:17:22 -0800
Subject: [PATCH 062/492] fastmodel,dev: Replace the reset port with a
 Signal*Port<bool>.

The ResetRequestPort and ResetResponsePort have a few problems:

1. A reset signal should happen during the time a reset is asserted,
or in other words the device should stay in reset and not doing
anything while reset is asserted. It should not immediately restart
execution while the reset is still held.

2. These names are misleading, since there is no response. These names
are inherited from other port types where there is an actual response.

There is a new generic SignalSourcePort and SignalSinkPort set of port
classes which are templated on the type of signal they propogate, and
which can be used in place of reset ports in c++. These ports can
still have a specialized role which will ensure that only reset ports
are connected to each other for a form of type checking, although
the underlying c++ instances are more interoperable than that.

Change-Id: Id98bef901ab61ac5b200dbbe49439bb2d2e6c57f
Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/66675
Maintainer: Gabe Black <gabeblack@google.com>
Reviewed-by: Yu-hsin Wang <yuhsingw@google.com>
Tested-by: kokoro <noreply+kokoro@google.com>
---
 src/arch/arm/fastmodel/CortexA76/evs.cc       | 29 +++-----
 src/arch/arm/fastmodel/CortexA76/evs.hh       |  6 +-
 src/arch/arm/fastmodel/CortexR52/evs.cc       | 12 +++-
 src/arch/arm/fastmodel/CortexR52/evs.hh       | 20 +-----
 .../arm/fastmodel/reset_controller/example.cc | 16 ++---
 .../arm/fastmodel/reset_controller/example.hh |  7 +-
 src/dev/SConscript                            |  2 -
 src/dev/reset_port.cc                         | 57 ---------------
 src/dev/reset_port.hh                         | 72 -------------------
 9 files changed, 33 insertions(+), 188 deletions(-)
 delete mode 100644 src/dev/reset_port.cc
 delete mode 100644 src/dev/reset_port.hh

diff --git a/src/arch/arm/fastmodel/CortexA76/evs.cc b/src/arch/arm/fastmodel/CortexA76/evs.cc
index 1c069351ca..c9ce3cc656 100644
--- a/src/arch/arm/fastmodel/CortexA76/evs.cc
+++ b/src/arch/arm/fastmodel/CortexA76/evs.cc
@@ -70,23 +70,6 @@ ScxEvsCortexA76<Types>::setResetAddr(int core, Addr addr, bool secure)
     this->rvbaraddr[core]->set_state(0, addr);
 }
 
-template <class Types>
-void
-ScxEvsCortexA76<Types>::requestReset()
-{
-    // Reset all cores.
-    for (auto &poweron_reset : this->poweron_reset) {
-        poweron_reset->signal_out.set_state(0, true);
-        poweron_reset->signal_out.set_state(0, false);
-    }
-    // Reset DSU.
-    this->top_reset.signal_out.set_state(0, true);
-    this->top_reset.signal_out.set_state(0, false);
-    // Reset debug APB.
-    this->dbg_reset.signal_out.set_state(0, true);
-    this->dbg_reset.signal_out.set_state(0, false);
-}
-
 template <class Types>
 ScxEvsCortexA76<Types>::ScxEvsCortexA76(
         const sc_core::sc_module_name &mod_name, const Params &p) :
@@ -94,9 +77,19 @@ ScxEvsCortexA76<Types>::ScxEvsCortexA76(
     amba(Base::amba, p.name + ".amba", -1),
     top_reset(p.name + ".top_reset", 0),
     dbg_reset(p.name + ".dbg_reset", 0),
-    model_reset(p.name + ".model_reset", -1, this),
+    model_reset(p.name + ".model_reset"),
     params(p)
 {
+    model_reset.onChange([this](const bool &new_val) {
+        // Set reset for all cores.
+        for (auto &poweron_reset : poweron_reset)
+            poweron_reset->signal_out.set_state(0, new_val);
+        // Set reset for DSU.
+        top_reset.signal_out.set_state(0, new_val);
+        // Set reset for debug APB.
+        dbg_reset.signal_out.set_state(0, new_val);
+    });
+
     for (int i = 0; i < CoreCount; i++) {
         redist.emplace_back(new TlmGicTarget(this->redistributor[i],
                     csprintf("%s.redistributor[%d]", name(), i), i));
diff --git a/src/arch/arm/fastmodel/CortexA76/evs.hh b/src/arch/arm/fastmodel/CortexA76/evs.hh
index 081e80f701..7c4ef601a7 100644
--- a/src/arch/arm/fastmodel/CortexA76/evs.hh
+++ b/src/arch/arm/fastmodel/CortexA76/evs.hh
@@ -35,7 +35,6 @@
 #include "arch/arm/fastmodel/common/signal_sender.hh"
 #include "arch/arm/fastmodel/iris/cpu.hh"
 #include "arch/arm/fastmodel/protocol/exported_clock_rate_control.hh"
-#include "dev/reset_port.hh"
 #include "mem/port_proxy.hh"
 #include "params/FastModelScxEvsCortexA76x1.hh"
 #include "params/FastModelScxEvsCortexA76x2.hh"
@@ -45,6 +44,7 @@
 #include "scx_evs_CortexA76x2.h"
 #include "scx_evs_CortexA76x3.h"
 #include "scx_evs_CortexA76x4.h"
+#include "sim/signal.hh"
 #include "systemc/ext/core/sc_event.hh"
 #include "systemc/ext/core/sc_module.hh"
 #include "systemc/tlm_port_wrapper.hh"
@@ -99,7 +99,7 @@ class ScxEvsCortexA76 : public Types::Base, public Iris::BaseCpuEvs
 
     SignalSender dbg_reset;
 
-    ResetResponsePort<ScxEvsCortexA76> model_reset;
+    SignalSinkPort<bool> model_reset;
 
     CortexA76Cluster *gem5CpuCluster;
 
@@ -129,8 +129,6 @@ class ScxEvsCortexA76 : public Types::Base, public Iris::BaseCpuEvs
     void setCluster(SimObject *cluster) override;
 
     void setResetAddr(int core, Addr addr, bool secure) override;
-
-    void requestReset();
 };
 
 struct ScxEvsCortexA76x1Types
diff --git a/src/arch/arm/fastmodel/CortexR52/evs.cc b/src/arch/arm/fastmodel/CortexR52/evs.cc
index 734323e026..0ad3f18412 100644
--- a/src/arch/arm/fastmodel/CortexR52/evs.cc
+++ b/src/arch/arm/fastmodel/CortexR52/evs.cc
@@ -101,9 +101,19 @@ ScxEvsCortexR52<Types>::ScxEvsCortexR52(
     ext_slave(Base::ext_slave, p.name + ".ext_slave", -1),
     top_reset(p.name + ".top_reset", 0),
     dbg_reset(p.name + ".dbg_reset", 0),
-    model_reset(p.name + ".model_reset", -1, this),
+    model_reset(p.name + ".model_reset"),
     params(p)
 {
+    model_reset.onChange([this](const bool &new_val) {
+        // Set reset for all cores.
+        for (auto &core_pin : corePins)
+            core_pin->poweron_reset.signal_out.set_state(0, new_val);
+        // Set reset for L2 system.
+        top_reset.signal_out.set_state(0, new_val);
+        // Set reset for debug APB.
+        dbg_reset.signal_out.set_state(0, new_val);
+    });
+
     for (int i = 0; i < CoreCount; i++)
         corePins.emplace_back(new CorePins(this, i));
 
diff --git a/src/arch/arm/fastmodel/CortexR52/evs.hh b/src/arch/arm/fastmodel/CortexR52/evs.hh
index 02ef1ae257..9cebec3846 100644
--- a/src/arch/arm/fastmodel/CortexR52/evs.hh
+++ b/src/arch/arm/fastmodel/CortexR52/evs.hh
@@ -37,7 +37,6 @@
 #include "arch/arm/fastmodel/protocol/exported_clock_rate_control.hh"
 #include "arch/arm/fastmodel/protocol/signal_interrupt.hh"
 #include "dev/intpin.hh"
-#include "dev/reset_port.hh"
 #include "mem/port_proxy.hh"
 #include "params/FastModelScxEvsCortexR52x1.hh"
 #include "params/FastModelScxEvsCortexR52x2.hh"
@@ -47,6 +46,7 @@
 #include "scx_evs_CortexR52x2.h"
 #include "scx_evs_CortexR52x3.h"
 #include "scx_evs_CortexR52x4.h"
+#include "sim/signal.hh"
 #include "systemc/ext/core/sc_event.hh"
 #include "systemc/ext/core/sc_module.hh"
 #include "systemc/tlm_port_wrapper.hh"
@@ -127,7 +127,7 @@ class ScxEvsCortexR52 : public Types::Base, public Iris::BaseCpuEvs
 
     SignalSender dbg_reset;
 
-    ResetResponsePort<ScxEvsCortexR52> model_reset;
+    SignalSinkPort<bool> model_reset;
 
     CortexR52Cluster *gem5CpuCluster;
 
@@ -149,22 +149,6 @@ class ScxEvsCortexR52 : public Types::Base, public Iris::BaseCpuEvs
         this->signalInterrupt->spi(num, false);
     }
 
-    void
-    requestReset()
-    {
-        // Reset all cores.
-        for (auto &core_pin : corePins) {
-            core_pin->poweron_reset.signal_out.set_state(0, true);
-            core_pin->poweron_reset.signal_out.set_state(0, false);
-        }
-        // Reset L2 system.
-        this->top_reset.signal_out.set_state(0, true);
-        this->top_reset.signal_out.set_state(0, false);
-        // Reset debug APB.
-        this->dbg_reset.signal_out.set_state(0, true);
-        this->dbg_reset.signal_out.set_state(0, false);
-    }
-
     Port &gem5_getPort(const std::string &if_name, int idx) override;
 
     void
diff --git a/src/arch/arm/fastmodel/reset_controller/example.cc b/src/arch/arm/fastmodel/reset_controller/example.cc
index 33769acb30..04dfa3bf10 100644
--- a/src/arch/arm/fastmodel/reset_controller/example.cc
+++ b/src/arch/arm/fastmodel/reset_controller/example.cc
@@ -37,8 +37,8 @@ namespace fastmodel
 {
 
 ResetControllerExample::CorePins::CorePins(const std::string &module_name)
-    : reset(module_name + ".reset", 0, this),
-      halt(module_name + ".halt", 0, this)
+    : reset(module_name + ".reset"),
+      halt(module_name + ".halt")
 {}
 
 ResetControllerExample::Registers::Registers(
@@ -65,22 +65,14 @@ ResetControllerExample::Registers::Registers(
           {
               panic_if(!pins->reset.isConnected(),
                        "%s is not connected.", pins->reset.name());
-
-              if (val)
-                  pins->reset.raise();
-              else
-                  pins->reset.lower();
+              pins->reset.set(val);
           });
       halt.writer(
           [this] (auto &reg, auto val)
           {
               panic_if(!pins->halt.isConnected(),
                        "%s is not connected.", pins->halt.name());
-
-              if (val)
-                  pins->halt.raise();
-              else
-                  pins->halt.lower();
+              pins->halt.set(val);
           });
 
       addRegisters({
diff --git a/src/arch/arm/fastmodel/reset_controller/example.hh b/src/arch/arm/fastmodel/reset_controller/example.hh
index 2805d6f077..af663236d5 100644
--- a/src/arch/arm/fastmodel/reset_controller/example.hh
+++ b/src/arch/arm/fastmodel/reset_controller/example.hh
@@ -31,11 +31,11 @@
 #include <string>
 
 #include "arch/arm/fastmodel/iris/cpu.hh"
-#include "dev/intpin.hh"
 #include "dev/io_device.hh"
 #include "dev/reg_bank.hh"
 #include "mem/packet_access.hh"
 #include "params/FastModelResetControllerExample.hh"
+#include "sim/signal.hh"
 
 namespace gem5
 {
@@ -48,9 +48,8 @@ class ResetControllerExample : public BasicPioDevice
   private:
     struct CorePins
     {
-        using CoreInt = IntSourcePin<CorePins>;
-        CoreInt reset;
-        CoreInt halt;
+        SignalSourcePort<bool> reset;
+        SignalSourcePort<bool> halt;
 
         explicit CorePins(const std::string &);
     };
diff --git a/src/dev/SConscript b/src/dev/SConscript
index d991ed53a9..a7714a22d7 100644
--- a/src/dev/SConscript
+++ b/src/dev/SConscript
@@ -36,9 +36,7 @@ Source('dma_device.cc')
 Source('dma_virt_device.cc')
 
 SimObject('IntPin.py', sim_objects=[])
-
 SimObject('ResetPort.py', sim_objects=[])
-Source('reset_port.cc')
 
 DebugFlag('IsaFake')
 DebugFlag('DMA')
diff --git a/src/dev/reset_port.cc b/src/dev/reset_port.cc
deleted file mode 100644
index 8d32c7d93c..0000000000
--- a/src/dev/reset_port.cc
+++ /dev/null
@@ -1,57 +0,0 @@
-/*
- * Copyright 2022 Google, Inc.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are
- * met: redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer;
- * redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution;
- * neither the name of the copyright holders nor the names of its
- * contributors may be used to endorse or promote products derived from
- * this software without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- */
-
-#include "dev/reset_port.hh"
-
-#include "base/logging.hh"
-
-namespace gem5
-{
-
-void
-ResetRequestPort::bind(Port &p)
-{
-    peer = dynamic_cast<ResetResponsePortBase*>(&p);
-    fatal_if(peer == nullptr, "Attempt to bind reset request port %s to "
-            "incompatible port %s.", name(), p.name());
-    Port::bind(p);
-}
-
-void
-ResetRequestPort::unbind()
-{
-    peer = nullptr;
-    Port::unbind();
-}
-
-void
-ResetRequestPort::requestReset()
-{
-    peer->requestReset();
-}
-
-} // namespace gem5
diff --git a/src/dev/reset_port.hh b/src/dev/reset_port.hh
deleted file mode 100644
index a08db1ca8e..0000000000
--- a/src/dev/reset_port.hh
+++ /dev/null
@@ -1,72 +0,0 @@
-/*
- * Copyright 2022 Google, Inc.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are
- * met: redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer;
- * redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution;
- * neither the name of the copyright holders nor the names of its
- * contributors may be used to endorse or promote products derived from
- * this software without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- */
-
-#ifndef __DEV_RESET_PORT_HH__
-#define __DEV_RESET_PORT_HH__
-
-#include "sim/port.hh"
-
-#include <string>
-
-namespace gem5
-{
-
-class ResetResponsePortBase : public Port
-{
-  public:
-    using Port::Port;
-    virtual void requestReset() = 0;
-};
-
-template <class Device>
-class ResetResponsePort : public ResetResponsePortBase
-{
-  public:
-    ResetResponsePort(const std::string &name, PortID id, Device *dev) :
-        ResetResponsePortBase(name, id), device(dev) {}
-    void requestReset() override { device->requestReset(); }
-
-  private:
-    Device *device = nullptr;
-};
-
-class ResetRequestPort : public Port
-{
-  public:
-    ResetRequestPort(const std::string &_name, PortID _id)
-        : Port(_name, _id) {}
-    void bind(Port &p) override;
-    void unbind() override;
-    void requestReset();
-
-  private:
-    ResetResponsePortBase *peer = nullptr;
-};
-
-} // namespace gem5
-
-#endif // __DEV_RESET_PORT_HH__

From af2cecf59e9cffbbc96bb88b9137da8ef6c74410 Mon Sep 17 00:00:00 2001
From: Matthew Poremba <matthew.poremba@amd.com>
Date: Thu, 15 Dec 2022 11:43:01 -0800
Subject: [PATCH 063/492] gpu-compute: Fix ABI init for DispatchId

DispatchId should allocate two SGPRs instead of one. Allocating one was
causing all subsequent SGPR index values to be off by one, leading to
bad addresses for things like flat scratch and private segment. This
field is not used very often so it was not impacting most applications.

Change-Id: I17744e2d099fbc0447f400211ba7f8a42675ea06
Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/66711
Reviewed-by: Matt Sinclair <mattdsinclair@gmail.com>
Maintainer: Matt Sinclair <mattdsinclair@gmail.com>
Tested-by: kokoro <noreply+kokoro@google.com>
---
 src/gpu-compute/wavefront.cc | 12 ++++++++++--
 1 file changed, 10 insertions(+), 2 deletions(-)

diff --git a/src/gpu-compute/wavefront.cc b/src/gpu-compute/wavefront.cc
index 7e4b36f7e5..8a1adfe802 100644
--- a/src/gpu-compute/wavefront.cc
+++ b/src/gpu-compute/wavefront.cc
@@ -118,8 +118,10 @@ Wavefront::initRegState(HSAQueueEntry *task, int wgSizeInWorkItems)
 {
     int regInitIdx = 0;
 
-    // iterate over all the init fields and check which
-    // bits are enabled
+    // Iterate over all the init fields and check which
+    // bits are enabled. Useful information can be found here:
+    // https://github.com/ROCm-Developer-Tools/ROCm-ComputeABI-Doc/
+    //                    blob/master/AMDGPU-ABI.md
     for (int en_bit = 0; en_bit < NumScalarInitFields; ++en_bit) {
 
         if (task->sgprBitEnabled(en_bit)) {
@@ -263,6 +265,12 @@ Wavefront::initRegState(HSAQueueEntry *task, int wgSizeInWorkItems)
                         computeUnit->cu_id, simdId,
                         wfSlotId, wfDynId, physSgprIdx,
                         task->dispatchId());
+
+                // Dispatch ID in gem5 is an int. Set upper 32-bits to zero.
+                physSgprIdx
+                    = computeUnit->registerManager->mapSgpr(this, regInitIdx);
+                computeUnit->srf[simdId]->write(physSgprIdx, 0);
+                ++regInitIdx;
                 break;
               case FlatScratchInit:
                 physSgprIdx

From 4cae2ae4adf202bd6fa520b6e6912088c8694dd9 Mon Sep 17 00:00:00 2001
From: Melissa Jost <melissakjost@gmail.com>
Date: Fri, 9 Dec 2022 16:17:25 -0800
Subject: [PATCH 064/492] tests: Remove get_runtime_isa() from
 parsec_disk_run.py

This change removes the call to get_runtime_isa(), as it has
been deprecated.

Change-Id: Ie1b0b5fb456fd8ed504a531841fe4ea8e211502c
Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/66612
Tested-by: kokoro <noreply+kokoro@google.com>
Reviewed-by: Jason Lowe-Power <power.jg@gmail.com>
Maintainer: Bobby Bruce <bbruce@ucdavis.edu>
---
 tests/gem5/configs/parsec_disk_run.py | 5 -----
 1 file changed, 5 deletions(-)

diff --git a/tests/gem5/configs/parsec_disk_run.py b/tests/gem5/configs/parsec_disk_run.py
index 4275ffbd3f..fbe1cd3688 100644
--- a/tests/gem5/configs/parsec_disk_run.py
+++ b/tests/gem5/configs/parsec_disk_run.py
@@ -214,11 +214,6 @@ board.set_kernel_disk_workload(
     readfile_contents=command,
 )
 
-print("Running with ISA: " + get_runtime_isa().name)
-print("Running with protocol: " + get_runtime_coherence_protocol().name)
-print()
-
-
 # Here we define some custom workbegin/workend exit event generators. Here we
 # want to switch to detailed CPUs at the beginning of the ROI, then continue to
 # the end of of the ROI. Then we exit the simulation.

From 06f18242fedb67280c425e9e7b533326445580b1 Mon Sep 17 00:00:00 2001
From: "Bobby R. Bruce" <bbruce@ucdavis.edu>
Date: Sat, 17 Dec 2022 03:11:53 -0800
Subject: [PATCH 065/492] tests: Fix compiler-tests.sh build args passing

Reverts this fix:
https://gem5-review.googlesource.com/c/public/gem5/+/66631

While this did fix the case where no build args were passed, it broke
the case where build args were passed.

This fix ensures the script works in both cases.

Change-Id: I6cc8cc0c2a10c801d4a59e54b070383ac8ee93ae
Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/66772
Reviewed-by: Matt Sinclair <mattdsinclair@gmail.com>
Maintainer: Matt Sinclair <mattdsinclair@gmail.com>
Maintainer: Bobby Bruce <bbruce@ucdavis.edu>
Reviewed-by: Jason Lowe-Power <power.jg@gmail.com>
Tested-by: kokoro <noreply+kokoro@google.com>
---
 tests/compiler-tests.sh | 10 ++++++++--
 1 file changed, 8 insertions(+), 2 deletions(-)

diff --git a/tests/compiler-tests.sh b/tests/compiler-tests.sh
index f16e8e5fdb..f5d4bb189f 100755
--- a/tests/compiler-tests.sh
+++ b/tests/compiler-tests.sh
@@ -76,7 +76,13 @@ builds_per_compiler=1
 base_url="gcr.io/gem5-test"
 
 # Arguments passed into scons on every build target test.
-build_args="$@"
+if [ $# -eq 0 ];then
+    # If none is sepcified by the user we pass "-j1" (compile on one thread).
+    # If `build_args` is left as an empty string, this script will fail.
+    build_args="-j1"
+else
+    build_args="$@"
+fi
 
 # Testing directory variables
 mkdir -p "${build_dir}" # Create the build directory if it doesn't exist.
@@ -135,7 +141,7 @@ for compiler in ${images[@]}; do
                 docker run --rm -v "${gem5_root}":"/gem5" -u $UID:$GID \
                     -w /gem5 --memory="${docker_mem_limit}" $repo_name \
                     /usr/bin/env python3 /usr/bin/scons --ignore-style \
-                    "${build_out} ${build_args}"
+                    "${build_out}" "${build_args}"
             }>"${build_stdout}" 2>"${build_stderr}"
             result=$?
 

From 7fb2fda841be0d61b00569c5a456fcdc0ab75bb1 Mon Sep 17 00:00:00 2001
From: "Daniel R. Carvalho" <odanrc@yahoo.com.br>
Date: Sun, 18 Dec 2022 10:33:10 -0300
Subject: [PATCH 066/492] base: Fix signature of SatCounter::saturate()

The variants that use more than 8 bits were broken,
since the size of the difference in those cases
could be larger than 8 bits, and the return value
was only 8-bits long.

Change-Id: I8b75be48f924cc33ebf5e5aeff6d4045fac66bcc
Signed-off-by: Daniel R. Carvalho <odanrc@yahoo.com.br>
Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/66791
Maintainer: Matt Sinclair <mattdsinclair@gmail.com>
Reviewed-by: Matt Sinclair <mattdsinclair@gmail.com>
Tested-by: kokoro <noreply+kokoro@google.com>
---
 src/base/sat_counter.hh      |  4 ++--
 src/base/sat_counter.test.cc | 14 ++++++++++++++
 2 files changed, 16 insertions(+), 2 deletions(-)

diff --git a/src/base/sat_counter.hh b/src/base/sat_counter.hh
index a607c4ca85..ecb8df8b49 100644
--- a/src/base/sat_counter.hh
+++ b/src/base/sat_counter.hh
@@ -318,9 +318,9 @@ class GenericSatCounter
      *
      * @ingroup api_sat_counter
      */
-    uint8_t saturate()
+    T saturate()
     {
-        const uint8_t diff = maxVal - counter;
+        const T diff = maxVal - counter;
         counter = maxVal;
         return diff;
     }
diff --git a/src/base/sat_counter.test.cc b/src/base/sat_counter.test.cc
index 07a01c7279..0a6459c23f 100644
--- a/src/base/sat_counter.test.cc
+++ b/src/base/sat_counter.test.cc
@@ -149,6 +149,20 @@ TEST(SatCounterTest, Saturate)
     ASSERT_TRUE(counter.isSaturated());
 }
 
+TEST(SatCounterTest, Saturate16)
+{
+    const unsigned bits = 14;
+    const unsigned max_value = (1 << bits) - 1;
+    SatCounter16 counter(bits);
+    counter++;
+    ASSERT_FALSE(counter.isSaturated());
+
+    // Make sure the value added is what was missing to saturate
+    const unsigned diff = counter.saturate();
+    ASSERT_EQ(diff, max_value - 1);
+    ASSERT_TRUE(counter.isSaturated());
+}
+
 /**
  * Test back and forth against an int.
  */

From 5447d55e398746df7b038da447048e0ce47c8460 Mon Sep 17 00:00:00 2001
From: Giacomo Travaglini <giacomo.travaglini@arm.com>
Date: Wed, 21 Dec 2022 09:00:30 +0000
Subject: [PATCH 067/492] dev: Fix -Wunused-variable in structured binding

Change-Id: Ia244767dd9d1dd7b72c320fb78e48f206694f5a2
Signed-off-by: Giacomo Travaglini <giacomo.travaglini@arm.com>
Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/66891
Tested-by: kokoro <noreply+kokoro@google.com>
Reviewed-by: Yu-hsin Wang <yuhsingw@google.com>
Reviewed-by: Jason Lowe-Power <power.jg@gmail.com>
Maintainer: Jason Lowe-Power <power.jg@gmail.com>
---
 src/dev/reg_bank.hh | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/src/dev/reg_bank.hh b/src/dev/reg_bank.hh
index 66d668b338..32d9058a15 100644
--- a/src/dev/reg_bank.hh
+++ b/src/dev/reg_bank.hh
@@ -1018,8 +1018,8 @@ class RegisterBank : public RegisterBankBase
     virtual void
     reset()
     {
-        for (auto &[offset, reg]: _offsetMap)
-            reg.get().reset();
+        for (auto &it: _offsetMap)
+            it.second.get().reset();
     }
 };
 

From 6797c78942ade19c6863cbace528c84d913775aa Mon Sep 17 00:00:00 2001
From: Roger Chang <rogerycchang@google.com>
Date: Fri, 16 Dec 2022 13:27:14 +0800
Subject: [PATCH 068/492] arch-riscv: Refactor compressed instructions

1. C.JAL should use CJOp format to generate code
2. Use sext function to handle MSB for immediate
3. Add IsCall flags to c.jal, c.jalr
4. Use JumpConstructor to CJOp format

Change-Id: Id01c0d7cc1a3e17776890268879c568fc9996bc5
Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/66732
Reviewed-by: Yu-hsin Wang <yuhsingw@google.com>
Maintainer: Jason Lowe-Power <power.jg@gmail.com>
Tested-by: kokoro <noreply+kokoro@google.com>
---
 src/arch/riscv/isa/decoder.isa            | 136 +++++++++-------------
 src/arch/riscv/isa/formats/compressed.isa |  30 +++--
 2 files changed, 72 insertions(+), 94 deletions(-)

diff --git a/src/arch/riscv/isa/decoder.isa b/src/arch/riscv/isa/decoder.isa
index 458327e5ec..c0703927b4 100644
--- a/src/arch/riscv/isa/decoder.isa
+++ b/src/arch/riscv/isa/decoder.isa
@@ -47,7 +47,7 @@ decode QUADRANT default Unknown::unknown() {
                   CIMM8<7:6> << 4 |
                   CIMM8<5:2> << 6;
         }}, {{
-            if (machInst == 0)
+            if (imm == 0)
                 return std::make_shared<IllegalInstFault>("zero instruction",
                                                            machInst);
             Rp2 = rvSext(sp + imm);
@@ -147,91 +147,71 @@ decode QUADRANT default Unknown::unknown() {
         }
     }
     0x1: decode COPCODE {
-        format CIOp {
-            0x0: c_addi({{
-                imm = CIMM5;
-                if (CIMM1 > 0)
-                    imm |= ~((uint64_t)0x1F);
-            }}, {{
-                if ((RC1 == 0) != (imm == 0)) {
-                    if (RC1 == 0) {
-                        return std::make_shared<IllegalInstFault>(
-                                "source reg x0", machInst);
-                    } else { // imm == 0
-                        return std::make_shared<IllegalInstFault>(
-                                "immediate = 0", machInst);
-                    }
+        0x0: CIOp::c_addi({{
+            imm = sext<6>(CIMM5 | (CIMM1 << 5));
+        }}, {{
+            if ((RC1 == 0) != (imm == 0)) {
+                if (RC1 == 0) {
+                    return std::make_shared<IllegalInstFault>(
+                            "source reg x0", machInst);
+                } else { // imm == 0
+                    return std::make_shared<IllegalInstFault>(
+                            "immediate = 0", machInst);
                 }
-                Rc1_sd = rvSext(Rc1_sd + imm);
-            }});
-            0x1: decode RVTYPE {
-                0x0: c_jal({{
-                    imm = sext<12>((CJUMPIMM3TO1 << 1) |
-                                   (CJUMPIMM4TO4 << 4) |
-                                   (CJUMPIMM5TO5 << 5) |
-                                   (CJUMPIMM6TO6 << 6) |
-                                   (CJUMPIMM7TO7 << 7) |
-                                   (CJUMPIMM9TO8 << 8) |
-                                   (CJUMPIMM10TO10 << 10) |
-                                   (CJUMPIMMSIGN << 11));
-                }}, {{
-                    ra_sw = NPC_uw;
-                    NPC_uw = PC_uw + imm;
-                }});
-                0x1: c_addiw({{
-                    imm = CIMM5;
-                    if (CIMM1 > 0)
-                        imm |= ~((uint64_t)0x1F);
-                }}, {{
-                    if (RC1 == 0) {
-                        return std::make_shared<IllegalInstFault>(
-                                "source reg x0", machInst);
-                    }
-                    Rc1_sw = (int32_t)(Rc1_sw + imm);
-                }});
             }
-            0x2: c_li({{
-                imm = CIMM5;
-                if (CIMM1 > 0)
-                    imm |= ~((uint64_t)0x1F);
+            Rc1_sd = rvSext(Rc1_sd + imm);
+        }});
+        0x1: decode RVTYPE {
+            0x0: CJOp::c_jal({{
+                ra_sw = NPC_uw;
+                NPC_uw = PC_uw + imm;
+            }}, IsDirectControl, IsUncondControl, IsCall);
+            0x1: CIOp::c_addiw({{
+                imm = sext<6>(CIMM5 | (CIMM1 << 5));
             }}, {{
                 if (RC1 == 0) {
                     return std::make_shared<IllegalInstFault>(
                             "source reg x0", machInst);
                 }
+                Rc1_sw = (int32_t)(Rc1_sw + imm);
+            }});
+        }
+        0x2: CIOp::c_li({{
+            imm = sext<6>(CIMM5 | (CIMM1 << 5));
+        }}, {{
+            if (RC1 == 0) {
+                return std::make_shared<IllegalInstFault>(
+                        "source reg x0", machInst);
+            }
+            Rc1_sd = imm;
+        }});
+        0x3: decode RC1 {
+            0x2: CIOp::c_addi16sp({{
+                imm = sext<10>((CIMM5<4:4> << 4) |
+                               (CIMM5<0:0> << 5) |
+                               (CIMM5<3:3> << 6) |
+                               (CIMM5<2:1> << 7) |
+                               (CIMM1 << 9));
+            }}, {{
+                if (imm == 0) {
+                    return std::make_shared<IllegalInstFault>(
+                            "immediate = 0", machInst);
+                }
+                sp_sd = rvSext(sp_sd + imm);
+            }});
+            default: CIOp::c_lui({{
+                imm = sext<6>(CIMM5 | (CIMM1 << 5)) << 12;
+            }}, {{
+                if (RC1 == 0 || RC1 == 2) {
+                    return std::make_shared<IllegalInstFault>(
+                            "source reg x0", machInst);
+                }
+                if (imm == 0) {
+                    return std::make_shared<IllegalInstFault>(
+                            "immediate = 0", machInst);
+                }
                 Rc1_sd = imm;
             }});
-            0x3: decode RC1 {
-                0x2: c_addi16sp({{
-                    imm = CIMM5<4:4> << 4 |
-                          CIMM5<0:0> << 5 |
-                          CIMM5<3:3> << 6 |
-                          CIMM5<2:1> << 7;
-                    if (CIMM1 > 0)
-                        imm |= ~((int64_t)0x1FF);
-                }}, {{
-                    if (imm == 0) {
-                        return std::make_shared<IllegalInstFault>(
-                                "immediate = 0", machInst);
-                    }
-                    sp_sd = rvSext(sp_sd + imm);
-                }});
-                default: c_lui({{
-                    imm = CIMM5 << 12;
-                    if (CIMM1 > 0)
-                        imm |= ~((uint64_t)0x1FFFF);
-                }}, {{
-                    if (RC1 == 0 || RC1 == 2) {
-                        return std::make_shared<IllegalInstFault>(
-                                "source reg x0", machInst);
-                    }
-                    if (imm == 0) {
-                        return std::make_shared<IllegalInstFault>(
-                                "immediate = 0", machInst);
-                    }
-                    Rc1_sd = imm;
-                }});
-            }
         }
         0x4: decode CFUNCT2HIGH {
             format CIOp {
@@ -418,7 +398,7 @@ decode QUADRANT default Unknown::unknown() {
                         }
                         ra = rvSext(NPC);
                         NPC = rvZext(Rc1);
-                    }}, IsIndirectControl, IsUncondControl);
+                    }}, IsIndirectControl, IsUncondControl, IsCall);
                     default: CompressedROp::c_add({{
                         Rc1_sd = rvSext(Rc1_sd + Rc2_sd);
                     }});
diff --git a/src/arch/riscv/isa/formats/compressed.isa b/src/arch/riscv/isa/formats/compressed.isa
index d09865803e..3d89ec38a6 100644
--- a/src/arch/riscv/isa/formats/compressed.isa
+++ b/src/arch/riscv/isa/formats/compressed.isa
@@ -61,33 +61,31 @@ def format CIOp(imm_code, code, imm_type='int64_t', *opt_flags) {{
 
 def format CJOp(code, *opt_flags) {{
     imm_code = """
-           imm =             CJUMPIMM3TO1 << 1 |
-                             CJUMPIMM4TO4 << 4 |
-                             CJUMPIMM5TO5 << 5 |
-                             CJUMPIMM6TO6 << 6 |
-                             CJUMPIMM7TO7 << 7 |
-                             CJUMPIMM9TO8 << 8 |
-                             CJUMPIMM10TO10 << 10;
-            if (CJUMPIMMSIGN)
-                imm |= ~((int64_t)0x7FF);
+            imm = sext<12>((CJUMPIMM3TO1 << 1) |
+                           (CJUMPIMM4TO4 << 4) |
+                           (CJUMPIMM5TO5 << 5) |
+                           (CJUMPIMM6TO6 << 6) |
+                           (CJUMPIMM7TO7 << 7) |
+                           (CJUMPIMM9TO8 << 8) |
+                           (CJUMPIMM10TO10 << 10) |
+                           (CJUMPIMMSIGN << 11));
     """
     iop = InstObjParams(name, Name, 'ImmOp<int64_t>',
         {'code': code, 'imm_code': imm_code,
          'regs': ''}, opt_flags)
     header_output = BranchDeclare.subst(iop)
-    decoder_output = ImmConstructor.subst(iop)
+    decoder_output = JumpConstructor.subst(iop)
     decode_block = BasicDecode.subst(iop)
     exec_output = BranchExecute.subst(iop)
 }};
 
 def format CBOp(code, *opt_flags) {{
     imm_code = """
-                imm = CIMM5<2:1> << 1 |
-                      CIMM3<1:0> << 3 |
-                      CIMM5<0:0> << 5 |
-                      CIMM5<4:3> << 6;
-                if (CIMM3<2:2> > 0)
-                    imm |= ~((int64_t)0xFF);
+                imm = sext<9>((CIMM5<2:1> << 1) |
+                              (CIMM3<1:0> << 3) |
+                              (CIMM5<0:0> << 5) |
+                              (CIMM5<4:3> << 6) |
+                              (CIMM3<2:2> << 8));
                """
     regs = 'srcRegIdx(0)'
     iop = InstObjParams(name, Name, 'ImmOp<int64_t>',

From 9ce8c9b81c046328ced0c3b9e41789c593b4bf94 Mon Sep 17 00:00:00 2001
From: Roger Chang <rogerycchang@google.com>
Date: Mon, 19 Dec 2022 14:08:22 +0800
Subject: [PATCH 069/492] arch-riscv: Refactor template JumpConstructor

Add COPCODE == 4 condition to ensure the available instruction is either c_jr or c_jalr and the flag IsReturn should set for instruction c_jalr if RC1 == t0

Change-Id: I1b39a6c1dc52c8035f16cc64a1b4c494b14879c0
Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/66811
Maintainer: Jason Lowe-Power <power.jg@gmail.com>
Tested-by: kokoro <noreply+kokoro@google.com>
Reviewed-by: Yu-hsin Wang <yuhsingw@google.com>
---
 src/arch/riscv/isa/formats/standard.isa | 11 ++++++++---
 1 file changed, 8 insertions(+), 3 deletions(-)

diff --git a/src/arch/riscv/isa/formats/standard.isa b/src/arch/riscv/isa/formats/standard.isa
index 6be281fa56..bb500f5f49 100644
--- a/src/arch/riscv/isa/formats/standard.isa
+++ b/src/arch/riscv/isa/formats/standard.isa
@@ -250,9 +250,14 @@ def template JumpConstructor {{
         %(constructor)s;
         %(imm_code)s;
         if (QUADRANT != 0x3) {
-            // Handle "c_jr" instruction, set "IsReturn" flag if RC1 is 1 or 5
-            if (CFUNCT1 == 0 && (RC1 == 1 || RC1 == 5))
-                flags[IsReturn] = true;
+            if (COPCODE == 4) {
+                // Handle "c_jr" instruction, set "IsReturn" flag if RC1 is 1 or 5
+                if (CFUNCT1 == 0 && (RC1 == 1 || RC1 == 5))
+                    flags[IsReturn] = true;
+                // Handle "c_jalr" instruction, set IsReturn if RC1 != ra
+                if (CFUNCT1 == 1 && RC1 == 5)
+                    flags[IsReturn] = true;
+            }
         } else {
             bool rd_link = (RD == 1 || RD == 5);
             bool rs1_link = (RS1 == 1 || RS1 == 5);

From 9cd61d000a33a91f679d4c47f45b89c4b68592d4 Mon Sep 17 00:00:00 2001
From: Roger Chang <rogerycchang@google.com>
Date: Fri, 23 Dec 2022 10:05:46 +0800
Subject: [PATCH 070/492] arch-riscv: Correct the IllegalInstFault messege of
 instruction c.addi4spn

In Riscv Manual Volumn I: Unpriviledged ISA section 18.5, c.addi4spn
will not working if imm == 0, not machInst == 0. It is changed in the
https://gem5-review.git.corp.google.com/c/public/gem5/+/66732, and here is the additional patch to the CL.

Change-Id: I2a3c9660dc43f1399f68e03c4f59207f869807a0
Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/66931
Reviewed-by: Yu-hsin Wang <yuhsingw@google.com>
Reviewed-by: Jason Lowe-Power <power.jg@gmail.com>
Maintainer: Jason Lowe-Power <power.jg@gmail.com>
Tested-by: kokoro <noreply+kokoro@google.com>
---
 src/arch/riscv/isa/decoder.isa | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/arch/riscv/isa/decoder.isa b/src/arch/riscv/isa/decoder.isa
index c0703927b4..53d4a4d558 100644
--- a/src/arch/riscv/isa/decoder.isa
+++ b/src/arch/riscv/isa/decoder.isa
@@ -48,7 +48,7 @@ decode QUADRANT default Unknown::unknown() {
                   CIMM8<5:2> << 6;
         }}, {{
             if (imm == 0)
-                return std::make_shared<IllegalInstFault>("zero instruction",
+                return std::make_shared<IllegalInstFault>("immediate = 0",
                                                            machInst);
             Rp2 = rvSext(sp + imm);
         }}, uint64_t);

From 8d117aad71283601c6ceb7145817df1557c3bdfd Mon Sep 17 00:00:00 2001
From: Giacomo Travaglini <giacomo.travaglini@arm.com>
Date: Mon, 19 Dec 2022 15:56:49 +0000
Subject: [PATCH 071/492] util: cxxConfigInit has been removed by gem5

This was merged in [1]

[1]: https://gem5-review.googlesource.com/c/public/gem5/+/49455

Change-Id: Iba558dd01d5c8fbc05e4d3a106a3e3ff6b696333
Signed-off-by: Giacomo Travaglini <giacomo.travaglini@arm.com>
Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/66851
Reviewed-by: Daniel Carvalho <odanrc@yahoo.com.br>
Maintainer: Daniel Carvalho <odanrc@yahoo.com.br>
Tested-by: kokoro <noreply+kokoro@google.com>
Reviewed-by: Matthias Jung <jungma@eit.uni-kl.de>
---
 util/tlm/src/sim_control.cc | 2 --
 1 file changed, 2 deletions(-)

diff --git a/util/tlm/src/sim_control.cc b/util/tlm/src/sim_control.cc
index a8a3da4a3f..834cfe064e 100644
--- a/util/tlm/src/sim_control.cc
+++ b/util/tlm/src/sim_control.cc
@@ -72,8 +72,6 @@ Gem5SimControl::Gem5SimControl(sc_core::sc_module_name name,
     }
     instance = this;
 
-    gem5::cxxConfigInit();
-
     // register the systemc slave and master port handler
     gem5::ExternalSlave::registerHandler("tlm_slave",
         new SCSlavePortHandler(*this));

From 25b4defa6aff3873e283c54615cda214786b5db7 Mon Sep 17 00:00:00 2001
From: Giacomo Travaglini <giacomo.travaglini@arm.com>
Date: Mon, 19 Dec 2022 16:00:34 +0000
Subject: [PATCH 072/492] util: Fix missing include of sim/core.hh in util-tlm

Change-Id: I6dbf71dac903a660369bf8b33ae0c88d28d07457
Signed-off-by: Giacomo Travaglini <giacomo.travaglini@arm.com>
Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/66852
Reviewed-by: Matthias Jung <jungma@eit.uni-kl.de>
Tested-by: kokoro <noreply+kokoro@google.com>
Reviewed-by: Daniel Carvalho <odanrc@yahoo.com.br>
Maintainer: Daniel Carvalho <odanrc@yahoo.com.br>
---
 util/tlm/src/sc_master_port.cc | 1 +
 1 file changed, 1 insertion(+)

diff --git a/util/tlm/src/sc_master_port.cc b/util/tlm/src/sc_master_port.cc
index 2e1082818c..c0bb6d5f5f 100644
--- a/util/tlm/src/sc_master_port.cc
+++ b/util/tlm/src/sc_master_port.cc
@@ -36,6 +36,7 @@
 #include "params/ExternalMaster.hh"
 #include "sc_ext.hh"
 #include "sc_master_port.hh"
+#include "sim/core.hh"
 #include "sim/system.hh"
 
 namespace Gem5SystemC

From 55fb8bf40e19c63ebbc4fa8c0a04bce838514c7a Mon Sep 17 00:00:00 2001
From: Giacomo Travaglini <giacomo.travaglini@arm.com>
Date: Mon, 19 Dec 2022 16:06:13 +0000
Subject: [PATCH 073/492] util: Update util-tlm to require C++17

It's the version we currently use to compile gem5

Change-Id: I5d2d26e5ba32191d65a4a5ae58d29a16970d062d
Signed-off-by: Giacomo Travaglini <giacomo.travaglini@arm.com>
Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/66853
Maintainer: Daniel Carvalho <odanrc@yahoo.com.br>
Reviewed-by: Daniel Carvalho <odanrc@yahoo.com.br>
Tested-by: kokoro <noreply+kokoro@google.com>
---
 util/tlm/SConstruct | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/util/tlm/SConstruct b/util/tlm/SConstruct
index f1e057db70..1a9a79f402 100644
--- a/util/tlm/SConstruct
+++ b/util/tlm/SConstruct
@@ -51,11 +51,12 @@ shlibsuffix = env['SHLIBSUFFIX']
 env.Append(CPPPATH=[gem5_root + '/build/' + gem5_arch,
                     gem5_root + '/util/systemc/gem5_within_systemc',
                     gem5_root + '/ext/systemc/src',
+                    gem5_root + '/ext',
                     '#src',
                     '#examples/common',
                     ])
 
-env.Append(CXXFLAGS=['-std=c++14',
+env.Append(CXXFLAGS=['-std=c++17',
                      '-DSC_INCLUDE_DYNAMIC_PROCESSES',
                      '-DTRACING_ON',
                      ])

From fcde59b24525056bea598278ee8d496182ede7fd Mon Sep 17 00:00:00 2001
From: Giacomo Travaglini <giacomo.travaglini@arm.com>
Date: Mon, 19 Dec 2022 17:09:28 +0000
Subject: [PATCH 074/492] util: ext/systemc is importing env Environment
 instead of main

This got changed by [1]
With this patch we export env instead of main. There is no risk of
ext/systemc polluting the environment as its SConscript is cloning env
to the systemc variable anyway, so this double copy was redundant anyway

[1]: https://gem5-review.googlesource.com/c/public/gem5/+/56750

Change-Id: Ib6648e9b38416cac0bc7f06d90a337f32bdca6ca
Signed-off-by: Giacomo Travaglini <giacomo.travaglini@arm.com>
Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/66854
Maintainer: Daniel Carvalho <odanrc@yahoo.com.br>
Tested-by: kokoro <noreply+kokoro@google.com>
Reviewed-by: Daniel Carvalho <odanrc@yahoo.com.br>
---
 util/tlm/SConstruct | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/util/tlm/SConstruct b/util/tlm/SConstruct
index 1a9a79f402..7fe09d446d 100644
--- a/util/tlm/SConstruct
+++ b/util/tlm/SConstruct
@@ -74,15 +74,14 @@ deps += SConscript('examples/common/SConscript',
 
 # the SystemC SConscript makes certain assumptions, we need to fulfill these
 # assumptions before calling the SConscript.
-main = env
 sys.path.append(gem5_root + '/src/python')
 AddOption('--no-colors', dest='use_colors', action='store_false',
           help="Don't add color to abbreviated scons output")
 
-main.SConsignFile('build/systemc/sconsign')
+env.SConsignFile('build/systemc/sconsign')
 SConscript(gem5_root + '/ext/systemc/SConscript',
            variant_dir='build/systemc',
-           exports='main')
+           exports='env')
 
 # By adding libraries as dependencies instead of using LIBS, we avoid that
 # the user needs to set the LD_LIBRARY_PATH

From 083566d0c82d2610b246b0b5ad903766e336ac31 Mon Sep 17 00:00:00 2001
From: Roger Chang <rogerycchang@google.com>
Date: Fri, 9 Dec 2022 10:31:50 +0800
Subject: [PATCH 075/492] arch-riscv: add RV32 ADFIMU_Zfh instruction tests

1. Add rv32 binary files into asmtests
2. Support Riscv CPU with 32 bits register to  simple_binary_run.py

Change-Id: I5cc4c2eeb7654a4acc2d167eb76d8b6522e65dd9
Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/65533
Reviewed-by: Yu-hsin Wang <yuhsingw@google.com>
Reviewed-by: Bobby Bruce <bbruce@ucdavis.edu>
Tested-by: kokoro <noreply+kokoro@google.com>
Maintainer: Bobby Bruce <bbruce@ucdavis.edu>
---
 tests/gem5/asmtest/tests.py             | 310 +++++++++++++-----------
 tests/gem5/configs/simple_binary_run.py |  61 ++++-
 2 files changed, 211 insertions(+), 160 deletions(-)

diff --git a/tests/gem5/asmtest/tests.py b/tests/gem5/asmtest/tests.py
index b2a5992da0..0ddffb27cf 100644
--- a/tests/gem5/asmtest/tests.py
+++ b/tests/gem5/asmtest/tests.py
@@ -34,156 +34,159 @@ else:
 # The following lists the RISCV binaries. Those commented out presently result
 # in a test failure. This is outlined in the following Jira issue:
 # https://gem5.atlassian.net/browse/GEM5-496
-binaries = (
-    "rv64samt-ps-sysclone_d",
-    "rv64samt-ps-sysfutex1_d",
+binary_configs = (
+    ("rv{}samt-ps-sysclone_d", (64,)),
+    ("rv{}samt-ps-sysfutex1_d", (64,)),
     #    'rv64samt-ps-sysfutex2_d',
-    "rv64samt-ps-sysfutex3_d",
+    ("rv{}samt-ps-sysfutex3_d", (64,)),
     #    'rv64samt-ps-sysfutex_d',
-    "rv64ua-ps-amoadd_d",
-    "rv64ua-ps-amoadd_w",
-    "rv64ua-ps-amoand_d",
-    "rv64ua-ps-amoand_w",
-    "rv64ua-ps-amomax_d",
-    "rv64ua-ps-amomax_w",
-    "rv64ua-ps-amomaxu_d",
-    "rv64ua-ps-amomaxu_w",
-    "rv64ua-ps-amomin_d",
-    "rv64ua-ps-amomin_w",
-    "rv64ua-ps-amominu_d",
-    "rv64ua-ps-amominu_w",
-    "rv64ua-ps-amoor_d",
-    "rv64ua-ps-amoor_w",
-    "rv64ua-ps-amoswap_d",
-    "rv64ua-ps-amoswap_w",
-    "rv64ua-ps-amoxor_d",
-    "rv64ua-ps-amoxor_w",
-    "rv64ua-ps-lrsc",
-    "rv64uamt-ps-amoadd_d",
-    "rv64uamt-ps-amoand_d",
-    "rv64uamt-ps-amomax_d",
-    "rv64uamt-ps-amomaxu_d",
-    "rv64uamt-ps-amomin_d",
-    "rv64uamt-ps-amominu_d",
-    "rv64uamt-ps-amoor_d",
-    "rv64uamt-ps-amoswap_d",
-    "rv64uamt-ps-amoxor_d",
-    "rv64uamt-ps-lrsc_d",
-    "rv64ud-ps-fadd",
-    "rv64ud-ps-fclass",
-    "rv64ud-ps-fcmp",
-    "rv64ud-ps-fcvt",
-    "rv64ud-ps-fcvt_w",
-    "rv64ud-ps-fdiv",
-    "rv64ud-ps-fmadd",
-    "rv64ud-ps-fmin",
-    "rv64ud-ps-ldst",
-    "rv64ud-ps-move",
-    "rv64ud-ps-recoding",
-    "rv64ud-ps-structural",
-    "rv64uf-ps-fadd",
-    "rv64uf-ps-fclass",
-    "rv64uf-ps-fcmp",
-    "rv64uf-ps-fcvt",
-    "rv64uf-ps-fcvt_w",
-    "rv64uf-ps-fdiv",
-    "rv64uf-ps-fmadd",
-    "rv64uf-ps-fmin",
-    "rv64uf-ps-ldst",
-    "rv64uf-ps-move",
-    "rv64uf-ps-recoding",
-    "rv64ui-ps-add",
-    "rv64ui-ps-addi",
-    "rv64ui-ps-addiw",
-    "rv64ui-ps-addw",
-    "rv64ui-ps-and",
-    "rv64ui-ps-andi",
-    "rv64ui-ps-auipc",
-    "rv64ui-ps-beq",
-    "rv64ui-ps-bge",
-    "rv64ui-ps-bgeu",
-    "rv64ui-ps-blt",
-    "rv64ui-ps-bltu",
-    "rv64ui-ps-bne",
-    "rv64ui-ps-fence_i",
-    "rv64ui-ps-jal",
-    "rv64ui-ps-jalr",
-    "rv64ui-ps-lb",
-    "rv64ui-ps-lbu",
-    "rv64ui-ps-ld",
-    "rv64ui-ps-lh",
-    "rv64ui-ps-lhu",
-    "rv64ui-ps-lui",
-    "rv64ui-ps-lw",
-    "rv64ui-ps-lwu",
-    "rv64ui-ps-or",
-    "rv64ui-ps-ori",
-    "rv64ui-ps-sb",
-    "rv64ui-ps-sd",
-    "rv64ui-ps-sh",
-    "rv64ui-ps-simple",
-    "rv64ui-ps-sll",
-    "rv64ui-ps-slli",
-    "rv64ui-ps-slliw",
-    "rv64ui-ps-sllw",
-    "rv64ui-ps-slt",
-    "rv64ui-ps-slti",
-    "rv64ui-ps-sltiu",
-    "rv64ui-ps-sltu",
-    "rv64ui-ps-sra",
-    "rv64ui-ps-srai",
-    "rv64ui-ps-sraiw",
-    "rv64ui-ps-sraw",
-    "rv64ui-ps-srl",
-    "rv64ui-ps-srli",
-    "rv64ui-ps-srliw",
-    "rv64ui-ps-srlw",
-    "rv64ui-ps-sub",
-    "rv64ui-ps-subw",
-    "rv64ui-ps-sw",
-    "rv64ui-ps-xor",
-    "rv64ui-ps-xori",
-    "rv64um-ps-div",
-    "rv64um-ps-divu",
-    "rv64um-ps-divuw",
-    "rv64um-ps-divw",
-    "rv64um-ps-mul",
-    "rv64um-ps-mulh",
-    "rv64um-ps-mulhsu",
-    "rv64um-ps-mulhu",
-    "rv64um-ps-mulw",
-    "rv64um-ps-rem",
-    "rv64um-ps-remu",
-    "rv64um-ps-remuw",
-    "rv64um-ps-remw",
-    "rv64uzfh-ps-fadd",
-    "rv64uzfh-ps-fclass",
-    "rv64uzfh-ps-fcmp",
-    "rv64uzfh-ps-fcvt",
-    "rv64uzfh-ps-fcvt_w",
-    "rv64uzfh-ps-fdiv",
-    "rv64uzfh-ps-fmadd",
-    "rv64uzfh-ps-fmin",
-    "rv64uzfh-ps-ldst",
-    "rv64uzfh-ps-move",
-    "rv64uzfh-ps-recoding",
+    ("rv{}ua-ps-amoadd_d", (64,)),
+    ("rv{}ua-ps-amoadd_w", (32, 64)),
+    ("rv{}ua-ps-amoand_d", (64,)),
+    ("rv{}ua-ps-amoand_w", (32, 64)),
+    ("rv{}ua-ps-amomax_d", (64,)),
+    ("rv{}ua-ps-amomax_w", (32, 64)),
+    ("rv{}ua-ps-amomaxu_d", (64,)),
+    ("rv{}ua-ps-amomaxu_w", (32, 64)),
+    ("rv{}ua-ps-amomin_d", (64,)),
+    ("rv{}ua-ps-amomin_w", (32, 64)),
+    ("rv{}ua-ps-amominu_d", (64,)),
+    ("rv{}ua-ps-amominu_w", (32, 64)),
+    ("rv{}ua-ps-amoor_d", (64,)),
+    ("rv{}ua-ps-amoor_w", (32, 64)),
+    ("rv{}ua-ps-amoswap_d", (64,)),
+    ("rv{}ua-ps-amoswap_w", (32, 64)),
+    ("rv{}ua-ps-amoxor_d", (64,)),
+    ("rv{}ua-ps-amoxor_w", (32, 64)),
+    ("rv{}ua-ps-lrsc", (32, 64)),
+    ("rv{}uamt-ps-amoadd_d", (64,)),
+    ("rv{}uamt-ps-amoand_d", (64,)),
+    ("rv{}uamt-ps-amomax_d", (64,)),
+    ("rv{}uamt-ps-amomaxu_d", (64,)),
+    ("rv{}uamt-ps-amomin_d", (64,)),
+    ("rv{}uamt-ps-amominu_d", (64,)),
+    ("rv{}uamt-ps-amoor_d", (64,)),
+    ("rv{}uamt-ps-amoswap_d", (64,)),
+    ("rv{}uamt-ps-amoxor_d", (64,)),
+    ("rv{}uamt-ps-lrsc_d", (64,)),
+    ("rv{}uamt-ps-amoadd_w", (32,)),
+    ("rv{}uamt-ps-amoand_w", (32,)),
+    ("rv{}uamt-ps-amomax_w", (32,)),
+    ("rv{}uamt-ps-amomaxu_w", (32,)),
+    ("rv{}uamt-ps-amomin_w", (32,)),
+    ("rv{}uamt-ps-amominu_w", (32,)),
+    ("rv{}uamt-ps-amoor_w", (32,)),
+    ("rv{}uamt-ps-amoswap_w", (32,)),
+    ("rv{}uamt-ps-amoxor_w", (32,)),
+    ("rv{}uamt-ps-lrsc_w", (32,)),
+    ("rv{}ud-ps-fadd", (32, 64)),
+    ("rv{}ud-ps-fclass", (32, 64)),
+    ("rv{}ud-ps-fcmp", (32, 64)),
+    ("rv{}ud-ps-fcvt", (32, 64)),
+    ("rv{}ud-ps-fcvt_w", (32, 64)),
+    ("rv{}ud-ps-fdiv", (32, 64)),
+    ("rv{}ud-ps-fmadd", (32, 64)),
+    ("rv{}ud-ps-fmin", (32, 64)),
+    ("rv{}ud-ps-ldst", (32, 64)),
+    ("rv{}ud-ps-move", (64,)),
+    ("rv{}ud-ps-recoding", (32, 64)),
+    ("rv{}ud-ps-structural", (64,)),
+    ("rv{}uf-ps-fadd", (32, 64)),
+    ("rv{}uf-ps-fclass", (32, 64)),
+    ("rv{}uf-ps-fcmp", (32, 64)),
+    ("rv{}uf-ps-fcvt", (32, 64)),
+    ("rv{}uf-ps-fcvt_w", (32, 64)),
+    ("rv{}uf-ps-fdiv", (32, 64)),
+    ("rv{}uf-ps-fmadd", (32, 64)),
+    ("rv{}uf-ps-fmin", (32, 64)),
+    ("rv{}uf-ps-ldst", (32, 64)),
+    ("rv{}uf-ps-move", (32, 64)),
+    ("rv{}uf-ps-recoding", (32, 64)),
+    ("rv{}ui-ps-add", (32, 64)),
+    ("rv{}ui-ps-addi", (32, 64)),
+    ("rv{}ui-ps-addiw", (64,)),
+    ("rv{}ui-ps-addw", (64,)),
+    ("rv{}ui-ps-and", (32, 64)),
+    ("rv{}ui-ps-andi", (32, 64)),
+    ("rv{}ui-ps-auipc", (32, 64)),
+    ("rv{}ui-ps-beq", (32, 64)),
+    ("rv{}ui-ps-bge", (32, 64)),
+    ("rv{}ui-ps-bgeu", (32, 64)),
+    ("rv{}ui-ps-blt", (32, 64)),
+    ("rv{}ui-ps-bltu", (32, 64)),
+    ("rv{}ui-ps-bne", (32, 64)),
+    ("rv{}ui-ps-fence_i", (32, 64)),
+    ("rv{}ui-ps-jal", (32, 64)),
+    ("rv{}ui-ps-jalr", (32, 64)),
+    ("rv{}ui-ps-lb", (32, 64)),
+    ("rv{}ui-ps-lbu", (32, 64)),
+    ("rv{}ui-ps-ld", (64,)),
+    ("rv{}ui-ps-lh", (32, 64)),
+    ("rv{}ui-ps-lhu", (32, 64)),
+    ("rv{}ui-ps-lui", (32, 64)),
+    ("rv{}ui-ps-lw", (32, 64)),
+    ("rv{}ui-ps-lwu", (64,)),
+    ("rv{}ui-ps-or", (32, 64)),
+    ("rv{}ui-ps-ori", (32, 64)),
+    ("rv{}ui-ps-sb", (32, 64)),
+    ("rv{}ui-ps-sd", (64,)),
+    ("rv{}ui-ps-sh", (32, 64)),
+    ("rv{}ui-ps-simple", (32, 64)),
+    ("rv{}ui-ps-sll", (32, 64)),
+    ("rv{}ui-ps-slli", (32, 64)),
+    ("rv{}ui-ps-slliw", (64,)),
+    ("rv{}ui-ps-sllw", (64,)),
+    ("rv{}ui-ps-slt", (32, 64)),
+    ("rv{}ui-ps-slti", (32, 64)),
+    ("rv{}ui-ps-sltiu", (32, 64)),
+    ("rv{}ui-ps-sltu", (32, 64)),
+    ("rv{}ui-ps-sra", (32, 64)),
+    ("rv{}ui-ps-srai", (32, 64)),
+    ("rv{}ui-ps-sraiw", (64,)),
+    ("rv{}ui-ps-sraw", (64,)),
+    ("rv{}ui-ps-srl", (32, 64)),
+    ("rv{}ui-ps-srli", (32, 64)),
+    ("rv{}ui-ps-srliw", (64,)),
+    ("rv{}ui-ps-srlw", (64,)),
+    ("rv{}ui-ps-sub", (32, 64)),
+    ("rv{}ui-ps-subw", (64,)),
+    ("rv{}ui-ps-sw", (32, 64)),
+    ("rv{}ui-ps-xor", (32, 64)),
+    ("rv{}ui-ps-xori", (32, 64)),
+    ("rv{}um-ps-div", (32, 64)),
+    ("rv{}um-ps-divu", (32, 64)),
+    ("rv{}um-ps-divuw", (64,)),
+    ("rv{}um-ps-divw", (64,)),
+    ("rv{}um-ps-mul", (32, 64)),
+    ("rv{}um-ps-mulh", (32, 64)),
+    ("rv{}um-ps-mulhsu", (32, 64)),
+    ("rv{}um-ps-mulhu", (32, 64)),
+    ("rv{}um-ps-mulw", (64,)),
+    ("rv{}um-ps-rem", (32, 64)),
+    ("rv{}um-ps-remu", (32, 64)),
+    ("rv{}um-ps-remuw", (64,)),
+    ("rv{}um-ps-remw", (64,)),
+    ("rv{}uzfh-ps-fadd", (32, 64)),
+    ("rv{}uzfh-ps-fclass", (32, 64)),
+    ("rv{}uzfh-ps-fcmp", (32, 64)),
+    ("rv{}uzfh-ps-fcvt", (32, 64)),
+    ("rv{}uzfh-ps-fcvt_w", (32, 64)),
+    ("rv{}uzfh-ps-fdiv", (32, 64)),
+    ("rv{}uzfh-ps-fmadd", (32, 64)),
+    ("rv{}uzfh-ps-fmin", (32, 64)),
+    ("rv{}uzfh-ps-ldst", (32, 64)),
+    ("rv{}uzfh-ps-move", (32, 64)),
+    ("rv{}uzfh-ps-recoding", (32, 64)),
 )
 
 cpu_types = ("atomic", "timing", "minor", "o3")
 
 for cpu_type in cpu_types:
-    for binary in binaries:
-        gem5_verify_config(
-            name=f"asm-riscv-{binary}-{cpu_type}",
-            verifiers=(),
-            config=joinpath(
-                config.base_dir,
-                "tests",
-                "gem5",
-                "configs",
-                "simple_binary_run.py",
-            ),
-            config_args=[
+    for cfg in binary_configs:
+        template_bin, all_bits = cfg
+        for bits in all_bits:
+            binary = template_bin.format(bits)
+            config_args = [
                 binary,
                 cpu_type,
                 "riscv",
@@ -191,7 +194,20 @@ for cpu_type in cpu_types:
                 "4",
                 "--resource-directory",
                 resource_path,
-            ],
-            valid_isas=(constants.all_compiled_tag,),
-            valid_hosts=constants.supported_hosts,
-        )
+            ]
+            if bits == 32:
+                config_args.extend(["-b", "--riscv-32bits"])
+            gem5_verify_config(
+                name=f"asm-riscv-{binary}-{cpu_type}",
+                verifiers=(),
+                config=joinpath(
+                    config.base_dir,
+                    "tests",
+                    "gem5",
+                    "configs",
+                    "simple_binary_run.py",
+                ),
+                config_args=config_args,
+                valid_isas=(constants.all_compiled_tag,),
+                valid_hosts=constants.supported_hosts,
+            )
diff --git a/tests/gem5/configs/simple_binary_run.py b/tests/gem5/configs/simple_binary_run.py
index d69e1a1169..fbb0313f93 100644
--- a/tests/gem5/configs/simple_binary_run.py
+++ b/tests/gem5/configs/simple_binary_run.py
@@ -1,4 +1,5 @@
 # Copyright (c) 2021 The Regents of the University of California
+# Copyright (c) 2022 Google Inc
 # All rights reserved.
 #
 # Redistribution and use in source and binary forms, with or without
@@ -44,12 +45,23 @@ from gem5.components.processors.simple_core import SimpleCore
 from gem5.components.boards.mem_mode import MemMode
 from gem5.components.processors.cpu_types import CPUTypes
 from gem5.simulate.simulator import Simulator
-from gem5.isas import get_isa_from_str, get_isas_str_set
+from gem5.isas import get_isa_from_str, get_isas_str_set, ISA
+
+from m5.util import fatal
 
 import argparse
+import importlib
 
 from python.gem5.components.processors.base_cpu_core import BaseCPUCore
 
+cpu_types_string_map = {
+    CPUTypes.ATOMIC: "AtomicSimpleCPU",
+    CPUTypes.O3: "O3CPU",
+    CPUTypes.TIMING: "TimingSimpleCPU",
+    CPUTypes.KVM: "KvmCPU",
+    CPUTypes.MINOR: "MinorCPU",
+}
+
 parser = argparse.ArgumentParser(
     description="A gem5 script for running simple binaries in SE mode."
 )
@@ -73,6 +85,12 @@ parser.add_argument(
     help="Use the BaseCPUProcessor instead of the SimpleProcessor.",
 )
 
+parser.add_argument(
+    "--riscv-32bits",
+    action="store_true",
+    help="Use 32 bits core of Riscv CPU",
+)
+
 parser.add_argument(
     "-r",
     "--resource-directory",
@@ -105,26 +123,43 @@ args = parser.parse_args()
 cache_hierarchy = NoCache()
 memory = SingleChannelDDR3_1600()
 
+isa_enum = get_isa_from_str(args.isa)
+cpu_enum = get_cpu_type_from_str(args.cpu)
+
+if isa_enum == ISA.RISCV and args.riscv_32bits and not args.base_cpu_processor:
+    fatal("To use Riscv 32 CPU, the base_cpu_processor must be specify!")
+
 if args.base_cpu_processor:
-    cores = [
-        BaseCPUCore(
-            core=SimpleCore.cpu_simobject_factory(
-                cpu_type=get_cpu_type_from_str(args.cpu),
-                isa=get_isa_from_str(args.isa),
-                core_id=i,
-            ),
-            isa=get_isa_from_str(args.isa),
+
+    if isa_enum == ISA.RISCV and args.riscv_32bits:
+        m5_objects = importlib.import_module("m5.objects")
+        cpu_class = getattr(
+            m5_objects, f"Riscv32{cpu_types_string_map[cpu_enum]}"
         )
-        for i in range(args.num_cores)
-    ]
+        cores = [
+            BaseCPUCore(core=cpu_class(cpu_id=i), isa=isa_enum)
+            for i in range(args.num_cores)
+        ]
+    else:
+        cores = [
+            BaseCPUCore(
+                core=SimpleCore.cpu_simobject_factory(
+                    cpu_type=cpu_enum,
+                    isa=isa_enum,
+                    core_id=i,
+                ),
+                isa=isa_enum,
+            )
+            for i in range(args.num_cores)
+        ]
 
     processor = BaseCPUProcessor(
         cores=cores,
     )
 else:
     processor = SimpleProcessor(
-        cpu_type=get_cpu_type_from_str(args.cpu),
-        isa=get_isa_from_str(args.isa),
+        cpu_type=cpu_enum,
+        isa=isa_enum,
         num_cores=args.num_cores,
     )
 

From 5db889572a31ec49eae5a9d39e704955a1527041 Mon Sep 17 00:00:00 2001
From: Rocky Tatiefo <rtatiefo@google.com>
Date: Thu, 29 Dec 2022 20:11:08 -0800
Subject: [PATCH 076/492] base: Remove unused output.hh dependency from
 trace.cc

Change-Id: Ie80ad5f3fb9fc7ee1e35f0624317e0e58cbf152d
Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/67011
Maintainer: Bobby Bruce <bbruce@ucdavis.edu>
Reviewed-by: Jason Lowe-Power <power.jg@gmail.com>
Tested-by: kokoro <noreply+kokoro@google.com>
---
 src/base/trace.cc | 1 -
 1 file changed, 1 deletion(-)

diff --git a/src/base/trace.cc b/src/base/trace.cc
index 52faa8d725..272b035e0f 100644
--- a/src/base/trace.cc
+++ b/src/base/trace.cc
@@ -38,7 +38,6 @@
 
 #include "base/atomicio.hh"
 #include "base/logging.hh"
-#include "base/output.hh"
 #include "base/str.hh"
 #include "debug/FmtFlag.hh"
 #include "debug/FmtStackTrace.hh"

From 66d4a158207aba57ad06a524c8e9053745fd8e45 Mon Sep 17 00:00:00 2001
From: Vishnu Ramadas <vramadas@wisc.edu>
Date: Mon, 26 Dec 2022 19:14:11 -0600
Subject: [PATCH 077/492] gpu-compute,mem-ruby: Add support for GPU cache
 bypassing

The GPU cache models do not support cache bypassing when the GLC or SLC
AMDGPU instruction modifiers are used in a load or store. This commit
adds cache bypass support by introducing new transitions in the
coherence protocol used by the GPU memory system. Now, instructions with
the GLC bit set will not cache in the L1 and instructions with SLC bit
set will not cache in L1 or L2.

Change-Id: Id29a47b0fa7e16a21a7718949db802f85e9897c3
Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/66991
Reviewed-by: Jason Lowe-Power <power.jg@gmail.com>
Maintainer: Matt Sinclair <mattdsinclair@gmail.com>
Tested-by: kokoro <noreply+kokoro@google.com>
Reviewed-by: Matt Sinclair <mattdsinclair@gmail.com>
---
 src/mem/packet.hh                             |  10 +
 src/mem/request.hh                            |  11 ++
 src/mem/ruby/protocol/GPU_VIPER-TCC.sm        | 178 +++++++++++++++++-
 src/mem/ruby/protocol/GPU_VIPER-TCP.sm        |  45 ++++-
 src/mem/ruby/protocol/MOESI_AMD_Base-dir.sm   |  30 +++
 src/mem/ruby/protocol/MOESI_AMD_Base-msg.sm   |  12 ++
 src/mem/ruby/protocol/RubySlicc_MemControl.sm |   2 +
 src/mem/ruby/protocol/RubySlicc_Types.sm      |   2 +
 src/mem/ruby/slicc_interface/RubyRequest.hh   |  34 ++++
 9 files changed, 316 insertions(+), 8 deletions(-)

diff --git a/src/mem/packet.hh b/src/mem/packet.hh
index 9238dbec00..a80b918798 100644
--- a/src/mem/packet.hh
+++ b/src/mem/packet.hh
@@ -1100,6 +1100,16 @@ class Packet : public Printable
         flags.set(VALID_SIZE);
     }
 
+    /**
+     * Accessor functions for the cache bypass flags. The cache bypass
+     * can specify which levels in the hierarchy to bypass. If GLC_BIT
+     * is set, the requests are globally coherent and bypass TCP.
+     * If SLC_BIT is set, then the requests are system level coherent
+     * and bypass both TCP and TCC.
+     */
+    bool isGLCSet() const { return req->isGLCSet();}
+    bool isSLCSet() const { return req->isSLCSet();}
+
     /**
      * Check if packet corresponds to a given block-aligned address and
      * address space.
diff --git a/src/mem/request.hh b/src/mem/request.hh
index 39d9d7281c..6a0cbc21d4 100644
--- a/src/mem/request.hh
+++ b/src/mem/request.hh
@@ -1071,6 +1071,17 @@ class Request
 
     bool isAcquire() const { return _cacheCoherenceFlags.isSet(ACQUIRE); }
 
+
+    /**
+     * Accessor functions for the cache bypass flags. The cache bypass
+     * can specify which levels in the hierarchy to bypass. If GLC_BIT
+     * is set, the requests are globally coherent and bypass TCP.
+     * If SLC_BIT is set, then the requests are system level coherent
+     * and bypass both TCP and TCC.
+     */
+    bool isGLCSet() const {return _cacheCoherenceFlags.isSet(GLC_BIT); }
+    bool isSLCSet() const {return _cacheCoherenceFlags.isSet(SLC_BIT); }
+
     /**
      * Accessor functions for the memory space configuration flags and used by
      * GPU ISAs such as the Heterogeneous System Architecture (HSA). Note that
diff --git a/src/mem/ruby/protocol/GPU_VIPER-TCC.sm b/src/mem/ruby/protocol/GPU_VIPER-TCC.sm
index 032a64cec4..ae142471fa 100644
--- a/src/mem/ruby/protocol/GPU_VIPER-TCC.sm
+++ b/src/mem/ruby/protocol/GPU_VIPER-TCC.sm
@@ -56,8 +56,10 @@ machine(MachineType:TCC, "TCC Cache")
   enumeration(Event, desc="TCC Events") {
     // Requests coming from the Cores
     RdBlk,                  desc="RdBlk event";
+    RdBypassEvict,          desc="Bypass L2 on reads. Evict if cache block already allocated";
     WrVicBlk,               desc="L1 Write Through";
     WrVicBlkBack,           desc="L1 Write Through(dirty cache)";
+    WrVicBlkEvict,          desc="L1 Write Through(dirty cache) and evict";
     Atomic,                 desc="Atomic Op";
     AtomicDone,             desc="AtomicOps Complete";
     AtomicNotDone,          desc="AtomicOps not Complete";
@@ -68,6 +70,7 @@ machine(MachineType:TCC, "TCC Cache")
     PrbInv,                 desc="Invalidating probe";
     // Coming from Memory Controller
     WBAck,                  desc="writethrough ack from memory";
+    Bypass,                 desc="Bypass the entire L2 cache";
   }
 
   // STATES
@@ -107,6 +110,8 @@ machine(MachineType:TCC, "TCC Cache")
     NetDest Destination, desc="Data destination";
     int numAtomics,     desc="number remaining atomics";
     int atomicDoneCnt,  desc="number AtomicDones triggered";
+    bool isGLCSet,      desc="Bypass L1 Cache";
+    bool isSLCSet,      desc="Bypass L1 and L2 Cache";
   }
 
   structure(TBETable, external="yes") {
@@ -173,7 +178,6 @@ machine(MachineType:TCC, "TCC Cache")
 
   int functionalWrite(Addr addr, Packet *pkt) {
     int num_functional_writes := 0;
-
     TBE tbe := TBEs.lookup(addr);
     if(is_valid(tbe)) {
       num_functional_writes := num_functional_writes +
@@ -279,7 +283,11 @@ machine(MachineType:TCC, "TCC Cache")
       peek(responseFromNB_in, ResponseMsg, block_on="addr") {
         TBE tbe := TBEs.lookup(in_msg.addr);
         Entry cache_entry := getCacheEntry(in_msg.addr);
-        if (in_msg.Type == CoherenceResponseType:NBSysResp) {
+        if (in_msg.isSLCSet) {
+            // If the SLC bit is set, the response needs to bypass the cache
+            // and should not be allocated an entry.
+            trigger(Event:Bypass, in_msg.addr, cache_entry, tbe);
+        } else if (in_msg.Type == CoherenceResponseType:NBSysResp) {
           if(presentOrAvail(in_msg.addr)) {
             trigger(Event:Data, in_msg.addr, cache_entry, tbe);
           } else {
@@ -313,7 +321,18 @@ machine(MachineType:TCC, "TCC Cache")
         TBE tbe := TBEs.lookup(in_msg.addr);
         Entry cache_entry := getCacheEntry(in_msg.addr);
         if (in_msg.Type == CoherenceRequestType:WriteThrough) {
-            if(WB) {
+            if (in_msg.isSLCSet) {
+                // The request should bypass the cache if SLC bit is set.
+                // If the cache entry exists already, then evict it.
+                // Else, perform a normal cache access.
+                // The cache entry is allocated only on response and bypass is
+                // handled there
+                if(presentOrAvail(in_msg.addr)) {
+                    trigger(Event:WrVicBlkEvict, in_msg.addr, cache_entry, tbe);
+                } else {
+                    trigger(Event:WrVicBlk, in_msg.addr, cache_entry, tbe);
+                }
+            } else if(WB) {
                 if(presentOrAvail(in_msg.addr)) {
                     trigger(Event:WrVicBlkBack, in_msg.addr, cache_entry, tbe);
                 } else {
@@ -326,7 +345,13 @@ machine(MachineType:TCC, "TCC Cache")
         } else if (in_msg.Type == CoherenceRequestType:Atomic) {
           trigger(Event:Atomic, in_msg.addr, cache_entry, tbe);
         } else if (in_msg.Type == CoherenceRequestType:RdBlk) {
-          trigger(Event:RdBlk, in_msg.addr, cache_entry, tbe);
+          if (in_msg.isSLCSet) {
+            // If SLC bit is set, the request needs to go directly to memory.
+            // If a cache block already exists, then evict it.
+            trigger(Event:RdBypassEvict, in_msg.addr, cache_entry, tbe);
+          } else {
+            trigger(Event:RdBlk, in_msg.addr, cache_entry, tbe);
+          }
         } else {
           DPRINTF(RubySlicc, "%s\n", in_msg);
           error("Unexpected Response Message to Core");
@@ -354,6 +379,8 @@ machine(MachineType:TCC, "TCC Cache")
         out_msg.MessageSize := MessageSizeType:Response_Data;
         out_msg.Dirty := false;
         out_msg.State := CoherenceState:Shared;
+        out_msg.isGLCSet := in_msg.isGLCSet;
+        out_msg.isSLCSet := in_msg.isSLCSet;
         DPRINTF(RubySlicc, "%s\n", out_msg);
       }
     }
@@ -371,15 +398,46 @@ machine(MachineType:TCC, "TCC Cache")
       out_msg.Dirty := false;
       out_msg.State := CoherenceState:Shared;
       DPRINTF(RubySlicc, "%s\n", out_msg);
+      peek(responseFromNB_in, ResponseMsg) {
+        out_msg.isGLCSet := in_msg.isGLCSet;
+        out_msg.isSLCSet := in_msg.isSLCSet;
+      }
     }
     enqueue(unblockToNB_out, UnblockMsg, 1) {
       out_msg.addr := address;
       out_msg.Destination.add(mapAddressToMachine(address, MachineType:Directory));
       out_msg.MessageSize := MessageSizeType:Unblock_Control;
+      peek(responseFromNB_in, ResponseMsg) {
+        out_msg.isGLCSet := in_msg.isGLCSet;
+        out_msg.isSLCSet := in_msg.isSLCSet;
+      }
       DPRINTF(RubySlicc, "%s\n", out_msg);
     }
   }
 
+  action(rb_bypassDone, "rb", desc="bypass L2 of read access") {
+    peek(responseFromNB_in, ResponseMsg) {
+        enqueue(responseToCore_out, ResponseMsg, l2_response_latency) {
+          out_msg.addr := address;
+          out_msg.Type := CoherenceResponseType:TDSysResp;
+          out_msg.Sender := machineID;
+          out_msg.Destination := tbe.Destination;
+          out_msg.DataBlk := in_msg.DataBlk;
+          out_msg.MessageSize := MessageSizeType:Response_Data;
+          out_msg.Dirty := false;
+          out_msg.State := CoherenceState:Shared;
+          out_msg.isGLCSet := in_msg.isGLCSet;
+          out_msg.isSLCSet := in_msg.isSLCSet;
+          DPRINTF(RubySlicc, "%s\n", out_msg);
+        }
+        enqueue(unblockToNB_out, UnblockMsg, 1) {
+          out_msg.addr := address;
+          out_msg.Destination.add(mapAddressToMachine(address, MachineType:Directory));
+          out_msg.MessageSize := MessageSizeType:Unblock_Control;
+          DPRINTF(RubySlicc, "%s\n", out_msg);
+        }
+    }
+  }
 
   action(rd_requestData, "r", desc="Miss in L2, pass on") {
     if(tbe.Destination.count()==1){
@@ -391,6 +449,8 @@ machine(MachineType:TCC, "TCC Cache")
           out_msg.Destination.add(mapAddressToMachine(address, MachineType:Directory));
           out_msg.Shared := false; // unneeded for this request
           out_msg.MessageSize := in_msg.MessageSize;
+          out_msg.isGLCSet := in_msg.isGLCSet;
+          out_msg.isSLCSet := in_msg.isSLCSet;
           DPRINTF(RubySlicc, "%s\n", out_msg);
         }
       }
@@ -407,6 +467,9 @@ machine(MachineType:TCC, "TCC Cache")
         out_msg.Sender := machineID;
         out_msg.MessageSize := MessageSizeType:Writeback_Control;
         out_msg.instSeqNum := in_msg.instSeqNum;
+        out_msg.isGLCSet := in_msg.isGLCSet;
+        out_msg.isSLCSet := in_msg.isSLCSet;
+
       }
     }
   }
@@ -421,6 +484,9 @@ machine(MachineType:TCC, "TCC Cache")
         out_msg.Sender := machineID;
         out_msg.MessageSize := MessageSizeType:Writeback_Control;
         out_msg.instSeqNum := in_msg.instSeqNum;
+        out_msg.isGLCSet := in_msg.isGLCSet;
+        out_msg.isSLCSet := in_msg.isSLCSet;
+
       }
     }
   }
@@ -434,6 +500,9 @@ machine(MachineType:TCC, "TCC Cache")
           out_msg.Sender := machineID;
           out_msg.MessageSize := in_msg.MessageSize;
           out_msg.DataBlk := in_msg.DataBlk;
+          out_msg.isGLCSet := in_msg.isGLCSet;
+          out_msg.isSLCSet := in_msg.isSLCSet;
+
         }
     }
   }
@@ -466,6 +535,8 @@ machine(MachineType:TCC, "TCC Cache")
       peek(coreRequestNetwork_in, CPURequestMsg) {
         if(in_msg.Type == CoherenceRequestType:RdBlk || in_msg.Type == CoherenceRequestType:Atomic){
           tbe.Destination.add(in_msg.Requestor);
+          tbe.isGLCSet := in_msg.isGLCSet;
+          tbe.isSLCSet := in_msg.isSLCSet;
         }
       }
     }
@@ -505,6 +576,8 @@ machine(MachineType:TCC, "TCC Cache")
         out_msg.DataBlk := in_msg.DataBlk;
         out_msg.writeMask.orMask(in_msg.writeMask);
         out_msg.instSeqNum := in_msg.instSeqNum;
+        out_msg.isGLCSet := in_msg.isGLCSet;
+        out_msg.isSLCSet := in_msg.isSLCSet;
       }
     }
   }
@@ -520,6 +593,10 @@ machine(MachineType:TCC, "TCC Cache")
       out_msg.Dirty := true;
       out_msg.DataBlk := cache_entry.DataBlk;
       out_msg.writeMask.orMask(cache_entry.writeMask);
+      peek(coreRequestNetwork_in, CPURequestMsg) {
+        out_msg.isGLCSet := in_msg.isGLCSet;
+        out_msg.isSLCSet := in_msg.isSLCSet;
+      }
     }
   }
 
@@ -534,6 +611,8 @@ machine(MachineType:TCC, "TCC Cache")
         out_msg.Type := CoherenceRequestType:Atomic;
         out_msg.Dirty := true;
         out_msg.writeMask.orMask(in_msg.writeMask);
+        out_msg.isGLCSet := in_msg.isGLCSet;
+        out_msg.isSLCSet := in_msg.isSLCSet;
       }
     }
   }
@@ -549,6 +628,10 @@ machine(MachineType:TCC, "TCC Cache")
       out_msg.Ntsl := true;
       out_msg.State := CoherenceState:NA;
       out_msg.MessageSize := MessageSizeType:Response_Control;
+      peek(probeNetwork_in, NBProbeRequestMsg) {
+        out_msg.isGLCSet := in_msg.isGLCSet;
+        out_msg.isSLCSet := in_msg.isSLCSet;
+      }
     }
   }
   action(ut_updateTag, "ut", desc="update Tag (i.e. set MRU)") {
@@ -592,6 +675,10 @@ machine(MachineType:TCC, "TCC Cache")
         tbe.atomicDoneCnt := tbe.atomicDoneCnt + 1;
         out_msg.addr := address;
         out_msg.Type := TriggerType:AtomicDone;
+        peek(responseFromNB_in, ResponseMsg) {
+          out_msg.isGLCSet := in_msg.isGLCSet;
+          out_msg.isSLCSet := in_msg.isSLCSet;
+        }
       }
     }
   }
@@ -659,6 +746,54 @@ machine(MachineType:TCC, "TCC Cache")
     p_popRequestQueue;
   }
 
+  transition(I, RdBypassEvict) {TagArrayRead} {
+    p_profileMiss;
+    t_allocateTBE;
+    rd_requestData;
+    p_popRequestQueue;
+  }
+
+// Transition to be called when a read request with SLC flag set arrives at
+// entry in state W. It evicts and invalidates the cache entry before
+// forwarding the request to global memory
+  transition(W, RdBypassEvict, I) {TagArrayRead} {
+    p_profileMiss;
+    t_allocateTBE;
+    wb_writeBack;
+    i_invL2;
+    rd_requestData;
+    p_popRequestQueue;
+  }
+
+// Transition to be called when a read request with SLC flag set arrives at
+// entry in state M. It evicts and invalidates the cache entry before
+// forwarding the request to global memory to main memory
+  transition(M, RdBypassEvict, I) {TagArrayRead} {
+    p_profileMiss;
+    t_allocateTBE;
+    wb_writeBack;
+    i_invL2;
+    rd_requestData;
+    p_popRequestQueue;
+  }
+
+// Transition to be called when a read request with SLC flag set arrives at
+// entry in state V. It invalidates the cache entry before forwarding the
+// request to global memory.
+  transition(V, RdBypassEvict, I) {TagArrayRead} {
+    p_profileMiss;
+    t_allocateTBE;
+    i_invL2;
+    rd_requestData;
+    p_popRequestQueue;
+  }
+
+// Transition to be called when a read request with SLC flag arrives at entry
+// in transient state. The request stalls until the pending transition is complete.
+  transition({WI, IV}, RdBypassEvict)  {
+    st_stallAndWaitRequest;
+  }
+
   transition(V, Atomic, A) {TagArrayRead} {
     p_profileHit;
     i_invL2;
@@ -730,6 +865,31 @@ transition(I, Atomic, A) {TagArrayRead} {
     p_popRequestQueue;
   }
 
+// Transition to be called when a write request with SLC bit set arrives at an
+// entry with state V. The entry has to be evicted and invalidated before the
+// request is forwarded to global memory
+  transition(V, WrVicBlkEvict, I) {TagArrayRead, TagArrayWrite, DataArrayWrite} {
+    p_profileMiss;
+    ut_updateTag;
+    t_allocateTBE;
+    wt_writeThrough;
+    i_invL2;
+    p_popRequestQueue;
+  }
+
+// Transition to be called when a write request with SLC bit set arrives at an
+// entry with state W. The entry has to be evicted and invalidated before the
+// request is forwarded to global memory.
+  transition(W, WrVicBlkEvict, I) {TagArrayRead, TagArrayWrite, DataArrayWrite} {
+    p_profileMiss;
+    ut_updateTag;
+    wdb_writeDirtyBytes;
+    t_allocateTBE;
+    wb_writeBack;
+    i_invL2;
+    p_popRequestQueue;
+  }
+
   transition({W, M}, L2_Repl, WI) {TagArrayRead, DataArrayRead} {
     t_allocateTBE;
     wb_writeBack;
@@ -764,6 +924,16 @@ transition(I, Atomic, A) {TagArrayRead} {
     pp_popProbeQueue;
   }
 
+// Transition to be called when the response for a request with SLC bit set
+// arrives. The request has to be forwarded to the core that needs it while
+// making sure no entry is allocated.
+  transition(I, Bypass, I) {
+    rb_bypassDone;
+    pr_popResponseQueue;
+    wada_wakeUpAllDependentsAddr;
+    dt_deallocateTBE;
+  }
+
   transition(IV, Data, V) {TagArrayRead, TagArrayWrite, DataArrayWrite} {
     a_allocateBlock;
     ut_updateTag;
diff --git a/src/mem/ruby/protocol/GPU_VIPER-TCP.sm b/src/mem/ruby/protocol/GPU_VIPER-TCP.sm
index 775a62b174..3be1397d49 100644
--- a/src/mem/ruby/protocol/GPU_VIPER-TCP.sm
+++ b/src/mem/ruby/protocol/GPU_VIPER-TCP.sm
@@ -60,6 +60,7 @@ machine(MachineType:TCP, "GPU TCP (L1 Data Cache)")
   enumeration(Event, desc="TCP Events") {
     // Core initiated
     Load,           desc="Load";
+    LoadBypassEvict, desc="Bypass L1 on a load. Evict if cache block already allocated";
     Store,          desc="Store to L1 (L1 is dirty)";
     StoreThrough,   desc="Store directly to L2(L1 is clean)";
     Atomic,         desc="Atomic";
@@ -256,8 +257,10 @@ machine(MachineType:TCP, "GPU TCP (L1 Data Cache)")
         Entry cache_entry := getCacheEntry(in_msg.addr);
         TBE tbe := TBEs.lookup(in_msg.addr);
         if (in_msg.Type == CoherenceResponseType:TDSysResp) {
-          // disable L1 cache
-          if (disableL1) {
+          if (disableL1 || in_msg.isGLCSet || in_msg.isSLCSet) {
+              // If L1 is disabled or requests have GLC or SLC flag set,
+              // then, the requests should not cache in the L1. The response
+              // from L2/global memory should bypass the cache
 	          trigger(Event:Bypass, in_msg.addr, cache_entry, tbe);
           } else {
             if (is_valid(cache_entry) || L1cache.cacheAvail(in_msg.addr)) {
@@ -284,13 +287,23 @@ machine(MachineType:TCP, "GPU TCP (L1 Data Cache)")
         TBE tbe := TBEs.lookup(in_msg.LineAddress);
         DPRINTF(RubySlicc, "%s\n", in_msg);
         if (in_msg.Type == RubyRequestType:LD) {
-          trigger(Event:Load, in_msg.LineAddress, cache_entry, tbe);
+          if ((in_msg.isGLCSet || in_msg.isSLCSet) && is_valid(cache_entry)) {
+            // Read rquests with GLC or SLC bit set should not cache in the L1.
+            // They need to bypass the L1 and go to the L2. If an entry exists
+            // in the L1, it needs to be evicted
+            trigger(Event:LoadBypassEvict, in_msg.LineAddress, cache_entry, tbe);
+          }
+          else {
+            trigger(Event:Load, in_msg.LineAddress, cache_entry, tbe);
+          }
         } else if (in_msg.Type == RubyRequestType:ATOMIC ||
                    in_msg.Type == RubyRequestType:ATOMIC_RETURN ||
                    in_msg.Type == RubyRequestType:ATOMIC_NO_RETURN) {
           trigger(Event:Atomic, in_msg.LineAddress, cache_entry, tbe);
         } else if (in_msg.Type == RubyRequestType:ST) {
-          if(disableL1) {
+          if(disableL1 || in_msg.isGLCSet || in_msg.isSLCSet) {
+            // Write requests with GLC or SLC bit set, or when L1 is disabled,
+            // should not cache in the L1. They need to perform a store through
             trigger(Event:StoreThrough, in_msg.LineAddress, cache_entry, tbe);
           } else {
             if (is_valid(cache_entry) || L1cache.cacheAvail(in_msg.LineAddress)) {
@@ -330,6 +343,10 @@ machine(MachineType:TCP, "GPU TCP (L1 Data Cache)")
                               TCC_select_low_bit, TCC_select_num_bits));
       out_msg.MessageSize := MessageSizeType:Request_Control;
       out_msg.InitialRequestTime := curCycle();
+      peek(mandatoryQueue_in, RubyRequest) {
+        out_msg.isGLCSet := in_msg.isGLCSet;
+        out_msg.isSLCSet := in_msg.isSLCSet;
+      }
     }
   }
 
@@ -375,6 +392,8 @@ machine(MachineType:TCP, "GPU TCP (L1 Data Cache)")
                               TCC_select_low_bit, TCC_select_num_bits));
           out_msg.MessageSize := MessageSizeType:Request_Control;
           out_msg.InitialRequestTime := curCycle();
+          out_msg.isGLCSet := in_msg.isGLCSet;
+          out_msg.isSLCSet := in_msg.isSLCSet;
         }
       }
     }
@@ -401,6 +420,8 @@ machine(MachineType:TCP, "GPU TCP (L1 Data Cache)")
       // forward inst sequence number to lower TCC
       peek(mandatoryQueue_in, RubyRequest) {
         out_msg.instSeqNum := in_msg.instSeqNum;
+        out_msg.isGLCSet := in_msg.isGLCSet;
+        out_msg.isSLCSet := in_msg.isSLCSet;
       }
     }
   }
@@ -418,6 +439,11 @@ machine(MachineType:TCP, "GPU TCP (L1 Data Cache)")
         out_msg.Type := CoherenceRequestType:Atomic;
         out_msg.InitialRequestTime := curCycle();
         out_msg.Shared := false;
+        peek(mandatoryQueue_in, RubyRequest) {
+          out_msg.instSeqNum := in_msg.instSeqNum;
+          out_msg.isGLCSet := in_msg.isGLCSet;
+          out_msg.isSLCSet := in_msg.isSLCSet;
+        }
       }
     }
   }
@@ -583,6 +609,17 @@ machine(MachineType:TCP, "GPU TCP (L1 Data Cache)")
     p_popMandatoryQueue;
   }
 
+// Transition to be called when a load request with GLC or SLC flag set arrives
+// at L1. This transition invalidates any existing entry and forwards the
+// request to L2.
+  transition(V, LoadBypassEvict, I) {TagArrayRead, TagArrayWrite} {
+    uu_profileDataMiss;
+    inv_invDone;
+    ic_invCache;
+    n_issueRdBlk;
+    p_popMandatoryQueue;
+}
+
   transition({V, I}, Atomic, A) {TagArrayRead, TagArrayWrite} {
     t_allocateTBE;
     mru_updateMRU;
diff --git a/src/mem/ruby/protocol/MOESI_AMD_Base-dir.sm b/src/mem/ruby/protocol/MOESI_AMD_Base-dir.sm
index 3b38e3b1ff..57edef8f2b 100644
--- a/src/mem/ruby/protocol/MOESI_AMD_Base-dir.sm
+++ b/src/mem/ruby/protocol/MOESI_AMD_Base-dir.sm
@@ -161,6 +161,8 @@ machine(MachineType:Directory, "AMD Baseline protocol")
     uint64_t probe_id,        desc="probe id for lifetime profiling";
     WriteMask writeMask,    desc="outstanding write through mask";
     int Len,            desc="Length of memory request for DMA";
+    bool isGLCSet,      desc="Bypass L1 Cache";
+    bool isSLCSet,      desc="Bypass L1 and L2 Cache";
   }
 
   structure(TBETable, external="yes") {
@@ -483,6 +485,8 @@ machine(MachineType:Directory, "AMD Baseline protocol")
       out_msg.ProbeRequestStartTime := tbe.ProbeRequestStartTime;
       out_msg.OriginalResponder := tbe.LastSender;
       out_msg.L3Hit := tbe.L3Hit;
+      out_msg.isGLCSet := tbe.isGLCSet;
+      out_msg.isSLCSet := tbe.isSLCSet;
       DPRINTF(RubySlicc, "%s\n", out_msg);
     }
   }
@@ -512,6 +516,8 @@ machine(MachineType:Directory, "AMD Baseline protocol")
         out_msg.ForwardRequestTime := tbe.ForwardRequestTime;
         out_msg.ProbeRequestStartTime := tbe.ProbeRequestStartTime;
         out_msg.OriginalResponder := tbe.LastSender;
+        out_msg.isGLCSet := tbe.isGLCSet;
+        out_msg.isSLCSet := tbe.isSLCSet;
         if(tbe.atomicData){
           out_msg.WTRequestor := tbe.WTRequestor;
         }
@@ -540,6 +546,8 @@ machine(MachineType:Directory, "AMD Baseline protocol")
         out_msg.InitialRequestTime := tbe.InitialRequestTime;
         out_msg.ForwardRequestTime := curCycle();
         out_msg.ProbeRequestStartTime := tbe.ProbeRequestStartTime;
+        out_msg.isGLCSet := tbe.isGLCSet;
+        out_msg.isSLCSet := tbe.isSLCSet;
         DPRINTF(RubySlicc, "%s\n", out_msg);
       }
   }
@@ -557,6 +565,8 @@ machine(MachineType:Directory, "AMD Baseline protocol")
         out_msg.ForwardRequestTime := curCycle();
         out_msg.ProbeRequestStartTime := curCycle();
         out_msg.instSeqNum := in_msg.instSeqNum;
+        out_msg.isGLCSet := in_msg.isGLCSet;
+        out_msg.isSLCSet := in_msg.isSLCSet;
       }
     }
   }
@@ -569,6 +579,8 @@ machine(MachineType:Directory, "AMD Baseline protocol")
         out_msg.Sender := machineID;
         out_msg.MessageSize := MessageSizeType:Writeback_Data;
         out_msg.DataBlk := in_msg.DataBlk;
+        out_msg.isGLCSet := in_msg.isGLCSet;
+        out_msg.isSLCSet := in_msg.isSLCSet;
       }
     }
   }
@@ -624,6 +636,8 @@ machine(MachineType:Directory, "AMD Baseline protocol")
           out_msg.Type := MemoryRequestType:MEMORY_READ;
           out_msg.Sender := machineID;
           out_msg.MessageSize := MessageSizeType:Request_Control;
+          out_msg.isGLCSet := in_msg.isGLCSet;
+          out_msg.isSLCSet := in_msg.isSLCSet;
         }
       }
     }
@@ -739,6 +753,8 @@ machine(MachineType:Directory, "AMD Baseline protocol")
           out_msg.MessageSize := MessageSizeType:Control;
           out_msg.Destination := probe_dests;
           tbe.NumPendingAcks := out_msg.Destination.count();
+          out_msg.isGLCSet := in_msg.isGLCSet;
+          out_msg.isSLCSet := in_msg.isSLCSet;
           DPRINTF(RubySlicc, "%s\n", out_msg);
           APPEND_TRANSITION_COMMENT(" dc: Acks remaining: ");
           APPEND_TRANSITION_COMMENT(tbe.NumPendingAcks);
@@ -842,6 +858,8 @@ machine(MachineType:Directory, "AMD Baseline protocol")
           out_msg.ReturnData := true;
           out_msg.MessageSize := MessageSizeType:Control;
           out_msg.Destination := probe_dests;
+          out_msg.isGLCSet := in_msg.isGLCSet;
+          out_msg.isSLCSet := in_msg.isSLCSet;
           tbe.NumPendingAcks := out_msg.Destination.count();
           DPRINTF(RubySlicc, "%s\n", (out_msg));
           APPEND_TRANSITION_COMMENT(" sc: Acks remaining: ");
@@ -897,6 +915,8 @@ machine(MachineType:Directory, "AMD Baseline protocol")
           out_msg.ReturnData := false;
           out_msg.MessageSize := MessageSizeType:Control;
           out_msg.Destination := probe_dests;
+          out_msg.isGLCSet := in_msg.isGLCSet;
+          out_msg.isSLCSet := in_msg.isSLCSet;
           tbe.NumPendingAcks := out_msg.Destination.count();
           APPEND_TRANSITION_COMMENT(" ic: Acks remaining: ");
           APPEND_TRANSITION_COMMENT(tbe.NumPendingAcks);
@@ -923,6 +943,8 @@ machine(MachineType:Directory, "AMD Baseline protocol")
         out_msg.Sender := machineID;
         out_msg.MessageSize := MessageSizeType:Writeback_Data;
         out_msg.DataBlk := in_msg.DataBlk;
+        out_msg.isGLCSet := in_msg.isGLCSet;
+        out_msg.isSLCSet := in_msg.isSLCSet;
       }
       if (tbe.Dirty == false) {
           // have to update the TBE, too, because of how this
@@ -985,6 +1007,8 @@ machine(MachineType:Directory, "AMD Baseline protocol")
       tbe.NumPendingAcks := 0;
       tbe.Cached := in_msg.ForceShared;
       tbe.InitialRequestTime := in_msg.InitialRequestTime;
+      tbe.isGLCSet := in_msg.isGLCSet;
+      tbe.isSLCSet := in_msg.isSLCSet;
     }
   }
 
@@ -1004,6 +1028,8 @@ machine(MachineType:Directory, "AMD Baseline protocol")
         out_msg.Sender := machineID;
         out_msg.MessageSize := MessageSizeType:Writeback_Data;
         out_msg.DataBlk := tbe.DataBlk;
+        out_msg.isGLCSet := tbe.isGLCSet;
+        out_msg.isSLCSet := tbe.isSLCSet;
         DPRINTF(ProtocolTrace, "%s\n", out_msg);
       }
     }
@@ -1104,6 +1130,8 @@ machine(MachineType:Directory, "AMD Baseline protocol")
             out_msg.Sender := machineID;
             out_msg.MessageSize := MessageSizeType:Writeback_Data;
             out_msg.DataBlk := victim_entry.DataBlk;
+            out_msg.isGLCSet := in_msg.isGLCSet;
+            out_msg.isSLCSet := in_msg.isSLCSet;
           }
           L3CacheMemory.deallocate(victim);
         }
@@ -1136,6 +1164,8 @@ machine(MachineType:Directory, "AMD Baseline protocol")
             out_msg.Sender := machineID;
             out_msg.MessageSize := MessageSizeType:Writeback_Data;
             out_msg.DataBlk := victim_entry.DataBlk;
+            out_msg.isGLCSet := tbe.isGLCSet;
+            out_msg.isSLCSet := tbe.isSLCSet;
           }
           L3CacheMemory.deallocate(victim);
         }
diff --git a/src/mem/ruby/protocol/MOESI_AMD_Base-msg.sm b/src/mem/ruby/protocol/MOESI_AMD_Base-msg.sm
index 46bab43c22..6ff19e953b 100644
--- a/src/mem/ruby/protocol/MOESI_AMD_Base-msg.sm
+++ b/src/mem/ruby/protocol/MOESI_AMD_Base-msg.sm
@@ -138,6 +138,9 @@ structure(CPURequestMsg, desc="...", interface="Message") {
   bool NoWriteConflict,             default="true", desc="write collided with CAB entry";
   int ProgramCounter,               desc="PC that accesses to this block";
 
+  bool isGLCSet, default="false", desc="GLC flag value in the request";
+  bool isSLCSet, default="false", desc="SLC flag value in the request";
+
   bool functionalRead(Packet *pkt) {
     // Only PUTX messages contains the data block
     if (Type == CoherenceRequestType:VicDirty) {
@@ -165,6 +168,8 @@ structure(NBProbeRequestMsg, desc="...", interface="Message") {
   MachineID Requestor,          desc="Requestor id for 3-hop requests";
   bool NoAckNeeded, default="false", desc="For short circuting acks";
   int ProgramCounter,           desc="PC that accesses to this block";
+  bool isGLCSet,                desc="Bypass L1 Cache";
+  bool isSLCSet,                desc="Bypass L1 and L2 Caches";
 
   bool functionalRead(Packet *pkt) {
     return false;
@@ -248,6 +253,9 @@ structure(ResponseMsg, desc="...", interface="Message") {
   int ProgramCounter,       desc="PC that issues this request";
   bool mispred,              desc="tell TCP if the block should not be bypassed";
 
+  bool isGLCSet, default="false", desc="GLC flag value in the request that triggered response";
+  bool isSLCSet, default="false", desc="SLC flag value in the request that triggered response";
+
 
   bool functionalRead(Packet *pkt) {
     // Only PUTX messages contains the data block
@@ -277,6 +285,8 @@ structure(UnblockMsg, desc="...", interface="Message") {
   bool wasValid, default="false", desc="Was block valid when evicted";
   bool valid, default="false", desc="Is block valid";
   bool validToInvalid, default="false", desc="Was block valid when evicted";
+  bool isGLCSet, default="false", desc="GLC flag value in the request";
+  bool isSLCSet, default="false", desc="SLC flag value in the request";
 
   bool functionalRead(Packet *pkt) {
     return false;
@@ -321,6 +331,8 @@ structure(TriggerMsg, desc="...", interface="Message") {
   TriggerType Type,             desc="Type of trigger";
   CacheId Dest,         default="CacheId_NA", desc="Cache to invalidate";
   int ProgramCounter,           desc="PC that accesses to this block";
+  bool isGLCSet,        default="false", desc="GLC flag value in the request";
+  bool isSLCSet,        default="false", desc="SLC flag value in the request";
 
   bool functionalRead(Packet *pkt) {
     return false;
diff --git a/src/mem/ruby/protocol/RubySlicc_MemControl.sm b/src/mem/ruby/protocol/RubySlicc_MemControl.sm
index e8517a4a07..012b169dea 100644
--- a/src/mem/ruby/protocol/RubySlicc_MemControl.sm
+++ b/src/mem/ruby/protocol/RubySlicc_MemControl.sm
@@ -74,6 +74,8 @@ structure(MemoryMsg, desc="...", interface="Message") {
   PrefetchBit Prefetch,         desc="Is this a prefetch request";
   bool ReadX,                   desc="Exclusive";
   int Acks,                     desc="How many acks to expect";
+  bool isGLCSet,                desc="Bypass L1 Cache";
+  bool isSLCSet,                desc="Bypass L1 and L2 Caches";
 
   bool functionalRead(Packet *pkt) {
     if ((MessageSize == MessageSizeType:Response_Data) ||
diff --git a/src/mem/ruby/protocol/RubySlicc_Types.sm b/src/mem/ruby/protocol/RubySlicc_Types.sm
index 8d76f78f76..8ba9d935ff 100644
--- a/src/mem/ruby/protocol/RubySlicc_Types.sm
+++ b/src/mem/ruby/protocol/RubySlicc_Types.sm
@@ -177,6 +177,8 @@ structure(RubyRequest, desc="...", interface="Message", external="yes") {
   int htmTransactionUid,     desc="Used to identify the unique HTM transaction that produced this request";
   bool isTlbi,               desc="Memory request is a TLB shootdown (invalidation) operation";
   Addr tlbiTransactionUid,   desc="Unique identifier of the TLB shootdown operation that produced this request";
+  bool isGLCSet,             default="false",desc="If flag is set, bypass GPU L1 cache";
+  bool isSLCSet,             default="false",desc="If flag is set, bypass GPU L1 and L2 caches";
 
   RequestPtr getRequestPtr();
 }
diff --git a/src/mem/ruby/slicc_interface/RubyRequest.hh b/src/mem/ruby/slicc_interface/RubyRequest.hh
index 2345c224fb..89ce83451e 100644
--- a/src/mem/ruby/slicc_interface/RubyRequest.hh
+++ b/src/mem/ruby/slicc_interface/RubyRequest.hh
@@ -79,6 +79,11 @@ class RubyRequest : public Message
     bool m_isTlbi;
     // Should be uint64, but SLICC complains about casts
     Addr m_tlbiTransactionUid;
+    // GPU cache bypass flags. GLC bypasses L1 while SLC bypasses both L1 and
+    // L2 if set to true. They are set to false by default and they must be
+    // explicitly set to true in the program in order to bypass caches
+    bool m_isGLCSet;
+    bool m_isSLCSet;
 
     RubyRequest(Tick curTime, uint64_t _paddr, int _len,
         uint64_t _pc, RubyRequestType _type, RubyAccessMode _access_mode,
@@ -99,6 +104,13 @@ class RubyRequest : public Message
           m_tlbiTransactionUid(0)
     {
         m_LineAddress = makeLineAddress(m_PhysicalAddress);
+        if (_pkt) {
+            m_isGLCSet = m_pkt->req->isGLCSet();
+            m_isSLCSet = m_pkt->req->isSLCSet();
+        } else {
+            m_isGLCSet = 0;
+            m_isSLCSet = 0;
+        }
     }
 
     /** RubyRequest for memory management commands */
@@ -120,6 +132,13 @@ class RubyRequest : public Message
           m_tlbiTransactionUid(0)
     {
         assert(m_pkt->req->isMemMgmt());
+        if (_pkt) {
+            m_isGLCSet = m_pkt->req->isGLCSet();
+            m_isSLCSet = m_pkt->req->isSLCSet();
+        } else {
+            m_isGLCSet = 0;
+            m_isSLCSet = 0;
+        }
     }
 
     RubyRequest(Tick curTime, uint64_t _paddr, int _len,
@@ -148,6 +167,13 @@ class RubyRequest : public Message
           m_tlbiTransactionUid(0)
     {
         m_LineAddress = makeLineAddress(m_PhysicalAddress);
+        if (_pkt) {
+            m_isGLCSet = m_pkt->req->isGLCSet();
+            m_isSLCSet = m_pkt->req->isSLCSet();
+        } else {
+            m_isGLCSet = 0;
+            m_isSLCSet = 0;
+        }
     }
 
     RubyRequest(Tick curTime, uint64_t _paddr, int _len,
@@ -177,6 +203,14 @@ class RubyRequest : public Message
           m_tlbiTransactionUid(0)
     {
         m_LineAddress = makeLineAddress(m_PhysicalAddress);
+        if (_pkt) {
+            m_isGLCSet = m_pkt->req->isGLCSet();
+            m_isSLCSet = m_pkt->req->isSLCSet();
+
+        } else {
+            m_isGLCSet = 0;
+            m_isSLCSet = 0;
+        }
     }
 
     RubyRequest(Tick curTime) : Message(curTime) {}

From 28a871b0374fe6a38fa1738570ebc139e950cf7e Mon Sep 17 00:00:00 2001
From: "Bobby R. Bruce" <bbruce@ucdavis.edu>
Date: Fri, 30 Dec 2022 20:11:43 +0000
Subject: [PATCH 078/492] scons: Re-add -Werror for gem5 develop branch

This is removed from the stable branch to avoid build errors but should
included on the develop branch to aid developers.

This reverts commit 7dd61c865975862b099e1af5e867083ac9307d9b.

Change-Id: I1fe249ce87aa8d70c1f092fc7db1554e6aee7355
Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/67052
Tested-by: kokoro <noreply+kokoro@google.com>
Reviewed-by: Bobby Bruce <bbruce@ucdavis.edu>
Maintainer: Bobby Bruce <bbruce@ucdavis.edu>
---
 SConstruct | 8 ++++++++
 1 file changed, 8 insertions(+)

diff --git a/SConstruct b/SConstruct
index e8107ea2c7..bd26e4552e 100755
--- a/SConstruct
+++ b/SConstruct
@@ -420,6 +420,14 @@ for variant_path in variant_paths:
                     conf.CheckLinkFlag('-Wl,--threads')
                     conf.CheckLinkFlag(
                             '-Wl,--thread-count=%d' % GetOption('num_jobs'))
+
+        # Treat warnings as errors but white list some warnings that we
+        # want to allow (e.g., deprecation warnings).
+        env.Append(CCFLAGS=['-Werror',
+                             '-Wno-error=deprecated-declarations',
+                             '-Wno-error=deprecated',
+                            ])
+
     else:
         error('\n'.join((
               "Don't know what compiler options to use for your compiler.",

From bbeec2d758606470436d972b0ac8d04484000fa8 Mon Sep 17 00:00:00 2001
From: "Bobby R. Bruce" <bbruce@ucdavis.edu>
Date: Fri, 30 Dec 2022 20:28:55 +0000
Subject: [PATCH 079/492] misc: Update version info for develop branch

Change-Id: Icd409acda0e88852938b2af9f170e2a410e91f8c
Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/67053
Reviewed-by: Bobby Bruce <bbruce@ucdavis.edu>
Maintainer: Bobby Bruce <bbruce@ucdavis.edu>
Tested-by: kokoro <noreply+kokoro@google.com>
---
 ext/sst/README.md                       |  4 +-
 ext/testlib/configuration.py            |  2 +-
 src/Doxyfile                            |  2 +-
 src/base/version.cc                     |  2 +-
 src/python/gem5/resources/downloader.py |  2 +-
 tests/compiler-tests.sh                 |  2 +-
 tests/jenkins/presubmit.sh              |  4 +-
 tests/nightly.sh                        |  2 +-
 tests/weekly.sh                         |  2 +-
 util/dockerfiles/docker-compose.yaml    | 50 ++++++++++++-------------
 util/dockerfiles/gcn-gpu/Dockerfile     |  2 +-
 11 files changed, 37 insertions(+), 37 deletions(-)

diff --git a/ext/sst/README.md b/ext/sst/README.md
index 49f56349bb..1f37cb4c44 100644
--- a/ext/sst/README.md
+++ b/ext/sst/README.md
@@ -62,7 +62,7 @@ See `INSTALL.md`.
 Downloading the built bootloader containing a Linux Kernel and a workload,
 
 ```sh
-wget http://dist.gem5.org/dist/v22-1/misc/riscv/bbl-busybox-boot-exit
+wget http://dist.gem5.org/dist/develop/misc/riscv/bbl-busybox-boot-exit
 ```
 
 Running the simulation
@@ -87,7 +87,7 @@ extract them under the $M5_PATH directory (make sure M5_PATH points to a valid
 directory):
 
 ```sh
-wget http://dist.gem5.org/dist/v22-1/arm/aarch-sst-20211207.tar.bz2
+wget http://dist.gem5.org/dist/develop/arm/aarch-sst-20211207.tar.bz2
 tar -xf aarch-sst-20211207.tar.bz2
 
 # copying bootloaders
diff --git a/ext/testlib/configuration.py b/ext/testlib/configuration.py
index 97c637687d..fd47e3b33a 100644
--- a/ext/testlib/configuration.py
+++ b/ext/testlib/configuration.py
@@ -213,7 +213,7 @@ def define_defaults(defaults):
                                                       os.pardir,
                                                       os.pardir))
     defaults.result_path = os.path.join(os.getcwd(), 'testing-results')
-    defaults.resource_url = 'http://dist.gem5.org/dist/v22-1'
+    defaults.resource_url = 'http://dist.gem5.org/dist/develop'
     defaults.resource_path = os.path.abspath(os.path.join(defaults.base_dir,
                                             'tests',
                                             'gem5',
diff --git a/src/Doxyfile b/src/Doxyfile
index 4d14b7ccb7..24d70bbc55 100644
--- a/src/Doxyfile
+++ b/src/Doxyfile
@@ -31,7 +31,7 @@ PROJECT_NAME           = gem5
 # This could be handy for archiving the generated documentation or
 # if some version control system is used.
 
-PROJECT_NUMBER         = v22.1.0.0
+PROJECT_NUMBER         = [DEVELOP-FOR-23.0]
 
 # The OUTPUT_DIRECTORY tag is used to specify the (relative or absolute)
 # base path where the generated documentation will be put.
diff --git a/src/base/version.cc b/src/base/version.cc
index 050aea091f..8131a3197e 100644
--- a/src/base/version.cc
+++ b/src/base/version.cc
@@ -32,6 +32,6 @@ namespace gem5
 /**
  * @ingroup api_base_utils
  */
-const char *gem5Version = "22.1.0.0";
+const char *gem5Version = "[DEVELOP-FOR-23.0]";
 
 } // namespace gem5
diff --git a/src/python/gem5/resources/downloader.py b/src/python/gem5/resources/downloader.py
index f619b9771d..1fda8d86b6 100644
--- a/src/python/gem5/resources/downloader.py
+++ b/src/python/gem5/resources/downloader.py
@@ -55,7 +55,7 @@ def _resources_json_version_required() -> str:
     """
     Specifies the version of resources.json to obtain.
     """
-    return "22.1"
+    return "develop"
 
 
 def _get_resources_json_uri() -> str:
diff --git a/tests/compiler-tests.sh b/tests/compiler-tests.sh
index 044ceb2adc..f5d4bb189f 100755
--- a/tests/compiler-tests.sh
+++ b/tests/compiler-tests.sh
@@ -114,7 +114,7 @@ for compiler in ${images[@]}; do
     # targets for this test
     build_indices=(${build_permutation[@]:0:$builds_count})
 
-    repo_name="${base_url}/${compiler}:v22-1"
+    repo_name="${base_url}/${compiler}:latest"
 
     # Grab compiler image
     docker pull $repo_name >/dev/null
diff --git a/tests/jenkins/presubmit.sh b/tests/jenkins/presubmit.sh
index 36da3facd1..91eb95f81b 100755
--- a/tests/jenkins/presubmit.sh
+++ b/tests/jenkins/presubmit.sh
@@ -37,8 +37,8 @@
 
 set -e
 
-DOCKER_IMAGE_ALL_DEP=gcr.io/gem5-test/ubuntu-22.04_all-dependencies:v22-1
-DOCKER_IMAGE_CLANG_COMPILE=gcr.io/gem5-test/clang-version-14:v22-1
+DOCKER_IMAGE_ALL_DEP=gcr.io/gem5-test/ubuntu-22.04_all-dependencies:latest
+DOCKER_IMAGE_CLANG_COMPILE=gcr.io/gem5-test/clang-version-14:latest
 PRESUBMIT_STAGE2=tests/jenkins/presubmit-stage2.sh
 GEM5ART_TESTS=tests/jenkins/gem5art-tests.sh
 
diff --git a/tests/nightly.sh b/tests/nightly.sh
index bf05154fe7..1360c4435c 100755
--- a/tests/nightly.sh
+++ b/tests/nightly.sh
@@ -37,7 +37,7 @@ docker_mem_limit="18g"
 
 # The docker tag to use (varies between develop, and versions on the staging
 # branch)
-tag="v22-1"
+tag="latest"
 
 # The first argument is the number of threads to be used for compilation. If no
 # argument is given we default to one.
diff --git a/tests/weekly.sh b/tests/weekly.sh
index 9b400b9a83..c7f834b7a5 100755
--- a/tests/weekly.sh
+++ b/tests/weekly.sh
@@ -37,7 +37,7 @@ docker_mem_limit="24g"
 
 # The docker tag to use (varies between develop, and versions on the staging
 # branch)
-tag="v22-1"
+tag="latest"
 
 # We assume the first two arguments are the number of threads followed by the
 # GPU ISA to test. These default to 1 and GCN3_X86 is no argument is given.
diff --git a/util/dockerfiles/docker-compose.yaml b/util/dockerfiles/docker-compose.yaml
index 004052283a..39579962b1 100644
--- a/util/dockerfiles/docker-compose.yaml
+++ b/util/dockerfiles/docker-compose.yaml
@@ -5,154 +5,154 @@ services:
         build:
             context: gcn-gpu
             dockerfile: Dockerfile
-        image: gcr.io/gem5-test/gcn-gpu:v22-1
+        image: gcr.io/gem5-test/gcn-gpu:latest
     gpu-fs:
         build:
             context: gpu-fs
             dockerfile: Dockerfile
-        image: gcr.io/gem5-test/gpu-fs:v22-1
+        image: gcr.io/gem5-test/gpu-fs:latest
     sst:
         build:
             context: sst-11.1.0
             dockerfile: Dockerfile
-        image: gcr.io/gem5-test/sst-env:v22-1
+        image: gcr.io/gem5-test/sst-env:latest
     systemc:
         build:
             context: systemc-2.3.3
             dockerfile: Dockerfile
-        image: gcr.io/gem5-test/systemc-env:v22-1
+        image: gcr.io/gem5-test/systemc-env:latest
     ubuntu-18.04_all-dependencies:
         build:
             context: ubuntu-18.04_all-dependencies
             dockerfile: Dockerfile
-        image: gcr.io/gem5-test/ubuntu-18.04_all-dependencies:v22-1
+        image: gcr.io/gem5-test/ubuntu-18.04_all-dependencies:latest
     ubuntu-20.04_all-dependencies:
         build:
             context: ubuntu-20.04_all-dependencies
             dockerfile: Dockerfile
-        image: gcr.io/gem5-test/ubuntu-20.04_all-dependencies:v22-1
+        image: gcr.io/gem5-test/ubuntu-20.04_all-dependencies:latest
     ubuntu-22.04_all-dependencies:
         build:
             context: ubuntu-22.04_all-dependencies
             dockerfile: Dockerfile
-        image: gcr.io/gem5-test/ubuntu-22.04_all-dependencies:v22-1
+        image: gcr.io/gem5-test/ubuntu-22.04_all-dependencies:latest
     ubuntu-22.04_min-dependencies:
         build:
             context: ubuntu-22.04_min-dependencies
             dockerfile: Dockerfile
-        image: gcr.io/gem5-test/ubuntu-22.04_min-dependencies:v22-1
+        image: gcr.io/gem5-test/ubuntu-22.04_min-dependencies:latest
     gcc-7:
         build:
             context: ubuntu-18.04_gcc-version
             dockerfile: Dockerfile
             args:
                 - version=7
-        image: gcr.io/gem5-test/gcc-version-7:v22-1
+        image: gcr.io/gem5-test/gcc-version-7:latest
     gcc-8:
         build:
             context: ubuntu-18.04_gcc-version
             dockerfile: Dockerfile
             args:
                 - version=8
-        image: gcr.io/gem5-test/gcc-version-8:v22-1
+        image: gcr.io/gem5-test/gcc-version-8:latest
     gcc-9:
         build:
             context: ubuntu-20.04_gcc-version
             dockerfile: Dockerfile
             args:
                 - version=9
-        image: gcr.io/gem5-test/gcc-version-9:v22-1
+        image: gcr.io/gem5-test/gcc-version-9:latest
     gcc-10:
         build:
             context: ubuntu-20.04_gcc-version
             dockerfile: Dockerfile
             args:
                 - version=10
-        image: gcr.io/gem5-test/gcc-version-10:v22-1
+        image: gcr.io/gem5-test/gcc-version-10:latest
     gcc-11:
         build:
             context: ubuntu-22.04_gcc-version
             dockerfile: Dockerfile
             args:
                 - version=11
-        image: gcr.io/gem5-test/gcc-version-11:v22-1
+        image: gcr.io/gem5-test/gcc-version-11:latest
     gcc-12:
         build:
             context: ubuntu-22.04_gcc-version
             dockerfile: Dockerfile
             args:
                 - version=12
-        image: gcr.io/gem5-test/gcc-version-12:v22-1
+        image: gcr.io/gem5-test/gcc-version-12:latest
     clang-6:
         build:
             context: ubuntu-18.04_clang-version
             dockerfile: Dockerfile
             args:
                 - version=6.0
-        image: gcr.io/gem5-test/clang-version-6.0:v22-1
+        image: gcr.io/gem5-test/clang-version-6.0:latest
     clang-7:
         build:
             context: ubuntu-18.04_clang-version
             dockerfile: Dockerfile
             args:
                 - version=7
-        image: gcr.io/gem5-test/clang-version-7:v22-1
+        image: gcr.io/gem5-test/clang-version-7:latest
     clang-8:
         build:
             context: ubuntu-18.04_clang-version
             dockerfile: Dockerfile
             args:
                 - version=8
-        image: gcr.io/gem5-test/clang-version-8:v22-1
+        image: gcr.io/gem5-test/clang-version-8:latest
     clang-9:
         build:
             context: ubuntu-18.04_clang-version
             dockerfile: Dockerfile
             args:
                 - version=9
-        image: gcr.io/gem5-test/clang-version-9:v22-1
+        image: gcr.io/gem5-test/clang-version-9:latest
     clang-10:
         build:
             context: ubuntu-20.04_clang-version
             dockerfile: Dockerfile
             args:
                 - version=10
-        image: gcr.io/gem5-test/clang-version-10:v22-1
+        image: gcr.io/gem5-test/clang-version-10:latest
     clang-11:
         build:
             context: ubuntu-20.04_clang-version
             dockerfile: Dockerfile
             args:
                 - version=11
-        image: gcr.io/gem5-test/clang-version-11:v22-1
+        image: gcr.io/gem5-test/clang-version-11:latest
     clang-12:
         build:
             context: ubuntu-20.04_clang-version
             dockerfile: Dockerfile
             args:
                 - version=12
-        image: gcr.io/gem5-test/clang-version-12:v22-1
+        image: gcr.io/gem5-test/clang-version-12:latest
     clang-13:
         build:
             context: ubuntu-22.04_clang-version
             dockerfile: Dockerfile
             args:
                 - version=13
-        image: gcr.io/gem5-test/clang-version-13:v22-1
+        image: gcr.io/gem5-test/clang-version-13:latest
     clang-14:
         build:
             context: ubuntu-22.04_clang-version
             dockerfile: Dockerfile
             args:
                 - version=14
-        image: gcr.io/gem5-test/clang-version-14:v22-1
+        image: gcr.io/gem5-test/clang-version-14:latest
     llvm-gnu-cross-compiler-riscv64:
         build:
             context: llvm-gnu-cross-compiler-riscv64
             dockerfile: Dockerfile
-        image: gcr.io/gem5-test/llvm-gnu-cross-compiler-riscv64:v22-1
+        image: gcr.io/gem5-test/llvm-gnu-cross-compiler-riscv64:latest
     gem5-all-min-dependencies:
         build:
             context: gem5-all-min-dependencies
             dockerfile: Dockerfile
-        image: gcr.io/gem5-test/gem5-all-min-dependencies:v22-1
+        image: gcr.io/gem5-test/gem5-all-min-dependencies:latest
diff --git a/util/dockerfiles/gcn-gpu/Dockerfile b/util/dockerfiles/gcn-gpu/Dockerfile
index dfff455079..c5db8963a8 100644
--- a/util/dockerfiles/gcn-gpu/Dockerfile
+++ b/util/dockerfiles/gcn-gpu/Dockerfile
@@ -69,7 +69,7 @@ RUN git clone -b rocm-4.0.0 \
 
 WORKDIR /ROCclr
 # The patch allows us to avoid building blit kernels on-the-fly in gem5
-RUN wget -q -O - dist.gem5.org/dist/v22-1/rocm_patches/ROCclr.patch | git apply -v
+RUN wget -q -O - dist.gem5.org/dist/develop/rocm_patches/ROCclr.patch | git apply -v
 
 WORKDIR /ROCclr/build
 RUN cmake -DOPENCL_DIR="/ROCm-OpenCL-Runtime" \

From f99a3c1f96bb4a56cbb9b85d52829d606411649f Mon Sep 17 00:00:00 2001
From: Matthew Poremba <matthew.poremba@amd.com>
Date: Fri, 16 Dec 2022 13:23:01 -0800
Subject: [PATCH 080/492] arch-vega: Fix signed BFE instructions

The bitfield extract instructions come in unsigned and signed variants.
The documentation on this is not correct, however the GCN3 documentation
gives some clues. The instruction should extract an N-bit integer where
N is defined in a source operand starting at some bit also defined by a
source operand. For signed variants of this instruction, the N-bit
integer should be sign extended but is currently not.

This changeset does sign extension using the runtime value of N by ORing
the upper bits with ones if the most significant bit is one. This was
verified by writing these instructions in assembly and running on a real
GPU. Changes are made to v_bfe_i32, s_bfe_i32, and s_bfe_i64.

Change-Id: Ia192f5940200c6de48867b02f709a7f1b2daa974
Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/66751
Maintainer: Matt Sinclair <mattdsinclair@gmail.com>
Tested-by: kokoro <noreply+kokoro@google.com>
Reviewed-by: Matt Sinclair <mattdsinclair@gmail.com>
---
 src/arch/amdgpu/vega/insts/instructions.cc | 30 ++++++++++++++++++++++
 1 file changed, 30 insertions(+)

diff --git a/src/arch/amdgpu/vega/insts/instructions.cc b/src/arch/amdgpu/vega/insts/instructions.cc
index f5b08b7ce1..c9e57bc2f7 100644
--- a/src/arch/amdgpu/vega/insts/instructions.cc
+++ b/src/arch/amdgpu/vega/insts/instructions.cc
@@ -1302,6 +1302,21 @@ namespace VegaISA
 
         sdst = (src0.rawData() >> bits(src1.rawData(), 4, 0))
             & ((1 << bits(src1.rawData(), 22, 16)) - 1);
+
+        // Above extracted a signed int of size src1[22:16] bits which needs
+        // to be signed-extended. Check if the MSB of our src1[22:16]-bit
+        // integer is 1, and sign extend it is.
+        //
+        // Note: The description in the Vega ISA manual does not mention to
+        // sign-extend the result. An update description can be found in the
+        // more recent RDNA3 manual here:
+        // https://developer.amd.com/wp-content/resources/
+        //      RDNA3_Shader_ISA_December2022.pdf
+        if (sdst.rawData() >> (bits(src1.rawData(), 22, 16) - 1)) {
+            sdst = sdst.rawData()
+                 | (0xffffffff << bits(src1.rawData(), 22, 16));
+        }
+
         scc = sdst.rawData() ? 1 : 0;
 
         sdst.write();
@@ -1373,6 +1388,14 @@ namespace VegaISA
 
         sdst = (src0.rawData() >> bits(src1.rawData(), 5, 0))
             & ((1 << bits(src1.rawData(), 22, 16)) - 1);
+
+        // Above extracted a signed int of size src1[22:16] bits which needs
+        // to be signed-extended. Check if the MSB of our src1[22:16]-bit
+        // integer is 1, and sign extend it is.
+        if (sdst.rawData() >> (bits(src1.rawData(), 22, 16) - 1)) {
+            sdst = sdst.rawData()
+                 | 0xffffffffffffffff << bits(src1.rawData(), 22, 16);
+        }
         scc = sdst.rawData() ? 1 : 0;
 
         sdst.write();
@@ -30544,6 +30567,13 @@ namespace VegaISA
             if (wf->execMask(lane)) {
                 vdst[lane] = (src0[lane] >> bits(src1[lane], 4, 0))
                     & ((1 << bits(src2[lane], 4, 0)) - 1);
+
+                // Above extracted a signed int of size src2 bits which needs
+                // to be signed-extended. Check if the MSB of our src2-bit
+                // integer is 1, and sign extend it is.
+                if (vdst[lane] >> (bits(src2[lane], 4, 0) - 1)) {
+                    vdst[lane] |= 0xffffffff << bits(src2[lane], 4, 0);
+                }
             }
         }
 

From bbdebc25daec686cc06e159c28de005ca5875f5b Mon Sep 17 00:00:00 2001
From: Matthew Poremba <matthew.poremba@amd.com>
Date: Fri, 16 Dec 2022 13:35:02 -0800
Subject: [PATCH 081/492] arch-vega: Fix several issues with DPP

DPP processing has several issues which are fixed in this changeset:

1) Incorrect comment is updated
2) newLane calculation for shift/rotate instructions is corrected
3) A copy of original data is made so that a copy of a copy is not made
4) Reset all booleans (OOB, zeroSrc, laneDisabled) after each lane
iteration

The shift, rotate, and broadcast variants were tested by implementing
them in assembly and running on silicon.

Change-Id: If86fbb26c87eaca4ef0587fd846978115858b168
Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/66752
Maintainer: Matt Sinclair <mattdsinclair@gmail.com>
Tested-by: kokoro <noreply+kokoro@google.com>
Reviewed-by: Matt Sinclair <mattdsinclair@gmail.com>
---
 src/arch/amdgpu/vega/insts/inst_util.hh | 57 +++++++++++++++----------
 1 file changed, 34 insertions(+), 23 deletions(-)

diff --git a/src/arch/amdgpu/vega/insts/inst_util.hh b/src/arch/amdgpu/vega/insts/inst_util.hh
index 01925f9d07..7ec2e2ddd3 100644
--- a/src/arch/amdgpu/vega/insts/inst_util.hh
+++ b/src/arch/amdgpu/vega/insts/inst_util.hh
@@ -303,9 +303,9 @@ namespace VegaISA
      * Currently the values are:
      * 0x0 - 0xFF: full permute of four threads
      * 0x100: reserved
-     * 0x101 - 0x10F: row shift right by 1-15 threads
+     * 0x101 - 0x10F: row shift left by 1-15 threads
      * 0x111 - 0x11F: row shift right by 1-15 threads
-     * 0x121 - 0x12F: row shift right by 1-15 threads
+     * 0x121 - 0x12F: row rotate right by 1-15 threads
      * 0x130: wavefront left shift by 1 thread
      * 0x134: wavefront left rotate by 1 thread
      * 0x138: wavefront right shift by 1 thread
@@ -322,7 +322,8 @@ namespace VegaISA
         // newLane will be the same as the input lane unless swizzling happens
         int newLane = currLane;
         // for shift/rotate permutations; positive values are LEFT rotates
-        int count = 1;
+        // shift/rotate left means lane n -> lane n-1 (e.g., lane 1 -> lane 0)
+        int count = 0;
         int localRowOffset = rowOffset;
         int localRowNum = rowNum;
 
@@ -335,51 +336,47 @@ namespace VegaISA
             panic("ERROR: instruction using reserved DPP_CTRL value\n");
         } else if ((dppCtrl >= SQ_DPP_ROW_SL1) &&
                    (dppCtrl <= SQ_DPP_ROW_SL15)) { // DPP_ROW_SL{1:15}
-            count -= (dppCtrl - SQ_DPP_ROW_SL1 + 1);
+            count = (dppCtrl - SQ_DPP_ROW_SL1 + 1);
             if ((localRowOffset + count >= 0) &&
                 (localRowOffset + count < ROW_SIZE)) {
                 localRowOffset += count;
-                newLane = (rowNum | localRowOffset);
+                newLane = ((rowNum * ROW_SIZE) | localRowOffset);
             } else {
                 outOfBounds = true;
             }
         } else if ((dppCtrl >= SQ_DPP_ROW_SR1) &&
                    (dppCtrl <= SQ_DPP_ROW_SR15)) { // DPP_ROW_SR{1:15}
-            count -= (dppCtrl - SQ_DPP_ROW_SR1 + 1);
+            count = -(dppCtrl - SQ_DPP_ROW_SR1 + 1);
             if ((localRowOffset + count >= 0) &&
                 (localRowOffset + count < ROW_SIZE)) {
                 localRowOffset += count;
-                newLane = (rowNum | localRowOffset);
+                newLane = ((rowNum * ROW_SIZE) | localRowOffset);
             } else {
                 outOfBounds = true;
             }
         } else if ((dppCtrl >= SQ_DPP_ROW_RR1) &&
                    (dppCtrl <= SQ_DPP_ROW_RR15)) { // DPP_ROW_RR{1:15}
-            count -= (dppCtrl - SQ_DPP_ROW_RR1 + 1);
+            count = -(dppCtrl - SQ_DPP_ROW_RR1 + 1);
             localRowOffset = (localRowOffset + count + ROW_SIZE) % ROW_SIZE;
-            newLane = (rowNum | localRowOffset);
+            newLane = ((rowNum * ROW_SIZE) | localRowOffset);
         } else if (dppCtrl == SQ_DPP_WF_SL1) { // DPP_WF_SL1
-            count = 1;
             if ((currLane >= 0) && (currLane < NumVecElemPerVecReg)) {
-                newLane += count;
+                newLane += 1;
             } else {
                 outOfBounds = true;
             }
         } else if (dppCtrl == SQ_DPP_WF_RL1) { // DPP_WF_RL1
-            count = 1;
-            newLane = (currLane + count + NumVecElemPerVecReg) %
+            newLane = (currLane - 1 + NumVecElemPerVecReg) %
                       NumVecElemPerVecReg;
         } else if (dppCtrl == SQ_DPP_WF_SR1) { // DPP_WF_SR1
-            count = -1;
-            int currVal = (currLane + count);
+            int currVal = (currLane - 1);
             if ((currVal >= 0) && (currVal < NumVecElemPerVecReg)) {
-                newLane += count;
+                newLane -= 1;
             } else {
                 outOfBounds = true;
             }
         } else if (dppCtrl == SQ_DPP_WF_RR1) { // DPP_WF_RR1
-            count = -1;
-            newLane = (currLane + count + NumVecElemPerVecReg) %
+            newLane = (currLane - 1 + NumVecElemPerVecReg) %
                       NumVecElemPerVecReg;
         } else if (dppCtrl == SQ_DPP_ROW_MIRROR) { // DPP_ROW_MIRROR
             localRowOffset = (15 - localRowOffset);
@@ -392,12 +389,22 @@ namespace VegaISA
         } else if (dppCtrl == SQ_DPP_ROW_BCAST15) { // DPP_ROW_BCAST15
             count = 15;
             if (currLane > count) {
-                newLane = (currLane & ~count) - 1;
+                // 0x30 selects which set of 16 lanes to use. We broadcast the
+                // last lane of one set to all lanes of the next set (e.g.,
+                // lane 15 is written to 16-31, 31 to 32-47, 47 to 48-63).
+                newLane = (currLane & 0x30) - 1;
+            } else {
+                outOfBounds = true;
             }
         } else if (dppCtrl == SQ_DPP_ROW_BCAST31) { // DPP_ROW_BCAST31
             count = 31;
             if (currLane > count) {
-                newLane = (currLane & ~count) - 1;
+                // 0x20 selects either the upper 32 or lower 32 lanes and
+                // broadcasts the last lane of one set to all lanes of the
+                // next set (e.g., lane 31 is written to 32-63).
+                newLane = (currLane & 0x20) - 1;
+            } else {
+                outOfBounds = true;
             }
         } else {
             panic("Unimplemented DPP control operation: %d\n", dppCtrl);
@@ -443,6 +450,9 @@ namespace VegaISA
             src0.absModifier();
         }
 
+        // Need a copy of the original data since we update one lane at a time
+        T src0_copy = src0;
+
         // iterate over all register lanes, performing steps 2-4
         for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
             threadValid = (0x1LL << lane);
@@ -458,7 +468,6 @@ namespace VegaISA
             if (((rowMask & (0x1 << rowNum)) == 0)   /* row mask */   ||
                 ((bankMask & (0x1 << bankNum)) == 0) /* bank mask */) {
                 laneDisabled = true;
-                continue;
             }
 
             /**
@@ -495,7 +504,7 @@ namespace VegaISA
                 } else {
                     threadValid = 0;
                 }
-            } else if (!gpuDynInst->exec_mask[lane]) {
+            } else if (!gpuDynInst->wavefront()->execMask(lane)) {
                 if (boundCtrl == 1) {
                     zeroSrc = true;
                 } else {
@@ -505,13 +514,15 @@ namespace VegaISA
 
             if (threadValid != 0 && !outOfBounds && !zeroSrc) {
                 assert(!laneDisabled);
-                src0[outLane] = src0[lane];
+                src0[lane] = src0_copy[outLane];
             } else if (zeroSrc) {
                 src0[lane] = 0;
             }
 
             // reset for next iteration
             laneDisabled = false;
+            outOfBounds = false;
+            zeroSrc = false;
         }
     }
 

From 64496338278f337e38190af97b9208bb9882495a Mon Sep 17 00:00:00 2001
From: Matthew Poremba <matthew.poremba@amd.com>
Date: Fri, 16 Dec 2022 13:39:24 -0800
Subject: [PATCH 082/492] arch-vega: Add DPP support for V_AND_B32

A DPP variant of V_AND_B32 was found in rocPRIM. With this changeset the
unit tests for rocPRIM scan_inclusive are passing.

Change-Id: I5a65f2cf6b56ac13609b191e3b3dfeb55e630942
Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/66753
Tested-by: kokoro <noreply+kokoro@google.com>
Reviewed-by: Matt Sinclair <mattdsinclair@gmail.com>
Maintainer: Matt Sinclair <mattdsinclair@gmail.com>
---
 src/arch/amdgpu/vega/insts/instructions.cc | 34 +++++++++++++++++++---
 1 file changed, 30 insertions(+), 4 deletions(-)

diff --git a/src/arch/amdgpu/vega/insts/instructions.cc b/src/arch/amdgpu/vega/insts/instructions.cc
index c9e57bc2f7..1f37ff14cc 100644
--- a/src/arch/amdgpu/vega/insts/instructions.cc
+++ b/src/arch/amdgpu/vega/insts/instructions.cc
@@ -6844,15 +6844,41 @@ namespace VegaISA
     {
         Wavefront *wf = gpuDynInst->wavefront();
         ConstVecOperandU32 src0(gpuDynInst, instData.SRC0);
-        ConstVecOperandU32 src1(gpuDynInst, instData.VSRC1);
+        VecOperandU32 src1(gpuDynInst, instData.VSRC1);
         VecOperandU32 vdst(gpuDynInst, instData.VDST);
 
         src0.readSrc();
         src1.read();
 
-        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
-            if (wf->execMask(lane)) {
-                vdst[lane] = src0[lane] & src1[lane];
+        if (isDPPInst()) {
+            VecOperandU32 src0_dpp(gpuDynInst, extData.iFmt_VOP_DPP.SRC0);
+            src0_dpp.read();
+
+            DPRINTF(VEGA, "Handling V_AND_B32 SRC DPP. SRC0: register v[%d], "
+                    "DPP_CTRL: 0x%#x, SRC0_ABS: %d, SRC0_NEG: %d, "
+                    "SRC1_ABS: %d, SRC1_NEG: %d, BC: %d, "
+                    "BANK_MASK: %d, ROW_MASK: %d\n", extData.iFmt_VOP_DPP.SRC0,
+                    extData.iFmt_VOP_DPP.DPP_CTRL,
+                    extData.iFmt_VOP_DPP.SRC0_ABS,
+                    extData.iFmt_VOP_DPP.SRC0_NEG,
+                    extData.iFmt_VOP_DPP.SRC1_ABS,
+                    extData.iFmt_VOP_DPP.SRC1_NEG,
+                    extData.iFmt_VOP_DPP.BC,
+                    extData.iFmt_VOP_DPP.BANK_MASK,
+                    extData.iFmt_VOP_DPP.ROW_MASK);
+
+            processDPP(gpuDynInst, extData.iFmt_VOP_DPP, src0_dpp, src1);
+
+            for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
+                if (wf->execMask(lane)) {
+                    vdst[lane] = src0_dpp[lane] & src1[lane];
+                }
+            }
+        } else {
+            for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
+                if (wf->execMask(lane)) {
+                    vdst[lane] = src0[lane] & src1[lane];
+                }
             }
         }
 

From e392603d8126f8d7de0c31ae366dc668d90868ab Mon Sep 17 00:00:00 2001
From: Matthew Poremba <matthew.poremba@amd.com>
Date: Mon, 26 Dec 2022 09:08:25 -0800
Subject: [PATCH 083/492] arch-vega: Add missing operand size for
 ds_write2st64_b64

This instruction takes three operands (address, and two datas) but there
were only operand sizes for two operands tripping assert in default
case.

Change-Id: I3f505b6432aee5f3f265acac46b83c0c7daff3e7
Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/67071
Maintainer: Matt Sinclair <mattdsinclair@gmail.com>
Tested-by: kokoro <noreply+kokoro@google.com>
Reviewed-by: Matt Sinclair <mattdsinclair@gmail.com>
---
 src/arch/amdgpu/vega/insts/instructions.hh | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/src/arch/amdgpu/vega/insts/instructions.hh b/src/arch/amdgpu/vega/insts/instructions.hh
index 0671df8452..1c422481de 100644
--- a/src/arch/amdgpu/vega/insts/instructions.hh
+++ b/src/arch/amdgpu/vega/insts/instructions.hh
@@ -33553,7 +33553,9 @@ namespace VegaISA
             switch (opIdx) {
               case 0: //vgpr_a
                 return 4;
-              case 1: //vgpr_d1
+              case 1: //vgpr_d0
+                return 8;
+              case 2: //vgpr_d1
                 return 8;
               default:
                 fatal("op idx %i out of bounds\n", opIdx);

From 022a48f9f6e97d04a0f2ed955a174a0791ca2549 Mon Sep 17 00:00:00 2001
From: Matthew Poremba <matthew.poremba@amd.com>
Date: Mon, 26 Dec 2022 09:11:14 -0800
Subject: [PATCH 084/492] arch-vega: Implement ds_add_u32 atomic

This instruction does an atomic add of unsigned 32-bit data with a VGPR
and value in LDS atomically, without return.

Change-Id: I87579a94f6200a9a066f8f7390e57fb5fb6eff8e
Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/67072
Maintainer: Matt Sinclair <mattdsinclair@gmail.com>
Tested-by: kokoro <noreply+kokoro@google.com>
Reviewed-by: Matt Sinclair <mattdsinclair@gmail.com>
---
 src/arch/amdgpu/vega/insts/instructions.cc | 49 ++++++++++++++++++++--
 src/arch/amdgpu/vega/insts/instructions.hh |  2 +
 2 files changed, 48 insertions(+), 3 deletions(-)

diff --git a/src/arch/amdgpu/vega/insts/instructions.cc b/src/arch/amdgpu/vega/insts/instructions.cc
index 1f37ff14cc..afdfde3855 100644
--- a/src/arch/amdgpu/vega/insts/instructions.cc
+++ b/src/arch/amdgpu/vega/insts/instructions.cc
@@ -34071,6 +34071,10 @@ namespace VegaISA
     Inst_DS__DS_ADD_U32::Inst_DS__DS_ADD_U32(InFmt_DS *iFmt)
         : Inst_DS(iFmt, "ds_add_u32")
     {
+        setFlag(MemoryRef);
+        setFlag(GroupSegment);
+        setFlag(AtomicAdd);
+        setFlag(AtomicNoReturn);
     } // Inst_DS__DS_ADD_U32
 
     Inst_DS__DS_ADD_U32::~Inst_DS__DS_ADD_U32()
@@ -34079,14 +34083,53 @@ namespace VegaISA
 
     // --- description from .arch file ---
     // 32b:
-    // tmp = MEM[ADDR];
     // MEM[ADDR] += DATA;
-    // RETURN_DATA = tmp.
     void
     Inst_DS__DS_ADD_U32::execute(GPUDynInstPtr gpuDynInst)
     {
-        panicUnimplemented();
+        Wavefront *wf = gpuDynInst->wavefront();
+
+        if (gpuDynInst->exec_mask.none()) {
+            wf->decLGKMInstsIssued();
+            return;
+        }
+
+        gpuDynInst->execUnitId = wf->execUnitId;
+        gpuDynInst->latency.init(gpuDynInst->computeUnit());
+        gpuDynInst->latency.set(
+                gpuDynInst->computeUnit()->cyclesToTicks(Cycles(24)));
+        ConstVecOperandU32 addr(gpuDynInst, extData.ADDR);
+        ConstVecOperandU32 data(gpuDynInst, extData.DATA0);
+
+        addr.read();
+        data.read();
+
+        calcAddr(gpuDynInst, addr);
+
+        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
+            if (gpuDynInst->exec_mask[lane]) {
+                (reinterpret_cast<VecElemU32*>(gpuDynInst->a_data))[lane]
+                    = data[lane];
+            }
+        }
+
+        gpuDynInst->computeUnit()->localMemoryPipe.issueRequest(gpuDynInst);
     } // execute
+
+    void
+    Inst_DS__DS_ADD_U32::initiateAcc(GPUDynInstPtr gpuDynInst)
+    {
+        Addr offset0 = instData.OFFSET0;
+        Addr offset1 = instData.OFFSET1;
+        Addr offset = (offset1 << 8) | offset0;
+
+        initAtomicAccess<VecElemU32>(gpuDynInst, offset);
+    } // initiateAcc
+
+    void
+    Inst_DS__DS_ADD_U32::completeAcc(GPUDynInstPtr gpuDynInst)
+    {
+    } // completeAcc
     // --- Inst_DS__DS_SUB_U32 class methods ---
 
     Inst_DS__DS_SUB_U32::Inst_DS__DS_SUB_U32(InFmt_DS *iFmt)
diff --git a/src/arch/amdgpu/vega/insts/instructions.hh b/src/arch/amdgpu/vega/insts/instructions.hh
index 1c422481de..33be33ef31 100644
--- a/src/arch/amdgpu/vega/insts/instructions.hh
+++ b/src/arch/amdgpu/vega/insts/instructions.hh
@@ -31211,6 +31211,8 @@ namespace VegaISA
             }
         } // getOperandSize
 
+        void initiateAcc(GPUDynInstPtr gpuDynInst) override;
+        void completeAcc(GPUDynInstPtr gpuDynInst) override;
         void execute(GPUDynInstPtr) override;
     }; // Inst_DS__DS_ADD_U32
 

From 49ac00d060861b20c6715843e90e311bb5034323 Mon Sep 17 00:00:00 2001
From: Hoa Nguyen <hoanguyen@ucdavis.edu>
Date: Sat, 17 Dec 2022 09:02:50 +0000
Subject: [PATCH 085/492] stdlib: Fix errors in
 MESI_Three_Level_Cache_Hierarchy

Change-Id: I60ae47f4336cb1b54bcca3fce3bdd13858daa92a
Signed-off-by: Hoa Nguyen <hoanguyen@ucdavis.edu>
Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/66771
Reviewed-by: Matt Sinclair <mattdsinclair@gmail.com>
Reviewed-by: Jason Lowe-Power <power.jg@gmail.com>
Maintainer: Matt Sinclair <mattdsinclair@gmail.com>
Tested-by: kokoro <noreply+kokoro@google.com>
---
 .../caches/mesi_three_level/dma_controller.py | 22 ++++++++++++++-----
 .../ruby/caches/mesi_three_level/l1_cache.py  | 11 +++++-----
 .../ruby/caches/mesi_three_level/l2_cache.py  |  9 ++++++--
 .../ruby/caches/mesi_three_level/l3_cache.py  |  9 ++++----
 4 files changed, 32 insertions(+), 19 deletions(-)

diff --git a/src/python/gem5/components/cachehierarchies/ruby/caches/mesi_three_level/dma_controller.py b/src/python/gem5/components/cachehierarchies/ruby/caches/mesi_three_level/dma_controller.py
index ab76d4cb5e..f731869f54 100644
--- a/src/python/gem5/components/cachehierarchies/ruby/caches/mesi_three_level/dma_controller.py
+++ b/src/python/gem5/components/cachehierarchies/ruby/caches/mesi_three_level/dma_controller.py
@@ -25,16 +25,26 @@
 # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 
 from ......utils.override import overrides
-from ..abstract_dma_controller import AbstractDMAController
 
-from m5.objects import MessageBuffer
+from m5.objects import MessageBuffer, DMA_Controller
 
 
-class DMAController(AbstractDMAController):
-    def __init__(self, network, cache_line_size):
-        super().__init__(network, cache_line_size)
+class DMAController(DMA_Controller):
+    _version = 0
+
+    @classmethod
+    def _get_version(cls):
+        cls._version += 1
+        return cls._version - 1
+
+    def __init__(self, dma_sequencer, ruby_system):
+        super().__init__(
+            version=self._get_version(),
+            dma_sequencer=dma_sequencer,
+            ruby_system=ruby_system,
+        )
+        self.connectQueues(self.ruby_system.network)
 
-    @overrides(AbstractDMAController)
     def connectQueues(self, network):
         self.mandatoryQueue = MessageBuffer()
         self.responseFromDir = MessageBuffer(ordered=True)
diff --git a/src/python/gem5/components/cachehierarchies/ruby/caches/mesi_three_level/l1_cache.py b/src/python/gem5/components/cachehierarchies/ruby/caches/mesi_three_level/l1_cache.py
index 2ce13d3b08..9f47e411f8 100644
--- a/src/python/gem5/components/cachehierarchies/ruby/caches/mesi_three_level/l1_cache.py
+++ b/src/python/gem5/components/cachehierarchies/ruby/caches/mesi_three_level/l1_cache.py
@@ -68,14 +68,14 @@ class L1Cache(L0Cache_Controller):
         self.Icache = RubyCache(
             size=l1i_size,
             assoc=l1i_assoc,
-            start_index_bit=self.getBlockSizeBits(),
+            start_index_bit=self.getBlockSizeBits(cache_line_size.value),
             is_icache=True,
             replacement_policy=LRURP(),
         )
         self.Dcache = RubyCache(
             size=l1d_size,
             assoc=l1d_assoc,
-            start_index_bit=self.getBlockSizeBits(),
+            start_index_bit=self.getBlockSizeBits(cache_line_size.value),
             is_icache=False,
             replacement_policy=LRURP(),
         )
@@ -88,12 +88,11 @@ class L1Cache(L0Cache_Controller):
         self.response_latency = 2
 
         self.version = self.versionCount()
-        self._cache_line_size = cache_line_size
         self.connectQueues(network)
 
-    def getBlockSizeBits(self):
-        bits = int(math.log(self._cache_line_size, 2))
-        if 2**bits != self._cache_line_size.value:
+    def getBlockSizeBits(self, cache_line_size):
+        bits = int(math.log(cache_line_size, 2))
+        if 2**bits != cache_line_size:
             raise Exception("Cache line size is not a power of 2!")
         return bits
 
diff --git a/src/python/gem5/components/cachehierarchies/ruby/caches/mesi_three_level/l2_cache.py b/src/python/gem5/components/cachehierarchies/ruby/caches/mesi_three_level/l2_cache.py
index e29f566191..d8c965924e 100644
--- a/src/python/gem5/components/cachehierarchies/ruby/caches/mesi_three_level/l2_cache.py
+++ b/src/python/gem5/components/cachehierarchies/ruby/caches/mesi_three_level/l2_cache.py
@@ -67,7 +67,7 @@ class L2Cache(L1Cache_Controller):
         self.cache = RubyCache(
             size=l2_size,
             assoc=l2_assoc,
-            start_index_bit=self.getBlockSizeBits(),
+            start_index_bit=self.getBlockSizeBits(cache_line_size.value),
             is_icache=False,
         )
         # l2_select_num_bits is ruby backend terminology.
@@ -86,9 +86,14 @@ class L2Cache(L1Cache_Controller):
         self.to_l2_latency = 1
 
         self.version = self.versionCount()
-        self._cache_line_size = cache_line_size
         self.connectQueues(network)
 
+    def getBlockSizeBits(self, cache_line_size):
+        bits = int(math.log(cache_line_size, 2))
+        if 2**bits != cache_line_size:
+            raise Exception("Cache line size is not a power of 2!")
+        return bits
+
     def connectQueues(self, network):
         self.mandatoryQueue = MessageBuffer()
         self.optionalQueue = MessageBuffer()
diff --git a/src/python/gem5/components/cachehierarchies/ruby/caches/mesi_three_level/l3_cache.py b/src/python/gem5/components/cachehierarchies/ruby/caches/mesi_three_level/l3_cache.py
index 6d46d1fdf0..0a93d9b0c8 100644
--- a/src/python/gem5/components/cachehierarchies/ruby/caches/mesi_three_level/l3_cache.py
+++ b/src/python/gem5/components/cachehierarchies/ruby/caches/mesi_three_level/l3_cache.py
@@ -54,7 +54,7 @@ class L3Cache(L2Cache_Controller):
         self.L2cache = RubyCache(
             size=l3_size,
             assoc=l3_assoc,
-            start_index_bit=self.getIndexBit(num_l3Caches),
+            start_index_bit=self.getIndexBit(num_l3Caches, cache_line_size),
         )
 
         self.transitions_per_cycle = 4
@@ -64,12 +64,11 @@ class L3Cache(L2Cache_Controller):
         self.to_l1_latency = 1
 
         self.version = self.versionCount()
-        self._cache_line_size = cache_line_size
         self.connectQueues(network)
 
-    def getIndexBit(self, num_l3caches):
-        l3_bits = int(math.log(num_l3caches, 2))
-        bits = int(math.log(self._cache_line_size, 2)) + l3_bits
+    def getIndexBit(self, num_l3Caches, cache_line_size):
+        l3_bits = int(math.log(num_l3Caches, 2))
+        bits = int(math.log(cache_line_size, 2)) + l3_bits
         return bits
 
     def connectQueues(self, network):

From 535727703945f44b08ab251bdc243a54c63cf51f Mon Sep 17 00:00:00 2001
From: Gabriel Busnot <gabriel.busnot@arteris.com>
Date: Tue, 3 Jan 2023 15:33:44 +0000
Subject: [PATCH 086/492] ext-testlib: Improve error reporting when test
 definition fails

The error reason is now reported as an element in the XML testing result
summary.

Change-Id: I18b84422bb9580709cf1c5f2a14a5cbb0caf1876
Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/66892
Reviewed-by: Bobby Bruce <bbruce@ucdavis.edu>
Maintainer: Bobby Bruce <bbruce@ucdavis.edu>
Tested-by: kokoro <noreply+kokoro@google.com>
---
 ext/testlib/result.py | 31 +++++++++++++++++++++----------
 1 file changed, 21 insertions(+), 10 deletions(-)

diff --git a/ext/testlib/result.py b/ext/testlib/result.py
index 5c60342c11..786febde2a 100644
--- a/ext/testlib/result.py
+++ b/ext/testlib/result.py
@@ -191,17 +191,23 @@ class XMLElement(object):
     def begin(self, file_):
         file_.write('<')
         file_.write(self.name)
-        for attr in self.attributes:
-            file_.write(' ')
-            attr.write(file_)
+        if hasattr(self, 'attributes'):
+            for attr in self.attributes:
+                file_.write(' ')
+                attr.write(file_)
         file_.write('>')
 
         self.body(file_)
 
     def body(self, file_):
-        for elem in self.elements:
-            file_.write('\n')
-            elem.write(file_)
+        if hasattr(self, 'elements'):
+            for elem in self.elements:
+                file_.write('\n')
+                elem.write(file_)
+        if hasattr(self, 'content'):
+                file_.write('\n')
+                file_.write(
+                    xml.sax.saxutils.escape(self.content))
         file_.write('\n')
 
     def end(self, file_):
@@ -286,17 +292,22 @@ class JUnitTestCase(XMLElement):
         ]
 
         if str(test_result.result) == 'Failed':
-            self.elements.append(JUnitFailure('Test failed', 'ERROR'))
+            self.elements.append(JUnitFailure(
+                'Test failed',
+                str(test_result.result.reason))
+            )
 
 
 class JUnitFailure(XMLElement):
     name = 'failure'
-    def __init__(self, message, fail_type):
+    def __init__(self, message, cause):
         self.attributes = [
             XMLAttribute('message', message),
-            XMLAttribute('type', fail_type),
         ]
-        self.elements = []
+        cause_element = XMLElement()
+        cause_element.name = 'cause'
+        cause_element.content = cause
+        self.elements = [cause_element]
 
 
 class LargeFileElement(XMLElement):

From 313f557b932786a426f6f613c111005f507f1b24 Mon Sep 17 00:00:00 2001
From: Gabriel Busnot <gabriel.busnot@arteris.com>
Date: Tue, 3 Jan 2023 15:37:38 +0000
Subject: [PATCH 087/492] ext-testlib: Support str-convertible args in
 gem5_verify_config

gem5_verify_config dit not support string-convertible args due to log_call()
not trying to call str() on them. This patch maps str() on the command
paramters.

It is now possible to pass native integers or even string-like types like
pathlib.Path as arguments without manually converting them to string.

Change-Id: Ifa987f5f1a20f17c8710e1a36d99d424e4c9ce6c
Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/66893
Reviewed-by: Bobby Bruce <bbruce@ucdavis.edu>
Maintainer: Bobby Bruce <bbruce@ucdavis.edu>
Tested-by: kokoro <noreply+kokoro@google.com>
---
 ext/testlib/helper.py | 9 ++++++++-
 1 file changed, 8 insertions(+), 1 deletion(-)

diff --git a/ext/testlib/helper.py b/ext/testlib/helper.py
index ed6e325158..ea102f262b 100644
--- a/ext/testlib/helper.py
+++ b/ext/testlib/helper.py
@@ -149,7 +149,14 @@ def log_call(logger, command, time, *popenargs, **kwargs):
     if isinstance(command, str):
         cmdstr = command
     else:
-        cmdstr = ' '.join(command)
+        try:
+            command = list(map(str, command))
+            cmdstr = " ".join(command)
+        except TypeError as e:
+            logger.trace(
+                "Argument <command> must be an iterable of string-convertible types"
+            )
+            raise e
 
     logger_callback = logger.trace
     logger.trace('Logging call to command: %s' % cmdstr)

From 15cb9c7abef94d53135351422284c8651ce0133b Mon Sep 17 00:00:00 2001
From: Simon Park <seminpark@google.com>
Date: Tue, 3 Jan 2023 01:07:03 -0800
Subject: [PATCH 088/492] base: socket: add UnixSocketAddr for representing
 socket paths

Added UnixSocketAddr that wraps around sockaddr_un. Using this
wrapper, users can create both file based sockets as well as
abstract sockets.

Change-Id: Ibf105b92a6a6ac7fc9136ed307f824c83e45c06c
Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/66471
Maintainer: Gabe Black <gabe.black@gmail.com>
Reviewed-by: Gabe Black <gabe.black@gmail.com>
Tested-by: kokoro <noreply+kokoro@google.com>
---
 src/base/SConscript             |  3 +-
 src/base/socket.cc              | 66 ++++++++++++++++++++++++++++
 src/base/socket.hh              | 34 +++++++++++++++
 src/base/socket.test.cc         | 77 +++++++++++++++++++++++++++++++++
 src/base/str.hh                 |  9 ++++
 src/mem/shared_memory_server.cc | 56 +++++++++++-------------
 src/mem/shared_memory_server.hh |  3 +-
 7 files changed, 216 insertions(+), 32 deletions(-)

diff --git a/src/base/SConscript b/src/base/SConscript
index e751d0b5ef..4a6b65fa72 100644
--- a/src/base/SConscript
+++ b/src/base/SConscript
@@ -68,7 +68,8 @@ Source('pollevent.cc')
 Source('random.cc')
 Source('remote_gdb.cc')
 Source('socket.cc')
-GTest('socket.test', 'socket.test.cc', 'socket.cc')
+SourceLib('z', tags='socket_test')
+GTest('socket.test', 'socket.test.cc', 'socket.cc', 'output.cc', with_tag('socket_test'))
 Source('statistics.cc')
 Source('str.cc', add_tags=['gem5 trace', 'gem5 serialize'])
 GTest('str.test', 'str.test.cc', 'str.cc')
diff --git a/src/base/socket.cc b/src/base/socket.cc
index 5cf67fdd90..23f2b40d1a 100644
--- a/src/base/socket.cc
+++ b/src/base/socket.cc
@@ -35,22 +35,88 @@
 #include <netinet/tcp.h>
 #include <sys/socket.h>
 #include <sys/types.h>
+#include <sys/un.h>
 #include <unistd.h>
 
 #include <cerrno>
 
 #include "base/logging.hh"
+#include "base/output.hh"
+#include "base/str.hh"
 #include "base/types.hh"
 #include "sim/byteswap.hh"
 
 namespace gem5
 {
+namespace
+{
+
+bool
+isSocketNameAbstract(const std::string &path)
+{
+    if (path.empty()) {
+        return false;
+    }
+    // No null byte should be present in the path
+    return path.front() == '@';
+}
+
+std::string
+resolve(const std::string &path)
+{
+    if (path.empty()) {
+        return path;
+    }
+    if (isSocketNameAbstract(path)) {
+        return '\0' + path.substr(1);
+    }
+    return simout.resolve(path);
+}
+
+}  // namespace
 
 bool ListenSocket::listeningDisabled = false;
 bool ListenSocket::anyListening = false;
 
 bool ListenSocket::bindToLoopback = false;
 
+UnixSocketAddr
+UnixSocketAddr::build(const std::string &path)
+{
+    sockaddr_un addr = {.sun_family = AF_UNIX, .sun_path = {}};
+
+    const bool is_abstract = isSocketNameAbstract(path);
+    size_t max_len = sizeof(addr.sun_path);
+    if (!is_abstract) {
+        // File based socket names need to be null terminated
+        max_len -= 1;
+    }
+
+    std::string resolved_path = resolve(path);
+    std::string fmt_path = replace(resolved_path, '\0', '@');
+    if (resolved_path.size() > max_len) {
+        resolved_path = resolved_path.substr(0, max_len);
+        const std::string untruncated_path = std::move(fmt_path);
+        fmt_path = replace(resolved_path, '\0', '@');
+        warn("SocketPath: unix socket path truncated from '%s' to '%s'",
+             untruncated_path, fmt_path);
+    }
+
+    // We can't use strncpy here, since abstract sockets start with \0 which
+    // will make strncpy think that the string is empty.
+    memcpy(addr.sun_path, resolved_path.c_str(), resolved_path.size());
+    // We can't use sizeof(sockaddr_un) for abstract sockets, since all
+    // sizeof(sun_path) bytes are used in representing the path.
+    const size_t path_size =
+        is_abstract ? resolved_path.size() : sizeof(addr.sun_path);
+    const size_t addr_size = offsetof(sockaddr_un, sun_path) + path_size;
+
+    return UnixSocketAddr{.addr = std::move(addr),
+                          .addrSize = addr_size,
+                          .isAbstract = is_abstract,
+                          .formattedPath = std::move(fmt_path)};
+}
+
 void
 ListenSocket::cleanup()
 {
diff --git a/src/base/socket.hh b/src/base/socket.hh
index 3375ccc80a..f3b2760518 100644
--- a/src/base/socket.hh
+++ b/src/base/socket.hh
@@ -31,10 +31,44 @@
 
 #include <sys/socket.h>
 #include <sys/types.h>
+#include <sys/un.h>
+
+#include <string>
 
 namespace gem5
 {
 
+/**
+ * @brief Wrapper around sockaddr_un, so that it can be used for both file
+ * based unix sockets as well as abstract unix sockets.
+ */
+struct UnixSocketAddr
+{
+    /**
+     * @brief Builds UnixSocketAddr from the given path.
+     * @pre: `path` either represents a file based unix socket, or an abstract
+     *       unix socket. If `path` represents an abstract socket, it should
+     *       start with the character '@', and it should not have any null
+     *       bytes in the name.
+     * @param path: Pathname, where the socket should be instantiated.
+     * @return UnixSocketAddr
+     */
+    static UnixSocketAddr build(const std::string &path);
+
+    sockaddr_un addr;
+    // Size of `sockaddr_un addr`. This is equal to sizeof(sockaddr_un) if
+    // `addr` represents a normal file based unix socket. For abstract sockets
+    // however, the size could be different. Because all sizeof(sun_path) is
+    // used to represent the name of an abstract socket, addrSize for abstract
+    // sockets only count the number of characters actually used by sun_path,
+    // excluding any trailing null bytes.
+    size_t addrSize;
+    bool isAbstract;
+    // Formatted string for file based sockets look the same as addr.sun_path.
+    // For abstract sockets however, all null bytes are replaced with @
+    std::string formattedPath;
+};
+
 class ListenSocket
 {
   protected:
diff --git a/src/base/socket.test.cc b/src/base/socket.test.cc
index a267f8ce43..1ab1f21070 100644
--- a/src/base/socket.test.cc
+++ b/src/base/socket.test.cc
@@ -28,6 +28,10 @@
 
 #include <gtest/gtest.h>
 
+#include <cstring>
+#include <sstream>
+#include <utility>
+
 #include "base/gtest/logging.hh"
 #include "base/socket.hh"
 
@@ -41,6 +45,79 @@ using namespace gem5;
  * socket.cc have not been fully tested due to interaction with system-calls.
  */
 
+namespace {
+
+std::string
+repeat(const std::string& str, size_t n)
+{
+    std::stringstream ss;
+    for (int i = 0; i < n; ++i) {
+        ss << str;
+    }
+    return ss.str();
+}
+
+} // namespace
+
+TEST(UnixSocketAddrTest, AbstractSocket)
+{
+    UnixSocketAddr sock_addr = UnixSocketAddr::build("@abstract");
+    EXPECT_EQ(AF_UNIX, sock_addr.addr.sun_family);
+    // null byte will not show, so compare from the first byte
+    EXPECT_STREQ("abstract", sock_addr.addr.sun_path + 1);
+    EXPECT_TRUE(sock_addr.isAbstract);
+    EXPECT_STREQ("@abstract", sock_addr.formattedPath.c_str());
+}
+
+TEST(UnixSocketAddrTest, TruncatedAbstractSocket)
+{
+    // Test that address is truncated if longer than sizeof(sun_path)
+    constexpr size_t MaxSize = sizeof(std::declval<sockaddr_un>().sun_path);
+
+    // >sizeof(sun_path) bytes
+    std::string addr = "@" + repeat("123456789", 100);
+    ASSERT_GT(addr.size(), MaxSize);
+    std::string truncated_addr = addr.substr(0, MaxSize);
+
+    UnixSocketAddr sock_addr = UnixSocketAddr::build(addr);
+    EXPECT_EQ(AF_UNIX, sock_addr.addr.sun_family);
+    // Use memcmp so that we can compare null bytes as well
+    std::string null_formatted = '\0' + truncated_addr.substr(1);
+    EXPECT_EQ(0, std::memcmp(null_formatted.c_str(), sock_addr.addr.sun_path,
+                             MaxSize));
+    EXPECT_TRUE(sock_addr.isAbstract);
+    EXPECT_EQ(truncated_addr, sock_addr.formattedPath);
+}
+
+TEST(UnixSocketAddrTest, FileBasedSocket)
+{
+    std::string addr = "/home/parent/dir/x";
+    UnixSocketAddr sock_addr = UnixSocketAddr::build(addr);
+    EXPECT_EQ(AF_UNIX, sock_addr.addr.sun_family);
+    EXPECT_STREQ(addr.c_str(), sock_addr.addr.sun_path);
+    EXPECT_FALSE(sock_addr.isAbstract);
+    EXPECT_EQ(addr, sock_addr.formattedPath);
+}
+
+TEST(UnixSocketAddrTest, TruncatedFileBasedSocket)
+{
+    // sun_path should null terminate, so test that address is truncated if
+    // longer than sizeof(sun_path) - 1 bytes.
+    constexpr size_t MaxSize =
+        sizeof(std::declval<sockaddr_un>().sun_path) - 1;
+
+    // >sizeof(sun_path) - 1 bytes
+    std::string addr = "/" + repeat("123456789", 100);
+    ASSERT_GT(addr.size(), MaxSize);
+    std::string truncated_addr = addr.substr(0, MaxSize);
+
+    UnixSocketAddr sock_addr = UnixSocketAddr::build(addr);
+    EXPECT_EQ(AF_UNIX, sock_addr.addr.sun_family);
+    EXPECT_STREQ(truncated_addr.c_str(), sock_addr.addr.sun_path);
+    EXPECT_FALSE(sock_addr.isAbstract);
+    EXPECT_EQ(truncated_addr, sock_addr.formattedPath);
+}
+
 class MockListenSocket : public ListenSocket
 {
   public:
diff --git a/src/base/str.hh b/src/base/str.hh
index 00409ff3d7..855fb43b28 100644
--- a/src/base/str.hh
+++ b/src/base/str.hh
@@ -32,6 +32,7 @@
 #ifndef __BASE_STR_HH__
 #define __BASE_STR_HH__
 
+#include <algorithm>
 #include <cstring>
 #include <limits>
 #include <locale>
@@ -251,6 +252,14 @@ startswith(const std::string &s, const std::string &prefix)
     return (s.compare(0, prefix.size(), prefix) == 0);
 }
 
+inline std::string
+replace(const std::string &s, char from, char to)
+{
+    std::string replaced = s;
+    std::replace(replaced.begin(), replaced.end(), from, to);
+    return replaced;
+}
+
 } // namespace gem5
 
 #endif //__BASE_STR_HH__
diff --git a/src/mem/shared_memory_server.cc b/src/mem/shared_memory_server.cc
index bee663bd37..6344ee0388 100644
--- a/src/mem/shared_memory_server.cc
+++ b/src/mem/shared_memory_server.cc
@@ -34,7 +34,6 @@
 #include <sys/socket.h>
 #include <sys/stat.h>
 #include <sys/types.h>
-#include <sys/un.h>
 #include <unistd.h>
 
 #include <algorithm>
@@ -44,7 +43,6 @@
 #include "base/logging.hh"
 #include "base/output.hh"
 #include "base/pollevent.hh"
-#include "base/socket.hh"
 
 namespace gem5
 {
@@ -52,51 +50,49 @@ namespace memory
 {
 
 SharedMemoryServer::SharedMemoryServer(const SharedMemoryServerParams& params)
-    : SimObject(params), unixSocketPath(simout.resolve(params.server_path)),
-      system(params.system), serverFd(-1)
+    : SimObject(params),
+      sockAddr(UnixSocketAddr::build(params.server_path)),
+      system(params.system),
+      serverFd(-1)
 {
     fatal_if(system == nullptr, "Requires a system to share memory from!");
     // Create a new unix socket.
     serverFd = ListenSocket::socketCloexec(AF_UNIX, SOCK_STREAM, 0);
     panic_if(serverFd < 0, "%s: cannot create unix socket: %s", name(),
              strerror(errno));
-    // Bind to the specified path.
-    sockaddr_un serv_addr = {};
-    serv_addr.sun_family = AF_UNIX;
-    strncpy(serv_addr.sun_path, unixSocketPath.c_str(),
-            sizeof(serv_addr.sun_path) - 1);
-    // If the target path is truncated, warn the user that the actual path is
-    // different and update the target path.
-    if (strlen(serv_addr.sun_path) != unixSocketPath.size()) {
-        warn("%s: unix socket path truncated, expect '%s' but get '%s'",
-             name(), unixSocketPath, serv_addr.sun_path);
-        unixSocketPath = serv_addr.sun_path;
+
+    const auto& [serv_addr, addr_size, is_abstract, formatted_path] = sockAddr;
+
+    if (!is_abstract) {
+        // Ensure the unix socket path to use is not occupied. Also, if there's
+        // actually anything to be removed, warn the user something might be
+        // off.
+        bool old_sock_removed = unlink(serv_addr.sun_path) == 0;
+        warn_if(old_sock_removed,
+                "%s: server path %s was occupied and will be replaced. Please "
+                "make sure there is no other server using the same path.",
+                name(), serv_addr.sun_path);
     }
-    // Ensure the unix socket path to use is not occupied. Also, if there's
-    // actually anything to be removed, warn the user something might be off.
-    bool old_sock_removed = unlink(unixSocketPath.c_str()) == 0;
-    warn_if(old_sock_removed,
-            "%s: the server path %s was occupied and will be replaced. Please "
-            "make sure there is no other server using the same path.",
-            name(), unixSocketPath);
-    int bind_retv = bind(serverFd, reinterpret_cast<sockaddr*>(&serv_addr),
-                         sizeof(serv_addr));
-    fatal_if(bind_retv != 0, "%s: cannot bind unix socket: %s", name(),
-             strerror(errno));
+    int bind_retv = bind(
+        serverFd, reinterpret_cast<const sockaddr*>(&serv_addr), addr_size);
+    fatal_if(bind_retv != 0, "%s: cannot bind unix socket '%s': %s", name(),
+             formatted_path, strerror(errno));
     // Start listening.
     int listen_retv = listen(serverFd, 1);
     fatal_if(listen_retv != 0, "%s: listen failed: %s", name(),
              strerror(errno));
     listenSocketEvent.reset(new ListenSocketEvent(serverFd, this));
     pollQueue.schedule(listenSocketEvent.get());
-    inform("%s: listening at %s", name(), unixSocketPath);
+    inform("%s: listening at %s", name(), formatted_path);
 }
 
 SharedMemoryServer::~SharedMemoryServer()
 {
-    int unlink_retv = unlink(unixSocketPath.c_str());
-    warn_if(unlink_retv != 0, "%s: cannot unlink unix socket: %s", name(),
-            strerror(errno));
+    if (!sockAddr.isAbstract) {
+        int unlink_retv = unlink(sockAddr.addr.sun_path);
+        warn_if(unlink_retv != 0, "%s: cannot unlink unix socket: %s", name(),
+                strerror(errno));
+    }
     int close_retv = close(serverFd);
     warn_if(close_retv != 0, "%s: cannot close unix socket: %s", name(),
             strerror(errno));
diff --git a/src/mem/shared_memory_server.hh b/src/mem/shared_memory_server.hh
index 8f573fef3b..d9fbeb3f20 100644
--- a/src/mem/shared_memory_server.hh
+++ b/src/mem/shared_memory_server.hh
@@ -33,6 +33,7 @@
 #include <unordered_map>
 
 #include "base/pollevent.hh"
+#include "base/socket.hh"
 #include "params/SharedMemoryServer.hh"
 #include "sim/sim_object.hh"
 #include "sim/system.hh"
@@ -82,7 +83,7 @@ class SharedMemoryServer : public SimObject
         void process(int revent) override;
     };
 
-    std::string unixSocketPath;
+    UnixSocketAddr sockAddr;
     System* system;
 
     int serverFd;

From a533cb246c9e0fa373a65df3e51f9dc0f570f7ac Mon Sep 17 00:00:00 2001
From: Giacomo Travaglini <giacomo.travaglini@arm.com>
Date: Mon, 19 Dec 2022 16:08:29 +0000
Subject: [PATCH 089/492] scons: Include libraries when building gem5 as a
 shared object

While we include shared libraries in the Executable class, we
are not doing it when linking the SharedLib. This means the
resulting Shared library won't have the library as a dependency
(it won't appear in ldd) and the symbols will remain undefined.

Any executable will fail to link with the shared library as
the executable will contain undefined references.

This bug was exposed when I tried to link util/tlm sources with
libgem5.so. As I have libpng/libpng-dev installed in my machine,
the shared library included libpng headers, but didn't link
to the library as scons didn't append "-lpng" to the linking CL.
Those png functions thus remained ubdefined symbols.

Change-Id: Id9c4a65607a7177f71659f1ac400a67edf7080fd
Signed-off-by: Giacomo Travaglini <giacomo.travaglini@arm.com>
Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/66855
Tested-by: kokoro <noreply+kokoro@google.com>
Reviewed-by: Gabe Black <gabe.black@gmail.com>
Maintainer: Daniel Carvalho <odanrc@yahoo.com.br>
Reviewed-by: Daniel Carvalho <odanrc@yahoo.com.br>
Reviewed-by: Bobby Bruce <bbruce@ucdavis.edu>
---
 src/SConscript | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/src/SConscript b/src/SConscript
index 4e7139c064..51b4bd9b3b 100644
--- a/src/SConscript
+++ b/src/SConscript
@@ -376,6 +376,12 @@ class SharedLib(TopLevelBase):
     def declare(self, env):
         objs = self.srcs_to_objs(env, self.sources(env))
 
+        libs = self.libs(env)
+        # Higher priority libraries should be earlier in the list.
+        libs.sort(key=lambda l: l.priority, reverse=True)
+        if libs:
+            env.Append(LIBS=list(lib.source for lib in libs))
+
         date_obj = env.SharedObject(date_source)
         env.Depends(date_obj, objs)
 

From 7238df7859936a826159d93e36dc0bf2611ac4ef Mon Sep 17 00:00:00 2001
From: Giacomo Travaglini <giacomo.travaglini@arm.com>
Date: Tue, 20 Dec 2022 08:43:59 +0000
Subject: [PATCH 090/492] util: Update run_gem5_fs.sh script with AArch64
 platform

The example script is using VExpress_EMM, which is a deprecated platform
and it is referring to an AArch32 kernel. With this patch we
use the VExpress_GEM5_Foundation platform instead and point
to a AArch64 kernel

Change-Id: I961d5d5de71bc284c7492ee7b04088148909ca1b
Signed-off-by: Giacomo Travaglini <giacomo.travaglini@arm.com>
Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/66856
Maintainer: Daniel Carvalho <odanrc@yahoo.com.br>
Reviewed-by: Matthias Jung <jungma@eit.uni-kl.de>
Reviewed-by: Daniel Carvalho <odanrc@yahoo.com.br>
Tested-by: kokoro <noreply+kokoro@google.com>
---
 util/tlm/README         | 7 +++----
 util/tlm/run_gem5_fs.sh | 6 +++---
 2 files changed, 6 insertions(+), 7 deletions(-)

diff --git a/util/tlm/README b/util/tlm/README
index 8098afa0e5..3ae43c5909 100644
--- a/util/tlm/README
+++ b/util/tlm/README
@@ -145,10 +145,9 @@ C++-configured gem5 using normal gem5
 > ../../build/ARM/gem5.opt ../../configs/example/fs.py               \
   --tlm-memory=transactor --cpu-type=TimingSimpleCPU --num-cpu=1     \
   --mem-type=SimpleMemory --mem-size=512MB --mem-channels=1 --caches \
-  --l2cache --machine-type=VExpress_EMM                              \
-  --dtb-filename=vexpress.aarch32.ll_20131205.0-gem5.1cpu.dtb        \
-  --kernel=vmlinux.aarch32.ll_20131205.0-gem5                        \
-  --disk-image=linux-aarch32-ael.img
+  --l2cache --machine-type=VExpress_GEM5_Foundation                  \
+  --kernel=vmlinux.arm64                                             \
+  --disk-image=ubuntu-18.04-arm64-docker.img
 
 The message "fatal: Can't find port handler type 'tlm_slave'" is okay.
 The configuration will be stored in the m5out/ directory
diff --git a/util/tlm/run_gem5_fs.sh b/util/tlm/run_gem5_fs.sh
index 9065cbf9f5..d8ab847031 100755
--- a/util/tlm/run_gem5_fs.sh
+++ b/util/tlm/run_gem5_fs.sh
@@ -42,9 +42,9 @@ echo -e "\n${BGre}Create gem5 Configuration${RCol}\n"
 --mem-size=512MB                                            \
 --mem-channels=1                                            \
 --caches --l2cache                                          \
---machine-type=VExpress_EMM                                 \
---dtb-filename=vexpress.aarch32.ll_20131205.0-gem5.1cpu.dtb \
---kernel=vmlinux.aarch32.ll_20131205.0-gem5
+--machine-type=VExpress_GEM5_Foundation                     \
+--kernel=vmlinux.arm64                                      \
+--disk-image=ubuntu-18.04-arm64-docker.img
 
 echo -e "\n${BGre}Run gem5 ${RCol}\n"
 

From 160815f482735a5834be37970f9a6e26b4755238 Mon Sep 17 00:00:00 2001
From: Matthew Poremba <matthew.poremba@amd.com>
Date: Mon, 26 Dec 2022 09:18:49 -0800
Subject: [PATCH 091/492] base: Specialize bitwise atomics so FP types can be
 used

The current atomic memory operations are templated so any type can be
used. However floating point types can not perform bitwise operations.
The GPU model contains some instructions which do atomics on floating
point types, so they need to be supported. To allow this, template
specialization is added to atomic AND, OR, and XOR which does nothing
if the type is floating point and operates as normal for integral
types.

Change-Id: I60f935756355462e99c59a9da032c5bf5afa246c
Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/67073
Reviewed-by: Matt Sinclair <mattdsinclair@gmail.com>
Reviewed-by: Daniel Carvalho <odanrc@yahoo.com.br>
Tested-by: kokoro <noreply+kokoro@google.com>
Maintainer: Matt Sinclair <mattdsinclair@gmail.com>
---
 src/base/amo.hh | 33 ++++++++++++++++++++++++++++++---
 1 file changed, 30 insertions(+), 3 deletions(-)

diff --git a/src/base/amo.hh b/src/base/amo.hh
index 81bf069c50..c990d158b4 100644
--- a/src/base/amo.hh
+++ b/src/base/amo.hh
@@ -129,30 +129,57 @@ class AtomicGenericPair3Op : public TypedAtomicOpFunctor<T>
 template<typename T>
 class AtomicOpAnd : public TypedAtomicOpFunctor<T>
 {
+    // Bitwise operations are only legal on integral types
+    template<typename B>
+    typename std::enable_if<std::is_integral<B>::value, void>::type
+    executeImpl(B *b) { *b &= a; }
+
+    template<typename B>
+    typename std::enable_if<!std::is_integral<B>::value, void>::type
+    executeImpl(B *b) { }
+
   public:
     T a;
     AtomicOpAnd(T _a) : a(_a) { }
-    void execute(T *b) { *b &= a; }
+    void execute(T *b) { executeImpl<T>(b); }
     AtomicOpFunctor* clone () { return new AtomicOpAnd(a); }
 };
 
 template<typename T>
 class AtomicOpOr : public TypedAtomicOpFunctor<T>
 {
+    // Bitwise operations are only legal on integral types
+    template<typename B>
+    typename std::enable_if<std::is_integral<B>::value, void>::type
+    executeImpl(B *b) { *b |= a; }
+
+    template<typename B>
+    typename std::enable_if<!std::is_integral<B>::value, void>::type
+    executeImpl(B *b) { }
+
   public:
     T a;
     AtomicOpOr(T _a) : a(_a) { }
-    void execute(T *b) { *b |= a; }
+    void execute(T *b) { executeImpl<T>(b); }
     AtomicOpFunctor* clone () { return new AtomicOpOr(a); }
 };
 
 template<typename T>
 class AtomicOpXor : public TypedAtomicOpFunctor<T>
 {
+    // Bitwise operations are only legal on integral types
+    template<typename B>
+    typename std::enable_if<std::is_integral<B>::value, void>::type
+    executeImpl(B *b) { *b ^= a; }
+
+    template<typename B>
+    typename std::enable_if<!std::is_integral<B>::value, void>::type
+    executeImpl(B *b) { }
+
   public:
     T a;
     AtomicOpXor(T _a) : a(_a) {}
-    void execute(T *b) { *b ^= a; }
+    void execute(T *b) { executeImpl<T>(b); }
     AtomicOpFunctor* clone () { return new AtomicOpXor(a); }
 };
 

From 892e3057f7b10f2c869bae40e908a018f781611c Mon Sep 17 00:00:00 2001
From: Matthew Poremba <matthew.poremba@amd.com>
Date: Mon, 26 Dec 2022 09:13:06 -0800
Subject: [PATCH 092/492] arch-vega: Implement ds_add_f32 atomic

This instruction does an atomic add of a 32-bit float with a VGPR and
value in LDS atomically without return.

Change-Id: Id4f23a1ab587a23edfd1d88ede1cbcc5bdedc0cb
Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/67074
Maintainer: Matt Sinclair <mattdsinclair@gmail.com>
Reviewed-by: Matt Sinclair <mattdsinclair@gmail.com>
Tested-by: kokoro <noreply+kokoro@google.com>
---
 src/arch/amdgpu/vega/insts/instructions.cc | 49 ++++++++++++++++++++--
 src/arch/amdgpu/vega/insts/instructions.hh |  2 +
 2 files changed, 48 insertions(+), 3 deletions(-)

diff --git a/src/arch/amdgpu/vega/insts/instructions.cc b/src/arch/amdgpu/vega/insts/instructions.cc
index afdfde3855..3d9808ac7c 100644
--- a/src/arch/amdgpu/vega/insts/instructions.cc
+++ b/src/arch/amdgpu/vega/insts/instructions.cc
@@ -34755,6 +34755,10 @@ namespace VegaISA
         : Inst_DS(iFmt, "ds_add_f32")
     {
         setFlag(F32);
+        setFlag(MemoryRef);
+        setFlag(GroupSegment);
+        setFlag(AtomicAdd);
+        setFlag(AtomicNoReturn);
     } // Inst_DS__DS_ADD_F32
 
     Inst_DS__DS_ADD_F32::~Inst_DS__DS_ADD_F32()
@@ -34763,15 +34767,54 @@ namespace VegaISA
 
     // --- description from .arch file ---
     // 32b:
-    // tmp = MEM[ADDR];
     // MEM[ADDR] += DATA;
-    // RETURN_DATA = tmp.
     // Floating point add that handles NaN/INF/denormal values.
     void
     Inst_DS__DS_ADD_F32::execute(GPUDynInstPtr gpuDynInst)
     {
-        panicUnimplemented();
+        Wavefront *wf = gpuDynInst->wavefront();
+
+        if (gpuDynInst->exec_mask.none()) {
+            wf->decLGKMInstsIssued();
+            return;
+        }
+
+        gpuDynInst->execUnitId = wf->execUnitId;
+        gpuDynInst->latency.init(gpuDynInst->computeUnit());
+        gpuDynInst->latency.set(
+                gpuDynInst->computeUnit()->cyclesToTicks(Cycles(24)));
+        ConstVecOperandU32 addr(gpuDynInst, extData.ADDR);
+        ConstVecOperandF32 data(gpuDynInst, extData.DATA0);
+
+        addr.read();
+        data.read();
+
+        calcAddr(gpuDynInst, addr);
+
+        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
+            if (gpuDynInst->exec_mask[lane]) {
+                (reinterpret_cast<VecElemF32*>(gpuDynInst->a_data))[lane]
+                    = data[lane];
+            }
+        }
+
+        gpuDynInst->computeUnit()->localMemoryPipe.issueRequest(gpuDynInst);
     } // execute
+
+    void
+    Inst_DS__DS_ADD_F32::initiateAcc(GPUDynInstPtr gpuDynInst)
+    {
+        Addr offset0 = instData.OFFSET0;
+        Addr offset1 = instData.OFFSET1;
+        Addr offset = (offset1 << 8) | offset0;
+
+        initAtomicAccess<VecElemF32>(gpuDynInst, offset);
+    } // initiateAcc
+
+    void
+    Inst_DS__DS_ADD_F32::completeAcc(GPUDynInstPtr gpuDynInst)
+    {
+    } // completeAcc
     // --- Inst_DS__DS_WRITE_B8 class methods ---
 
     Inst_DS__DS_WRITE_B8::Inst_DS__DS_WRITE_B8(InFmt_DS *iFmt)
diff --git a/src/arch/amdgpu/vega/insts/instructions.hh b/src/arch/amdgpu/vega/insts/instructions.hh
index 33be33ef31..05a0002b25 100644
--- a/src/arch/amdgpu/vega/insts/instructions.hh
+++ b/src/arch/amdgpu/vega/insts/instructions.hh
@@ -31895,6 +31895,8 @@ namespace VegaISA
             }
         } // getOperandSize
 
+        void initiateAcc(GPUDynInstPtr gpuDynInst) override;
+        void completeAcc(GPUDynInstPtr gpuDynInst) override;
         void execute(GPUDynInstPtr) override;
     }; // Inst_DS__DS_ADD_F32
 

From b83457df0bee2cec66bf377d17b95c867d8ef025 Mon Sep 17 00:00:00 2001
From: Matthew Poremba <matthew.poremba@amd.com>
Date: Mon, 26 Dec 2022 09:54:20 -0800
Subject: [PATCH 093/492] arch-vega: Implement ds_add_u64

This instruction does an atomic add of an unsigned 64-bit data with a
VGPR and value in LDS atomically without return.

Change-Id: I6a7d6713b256607c4e69ddbdef5c83172493c077
Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/67075
Reviewed-by: Matt Sinclair <mattdsinclair@gmail.com>
Maintainer: Matt Sinclair <mattdsinclair@gmail.com>
Tested-by: kokoro <noreply+kokoro@google.com>
---
 src/arch/amdgpu/vega/insts/instructions.cc | 49 ++++++++++++++++++++--
 src/arch/amdgpu/vega/insts/instructions.hh |  2 +
 2 files changed, 48 insertions(+), 3 deletions(-)

diff --git a/src/arch/amdgpu/vega/insts/instructions.cc b/src/arch/amdgpu/vega/insts/instructions.cc
index 3d9808ac7c..a54f426837 100644
--- a/src/arch/amdgpu/vega/insts/instructions.cc
+++ b/src/arch/amdgpu/vega/insts/instructions.cc
@@ -36088,6 +36088,10 @@ namespace VegaISA
     Inst_DS__DS_ADD_U64::Inst_DS__DS_ADD_U64(InFmt_DS *iFmt)
         : Inst_DS(iFmt, "ds_add_u64")
     {
+        setFlag(MemoryRef);
+        setFlag(GroupSegment);
+        setFlag(AtomicAdd);
+        setFlag(AtomicNoReturn);
     } // Inst_DS__DS_ADD_U64
 
     Inst_DS__DS_ADD_U64::~Inst_DS__DS_ADD_U64()
@@ -36096,14 +36100,53 @@ namespace VegaISA
 
     // --- description from .arch file ---
     // 64b:
-    // tmp = MEM[ADDR];
     // MEM[ADDR] += DATA[0:1];
-    // RETURN_DATA[0:1] = tmp.
     void
     Inst_DS__DS_ADD_U64::execute(GPUDynInstPtr gpuDynInst)
     {
-        panicUnimplemented();
+        Wavefront *wf = gpuDynInst->wavefront();
+
+        if (gpuDynInst->exec_mask.none()) {
+            wf->decLGKMInstsIssued();
+            return;
+        }
+
+        gpuDynInst->execUnitId = wf->execUnitId;
+        gpuDynInst->latency.init(gpuDynInst->computeUnit());
+        gpuDynInst->latency.set(
+                gpuDynInst->computeUnit()->cyclesToTicks(Cycles(24)));
+        ConstVecOperandU32 addr(gpuDynInst, extData.ADDR);
+        ConstVecOperandU64 data(gpuDynInst, extData.DATA0);
+
+        addr.read();
+        data.read();
+
+        calcAddr(gpuDynInst, addr);
+
+        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
+            if (gpuDynInst->exec_mask[lane]) {
+                (reinterpret_cast<VecElemU64*>(gpuDynInst->a_data))[lane]
+                    = data[lane];
+            }
+        }
+
+        gpuDynInst->computeUnit()->localMemoryPipe.issueRequest(gpuDynInst);
     } // execute
+
+    void
+    Inst_DS__DS_ADD_U64::initiateAcc(GPUDynInstPtr gpuDynInst)
+    {
+        Addr offset0 = instData.OFFSET0;
+        Addr offset1 = instData.OFFSET1;
+        Addr offset = (offset1 << 8) | offset0;
+
+        initAtomicAccess<VecElemU64>(gpuDynInst, offset);
+    } // initiateAcc
+
+    void
+    Inst_DS__DS_ADD_U64::completeAcc(GPUDynInstPtr gpuDynInst)
+    {
+    } // completeAcc
     // --- Inst_DS__DS_SUB_U64 class methods ---
 
     Inst_DS__DS_SUB_U64::Inst_DS__DS_SUB_U64(InFmt_DS *iFmt)
diff --git a/src/arch/amdgpu/vega/insts/instructions.hh b/src/arch/amdgpu/vega/insts/instructions.hh
index 05a0002b25..f8fc98b647 100644
--- a/src/arch/amdgpu/vega/insts/instructions.hh
+++ b/src/arch/amdgpu/vega/insts/instructions.hh
@@ -33079,6 +33079,8 @@ namespace VegaISA
             }
         } // getOperandSize
 
+        void initiateAcc(GPUDynInstPtr gpuDynInst) override;
+        void completeAcc(GPUDynInstPtr gpuDynInst) override;
         void execute(GPUDynInstPtr) override;
     }; // Inst_DS__DS_ADD_U64
 

From 3bfa220e4ecd098de36d81a171593b14d7551583 Mon Sep 17 00:00:00 2001
From: Matthew Poremba <matthew.poremba@amd.com>
Date: Mon, 26 Dec 2022 13:27:06 -0800
Subject: [PATCH 094/492] arch-vega: Implement ds_read_i8

Read one byte with sign extended from LDS.

Change-Id: I9cb9b4033c6f834241cba944bc7e6a7ebc5401be
Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/67076
Maintainer: Matt Sinclair <mattdsinclair@gmail.com>
Tested-by: kokoro <noreply+kokoro@google.com>
Reviewed-by: Matt Sinclair <mattdsinclair@gmail.com>
---
 src/arch/amdgpu/vega/insts/instructions.cc | 44 +++++++++++++++++++++-
 src/arch/amdgpu/vega/insts/instructions.hh |  2 +
 2 files changed, 45 insertions(+), 1 deletion(-)

diff --git a/src/arch/amdgpu/vega/insts/instructions.cc b/src/arch/amdgpu/vega/insts/instructions.cc
index a54f426837..c803656996 100644
--- a/src/arch/amdgpu/vega/insts/instructions.cc
+++ b/src/arch/amdgpu/vega/insts/instructions.cc
@@ -35636,8 +35636,50 @@ namespace VegaISA
     void
     Inst_DS__DS_READ_I8::execute(GPUDynInstPtr gpuDynInst)
     {
-        panicUnimplemented();
+        Wavefront *wf = gpuDynInst->wavefront();
+
+        if (gpuDynInst->exec_mask.none()) {
+            wf->decLGKMInstsIssued();
+            return;
+        }
+
+        gpuDynInst->execUnitId = wf->execUnitId;
+        gpuDynInst->latency.init(gpuDynInst->computeUnit());
+        gpuDynInst->latency.set(
+                gpuDynInst->computeUnit()->cyclesToTicks(Cycles(24)));
+        ConstVecOperandU32 addr(gpuDynInst, extData.ADDR);
+
+        addr.read();
+
+        calcAddr(gpuDynInst, addr);
+
+        gpuDynInst->computeUnit()->localMemoryPipe.issueRequest(gpuDynInst);
     } // execute
+
+    void
+    Inst_DS__DS_READ_I8::initiateAcc(GPUDynInstPtr gpuDynInst)
+    {
+        Addr offset0 = instData.OFFSET0;
+        Addr offset1 = instData.OFFSET1;
+        Addr offset = (offset1 << 8) | offset0;
+
+        initMemRead<VecElemI8>(gpuDynInst, offset);
+    } // initiateAcc
+
+    void
+    Inst_DS__DS_READ_I8::completeAcc(GPUDynInstPtr gpuDynInst)
+    {
+        VecOperandU32 vdst(gpuDynInst, extData.VDST);
+
+        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
+            if (gpuDynInst->exec_mask[lane]) {
+                vdst[lane] = (VecElemU32)sext<8>((reinterpret_cast<VecElemI8*>(
+                    gpuDynInst->d_data))[lane]);
+            }
+        }
+
+        vdst.write();
+    } // completeAcc
     // --- Inst_DS__DS_READ_U8 class methods ---
 
     Inst_DS__DS_READ_U8::Inst_DS__DS_READ_U8(InFmt_DS *iFmt)
diff --git a/src/arch/amdgpu/vega/insts/instructions.hh b/src/arch/amdgpu/vega/insts/instructions.hh
index f8fc98b647..b2cf2b9705 100644
--- a/src/arch/amdgpu/vega/insts/instructions.hh
+++ b/src/arch/amdgpu/vega/insts/instructions.hh
@@ -32848,6 +32848,8 @@ namespace VegaISA
         } // getOperandSize
 
         void execute(GPUDynInstPtr) override;
+        void initiateAcc(GPUDynInstPtr) override;
+        void completeAcc(GPUDynInstPtr) override;
     }; // Inst_DS__DS_READ_I8
 
     class Inst_DS__DS_READ_U8 : public Inst_DS

From 450bc254bd31260f24e2c5068e2c6295d603b87a Mon Sep 17 00:00:00 2001
From: Matthew Poremba <matthew.poremba@amd.com>
Date: Mon, 26 Dec 2022 15:08:23 -0800
Subject: [PATCH 095/492] arch-vega: Read one dword for SGPR base global insts

Global instructions in Vega can either use a VGPR base address plus
instruction offset or SGPR base address plus VGPR offset plus
instruction offset. Currently the VGPR address/offset is always read as
two dwords. This causes problems if the VGPR number is the last VGPR
allocated to a wavefront since the second dword would be beyond the
allocation and trip an assert.

This changeset sets the operand size of the VGPR operand to one dword
when SGPR base is used and two dwords otherwise so initDynOperandInfo
does not assert. It also moves the read of the VGPR into the calcAddr
method so that the correct ConstVecOperandU## is used to prevent another
assertion failure when reading from the register file. These two changes
are made to all flat instructions, as global instructions are a
subsegement of flat instructions.

Change-Id: I79030771aa6deec05ffa5853ca2d8b68943ee0a0
Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/67077
Reviewed-by: Matt Sinclair <mattdsinclair@gmail.com>
Maintainer: Matt Sinclair <mattdsinclair@gmail.com>
Tested-by: kokoro <noreply+kokoro@google.com>
---
 src/arch/amdgpu/vega/insts/instructions.cc | 80 +++++-----------------
 src/arch/amdgpu/vega/insts/instructions.hh | 80 +++++++++++-----------
 src/arch/amdgpu/vega/insts/op_encodings.hh | 20 ++++--
 3 files changed, 73 insertions(+), 107 deletions(-)

diff --git a/src/arch/amdgpu/vega/insts/instructions.cc b/src/arch/amdgpu/vega/insts/instructions.cc
index c803656996..4b27afa65d 100644
--- a/src/arch/amdgpu/vega/insts/instructions.cc
+++ b/src/arch/amdgpu/vega/insts/instructions.cc
@@ -43831,11 +43831,7 @@ namespace VegaISA
         gpuDynInst->latency.init(gpuDynInst->computeUnit());
         gpuDynInst->latency.set(gpuDynInst->computeUnit()->clockPeriod());
 
-        ConstVecOperandU64 addr(gpuDynInst, extData.ADDR);
-
-        addr.read();
-
-        calcAddr(gpuDynInst, addr, extData.SADDR, instData.OFFSET);
+        calcAddr(gpuDynInst, extData.ADDR, extData.SADDR, instData.OFFSET);
 
         issueRequestHelper(gpuDynInst);
     } // execute
@@ -43919,11 +43915,7 @@ namespace VegaISA
         gpuDynInst->latency.init(gpuDynInst->computeUnit());
         gpuDynInst->latency.set(gpuDynInst->computeUnit()->clockPeriod());
 
-        ConstVecOperandU64 addr(gpuDynInst, extData.ADDR);
-
-        addr.read();
-
-        calcAddr(gpuDynInst, addr, extData.SADDR, instData.OFFSET);
+        calcAddr(gpuDynInst, extData.ADDR, extData.SADDR, instData.OFFSET);
 
         issueRequestHelper(gpuDynInst);
     } // execute
@@ -44008,11 +44000,7 @@ namespace VegaISA
         gpuDynInst->latency.init(gpuDynInst->computeUnit());
         gpuDynInst->latency.set(gpuDynInst->computeUnit()->clockPeriod());
 
-        ConstVecOperandU64 addr(gpuDynInst, extData.ADDR);
-
-        addr.read();
-
-        calcAddr(gpuDynInst, addr, extData.SADDR, instData.OFFSET);
+        calcAddr(gpuDynInst, extData.ADDR, extData.SADDR, instData.OFFSET);
 
         issueRequestHelper(gpuDynInst);
     } // execute
@@ -44067,11 +44055,7 @@ namespace VegaISA
         gpuDynInst->latency.init(gpuDynInst->computeUnit());
         gpuDynInst->latency.set(gpuDynInst->computeUnit()->clockPeriod());
 
-        ConstVecOperandU64 addr(gpuDynInst, extData.ADDR);
-
-        addr.read();
-
-        calcAddr(gpuDynInst, addr, extData.SADDR, instData.OFFSET);
+        calcAddr(gpuDynInst, extData.ADDR, extData.SADDR, instData.OFFSET);
 
         issueRequestHelper(gpuDynInst);
     } // execute
@@ -44126,11 +44110,7 @@ namespace VegaISA
         gpuDynInst->latency.init(gpuDynInst->computeUnit());
         gpuDynInst->latency.set(gpuDynInst->computeUnit()->clockPeriod());
 
-        ConstVecOperandU64 addr(gpuDynInst, extData.ADDR);
-
-        addr.read();
-
-        calcAddr(gpuDynInst, addr, extData.SADDR, instData.OFFSET);
+        calcAddr(gpuDynInst, extData.ADDR, extData.SADDR, instData.OFFSET);
 
         issueRequestHelper(gpuDynInst);
     } // execute
@@ -44194,11 +44174,7 @@ namespace VegaISA
         gpuDynInst->latency.init(gpuDynInst->computeUnit());
         gpuDynInst->latency.set(gpuDynInst->computeUnit()->clockPeriod());
 
-        ConstVecOperandU64 addr(gpuDynInst, extData.ADDR);
-
-        addr.read();
-
-        calcAddr(gpuDynInst, addr, extData.SADDR, instData.OFFSET);
+        calcAddr(gpuDynInst, extData.ADDR, extData.SADDR, instData.OFFSET);
 
         issueRequestHelper(gpuDynInst);
     } // execute
@@ -44266,13 +44242,11 @@ namespace VegaISA
         gpuDynInst->latency.init(gpuDynInst->computeUnit());
         gpuDynInst->latency.set(gpuDynInst->computeUnit()->clockPeriod());
 
-        ConstVecOperandU64 addr(gpuDynInst, extData.ADDR);
         ConstVecOperandU8 data(gpuDynInst, extData.DATA);
 
-        addr.read();
         data.read();
 
-        calcAddr(gpuDynInst, addr, extData.SADDR, instData.OFFSET);
+        calcAddr(gpuDynInst, extData.ADDR, extData.SADDR, instData.OFFSET);
 
         for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
             if (gpuDynInst->exec_mask[lane]) {
@@ -44325,13 +44299,11 @@ namespace VegaISA
         gpuDynInst->latency.init(gpuDynInst->computeUnit());
         gpuDynInst->latency.set(gpuDynInst->computeUnit()->clockPeriod());
 
-        ConstVecOperandU64 addr(gpuDynInst, extData.ADDR);
         ConstVecOperandU16 data(gpuDynInst, extData.DATA);
 
-        addr.read();
         data.read();
 
-        calcAddr(gpuDynInst, addr, extData.SADDR, instData.OFFSET);
+        calcAddr(gpuDynInst, extData.ADDR, extData.SADDR, instData.OFFSET);
 
         for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
             if (gpuDynInst->exec_mask[lane]) {
@@ -44384,13 +44356,11 @@ namespace VegaISA
         gpuDynInst->latency.init(gpuDynInst->computeUnit());
         gpuDynInst->latency.set(gpuDynInst->computeUnit()->clockPeriod());
 
-        ConstVecOperandU64 addr(gpuDynInst, extData.ADDR);
         ConstVecOperandU32 data(gpuDynInst, extData.DATA);
 
-        addr.read();
         data.read();
 
-        calcAddr(gpuDynInst, addr, extData.SADDR, instData.OFFSET);
+        calcAddr(gpuDynInst, extData.ADDR, extData.SADDR, instData.OFFSET);
 
         for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
             if (gpuDynInst->exec_mask[lane]) {
@@ -44444,13 +44414,11 @@ namespace VegaISA
         gpuDynInst->latency.init(gpuDynInst->computeUnit());
         gpuDynInst->latency.set(gpuDynInst->computeUnit()->clockPeriod());
 
-        ConstVecOperandU64 addr(gpuDynInst, extData.ADDR);
         ConstVecOperandU64 data(gpuDynInst, extData.DATA);
 
-        addr.read();
         data.read();
 
-        calcAddr(gpuDynInst, addr, extData.SADDR, instData.OFFSET);
+        calcAddr(gpuDynInst, extData.ADDR, extData.SADDR, instData.OFFSET);
 
         for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
             if (gpuDynInst->exec_mask[lane]) {
@@ -44504,17 +44472,15 @@ namespace VegaISA
         gpuDynInst->latency.init(gpuDynInst->computeUnit());
         gpuDynInst->latency.set(gpuDynInst->computeUnit()->clockPeriod());
 
-        ConstVecOperandU64 addr(gpuDynInst, extData.ADDR);
         ConstVecOperandU32 data0(gpuDynInst, extData.DATA);
         ConstVecOperandU32 data1(gpuDynInst, extData.DATA + 1);
         ConstVecOperandU32 data2(gpuDynInst, extData.DATA + 2);
 
-        addr.read();
         data0.read();
         data1.read();
         data2.read();
 
-        calcAddr(gpuDynInst, addr, extData.SADDR, instData.OFFSET);
+        calcAddr(gpuDynInst, extData.ADDR, extData.SADDR, instData.OFFSET);
 
         for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
             if (gpuDynInst->exec_mask[lane]) {
@@ -44572,19 +44538,17 @@ namespace VegaISA
         gpuDynInst->latency.init(gpuDynInst->computeUnit());
         gpuDynInst->latency.set(gpuDynInst->computeUnit()->clockPeriod());
 
-        ConstVecOperandU64 addr(gpuDynInst, extData.ADDR);
         ConstVecOperandU32 data0(gpuDynInst, extData.DATA);
         ConstVecOperandU32 data1(gpuDynInst, extData.DATA + 1);
         ConstVecOperandU32 data2(gpuDynInst, extData.DATA + 2);
         ConstVecOperandU32 data3(gpuDynInst, extData.DATA + 3);
 
-        addr.read();
         data0.read();
         data1.read();
         data2.read();
         data3.read();
 
-        calcAddr(gpuDynInst, addr, extData.SADDR, instData.OFFSET);
+        calcAddr(gpuDynInst, extData.ADDR, extData.SADDR, instData.OFFSET);
 
         for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
             if (gpuDynInst->exec_mask[lane]) {
@@ -44650,13 +44614,11 @@ namespace VegaISA
         gpuDynInst->latency.init(gpuDynInst->computeUnit());
         gpuDynInst->latency.set(gpuDynInst->computeUnit()->clockPeriod());
 
-        ConstVecOperandU64 addr(gpuDynInst, extData.ADDR);
         ConstVecOperandU32 data(gpuDynInst, extData.DATA);
 
-        addr.read();
         data.read();
 
-        calcAddr(gpuDynInst, addr, extData.SADDR, instData.OFFSET);
+        calcAddr(gpuDynInst, extData.ADDR, extData.SADDR, instData.OFFSET);
 
         for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
             if (gpuDynInst->exec_mask[lane]) {
@@ -44732,15 +44694,13 @@ namespace VegaISA
         gpuDynInst->latency.init(gpuDynInst->computeUnit());
         gpuDynInst->latency.set(gpuDynInst->computeUnit()->clockPeriod());
 
-        ConstVecOperandU64 addr(gpuDynInst, extData.ADDR);
         ConstVecOperandU32 data(gpuDynInst, extData.DATA);
         ConstVecOperandU32 cmp(gpuDynInst, extData.DATA + 1);
 
-        addr.read();
         data.read();
         cmp.read();
 
-        calcAddr(gpuDynInst, addr, extData.SADDR, instData.OFFSET);
+        calcAddr(gpuDynInst, extData.ADDR, extData.SADDR, instData.OFFSET);
 
         for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
             if (gpuDynInst->exec_mask[lane]) {
@@ -44814,13 +44774,11 @@ namespace VegaISA
         gpuDynInst->latency.init(gpuDynInst->computeUnit());
         gpuDynInst->latency.set(gpuDynInst->computeUnit()->clockPeriod());
 
-        ConstVecOperandU64 addr(gpuDynInst, extData.ADDR);
         ConstVecOperandU32 data(gpuDynInst, extData.DATA);
 
-        addr.read();
         data.read();
 
-        calcAddr(gpuDynInst, addr, extData.SADDR, instData.OFFSET);
+        calcAddr(gpuDynInst, extData.ADDR, extData.SADDR, instData.OFFSET);
 
         for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
             if (gpuDynInst->exec_mask[lane]) {
@@ -45204,15 +45162,13 @@ namespace VegaISA
         gpuDynInst->latency.init(gpuDynInst->computeUnit());
         gpuDynInst->latency.set(gpuDynInst->computeUnit()->clockPeriod());
 
-        ConstVecOperandU64 addr(gpuDynInst, extData.ADDR);
         ConstVecOperandU64 data(gpuDynInst, extData.DATA);
         ConstVecOperandU64 cmp(gpuDynInst, extData.DATA + 2);
 
-        addr.read();
         data.read();
         cmp.read();
 
-        calcAddr(gpuDynInst, addr, extData.SADDR, instData.OFFSET);
+        calcAddr(gpuDynInst, extData.ADDR, extData.SADDR, instData.OFFSET);
 
         for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
             if (gpuDynInst->exec_mask[lane]) {
@@ -45287,13 +45243,11 @@ namespace VegaISA
         gpuDynInst->latency.init(gpuDynInst->computeUnit());
         gpuDynInst->latency.set(gpuDynInst->computeUnit()->clockPeriod());
 
-        ConstVecOperandU64 addr(gpuDynInst, extData.ADDR);
         ConstVecOperandU64 data(gpuDynInst, extData.DATA);
 
-        addr.read();
         data.read();
 
-        calcAddr(gpuDynInst, addr, extData.SADDR, instData.OFFSET);
+        calcAddr(gpuDynInst, extData.ADDR, extData.SADDR, instData.OFFSET);
 
         for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
             if (gpuDynInst->exec_mask[lane]) {
diff --git a/src/arch/amdgpu/vega/insts/instructions.hh b/src/arch/amdgpu/vega/insts/instructions.hh
index b2cf2b9705..9f017f9b90 100644
--- a/src/arch/amdgpu/vega/insts/instructions.hh
+++ b/src/arch/amdgpu/vega/insts/instructions.hh
@@ -41892,7 +41892,7 @@ namespace VegaISA
         {
             switch (opIdx) {
               case 0: //vgpr_addr
-                return 8;
+                return vgprIsOffset() ? 4 : 8;
               case 1: //vgpr_dst or saddr
                 return isFlat() ? 1 : 8;
               case 2: //vgpr_dst
@@ -41929,7 +41929,7 @@ namespace VegaISA
         {
             switch (opIdx) {
               case 0: //vgpr_addr
-                return 8;
+                return vgprIsOffset() ? 4 : 8;
               case 1: //vgpr_dst or saddr
                 return isFlat() ? 1 : 8;
               case 2: //vgpr_dst
@@ -41966,7 +41966,7 @@ namespace VegaISA
         {
             switch (opIdx) {
               case 0: //vgpr_addr
-                return 8;
+                return vgprIsOffset() ? 4 : 8;
               case 1: //vgpr_dst or saddr
                 return isFlat() ? 2 : 8;
               case 2: //vgpr_dst
@@ -42003,7 +42003,7 @@ namespace VegaISA
         {
             switch (opIdx) {
               case 0: //vgpr_addr
-                return 8;
+                return vgprIsOffset() ? 4 : 8;
               case 1: //vgpr_dst or saddr
                 return isFlat() ? 2 : 8;
               case 2: //vgpr_dst
@@ -42040,7 +42040,7 @@ namespace VegaISA
         {
             switch (opIdx) {
               case 0: //vgpr_addr
-                return 8;
+                return vgprIsOffset() ? 4 : 8;
               case 1: //vgpr_dst or saddr
                 return isFlat() ? 4 : 8;
               case 2: //vgpr_dst
@@ -42077,7 +42077,7 @@ namespace VegaISA
         {
             switch (opIdx) {
               case 0: //vgpr_addr
-                return 8;
+                return vgprIsOffset() ? 4 : 8;
               case 1: //vgpr_dst or saddr
                 return isFlat() ? 8 : 8;
               case 2: //vgpr_dst
@@ -42114,7 +42114,7 @@ namespace VegaISA
         {
             switch (opIdx) {
               case 0: //vgpr_addr
-                return 8;
+                return vgprIsOffset() ? 4 : 8;
               case 1: //vgpr_dst or saddr
                 return isFlat() ? 12 : 8;
               case 2: //vgpr_dst
@@ -42151,7 +42151,7 @@ namespace VegaISA
         {
             switch (opIdx) {
               case 0: //vgpr_addr
-                return 8;
+                return vgprIsOffset() ? 4 : 8;
               case 1: //vgpr_dst or saddr
                 return isFlat() ? 16 : 8;
               case 2: //vgpr_dst
@@ -42188,7 +42188,7 @@ namespace VegaISA
         {
             switch (opIdx) {
               case 0: //vgpr_addr
-                return 8;
+                return vgprIsOffset() ? 4 : 8;
               case 1: //vgpr_src
                 return 1;
               case 2: //saddr
@@ -42225,7 +42225,7 @@ namespace VegaISA
         {
             switch (opIdx) {
               case 0: //vgpr_addr
-                return 8;
+                return vgprIsOffset() ? 4 : 8;
               case 1: //vgpr_src
                 return 2;
               case 2: //saddr
@@ -42262,7 +42262,7 @@ namespace VegaISA
         {
             switch (opIdx) {
               case 0: //vgpr_addr
-                return 8;
+                return vgprIsOffset() ? 4 : 8;
               case 1: //vgpr_src
                 return 4;
               case 2: //saddr
@@ -42299,7 +42299,7 @@ namespace VegaISA
         {
             switch (opIdx) {
               case 0: //vgpr_addr
-                return 8;
+                return vgprIsOffset() ? 4 : 8;
               case 1: //vgpr_src
                 return 8;
               case 2: //saddr
@@ -42336,7 +42336,7 @@ namespace VegaISA
         {
             switch (opIdx) {
               case 0: //vgpr_addr
-                return 8;
+                return vgprIsOffset() ? 4 : 8;
               case 1: //vgpr_src
                 return 12;
               case 2: //saddr
@@ -42373,7 +42373,7 @@ namespace VegaISA
         {
             switch (opIdx) {
               case 0: //vgpr_addr
-                return 8;
+                return vgprIsOffset() ? 4 : 8;
               case 1: //vgpr_src
                 return 16;
               case 2: //saddr
@@ -42410,7 +42410,7 @@ namespace VegaISA
         {
             switch (opIdx) {
               case 0: //vgpr_addr
-                return 8;
+                return vgprIsOffset() ? 4 : 8;
               case 1: //vgpr_src
                 return 4;
               case 2: //vgpr_dst or saddr
@@ -42449,7 +42449,7 @@ namespace VegaISA
         {
             switch (opIdx) {
               case 0: //vgpr_addr
-                return 8;
+                return vgprIsOffset() ? 4 : 8;
               case 1: //vgpr_src
                 return 8;
               case 2: //vgpr_dst or saddr
@@ -42488,7 +42488,7 @@ namespace VegaISA
         {
             switch (opIdx) {
               case 0: //vgpr_addr
-                return 8;
+                return vgprIsOffset() ? 4 : 8;
               case 1: //vgpr_src
                 return 4;
               case 2: //vgpr_dst or saddr
@@ -42527,7 +42527,7 @@ namespace VegaISA
         {
             switch (opIdx) {
               case 0: //vgpr_addr
-                return 8;
+                return vgprIsOffset() ? 4 : 8;
               case 1: //vgpr_src
                 return 4;
               case 2: //vgpr_dst or saddr
@@ -42564,7 +42564,7 @@ namespace VegaISA
         {
             switch (opIdx) {
               case 0: //vgpr_addr
-                return 8;
+                return vgprIsOffset() ? 4 : 8;
               case 1: //vgpr_src
                 return 4;
               case 2: //vgpr_dst or saddr
@@ -42601,7 +42601,7 @@ namespace VegaISA
         {
             switch (opIdx) {
               case 0: //vgpr_addr
-                return 8;
+                return vgprIsOffset() ? 4 : 8;
               case 1: //vgpr_src
                 return 4;
               case 2: //vgpr_dst or saddr
@@ -42638,7 +42638,7 @@ namespace VegaISA
         {
             switch (opIdx) {
               case 0: //vgpr_addr
-                return 8;
+                return vgprIsOffset() ? 4 : 8;
               case 1: //vgpr_src
                 return 4;
               case 2: //vgpr_dst or saddr
@@ -42675,7 +42675,7 @@ namespace VegaISA
         {
             switch (opIdx) {
               case 0: //vgpr_addr
-                return 8;
+                return vgprIsOffset() ? 4 : 8;
               case 1: //vgpr_src
                 return 4;
               case 2: //vgpr_dst or saddr
@@ -42712,7 +42712,7 @@ namespace VegaISA
         {
             switch (opIdx) {
               case 0: //vgpr_addr
-                return 8;
+                return vgprIsOffset() ? 4 : 8;
               case 1: //vgpr_src
                 return 4;
               case 2: //vgpr_dst or saddr
@@ -42749,7 +42749,7 @@ namespace VegaISA
         {
             switch (opIdx) {
               case 0: //vgpr_addr
-                return 8;
+                return vgprIsOffset() ? 4 : 8;
               case 1: //vgpr_src
                 return 4;
               case 2: //vgpr_dst or saddr
@@ -42786,7 +42786,7 @@ namespace VegaISA
         {
             switch (opIdx) {
               case 0: //vgpr_addr
-                return 8;
+                return vgprIsOffset() ? 4 : 8;
               case 1: //vgpr_src
                 return 4;
               case 2: //vgpr_dst or saddr
@@ -42823,7 +42823,7 @@ namespace VegaISA
         {
             switch (opIdx) {
               case 0: //vgpr_addr
-                return 8;
+                return vgprIsOffset() ? 4 : 8;
               case 1: //vgpr_src
                 return 4;
               case 2: //vgpr_dst or saddr
@@ -42860,7 +42860,7 @@ namespace VegaISA
         {
             switch (opIdx) {
               case 0: //vgpr_addr
-                return 8;
+                return vgprIsOffset() ? 4 : 8;
               case 1: //vgpr_src
                 return 4;
               case 2: //vgpr_dst or saddr
@@ -42897,7 +42897,7 @@ namespace VegaISA
         {
             switch (opIdx) {
               case 0: //vgpr_addr
-                return 8;
+                return vgprIsOffset() ? 4 : 8;
               case 1: //vgpr_src
                 return 8;
               case 2: //vgpr_dst or saddr
@@ -42934,7 +42934,7 @@ namespace VegaISA
         {
             switch (opIdx) {
               case 0: //vgpr_addr
-                return 8;
+                return vgprIsOffset() ? 4 : 8;
               case 1: //vgpr_src
                 return 16;
               case 2: //vgpr_dst or saddr
@@ -42973,7 +42973,7 @@ namespace VegaISA
         {
             switch (opIdx) {
               case 0: //vgpr_addr
-                return 8;
+                return vgprIsOffset() ? 4 : 8;
               case 1: //vgpr_src
                 return 8;
               case 2: //vgpr_dst or saddr
@@ -43012,7 +43012,7 @@ namespace VegaISA
         {
             switch (opIdx) {
               case 0: //vgpr_addr
-                return 8;
+                return vgprIsOffset() ? 4 : 8;
               case 1: //vgpr_src
                 return 8;
               case 2: //vgpr_dst or saddr
@@ -43049,7 +43049,7 @@ namespace VegaISA
         {
             switch (opIdx) {
               case 0: //vgpr_addr
-                return 8;
+                return vgprIsOffset() ? 4 : 8;
               case 1: //vgpr_src
                 return 8;
               case 2: //vgpr_dst or saddr
@@ -43086,7 +43086,7 @@ namespace VegaISA
         {
             switch (opIdx) {
               case 0: //vgpr_addr
-                return 8;
+                return vgprIsOffset() ? 4 : 8;
               case 1: //vgpr_src
                 return 8;
               case 2: //vgpr_dst or saddr
@@ -43123,7 +43123,7 @@ namespace VegaISA
         {
             switch (opIdx) {
               case 0: //vgpr_addr
-                return 8;
+                return vgprIsOffset() ? 4 : 8;
               case 1: //vgpr_src
                 return 8;
               case 2: //vgpr_dst or saddr
@@ -43160,7 +43160,7 @@ namespace VegaISA
         {
             switch (opIdx) {
               case 0: //vgpr_addr
-                return 8;
+                return vgprIsOffset() ? 4 : 8;
               case 1: //vgpr_src
                 return 8;
               case 2: //vgpr_dst or saddr
@@ -43197,7 +43197,7 @@ namespace VegaISA
         {
             switch (opIdx) {
               case 0: //vgpr_addr
-                return 8;
+                return vgprIsOffset() ? 4 : 8;
               case 1: //vgpr_src
                 return 8;
               case 2: //vgpr_dst or saddr
@@ -43234,7 +43234,7 @@ namespace VegaISA
         {
             switch (opIdx) {
               case 0: //vgpr_addr
-                return 8;
+                return vgprIsOffset() ? 4 : 8;
               case 1: //vgpr_src
                 return 8;
               case 2: //vgpr_dst or saddr
@@ -43271,7 +43271,7 @@ namespace VegaISA
         {
             switch (opIdx) {
               case 0: //vgpr_addr
-                return 8;
+                return vgprIsOffset() ? 4 : 8;
               case 1: //vgpr_src
                 return 8;
               case 2: //vgpr_dst or saddr
@@ -43308,7 +43308,7 @@ namespace VegaISA
         {
             switch (opIdx) {
               case 0: //vgpr_addr
-                return 8;
+                return vgprIsOffset() ? 4 : 8;
               case 1: //vgpr_src
                 return 8;
               case 2: //vgpr_dst or saddr
@@ -43345,7 +43345,7 @@ namespace VegaISA
         {
             switch (opIdx) {
               case 0: //vgpr_addr
-                return 8;
+                return vgprIsOffset() ? 4 : 8;
               case 1: //vgpr_src
                 return 8;
               case 2: //vgpr_dst or saddr
diff --git a/src/arch/amdgpu/vega/insts/op_encodings.hh b/src/arch/amdgpu/vega/insts/op_encodings.hh
index 508d706cd3..34f6040495 100644
--- a/src/arch/amdgpu/vega/insts/op_encodings.hh
+++ b/src/arch/amdgpu/vega/insts/op_encodings.hh
@@ -925,7 +925,7 @@ namespace VegaISA
         }
 
         void
-        calcAddr(GPUDynInstPtr gpuDynInst, ConstVecOperandU64 &vaddr,
+        calcAddr(GPUDynInstPtr gpuDynInst, ScalarRegU32 vaddr,
                  ScalarRegU32 saddr, ScalarRegI32 offset)
         {
             // Offset is a 13-bit field w/the following meanings:
@@ -940,14 +940,20 @@ namespace VegaISA
             // be a 64-bit address. Otherwise, saddr is the reg index for a
             // scalar reg used as the base address for a 32-bit address.
             if ((saddr == 0x7f && isFlatGlobal()) || isFlat()) {
-                calcAddrVgpr(gpuDynInst, vaddr, offset);
+                ConstVecOperandU64 vbase(gpuDynInst, vaddr);
+                vbase.read();
+
+                calcAddrVgpr(gpuDynInst, vbase, offset);
             } else {
                 // Assume we are operating in 64-bit mode and read a pair of
                 // SGPRs for the address base.
                 ConstScalarOperandU64 sbase(gpuDynInst, saddr);
                 sbase.read();
 
-                calcAddrSgpr(gpuDynInst, vaddr, sbase, offset);
+                ConstVecOperandU32 voffset(gpuDynInst, vaddr);
+                voffset.read();
+
+                calcAddrSgpr(gpuDynInst, voffset, sbase, offset);
             }
 
             if (isFlat()) {
@@ -974,6 +980,12 @@ namespace VegaISA
             }
         }
 
+        bool
+        vgprIsOffset()
+        {
+            return (extData.SADDR != 0x7f);
+        }
+
         // first instruction DWORD
         InFmt_FLAT instData;
         // second instruction DWORD
@@ -987,7 +999,7 @@ namespace VegaISA
         void generateGlobalDisassembly();
 
         void
-        calcAddrSgpr(GPUDynInstPtr gpuDynInst, ConstVecOperandU64 &vaddr,
+        calcAddrSgpr(GPUDynInstPtr gpuDynInst, ConstVecOperandU32 &vaddr,
                      ConstScalarOperandU64 &saddr, ScalarRegI32 offset)
         {
             // Use SGPR pair as a base address and add VGPR-offset and

From 03083ba5e3bf8a9ce416003e9f8809c54599d831 Mon Sep 17 00:00:00 2001
From: Matthew Poremba <matthew.poremba@amd.com>
Date: Mon, 26 Dec 2022 16:46:40 -0800
Subject: [PATCH 096/492] arch-vega: Implement ds_write2st64_b64

Write two qwords at offsets multiplied by 8 * 64 bytes.

Change-Id: I0d0e05f3e848c2fd02d32095e32b7f023bd8803b
Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/67078
Reviewed-by: Matt Sinclair <mattdsinclair@gmail.com>
Tested-by: kokoro <noreply+kokoro@google.com>
Maintainer: Matt Sinclair <mattdsinclair@gmail.com>
---
 src/arch/amdgpu/vega/insts/instructions.cc | 46 +++++++++++++++++++++-
 src/arch/amdgpu/vega/insts/instructions.hh |  2 +
 2 files changed, 47 insertions(+), 1 deletion(-)

diff --git a/src/arch/amdgpu/vega/insts/instructions.cc b/src/arch/amdgpu/vega/insts/instructions.cc
index 4b27afa65d..6cf01fb8f9 100644
--- a/src/arch/amdgpu/vega/insts/instructions.cc
+++ b/src/arch/amdgpu/vega/insts/instructions.cc
@@ -36595,8 +36595,52 @@ namespace VegaISA
     void
     Inst_DS__DS_WRITE2ST64_B64::execute(GPUDynInstPtr gpuDynInst)
     {
-        panicUnimplemented();
+        Wavefront *wf = gpuDynInst->wavefront();
+
+        if (gpuDynInst->exec_mask.none()) {
+            wf->decLGKMInstsIssued();
+            return;
+        }
+
+        gpuDynInst->execUnitId = wf->execUnitId;
+        gpuDynInst->latency.init(gpuDynInst->computeUnit());
+        gpuDynInst->latency.set(
+                gpuDynInst->computeUnit()->cyclesToTicks(Cycles(24)));
+        ConstVecOperandU32 addr(gpuDynInst, extData.ADDR);
+        ConstVecOperandU64 data0(gpuDynInst, extData.DATA0);
+        ConstVecOperandU64 data1(gpuDynInst, extData.DATA1);
+
+        addr.read();
+        data0.read();
+        data1.read();
+
+        calcAddr(gpuDynInst, addr);
+
+        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
+            if (gpuDynInst->exec_mask[lane]) {
+                (reinterpret_cast<VecElemU64*>(
+                    gpuDynInst->d_data))[lane * 2] = data0[lane];
+                (reinterpret_cast<VecElemU64*>(
+                    gpuDynInst->d_data))[lane * 2 + 1] = data1[lane];
+            }
+        }
+
+        gpuDynInst->computeUnit()->localMemoryPipe.issueRequest(gpuDynInst);
     } // execute
+
+    void
+    Inst_DS__DS_WRITE2ST64_B64::initiateAcc(GPUDynInstPtr gpuDynInst)
+    {
+        Addr offset0 = instData.OFFSET0 * 8 * 64;
+        Addr offset1 = instData.OFFSET1 * 8 * 64;
+
+        initDualMemWrite<VecElemU64>(gpuDynInst, offset0, offset1);
+    }
+
+    void
+    Inst_DS__DS_WRITE2ST64_B64::completeAcc(GPUDynInstPtr gpuDynInst)
+    {
+    }
     // --- Inst_DS__DS_CMPST_B64 class methods ---
 
     Inst_DS__DS_CMPST_B64::Inst_DS__DS_CMPST_B64(InFmt_DS *iFmt)
diff --git a/src/arch/amdgpu/vega/insts/instructions.hh b/src/arch/amdgpu/vega/insts/instructions.hh
index 9f017f9b90..289673232b 100644
--- a/src/arch/amdgpu/vega/insts/instructions.hh
+++ b/src/arch/amdgpu/vega/insts/instructions.hh
@@ -33572,6 +33572,8 @@ namespace VegaISA
         } // getOperandSize
 
         void execute(GPUDynInstPtr) override;
+        void initiateAcc(GPUDynInstPtr) override;
+        void completeAcc(GPUDynInstPtr) override;
     }; // Inst_DS__DS_WRITE2ST64_B64
 
     class Inst_DS__DS_CMPST_B64 : public Inst_DS

From ddf43726ef95fb9b64f89109413a62aa070fada8 Mon Sep 17 00:00:00 2001
From: Vishnu Ramadas <vramadas@wisc.edu>
Date: Wed, 4 Jan 2023 21:07:54 -0600
Subject: [PATCH 097/492] gpu-compute, mem-ruby: Update GPU cache bypassing to
 use TBE

An earlier commit added support for GLC and SLC AMDGPU instruction
modifiers. These modifiers enable cache bypassing when set. The GLC/SLC
flag information was being threaded through all the way to memory and
back so that appropriate actions could be taken upon receiving a request
and corresponding response. This commit removes the threading and adds
the bypass flag information to TBE. Requests populate this
entry and responses access it to determine the correct set of actions to
execute.

Change-Id: I20ffa6682d109270adb921de078cfd47fb4e137c
Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/67191
Maintainer: Matt Sinclair <mattdsinclair@gmail.com>
Tested-by: kokoro <noreply+kokoro@google.com>
Reviewed-by: Matt Sinclair <mattdsinclair@gmail.com>
Reviewed-by: Jason Lowe-Power <power.jg@gmail.com>
---
 src/mem/ruby/protocol/GPU_VIPER-TCC.sm      | 59 +++++++++------------
 src/mem/ruby/protocol/MOESI_AMD_Base-dir.sm | 30 -----------
 src/mem/ruby/protocol/MOESI_AMD_Base-msg.sm |  2 -
 3 files changed, 25 insertions(+), 66 deletions(-)

diff --git a/src/mem/ruby/protocol/GPU_VIPER-TCC.sm b/src/mem/ruby/protocol/GPU_VIPER-TCC.sm
index ae142471fa..ca4c543722 100644
--- a/src/mem/ruby/protocol/GPU_VIPER-TCC.sm
+++ b/src/mem/ruby/protocol/GPU_VIPER-TCC.sm
@@ -283,7 +283,13 @@ machine(MachineType:TCC, "TCC Cache")
       peek(responseFromNB_in, ResponseMsg, block_on="addr") {
         TBE tbe := TBEs.lookup(in_msg.addr);
         Entry cache_entry := getCacheEntry(in_msg.addr);
-        if (in_msg.isSLCSet) {
+        bool is_slc_set := false;
+
+        if (!is_invalid(tbe)) {
+            is_slc_set := tbe.isSLCSet;
+        }
+
+        if (is_slc_set) {
             // If the SLC bit is set, the response needs to bypass the cache
             // and should not be allocated an entry.
             trigger(Event:Bypass, in_msg.addr, cache_entry, tbe);
@@ -343,6 +349,10 @@ machine(MachineType:TCC, "TCC Cache")
                 trigger(Event:WrVicBlk, in_msg.addr, cache_entry, tbe);
             }
         } else if (in_msg.Type == CoherenceRequestType:Atomic) {
+          // Currently the Atomic requests do not have GLC/SLC bit handing
+          // support. The assert ensures that the requests do not have
+          // these set, and therefore do not expect to bypass the cache
+          assert(!in_msg.isSLCSet);
           trigger(Event:Atomic, in_msg.addr, cache_entry, tbe);
         } else if (in_msg.Type == CoherenceRequestType:RdBlk) {
           if (in_msg.isSLCSet) {
@@ -399,8 +409,8 @@ machine(MachineType:TCC, "TCC Cache")
       out_msg.State := CoherenceState:Shared;
       DPRINTF(RubySlicc, "%s\n", out_msg);
       peek(responseFromNB_in, ResponseMsg) {
-        out_msg.isGLCSet := in_msg.isGLCSet;
-        out_msg.isSLCSet := in_msg.isSLCSet;
+        out_msg.isGLCSet := tbe.isGLCSet;
+        out_msg.isSLCSet := tbe.isSLCSet;
       }
     }
     enqueue(unblockToNB_out, UnblockMsg, 1) {
@@ -408,8 +418,8 @@ machine(MachineType:TCC, "TCC Cache")
       out_msg.Destination.add(mapAddressToMachine(address, MachineType:Directory));
       out_msg.MessageSize := MessageSizeType:Unblock_Control;
       peek(responseFromNB_in, ResponseMsg) {
-        out_msg.isGLCSet := in_msg.isGLCSet;
-        out_msg.isSLCSet := in_msg.isSLCSet;
+        out_msg.isGLCSet := tbe.isGLCSet;
+        out_msg.isSLCSet := tbe.isSLCSet;
       }
       DPRINTF(RubySlicc, "%s\n", out_msg);
     }
@@ -426,8 +436,8 @@ machine(MachineType:TCC, "TCC Cache")
           out_msg.MessageSize := MessageSizeType:Response_Data;
           out_msg.Dirty := false;
           out_msg.State := CoherenceState:Shared;
-          out_msg.isGLCSet := in_msg.isGLCSet;
-          out_msg.isSLCSet := in_msg.isSLCSet;
+          out_msg.isGLCSet := tbe.isGLCSet;
+          out_msg.isSLCSet := tbe.isSLCSet;
           DPRINTF(RubySlicc, "%s\n", out_msg);
         }
         enqueue(unblockToNB_out, UnblockMsg, 1) {
@@ -449,8 +459,8 @@ machine(MachineType:TCC, "TCC Cache")
           out_msg.Destination.add(mapAddressToMachine(address, MachineType:Directory));
           out_msg.Shared := false; // unneeded for this request
           out_msg.MessageSize := in_msg.MessageSize;
-          out_msg.isGLCSet := in_msg.isGLCSet;
-          out_msg.isSLCSet := in_msg.isSLCSet;
+          out_msg.isGLCSet := tbe.isGLCSet;
+          out_msg.isSLCSet := tbe.isSLCSet;
           DPRINTF(RubySlicc, "%s\n", out_msg);
         }
       }
@@ -467,9 +477,6 @@ machine(MachineType:TCC, "TCC Cache")
         out_msg.Sender := machineID;
         out_msg.MessageSize := MessageSizeType:Writeback_Control;
         out_msg.instSeqNum := in_msg.instSeqNum;
-        out_msg.isGLCSet := in_msg.isGLCSet;
-        out_msg.isSLCSet := in_msg.isSLCSet;
-
       }
     }
   }
@@ -484,9 +491,6 @@ machine(MachineType:TCC, "TCC Cache")
         out_msg.Sender := machineID;
         out_msg.MessageSize := MessageSizeType:Writeback_Control;
         out_msg.instSeqNum := in_msg.instSeqNum;
-        out_msg.isGLCSet := in_msg.isGLCSet;
-        out_msg.isSLCSet := in_msg.isSLCSet;
-
       }
     }
   }
@@ -500,9 +504,8 @@ machine(MachineType:TCC, "TCC Cache")
           out_msg.Sender := machineID;
           out_msg.MessageSize := in_msg.MessageSize;
           out_msg.DataBlk := in_msg.DataBlk;
-          out_msg.isGLCSet := in_msg.isGLCSet;
-          out_msg.isSLCSet := in_msg.isSLCSet;
-
+          out_msg.isGLCSet := tbe.isGLCSet;
+          out_msg.isSLCSet := tbe.isSLCSet;
         }
     }
   }
@@ -535,9 +538,9 @@ machine(MachineType:TCC, "TCC Cache")
       peek(coreRequestNetwork_in, CPURequestMsg) {
         if(in_msg.Type == CoherenceRequestType:RdBlk || in_msg.Type == CoherenceRequestType:Atomic){
           tbe.Destination.add(in_msg.Requestor);
-          tbe.isGLCSet := in_msg.isGLCSet;
-          tbe.isSLCSet := in_msg.isSLCSet;
         }
+        tbe.isGLCSet := in_msg.isGLCSet;
+        tbe.isSLCSet := in_msg.isSLCSet;
       }
     }
   }
@@ -576,8 +579,6 @@ machine(MachineType:TCC, "TCC Cache")
         out_msg.DataBlk := in_msg.DataBlk;
         out_msg.writeMask.orMask(in_msg.writeMask);
         out_msg.instSeqNum := in_msg.instSeqNum;
-        out_msg.isGLCSet := in_msg.isGLCSet;
-        out_msg.isSLCSet := in_msg.isSLCSet;
       }
     }
   }
@@ -593,10 +594,6 @@ machine(MachineType:TCC, "TCC Cache")
       out_msg.Dirty := true;
       out_msg.DataBlk := cache_entry.DataBlk;
       out_msg.writeMask.orMask(cache_entry.writeMask);
-      peek(coreRequestNetwork_in, CPURequestMsg) {
-        out_msg.isGLCSet := in_msg.isGLCSet;
-        out_msg.isSLCSet := in_msg.isSLCSet;
-      }
     }
   }
 
@@ -611,8 +608,6 @@ machine(MachineType:TCC, "TCC Cache")
         out_msg.Type := CoherenceRequestType:Atomic;
         out_msg.Dirty := true;
         out_msg.writeMask.orMask(in_msg.writeMask);
-        out_msg.isGLCSet := in_msg.isGLCSet;
-        out_msg.isSLCSet := in_msg.isSLCSet;
       }
     }
   }
@@ -628,10 +623,6 @@ machine(MachineType:TCC, "TCC Cache")
       out_msg.Ntsl := true;
       out_msg.State := CoherenceState:NA;
       out_msg.MessageSize := MessageSizeType:Response_Control;
-      peek(probeNetwork_in, NBProbeRequestMsg) {
-        out_msg.isGLCSet := in_msg.isGLCSet;
-        out_msg.isSLCSet := in_msg.isSLCSet;
-      }
     }
   }
   action(ut_updateTag, "ut", desc="update Tag (i.e. set MRU)") {
@@ -676,8 +667,8 @@ machine(MachineType:TCC, "TCC Cache")
         out_msg.addr := address;
         out_msg.Type := TriggerType:AtomicDone;
         peek(responseFromNB_in, ResponseMsg) {
-          out_msg.isGLCSet := in_msg.isGLCSet;
-          out_msg.isSLCSet := in_msg.isSLCSet;
+          out_msg.isGLCSet := tbe.isGLCSet;
+          out_msg.isSLCSet := tbe.isSLCSet;
         }
       }
     }
diff --git a/src/mem/ruby/protocol/MOESI_AMD_Base-dir.sm b/src/mem/ruby/protocol/MOESI_AMD_Base-dir.sm
index 57edef8f2b..3b38e3b1ff 100644
--- a/src/mem/ruby/protocol/MOESI_AMD_Base-dir.sm
+++ b/src/mem/ruby/protocol/MOESI_AMD_Base-dir.sm
@@ -161,8 +161,6 @@ machine(MachineType:Directory, "AMD Baseline protocol")
     uint64_t probe_id,        desc="probe id for lifetime profiling";
     WriteMask writeMask,    desc="outstanding write through mask";
     int Len,            desc="Length of memory request for DMA";
-    bool isGLCSet,      desc="Bypass L1 Cache";
-    bool isSLCSet,      desc="Bypass L1 and L2 Cache";
   }
 
   structure(TBETable, external="yes") {
@@ -485,8 +483,6 @@ machine(MachineType:Directory, "AMD Baseline protocol")
       out_msg.ProbeRequestStartTime := tbe.ProbeRequestStartTime;
       out_msg.OriginalResponder := tbe.LastSender;
       out_msg.L3Hit := tbe.L3Hit;
-      out_msg.isGLCSet := tbe.isGLCSet;
-      out_msg.isSLCSet := tbe.isSLCSet;
       DPRINTF(RubySlicc, "%s\n", out_msg);
     }
   }
@@ -516,8 +512,6 @@ machine(MachineType:Directory, "AMD Baseline protocol")
         out_msg.ForwardRequestTime := tbe.ForwardRequestTime;
         out_msg.ProbeRequestStartTime := tbe.ProbeRequestStartTime;
         out_msg.OriginalResponder := tbe.LastSender;
-        out_msg.isGLCSet := tbe.isGLCSet;
-        out_msg.isSLCSet := tbe.isSLCSet;
         if(tbe.atomicData){
           out_msg.WTRequestor := tbe.WTRequestor;
         }
@@ -546,8 +540,6 @@ machine(MachineType:Directory, "AMD Baseline protocol")
         out_msg.InitialRequestTime := tbe.InitialRequestTime;
         out_msg.ForwardRequestTime := curCycle();
         out_msg.ProbeRequestStartTime := tbe.ProbeRequestStartTime;
-        out_msg.isGLCSet := tbe.isGLCSet;
-        out_msg.isSLCSet := tbe.isSLCSet;
         DPRINTF(RubySlicc, "%s\n", out_msg);
       }
   }
@@ -565,8 +557,6 @@ machine(MachineType:Directory, "AMD Baseline protocol")
         out_msg.ForwardRequestTime := curCycle();
         out_msg.ProbeRequestStartTime := curCycle();
         out_msg.instSeqNum := in_msg.instSeqNum;
-        out_msg.isGLCSet := in_msg.isGLCSet;
-        out_msg.isSLCSet := in_msg.isSLCSet;
       }
     }
   }
@@ -579,8 +569,6 @@ machine(MachineType:Directory, "AMD Baseline protocol")
         out_msg.Sender := machineID;
         out_msg.MessageSize := MessageSizeType:Writeback_Data;
         out_msg.DataBlk := in_msg.DataBlk;
-        out_msg.isGLCSet := in_msg.isGLCSet;
-        out_msg.isSLCSet := in_msg.isSLCSet;
       }
     }
   }
@@ -636,8 +624,6 @@ machine(MachineType:Directory, "AMD Baseline protocol")
           out_msg.Type := MemoryRequestType:MEMORY_READ;
           out_msg.Sender := machineID;
           out_msg.MessageSize := MessageSizeType:Request_Control;
-          out_msg.isGLCSet := in_msg.isGLCSet;
-          out_msg.isSLCSet := in_msg.isSLCSet;
         }
       }
     }
@@ -753,8 +739,6 @@ machine(MachineType:Directory, "AMD Baseline protocol")
           out_msg.MessageSize := MessageSizeType:Control;
           out_msg.Destination := probe_dests;
           tbe.NumPendingAcks := out_msg.Destination.count();
-          out_msg.isGLCSet := in_msg.isGLCSet;
-          out_msg.isSLCSet := in_msg.isSLCSet;
           DPRINTF(RubySlicc, "%s\n", out_msg);
           APPEND_TRANSITION_COMMENT(" dc: Acks remaining: ");
           APPEND_TRANSITION_COMMENT(tbe.NumPendingAcks);
@@ -858,8 +842,6 @@ machine(MachineType:Directory, "AMD Baseline protocol")
           out_msg.ReturnData := true;
           out_msg.MessageSize := MessageSizeType:Control;
           out_msg.Destination := probe_dests;
-          out_msg.isGLCSet := in_msg.isGLCSet;
-          out_msg.isSLCSet := in_msg.isSLCSet;
           tbe.NumPendingAcks := out_msg.Destination.count();
           DPRINTF(RubySlicc, "%s\n", (out_msg));
           APPEND_TRANSITION_COMMENT(" sc: Acks remaining: ");
@@ -915,8 +897,6 @@ machine(MachineType:Directory, "AMD Baseline protocol")
           out_msg.ReturnData := false;
           out_msg.MessageSize := MessageSizeType:Control;
           out_msg.Destination := probe_dests;
-          out_msg.isGLCSet := in_msg.isGLCSet;
-          out_msg.isSLCSet := in_msg.isSLCSet;
           tbe.NumPendingAcks := out_msg.Destination.count();
           APPEND_TRANSITION_COMMENT(" ic: Acks remaining: ");
           APPEND_TRANSITION_COMMENT(tbe.NumPendingAcks);
@@ -943,8 +923,6 @@ machine(MachineType:Directory, "AMD Baseline protocol")
         out_msg.Sender := machineID;
         out_msg.MessageSize := MessageSizeType:Writeback_Data;
         out_msg.DataBlk := in_msg.DataBlk;
-        out_msg.isGLCSet := in_msg.isGLCSet;
-        out_msg.isSLCSet := in_msg.isSLCSet;
       }
       if (tbe.Dirty == false) {
           // have to update the TBE, too, because of how this
@@ -1007,8 +985,6 @@ machine(MachineType:Directory, "AMD Baseline protocol")
       tbe.NumPendingAcks := 0;
       tbe.Cached := in_msg.ForceShared;
       tbe.InitialRequestTime := in_msg.InitialRequestTime;
-      tbe.isGLCSet := in_msg.isGLCSet;
-      tbe.isSLCSet := in_msg.isSLCSet;
     }
   }
 
@@ -1028,8 +1004,6 @@ machine(MachineType:Directory, "AMD Baseline protocol")
         out_msg.Sender := machineID;
         out_msg.MessageSize := MessageSizeType:Writeback_Data;
         out_msg.DataBlk := tbe.DataBlk;
-        out_msg.isGLCSet := tbe.isGLCSet;
-        out_msg.isSLCSet := tbe.isSLCSet;
         DPRINTF(ProtocolTrace, "%s\n", out_msg);
       }
     }
@@ -1130,8 +1104,6 @@ machine(MachineType:Directory, "AMD Baseline protocol")
             out_msg.Sender := machineID;
             out_msg.MessageSize := MessageSizeType:Writeback_Data;
             out_msg.DataBlk := victim_entry.DataBlk;
-            out_msg.isGLCSet := in_msg.isGLCSet;
-            out_msg.isSLCSet := in_msg.isSLCSet;
           }
           L3CacheMemory.deallocate(victim);
         }
@@ -1164,8 +1136,6 @@ machine(MachineType:Directory, "AMD Baseline protocol")
             out_msg.Sender := machineID;
             out_msg.MessageSize := MessageSizeType:Writeback_Data;
             out_msg.DataBlk := victim_entry.DataBlk;
-            out_msg.isGLCSet := tbe.isGLCSet;
-            out_msg.isSLCSet := tbe.isSLCSet;
           }
           L3CacheMemory.deallocate(victim);
         }
diff --git a/src/mem/ruby/protocol/MOESI_AMD_Base-msg.sm b/src/mem/ruby/protocol/MOESI_AMD_Base-msg.sm
index 6ff19e953b..bb3a013325 100644
--- a/src/mem/ruby/protocol/MOESI_AMD_Base-msg.sm
+++ b/src/mem/ruby/protocol/MOESI_AMD_Base-msg.sm
@@ -168,8 +168,6 @@ structure(NBProbeRequestMsg, desc="...", interface="Message") {
   MachineID Requestor,          desc="Requestor id for 3-hop requests";
   bool NoAckNeeded, default="false", desc="For short circuting acks";
   int ProgramCounter,           desc="PC that accesses to this block";
-  bool isGLCSet,                desc="Bypass L1 Cache";
-  bool isSLCSet,                desc="Bypass L1 and L2 Caches";
 
   bool functionalRead(Packet *pkt) {
     return false;

From c23d7bb3eed3a0f7146b4b9d462ea16cc1e8a036 Mon Sep 17 00:00:00 2001
From: Vishnu Ramadas <vramadas@wisc.edu>
Date: Wed, 4 Jan 2023 21:34:17 -0600
Subject: [PATCH 098/492] gpu-compute, mem-ruby: Add p_popRequestQueue to some
 transitions

Two W->WI transitions, on events RdBlk and Atomic in the GPU L2 cache
coherence protocol do not clear  the request from the request queue upon
completing the transition. This action is not performed in the respone
path. This update adds the p_popRequestQueue action to each of these
transitions to remove the stale request from the queue.

Change-Id: Ia2679fe3dd702f4df2bc114f4607ba40c18d6ff1
Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/67192
Reviewed-by: Jason Lowe-Power <power.jg@gmail.com>
Maintainer: Jason Lowe-Power <power.jg@gmail.com>
Tested-by: kokoro <noreply+kokoro@google.com>
---
 src/mem/ruby/protocol/GPU_VIPER-TCC.sm | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/src/mem/ruby/protocol/GPU_VIPER-TCC.sm b/src/mem/ruby/protocol/GPU_VIPER-TCC.sm
index ca4c543722..0f93339827 100644
--- a/src/mem/ruby/protocol/GPU_VIPER-TCC.sm
+++ b/src/mem/ruby/protocol/GPU_VIPER-TCC.sm
@@ -721,6 +721,7 @@ machine(MachineType:TCC, "TCC Cache")
     p_profileHit;
     t_allocateTBE;
     wb_writeBack;
+    p_popRequestQueue;
   }
 
   transition(I, RdBlk, IV) {TagArrayRead} {
@@ -815,6 +816,7 @@ transition(I, Atomic, A) {TagArrayRead} {
     p_profileHit;
     t_allocateTBE;
     wb_writeBack;
+    p_popRequestQueue;
   }
 
   transition(I, WrVicBlk) {TagArrayRead} {

From ac54c7ffad6d19820b2b875b58b0e3510d60f7e7 Mon Sep 17 00:00:00 2001
From: Hanhwi Jang <jang.hanhwi@gmail.com>
Date: Thu, 5 Jan 2023 14:52:11 +0900
Subject: [PATCH 099/492] cpu-o3: Resolve the skid buffer overflow issue at
 decode stage

When decode width is larger than fetch width, the skid buffer
overflow happens at decode stage. The decode stage assumes
that fetch stage sends instructions as many as the fetch width,
but it sends them at decode width rate.

This patch makes the decode stage set its skid buffer size
according to the decode width.

Change-Id: I90ee43d16c59a4c9305c77bbfad7e4cdb2b9cffa
Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/67231
Maintainer: Jason Lowe-Power <power.jg@gmail.com>
Reviewed-by: Jason Lowe-Power <power.jg@gmail.com>
Reviewed-by: Hanhwi Jang <jang.hanhwi@gmail.com>
Reviewed-by: Tom Rollet <tom.rollet@huawei.com>
Tested-by: kokoro <noreply+kokoro@google.com>
---
 src/cpu/o3/decode.cc | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/cpu/o3/decode.cc b/src/cpu/o3/decode.cc
index 9555e32c23..ac728a2f2c 100644
--- a/src/cpu/o3/decode.cc
+++ b/src/cpu/o3/decode.cc
@@ -77,7 +77,7 @@ Decode::Decode(CPU *_cpu, const BaseO3CPUParams &params)
              decodeWidth, static_cast<int>(MaxWidth));
 
     // @todo: Make into a parameter
-    skidBufferMax = (fetchToDecodeDelay + 1) *  params.fetchWidth;
+    skidBufferMax = (fetchToDecodeDelay + 1) *  params.decodeWidth;
     for (int tid = 0; tid < MaxThreads; tid++) {
         stalls[tid] = {false};
         decodeStatus[tid] = Idle;

From 68cf65e9b581b49edaf4744520ca1586dfd6682f Mon Sep 17 00:00:00 2001
From: Nicolas Boichat <drinkcat@google.com>
Date: Fri, 6 Jan 2023 13:53:55 +0000
Subject: [PATCH 100/492] scons: Clone env before modifying it in SharedLib

Without this, modifications to env propagate to unexpected places.
This mirrors behaviour in Executable (where the code was copied from).

Change-Id: I35bbf2f3cc2786eb50ff751c813853971ab284fe
Signed-off-by: Nicolas Boichat <drinkcat@google.com>
Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/67233
Maintainer: Jason Lowe-Power <power.jg@gmail.com>
Reviewed-by: Jason Lowe-Power <power.jg@gmail.com>
Reviewed-by: Gabe Black <gabe.black@gmail.com>
Tested-by: kokoro <noreply+kokoro@google.com>
---
 src/SConscript | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/src/SConscript b/src/SConscript
index 51b4bd9b3b..3179849601 100644
--- a/src/SConscript
+++ b/src/SConscript
@@ -376,6 +376,8 @@ class SharedLib(TopLevelBase):
     def declare(self, env):
         objs = self.srcs_to_objs(env, self.sources(env))
 
+        env = env.Clone()
+
         libs = self.libs(env)
         # Higher priority libraries should be earlier in the list.
         libs.sort(key=lambda l: l.priority, reverse=True)

From 8aa9f52953dfe5bd6bf53e6d509d06cc343534d5 Mon Sep 17 00:00:00 2001
From: Nicolas Boichat <drinkcat@google.com>
Date: Thu, 5 Jan 2023 03:24:29 +0000
Subject: [PATCH 101/492] systemc: Add facilities to add extra SystemC message
 handlers

Some clients (e.g. fastmodel integration) would like to catch specific
warning messages from SystemC. Adding facilities to chain extra report
handler (instead of just replacing the default one), that are run
after the default/set handler.

Change-Id: I8ef140fc897ae5eee0fc78c70caf081f625efbfd
Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/67234
Reviewed-by: Gabe Black <gabe.black@gmail.com>
Maintainer: Gabe Black <gabe.black@gmail.com>
Tested-by: kokoro <noreply+kokoro@google.com>
---
 src/systemc/utils/report.cc            | 25 +++++++++++++++++++++++++
 src/systemc/utils/report.hh            |  8 ++++++++
 src/systemc/utils/sc_report_handler.cc |  4 ++++
 3 files changed, 37 insertions(+)

diff --git a/src/systemc/utils/report.cc b/src/systemc/utils/report.cc
index 2b15fced8c..5f3425f398 100644
--- a/src/systemc/utils/report.cc
+++ b/src/systemc/utils/report.cc
@@ -68,6 +68,31 @@ sc_core::sc_actions reportCatchActions = sc_core::SC_DISPLAY;
 sc_core::sc_report_handler_proc reportHandlerProc =
     &sc_core::sc_report_handler::default_handler;
 
+namespace
+{
+
+std::list<sc_core::sc_report_handler_proc> extraReportHandlerProcs;
+
+} // anonymous namespace
+
+const std::list<sc_core::sc_report_handler_proc> &
+getExtraSystemCReportHandlers()
+{
+    return extraReportHandlerProcs;
+}
+
+void
+addExtraSystemCReportHandler(sc_core::sc_report_handler_proc proc)
+{
+    extraReportHandlerProcs.push_back(proc);
+}
+
+void
+removeExtraSystemCReportHandler(sc_core::sc_report_handler_proc proc)
+{
+    extraReportHandlerProcs.remove(proc);
+}
+
 std::unique_ptr<sc_core::sc_report> globalReportCache;
 
 bool reportWarningsAsErrors = false;
diff --git a/src/systemc/utils/report.hh b/src/systemc/utils/report.hh
index 1f12eef6a8..d7ea3401e3 100644
--- a/src/systemc/utils/report.hh
+++ b/src/systemc/utils/report.hh
@@ -29,6 +29,7 @@
 #define __SYSTEMC_UTILS_REPORT_HH__
 
 #include <initializer_list>
+#include <list>
 #include <map>
 #include <memory>
 #include <string>
@@ -103,6 +104,13 @@ extern sc_core::sc_actions reportCatchActions;
 
 extern sc_core::sc_report_handler_proc reportHandlerProc;
 
+// gem5-specific support for extra SystemC report handlers. Called _after_
+// the default/set handler.
+const std::list<sc_core::sc_report_handler_proc>
+    &getExtraSystemCReportHandlers();
+void addExtraSystemCReportHandler(sc_core::sc_report_handler_proc proc);
+void removeExtraSystemCReportHandler(sc_core::sc_report_handler_proc proc);
+
 extern std::unique_ptr<sc_core::sc_report> globalReportCache;
 
 extern bool reportWarningsAsErrors;
diff --git a/src/systemc/utils/sc_report_handler.cc b/src/systemc/utils/sc_report_handler.cc
index b893b1dff3..3421ab912d 100644
--- a/src/systemc/utils/sc_report_handler.cc
+++ b/src/systemc/utils/sc_report_handler.cc
@@ -103,6 +103,10 @@ sc_report_handler::report(sc_severity severity, const char *msg_type,
     }
 
     sc_gem5::reportHandlerProc(report, actions);
+
+    for (auto& handler : sc_gem5::getExtraSystemCReportHandlers()) {
+        handler(report, actions);
+    }
 }
 
 void

From f89973c9e1408bdbba23a3737c4929cb088834d1 Mon Sep 17 00:00:00 2001
From: Nicolas Boichat <drinkcat@google.com>
Date: Thu, 5 Jan 2023 03:32:24 +0000
Subject: [PATCH 102/492] fastmodel: Add handler to catch DMI warnings

Catch DMI warnings from fastmodel, and abort the simulation when
they happen (instead of slowing down simulation).

This is controlled by an exit_on_dmi_warning flag passed to
fm.setup_simulation, defaulting to false.

Change-Id: I07fbc9b2579989d40d601ff0b6af9bfe719309a1
Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/67235
Reviewed-by: Gabe Black <gabe.black@gmail.com>
Maintainer: Gabe Black <gabe.black@gmail.com>
Tested-by: kokoro <noreply+kokoro@google.com>
---
 src/arch/arm/fastmodel/arm_fast_model.py |  6 +++++-
 src/arch/arm/fastmodel/fastmodel.cc      | 23 +++++++++++++++++++++++
 2 files changed, 28 insertions(+), 1 deletion(-)

diff --git a/src/arch/arm/fastmodel/arm_fast_model.py b/src/arch/arm/fastmodel/arm_fast_model.py
index 11004177c6..d2d911f5b4 100644
--- a/src/arch/arm/fastmodel/arm_fast_model.py
+++ b/src/arch/arm/fastmodel/arm_fast_model.py
@@ -141,7 +141,11 @@ def scx_get_min_sync_latency(arg=None):
 
 
 # This should be called once per simulation
-def setup_simulation(sim_name, min_sync_latency=100.0 / 100000000):
+def setup_simulation(
+    sim_name, min_sync_latency=100.0 / 100000000, exit_on_dmi_warning=False
+):
     set_armlmd_license_file()
     scx_initialize(sim_name)
     scx_set_min_sync_latency(min_sync_latency)
+    if exit_on_dmi_warning:
+        _m5.arm_fast_model.gem5.enable_exit_on_dmi_warning_handler()
diff --git a/src/arch/arm/fastmodel/fastmodel.cc b/src/arch/arm/fastmodel/fastmodel.cc
index 33a0c43f87..2edf1fa0c3 100644
--- a/src/arch/arm/fastmodel/fastmodel.cc
+++ b/src/arch/arm/fastmodel/fastmodel.cc
@@ -37,9 +37,11 @@
  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  */
 
+#include "base/logging.hh"
 #include "python/pybind11/pybind.hh"
 #include "scx/scx.h"
 #include "sim/init.hh"
+#include "systemc/utils/report.hh"
 
 namespace gem5
 {
@@ -47,6 +49,21 @@ namespace gem5
 namespace
 {
 
+void
+fastmodel_sc_report_handler(
+     const sc_core::sc_report &report, const sc_core::sc_actions &actions)
+{
+    const char *msg = report.get_msg();
+    if (!msg)
+        return;
+
+    panic_if(
+        strstr(msg, "Simulation code-translation cache failed to gain DMI") ||
+            strstr(msg, "I-side given unusable DMI"),
+        "DMI warning from fastmodel, "
+        "aborting simulation instead of running slowly.");
+}
+
 void
 arm_fast_model_pybind(pybind11::module_ &m_internal)
 {
@@ -118,6 +135,12 @@ arm_fast_model_pybind(pybind11::module_ &m_internal)
              static_cast<sg::ticks_t (*)(sg::Tag<sg::ticks_t> *)>(
                  &scx::scx_get_min_sync_latency))
         ;
+
+    // submodule for gem5-specific functions
+    auto gem5 = arm_fast_model.def_submodule("gem5");
+    gem5.def("enable_exit_on_dmi_warning_handler", []() {
+            sc_gem5::addExtraSystemCReportHandler(fastmodel_sc_report_handler);
+        });
 }
 EmbeddedPyBind embed_("arm_fast_model", &arm_fast_model_pybind);
 

From 24e2ef0b7808f971f3ea651d9946770b3a2a9055 Mon Sep 17 00:00:00 2001
From: Matt Sinclair <mattdsinclair.wisc@gmail.com>
Date: Sat, 7 Jan 2023 16:01:57 -0600
Subject: [PATCH 103/492] mem-ruby, gpu-compute: fix TCP GLC cache bypassing

66d4a158 added support for AMD's GPU cache bypassing flags (GLC
for bypassing L1 caches, SLC for bypassing all caches).  However,
for applications that use the GLC flag but intermix GLC- and
non-GLC accesses to the same address, this previous commit
has a bug.  This bug manifests when the address is currently
valid in the L1 (TCP).  In this case, the previous commit chose
to evict the line before letting the bypassing access to proceed.
However, to do this the previous commit was using the inv_invDone
action as part of the process of evicting it.  This action is only
intended to be called when load acquires are being performed
(i.e., when the entire L1 cache is being flash invalidated).  Thus,
calling inv_invDone for a GLC (or SLC) bypassing request caused an
assert failure since the bypassing request was not performing a
load acquire.

This commit resolves this by changing the support in this case to
simply invalidate the entry in the cache.

Change-Id: Ibaa4976f8714ac93650020af1c0ce2b6732c95a2
Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/67199
Reviewed-by: Jason Lowe-Power <power.jg@gmail.com>
Tested-by: kokoro <noreply+kokoro@google.com>
Maintainer: Jason Lowe-Power <power.jg@gmail.com>
---
 src/mem/ruby/protocol/GPU_VIPER-TCP.sm | 1 -
 1 file changed, 1 deletion(-)

diff --git a/src/mem/ruby/protocol/GPU_VIPER-TCP.sm b/src/mem/ruby/protocol/GPU_VIPER-TCP.sm
index 3be1397d49..14bdcecbc2 100644
--- a/src/mem/ruby/protocol/GPU_VIPER-TCP.sm
+++ b/src/mem/ruby/protocol/GPU_VIPER-TCP.sm
@@ -614,7 +614,6 @@ machine(MachineType:TCP, "GPU TCP (L1 Data Cache)")
 // request to L2.
   transition(V, LoadBypassEvict, I) {TagArrayRead, TagArrayWrite} {
     uu_profileDataMiss;
-    inv_invDone;
     ic_invCache;
     n_issueRdBlk;
     p_popMandatoryQueue;

From 1d467bed7f6f2b5a362fa5e2bf739ca3cc239d82 Mon Sep 17 00:00:00 2001
From: Matt Sinclair <mattdsinclair.wisc@gmail.com>
Date: Sat, 7 Jan 2023 16:43:20 -0600
Subject: [PATCH 104/492] mem-ruby: fix TCP spacing/spelling

Change-Id: I3fd9009592c8716a3da19dcdccf68f16af6522ef
Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/67200
Reviewed-by: Jason Lowe-Power <power.jg@gmail.com>
Maintainer: Jason Lowe-Power <power.jg@gmail.com>
Tested-by: kokoro <noreply+kokoro@google.com>
---
 src/mem/ruby/protocol/GPU_VIPER-TCP.sm | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/src/mem/ruby/protocol/GPU_VIPER-TCP.sm b/src/mem/ruby/protocol/GPU_VIPER-TCP.sm
index 14bdcecbc2..6a977c4e9b 100644
--- a/src/mem/ruby/protocol/GPU_VIPER-TCP.sm
+++ b/src/mem/ruby/protocol/GPU_VIPER-TCP.sm
@@ -261,7 +261,7 @@ machine(MachineType:TCP, "GPU TCP (L1 Data Cache)")
               // If L1 is disabled or requests have GLC or SLC flag set,
               // then, the requests should not cache in the L1. The response
               // from L2/global memory should bypass the cache
-	          trigger(Event:Bypass, in_msg.addr, cache_entry, tbe);
+              trigger(Event:Bypass, in_msg.addr, cache_entry, tbe);
           } else {
             if (is_valid(cache_entry) || L1cache.cacheAvail(in_msg.addr)) {
               trigger(Event:TCC_Ack, in_msg.addr, cache_entry, tbe);
@@ -288,7 +288,7 @@ machine(MachineType:TCP, "GPU TCP (L1 Data Cache)")
         DPRINTF(RubySlicc, "%s\n", in_msg);
         if (in_msg.Type == RubyRequestType:LD) {
           if ((in_msg.isGLCSet || in_msg.isSLCSet) && is_valid(cache_entry)) {
-            // Read rquests with GLC or SLC bit set should not cache in the L1.
+            // Read requests with GLC or SLC bit set should not cache in the L1.
             // They need to bypass the L1 and go to the L2. If an entry exists
             // in the L1, it needs to be evicted
             trigger(Event:LoadBypassEvict, in_msg.LineAddress, cache_entry, tbe);
@@ -609,15 +609,15 @@ machine(MachineType:TCP, "GPU TCP (L1 Data Cache)")
     p_popMandatoryQueue;
   }
 
-// Transition to be called when a load request with GLC or SLC flag set arrives
-// at L1. This transition invalidates any existing entry and forwards the
-// request to L2.
+  // Transition to be called when a load request with GLC or SLC flag set arrives
+  // at L1. This transition invalidates any existing entry and forwards the
+  // request to L2.
   transition(V, LoadBypassEvict, I) {TagArrayRead, TagArrayWrite} {
     uu_profileDataMiss;
     ic_invCache;
     n_issueRdBlk;
     p_popMandatoryQueue;
-}
+  }
 
   transition({V, I}, Atomic, A) {TagArrayRead, TagArrayWrite} {
     t_allocateTBE;

From 4e61a9833693283265b338da1696bfea93762f50 Mon Sep 17 00:00:00 2001
From: Matt Sinclair <mattdsinclair.wisc@gmail.com>
Date: Sat, 7 Jan 2023 16:44:25 -0600
Subject: [PATCH 105/492] mem-ruby: add GPU cache bypass I->I transition

66d4a158 added support for AMD's GPU cache bypassing flags (GLC
for bypassing L1 caches, SLC for bypassing all caches).  However,
it did not add a transition for the situation where the cache line
is currently I (Invalid).  This commit adds this support, which
resolves an assert failure in Pannotia workloads when this situation
arises.

Change-Id: I59a62ce70c01dd8b73aacb733fb3d1d0dab2624b
Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/67201
Reviewed-by: Jason Lowe-Power <power.jg@gmail.com>
Tested-by: kokoro <noreply+kokoro@google.com>
Maintainer: Jason Lowe-Power <power.jg@gmail.com>
---
 src/mem/ruby/protocol/GPU_VIPER-TCP.sm | 9 +++++++++
 1 file changed, 9 insertions(+)

diff --git a/src/mem/ruby/protocol/GPU_VIPER-TCP.sm b/src/mem/ruby/protocol/GPU_VIPER-TCP.sm
index 6a977c4e9b..7e0ad4ed96 100644
--- a/src/mem/ruby/protocol/GPU_VIPER-TCP.sm
+++ b/src/mem/ruby/protocol/GPU_VIPER-TCP.sm
@@ -619,6 +619,15 @@ machine(MachineType:TCP, "GPU TCP (L1 Data Cache)")
     p_popMandatoryQueue;
   }
 
+  // Transition to be called when a load request with GLC or SLC flag set arrives
+  // at L1. Since the entry is invalid, there isn't anything to forward to L2,
+  // so just issue read.
+  transition(I, LoadBypassEvict) {TagArrayRead, TagArrayWrite} {
+    uu_profileDataMiss;
+    n_issueRdBlk;
+    p_popMandatoryQueue;
+  }
+
   transition({V, I}, Atomic, A) {TagArrayRead, TagArrayWrite} {
     t_allocateTBE;
     mru_updateMRU;

From d6bbccb60a656cc63a8ccb9800672aab411d723b Mon Sep 17 00:00:00 2001
From: Vishnu Ramadas <vramadas@wisc.edu>
Date: Mon, 9 Jan 2023 12:48:42 -0600
Subject: [PATCH 106/492] gpu-compute : Fix incorrect TLB stats when
 FunctionalTLB is used

When FunctionalTLB is used in SE mode, the stats tlbLatency and
tlbCycles report negative values. This patch fixes it by disabling the
updates that result in negative values when FunctionalTLB is set to true

Change-Id: I6962785fc1730b166b6d5b879e9c7618a8d6d4b3
Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/67202
Reviewed-by: Matt Sinclair <mattdsinclair.wisc@gmail.com>
Maintainer: Matt Sinclair <mattdsinclair@gmail.com>
Maintainer: Matthew Poremba <matthew.poremba@amd.com>
Reviewed-by: Matthew Poremba <matthew.poremba@amd.com>
Tested-by: kokoro <noreply+kokoro@google.com>
---
 src/gpu-compute/compute_unit.cc | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/src/gpu-compute/compute_unit.cc b/src/gpu-compute/compute_unit.cc
index 62cfbf94cf..06fe28f5b8 100644
--- a/src/gpu-compute/compute_unit.cc
+++ b/src/gpu-compute/compute_unit.cc
@@ -1078,7 +1078,9 @@ ComputeUnit::sendRequest(GPUDynInstPtr gpuDynInst, PortID index, PacketPtr pkt)
         fatal("pkt is not a read nor a write\n");
     }
 
-    stats.tlbCycles -= curTick();
+    if (!functionalTLB) {
+        stats.tlbCycles -= curTick();
+    }
     ++stats.tlbRequests;
 
     PortID tlbPort_index = perLaneTLB ? index : 0;

From 3f2c55cb63adfe702c8f6b30f879ae3c926d0a9a Mon Sep 17 00:00:00 2001
From: Roger Chang <rogerycchang@google.com>
Date: Mon, 9 Jan 2023 14:04:06 +0800
Subject: [PATCH 107/492] arch-riscv: Check RISCV process run in matched CPU

1. Remove set RV32 flag in RiscvProcess32
2. Check if binary run appropriate CPU

Change-Id: I00b0725f3eb4f29e45b8ec719317af79355dc728
Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/67251
Reviewed-by: Jason Lowe-Power <power.jg@gmail.com>
Maintainer: Jason Lowe-Power <power.jg@gmail.com>
Tested-by: kokoro <noreply+kokoro@google.com>
---
 src/arch/riscv/process.cc | 13 ++++++++-----
 1 file changed, 8 insertions(+), 5 deletions(-)

diff --git a/src/arch/riscv/process.cc b/src/arch/riscv/process.cc
index dc7abae790..cd00f5d63a 100644
--- a/src/arch/riscv/process.cc
+++ b/src/arch/riscv/process.cc
@@ -101,8 +101,12 @@ RiscvProcess64::initState()
     Process::initState();
 
     argsInit<uint64_t>(PageBytes);
-    for (ContextID ctx: contextIds)
-        system->threads[ctx]->setMiscRegNoEffect(MISCREG_PRV, PRV_U);
+    for (ContextID ctx: contextIds) {
+        auto *tc = system->threads[ctx];
+        tc->setMiscRegNoEffect(MISCREG_PRV, PRV_U);
+        auto *isa = dynamic_cast<ISA*>(tc->getIsaPtr());
+        fatal_if(isa->rvType() != RV64, "RISC V CPU should run in 64 bits mode");
+    }
 }
 
 void
@@ -114,9 +118,8 @@ RiscvProcess32::initState()
     for (ContextID ctx: contextIds) {
         auto *tc = system->threads[ctx];
         tc->setMiscRegNoEffect(MISCREG_PRV, PRV_U);
-        PCState pc = tc->pcState().as<PCState>();
-        pc.rvType(RV32);
-        tc->pcState(pc);
+        auto *isa = dynamic_cast<ISA*>(tc->getIsaPtr());
+        fatal_if(isa->rvType() != RV32, "RISC V CPU should run in 32 bits mode");
     }
 }
 

From 7c670c16675cd0fa155d04c8966b9b02ca53593d Mon Sep 17 00:00:00 2001
From: Roger Chang <rogerycchang@google.com>
Date: Tue, 3 Jan 2023 14:00:32 +0800
Subject: [PATCH 108/492] arch-riscv: Correct interrupt order

In Section 3.1.14 of Volume II Riscv Spec., the interrupt order
should be MEI, MSI, MTI, SEI, SSI, STI and so on.

issues:
https://gem5.atlassian.net/browse/GEM5-889

Change-Id: I357c86eecd74e9e65bbfd3d4d31e68bc276f8760
Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/67211
Maintainer: Jason Lowe-Power <power.jg@gmail.com>
Reviewed-by: Yu-hsin Wang <yuhsingw@google.com>
Tested-by: kokoro <noreply+kokoro@google.com>
Reviewed-by: Jui-min Lee <fcrh@google.com>
Reviewed-by: Jason Lowe-Power <power.jg@gmail.com>
---
 src/arch/riscv/interrupts.hh | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/src/arch/riscv/interrupts.hh b/src/arch/riscv/interrupts.hh
index f10c5f386a..a1ee396cd4 100644
--- a/src/arch/riscv/interrupts.hh
+++ b/src/arch/riscv/interrupts.hh
@@ -125,9 +125,9 @@ class Interrupts : public BaseInterrupts
             return std::make_shared<NonMaskableInterruptFault>();
         std::bitset<NumInterruptTypes> mask = globalMask();
         const std::vector<int> interrupt_order {
-            INT_EXT_MACHINE, INT_TIMER_MACHINE, INT_SOFTWARE_MACHINE,
-            INT_EXT_SUPER, INT_TIMER_SUPER, INT_SOFTWARE_SUPER,
-            INT_EXT_USER, INT_TIMER_USER, INT_SOFTWARE_USER
+            INT_EXT_MACHINE, INT_SOFTWARE_MACHINE, INT_TIMER_MACHINE,
+            INT_EXT_SUPER, INT_SOFTWARE_SUPER, INT_TIMER_SUPER,
+            INT_EXT_USER, INT_SOFTWARE_USER, INT_TIMER_USER
         };
         for (const int &id : interrupt_order)
             if (checkInterrupt(id) && mask[id])

From 626e445563a80b67150a5e0bc5d55f1b393762e7 Mon Sep 17 00:00:00 2001
From: Gabe Black <gabeblack@google.com>
Date: Tue, 10 Jan 2023 05:02:05 -0800
Subject: [PATCH 109/492] dev: Add a "resetter" callback to the typed register
 class.

When using the typed register template, most functionality of the class
can be controlled using callbacks. For instance, callbacks can be
installed to handle reads or writes to a register without having to
subclass the template and override those methods using inheritance.

The recently added reset() method did not follow this pattern though,
which has two problems. First, it's inconsistent with how the class is
normally used. Second, once you've defined a subclass, the reader,
writer, etc, callbacks still expect the type of the original class.
That means these have to either awkwardly use a type different from the
actual real type of the register, or use awkward, inefficient, and/or
dangerous casting to get back to the true type.

To address these problems, this change adds a resetter(...) method
which works like the reader(...) or writer(...) methods to optionally
install a callback to implement any special reset behavior.

Change-Id: Ia74b36616fd459c1dbed9304568903a76a4b55de
Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/67203
Tested-by: kokoro <noreply+kokoro@google.com>
Reviewed-by: Yu-hsin Wang <yuhsingw@google.com>
Maintainer: Gabe Black <gabeblack@google.com>
---
 src/dev/reg_bank.hh      | 40 +++++++++++++++++++++++++++++++-
 src/dev/reg_bank.test.cc | 50 ++++++++++++++++++++++++++++++++++++++++
 2 files changed, 89 insertions(+), 1 deletion(-)

diff --git a/src/dev/reg_bank.hh b/src/dev/reg_bank.hh
index 32d9058a15..3d8dc576cb 100644
--- a/src/dev/reg_bank.hh
+++ b/src/dev/reg_bank.hh
@@ -270,6 +270,12 @@
  * is an alternative form of update which also takes a custom bitmask, if you
  * need to update bits other than the normally writeable ones.
  *
+ * Similarly, you can set a "resetter" handler which is responsible for
+ * resetting the register. It takes a reference to the current Register, and
+ * no other parameters. The "initialValue" accessor can retrieve the value the
+ * register was constructed with. The register is simply set to this value
+ * in the default resetter implementation.
+ *
  * = Read only bits =
  *
  * Often registers have bits which are fixed and not affected by writes. To
@@ -554,6 +560,7 @@ class RegisterBank : public RegisterBankBase
         using WriteFunc = std::function<void (This &reg, const Data &value)>;
         using PartialWriteFunc = std::function<
             void (This &reg, const Data &value, int first, int last)>;
+        using ResetFunc = std::function<void (This &reg)>;
 
       private:
         Data _data = {};
@@ -564,6 +571,7 @@ class RegisterBank : public RegisterBankBase
         WriteFunc _writer = defaultWriter;
         PartialWriteFunc _partialWriter = defaultPartialWriter;
         PartialReadFunc _partialReader = defaultPartialReader;
+        ResetFunc _resetter = defaultResetter;
 
       protected:
         static Data defaultReader(This &reg) { return reg.get(); }
@@ -587,6 +595,12 @@ class RegisterBank : public RegisterBankBase
                                                  mask(first, last)));
         }
 
+        static void
+        defaultResetter(This &reg)
+        {
+            reg.get() = reg.initialValue();
+        }
+
         constexpr Data
         htoreg(Data data)
         {
@@ -721,6 +735,30 @@ class RegisterBank : public RegisterBankBase
             return partialWriter(wrapper);
         }
 
+        // Set the callables which handle resetting.
+        //
+        // The default resetter restores the initial value used in the
+        // constructor.
+        constexpr This &
+        resetter(const ResetFunc &new_resetter)
+        {
+            _resetter = new_resetter;
+            return *this;
+        }
+        template <class Parent, class... Args>
+        constexpr This &
+        resetter(Parent *parent, void (Parent::*nr)(Args... args))
+        {
+            auto wrapper = [parent, nr](Args&&... args) {
+                return (parent->*nr)(std::forward<Args>(args)...);
+            };
+            return resetter(wrapper);
+        }
+
+        // An accessor which returns the initial value as set in the
+        // constructor. This is intended to be used in a resetter function.
+        const Data &initialValue() const { return _resetData; }
+
 
         /*
          * Interface for accessing the register's state, for use by the
@@ -817,7 +855,7 @@ class RegisterBank : public RegisterBankBase
         }
 
         // Reset our data to its initial value.
-        void reset() override { get() = _resetData; }
+        void reset() override { _resetter(*this); }
     };
 
   private:
diff --git a/src/dev/reg_bank.test.cc b/src/dev/reg_bank.test.cc
index b4bc969724..4439526e35 100644
--- a/src/dev/reg_bank.test.cc
+++ b/src/dev/reg_bank.test.cc
@@ -868,6 +868,56 @@ TEST_F(TypedRegisterTest, PartialWriterReaderWriter)
     EXPECT_EQ(write_value, 0x0344);
 }
 
+// Use the default resetter for a register.
+TEST_F(TypedRegisterTest, DefaultResetter)
+{
+    BackingType initial_value = reg.get();
+
+    reg.get() = initial_value + 1;
+    EXPECT_EQ(reg.get(), initial_value + 1);
+
+    reg.reset();
+
+    EXPECT_EQ(reg.get(), initial_value);
+}
+
+// Set a custom resetter for a register.
+TEST_F(TypedRegisterTest, Resetter)
+{
+    RegisterBankLE::Register<BackingType> *reg_ptr = nullptr;
+
+    reg.resetter([&reg_ptr](auto &r) {
+        reg_ptr = &r;
+    });
+
+    reg.reset();
+
+    EXPECT_EQ(reg_ptr, &reg);
+}
+
+// Set a custom resetter for a register which is a class method.
+TEST_F(TypedRegisterTest, ResetterMF)
+{
+    using Reg = RegisterBankLE::Register<BackingType>;
+
+    struct ResetStruct
+    {
+        Reg *reg_ptr = nullptr;
+
+        void
+        resetter(Reg &r)
+        {
+            reg_ptr = &r;
+        }
+    } reset_struct;
+
+    reg.resetter(&reset_struct, &ResetStruct::resetter);
+
+    reg.reset();
+
+    EXPECT_EQ(reset_struct.reg_ptr, &reg);
+}
+
 TEST_F(TypedRegisterTest, Serialize)
 {
     std::ostringstream os;

From a7ef5b77d6b8399437865261f862c286f83b0a85 Mon Sep 17 00:00:00 2001
From: Yu-hsin Wang <yuhsingw@google.com>
Date: Wed, 11 Jan 2023 13:19:10 +0800
Subject: [PATCH 110/492] mem: Implemement backdoor interface for Bridge

Change-Id: I5ff62b03c34e41395a957a0799925ddd9c275458
Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/67291
Reviewed-by: Nicolas Boichat <drinkcat@google.com>
Tested-by: kokoro <noreply+kokoro@google.com>
Maintainer: Gabe Black <gabeblack@google.com>
Reviewed-by: Gabe Black <gabeblack@google.com>
---
 src/mem/bridge.cc | 15 +++++++++++++++
 src/mem/bridge.hh | 28 ++++++++++++++++++++--------
 2 files changed, 35 insertions(+), 8 deletions(-)

diff --git a/src/mem/bridge.cc b/src/mem/bridge.cc
index 0f744f7336..36832ebfc4 100644
--- a/src/mem/bridge.cc
+++ b/src/mem/bridge.cc
@@ -347,6 +347,14 @@ Bridge::BridgeResponsePort::recvAtomic(PacketPtr pkt)
     return delay * bridge.clockPeriod() + memSidePort.sendAtomic(pkt);
 }
 
+Tick
+Bridge::BridgeResponsePort::recvAtomicBackdoor(
+    PacketPtr pkt, MemBackdoorPtr &backdoor)
+{
+    return delay * bridge.clockPeriod() + memSidePort.sendAtomicBackdoor(
+        pkt, backdoor);
+}
+
 void
 Bridge::BridgeResponsePort::recvFunctional(PacketPtr pkt)
 {
@@ -371,6 +379,13 @@ Bridge::BridgeResponsePort::recvFunctional(PacketPtr pkt)
     memSidePort.sendFunctional(pkt);
 }
 
+void
+Bridge::BridgeResponsePort::recvMemBackdoorReq(
+    const MemBackdoorReq &req, MemBackdoorPtr &backdoor)
+{
+    memSidePort.sendMemBackdoorReq(req, backdoor);
+}
+
 bool
 Bridge::BridgeRequestPort::trySatisfyFunctional(PacketPtr pkt)
 {
diff --git a/src/mem/bridge.hh b/src/mem/bridge.hh
index f56cef115f..e4a6837861 100644
--- a/src/mem/bridge.hh
+++ b/src/mem/bridge.hh
@@ -195,23 +195,35 @@ class Bridge : public ClockedObject
 
         /** When receiving a timing request from the peer port,
             pass it to the bridge. */
-        bool recvTimingReq(PacketPtr pkt);
+        bool recvTimingReq(PacketPtr pkt) override;
 
         /** When receiving a retry request from the peer port,
             pass it to the bridge. */
-        void recvRespRetry();
+        void recvRespRetry() override;
 
-        /** When receiving a Atomic requestfrom the peer port,
+        /** When receiving an Atomic request from the peer port,
             pass it to the bridge. */
-        Tick recvAtomic(PacketPtr pkt);
+        Tick recvAtomic(PacketPtr pkt) override;
+
+        /** When receiving an Atomic backdoor request from the peer port,
+            pass it to the bridge. */
+        Tick recvAtomicBackdoor(
+            PacketPtr pkt, MemBackdoorPtr &backdoor) override;
+
 
         /** When receiving a Functional request from the peer port,
             pass it to the bridge. */
-        void recvFunctional(PacketPtr pkt);
+        void recvFunctional(PacketPtr pkt) override;
+
+        /** When receiving a Functional backdoor request from the peer port,
+            pass it to the bridge. */
+        void recvMemBackdoorReq(
+            const MemBackdoorReq &req, MemBackdoorPtr &backdoor) override;
+
 
         /** When receiving a address range request the peer port,
             pass it to the bridge. */
-        AddrRangeList getAddrRanges() const;
+        AddrRangeList getAddrRanges() const override;
     };
 
 
@@ -303,11 +315,11 @@ class Bridge : public ClockedObject
 
         /** When receiving a timing request from the peer port,
             pass it to the bridge. */
-        bool recvTimingResp(PacketPtr pkt);
+        bool recvTimingResp(PacketPtr pkt) override;
 
         /** When receiving a retry request from the peer port,
             pass it to the bridge. */
-        void recvReqRetry();
+        void recvReqRetry() override;
     };
 
     /** Response port of the bridge. */

From a2658f08e5163de3b4c182633e9c730176fac4e5 Mon Sep 17 00:00:00 2001
From: Earl Ou <shunhsingou@google.com>
Date: Tue, 10 Jan 2023 00:27:53 -0800
Subject: [PATCH 111/492] systemc: fix -Wno-free-nonheap-object for building
 scheduler.cc

-Wno-free-nonheap-object can happen at compile or link time depending on
the versions. To better disable this false alarm, we move the memory
management part into .cc file, so the check is always done at link time.

This change also removes the global flags so other code is still checked
with the flags.

Change-Id: I8f1e20197b25c90b5f439e2ecc474bd99e4f82ed
Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/67237
Tested-by: kokoro <noreply+kokoro@google.com>
Reviewed-by: Yu-hsin Wang <yuhsingw@google.com>
Maintainer: Gabe Black <gabe.black@gmail.com>
---
 SConstruct                  |  4 ----
 src/sim/eventq.cc           | 26 ++++++++++++++++++++++++++
 src/sim/eventq.hh           | 18 ++++--------------
 src/systemc/core/SConscript |  6 ++----
 4 files changed, 32 insertions(+), 22 deletions(-)

diff --git a/SConstruct b/SConstruct
index bd26e4552e..e08c2984e5 100755
--- a/SConstruct
+++ b/SConstruct
@@ -447,10 +447,6 @@ for variant_path in variant_paths:
             error('gcc version 7 or newer required.\n'
                   'Installed version:', env['CXXVERSION'])
 
-        with gem5_scons.Configure(env) as conf:
-            # This warning has a false positive in the systemc in g++ 11.1.
-            conf.CheckCxxFlag('-Wno-free-nonheap-object')
-
         # Add the appropriate Link-Time Optimization (LTO) flags if
         # `--with-lto` is set.
         if GetOption('with_lto'):
diff --git a/src/sim/eventq.cc b/src/sim/eventq.cc
index 66d03854ac..23ca2f6f4e 100644
--- a/src/sim/eventq.cc
+++ b/src/sim/eventq.cc
@@ -108,6 +108,32 @@ Event::insertBefore(Event *event, Event *curr)
     return event;
 }
 
+void
+Event::acquire()
+{
+    if (flags.isSet(Event::Managed))
+        acquireImpl();
+}
+
+void
+Event::release()
+{
+    if (flags.isSet(Event::Managed))
+        releaseImpl();
+}
+
+void
+Event::acquireImpl()
+{
+}
+
+void
+Event::releaseImpl()
+{
+    if (!scheduled())
+        delete this;
+}
+
 void
 EventQueue::insert(Event *event)
 {
diff --git a/src/sim/eventq.hh b/src/sim/eventq.hh
index cd5d285f93..62495bf86d 100644
--- a/src/sim/eventq.hh
+++ b/src/sim/eventq.hh
@@ -381,26 +381,16 @@ class Event : public EventBase, public Serializable
     /**
      * Managed event scheduled and being held in the event queue.
      */
-    void acquire()
-    {
-        if (flags.isSet(Event::Managed))
-            acquireImpl();
-    }
+    void acquire();
 
     /**
      * Managed event removed from the event queue.
      */
-    void release() {
-        if (flags.isSet(Event::Managed))
-            releaseImpl();
-    }
+    void release();
 
-    virtual void acquireImpl() {}
+    virtual void acquireImpl();
 
-    virtual void releaseImpl() {
-        if (!scheduled())
-            delete this;
-    }
+    virtual void releaseImpl();
 
     /** @} */
 
diff --git a/src/systemc/core/SConscript b/src/systemc/core/SConscript
index 2b8811187c..c7c9dbb40b 100644
--- a/src/systemc/core/SConscript
+++ b/src/systemc/core/SConscript
@@ -40,6 +40,7 @@ if env['CONF']['USE_SYSTEMC']:
     Source('port.cc')
     Source('process.cc')
     Source('sched_event.cc')
+    Source('scheduler.cc')
     Source('sensitivity.cc')
     Source('time.cc')
 
@@ -75,7 +76,4 @@ if env['CONF']['USE_SYSTEMC']:
     # Disable the false positive warning for the event members of the scheduler.
     with gem5_scons.Configure(env) as conf:
         flag = '-Wno-free-nonheap-object'
-        append = {}
-        if conf.CheckCxxFlag(flag, autoadd=False):
-            append['CCFLAGS'] = [flag]
-        Source('scheduler.cc', append=append)
+        conf.CheckLinkFlag(flag)

From 4954167fe51c3072229185356ffb1e55d5eb9f41 Mon Sep 17 00:00:00 2001
From: Earl Ou <shunhsingou@google.com>
Date: Wed, 4 Jan 2023 19:48:18 -0800
Subject: [PATCH 112/492] mem: create port_wrapper classes

The port_wrapper classes convert the Request/ResponsePort from
inherit-base to callback registrations. This help 'composition over
inheritance' that most design pattern follows, which help reducing
code length and increase reusability.

Change-Id: Ia13cc62507ac8425bd7cf143a2e080d041c173f9
Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/67232
Maintainer: Jason Lowe-Power <power.jg@gmail.com>
Reviewed-by: Jason Lowe-Power <power.jg@gmail.com>
Tested-by: kokoro <noreply+kokoro@google.com>
---
 src/mem/SConscript      |   1 +
 src/mem/port_wrapper.cc | 169 ++++++++++++++++++++++++++++++++++++++++
 src/mem/port_wrapper.hh | 159 +++++++++++++++++++++++++++++++++++++
 3 files changed, 329 insertions(+)
 create mode 100644 src/mem/port_wrapper.cc
 create mode 100644 src/mem/port_wrapper.hh

diff --git a/src/mem/SConscript b/src/mem/SConscript
index 3bcfc0d9c5..ca164c1e27 100644
--- a/src/mem/SConscript
+++ b/src/mem/SConscript
@@ -88,6 +88,7 @@ Source('packet.cc')
 Source('port.cc')
 Source('packet_queue.cc')
 Source('port_proxy.cc')
+Source('port_wrapper.cc')
 Source('physical.cc')
 Source('shared_memory_server.cc')
 Source('simple_mem.cc')
diff --git a/src/mem/port_wrapper.cc b/src/mem/port_wrapper.cc
new file mode 100644
index 0000000000..fd5ebbd614
--- /dev/null
+++ b/src/mem/port_wrapper.cc
@@ -0,0 +1,169 @@
+/*
+ * Copyright 2023 Google, LLC.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are
+ * met: redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer;
+ * redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution;
+ * neither the name of the copyright holders nor the names of its
+ * contributors may be used to endorse or promote products derived from
+ * this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include "mem/port_wrapper.hh"
+
+namespace gem5
+{
+
+RequestPortWrapper::RequestPortWrapper(const std::string& name,
+                                       SimObject* _owner, PortID id)
+    : RequestPort(name, _owner, id)
+{
+}
+
+void
+RequestPortWrapper::recvRangeChange()
+{
+    if (!recvRangeChangeCb) {
+        RequestPort::recvRangeChange();
+        return;
+    }
+    recvRangeChangeCb();
+}
+
+bool
+RequestPortWrapper::recvTimingResp(PacketPtr packet)
+{
+    panic_if(!recvTimingRespCb, "RecvTimingRespCallback is empty.");
+    return recvTimingRespCb(packet);
+}
+
+void
+RequestPortWrapper::recvReqRetry()
+{
+    panic_if(!recvReqRetryCb, "RecvReqRetryCallback is empty.");
+    recvReqRetryCb();
+}
+
+void
+RequestPortWrapper::setRangeChangeCallback(RecvReqRetryCallback cb)
+{
+    recvRangeChangeCb = std::move(cb);
+}
+
+void
+RequestPortWrapper::setTimingCallbacks(RecvTimingRespCallback resp_cb,
+                                       RecvReqRetryCallback retry_cb)
+{
+    recvTimingRespCb = std::move(resp_cb);
+    recvReqRetryCb = std::move(retry_cb);
+}
+
+ResponsePortWrapper::ResponsePortWrapper(const std::string& name,
+                                         SimObject* _owner, PortID id)
+    : ResponsePort(name, _owner, id)
+{
+}
+
+AddrRangeList
+ResponsePortWrapper::getAddrRanges() const
+{
+    panic_if(!getAddrRangesCb, "GetAddrRangesCallback is empty.");
+    return getAddrRangesCb();
+}
+
+bool
+ResponsePortWrapper::recvTimingReq(PacketPtr packet)
+{
+    panic_if(!recvTimingReqCb, "RecvTimingReqCallback is empty.");
+    return recvTimingReqCb(packet);
+}
+
+void
+ResponsePortWrapper::recvRespRetry()
+{
+    panic_if(!recvRespRetryCb, "RecvRespRetryCallback is empty.");
+    recvRespRetryCb();
+}
+
+Tick
+ResponsePortWrapper::recvAtomic(PacketPtr packet)
+{
+    panic_if(!recvAtomicCb, "RecvAtomicCallback is empty.");
+    return recvAtomicCb(packet);
+}
+
+Tick
+ResponsePortWrapper::recvAtomicBackdoor(PacketPtr packet,
+                                        MemBackdoorPtr& backdoor)
+{
+    if (!recvAtomicBackdoorCb) {
+        return ResponsePort::recvAtomicBackdoor(packet, backdoor);
+    }
+    return recvAtomicBackdoorCb(packet, backdoor);
+}
+
+void
+ResponsePortWrapper::recvFunctional(PacketPtr packet)
+{
+    panic_if(!recvFunctionalCb, "RecvFunctionalCallback is empty.");
+    recvTimingReqCb(packet);
+}
+
+void
+ResponsePortWrapper::recvMemBackdoorReq(const MemBackdoorReq& req,
+                                        MemBackdoorPtr& backdoor)
+{
+    if (!recvMemBackdoorReqCb) {
+        ResponsePort::recvMemBackdoorReq(req, backdoor);
+        return;
+    }
+    recvMemBackdoorReqCb(req, backdoor);
+}
+
+void
+ResponsePortWrapper::setGetAddrRangesCallback(GetAddrRangesCallback cb)
+{
+    getAddrRangesCb = std::move(cb);
+}
+
+void
+ResponsePortWrapper::setTimingCallbacks(RecvTimingReqCallback timing_cb,
+                                        RecvRespRetryCallback retry_cb)
+{
+    recvTimingReqCb = std::move(timing_cb);
+    recvRespRetryCb = std::move(retry_cb);
+}
+
+void
+ResponsePortWrapper::setAtomicCallbacks(RecvAtomicCallback atomic_cb,
+                                        RecvAtomicBackdoorCallback backdoor_cb)
+{
+    recvAtomicCb = std::move(atomic_cb);
+    recvAtomicBackdoorCb = std::move(backdoor_cb);
+}
+
+void
+ResponsePortWrapper::setFunctionalCallbacks(
+    RecvFunctionalCallback func_cb, RecvMemBackdoorReqCallback backdoor_cb)
+{
+    recvFunctionalCb = std::move(func_cb);
+    recvMemBackdoorReqCb = std::move(backdoor_cb);
+}
+
+}  // namespace gem5
diff --git a/src/mem/port_wrapper.hh b/src/mem/port_wrapper.hh
new file mode 100644
index 0000000000..5dcdd5dc9b
--- /dev/null
+++ b/src/mem/port_wrapper.hh
@@ -0,0 +1,159 @@
+/*
+ * Copyright 2023 Google, LLC.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are
+ * met: redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer;
+ * redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution;
+ * neither the name of the copyright holders nor the names of its
+ * contributors may be used to endorse or promote products derived from
+ * this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/**
+ * @file
+ * PortWrapper Object Declaration.
+ *
+ * The RequestPortWrapper and ResponsePortWrapper converts inherit-based
+ * RequestPort and ResponsePort into callback-based. This help reducing
+ * redundant code and increase code reusability in most cases, allowing
+ * composition over inheritance pattern.
+ *
+ * Example usage:
+ *
+ * class MySimObject : public SimObject
+ * {
+ *   public:
+ *       ResponsePortWrapper inPort;
+ *
+ *       MySimObject(...) : inPort("in_port", this)... {
+ *         inPort.setGetAddrRangesCallback([this]() {
+ *           return getRange();
+ *         });
+ *
+ *         inPort.setAtomicCallbacks([this](PacketPtr packet) {
+ *           // process the packet
+ *           ...
+ *           return Tick();
+ *         });
+ *       }
+ *
+ *   private:
+ *       AddrRangeList getRange() const {...}
+ * };
+ */
+
+#ifndef __MEM_PORT_WRAPPER_HH__
+#define __MEM_PORT_WRAPPER_HH__
+
+#include <functional>
+
+#include "mem/port.hh"
+
+namespace gem5
+{
+
+/**
+ * The RequestPortWrapper converts inherit-based RequestPort into
+ * callback-based.
+ */
+class RequestPortWrapper : public RequestPort
+{
+  public:
+    using RecvRangeChangeCallback = std::function<void()>;
+    // Timing Protocol
+    using RecvTimingRespCallback = std::function<bool(PacketPtr)>;
+    using RecvReqRetryCallback = std::function<void()>;
+
+    RequestPortWrapper(const std::string& name, SimObject* _owner,
+                       PortID id = InvalidPortID);
+
+    void recvRangeChange() override;
+
+    // TimingRequestProtocol
+    bool recvTimingResp(PacketPtr) override;
+    void recvReqRetry() override;
+
+    void setRangeChangeCallback(RecvReqRetryCallback);
+    void setTimingCallbacks(RecvTimingRespCallback, RecvReqRetryCallback);
+
+  private:
+    RecvRangeChangeCallback recvRangeChangeCb = nullptr;
+    RecvTimingRespCallback recvTimingRespCb = nullptr;
+    RecvReqRetryCallback recvReqRetryCb = nullptr;
+};
+
+/**
+ * The ResponsePortWrapper converts inherit-based ResponsePort into
+ * callback-based.
+ */
+class ResponsePortWrapper : public ResponsePort
+{
+  public:
+    using GetAddrRangesCallback = std::function<AddrRangeList()>;
+    // Timing Protocol
+    using RecvTimingReqCallback = std::function<bool(PacketPtr)>;
+    // Atomic Protocol
+    using RecvAtomicCallback = std::function<Tick(PacketPtr)>;
+    using RecvAtomicBackdoorCallback =
+        std::function<Tick(PacketPtr, MemBackdoorPtr&)>;
+
+    // Functional Protocol
+    using RecvFunctionalCallback = std::function<void(PacketPtr)>;
+    using RecvMemBackdoorReqCallback =
+        std::function<void(const MemBackdoorReq&, MemBackdoorPtr&)>;
+
+    using RecvRespRetryCallback = std::function<void()>;
+
+    ResponsePortWrapper(const std::string& name, SimObject* _owner,
+                        PortID id = InvalidPortID);
+
+    AddrRangeList getAddrRanges() const override;
+
+    // TimingResponseProtocol
+    bool recvTimingReq(PacketPtr) override;
+    void recvRespRetry() override;
+
+    // AtomicResponseProtocol
+    Tick recvAtomic(PacketPtr) override;
+    Tick recvAtomicBackdoor(PacketPtr, MemBackdoorPtr&) override;
+
+    // FunctionalResponseProtocol
+    void recvFunctional(PacketPtr) override;
+    void recvMemBackdoorReq(const MemBackdoorReq&, MemBackdoorPtr&) override;
+
+    void setGetAddrRangesCallback(GetAddrRangesCallback);
+    void setTimingCallbacks(RecvTimingReqCallback, RecvRespRetryCallback);
+    void setAtomicCallbacks(RecvAtomicCallback,
+                            RecvAtomicBackdoorCallback = nullptr);
+    void setFunctionalCallbacks(RecvFunctionalCallback,
+                                RecvMemBackdoorReqCallback = nullptr);
+
+  private:
+    GetAddrRangesCallback getAddrRangesCb = nullptr;
+    RecvTimingReqCallback recvTimingReqCb = nullptr;
+    RecvRespRetryCallback recvRespRetryCb = nullptr;
+    RecvAtomicCallback recvAtomicCb = nullptr;
+    RecvAtomicBackdoorCallback recvAtomicBackdoorCb = nullptr;
+    RecvFunctionalCallback recvFunctionalCb = nullptr;
+    RecvMemBackdoorReqCallback recvMemBackdoorReqCb = nullptr;
+};
+
+}  // namespace gem5
+
+#endif  //__MEM_PORT_WRAPPER_HH__

From 8149245eccff03ebb716ff8d8ae755905bf4801a Mon Sep 17 00:00:00 2001
From: Giacomo Travaglini <giacomo.travaglini@arm.com>
Date: Mon, 24 Oct 2022 17:28:06 +0100
Subject: [PATCH 113/492] cpu: Formalize a CPU cluster class in the gem5
 standard library

Currently the gem5 standard library does not define a class to represent
a cluster of CPUs.
The SubSystem class has been extended in some python modules [1] to
define clock/voltage domains shared by a group of CPUs (the cluster),
and to provide some utility functions for top level configs.

This patch is moving the aforementioned class within the gem5 standard
library, to let other ISAs and scripts make use of it.

Adding a cpu cluster class to the gem5 library will have the
benefit of standardizing the interface to cpus in the toplevel
configs

Most of the new class still resides in the python world: we want the
class to be as generic as possible and we want to make its use
optional

[1]: https://github.com/gem5/gem5/blob/v22.0.0.0/\
    configs/example/arm/devices.py#L96

Change-Id: Idb05263a244e28bffa9eac811c6deb62ebb76a74
Signed-off-by: Giacomo Travaglini <giacomo.travaglini@arm.com>
Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/65891
Reviewed-by: Jason Lowe-Power <power.jg@gmail.com>
Maintainer: Jason Lowe-Power <power.jg@gmail.com>
Tested-by: kokoro <noreply+kokoro@google.com>
---
 src/cpu/CpuCluster.py | 93 +++++++++++++++++++++++++++++++++++++++++++
 src/cpu/SConscript    |  1 +
 src/cpu/cluster.hh    | 58 +++++++++++++++++++++++++++
 3 files changed, 152 insertions(+)
 create mode 100644 src/cpu/CpuCluster.py
 create mode 100644 src/cpu/cluster.hh

diff --git a/src/cpu/CpuCluster.py b/src/cpu/CpuCluster.py
new file mode 100644
index 0000000000..31fdc4977d
--- /dev/null
+++ b/src/cpu/CpuCluster.py
@@ -0,0 +1,93 @@
+# Copyright (c) 2022 Arm Limited
+# All rights reserved.
+#
+# The license below extends only to copyright in the software and shall
+# not be construed as granting a license to any other intellectual
+# property including but not limited to intellectual property relating
+# to a hardware implementation of the functionality of the software
+# licensed hereunder.  You may use the software subject to the license
+# terms below provided that you ensure that this notice is replicated
+# unmodified and in its entirety in all distributions of the software,
+# modified or unmodified, in source code or in binary form.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are
+# met: redistributions of source code must retain the above copyright
+# notice, this list of conditions and the following disclaimer;
+# redistributions in binary form must reproduce the above copyright
+# notice, this list of conditions and the following disclaimer in the
+# documentation and/or other materials provided with the distribution;
+# neither the name of the copyright holders nor the names of its
+# contributors may be used to endorse or promote products derived from
+# this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+from m5.params import *
+from m5.objects.SubSystem import SubSystem
+
+
+class CpuCluster(SubSystem):
+    type = "CpuCluster"
+    cxx_header = "cpu/cluster.hh"
+    cxx_class = "gem5::CpuCluster"
+
+    _NUM_CPUS = 0
+    _NUM_CLUSTERS = 0
+
+    voltage_domain = Param.VoltageDomain("Voltage domain")
+    clk_domain = Param.ClockDomain("Clock domain")
+
+    def __iter__(self):
+        return iter(self.cpus)
+
+    def generate_cpus(self, cpu_type: "BaseCPU", num_cpus: int):
+        """
+        Instantiates the cpus within the cluster provided
+        theit type and their number.
+
+        :param cpu_type: The cpu class
+        :param num_cpus: The number of cpus within the cluster
+        """
+        self.cpus = [
+            cpu_type(
+                cpu_id=CpuCluster._NUM_CPUS + idx, clk_domain=self.clk_domain
+            )
+            for idx in range(num_cpus)
+        ]
+
+        for cpu in self.cpus:
+            cpu.createThreads()
+            cpu.createInterruptController()
+            cpu.socket_id = CpuCluster._NUM_CLUSTERS
+
+        # "Register" the cluster/cpus by augmenting the
+        # class variables
+        CpuCluster._NUM_CPUS += num_cpus
+        CpuCluster._NUM_CLUSTERS += 1
+
+    def connect(self, membus: "SystemXBar"):
+        """
+        Connects every cpu within the cluster with the
+        provided bus
+
+        :param membus: The system crossbar
+        """
+        for cpu in self.cpus:
+            cpu.connectBus(membus)
+
+    def memory_mode(self) -> "MemoryMode":
+        return type(self.cpus[0]).memory_mode()
+
+    def require_caches(self) -> bool:
+        return type(self.cpus[0]).require_caches()
diff --git a/src/cpu/SConscript b/src/cpu/SConscript
index 0466f11433..d6dcd2f6ea 100644
--- a/src/cpu/SConscript
+++ b/src/cpu/SConscript
@@ -93,6 +93,7 @@ Source('inst_pb_trace.cc', tags='protobuf')
 SimObject('CheckerCPU.py', sim_objects=['CheckerCPU'])
 
 SimObject('BaseCPU.py', sim_objects=['BaseCPU'])
+SimObject('CpuCluster.py', sim_objects=['CpuCluster'])
 SimObject('CPUTracers.py', sim_objects=[
     'ExeTracer', 'IntelTrace', 'NativeTrace'])
 SimObject('TimingExpr.py', sim_objects=[
diff --git a/src/cpu/cluster.hh b/src/cpu/cluster.hh
new file mode 100644
index 0000000000..623378ac79
--- /dev/null
+++ b/src/cpu/cluster.hh
@@ -0,0 +1,58 @@
+/*
+ * Copyright (c) 2022 Arm Limited
+ * All rights reserved
+ *
+ * The license below extends only to copyright in the software and shall
+ * not be construed as granting a license to any other intellectual
+ * property including but not limited to intellectual property relating
+ * to a hardware implementation of the functionality of the software
+ * licensed hereunder.  You may use the software subject to the license
+ * terms below provided that you ensure that this notice is replicated
+ * unmodified and in its entirety in all distributions of the software,
+ * modified or unmodified, in source code or in binary form.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are
+ * met: redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer;
+ * redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution;
+ * neither the name of the copyright holders nor the names of its
+ * contributors may be used to endorse or promote products derived from
+ * this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef __CPU_CLUSTER_HH__
+#define __CPU_CLUSTER_HH__
+
+#include "sim/sub_system.hh"
+#include "params/CpuCluster.hh"
+
+namespace gem5
+{
+
+class CpuCluster : public SubSystem
+{
+  public:
+    PARAMS(CpuCluster);
+    CpuCluster(const Params &p)
+      : SubSystem(p)
+    {}
+};
+
+} // namespace gem5
+
+#endif // __CPU_CLUSTER_HH__

From 899f702f122b80f7f0cb24a9a04015cef7daa1b5 Mon Sep 17 00:00:00 2001
From: Giacomo Travaglini <giacomo.travaglini@arm.com>
Date: Tue, 25 Oct 2022 09:31:10 +0100
Subject: [PATCH 114/492] configs: Start using the new CpuCluster class in
 example/arm

Change-Id: I061c6255449dd126cdd1a6935bea510ebe2e8e14
Signed-off-by: Giacomo Travaglini <giacomo.travaglini@arm.com>
Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/65892
Tested-by: kokoro <noreply+kokoro@google.com>
Reviewed-by: Jason Lowe-Power <power.jg@gmail.com>
Reviewed-by: Richard Cooper <richard.cooper@arm.com>
Maintainer: Jason Lowe-Power <power.jg@gmail.com>
Reviewed-by: Yu-hsin Wang <yuhsingw@google.com>
---
 configs/example/arm/baremetal.py    |  2 +-
 configs/example/arm/devices.py      | 56 ++++++++---------------------
 configs/example/arm/fs_bigLITTLE.py | 31 +++++++---------
 configs/example/arm/ruby_fs.py      |  2 +-
 configs/example/arm/starter_fs.py   |  2 +-
 configs/example/arm/starter_se.py   |  4 +--
 6 files changed, 32 insertions(+), 65 deletions(-)

diff --git a/configs/example/arm/baremetal.py b/configs/example/arm/baremetal.py
index a8db6bacd4..fc630e5299 100644
--- a/configs/example/arm/baremetal.py
+++ b/configs/example/arm/baremetal.py
@@ -122,7 +122,7 @@ def create(args):
 
     # Add CPU clusters to the system
     system.cpu_cluster = [
-        devices.CpuCluster(
+        devices.ArmCpuCluster(
             system, args.num_cores, args.cpu_freq, "1.0V", *cpu_types[args.cpu]
         )
     ]
diff --git a/configs/example/arm/devices.py b/configs/example/arm/devices.py
index c6560d74dd..3f005a49aa 100644
--- a/configs/example/arm/devices.py
+++ b/configs/example/arm/devices.py
@@ -1,4 +1,4 @@
-# Copyright (c) 2016-2017, 2019, 2021 Arm Limited
+# Copyright (c) 2016-2017, 2019, 2021-2022 Arm Limited
 # All rights reserved.
 #
 # The license below extends only to copyright in the software and shall
@@ -95,7 +95,7 @@ class MemBus(SystemXBar):
     default = Self.badaddr_responder.pio
 
 
-class CpuCluster(SubSystem):
+class ArmCpuCluster(CpuCluster):
     def __init__(
         self,
         system,
@@ -107,7 +107,7 @@ class CpuCluster(SubSystem):
         l1d_type,
         l2_type,
     ):
-        super(CpuCluster, self).__init__()
+        super().__init__()
         self._cpu_type = cpu_type
         self._l1i_type = l1i_type
         self._l1d_type = l1d_type
@@ -120,24 +120,9 @@ class CpuCluster(SubSystem):
             clock=cpu_clock, voltage_domain=self.voltage_domain
         )
 
-        self.cpus = [
-            self._cpu_type(
-                cpu_id=system.numCpus() + idx, clk_domain=self.clk_domain
-            )
-            for idx in range(num_cpus)
-        ]
+        self.generate_cpus(cpu_type, num_cpus)
 
-        for cpu in self.cpus:
-            cpu.createThreads()
-            cpu.createInterruptController()
-            cpu.socket_id = system.numCpuClusters()
-        system.addCpuCluster(self, num_cpus)
-
-    def requireCaches(self):
-        return self._cpu_type.require_caches()
-
-    def memoryMode(self):
-        return self._cpu_type.memory_mode()
+        system.addCpuCluster(self)
 
     def addL1(self):
         for cpu in self.cpus:
@@ -191,7 +176,7 @@ class CpuCluster(SubSystem):
                 cpu.connectCachedPorts(bus.cpu_side_ports)
 
 
-class AtomicCluster(CpuCluster):
+class AtomicCluster(ArmCpuCluster):
     def __init__(self, system, num_cpus, cpu_clock, cpu_voltage="1.0V"):
         cpu_config = [
             ObjectList.cpu_list.get("AtomicSimpleCPU"),
@@ -199,28 +184,24 @@ class AtomicCluster(CpuCluster):
             None,
             None,
         ]
-        super(AtomicCluster, self).__init__(
-            system, num_cpus, cpu_clock, cpu_voltage, *cpu_config
-        )
+        super().__init__(system, num_cpus, cpu_clock, cpu_voltage, *cpu_config)
 
     def addL1(self):
         pass
 
 
-class KvmCluster(CpuCluster):
+class KvmCluster(ArmCpuCluster):
     def __init__(self, system, num_cpus, cpu_clock, cpu_voltage="1.0V"):
         cpu_config = [ObjectList.cpu_list.get("ArmV8KvmCPU"), None, None, None]
-        super(KvmCluster, self).__init__(
-            system, num_cpus, cpu_clock, cpu_voltage, *cpu_config
-        )
+        super().__init__(system, num_cpus, cpu_clock, cpu_voltage, *cpu_config)
 
     def addL1(self):
         pass
 
 
-class FastmodelCluster(SubSystem):
+class FastmodelCluster(CpuCluster):
     def __init__(self, system, num_cpus, cpu_clock, cpu_voltage="1.0V"):
-        super(FastmodelCluster, self).__init__()
+        super().__init__()
 
         # Setup GIC
         gic = system.realview.gic
@@ -285,12 +266,12 @@ class FastmodelCluster(SubSystem):
         self.cpu_hub.a2t = a2t
         self.cpu_hub.t2g = t2g
 
-        system.addCpuCluster(self, num_cpus)
+        system.addCpuCluster(self)
 
-    def requireCaches(self):
+    def require_caches(self):
         return False
 
-    def memoryMode(self):
+    def memory_mode(self):
         return "atomic_noncaching"
 
     def addL1(self):
@@ -330,7 +311,6 @@ class BaseSimpleSystem(ArmSystem):
         self.mem_ranges = self.getMemRanges(int(Addr(mem_size)))
 
         self._clusters = []
-        self._num_cpus = 0
 
     def getMemRanges(self, mem_size):
         """
@@ -357,14 +337,8 @@ class BaseSimpleSystem(ArmSystem):
     def numCpuClusters(self):
         return len(self._clusters)
 
-    def addCpuCluster(self, cpu_cluster, num_cpus):
-        assert cpu_cluster not in self._clusters
-        assert num_cpus > 0
+    def addCpuCluster(self, cpu_cluster):
         self._clusters.append(cpu_cluster)
-        self._num_cpus += num_cpus
-
-    def numCpus(self):
-        return self._num_cpus
 
     def addCaches(self, need_caches, last_cache_level):
         if not need_caches:
diff --git a/configs/example/arm/fs_bigLITTLE.py b/configs/example/arm/fs_bigLITTLE.py
index c188de663a..060c51ec3c 100644
--- a/configs/example/arm/fs_bigLITTLE.py
+++ b/configs/example/arm/fs_bigLITTLE.py
@@ -79,7 +79,7 @@ def _using_pdes(root):
     return False
 
 
-class BigCluster(devices.CpuCluster):
+class BigCluster(devices.ArmCpuCluster):
     def __init__(self, system, num_cpus, cpu_clock, cpu_voltage="1.0V"):
         cpu_config = [
             ObjectList.cpu_list.get("O3_ARM_v7a_3"),
@@ -87,12 +87,10 @@ class BigCluster(devices.CpuCluster):
             devices.L1D,
             devices.L2,
         ]
-        super(BigCluster, self).__init__(
-            system, num_cpus, cpu_clock, cpu_voltage, *cpu_config
-        )
+        super().__init__(system, num_cpus, cpu_clock, cpu_voltage, *cpu_config)
 
 
-class LittleCluster(devices.CpuCluster):
+class LittleCluster(devices.ArmCpuCluster):
     def __init__(self, system, num_cpus, cpu_clock, cpu_voltage="1.0V"):
         cpu_config = [
             ObjectList.cpu_list.get("MinorCPU"),
@@ -100,9 +98,7 @@ class LittleCluster(devices.CpuCluster):
             devices.L1D,
             devices.L2,
         ]
-        super(LittleCluster, self).__init__(
-            system, num_cpus, cpu_clock, cpu_voltage, *cpu_config
-        )
+        super().__init__(system, num_cpus, cpu_clock, cpu_voltage, *cpu_config)
 
 
 class Ex5BigCluster(devices.CpuCluster):
@@ -113,9 +109,7 @@ class Ex5BigCluster(devices.CpuCluster):
             ex5_big.L1D,
             ex5_big.L2,
         ]
-        super(Ex5BigCluster, self).__init__(
-            system, num_cpus, cpu_clock, cpu_voltage, *cpu_config
-        )
+        super().__init__(system, num_cpus, cpu_clock, cpu_voltage, *cpu_config)
 
 
 class Ex5LittleCluster(devices.CpuCluster):
@@ -126,9 +120,7 @@ class Ex5LittleCluster(devices.CpuCluster):
             ex5_LITTLE.L1D,
             ex5_LITTLE.L2,
         ]
-        super(Ex5LittleCluster, self).__init__(
-            system, num_cpus, cpu_clock, cpu_voltage, *cpu_config
-        )
+        super().__init__(system, num_cpus, cpu_clock, cpu_voltage, *cpu_config)
 
 
 def createSystem(
@@ -376,7 +368,7 @@ def build(options):
         system.bigCluster = big_model(
             system, options.big_cpus, options.big_cpu_clock
         )
-        system.mem_mode = system.bigCluster.memoryMode()
+        system.mem_mode = system.bigCluster.memory_mode()
         all_cpus += system.bigCluster.cpus
 
     # little cluster
@@ -384,23 +376,24 @@ def build(options):
         system.littleCluster = little_model(
             system, options.little_cpus, options.little_cpu_clock
         )
-        system.mem_mode = system.littleCluster.memoryMode()
+        system.mem_mode = system.littleCluster.memory_mode()
         all_cpus += system.littleCluster.cpus
 
     # Figure out the memory mode
     if (
         options.big_cpus > 0
         and options.little_cpus > 0
-        and system.bigCluster.memoryMode() != system.littleCluster.memoryMode()
+        and system.bigCluster.memory_mode()
+        != system.littleCluster.memory_mode()
     ):
         m5.util.panic("Memory mode missmatch among CPU clusters")
 
     # create caches
     system.addCaches(options.caches, options.last_cache_level)
     if not options.caches:
-        if options.big_cpus > 0 and system.bigCluster.requireCaches():
+        if options.big_cpus > 0 and system.bigCluster.require_caches():
             m5.util.panic("Big CPU model requires caches")
-        if options.little_cpus > 0 and system.littleCluster.requireCaches():
+        if options.little_cpus > 0 and system.littleCluster.require_caches():
             m5.util.panic("Little CPU model requires caches")
 
     # Create a KVM VM and do KVM-specific configuration
diff --git a/configs/example/arm/ruby_fs.py b/configs/example/arm/ruby_fs.py
index d58184522c..fd36319363 100644
--- a/configs/example/arm/ruby_fs.py
+++ b/configs/example/arm/ruby_fs.py
@@ -115,7 +115,7 @@ def create(args):
 
     # Add CPU clusters to the system
     system.cpu_cluster = [
-        devices.CpuCluster(
+        devices.ArmCpuCluster(
             system,
             args.num_cpus,
             args.cpu_freq,
diff --git a/configs/example/arm/starter_fs.py b/configs/example/arm/starter_fs.py
index 3a9a8762d6..7d7ab71768 100644
--- a/configs/example/arm/starter_fs.py
+++ b/configs/example/arm/starter_fs.py
@@ -128,7 +128,7 @@ def create(args):
 
     # Add CPU clusters to the system
     system.cpu_cluster = [
-        devices.CpuCluster(
+        devices.ArmCpuCluster(
             system, args.num_cores, args.cpu_freq, "1.0V", *cpu_types[args.cpu]
         )
     ]
diff --git a/configs/example/arm/starter_se.py b/configs/example/arm/starter_se.py
index 08c3d74fbd..ccdbe4f847 100644
--- a/configs/example/arm/starter_se.py
+++ b/configs/example/arm/starter_se.py
@@ -102,14 +102,14 @@ class SimpleSeSystem(System):
         # Create a cache hierarchy (unless we are simulating a
         # functional CPU in atomic memory mode) for the CPU cluster
         # and connect it to the shared memory bus.
-        if self.cpu_cluster.memoryMode() == "timing":
+        if self.cpu_cluster.memory_mode() == "timing":
             self.cpu_cluster.addL1()
             self.cpu_cluster.addL2(self.cpu_cluster.clk_domain)
         self.cpu_cluster.connectMemSide(self.membus)
 
         # Tell gem5 about the memory mode used by the CPUs we are
         # simulating.
-        self.mem_mode = self.cpu_cluster.memoryMode()
+        self.mem_mode = self.cpu_cluster.memory_mode()
 
     def numCpuClusters(self):
         return len(self._clusters)

From 76b74fa51f0c691dbb3ea4c5272dac8add8913cb Mon Sep 17 00:00:00 2001
From: Giacomo Travaglini <giacomo.travaglini@arm.com>
Date: Fri, 13 Jan 2023 16:26:03 +0000
Subject: [PATCH 115/492] util: use origin/develop as default upstream branch

The master branch is not in use anymore and it has been
renamed to develop instead

Change-Id: Ib9ea6e137f1b9284fb8147268b8691d002d3f90a
Signed-off-by: Giacomo Travaglini <giacomo.travaglini@arm.com>
Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/67331
Reviewed-by: Richard Cooper <richard.cooper@arm.com>
Maintainer: Jason Lowe-Power <power.jg@gmail.com>
Reviewed-by: Jason Lowe-Power <power.jg@gmail.com>
Tested-by: kokoro <noreply+kokoro@google.com>
---
 util/maint/list_changes.py         | 2 +-
 util/maint/show_changes_by_file.py | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/util/maint/list_changes.py b/util/maint/list_changes.py
index 465ae1abb0..87e4ea2d20 100755
--- a/util/maint/list_changes.py
+++ b/util/maint/list_changes.py
@@ -178,7 +178,7 @@ def _main():
         "--upstream",
         "-u",
         type=str,
-        default="origin/master",
+        default="origin/develop",
         help="Upstream branch for comparison. Default: %(default)s",
     )
     parser.add_argument(
diff --git a/util/maint/show_changes_by_file.py b/util/maint/show_changes_by_file.py
index ea739f78fe..d5055c1ff4 100755
--- a/util/maint/show_changes_by_file.py
+++ b/util/maint/show_changes_by_file.py
@@ -94,7 +94,7 @@ def _main():
         "--upstream",
         "-u",
         type=str,
-        default="origin/master",
+        default="origin/develop",
         help="Upstream branch for comparison. Default: %(default)s",
     )
     parser.add_argument(

From f7857867ae54fc868e265d1aa2ea171b413c1776 Mon Sep 17 00:00:00 2001
From: Gabe Black <gabeblack@google.com>
Date: Thu, 12 Jan 2023 01:05:32 -0800
Subject: [PATCH 116/492] fastmodel: Export the "reset_in" reset signal from
 the PL330.

This is essentially the same as how the reset signals were exported
from the CortexR52 which I used as an example, except here there is
only one reset. I passed through with the same name rather than calling
it "model_reset" as in the CortexR52 since the pass through is trivial,
and renaming the signal with no additional functionality seemed like it
would just create confusion. In the CortexR52 case it makes more sense
since there are multiple reset lines that need to be toggled to
actually cause a reset, and a level of abstraction is actually helpful.

Change-Id: I6b61fed6eb1566d131d4b0367fe4ae65031b25f8
Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/67351
Maintainer: Gabe Black <gabe.black@gmail.com>
Reviewed-by: Yu-hsin Wang <yuhsingw@google.com>
Tested-by: kokoro <noreply+kokoro@google.com>
---
 src/arch/arm/fastmodel/PL330_DMAC/FastModelPL330.py | 3 +++
 src/arch/arm/fastmodel/PL330_DMAC/PL330.lisa        | 5 +++++
 src/arch/arm/fastmodel/PL330_DMAC/pl330.cc          | 8 +++++++-
 src/arch/arm/fastmodel/PL330_DMAC/pl330.hh          | 3 +++
 4 files changed, 18 insertions(+), 1 deletion(-)

diff --git a/src/arch/arm/fastmodel/PL330_DMAC/FastModelPL330.py b/src/arch/arm/fastmodel/PL330_DMAC/FastModelPL330.py
index ad43fed237..21ead525d3 100644
--- a/src/arch/arm/fastmodel/PL330_DMAC/FastModelPL330.py
+++ b/src/arch/arm/fastmodel/PL330_DMAC/FastModelPL330.py
@@ -26,6 +26,7 @@
 from m5.params import *
 from m5.objects.FastModel import AmbaInitiatorSocket, AmbaTargetSocket
 from m5.objects.IntPin import IntSourcePin
+from m5.objects.ResetPort import ResetResponsePort
 from m5.objects.SystemC import SystemC_ScModule
 
 
@@ -197,6 +198,8 @@ class FastModelPL330(SystemC_ScModule):
     pio_s = AmbaTargetSocket(64, "Register accesses (secure)")
     pio_ns = AmbaTargetSocket(64, "Register accesses (non-secure)")
 
+    reset_in = ResetResponsePort("System reset")
+
     # irq_abort_master_port
     # irq_master_port
     # pvbus_m
diff --git a/src/arch/arm/fastmodel/PL330_DMAC/PL330.lisa b/src/arch/arm/fastmodel/PL330_DMAC/PL330.lisa
index 3c31c90d87..d57dfdad3d 100644
--- a/src/arch/arm/fastmodel/PL330_DMAC/PL330.lisa
+++ b/src/arch/arm/fastmodel/PL330_DMAC/PL330.lisa
@@ -64,6 +64,9 @@ component PL330
         // Interrupts.
         pl330.irq_master_port => self.irq;
         pl330.irq_abort_master_port => self.irq_abort;
+
+        // Reset signals.
+        self.reset_in => pl330.reset_in;
     }
 
     properties
@@ -85,4 +88,6 @@ component PL330
 
     master port<Signal> irq[32];
     master port<Signal> irq_abort;
+
+    slave port<Signal> reset_in;
 }
diff --git a/src/arch/arm/fastmodel/PL330_DMAC/pl330.cc b/src/arch/arm/fastmodel/PL330_DMAC/pl330.cc
index e582404c8c..13162bd409 100644
--- a/src/arch/arm/fastmodel/PL330_DMAC/pl330.cc
+++ b/src/arch/arm/fastmodel/PL330_DMAC/pl330.cc
@@ -45,7 +45,8 @@ PL330::PL330(const FastModelPL330Params &params,
     dma(amba_m, params.name + ".dma", -1),
     pioS(amba_s, params.name + ".pio_s", -1),
     pioNs(amba_s_ns, params.name + ".pio_ns", -1),
-    irqAbortReceiver("irq_abort_receiver")
+    irqAbortReceiver("irq_abort_receiver"),
+    resetIn("reset_in", 0)
 {
     set_parameter("pl330.fifo_size", params.fifo_size);
     set_parameter("pl330.max_transfer", params.max_transfer);
@@ -211,6 +212,9 @@ PL330::PL330(const FastModelPL330Params &params,
 
     // And install it.
     irqAbortReceiver.onChange(abort_change);
+
+    // Plumb the reset signal.
+    resetIn.signal_out.bind(this->reset_in);
 }
 
 void
@@ -250,6 +254,8 @@ PL330::gem5_getPort(const std::string &if_name, int idx)
         }
         if (port != -1 && port < irqPort.size())
             return *irqPort[port].at(idx);
+    } else if (if_name == "reset_in") {
+        return resetIn;
     }
 
     return scx_evs_PL330::gem5_getPort(if_name, idx);
diff --git a/src/arch/arm/fastmodel/PL330_DMAC/pl330.hh b/src/arch/arm/fastmodel/PL330_DMAC/pl330.hh
index 3af56f2e6e..389f7047c7 100644
--- a/src/arch/arm/fastmodel/PL330_DMAC/pl330.hh
+++ b/src/arch/arm/fastmodel/PL330_DMAC/pl330.hh
@@ -39,6 +39,7 @@
 
 #include "arch/arm/fastmodel/amba_ports.hh"
 #include "arch/arm/fastmodel/common/signal_receiver.hh"
+#include "arch/arm/fastmodel/common/signal_sender.hh"
 #include "arch/arm/fastmodel/protocol/exported_clock_rate_control.hh"
 #include "dev/intpin.hh"
 #include "params/FastModelPL330.hh"
@@ -73,6 +74,8 @@ class PL330 : public scx_evs_PL330
 
     void allocateIrq(int idx, int count);
 
+    SignalSender resetIn;
+
   public:
     PL330(const FastModelPL330Params &params, sc_core::sc_module_name _name);
     PL330(const FastModelPL330Params &params) :

From 6e74deb46f3f296107eb9bfbfe96f87d7d1940be Mon Sep 17 00:00:00 2001
From: Nathanael Premillieu <nathanael.premillieu@huawei.com>
Date: Tue, 13 Dec 2022 14:31:12 +0100
Subject: [PATCH 117/492] mem-cache: use MMU instead of TLB in prefetchers

BaseMMU object is now the entry point for translation
requests. In the prefetchers, a BaseTLB object is still
used if translation is needed.
This patch is changing it to a BaseMMU object.

Change-Id: I47dc92d4bc4a5c4f7c4c6181f7b7e126db6bd529
Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/66831
Tested-by: kokoro <noreply+kokoro@google.com>
Reviewed-by: Daniel Carvalho <odanrc@yahoo.com.br>
Maintainer: Daniel Carvalho <odanrc@yahoo.com.br>
---
 src/mem/cache/prefetch/Prefetcher.py | 12 ++++++------
 src/mem/cache/prefetch/base.cc       |  8 ++++----
 src/mem/cache/prefetch/base.hh       | 10 +++++-----
 src/mem/cache/prefetch/queued.cc     | 12 ++++++------
 src/mem/cache/prefetch/queued.hh     |  6 +++---
 5 files changed, 24 insertions(+), 24 deletions(-)

diff --git a/src/mem/cache/prefetch/Prefetcher.py b/src/mem/cache/prefetch/Prefetcher.py
index 397711c09a..a350319258 100644
--- a/src/mem/cache/prefetch/Prefetcher.py
+++ b/src/mem/cache/prefetch/Prefetcher.py
@@ -64,7 +64,7 @@ class BasePrefetcher(ClockedObject):
     abstract = True
     cxx_class = "gem5::prefetch::Base"
     cxx_header = "mem/cache/prefetch/base.hh"
-    cxx_exports = [PyBindMethod("addEventProbe"), PyBindMethod("addTLB")]
+    cxx_exports = [PyBindMethod("addEventProbe"), PyBindMethod("addMMU")]
     sys = Param.System(Parent.any, "System this prefetcher belongs to")
 
     # Get the block size from the parent (system)
@@ -93,7 +93,7 @@ class BasePrefetcher(ClockedObject):
     def __init__(self, **kwargs):
         super().__init__(**kwargs)
         self._events = []
-        self._tlbs = []
+        self._mmus = []
 
     def addEvent(self, newObject):
         self._events.append(newObject)
@@ -101,8 +101,8 @@ class BasePrefetcher(ClockedObject):
     # Override the normal SimObject::regProbeListeners method and
     # register deferred event handlers.
     def regProbeListeners(self):
-        for tlb in self._tlbs:
-            self.getCCObject().addTLB(tlb.getCCObject())
+        for mmu in self._mmus:
+            self.getCCObject().addMMU(mmu.getCCObject())
         for event in self._events:
             event.register()
         self.getCCObject().regProbeListeners()
@@ -114,10 +114,10 @@ class BasePrefetcher(ClockedObject):
             raise TypeError("probeNames must have at least one element")
         self.addEvent(HWPProbeEvent(self, simObj, *probeNames))
 
-    def registerTLB(self, simObj):
+    def registerMMU(self, simObj):
         if not isinstance(simObj, SimObject):
             raise TypeError("argument must be a SimObject type")
-        self._tlbs.append(simObj)
+        self._mmus.append(simObj)
 
 
 class MultiPrefetcher(BasePrefetcher):
diff --git a/src/mem/cache/prefetch/base.cc b/src/mem/cache/prefetch/base.cc
index cb4c1e8118..9ff81fba68 100644
--- a/src/mem/cache/prefetch/base.cc
+++ b/src/mem/cache/prefetch/base.cc
@@ -103,7 +103,7 @@ Base::Base(const BasePrefetcherParams &p)
       prefetchOnPfHit(p.prefetch_on_pf_hit),
       useVirtualAddresses(p.use_virtual_addresses),
       prefetchStats(this), issuedPrefetches(0),
-      usefulPrefetches(0), tlb(nullptr)
+      usefulPrefetches(0), mmu(nullptr)
 {
 }
 
@@ -299,10 +299,10 @@ Base::addEventProbe(SimObject *obj, const char *name)
 }
 
 void
-Base::addTLB(BaseTLB *t)
+Base::addMMU(BaseMMU *m)
 {
-    fatal_if(tlb != nullptr, "Only one TLB can be registered");
-    tlb = t;
+    fatal_if(mmu != nullptr, "Only one MMU can be registered");
+    mmu = m;
 }
 
 } // namespace prefetch
diff --git a/src/mem/cache/prefetch/base.hh b/src/mem/cache/prefetch/base.hh
index f2a8207d35..e5e43e534f 100644
--- a/src/mem/cache/prefetch/base.hh
+++ b/src/mem/cache/prefetch/base.hh
@@ -364,8 +364,8 @@ class Base : public ClockedObject
     /** Total prefetches that has been useful */
     uint64_t usefulPrefetches;
 
-    /** Registered tlb for address translations */
-    BaseTLB * tlb;
+    /** Registered mmu for address translations */
+    BaseMMU * mmu;
 
   public:
     Base(const BasePrefetcherParams &p);
@@ -437,12 +437,12 @@ class Base : public ClockedObject
     void addEventProbe(SimObject *obj, const char *name);
 
     /**
-     * Add a BaseTLB object to be used whenever a translation is needed.
+     * Add a BaseMMU object to be used whenever a translation is needed.
      * This is generally required when the prefetcher is allowed to generate
      * page crossing references and/or uses virtual addresses for training.
-     * @param tlb pointer to the BaseTLB object to add
+     * @param mmu pointer to the BaseMMU object to add
      */
-    void addTLB(BaseTLB *tlb);
+    void addMMU(BaseMMU *mmu);
 };
 
 } // namespace prefetch
diff --git a/src/mem/cache/prefetch/queued.cc b/src/mem/cache/prefetch/queued.cc
index da9cbf479e..b85a227f00 100644
--- a/src/mem/cache/prefetch/queued.cc
+++ b/src/mem/cache/prefetch/queued.cc
@@ -78,13 +78,13 @@ Queued::DeferredPacket::createPkt(Addr paddr, unsigned blk_size,
 }
 
 void
-Queued::DeferredPacket::startTranslation(BaseTLB *tlb)
+Queued::DeferredPacket::startTranslation(BaseMMU *mmu)
 {
     assert(translationRequest != nullptr);
     if (!ongoingTranslation) {
         ongoingTranslation = true;
         // Prefetchers only operate in Timing mode
-        tlb->translateTiming(translationRequest, tc, this, BaseMMU::Read);
+        mmu->translateTiming(translationRequest, tc, this, BaseMMU::Read);
     }
 }
 
@@ -216,7 +216,7 @@ Queued::notify(const PacketPtr &pkt, const PrefetchInfo &pfi)
             }
         }
 
-        bool can_cross_page = (tlb != nullptr);
+        bool can_cross_page = (mmu != nullptr);
         if (can_cross_page || samePage(addr_prio.first, pfi.getAddr())) {
             PrefetchInfo new_pfi(pfi,addr_prio.first);
             statsQueued.pfIdentified++;
@@ -293,7 +293,7 @@ Queued::processMissingTranslations(unsigned max)
         // Increase the iterator first because dp.startTranslation can end up
         // calling finishTranslation, which will erase "it"
         it++;
-        dp.startTranslation(tlb);
+        dp.startTranslation(mmu);
         count += 1;
     }
 }
@@ -311,7 +311,7 @@ Queued::translationComplete(DeferredPacket *dp, bool failed)
     assert(it != pfqMissingTranslation.end());
     if (!failed) {
         DPRINTF(HWPrefetch, "%s Translation of vaddr %#x succeeded: "
-                "paddr %#x \n", tlb->name(),
+                "paddr %#x \n", mmu->name(),
                 it->translationRequest->getVaddr(),
                 it->translationRequest->getPaddr());
         Addr target_paddr = it->translationRequest->getPaddr();
@@ -329,7 +329,7 @@ Queued::translationComplete(DeferredPacket *dp, bool failed)
         }
     } else {
         DPRINTF(HWPrefetch, "%s Translation of vaddr %#x failed, dropping "
-                "prefetch request %#x \n", tlb->name(),
+                "prefetch request %#x \n", mmu->name(),
                 it->translationRequest->getVaddr());
     }
     pfqMissingTranslation.erase(it);
diff --git a/src/mem/cache/prefetch/queued.hh b/src/mem/cache/prefetch/queued.hh
index c769b3875a..87d3456def 100644
--- a/src/mem/cache/prefetch/queued.hh
+++ b/src/mem/cache/prefetch/queued.hh
@@ -134,10 +134,10 @@ class Queued : public Base
                             ThreadContext *tc, BaseMMU::Mode mode) override;
 
         /**
-         * Issues the translation request to the provided TLB
-         * @param tlb the tlb that has to translate the address
+         * Issues the translation request to the provided MMU
+         * @param mmu the mmu that has to translate the address
          */
-        void startTranslation(BaseTLB *tlb);
+        void startTranslation(BaseMMU *mmu);
     };
 
     std::list<DeferredPacket> pfq;

From d4c1904ce63082c30c01f2acebca7097e7eb612e Mon Sep 17 00:00:00 2001
From: "Daniel R. Carvalho" <odanrc@yahoo.com.br>
Date: Sat, 14 Jan 2023 09:35:26 -0300
Subject: [PATCH 118/492] mem-cache: Remove the ReplacementPolicy namespace

This namespace has gone through the deprecation period
and can now be removed.

Change-Id: If4904706b897999e9200b163d47679519f01e4d4
Signed-off-by: Daniel R. Carvalho <odanrc@yahoo.com.br>
Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/67352
Maintainer: Jason Lowe-Power <power.jg@gmail.com>
Reviewed-by: Richard Cooper <richard.cooper@arm.com>
Tested-by: kokoro <noreply+kokoro@google.com>
---
 src/mem/cache/prefetch/stride.hh                        | 1 -
 src/mem/cache/replacement_policies/base.hh              | 1 -
 src/mem/cache/replacement_policies/bip_rp.cc            | 1 -
 src/mem/cache/replacement_policies/bip_rp.hh            | 1 -
 src/mem/cache/replacement_policies/brrip_rp.cc          | 1 -
 src/mem/cache/replacement_policies/brrip_rp.hh          | 1 -
 src/mem/cache/replacement_policies/dueling_rp.hh        | 1 -
 src/mem/cache/replacement_policies/fifo_rp.cc           | 3 ++-
 src/mem/cache/replacement_policies/fifo_rp.hh           | 1 -
 src/mem/cache/replacement_policies/lfu_rp.cc            | 1 -
 src/mem/cache/replacement_policies/lfu_rp.hh            | 1 -
 src/mem/cache/replacement_policies/lru_rp.cc            | 1 -
 src/mem/cache/replacement_policies/lru_rp.hh            | 1 -
 src/mem/cache/replacement_policies/mru_rp.cc            | 1 -
 src/mem/cache/replacement_policies/mru_rp.hh            | 1 -
 src/mem/cache/replacement_policies/random_rp.cc         | 1 -
 src/mem/cache/replacement_policies/random_rp.hh         | 1 -
 src/mem/cache/replacement_policies/replaceable_entry.hh | 1 -
 src/mem/cache/replacement_policies/second_chance_rp.cc  | 1 -
 src/mem/cache/replacement_policies/second_chance_rp.hh  | 1 -
 src/mem/cache/replacement_policies/ship_rp.hh           | 1 -
 src/mem/cache/replacement_policies/tree_plru_rp.cc      | 1 -
 src/mem/cache/replacement_policies/tree_plru_rp.hh      | 1 -
 src/mem/cache/replacement_policies/weighted_lru_rp.cc   | 1 -
 src/mem/cache/replacement_policies/weighted_lru_rp.hh   | 1 -
 src/mem/cache/tags/sector_tags.hh                       | 1 -
 26 files changed, 2 insertions(+), 26 deletions(-)

diff --git a/src/mem/cache/prefetch/stride.hh b/src/mem/cache/prefetch/stride.hh
index 2b70765ba4..27fa917436 100644
--- a/src/mem/cache/prefetch/stride.hh
+++ b/src/mem/cache/prefetch/stride.hh
@@ -64,7 +64,6 @@ namespace gem5
 {
 
 class BaseIndexingPolicy;
-GEM5_DEPRECATED_NAMESPACE(ReplacementPolicy, replacement_policy);
 namespace replacement_policy
 {
     class Base;
diff --git a/src/mem/cache/replacement_policies/base.hh b/src/mem/cache/replacement_policies/base.hh
index fc92ecb6ae..2c23c950b2 100644
--- a/src/mem/cache/replacement_policies/base.hh
+++ b/src/mem/cache/replacement_policies/base.hh
@@ -45,7 +45,6 @@ namespace gem5
  */
 typedef std::vector<ReplaceableEntry*> ReplacementCandidates;
 
-GEM5_DEPRECATED_NAMESPACE(ReplacementPolicy, replacement_policy);
 namespace replacement_policy
 {
 
diff --git a/src/mem/cache/replacement_policies/bip_rp.cc b/src/mem/cache/replacement_policies/bip_rp.cc
index 102037ddfa..812c36bb71 100644
--- a/src/mem/cache/replacement_policies/bip_rp.cc
+++ b/src/mem/cache/replacement_policies/bip_rp.cc
@@ -37,7 +37,6 @@
 namespace gem5
 {
 
-GEM5_DEPRECATED_NAMESPACE(ReplacementPolicy, replacement_policy);
 namespace replacement_policy
 {
 
diff --git a/src/mem/cache/replacement_policies/bip_rp.hh b/src/mem/cache/replacement_policies/bip_rp.hh
index 486f4597dd..0b830e0b79 100644
--- a/src/mem/cache/replacement_policies/bip_rp.hh
+++ b/src/mem/cache/replacement_policies/bip_rp.hh
@@ -49,7 +49,6 @@ namespace gem5
 
 struct BIPRPParams;
 
-GEM5_DEPRECATED_NAMESPACE(ReplacementPolicy, replacement_policy);
 namespace replacement_policy
 {
 
diff --git a/src/mem/cache/replacement_policies/brrip_rp.cc b/src/mem/cache/replacement_policies/brrip_rp.cc
index a28ad339d5..06dad0d9fb 100644
--- a/src/mem/cache/replacement_policies/brrip_rp.cc
+++ b/src/mem/cache/replacement_policies/brrip_rp.cc
@@ -38,7 +38,6 @@
 namespace gem5
 {
 
-GEM5_DEPRECATED_NAMESPACE(ReplacementPolicy, replacement_policy);
 namespace replacement_policy
 {
 
diff --git a/src/mem/cache/replacement_policies/brrip_rp.hh b/src/mem/cache/replacement_policies/brrip_rp.hh
index f4f815e056..5649a64070 100644
--- a/src/mem/cache/replacement_policies/brrip_rp.hh
+++ b/src/mem/cache/replacement_policies/brrip_rp.hh
@@ -60,7 +60,6 @@ namespace gem5
 
 struct BRRIPRPParams;
 
-GEM5_DEPRECATED_NAMESPACE(ReplacementPolicy, replacement_policy);
 namespace replacement_policy
 {
 
diff --git a/src/mem/cache/replacement_policies/dueling_rp.hh b/src/mem/cache/replacement_policies/dueling_rp.hh
index a4510508ef..c7400b4972 100644
--- a/src/mem/cache/replacement_policies/dueling_rp.hh
+++ b/src/mem/cache/replacement_policies/dueling_rp.hh
@@ -41,7 +41,6 @@ namespace gem5
 
 struct DuelingRPParams;
 
-GEM5_DEPRECATED_NAMESPACE(ReplacementPolicy, replacement_policy);
 namespace replacement_policy
 {
 
diff --git a/src/mem/cache/replacement_policies/fifo_rp.cc b/src/mem/cache/replacement_policies/fifo_rp.cc
index bc0680bc8a..199ba0a429 100644
--- a/src/mem/cache/replacement_policies/fifo_rp.cc
+++ b/src/mem/cache/replacement_policies/fifo_rp.cc
@@ -36,9 +36,10 @@
 
 namespace gem5
 {
-GEM5_DEPRECATED_NAMESPACE(ReplacementPolicy, replacement_policy);
+
 namespace replacement_policy
 {
+
 FIFO::FIFO(const Params &p)
   : Base(p)
 {
diff --git a/src/mem/cache/replacement_policies/fifo_rp.hh b/src/mem/cache/replacement_policies/fifo_rp.hh
index 4b62fd220a..255666865d 100644
--- a/src/mem/cache/replacement_policies/fifo_rp.hh
+++ b/src/mem/cache/replacement_policies/fifo_rp.hh
@@ -44,7 +44,6 @@ namespace gem5
 
 struct FIFORPParams;
 
-GEM5_DEPRECATED_NAMESPACE(ReplacementPolicy, replacement_policy);
 namespace replacement_policy
 {
 
diff --git a/src/mem/cache/replacement_policies/lfu_rp.cc b/src/mem/cache/replacement_policies/lfu_rp.cc
index a715f7d0dc..fc3495465e 100644
--- a/src/mem/cache/replacement_policies/lfu_rp.cc
+++ b/src/mem/cache/replacement_policies/lfu_rp.cc
@@ -36,7 +36,6 @@
 namespace gem5
 {
 
-GEM5_DEPRECATED_NAMESPACE(ReplacementPolicy, replacement_policy);
 namespace replacement_policy
 {
 
diff --git a/src/mem/cache/replacement_policies/lfu_rp.hh b/src/mem/cache/replacement_policies/lfu_rp.hh
index aa058c46bd..58c057ce35 100644
--- a/src/mem/cache/replacement_policies/lfu_rp.hh
+++ b/src/mem/cache/replacement_policies/lfu_rp.hh
@@ -44,7 +44,6 @@ namespace gem5
 
 struct LFURPParams;
 
-GEM5_DEPRECATED_NAMESPACE(ReplacementPolicy, replacement_policy);
 namespace replacement_policy
 {
 
diff --git a/src/mem/cache/replacement_policies/lru_rp.cc b/src/mem/cache/replacement_policies/lru_rp.cc
index c22f3fe2ba..cbec50eb73 100644
--- a/src/mem/cache/replacement_policies/lru_rp.cc
+++ b/src/mem/cache/replacement_policies/lru_rp.cc
@@ -37,7 +37,6 @@
 namespace gem5
 {
 
-GEM5_DEPRECATED_NAMESPACE(ReplacementPolicy, replacement_policy);
 namespace replacement_policy
 {
 
diff --git a/src/mem/cache/replacement_policies/lru_rp.hh b/src/mem/cache/replacement_policies/lru_rp.hh
index 620117dca5..6feaa4f73d 100644
--- a/src/mem/cache/replacement_policies/lru_rp.hh
+++ b/src/mem/cache/replacement_policies/lru_rp.hh
@@ -42,7 +42,6 @@ namespace gem5
 
 struct LRURPParams;
 
-GEM5_DEPRECATED_NAMESPACE(ReplacementPolicy, replacement_policy);
 namespace replacement_policy
 {
 
diff --git a/src/mem/cache/replacement_policies/mru_rp.cc b/src/mem/cache/replacement_policies/mru_rp.cc
index 18b0d65e89..5040c22206 100644
--- a/src/mem/cache/replacement_policies/mru_rp.cc
+++ b/src/mem/cache/replacement_policies/mru_rp.cc
@@ -37,7 +37,6 @@
 namespace gem5
 {
 
-GEM5_DEPRECATED_NAMESPACE(ReplacementPolicy, replacement_policy);
 namespace replacement_policy
 {
 
diff --git a/src/mem/cache/replacement_policies/mru_rp.hh b/src/mem/cache/replacement_policies/mru_rp.hh
index 1657ace3d5..5b5f0bf03d 100644
--- a/src/mem/cache/replacement_policies/mru_rp.hh
+++ b/src/mem/cache/replacement_policies/mru_rp.hh
@@ -44,7 +44,6 @@ namespace gem5
 
 struct MRURPParams;
 
-GEM5_DEPRECATED_NAMESPACE(ReplacementPolicy, replacement_policy);
 namespace replacement_policy
 {
 
diff --git a/src/mem/cache/replacement_policies/random_rp.cc b/src/mem/cache/replacement_policies/random_rp.cc
index fc6c431b9d..8711c85ba3 100644
--- a/src/mem/cache/replacement_policies/random_rp.cc
+++ b/src/mem/cache/replacement_policies/random_rp.cc
@@ -37,7 +37,6 @@
 namespace gem5
 {
 
-GEM5_DEPRECATED_NAMESPACE(ReplacementPolicy, replacement_policy);
 namespace replacement_policy
 {
 
diff --git a/src/mem/cache/replacement_policies/random_rp.hh b/src/mem/cache/replacement_policies/random_rp.hh
index 9c383d5418..a2b384563c 100644
--- a/src/mem/cache/replacement_policies/random_rp.hh
+++ b/src/mem/cache/replacement_policies/random_rp.hh
@@ -42,7 +42,6 @@ namespace gem5
 
 struct RandomRPParams;
 
-GEM5_DEPRECATED_NAMESPACE(ReplacementPolicy, replacement_policy);
 namespace replacement_policy
 {
 
diff --git a/src/mem/cache/replacement_policies/replaceable_entry.hh b/src/mem/cache/replacement_policies/replaceable_entry.hh
index 6c56bca394..bb88cefd1d 100644
--- a/src/mem/cache/replacement_policies/replaceable_entry.hh
+++ b/src/mem/cache/replacement_policies/replaceable_entry.hh
@@ -38,7 +38,6 @@
 namespace gem5
 {
 
-GEM5_DEPRECATED_NAMESPACE(ReplacementPolicy, replacement_policy);
 namespace replacement_policy
 {
 
diff --git a/src/mem/cache/replacement_policies/second_chance_rp.cc b/src/mem/cache/replacement_policies/second_chance_rp.cc
index df506c638b..963052ee7f 100644
--- a/src/mem/cache/replacement_policies/second_chance_rp.cc
+++ b/src/mem/cache/replacement_policies/second_chance_rp.cc
@@ -35,7 +35,6 @@
 namespace gem5
 {
 
-GEM5_DEPRECATED_NAMESPACE(ReplacementPolicy, replacement_policy);
 namespace replacement_policy
 {
 
diff --git a/src/mem/cache/replacement_policies/second_chance_rp.hh b/src/mem/cache/replacement_policies/second_chance_rp.hh
index 4d0a36cdeb..79085d1d53 100644
--- a/src/mem/cache/replacement_policies/second_chance_rp.hh
+++ b/src/mem/cache/replacement_policies/second_chance_rp.hh
@@ -46,7 +46,6 @@ namespace gem5
 
 struct SecondChanceRPParams;
 
-GEM5_DEPRECATED_NAMESPACE(ReplacementPolicy, replacement_policy);
 namespace replacement_policy
 {
 
diff --git a/src/mem/cache/replacement_policies/ship_rp.hh b/src/mem/cache/replacement_policies/ship_rp.hh
index fa27540adb..edf4ff5a45 100644
--- a/src/mem/cache/replacement_policies/ship_rp.hh
+++ b/src/mem/cache/replacement_policies/ship_rp.hh
@@ -51,7 +51,6 @@ struct SHiPRPParams;
 struct SHiPMemRPParams;
 struct SHiPPCRPParams;
 
-GEM5_DEPRECATED_NAMESPACE(ReplacementPolicy, replacement_policy);
 namespace replacement_policy
 {
 
diff --git a/src/mem/cache/replacement_policies/tree_plru_rp.cc b/src/mem/cache/replacement_policies/tree_plru_rp.cc
index 2ee987c959..5014785093 100644
--- a/src/mem/cache/replacement_policies/tree_plru_rp.cc
+++ b/src/mem/cache/replacement_policies/tree_plru_rp.cc
@@ -43,7 +43,6 @@
 namespace gem5
 {
 
-GEM5_DEPRECATED_NAMESPACE(ReplacementPolicy, replacement_policy);
 namespace replacement_policy
 {
 
diff --git a/src/mem/cache/replacement_policies/tree_plru_rp.hh b/src/mem/cache/replacement_policies/tree_plru_rp.hh
index 335670457c..1f7e91c33e 100644
--- a/src/mem/cache/replacement_policies/tree_plru_rp.hh
+++ b/src/mem/cache/replacement_policies/tree_plru_rp.hh
@@ -80,7 +80,6 @@ namespace gem5
 
 struct TreePLRURPParams;
 
-GEM5_DEPRECATED_NAMESPACE(ReplacementPolicy, replacement_policy);
 namespace replacement_policy
 {
 
diff --git a/src/mem/cache/replacement_policies/weighted_lru_rp.cc b/src/mem/cache/replacement_policies/weighted_lru_rp.cc
index ed6a7f6166..ac8fd1015b 100644
--- a/src/mem/cache/replacement_policies/weighted_lru_rp.cc
+++ b/src/mem/cache/replacement_policies/weighted_lru_rp.cc
@@ -39,7 +39,6 @@
 namespace gem5
 {
 
-GEM5_DEPRECATED_NAMESPACE(ReplacementPolicy, replacement_policy);
 namespace replacement_policy
 {
 
diff --git a/src/mem/cache/replacement_policies/weighted_lru_rp.hh b/src/mem/cache/replacement_policies/weighted_lru_rp.hh
index bc0e5735af..117b73b10f 100644
--- a/src/mem/cache/replacement_policies/weighted_lru_rp.hh
+++ b/src/mem/cache/replacement_policies/weighted_lru_rp.hh
@@ -42,7 +42,6 @@ namespace gem5
 
 struct WeightedLRURPParams;
 
-GEM5_DEPRECATED_NAMESPACE(ReplacementPolicy, replacement_policy);
 namespace replacement_policy
 {
 
diff --git a/src/mem/cache/tags/sector_tags.hh b/src/mem/cache/tags/sector_tags.hh
index c64621213d..bad132158c 100644
--- a/src/mem/cache/tags/sector_tags.hh
+++ b/src/mem/cache/tags/sector_tags.hh
@@ -47,7 +47,6 @@
 namespace gem5
 {
 
-GEM5_DEPRECATED_NAMESPACE(ReplacementPolicy, replacement_policy);
 namespace replacement_policy
 {
     class Base;

From 65c15ba18884492888daee4e33f93e017566da02 Mon Sep 17 00:00:00 2001
From: "Daniel R. Carvalho" <odanrc@yahoo.com.br>
Date: Sat, 14 Jan 2023 09:41:36 -0300
Subject: [PATCH 119/492] mem-cache: Remove the Prefetcher namespace

This namespace has gone through the deprecation period
and can now be removed.

Change-Id: I31953be7ce8566576de94c9296eeeec601c9906a
Signed-off-by: Daniel R. Carvalho <odanrc@yahoo.com.br>
Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/67353
Reviewed-by: Richard Cooper <richard.cooper@arm.com>
Maintainer: Jason Lowe-Power <power.jg@gmail.com>
Tested-by: kokoro <noreply+kokoro@google.com>
---
 src/mem/cache/base.hh                                         | 1 -
 src/mem/cache/prefetch/access_map_pattern_matching.cc         | 1 -
 src/mem/cache/prefetch/access_map_pattern_matching.hh         | 1 -
 src/mem/cache/prefetch/base.cc                                | 1 -
 src/mem/cache/prefetch/base.hh                                | 1 -
 src/mem/cache/prefetch/bop.cc                                 | 1 -
 src/mem/cache/prefetch/bop.hh                                 | 1 -
 src/mem/cache/prefetch/delta_correlating_prediction_tables.cc | 1 -
 src/mem/cache/prefetch/delta_correlating_prediction_tables.hh | 1 -
 src/mem/cache/prefetch/indirect_memory.cc                     | 1 -
 src/mem/cache/prefetch/indirect_memory.hh                     | 1 -
 src/mem/cache/prefetch/irregular_stream_buffer.cc             | 1 -
 src/mem/cache/prefetch/irregular_stream_buffer.hh             | 1 -
 src/mem/cache/prefetch/multi.cc                               | 1 -
 src/mem/cache/prefetch/multi.hh                               | 1 -
 src/mem/cache/prefetch/pif.cc                                 | 1 -
 src/mem/cache/prefetch/pif.hh                                 | 1 -
 src/mem/cache/prefetch/queued.cc                              | 1 -
 src/mem/cache/prefetch/queued.hh                              | 1 -
 src/mem/cache/prefetch/sbooe.cc                               | 1 -
 src/mem/cache/prefetch/sbooe.hh                               | 1 -
 src/mem/cache/prefetch/signature_path.cc                      | 1 -
 src/mem/cache/prefetch/signature_path.hh                      | 1 -
 src/mem/cache/prefetch/signature_path_v2.cc                   | 1 -
 src/mem/cache/prefetch/signature_path_v2.hh                   | 1 -
 src/mem/cache/prefetch/slim_ampm.cc                           | 1 -
 src/mem/cache/prefetch/slim_ampm.hh                           | 1 -
 src/mem/cache/prefetch/spatio_temporal_memory_streaming.cc    | 1 -
 src/mem/cache/prefetch/spatio_temporal_memory_streaming.hh    | 1 -
 src/mem/cache/prefetch/stride.cc                              | 1 -
 src/mem/cache/prefetch/stride.hh                              | 1 -
 src/mem/cache/prefetch/tagged.cc                              | 1 -
 src/mem/cache/prefetch/tagged.hh                              | 1 -
 33 files changed, 33 deletions(-)

diff --git a/src/mem/cache/base.hh b/src/mem/cache/base.hh
index 6fc76282e9..78571ceb3c 100644
--- a/src/mem/cache/base.hh
+++ b/src/mem/cache/base.hh
@@ -79,7 +79,6 @@
 namespace gem5
 {
 
-GEM5_DEPRECATED_NAMESPACE(Prefetcher, prefetch);
 namespace prefetch
 {
     class Base;
diff --git a/src/mem/cache/prefetch/access_map_pattern_matching.cc b/src/mem/cache/prefetch/access_map_pattern_matching.cc
index 6bf5d9bca8..989f3c6be1 100644
--- a/src/mem/cache/prefetch/access_map_pattern_matching.cc
+++ b/src/mem/cache/prefetch/access_map_pattern_matching.cc
@@ -36,7 +36,6 @@
 namespace gem5
 {
 
-GEM5_DEPRECATED_NAMESPACE(Prefetcher, prefetch);
 namespace prefetch
 {
 
diff --git a/src/mem/cache/prefetch/access_map_pattern_matching.hh b/src/mem/cache/prefetch/access_map_pattern_matching.hh
index 3b0bc28f4d..893d30dec2 100644
--- a/src/mem/cache/prefetch/access_map_pattern_matching.hh
+++ b/src/mem/cache/prefetch/access_map_pattern_matching.hh
@@ -49,7 +49,6 @@ namespace gem5
 struct AccessMapPatternMatchingParams;
 struct AMPMPrefetcherParams;
 
-GEM5_DEPRECATED_NAMESPACE(Prefetcher, prefetch);
 namespace prefetch
 {
 
diff --git a/src/mem/cache/prefetch/base.cc b/src/mem/cache/prefetch/base.cc
index 9ff81fba68..e3e4b24cf2 100644
--- a/src/mem/cache/prefetch/base.cc
+++ b/src/mem/cache/prefetch/base.cc
@@ -55,7 +55,6 @@
 namespace gem5
 {
 
-GEM5_DEPRECATED_NAMESPACE(Prefetcher, prefetch);
 namespace prefetch
 {
 
diff --git a/src/mem/cache/prefetch/base.hh b/src/mem/cache/prefetch/base.hh
index e5e43e534f..6bae73519c 100644
--- a/src/mem/cache/prefetch/base.hh
+++ b/src/mem/cache/prefetch/base.hh
@@ -65,7 +65,6 @@ namespace gem5
 class BaseCache;
 struct BasePrefetcherParams;
 
-GEM5_DEPRECATED_NAMESPACE(Prefetcher, prefetch);
 namespace prefetch
 {
 
diff --git a/src/mem/cache/prefetch/bop.cc b/src/mem/cache/prefetch/bop.cc
index a60c1fe95e..ce2502bee6 100644
--- a/src/mem/cache/prefetch/bop.cc
+++ b/src/mem/cache/prefetch/bop.cc
@@ -34,7 +34,6 @@
 namespace gem5
 {
 
-GEM5_DEPRECATED_NAMESPACE(Prefetcher, prefetch);
 namespace prefetch
 {
 
diff --git a/src/mem/cache/prefetch/bop.hh b/src/mem/cache/prefetch/bop.hh
index 7fdba2bbf2..bb1b05dfa9 100644
--- a/src/mem/cache/prefetch/bop.hh
+++ b/src/mem/cache/prefetch/bop.hh
@@ -46,7 +46,6 @@ namespace gem5
 
 struct BOPPrefetcherParams;
 
-GEM5_DEPRECATED_NAMESPACE(Prefetcher, prefetch);
 namespace prefetch
 {
 
diff --git a/src/mem/cache/prefetch/delta_correlating_prediction_tables.cc b/src/mem/cache/prefetch/delta_correlating_prediction_tables.cc
index c5e126c4c0..b59394ce25 100644
--- a/src/mem/cache/prefetch/delta_correlating_prediction_tables.cc
+++ b/src/mem/cache/prefetch/delta_correlating_prediction_tables.cc
@@ -36,7 +36,6 @@
 namespace gem5
 {
 
-GEM5_DEPRECATED_NAMESPACE(Prefetcher, prefetch);
 namespace prefetch
 {
 
diff --git a/src/mem/cache/prefetch/delta_correlating_prediction_tables.hh b/src/mem/cache/prefetch/delta_correlating_prediction_tables.hh
index 8ad21a6691..0218e9138a 100644
--- a/src/mem/cache/prefetch/delta_correlating_prediction_tables.hh
+++ b/src/mem/cache/prefetch/delta_correlating_prediction_tables.hh
@@ -39,7 +39,6 @@ namespace gem5
 struct DeltaCorrelatingPredictionTablesParams;
 struct DCPTPrefetcherParams;
 
-GEM5_DEPRECATED_NAMESPACE(Prefetcher, prefetch);
 namespace prefetch
 {
 
diff --git a/src/mem/cache/prefetch/indirect_memory.cc b/src/mem/cache/prefetch/indirect_memory.cc
index 7bb1545f7f..ab84ce25a2 100644
--- a/src/mem/cache/prefetch/indirect_memory.cc
+++ b/src/mem/cache/prefetch/indirect_memory.cc
@@ -35,7 +35,6 @@
 namespace gem5
 {
 
-GEM5_DEPRECATED_NAMESPACE(Prefetcher, prefetch);
 namespace prefetch
 {
 
diff --git a/src/mem/cache/prefetch/indirect_memory.hh b/src/mem/cache/prefetch/indirect_memory.hh
index 85fb50e5a7..da3e894cfa 100644
--- a/src/mem/cache/prefetch/indirect_memory.hh
+++ b/src/mem/cache/prefetch/indirect_memory.hh
@@ -50,7 +50,6 @@ namespace gem5
 
 struct IndirectMemoryPrefetcherParams;
 
-GEM5_DEPRECATED_NAMESPACE(Prefetcher, prefetch);
 namespace prefetch
 {
 
diff --git a/src/mem/cache/prefetch/irregular_stream_buffer.cc b/src/mem/cache/prefetch/irregular_stream_buffer.cc
index fc0d71faa3..ce30b41aa6 100644
--- a/src/mem/cache/prefetch/irregular_stream_buffer.cc
+++ b/src/mem/cache/prefetch/irregular_stream_buffer.cc
@@ -35,7 +35,6 @@
 namespace gem5
 {
 
-GEM5_DEPRECATED_NAMESPACE(Prefetcher, prefetch);
 namespace prefetch
 {
 
diff --git a/src/mem/cache/prefetch/irregular_stream_buffer.hh b/src/mem/cache/prefetch/irregular_stream_buffer.hh
index 20dadd60a2..39373010bb 100644
--- a/src/mem/cache/prefetch/irregular_stream_buffer.hh
+++ b/src/mem/cache/prefetch/irregular_stream_buffer.hh
@@ -48,7 +48,6 @@ namespace gem5
 
 struct IrregularStreamBufferPrefetcherParams;
 
-GEM5_DEPRECATED_NAMESPACE(Prefetcher, prefetch);
 namespace prefetch
 {
 
diff --git a/src/mem/cache/prefetch/multi.cc b/src/mem/cache/prefetch/multi.cc
index ddf0e30d59..1f7298f354 100644
--- a/src/mem/cache/prefetch/multi.cc
+++ b/src/mem/cache/prefetch/multi.cc
@@ -42,7 +42,6 @@
 namespace gem5
 {
 
-GEM5_DEPRECATED_NAMESPACE(Prefetcher, prefetch);
 namespace prefetch
 {
 
diff --git a/src/mem/cache/prefetch/multi.hh b/src/mem/cache/prefetch/multi.hh
index ff17918346..7890f090b5 100644
--- a/src/mem/cache/prefetch/multi.hh
+++ b/src/mem/cache/prefetch/multi.hh
@@ -47,7 +47,6 @@ namespace gem5
 
 struct MultiPrefetcherParams;
 
-GEM5_DEPRECATED_NAMESPACE(Prefetcher, prefetch);
 namespace prefetch
 {
 
diff --git a/src/mem/cache/prefetch/pif.cc b/src/mem/cache/prefetch/pif.cc
index 95b9f4f60f..79e8e6d747 100644
--- a/src/mem/cache/prefetch/pif.cc
+++ b/src/mem/cache/prefetch/pif.cc
@@ -37,7 +37,6 @@
 namespace gem5
 {
 
-GEM5_DEPRECATED_NAMESPACE(Prefetcher, prefetch);
 namespace prefetch
 {
 
diff --git a/src/mem/cache/prefetch/pif.hh b/src/mem/cache/prefetch/pif.hh
index e48d8fbc52..296087e8e0 100644
--- a/src/mem/cache/prefetch/pif.hh
+++ b/src/mem/cache/prefetch/pif.hh
@@ -49,7 +49,6 @@ namespace gem5
 
 struct PIFPrefetcherParams;
 
-GEM5_DEPRECATED_NAMESPACE(Prefetcher, prefetch);
 namespace prefetch
 {
 
diff --git a/src/mem/cache/prefetch/queued.cc b/src/mem/cache/prefetch/queued.cc
index b85a227f00..1ab34d2e9b 100644
--- a/src/mem/cache/prefetch/queued.cc
+++ b/src/mem/cache/prefetch/queued.cc
@@ -51,7 +51,6 @@
 namespace gem5
 {
 
-GEM5_DEPRECATED_NAMESPACE(Prefetcher, prefetch);
 namespace prefetch
 {
 
diff --git a/src/mem/cache/prefetch/queued.hh b/src/mem/cache/prefetch/queued.hh
index 87d3456def..1d1a3faef4 100644
--- a/src/mem/cache/prefetch/queued.hh
+++ b/src/mem/cache/prefetch/queued.hh
@@ -53,7 +53,6 @@ namespace gem5
 
 struct QueuedPrefetcherParams;
 
-GEM5_DEPRECATED_NAMESPACE(Prefetcher, prefetch);
 namespace prefetch
 {
 
diff --git a/src/mem/cache/prefetch/sbooe.cc b/src/mem/cache/prefetch/sbooe.cc
index a3f023126d..44a10c232d 100644
--- a/src/mem/cache/prefetch/sbooe.cc
+++ b/src/mem/cache/prefetch/sbooe.cc
@@ -34,7 +34,6 @@
 namespace gem5
 {
 
-GEM5_DEPRECATED_NAMESPACE(Prefetcher, prefetch);
 namespace prefetch
 {
 
diff --git a/src/mem/cache/prefetch/sbooe.hh b/src/mem/cache/prefetch/sbooe.hh
index 9b25816b23..7914b88f45 100644
--- a/src/mem/cache/prefetch/sbooe.hh
+++ b/src/mem/cache/prefetch/sbooe.hh
@@ -47,7 +47,6 @@ namespace gem5
 
 struct SBOOEPrefetcherParams;
 
-GEM5_DEPRECATED_NAMESPACE(Prefetcher, prefetch);
 namespace prefetch
 {
 
diff --git a/src/mem/cache/prefetch/signature_path.cc b/src/mem/cache/prefetch/signature_path.cc
index 2f9477b703..a36ef809ce 100644
--- a/src/mem/cache/prefetch/signature_path.cc
+++ b/src/mem/cache/prefetch/signature_path.cc
@@ -38,7 +38,6 @@
 namespace gem5
 {
 
-GEM5_DEPRECATED_NAMESPACE(Prefetcher, prefetch);
 namespace prefetch
 {
 
diff --git a/src/mem/cache/prefetch/signature_path.hh b/src/mem/cache/prefetch/signature_path.hh
index 9cffa33d9a..9613fe0886 100644
--- a/src/mem/cache/prefetch/signature_path.hh
+++ b/src/mem/cache/prefetch/signature_path.hh
@@ -50,7 +50,6 @@ namespace gem5
 
 struct SignaturePathPrefetcherParams;
 
-GEM5_DEPRECATED_NAMESPACE(Prefetcher, prefetch);
 namespace prefetch
 {
 
diff --git a/src/mem/cache/prefetch/signature_path_v2.cc b/src/mem/cache/prefetch/signature_path_v2.cc
index 230bc76256..b50721ca69 100644
--- a/src/mem/cache/prefetch/signature_path_v2.cc
+++ b/src/mem/cache/prefetch/signature_path_v2.cc
@@ -37,7 +37,6 @@
 namespace gem5
 {
 
-GEM5_DEPRECATED_NAMESPACE(Prefetcher, prefetch);
 namespace prefetch
 {
 
diff --git a/src/mem/cache/prefetch/signature_path_v2.hh b/src/mem/cache/prefetch/signature_path_v2.hh
index b7f745cd94..417b7ec540 100644
--- a/src/mem/cache/prefetch/signature_path_v2.hh
+++ b/src/mem/cache/prefetch/signature_path_v2.hh
@@ -50,7 +50,6 @@ namespace gem5
 
 struct SignaturePathPrefetcherV2Params;
 
-GEM5_DEPRECATED_NAMESPACE(Prefetcher, prefetch);
 namespace prefetch
 {
 
diff --git a/src/mem/cache/prefetch/slim_ampm.cc b/src/mem/cache/prefetch/slim_ampm.cc
index 85f89663ca..950994a4bd 100644
--- a/src/mem/cache/prefetch/slim_ampm.cc
+++ b/src/mem/cache/prefetch/slim_ampm.cc
@@ -33,7 +33,6 @@
 namespace gem5
 {
 
-GEM5_DEPRECATED_NAMESPACE(Prefetcher, prefetch);
 namespace prefetch
 {
 
diff --git a/src/mem/cache/prefetch/slim_ampm.hh b/src/mem/cache/prefetch/slim_ampm.hh
index 4a07b9bf45..54f38d4885 100644
--- a/src/mem/cache/prefetch/slim_ampm.hh
+++ b/src/mem/cache/prefetch/slim_ampm.hh
@@ -48,7 +48,6 @@ namespace gem5
 
 struct SlimAMPMPrefetcherParams;
 
-GEM5_DEPRECATED_NAMESPACE(Prefetcher, prefetch);
 namespace prefetch
 {
 
diff --git a/src/mem/cache/prefetch/spatio_temporal_memory_streaming.cc b/src/mem/cache/prefetch/spatio_temporal_memory_streaming.cc
index 3c9b9eb64c..0e3211579c 100644
--- a/src/mem/cache/prefetch/spatio_temporal_memory_streaming.cc
+++ b/src/mem/cache/prefetch/spatio_temporal_memory_streaming.cc
@@ -35,7 +35,6 @@
 namespace gem5
 {
 
-GEM5_DEPRECATED_NAMESPACE(Prefetcher, prefetch);
 namespace prefetch
 {
 
diff --git a/src/mem/cache/prefetch/spatio_temporal_memory_streaming.hh b/src/mem/cache/prefetch/spatio_temporal_memory_streaming.hh
index bee746c5c7..cdd2788104 100644
--- a/src/mem/cache/prefetch/spatio_temporal_memory_streaming.hh
+++ b/src/mem/cache/prefetch/spatio_temporal_memory_streaming.hh
@@ -53,7 +53,6 @@ namespace gem5
 
 struct STeMSPrefetcherParams;
 
-GEM5_DEPRECATED_NAMESPACE(Prefetcher, prefetch);
 namespace prefetch
 {
 
diff --git a/src/mem/cache/prefetch/stride.cc b/src/mem/cache/prefetch/stride.cc
index 1d375a6228..0a77b28a1c 100644
--- a/src/mem/cache/prefetch/stride.cc
+++ b/src/mem/cache/prefetch/stride.cc
@@ -60,7 +60,6 @@
 namespace gem5
 {
 
-GEM5_DEPRECATED_NAMESPACE(Prefetcher, prefetch);
 namespace prefetch
 {
 
diff --git a/src/mem/cache/prefetch/stride.hh b/src/mem/cache/prefetch/stride.hh
index 27fa917436..7e55abea21 100644
--- a/src/mem/cache/prefetch/stride.hh
+++ b/src/mem/cache/prefetch/stride.hh
@@ -70,7 +70,6 @@ namespace replacement_policy
 }
 struct StridePrefetcherParams;
 
-GEM5_DEPRECATED_NAMESPACE(Prefetcher, prefetch);
 namespace prefetch
 {
 
diff --git a/src/mem/cache/prefetch/tagged.cc b/src/mem/cache/prefetch/tagged.cc
index d385ac0611..0d4d79b006 100644
--- a/src/mem/cache/prefetch/tagged.cc
+++ b/src/mem/cache/prefetch/tagged.cc
@@ -38,7 +38,6 @@
 namespace gem5
 {
 
-GEM5_DEPRECATED_NAMESPACE(Prefetcher, prefetch);
 namespace prefetch
 {
 
diff --git a/src/mem/cache/prefetch/tagged.hh b/src/mem/cache/prefetch/tagged.hh
index d7f77a8841..5c91f654b1 100644
--- a/src/mem/cache/prefetch/tagged.hh
+++ b/src/mem/cache/prefetch/tagged.hh
@@ -42,7 +42,6 @@ namespace gem5
 
 struct TaggedPrefetcherParams;
 
-GEM5_DEPRECATED_NAMESPACE(Prefetcher, prefetch);
 namespace prefetch
 {
 

From de408fbd4e4cb0b957f45554daba94273218cb80 Mon Sep 17 00:00:00 2001
From: "Daniel R. Carvalho" <odanrc@yahoo.com.br>
Date: Sat, 14 Jan 2023 09:48:54 -0300
Subject: [PATCH 120/492] mem-cache: Remove the Compressor namespace

This namespace has gone through the deprecation period
and can now be removed.

Change-Id: Ibbcc8221ed6042d55f56a94bf499a4c1c564ea82
Signed-off-by: Daniel R. Carvalho <odanrc@yahoo.com.br>
Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/67354
Maintainer: Jason Lowe-Power <power.jg@gmail.com>
Tested-by: kokoro <noreply+kokoro@google.com>
Reviewed-by: Richard Cooper <richard.cooper@arm.com>
---
 src/mem/cache/compressors/base.cc                       | 1 -
 src/mem/cache/compressors/base.hh                       | 1 -
 src/mem/cache/compressors/base_delta.cc                 | 1 -
 src/mem/cache/compressors/base_delta.hh                 | 1 -
 src/mem/cache/compressors/base_delta_impl.hh            | 1 -
 src/mem/cache/compressors/base_dictionary_compressor.cc | 1 -
 src/mem/cache/compressors/cpack.cc                      | 1 -
 src/mem/cache/compressors/cpack.hh                      | 1 -
 src/mem/cache/compressors/dictionary_compressor.hh      | 1 -
 src/mem/cache/compressors/dictionary_compressor_impl.hh | 1 -
 src/mem/cache/compressors/encoders/base.hh              | 1 -
 src/mem/cache/compressors/encoders/huffman.cc           | 1 -
 src/mem/cache/compressors/encoders/huffman.hh           | 1 -
 src/mem/cache/compressors/fpc.cc                        | 1 -
 src/mem/cache/compressors/fpc.hh                        | 1 -
 src/mem/cache/compressors/fpcd.cc                       | 1 -
 src/mem/cache/compressors/fpcd.hh                       | 1 -
 src/mem/cache/compressors/frequent_values.cc            | 1 -
 src/mem/cache/compressors/frequent_values.hh            | 1 -
 src/mem/cache/compressors/multi.cc                      | 1 -
 src/mem/cache/compressors/multi.hh                      | 1 -
 src/mem/cache/compressors/perfect.cc                    | 1 -
 src/mem/cache/compressors/perfect.hh                    | 1 -
 src/mem/cache/compressors/repeated_qwords.cc            | 1 -
 src/mem/cache/compressors/repeated_qwords.hh            | 1 -
 src/mem/cache/compressors/zero.cc                       | 1 -
 src/mem/cache/compressors/zero.hh                       | 1 -
 27 files changed, 27 deletions(-)

diff --git a/src/mem/cache/compressors/base.cc b/src/mem/cache/compressors/base.cc
index cafd691bbc..df3020dbf8 100644
--- a/src/mem/cache/compressors/base.cc
+++ b/src/mem/cache/compressors/base.cc
@@ -48,7 +48,6 @@
 namespace gem5
 {
 
-GEM5_DEPRECATED_NAMESPACE(Compressor, compression);
 namespace compression
 {
 
diff --git a/src/mem/cache/compressors/base.hh b/src/mem/cache/compressors/base.hh
index 4945176cd7..110c6a44f8 100644
--- a/src/mem/cache/compressors/base.hh
+++ b/src/mem/cache/compressors/base.hh
@@ -50,7 +50,6 @@ class BaseCache;
 class CacheBlk;
 struct BaseCacheCompressorParams;
 
-GEM5_DEPRECATED_NAMESPACE(Compressor, compression);
 namespace compression
 {
 
diff --git a/src/mem/cache/compressors/base_delta.cc b/src/mem/cache/compressors/base_delta.cc
index 9b2e67c023..308dabf2b2 100644
--- a/src/mem/cache/compressors/base_delta.cc
+++ b/src/mem/cache/compressors/base_delta.cc
@@ -42,7 +42,6 @@
 namespace gem5
 {
 
-GEM5_DEPRECATED_NAMESPACE(Compressor, compression);
 namespace compression
 {
 
diff --git a/src/mem/cache/compressors/base_delta.hh b/src/mem/cache/compressors/base_delta.hh
index 81f2c4b546..a0e666886c 100644
--- a/src/mem/cache/compressors/base_delta.hh
+++ b/src/mem/cache/compressors/base_delta.hh
@@ -52,7 +52,6 @@ struct Base32Delta8Params;
 struct Base32Delta16Params;
 struct Base16Delta8Params;
 
-GEM5_DEPRECATED_NAMESPACE(Compressor, compression);
 namespace compression
 {
 
diff --git a/src/mem/cache/compressors/base_delta_impl.hh b/src/mem/cache/compressors/base_delta_impl.hh
index c4a841de36..c43283c814 100644
--- a/src/mem/cache/compressors/base_delta_impl.hh
+++ b/src/mem/cache/compressors/base_delta_impl.hh
@@ -40,7 +40,6 @@
 namespace gem5
 {
 
-GEM5_DEPRECATED_NAMESPACE(Compressor, compression);
 namespace compression
 {
 
diff --git a/src/mem/cache/compressors/base_dictionary_compressor.cc b/src/mem/cache/compressors/base_dictionary_compressor.cc
index 6a1ed925f4..d289db1872 100644
--- a/src/mem/cache/compressors/base_dictionary_compressor.cc
+++ b/src/mem/cache/compressors/base_dictionary_compressor.cc
@@ -37,7 +37,6 @@
 namespace gem5
 {
 
-GEM5_DEPRECATED_NAMESPACE(Compressor, compression);
 namespace compression
 {
 
diff --git a/src/mem/cache/compressors/cpack.cc b/src/mem/cache/compressors/cpack.cc
index 64376b9237..44f47bbf4c 100644
--- a/src/mem/cache/compressors/cpack.cc
+++ b/src/mem/cache/compressors/cpack.cc
@@ -38,7 +38,6 @@
 namespace gem5
 {
 
-GEM5_DEPRECATED_NAMESPACE(Compressor, compression);
 namespace compression
 {
 
diff --git a/src/mem/cache/compressors/cpack.hh b/src/mem/cache/compressors/cpack.hh
index 51f5ce17bc..d1005d1dc8 100644
--- a/src/mem/cache/compressors/cpack.hh
+++ b/src/mem/cache/compressors/cpack.hh
@@ -46,7 +46,6 @@ namespace gem5
 
 struct CPackParams;
 
-GEM5_DEPRECATED_NAMESPACE(Compressor, compression);
 namespace compression
 {
 
diff --git a/src/mem/cache/compressors/dictionary_compressor.hh b/src/mem/cache/compressors/dictionary_compressor.hh
index c283280980..6efdb73e20 100644
--- a/src/mem/cache/compressors/dictionary_compressor.hh
+++ b/src/mem/cache/compressors/dictionary_compressor.hh
@@ -61,7 +61,6 @@ namespace gem5
 
 struct BaseDictionaryCompressorParams;
 
-GEM5_DEPRECATED_NAMESPACE(Compressor, compression);
 namespace compression
 {
 
diff --git a/src/mem/cache/compressors/dictionary_compressor_impl.hh b/src/mem/cache/compressors/dictionary_compressor_impl.hh
index 9eb265b1c6..6fef9482d3 100644
--- a/src/mem/cache/compressors/dictionary_compressor_impl.hh
+++ b/src/mem/cache/compressors/dictionary_compressor_impl.hh
@@ -43,7 +43,6 @@
 namespace gem5
 {
 
-GEM5_DEPRECATED_NAMESPACE(Compressor, compression);
 namespace compression
 {
 
diff --git a/src/mem/cache/compressors/encoders/base.hh b/src/mem/cache/compressors/encoders/base.hh
index 92971afe74..c5f22977e6 100644
--- a/src/mem/cache/compressors/encoders/base.hh
+++ b/src/mem/cache/compressors/encoders/base.hh
@@ -36,7 +36,6 @@
 namespace gem5
 {
 
-GEM5_DEPRECATED_NAMESPACE(Compressor, compression);
 namespace compression
 {
 GEM5_DEPRECATED_NAMESPACE(Encoder, encoder);
diff --git a/src/mem/cache/compressors/encoders/huffman.cc b/src/mem/cache/compressors/encoders/huffman.cc
index 7a47aa93e8..a7f24cff94 100644
--- a/src/mem/cache/compressors/encoders/huffman.cc
+++ b/src/mem/cache/compressors/encoders/huffman.cc
@@ -35,7 +35,6 @@
 namespace gem5
 {
 
-GEM5_DEPRECATED_NAMESPACE(Compressor, compression);
 namespace compression
 {
 GEM5_DEPRECATED_NAMESPACE(Encoder, encoder);
diff --git a/src/mem/cache/compressors/encoders/huffman.hh b/src/mem/cache/compressors/encoders/huffman.hh
index 3f29f2c264..2ea53641da 100644
--- a/src/mem/cache/compressors/encoders/huffman.hh
+++ b/src/mem/cache/compressors/encoders/huffman.hh
@@ -42,7 +42,6 @@
 namespace gem5
 {
 
-GEM5_DEPRECATED_NAMESPACE(Compressor, compression);
 namespace compression
 {
 GEM5_DEPRECATED_NAMESPACE(Encoder, encoder);
diff --git a/src/mem/cache/compressors/fpc.cc b/src/mem/cache/compressors/fpc.cc
index 80713552e2..f910eb1494 100644
--- a/src/mem/cache/compressors/fpc.cc
+++ b/src/mem/cache/compressors/fpc.cc
@@ -34,7 +34,6 @@
 namespace gem5
 {
 
-GEM5_DEPRECATED_NAMESPACE(Compressor, compression);
 namespace compression
 {
 
diff --git a/src/mem/cache/compressors/fpc.hh b/src/mem/cache/compressors/fpc.hh
index 9dd40fbd2a..629b3f1711 100644
--- a/src/mem/cache/compressors/fpc.hh
+++ b/src/mem/cache/compressors/fpc.hh
@@ -51,7 +51,6 @@ namespace gem5
 
 struct FPCParams;
 
-GEM5_DEPRECATED_NAMESPACE(Compressor, compression);
 namespace compression
 {
 
diff --git a/src/mem/cache/compressors/fpcd.cc b/src/mem/cache/compressors/fpcd.cc
index 480d34f445..b0ea55c892 100644
--- a/src/mem/cache/compressors/fpcd.cc
+++ b/src/mem/cache/compressors/fpcd.cc
@@ -38,7 +38,6 @@
 namespace gem5
 {
 
-GEM5_DEPRECATED_NAMESPACE(Compressor, compression);
 namespace compression
 {
 
diff --git a/src/mem/cache/compressors/fpcd.hh b/src/mem/cache/compressors/fpcd.hh
index d1ee015957..4df5036037 100644
--- a/src/mem/cache/compressors/fpcd.hh
+++ b/src/mem/cache/compressors/fpcd.hh
@@ -52,7 +52,6 @@ namespace gem5
 
 struct FPCDParams;
 
-GEM5_DEPRECATED_NAMESPACE(Compressor, compression);
 namespace compression
 {
 
diff --git a/src/mem/cache/compressors/frequent_values.cc b/src/mem/cache/compressors/frequent_values.cc
index f9f73a9003..b5eca3b096 100644
--- a/src/mem/cache/compressors/frequent_values.cc
+++ b/src/mem/cache/compressors/frequent_values.cc
@@ -42,7 +42,6 @@
 namespace gem5
 {
 
-GEM5_DEPRECATED_NAMESPACE(Compressor, compression);
 namespace compression
 {
 
diff --git a/src/mem/cache/compressors/frequent_values.hh b/src/mem/cache/compressors/frequent_values.hh
index c2874e9f43..e7eac2644f 100644
--- a/src/mem/cache/compressors/frequent_values.hh
+++ b/src/mem/cache/compressors/frequent_values.hh
@@ -48,7 +48,6 @@ namespace gem5
 
 struct FrequentValuesCompressorParams;
 
-GEM5_DEPRECATED_NAMESPACE(Compressor, compression);
 namespace compression
 {
 
diff --git a/src/mem/cache/compressors/multi.cc b/src/mem/cache/compressors/multi.cc
index cbc307accb..d86ea2c168 100644
--- a/src/mem/cache/compressors/multi.cc
+++ b/src/mem/cache/compressors/multi.cc
@@ -45,7 +45,6 @@
 namespace gem5
 {
 
-GEM5_DEPRECATED_NAMESPACE(Compressor, compression);
 namespace compression
 {
 
diff --git a/src/mem/cache/compressors/multi.hh b/src/mem/cache/compressors/multi.hh
index 2cdf78fc97..bb9bd57a0d 100644
--- a/src/mem/cache/compressors/multi.hh
+++ b/src/mem/cache/compressors/multi.hh
@@ -46,7 +46,6 @@ namespace gem5
 
 struct MultiCompressorParams;
 
-GEM5_DEPRECATED_NAMESPACE(Compressor, compression);
 namespace compression
 {
 
diff --git a/src/mem/cache/compressors/perfect.cc b/src/mem/cache/compressors/perfect.cc
index e271fa0556..76c37f8a47 100644
--- a/src/mem/cache/compressors/perfect.cc
+++ b/src/mem/cache/compressors/perfect.cc
@@ -41,7 +41,6 @@
 namespace gem5
 {
 
-GEM5_DEPRECATED_NAMESPACE(Compressor, compression);
 namespace compression
 {
 
diff --git a/src/mem/cache/compressors/perfect.hh b/src/mem/cache/compressors/perfect.hh
index 0d91c50ed2..eaa43ca86a 100644
--- a/src/mem/cache/compressors/perfect.hh
+++ b/src/mem/cache/compressors/perfect.hh
@@ -46,7 +46,6 @@ namespace gem5
 
 struct PerfectCompressorParams;
 
-GEM5_DEPRECATED_NAMESPACE(Compressor, compression);
 namespace compression
 {
 
diff --git a/src/mem/cache/compressors/repeated_qwords.cc b/src/mem/cache/compressors/repeated_qwords.cc
index 8d5c32da86..01e83961e3 100644
--- a/src/mem/cache/compressors/repeated_qwords.cc
+++ b/src/mem/cache/compressors/repeated_qwords.cc
@@ -41,7 +41,6 @@
 namespace gem5
 {
 
-GEM5_DEPRECATED_NAMESPACE(Compressor, compression);
 namespace compression
 {
 
diff --git a/src/mem/cache/compressors/repeated_qwords.hh b/src/mem/cache/compressors/repeated_qwords.hh
index 3e900a1655..25deb1ff58 100644
--- a/src/mem/cache/compressors/repeated_qwords.hh
+++ b/src/mem/cache/compressors/repeated_qwords.hh
@@ -46,7 +46,6 @@ namespace gem5
 
 struct RepeatedQwordsCompressorParams;
 
-GEM5_DEPRECATED_NAMESPACE(Compressor, compression);
 namespace compression
 {
 
diff --git a/src/mem/cache/compressors/zero.cc b/src/mem/cache/compressors/zero.cc
index 42a3c7c613..3dca1ecfa0 100644
--- a/src/mem/cache/compressors/zero.cc
+++ b/src/mem/cache/compressors/zero.cc
@@ -41,7 +41,6 @@
 namespace gem5
 {
 
-GEM5_DEPRECATED_NAMESPACE(Compressor, compression);
 namespace compression
 {
 
diff --git a/src/mem/cache/compressors/zero.hh b/src/mem/cache/compressors/zero.hh
index 6e8ce34b2a..5ab994f6ee 100644
--- a/src/mem/cache/compressors/zero.hh
+++ b/src/mem/cache/compressors/zero.hh
@@ -46,7 +46,6 @@ namespace gem5
 
 struct ZeroCompressorParams;
 
-GEM5_DEPRECATED_NAMESPACE(Compressor, compression);
 namespace compression
 {
 

From 82aa4c835846d9c95c75cc8d06ab82a4b6cc7caa Mon Sep 17 00:00:00 2001
From: "Daniel R. Carvalho" <odanrc@yahoo.com.br>
Date: Sat, 14 Jan 2023 10:01:34 -0300
Subject: [PATCH 121/492] mem-cache: Remove the Encoder namespace

This namespace has gone through the deprecation period
and can now be removed.

Change-Id: Iabe3b61eb2409a10c582ab1f1c26abc649c1646a
Signed-off-by: Daniel R. Carvalho <odanrc@yahoo.com.br>
Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/67355
Reviewed-by: Richard Cooper <richard.cooper@arm.com>
Maintainer: Jason Lowe-Power <power.jg@gmail.com>
Tested-by: kokoro <noreply+kokoro@google.com>
---
 src/mem/cache/compressors/encoders/base.hh    | 1 -
 src/mem/cache/compressors/encoders/huffman.cc | 1 -
 src/mem/cache/compressors/encoders/huffman.hh | 1 -
 3 files changed, 3 deletions(-)

diff --git a/src/mem/cache/compressors/encoders/base.hh b/src/mem/cache/compressors/encoders/base.hh
index c5f22977e6..ddc8c67567 100644
--- a/src/mem/cache/compressors/encoders/base.hh
+++ b/src/mem/cache/compressors/encoders/base.hh
@@ -38,7 +38,6 @@ namespace gem5
 
 namespace compression
 {
-GEM5_DEPRECATED_NAMESPACE(Encoder, encoder);
 namespace encoder
 {
 
diff --git a/src/mem/cache/compressors/encoders/huffman.cc b/src/mem/cache/compressors/encoders/huffman.cc
index a7f24cff94..5be3bceaef 100644
--- a/src/mem/cache/compressors/encoders/huffman.cc
+++ b/src/mem/cache/compressors/encoders/huffman.cc
@@ -37,7 +37,6 @@ namespace gem5
 
 namespace compression
 {
-GEM5_DEPRECATED_NAMESPACE(Encoder, encoder);
 namespace encoder
 {
 
diff --git a/src/mem/cache/compressors/encoders/huffman.hh b/src/mem/cache/compressors/encoders/huffman.hh
index 2ea53641da..761485486e 100644
--- a/src/mem/cache/compressors/encoders/huffman.hh
+++ b/src/mem/cache/compressors/encoders/huffman.hh
@@ -44,7 +44,6 @@ namespace gem5
 
 namespace compression
 {
-GEM5_DEPRECATED_NAMESPACE(Encoder, encoder);
 namespace encoder
 {
 

From 813c27c97a6ef0a283da32fa5b3322b4a6e9f57a Mon Sep 17 00:00:00 2001
From: "Daniel R. Carvalho" <odanrc@yahoo.com.br>
Date: Sat, 14 Jan 2023 09:59:35 -0300
Subject: [PATCH 122/492] mem: Remove the QoS namespace

This namespace has gone through the deprecation period
and can now be removed.

Change-Id: I2fa66e5fc77f19beaac3251602617704dadaec99
Signed-off-by: Daniel R. Carvalho <odanrc@yahoo.com.br>
Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/67356
Tested-by: kokoro <noreply+kokoro@google.com>
Reviewed-by: Richard Cooper <richard.cooper@arm.com>
Maintainer: Jason Lowe-Power <power.jg@gmail.com>
---
 src/mem/qos/mem_ctrl.cc                | 1 -
 src/mem/qos/mem_ctrl.hh                | 1 -
 src/mem/qos/mem_sink.cc                | 1 -
 src/mem/qos/mem_sink.hh                | 1 -
 src/mem/qos/policy.cc                  | 1 -
 src/mem/qos/policy.hh                  | 1 -
 src/mem/qos/policy_fixed_prio.cc       | 1 -
 src/mem/qos/policy_fixed_prio.hh       | 1 -
 src/mem/qos/policy_pf.cc               | 1 -
 src/mem/qos/policy_pf.hh               | 1 -
 src/mem/qos/q_policy.cc                | 1 -
 src/mem/qos/q_policy.hh                | 1 -
 src/mem/qos/turnaround_policy.hh       | 1 -
 src/mem/qos/turnaround_policy_ideal.cc | 1 -
 src/mem/qos/turnaround_policy_ideal.hh | 1 -
 15 files changed, 15 deletions(-)

diff --git a/src/mem/qos/mem_ctrl.cc b/src/mem/qos/mem_ctrl.cc
index 5bb031c9ed..9bf13280da 100644
--- a/src/mem/qos/mem_ctrl.cc
+++ b/src/mem/qos/mem_ctrl.cc
@@ -48,7 +48,6 @@ namespace gem5
 namespace memory
 {
 
-GEM5_DEPRECATED_NAMESPACE(QoS, qos);
 namespace qos
 {
 
diff --git a/src/mem/qos/mem_ctrl.hh b/src/mem/qos/mem_ctrl.hh
index 11e787d484..359e2858be 100644
--- a/src/mem/qos/mem_ctrl.hh
+++ b/src/mem/qos/mem_ctrl.hh
@@ -64,7 +64,6 @@ namespace gem5
 namespace memory
 {
 
-GEM5_DEPRECATED_NAMESPACE(QoS, qos);
 namespace qos
 {
 
diff --git a/src/mem/qos/mem_sink.cc b/src/mem/qos/mem_sink.cc
index 2dec5d574a..3ffe7f4d61 100644
--- a/src/mem/qos/mem_sink.cc
+++ b/src/mem/qos/mem_sink.cc
@@ -50,7 +50,6 @@ namespace gem5
 namespace memory
 {
 
-GEM5_DEPRECATED_NAMESPACE(QoS, qos);
 namespace qos
 {
 
diff --git a/src/mem/qos/mem_sink.hh b/src/mem/qos/mem_sink.hh
index a2e975a483..d2310c65fe 100644
--- a/src/mem/qos/mem_sink.hh
+++ b/src/mem/qos/mem_sink.hh
@@ -59,7 +59,6 @@ struct QoSMemSinkInterfaceParams;
 namespace memory
 {
 
-GEM5_DEPRECATED_NAMESPACE(QoS, qos);
 namespace qos
 {
 
diff --git a/src/mem/qos/policy.cc b/src/mem/qos/policy.cc
index 6d41e7d452..5ca7eae6b6 100644
--- a/src/mem/qos/policy.cc
+++ b/src/mem/qos/policy.cc
@@ -45,7 +45,6 @@ namespace gem5
 namespace memory
 {
 
-GEM5_DEPRECATED_NAMESPACE(QoS, qos);
 namespace qos
 {
 
diff --git a/src/mem/qos/policy.hh b/src/mem/qos/policy.hh
index a7e7666a86..c5bd2be91c 100644
--- a/src/mem/qos/policy.hh
+++ b/src/mem/qos/policy.hh
@@ -57,7 +57,6 @@ struct QoSPolicyParams;
 namespace memory
 {
 
-GEM5_DEPRECATED_NAMESPACE(QoS, qos);
 namespace qos
 {
 
diff --git a/src/mem/qos/policy_fixed_prio.cc b/src/mem/qos/policy_fixed_prio.cc
index 140817e55f..f64aae9aaf 100644
--- a/src/mem/qos/policy_fixed_prio.cc
+++ b/src/mem/qos/policy_fixed_prio.cc
@@ -51,7 +51,6 @@ namespace gem5
 namespace memory
 {
 
-GEM5_DEPRECATED_NAMESPACE(QoS, qos);
 namespace qos
 {
 
diff --git a/src/mem/qos/policy_fixed_prio.hh b/src/mem/qos/policy_fixed_prio.hh
index 77e7a2515e..18ff6ac8d9 100644
--- a/src/mem/qos/policy_fixed_prio.hh
+++ b/src/mem/qos/policy_fixed_prio.hh
@@ -52,7 +52,6 @@ struct QoSFixedPriorityPolicyParams;
 namespace memory
 {
 
-GEM5_DEPRECATED_NAMESPACE(QoS, qos);
 namespace qos
 {
 
diff --git a/src/mem/qos/policy_pf.cc b/src/mem/qos/policy_pf.cc
index ae15045857..adbcdb448c 100644
--- a/src/mem/qos/policy_pf.cc
+++ b/src/mem/qos/policy_pf.cc
@@ -48,7 +48,6 @@ namespace gem5
 namespace memory
 {
 
-GEM5_DEPRECATED_NAMESPACE(QoS, qos);
 namespace qos
 {
 
diff --git a/src/mem/qos/policy_pf.hh b/src/mem/qos/policy_pf.hh
index acc2a4a6a6..4c215e54c6 100644
--- a/src/mem/qos/policy_pf.hh
+++ b/src/mem/qos/policy_pf.hh
@@ -52,7 +52,6 @@ struct QoSPropFairPolicyParams;
 namespace memory
 {
 
-GEM5_DEPRECATED_NAMESPACE(QoS, qos);
 namespace qos
 {
 
diff --git a/src/mem/qos/q_policy.cc b/src/mem/qos/q_policy.cc
index de2e31660e..a6d13feb7e 100644
--- a/src/mem/qos/q_policy.cc
+++ b/src/mem/qos/q_policy.cc
@@ -52,7 +52,6 @@ namespace gem5
 namespace memory
 {
 
-GEM5_DEPRECATED_NAMESPACE(QoS, qos);
 namespace qos
 {
 
diff --git a/src/mem/qos/q_policy.hh b/src/mem/qos/q_policy.hh
index 7af52b6d34..fc9200d0af 100644
--- a/src/mem/qos/q_policy.hh
+++ b/src/mem/qos/q_policy.hh
@@ -53,7 +53,6 @@ namespace gem5
 namespace memory
 {
 
-GEM5_DEPRECATED_NAMESPACE(QoS, qos);
 namespace qos
 {
 
diff --git a/src/mem/qos/turnaround_policy.hh b/src/mem/qos/turnaround_policy.hh
index 2d5696f60d..9bbb446a12 100644
--- a/src/mem/qos/turnaround_policy.hh
+++ b/src/mem/qos/turnaround_policy.hh
@@ -49,7 +49,6 @@ namespace gem5
 namespace memory
 {
 
-GEM5_DEPRECATED_NAMESPACE(QoS, qos);
 namespace qos
 {
 
diff --git a/src/mem/qos/turnaround_policy_ideal.cc b/src/mem/qos/turnaround_policy_ideal.cc
index c67e40b2c6..8d3d7d0b11 100644
--- a/src/mem/qos/turnaround_policy_ideal.cc
+++ b/src/mem/qos/turnaround_policy_ideal.cc
@@ -48,7 +48,6 @@ namespace gem5
 namespace memory
 {
 
-GEM5_DEPRECATED_NAMESPACE(QoS, qos);
 namespace qos
 {
 
diff --git a/src/mem/qos/turnaround_policy_ideal.hh b/src/mem/qos/turnaround_policy_ideal.hh
index 0a75f79bf8..de416c475c 100644
--- a/src/mem/qos/turnaround_policy_ideal.hh
+++ b/src/mem/qos/turnaround_policy_ideal.hh
@@ -47,7 +47,6 @@ namespace gem5
 namespace memory
 {
 
-GEM5_DEPRECATED_NAMESPACE(QoS, qos);
 namespace qos
 {
 

From e881f2603cc31a3a7d628ccb7890d0b5e3d5a3a5 Mon Sep 17 00:00:00 2001
From: "Daniel R. Carvalho" <odanrc@yahoo.com.br>
Date: Sat, 14 Jan 2023 10:14:33 -0300
Subject: [PATCH 123/492] mem: Remove the ContextSwitchTaskId namespace

This namespace has gone through the deprecation period
and can now be removed.

Change-Id: Iab4bb6ac6e8d603fb508330691796ccdac4b9cb6
Signed-off-by: Daniel R. Carvalho <odanrc@yahoo.com.br>
Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/67357
Tested-by: kokoro <noreply+kokoro@google.com>
Reviewed-by: Richard Cooper <richard.cooper@arm.com>
Maintainer: Jason Lowe-Power <power.jg@gmail.com>
---
 src/mem/request.hh | 1 -
 1 file changed, 1 deletion(-)

diff --git a/src/mem/request.hh b/src/mem/request.hh
index 6a0cbc21d4..be91c71cc0 100644
--- a/src/mem/request.hh
+++ b/src/mem/request.hh
@@ -74,7 +74,6 @@ namespace gem5
  * doesn't cause a problem with stats and is large enough to realistic
  * benchmarks (Linux/Android boot, BBench, etc.)
  */
-GEM5_DEPRECATED_NAMESPACE(ContextSwitchTaskId, context_switch_task_id);
 namespace context_switch_task_id
 {
     enum TaskId

From 65317b6fc93f480f588b55d3d1979be572125eb3 Mon Sep 17 00:00:00 2001
From: "Daniel R. Carvalho" <odanrc@yahoo.com.br>
Date: Sat, 14 Jan 2023 09:50:13 -0300
Subject: [PATCH 124/492] base: Remove the BloomFilter namespace

This namespace has gone through the deprecation period
and can now be removed.

Change-Id: Ib919285c6270eb53bd29ab534f3f9b5612417bb2
Signed-off-by: Daniel R. Carvalho <odanrc@yahoo.com.br>
Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/67358
Tested-by: kokoro <noreply+kokoro@google.com>
Maintainer: Jason Lowe-Power <power.jg@gmail.com>
Reviewed-by: Richard Cooper <richard.cooper@arm.com>
---
 src/base/filters/base.hh                       | 1 -
 src/base/filters/block_bloom_filter.cc         | 1 -
 src/base/filters/block_bloom_filter.hh         | 1 -
 src/base/filters/bulk_bloom_filter.cc          | 1 -
 src/base/filters/bulk_bloom_filter.hh          | 1 -
 src/base/filters/h3_bloom_filter.cc            | 1 -
 src/base/filters/h3_bloom_filter.hh            | 1 -
 src/base/filters/multi_bit_sel_bloom_filter.cc | 1 -
 src/base/filters/multi_bit_sel_bloom_filter.hh | 1 -
 src/base/filters/multi_bloom_filter.cc         | 1 -
 src/base/filters/multi_bloom_filter.hh         | 1 -
 src/base/filters/perfect_bloom_filter.cc       | 1 -
 src/base/filters/perfect_bloom_filter.hh       | 1 -
 13 files changed, 13 deletions(-)

diff --git a/src/base/filters/base.hh b/src/base/filters/base.hh
index f2b9fce7c9..858e265dc0 100644
--- a/src/base/filters/base.hh
+++ b/src/base/filters/base.hh
@@ -42,7 +42,6 @@
 namespace gem5
 {
 
-GEM5_DEPRECATED_NAMESPACE(BloomFilter, bloom_filter);
 namespace bloom_filter
 {
 
diff --git a/src/base/filters/block_bloom_filter.cc b/src/base/filters/block_bloom_filter.cc
index e1ae116783..7a3c170057 100644
--- a/src/base/filters/block_bloom_filter.cc
+++ b/src/base/filters/block_bloom_filter.cc
@@ -36,7 +36,6 @@
 namespace gem5
 {
 
-GEM5_DEPRECATED_NAMESPACE(BloomFilter, bloom_filter);
 namespace bloom_filter
 {
 
diff --git a/src/base/filters/block_bloom_filter.hh b/src/base/filters/block_bloom_filter.hh
index 0375d30a1f..f7040064a1 100644
--- a/src/base/filters/block_bloom_filter.hh
+++ b/src/base/filters/block_bloom_filter.hh
@@ -39,7 +39,6 @@ namespace gem5
 
 struct BloomFilterBlockParams;
 
-GEM5_DEPRECATED_NAMESPACE(BloomFilter, bloom_filter);
 namespace bloom_filter
 {
 
diff --git a/src/base/filters/bulk_bloom_filter.cc b/src/base/filters/bulk_bloom_filter.cc
index 3a2ac58cc7..cf28bf90f3 100644
--- a/src/base/filters/bulk_bloom_filter.cc
+++ b/src/base/filters/bulk_bloom_filter.cc
@@ -38,7 +38,6 @@
 namespace gem5
 {
 
-GEM5_DEPRECATED_NAMESPACE(BloomFilter, bloom_filter);
 namespace bloom_filter
 {
 
diff --git a/src/base/filters/bulk_bloom_filter.hh b/src/base/filters/bulk_bloom_filter.hh
index 985fcb3f7a..6c474760ae 100644
--- a/src/base/filters/bulk_bloom_filter.hh
+++ b/src/base/filters/bulk_bloom_filter.hh
@@ -37,7 +37,6 @@ namespace gem5
 
 struct BloomFilterBulkParams;
 
-GEM5_DEPRECATED_NAMESPACE(BloomFilter, bloom_filter);
 namespace bloom_filter
 {
 
diff --git a/src/base/filters/h3_bloom_filter.cc b/src/base/filters/h3_bloom_filter.cc
index e1aeba7e73..9e973d88fa 100644
--- a/src/base/filters/h3_bloom_filter.cc
+++ b/src/base/filters/h3_bloom_filter.cc
@@ -38,7 +38,6 @@
 namespace gem5
 {
 
-GEM5_DEPRECATED_NAMESPACE(BloomFilter, bloom_filter);
 namespace bloom_filter
 {
 
diff --git a/src/base/filters/h3_bloom_filter.hh b/src/base/filters/h3_bloom_filter.hh
index a60c21217a..fc6ba657e2 100644
--- a/src/base/filters/h3_bloom_filter.hh
+++ b/src/base/filters/h3_bloom_filter.hh
@@ -37,7 +37,6 @@ namespace gem5
 
 struct BloomFilterH3Params;
 
-GEM5_DEPRECATED_NAMESPACE(BloomFilter, bloom_filter);
 namespace bloom_filter
 {
 
diff --git a/src/base/filters/multi_bit_sel_bloom_filter.cc b/src/base/filters/multi_bit_sel_bloom_filter.cc
index f12d1f766d..e6f6c14576 100644
--- a/src/base/filters/multi_bit_sel_bloom_filter.cc
+++ b/src/base/filters/multi_bit_sel_bloom_filter.cc
@@ -38,7 +38,6 @@
 namespace gem5
 {
 
-GEM5_DEPRECATED_NAMESPACE(BloomFilter, bloom_filter);
 namespace bloom_filter
 {
 
diff --git a/src/base/filters/multi_bit_sel_bloom_filter.hh b/src/base/filters/multi_bit_sel_bloom_filter.hh
index 8c5b34cdd5..a746b1d0ad 100644
--- a/src/base/filters/multi_bit_sel_bloom_filter.hh
+++ b/src/base/filters/multi_bit_sel_bloom_filter.hh
@@ -37,7 +37,6 @@ namespace gem5
 
 struct BloomFilterMultiBitSelParams;
 
-GEM5_DEPRECATED_NAMESPACE(BloomFilter, bloom_filter);
 namespace bloom_filter
 {
 
diff --git a/src/base/filters/multi_bloom_filter.cc b/src/base/filters/multi_bloom_filter.cc
index 401d84401d..f6b4892800 100644
--- a/src/base/filters/multi_bloom_filter.cc
+++ b/src/base/filters/multi_bloom_filter.cc
@@ -35,7 +35,6 @@
 namespace gem5
 {
 
-GEM5_DEPRECATED_NAMESPACE(BloomFilter, bloom_filter);
 namespace bloom_filter
 {
 
diff --git a/src/base/filters/multi_bloom_filter.hh b/src/base/filters/multi_bloom_filter.hh
index ec9838a7b3..9445b81d5c 100644
--- a/src/base/filters/multi_bloom_filter.hh
+++ b/src/base/filters/multi_bloom_filter.hh
@@ -39,7 +39,6 @@ namespace gem5
 
 struct BloomFilterMultiParams;
 
-GEM5_DEPRECATED_NAMESPACE(BloomFilter, bloom_filter);
 namespace bloom_filter
 {
 
diff --git a/src/base/filters/perfect_bloom_filter.cc b/src/base/filters/perfect_bloom_filter.cc
index 7583a1a196..f6f9d8b106 100644
--- a/src/base/filters/perfect_bloom_filter.cc
+++ b/src/base/filters/perfect_bloom_filter.cc
@@ -34,7 +34,6 @@
 namespace gem5
 {
 
-GEM5_DEPRECATED_NAMESPACE(BloomFilter, bloom_filter);
 namespace bloom_filter
 {
 
diff --git a/src/base/filters/perfect_bloom_filter.hh b/src/base/filters/perfect_bloom_filter.hh
index 65ef01544c..2bcecb8987 100644
--- a/src/base/filters/perfect_bloom_filter.hh
+++ b/src/base/filters/perfect_bloom_filter.hh
@@ -38,7 +38,6 @@ namespace gem5
 
 struct BloomFilterPerfectParams;
 
-GEM5_DEPRECATED_NAMESPACE(BloomFilter, bloom_filter);
 namespace bloom_filter
 {
 

From 4f480fc6fc5d639ca16cc7f4a9bdacc597251b02 Mon Sep 17 00:00:00 2001
From: "Daniel R. Carvalho" <odanrc@yahoo.com.br>
Date: Sat, 14 Jan 2023 09:51:32 -0300
Subject: [PATCH 125/492] base: Remove the Stats namespace

This namespace has gone through the deprecation period
and can now be removed.

Change-Id: I80f25af68e03fff3df8316cb4d1d2669687d0fe4
Signed-off-by: Daniel R. Carvalho <odanrc@yahoo.com.br>
Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/67359
Maintainer: Jason Lowe-Power <power.jg@gmail.com>
Reviewed-by: Jason Lowe-Power <power.jg@gmail.com>
Tested-by: kokoro <noreply+kokoro@google.com>
---
 src/base/statistics.cc               | 1 -
 src/base/statistics.hh               | 1 -
 src/base/stats/group.cc              | 1 -
 src/base/stats/group.hh              | 1 -
 src/base/stats/group.test.cc         | 6 +++---
 src/base/stats/hdf5.cc               | 1 -
 src/base/stats/hdf5.hh               | 1 -
 src/base/stats/info.cc               | 1 -
 src/base/stats/info.hh               | 1 -
 src/base/stats/output.hh             | 1 -
 src/base/stats/storage.cc            | 1 -
 src/base/stats/storage.hh            | 1 -
 src/base/stats/text.cc               | 1 -
 src/base/stats/text.hh               | 1 -
 src/base/stats/types.hh              | 1 -
 src/base/stats/units.hh              | 1 -
 src/python/pybind11/stats.cc         | 1 -
 src/sim/power/mathexpr_powermodel.hh | 1 -
 src/sim/stat_control.cc              | 1 -
 src/sim/stat_control.hh              | 1 -
 src/sim/stat_register.cc             | 1 -
 src/sim/stat_register.hh             | 1 -
 22 files changed, 3 insertions(+), 24 deletions(-)

diff --git a/src/base/statistics.cc b/src/base/statistics.cc
index c3801436e7..2fddf1bab6 100644
--- a/src/base/statistics.cc
+++ b/src/base/statistics.cc
@@ -53,7 +53,6 @@
 namespace gem5
 {
 
-GEM5_DEPRECATED_NAMESPACE(Stats, statistics);
 namespace statistics
 {
 
diff --git a/src/base/statistics.hh b/src/base/statistics.hh
index 24cbf714f5..8156be5a79 100644
--- a/src/base/statistics.hh
+++ b/src/base/statistics.hh
@@ -91,7 +91,6 @@ namespace gem5
 {
 
 /* A namespace for all of the Statistics */
-GEM5_DEPRECATED_NAMESPACE(Stats, statistics);
 namespace statistics
 {
 
diff --git a/src/base/stats/group.cc b/src/base/stats/group.cc
index d5626e6bb1..93e7183f0e 100644
--- a/src/base/stats/group.cc
+++ b/src/base/stats/group.cc
@@ -47,7 +47,6 @@
 namespace gem5
 {
 
-GEM5_DEPRECATED_NAMESPACE(Stats, statistics);
 namespace statistics
 {
 
diff --git a/src/base/stats/group.hh b/src/base/stats/group.hh
index bd1183e4a9..3c11e61138 100644
--- a/src/base/stats/group.hh
+++ b/src/base/stats/group.hh
@@ -74,7 +74,6 @@ namespace gem5
 
 #define ADD_STAT(n, ...) n(this, #n, __VA_ARGS__)
 
-GEM5_DEPRECATED_NAMESPACE(Stats, statistics);
 namespace statistics
 {
 
diff --git a/src/base/stats/group.test.cc b/src/base/stats/group.test.cc
index e2e059830c..5a7cde4928 100644
--- a/src/base/stats/group.test.cc
+++ b/src/base/stats/group.test.cc
@@ -255,10 +255,10 @@ TEST(StatsGroupTest, ConstructTwoLevelsUnbalancedTree)
     ASSERT_EQ(node2_2.getStatGroups().size(), 0);
 }
 
-class DummyInfo : public Stats::Info
+class DummyInfo : public statistics::Info
 {
   public:
-    using Stats::Info::Info;
+    using statistics::Info::Info;
 
     int value = 0;
 
@@ -266,7 +266,7 @@ class DummyInfo : public Stats::Info
     void prepare() override {}
     void reset() override { value = 0; }
     bool zero() const override { return false; }
-    void visit(Stats::Output &visitor) override {}
+    void visit(statistics::Output &visitor) override {}
 };
 
 /** Test adding stats to a group. */
diff --git a/src/base/stats/hdf5.cc b/src/base/stats/hdf5.cc
index 03574b2e4d..be548bf806 100644
--- a/src/base/stats/hdf5.cc
+++ b/src/base/stats/hdf5.cc
@@ -59,7 +59,6 @@ bool emptyStrings(const T &labels)
 }
 
 
-GEM5_DEPRECATED_NAMESPACE(Stats, statistics);
 namespace statistics
 {
 
diff --git a/src/base/stats/hdf5.hh b/src/base/stats/hdf5.hh
index 7fa99991a6..ac21ee8af1 100644
--- a/src/base/stats/hdf5.hh
+++ b/src/base/stats/hdf5.hh
@@ -53,7 +53,6 @@
 namespace gem5
 {
 
-GEM5_DEPRECATED_NAMESPACE(Stats, statistics);
 namespace statistics
 {
 
diff --git a/src/base/stats/info.cc b/src/base/stats/info.cc
index c40b55918e..06e7ec977d 100644
--- a/src/base/stats/info.cc
+++ b/src/base/stats/info.cc
@@ -52,7 +52,6 @@
 namespace gem5
 {
 
-GEM5_DEPRECATED_NAMESPACE(Stats, statistics);
 namespace statistics
 {
 
diff --git a/src/base/stats/info.hh b/src/base/stats/info.hh
index 9a5e2e77ab..98859cb0d9 100644
--- a/src/base/stats/info.hh
+++ b/src/base/stats/info.hh
@@ -43,7 +43,6 @@
 namespace gem5
 {
 
-GEM5_DEPRECATED_NAMESPACE(Stats, statistics);
 namespace statistics
 {
 
diff --git a/src/base/stats/output.hh b/src/base/stats/output.hh
index 39b0804a40..23531e8493 100644
--- a/src/base/stats/output.hh
+++ b/src/base/stats/output.hh
@@ -49,7 +49,6 @@
 namespace gem5
 {
 
-GEM5_DEPRECATED_NAMESPACE(Stats, statistics);
 namespace statistics
 {
 
diff --git a/src/base/stats/storage.cc b/src/base/stats/storage.cc
index 6b32dc501a..3b2c091815 100644
--- a/src/base/stats/storage.cc
+++ b/src/base/stats/storage.cc
@@ -46,7 +46,6 @@
 namespace gem5
 {
 
-GEM5_DEPRECATED_NAMESPACE(Stats, statistics);
 namespace statistics
 {
 
diff --git a/src/base/stats/storage.hh b/src/base/stats/storage.hh
index cf22e10080..eb1873b934 100644
--- a/src/base/stats/storage.hh
+++ b/src/base/stats/storage.hh
@@ -42,7 +42,6 @@
 namespace gem5
 {
 
-GEM5_DEPRECATED_NAMESPACE(Stats, statistics);
 namespace statistics
 {
 
diff --git a/src/base/stats/text.cc b/src/base/stats/text.cc
index db5743ac57..36282a35d7 100644
--- a/src/base/stats/text.cc
+++ b/src/base/stats/text.cc
@@ -67,7 +67,6 @@ constexpr auto Nan = std::numeric_limits<float>::quiet_NaN();
 
 } // anonymous namespace
 
-GEM5_DEPRECATED_NAMESPACE(Stats, statistics);
 namespace statistics
 {
 
diff --git a/src/base/stats/text.hh b/src/base/stats/text.hh
index 4bbe3eadfe..7be498d8da 100644
--- a/src/base/stats/text.hh
+++ b/src/base/stats/text.hh
@@ -53,7 +53,6 @@
 namespace gem5
 {
 
-GEM5_DEPRECATED_NAMESPACE(Stats, statistics);
 namespace statistics
 {
 
diff --git a/src/base/stats/types.hh b/src/base/stats/types.hh
index 92d594ab77..14f89caff3 100644
--- a/src/base/stats/types.hh
+++ b/src/base/stats/types.hh
@@ -39,7 +39,6 @@
 namespace gem5
 {
 
-GEM5_DEPRECATED_NAMESPACE(Stats, statistics);
 namespace statistics
 {
 
diff --git a/src/base/stats/units.hh b/src/base/stats/units.hh
index 52e2e57ce8..fe5b23d878 100644
--- a/src/base/stats/units.hh
+++ b/src/base/stats/units.hh
@@ -75,7 +75,6 @@ namespace gem5
         UNIT_UNSPECIFIED, statistics::units::Unspecified::get(), \
         "Use statistics::units::Unspecified::get()")
 
-GEM5_DEPRECATED_NAMESPACE(Stats, statistics);
 namespace statistics
 {
 
diff --git a/src/python/pybind11/stats.cc b/src/python/pybind11/stats.cc
index 2c60b47ee4..266f47e52a 100644
--- a/src/python/pybind11/stats.cc
+++ b/src/python/pybind11/stats.cc
@@ -83,7 +83,6 @@ cast_stat_info(const statistics::Info *info)
 #undef TRY_CAST
 }
 
-GEM5_DEPRECATED_NAMESPACE(Stats, statistics);
 namespace statistics
 {
 
diff --git a/src/sim/power/mathexpr_powermodel.hh b/src/sim/power/mathexpr_powermodel.hh
index 25338eea43..f05214a16f 100644
--- a/src/sim/power/mathexpr_powermodel.hh
+++ b/src/sim/power/mathexpr_powermodel.hh
@@ -47,7 +47,6 @@
 namespace gem5
 {
 
-GEM5_DEPRECATED_NAMESPACE(Stats, statistics);
 namespace statistics
 {
     class Info;
diff --git a/src/sim/stat_control.cc b/src/sim/stat_control.cc
index c388539551..99c694a384 100644
--- a/src/sim/stat_control.cc
+++ b/src/sim/stat_control.cc
@@ -57,7 +57,6 @@
 namespace gem5
 {
 
-GEM5_DEPRECATED_NAMESPACE(Stats, statistics);
 namespace statistics
 {
 
diff --git a/src/sim/stat_control.hh b/src/sim/stat_control.hh
index 22d3134370..35d3ea8fcb 100644
--- a/src/sim/stat_control.hh
+++ b/src/sim/stat_control.hh
@@ -48,7 +48,6 @@
 namespace gem5
 {
 
-GEM5_DEPRECATED_NAMESPACE(Stats, statistics);
 namespace statistics
 {
 
diff --git a/src/sim/stat_register.cc b/src/sim/stat_register.cc
index fb3db1e4b1..5e4bf3908e 100644
--- a/src/sim/stat_register.cc
+++ b/src/sim/stat_register.cc
@@ -42,7 +42,6 @@
 namespace gem5
 {
 
-GEM5_DEPRECATED_NAMESPACE(Stats, statistics);
 namespace statistics
 {
 
diff --git a/src/sim/stat_register.hh b/src/sim/stat_register.hh
index d2504f3f02..e84e8ebdb6 100644
--- a/src/sim/stat_register.hh
+++ b/src/sim/stat_register.hh
@@ -47,7 +47,6 @@
 namespace gem5
 {
 
-GEM5_DEPRECATED_NAMESPACE(Stats, statistics);
 namespace statistics
 {
 

From d2bfb4aeef4dea175482093c42744fbdb8f55f33 Mon Sep 17 00:00:00 2001
From: "Daniel R. Carvalho" <odanrc@yahoo.com.br>
Date: Sat, 14 Jan 2023 09:58:30 -0300
Subject: [PATCH 126/492] base: Remove the Debug namespace

This namespace has gone through the deprecation period
and can now be removed.

Change-Id: I4241501f3683c1daa8554693cba7aa2c022db130
Signed-off-by: Daniel R. Carvalho <odanrc@yahoo.com.br>
Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/67360
Reviewed-by: Richard Cooper <richard.cooper@arm.com>
Tested-by: kokoro <noreply+kokoro@google.com>
Maintainer: Jason Lowe-Power <power.jg@gmail.com>
---
 build_tools/debugflaghh.py | 1 -
 src/base/debug.cc          | 1 -
 src/base/debug.hh          | 1 -
 3 files changed, 3 deletions(-)

diff --git a/build_tools/debugflaghh.py b/build_tools/debugflaghh.py
index 2e861e2790..1a4a379204 100644
--- a/build_tools/debugflaghh.py
+++ b/build_tools/debugflaghh.py
@@ -82,7 +82,6 @@ code(
 namespace gem5
 {
 
-GEM5_DEPRECATED_NAMESPACE(Debug, debug);
 namespace debug
 {
 
diff --git a/src/base/debug.cc b/src/base/debug.cc
index aa4092afc1..73b52f311f 100644
--- a/src/base/debug.cc
+++ b/src/base/debug.cc
@@ -52,7 +52,6 @@
 namespace gem5
 {
 
-GEM5_DEPRECATED_NAMESPACE(Debug, debug);
 namespace debug
 {
 
diff --git a/src/base/debug.hh b/src/base/debug.hh
index f6b03ae2a7..3941e66022 100644
--- a/src/base/debug.hh
+++ b/src/base/debug.hh
@@ -53,7 +53,6 @@
 namespace gem5
 {
 
-GEM5_DEPRECATED_NAMESPACE(Debug, debug);
 namespace debug
 {
 

From 544d53798b9f931c68de98c5f9c7d741eb0a14b1 Mon Sep 17 00:00:00 2001
From: "Daniel R. Carvalho" <odanrc@yahoo.com.br>
Date: Sat, 14 Jan 2023 09:53:34 -0300
Subject: [PATCH 127/492] base: Remove the Units namespace

This namespace has gone through the deprecation period
and can now be removed.

Change-Id: I3d885e656caea0f96dfbdda69713832ff5f79d28
Signed-off-by: Daniel R. Carvalho <odanrc@yahoo.com.br>
Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/67361
Reviewed-by: Richard Cooper <richard.cooper@arm.com>
Maintainer: Jason Lowe-Power <power.jg@gmail.com>
Tested-by: kokoro <noreply+kokoro@google.com>
---
 src/base/stats/units.hh | 1 -
 1 file changed, 1 deletion(-)

diff --git a/src/base/stats/units.hh b/src/base/stats/units.hh
index fe5b23d878..1d7d640ddb 100644
--- a/src/base/stats/units.hh
+++ b/src/base/stats/units.hh
@@ -109,7 +109,6 @@ namespace statistics
  *   - The new unit is significant enough to be not included in Count unit.
  *     (e.g. Cycle unit, Tick unit)
  */
-GEM5_DEPRECATED_NAMESPACE(Units, units);
 namespace units
 {
 

From cc3d75ad72e533f6daabcb4722091bf6199e0c48 Mon Sep 17 00:00:00 2001
From: "Daniel R. Carvalho" <odanrc@yahoo.com.br>
Date: Sat, 14 Jan 2023 09:59:03 -0300
Subject: [PATCH 128/492] base: Remove the Loader namespace

This namespace has gone through the deprecation period
and can now be removed.

Change-Id: I87b763fccfcdf720909dfbda9c3fc8f6dea36a61
Signed-off-by: Daniel R. Carvalho <odanrc@yahoo.com.br>
Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/67362
Tested-by: kokoro <noreply+kokoro@google.com>
---
 src/arch/mips/process.hh           |   1 -
 src/arch/power/process.hh          |   1 -
 src/arch/riscv/process.hh          |   1 -
 src/base/loader/dtb_file.cc        |   1 -
 src/base/loader/dtb_file.hh        |   1 -
 src/base/loader/elf_object.cc      |   1 -
 src/base/loader/elf_object.hh      |   1 -
 src/base/loader/image_file.hh      |   1 -
 src/base/loader/image_file_data.cc |   1 -
 src/base/loader/image_file_data.hh |   1 -
 src/base/loader/memory_image.cc    |   1 -
 src/base/loader/memory_image.hh    |   1 -
 src/base/loader/object_file.cc     |   1 -
 src/base/loader/object_file.hh     |   1 -
 src/base/loader/raw_image.hh       |   1 -
 src/base/loader/symtab.cc          |   1 -
 src/base/loader/symtab.hh          |   1 -
 src/base/loader/symtab.test.cc     | 290 ++++++++++++++---------------
 src/cpu/profile.hh                 |   1 -
 src/cpu/static_inst.hh             |   1 -
 src/sim/process.hh                 |   1 -
 21 files changed, 145 insertions(+), 165 deletions(-)

diff --git a/src/arch/mips/process.hh b/src/arch/mips/process.hh
index 181dd25497..8b84ec198c 100644
--- a/src/arch/mips/process.hh
+++ b/src/arch/mips/process.hh
@@ -34,7 +34,6 @@
 namespace gem5
 {
 
-GEM5_DEPRECATED_NAMESPACE(Loader, loader);
 namespace loader
 {
 class ObjectFile;
diff --git a/src/arch/power/process.hh b/src/arch/power/process.hh
index c8d8a79864..9576c352b9 100644
--- a/src/arch/power/process.hh
+++ b/src/arch/power/process.hh
@@ -36,7 +36,6 @@
 namespace gem5
 {
 
-GEM5_DEPRECATED_NAMESPACE(Loader, loader);
 namespace loader
 {
 class ObjectFile;
diff --git a/src/arch/riscv/process.hh b/src/arch/riscv/process.hh
index ca0a050349..64b9593965 100644
--- a/src/arch/riscv/process.hh
+++ b/src/arch/riscv/process.hh
@@ -40,7 +40,6 @@
 namespace gem5
 {
 
-GEM5_DEPRECATED_NAMESPACE(Loader, loader);
 namespace loader
 {
 class ObjectFile;
diff --git a/src/base/loader/dtb_file.cc b/src/base/loader/dtb_file.cc
index 13e0264e92..f083b3e1fe 100644
--- a/src/base/loader/dtb_file.cc
+++ b/src/base/loader/dtb_file.cc
@@ -40,7 +40,6 @@
 namespace gem5
 {
 
-GEM5_DEPRECATED_NAMESPACE(Loader, loader);
 namespace loader
 {
 
diff --git a/src/base/loader/dtb_file.hh b/src/base/loader/dtb_file.hh
index c11b19539e..bed7cfc6f6 100644
--- a/src/base/loader/dtb_file.hh
+++ b/src/base/loader/dtb_file.hh
@@ -35,7 +35,6 @@
 namespace gem5
 {
 
-GEM5_DEPRECATED_NAMESPACE(Loader, loader);
 namespace loader
 {
 
diff --git a/src/base/loader/elf_object.cc b/src/base/loader/elf_object.cc
index dc2abb8dfc..4b1467acf0 100644
--- a/src/base/loader/elf_object.cc
+++ b/src/base/loader/elf_object.cc
@@ -61,7 +61,6 @@
 namespace gem5
 {
 
-GEM5_DEPRECATED_NAMESPACE(Loader, loader);
 namespace loader
 {
 
diff --git a/src/base/loader/elf_object.hh b/src/base/loader/elf_object.hh
index 6159b35a7b..f08449206e 100644
--- a/src/base/loader/elf_object.hh
+++ b/src/base/loader/elf_object.hh
@@ -51,7 +51,6 @@
 namespace gem5
 {
 
-GEM5_DEPRECATED_NAMESPACE(Loader, loader);
 namespace loader
 {
 
diff --git a/src/base/loader/image_file.hh b/src/base/loader/image_file.hh
index 194c9567d7..f1d39555ec 100644
--- a/src/base/loader/image_file.hh
+++ b/src/base/loader/image_file.hh
@@ -39,7 +39,6 @@
 namespace gem5
 {
 
-GEM5_DEPRECATED_NAMESPACE(Loader, loader);
 namespace loader
 {
 
diff --git a/src/base/loader/image_file_data.cc b/src/base/loader/image_file_data.cc
index 57fb47fd4c..525d577936 100644
--- a/src/base/loader/image_file_data.cc
+++ b/src/base/loader/image_file_data.cc
@@ -42,7 +42,6 @@
 namespace gem5
 {
 
-GEM5_DEPRECATED_NAMESPACE(Loader, loader);
 namespace loader
 {
 
diff --git a/src/base/loader/image_file_data.hh b/src/base/loader/image_file_data.hh
index d02c499d1e..4d1701d9a3 100644
--- a/src/base/loader/image_file_data.hh
+++ b/src/base/loader/image_file_data.hh
@@ -38,7 +38,6 @@
 namespace gem5
 {
 
-GEM5_DEPRECATED_NAMESPACE(Loader, loader);
 namespace loader
 {
 
diff --git a/src/base/loader/memory_image.cc b/src/base/loader/memory_image.cc
index 5537f28023..a3f378c10b 100644
--- a/src/base/loader/memory_image.cc
+++ b/src/base/loader/memory_image.cc
@@ -32,7 +32,6 @@
 namespace gem5
 {
 
-GEM5_DEPRECATED_NAMESPACE(Loader, loader);
 namespace loader
 {
 
diff --git a/src/base/loader/memory_image.hh b/src/base/loader/memory_image.hh
index 2c56f4c088..1207e7458d 100644
--- a/src/base/loader/memory_image.hh
+++ b/src/base/loader/memory_image.hh
@@ -46,7 +46,6 @@ namespace gem5
 
 class PortProxy;
 
-GEM5_DEPRECATED_NAMESPACE(Loader, loader);
 namespace loader
 {
 
diff --git a/src/base/loader/object_file.cc b/src/base/loader/object_file.cc
index 3aa5915cdb..287f9107a6 100644
--- a/src/base/loader/object_file.cc
+++ b/src/base/loader/object_file.cc
@@ -48,7 +48,6 @@
 namespace gem5
 {
 
-GEM5_DEPRECATED_NAMESPACE(Loader, loader);
 namespace loader
 {
 
diff --git a/src/base/loader/object_file.hh b/src/base/loader/object_file.hh
index 0415bec62e..f0781165a1 100644
--- a/src/base/loader/object_file.hh
+++ b/src/base/loader/object_file.hh
@@ -55,7 +55,6 @@
 namespace gem5
 {
 
-GEM5_DEPRECATED_NAMESPACE(Loader, loader);
 namespace loader
 {
 
diff --git a/src/base/loader/raw_image.hh b/src/base/loader/raw_image.hh
index 7321ea40bf..29f4340beb 100644
--- a/src/base/loader/raw_image.hh
+++ b/src/base/loader/raw_image.hh
@@ -35,7 +35,6 @@
 namespace gem5
 {
 
-GEM5_DEPRECATED_NAMESPACE(Loader, loader);
 namespace loader
 {
 
diff --git a/src/base/loader/symtab.cc b/src/base/loader/symtab.cc
index f2f54e937b..941ea101c9 100644
--- a/src/base/loader/symtab.cc
+++ b/src/base/loader/symtab.cc
@@ -37,7 +37,6 @@
 namespace gem5
 {
 
-GEM5_DEPRECATED_NAMESPACE(Loader, loader);
 namespace loader
 {
 
diff --git a/src/base/loader/symtab.hh b/src/base/loader/symtab.hh
index e48e400a4f..2e50523c32 100644
--- a/src/base/loader/symtab.hh
+++ b/src/base/loader/symtab.hh
@@ -44,7 +44,6 @@
 namespace gem5
 {
 
-GEM5_DEPRECATED_NAMESPACE(Loader, loader);
 namespace loader
 {
 
diff --git a/src/base/loader/symtab.test.cc b/src/base/loader/symtab.test.cc
index e9edb113be..313055392a 100644
--- a/src/base/loader/symtab.test.cc
+++ b/src/base/loader/symtab.test.cc
@@ -48,7 +48,7 @@ using namespace gem5;
  * @return The error string, if any.
  */
 std::string
-getSymbolError(const Loader::Symbol& symbol, const Loader::Symbol& expected)
+getSymbolError(const loader::Symbol& symbol, const loader::Symbol& expected)
 {
     std::stringstream ss;
 
@@ -83,7 +83,7 @@ getSymbolError(const Loader::Symbol& symbol, const Loader::Symbol& expected)
  */
 ::testing::AssertionResult
 checkSymbol(const char* m_symbol, const char* m_expected,
-    const Loader::Symbol& symbol, const Loader::Symbol& expected)
+    const loader::Symbol& symbol, const loader::Symbol& expected)
 {
     const std::string error = getSymbolError(symbol, expected);
     if (!error.empty()) {
@@ -101,8 +101,8 @@ checkSymbol(const char* m_symbol, const char* m_expected,
  * @return A GTest's assertion result, with error message on failure.
  */
 ::testing::AssertionResult
-checkTable(const Loader::SymbolTable& symtab,
-    const std::initializer_list<Loader::Symbol>& expected)
+checkTable(const loader::SymbolTable& symtab,
+    const std::initializer_list<loader::Symbol>& expected)
 {
     if (expected.size() != (symtab.end() - symtab.begin())) {
         return ::testing::AssertionFailure() << "the number of symbols in "
@@ -126,7 +126,7 @@ checkTable(const Loader::SymbolTable& symtab,
 /** Test that the constructor creates an empty table. */
 TEST(LoaderSymtabTest, EmptyConstruction)
 {
-    Loader::SymbolTable symtab;
+    loader::SymbolTable symtab;
     ASSERT_TRUE(symtab.empty());
     ASSERT_TRUE(checkTable(symtab, {}));
 }
@@ -134,9 +134,9 @@ TEST(LoaderSymtabTest, EmptyConstruction)
 /** Test that the insertion of a symbol with no name fails. */
 TEST(LoaderSymtabTest, InsertSymbolNoName)
 {
-    Loader::SymbolTable symtab;
+    loader::SymbolTable symtab;
 
-    Loader::Symbol symbol = {Loader::Symbol::Binding::Local, "", 0x10};
+    loader::Symbol symbol = {loader::Symbol::Binding::Local, "", 0x10};
     ASSERT_FALSE(symtab.insert(symbol));
     ASSERT_TRUE(checkTable(symtab, {}));
 }
@@ -144,9 +144,9 @@ TEST(LoaderSymtabTest, InsertSymbolNoName)
 /** Test that the insertion of one symbol in an empty table works. */
 TEST(LoaderSymtabTest, InsertOneSymbol)
 {
-    Loader::SymbolTable symtab;
+    loader::SymbolTable symtab;
 
-    Loader::Symbol symbol = {Loader::Symbol::Binding::Local, "symbol", 0x10};
+    loader::Symbol symbol = {loader::Symbol::Binding::Local, "symbol", 0x10};
     ASSERT_TRUE(symtab.insert(symbol));
 
     ASSERT_FALSE(symtab.empty());
@@ -156,12 +156,12 @@ TEST(LoaderSymtabTest, InsertOneSymbol)
 /** Test that the insertion of a symbol with an existing name fails. */
 TEST(LoaderSymtabTest, InsertSymbolExistingName)
 {
-    Loader::SymbolTable symtab;
+    loader::SymbolTable symtab;
 
     const std::string name = "symbol";
-    Loader::Symbol symbols[] = {
-        {Loader::Symbol::Binding::Local, name, 0x10},
-        {Loader::Symbol::Binding::Local, name, 0x20},
+    loader::Symbol symbols[] = {
+        {loader::Symbol::Binding::Local, name, 0x10},
+        {loader::Symbol::Binding::Local, name, 0x20},
     };
     ASSERT_TRUE(symtab.insert(symbols[0]));
     ASSERT_FALSE(symtab.insert(symbols[1]));
@@ -173,12 +173,12 @@ TEST(LoaderSymtabTest, InsertSymbolExistingName)
 /** Test that the insertion of a symbol with an existing address works. */
 TEST(LoaderSymtabTest, InsertSymbolExistingAddress)
 {
-    Loader::SymbolTable symtab;
+    loader::SymbolTable symtab;
 
     const Addr addr = 0x10;
-    Loader::Symbol symbols[] = {
-        {Loader::Symbol::Binding::Local, "symbol", addr},
-        {Loader::Symbol::Binding::Local, "symbol2", addr},
+    loader::Symbol symbols[] = {
+        {loader::Symbol::Binding::Local, "symbol", addr},
+        {loader::Symbol::Binding::Local, "symbol2", addr},
     };
     ASSERT_TRUE(symtab.insert(symbols[0]));
     ASSERT_TRUE(symtab.insert(symbols[1]));
@@ -190,12 +190,12 @@ TEST(LoaderSymtabTest, InsertSymbolExistingAddress)
 /** Test that the insertion of one symbol in a non-empty table works. */
 TEST(LoaderSymtabTest, InsertMultipleSymbols)
 {
-    Loader::SymbolTable symtab;
+    loader::SymbolTable symtab;
 
-    Loader::Symbol symbols[] = {
-        {Loader::Symbol::Binding::Local, "symbol", 0x10},
-        {Loader::Symbol::Binding::Local, "symbol2", 0x20},
-        {Loader::Symbol::Binding::Local, "symbol3", 0x30},
+    loader::Symbol symbols[] = {
+        {loader::Symbol::Binding::Local, "symbol", 0x10},
+        {loader::Symbol::Binding::Local, "symbol2", 0x20},
+        {loader::Symbol::Binding::Local, "symbol3", 0x30},
     };
     EXPECT_TRUE(symtab.insert(symbols[0]));
     EXPECT_TRUE(symtab.insert(symbols[1]));
@@ -209,12 +209,12 @@ TEST(LoaderSymtabTest, InsertMultipleSymbols)
  */
 TEST(LoaderSymtabTest, ClearMultiple)
 {
-    Loader::SymbolTable symtab;
+    loader::SymbolTable symtab;
 
-    Loader::Symbol symbols[] = {
-        {Loader::Symbol::Binding::Local, "symbol", 0x10},
-        {Loader::Symbol::Binding::Local, "symbol2", 0x20},
-        {Loader::Symbol::Binding::Local, "symbol3", 0x30},
+    loader::Symbol symbols[] = {
+        {loader::Symbol::Binding::Local, "symbol", 0x10},
+        {loader::Symbol::Binding::Local, "symbol2", 0x20},
+        {loader::Symbol::Binding::Local, "symbol3", 0x30},
     };
     EXPECT_TRUE(symtab.insert(symbols[0]));
     EXPECT_TRUE(symtab.insert(symbols[1]));
@@ -231,12 +231,12 @@ TEST(LoaderSymtabTest, ClearMultiple)
  */
 TEST(LoaderSymtabTest, Offset)
 {
-    Loader::SymbolTable symtab;
+    loader::SymbolTable symtab;
 
-    Loader::Symbol symbols[] = {
-        {Loader::Symbol::Binding::Local, "symbol", 0x10},
-        {Loader::Symbol::Binding::Local, "symbol2", 0x20},
-        {Loader::Symbol::Binding::Local, "symbol3", 0x30},
+    loader::Symbol symbols[] = {
+        {loader::Symbol::Binding::Local, "symbol", 0x10},
+        {loader::Symbol::Binding::Local, "symbol2", 0x20},
+        {loader::Symbol::Binding::Local, "symbol3", 0x30},
     };
     EXPECT_TRUE(symtab.insert(symbols[0]));
     EXPECT_TRUE(symtab.insert(symbols[1]));
@@ -249,7 +249,7 @@ TEST(LoaderSymtabTest, Offset)
     ASSERT_TRUE(checkTable(symtab, {symbols[0], symbols[1], symbols[2]}));
 
     // Check that the new table is offset
-    Loader::Symbol expected_symbols[] = {
+    loader::Symbol expected_symbols[] = {
         {symbols[0].binding, symbols[0].name, symbols[0].address + offset},
         {symbols[1].binding, symbols[1].name, symbols[1].address + offset},
         {symbols[2].binding, symbols[2].name, symbols[2].address + offset},
@@ -264,13 +264,13 @@ TEST(LoaderSymtabTest, Offset)
  */
 TEST(LoaderSymtabTest, Mask)
 {
-    Loader::SymbolTable symtab;
+    loader::SymbolTable symtab;
 
-    Loader::Symbol symbols[] = {
-        {Loader::Symbol::Binding::Local, "symbol", 0x1310},
-        {Loader::Symbol::Binding::Local, "symbol2", 0x2810},
-        {Loader::Symbol::Binding::Local, "symbol3", 0x2920},
-        {Loader::Symbol::Binding::Local, "symbol4", 0x3C20},
+    loader::Symbol symbols[] = {
+        {loader::Symbol::Binding::Local, "symbol", 0x1310},
+        {loader::Symbol::Binding::Local, "symbol2", 0x2810},
+        {loader::Symbol::Binding::Local, "symbol3", 0x2920},
+        {loader::Symbol::Binding::Local, "symbol4", 0x3C20},
     };
     EXPECT_TRUE(symtab.insert(symbols[0]));
     EXPECT_TRUE(symtab.insert(symbols[1]));
@@ -285,7 +285,7 @@ TEST(LoaderSymtabTest, Mask)
         symbols[3]}));
 
     // Check that the new table is masked
-    Loader::Symbol expected_symbols[] = {
+    loader::Symbol expected_symbols[] = {
         {symbols[0].binding, symbols[0].name, symbols[0].address & mask},
         {symbols[1].binding, symbols[1].name, symbols[1].address & mask},
         {symbols[2].binding, symbols[2].name, symbols[2].address & mask},
@@ -301,13 +301,13 @@ TEST(LoaderSymtabTest, Mask)
  */
 TEST(LoaderSymtabTest, Rename)
 {
-    Loader::SymbolTable symtab;
+    loader::SymbolTable symtab;
 
-    Loader::Symbol symbols[] = {
-        {Loader::Symbol::Binding::Local, "symbol", 0x10},
-        {Loader::Symbol::Binding::Local, "symbol2", 0x20},
-        {Loader::Symbol::Binding::Local, "symbol3", 0x30},
-        {Loader::Symbol::Binding::Local, "symbol4", 0x40},
+    loader::Symbol symbols[] = {
+        {loader::Symbol::Binding::Local, "symbol", 0x10},
+        {loader::Symbol::Binding::Local, "symbol2", 0x20},
+        {loader::Symbol::Binding::Local, "symbol3", 0x30},
+        {loader::Symbol::Binding::Local, "symbol4", 0x40},
     };
     EXPECT_TRUE(symtab.insert(symbols[0]));
     EXPECT_TRUE(symtab.insert(symbols[1]));
@@ -322,7 +322,7 @@ TEST(LoaderSymtabTest, Rename)
         symbols[3]}));
 
     // Check that the new table's symbols have been renamed
-    Loader::Symbol expected_symbols[] = {
+    loader::Symbol expected_symbols[] = {
         {symbols[0].binding, symbols[0].name + "_suffix", symbols[0].address},
         {symbols[1].binding, symbols[1].name + "_suffix", symbols[1].address},
         {symbols[2].binding, symbols[2].name + "_suffix", symbols[2].address},
@@ -338,13 +338,13 @@ TEST(LoaderSymtabTest, Rename)
  */
 TEST(LoaderSymtabTest, RenameNonUnique)
 {
-    Loader::SymbolTable symtab;
+    loader::SymbolTable symtab;
 
-    Loader::Symbol symbols[] = {
-        {Loader::Symbol::Binding::Local, "symbol", 0x10},
-        {Loader::Symbol::Binding::Local, "symbol2", 0x20},
-        {Loader::Symbol::Binding::Local, "symbol3", 0x30},
-        {Loader::Symbol::Binding::Local, "symbol4", 0x40},
+    loader::Symbol symbols[] = {
+        {loader::Symbol::Binding::Local, "symbol", 0x10},
+        {loader::Symbol::Binding::Local, "symbol2", 0x20},
+        {loader::Symbol::Binding::Local, "symbol3", 0x30},
+        {loader::Symbol::Binding::Local, "symbol4", 0x40},
     };
     EXPECT_TRUE(symtab.insert(symbols[0]));
     EXPECT_TRUE(symtab.insert(symbols[1]));
@@ -365,7 +365,7 @@ TEST(LoaderSymtabTest, RenameNonUnique)
 
     // Check that the new table's symbols have been renamed, yet it does not
     // contain the symbols with duplicated names
-    Loader::Symbol expected_symbols[] = {
+    loader::Symbol expected_symbols[] = {
         {symbols[0].binding, "NonUniqueName", symbols[0].address},
         {symbols[1].binding, symbols[1].name, symbols[1].address},
         {symbols[3].binding, symbols[3].name, symbols[3].address},
@@ -380,14 +380,14 @@ TEST(LoaderSymtabTest, RenameNonUnique)
  */
 TEST(LoaderSymtabTest, Globals)
 {
-    Loader::SymbolTable symtab;
+    loader::SymbolTable symtab;
 
-    Loader::Symbol symbols[] = {
-        {Loader::Symbol::Binding::Local, "symbol", 0x10},
-        {Loader::Symbol::Binding::Global, "symbol2", 0x20},
-        {Loader::Symbol::Binding::Local, "symbol3", 0x30},
-        {Loader::Symbol::Binding::Weak, "symbol4", 0x40},
-        {Loader::Symbol::Binding::Weak, "symbol5", 0x50}
+    loader::Symbol symbols[] = {
+        {loader::Symbol::Binding::Local, "symbol", 0x10},
+        {loader::Symbol::Binding::Global, "symbol2", 0x20},
+        {loader::Symbol::Binding::Local, "symbol3", 0x30},
+        {loader::Symbol::Binding::Weak, "symbol4", 0x40},
+        {loader::Symbol::Binding::Weak, "symbol5", 0x50}
     };
     EXPECT_TRUE(symtab.insert(symbols[0]));
     EXPECT_TRUE(symtab.insert(symbols[1]));
@@ -411,14 +411,14 @@ TEST(LoaderSymtabTest, Globals)
  */
 TEST(LoaderSymtabTest, Locals)
 {
-    Loader::SymbolTable symtab;
+    loader::SymbolTable symtab;
 
-    Loader::Symbol symbols[] = {
-        {Loader::Symbol::Binding::Local, "symbol", 0x10},
-        {Loader::Symbol::Binding::Global, "symbol2", 0x20},
-        {Loader::Symbol::Binding::Local, "symbol3", 0x30},
-        {Loader::Symbol::Binding::Weak, "symbol4", 0x40},
-        {Loader::Symbol::Binding::Weak, "symbol5", 0x50}
+    loader::Symbol symbols[] = {
+        {loader::Symbol::Binding::Local, "symbol", 0x10},
+        {loader::Symbol::Binding::Global, "symbol2", 0x20},
+        {loader::Symbol::Binding::Local, "symbol3", 0x30},
+        {loader::Symbol::Binding::Weak, "symbol4", 0x40},
+        {loader::Symbol::Binding::Weak, "symbol5", 0x50}
     };
     EXPECT_TRUE(symtab.insert(symbols[0]));
     EXPECT_TRUE(symtab.insert(symbols[1]));
@@ -442,14 +442,14 @@ TEST(LoaderSymtabTest, Locals)
  */
 TEST(LoaderSymtabTest, Weaks)
 {
-    Loader::SymbolTable symtab;
+    loader::SymbolTable symtab;
 
-    Loader::Symbol symbols[] = {
-        {Loader::Symbol::Binding::Local, "symbol", 0x10},
-        {Loader::Symbol::Binding::Global, "symbol2", 0x20},
-        {Loader::Symbol::Binding::Local, "symbol3", 0x30},
-        {Loader::Symbol::Binding::Weak, "symbol4", 0x40},
-        {Loader::Symbol::Binding::Weak, "symbol5", 0x50}
+    loader::Symbol symbols[] = {
+        {loader::Symbol::Binding::Local, "symbol", 0x10},
+        {loader::Symbol::Binding::Global, "symbol2", 0x20},
+        {loader::Symbol::Binding::Local, "symbol3", 0x30},
+        {loader::Symbol::Binding::Weak, "symbol4", 0x40},
+        {loader::Symbol::Binding::Weak, "symbol5", 0x50}
     };
     EXPECT_TRUE(symtab.insert(symbols[0]));
     EXPECT_TRUE(symtab.insert(symbols[1]));
@@ -470,9 +470,9 @@ TEST(LoaderSymtabTest, Weaks)
 /** Test searching for a non-existent address. */
 TEST(LoaderSymtabTest, FindNonExistentAddress)
 {
-    Loader::SymbolTable symtab;
+    loader::SymbolTable symtab;
 
-    Loader::Symbol symbol = {Loader::Symbol::Binding::Local, "symbol", 0x10};
+    loader::Symbol symbol = {loader::Symbol::Binding::Local, "symbol", 0x10};
     EXPECT_TRUE(symtab.insert(symbol));
 
     ASSERT_EQ(symtab.find(0x0), symtab.end());
@@ -481,12 +481,12 @@ TEST(LoaderSymtabTest, FindNonExistentAddress)
 /** Test searching for a unique address. */
 TEST(LoaderSymtabTest, FindUniqueAddress)
 {
-    Loader::SymbolTable symtab;
+    loader::SymbolTable symtab;
 
-    Loader::Symbol symbols[] = {
-        {Loader::Symbol::Binding::Local, "symbol", 0x10},
-        {Loader::Symbol::Binding::Local, "symbol2", 0x20},
-        {Loader::Symbol::Binding::Local, "symbol3", 0x30},
+    loader::Symbol symbols[] = {
+        {loader::Symbol::Binding::Local, "symbol", 0x10},
+        {loader::Symbol::Binding::Local, "symbol2", 0x20},
+        {loader::Symbol::Binding::Local, "symbol3", 0x30},
     };
     EXPECT_TRUE(symtab.insert(symbols[0]));
     EXPECT_TRUE(symtab.insert(symbols[1]));
@@ -502,13 +502,13 @@ TEST(LoaderSymtabTest, FindUniqueAddress)
  */
 TEST(LoaderSymtabTest, FindNonUniqueAddress)
 {
-    Loader::SymbolTable symtab;
+    loader::SymbolTable symtab;
 
     const Addr addr = 0x20;
-    Loader::Symbol symbols[] = {
-        {Loader::Symbol::Binding::Local, "symbol", 0x10},
-        {Loader::Symbol::Binding::Local, "symbol2", addr},
-        {Loader::Symbol::Binding::Local, "symbol3", addr},
+    loader::Symbol symbols[] = {
+        {loader::Symbol::Binding::Local, "symbol", 0x10},
+        {loader::Symbol::Binding::Local, "symbol2", addr},
+        {loader::Symbol::Binding::Local, "symbol3", addr},
     };
     EXPECT_TRUE(symtab.insert(symbols[0]));
     EXPECT_TRUE(symtab.insert(symbols[1]));
@@ -522,9 +522,9 @@ TEST(LoaderSymtabTest, FindNonUniqueAddress)
 /** Test searching for a non-existent name. */
 TEST(LoaderSymtabTest, FindNonExistentName)
 {
-    Loader::SymbolTable symtab;
+    loader::SymbolTable symtab;
 
-    Loader::Symbol symbol = {Loader::Symbol::Binding::Local, "symbol", 0x10};
+    loader::Symbol symbol = {loader::Symbol::Binding::Local, "symbol", 0x10};
     EXPECT_TRUE(symtab.insert(symbol));
 
     const auto it = symtab.find("symbol2");
@@ -534,12 +534,12 @@ TEST(LoaderSymtabTest, FindNonExistentName)
 /** Test searching for an existing name. */
 TEST(LoaderSymtabTest, FindExistingName)
 {
-    Loader::SymbolTable symtab;
+    loader::SymbolTable symtab;
 
-    Loader::Symbol symbols[] = {
-        {Loader::Symbol::Binding::Local, "symbol", 0x10},
-        {Loader::Symbol::Binding::Local, "symbol2", 0x20},
-        {Loader::Symbol::Binding::Local, "symbol3", 0x30},
+    loader::Symbol symbols[] = {
+        {loader::Symbol::Binding::Local, "symbol", 0x10},
+        {loader::Symbol::Binding::Local, "symbol2", 0x20},
+        {loader::Symbol::Binding::Local, "symbol3", 0x30},
     };
     EXPECT_TRUE(symtab.insert(symbols[0]));
     EXPECT_TRUE(symtab.insert(symbols[1]));
@@ -553,11 +553,11 @@ TEST(LoaderSymtabTest, FindExistingName)
 /** Test searching for an existent address using findNearest. */
 TEST(LoaderSymtabTest, FindNearestExact)
 {
-    Loader::SymbolTable symtab;
+    loader::SymbolTable symtab;
 
-    Loader::Symbol symbols[] = {
-        {Loader::Symbol::Binding::Local, "symbol", 0x10},
-        {Loader::Symbol::Binding::Local, "symbol2", 0x20},
+    loader::Symbol symbols[] = {
+        {loader::Symbol::Binding::Local, "symbol", 0x10},
+        {loader::Symbol::Binding::Local, "symbol2", 0x20},
     };
     EXPECT_TRUE(symtab.insert(symbols[0]));
     EXPECT_TRUE(symtab.insert(symbols[1]));
@@ -573,9 +573,9 @@ TEST(LoaderSymtabTest, FindNearestExact)
  */
 TEST(LoaderSymtabTest, FindNearestRound)
 {
-    Loader::SymbolTable symtab;
+    loader::SymbolTable symtab;
 
-    Loader::Symbol symbol = {Loader::Symbol::Binding::Local, "symbol", 0x10};
+    loader::Symbol symbol = {loader::Symbol::Binding::Local, "symbol", 0x10};
     EXPECT_TRUE(symtab.insert(symbol));
 
     const auto it = symtab.findNearest(symbol.address + 0x1);
@@ -590,11 +590,11 @@ TEST(LoaderSymtabTest, FindNearestRound)
  */
 TEST(LoaderSymtabTest, FindNearestRoundWithNext)
 {
-    Loader::SymbolTable symtab;
+    loader::SymbolTable symtab;
 
-    Loader::Symbol symbols[] = {
-        {Loader::Symbol::Binding::Local, "symbol", 0x10},
-        {Loader::Symbol::Binding::Local, "symbol2", 0x20},
+    loader::Symbol symbols[] = {
+        {loader::Symbol::Binding::Local, "symbol", 0x10},
+        {loader::Symbol::Binding::Local, "symbol2", 0x20},
     };
     EXPECT_TRUE(symtab.insert(symbols[0]));
     EXPECT_TRUE(symtab.insert(symbols[1]));
@@ -613,9 +613,9 @@ TEST(LoaderSymtabTest, FindNearestRoundWithNext)
  */
 TEST(LoaderSymtabTest, FindNearestRoundWithNextNonExistent)
 {
-    Loader::SymbolTable symtab;
+    loader::SymbolTable symtab;
 
-    Loader::Symbol symbol = {Loader::Symbol::Binding::Local, "symbol", 0x10};
+    loader::Symbol symbol = {loader::Symbol::Binding::Local, "symbol", 0x10};
     EXPECT_TRUE(symtab.insert(symbol));
 
     Addr next_addr;
@@ -631,9 +631,9 @@ TEST(LoaderSymtabTest, FindNearestRoundWithNextNonExistent)
  */
 TEST(LoaderSymtabTest, FindNearestNonExistent)
 {
-    Loader::SymbolTable symtab;
+    loader::SymbolTable symtab;
 
-    Loader::Symbol symbol = {Loader::Symbol::Binding::Local, "symbol", 0x10};
+    loader::Symbol symbol = {loader::Symbol::Binding::Local, "symbol", 0x10};
     EXPECT_TRUE(symtab.insert(symbol));
 
     const auto it = symtab.findNearest(symbol.address - 0x1);
@@ -647,23 +647,23 @@ TEST(LoaderSymtabTest, FindNearestNonExistent)
 TEST(LoaderSymtabTest, InsertTableConflicting)
 {
     const std::string name = "symbol";
-    Loader::Symbol symbols[] = {
-        {Loader::Symbol::Binding::Local, name, 0x10},
-        {Loader::Symbol::Binding::Local, "symbol2", 0x20},
-        {Loader::Symbol::Binding::Local, "symbol3", 0x30},
-        {Loader::Symbol::Binding::Local, "symbol4", 0x40},
+    loader::Symbol symbols[] = {
+        {loader::Symbol::Binding::Local, name, 0x10},
+        {loader::Symbol::Binding::Local, "symbol2", 0x20},
+        {loader::Symbol::Binding::Local, "symbol3", 0x30},
+        {loader::Symbol::Binding::Local, "symbol4", 0x40},
         // Introduce name conflict
-        {Loader::Symbol::Binding::Local, name, 0x50},
+        {loader::Symbol::Binding::Local, name, 0x50},
     };
 
     // Populate table 1
-    Loader::SymbolTable symtab;
+    loader::SymbolTable symtab;
     EXPECT_TRUE(symtab.insert(symbols[0]));
     EXPECT_TRUE(symtab.insert(symbols[1]));
     EXPECT_TRUE(symtab.insert(symbols[2]));
 
     // Populate table 2
-    Loader::SymbolTable symtab2;
+    loader::SymbolTable symtab2;
     EXPECT_TRUE(symtab2.insert(symbols[3]));
     EXPECT_TRUE(symtab2.insert(symbols[4]));
 
@@ -681,22 +681,22 @@ TEST(LoaderSymtabTest, InsertTableConflicting)
  */
 TEST(LoaderSymtabTest, InsertTable)
 {
-    Loader::Symbol symbols[] = {
-        {Loader::Symbol::Binding::Local, "symbol", 0x10},
-        {Loader::Symbol::Binding::Local, "symbol2", 0x20},
-        {Loader::Symbol::Binding::Local, "symbol3", 0x30},
-        {Loader::Symbol::Binding::Local, "symbol4", 0x40},
-        {Loader::Symbol::Binding::Local, "symbol5", 0x50},
+    loader::Symbol symbols[] = {
+        {loader::Symbol::Binding::Local, "symbol", 0x10},
+        {loader::Symbol::Binding::Local, "symbol2", 0x20},
+        {loader::Symbol::Binding::Local, "symbol3", 0x30},
+        {loader::Symbol::Binding::Local, "symbol4", 0x40},
+        {loader::Symbol::Binding::Local, "symbol5", 0x50},
     };
 
     // Populate table 1
-    Loader::SymbolTable symtab;
+    loader::SymbolTable symtab;
     EXPECT_TRUE(symtab.insert(symbols[0]));
     EXPECT_TRUE(symtab.insert(symbols[1]));
     EXPECT_TRUE(symtab.insert(symbols[2]));
 
     // Populate table 2
-    Loader::SymbolTable symtab2;
+    loader::SymbolTable symtab2;
     EXPECT_TRUE(symtab2.insert(symbols[3]));
     EXPECT_TRUE(symtab2.insert(symbols[4]));
 
@@ -717,11 +717,11 @@ using LoaderSymtabSerializationFixture = SerializationFixture;
 TEST_F(LoaderSymtabSerializationFixture, Serialization)
 {
     // Populate the table
-    Loader::SymbolTable symtab;
-    Loader::Symbol symbols[] = {
-        {Loader::Symbol::Binding::Local, "symbol", 0x10},
-        {Loader::Symbol::Binding::Local, "symbol2", 0x20},
-        {Loader::Symbol::Binding::Local, "symbol3", 0x30},
+    loader::SymbolTable symtab;
+    loader::Symbol symbols[] = {
+        {loader::Symbol::Binding::Local, "symbol", 0x10},
+        {loader::Symbol::Binding::Local, "symbol2", 0x20},
+        {loader::Symbol::Binding::Local, "symbol3", 0x30},
     };
     EXPECT_TRUE(symtab.insert(symbols[0]));
     EXPECT_TRUE(symtab.insert(symbols[1]));
@@ -742,17 +742,17 @@ TEST_F(LoaderSymtabSerializationFixture, Serialization)
 /** Test unserialization. */
 TEST_F(LoaderSymtabSerializationFixture, Unserialization)
 {
-    Loader::Symbol symbols[] = {
-        {Loader::Symbol::Binding::Local, "symbol", 0x10},
-        {Loader::Symbol::Binding::Local, "symbol2", 0x20},
-        {Loader::Symbol::Binding::Local, "symbol3", 0x30},
+    loader::Symbol symbols[] = {
+        {loader::Symbol::Binding::Local, "symbol", 0x10},
+        {loader::Symbol::Binding::Local, "symbol2", 0x20},
+        {loader::Symbol::Binding::Local, "symbol3", 0x30},
     };
     simulateSerialization("\n[Section1]\ntest.size=3\n"
         "test.addr_0=16\ntest.symbol_0=symbol\ntest.binding_0=1\n"
         "test.addr_1=32\ntest.symbol_1=symbol2\ntest.binding_1=1\n"
         "test.addr_2=48\ntest.symbol_2=symbol3\ntest.binding_2=1\n");
 
-    Loader::SymbolTable unserialized_symtab;
+    loader::SymbolTable unserialized_symtab;
     CheckpointIn cp(getDirName());
     Serializable::ScopedCheckpointSection scs(cp, "Section1");
     unserialized_symtab.unserialize("test", cp);
@@ -770,17 +770,17 @@ TEST_F(LoaderSymtabSerializationFixture, Unserialization)
  */
 TEST_F(LoaderSymtabSerializationFixture, UnserializationMissingBinding)
 {
-    Loader::Symbol symbols[] = {
-        {Loader::Symbol::Binding::Local, "symbol", 0x10},
-        {Loader::Symbol::Binding::Global, "symbol2", 0x20},
-        {Loader::Symbol::Binding::Local, "symbol3", 0x30},
+    loader::Symbol symbols[] = {
+        {loader::Symbol::Binding::Local, "symbol", 0x10},
+        {loader::Symbol::Binding::Global, "symbol2", 0x20},
+        {loader::Symbol::Binding::Local, "symbol3", 0x30},
     };
     simulateSerialization("\n[Section1]\ntest.size=3\n"
         "test.addr_0=16\ntest.symbol_0=symbol\ntest.binding_0=1\n"
         "test.addr_1=32\ntest.symbol_1=symbol2\n"
         "test.addr_2=48\ntest.symbol_2=symbol3\ntest.binding_2=1\n");
 
-    Loader::SymbolTable unserialized_symtab;
+    loader::SymbolTable unserialized_symtab;
     CheckpointIn cp(getDirName());
     Serializable::ScopedCheckpointSection scs(cp, "Section1");
 
@@ -800,22 +800,22 @@ TEST_F(LoaderSymtabSerializationFixture, UnserializationMissingBinding)
 TEST_F(LoaderSymtabSerializationFixture,
     UnserializationMissingBindingChangeDefault)
 {
-    Loader::Symbol symbols[] = {
-        {Loader::Symbol::Binding::Local, "symbol", 0x10},
-        {Loader::Symbol::Binding::Weak, "symbol2", 0x20},
-        {Loader::Symbol::Binding::Local, "symbol3", 0x30},
+    loader::Symbol symbols[] = {
+        {loader::Symbol::Binding::Local, "symbol", 0x10},
+        {loader::Symbol::Binding::Weak, "symbol2", 0x20},
+        {loader::Symbol::Binding::Local, "symbol3", 0x30},
     };
     simulateSerialization("\n[Section1]\ntest.size=3\n"
         "test.addr_0=16\ntest.symbol_0=symbol\ntest.binding_0=1\n"
         "test.addr_1=32\ntest.symbol_1=symbol2\n"
         "test.addr_2=48\ntest.symbol_2=symbol3\ntest.binding_2=1\n");
 
-    Loader::SymbolTable unserialized_symtab;
+    loader::SymbolTable unserialized_symtab;
     CheckpointIn cp(getDirName());
     Serializable::ScopedCheckpointSection scs(cp, "Section1");
 
     unserialized_symtab.unserialize("test", cp,
-        Loader::Symbol::Binding::Weak);
+        loader::Symbol::Binding::Weak);
 
     // Make sure that the symbols in symtab are present in the
     // unserialized table
diff --git a/src/cpu/profile.hh b/src/cpu/profile.hh
index a5e16d6ac1..68283f5b06 100644
--- a/src/cpu/profile.hh
+++ b/src/cpu/profile.hh
@@ -43,7 +43,6 @@ namespace gem5
 class ThreadContext;
 class FunctionProfile;
 
-GEM5_DEPRECATED_NAMESPACE(Loader, loader);
 namespace loader
 {
     class SymbolTable;
diff --git a/src/cpu/static_inst.hh b/src/cpu/static_inst.hh
index 21ce2aaf8b..3ab78345bc 100644
--- a/src/cpu/static_inst.hh
+++ b/src/cpu/static_inst.hh
@@ -66,7 +66,6 @@ class Packet;
 class ExecContext;
 class ThreadContext;
 
-GEM5_DEPRECATED_NAMESPACE(Loader, loader);
 namespace loader
 {
 class SymbolTable;
diff --git a/src/sim/process.hh b/src/sim/process.hh
index cece212d14..d6d30cebc8 100644
--- a/src/sim/process.hh
+++ b/src/sim/process.hh
@@ -49,7 +49,6 @@
 namespace gem5
 {
 
-GEM5_DEPRECATED_NAMESPACE(Loader, loader);
 namespace loader
 {
 class ObjectFile;

From c1839aad77a4b8e128864b860d34329a918479ea Mon Sep 17 00:00:00 2001
From: "Daniel R. Carvalho" <odanrc@yahoo.com.br>
Date: Sat, 14 Jan 2023 09:52:45 -0300
Subject: [PATCH 129/492] fastmodel: Remove the FastModel namespace

This namespace has gone through the deprecation period
and can now be removed.

Change-Id: Ic0a42f7349ccf15f8c1dd276a647e7cb2a56c1cb
Signed-off-by: Daniel R. Carvalho <odanrc@yahoo.com.br>
Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/67363
Reviewed-by: Richard Cooper <richard.cooper@arm.com>
Tested-by: kokoro <noreply+kokoro@google.com>
Maintainer: Jason Lowe-Power <power.jg@gmail.com>
---
 src/arch/arm/fastmodel/CortexA76/cortex_a76.cc     | 1 -
 src/arch/arm/fastmodel/CortexA76/cortex_a76.hh     | 1 -
 src/arch/arm/fastmodel/CortexA76/evs.cc            | 1 -
 src/arch/arm/fastmodel/CortexA76/evs.hh            | 1 -
 src/arch/arm/fastmodel/CortexA76/thread_context.cc | 1 -
 src/arch/arm/fastmodel/CortexA76/thread_context.hh | 1 -
 src/arch/arm/fastmodel/CortexR52/cortex_r52.cc     | 1 -
 src/arch/arm/fastmodel/CortexR52/cortex_r52.hh     | 1 -
 src/arch/arm/fastmodel/CortexR52/evs.cc            | 1 -
 src/arch/arm/fastmodel/CortexR52/evs.hh            | 1 -
 src/arch/arm/fastmodel/CortexR52/thread_context.cc | 1 -
 src/arch/arm/fastmodel/CortexR52/thread_context.hh | 1 -
 src/arch/arm/fastmodel/GIC/gic.cc                  | 1 -
 src/arch/arm/fastmodel/GIC/gic.hh                  | 1 -
 src/arch/arm/fastmodel/PL330_DMAC/pl330.cc         | 1 -
 src/arch/arm/fastmodel/PL330_DMAC/pl330.hh         | 1 -
 src/arch/arm/fastmodel/amba_from_tlm_bridge.cc     | 1 -
 src/arch/arm/fastmodel/amba_from_tlm_bridge.hh     | 1 -
 src/arch/arm/fastmodel/amba_ports.hh               | 1 -
 src/arch/arm/fastmodel/amba_to_tlm_bridge.cc       | 1 -
 src/arch/arm/fastmodel/amba_to_tlm_bridge.hh       | 1 -
 src/arch/arm/fastmodel/common/signal_receiver.hh   | 1 -
 src/arch/arm/fastmodel/common/signal_sender.hh     | 1 -
 23 files changed, 23 deletions(-)

diff --git a/src/arch/arm/fastmodel/CortexA76/cortex_a76.cc b/src/arch/arm/fastmodel/CortexA76/cortex_a76.cc
index 9280a042ee..ea1f477f59 100644
--- a/src/arch/arm/fastmodel/CortexA76/cortex_a76.cc
+++ b/src/arch/arm/fastmodel/CortexA76/cortex_a76.cc
@@ -36,7 +36,6 @@
 namespace gem5
 {
 
-GEM5_DEPRECATED_NAMESPACE(FastModel, fastmodel);
 namespace fastmodel
 {
 
diff --git a/src/arch/arm/fastmodel/CortexA76/cortex_a76.hh b/src/arch/arm/fastmodel/CortexA76/cortex_a76.hh
index 39f916e4e3..61bf501554 100644
--- a/src/arch/arm/fastmodel/CortexA76/cortex_a76.hh
+++ b/src/arch/arm/fastmodel/CortexA76/cortex_a76.hh
@@ -42,7 +42,6 @@ namespace gem5
 
 class BaseCPU;
 
-GEM5_DEPRECATED_NAMESPACE(FastModel, fastmodel);
 namespace fastmodel
 {
 
diff --git a/src/arch/arm/fastmodel/CortexA76/evs.cc b/src/arch/arm/fastmodel/CortexA76/evs.cc
index c9ce3cc656..b299ad1a28 100644
--- a/src/arch/arm/fastmodel/CortexA76/evs.cc
+++ b/src/arch/arm/fastmodel/CortexA76/evs.cc
@@ -37,7 +37,6 @@
 namespace gem5
 {
 
-GEM5_DEPRECATED_NAMESPACE(FastModel, fastmodel);
 namespace fastmodel
 {
 
diff --git a/src/arch/arm/fastmodel/CortexA76/evs.hh b/src/arch/arm/fastmodel/CortexA76/evs.hh
index 7c4ef601a7..9f08071dae 100644
--- a/src/arch/arm/fastmodel/CortexA76/evs.hh
+++ b/src/arch/arm/fastmodel/CortexA76/evs.hh
@@ -52,7 +52,6 @@
 namespace gem5
 {
 
-GEM5_DEPRECATED_NAMESPACE(FastModel, fastmodel);
 namespace fastmodel
 {
 
diff --git a/src/arch/arm/fastmodel/CortexA76/thread_context.cc b/src/arch/arm/fastmodel/CortexA76/thread_context.cc
index 672f3b724f..c6704852fc 100644
--- a/src/arch/arm/fastmodel/CortexA76/thread_context.cc
+++ b/src/arch/arm/fastmodel/CortexA76/thread_context.cc
@@ -35,7 +35,6 @@
 namespace gem5
 {
 
-GEM5_DEPRECATED_NAMESPACE(FastModel, fastmodel);
 namespace fastmodel
 {
 
diff --git a/src/arch/arm/fastmodel/CortexA76/thread_context.hh b/src/arch/arm/fastmodel/CortexA76/thread_context.hh
index d7b8ed541c..6e3d85485e 100644
--- a/src/arch/arm/fastmodel/CortexA76/thread_context.hh
+++ b/src/arch/arm/fastmodel/CortexA76/thread_context.hh
@@ -33,7 +33,6 @@
 namespace gem5
 {
 
-GEM5_DEPRECATED_NAMESPACE(FastModel, fastmodel);
 namespace fastmodel
 {
 
diff --git a/src/arch/arm/fastmodel/CortexR52/cortex_r52.cc b/src/arch/arm/fastmodel/CortexR52/cortex_r52.cc
index 9dfe7a5158..a22492e932 100644
--- a/src/arch/arm/fastmodel/CortexR52/cortex_r52.cc
+++ b/src/arch/arm/fastmodel/CortexR52/cortex_r52.cc
@@ -35,7 +35,6 @@
 namespace gem5
 {
 
-GEM5_DEPRECATED_NAMESPACE(FastModel, fastmodel);
 namespace fastmodel
 {
 
diff --git a/src/arch/arm/fastmodel/CortexR52/cortex_r52.hh b/src/arch/arm/fastmodel/CortexR52/cortex_r52.hh
index 76c7d33ea4..186383d728 100644
--- a/src/arch/arm/fastmodel/CortexR52/cortex_r52.hh
+++ b/src/arch/arm/fastmodel/CortexR52/cortex_r52.hh
@@ -42,7 +42,6 @@ namespace gem5
 
 class BaseCPU;
 
-GEM5_DEPRECATED_NAMESPACE(FastModel, fastmodel);
 namespace fastmodel
 {
 
diff --git a/src/arch/arm/fastmodel/CortexR52/evs.cc b/src/arch/arm/fastmodel/CortexR52/evs.cc
index 0ad3f18412..47fbc36313 100644
--- a/src/arch/arm/fastmodel/CortexR52/evs.cc
+++ b/src/arch/arm/fastmodel/CortexR52/evs.cc
@@ -36,7 +36,6 @@
 namespace gem5
 {
 
-GEM5_DEPRECATED_NAMESPACE(FastModel, fastmodel);
 namespace fastmodel
 {
 
diff --git a/src/arch/arm/fastmodel/CortexR52/evs.hh b/src/arch/arm/fastmodel/CortexR52/evs.hh
index 9cebec3846..6516f4c687 100644
--- a/src/arch/arm/fastmodel/CortexR52/evs.hh
+++ b/src/arch/arm/fastmodel/CortexR52/evs.hh
@@ -54,7 +54,6 @@
 namespace gem5
 {
 
-GEM5_DEPRECATED_NAMESPACE(FastModel, fastmodel);
 namespace fastmodel
 {
 
diff --git a/src/arch/arm/fastmodel/CortexR52/thread_context.cc b/src/arch/arm/fastmodel/CortexR52/thread_context.cc
index f3e170941e..a20f8e0a89 100644
--- a/src/arch/arm/fastmodel/CortexR52/thread_context.cc
+++ b/src/arch/arm/fastmodel/CortexR52/thread_context.cc
@@ -35,7 +35,6 @@
 namespace gem5
 {
 
-GEM5_DEPRECATED_NAMESPACE(FastModel, fastmodel);
 namespace fastmodel
 {
 
diff --git a/src/arch/arm/fastmodel/CortexR52/thread_context.hh b/src/arch/arm/fastmodel/CortexR52/thread_context.hh
index 7126a371a1..5a0d34f274 100644
--- a/src/arch/arm/fastmodel/CortexR52/thread_context.hh
+++ b/src/arch/arm/fastmodel/CortexR52/thread_context.hh
@@ -33,7 +33,6 @@
 namespace gem5
 {
 
-GEM5_DEPRECATED_NAMESPACE(FastModel, fastmodel);
 namespace fastmodel
 {
 
diff --git a/src/arch/arm/fastmodel/GIC/gic.cc b/src/arch/arm/fastmodel/GIC/gic.cc
index fbe863a166..493aa81fcd 100644
--- a/src/arch/arm/fastmodel/GIC/gic.cc
+++ b/src/arch/arm/fastmodel/GIC/gic.cc
@@ -34,7 +34,6 @@
 namespace gem5
 {
 
-GEM5_DEPRECATED_NAMESPACE(FastModel, fastmodel);
 namespace fastmodel
 {
 
diff --git a/src/arch/arm/fastmodel/GIC/gic.hh b/src/arch/arm/fastmodel/GIC/gic.hh
index 33a172ded2..0e502fc633 100644
--- a/src/arch/arm/fastmodel/GIC/gic.hh
+++ b/src/arch/arm/fastmodel/GIC/gic.hh
@@ -48,7 +48,6 @@
 namespace gem5
 {
 
-GEM5_DEPRECATED_NAMESPACE(FastModel, fastmodel);
 namespace fastmodel
 {
 
diff --git a/src/arch/arm/fastmodel/PL330_DMAC/pl330.cc b/src/arch/arm/fastmodel/PL330_DMAC/pl330.cc
index 13162bd409..f9e6e2dc3e 100644
--- a/src/arch/arm/fastmodel/PL330_DMAC/pl330.cc
+++ b/src/arch/arm/fastmodel/PL330_DMAC/pl330.cc
@@ -35,7 +35,6 @@
 namespace gem5
 {
 
-GEM5_DEPRECATED_NAMESPACE(FastModel, fastmodel);
 namespace fastmodel
 {
 
diff --git a/src/arch/arm/fastmodel/PL330_DMAC/pl330.hh b/src/arch/arm/fastmodel/PL330_DMAC/pl330.hh
index 389f7047c7..e7811fc576 100644
--- a/src/arch/arm/fastmodel/PL330_DMAC/pl330.hh
+++ b/src/arch/arm/fastmodel/PL330_DMAC/pl330.hh
@@ -50,7 +50,6 @@
 namespace gem5
 {
 
-GEM5_DEPRECATED_NAMESPACE(FastModel, fastmodel);
 namespace fastmodel
 {
 
diff --git a/src/arch/arm/fastmodel/amba_from_tlm_bridge.cc b/src/arch/arm/fastmodel/amba_from_tlm_bridge.cc
index f84e58121e..4baf0ef7aa 100644
--- a/src/arch/arm/fastmodel/amba_from_tlm_bridge.cc
+++ b/src/arch/arm/fastmodel/amba_from_tlm_bridge.cc
@@ -34,7 +34,6 @@
 namespace gem5
 {
 
-GEM5_DEPRECATED_NAMESPACE(FastModel, fastmodel);
 namespace fastmodel
 {
 
diff --git a/src/arch/arm/fastmodel/amba_from_tlm_bridge.hh b/src/arch/arm/fastmodel/amba_from_tlm_bridge.hh
index 4484ea92c6..8ea8b8a731 100644
--- a/src/arch/arm/fastmodel/amba_from_tlm_bridge.hh
+++ b/src/arch/arm/fastmodel/amba_from_tlm_bridge.hh
@@ -38,7 +38,6 @@
 namespace gem5
 {
 
-GEM5_DEPRECATED_NAMESPACE(FastModel, fastmodel);
 namespace fastmodel
 {
 
diff --git a/src/arch/arm/fastmodel/amba_ports.hh b/src/arch/arm/fastmodel/amba_ports.hh
index 845c5e97a4..8e3dca055e 100644
--- a/src/arch/arm/fastmodel/amba_ports.hh
+++ b/src/arch/arm/fastmodel/amba_ports.hh
@@ -38,7 +38,6 @@
 namespace gem5
 {
 
-GEM5_DEPRECATED_NAMESPACE(FastModel, fastmodel);
 namespace fastmodel
 {
 
diff --git a/src/arch/arm/fastmodel/amba_to_tlm_bridge.cc b/src/arch/arm/fastmodel/amba_to_tlm_bridge.cc
index e8807c474f..58f6eeab6b 100644
--- a/src/arch/arm/fastmodel/amba_to_tlm_bridge.cc
+++ b/src/arch/arm/fastmodel/amba_to_tlm_bridge.cc
@@ -68,7 +68,6 @@ struct FarAtomicOpFunctor : public AtomicOpFunctor
 
 }
 
-GEM5_DEPRECATED_NAMESPACE(FastModel, fastmodel);
 namespace fastmodel
 {
 
diff --git a/src/arch/arm/fastmodel/amba_to_tlm_bridge.hh b/src/arch/arm/fastmodel/amba_to_tlm_bridge.hh
index 6874052a56..addaac67f9 100644
--- a/src/arch/arm/fastmodel/amba_to_tlm_bridge.hh
+++ b/src/arch/arm/fastmodel/amba_to_tlm_bridge.hh
@@ -38,7 +38,6 @@
 namespace gem5
 {
 
-GEM5_DEPRECATED_NAMESPACE(FastModel, fastmodel);
 namespace fastmodel
 {
 
diff --git a/src/arch/arm/fastmodel/common/signal_receiver.hh b/src/arch/arm/fastmodel/common/signal_receiver.hh
index 9ec760e32b..3036e3447d 100644
--- a/src/arch/arm/fastmodel/common/signal_receiver.hh
+++ b/src/arch/arm/fastmodel/common/signal_receiver.hh
@@ -44,7 +44,6 @@
 namespace gem5
 {
 
-GEM5_DEPRECATED_NAMESPACE(FastModel, fastmodel);
 namespace fastmodel
 {
 
diff --git a/src/arch/arm/fastmodel/common/signal_sender.hh b/src/arch/arm/fastmodel/common/signal_sender.hh
index c596ed108e..fb835c9fe6 100644
--- a/src/arch/arm/fastmodel/common/signal_sender.hh
+++ b/src/arch/arm/fastmodel/common/signal_sender.hh
@@ -38,7 +38,6 @@
 namespace gem5
 {
 
-GEM5_DEPRECATED_NAMESPACE(FastModel, fastmodel);
 namespace fastmodel
 {
 

From d14cde6bd709e6d338ec1a1ae6082ec384ac21d0 Mon Sep 17 00:00:00 2001
From: "Daniel R. Carvalho" <odanrc@yahoo.com.br>
Date: Sat, 14 Jan 2023 09:55:26 -0300
Subject: [PATCH 130/492] misc: Remove the Linux namespace

This namespace has gone through the deprecation period
and can now be removed.

Change-Id: I73d7792ab8897d00b143d82d0fb70987ca410438
Signed-off-by: Daniel R. Carvalho <odanrc@yahoo.com.br>
Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/67364
Maintainer: Jason Lowe-Power <power.jg@gmail.com>
Tested-by: kokoro <noreply+kokoro@google.com>
Reviewed-by: Richard Cooper <richard.cooper@arm.com>
---
 src/arch/generic/linux/threadinfo.hh | 1 -
 src/arch/mips/linux/hwrpb.hh         | 1 -
 src/arch/mips/linux/thread_info.hh   | 1 -
 src/kern/linux/events.cc             | 1 -
 src/kern/linux/events.hh             | 1 -
 src/kern/linux/helpers.hh            | 1 -
 src/kern/linux/printk.cc             | 1 -
 src/kern/linux/printk.hh             | 1 -
 8 files changed, 8 deletions(-)

diff --git a/src/arch/generic/linux/threadinfo.hh b/src/arch/generic/linux/threadinfo.hh
index 7702f0e0b9..70511c47fa 100644
--- a/src/arch/generic/linux/threadinfo.hh
+++ b/src/arch/generic/linux/threadinfo.hh
@@ -36,7 +36,6 @@
 namespace gem5
 {
 
-GEM5_DEPRECATED_NAMESPACE(Linux, linux);
 namespace linux
 {
 
diff --git a/src/arch/mips/linux/hwrpb.hh b/src/arch/mips/linux/hwrpb.hh
index b5dcb18b77..3c5e439098 100644
--- a/src/arch/mips/linux/hwrpb.hh
+++ b/src/arch/mips/linux/hwrpb.hh
@@ -30,7 +30,6 @@
 namespace gem5
 {
 
-GEM5_DEPRECATED_NAMESPACE(Linux, linux);
 namespace linux
 {
     struct pcb_struct
diff --git a/src/arch/mips/linux/thread_info.hh b/src/arch/mips/linux/thread_info.hh
index df376f0c11..986c896257 100644
--- a/src/arch/mips/linux/thread_info.hh
+++ b/src/arch/mips/linux/thread_info.hh
@@ -34,7 +34,6 @@
 namespace gem5
 {
 
-GEM5_DEPRECATED_NAMESPACE(Linux, linux);
 namespace linux
 {
     struct thread_info
diff --git a/src/kern/linux/events.cc b/src/kern/linux/events.cc
index 6ec883c2e7..35767596af 100644
--- a/src/kern/linux/events.cc
+++ b/src/kern/linux/events.cc
@@ -54,7 +54,6 @@
 namespace gem5
 {
 
-GEM5_DEPRECATED_NAMESPACE(Linux, linux);
 namespace linux
 {
 
diff --git a/src/kern/linux/events.hh b/src/kern/linux/events.hh
index 75492093f4..966c1ba075 100644
--- a/src/kern/linux/events.hh
+++ b/src/kern/linux/events.hh
@@ -57,7 +57,6 @@ namespace gem5
 
 class ThreadContext;
 
-GEM5_DEPRECATED_NAMESPACE(Linux, linux);
 namespace linux
 {
 
diff --git a/src/kern/linux/helpers.hh b/src/kern/linux/helpers.hh
index 1ad5b413fb..b8d3c49a36 100644
--- a/src/kern/linux/helpers.hh
+++ b/src/kern/linux/helpers.hh
@@ -47,7 +47,6 @@ namespace gem5
 
 class ThreadContext;
 
-GEM5_DEPRECATED_NAMESPACE(Linux, linux);
 namespace linux
 {
 
diff --git a/src/kern/linux/printk.cc b/src/kern/linux/printk.cc
index c356016985..ccb1e8ab88 100644
--- a/src/kern/linux/printk.cc
+++ b/src/kern/linux/printk.cc
@@ -42,7 +42,6 @@
 namespace gem5
 {
 
-GEM5_DEPRECATED_NAMESPACE(Linux, linux);
 namespace linux
 {
 
diff --git a/src/kern/linux/printk.hh b/src/kern/linux/printk.hh
index 7b545bc498..1e265a7f3d 100644
--- a/src/kern/linux/printk.hh
+++ b/src/kern/linux/printk.hh
@@ -38,7 +38,6 @@
 namespace gem5
 {
 
-GEM5_DEPRECATED_NAMESPACE(Linux, linux);
 namespace linux
 {
 

From b2bf811aeaf91ab5e30181f43d6966739294c327 Mon Sep 17 00:00:00 2001
From: "Daniel R. Carvalho" <odanrc@yahoo.com.br>
Date: Sat, 14 Jan 2023 09:56:06 -0300
Subject: [PATCH 131/492] misc: Remove the FreeBSD namespace

This namespace has gone through the deprecation period
and can now be removed.

Change-Id: Ic0c838709121278584a295ea19a8283d5765b9c9
Signed-off-by: Daniel R. Carvalho <odanrc@yahoo.com.br>
Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/67365
Maintainer: Jason Lowe-Power <power.jg@gmail.com>
Reviewed-by: Richard Cooper <richard.cooper@arm.com>
Tested-by: kokoro <noreply+kokoro@google.com>
---
 src/arch/generic/freebsd/threadinfo.hh | 1 -
 src/kern/freebsd/events.cc             | 1 -
 src/kern/freebsd/events.hh             | 1 -
 3 files changed, 3 deletions(-)

diff --git a/src/arch/generic/freebsd/threadinfo.hh b/src/arch/generic/freebsd/threadinfo.hh
index f77772a878..443367f38d 100644
--- a/src/arch/generic/freebsd/threadinfo.hh
+++ b/src/arch/generic/freebsd/threadinfo.hh
@@ -39,7 +39,6 @@
 namespace gem5
 {
 
-GEM5_DEPRECATED_NAMESPACE(FreeBSD, free_bsd);
 namespace free_bsd
 {
 
diff --git a/src/kern/freebsd/events.cc b/src/kern/freebsd/events.cc
index ce2291ed0c..667b10b49d 100644
--- a/src/kern/freebsd/events.cc
+++ b/src/kern/freebsd/events.cc
@@ -44,7 +44,6 @@
 namespace gem5
 {
 
-GEM5_DEPRECATED_NAMESPACE(FreeBSD, free_bsd);
 namespace free_bsd
 {
 
diff --git a/src/kern/freebsd/events.hh b/src/kern/freebsd/events.hh
index c89ad0cad8..f4e350f11a 100644
--- a/src/kern/freebsd/events.hh
+++ b/src/kern/freebsd/events.hh
@@ -40,7 +40,6 @@
 namespace gem5
 {
 
-GEM5_DEPRECATED_NAMESPACE(FreeBSD, free_bsd);
 namespace free_bsd
 {
 

From 1e80ba78627716824908340e83eb6711509fc332 Mon Sep 17 00:00:00 2001
From: "Daniel R. Carvalho" <odanrc@yahoo.com.br>
Date: Sat, 14 Jan 2023 09:56:59 -0300
Subject: [PATCH 132/492] misc: Remove the Net namespace

This namespace has gone through the deprecation period
and can now be removed.

Change-Id: Ia2e1ef1619f51a0d7c0da9c7b4a160cd88ed8a65
Signed-off-by: Daniel R. Carvalho <odanrc@yahoo.com.br>
Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/67366
Tested-by: kokoro <noreply+kokoro@google.com>
Reviewed-by: Richard Cooper <richard.cooper@arm.com>
Maintainer: Jason Lowe-Power <power.jg@gmail.com>
---
 src/base/inet.cc | 1 -
 src/base/inet.hh | 1 -
 2 files changed, 2 deletions(-)

diff --git a/src/base/inet.cc b/src/base/inet.cc
index ab4bfe460c..fc7505ecb7 100644
--- a/src/base/inet.cc
+++ b/src/base/inet.cc
@@ -54,7 +54,6 @@
 namespace gem5
 {
 
-GEM5_DEPRECATED_NAMESPACE(Net, networking);
 namespace networking
 {
 
diff --git a/src/base/inet.hh b/src/base/inet.hh
index 3897f6364c..2cc3c6a3c8 100644
--- a/src/base/inet.hh
+++ b/src/base/inet.hh
@@ -68,7 +68,6 @@
 namespace gem5
 {
 
-GEM5_DEPRECATED_NAMESPACE(Net, networking);
 namespace networking
 {
 

From 93f0de95d6db1ddcd79473d4d25f830c132316dd Mon Sep 17 00:00:00 2001
From: "Daniel R. Carvalho" <odanrc@yahoo.com.br>
Date: Sat, 14 Jan 2023 09:57:55 -0300
Subject: [PATCH 133/492] misc: Remove the m5 namespace

This namespace has gone through the deprecation period
and can now be removed.

Change-Id: Iffc6d903da1d619c0914379d0ceabc88453b3ac7
Signed-off-by: Daniel R. Carvalho <odanrc@yahoo.com.br>
Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/67367
Reviewed-by: Richard Cooper <richard.cooper@arm.com>
Maintainer: Jason Lowe-Power <power.jg@gmail.com>
Tested-by: kokoro <noreply+kokoro@google.com>
---
 src/base/coroutine.hh   | 1 -
 src/base/stl_helpers.hh | 1 -
 2 files changed, 2 deletions(-)

diff --git a/src/base/coroutine.hh b/src/base/coroutine.hh
index 63b26aa3eb..000a0bf0f9 100644
--- a/src/base/coroutine.hh
+++ b/src/base/coroutine.hh
@@ -44,7 +44,6 @@
 #include "base/compiler.hh"
 #include "base/fiber.hh"
 
-GEM5_DEPRECATED_NAMESPACE(m5, gem5);
 namespace gem5
 {
 
diff --git a/src/base/stl_helpers.hh b/src/base/stl_helpers.hh
index d16446d5c3..d12f266350 100644
--- a/src/base/stl_helpers.hh
+++ b/src/base/stl_helpers.hh
@@ -36,7 +36,6 @@
 
 #include "base/compiler.hh"
 
-GEM5_DEPRECATED_NAMESPACE(m5, gem5);
 namespace gem5
 {
 

From 2ec3f64af80b50b0bb853d2225f020682e0b09f7 Mon Sep 17 00:00:00 2001
From: "Daniel R. Carvalho" <odanrc@yahoo.com.br>
Date: Sat, 14 Jan 2023 10:00:09 -0300
Subject: [PATCH 134/492] cpu: Remove the DecodeCache namespace

This namespace has gone through the deprecation period
and can now be removed.

Change-Id: Ia1b2ab5444464f7c0ee85c8d288e38be4d7c013f
Signed-off-by: Daniel R. Carvalho <odanrc@yahoo.com.br>
Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/67368
Reviewed-by: Richard Cooper <richard.cooper@arm.com>
Maintainer: Jason Lowe-Power <power.jg@gmail.com>
Tested-by: kokoro <noreply+kokoro@google.com>
---
 src/cpu/decode_cache.hh | 1 -
 1 file changed, 1 deletion(-)

diff --git a/src/cpu/decode_cache.hh b/src/cpu/decode_cache.hh
index 4e5631a460..cbd3c933b2 100644
--- a/src/cpu/decode_cache.hh
+++ b/src/cpu/decode_cache.hh
@@ -38,7 +38,6 @@
 namespace gem5
 {
 
-GEM5_DEPRECATED_NAMESPACE(DecodeCache, decode_cache);
 namespace decode_cache
 {
 

From 161519177e0022a53daf02d20b1d1a0d9dc685ee Mon Sep 17 00:00:00 2001
From: "Daniel R. Carvalho" <odanrc@yahoo.com.br>
Date: Sat, 14 Jan 2023 10:00:39 -0300
Subject: [PATCH 135/492] cpu: Remove the Minor namespace

This namespace has gone through the deprecation period
and can now be removed.

Change-Id: I603134248a05c988627bbd3c59c962b085b3b2ad
Signed-off-by: Daniel R. Carvalho <odanrc@yahoo.com.br>
Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/67369
Reviewed-by: Richard Cooper <richard.cooper@arm.com>
Maintainer: Jason Lowe-Power <power.jg@gmail.com>
Tested-by: kokoro <noreply+kokoro@google.com>
---
 src/cpu/minor/activity.cc     | 1 -
 src/cpu/minor/activity.hh     | 1 -
 src/cpu/minor/buffers.hh      | 1 -
 src/cpu/minor/cpu.hh          | 1 -
 src/cpu/minor/decode.cc       | 1 -
 src/cpu/minor/decode.hh       | 1 -
 src/cpu/minor/dyn_inst.cc     | 1 -
 src/cpu/minor/dyn_inst.hh     | 1 -
 src/cpu/minor/exec_context.hh | 1 -
 src/cpu/minor/execute.cc      | 1 -
 src/cpu/minor/execute.hh      | 1 -
 src/cpu/minor/fetch1.cc       | 1 -
 src/cpu/minor/fetch1.hh       | 1 -
 src/cpu/minor/fetch2.cc       | 1 -
 src/cpu/minor/fetch2.hh       | 1 -
 src/cpu/minor/func_unit.cc    | 1 -
 src/cpu/minor/func_unit.hh    | 1 -
 src/cpu/minor/lsq.cc          | 1 -
 src/cpu/minor/lsq.hh          | 1 -
 src/cpu/minor/pipe_data.cc    | 1 -
 src/cpu/minor/pipe_data.hh    | 1 -
 src/cpu/minor/pipeline.cc     | 1 -
 src/cpu/minor/pipeline.hh     | 1 -
 src/cpu/minor/scoreboard.cc   | 1 -
 src/cpu/minor/scoreboard.hh   | 1 -
 src/cpu/minor/stats.cc        | 1 -
 src/cpu/minor/stats.hh        | 1 -
 src/cpu/minor/trace.hh        | 1 -
 28 files changed, 28 deletions(-)

diff --git a/src/cpu/minor/activity.cc b/src/cpu/minor/activity.cc
index f78e927bce..f2f65b37f1 100644
--- a/src/cpu/minor/activity.cc
+++ b/src/cpu/minor/activity.cc
@@ -44,7 +44,6 @@
 namespace gem5
 {
 
-GEM5_DEPRECATED_NAMESPACE(Minor, minor);
 namespace minor
 {
 
diff --git a/src/cpu/minor/activity.hh b/src/cpu/minor/activity.hh
index b94221730a..d052e0f41c 100644
--- a/src/cpu/minor/activity.hh
+++ b/src/cpu/minor/activity.hh
@@ -50,7 +50,6 @@
 namespace gem5
 {
 
-GEM5_DEPRECATED_NAMESPACE(Minor, minor);
 namespace minor
 {
 
diff --git a/src/cpu/minor/buffers.hh b/src/cpu/minor/buffers.hh
index 648ec49336..e461a5cdaf 100644
--- a/src/cpu/minor/buffers.hh
+++ b/src/cpu/minor/buffers.hh
@@ -59,7 +59,6 @@
 namespace gem5
 {
 
-GEM5_DEPRECATED_NAMESPACE(Minor, minor);
 namespace minor
 {
 
diff --git a/src/cpu/minor/cpu.hh b/src/cpu/minor/cpu.hh
index b5b04ae908..acf4295ac9 100644
--- a/src/cpu/minor/cpu.hh
+++ b/src/cpu/minor/cpu.hh
@@ -56,7 +56,6 @@
 namespace gem5
 {
 
-GEM5_DEPRECATED_NAMESPACE(Minor, minor);
 namespace minor
 {
 
diff --git a/src/cpu/minor/decode.cc b/src/cpu/minor/decode.cc
index 53c02f321d..a4516a0c65 100644
--- a/src/cpu/minor/decode.cc
+++ b/src/cpu/minor/decode.cc
@@ -45,7 +45,6 @@
 namespace gem5
 {
 
-GEM5_DEPRECATED_NAMESPACE(Minor, minor);
 namespace minor
 {
 
diff --git a/src/cpu/minor/decode.hh b/src/cpu/minor/decode.hh
index 156b92038c..913e03f5c1 100644
--- a/src/cpu/minor/decode.hh
+++ b/src/cpu/minor/decode.hh
@@ -56,7 +56,6 @@
 namespace gem5
 {
 
-GEM5_DEPRECATED_NAMESPACE(Minor, minor);
 namespace minor
 {
 
diff --git a/src/cpu/minor/dyn_inst.cc b/src/cpu/minor/dyn_inst.cc
index ac8f94835c..68415ecd09 100644
--- a/src/cpu/minor/dyn_inst.cc
+++ b/src/cpu/minor/dyn_inst.cc
@@ -50,7 +50,6 @@
 namespace gem5
 {
 
-GEM5_DEPRECATED_NAMESPACE(Minor, minor);
 namespace minor
 {
 
diff --git a/src/cpu/minor/dyn_inst.hh b/src/cpu/minor/dyn_inst.hh
index d9a85f9db6..9c6d6fd384 100644
--- a/src/cpu/minor/dyn_inst.hh
+++ b/src/cpu/minor/dyn_inst.hh
@@ -62,7 +62,6 @@
 namespace gem5
 {
 
-GEM5_DEPRECATED_NAMESPACE(Minor, minor);
 namespace minor
 {
 
diff --git a/src/cpu/minor/exec_context.hh b/src/cpu/minor/exec_context.hh
index f3dc3ba3d3..33641f37a9 100644
--- a/src/cpu/minor/exec_context.hh
+++ b/src/cpu/minor/exec_context.hh
@@ -59,7 +59,6 @@
 namespace gem5
 {
 
-GEM5_DEPRECATED_NAMESPACE(Minor, minor);
 namespace minor
 {
 
diff --git a/src/cpu/minor/execute.cc b/src/cpu/minor/execute.cc
index 6eccec0be4..5eaaf5804e 100644
--- a/src/cpu/minor/execute.cc
+++ b/src/cpu/minor/execute.cc
@@ -57,7 +57,6 @@
 namespace gem5
 {
 
-GEM5_DEPRECATED_NAMESPACE(Minor, minor);
 namespace minor
 {
 
diff --git a/src/cpu/minor/execute.hh b/src/cpu/minor/execute.hh
index 8a8c26302f..0a1dde1424 100644
--- a/src/cpu/minor/execute.hh
+++ b/src/cpu/minor/execute.hh
@@ -59,7 +59,6 @@
 namespace gem5
 {
 
-GEM5_DEPRECATED_NAMESPACE(Minor, minor);
 namespace minor
 {
 
diff --git a/src/cpu/minor/fetch1.cc b/src/cpu/minor/fetch1.cc
index daf8d560b3..dd427b7570 100644
--- a/src/cpu/minor/fetch1.cc
+++ b/src/cpu/minor/fetch1.cc
@@ -54,7 +54,6 @@
 namespace gem5
 {
 
-GEM5_DEPRECATED_NAMESPACE(Minor, minor);
 namespace minor
 {
 
diff --git a/src/cpu/minor/fetch1.hh b/src/cpu/minor/fetch1.hh
index e33eb0493b..f6a796ce82 100644
--- a/src/cpu/minor/fetch1.hh
+++ b/src/cpu/minor/fetch1.hh
@@ -58,7 +58,6 @@
 namespace gem5
 {
 
-GEM5_DEPRECATED_NAMESPACE(Minor, minor);
 namespace minor
 {
 
diff --git a/src/cpu/minor/fetch2.cc b/src/cpu/minor/fetch2.cc
index 0ff0140518..b02294bfe6 100644
--- a/src/cpu/minor/fetch2.cc
+++ b/src/cpu/minor/fetch2.cc
@@ -52,7 +52,6 @@
 namespace gem5
 {
 
-GEM5_DEPRECATED_NAMESPACE(Minor, minor);
 namespace minor
 {
 
diff --git a/src/cpu/minor/fetch2.hh b/src/cpu/minor/fetch2.hh
index 85012bf927..26c3a5ad1f 100644
--- a/src/cpu/minor/fetch2.hh
+++ b/src/cpu/minor/fetch2.hh
@@ -57,7 +57,6 @@
 namespace gem5
 {
 
-GEM5_DEPRECATED_NAMESPACE(Minor, minor);
 namespace minor
 {
 
diff --git a/src/cpu/minor/func_unit.cc b/src/cpu/minor/func_unit.cc
index 428a8b3508..f508d58333 100644
--- a/src/cpu/minor/func_unit.cc
+++ b/src/cpu/minor/func_unit.cc
@@ -74,7 +74,6 @@ MinorFUTiming::MinorFUTiming(
     opClasses(params.opClasses)
 { }
 
-GEM5_DEPRECATED_NAMESPACE(Minor, minor);
 namespace minor
 {
 
diff --git a/src/cpu/minor/func_unit.hh b/src/cpu/minor/func_unit.hh
index 9400f91790..13ae01957a 100644
--- a/src/cpu/minor/func_unit.hh
+++ b/src/cpu/minor/func_unit.hh
@@ -198,7 +198,6 @@ class MinorFUPool : public SimObject
     { }
 };
 
-GEM5_DEPRECATED_NAMESPACE(Minor, minor);
 namespace minor
 {
 
diff --git a/src/cpu/minor/lsq.cc b/src/cpu/minor/lsq.cc
index f2fa5be115..4b31b26577 100644
--- a/src/cpu/minor/lsq.cc
+++ b/src/cpu/minor/lsq.cc
@@ -53,7 +53,6 @@
 namespace gem5
 {
 
-GEM5_DEPRECATED_NAMESPACE(Minor, minor);
 namespace minor
 {
 
diff --git a/src/cpu/minor/lsq.hh b/src/cpu/minor/lsq.hh
index 4a95bf75c0..4d7c351e7a 100644
--- a/src/cpu/minor/lsq.hh
+++ b/src/cpu/minor/lsq.hh
@@ -58,7 +58,6 @@
 namespace gem5
 {
 
-GEM5_DEPRECATED_NAMESPACE(Minor, minor);
 namespace minor
 {
 
diff --git a/src/cpu/minor/pipe_data.cc b/src/cpu/minor/pipe_data.cc
index d7f113cfa3..3bda659de0 100644
--- a/src/cpu/minor/pipe_data.cc
+++ b/src/cpu/minor/pipe_data.cc
@@ -40,7 +40,6 @@
 namespace gem5
 {
 
-GEM5_DEPRECATED_NAMESPACE(Minor, minor);
 namespace minor
 {
 
diff --git a/src/cpu/minor/pipe_data.hh b/src/cpu/minor/pipe_data.hh
index 97651b4b46..15de50e984 100644
--- a/src/cpu/minor/pipe_data.hh
+++ b/src/cpu/minor/pipe_data.hh
@@ -57,7 +57,6 @@
 namespace gem5
 {
 
-GEM5_DEPRECATED_NAMESPACE(Minor, minor);
 namespace minor
 {
 
diff --git a/src/cpu/minor/pipeline.cc b/src/cpu/minor/pipeline.cc
index e94181fcd8..c914843ec1 100644
--- a/src/cpu/minor/pipeline.cc
+++ b/src/cpu/minor/pipeline.cc
@@ -51,7 +51,6 @@
 namespace gem5
 {
 
-GEM5_DEPRECATED_NAMESPACE(Minor, minor);
 namespace minor
 {
 
diff --git a/src/cpu/minor/pipeline.hh b/src/cpu/minor/pipeline.hh
index ce0ae07d3e..b1c85e37dd 100644
--- a/src/cpu/minor/pipeline.hh
+++ b/src/cpu/minor/pipeline.hh
@@ -57,7 +57,6 @@
 namespace gem5
 {
 
-GEM5_DEPRECATED_NAMESPACE(Minor, minor);
 namespace minor
 {
 
diff --git a/src/cpu/minor/scoreboard.cc b/src/cpu/minor/scoreboard.cc
index 2bb69668a7..356fdc7e01 100644
--- a/src/cpu/minor/scoreboard.cc
+++ b/src/cpu/minor/scoreboard.cc
@@ -44,7 +44,6 @@
 namespace gem5
 {
 
-GEM5_DEPRECATED_NAMESPACE(Minor, minor);
 namespace minor
 {
 
diff --git a/src/cpu/minor/scoreboard.hh b/src/cpu/minor/scoreboard.hh
index ac11533443..bccb9c5b80 100644
--- a/src/cpu/minor/scoreboard.hh
+++ b/src/cpu/minor/scoreboard.hh
@@ -56,7 +56,6 @@
 namespace gem5
 {
 
-GEM5_DEPRECATED_NAMESPACE(Minor, minor);
 namespace minor
 {
 
diff --git a/src/cpu/minor/stats.cc b/src/cpu/minor/stats.cc
index 187687d00c..64d4c475e0 100644
--- a/src/cpu/minor/stats.cc
+++ b/src/cpu/minor/stats.cc
@@ -40,7 +40,6 @@
 namespace gem5
 {
 
-GEM5_DEPRECATED_NAMESPACE(Minor, minor);
 namespace minor
 {
 
diff --git a/src/cpu/minor/stats.hh b/src/cpu/minor/stats.hh
index 47b9f0f30e..1ab81f4407 100644
--- a/src/cpu/minor/stats.hh
+++ b/src/cpu/minor/stats.hh
@@ -51,7 +51,6 @@
 namespace gem5
 {
 
-GEM5_DEPRECATED_NAMESPACE(Minor, minor);
 namespace minor
 {
 
diff --git a/src/cpu/minor/trace.hh b/src/cpu/minor/trace.hh
index 8a98764ae6..9617d05030 100644
--- a/src/cpu/minor/trace.hh
+++ b/src/cpu/minor/trace.hh
@@ -57,7 +57,6 @@
 namespace gem5
 {
 
-GEM5_DEPRECATED_NAMESPACE(Minor, minor);
 namespace minor
 {
 

From 5f5aae8940ed7255dfe1b4435ae10a30f2319c7a Mon Sep 17 00:00:00 2001
From: "Daniel R. Carvalho" <odanrc@yahoo.com.br>
Date: Sat, 14 Jan 2023 10:02:16 -0300
Subject: [PATCH 136/492] dev: Remove a couple of deprecated namespaces

These namespaces have gone through the deprecation period
and can now be removed: Sinic, SCMI, Ps2, Regs, Keyboard,
Mouse, TxdOp, iGbReg, CopyEngineReg.

Change-Id: Icfaf458bffca2658650318508c0bb376719cf911
Signed-off-by: Daniel R. Carvalho <odanrc@yahoo.com.br>
Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/67370
Tested-by: kokoro <noreply+kokoro@google.com>
Reviewed-by: Richard Cooper <richard.cooper@arm.com>
Maintainer: Jason Lowe-Power <power.jg@gmail.com>
---
 src/dev/arm/css/scmi_platform.hh  | 1 -
 src/dev/arm/css/scmi_protocols.hh | 1 -
 src/dev/net/i8254xGBe_defs.hh     | 2 --
 src/dev/net/sinic.cc              | 1 -
 src/dev/net/sinic.hh              | 1 -
 src/dev/net/sinicreg.hh           | 2 --
 src/dev/pci/copy_engine_defs.hh   | 1 -
 src/dev/ps2/types.cc              | 1 -
 src/dev/ps2/types.hh              | 3 ---
 9 files changed, 13 deletions(-)

diff --git a/src/dev/arm/css/scmi_platform.hh b/src/dev/arm/css/scmi_platform.hh
index 581408dde2..92bec89408 100644
--- a/src/dev/arm/css/scmi_platform.hh
+++ b/src/dev/arm/css/scmi_platform.hh
@@ -49,7 +49,6 @@ namespace gem5
 
 class Doorbell;
 
-GEM5_DEPRECATED_NAMESPACE(SCMI, scmi);
 namespace scmi
 {
 
diff --git a/src/dev/arm/css/scmi_protocols.hh b/src/dev/arm/css/scmi_protocols.hh
index 03d6ea4f83..85e157baf2 100644
--- a/src/dev/arm/css/scmi_protocols.hh
+++ b/src/dev/arm/css/scmi_protocols.hh
@@ -46,7 +46,6 @@
 namespace gem5
 {
 
-GEM5_DEPRECATED_NAMESPACE(SCMI, scmi);
 namespace scmi
 {
 
diff --git a/src/dev/net/i8254xGBe_defs.hh b/src/dev/net/i8254xGBe_defs.hh
index 015ca7dee6..ef013a244f 100644
--- a/src/dev/net/i8254xGBe_defs.hh
+++ b/src/dev/net/i8254xGBe_defs.hh
@@ -35,7 +35,6 @@
 namespace gem5
 {
 
-GEM5_DEPRECATED_NAMESPACE(iGbReg, igbreg);
 namespace igbreg
 {
 
@@ -239,7 +238,6 @@ struct TxDesc
     uint64_t d2;
 };
 
-GEM5_DEPRECATED_NAMESPACE(TxdOp, txd_op);
 namespace txd_op
 {
 
diff --git a/src/dev/net/sinic.cc b/src/dev/net/sinic.cc
index c1afb284c0..69a42edb88 100644
--- a/src/dev/net/sinic.cc
+++ b/src/dev/net/sinic.cc
@@ -48,7 +48,6 @@ namespace gem5
 
 using namespace networking;
 
-GEM5_DEPRECATED_NAMESPACE(Sinic, sinic);
 namespace sinic
 {
 
diff --git a/src/dev/net/sinic.hh b/src/dev/net/sinic.hh
index 2b0f9fa8cd..adad53b37f 100644
--- a/src/dev/net/sinic.hh
+++ b/src/dev/net/sinic.hh
@@ -45,7 +45,6 @@
 namespace gem5
 {
 
-GEM5_DEPRECATED_NAMESPACE(Sinic, sinic);
 namespace sinic
 {
 
diff --git a/src/dev/net/sinicreg.hh b/src/dev/net/sinicreg.hh
index 120b9a194f..47588df71a 100644
--- a/src/dev/net/sinicreg.hh
+++ b/src/dev/net/sinicreg.hh
@@ -59,11 +59,9 @@
 namespace gem5
 {
 
-GEM5_DEPRECATED_NAMESPACE(Sinic, sinic);
 namespace sinic
 {
 
-GEM5_DEPRECATED_NAMESPACE(Regs, registers);
 namespace registers
 {
 
diff --git a/src/dev/pci/copy_engine_defs.hh b/src/dev/pci/copy_engine_defs.hh
index 9e687e3324..107edee77b 100644
--- a/src/dev/pci/copy_engine_defs.hh
+++ b/src/dev/pci/copy_engine_defs.hh
@@ -36,7 +36,6 @@
 namespace gem5
 {
 
-GEM5_DEPRECATED_NAMESPACE(CopyEngineReg, copy_engine_reg);
 namespace copy_engine_reg
 {
 
diff --git a/src/dev/ps2/types.cc b/src/dev/ps2/types.cc
index 99e740e246..00e442e209 100644
--- a/src/dev/ps2/types.cc
+++ b/src/dev/ps2/types.cc
@@ -45,7 +45,6 @@
 namespace gem5
 {
 
-GEM5_DEPRECATED_NAMESPACE(Ps2, ps2);
 namespace ps2
 {
 
diff --git a/src/dev/ps2/types.hh b/src/dev/ps2/types.hh
index 4ad7b05886..3286c97f57 100644
--- a/src/dev/ps2/types.hh
+++ b/src/dev/ps2/types.hh
@@ -53,7 +53,6 @@
 namespace gem5
 {
 
-GEM5_DEPRECATED_NAMESPACE(Ps2, ps2);
 namespace ps2
 {
 
@@ -70,7 +69,6 @@ enum
     Reset              = 0xFF,
 };
 
-GEM5_DEPRECATED_NAMESPACE(Keyboard, keyboard);
 namespace keyboard
 {
 
@@ -93,7 +91,6 @@ extern const std::vector<uint8_t> ID;
 
 } // namespace keyboard
 
-GEM5_DEPRECATED_NAMESPACE(Mouse, mouse);
 namespace mouse
 {
 

From c1c79615e0eeaaa5d5b2c4afd4444679d47d6ffc Mon Sep 17 00:00:00 2001
From: "Daniel R. Carvalho" <odanrc@yahoo.com.br>
Date: Sat, 14 Jan 2023 10:07:08 -0300
Subject: [PATCH 137/492] sim: Remove the ProbePoints namespace

This namespace has gone through the deprecation period
and can now be removed.

Change-Id: Iddf30ea24a579cf5a94d6217c1d015a0c68d68d0
Signed-off-by: Daniel R. Carvalho <odanrc@yahoo.com.br>
Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/67371
Reviewed-by: Richard Cooper <richard.cooper@arm.com>
Tested-by: kokoro <noreply+kokoro@google.com>
Maintainer: Jason Lowe-Power <power.jg@gmail.com>
---
 src/sim/probe/mem.hh   | 1 -
 src/sim/probe/pmu.hh   | 1 -
 src/sim/probe/probe.hh | 1 -
 3 files changed, 3 deletions(-)

diff --git a/src/sim/probe/mem.hh b/src/sim/probe/mem.hh
index df3280cfc9..0496de9b23 100644
--- a/src/sim/probe/mem.hh
+++ b/src/sim/probe/mem.hh
@@ -46,7 +46,6 @@
 namespace gem5
 {
 
-GEM5_DEPRECATED_NAMESPACE(ProbePoints, probing);
 namespace probing
 {
 
diff --git a/src/sim/probe/pmu.hh b/src/sim/probe/pmu.hh
index acf47501e2..b589ce7d4b 100644
--- a/src/sim/probe/pmu.hh
+++ b/src/sim/probe/pmu.hh
@@ -45,7 +45,6 @@
 namespace gem5
 {
 
-GEM5_DEPRECATED_NAMESPACE(ProbePoints, probing);
 namespace probing
 {
 
diff --git a/src/sim/probe/probe.hh b/src/sim/probe/probe.hh
index dede7adeb5..3dd428effd 100644
--- a/src/sim/probe/probe.hh
+++ b/src/sim/probe/probe.hh
@@ -86,7 +86,6 @@ struct ProbeListenerObjectParams;
  * common instrumentation interface for devices such as PMUs that have
  * different implementations in different ISAs.
  */
-GEM5_DEPRECATED_NAMESPACE(ProbePoints, probing);
 namespace probing
 {
 /* Note: This is only here for documentation purposes, new probe

From c8e3708d8993d091a8fdec9f001bafc1c80c0fbe Mon Sep 17 00:00:00 2001
From: "Daniel R. Carvalho" <odanrc@yahoo.com.br>
Date: Sat, 14 Jan 2023 10:11:21 -0300
Subject: [PATCH 138/492] sim: Remove the Enums namespace

This namespace has gone through the deprecation period
and can now be removed.

Change-Id: If4daad57a421b076ae6661812c2255c7f06f30b9
Signed-off-by: Daniel R. Carvalho <odanrc@yahoo.com.br>
Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/67372
Reviewed-by: Richard Cooper <richard.cooper@arm.com>
Maintainer: Jason Lowe-Power <power.jg@gmail.com>
Tested-by: kokoro <noreply+kokoro@google.com>
---
 build_tools/enum_cc.py | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/build_tools/enum_cc.py b/build_tools/enum_cc.py
index cd192c56fb..5d82b401b2 100644
--- a/build_tools/enum_cc.py
+++ b/build_tools/enum_cc.py
@@ -97,8 +97,7 @@ const char *${name}Strings[static_cast<int>(${name}::Num_${name})] =
         )
     else:
         code(
-            """GEM5_DEPRECATED_NAMESPACE(Enums, enums);
-namespace enums
+            """namespace enums
 {"""
         )
         code.indent(1)

From 31a1d485afcda352b1a9fc4f63a79d38399e22be Mon Sep 17 00:00:00 2001
From: "Daniel R. Carvalho" <odanrc@yahoo.com.br>
Date: Sat, 14 Jan 2023 10:07:43 -0300
Subject: [PATCH 139/492] sim: Remove a couple of deprecated namespaces

These namespaces have gone through the deprecation period
and can now be removed: Int, Float, SimClock, PseudoInst

Change-Id: Iec8e0fff021d8d7696e466e2ad52f2d51305d811
Signed-off-by: Daniel R. Carvalho <odanrc@yahoo.com.br>
Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/67373
Tested-by: kokoro <noreply+kokoro@google.com>
Reviewed-by: Richard Cooper <richard.cooper@arm.com>
Maintainer: Jason Lowe-Power <power.jg@gmail.com>
---
 src/arch/x86/bios/intelmp.hh | 1 -
 src/arch/x86/fs_workload.hh  | 1 -
 src/sim/core.cc              | 3 ---
 src/sim/core.hh              | 3 ---
 src/sim/pseudo_inst.cc       | 1 -
 src/sim/pseudo_inst.hh       | 1 -
 6 files changed, 10 deletions(-)

diff --git a/src/arch/x86/bios/intelmp.hh b/src/arch/x86/bios/intelmp.hh
index 19f2f7a9be..207b4ab61f 100644
--- a/src/arch/x86/bios/intelmp.hh
+++ b/src/arch/x86/bios/intelmp.hh
@@ -84,7 +84,6 @@ uint8_t writeOutString(PortProxy& proxy, Addr addr, std::string str,
 namespace X86ISA
 {
 
-GEM5_DEPRECATED_NAMESPACE(IntelMP, intelmp);
 namespace intelmp
 {
 
diff --git a/src/arch/x86/fs_workload.hh b/src/arch/x86/fs_workload.hh
index b40b69b3c4..5c1187cda4 100644
--- a/src/arch/x86/fs_workload.hh
+++ b/src/arch/x86/fs_workload.hh
@@ -63,7 +63,6 @@ class SMBiosTable;
 
 } // namespace smbios
 
-GEM5_DEPRECATED_NAMESPACE(IntelMP, intelmp);
 namespace intelmp
 {
 
diff --git a/src/sim/core.cc b/src/sim/core.cc
index c388652fa6..d836b550be 100644
--- a/src/sim/core.cc
+++ b/src/sim/core.cc
@@ -41,13 +41,11 @@
 namespace gem5
 {
 
-GEM5_DEPRECATED_NAMESPACE(SimClock, sim_clock);
 namespace sim_clock
 {
 /// The simulated frequency of curTick(). (In ticks per second)
 Tick Frequency;
 
-GEM5_DEPRECATED_NAMESPACE(Float, as_float);
 namespace as_float
 {
 double s;
@@ -62,7 +60,6 @@ double MHz;
 double GHz;
 } // namespace as_float
 
-GEM5_DEPRECATED_NAMESPACE(Int, as_int);
 namespace as_int
 {
 Tick s;
diff --git a/src/sim/core.hh b/src/sim/core.hh
index bd432c2d21..bac4e40003 100644
--- a/src/sim/core.hh
+++ b/src/sim/core.hh
@@ -46,12 +46,10 @@ namespace gem5
 
 /// These are variables that are set based on the simulator frequency
 ///@{
-GEM5_DEPRECATED_NAMESPACE(SimClock, sim_clock);
 namespace sim_clock
 {
 extern Tick Frequency; ///< The number of ticks that equal one second
 
-GEM5_DEPRECATED_NAMESPACE(Float, as_float);
 namespace as_float
 {
 
@@ -81,7 +79,6 @@ extern double GHz; ///< GHz
  *
  * @{
  */
-GEM5_DEPRECATED_NAMESPACE(Int, as_int);
 namespace as_int
 {
 extern Tick s;  ///< second
diff --git a/src/sim/pseudo_inst.cc b/src/sim/pseudo_inst.cc
index 28b5619a16..55e44c7adc 100644
--- a/src/sim/pseudo_inst.cc
+++ b/src/sim/pseudo_inst.cc
@@ -76,7 +76,6 @@ namespace gem5
 
 using namespace statistics;
 
-GEM5_DEPRECATED_NAMESPACE(PseudoInst, pseudo_inst);
 namespace pseudo_inst
 {
 
diff --git a/src/sim/pseudo_inst.hh b/src/sim/pseudo_inst.hh
index 4794a41ffe..ba15370c55 100644
--- a/src/sim/pseudo_inst.hh
+++ b/src/sim/pseudo_inst.hh
@@ -55,7 +55,6 @@
 namespace gem5
 {
 
-GEM5_DEPRECATED_NAMESPACE(PseudoInst, pseudo_inst);
 namespace pseudo_inst
 {
 

From 39bbd9c05e5d634027ff936732b5e2ba87f7538f Mon Sep 17 00:00:00 2001
From: "Daniel R. Carvalho" <odanrc@yahoo.com.br>
Date: Sat, 14 Jan 2023 10:13:31 -0300
Subject: [PATCH 140/492] sim,arch: Remove the GuestABI namespace

This namespace has gone through the deprecation period
and can now be removed.

Change-Id: I476815491314f4222da43da75c91654b4f3d1228
Signed-off-by: Daniel R. Carvalho <odanrc@yahoo.com.br>
Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/67374
Maintainer: Jason Lowe-Power <power.jg@gmail.com>
Reviewed-by: Richard Cooper <richard.cooper@arm.com>
Tested-by: kokoro <noreply+kokoro@google.com>
---
 src/arch/arm/aapcs32.hh             | 2 --
 src/arch/arm/aapcs64.hh             | 1 -
 src/arch/arm/freebsd/se_workload.hh | 1 -
 src/arch/arm/linux/se_workload.hh   | 1 -
 src/arch/arm/reg_abi.hh             | 1 -
 src/arch/arm/semihosting.cc         | 1 -
 src/arch/arm/semihosting.hh         | 1 -
 src/arch/mips/se_workload.hh        | 1 -
 src/arch/power/se_workload.hh       | 1 -
 src/arch/riscv/se_workload.hh       | 1 -
 src/arch/sparc/pseudo_inst_abi.hh   | 1 -
 src/arch/sparc/se_workload.hh       | 1 -
 src/arch/x86/linux/linux.hh         | 1 -
 src/arch/x86/linux/se_workload.hh   | 1 -
 src/arch/x86/pseudo_inst_abi.hh     | 1 -
 src/sim/guest_abi.test.cc           | 1 -
 src/sim/guest_abi/definition.hh     | 1 -
 src/sim/guest_abi/dispatch.hh       | 1 -
 src/sim/guest_abi/layout.hh         | 1 -
 src/sim/guest_abi/varargs.hh        | 1 -
 src/sim/proxy_ptr.hh                | 1 -
 src/sim/proxy_ptr.test.cc           | 1 -
 src/sim/syscall_abi.hh              | 1 -
 23 files changed, 24 deletions(-)

diff --git a/src/arch/arm/aapcs32.hh b/src/arch/arm/aapcs32.hh
index 383b8eb36b..1d727e2634 100644
--- a/src/arch/arm/aapcs32.hh
+++ b/src/arch/arm/aapcs32.hh
@@ -70,7 +70,6 @@ struct Aapcs32
     };
 };
 
-GEM5_DEPRECATED_NAMESPACE(GuestABI, guest_abi);
 namespace guest_abi
 {
 
@@ -446,7 +445,6 @@ struct Aapcs32Vfp : public Aapcs32
     };
 };
 
-GEM5_DEPRECATED_NAMESPACE(GuestABI, guest_abi);
 namespace guest_abi
 {
 
diff --git a/src/arch/arm/aapcs64.hh b/src/arch/arm/aapcs64.hh
index 2f53822a70..62926d34b2 100644
--- a/src/arch/arm/aapcs64.hh
+++ b/src/arch/arm/aapcs64.hh
@@ -67,7 +67,6 @@ struct Aapcs64
     };
 };
 
-GEM5_DEPRECATED_NAMESPACE(GuestABI, guest_abi);
 namespace guest_abi
 {
 
diff --git a/src/arch/arm/freebsd/se_workload.hh b/src/arch/arm/freebsd/se_workload.hh
index b944dbd8f9..47e41f2590 100644
--- a/src/arch/arm/freebsd/se_workload.hh
+++ b/src/arch/arm/freebsd/se_workload.hh
@@ -70,7 +70,6 @@ class EmuFreebsd : public SEWorkload
 
 } // namespace ArmISA
 
-GEM5_DEPRECATED_NAMESPACE(GuestABI, guest_abi);
 namespace guest_abi
 {
 
diff --git a/src/arch/arm/linux/se_workload.hh b/src/arch/arm/linux/se_workload.hh
index 0939af1dda..29bd30a6bf 100644
--- a/src/arch/arm/linux/se_workload.hh
+++ b/src/arch/arm/linux/se_workload.hh
@@ -62,7 +62,6 @@ class EmuLinux : public SEWorkload
 
 } // namespace ArmISA
 
-GEM5_DEPRECATED_NAMESPACE(GuestABI, guest_abi);
 namespace guest_abi
 {
 
diff --git a/src/arch/arm/reg_abi.hh b/src/arch/arm/reg_abi.hh
index 1d5272c66d..e892166c5e 100644
--- a/src/arch/arm/reg_abi.hh
+++ b/src/arch/arm/reg_abi.hh
@@ -51,7 +51,6 @@ struct RegABI64 : public GenericSyscallABI64
 
 } // namespace ArmISA
 
-GEM5_DEPRECATED_NAMESPACE(GuestABI, guest_abi);
 namespace guest_abi
 {
 
diff --git a/src/arch/arm/semihosting.cc b/src/arch/arm/semihosting.cc
index 8efe841623..4ce52e8741 100644
--- a/src/arch/arm/semihosting.cc
+++ b/src/arch/arm/semihosting.cc
@@ -714,7 +714,6 @@ struct SemiPseudoAbi64 : public ArmSemihosting::Abi64
     };
 };
 
-GEM5_DEPRECATED_NAMESPACE(GuestABI, guest_abi);
 namespace guest_abi
 {
 
diff --git a/src/arch/arm/semihosting.hh b/src/arch/arm/semihosting.hh
index fe7819cae9..557eb76636 100644
--- a/src/arch/arm/semihosting.hh
+++ b/src/arch/arm/semihosting.hh
@@ -599,7 +599,6 @@ class ArmSemihosting : public SimObject
 std::ostream &operator << (
         std::ostream &os, const ArmSemihosting::InPlaceArg &ipa);
 
-GEM5_DEPRECATED_NAMESPACE(GuestABI, guest_abi);
 namespace guest_abi
 {
 
diff --git a/src/arch/mips/se_workload.hh b/src/arch/mips/se_workload.hh
index dc6f1dd5e3..18c0bda9c1 100644
--- a/src/arch/mips/se_workload.hh
+++ b/src/arch/mips/se_workload.hh
@@ -68,7 +68,6 @@ class SEWorkload : public gem5::SEWorkload
 
 } // namespace MipsISA
 
-GEM5_DEPRECATED_NAMESPACE(GuestABI, guest_abi);
 namespace guest_abi
 {
 
diff --git a/src/arch/power/se_workload.hh b/src/arch/power/se_workload.hh
index d041c45728..3c2bb936f4 100644
--- a/src/arch/power/se_workload.hh
+++ b/src/arch/power/se_workload.hh
@@ -68,7 +68,6 @@ class SEWorkload : public gem5::SEWorkload
 
 } // namespace PowerISA
 
-GEM5_DEPRECATED_NAMESPACE(GuestABI, guest_abi);
 namespace guest_abi
 {
 
diff --git a/src/arch/riscv/se_workload.hh b/src/arch/riscv/se_workload.hh
index 6f7c2edb70..9ae3be4c05 100644
--- a/src/arch/riscv/se_workload.hh
+++ b/src/arch/riscv/se_workload.hh
@@ -66,7 +66,6 @@ class SEWorkload : public gem5::SEWorkload
 
 } // namespace RiscvISA
 
-GEM5_DEPRECATED_NAMESPACE(GuestABI, guest_abi);
 namespace guest_abi
 {
 
diff --git a/src/arch/sparc/pseudo_inst_abi.hh b/src/arch/sparc/pseudo_inst_abi.hh
index 993e11bff3..989f0e7dfc 100644
--- a/src/arch/sparc/pseudo_inst_abi.hh
+++ b/src/arch/sparc/pseudo_inst_abi.hh
@@ -40,7 +40,6 @@ struct SparcPseudoInstABI
     using State = int;
 };
 
-GEM5_DEPRECATED_NAMESPACE(GuestABI, guest_abi);
 namespace guest_abi
 {
 
diff --git a/src/arch/sparc/se_workload.hh b/src/arch/sparc/se_workload.hh
index 8cb373ac90..e0f7467fae 100644
--- a/src/arch/sparc/se_workload.hh
+++ b/src/arch/sparc/se_workload.hh
@@ -80,7 +80,6 @@ class SEWorkload : public gem5::SEWorkload
 
 } // namespace SparcISA
 
-GEM5_DEPRECATED_NAMESPACE(GuestABI, guest_abi);
 namespace guest_abi
 {
 
diff --git a/src/arch/x86/linux/linux.hh b/src/arch/x86/linux/linux.hh
index 0c34d09330..b9598224a6 100644
--- a/src/arch/x86/linux/linux.hh
+++ b/src/arch/x86/linux/linux.hh
@@ -77,7 +77,6 @@ class X86Linux : public Linux
     class SyscallABI {};
 };
 
-GEM5_DEPRECATED_NAMESPACE(GuestABI, guest_abi);
 namespace guest_abi
 {
 
diff --git a/src/arch/x86/linux/se_workload.hh b/src/arch/x86/linux/se_workload.hh
index f170776d82..d8d60e759f 100644
--- a/src/arch/x86/linux/se_workload.hh
+++ b/src/arch/x86/linux/se_workload.hh
@@ -96,7 +96,6 @@ class EmuLinux : public SEWorkload
 
 } // namespace X86ISA
 
-GEM5_DEPRECATED_NAMESPACE(GuestABI, guest_abi);
 namespace guest_abi
 {
 
diff --git a/src/arch/x86/pseudo_inst_abi.hh b/src/arch/x86/pseudo_inst_abi.hh
index 05bf66f5f2..e465c7abd5 100644
--- a/src/arch/x86/pseudo_inst_abi.hh
+++ b/src/arch/x86/pseudo_inst_abi.hh
@@ -46,7 +46,6 @@ struct X86PseudoInstABI
     using State = int;
 };
 
-GEM5_DEPRECATED_NAMESPACE(GuestABI, guest_abi);
 namespace guest_abi
 {
 
diff --git a/src/sim/guest_abi.test.cc b/src/sim/guest_abi.test.cc
index 5b59874c3e..64f4122a51 100644
--- a/src/sim/guest_abi.test.cc
+++ b/src/sim/guest_abi.test.cc
@@ -98,7 +98,6 @@ struct TestABI_TcInit
 namespace gem5
 {
 
-GEM5_DEPRECATED_NAMESPACE(GuestABI, guest_abi);
 namespace guest_abi
 {
 
diff --git a/src/sim/guest_abi/definition.hh b/src/sim/guest_abi/definition.hh
index 2857b5b616..43aafecdc8 100644
--- a/src/sim/guest_abi/definition.hh
+++ b/src/sim/guest_abi/definition.hh
@@ -35,7 +35,6 @@ namespace gem5
 
 class ThreadContext;
 
-GEM5_DEPRECATED_NAMESPACE(GuestABI, guest_abi);
 namespace guest_abi
 {
 
diff --git a/src/sim/guest_abi/dispatch.hh b/src/sim/guest_abi/dispatch.hh
index 7ada2e2ac4..8846354e06 100644
--- a/src/sim/guest_abi/dispatch.hh
+++ b/src/sim/guest_abi/dispatch.hh
@@ -43,7 +43,6 @@ namespace gem5
 
 class ThreadContext;
 
-GEM5_DEPRECATED_NAMESPACE(GuestABI, guest_abi);
 namespace guest_abi
 {
 
diff --git a/src/sim/guest_abi/layout.hh b/src/sim/guest_abi/layout.hh
index 02681510d0..4d469b177c 100644
--- a/src/sim/guest_abi/layout.hh
+++ b/src/sim/guest_abi/layout.hh
@@ -38,7 +38,6 @@ namespace gem5
 
 class ThreadContext;
 
-GEM5_DEPRECATED_NAMESPACE(GuestABI, guest_abi);
 namespace guest_abi
 {
 
diff --git a/src/sim/guest_abi/varargs.hh b/src/sim/guest_abi/varargs.hh
index 9bb04786ff..1a34f2086c 100644
--- a/src/sim/guest_abi/varargs.hh
+++ b/src/sim/guest_abi/varargs.hh
@@ -39,7 +39,6 @@ namespace gem5
 
 class ThreadContext;
 
-GEM5_DEPRECATED_NAMESPACE(GuestABI, guest_abi);
 namespace guest_abi
 {
 
diff --git a/src/sim/proxy_ptr.hh b/src/sim/proxy_ptr.hh
index 03ab9472c9..5e766f7cc9 100644
--- a/src/sim/proxy_ptr.hh
+++ b/src/sim/proxy_ptr.hh
@@ -357,7 +357,6 @@ operator + (A a, const ProxyPtr<T, Proxy> &other)
     return other + a;
 }
 
-GEM5_DEPRECATED_NAMESPACE(GuestABI, guest_abi);
 namespace guest_abi
 {
 
diff --git a/src/sim/proxy_ptr.test.cc b/src/sim/proxy_ptr.test.cc
index 6f49d166e2..57fdb973bb 100644
--- a/src/sim/proxy_ptr.test.cc
+++ b/src/sim/proxy_ptr.test.cc
@@ -474,7 +474,6 @@ struct TestABI
 namespace gem5
 {
 
-GEM5_DEPRECATED_NAMESPACE(GuestABI, guest_abi);
 namespace guest_abi
 {
 
diff --git a/src/sim/syscall_abi.hh b/src/sim/syscall_abi.hh
index b91dc8ea95..90dbd9747b 100644
--- a/src/sim/syscall_abi.hh
+++ b/src/sim/syscall_abi.hh
@@ -75,7 +75,6 @@ struct GenericSyscallABI32 : public GenericSyscallABI
     }
 };
 
-GEM5_DEPRECATED_NAMESPACE(GuestABI, guest_abi);
 namespace guest_abi
 {
 

From 41b5276c1cedae275e189e3404818d105107aa0b Mon Sep 17 00:00:00 2001
From: Sascha Bischoff <sascha.bischoff@arm.com>
Date: Tue, 9 Aug 2022 10:47:28 +0100
Subject: [PATCH 141/492] cpu-o3: Remove obsolete getRegIds and getTrueId

These have been obsolete since
https://gem5-review.googlesource.com/c/public/gem5/+/49147, hence
removing.

Change-Id: I06f6c3058f652907d996b9e6267888e2d991622a
Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/64332
Maintainer: Giacomo Travaglini <giacomo.travaglini@arm.com>
Reviewed-by: Jason Lowe-Power <power.jg@gmail.com>
Reviewed-by: Giacomo Travaglini <giacomo.travaglini@arm.com>
Tested-by: kokoro <noreply+kokoro@google.com>
---
 src/cpu/o3/regfile.cc | 42 ------------------------------------------
 src/cpu/o3/regfile.hh | 14 --------------
 2 files changed, 56 deletions(-)

diff --git a/src/cpu/o3/regfile.cc b/src/cpu/o3/regfile.cc
index fecb891ca6..dcb8f704f0 100644
--- a/src/cpu/o3/regfile.cc
+++ b/src/cpu/o3/regfile.cc
@@ -175,47 +175,5 @@ PhysRegFile::initFreeList(UnifiedFreeList *freeList)
     freeList->addRegs(ccRegIds.begin(), ccRegIds.end());
 }
 
-PhysRegFile::IdRange
-PhysRegFile::getRegIds(RegClassType cls)
-{
-    switch (cls)
-    {
-      case IntRegClass:
-        return std::make_pair(intRegIds.begin(), intRegIds.end());
-      case FloatRegClass:
-        return std::make_pair(floatRegIds.begin(), floatRegIds.end());
-      case VecRegClass:
-        return std::make_pair(vecRegIds.begin(), vecRegIds.end());
-      case VecElemClass:
-        return std::make_pair(vecElemIds.begin(), vecElemIds.end());
-      case VecPredRegClass:
-        return std::make_pair(vecPredRegIds.begin(), vecPredRegIds.end());
-      case CCRegClass:
-        return std::make_pair(ccRegIds.begin(), ccRegIds.end());
-      case MiscRegClass:
-        return std::make_pair(miscRegIds.begin(), miscRegIds.end());
-      case InvalidRegClass:
-        panic("Tried to get register IDs for the invalid class.");
-    }
-    /* There is no way to make an empty iterator */
-    return std::make_pair(PhysIds::iterator(),
-                          PhysIds::iterator());
-}
-
-PhysRegIdPtr
-PhysRegFile::getTrueId(PhysRegIdPtr reg)
-{
-    switch (reg->classValue()) {
-    case VecRegClass:
-        return &vecRegIds[reg->index()];
-    case VecElemClass:
-        return &vecElemIds[reg->index()];
-    default:
-        panic_if(!reg->is(VecElemClass),
-            "Trying to get the register of a %s register", reg->className());
-    }
-    return nullptr;
-}
-
 } // namespace o3
 } // namespace gem5
diff --git a/src/cpu/o3/regfile.hh b/src/cpu/o3/regfile.hh
index 3ddf1a2a79..0130c55625 100644
--- a/src/cpu/o3/regfile.hh
+++ b/src/cpu/o3/regfile.hh
@@ -309,20 +309,6 @@ class PhysRegFile
             panic("Unrecognized register class type %d.", type);
         }
     }
-
-    /**
-     * Get the PhysRegIds of the elems of all vector registers.
-     * Auxiliary function to transition from Full vector mode to Elem mode
-     * and to initialise the rename map.
-     */
-    IdRange getRegIds(RegClassType cls);
-
-    /**
-     * Get the true physical register id.
-     * As many parts work with PhysRegIdPtr, we need to be able to produce
-     * the pointer out of just class and register idx.
-     */
-    PhysRegIdPtr getTrueId(PhysRegIdPtr reg);
 };
 
 } // namespace o3

From befa5baa78bce145e47ae4ef6a9e1b4da6e46978 Mon Sep 17 00:00:00 2001
From: Sascha Bischoff <sascha.bischoff@arm.com>
Date: Wed, 31 Aug 2022 10:37:02 +0100
Subject: [PATCH 142/492] cpu-o3: print VecPredReg not VecReg

Fix a DPRINTF to print the VecPredReg instead of the VecReg.

Change-Id: Iaba255b6b9a98826ddcd67eb83b4169e1bf5056e
Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/64342
Reviewed-by: Jason Lowe-Power <power.jg@gmail.com>
Reviewed-by: Giacomo Travaglini <giacomo.travaglini@arm.com>
Maintainer: Giacomo Travaglini <giacomo.travaglini@arm.com>
Tested-by: kokoro <noreply+kokoro@google.com>
---
 src/cpu/o3/regfile.hh | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/cpu/o3/regfile.hh b/src/cpu/o3/regfile.hh
index 0130c55625..4fea589ad7 100644
--- a/src/cpu/o3/regfile.hh
+++ b/src/cpu/o3/regfile.hh
@@ -299,7 +299,7 @@ class PhysRegFile
             break;
           case VecPredRegClass:
             DPRINTF(IEW, "RegFile: Setting predicate register %i to %s\n",
-                    idx, vectorRegFile.regClass.valString(val));
+                    idx, vecPredRegFile.regClass.valString(val));
             vecPredRegFile.set(idx, val);
             break;
           case CCRegClass:

From dd6595bf565b95fbb74e7d438030f48392becc20 Mon Sep 17 00:00:00 2001
From: Sascha Bischoff <sascha.bischoff@arm.com>
Date: Wed, 3 Aug 2022 17:11:30 +0100
Subject: [PATCH 143/492] mem-cache: masked writes are not whole-line writes

We now explicitly check in both the cache and the MSHRs if writes are
masked or not before promoting to a whole-line write. Failure to do
this previously was resulting in data loss when dirty data was present
in lower level caches and a coincidentally aligned and
cache-line-sized masked write occured.

Change-Id: I9434590d8b22e4d993167d789eb9d15a2e866bf1
Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/64340
Maintainer: Jason Lowe-Power <power.jg@gmail.com>
Reviewed-by: Jason Lowe-Power <power.jg@gmail.com>
Tested-by: kokoro <noreply+kokoro@google.com>
---
 src/mem/cache/mshr.cc | 3 ++-
 src/mem/packet.hh     | 3 ++-
 2 files changed, 4 insertions(+), 2 deletions(-)

diff --git a/src/mem/cache/mshr.cc b/src/mem/cache/mshr.cc
index 871125a17e..b7e9357029 100644
--- a/src/mem/cache/mshr.cc
+++ b/src/mem/cache/mshr.cc
@@ -140,6 +140,7 @@ MSHR::TargetList::updateWriteFlags(PacketPtr pkt)
             Request::MEM_SWAP_COND | Request::SECURE | Request::LOCKED_RMW;
         const auto &req_flags = pkt->req->getFlags();
         bool compat_write = !req_flags.isSet(no_merge_flags);
+        bool masked_write = pkt->isMaskedWrite();
 
         // if this is the first write, it might be a whole
         // line write and even if we can't merge any
@@ -147,7 +148,7 @@ MSHR::TargetList::updateWriteFlags(PacketPtr pkt)
         // it as a whole line write (e.g., SECURE whole line
         // write)
         bool first_write = empty();
-        if (first_write || compat_write) {
+        if (!masked_write && (first_write || compat_write)) {
             auto offset = pkt->getOffset(blkSize);
             auto begin = writesBitmap.begin() + offset;
             std::fill(begin, begin + pkt->getSize(), true);
diff --git a/src/mem/packet.hh b/src/mem/packet.hh
index a80b918798..9d720fb9a0 100644
--- a/src/mem/packet.hh
+++ b/src/mem/packet.hh
@@ -625,7 +625,8 @@ class Packet : public Printable
     bool isWholeLineWrite(unsigned blk_size)
     {
         return (cmd == MemCmd::WriteReq || cmd == MemCmd::WriteLineReq) &&
-            getOffset(blk_size) == 0 && getSize() == blk_size;
+            getOffset(blk_size) == 0 && getSize() == blk_size &&
+            !isMaskedWrite();
     }
 
     //@{

From fed81f34084ad46fc663ec236ce0e700881cc3c2 Mon Sep 17 00:00:00 2001
From: Sascha Bischoff <sascha.bischoff@arm.com>
Date: Tue, 9 Aug 2022 09:37:47 +0100
Subject: [PATCH 144/492] arch,cpu: Add boilerplate support for matrix
 registers

We add initial support for matrix registers to the CPU models and add
stubs in each architecture. There are no implementations of matrix
registers added, but this provides the basic support for using them in
the future.

Jira Issue: https://gem5.atlassian.net/browse/GEM5-1289

Change-Id: I2ca6a21da932a58a801a0d08f0ad0cdca4968d02
Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/64333
Maintainer: Giacomo Travaglini <giacomo.travaglini@arm.com>
Reviewed-by: Giacomo Travaglini <giacomo.travaglini@arm.com>
Tested-by: kokoro <noreply+kokoro@google.com>
---
 src/arch/SConscript            |  3 ++-
 src/arch/arm/isa.cc            |  3 +++
 src/arch/mips/isa.cc           |  3 +++
 src/arch/power/isa.cc          |  3 +++
 src/arch/riscv/isa.cc          |  3 +++
 src/arch/sparc/isa.cc          |  3 +++
 src/arch/x86/isa.cc            |  3 +++
 src/cpu/StaticInstFlags.py     |  1 +
 src/cpu/minor/scoreboard.cc    |  4 ++++
 src/cpu/minor/scoreboard.hh    |  5 ++++-
 src/cpu/o3/BaseO3CPU.py        |  1 +
 src/cpu/o3/cpu.cc              |  3 +++
 src/cpu/o3/inst_queue.cc       |  1 +
 src/cpu/o3/regfile.cc          | 18 ++++++++++++++++++
 src/cpu/o3/regfile.hh          | 22 ++++++++++++++++++++++
 src/cpu/o3/rename.cc           |  9 ++++++++-
 src/cpu/o3/rename.hh           |  1 +
 src/cpu/reg_class.hh           |  2 ++
 src/cpu/simple/base.cc         |  6 ++++++
 src/cpu/simple/exec_context.hh | 16 ++++++++++++++++
 src/cpu/simple_thread.cc       |  1 +
 src/cpu/simple_thread.hh       |  1 +
 src/cpu/static_inst.hh         |  1 +
 src/cpu/thread_context.cc      | 14 ++++++++++++++
 24 files changed, 124 insertions(+), 3 deletions(-)

diff --git a/src/arch/SConscript b/src/arch/SConscript
index 90d7ad7700..7285c0ec59 100644
--- a/src/arch/SConscript
+++ b/src/arch/SConscript
@@ -231,10 +231,11 @@ DebugFlag('IntRegs')
 DebugFlag('FloatRegs')
 DebugFlag('VecRegs')
 DebugFlag('VecPredRegs')
+DebugFlag('MatRegs')
 DebugFlag('CCRegs')
 DebugFlag('MiscRegs')
 CompoundFlag('Registers', [ 'IntRegs', 'FloatRegs', 'VecRegs', 'VecPredRegs',
-                            'CCRegs', 'MiscRegs' ])
+                            'MatRegs', 'CCRegs', 'MiscRegs' ])
 
 DebugFlag('Decoder', "Decoder debug output")
 DebugFlag('Faults', "Information about faults, exceptions, interrupts, etc")
diff --git a/src/arch/arm/isa.cc b/src/arch/arm/isa.cc
index 543e0eba7b..617f144bae 100644
--- a/src/arch/arm/isa.cc
+++ b/src/arch/arm/isa.cc
@@ -54,6 +54,7 @@
 #include "cpu/reg_class.hh"
 #include "debug/Arm.hh"
 #include "debug/LLSC.hh"
+#include "debug/MatRegs.hh"
 #include "debug/VecPredRegs.hh"
 #include "debug/VecRegs.hh"
 #include "dev/arm/generic_timer.hh"
@@ -75,6 +76,7 @@ namespace
 
 /* Not applicable to ARM */
 RegClass floatRegClass(FloatRegClass, FloatRegClassName, 0, debug::FloatRegs);
+RegClass matRegClass(MatRegClass, MatRegClassName, 1, debug::MatRegs);
 
 } // anonymous namespace
 
@@ -86,6 +88,7 @@ ISA::ISA(const Params &p) : BaseISA(p), system(NULL),
     _regClasses.push_back(&vecRegClass);
     _regClasses.push_back(&vecElemClass);
     _regClasses.push_back(&vecPredRegClass);
+    _regClasses.push_back(&matRegClass);
     _regClasses.push_back(&ccRegClass);
     _regClasses.push_back(&miscRegClass);
 
diff --git a/src/arch/mips/isa.cc b/src/arch/mips/isa.cc
index 6f39a81244..92799ab291 100644
--- a/src/arch/mips/isa.cc
+++ b/src/arch/mips/isa.cc
@@ -38,6 +38,7 @@
 #include "cpu/base.hh"
 #include "cpu/reg_class.hh"
 #include "cpu/thread_context.hh"
+#include "debug/MatRegs.hh"
 #include "debug/MipsPRA.hh"
 #include "params/MipsISA.hh"
 
@@ -104,6 +105,7 @@ constexpr RegClass vecElemClass(VecElemClass, VecElemClassName, 2,
         debug::IntRegs);
 constexpr RegClass vecPredRegClass(VecPredRegClass, VecPredRegClassName, 1,
         debug::IntRegs);
+constexpr RegClass matRegClass(MatRegClass, MatRegClassName, 1, debug::MatRegs);
 constexpr RegClass ccRegClass(CCRegClass, CCRegClassName, 0, debug::IntRegs);
 
 } // anonymous namespace
@@ -116,6 +118,7 @@ ISA::ISA(const Params &p) : BaseISA(p), numThreads(p.num_threads),
     _regClasses.push_back(&vecRegClass);
     _regClasses.push_back(&vecElemClass);
     _regClasses.push_back(&vecPredRegClass);
+    _regClasses.push_back(&matRegClass);
     _regClasses.push_back(&ccRegClass);
     _regClasses.push_back(&miscRegClass);
 
diff --git a/src/arch/power/isa.cc b/src/arch/power/isa.cc
index 80c984cfc4..ecaebade9a 100644
--- a/src/arch/power/isa.cc
+++ b/src/arch/power/isa.cc
@@ -41,6 +41,7 @@
 #include "arch/power/regs/int.hh"
 #include "arch/power/regs/misc.hh"
 #include "cpu/thread_context.hh"
+#include "debug/MatRegs.hh"
 #include "params/PowerISA.hh"
 
 namespace gem5
@@ -56,6 +57,7 @@ RegClass vecRegClass(VecRegClass, VecRegClassName, 1, debug::IntRegs);
 RegClass vecElemClass(VecElemClass, VecElemClassName, 2, debug::IntRegs);
 RegClass vecPredRegClass(VecPredRegClass, VecPredRegClassName, 1,
         debug::IntRegs);
+RegClass matRegClass(MatRegClass, MatRegClassName, 1, debug::MatRegs);
 RegClass ccRegClass(CCRegClass, CCRegClassName, 0, debug::IntRegs);
 
 } // anonymous namespace
@@ -67,6 +69,7 @@ ISA::ISA(const Params &p) : BaseISA(p)
     _regClasses.push_back(&vecRegClass);
     _regClasses.push_back(&vecElemClass);
     _regClasses.push_back(&vecPredRegClass);
+    _regClasses.push_back(&matRegClass);
     _regClasses.push_back(&ccRegClass);
     _regClasses.push_back(&miscRegClass);
     clear();
diff --git a/src/arch/riscv/isa.cc b/src/arch/riscv/isa.cc
index 3b4f378afa..6e4c380d98 100644
--- a/src/arch/riscv/isa.cc
+++ b/src/arch/riscv/isa.cc
@@ -48,6 +48,7 @@
 #include "cpu/base.hh"
 #include "debug/Checkpoint.hh"
 #include "debug/LLSC.hh"
+#include "debug/MatRegs.hh"
 #include "debug/RiscvMisc.hh"
 #include "mem/packet.hh"
 #include "mem/request.hh"
@@ -235,6 +236,7 @@ RegClass vecRegClass(VecRegClass, VecRegClassName, 1, debug::IntRegs);
 RegClass vecElemClass(VecElemClass, VecElemClassName, 2, debug::IntRegs);
 RegClass vecPredRegClass(VecPredRegClass, VecPredRegClassName, 1,
         debug::IntRegs);
+RegClass matRegClass(MatRegClass, MatRegClassName, 1, debug::MatRegs);
 RegClass ccRegClass(CCRegClass, CCRegClassName, 0, debug::IntRegs);
 
 } // anonymous namespace
@@ -247,6 +249,7 @@ ISA::ISA(const Params &p) :
     _regClasses.push_back(&vecRegClass);
     _regClasses.push_back(&vecElemClass);
     _regClasses.push_back(&vecPredRegClass);
+    _regClasses.push_back(&matRegClass);
     _regClasses.push_back(&ccRegClass);
     _regClasses.push_back(&miscRegClass);
 
diff --git a/src/arch/sparc/isa.cc b/src/arch/sparc/isa.cc
index 255dbb0b09..38b3d1c3e2 100644
--- a/src/arch/sparc/isa.cc
+++ b/src/arch/sparc/isa.cc
@@ -39,6 +39,7 @@
 #include "base/trace.hh"
 #include "cpu/base.hh"
 #include "cpu/thread_context.hh"
+#include "debug/MatRegs.hh"
 #include "debug/Timer.hh"
 #include "params/SparcISA.hh"
 
@@ -73,6 +74,7 @@ RegClass vecRegClass(VecRegClass, VecRegClassName, 1, debug::IntRegs);
 RegClass vecElemClass(VecElemClass, VecElemClassName, 2, debug::IntRegs);
 RegClass vecPredRegClass(VecPredRegClass, VecPredRegClassName, 1,
         debug::IntRegs);
+RegClass matRegClass(MatRegClass, MatRegClassName, 1, debug::MatRegs);
 RegClass ccRegClass(CCRegClass, CCRegClassName, 0, debug::IntRegs);
 
 } // anonymous namespace
@@ -84,6 +86,7 @@ ISA::ISA(const Params &p) : BaseISA(p)
     _regClasses.push_back(&vecRegClass);
     _regClasses.push_back(&vecElemClass);
     _regClasses.push_back(&vecPredRegClass);
+    _regClasses.push_back(&matRegClass);
     _regClasses.push_back(&ccRegClass);
     _regClasses.push_back(&miscRegClass);
 
diff --git a/src/arch/x86/isa.cc b/src/arch/x86/isa.cc
index 6578b1c716..31efae3a43 100644
--- a/src/arch/x86/isa.cc
+++ b/src/arch/x86/isa.cc
@@ -37,6 +37,7 @@
 #include "base/compiler.hh"
 #include "cpu/base.hh"
 #include "cpu/thread_context.hh"
+#include "debug/MatRegs.hh"
 #include "params/X86ISA.hh"
 #include "sim/serialize.hh"
 
@@ -146,6 +147,7 @@ RegClass vecRegClass(VecRegClass, VecRegClassName, 1, debug::IntRegs);
 RegClass vecElemClass(VecElemClass, VecElemClassName, 2, debug::IntRegs);
 RegClass vecPredRegClass(VecPredRegClass, VecPredRegClassName, 1,
         debug::IntRegs);
+RegClass matRegClass(MatRegClass, MatRegClassName, 1, debug::MatRegs);
 
 } // anonymous namespace
 
@@ -159,6 +161,7 @@ ISA::ISA(const X86ISAParams &p) : BaseISA(p), vendorString(p.vendor_string)
     _regClasses.push_back(&vecRegClass);
     _regClasses.push_back(&vecElemClass);
     _regClasses.push_back(&vecPredRegClass);
+    _regClasses.push_back(&matRegClass);
     _regClasses.push_back(&ccRegClass);
     _regClasses.push_back(&miscRegClass);
 
diff --git a/src/cpu/StaticInstFlags.py b/src/cpu/StaticInstFlags.py
index b7e03a6fb9..d562dd5645 100644
--- a/src/cpu/StaticInstFlags.py
+++ b/src/cpu/StaticInstFlags.py
@@ -52,6 +52,7 @@ class StaticInstFlags(Enum):
         "IsFloating",  # References FP regs.
         "IsVector",  # References Vector regs.
         "IsVectorElem",  # References Vector reg elems.
+        "IsMatrix",  # References Matrix regs.
         "IsLoad",  # Reads from memory (load or prefetch).
         "IsStore",  # Writes to memory.
         "IsAtomic",  # Does atomic RMW to memory.
diff --git a/src/cpu/minor/scoreboard.cc b/src/cpu/minor/scoreboard.cc
index 356fdc7e01..475d650d3a 100644
--- a/src/cpu/minor/scoreboard.cc
+++ b/src/cpu/minor/scoreboard.cc
@@ -70,6 +70,10 @@ Scoreboard::findIndex(const RegId& reg, Index &scoreboard_index)
         scoreboard_index = vecPredRegOffset + reg.index();
         ret = true;
         break;
+      case MatRegClass:
+        scoreboard_index = matRegOffset + reg.index();
+        ret = true;
+        break;
       case CCRegClass:
         scoreboard_index = ccRegOffset + reg.index();
         ret = true;
diff --git a/src/cpu/minor/scoreboard.hh b/src/cpu/minor/scoreboard.hh
index bccb9c5b80..d3df324b99 100644
--- a/src/cpu/minor/scoreboard.hh
+++ b/src/cpu/minor/scoreboard.hh
@@ -72,6 +72,7 @@ class Scoreboard : public Named
     const unsigned ccRegOffset;
     const unsigned vecRegOffset;
     const unsigned vecPredRegOffset;
+    const unsigned matRegOffset;
 
     /** The number of registers in the Scoreboard.  These
      *  are just the integer, CC and float registers packed
@@ -116,7 +117,9 @@ class Scoreboard : public Named
         vecRegOffset(ccRegOffset + reg_classes.at(CCRegClass)->numRegs()),
         vecPredRegOffset(vecRegOffset +
                 reg_classes.at(VecElemClass)->numRegs()),
-        numRegs(vecPredRegOffset + reg_classes.at(VecPredRegClass)->numRegs()),
+        matRegOffset(vecPredRegOffset +
+                reg_classes.at(VecPredRegClass)->numRegs()),
+        numRegs(matRegOffset + reg_classes.at(MatRegClass)->numRegs()),
         numResults(numRegs, 0),
         numUnpredictableResults(numRegs, 0),
         fuIndices(numRegs, invalidFUIndex),
diff --git a/src/cpu/o3/BaseO3CPU.py b/src/cpu/o3/BaseO3CPU.py
index 07d9df6b7f..2e1a602e4c 100644
--- a/src/cpu/o3/BaseO3CPU.py
+++ b/src/cpu/o3/BaseO3CPU.py
@@ -168,6 +168,7 @@ class BaseO3CPU(BaseCPU):
     numPhysVecPredRegs = Param.Unsigned(
         32, "Number of physical predicate registers"
     )
+    numPhysMatRegs = Param.Unsigned(2, "Number of physical matrix registers")
     # most ISAs don't use condition-code regs, so default is 0
     numPhysCCRegs = Param.Unsigned(0, "Number of physical cc registers")
     numIQEntries = Param.Unsigned(64, "Number of instruction queue entries")
diff --git a/src/cpu/o3/cpu.cc b/src/cpu/o3/cpu.cc
index 48ccd94b54..d2bacaa523 100644
--- a/src/cpu/o3/cpu.cc
+++ b/src/cpu/o3/cpu.cc
@@ -90,6 +90,7 @@ CPU::CPU(const BaseO3CPUParams &params)
               params.numPhysFloatRegs,
               params.numPhysVecRegs,
               params.numPhysVecPredRegs,
+              params.numPhysMatRegs,
               params.numPhysCCRegs,
               params.isa[0]->regClasses()),
 
@@ -200,6 +201,8 @@ CPU::CPU(const BaseO3CPUParams &params)
             numThreads * regClasses.at(VecRegClass)->numRegs());
     assert(params.numPhysVecPredRegs >=
             numThreads * regClasses.at(VecPredRegClass)->numRegs());
+    assert(params.numPhysMatRegs >=
+            numThreads * regClasses.at(MatRegClass)->numRegs());
     assert(params.numPhysCCRegs >=
             numThreads * regClasses.at(CCRegClass)->numRegs());
 
diff --git a/src/cpu/o3/inst_queue.cc b/src/cpu/o3/inst_queue.cc
index 72cb7356ef..ee286fc585 100644
--- a/src/cpu/o3/inst_queue.cc
+++ b/src/cpu/o3/inst_queue.cc
@@ -108,6 +108,7 @@ InstructionQueue::InstructionQueue(CPU *cpu_ptr, IEW *iew_ptr,
                             reg_classes.at(VecElemClass)->numRegs() /
                             reg_classes.at(VecRegClass)->numRegs()) +
                     params.numPhysVecPredRegs +
+                    params.numPhysMatRegs +
                     params.numPhysCCRegs;
 
     //Create an entry for each physical register within the
diff --git a/src/cpu/o3/regfile.cc b/src/cpu/o3/regfile.cc
index dcb8f704f0..1bc7032ebd 100644
--- a/src/cpu/o3/regfile.cc
+++ b/src/cpu/o3/regfile.cc
@@ -53,6 +53,7 @@ PhysRegFile::PhysRegFile(unsigned _numPhysicalIntRegs,
                          unsigned _numPhysicalFloatRegs,
                          unsigned _numPhysicalVecRegs,
                          unsigned _numPhysicalVecPredRegs,
+                         unsigned _numPhysicalMatRegs,
                          unsigned _numPhysicalCCRegs,
                          const BaseISA::RegClasses &reg_classes)
     : intRegFile(*reg_classes.at(IntRegClass), _numPhysicalIntRegs),
@@ -63,6 +64,7 @@ PhysRegFile::PhysRegFile(unsigned _numPhysicalIntRegs,
                   reg_classes.at(VecRegClass)->numRegs())),
       vecPredRegFile(*reg_classes.at(VecPredRegClass),
               _numPhysicalVecPredRegs),
+      matRegFile(*reg_classes.at(MatRegClass), _numPhysicalMatRegs),
       ccRegFile(*reg_classes.at(CCRegClass), _numPhysicalCCRegs),
       numPhysicalIntRegs(_numPhysicalIntRegs),
       numPhysicalFloatRegs(_numPhysicalFloatRegs),
@@ -71,12 +73,14 @@ PhysRegFile::PhysRegFile(unsigned _numPhysicalIntRegs,
                   reg_classes.at(VecElemClass)->numRegs() /
                   reg_classes.at(VecRegClass)->numRegs())),
       numPhysicalVecPredRegs(_numPhysicalVecPredRegs),
+      numPhysicalMatRegs(_numPhysicalMatRegs),
       numPhysicalCCRegs(_numPhysicalCCRegs),
       totalNumRegs(_numPhysicalIntRegs
                    + _numPhysicalFloatRegs
                    + _numPhysicalVecRegs
                    + numPhysicalVecElemRegs
                    + _numPhysicalVecPredRegs
+                   + _numPhysicalMatRegs
                    + _numPhysicalCCRegs)
 {
     RegIndex phys_reg;
@@ -115,6 +119,13 @@ PhysRegFile::PhysRegFile(unsigned _numPhysicalIntRegs,
                 flat_reg_idx++);
     }
 
+    // The next batch of the registers are the matrix physical
+    // registers; put them onto the matrix free list.
+    for (phys_reg = 0; phys_reg < numPhysicalMatRegs; phys_reg++) {
+        matRegIds.emplace_back(*reg_classes.at(MatRegClass), phys_reg,
+                flat_reg_idx++);
+    }
+
     // The rest of the registers are the condition-code physical
     // registers; put them onto the condition-code free list.
     for (phys_reg = 0; phys_reg < numPhysicalCCRegs; phys_reg++) {
@@ -167,6 +178,13 @@ PhysRegFile::initFreeList(UnifiedFreeList *freeList)
     }
     freeList->addRegs(vecPredRegIds.begin(), vecPredRegIds.end());
 
+    /* The next batch of the registers are the matrix physical
+     * registers; put them onto the matrix free list. */
+    for (reg_idx = 0; reg_idx < numPhysicalMatRegs; reg_idx++) {
+        assert(matRegIds[reg_idx].index() == reg_idx);
+    }
+    freeList->addRegs(matRegIds.begin(), matRegIds.end());
+
     // The rest of the registers are the condition-code physical
     // registers; put them onto the condition-code free list.
     for (reg_idx = 0; reg_idx < numPhysicalCCRegs; reg_idx++) {
diff --git a/src/cpu/o3/regfile.hh b/src/cpu/o3/regfile.hh
index 4fea589ad7..13c9899f13 100644
--- a/src/cpu/o3/regfile.hh
+++ b/src/cpu/o3/regfile.hh
@@ -91,6 +91,10 @@ class PhysRegFile
     RegFile vecPredRegFile;
     std::vector<PhysRegId> vecPredRegIds;
 
+    /** Matrix register file. */
+    RegFile matRegFile;
+    std::vector<PhysRegId> matRegIds;
+
     /** Condition-code register file. */
     RegFile ccRegFile;
     std::vector<PhysRegId> ccRegIds;
@@ -123,6 +127,11 @@ class PhysRegFile
      */
     unsigned numPhysicalVecPredRegs;
 
+    /**
+     * Number of physical matrix registers
+     */
+    unsigned numPhysicalMatRegs;
+
     /**
      * Number of physical CC registers
      */
@@ -140,6 +149,7 @@ class PhysRegFile
                 unsigned _numPhysicalFloatRegs,
                 unsigned _numPhysicalVecRegs,
                 unsigned _numPhysicalVecPredRegs,
+                unsigned _numPhysicalMatRegs,
                 unsigned _numPhysicalCCRegs,
                 const BaseISA::RegClasses &classes);
 
@@ -218,6 +228,11 @@ class PhysRegFile
             DPRINTF(IEW, "RegFile: Access to predicate register %i, has "
                     "data %s\n", idx, vecPredRegFile.regClass.valString(val));
             break;
+          case MatRegClass:
+            matRegFile.get(idx, val);
+            DPRINTF(IEW, "RegFile: Access to matrix register %i, has "
+                    "data %s\n", idx, matRegFile.regClass.valString(val));
+            break;
           case CCRegClass:
             *(RegVal *)val = getReg(phys_reg);
             break;
@@ -237,6 +252,8 @@ class PhysRegFile
             return vectorRegFile.ptr(idx);
           case VecPredRegClass:
             return vecPredRegFile.ptr(idx);
+          case MatRegClass:
+            return matRegFile.ptr(idx);
           default:
             panic("Unrecognized register class type %d.", type);
         }
@@ -302,6 +319,11 @@ class PhysRegFile
                     idx, vecPredRegFile.regClass.valString(val));
             vecPredRegFile.set(idx, val);
             break;
+          case MatRegClass:
+            DPRINTF(IEW, "RegFile: Setting matrix register %i to %s\n",
+                    idx, matRegFile.regClass.valString(val));
+            matRegFile.set(idx, val);
+            break;
           case CCRegClass:
             setReg(phys_reg, *(RegVal *)val);
             break;
diff --git a/src/cpu/o3/rename.cc b/src/cpu/o3/rename.cc
index f3783d402a..f8c305eb1c 100644
--- a/src/cpu/o3/rename.cc
+++ b/src/cpu/o3/rename.cc
@@ -134,6 +134,8 @@ Rename::RenameStats::RenameStats(statistics::Group *parent)
                "Number of vector rename lookups"),
       ADD_STAT(vecPredLookups, statistics::units::Count::get(),
                "Number of vector predicate rename lookups"),
+      ADD_STAT(matLookups, statistics::units::Count::get(),
+               "Number of matrix rename lookups"),
       ADD_STAT(committedMaps, statistics::units::Count::get(),
                "Number of HB maps that are committed"),
       ADD_STAT(undoneMaps, statistics::units::Count::get(),
@@ -167,6 +169,7 @@ Rename::RenameStats::RenameStats(statistics::Group *parent)
     fpLookups.prereq(fpLookups);
     vecLookups.prereq(vecLookups);
     vecPredLookups.prereq(vecPredLookups);
+    matLookups.prereq(matLookups);
 
     committedMaps.prereq(committedMaps);
     undoneMaps.prereq(undoneMaps);
@@ -1034,6 +1037,9 @@ Rename::renameSrcRegs(const DynInstPtr &inst, ThreadID tid)
           case VecPredRegClass:
             stats.vecPredLookups++;
             break;
+          case MatRegClass:
+            stats.matLookups++;
+            break;
           case CCRegClass:
           case MiscRegClass:
             break;
@@ -1248,7 +1254,7 @@ Rename::readFreeEntries(ThreadID tid)
     }
 
     DPRINTF(Rename, "[tid:%i] Free IQ: %i, Free ROB: %i, "
-                    "Free LQ: %i, Free SQ: %i, FreeRM %i(%i %i %i %i %i %i)\n",
+                    "Free LQ: %i, Free SQ: %i, FreeRM %i(%i %i %i %i %i %i %i)\n",
             tid,
             freeEntries[tid].iqEntries,
             freeEntries[tid].robEntries,
@@ -1260,6 +1266,7 @@ Rename::readFreeEntries(ThreadID tid)
             renameMap[tid]->numFreeEntries(VecRegClass),
             renameMap[tid]->numFreeEntries(VecElemClass),
             renameMap[tid]->numFreeEntries(VecPredRegClass),
+            renameMap[tid]->numFreeEntries(MatRegClass),
             renameMap[tid]->numFreeEntries(CCRegClass));
 
     DPRINTF(Rename, "[tid:%i] %i instructions not yet in ROB\n",
diff --git a/src/cpu/o3/rename.hh b/src/cpu/o3/rename.hh
index 0b42b6eaa0..61ef476501 100644
--- a/src/cpu/o3/rename.hh
+++ b/src/cpu/o3/rename.hh
@@ -521,6 +521,7 @@ class Rename
         statistics::Scalar fpLookups;
         statistics::Scalar vecLookups;
         statistics::Scalar vecPredLookups;
+        statistics::Scalar matLookups;
         /** Stat for total number of committed renaming mappings. */
         statistics::Scalar committedMaps;
         /** Stat for total number of mappings that were undone due to a
diff --git a/src/cpu/reg_class.hh b/src/cpu/reg_class.hh
index 080c758413..37618e530a 100644
--- a/src/cpu/reg_class.hh
+++ b/src/cpu/reg_class.hh
@@ -64,6 +64,7 @@ enum RegClassType
     /** Vector Register Native Elem lane. */
     VecElemClass,
     VecPredRegClass,
+    MatRegClass,        ///< Matrix Register
     CCRegClass,         ///< Condition-code register
     MiscRegClass,       ///< Control (misc) register
     InvalidRegClass = -1
@@ -75,6 +76,7 @@ inline constexpr char FloatRegClassName[] = "floating_point";
 inline constexpr char VecRegClassName[] = "vector";
 inline constexpr char VecElemClassName[] = "vector_element";
 inline constexpr char VecPredRegClassName[] = "vector_predicate";
+inline constexpr char MatRegClassName[] = "matrix";
 inline constexpr char CCRegClassName[] = "condition_code";
 inline constexpr char MiscRegClassName[] = "miscellaneous";
 
diff --git a/src/cpu/simple/base.cc b/src/cpu/simple/base.cc
index ab67f39496..768f63ede5 100644
--- a/src/cpu/simple/base.cc
+++ b/src/cpu/simple/base.cc
@@ -418,6 +418,12 @@ BaseSimpleCPU::postExecute()
         t_info.execContextStats.numVecInsts++;
     }
 
+    //Matrix alu accesses
+    if (curStaticInst->isMatrix()){
+        t_info.execContextStats.numMatAluAccesses++;
+        t_info.execContextStats.numMatInsts++;
+    }
+
     //number of function calls/returns to get window accesses
     if (curStaticInst->isCall() || curStaticInst->isReturn()){
         t_info.execContextStats.numCallsReturns++;
diff --git a/src/cpu/simple/exec_context.hh b/src/cpu/simple/exec_context.hh
index e51ec88dce..0f20763f28 100644
--- a/src/cpu/simple/exec_context.hh
+++ b/src/cpu/simple/exec_context.hh
@@ -96,6 +96,8 @@ class SimpleExecContext : public ExecContext
                        "Number of float alu accesses"),
               ADD_STAT(numVecAluAccesses, statistics::units::Count::get(),
                        "Number of vector alu accesses"),
+              ADD_STAT(numMatAluAccesses, statistics::units::Count::get(),
+                       "Number of matrix alu accesses"),
               ADD_STAT(numCallsReturns, statistics::units::Count::get(),
                        "Number of times a function call or return occured"),
               ADD_STAT(numCondCtrlInsts, statistics::units::Count::get(),
@@ -106,6 +108,8 @@ class SimpleExecContext : public ExecContext
                        "Number of float instructions"),
               ADD_STAT(numVecInsts, statistics::units::Count::get(),
                        "Number of vector instructions"),
+              ADD_STAT(numMatInsts, statistics::units::Count::get(),
+                       "Number of matrix instructions"),
               ADD_STAT(numIntRegReads, statistics::units::Count::get(),
                        "Number of times the integer registers were read"),
               ADD_STAT(numIntRegWrites, statistics::units::Count::get(),
@@ -162,6 +166,7 @@ class SimpleExecContext : public ExecContext
                   &numVecRegReads,
                   &numVecRegReads,
                   &numVecPredRegReads,
+                  &numMatRegReads,
                   &numCCRegReads
               },
               numRegWrites{
@@ -170,6 +175,7 @@ class SimpleExecContext : public ExecContext
                   &numVecRegWrites,
                   &numVecRegWrites,
                   &numVecPredRegWrites,
+                  &numMatRegWrites,
                   &numCCRegWrites
               }
         {
@@ -220,6 +226,9 @@ class SimpleExecContext : public ExecContext
         // Number of vector alu accesses
         statistics::Scalar numVecAluAccesses;
 
+        // Number of matrix alu accesses
+        statistics::Scalar numMatAluAccesses;
+
         // Number of function calls/returns
         statistics::Scalar numCallsReturns;
 
@@ -235,6 +244,9 @@ class SimpleExecContext : public ExecContext
         // Number of vector instructions
         statistics::Scalar numVecInsts;
 
+        // Number of matrix instructions
+        statistics::Scalar numMatInsts;
+
         // Number of integer register file accesses
         statistics::Scalar numIntRegReads;
         statistics::Scalar numIntRegWrites;
@@ -251,6 +263,10 @@ class SimpleExecContext : public ExecContext
         mutable statistics::Scalar numVecPredRegReads;
         statistics::Scalar numVecPredRegWrites;
 
+        // Number of matrix register file accesses
+        mutable statistics::Scalar numMatRegReads;
+        statistics::Scalar numMatRegWrites;
+
         // Number of condition code register file accesses
         statistics::Scalar numCCRegReads;
         statistics::Scalar numCCRegWrites;
diff --git a/src/cpu/simple_thread.cc b/src/cpu/simple_thread.cc
index 4c4e7dcdb6..c28359a4ed 100644
--- a/src/cpu/simple_thread.cc
+++ b/src/cpu/simple_thread.cc
@@ -75,6 +75,7 @@ SimpleThread::SimpleThread(BaseCPU *_cpu, int _thread_num, System *_sys,
           {*_isa->regClasses().at(VecRegClass)},
           {*_isa->regClasses().at(VecElemClass)},
           {*_isa->regClasses().at(VecPredRegClass)},
+          {*_isa->regClasses().at(MatRegClass)},
           {*_isa->regClasses().at(CCRegClass)}
       }},
       isa(_isa),
diff --git a/src/cpu/simple_thread.hh b/src/cpu/simple_thread.hh
index 5a60d2ac16..b9129734f1 100644
--- a/src/cpu/simple_thread.hh
+++ b/src/cpu/simple_thread.hh
@@ -57,6 +57,7 @@
 #include "debug/CCRegs.hh"
 #include "debug/FloatRegs.hh"
 #include "debug/IntRegs.hh"
+#include "debug/MatRegs.hh"
 #include "debug/VecPredRegs.hh"
 #include "debug/VecRegs.hh"
 #include "mem/htm.hh"
diff --git a/src/cpu/static_inst.hh b/src/cpu/static_inst.hh
index 3ab78345bc..7ecc57d2f0 100644
--- a/src/cpu/static_inst.hh
+++ b/src/cpu/static_inst.hh
@@ -155,6 +155,7 @@ class StaticInst : public RefCounted, public StaticInstFlags
     bool isInteger()      const { return flags[IsInteger]; }
     bool isFloating()     const { return flags[IsFloating]; }
     bool isVector()       const { return flags[IsVector]; }
+    bool isMatrix()       const { return flags[IsMatrix]; }
 
     bool isControl()      const { return flags[IsControl]; }
     bool isCall()         const { return flags[IsCall]; }
diff --git a/src/cpu/thread_context.cc b/src/cpu/thread_context.cc
index 69094f87af..146f9e74d0 100644
--- a/src/cpu/thread_context.cc
+++ b/src/cpu/thread_context.cc
@@ -109,6 +109,20 @@ ThreadContext::compare(ThreadContext *one, ThreadContext *two)
         }
     }
 
+    // Then loop through the matrix registers.
+    const auto *mat_class = regClasses.at(MatRegClass);
+    std::vector<uint8_t> mat1(mat_class->regBytes());
+    std::vector<uint8_t> mat2(mat_class->regBytes());
+    for (auto &id: *regClasses.at(MatRegClass)) {
+        one->getReg(id, mat1.data());
+        two->getReg(id, mat2.data());
+        if (mat1 != mat2) {
+            panic("Mat reg idx %d doesn't match, one: %#x, two: %#x",
+                  id.index(), mat_class->valString(mat1.data()),
+                  mat_class->valString(mat2.data()));
+        }
+    }
+
     for (int i = 0; i < regClasses.at(MiscRegClass)->numRegs(); ++i) {
         RegVal t1 = one->readMiscRegNoEffect(i);
         RegVal t2 = two->readMiscRegNoEffect(i);

From 5c43523d53d0fbada0f48d1cefbcadd9959d8068 Mon Sep 17 00:00:00 2001
From: Sascha Bischoff <sascha.bischoff@arm.com>
Date: Tue, 9 Aug 2022 16:42:01 +0100
Subject: [PATCH 145/492] arch-arm: Add matrix register support for SME

We add support for the matrix registers to the Arm architecture. This
will be used to implement support for Arm's Scalable Matrix Extension
(SME) in subsequent commits.

We add an implementation of a matrix register for the Arm
architecture. These are akin to 2D vector registers in the sense that
they can be dynamically viewed as a variety of element sizes. As
widening the element size would reduce the matrix size by a factor of
element size, we instead layer multiple tiles of wider elements onto
the underlying matrix storage in order to retain square matrices.

We separate the storage of the matrix from the different views one can
have. The potential views are:

* Tiles: View the matrix as one or more tiles using a specified
  element size. As the element size increases the number of indexable
  tiles increases. When using the smallest granularity element size
  (bytes) there is a single tile. As an example, using 32-bit elements
  yields 4 tiles. Tiles are interleaved onto the underlaying matrix
  modulo element size. A tile supports 2D indexing ([][]), with the
  first index specifying the row index, and the second the column
  (element index within the row).

* A Horizontal/Vertical slice (row or a column) of a tile: Take the
  aforementioned tile, and extract a specified row or column slice
  from it. A slice supports standard []-based indexing. A tile slice
  must use the same underlying element type as is used for the tile.

* A Horizontal/Vertical slice (row or column) of the underlying matrix
  storage: Treat the matrix register as an array of vectors (rows or
  columns, rows preferred due to them being indepependent of the
  element size being used).

On simulator start-up the matrix registers are initialised to a
maximum size. At run-time the used size can by dynamically
adjusted. However, please note that as the matrix register class
doesn't know if a smaller size is being used, the class itself doesn't
do any bounds checking itself. This is left to the user.

Jira Issue: https://gem5.atlassian.net/browse/GEM5-1289

Change-Id: I6a6a05154846e4802e9822bbbac00ab2c39538ed
Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/64334
Reviewed-by: Giacomo Travaglini <giacomo.travaglini@arm.com>
Maintainer: Giacomo Travaglini <giacomo.travaglini@arm.com>
Tested-by: kokoro <noreply+kokoro@google.com>
---
 src/arch/arm/SConscript     |   1 +
 src/arch/arm/isa.cc         |   7 +-
 src/arch/arm/isa.hh         |   3 +-
 src/arch/arm/matrix.hh      | 572 ++++++++++++++++++++++++++++++++++++
 src/arch/arm/matrix.test.cc | 453 ++++++++++++++++++++++++++++
 src/arch/arm/regs/mat.hh    | 136 +++++++++
 src/arch/arm/types.hh       |  12 +
 7 files changed, 1182 insertions(+), 2 deletions(-)
 create mode 100644 src/arch/arm/matrix.hh
 create mode 100644 src/arch/arm/matrix.test.cc
 create mode 100644 src/arch/arm/regs/mat.hh

diff --git a/src/arch/arm/SConscript b/src/arch/arm/SConscript
index 6c359fb255..935f082c11 100644
--- a/src/arch/arm/SConscript
+++ b/src/arch/arm/SConscript
@@ -54,6 +54,7 @@ if env['USE_ARM_ISA']:
           '../../cpu/reg_class.cc',
           '../../sim/bufval.cc', '../../sim/cur_tick.cc',
           'regs/int.cc')
+    GTest('matrix.test', 'matrix.test.cc')
 Source('decoder.cc', tags='arm isa')
 Source('faults.cc', tags='arm isa')
 Source('htm.cc', tags='arm isa')
diff --git a/src/arch/arm/isa.cc b/src/arch/arm/isa.cc
index 617f144bae..c6bb2bd8d1 100644
--- a/src/arch/arm/isa.cc
+++ b/src/arch/arm/isa.cc
@@ -76,7 +76,6 @@ namespace
 
 /* Not applicable to ARM */
 RegClass floatRegClass(FloatRegClass, FloatRegClassName, 0, debug::FloatRegs);
-RegClass matRegClass(MatRegClass, MatRegClassName, 1, debug::MatRegs);
 
 } // anonymous namespace
 
@@ -561,6 +560,12 @@ ISA::copyRegsFrom(ThreadContext *src)
     for (auto &id: vecElemClass)
         tc->setReg(id, src->getReg(id));
 
+    ArmISA::MatRegContainer mc;
+    for (auto &id: matRegClass) {
+        src->getReg(id, &mc);
+        tc->setReg(id, &mc);
+    }
+
     // setMiscReg "with effect" will set the misc register mapping correctly.
     // e.g. updateRegMap(val)
     tc->setMiscReg(MISCREG_CPSR, src->readMiscRegNoEffect(MISCREG_CPSR));
diff --git a/src/arch/arm/isa.hh b/src/arch/arm/isa.hh
index 9e1afa714b..6f9478298d 100644
--- a/src/arch/arm/isa.hh
+++ b/src/arch/arm/isa.hh
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2010, 2012-2021 ARM Limited
+ * Copyright (c) 2010, 2012-2022 ARM Limited
  * All rights reserved
  *
  * The license below extends only to copyright in the software and shall
@@ -45,6 +45,7 @@
 #include "arch/arm/mmu.hh"
 #include "arch/arm/pcstate.hh"
 #include "arch/arm/regs/int.hh"
+#include "arch/arm/regs/mat.hh"
 #include "arch/arm/regs/misc.hh"
 #include "arch/arm/regs/vec.hh"
 #include "arch/arm/self_debug.hh"
diff --git a/src/arch/arm/matrix.hh b/src/arch/arm/matrix.hh
new file mode 100644
index 0000000000..ae9a8e5627
--- /dev/null
+++ b/src/arch/arm/matrix.hh
@@ -0,0 +1,572 @@
+/*
+ * Copyright (c) 2022 Arm Limited
+ * All rights reserved
+ *
+ * The license below extends only to copyright in the software and shall
+ * not be construed as granting a license to any other intellectual
+ * property including but not limited to intellectual property relating
+ * to a hardware implementation of the functionality of the software
+ * licensed hereunder.  You may use the software subject to the license
+ * terms below provided that you ensure that this notice is replicated
+ * unmodified and in its entirety in all distributions of the software,
+ * modified or unmodified, in source code or in binary form.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are
+ * met: redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer;
+ * redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution;
+ * neither the name of the copyright holders nor the names of its
+ * contributors may be used to endorse or promote products derived from
+ * this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/** \file arch/arm/matrix.hh
+ * Matrix Register Specification.
+ *
+ * In this file we add three new classes which are used to provide both
+ * the backing storage for matrix registers (MatStore) and for accessing
+ * them using a set of views onto the backing store (Tile, TileSlice).
+ *
+ * The MatStore provides the backing store for the matrix, handles the
+ * serialisation/unserialisation, and provides interfaces to obtain
+ * views of the matrix. The underlying element for the MatStore is a
+ * byte, and it uses two templated parameters, X and Y, to set the
+ * overall size of the matrix. The common use case will be that X and Y
+ * are the same size, yielding a square matrix, but this is not a
+ * requirement - it is possible to create non-square matricies too if
+ * such a thing is desired.
+ *
+ * The Tile provides a view on top of the MatStore which is intended to
+ * preserve the original aspect ratio of the underlying MatStore as the
+ * element size scales. It does so by row-wise interleaving one or more
+ * sub-matrices on top of the MatStore, where the number of sub-matrices
+ * is governed by the element size (in bytes) itself. As an example, if
+ * the elements are half-words, i.e. 2 bytes wide, then there are two
+ * interleaved matrices with even rows belonging to sub-matrix 0 and odd
+ * rows belonging to sub-matrix 1. However, each of these sub-matricies
+ * maintains the original aspect ratio of the MatStore - the element
+ * size has doubled (bytes => half words), hence each row contains half
+ * the original number of elements, and each sub-matrix contains half of
+ * the number of rows themselves.
+ *
+ * The TileSlice class provides a view of either a row or a column of a
+ * matrix, and can be generated from either the MatStore directly, or
+ * from the Tile. In the former case this allows a matrix to be viewed
+ * as a set of rows or columns, and in the latter this same approach is
+ * applied to the Tile. In both cases this is achieved by adjusting the
+ * striding through the backing store accordingly.
+ *
+ * The intended usage of the views is as follows:
+ *
+ * // declare an 8x8 matrix of bytes
+ * using Mat8x8 = MatStore<8, 8>;
+ *
+ * // Create a matrix and make sure that it is zeroed
+ * Mat8x8 mat;
+ * mat.zero();
+ *
+ * // Interleave four tiles of int32_t onto the 8x8 matrix, and get
+ * // tile 0. (Each of these tiles will be a 2x2 matrix)
+ * auto mat0 = mat.asTile<int32_t>(0);
+ *
+ * // Set both elements of row 0 to 10
+ * for (auto i = 0; i < 2; ++i) {
+ *     mat0[0][i] = 10;
+ * }
+ *
+ * // Sum both elements of row 1
+ * int32_t sum = 0;
+ * auto row = mat0.asHSlice(1);
+ * for (auto i = 0; i < 2; ++i) {
+ *     sum += row[i];
+ * }
+ *
+ * // print column 1 of the whole MatStore when viewed as uint16_t
+ * col = mat.asVSlice<uint16_t>(1);
+ * for (auto i = 0; i < 4; ++i) {
+ *     std::cout << col[i] << std::endl;
+ * }
+ *
+ */
+
+#ifndef __ARCH_ARM_MATRIX_HH__
+#define __ARCH_ARM_MATRIX_HH__
+
+#include <array>
+#include <cassert>
+#include <cstring>
+#include <iostream>
+#include <type_traits>
+
+#include "base/cprintf.hh"
+#include "base/logging.hh"
+#include "base/types.hh"
+#include "sim/serialize_handlers.hh"
+
+namespace gem5
+{
+
+constexpr unsigned MaxMatRegRowLenInBytes = 256;
+constexpr unsigned MaxMatRegRows = 256;
+
+// Forward declarations
+template <size_t X, size_t Y>
+class MatStore;
+template <typename ElemType, typename Container>
+class Tile;
+
+template <size_t X, size_t Y>
+struct ParseParam<MatStore<X, Y>>;
+
+/**
+ * @brief Provides a view of a horizontal slice of either a
+ *        MatStore or a Tile.
+ *
+ * Based on whether this view it is being used from the MatStore
+ * directly or from the Tile different parameters are
+ * used. Behind the scenes the parameters are used to stride through the
+ * (linear) backing store in order to return or maniplate the desired
+ * elements of the row/column.
+ *
+ * @tparam ElemType The type of element to use for the view.
+ * @tparam Container The type of container being used as the backing store.
+ * @tparam FromTile Set true if operating on an interleaved tile.
+ */
+template <typename ElemType, typename Container, bool FromTile>
+class HorizontalSlice
+{
+    template <size_t, size_t> friend class MatStore;
+    template <typename, typename> friend class Tile;
+
+  private:
+    Container * container;
+    size_t index;
+    size_t xElems;
+    size_t yElems;
+    size_t startElts;
+    size_t strideElts;
+
+  private:
+    HorizontalSlice(Container& cnt, size_t _startBytes, size_t _strideBytes,
+                    size_t idx)
+      : container(&cnt), index(idx),
+        xElems(container->xSize() / sizeof(ElemType)),
+        yElems(container->ySize() / (FromTile ? sizeof(ElemType): 1)),
+        startElts(_startBytes / sizeof(ElemType)),
+        strideElts(_strideBytes / sizeof(ElemType))
+    {
+        gem5_assert(xElems > 0, "The number of xElems cannot be 0");
+        gem5_assert(yElems > 0, "The number of yElems cannot be 0");
+
+        // Make sure that we have a whole multiple of an element size
+        assert (_startBytes % sizeof(ElemType) == 0);
+        assert (_strideBytes % sizeof(ElemType) == 0);
+
+        if constexpr (!FromTile) {
+            // If we are not operating on a tile, the stride must be the
+            // same as the row length, X.
+            assert(_strideBytes == container->xSize());
+        } else {
+            // If we are operating on a tile, then the stride must be
+            // sizeof(ElemSize) greater than X.
+            assert(_strideBytes / container->xSize() == sizeof(ElemType));
+        }
+    };
+
+  public:
+    ElemType&
+    operator[](size_t elem_idx)
+    {
+        assert(elem_idx < xElems);
+        size_t linear_index = startElts + index * strideElts + elem_idx;
+        return container->template rawPtr<ElemType>()[linear_index];
+    };
+
+    void
+    zero()
+    {
+        for (int i = 0; i < xElems; ++i) {
+            (*this)[i] = (ElemType)0;
+        }
+    };
+};
+
+/**
+ * @brief Provides a view of a vertical slice of either a
+ *        MatStore or a Tile.
+ *
+ * Based on whether this view it is being used from the MatStore
+ * directly or from the Tile different parameters are used. Behind the
+ * scenes the parameters are used to stride through the (linear) backing
+ * store in order to return or maniplate the desired elements of the
+ * row/column.
+ *
+ * @tparam ElemType The type of element to use for the view.
+ * @tparam Container The type of container being used as the backing store.
+ * @tparam FromTile Set true if operating on an interleaved tile.
+ */
+template <typename ElemType, typename Container, bool FromTile>
+class VerticalSlice
+{
+    template <size_t, size_t> friend class MatStore;
+    template <typename, typename> friend class Tile;
+
+  private:
+    Container * container;
+    size_t index;
+    size_t xElems;
+    size_t yElems;
+    size_t startElts;
+    size_t strideElts;
+
+  private:
+    VerticalSlice(Container& cnt, size_t _startBytes, size_t _strideBytes, size_t idx)
+      : container(&cnt), index(idx),
+        xElems(container->xSize() / sizeof(ElemType)),
+        yElems(container->ySize() / (FromTile ? sizeof(ElemType): 1)),
+        startElts(_startBytes / sizeof(ElemType)),
+        strideElts(_strideBytes / sizeof(ElemType))
+    {
+        gem5_assert(xElems > 0, "The number of xElems cannot be 0");
+        gem5_assert(yElems > 0, "The number of yElems cannot be 0");
+
+        // Make sure that we have a whole multiple of an element size
+        assert (_startBytes % sizeof(ElemType) == 0);
+        assert (_strideBytes % sizeof(ElemType) == 0);
+
+        if constexpr (!FromTile) {
+            // If we are not operating on a tile, the stride must be the
+            // same as the row length, X.
+            assert(_strideBytes == container->xSize());
+        } else {
+            // If we are operating on a tile, then the stride must be
+            // sizeof(ElemSize) greater than X.
+            assert(_strideBytes / container->xSize() == sizeof(ElemType));
+        }
+    };
+
+  public:
+    ElemType&
+    operator[](size_t elem_idx)
+    {
+        assert(elem_idx < yElems);
+        size_t linear_index = startElts + elem_idx * strideElts + index;
+        return container->template rawPtr<ElemType>()[linear_index];
+    };
+
+    void
+    zero()
+    {
+        for (int i = 0; i < yElems; ++i) {
+            (*this)[i] = (ElemType)0;
+        }
+    };
+};
+
+/**
+ * @brief Provides a view of a matrix that is row-interleaved onto a
+ *        MatStore.
+ *
+ * This class largely acts as a shim between the MatStore and the
+ * TileSlice view. The size of the ElemType and the index passed to the
+ * constructor are used to calculate the stride and start which are
+ * passed to the TileSlice view to control how it strides through the
+ * backing store.
+ *
+ * @tparam ElemType The type of element to use for the view.
+ * @tparam Container The type of container being used as the backing store.
+ */
+template <typename ElemType, typename Container>
+class Tile
+{
+    template <size_t, size_t> friend class MatStore;
+
+    // We "calculate" the number of possible tiles based on the element size
+    static constexpr size_t NUM_TILES = sizeof(ElemType);
+
+  private:
+    Container * container;
+    size_t index;
+    size_t startBytes;
+    size_t strideBytes;
+
+  private:
+    Tile(Container& cnt, size_t idx)
+      : container(&cnt), index(idx)
+    {
+        assert(index < NUM_TILES);
+        startBytes = container->xSize() * index;
+        strideBytes = NUM_TILES * container->xSize();
+    };
+
+  public:
+    auto
+    operator[](size_t idx)
+    {
+        assert(idx < (container->ySize() / NUM_TILES));
+        return asHSlice(idx);
+    };
+
+    Container*
+    getContainer()
+    {
+        return container;
+    };
+
+    auto
+    asHSlice(size_t row_idx)
+    {
+        assert(row_idx < container->ySize() / NUM_TILES);
+        return HorizontalSlice<ElemType, Container, true>(*container,
+                                                          startBytes,
+                                                          strideBytes,
+                                                          row_idx);
+    };
+
+    auto
+    asVSlice(size_t col_idx)
+    {
+        assert(col_idx < container->xSize());
+        return VerticalSlice<ElemType, Container, true>(*container, startBytes,
+                                                        strideBytes, col_idx);
+    };
+
+    void
+    zero()
+    {
+        for (int i = 0; i < container->ySize() / NUM_TILES; ++i) {
+            // We zero the tile by rows. We need to do it this way due
+            // to the interleaving.
+            auto row = this->asHSlice(i);
+            row.zero();
+        }
+    };
+};
+
+// Base container class for a matrix. Allows for non-square matricies.
+/**
+ * @brief Backing store for matrices.
+ *
+ * This class provides the backing store for matricies, and is largely a
+ * wrapper around an std::array of bytes. This class provides some basic
+ * interfaces for assignment (copy the backing store) and comparison,
+ * and provides the interface for generating views onto the backing
+ * store. It is these views that are intended to be used by the end-user
+ * of the matrix in most cases.
+ *
+ * This class is also responsible for handling the
+ * serialisation/unserialisation of matrix registers (see ShowParam and
+ * ParseParam).
+ *
+ * @tparam X X size in bytes (number of columns).
+ * @tparam Y Y size in bytes (number of rows).
+ */
+template <size_t X, size_t Y>
+class MatStore
+{
+    static_assert(X > 0, "X size cannot be 0");
+    static_assert(Y > 0, "Y size cannot be 0");
+
+    static constexpr size_t LINEAR_SIZE = X * Y;
+
+    template <typename, typename, bool> friend class HorizontalSlice;
+    template <typename, typename, bool> friend class VerticalSlice;
+
+  public:
+    static constexpr inline size_t xSize() { return X; };
+    static constexpr inline size_t ySize() { return Y; };
+    static constexpr inline size_t linearSize() { return LINEAR_SIZE; };
+
+    using Container = std::array<uint8_t, LINEAR_SIZE>;
+    using MyClass = MatStore<X, Y>;
+  private:
+    // We need to be able to handle 128-bit types; align accordingly
+    alignas(16) Container container;
+
+  public:
+    /** Constructor */
+    MatStore() {};
+
+    MatStore(const MatStore&) = default;
+
+    void
+    zero()
+    {
+        memset(container.data(), 0 , LINEAR_SIZE);
+    }
+
+    /** Assignment operators. */
+    /** @{ */
+    /** From MatStore */
+    MyClass&
+    operator=(const MyClass& that)
+    {
+        if (&that == this)
+            return *this;
+        memcpy(container.data(), that.container.data(), LINEAR_SIZE);
+        return *this;
+    }
+    /** @} */
+
+    /** Equality operator.
+     * Required to compare thread contexts.
+     */
+    template<size_t X2, size_t Y2>
+    inline bool
+    operator==(const MatStore<X2, Y2>& that) const
+    {
+        return X == X2 && Y == Y2 &&
+               !memcmp(container.data(), that.container.data(), LINEAR_SIZE);
+    }
+
+    /** Inequality operator.
+     * Required to compare thread contexts.
+     */
+    template<size_t X2, size_t Y2>
+    bool
+    operator!=(const MatStore<X2, Y2>& that) const
+    {
+        return !operator==(that);
+    }
+
+  private:
+    /** Get pointer to the raw data. */
+    template <typename ElemType>
+    const ElemType* rawPtr() const
+    {
+        return reinterpret_cast<const ElemType*>(container.data());
+    }
+
+    template <typename ElemType>
+    ElemType* rawPtr() { return reinterpret_cast<ElemType*>(container.data()); }
+
+  public:
+    template <typename ElemType>
+    auto
+    asTile(size_t index)
+    {
+        return Tile<ElemType, MyClass>(*this, index);
+    }
+
+    template <typename ElemType>
+    auto
+    asHSlice(size_t row_idx)
+    {
+        return HorizontalSlice<ElemType, MyClass, false>(*this, 0, X, row_idx);
+    }
+
+    template <typename ElemType>
+    auto
+    asVSlice(size_t col_idx)
+    {
+        return VerticalSlice<ElemType, MyClass, false>(*this, 0, X, col_idx);
+    }
+
+    friend std::ostream&
+    operator<<(std::ostream& os, const MatStore<X, Y>& v)
+    {
+        // When printing for human consumption, break into 4 byte chunks.
+        ccprintf(os, "[");
+        size_t count = 0;
+        for (auto& b: v.container) {
+            if (count && (count % 4) == 0)
+                os << "_";
+            ccprintf(os, "%02x", b);
+            count++;
+        }
+        ccprintf(os, "]");
+        return os;
+    }
+
+    /** @} */
+    /**
+     * Used for serialization/unserialisation.
+     */
+    friend ParseParam<MatStore<X, Y>>;
+    friend ShowParam<MatStore<X, Y>>;
+
+};
+
+/**
+ * Calls required for serialization/deserialization
+ */
+/** @{ */
+template <size_t X, size_t Y>
+struct ParseParam<MatStore<X, Y>>
+{
+    static bool
+    parse(const std::string &str, MatStore<X, Y> &value)
+    {
+        fatal_if(str.size() > 2 * X * Y,
+                 "Matrix register value overflow at unserialize");
+        fatal_if(str.size() < 2 * X * Y,
+                 "Matrix register value underflow at unserialize");
+
+        for (int i = 0; i < X * Y; i++) {
+            uint8_t b = 0;
+            if (2 * i < str.size())
+                b = stoul(str.substr(i * 2, 2), nullptr, 16);
+            value.template rawPtr<uint8_t>()[i] = b;
+        }
+        return true;
+    }
+};
+
+template <size_t X, size_t Y>
+struct ShowParam<MatStore<X, Y>>
+{
+    static void
+    show(std::ostream &os, const MatStore<X, Y> &value)
+    {
+        for (auto& b: value.container)
+            ccprintf(os, "%02x", b);
+    }
+};
+/** @} */
+
+/**
+ * Dummy type aliases and constants for architectures that do not
+ * implement matrix registers.
+ */
+/** @{ */
+struct DummyMatRegContainer
+{
+    RegVal filler = 0;
+    bool operator == (const DummyMatRegContainer &d) const { return true; }
+    bool operator != (const DummyMatRegContainer &d) const { return true; }
+};
+template <>
+struct ParseParam<DummyMatRegContainer>
+{
+    static bool
+    parse(const std::string &s, DummyMatRegContainer &value)
+    {
+        return false;
+    }
+};
+static_assert(sizeof(DummyMatRegContainer) == sizeof(RegVal));
+static inline std::ostream &
+operator<<(std::ostream &os, const DummyMatRegContainer &d)
+{
+    return os;
+}
+/** @} */
+
+} // namespace gem5
+
+#endif // __ARCH_ARM_MATRIX_HH__
diff --git a/src/arch/arm/matrix.test.cc b/src/arch/arm/matrix.test.cc
new file mode 100644
index 0000000000..16d3da7048
--- /dev/null
+++ b/src/arch/arm/matrix.test.cc
@@ -0,0 +1,453 @@
+/*
+ * Copyright (c) 2022 Arm Limited
+ * All rights reserved
+ *
+ * The license below extends only to copyright in the software and shall
+ * not be construed as granting a license to any other intellectual
+ * property including but not limited to intellectual property relating
+ * to a hardware implementation of the functionality of the software
+ * licensed hereunder.  You may use the software subject to the license
+ * terms below provided that you ensure that this notice is replicated
+ * unmodified and in its entirety in all distributions of the software,
+ * modified or unmodified, in source code or in binary form.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are
+ * met: redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer;
+ * redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution;
+ * neither the name of the copyright holders nor the names of its
+ * contributors may be used to endorse or promote products derived from
+ * this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <gtest/gtest.h>
+
+#include "arch/arm/matrix.hh"
+
+using namespace gem5;
+
+TEST(Matrix, Size)
+{
+    {
+        // Minimum size
+        MatStore<1, 1> mat;
+        ASSERT_EQ(1, mat.linearSize());
+    }
+
+    {
+        // Medium size
+        constexpr size_t x_size = MaxMatRegRowLenInBytes / 2;
+        constexpr size_t y_size = MaxMatRegRows / 2;
+        MatStore<x_size, y_size> mat;
+        ASSERT_EQ(x_size * y_size, mat.linearSize());
+    }
+
+    {
+        // Maximum size
+        MatStore<MaxMatRegRowLenInBytes, MaxMatRegRows> mat;
+        ASSERT_EQ(MaxMatRegRowLenInBytes * MaxMatRegRows, mat.linearSize());
+    }
+}
+
+TEST(Matrix, Zero)
+{
+    constexpr size_t size = 16;
+    MatStore<size, size> mat;
+    auto tile = mat.asTile<uint8_t>(0);
+
+    // Initializing with non-zero value
+    for (auto i = 0; i < size; i++) {
+        for (auto j = 0; j < size; j++) {
+            tile[i][j] = 0xAA;
+        }
+    }
+
+    // zeroing the matrix
+    mat.zero();
+
+    // checking if every matrix element is set to zero
+    for (auto i = 0; i < size; i++) {
+        for (auto j = 0; j < size; j++) {
+            ASSERT_EQ(tile[i][j], 0);
+        }
+    }
+}
+
+TEST(Matrix, ZeroTiles)
+{
+    constexpr size_t size = 16;
+    MatStore<size, size> mat;
+    auto byte_tile = mat.asTile<uint8_t>(0);
+
+    // Initializing the whole tile with non-zero value
+    for (auto i = 0; i < size; i++) {
+        for (auto j = 0; j < size; j++) {
+            byte_tile[i][j] = 0xAA;
+        }
+    }
+
+    // zeroing the half-word tile 0 of matrix
+    auto half_word_tile = mat.asTile<uint16_t>(0);
+    half_word_tile.zero();
+
+    // Check that every element of half-word tile 0 is zero
+    for (auto i = 0; i < size / 2; i++) {
+        for (auto j = 0; j < size / 2; j++) {
+            ASSERT_EQ(half_word_tile[i][j], 0);
+        }
+    }
+
+    // Check that every element of half-word tile 1 is 0xAAAA (note the
+    // double width of the element)
+    half_word_tile = mat.asTile<uint16_t>(1);
+    for (auto i = 0; i < size / 2; i++) {
+        for (auto j = 0; j < size / 2; j++) {
+            ASSERT_EQ(half_word_tile[i][j], 0xAAAA);
+        }
+    }
+
+    // Check if every matrix element on an even row is set to zero
+    for (auto i = 0; i < size; i += 2) {
+        for (auto j = 0; j < size; j++) {
+            ASSERT_EQ(byte_tile[i][j], 0);
+        }
+    }
+
+    // Check if every matrix element on an odd row is set to 0xAA
+    for (auto i = 1; i < size; i += 2) {
+        for (auto j = 0; j < size; j++) {
+            ASSERT_EQ(byte_tile[i][j], 0xAA);
+        }
+    }
+}
+
+TEST(Matrix, ZeroTileHSlice)
+{
+    constexpr size_t size = 16;
+    MatStore<size, size> mat;
+    auto byte_tile = mat.asTile<uint8_t>(0);
+
+    // Initializing the whole tile with non-zero value
+    for (auto i = 0; i < size; i++) {
+        for (auto j = 0; j < size; j++) {
+            byte_tile[i][j] = 0xAA;
+        }
+    }
+
+    // zeroing the 0th row of half-word tile 0
+    auto half_word_tile = mat.asTile<uint16_t>(0);
+    auto row = half_word_tile.asHSlice(0);
+    row.zero();
+
+    // Check that every element of the row is zero
+    for (auto i = 0; i < size / 2; i++) {
+        ASSERT_EQ(row[i], 0);
+    }
+
+    // Check that every element of row 1 is 0xAAAA
+    row = half_word_tile.asHSlice(1);
+    for (auto i = 0; i < size / 2; i++) {
+        ASSERT_EQ(row[i], 0xAAAA);
+    }
+
+    // Check that row 0 of the byte tile is zero, and that all remaining
+    // rows are unaffected
+    for (auto i = 0; i < size; i++) {
+        for (auto j = 0; j < size; j++) {
+            if (i == 0) {
+                ASSERT_EQ(byte_tile[i][j], 0);
+            } else {
+                ASSERT_EQ(byte_tile[i][j], 0xAA);
+            }
+        }
+    }
+}
+
+TEST(Matrix, ZeroTileVSlice)
+{
+    constexpr size_t size = 16;
+    MatStore<size, size> mat;
+    auto byte_tile = mat.asTile<uint8_t>(0);
+
+    // Initializing the whole tile with non-zero value
+    for (auto i = 0; i < size; i++) {
+        for (auto j = 0; j < size; j++) {
+            byte_tile[i][j] = 0xAA;
+        }
+    }
+
+    // zeroing the 0th column of half-word tile 0
+    auto half_word_tile = mat.asTile<uint16_t>(0);
+    auto col = half_word_tile.asVSlice(0);
+    col.zero();
+
+    // Check that every element of the column is zero
+    for (auto i = 0; i < size / 2; i++) {
+        ASSERT_EQ(col[i], 0);
+    }
+
+    // Check that every element of column 1 is 0xAAAA
+    col = half_word_tile.asVSlice(1);
+    for (auto i = 0; i < size / 2; i++) {
+        ASSERT_EQ(col[i], 0xAAAA);
+    }
+
+    // Check that elements 0 & 1 of the byte tile are zero for even rows,
+    // and that all remaining elements are unaffected
+    for (auto i = 0; i < size; i++) {
+        for (auto j = 0; j < size; j++) {
+            if (i % 2 == 0 && (j == 0 || j == 1)) {
+                ASSERT_EQ(byte_tile[i][j], 0);
+            } else {
+                ASSERT_EQ(byte_tile[i][j], 0xAA);
+            }
+        }
+    }
+}
+
+TEST(Matrix, ZeroHSlice)
+{
+    constexpr size_t size = 16;
+    MatStore<size, size> mat;
+    auto byte_tile = mat.asTile<uint8_t>(0);
+
+    // Initializing the whole tile with non-zero value
+    for (auto i = 0; i < size; i++) {
+        for (auto j = 0; j < size; j++) {
+            byte_tile[i][j] = 0xAA;
+        }
+    }
+
+    // Now we get a row directly from the matrix (as words, because it
+    // should make no difference), zero it
+    auto row = mat.asHSlice<uint32_t>(4);
+    row.zero();
+
+    // Check that every element of the row is zero
+    for (auto i = 0; i < size / 4; i++) {
+        ASSERT_EQ(row[i], 0);
+    }
+
+    // Check that row 4 of the byte tile is zero, and that all remaining
+    // rows are unaffected
+    for (auto i = 0; i < size; i++) {
+        for (auto j = 0; j < size; j++) {
+            if (i == 4) {
+                ASSERT_EQ(byte_tile[i][j], 0);
+            } else {
+                ASSERT_EQ(byte_tile[i][j], 0xAA);
+            }
+        }
+    }
+}
+
+TEST(Matrix, ZeroVSlice)
+{
+    constexpr size_t size = 16;
+    MatStore<size, size> mat;
+    auto byte_tile = mat.asTile<uint8_t>(0);
+
+    // Initializing the whole tile with non-zero value
+    for (auto i = 0; i < size; i++) {
+        for (auto j = 0; j < size; j++) {
+            byte_tile[i][j] = 0xAA;
+        }
+    }
+
+    // Now we get a column directly from the matrix, zero it
+    auto col = mat.asVSlice<uint8_t>(4);
+    col.zero();
+
+    // Check that every element of the column is zero
+    for (auto i = 0; i < size; i++) {
+        ASSERT_EQ(col[i], 0);
+    }
+
+    // Check that col 4 of the byte tile is zero, and that all remaining
+    // rows are unaffected
+    for (auto i = 0; i < size; i++) {
+        for (auto j = 0; j < size; j++) {
+            if (j == 4) {
+                ASSERT_EQ(byte_tile[i][j], 0);
+            } else {
+                ASSERT_EQ(byte_tile[i][j], 0xAA);
+            }
+        }
+    }
+
+    // Now we repeat with a wider element type too. Reinitializing the
+    // whole tile with non-zero value
+    for (auto i = 0; i < size; i++) {
+        for (auto j = 0; j < size; j++) {
+            byte_tile[i][j] = 0xAA;
+        }
+    }
+
+    // Now we get a word-wide column directly from the matrix, zero it
+    auto wide_col = mat.asVSlice<uint32_t>(1);
+    wide_col.zero();
+
+    // Check that every element of the column is zero
+    for (auto i = 0; i < size; i++) {
+        ASSERT_EQ(wide_col[i], 0);
+    }
+
+    // Check that cols 4-7 of the byte tile are zero, and that all
+    // remaining rows are unaffected
+    for (auto i = 0; i < size; i++) {
+        for (auto j = 0; j < size; j++) {
+            if (j >= 4 && j <= 7) {
+                ASSERT_EQ(byte_tile[i][j], 0);
+            } else {
+                ASSERT_EQ(byte_tile[i][j], 0xAA);
+            }
+        }
+    }
+}
+
+class TwoDifferentMatRegs : public testing::Test
+{
+  protected:
+    static constexpr size_t size = 4;
+
+    MatStore<size, size> mat1;
+    MatStore<size, size> mat2;
+
+    void
+    SetUp() override
+    {
+        auto tile1 = mat1.asTile<uint8_t>(0);
+        auto tile2 = mat2.asTile<uint8_t>(0);
+
+        // Initializing with non-zero value for matrix 1
+        for (auto i = 0; i < size; i++) {
+            for (auto j = 0; j < size; j++) {
+                tile1[i][j] = 0xAA;
+            }
+        }
+
+        // Initializing with zero value for matrix 2
+        for (auto i = 0; i < size; i++) {
+            for (auto j = 0; j < size; j++) {
+                tile2[i][j] = 0x0;
+            }
+        }
+    }
+};
+
+// Testing operator=
+TEST_F(TwoDifferentMatRegs, Assignment)
+{
+    // Copying the matrix
+    mat2 = mat1;
+
+    auto tile2 = mat2.asTile<uint8_t>(0);
+
+    // Checking if matrix 2 elements are 0xAA
+    for (auto i = 0; i < size; i++) {
+        for (auto j = 0; j < size; j++) {
+            ASSERT_EQ(tile2[i][j], 0xAA);
+        }
+    }
+}
+
+// Testing operator==
+TEST_F(TwoDifferentMatRegs, Equality)
+{
+    // Equality check
+    ASSERT_TRUE(mat1 == mat1);
+    ASSERT_TRUE(mat2 == mat2);
+    ASSERT_FALSE(mat1 == mat2);
+}
+
+// Testing operator!=
+TEST_F(TwoDifferentMatRegs, Inequality)
+{
+    // Inequality check
+    ASSERT_FALSE(mat1 != mat1);
+    ASSERT_FALSE(mat2 != mat2);
+    ASSERT_TRUE(mat1 != mat2);
+}
+
+// Testing operator<<
+TEST_F(TwoDifferentMatRegs, Printing)
+{
+    {
+        std::ostringstream stream;
+        stream << mat1;
+        ASSERT_EQ(stream.str(), "[aaaaaaaa_aaaaaaaa_aaaaaaaa_aaaaaaaa]");
+    }
+
+    {
+        std::ostringstream stream;
+        stream << mat2;
+        ASSERT_EQ(stream.str(), "[00000000_00000000_00000000_00000000]");
+    }
+}
+
+// Testing ParseParam
+TEST_F(TwoDifferentMatRegs, ParseParam)
+{
+    ParseParam<decltype(mat1)> parser;
+
+    parser.parse("bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb", mat1);
+    parser.parse("cccccccccccccccccccccccccccccccc", mat2);
+
+    for (auto i = 0; i < size; i++) {
+        for (auto j = 0; j < size; j++) {
+            ASSERT_EQ(mat1.asTile<uint8_t>(0)[i][j], 0xbb);
+            ASSERT_EQ(mat2.asTile<uint8_t>(0)[i][j], 0xcc);
+        }
+    }
+}
+
+// Testing ParseParam Underflow
+TEST_F(TwoDifferentMatRegs, ParseParamUnderflow)
+{
+    ParseParam<decltype(mat1)> parser;
+
+    // We should trigger a fatal() here.
+    EXPECT_ANY_THROW(parser.parse("b", mat1));
+}
+
+// Testing ParseParam Overflow
+TEST_F(TwoDifferentMatRegs, ParseParamOverflow)
+{
+    ParseParam<decltype(mat1)> parser;
+
+    // We should trigger a fatal() here.
+    EXPECT_ANY_THROW(parser.parse("bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb", mat1));
+}
+
+// Testing ShowParam
+TEST_F(TwoDifferentMatRegs, ShowParam)
+{
+    ShowParam<decltype(mat1)> parser;
+
+    {
+        std::stringstream ss;
+        parser.show(ss, mat1);
+        ASSERT_EQ(ss.str(), "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa");
+    }
+
+    {
+        std::stringstream ss;
+        parser.show(ss, mat2);
+        ASSERT_EQ(ss.str(), "00000000000000000000000000000000");
+    }
+}
diff --git a/src/arch/arm/regs/mat.hh b/src/arch/arm/regs/mat.hh
new file mode 100644
index 0000000000..196e876f92
--- /dev/null
+++ b/src/arch/arm/regs/mat.hh
@@ -0,0 +1,136 @@
+/*
+ * Copyright (c) 2022 Arm Limited
+ * All rights reserved
+ *
+ * The license below extends only to copyright in the software and shall
+ * not be construed as granting a license to any other intellectual
+ * property including but not limited to intellectual property relating
+ * to a hardware implementation of the functionality of the software
+ * licensed hereunder.  You may use the software subject to the license
+ * terms below provided that you ensure that this notice is replicated
+ * unmodified and in its entirety in all distributions of the software,
+ * modified or unmodified, in source code or in binary form.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are
+ * met: redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer;
+ * redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution;
+ * neither the name of the copyright holders nor the names of its
+ * contributors may be used to endorse or promote products derived from
+ * this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef __ARCH_ARM_REGS_MAT_HH__
+#define __ARCH_ARM_REGS_MAT_HH__
+
+#include "arch/arm/types.hh"
+#include "arch/arm/matrix.hh"
+#include "cpu/reg_class.hh"
+#include "debug/MatRegs.hh"
+
+namespace gem5
+{
+
+namespace ArmISA
+{
+
+/*
+ * We do the same as is done for vector registers when creating the
+ * matricies. One of the things to note is that this allocates the
+ * largest architecturally possible matrix - this is a bit inefficient
+ * from a memory point of view, but at this point we do not know which
+ * vector length will be chosen (and this can potentially vary during
+ * runtime).
+ */
+using MatRegContainer = gem5::MatStore<MaxSmeVecLenInBytes,
+                                       MaxSmeVecLenInBytes>;
+
+template<typename ElemType>
+using MatTile = gem5::Tile<ElemType,
+                           MatRegContainer>;
+
+template<typename ElemType>
+using MatTileRow = gem5::HorizontalSlice<ElemType,
+                                         MatRegContainer,
+                                         true>;
+
+template<typename ElemType>
+using MatTileCol = gem5::VerticalSlice<ElemType,
+                                       MatRegContainer,
+                                       true>;
+
+template<typename ElemType>
+using MatRow = gem5::HorizontalSlice<ElemType,
+                                     MatRegContainer,
+                                     false>;
+
+template<typename ElemType>
+using MatCol = gem5::VerticalSlice<ElemType,
+                                   MatRegContainer,
+                                   false>;
+
+// SME ZA tile, i.e. matrix
+const int NumMatrixRegs = 1;
+
+static inline TypedRegClassOps<ArmISA::MatRegContainer> matRegClassOps;
+
+inline constexpr RegClass matRegClass =
+    RegClass(MatRegClass, MatRegClassName, NumMatrixRegs, debug::MatRegs).
+        ops(matRegClassOps).
+        regType<MatRegContainer>();
+
+/*
+ * Helpers for providing access to the different views of a matrix
+ * register. Intended to be called from the instruction implementations
+ * themselves.
+ */
+template<typename ElemType>
+MatTile<ElemType>
+getTile(MatRegContainer &reg, uint8_t tile_idx)
+{
+    return reg.asTile<ElemType>(tile_idx);
+}
+template<typename ElemType>
+MatTileRow<ElemType>
+getTileHSlice(MatRegContainer &reg, uint8_t tile_idx, uint8_t row_idx)
+{
+    return reg.asTile<ElemType>(tile_idx).asHSlice(row_idx);
+}
+template<typename ElemType>
+MatTileCol<ElemType>
+getTileVSlice(MatRegContainer &reg, uint8_t tile_idx, uint8_t col_idx)
+{
+    return reg.asTile<ElemType>(tile_idx).asVSlice(col_idx);
+}
+template<typename ElemType>
+MatRow<ElemType>
+getHSlice(MatRegContainer &reg, uint8_t row_idx)
+{
+    return reg.asHSlice<ElemType>(row_idx);
+}
+template<typename ElemType>
+MatCol<ElemType>
+getVSlice(MatRegContainer &reg, uint8_t col_idx)
+{
+    return reg.asVSlice<ElemType>(col_idx);
+}
+
+} // namespace ArmISA
+} // namespace gem5
+
+#endif
diff --git a/src/arch/arm/types.hh b/src/arch/arm/types.hh
index 44b20476d2..50db9bc9af 100644
--- a/src/arch/arm/types.hh
+++ b/src/arch/arm/types.hh
@@ -472,6 +472,18 @@ namespace ArmISA
     constexpr unsigned VecRegSizeBytes = MaxSveVecLenInBytes;
     constexpr unsigned VecPredRegSizeBits = MaxSveVecLenInBytes;
 
+    constexpr unsigned MaxSmeVecLenInBits = 2048;
+    static_assert(MaxSmeVecLenInBits >= 128 &&
+                  MaxSmeVecLenInBits <= 2048 &&
+                  // Only powers of two are supported. We don't need to
+                  // check for the zero case here as we already know it
+                  // is over 128.
+                  (MaxSmeVecLenInBits & (MaxSmeVecLenInBits - 1)) == 0,
+                  "Unsupported max. SME vector length");
+    constexpr unsigned MaxSmeVecLenInBytes  = MaxSmeVecLenInBits >> 3;
+    constexpr unsigned MaxSmeVecLenInWords  = MaxSmeVecLenInBits >> 5;
+    constexpr unsigned MaxSmeVecLenInDWords = MaxSmeVecLenInBits >> 6;
+
 } // namespace ArmISA
 } // namespace gem5
 

From dfd151d52d10c85cc0503fa60d3169b3d32456c1 Mon Sep 17 00:00:00 2001
From: Sascha Bischoff <sascha.bischoff@arm.com>
Date: Wed, 3 Aug 2022 13:40:02 +0100
Subject: [PATCH 146/492] arch-arm: Add system registers added/used by SME

We add the following registers which are added by SME:

* ID_AA64SMFR0_EL1
* SVCR
* SMIDR_EL1
* SMPRI_EL1
* SMPRIMAP_EL2
* SMCR_EL3
* SMCR_EL2
* SMCR_EL12
* SMCR_EL1
* TPIDR2_EL0
* MPAMSM_EL1

In addition we extend some of the existing registers with SME support
(SCR_EL3, CPACR_EL1, CPTR_EL2, CPTR_EL3, etc). These regisers are
responsible for enabling SME itself, or for configuring the trapping
behaviour for the differernt ELs.

In addition we implement some dummy registers as they are officially
required by SME, but gem5 itself doesn't actually support the features
yet (FGT, HCX).

Jira Issue: https://gem5.atlassian.net/browse/GEM5-1289

Change-Id: I18ba65fb9ac2b7a4b4f361998564fb5d472d1789
Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/64335
Tested-by: kokoro <noreply+kokoro@google.com>
Reviewed-by: Giacomo Travaglini <giacomo.travaglini@arm.com>
Maintainer: Giacomo Travaglini <giacomo.travaglini@arm.com>
---
 src/arch/arm/ArmISA.py          | 13 +++--
 src/arch/arm/ArmSystem.py       | 26 +++++++++
 src/arch/arm/isa.cc             | 97 ++++++++++++++++++++++++++++++++-
 src/arch/arm/isa.hh             |  3 +
 src/arch/arm/regs/misc.cc       | 52 +++++++++++++++++-
 src/arch/arm/regs/misc.hh       | 35 ++++++++++++
 src/arch/arm/regs/misc_types.hh | 35 +++++++++++-
 src/arch/arm/system.cc          |  1 +
 src/arch/arm/system.hh          |  8 ++-
 9 files changed, 261 insertions(+), 9 deletions(-)

diff --git a/src/arch/arm/ArmISA.py b/src/arch/arm/ArmISA.py
index 4f336e04cd..e73046d08b 100644
--- a/src/arch/arm/ArmISA.py
+++ b/src/arch/arm/ArmISA.py
@@ -1,4 +1,4 @@
-# Copyright (c) 2012-2013, 2015-2021 ARM Limited
+# Copyright (c) 2012-2013, 2015-2022 ARM Limited
 # All rights reserved.
 #
 # The license below extends only to copyright in the software and shall
@@ -38,7 +38,7 @@ from m5.proxy import *
 
 from m5.SimObject import SimObject
 from m5.objects.ArmPMU import ArmPMU
-from m5.objects.ArmSystem import SveVectorLength, ArmRelease
+from m5.objects.ArmSystem import SveVectorLength, SmeVectorLength, ArmRelease
 from m5.objects.BaseISA import BaseISA
 
 # Enum for DecoderFlavor
@@ -58,6 +58,8 @@ class ArmDefaultSERelease(ArmRelease):
         "FEAT_FCMA",
         "FEAT_JSCVT",
         "FEAT_PAuth",
+        # Armv9.2
+        "FEAT_SME",
         # Other
         "TME",
     ]
@@ -160,11 +162,14 @@ class ArmISA(BaseISA):
         "Any access to a MISCREG_IMPDEF_UNIMPL register is executed as NOP",
     )
 
-    # This is required because in SE mode a generic System SimObject is
-    # allocated, instead of an ArmSystem
+    # These are required because in SE mode a generic System SimObject
+    # is allocated, instead of an ArmSystem
     sve_vl_se = Param.SveVectorLength(
         1, "SVE vector length in quadwords (128-bit), SE-mode only"
     )
+    sme_vl_se = Param.SmeVectorLength(
+        1, "SME vector length in quadwords (128-bit), SE-mode only"
+    )
 
     # Recurse into subnodes to generate DTB entries. This is mainly needed to
     # generate the PMU entry.
diff --git a/src/arch/arm/ArmSystem.py b/src/arch/arm/ArmSystem.py
index 936c032780..25cf8b2172 100644
--- a/src/arch/arm/ArmSystem.py
+++ b/src/arch/arm/ArmSystem.py
@@ -49,6 +49,21 @@ class SveVectorLength(UInt8):
     max = 16
 
 
+class SmeVectorLength(UInt8):
+    min = 1
+    max = 16
+
+    def _check(self):
+        super()._check()
+
+        # SME needs to be a whole power of 2. We already know value is
+        # not zero. Hence:
+        if self.value & (self.value - 1) != 0:
+            raise TypeError(
+                "SME vector length is not a power of 2: %d" % self.value
+            )
+
+
 class ArmExtension(ScopedEnum):
     vals = [
         # Armv8.1
@@ -69,6 +84,8 @@ class ArmExtension(ScopedEnum):
         "FEAT_PAuth",
         # Armv8.4
         "FEAT_SEL2",
+        # Armv9.2
+        "FEAT_SME",  # Optional in Armv9.2
         # Others
         "SECURITY",
         "LPAE",
@@ -145,6 +162,8 @@ class ArmDefaultRelease(Armv8):
         "FEAT_PAuth",
         # Armv8.4
         "FEAT_SEL2",
+        # Armv9.2
+        "FEAT_SME",
     ]
 
 
@@ -176,6 +195,10 @@ class Armv84(Armv83):
     extensions = Armv83.extensions + ["FEAT_SEL2"]
 
 
+class Armv92(Armv84):
+    extensions = Armv84.extensions + ["FEAT_SME"]
+
+
 class ArmSystem(System):
     type = "ArmSystem"
     cxx_header = "arch/arm/system.hh"
@@ -205,6 +228,9 @@ class ArmSystem(System):
     sve_vl = Param.SveVectorLength(
         1, "SVE vector length in quadwords (128-bit)"
     )
+    sme_vl = Param.SveVectorLength(
+        1, "SME vector length in quadwords (128-bit)"
+    )
     semihosting = Param.ArmSemihosting(
         NULL,
         "Enable support for the Arm semihosting by settings this parameter",
diff --git a/src/arch/arm/isa.cc b/src/arch/arm/isa.cc
index c6bb2bd8d1..78a1f4fc9f 100644
--- a/src/arch/arm/isa.cc
+++ b/src/arch/arm/isa.cc
@@ -110,6 +110,7 @@ ISA::ISA(const Params &p) : BaseISA(p), system(NULL),
         haveLargeAsid64 = system->haveLargeAsid64();
         physAddrRange = system->physAddrRange();
         sveVL = system->sveVL();
+        smeVL = system->smeVL();
 
         release = system->releaseFS();
     } else {
@@ -117,6 +118,7 @@ ISA::ISA(const Params &p) : BaseISA(p), system(NULL),
         haveLargeAsid64 = false;
         physAddrRange = 32;  // dummy value
         sveVL = p.sve_vl_se;
+        smeVL = p.sme_vl_se;
 
         release = p.release_se;
     }
@@ -406,6 +408,49 @@ ISA::initID64(const ArmISAParams &p)
         miscRegs[MISCREG_ZCR_EL1] = sveVL - 1;
     }
 
+    // SME
+
+    // Set up the SME SMIDR
+    // [63:32] RES0
+    // [31:24] Implementer - default this to Arm Limited
+    // [23:16] SMCU Revision - set to 0 as we don't model an SMCU
+    // [15]    SMPS - We don't do priorities in gem5, so disable
+    // [14:12] RES0
+    // [11:0]  Affinity - we implement per-CPU SME, so set to 0 (no SMCU)
+    miscRegs[MISCREG_SMIDR_EL1] = 0 | // Affinity
+        0 << 15 |                     // SMPS
+        0x41 << 24;                   // Implementer
+
+    miscRegs[MISCREG_ID_AA64SMFR0_EL1] = 0;
+    miscRegs[MISCREG_ID_AA64SMFR0_EL1] |= 0x1UL << 32; // F32F32
+    // The following BF16F32 is actually not implemented due to a lack
+    // of BF16 support in gem5's fplib. However, as per the SME spec the
+    // _only_ allowed value is 0x1.
+    miscRegs[MISCREG_ID_AA64SMFR0_EL1] |= 0x1UL << 34; // BF16F32
+    miscRegs[MISCREG_ID_AA64SMFR0_EL1] |= 0x1UL << 35; // F16F32
+    miscRegs[MISCREG_ID_AA64SMFR0_EL1] |= 0xFUL << 36; // I8I32
+    miscRegs[MISCREG_ID_AA64SMFR0_EL1] |= 0x1UL << 48; // F64F64
+    miscRegs[MISCREG_ID_AA64SMFR0_EL1] |= 0xFUL << 52; // I16I64
+    miscRegs[MISCREG_ID_AA64SMFR0_EL1] |= 0x0UL << 56; // SMEver
+    miscRegs[MISCREG_ID_AA64SMFR0_EL1] |= 0x1UL << 32; // FA64
+
+    // We want to support FEAT_SME_FA64. Therefore, we enable it in all
+    // SMCR_ELx registers by default. Runtime software might change this
+    // later, but given that gem5 doesn't disable instructions based on
+    // this flag we default to the most representative value.
+    miscRegs[MISCREG_SMCR_EL3] = 0x1 << 31;
+    miscRegs[MISCREG_SMCR_EL2] = 0x1 << 31;
+    miscRegs[MISCREG_SMCR_EL1] = 0x1 << 31;
+
+    // Set the vector default vector length
+    if (release->has(ArmExtension::SECURITY)) {
+        miscRegs[MISCREG_SMCR_EL3] |= ((smeVL - 1) & 0xF);
+    } else if (release->has(ArmExtension::VIRTUALIZATION)) {
+        miscRegs[MISCREG_SMCR_EL2] |= ((smeVL - 1) & 0xF);
+    } else {
+        miscRegs[MISCREG_SMCR_EL1] |= ((smeVL - 1) & 0xF);
+    }
+
     // Enforce consistency with system-level settings...
 
     // EL3
@@ -420,6 +465,10 @@ ISA::initID64(const ArmISAParams &p)
     miscRegs[MISCREG_ID_AA64PFR0_EL1] = insertBits(
         miscRegs[MISCREG_ID_AA64PFR0_EL1], 35, 32,
         release->has(ArmExtension::FEAT_SVE) ? 0x1 : 0x0);
+    // SME
+    miscRegs[MISCREG_ID_AA64PFR1_EL1] = insertBits(
+        miscRegs[MISCREG_ID_AA64PFR1_EL1], 27, 24,
+        release->has(ArmExtension::FEAT_SME) ? 0x1 : 0x0);
     // SecEL2
     miscRegs[MISCREG_ID_AA64PFR0_EL1] = insertBits(
         miscRegs[MISCREG_ID_AA64PFR0_EL1], 39, 36,
@@ -962,6 +1011,10 @@ ISA::readMiscReg(RegIndex idx)
         {
             return miscRegs[MISCREG_CPSR] & 0x800000;
         }
+      case MISCREG_SVCR:
+        {
+            return miscRegs[MISCREG_SVCR];
+        }
       case MISCREG_L2CTLR:
         {
             // mostly unimplemented, just set NumCPUs field from sim and return
@@ -1037,7 +1090,9 @@ ISA::readMiscReg(RegIndex idx)
                     0x0000001000000000 : 0) | // SecEL2
                (gicv3CpuInterface     ? 0x0000000001000000 : 0);
       case MISCREG_ID_AA64PFR1_EL1:
-        return 0; // bits [63:0] RES0 (reserved for future use)
+        return 0x0 |
+               (release->has(ArmExtension::FEAT_SME) ?
+                    0x1 << 24 : 0); // SME
 
       // Generic Timer registers
       case MISCREG_CNTFRQ ... MISCREG_CNTVOFF:
@@ -1188,6 +1243,9 @@ ISA::setMiscReg(RegIndex idx, RegVal val)
                 if (release->has(ArmExtension::FEAT_SVE)) {
                     cpacrMask.zen = ones;
                 }
+                if (release->has(ArmExtension::FEAT_SME)) {
+                    cpacrMask.smen = ones;
+                }
                 newVal &= cpacrMask;
                 DPRINTF(MiscRegs, "Writing misc reg %s: %#x\n",
                         miscRegName[idx], newVal);
@@ -1205,14 +1263,21 @@ ISA::setMiscReg(RegIndex idx, RegVal val)
                     cptrMask.tz = ones;
                     cptrMask.zen = hcr.e2h ? ones : 0;
                 }
+                if (release->has(ArmExtension::FEAT_SME)) {
+                    cptrMask.tsm = ones;
+                    cptrMask.smen = hcr.e2h ? ones : 0;
+                }
                 cptrMask.fpen = hcr.e2h ? ones : 0;
                 newVal &= cptrMask;
                 cptrMask = 0;
-                cptrMask.res1_13_12_el2 = ones;
+                cptrMask.res1_13_el2 = ones;
                 cptrMask.res1_7_0_el2 = ones;
                 if (!release->has(ArmExtension::FEAT_SVE)) {
                     cptrMask.res1_8_el2 = ones;
                 }
+                if (!release->has(ArmExtension::FEAT_SME)) {
+                    cptrMask.res1_12_el2 = ones;
+                }
                 cptrMask.res1_9_el2 = ones;
                 newVal |= cptrMask;
                 DPRINTF(MiscRegs, "Writing misc reg %s: %#x\n",
@@ -1229,6 +1294,9 @@ ISA::setMiscReg(RegIndex idx, RegVal val)
                 if (release->has(ArmExtension::FEAT_SVE)) {
                     cptrMask.ez = ones;
                 }
+                if (release->has(ArmExtension::FEAT_SME)) {
+                    cptrMask.esm = ones;
+                }
                 newVal &= cptrMask;
                 DPRINTF(MiscRegs, "Writing misc reg %s: %#x\n",
                         miscRegName[idx], newVal);
@@ -1917,6 +1985,21 @@ ISA::setMiscReg(RegIndex idx, RegVal val)
                 idx = MISCREG_CPSR;
             }
             break;
+          case MISCREG_SVCR:
+            {
+                SVCR svcr = miscRegs[MISCREG_SVCR];
+                SVCR newSvcr = newVal;
+
+                // Don't allow other bits to be set
+                svcr.sm = newSvcr.sm;
+                svcr.za = newSvcr.za;
+                newVal = svcr;
+            }
+            break;
+          case MISCREG_SMPRI_EL1:
+            // Only the bottom 4 bits are settable
+            newVal = newVal & 0xF;
+            break;
           case MISCREG_AT_S1E1R_Xt:
             addressTranslation64(MMU::S1E1Tran, BaseMMU::Read, 0, val);
             return;
@@ -1982,6 +2065,16 @@ ISA::setMiscReg(RegIndex idx, RegVal val)
             tc->getDecoderPtr()->as<Decoder>().setSveLen(
                     (getCurSveVecLenInBits() >> 7) - 1);
             return;
+          case MISCREG_SMCR_EL3:
+          case MISCREG_SMCR_EL2:
+          case MISCREG_SMCR_EL1:
+            // Set the value here as we need to update the regs before
+            // reading them back in getCurSmeVecLenInBits (not
+            // implemented yet) to avoid setting stale vector lengths in
+            // the decoder.
+            setMiscRegNoEffect(idx, newVal);
+            // TODO: set the SME vector length
+            return;
         }
         setMiscRegNoEffect(idx, newVal);
     }
diff --git a/src/arch/arm/isa.hh b/src/arch/arm/isa.hh
index 6f9478298d..bc0ab7683e 100644
--- a/src/arch/arm/isa.hh
+++ b/src/arch/arm/isa.hh
@@ -96,6 +96,9 @@ namespace ArmISA
         /** SVE vector length in quadwords */
         unsigned sveVL;
 
+        /** SME vector length in quadwords */
+        unsigned smeVL;
+
         /** This could be either a FS or a SE release */
         const ArmRelease *release;
 
diff --git a/src/arch/arm/regs/misc.cc b/src/arch/arm/regs/misc.cc
index 142b25f8c4..382b63e8bd 100644
--- a/src/arch/arm/regs/misc.cc
+++ b/src/arch/arm/regs/misc.cc
@@ -912,7 +912,7 @@ std::unordered_map<MiscRegNum64, MiscRegIndex> miscRegNumToIdx{
     { MiscRegNum64(3, 0, 0, 4, 2), MISCREG_RAZ },
     { MiscRegNum64(3, 0, 0, 4, 3), MISCREG_RAZ },
     { MiscRegNum64(3, 0, 0, 4, 4), MISCREG_ID_AA64ZFR0_EL1 },
-    { MiscRegNum64(3, 0, 0, 4, 5), MISCREG_RAZ },
+    { MiscRegNum64(3, 0, 0, 4, 5), MISCREG_ID_AA64SMFR0_EL1 },
     { MiscRegNum64(3, 0, 0, 4, 6), MISCREG_RAZ },
     { MiscRegNum64(3, 0, 0, 4, 7), MISCREG_RAZ },
     { MiscRegNum64(3, 0, 0, 5, 0), MISCREG_ID_AA64DFR0_EL1 },
@@ -943,6 +943,8 @@ std::unordered_map<MiscRegNum64, MiscRegIndex> miscRegNumToIdx{
     { MiscRegNum64(3, 0, 1, 0, 1), MISCREG_ACTLR_EL1 },
     { MiscRegNum64(3, 0, 1, 0, 2), MISCREG_CPACR_EL1 },
     { MiscRegNum64(3, 0, 1, 2, 0), MISCREG_ZCR_EL1 },
+    { MiscRegNum64(3, 0, 1, 2, 4), MISCREG_SMPRI_EL1 },
+    { MiscRegNum64(3, 0, 1, 2, 6), MISCREG_SMCR_EL1 },
     { MiscRegNum64(3, 0, 2, 0, 0), MISCREG_TTBR0_EL1 },
     { MiscRegNum64(3, 0, 2, 0, 1), MISCREG_TTBR1_EL1 },
     { MiscRegNum64(3, 0, 2, 0, 2), MISCREG_TCR_EL1 },
@@ -981,6 +983,7 @@ std::unordered_map<MiscRegNum64, MiscRegIndex> miscRegNumToIdx{
     { MiscRegNum64(3, 0, 9, 14, 2), MISCREG_PMINTENCLR_EL1 },
     { MiscRegNum64(3, 0, 10, 2, 0), MISCREG_MAIR_EL1 },
     { MiscRegNum64(3, 0, 10, 3, 0), MISCREG_AMAIR_EL1 },
+    { MiscRegNum64(3, 0, 10, 5, 3), MISCREG_MPAMSM_EL1 },
     { MiscRegNum64(3, 0, 12, 0, 0), MISCREG_VBAR_EL1 },
     { MiscRegNum64(3, 0, 12, 0, 1), MISCREG_RVBAR_EL1 },
     { MiscRegNum64(3, 0, 12, 1, 0), MISCREG_ISR_EL1 },
@@ -1024,6 +1027,7 @@ std::unordered_map<MiscRegNum64, MiscRegIndex> miscRegNumToIdx{
     { MiscRegNum64(3, 0, 15, 1, 4), MISCREG_DL1DATA4_EL1 },
     { MiscRegNum64(3, 1, 0, 0, 0), MISCREG_CCSIDR_EL1 },
     { MiscRegNum64(3, 1, 0, 0, 1), MISCREG_CLIDR_EL1 },
+    { MiscRegNum64(3, 1, 0, 0, 6), MISCREG_SMIDR_EL1 },
     { MiscRegNum64(3, 1, 0, 0, 7), MISCREG_AIDR_EL1 },
     { MiscRegNum64(3, 1, 11, 0, 2), MISCREG_L2CTLR_EL1 },
     { MiscRegNum64(3, 1, 11, 0, 3), MISCREG_L2ECTLR_EL1 },
@@ -1038,6 +1042,7 @@ std::unordered_map<MiscRegNum64, MiscRegIndex> miscRegNumToIdx{
     { MiscRegNum64(3, 3, 0, 0, 7), MISCREG_DCZID_EL0 },
     { MiscRegNum64(3, 3, 4, 2, 0), MISCREG_NZCV },
     { MiscRegNum64(3, 3, 4, 2, 1), MISCREG_DAIF },
+    { MiscRegNum64(3, 3, 4, 2, 2), MISCREG_SVCR },
     { MiscRegNum64(3, 3, 4, 4, 0), MISCREG_FPCR },
     { MiscRegNum64(3, 3, 4, 4, 1), MISCREG_FPSR },
     { MiscRegNum64(3, 3, 4, 5, 0), MISCREG_DSPSR_EL0 },
@@ -1057,6 +1062,7 @@ std::unordered_map<MiscRegNum64, MiscRegIndex> miscRegNumToIdx{
     { MiscRegNum64(3, 3, 9, 14, 3), MISCREG_PMOVSSET_EL0 },
     { MiscRegNum64(3, 3, 13, 0, 2), MISCREG_TPIDR_EL0 },
     { MiscRegNum64(3, 3, 13, 0, 3), MISCREG_TPIDRRO_EL0 },
+    { MiscRegNum64(3, 3, 13, 0, 5), MISCREG_TPIDR2_EL0 },
     { MiscRegNum64(3, 3, 14, 0, 0), MISCREG_CNTFRQ_EL0 },
     { MiscRegNum64(3, 3, 14, 0, 1), MISCREG_CNTPCT_EL0 },
     { MiscRegNum64(3, 3, 14, 0, 2), MISCREG_CNTVCT_EL0 },
@@ -1087,8 +1093,13 @@ std::unordered_map<MiscRegNum64, MiscRegIndex> miscRegNumToIdx{
     { MiscRegNum64(3, 4, 1, 1, 1), MISCREG_MDCR_EL2 },
     { MiscRegNum64(3, 4, 1, 1, 2), MISCREG_CPTR_EL2 },
     { MiscRegNum64(3, 4, 1, 1, 3), MISCREG_HSTR_EL2 },
+    { MiscRegNum64(3, 4, 1, 1, 4), MISCREG_HFGRTR_EL2 },
+    { MiscRegNum64(3, 4, 1, 1, 5), MISCREG_HFGWTR_EL2 },
     { MiscRegNum64(3, 4, 1, 1, 7), MISCREG_HACR_EL2 },
     { MiscRegNum64(3, 4, 1, 2, 0), MISCREG_ZCR_EL2 },
+    { MiscRegNum64(3, 4, 1, 2, 2), MISCREG_HCRX_EL2 },
+    { MiscRegNum64(3, 4, 1, 2, 5), MISCREG_SMPRIMAP_EL2 },
+    { MiscRegNum64(3, 4, 1, 2, 6), MISCREG_SMCR_EL2 },
     { MiscRegNum64(3, 4, 2, 0, 0), MISCREG_TTBR0_EL2 },
     { MiscRegNum64(3, 4, 2, 0, 1), MISCREG_TTBR1_EL2 },
     { MiscRegNum64(3, 4, 2, 0, 2), MISCREG_TCR_EL2 },
@@ -1167,6 +1178,7 @@ std::unordered_map<MiscRegNum64, MiscRegIndex> miscRegNumToIdx{
     { MiscRegNum64(3, 5, 1, 0, 0), MISCREG_SCTLR_EL12 },
     { MiscRegNum64(3, 5, 1, 0, 2), MISCREG_CPACR_EL12 },
     { MiscRegNum64(3, 5, 1, 2, 0), MISCREG_ZCR_EL12 },
+    { MiscRegNum64(3, 5, 1, 2, 6), MISCREG_SMCR_EL12 },
     { MiscRegNum64(3, 5, 2, 0, 0), MISCREG_TTBR0_EL12 },
     { MiscRegNum64(3, 5, 2, 0, 1), MISCREG_TTBR1_EL12 },
     { MiscRegNum64(3, 5, 2, 0, 2), MISCREG_TCR_EL12 },
@@ -1193,6 +1205,7 @@ std::unordered_map<MiscRegNum64, MiscRegIndex> miscRegNumToIdx{
     { MiscRegNum64(3, 6, 1, 1, 1), MISCREG_SDER32_EL3 },
     { MiscRegNum64(3, 6, 1, 1, 2), MISCREG_CPTR_EL3 },
     { MiscRegNum64(3, 6, 1, 2, 0), MISCREG_ZCR_EL3 },
+    { MiscRegNum64(3, 6, 1, 2, 6), MISCREG_SMCR_EL3 },
     { MiscRegNum64(3, 6, 1, 3, 1), MISCREG_MDCR_EL3 },
     { MiscRegNum64(3, 6, 2, 0, 0), MISCREG_TTBR0_EL3 },
     { MiscRegNum64(3, 6, 2, 0, 2), MISCREG_TCR_EL3 },
@@ -4932,6 +4945,30 @@ ISA::initializeMiscRegMetadata()
         .fault(EL3, faultZcrEL3)
         .allPrivileges().exceptUserMode();
 
+    // SME
+    InitReg(MISCREG_ID_AA64SMFR0_EL1)
+        .allPrivileges().exceptUserMode().writes(0);
+    InitReg(MISCREG_SVCR)
+        .allPrivileges();
+    InitReg(MISCREG_SMIDR_EL1)
+        .allPrivileges().exceptUserMode().writes(0);
+    InitReg(MISCREG_SMPRI_EL1)
+        .allPrivileges().exceptUserMode().reads(1);
+    InitReg(MISCREG_SMPRIMAP_EL2)
+        .hyp().mon();
+    InitReg(MISCREG_SMCR_EL3)
+        .mon();
+    InitReg(MISCREG_SMCR_EL2)
+        .hyp().mon();
+    InitReg(MISCREG_SMCR_EL12)
+        .allPrivileges().exceptUserMode();
+    InitReg(MISCREG_SMCR_EL1)
+        .allPrivileges().exceptUserMode();
+    InitReg(MISCREG_TPIDR2_EL0)
+        .allPrivileges();
+    InitReg(MISCREG_MPAMSM_EL1)
+        .allPrivileges().exceptUserMode();
+
     // Dummy registers
     InitReg(MISCREG_NOP)
       .allPrivileges();
@@ -4979,6 +5016,19 @@ ISA::initializeMiscRegMetadata()
       .warnNotFail()
       .fault(faultUnimplemented);
 
+    // HCX extension (unimplemented)
+    InitReg(MISCREG_HCRX_EL2)
+      .unimplemented()
+      .warnNotFail();
+
+    // FGT extension (unimplemented)
+    InitReg(MISCREG_HFGRTR_EL2)
+      .unimplemented()
+      .warnNotFail();
+    InitReg(MISCREG_HFGWTR_EL2)
+      .unimplemented()
+      .warnNotFail();
+
     // Register mappings for some unimplemented registers:
     // ESR_EL1 -> DFSR
     // RMR_EL1 -> RMR
diff --git a/src/arch/arm/regs/misc.hh b/src/arch/arm/regs/misc.hh
index 0b61eec2d6..999993b0de 100644
--- a/src/arch/arm/regs/misc.hh
+++ b/src/arch/arm/regs/misc.hh
@@ -1062,6 +1062,19 @@ namespace ArmISA
         MISCREG_ZCR_EL12,
         MISCREG_ZCR_EL1,
 
+        // SME
+        MISCREG_ID_AA64SMFR0_EL1,
+        MISCREG_SVCR,
+        MISCREG_SMIDR_EL1,
+        MISCREG_SMPRI_EL1,
+        MISCREG_SMPRIMAP_EL2,
+        MISCREG_SMCR_EL3,
+        MISCREG_SMCR_EL2,
+        MISCREG_SMCR_EL12,
+        MISCREG_SMCR_EL1,
+        MISCREG_TPIDR2_EL0,
+        MISCREG_MPAMSM_EL1,
+
         // NUM_PHYS_MISCREGS specifies the number of actual physical
         // registers, not considering the following pseudo-registers
         // (dummy registers), like MISCREG_UNKNOWN, MISCREG_IMPDEF_UNIMPL.
@@ -1092,6 +1105,13 @@ namespace ArmISA
         MISCREG_VSESR_EL2,
         MISCREG_VDISR_EL2,
 
+        // HCX extension (unimplemented)
+        MISCREG_HCRX_EL2,
+
+        // FGT extension (unimplemented)
+        MISCREG_HFGRTR_EL2,
+        MISCREG_HFGWTR_EL2,
+
         // PSTATE
         MISCREG_PAN,
         MISCREG_UAO,
@@ -2684,6 +2704,18 @@ namespace ArmISA
         "zcr_el12",
         "zcr_el1",
 
+        "id_aa64smfr0_el1",
+        "svcr",
+        "smidr_el1",
+        "smpri_el1",
+        "smprimap_el2",
+        "smcr_el3",
+        "smcr_el2",
+        "smcr_el12",
+        "smcr_el1",
+        "tpidr2_el0",
+        "mpamsm_el1",
+
         "num_phys_regs",
 
         // Dummy registers
@@ -2702,6 +2734,9 @@ namespace ArmISA
         "disr_el1",
         "vsesr_el2",
         "vdisr_el2",
+        "hcrx_el2",
+        "hfgrtr_el2",
+        "hfgwtr_el2",
 
         // PSTATE
         "pan",
diff --git a/src/arch/arm/regs/misc_types.hh b/src/arch/arm/regs/misc_types.hh
index 05bf19bf5a..0fe5a00ae8 100644
--- a/src/arch/arm/regs/misc_types.hh
+++ b/src/arch/arm/regs/misc_types.hh
@@ -416,6 +416,7 @@ namespace ArmISA
         Bitfield<21, 20> cp10;
         Bitfield<21, 20> fpen;  // AArch64
         Bitfield<23, 22> cp11;
+        Bitfield<25, 24> smen; // SME
         Bitfield<25, 24> cp12;
         Bitfield<27, 26> cp13;
         Bitfield<29, 28> rsvd;
@@ -734,10 +735,14 @@ namespace ArmISA
         Bitfield<31> tcpac;
         Bitfield<30> tam;
         Bitfield<28> tta_e2h;
+        Bitfield<25, 24> smen;
         Bitfield<21, 20> fpen;
         Bitfield<20> tta;
         Bitfield<17, 16> zen;
-        Bitfield<13, 12> res1_13_12_el2;
+        Bitfield<13, 13> res1_13_el2;
+        Bitfield<12, 12> res1_12_el2;
+        Bitfield<12> esm;  // SME (CPTR_EL3)
+        Bitfield<12> tsm;  // SME (CPTR_EL2)
         Bitfield<10> tfp;
         Bitfield<9> res1_9_el2;
         Bitfield<8> res1_8_el2;
@@ -750,6 +755,34 @@ namespace ArmISA
         Bitfield<3, 0> len;
     EndBitUnion(ZCR)
 
+    BitUnion64(SMCR)
+        Bitfield<63, 32> res0_63_32;
+        Bitfield<31, 31> fa64;
+        Bitfield<30, 9> res0_30_9;
+        Bitfield<8, 4> razwi_8_4;
+        Bitfield<3, 0> len;
+    EndBitUnion(SMCR)
+
+    BitUnion64(SVCR)
+        Bitfield<63, 2> res0_63_2;
+        Bitfield<1, 1> za;
+        Bitfield<0, 0> sm;
+    EndBitUnion(SVCR)
+
+    BitUnion64(SMIDR)
+        Bitfield<63, 32> res0_63_32;
+        Bitfield<31, 24> implementer;
+        Bitfield<23, 16> revision;
+        Bitfield<15, 15> smps;
+        Bitfield<14, 12> res0_14_12;
+        Bitfield<11, 0>  affinity;
+    EndBitUnion(SMIDR)
+
+    BitUnion64(SMPRI)
+        Bitfield<63, 4> res0_63_4;
+        Bitfield<3, 0> priority;
+    EndBitUnion(SMPRI)
+
    BitUnion32(OSL)
         Bitfield<64, 4> res0;
         Bitfield<3> oslm_3;
diff --git a/src/arch/arm/system.cc b/src/arch/arm/system.cc
index 747695f16e..d6e28e70d9 100644
--- a/src/arch/arm/system.cc
+++ b/src/arch/arm/system.cc
@@ -78,6 +78,7 @@ ArmSystem::ArmSystem(const Params &p)
       _physAddrRange64(p.phys_addr_range_64),
       _haveLargeAsid64(p.have_large_asid_64),
       _sveVL(p.sve_vl),
+      _smeVL(p.sme_vl),
       semihosting(p.semihosting),
       release(p.release),
       multiProc(p.multi_proc)
diff --git a/src/arch/arm/system.hh b/src/arch/arm/system.hh
index a712615b6e..7b0e6be322 100644
--- a/src/arch/arm/system.hh
+++ b/src/arch/arm/system.hh
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2010, 2012-2013, 2015-2021 ARM Limited
+ * Copyright (c) 2010, 2012-2013, 2015-2022 ARM Limited
  * All rights reserved
  *
  * The license below extends only to copyright in the software and shall
@@ -128,6 +128,9 @@ class ArmSystem : public System
     /** SVE vector length at reset, in quadwords */
     const unsigned _sveVL;
 
+    /** SME vector length at reset, in quadwords */
+    const unsigned _smeVL;
+
     /**
      * True if the Semihosting interface is enabled.
      */
@@ -205,6 +208,9 @@ class ArmSystem : public System
     /** Returns the SVE vector length at reset, in quadwords */
     unsigned sveVL() const { return _sveVL; }
 
+    /** Returns the SME vector length at reset, in quadwords */
+    unsigned smeVL() const { return _smeVL; }
+
     /** Returns the supported physical address range in bits if the highest
      * implemented exception level is 64 bits (ARMv8) */
     uint8_t physAddrRange64() const { return _physAddrRange64; }

From 72e4f614a2ecf81fa4053d973805276da451f539 Mon Sep 17 00:00:00 2001
From: Sascha Bischoff <sascha.bischoff@arm.com>
Date: Wed, 3 Aug 2022 14:54:04 +0100
Subject: [PATCH 147/492] arch-arm: Add interfaces to set and get SME vector
 length

We add interfaces which roughly mirror those already present for
manipulating the SVE vector lengths to set/get the SME vector length.

In the case of the SME vector length we also need to do some checking
to ensure that the vector length itself is aligned to a whole power of
two (one of the SME requirements).

Jira Issue: https://gem5.atlassian.net/browse/GEM5-1289

Change-Id: Ib89a4804466f5445adea6de8d65df512e366d618
Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/64336
Reviewed-by: Jason Lowe-Power <power.jg@gmail.com>
Maintainer: Jason Lowe-Power <power.jg@gmail.com>
Reviewed-by: Giacomo Travaglini <giacomo.travaglini@arm.com>
Tested-by: kokoro <noreply+kokoro@google.com>
---
 src/arch/arm/decoder.cc           |  4 ++
 src/arch/arm/decoder.hh           | 12 ++++++
 src/arch/arm/insts/static_inst.cc |  8 ++++
 src/arch/arm/insts/static_inst.hh | 15 +++++++
 src/arch/arm/isa.cc               | 67 +++++++++++++++++++++++++++++--
 src/arch/arm/isa.hh               |  4 ++
 6 files changed, 106 insertions(+), 4 deletions(-)

diff --git a/src/arch/arm/decoder.cc b/src/arch/arm/decoder.cc
index c315ecfefb..9fc4be0e9a 100644
--- a/src/arch/arm/decoder.cc
+++ b/src/arch/arm/decoder.cc
@@ -67,6 +67,10 @@ Decoder::Decoder(const ArmDecoderParams &params)
     sveLen = (safe_cast<ISA *>(params.isa)->
             getCurSveVecLenInBitsAtReset() >> 7) - 1;
 
+    // Initialize SME vector length
+    smeLen = (safe_cast<ISA *>(params.isa)
+            ->getCurSmeVecLenInBitsAtReset() >> 7) - 1;
+
     if (dvmEnabled) {
         warn_once(
             "DVM Ops instructions are micro-architecturally "
diff --git a/src/arch/arm/decoder.hh b/src/arch/arm/decoder.hh
index 8e486a3458..83690936c0 100644
--- a/src/arch/arm/decoder.hh
+++ b/src/arch/arm/decoder.hh
@@ -85,6 +85,12 @@ class Decoder : public InstDecoder
      */
     int sveLen;
 
+    /**
+     * SME vector length, encoded in the same format as the SMCR_EL<x>.LEN
+     * bitfields.
+     */
+    int smeLen;
+
     enums::DecoderFlavor decoderFlavor;
 
     /// A cache of decoded instruction objects.
@@ -158,6 +164,12 @@ class Decoder : public InstDecoder
     {
         sveLen = len;
     }
+
+    void
+    setSmeLen(uint8_t len)
+    {
+        smeLen = len;
+    }
 };
 
 } // namespace ArmISA
diff --git a/src/arch/arm/insts/static_inst.cc b/src/arch/arm/insts/static_inst.cc
index c07fb3922a..446f2afd18 100644
--- a/src/arch/arm/insts/static_inst.cc
+++ b/src/arch/arm/insts/static_inst.cc
@@ -1233,5 +1233,13 @@ ArmStaticInst::getCurSveVecLenInBits(ThreadContext *tc)
     return isa->getCurSveVecLenInBits();
 }
 
+unsigned
+ArmStaticInst::getCurSmeVecLenInBits(ThreadContext *tc)
+{
+    auto *isa = static_cast<ArmISA::ISA *>(tc->getIsaPtr());
+    return isa->getCurSmeVecLenInBits();
+}
+
+
 } // namespace ArmISA
 } // namespace gem5
diff --git a/src/arch/arm/insts/static_inst.hh b/src/arch/arm/insts/static_inst.hh
index fa58f98de9..3b67e6b253 100644
--- a/src/arch/arm/insts/static_inst.hh
+++ b/src/arch/arm/insts/static_inst.hh
@@ -583,6 +583,21 @@ class ArmStaticInst : public StaticInst
         return getCurSveVecLenInBits(tc) / (8 * sizeof(T));
     }
 
+    static unsigned getCurSmeVecLenInBits(ThreadContext *tc);
+
+    static unsigned
+    getCurSmeVecLenInQWords(ThreadContext *tc)
+    {
+        return getCurSmeVecLenInBits(tc) >> 6;
+    }
+
+    template<typename T>
+    static unsigned
+    getCurSmeVecLen(ThreadContext *tc)
+    {
+        return getCurSmeVecLenInBits(tc) / (8 * sizeof(T));
+    }
+
     inline Fault
     undefined(bool disabled=false) const
     {
diff --git a/src/arch/arm/isa.cc b/src/arch/arm/isa.cc
index 78a1f4fc9f..aec824387b 100644
--- a/src/arch/arm/isa.cc
+++ b/src/arch/arm/isa.cc
@@ -1162,6 +1162,8 @@ ISA::setMiscReg(RegIndex idx, RegVal val)
 
         tc->getDecoderPtr()->as<Decoder>().setSveLen(
                 (getCurSveVecLenInBits() >> 7) - 1);
+        tc->getDecoderPtr()->as<Decoder>().setSmeLen(
+                (getCurSmeVecLenInBits() >> 7) - 1);
 
         // Follow slightly different semantics if a CheckerCPU object
         // is connected
@@ -2069,11 +2071,11 @@ ISA::setMiscReg(RegIndex idx, RegVal val)
           case MISCREG_SMCR_EL2:
           case MISCREG_SMCR_EL1:
             // Set the value here as we need to update the regs before
-            // reading them back in getCurSmeVecLenInBits (not
-            // implemented yet) to avoid setting stale vector lengths in
-            // the decoder.
+            // reading them back in getCurSmeVecLenInBits to avoid
+            // setting stale vector lengths in the decoder.
             setMiscRegNoEffect(idx, newVal);
-            // TODO: set the SME vector length
+            tc->getDecoderPtr()->as<Decoder>().setSmeLen(
+                    (getCurSmeVecLenInBits() >> 7) - 1);
             return;
         }
         setMiscRegNoEffect(idx, newVal);
@@ -2161,6 +2163,13 @@ ISA::currEL() const
 unsigned
 ISA::getCurSveVecLenInBits() const
 {
+    SVCR svcr = miscRegs[MISCREG_SVCR];
+    // If we are in Streaming Mode, we should return the Streaming Mode vector
+    // length instead.
+    if (svcr.sm) {
+        return getCurSmeVecLenInBits();
+    }
+
     if (!FullSystem) {
         return sveVL * 128;
     }
@@ -2202,6 +2211,56 @@ ISA::getCurSveVecLenInBits() const
     return (len + 1) * 128;
 }
 
+unsigned
+ISA::getCurSmeVecLenInBits() const
+{
+    if (!FullSystem) {
+        return smeVL * 128;
+    }
+
+    panic_if(!tc,
+             "A ThreadContext is needed to determine the SME vector length "
+             "in full-system mode");
+
+    CPSR cpsr = miscRegs[MISCREG_CPSR];
+    ExceptionLevel el = (ExceptionLevel) (uint8_t) cpsr.el;
+
+    unsigned len = 0;
+
+    if (el == EL1 || (el == EL0 && !ELIsInHost(tc, el))) {
+        len = static_cast<SMCR>(miscRegs[MISCREG_SMCR_EL1]).len;
+    }
+
+    if (el == EL2 || (el == EL0 && ELIsInHost(tc, el))) {
+        len = static_cast<SMCR>(miscRegs[MISCREG_SMCR_EL2]).len;
+    } else if (release->has(ArmExtension::VIRTUALIZATION) && !isSecure(tc) &&
+               (el == EL0 || el == EL1)) {
+        len = std::min(
+            len,
+            static_cast<unsigned>(
+                static_cast<SMCR>(miscRegs[MISCREG_SMCR_EL2]).len));
+    }
+
+    if (el == EL3) {
+        len = static_cast<SMCR>(miscRegs[MISCREG_SMCR_EL3]).len;
+    } else if (release->has(ArmExtension::SECURITY)) {
+        len = std::min(
+            len,
+            static_cast<unsigned>(
+                static_cast<SMCR>(miscRegs[MISCREG_SMCR_EL3]).len));
+    }
+
+    len = std::min(len, smeVL - 1);
+
+    // len + 1 must be a power of 2! Round down to the nearest whole power of
+    // two.
+    static const unsigned LUT[16] = {0, 1, 1, 3, 3, 3, 3, 7,
+                                     7, 7, 7, 7, 7, 7, 7, 15};
+    len = LUT[len];
+
+    return (len + 1) * 128;
+}
+
 void
 ISA::serialize(CheckpointOut &cp) const
 {
diff --git a/src/arch/arm/isa.hh b/src/arch/arm/isa.hh
index bc0ab7683e..512799feed 100644
--- a/src/arch/arm/isa.hh
+++ b/src/arch/arm/isa.hh
@@ -367,6 +367,10 @@ namespace ArmISA
 
         unsigned getCurSveVecLenInBitsAtReset() const { return sveVL * 128; }
 
+        unsigned getCurSmeVecLenInBits() const;
+
+        unsigned getCurSmeVecLenInBitsAtReset() const { return smeVL * 128; }
+
         template <typename Elem>
         static void
         zeroSveVecRegUpperPart(Elem *v, unsigned eCount)

From 142d562b2f3df36401f3a5b2b68ef6026a599717 Mon Sep 17 00:00:00 2001
From: Sascha Bischoff <sascha.bischoff@arm.com>
Date: Wed, 3 Aug 2022 14:57:33 +0100
Subject: [PATCH 148/492] arch-arm: Implement SME access traps and extend the
 SVE ones

We add the SME access checks and trapping, which roughly mirrors that
used by SVE.

SME adds a new mode called streaming mode. When a core is in streaming
mode the behaviour of the SVE instructions changes such that they
check the SME traps and enables as opposed to the SVE ones. We
therefore update the existing SVE trap/access checking code to check
the SME equivalents when a core is in streaming mode. Else, the
original behaviour is preserved.

Jira Issue: https://gem5.atlassian.net/browse/GEM5-1289

Change-Id: I7eba70da9d41d2899b753fababbd6074ed732501
Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/64337
Reviewed-by: Giacomo Travaglini <giacomo.travaglini@arm.com>
Maintainer: Giacomo Travaglini <giacomo.travaglini@arm.com>
Tested-by: kokoro <noreply+kokoro@google.com>
---
 src/arch/arm/insts/static_inst.cc  | 127 +++++++++++++++++++++++++++++
 src/arch/arm/insts/static_inst.hh  |  29 +++++++
 src/arch/arm/isa/templates/sve.isa |   4 +-
 src/arch/arm/types.hh              |   1 +
 4 files changed, 160 insertions(+), 1 deletion(-)

diff --git a/src/arch/arm/insts/static_inst.cc b/src/arch/arm/insts/static_inst.cc
index 446f2afd18..54045f2fb1 100644
--- a/src/arch/arm/insts/static_inst.cc
+++ b/src/arch/arm/insts/static_inst.cc
@@ -1026,6 +1026,13 @@ ArmStaticInst::sveAccessTrap(ExceptionLevel el) const
 Fault
 ArmStaticInst::checkSveEnabled(ThreadContext *tc, CPSR cpsr, CPACR cpacr) const
 {
+    // We first check if we are in streaming mode or not. If we are in
+    // streaming mode, we actually check the SME traps, not the SVE traps!
+    SVCR svcr_sm_check = tc->readMiscReg(MISCREG_SVCR);
+    if (svcr_sm_check.sm) {
+        return checkSmeEnabled(tc, cpsr, cpacr);
+    }
+
     const ExceptionLevel el = (ExceptionLevel) (uint8_t) cpsr.el;
     // Check if access disabled in CPACR_EL1
     if (el <= EL1 && !ELIsInHost(tc, el)) {
@@ -1073,6 +1080,126 @@ ArmStaticInst::checkSveEnabled(ThreadContext *tc, CPSR cpsr, CPACR cpacr) const
     return NoFault;
 }
 
+Fault
+ArmStaticInst::smeAccessTrap(ExceptionLevel el, uint32_t iss) const
+{
+    switch (el) {
+      case EL1:
+        return std::make_shared<SupervisorTrap>(
+            machInst, iss, ExceptionClass::TRAPPED_SME);
+      case EL2:
+        return std::make_shared<HypervisorTrap>(
+            machInst, iss, ExceptionClass::TRAPPED_SME);
+      case EL3:
+        return std::make_shared<SecureMonitorTrap>(
+            machInst, iss, ExceptionClass::TRAPPED_SME);
+
+      default:
+        panic("Illegal EL in smeAccessTrap\n");
+    }
+}
+
+Fault
+ArmStaticInst::checkSmeEnabled(ThreadContext *tc, CPSR cpsr, CPACR cpacr) const
+{
+    const ExceptionLevel el = (ExceptionLevel) (uint8_t) cpsr.el;
+    // Check if access disabled in CPACR_EL1
+    if (el <= EL1 && !ELIsInHost(tc, el)) {
+        if ((el == EL0 && cpacr.smen == 0x1) ||
+            (!(cpacr.smen & 0x1)))
+            return smeAccessTrap(EL1);
+
+        if ((el == EL0 && cpacr.fpen == 0x1) ||
+            (!(cpacr.fpen & 0x1)))
+            return advSIMDFPAccessTrap64(EL1);
+    }
+
+    // Check if access disabled in CPTR_EL2
+    if (el <= EL2 && EL2Enabled(tc)) {
+        CPTR cptr_en_check = tc->readMiscReg(MISCREG_CPTR_EL2);
+        HCR hcr = tc->readMiscReg(MISCREG_HCR_EL2);
+        if (HaveExt(tc, ArmExtension::FEAT_VHE) && hcr.e2h) {
+            if (((cptr_en_check.smen & 0x1) == 0x0) ||
+                (cptr_en_check.smen == 0x1 && el == EL0 &&
+                 hcr.tge == 0x1)) {
+                return smeAccessTrap(EL2);
+            }
+            if (((cptr_en_check.fpen & 0x1) == 0x0) ||
+                (cptr_en_check.fpen == 0x1 && el == EL0 &&
+                 hcr.tge == 0x1)) {
+                return advSIMDFPAccessTrap64(EL2);
+            }
+        } else {
+            if (cptr_en_check.tsm == 1)
+                return smeAccessTrap(EL2);
+            if (cptr_en_check.tfp == 1)
+                return advSIMDFPAccessTrap64(EL2);
+        }
+    }
+
+    // Check if access disabled in CPTR_EL3
+    if (ArmSystem::haveEL(tc, EL3)) {
+        CPTR cptr_en_check = tc->readMiscReg(MISCREG_CPTR_EL3);
+        if (!cptr_en_check.esm)
+            return smeAccessTrap(EL3);
+        if (cptr_en_check.tfp)
+            return advSIMDFPAccessTrap64(EL3);
+    }
+
+    return NoFault;
+}
+
+Fault
+ArmStaticInst::checkSmeAccess(ThreadContext *tc, CPSR cpsr, CPACR cpacr) const
+{
+    const ExceptionLevel el = (ExceptionLevel) (uint8_t) cpsr.el;
+    // Check if access disabled in CPACR_EL1
+    if (el <= EL1 && !ELIsInHost(tc, el)) {
+        if ((el == EL0 && cpacr.smen == 0x1) || (!(cpacr.smen & 0x1))) {
+            return smeAccessTrap(EL1);
+        }
+    }
+
+    // Check if access disabled in CPTR_EL2
+    if (el <= EL2 && EL2Enabled(tc)) {
+        CPTR cptr_en_check = tc->readMiscReg(MISCREG_CPTR_EL2);
+        HCR hcr = tc->readMiscReg(MISCREG_HCR_EL2);
+        if (HaveExt(tc, ArmExtension::FEAT_VHE) && hcr.e2h) {
+            if (((cptr_en_check.smen & 0x1) == 0x0) ||
+                (cptr_en_check.smen == 0x1 && el == EL0 &&
+                 hcr.tge == 0x1)) {
+                return smeAccessTrap(EL2);
+            }
+        } else {
+            if (cptr_en_check.tsm == 1)
+                return smeAccessTrap(EL2);
+        }
+    }
+
+    // Check if access disabled in CPTR_EL3
+    if (ArmSystem::haveEL(tc, EL3)) {
+        CPTR cptr_en_check = tc->readMiscReg(MISCREG_CPTR_EL3);
+        if (!cptr_en_check.esm)
+            return smeAccessTrap(EL3);
+    }
+
+    return NoFault;
+}
+
+Fault
+ArmStaticInst::checkSveSmeEnabled(ThreadContext *tc, CPSR cpsr,
+                                  CPACR cpacr) const
+{
+    // If we are not in streaming mode, check the SVE traps, else check the SME
+    // traps.
+    SVCR svcr = tc->readMiscReg(MISCREG_SVCR);
+    if (!svcr.sm) {
+        return checkSveEnabled(tc, cpsr, cpacr);
+    } else {
+        return checkSmeEnabled(tc, cpsr, cpacr);
+    }
+}
+
 static uint8_t
 getRestoredITBits(ThreadContext *tc, CPSR spsr)
 {
diff --git a/src/arch/arm/insts/static_inst.hh b/src/arch/arm/insts/static_inst.hh
index 3b67e6b253..cc96dd9269 100644
--- a/src/arch/arm/insts/static_inst.hh
+++ b/src/arch/arm/insts/static_inst.hh
@@ -513,6 +513,35 @@ class ArmStaticInst : public StaticInst
      */
     Fault checkSveEnabled(ThreadContext *tc, CPSR cpsr, CPACR cpacr) const;
 
+
+    /**
+     * Trap an access to SME registers due to access control bits.
+     *
+     * @param el Target EL for the trap.
+     * @param iss ISS to be used for the trap.
+     */
+    Fault smeAccessTrap(ExceptionLevel el, uint32_t iss = 0) const;
+
+    /**
+     * Check if SME is enabled by checking the SME and FP bits of
+     * CPACR_EL1, CPTR_EL2, and CPTR_EL3
+     */
+    Fault checkSmeEnabled(ThreadContext *tc, CPSR cpsr, CPACR cpacr) const;
+
+    /**
+     * Check an SME access against CPACR_EL1, CPTR_EL2, and CPTR_EL3.
+     * This is purely used from the management instructions as it should
+     * be possible to call SMSTART/SMSTOP without having the floating
+     * point flags correctly set up.
+     */
+    Fault checkSmeAccess(ThreadContext *tc, CPSR cpsr, CPACR cpacr) const;
+
+    /**
+     * Check an SVE access against CPACR_EL1, CPTR_EL2, and CPTR_EL3, but
+     * choosing the correct set of traps to check based on Streaming Mode
+     */
+    Fault checkSveSmeEnabled(ThreadContext *tc, CPSR cpsr, CPACR cpacr) const;
+
     /**
      * Get the new PSTATE from a SPSR register in preparation for an
      * exception return.
diff --git a/src/arch/arm/isa/templates/sve.isa b/src/arch/arm/isa/templates/sve.isa
index 87316f1440..fc38a2b979 100644
--- a/src/arch/arm/isa/templates/sve.isa
+++ b/src/arch/arm/isa/templates/sve.isa
@@ -36,7 +36,9 @@
 let {{
     sveEnabledCheckCode = '''
         if (FullSystem) {
-            fault = this->checkSveEnabled(xc->tcBase(), Cpsr, Cpacr64);
+            // Check an SVE inst against the appropriate traps/enables based
+            // on the Streaming Mode.
+            fault = this->checkSveSmeEnabled(xc->tcBase(), Cpsr, Cpacr64);
             if (fault != NoFault) {
                 return fault;
             }
diff --git a/src/arch/arm/types.hh b/src/arch/arm/types.hh
index 50db9bc9af..2251d57c0b 100644
--- a/src/arch/arm/types.hh
+++ b/src/arch/arm/types.hh
@@ -323,6 +323,7 @@ namespace ArmISA
         SMC_64                  = 0x17,
         TRAPPED_MSR_MRS_64      = 0x18,
         TRAPPED_SVE             = 0x19,
+        TRAPPED_SME             = 0x1D,
         PREFETCH_ABORT_TO_HYP   = 0x20,
         PREFETCH_ABORT_LOWER_EL = 0x20,  // AArch64 alias
         PREFETCH_ABORT_FROM_HYP = 0x21,

From fe8eda9c4ee53dec780463b3506c3bba30a57da9 Mon Sep 17 00:00:00 2001
From: Sascha Bischoff <sascha.bischoff@arm.com>
Date: Wed, 3 Aug 2022 15:38:46 +0100
Subject: [PATCH 149/492] arch, arch-arm, cpu: Add matrix reg support to the
 ISA Parser

The ISA parser now emits the code required to access matrix
registers. In the case where a register is both a source and a
destination, the ISA parser generates appropriate code to make sure
that the contents of the source is copied to the destination. This is
required for the O3 CPU which treats these as two different physical
registers, and hence data is lost if not explicitly preserved.

Jira Issue: https://gem5.atlassian.net/browse/GEM5-1289

Change-Id: I8796bd1ea55b5edf5fb8ab92ef1a6060ccc58fa1
Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/64338
Maintainer: Giacomo Travaglini <giacomo.travaglini@arm.com>
Tested-by: kokoro <noreply+kokoro@google.com>
Reviewed-by: Giacomo Travaglini <giacomo.travaglini@arm.com>
---
 src/arch/arm/isa/operands.isa        |  8 ++++
 src/arch/isa_parser/isa_parser.py    |  5 ++-
 src/arch/isa_parser/operand_types.py | 65 +++++++++++++++++++++++++++-
 src/cpu/FuncUnit.py                  |  5 ++-
 src/cpu/minor/BaseMinorCPU.py        |  3 ++
 src/cpu/op_class.hh                  |  5 ++-
 6 files changed, 87 insertions(+), 4 deletions(-)

diff --git a/src/arch/arm/isa/operands.isa b/src/arch/arm/isa/operands.isa
index 2addd10def..5919ae974e 100644
--- a/src/arch/arm/isa/operands.isa
+++ b/src/arch/arm/isa/operands.isa
@@ -53,6 +53,7 @@ def operand_types {{
     'sf' : 'float',
     'df' : 'double',
     'vc' : 'ArmISA::VecRegContainer',
+    'mc' : 'ArmISA::MatRegContainer',
     # For operations that are implemented as a template
     'x' : 'TPElem',
     'xs' : 'TPSElem',
@@ -99,6 +100,10 @@ let {{
         def __init__(self, idx):
             super().__init__('pc', idx, sort_pri=srtNormal)
 
+    class MatrixReg(MatRegOp):
+        def __init__(self, idx, suffix=''):
+            super().__init__('mc', idx, 'IsMatrix', srtNormal)
+
     class IntRegNPC(IntRegOp):
         @overrideInOperand
         def regId(self):
@@ -454,6 +459,9 @@ def operands {{
     'FfrAux': VecPredReg('PREDREG_FFR'),
     'PUreg0': VecPredReg('PREDREG_UREG0'),
 
+    # SME ZA Register:
+    'ZA': MatrixReg('0'),
+
     #Abstracted control reg operands
     'MiscDest': CntrlReg('dest'),
     'MiscOp1': CntrlReg('op1'),
diff --git a/src/arch/isa_parser/isa_parser.py b/src/arch/isa_parser/isa_parser.py
index aff3c9f63c..39b50f06b6 100755
--- a/src/arch/isa_parser/isa_parser.py
+++ b/src/arch/isa_parser/isa_parser.py
@@ -1,4 +1,4 @@
-# Copyright (c) 2014, 2016, 2018-2019 ARM Limited
+# Copyright (c) 2014, 2016, 2018-2019, 2022 ARM Limited
 # All rights reserved
 #
 # The license below extends only to copyright in the software and shall
@@ -481,6 +481,8 @@ class InstObjParams(object):
                 self.op_class = "FloatAddOp"
             elif "IsVector" in self.flags:
                 self.op_class = "SimdAddOp"
+            elif "IsMatrix" in self.flags:
+                self.op_class = "MatrixOp"
             else:
                 self.op_class = "IntAluOp"
 
@@ -564,6 +566,7 @@ class ISAParser(Grammar):
                 "VecElemOp": VecElemOperandDesc,
                 "VecRegOp": VecRegOperandDesc,
                 "VecPredRegOp": VecPredRegOperandDesc,
+                "MatRegOp": MatRegOperandDesc,
                 "ControlRegOp": ControlRegOperandDesc,
                 "MemOp": MemOperandDesc,
                 "PCStateOp": PCStateOperandDesc,
diff --git a/src/arch/isa_parser/operand_types.py b/src/arch/isa_parser/operand_types.py
index 63ca765a09..4786f88774 100755
--- a/src/arch/isa_parser/operand_types.py
+++ b/src/arch/isa_parser/operand_types.py
@@ -1,4 +1,4 @@
-# Copyright (c) 2014, 2016, 2018-2019 ARM Limited
+# Copyright (c) 2014, 2016, 2018-2019, 2022 ARM Limited
 # All rights reserved
 #
 # The license below extends only to copyright in the software and shall
@@ -447,6 +447,69 @@ class VecPredRegOperandDesc(RegOperandDesc):
         super().__init__("vecPredRegClass", VecPredRegOperand, *args, **kwargs)
 
 
+class MatRegOperand(RegOperand):
+    reg_class = "MatRegClass"
+
+    def __init__(self, parser, full_name, ext, is_src, is_dest):
+        super().__init__(parser, full_name, ext, is_src, is_dest)
+
+    def makeDecl(self):
+        return ""
+
+    def makeReadW(self):
+        c_readw = (
+            f"\t\tauto &tmp_d{self.dest_reg_idx} = \n"
+            f"\t\t    *({self.parser.namespace}::MatRegContainer *)\n"
+            f"\t\t    xc->getWritableRegOperand(this, \n"
+            f"\t\t        {self.dest_reg_idx});\n"
+            f"\t\tauto &{self.base_name} = tmp_d{self.dest_reg_idx};\n"
+        )
+
+        return c_readw
+
+    def makeRead(self):
+        name = self.base_name
+        if self.is_dest and self.is_src:
+            name += "_merger"
+
+        c_read = (
+            f"\t\t{self.parser.namespace}::MatRegContainer "
+            f"\t\t        tmp_s{self.src_reg_idx};\n"
+            f"\t\txc->getRegOperand(this, {self.src_reg_idx},\n"
+            f"\t\t        &tmp_s{self.src_reg_idx});\n"
+            f"\t\tauto &{name} = tmp_s{self.src_reg_idx};\n"
+        )
+
+        # The following is required due to the way that the O3 CPU
+        # works. The ZA register is seen as two physical registers; one
+        # for reading from and one for writing to. We need to make sure
+        # to copy the data from the read-only copy to the writable
+        # reference (the destination). Failure to do this results in
+        # data loss for the O3 CPU. Other CPU models don't appear to
+        # require this.
+        if self.is_dest and self.is_src:
+            c_read += f"{self.base_name} = {name};"
+
+        return c_read
+
+    def makeWrite(self):
+        return f"""
+        if (traceData) {{
+            traceData->setData({self.reg_class}, &tmp_d{self.dest_reg_idx});
+        }}
+        """
+
+    def finalize(self):
+        super().finalize()
+        if self.is_dest:
+            self.op_rd = self.makeReadW() + self.op_rd
+
+
+class MatRegOperandDesc(RegOperandDesc):
+    def __init__(self, *args, **kwargs):
+        super().__init__("matRegClass", MatRegOperand, *args, **kwargs)
+
+
 class ControlRegOperand(Operand):
     reg_class = "miscRegClass"
 
diff --git a/src/cpu/FuncUnit.py b/src/cpu/FuncUnit.py
index c5137ac970..4a2733afc0 100644
--- a/src/cpu/FuncUnit.py
+++ b/src/cpu/FuncUnit.py
@@ -1,4 +1,4 @@
-# Copyright (c) 2010, 2017-2018 ARM Limited
+# Copyright (c) 2010, 2017-2018, 2022 ARM Limited
 # All rights reserved.
 #
 # The license below extends only to copyright in the software and shall
@@ -89,6 +89,9 @@ class OpClass(Enum):
         "SimdShaSigma2",
         "SimdShaSigma3",
         "SimdPredAlu",
+        "Matrix",
+        "MatrixMov",
+        "MatrixOP",
         "MemRead",
         "MemWrite",
         "FloatMemRead",
diff --git a/src/cpu/minor/BaseMinorCPU.py b/src/cpu/minor/BaseMinorCPU.py
index bcdab1bad5..6641a39b4e 100644
--- a/src/cpu/minor/BaseMinorCPU.py
+++ b/src/cpu/minor/BaseMinorCPU.py
@@ -215,6 +215,9 @@ class MinorDefaultFloatSimdFU(MinorFU):
             "SimdSha256Hash2",
             "SimdShaSigma2",
             "SimdShaSigma3",
+            "Matrix",
+            "MatrixMov",
+            "MatrixOP",
         ]
     )
 
diff --git a/src/cpu/op_class.hh b/src/cpu/op_class.hh
index 94730f3d5d..4de018f21b 100644
--- a/src/cpu/op_class.hh
+++ b/src/cpu/op_class.hh
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2010, 2017-2018 ARM Limited
+ * Copyright (c) 2010, 2017-2018, 2022 ARM Limited
  * All rights reserved
  *
  * The license below extends only to copyright in the software and shall
@@ -99,6 +99,9 @@ static const OpClass SimdSha256Hash2Op = enums::SimdSha256Hash2;
 static const OpClass SimdShaSigma2Op = enums::SimdShaSigma2;
 static const OpClass SimdShaSigma3Op = enums::SimdShaSigma3;
 static const OpClass SimdPredAluOp = enums::SimdPredAlu;
+static const OpClass MatrixOp = enums::Matrix;
+static const OpClass MatrixMovOp = enums::MatrixMov;
+static const OpClass MatrixOPOp = enums::MatrixOP;
 static const OpClass MemReadOp = enums::MemRead;
 static const OpClass MemWriteOp = enums::MemWrite;
 static const OpClass FloatMemReadOp = enums::FloatMemRead;

From c694d8589f1023f6e565d83b1c636da6a9088bc0 Mon Sep 17 00:00:00 2001
From: Sascha Bischoff <sascha.bischoff@arm.com>
Date: Wed, 3 Aug 2022 17:10:29 +0100
Subject: [PATCH 150/492] arch-arm, cpu: Implement instructions added by
 FEAT_SME

We add the full set of instructions added by Arm's FEAT_SME, with the
exception of BMOPA/BMOPS which are BrainFloat16-based outer product
instructions. These have been omitted due to the lack of support for
BF16 in fplib - the software FP library used for the Arm ISA
implementation.

The SMEv1 specification can be found at the following location:
https://developer.arm.com/documentation/ddi0616/latest

Jira Issue: https://gem5.atlassian.net/browse/GEM5-1289

Change-Id: I4882ab452bfc48770419860f89f1f60c7af8aceb
Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/64339
Reviewed-by: Giacomo Travaglini <giacomo.travaglini@arm.com>
Tested-by: kokoro <noreply+kokoro@google.com>
Maintainer: Giacomo Travaglini <giacomo.travaglini@arm.com>
---
 src/arch/arm/SConscript                    |   1 +
 src/arch/arm/insts/sme.cc                  | 183 +++++
 src/arch/arm/insts/sme.hh                  | 229 ++++++
 src/arch/arm/insts/sve.cc                  |  32 +
 src/arch/arm/insts/sve.hh                  |  41 +
 src/arch/arm/isa/formats/aarch64.isa       |  37 +-
 src/arch/arm/isa/formats/formats.isa       |   3 +
 src/arch/arm/isa/formats/sme.isa           | 738 ++++++++++++++++++
 src/arch/arm/isa/formats/sve_2nd_level.isa | 135 +++-
 src/arch/arm/isa/formats/sve_top_level.isa |   9 +
 src/arch/arm/isa/includes.isa              |   1 +
 src/arch/arm/isa/insts/insts.isa           |   3 +
 src/arch/arm/isa/insts/sme.isa             | 821 +++++++++++++++++++++
 src/arch/arm/isa/insts/sve.isa             |  63 ++
 src/arch/arm/isa/operands.isa              |   5 +
 src/arch/arm/isa/templates/sme.isa         | 773 +++++++++++++++++++
 src/arch/arm/isa/templates/sve.isa         |  53 ++
 src/arch/arm/isa/templates/templates.isa   |   3 +
 18 files changed, 3103 insertions(+), 27 deletions(-)
 create mode 100644 src/arch/arm/insts/sme.cc
 create mode 100644 src/arch/arm/insts/sme.hh
 create mode 100644 src/arch/arm/isa/formats/sme.isa
 create mode 100644 src/arch/arm/isa/insts/sme.isa
 create mode 100644 src/arch/arm/isa/templates/sme.isa

diff --git a/src/arch/arm/SConscript b/src/arch/arm/SConscript
index 935f082c11..ee5efebf13 100644
--- a/src/arch/arm/SConscript
+++ b/src/arch/arm/SConscript
@@ -68,6 +68,7 @@ Source('insts/misc.cc', tags='arm isa')
 Source('insts/misc64.cc', tags='arm isa')
 Source('insts/pred_inst.cc', tags='arm isa')
 Source('insts/pseudo.cc', tags='arm isa')
+Source('insts/sme.cc', tags='arm isa')
 Source('insts/static_inst.cc', tags='arm isa')
 Source('insts/sve.cc', tags='arm isa')
 Source('insts/sve_mem.cc', tags='arm isa')
diff --git a/src/arch/arm/insts/sme.cc b/src/arch/arm/insts/sme.cc
new file mode 100644
index 0000000000..305d332514
--- /dev/null
+++ b/src/arch/arm/insts/sme.cc
@@ -0,0 +1,183 @@
+/*
+ * Copyright (c) 2022 ARM Limited
+ * All rights reserved
+ *
+ * The license below extends only to copyright in the software and shall
+ * not be construed as granting a license to any other intellectual
+ * property including but not limited to intellectual property relating
+ * to a hardware implementation of the functionality of the software
+ * licensed hereunder.  You may use the software subject to the license
+ * terms below provided that you ensure that this notice is replicated
+ * unmodified and in its entirety in all distributions of the software,
+ * modified or unmodified, in source code or in binary form.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are
+ * met: redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer;
+ * redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution;
+ * neither the name of the copyright holders nor the names of its
+ * contributors may be used to endorse or promote products derived from
+ * this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include "arch/arm/insts/sme.hh"
+
+namespace gem5
+{
+
+namespace ArmISA
+{
+
+std::string
+SmeAddOp::generateDisassembly(Addr pc,
+                              const Loader::SymbolTable *symtab) const
+{
+    std::stringstream ss;
+    printMnemonic(ss, "", false);
+    ccprintf(ss, "#%d", imm);
+    ss << ", ";
+    printVecReg(ss, op1, true);
+    ss << ", ";
+    printVecPredReg(ss, gp1);
+    ss << ", ";
+    printVecPredReg(ss, gp2);
+    return ss.str();
+}
+
+std::string
+SmeAddVlOp::generateDisassembly(Addr pc,
+                                const Loader::SymbolTable *symtab) const
+{
+    std::stringstream ss;
+    printMnemonic(ss, "", false);
+    ss << ", ";
+    printVecReg(ss, dest);
+    ss << ", ";
+    printVecReg(ss, op1);
+    ss << ", ";
+    ccprintf(ss, "#%d", imm);
+    return ss.str();
+}
+
+std::string
+SmeLd1xSt1xOp::generateDisassembly(Addr pc,
+                                   const Loader::SymbolTable *symtab) const
+{
+    std::stringstream ss;
+    printMnemonic(ss, "", false);
+    ccprintf(ss, "#%d", imm);
+    ss << ", ";
+    printIntReg(ss, op1);
+    ss << ", ";
+    printVecPredReg(ss, gp);
+    ss << ", ";
+    printIntReg(ss, op2);
+    ss << ", ";
+    printIntReg(ss, op3);
+    return ss.str();
+}
+
+std::string
+SmeLdrStrOp::generateDisassembly(Addr pc,
+                                 const Loader::SymbolTable *symtab) const
+{
+    std::stringstream ss;
+    printMnemonic(ss, "", false);
+    ccprintf(ss, "#%d", imm);
+    ss << ", ";
+    printIntReg(ss, op1, true);
+    ss << ", ";
+    printIntReg(ss, op2, true);
+    return ss.str();
+}
+
+std::string
+SmeMovExtractOp::generateDisassembly(Addr pc,
+                                     const Loader::SymbolTable *symtab) const
+{
+    std::stringstream ss;
+    printMnemonic(ss, "", false);
+    printVecReg(ss, op1, true);
+    ss << ", ";
+    ccprintf(ss, "#%d", imm);
+    ss << ", ";
+    printVecPredReg(ss, gp);
+    ss << ", ";
+    printIntReg(ss, op2);
+    return ss.str();
+}
+
+std::string
+SmeMovInsertOp::generateDisassembly(Addr pc,
+                                    const Loader::SymbolTable *symtab) const
+{
+    std::stringstream ss;
+    printMnemonic(ss, "", false);
+    ccprintf(ss, "#%d", imm);
+    ss << ", ";
+    printVecReg(ss, op1, true);
+    ss << ", ";
+    printVecPredReg(ss, gp);
+    ss << ", ";
+    printIntReg(ss, op2);
+    return ss.str();
+}
+
+std::string
+SmeOPOp::generateDisassembly(Addr pc,
+                             const Loader::SymbolTable *symtab) const
+{
+    std::stringstream ss;
+    printMnemonic(ss, "", false);
+    ccprintf(ss, "#%d", imm);
+    ss << ", ";
+    printVecPredReg(ss, gp1);
+    ss << ", ";
+    printVecPredReg(ss, gp2);
+    ss << ", ";
+    printVecReg(ss, op1, true);
+    ss << ", ";
+    printVecReg(ss, op2, true);
+    return ss.str();
+}
+
+std::string
+SmeRdsvlOp::generateDisassembly(Addr pc,
+                                const Loader::SymbolTable *symtab) const
+{
+    std::stringstream ss;
+    printMnemonic(ss, "", false);
+    ss << ", ";
+    printVecReg(ss, dest);
+    ss << ", ";
+    ccprintf(ss, "#%d", imm);
+    return ss.str();
+}
+
+std::string
+SmeZeroOp::generateDisassembly(Addr pc,
+                               const Loader::SymbolTable *symtab) const
+{
+    std::stringstream ss;
+    ArmStaticInst::printMnemonic(ss, "", false);
+    ccprintf(ss, "#%d", imm);
+    return ss.str();
+}
+
+} // namespace ArmISA
+} // namespace gem5
diff --git a/src/arch/arm/insts/sme.hh b/src/arch/arm/insts/sme.hh
new file mode 100644
index 0000000000..d6cbdde5a7
--- /dev/null
+++ b/src/arch/arm/insts/sme.hh
@@ -0,0 +1,229 @@
+/*
+ * Copyright (c) 2022 ARM Limited
+ * All rights reserved
+ *
+ * The license below extends only to copyright in the software and shall
+ * not be construed as granting a license to any other intellectual
+ * property including but not limited to intellectual property relating
+ * to a hardware implementation of the functionality of the software
+ * licensed hereunder.  You may use the software subject to the license
+ * terms below provided that you ensure that this notice is replicated
+ * unmodified and in its entirety in all distributions of the software,
+ * modified or unmodified, in source code or in binary form.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are
+ * met: redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer;
+ * redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution;
+ * neither the name of the copyright holders nor the names of its
+ * contributors may be used to endorse or promote products derived from
+ * this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef __ARCH_ARM_INSTS_SME_HH__
+#define __ARCH_ARM_INSTS_SME_HH__
+
+#include "arch/arm/insts/static_inst.hh"
+
+namespace gem5
+{
+
+namespace ArmISA
+{
+
+// Used for SME ADDHA/ADDVA
+class SmeAddOp : public ArmStaticInst
+{
+  protected:
+    uint64_t imm;
+    RegIndex op1;
+    RegIndex gp1;
+    RegIndex gp2;
+
+    SmeAddOp(const char *mnem, ExtMachInst _machInst,
+             OpClass __opClass, uint64_t _imm, RegIndex _op1,
+             RegIndex _gp1, RegIndex _gp2) :
+        ArmStaticInst(mnem, _machInst, __opClass),
+        imm(_imm), op1(_op1), gp1(_gp1), gp2(_gp2)
+    {}
+
+    std::string generateDisassembly(
+            Addr pc, const Loader::SymbolTable *symtab) const override;
+};
+
+// Used for the SME ADDSPL/ADDSVL instructions
+class SmeAddVlOp : public ArmStaticInst
+{
+  protected:
+    RegIndex dest;
+    RegIndex op1;
+    int8_t imm;
+
+    SmeAddVlOp(const char *mnem, ExtMachInst _machInst,
+               OpClass __opClass, RegIndex _dest, RegIndex _op1,
+               int8_t _imm) :
+        ArmStaticInst(mnem, _machInst, __opClass),
+        dest(_dest), op1(_op1), imm(_imm)
+    {}
+
+    std::string generateDisassembly(
+            Addr pc, const Loader::SymbolTable *symtab) const override;
+};
+
+// Used for SME LD1x/ST1x instrucions
+class SmeLd1xSt1xOp : public ArmStaticInst
+{
+  protected:
+    uint64_t imm;
+    RegIndex op1;
+    RegIndex gp;
+    RegIndex op2;
+    RegIndex op3;
+    bool V;
+
+    SmeLd1xSt1xOp(const char *mnem, ExtMachInst _machInst,
+                    OpClass __opClass, uint64_t _imm, RegIndex _op1,
+                    RegIndex _gp, RegIndex _op2,
+                    RegIndex _op3, bool _V) :
+        ArmStaticInst(mnem, _machInst, __opClass),
+        imm(_imm), op1(_op1), gp(_gp), op2(_op2), op3(_op3), V(_V)
+    {}
+
+    std::string generateDisassembly(
+            Addr pc, const Loader::SymbolTable *symtab) const override;
+};
+
+// Used for SME LDR/STR instructions
+class SmeLdrStrOp : public ArmStaticInst
+{
+  protected:
+    uint64_t imm;
+    RegIndex op1;
+    RegIndex op2;
+
+    SmeLdrStrOp(const char *mnem, ExtMachInst _machInst,
+                OpClass __opClass, uint64_t _imm, RegIndex _op1,
+                RegIndex _op2) :
+        ArmStaticInst(mnem, _machInst, __opClass),
+        imm(_imm), op1(_op1), op2(_op2)
+    {}
+
+    std::string generateDisassembly(
+            Addr pc, const Loader::SymbolTable *symtab) const override;
+};
+
+// Used for SME MOVA (Tile to Vector)
+class SmeMovExtractOp : public ArmStaticInst
+{
+  protected:
+    RegIndex op1;
+    uint8_t imm;
+    RegIndex gp;
+    RegIndex op2;
+    bool v;
+
+    SmeMovExtractOp(const char *mnem, ExtMachInst _machInst,
+                    OpClass __opClass, RegIndex _op1, uint8_t _imm,
+                    RegIndex _gp, RegIndex _op2, bool _v) :
+        ArmStaticInst(mnem, _machInst, __opClass),
+        op1(_op1), imm(_imm), gp(_gp), op2(_op2), v(_v)
+    {}
+
+    std::string generateDisassembly(
+            Addr pc, const Loader::SymbolTable *symtab) const override;
+};
+
+// Used for SME MOVA (Vector to Tile)
+class SmeMovInsertOp : public ArmStaticInst
+{
+  protected:
+    uint8_t imm;
+    RegIndex op1;
+    RegIndex gp;
+    RegIndex op2;
+    bool v;
+
+    SmeMovInsertOp(const char *mnem, ExtMachInst _machInst,
+                    OpClass __opClass, uint8_t _imm, RegIndex _op1,
+                    RegIndex _gp, RegIndex _op2, bool _v) :
+        ArmStaticInst(mnem, _machInst, __opClass),
+        imm(_imm), op1(_op1), gp(_gp), op2(_op2), v(_v)
+    {}
+
+    std::string generateDisassembly(
+            Addr pc, const Loader::SymbolTable *symtab) const override;
+};
+
+// Used for SME output product instructions
+class SmeOPOp : public ArmStaticInst
+{
+  protected:
+    uint64_t imm;
+    RegIndex op1;
+    RegIndex gp1;
+    RegIndex gp2;
+    RegIndex op2;
+
+    SmeOPOp(const char *mnem, ExtMachInst _machInst, OpClass __opClass,
+            uint64_t _imm, RegIndex _op1, RegIndex _gp1,
+            RegIndex _gp2, RegIndex _op2) :
+        ArmStaticInst(mnem, _machInst, __opClass),
+        imm(_imm), op1(_op1), gp1(_gp1), gp2(_gp2), op2(_op2)
+    {}
+
+    std::string generateDisassembly(
+            Addr pc, const Loader::SymbolTable *symtab) const override;
+};
+
+// Used for the SME RDSVL instruction
+class SmeRdsvlOp : public ArmStaticInst
+{
+  protected:
+    RegIndex dest;
+    int8_t imm;
+
+    SmeRdsvlOp(const char *mnem, ExtMachInst _machInst,
+               OpClass __opClass, RegIndex _dest, int8_t _imm) :
+        ArmStaticInst(mnem, _machInst, __opClass),
+        dest(_dest), imm(_imm)
+    {}
+
+    std::string generateDisassembly(
+            Addr pc, const Loader::SymbolTable *symtab) const override;
+};
+
+// Used for SME ZERO
+class SmeZeroOp : public ArmStaticInst
+{
+  protected:
+    uint8_t imm;
+
+    SmeZeroOp(const char *mnem, ExtMachInst _machInst,
+                OpClass __opClass, uint8_t _imm) :
+        ArmStaticInst(mnem, _machInst, __opClass),
+        imm(_imm)
+    {}
+
+    std::string generateDisassembly(
+            Addr pc, const Loader::SymbolTable *symtab) const override;
+};
+
+} // namespace ArmISA
+} // namespace gem5
+
+#endif  // __ARCH_ARM_INSTS_SME_HH__
diff --git a/src/arch/arm/insts/sve.cc b/src/arch/arm/insts/sve.cc
index 9a525b195d..9d9c2bcb1c 100644
--- a/src/arch/arm/insts/sve.cc
+++ b/src/arch/arm/insts/sve.cc
@@ -161,6 +161,24 @@ SveWhileOp::generateDisassembly(
     return ss.str();
 }
 
+std::string
+SvePselOp::generateDisassembly(Addr pc,
+                                const Loader::SymbolTable *symtab) const
+{
+    std::stringstream ss;
+    printMnemonic(ss, "", false);
+    printVecPredReg(ss, dest);
+    ss << ", ";
+    printVecPredReg(ss, op1);
+    ss << ", ";
+    printVecPredReg(ss, gp);
+    ss << ", ";
+    printIntReg(ss, op2);
+    ss << ", ";
+    ccprintf(ss, "#%d", imm);
+    return ss.str();
+}
+
 std::string
 SveCompTermOp::generateDisassembly(
         Addr pc, const loader::SymbolTable *symtab) const
@@ -831,6 +849,20 @@ SveComplexIdxOp::generateDisassembly(
     return ss.str();
 }
 
+std::string
+SveClampOp::generateDisassembly(
+        Addr pc, const Loader::SymbolTable *symtab) const
+{
+    std::stringstream ss;
+    printMnemonic(ss, "", false);
+    printVecReg(ss, dest, true);
+    ss << ", ";
+    printVecReg(ss, op1, true);
+    ss << ", ";
+    printVecReg(ss, op2, true);
+    return ss.str();
+}
+
 std::string
 sveDisasmPredCountImm(uint8_t imm)
 {
diff --git a/src/arch/arm/insts/sve.hh b/src/arch/arm/insts/sve.hh
index f9939e1f22..63a59d493a 100644
--- a/src/arch/arm/insts/sve.hh
+++ b/src/arch/arm/insts/sve.hh
@@ -180,6 +180,28 @@ class SveWhileOp : public ArmStaticInst
             Addr pc, const loader::SymbolTable *symtab) const override;
 };
 
+/// Psel predicate selection SVE instruction.
+class SvePselOp : public ArmStaticInst
+{
+  protected:
+    RegIndex dest;
+    RegIndex op1;
+    RegIndex gp;
+    RegIndex op2;
+    uint64_t imm;
+
+    SvePselOp(const char *mnem, ExtMachInst _machInst,
+              OpClass __opClass, RegIndex _dest,
+              RegIndex _op1, RegIndex _gp,
+              RegIndex _op2, uint64_t _imm) :
+        ArmStaticInst(mnem, _machInst, __opClass),
+        dest(_dest), op1(_op1), gp(_gp), op2(_op2), imm(_imm)
+    {}
+
+    std::string generateDisassembly(
+            Addr pc, const Loader::SymbolTable *symtab) const override;
+};
+
 /// Compare and terminate loop SVE instruction.
 class SveCompTermOp : public ArmStaticInst
 {
@@ -951,6 +973,25 @@ class SveComplexIdxOp : public ArmStaticInst
             Addr pc, const loader::SymbolTable *symtab) const override;
 };
 
+// SVE2 SCLAMP/UCLAMP instructions
+class SveClampOp : public ArmStaticInst
+{
+  protected:
+    RegIndex dest;
+    RegIndex op1;
+    RegIndex op2;
+
+    SveClampOp(const char *mnem, ExtMachInst _machInst,
+                  OpClass __opClass, RegIndex _dest,
+                  RegIndex _op1, RegIndex _op2) :
+        ArmStaticInst(mnem, _machInst, __opClass),
+        dest(_dest), op1(_op1), op2(_op2)
+    {}
+
+    std::string generateDisassembly(
+            Addr pc, const Loader::SymbolTable *symtab) const override;
+};
+
 
 /// Returns the symbolic name associated with pattern `imm` for PTRUE(S)
 /// instructions.
diff --git a/src/arch/arm/isa/formats/aarch64.isa b/src/arch/arm/isa/formats/aarch64.isa
index 37eb995bfd..2fd28f8209 100644
--- a/src/arch/arm/isa/formats/aarch64.isa
+++ b/src/arch/arm/isa/formats/aarch64.isa
@@ -436,6 +436,9 @@ namespace Aarch64
                             // SP
                             return new MsrImm64(
                                 machInst, MISCREG_SPSEL, crm);
+                          case 0x1b:
+                            // SVE SVCR - SMSTART/SMSTOP
+                            return decodeSmeMgmt(machInst);
                           case 0x1e:
                             // DAIFSet
                             return new MsrImmDAIFSet64(
@@ -3073,20 +3076,30 @@ def format Aarch64() {{
         using namespace Aarch64;
         if (bits(machInst, 27) == 0x0) {
             if (bits(machInst, 28) == 0x0) {
-                if (bits(machInst, 26, 25) != 0x2) {
-                    return new Unknown64(machInst);
-                }
-                if (bits(machInst, 31) == 0x0) {
-                    switch (bits(machInst, 30, 29)) {
-                      case 0x0:
-                      case 0x1:
-                      case 0x2:
-                        return decodeSveInt(machInst);
-                      case 0x3:
-                        return decodeSveFp(machInst);
+                if (bits(machInst, 26) == 0x1) {
+                    if (bits(machInst, 31) == 0x0) {
+                        if (bits(machInst, 25) == 0x1) {
+                            return new Unknown64(machInst);
+                        }
+                        switch (bits(machInst, 30, 29)) {
+                            case 0x0:
+                            case 0x1:
+                            case 0x2:
+                            return decodeSveInt(machInst);
+                            case 0x3:
+                            return decodeSveFp(machInst);
+                        }
+                    } else {
+                            return decodeSveMem(machInst);
                     }
                 } else {
-                    return decodeSveMem(machInst);
+                    if ((bits(machInst, 25) == 0x0) && \
+                        (bits(machInst, 31) == 0x1)) {
+                        // bit 31:25=1xx0000
+                        return decodeSmeInst(machInst);
+                    } else {
+                        return new Unknown64(machInst);
+                    }
                 }
             } else if (bits(machInst, 26) == 0)
                 // bit 28:26=100
diff --git a/src/arch/arm/isa/formats/formats.isa b/src/arch/arm/isa/formats/formats.isa
index 5ef65966af..0a1f8f8ce2 100644
--- a/src/arch/arm/isa/formats/formats.isa
+++ b/src/arch/arm/isa/formats/formats.isa
@@ -52,6 +52,9 @@
 ##include "sve_top_level.isa"
 ##include "sve_2nd_level.isa"
 
+//Include support for decoding SME instructions (AArch64-only)
+##include "sme.isa"
+
 //Include support for predicated instructions
 ##include "pred.isa"
 
diff --git a/src/arch/arm/isa/formats/sme.isa b/src/arch/arm/isa/formats/sme.isa
new file mode 100644
index 0000000000..ac75d151ce
--- /dev/null
+++ b/src/arch/arm/isa/formats/sme.isa
@@ -0,0 +1,738 @@
+// Copyright (c) 2022 ARM Limited
+// All rights reserved
+//
+// The license below extends only to copyright in the software and shall
+// not be construed as granting a license to any other intellectual
+// property including but not limited to intellectual property relating
+// to a hardware implementation of the functionality of the software
+// licensed hereunder.  You may use the software subject to the license
+// terms below provided that you ensure that this notice is replicated
+// unmodified and in its entirety in all distributions of the software,
+// modified or unmodified, in source code or in binary form.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met: redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer;
+// redistributions in binary form must reproduce the above copyright
+// notice, this list of conditions and the following disclaimer in the
+// documentation and/or other materials provided with the distribution;
+// neither the name of the copyright holders nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+output header {{
+namespace Aarch64
+{
+    StaticInstPtr decodeSmeMgmt(ExtMachInst);
+    StaticInstPtr decodeSmeInst(ExtMachInst);
+
+        StaticInstPtr decodeSmeOp32(ExtMachInst);
+            StaticInstPtr decodeSmeOpFp32(ExtMachInst);
+            StaticInstPtr decodeSmeOpBf16(ExtMachInst);
+            StaticInstPtr decodeSmeOpFp16(ExtMachInst);
+            StaticInstPtr decodeSmeOpInt8(ExtMachInst);
+
+        StaticInstPtr decodeSmeOp64(ExtMachInst);
+            StaticInstPtr decodeSmeOpFp64(ExtMachInst);
+            StaticInstPtr decodeSmeOpInt16(ExtMachInst);
+
+        StaticInstPtr decodeSmeMovaInsert(ExtMachInst);
+        StaticInstPtr decodeSmeMovaExtract(ExtMachInst);
+
+        StaticInstPtr decodeSmeMisc(ExtMachInst);
+            StaticInstPtr decodeSmeZero(ExtMachInst);
+
+        StaticInstPtr decodeSmeAddArray(ExtMachInst);
+            StaticInstPtr decodeSmeAddhv(ExtMachInst);
+
+        StaticInstPtr decodeSmeMemory(ExtMachInst);
+            StaticInstPtr decodeSmeLoad(ExtMachInst);
+            StaticInstPtr decodeSmeStore(ExtMachInst);
+            StaticInstPtr decodeSmeLoadStoreArray(ExtMachInst);
+            StaticInstPtr decodeSmeLoadQuadWord(ExtMachInst);
+            StaticInstPtr decodeSmeStoreQuadWord(ExtMachInst);
+}
+}};
+
+output decoder {{
+namespace Aarch64
+{
+    // NOTE: This is called from a different decode tree (aarch64.isa).
+    // For neatness and clarity we keep the code here order to keep all
+    // SME things together.
+    StaticInstPtr
+    decodeSmeMgmt(ExtMachInst machInst)
+    {
+        const uint8_t imm = (uint8_t)bits(machInst, 10, 8);
+
+        if (bits(machInst, 8)) {
+            return new SmeSmstart(machInst, imm);
+        } else {
+            return new SmeSmstop(machInst, imm);
+        }
+    }
+
+    StaticInstPtr
+    decodeSmeInst(ExtMachInst machInst)
+    {
+        // Starting point for decoding: bits 31:25=1xx0000
+
+        const uint8_t op0 = (uint8_t)bits(machInst, 30, 29);
+        const uint8_t op1 = (uint8_t)bits(machInst, 24, 19);
+        const uint8_t op2 = (uint8_t)bits(machInst, 17);
+        const uint8_t op3 = (uint8_t)bits(machInst, 4, 2);
+
+        if ((op0 & 0b10) == 0b00) {
+            if ((op1 & 0b011000) == 0b010000) {
+                if ((op3 & 0b001) == 0b000) {
+                    return decodeSmeOp32(machInst);
+                }
+            }
+
+            if ((op1 & 0b011000) == 0b011000) {
+                if ((op3 & 0b010) == 0b000) {
+                    return decodeSmeOp64(machInst);
+                }
+            }
+        }
+
+        if (op0 == 0b10) {
+            if ((op1 & 0b100111) == 0b000000) {
+                if (op2 == 0b0) {
+                    if ((op3 & 0b100) == 0b000) {
+                        return decodeSmeMovaInsert(machInst);
+                    }
+                }
+
+                if (op2 ==0b1) {
+                    return decodeSmeMovaExtract(machInst);
+                }
+            }
+
+            if ((op1 & 0b100111) == 0b000001) {
+                return decodeSmeMisc(machInst);
+            }
+
+            if ((op1 & 0b100111) == 0b000010) {
+                if ((op3 & 0b010) == 0b000) {
+                    return decodeSmeAddArray(machInst);
+                }
+            }
+        }
+
+        if (op0 == 0b11) {
+            return decodeSmeMemory(machInst);
+        }
+
+        // We should not get here
+        return new Unknown64(machInst);
+    }
+
+    StaticInstPtr
+    decodeSmeOp32(ExtMachInst machInst)
+    {
+        const uint8_t op0 = (uint8_t)bits(machInst, 29);
+        const uint8_t op1 = (uint8_t)bits(machInst, 24);
+        const uint8_t op2 = (uint8_t)bits(machInst, 21);
+        const uint8_t op3 = (uint8_t)bits(machInst, 3);
+
+        if (op0 == 0) {
+            if (op1 == 0) {
+                if (op2 == 0) {
+                    if (op3 == 0) {
+                        return decodeSmeOpFp32(machInst);
+                    }
+                }
+            }
+
+            if (op1 == 1) {
+                if (op2 == 0) {
+                    if (op3 == 0) {
+                        return decodeSmeOpBf16(machInst);
+                    }
+                }
+
+                if (op2 == 1) {
+                    if (op3 == 0) {
+                        return decodeSmeOpFp16(machInst);
+                    }
+                }
+            }
+        }
+
+        if (op0 == 1) {
+            if (op3 == 0) {
+                return decodeSmeOpInt8(machInst);
+            }
+        }
+
+        return new Unknown64(machInst);
+    }
+
+    StaticInstPtr
+    decodeSmeOpFp32(ExtMachInst machInst)
+    {
+        const uint32_t S = (uint32_t)bits(machInst, 4, 4);
+
+        const RegIndex Zm = (RegIndex)(uint32_t)(bits(machInst, 20, 16));
+        const RegIndex Zn = (RegIndex)(uint32_t)(bits(machInst, 9, 5));
+        const RegIndex Pn = (RegIndex)(uint32_t)(bits(machInst, 12, 10));
+        const RegIndex Pm = (RegIndex)(uint32_t)(bits(machInst, 15, 13));
+        const RegIndex ZAda = (RegIndex)(uint32_t)(bits(machInst, 1, 0));
+
+        if (S == 0) {
+            return new SmeFmopa<uint32_t, uint32_t>(machInst, ZAda, Zn,
+                                                    Pn, Pm, Zm);
+        } else {
+            return new SmeFmops<uint32_t, uint32_t>(machInst, ZAda, Zn,
+                                                    Pn, Pm, Zm);
+        }
+        return new Unknown64(machInst);
+    }
+
+    StaticInstPtr
+    decodeSmeOpBf16(ExtMachInst machInst)
+    {
+        // The following code is functionally correct for decode, but
+        // remains commented out as the current gem5 fplib implementation
+        // doesn't support BF16, and hence the instructions themselves
+        // remain unimplemented. Once these have been implemented, this code
+        // can be safely uncommented to enable decode for the two BF16 Outer
+        // Product instructions added by FEAT_SME.
+
+        // const uint32_t S = (uint32_t)bits(machInst, 4, 4);
+
+        // const RegIndex Zm = (RegIndex)(uint32_t)(
+        //     bits(machInst, 20, 16));
+        // const RegIndex Zn = (RegIndex)(uint32_t)(
+        //     bits(machInst, 9, 5));
+        // const RegIndex Pn = (RegIndex)(uint32_t)(
+        //     bits(machInst, 12, 10));
+        // const RegIndex Pm = (RegIndex)(uint32_t)(
+        //     bits(machInst, 15, 13));
+        // const RegIndex ZAda = (RegIndex)(uint32_t)(
+        //     bits(machInst, 1, 0));
+
+        // if (S == 0) {
+        //     return new SmeBmopa(machInst);
+        // } else {
+        //     return new SmeBmops(machInst);
+        // }
+
+        return new Unknown64(machInst);
+    }
+
+    StaticInstPtr
+    decodeSmeOpFp16(ExtMachInst machInst)
+    {
+        const uint32_t S = (uint32_t)bits(machInst, 4, 4);
+
+        const RegIndex Zm = (RegIndex)(uint32_t)(bits(machInst, 20, 16));
+        const RegIndex Zn = (RegIndex)(uint32_t)(bits(machInst, 9, 5));
+        const RegIndex Pn = (RegIndex)(uint32_t)(bits(machInst, 12, 10));
+        const RegIndex Pm = (RegIndex)(uint32_t)(bits(machInst, 15, 13));
+        const RegIndex ZAda = (RegIndex)(uint32_t)(bits(machInst, 1, 0));
+
+        if (S == 0) {
+            return new SmeFmopaWidening<uint16_t, uint32_t>(machInst, ZAda, Zn,
+                                                            Pn, Pm, Zm);
+        } else {
+            return new SmeFmopsWidening<uint16_t, uint32_t>(machInst, ZAda, Zn,
+                                                            Pn, Pm, Zm);
+        }
+        return new Unknown64(machInst);
+    }
+
+    StaticInstPtr
+    decodeSmeOpInt8(ExtMachInst machInst)
+    {
+        const uint32_t u0 = (uint32_t)bits(machInst, 24);
+        const uint32_t u1 = (uint32_t)bits(machInst, 21);
+        const uint32_t S = (uint32_t)bits(machInst, 4);
+
+        const RegIndex Zm = (RegIndex)(uint32_t)(bits(machInst, 20, 16));
+        const RegIndex Zn = (RegIndex)(uint32_t)(bits(machInst, 9, 5));
+        const RegIndex Pn = (RegIndex)(uint32_t)(bits(machInst, 12, 10));
+        const RegIndex Pm = (RegIndex)(uint32_t)(bits(machInst, 15, 13));
+        const RegIndex ZAda = (RegIndex)(uint32_t)(bits(machInst, 1, 0));
+
+        if (u0 == 0) {
+            if (u1 == 0) {
+                if (S == 0) {
+                    return new SmeSmopa<int8_t, int8_t, int32_t>(
+                        machInst, ZAda, Zn, Pn, Pm, Zm);
+                } else {
+                    return new SmeSmops<int8_t, int8_t, int32_t>(
+                        machInst, ZAda, Zn, Pn, Pm, Zm);
+                }
+            } else {
+                if (S == 0) {
+                    return new SmeSumopa<int8_t, uint8_t, int32_t>(
+                        machInst, ZAda, Zn, Pn, Pm, Zm);
+                } else {
+                    return new SmeSumops<int8_t, uint8_t, int32_t>(
+                        machInst, ZAda, Zn, Pn, Pm, Zm);
+                }
+            }
+        } else {
+            if (u1 == 0) {
+                if (S == 0) {
+                    return new SmeUsmopa<uint8_t, int8_t, int32_t>(
+                        machInst, ZAda, Zn, Pn, Pm, Zm);
+                } else {
+                    return new SmeUsmops<uint8_t, int8_t, int32_t>(
+                        machInst, ZAda, Zn, Pn, Pm, Zm);
+                }
+            } else {
+                if (S == 0) {
+                    return new SmeUmopa<uint8_t, uint8_t, int32_t>(
+                        machInst, ZAda, Zn, Pn, Pm, Zm);
+                } else {
+                    return new SmeUmops<uint8_t, uint8_t, int32_t>(
+                        machInst, ZAda, Zn, Pn, Pm, Zm);
+                }
+            }
+        }
+
+        return new Unknown64(machInst);
+    }
+
+    StaticInstPtr
+    decodeSmeOp64(ExtMachInst machInst)
+    {
+        const uint8_t op0 = (uint8_t)bits(machInst, 29);
+        const uint8_t op1 = (uint8_t)bits(machInst, 24);
+        const uint8_t op2 = (uint8_t)bits(machInst, 21);
+
+        if (op0 == 0) {
+            if (op1 == 0) {
+                if (op2 == 0) {
+                    return decodeSmeOpFp64(machInst);
+                }
+            }
+        }
+
+        if (op0 == 1) {
+            return decodeSmeOpInt16(machInst);
+        }
+
+        return new Unknown64(machInst);
+    }
+
+    StaticInstPtr
+    decodeSmeOpFp64(ExtMachInst machInst)
+    {
+        const uint32_t S = (uint32_t)bits(machInst, 4, 4);
+
+        const RegIndex Zm = (RegIndex)(uint32_t)(bits(machInst, 20, 16));
+        const RegIndex Zn = (RegIndex)(uint32_t)(bits(machInst, 9, 5));
+        const RegIndex Pn = (RegIndex)(uint32_t)(bits(machInst, 12, 10));
+        const RegIndex Pm = (RegIndex)(uint32_t)(bits(machInst, 15, 13));
+        const RegIndex ZAda = (RegIndex)(uint32_t)(bits(machInst, 2, 0));
+
+        if (S == 0) {
+            return new SmeFmopa<uint64_t, uint64_t>(machInst, ZAda, Zn,
+                                                    Pn, Pm, Zm);
+        } else {
+            return new SmeFmops<uint64_t, uint64_t>(machInst, ZAda, Zn,
+                                                    Pn, Pm, Zm);
+        }
+
+        return new Unknown64(machInst);
+    }
+
+    StaticInstPtr
+    decodeSmeOpInt16(ExtMachInst machInst)
+    {
+        const uint32_t u0 = (uint32_t)bits(machInst, 24);
+        const uint32_t u1 = (uint32_t)bits(machInst, 21);
+        const uint32_t S = (uint32_t)bits(machInst, 4);
+
+        const RegIndex Zm = (RegIndex)(uint32_t)(bits(machInst, 20, 16));
+        const RegIndex Zn = (RegIndex)(uint32_t)(bits(machInst, 9, 5));
+        const RegIndex Pn = (RegIndex)(uint32_t)(bits(machInst, 12, 10));
+        const RegIndex Pm = (RegIndex)(uint32_t)(bits(machInst, 15, 13));
+        const RegIndex ZAda = (RegIndex)(uint32_t)(bits(machInst, 2, 0));
+
+        if (u0 == 0) {
+            if (u1 == 0) {
+                if (S == 0) {
+                    return new SmeSmopa<int16_t, int16_t, int64_t>(
+                        machInst, ZAda, Zn, Pn, Pm, Zm);
+                } else {
+                    return new SmeSmops<int16_t, int16_t, int64_t>(
+                        machInst, ZAda, Zn, Pn, Pm, Zm);
+                }
+            } else {
+                if (S == 0) {
+                    return new SmeSumopa<int16_t, uint16_t, int64_t>(
+                        machInst, ZAda, Zn, Pn, Pm, Zm);
+                } else {
+                    return new SmeSumops<int16_t, uint16_t, int64_t>(
+                        machInst, ZAda, Zn, Pn, Pm, Zm);
+                }
+            }
+        } else {
+            if (u1 == 0) {
+                if (S == 0) {
+                    return new SmeUsmopa<uint16_t, int16_t, int64_t>(
+                        machInst, ZAda, Zn, Pn, Pm, Zm);
+                } else {
+                    return new SmeUsmops<uint16_t, int16_t, int64_t>(
+                        machInst, ZAda, Zn, Pn, Pm, Zm);
+                }
+            } else {
+                if (S == 0) {
+                    return new SmeUmopa<uint16_t, uint16_t, int64_t>(
+                        machInst, ZAda, Zn, Pn, Pm, Zm);
+                } else {
+                    return new SmeUmops<uint16_t, uint16_t, int64_t>(
+                        machInst, ZAda, Zn, Pn, Pm, Zm);
+                }
+            }
+        }
+        return new Unknown64(machInst);
+    }
+
+    StaticInstPtr
+    decodeSmeMovaInsert(ExtMachInst machInst)
+    {
+        const uint8_t op0 = (uint8_t)bits(machInst, 18);
+
+        if (op0 == 1) {
+            return new Unknown64(machInst);
+        }
+
+        const uint32_t size = (uint32_t)bits(machInst, 23, 22);
+        const uint32_t Q = (uint32_t)bits(machInst, 16, 16);
+
+        const RegIndex Zn = (RegIndex)(uint32_t)(bits(machInst, 9, 5));
+        const RegIndex Ws = (RegIndex)(uint32_t)(
+            bits(machInst, 14, 13) + 12);
+        const RegIndex Pg = (RegIndex)(uint32_t)(bits(machInst, 12, 10));
+        const RegIndex ZAd_imm = (RegIndex)(uint32_t)(
+            bits(machInst, 3, 0));
+        const bool V = (bool)bits(machInst, 15);
+
+        if (Q == 0) {
+            switch (size) {
+              case 0b00:
+                return new SmeMovaInsert<uint8_t>(machInst, ZAd_imm,
+                                                  Zn, Pg, Ws, V);
+              case 0b01:
+                return new SmeMovaInsert<uint16_t>(machInst, ZAd_imm,
+                                                   Zn, Pg, Ws, V);
+              case 0b10:
+                return new SmeMovaInsert<uint32_t>(machInst, ZAd_imm,
+                                                   Zn, Pg, Ws, V);
+              case 0b11:
+                return new SmeMovaInsert<uint64_t>(machInst, ZAd_imm,
+                                                   Zn, Pg, Ws, V);
+              default:
+                break;
+            }
+        }
+
+        if ((Q == 1) && (size == 0b11)) {
+            return new SmeMovaInsert<__uint128_t>(machInst, ZAd_imm,
+                                                  Zn, Pg, Ws, V);
+        }
+
+        return new Unknown64(machInst);
+    }
+
+    StaticInstPtr
+    decodeSmeMovaExtract(ExtMachInst machInst)
+    {
+        const uint8_t op0 = (uint8_t)bits(machInst, 18);
+        const uint8_t op1 = (uint8_t)bits(machInst, 9);
+
+        if ((op0 == 1) || (op1 == 1)) {
+            return new Unknown64(machInst);
+        }
+
+        const uint32_t size = (uint32_t)bits(machInst, 23, 22);
+        const uint32_t Q = (uint32_t)bits(machInst, 16, 16);
+
+        const RegIndex Zd = (RegIndex)(uint32_t)(bits(machInst, 4, 0));
+        const RegIndex Ws = (RegIndex)(uint32_t)(
+            bits(machInst, 14, 13) + 12);
+        const RegIndex Pg = (RegIndex)(uint32_t)(bits(machInst, 12, 10));
+        const RegIndex ZAn_imm = (RegIndex)(uint32_t)(
+            bits(machInst, 8, 5));
+        const bool V = (bool)bits(machInst, 15);
+
+        if (Q == 0) {
+            switch (size) {
+              case 0b00:
+                return new SmeMovaExtract<uint8_t>(machInst, Zd,
+                                                   ZAn_imm, Pg, Ws, V);
+              case 0b01:
+                return new SmeMovaExtract<uint16_t>(machInst, Zd,
+                                                    ZAn_imm, Pg, Ws, V);
+              case 0b10:
+                return new SmeMovaExtract<uint32_t>(machInst, Zd,
+                                                    ZAn_imm, Pg, Ws, V);
+              case 0b11:
+                return new SmeMovaExtract<uint64_t>(machInst, Zd,
+                                                    ZAn_imm, Pg, Ws, V);
+              default:
+                break;
+            }
+        }
+
+        if ((Q == 1) && (size == 0b11)) {
+           return new SmeMovaExtract<__uint128_t>(machInst, Zd,
+                                                  ZAn_imm, Pg, Ws, V);
+        }
+
+        return new Unknown64(machInst);
+    }
+
+    StaticInstPtr
+    decodeSmeMisc(ExtMachInst machInst)
+    {
+        const uint32_t op0 = (uint32_t)bits(machInst, 23, 22);
+        const uint32_t op1 = (uint32_t)bits(machInst, 18, 8);
+
+        if (op0 == 0b00) {
+            if (op1 == 0b00000000000) {
+                return decodeSmeZero(machInst);
+            }
+        }
+
+        return new Unknown64(machInst);
+    }
+
+    StaticInstPtr
+    decodeSmeZero(ExtMachInst machInst)
+    {
+        const uint8_t imm8 = (uint8_t)bits(machInst, 7, 0);
+
+        return new SmeZero<uint64_t>(machInst, imm8);
+    }
+
+    StaticInstPtr
+    decodeSmeAddArray(ExtMachInst machInst)
+    {
+        const uint32_t op0 = (uint32_t)bits(machInst, 23);
+        const uint32_t op1 = (uint32_t)bits(machInst, 18, 17);
+        const uint32_t op2 = (uint32_t)bits(machInst, 4);
+
+        if (op0 == 1) {
+            if (op1 == 0b00) {
+                if (op2 == 0) {
+                    return decodeSmeAddhv(machInst);
+                }
+            }
+        }
+
+        return new Unknown64(machInst);
+    }
+
+    StaticInstPtr
+    decodeSmeAddhv(ExtMachInst machInst)
+    {
+        const uint32_t V = (uint32_t)bits(machInst, 16, 16);
+        const uint32_t op = (uint32_t)bits(machInst, 22, 22);
+        const uint32_t op2 = (uint32_t)bits(machInst, 2, 0);
+
+        const RegIndex Zn = (RegIndex)(uint32_t)(bits(machInst, 9, 5));
+        const RegIndex Pn = (RegIndex)(uint32_t)(bits(machInst, 12, 10));
+        const RegIndex Pm = (RegIndex)(uint32_t)(bits(machInst, 15, 13));
+        const RegIndex ZAda = (RegIndex)(uint32_t)(bits(machInst, 2, 0));
+
+        if (op == 0) { // 32-bit
+            if (V == 0) {
+                if ((op2 & 0b100) == 0b000) {
+                    return new SmeAddha<int32_t>(machInst, ZAda, Zn, Pn, Pm);
+                }
+            } else {
+                if ((op2 & 0b100) == 0b000) {
+                    return new SmeAddva<int32_t>(machInst, ZAda, Zn, Pn, Pm);
+                }
+            }
+        } else {
+            if (V == 0) {
+                return new SmeAddha<int64_t>(machInst, ZAda, Zn, Pn, Pm);
+            } else {
+                return new SmeAddva<int64_t>(machInst, ZAda, Zn, Pn, Pm);
+            }
+        }
+
+        return new Unknown64(machInst);
+    }
+
+    StaticInstPtr
+    decodeSmeMemory(ExtMachInst machInst)
+    {
+        const uint8_t op0 = (uint8_t)bits(machInst, 24, 21);
+        const uint8_t op1 = (uint8_t)bits(machInst, 20, 15);
+        const uint8_t op2 = (uint8_t)bits(machInst, 12, 10);
+        const uint8_t op3 = (uint8_t)bits(machInst, 4);
+
+        if ((op0 & 0b1001) == 0b0000) {
+            if (op3 == 0b0) {
+                return decodeSmeLoad(machInst);
+            }
+        }
+
+        if ((op0 & 0b1001) == 0b0001) {
+            if (op3 == 0b0) {
+                return decodeSmeStore(machInst);
+            }
+        }
+
+        if ((op0 & 0b1110) == 0b1000) {
+            if (op1 == 0b000000) {
+                if (op2 == 0b000) {
+                    if (op3 == 0b0) {
+                        return decodeSmeLoadStoreArray(machInst);
+                    }
+                }
+            }
+        }
+
+        if (op0 == 0b1110) {
+            if (op3 == 0b0) {
+                return decodeSmeLoadQuadWord(machInst);
+            }
+        }
+
+        if (op0 == 0b1111) {
+            if (op3 == 0b0) {
+                return decodeSmeStoreQuadWord(machInst);
+            }
+        }
+
+        return new Unknown64(machInst);
+    }
+
+    StaticInstPtr
+    decodeSmeLoad(ExtMachInst machInst)
+    {
+        const uint8_t msz = (uint8_t)bits(machInst, 23, 22);
+        const bool V = (bool)bits(machInst, 15);
+
+        const RegIndex Rn = makeSP(
+                                (RegIndex)(uint32_t)bits(machInst, 9, 5));
+        const RegIndex Rm = (RegIndex)(uint32_t)(bits(machInst, 20, 16));
+        const RegIndex Rs = (RegIndex)(uint32_t)(
+            bits(machInst, 14, 13) + 12);
+        const uint32_t ZAt_imm = (uint32_t)bits(machInst, 3, 0);
+        const RegIndex Pg = (RegIndex)(uint32_t)(bits(machInst, 12, 10));
+
+        switch(msz)
+        {
+          case 0b00:
+            return new SmeLd1b<uint8_t>(machInst, ZAt_imm, Rn, Pg, Rs, Rm, V);
+          case 0b01:
+            return new SmeLd1h<uint16_t>(machInst, ZAt_imm, Rn, Pg, Rs, Rm, V);
+          case 0b10:
+            return new SmeLd1w<uint32_t>(machInst, ZAt_imm, Rn, Pg, Rs, Rm, V);
+          case 0b11:
+            return new SmeLd1d<uint64_t>(machInst, ZAt_imm, Rn, Pg, Rs, Rm, V);
+          default:
+            break;
+        }
+
+        return new Unknown64(machInst);
+    }
+
+    StaticInstPtr
+    decodeSmeStore(ExtMachInst machInst)
+    {
+        const uint8_t msz = (uint8_t)bits(machInst, 23, 22);
+        const bool V = (bool)bits(machInst, 15);
+
+        const RegIndex Rn = makeSP(
+                                (RegIndex)(uint32_t)bits(machInst, 9, 5));
+        const RegIndex Rm = (RegIndex)(uint32_t)(bits(machInst, 20, 16));
+        const RegIndex Rs = (RegIndex)(uint32_t)(
+            bits(machInst, 14, 13) + 12);
+        const uint32_t ZAt_imm = (uint32_t)bits(machInst, 3, 0);
+        const RegIndex Pg = (RegIndex)(uint32_t)(bits(machInst, 12, 10));
+
+        switch(msz)
+        {
+          case 0b00:
+            return new SmeSt1b<uint8_t>(machInst, ZAt_imm, Rn, Pg, Rs, Rm, V);
+          case 0b01:
+            return new SmeSt1h<uint16_t>(machInst, ZAt_imm, Rn, Pg, Rs, Rm, V);
+          case 0b10:
+            return new SmeSt1w<uint32_t>(machInst, ZAt_imm, Rn, Pg, Rs, Rm, V);
+          case 0b11:
+            return new SmeSt1d<uint64_t>(machInst, ZAt_imm, Rn, Pg, Rs, Rm, V);
+          default:
+            break;
+        }
+
+        return new Unknown64(machInst);
+    }
+
+    StaticInstPtr
+    decodeSmeLoadStoreArray(ExtMachInst machInst)
+    {
+        const uint8_t op = (uint8_t)bits(machInst, 21);
+
+        const RegIndex Rn = makeSP(
+                                (RegIndex)(uint32_t)bits(machInst, 9, 5));
+        const RegIndex Rv = (RegIndex)(uint32_t)(
+            bits(machInst, 14, 13) + 12);
+        const uint32_t imm4 = (uint32_t)bits(machInst, 3, 0);
+
+        if (op == 0) {
+            return new SmeLdr(machInst, imm4, Rn, Rv);
+        } else {
+            return new SmeStr(machInst, imm4, Rn, Rv);
+        }
+
+        return new Unknown64(machInst);
+    }
+
+    StaticInstPtr
+    decodeSmeLoadQuadWord(ExtMachInst machInst)
+    {
+        const bool V = (bool)bits(machInst, 15);
+
+        const RegIndex Rn = makeSP(
+                                (RegIndex)(uint32_t)bits(machInst, 9, 5));
+        const RegIndex Rm = (RegIndex)(uint32_t)(bits(machInst, 20, 16));
+        const RegIndex Rs = (RegIndex)(uint32_t)(
+            bits(machInst, 14, 13) + 12);
+        const uint32_t ZAt = (uint32_t)bits(machInst, 3, 0);
+        const RegIndex Pg = (RegIndex)(uint32_t)(bits(machInst, 12, 10));
+
+        return new SmeLd1q<__uint128_t>(machInst, ZAt, Rn, Pg, Rs, Rm, V);
+    }
+
+    StaticInstPtr
+    decodeSmeStoreQuadWord(ExtMachInst machInst)
+    {
+        const bool V = (bool)bits(machInst, 15);
+
+        const RegIndex Rn = makeSP(
+                                (RegIndex)(uint32_t)bits(machInst, 9, 5));
+        const RegIndex Rm = (RegIndex)(uint32_t)(bits(machInst, 20, 16));
+        const RegIndex Rs = (RegIndex)(uint32_t)(
+            bits(machInst, 14, 13) + 12);
+        const uint32_t ZAt = (uint32_t)bits(machInst, 3, 0);
+        const RegIndex Pg = (RegIndex)(uint32_t)(bits(machInst, 12, 10));
+
+        return new SmeSt1q<__uint128_t>(machInst, ZAt, Rn, Pg, Rs, Rm, V);
+    }
+}
+}};
diff --git a/src/arch/arm/isa/formats/sve_2nd_level.isa b/src/arch/arm/isa/formats/sve_2nd_level.isa
index cbd5466b82..2ee3817445 100644
--- a/src/arch/arm/isa/formats/sve_2nd_level.isa
+++ b/src/arch/arm/isa/formats/sve_2nd_level.isa
@@ -605,22 +605,43 @@ namespace Aarch64
     {
         uint8_t b23_22 = bits(machInst, 23, 22);
         uint8_t b11 = bits(machInst, 11);
-        if ((b23_22 & 0x2) == 0x0 && b11 == 0x0) {
-            RegIndex rd = makeSP(
-                (RegIndex) (uint8_t) bits(machInst, 4, 0));
-            RegIndex rn = makeSP(
-                (RegIndex) (uint8_t) bits(machInst, 20, 16));
-            uint64_t imm = sext<6>(bits(machInst, 10, 5));
-            if ((b23_22 & 0x1) == 0x0) {
-                return new AddvlXImm(machInst, rd, rn, imm);
-            } else {
-                return new AddplXImm(machInst, rd, rn, imm);
+        if (b11 == 0x0) {
+            if ((b23_22 & 0x2) == 0x0) {
+                RegIndex rd = makeSP(
+                    (RegIndex) (uint8_t) bits(machInst, 4, 0));
+                RegIndex rn = makeSP(
+                    (RegIndex) (uint8_t) bits(machInst, 20, 16));
+                uint64_t imm = sext<6>(bits(machInst, 10, 5));
+                if ((b23_22 & 0x1) == 0x0) {
+                    return new AddvlXImm(machInst, rd, rn, imm);
+                } else {
+                    return new AddplXImm(machInst, rd, rn, imm);
+                }
+            } else if (b23_22 == 0x2) {
+                RegIndex rd = (RegIndex) (uint8_t) bits(machInst, 4, 0);
+                uint64_t imm = sext<6>(bits(machInst, 10, 5));
+                if (bits(machInst, 20, 16) == 0x1f) {
+                    return new SveRdvl(machInst, rd, imm);
+                }
             }
-        } else if (b23_22 == 0x2 && b11 == 0x0) {
-            RegIndex rd = (RegIndex) (uint8_t) bits(machInst, 4, 0);
-            uint64_t imm = sext<6>(bits(machInst, 10, 5));
-            if (bits(machInst, 20, 16) == 0x1f) {
-                return new SveRdvl(machInst, rd, imm);
+        } else { // b11 == 1
+            if ((b23_22 & 0x2) == 0x0) {
+                RegIndex rd = makeSP(
+                    (RegIndex) (uint8_t) bits(machInst, 4, 0));
+                RegIndex rn = makeSP(
+                    (RegIndex) (uint8_t) bits(machInst, 20, 16));
+                uint64_t imm = sext<6>(bits(machInst, 10, 5));
+                if ((b23_22 & 0x1) == 0x0) {
+                    return new SmeAddsvl(machInst, rd, rn, imm);
+                } else {
+                    return new SmeAddspl(machInst, rd, rn, imm);
+                }
+            } else if (b23_22 == 0x2) {
+                RegIndex rd = (RegIndex) (uint8_t) bits(machInst, 4, 0);
+                uint64_t imm = sext<6>(bits(machInst, 10, 5));
+                if (bits(machInst, 20, 16) == 0x1f) {
+                    return new SmeRdsvl(machInst, rd, imm);
+                }
             }
         }
         return new Unknown64(machInst);
@@ -1201,6 +1222,18 @@ namespace Aarch64
                         zdn, zm, pg);
             }
             break;
+          case 0xE:
+            if(!b13) {
+                unsigned size = (unsigned) bits(machInst, 23, 22);
+                RegIndex pg = (RegIndex)(uint8_t) bits(machInst, 12, 10);
+                RegIndex zn = (RegIndex)(uint8_t) bits(machInst, 9, 5);
+                RegIndex zd = (RegIndex)(uint8_t) bits(machInst, 4, 0);
+
+                if (size == 0b00) {
+                    return new SveRevd<__uint128_t>(machInst, zd, zn, pg);
+                }
+            }
+            break;
         }
         switch (bits(machInst, 20, 17)) {
           case 0x0:
@@ -1951,6 +1984,36 @@ namespace Aarch64
         return new Unknown64(machInst);
     }  // decodeSveIntCmpSca
 
+    StaticInstPtr
+    decodeSvePsel(ExtMachInst machInst)
+    {
+        RegIndex Pd = (RegIndex)(uint8_t)bits(machInst, 3, 0);
+        RegIndex Pn = (RegIndex)(uint8_t)bits(machInst, 8, 5);
+        RegIndex Pg = (RegIndex)(uint8_t)bits(machInst, 13, 10);
+        RegIndex Rm = (RegIndex)(0b01100 +
+                         (uint8_t)bits(machInst, 17, 16));
+        uint8_t imm = (uint8_t)bits(machInst, 20, 18);
+        imm += (uint8_t)bits(machInst, 23, 22) << 3;
+
+        const uint8_t size = imm & 0xF;
+
+        if (size == 0) {
+            return new Unknown64(machInst);
+        }
+
+        if (size & 0b0001) {
+            return new SvePsel<uint8_t>(machInst, Pd, Pn, Pg, Rm, imm >> 1);
+        } else if (size & 0b0010) {
+            return new SvePsel<uint16_t>(machInst, Pd, Pn, Pg, Rm, imm >> 2);
+        } else if (size & 0b0100) {
+            return new SvePsel<uint32_t>(machInst, Pd, Pn, Pg, Rm, imm >> 3);
+        } else if (size & 0b1000) {
+            return new SvePsel<uint64_t>(machInst, Pd, Pn, Pg, Rm, imm >> 4);
+        }
+
+        return new Unknown64(machInst);
+    } // decodeSvePsel
+
     StaticInstPtr
     decodeSveIntWideImmUnpred0(ExtMachInst machInst)
     {
@@ -2106,6 +2169,48 @@ namespace Aarch64
         return new Unknown64(machInst);
     }  // decodeSveIntWideImmUnpred
 
+    StaticInstPtr
+    decodeSveClamp(ExtMachInst machInst)
+    {
+        RegIndex zda = (RegIndex)(uint8_t)bits(machInst, 4, 0);
+        RegIndex zn = (RegIndex)(uint8_t)bits(machInst, 9, 5);
+        RegIndex zm = (RegIndex)(uint8_t)bits(machInst, 20, 16);
+
+        switch(bits(machInst, 10)) {
+            case 0:
+                switch(bits(machInst, 23, 22)) {
+                    case 0x0:
+                        return new SveSclamp<int8_t>(machInst, zm, zn, zda);
+                    case 0x1:
+                        return new SveSclamp<int16_t>(machInst, zm, zn, zda);
+                    case 0x2:
+                        return new SveSclamp<int32_t>(machInst, zm, zn, zda);
+                    case 0x3:
+                        return new SveSclamp<int64_t>(machInst, zm, zn, zda);
+                    default:
+                        break;
+                }
+                break;
+            case 1:
+                switch(bits(machInst, 23, 22)) {
+                    case 0x0:
+                        return new SveUclamp<uint8_t>(machInst, zm, zn, zda);
+                    case 0x1:
+                        return new SveUclamp<uint16_t>(machInst, zm, zn, zda);
+                    case 0x2:
+                        return new SveUclamp<uint32_t>(machInst, zm, zn, zda);
+                    case 0x3:
+                        return new SveUclamp<uint64_t>(machInst, zm, zn, zda);
+                    default:
+                        break;
+                }
+            default:
+                break;
+        }
+
+        return new Unknown64(machInst);
+    }
+
     StaticInstPtr
     decodeSveMultiplyAddUnpred(ExtMachInst machInst)
     {
diff --git a/src/arch/arm/isa/formats/sve_top_level.isa b/src/arch/arm/isa/formats/sve_top_level.isa
index 803029a2a4..155ec1c42f 100644
--- a/src/arch/arm/isa/formats/sve_top_level.isa
+++ b/src/arch/arm/isa/formats/sve_top_level.isa
@@ -66,7 +66,9 @@ namespace Aarch64
     StaticInstPtr decodeSvePredGen(ExtMachInst machInst);
     StaticInstPtr decodeSvePredCount(ExtMachInst machInst);
     StaticInstPtr decodeSveIntCmpSca(ExtMachInst machInst);
+    StaticInstPtr decodeSvePsel(ExtMachInst machInst);
     StaticInstPtr decodeSveIntWideImmUnpred(ExtMachInst machInst);
+    StaticInstPtr decodeSveClamp(ExtMachInst machInst);
 
     StaticInstPtr decodeSveMultiplyAddUnpred(ExtMachInst machInst);
     StaticInstPtr decodeSveMultiplyIndexed(ExtMachInst machInst);
@@ -107,6 +109,9 @@ namespace Aarch64
           case 0x0:
             {
                 if (bits(machInst, 14)) {
+                    if (bits(machInst, 15, 11) == 0b11000) {
+                        return decodeSveClamp(machInst);
+                    }
                     return decodeSveIntMulAdd(machInst);
                 } else {
                     uint8_t b_15_13 = (bits(machInst, 15) << 1) |
@@ -210,10 +215,14 @@ namespace Aarch64
           case 0x7:
             {
                 uint8_t b_15_14 = bits(machInst, 15, 14);
+                uint8_t b_4 = bits(machInst, 4, 4);
                 switch (b_15_14) {
                   case 0x0:
                     return decodeSveIntCmpSca(machInst);
                   case 0x1:
+                    if (b_4 == 0) {
+                        return decodeSvePsel(machInst);
+                    }
                     return new Unknown64(machInst);
                   case 0x2:
                     return decodeSvePredCount(machInst);
diff --git a/src/arch/arm/isa/includes.isa b/src/arch/arm/isa/includes.isa
index 386af4e05d..e2534a6728 100644
--- a/src/arch/arm/isa/includes.isa
+++ b/src/arch/arm/isa/includes.isa
@@ -61,6 +61,7 @@ output header {{
 #include "arch/arm/insts/neon64_mem.hh"
 #include "arch/arm/insts/pred_inst.hh"
 #include "arch/arm/insts/pseudo.hh"
+#include "arch/arm/insts/sme.hh"
 #include "arch/arm/insts/static_inst.hh"
 #include "arch/arm/insts/sve.hh"
 #include "arch/arm/insts/sve_mem.hh"
diff --git a/src/arch/arm/isa/insts/insts.isa b/src/arch/arm/isa/insts/insts.isa
index 0697ca49d2..cdc162f2b5 100644
--- a/src/arch/arm/isa/insts/insts.isa
+++ b/src/arch/arm/isa/insts/insts.isa
@@ -105,6 +105,9 @@ split decoder;
 ##include "sve.isa"
 ##include "sve_mem.isa"
 
+//SME
+##include "sme.isa"
+
 //m5 Pseudo-ops
 ##include "m5ops.isa"
 
diff --git a/src/arch/arm/isa/insts/sme.isa b/src/arch/arm/isa/insts/sme.isa
new file mode 100644
index 0000000000..b9f6115432
--- /dev/null
+++ b/src/arch/arm/isa/insts/sme.isa
@@ -0,0 +1,821 @@
+// Copyright (c) 2022 ARM Limited
+// All rights reserved
+//
+// The license below extends only to copyright in the software and shall
+// not be construed as granting a license to any other intellectual
+// property including but not limited to intellectual property relating
+// to a hardware implementation of the functionality of the software
+// licensed hereunder.  You may use the software subject to the license
+// terms below provided that you ensure that this notice is replicated
+// unmodified and in its entirety in all distributions of the software,
+// modified or unmodified, in source code or in binary form.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met: redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer;
+// redistributions in binary form must reproduce the above copyright
+// notice, this list of conditions and the following disclaimer in the
+// documentation and/or other materials provided with the distribution;
+// neither the name of the copyright holders nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+// @file Definition of SME instructions.
+
+let {{
+
+    header_output = ""
+    decoder_output = ""
+    exec_output = ""
+
+    def smeAddInst(name, Name, opClass, types, op):
+        global header_output, decoder_output, exec_output
+        code = smEnCheckCode + smeZaWrite + '''
+            // imm stores the tile index
+            // op1 is the source SVE vector register
+            // gp1 is the row predecate register
+            // gp2 is the column predecate register
+
+            unsigned eCount = ArmStaticInst::getCurSmeVecLen<TPElem>(
+                xc->tcBase());
+
+            uint8_t tile_index = imm & 0x7;
+
+            // View the tile as the correct data type, extract the sub-tile
+            auto tile = getTile<TPElem>(ZA, tile_index);
+            '''
+        code += op
+
+        iop = InstObjParams(name, "Sme" + Name, "SmeAddOp",
+                            {'code': code, 'op_class': opClass},
+                            ['IsNonSpeculative'])
+        header_output += SmeAddDeclare.subst(iop)
+        exec_output += SmeTemplatedExecute.subst(iop)
+
+        for type in types:
+            substDict = {'targs' : type,
+                         'class_name' : 'Sme' + Name}
+            exec_output += SmeOpExecDeclare.subst(substDict)
+
+    def smeAddVlInst(name, Name, opClass, op):
+        global header_output, decoder_output, exec_output
+        code = smEnCheckCodeNoPstate + '''
+            // dest is the 64-bit destination register
+            // op1 is the 64-bit source register
+            // imm is a signed multiplier
+            '''
+        code += op
+
+        iop = InstObjParams(name, "Sme" + Name, "SmeAddVlOp",
+                            {'code': code, 'op_class': opClass},
+                            ['IsNonSpeculative'])
+        header_output += SmeAddVlDeclare.subst(iop)
+        exec_output += SmeExecute.subst(iop)
+
+    def smeLd1xInst(name, Name, opClass, types):
+        global header_output, decoder_output, exec_output
+        code = smEnCheckCode + smeZaWrite + '''
+            // imm stores the tile number as well as the vector offset. The
+            // size of the fields changes based on the data type being used.
+            // XOp1 stores Rn
+            // GpOp stores the governing predicate register
+            // WOp2 stores Rs - the vector index register
+            // XOp3 stores Rm - the offset register (applied to Rn)
+
+
+            unsigned eCount = ArmStaticInst::getCurSmeVecLen<TPElem>(
+                            xc->tcBase());
+
+            uint8_t offset = imm & (0xf >> (findMsbSet(sizeof(TPElem))));
+            M5_VAR_USED uint8_t tile_idx =
+                imm >> (4 - findMsbSet(sizeof(TPElem)));
+            M5_VAR_USED uint8_t vec_idx = (WOp2 + offset) % eCount;
+
+            // Calculate the address
+            M5_VAR_USED Addr EA = XOp1 + XOp3 * sizeof(TPElem);
+
+            // Calculate the read predicate. One boolean per byte,
+            // initialised to all true.
+            auto rdEn = std::vector<bool>(eCount * sizeof(TPElem), true);
+            for (int i = 0; i < eCount; ++i) {
+                if (GpOp_x[i]) {
+                    continue;
+                }
+
+                // Mark each byte of the corresponding elem as false
+                for (int j = 0; j < sizeof(TPElem); ++j) {
+                    rdEn[i * sizeof(TPElem) + j] = false;
+                }
+            }
+            '''
+
+        zaWriteCode = '''
+            // Here we write the data we just got from memory to the tile:
+            if (V) {
+                auto col = getTileVSlice<TPElem>(ZA, tile_idx, vec_idx);
+                for(int i = 0; i < eCount; ++i) {
+                    col[i] = GpOp_x[i] ? data[i] : 0;
+                }
+            } else {
+                auto row = getTileHSlice<TPElem>(ZA, tile_idx, vec_idx);
+                for(int i = 0; i < eCount; ++i) {
+                    row[i] = GpOp_x[i] ? data[i] : 0;
+                }
+            }
+        '''
+
+        iop = InstObjParams(name, "Sme" + Name, "SmeLd1xSt1xOp",
+                            {'code': code, 'za_write': zaWriteCode,
+                             'op_class': opClass}, ['IsLoad',
+                             'IsNonSpeculative'])
+        header_output += SmeLd1xDeclare.subst(iop)
+        exec_output += SmeLd1xExecute.subst(iop)
+        exec_output += SmeLd1xInitiateAcc.subst(iop)
+        exec_output += SmeLd1xCompleteAcc.subst(iop)
+        for type in types:
+            substDict = {'targs' : type,
+                         'class_name' : 'Sme' + Name}
+            exec_output += SmeLd1xExecDeclare.subst(substDict)
+
+    def smeLdrInst(name, Name, opClass):
+        global header_output, decoder_output, exec_output
+        code = smEnCheckCodeNoSM + smeZaWrite + '''
+            // imm stores the vector offset. We do not have a tile number as
+            // we target the whole accumulator array.
+            // imm also stores the offset applied to the base memory access
+            // register.
+            // Op1 stores Rn, which is the base memory access register
+            // Op2 stores Rv, which is the vector select register
+
+
+            unsigned eCount = ArmStaticInst::getCurSmeVecLen<uint8_t>(
+                            xc->tcBase());
+
+            M5_VAR_USED uint8_t vec_index = (WOp2 + imm) % eCount;
+
+            // Calculate the address
+            M5_VAR_USED Addr EA = XOp1 + imm;
+            '''
+
+        iop = InstObjParams(name, "Sme" + Name, "SmeLdrStrOp",
+                            {'code': code, 'op_class': opClass},
+                            ['IsLoad', 'IsNonSpeculative'])
+        header_output += SmeLdrDeclare.subst(iop)
+        exec_output += SmeLdrExecute.subst(iop)
+        exec_output += SmeLdrInitiateAcc.subst(iop)
+        exec_output += SmeLdrCompleteAcc.subst(iop)
+
+    def smeMovaExtractInst(name, Name, opClass, types):
+        global header_output, decoder_output, exec_output
+        code = smEnCheckCode + '''
+            // imm stores the tile index
+            // op1 is the source SVE vector register
+            // gp is the governing predecate register
+            // op2 is the slice index register
+            // v is the row/col select immediate - true for column accesses
+
+            unsigned eCount = ArmStaticInst::getCurSmeVecLen<TPElem>(
+                xc->tcBase());
+
+            uint8_t offset = imm & (0xf >> (findMsbSet(sizeof(TPElem))));
+            uint8_t tile_idx = imm >> (4 - findMsbSet(sizeof(TPElem)));
+
+            uint32_t vec_idx = (WOp2 + offset) % eCount;
+
+            if (!v) { // Horizontal (row) access
+                auto row = getTileHSlice<TPElem>(ZA, tile_idx, vec_idx);
+                for (int i = 0; i < eCount; ++i) {
+                    if (!GpOp_x[i]) {
+                        continue;
+                    }
+
+                    AA64FpOp1_x[i] = row[i];
+                }
+            } else { // Vertical (column) access
+                auto col = getTileVSlice<TPElem>(ZA, tile_idx, vec_idx);
+                for (int i = 0; i < eCount; ++i) {
+                    if (!GpOp_x[i]) {
+                        continue;
+                    }
+
+                    AA64FpOp1_x[i] = col[i];
+                }
+            }
+            '''
+
+        iop = InstObjParams(name, "Sme" + Name, "SmeMovExtractOp",
+                            {'code': code, 'op_class': opClass},
+                            ['IsNonSpeculative'])
+        header_output += SmeMovaExtractDeclare.subst(iop)
+        exec_output += SmeTemplatedExecute.subst(iop)
+
+        for type in types:
+            substDict = {'targs' : type,
+                         'class_name' : 'Sme' + Name}
+            exec_output += SmeOpExecDeclare.subst(substDict)
+
+    def smeMovaInsertInst(name, Name, opClass, types):
+        global header_output, decoder_output, exec_output
+        code = smEnCheckCode + smeZaWrite + '''
+            // imm stores the tile index
+            // op1 is the source SVE vector register
+            // gp is the governing predecate register
+            // op2 is the slice index register
+            // v is the row/col select immediate - true for column accesses
+
+            unsigned eCount = ArmStaticInst::getCurSmeVecLen<TPElem>(
+                xc->tcBase());
+
+            uint8_t offset = imm & (0xf >> (findMsbSet(sizeof(TPElem))));
+            uint8_t tile_idx = imm >> (4 - findMsbSet(sizeof(TPElem)));
+
+            uint32_t vec_idx = (WOp2 + offset) % eCount;
+
+            if (!v) { // Horizontal (row) access
+                auto row = getTileHSlice<TPElem>(ZA, tile_idx, vec_idx);
+                for (int i = 0; i < eCount; ++i) {
+                    if (!GpOp_x[i]) {
+                        continue;
+                    }
+
+                    row[i] = AA64FpOp1_x[i];
+                }
+            } else { // Vertical (column) access
+                auto col = getTileVSlice<TPElem>(ZA, tile_idx, vec_idx);
+                for (int i = 0; i < eCount; ++i) {
+                    if (!GpOp_x[i]) {
+                        continue;
+                    }
+
+                    col[i] = AA64FpOp1_x[i];
+                }
+            }
+            '''
+
+        iop = InstObjParams(name, "Sme" + Name, "SmeMovInsertOp",
+                            {'code': code, 'op_class': opClass},
+                            ['IsNonSpeculative'])
+        header_output += SmeMovaInsertDeclare.subst(iop)
+        exec_output += SmeTemplatedExecute.subst(iop)
+
+        for type in types:
+            substDict = {'targs' : type,
+                         'class_name' : 'Sme' + Name}
+            exec_output += SmeOpExecDeclare.subst(substDict)
+
+    def smeMsrInst(name, Name, opClass, op):
+        global header_output, decoder_output, exec_output
+        code = '''
+            if (FullSystem) {
+                fault = this->checkSmeAccess(xc->tcBase(), Cpsr, Cpacr64);
+                if (fault != NoFault) {
+                    return fault;
+                }
+            }
+        ''' + op
+
+        iop = InstObjParams(name, "Sme" + Name, "ImmOp64",
+                            {'code': code, 'op_class': opClass},
+                            ['IsNonSpeculative', 'IsSerializeAfter'])
+        header_output += SMEMgmtDeclare.subst(iop)
+        exec_output += SmeExecute.subst(iop)
+
+    def smeFPOPInst(name, Name, opClass, srcTypes, dstTypes, op):
+        global header_output, decoder_output, exec_output
+        code = smEnCheckCode + smeZaWrite + '''
+            // imm stores the tile index
+            // op1 is the first SVE vector register
+            // gp1 is the predecate register corresponding to the first
+            //      SVE vector register
+            // gp2 is the predecate register corresponding to the second
+            //      SVE vector register
+            // op2 is the second SVE vector register
+
+            unsigned eCount = ArmStaticInst::getCurSmeVecLen<TPDElem>(
+                xc->tcBase());
+            '''
+        code += op
+
+        iop = InstObjParams(name, "Sme" + Name, "SmeOPOp",
+                            {'code': code, 'op_class': opClass},
+                            ['IsNonSpeculative'])
+        header_output += SmeFPOPDeclare.subst(iop)
+        exec_output += SmeDualTemplatedExecute.subst(iop)
+        for src, dst in zip(srcTypes, dstTypes):
+            substDict = {'targs' : "{}, {}".format(src, dst),
+                         'class_name' : 'Sme' + Name}
+            exec_output += SmeOpExecDeclare.subst(substDict)
+
+    def smeIntOPInst(name, Name, opClass, src1Types, src2Types, dstTypes, op):
+        global header_output, decoder_output, exec_output
+        code = smEnCheckCode + smeZaWrite + '''
+            // imm stores the tile index
+            // op1 is the first SVE vector register
+            // gp1 is the predecate register corresponding to the first
+            //      SVE vector register
+            // gp2 is the predecate register corresponding to the second
+            //      SVE vector register
+            // op2 is the second SVE vector register
+
+            unsigned eCount = ArmStaticInst::getCurSmeVecLen<TPDElem>(
+                xc->tcBase());
+            '''
+        code += op
+
+        iop = InstObjParams(name, "Sme" + Name, "SmeOPOp",
+                            {'code': code, 'op_class': opClass},
+                            ['IsNonSpeculative'])
+        header_output += SmeIntOPDeclare.subst(iop)
+        exec_output += SmeTripleTemplatedExecute.subst(iop)
+        for src1, src2, dst in zip(src1Types, src2Types, dstTypes):
+            substDict = {'targs' : "{}, {}, {}".format(src1, src2, dst),
+                         'class_name' : 'Sme' + Name}
+            exec_output += SmeOpExecDeclare.subst(substDict)
+
+    def smeRdsvlInst(name, Name, opClass):
+        global header_output, decoder_output, exec_output
+        code = smEnCheckCodeNoPstate + '''
+            // dest is the 64-bit destination register
+            // imm is a signed multiplier
+
+            unsigned eCount = ArmStaticInst::getCurSmeVecLen<uint8_t>(
+                xc->tcBase());
+
+            Dest64 = eCount * imm;
+            '''
+
+        iop = InstObjParams(name, "Sme" + Name, "SmeRdsvlOp",
+                            {'code': code, 'op_class': opClass},
+                            ['IsNonSpeculative'])
+        header_output += SmeRdsvlDeclare.subst(iop)
+        exec_output += SmeExecute.subst(iop)
+
+    def smeSt1xInst(name, Name, opClass, types):
+        global header_output, decoder_output, exec_output
+        code = smEnCheckCode + '''
+            // imm stores the tile number as well as the vector offset. The
+            // size of the fields changes based on the data type being used.
+            // XOp1 stores Rn
+            // GpOp stores the governing predicate register
+            // WOp2 stores Rs - the vector index register
+            // XOp3 stores Rm - the offset register (applied to Rn)
+
+
+            unsigned eCount = ArmStaticInst::getCurSmeVecLen<TPElem>(
+                            xc->tcBase());
+
+            uint8_t offset = imm & (0xf >> (findMsbSet(sizeof(TPElem))));
+            M5_VAR_USED uint8_t tile_idx =
+                imm >> (4 - findMsbSet(sizeof(TPElem)));
+            M5_VAR_USED uint8_t vec_idx = (WOp2 + offset) % eCount;
+
+            // Calculate the address
+            M5_VAR_USED Addr EA = XOp1 + XOp3 * sizeof(TPElem);
+
+            // Calculate the write predicate. One boolean per byte,
+            // initialised to all true.
+            auto wrEn = std::vector<bool>(eCount * sizeof(TPElem), true);
+            for (int i = 0; i < eCount; ++i) {
+                if (GpOp_x[i]) {
+                    continue;
+                }
+
+                // Mark each byte of the corresponding elem as false
+                for (int j = 0; j < sizeof(TPElem); ++j) {
+                    wrEn[i * sizeof(TPElem) + j] = false;
+                }
+            }
+
+            // Extract the data to be stored from the tile. We don't worry
+            // about the predicate here as that's already handled by wrEn.
+            TPElem data[MaxSmeVecLenInBytes / sizeof(TPElem)];
+            if(V) {
+                auto col = getTileVSlice<TPElem>(ZA, tile_idx, vec_idx);
+                for (int i = 0; i < eCount; ++i) {
+                    data[i] = col[i];
+                }
+            } else {
+                auto row = getTileHSlice<TPElem>(ZA, tile_idx, vec_idx);
+                for (int i = 0; i < eCount; ++i) {
+                    data[i] = row[i];
+                }
+            }
+            '''
+
+        iop = InstObjParams(name, "Sme" + Name, "SmeLd1xSt1xOp",
+                            {'code': code, 'op_class': opClass},
+                            ['IsStore', 'IsNonSpeculative'])
+        header_output += SmeSt1xDeclare.subst(iop)
+        exec_output += SmeSt1xExecute.subst(iop)
+        exec_output += SmeSt1xInitiateAcc.subst(iop)
+        exec_output += SmeSt1xCompleteAcc.subst(iop)
+        for type in types:
+            substDict = {'targs' : type,
+                         'class_name' : 'Sme' + Name}
+            exec_output += SmeSt1xExecDeclare.subst(substDict)
+
+    def smeStrInst(name, Name, opClass):
+        global header_output, decoder_output, exec_output
+        code = smEnCheckCodeNoSM + '''
+            // imm stores the vector offset. We do not have a tile number
+            // as we target the whole accumulator array.
+            // imm also stores the offset applied to the base memory access
+            // register.
+            // Op1 stores Rn, which is the base memory access register
+            // Op2 stores Rv, which is the vector select register
+
+
+            unsigned eCount = ArmStaticInst::getCurSmeVecLen<uint8_t>(
+                            xc->tcBase());
+
+            uint8_t vec_index = (WOp2 + imm) % eCount;
+
+            auto row = getTileHSlice<uint8_t>(ZA, 0, vec_index);
+
+            // Calculate the address
+            M5_VAR_USED Addr EA = XOp1 + imm;
+
+            uint8_t data[MaxSmeVecLenInBytes];
+
+            // Update data which will then by used to store the row to memory
+            for (int i = 0; i < eCount; ++i) {
+                data[i] = row[i];
+            }
+            '''
+
+        iop = InstObjParams(name, "Sme" + Name, "SmeLdrStrOp",
+                            {'code': code, 'op_class': opClass},
+                            ['IsStore', 'IsNonSpeculative'])
+        header_output += SmeStrDeclare.subst(iop)
+        exec_output += SmeStrExecute.subst(iop)
+        exec_output += SmeStrInitiateAcc.subst(iop)
+        exec_output += SmeStrCompleteAcc.subst(iop)
+
+    def smeZeroInst(name, Name, opClass, types):
+        global header_output, decoder_output, exec_output
+        code = smEnCheckCodeNoSM + smeZaWrite + '''
+            // When zeroing tiles, we use  64-bit elements. This means
+            // that we have up to eight subtiles to clear in the ZA tile.
+
+            ZA = ZA;
+
+            for (int i = 0; i < 8; ++i) {
+                if (((imm >> i) & 0x1) == 0x1) {
+                    getTile<TPElem>(ZA, i).zero();
+                }
+            }'''
+
+        iop = InstObjParams(name, "Sme" + Name, "SmeZeroOp",
+                            {'code': code, 'op_class': opClass},
+                            ['IsNonSpeculative'])
+        header_output += SmeZeroDeclare.subst(iop)
+        exec_output += SmeTemplatedExecute.subst(iop)
+
+        for type in types:
+            substDict = {'targs' : type,
+                         'class_name' : 'Sme' + Name}
+            exec_output += SmeOpExecDeclare.subst(substDict)
+
+    # ADDHA
+    addCode = '''
+        for (int col = 0; col < eCount; ++col) {
+            TPElem val = AA64FpOp1_x[col];
+
+            for (int row = 0; row < eCount; ++row) {
+                if (!(GpOp1_x[row] && GpOp2_x[col])) {
+                    continue;
+                }
+
+                tile[col][row] += val;
+            }
+        }
+        '''
+    smeAddInst('addha', "Addha", "SimdAddOp", ['int32_t', 'int64_t'], addCode)
+
+    # ADDSPL
+    addSplCode = '''
+        Dest64 = imm * ArmStaticInst::getCurSmeVecLen<uint8_t>(xc->tcBase());
+        // Divide down to get the predicate length in bytes
+        Dest64 /= 8;
+        Dest64 += XOp1;
+    '''
+    smeAddVlInst('addspl', "Addspl", "SimdAddOp", addSplCode)
+
+    # ADDSVL
+    addSvlCode = '''
+        Dest64 = imm * ArmStaticInst::getCurSmeVecLen<uint8_t>(xc->tcBase());
+        Dest64 += XOp1;
+    '''
+    smeAddVlInst('addsvl', "Addsvl", "SimdAddOp", addSvlCode)
+
+    # ADDVA
+    addCode = '''
+        for (int row = 0; row < eCount; ++row) {
+            TPElem val = AA64FpOp1_x[row];
+
+            for (int col = 0; col < eCount; ++col) {
+                if (!(GpOp1_x[row] && GpOp2_x[col])) {
+                    continue;
+                }
+
+                tile[col][row] += val;
+            }
+        }
+        '''
+    smeAddInst('addva', "Addva", "SimdAddOp", ['int32_t', 'int64_t'], addCode)
+
+    # BFMOPA
+    # BFMOPS
+
+    # FMOPA (non-widening)
+    fmopxCode = '''
+        auto tile = getTile<TPDElem>(ZA, imm);
+        FPSCR fpscr = (FPSCR) Fpscr;
+
+        for (int j = 0; j < eCount; ++j) {
+            if (!GpOp1_xd[j]) {
+                continue;
+            }
+
+            TPDElem val1 = AA64FpOp1_xd[j];
+
+            for (int i = 0; i < eCount; ++i) {
+                if (!GpOp2_xd[i]) {
+                    continue;
+                }
+
+                TPDElem val2 = AA64FpOp2_xd[i];
+
+        #if %s
+                val2 = fplibNeg(val2);
+        #endif
+
+                TPDElem res = fplibMul(val1, val2, fpscr);
+
+                tile[j][i] = fplibAdd(tile[j][i],
+                                      res, fpscr);
+            }
+        }
+        '''
+    smeFPOPInst('fmopa', 'Fmopa', 'MatrixOPOp', ['uint32_t', 'uint64_t'],
+              ['uint32_t', 'uint64_t'], fmopxCode % "0")
+
+    # FMOPA (widening)
+    wideningFmopxCode = '''
+        auto tile = getTile<TPDElem>(ZA, imm);
+        FPSCR fpscr = (FPSCR) Fpscr;
+
+        for (int j = 0; j < eCount; ++j) {
+            if (!GpOp1_xd[j]) {
+                continue;
+            }
+            for (int i = 0; i < eCount; ++i) {
+                if (!GpOp2_xd[i]) {
+                    continue;
+                }
+
+                for (int k = 0; k < 2; ++k) {
+                    TPSElem temp1 = (AA64FpOp1_xd[j] >> (16 * k)) & 0xFFFF;
+                    TPSElem temp2 = (AA64FpOp2_xd[j] >> (16 * k)) & 0xFFFF;
+                    TPDElem val1 = fplibConvert<TPSElem, TPDElem>(temp1,
+                        FPCRRounding(fpscr), fpscr);
+                    TPDElem val2 = fplibConvert<TPSElem, TPDElem>(temp2,
+                        FPCRRounding(fpscr), fpscr);
+
+        #if %s
+                    val2 = fplibNeg(val2);
+        #endif
+
+                    TPDElem res = fplibMul(val1, val2, fpscr);
+                    tile[j][i] = fplibAdd(tile[j][i], res, fpscr);
+                }
+            }
+        }
+        '''
+    smeFPOPInst('fmopa', 'FmopaWidening', 'MatrixOPOp',
+              ['uint16_t'], ['uint32_t'], wideningFmopxCode % "0")
+
+    # FMOPS (non-widening)
+    smeFPOPInst('fmops', 'Fmops', 'MatrixOPOp', ['uint32_t', 'uint64_t'],
+              ['uint32_t', 'uint64_t'], fmopxCode % "1")
+
+    # FMOPS (widening)
+    smeFPOPInst('fmops', 'FmopsWidening', 'MatrixOPOp',
+              ['uint16_t'], ['uint32_t'], wideningFmopxCode % "1")
+
+    # LD1B
+    smeLd1xInst('ld1b', 'Ld1b', 'MemReadOp', ['uint8_t'])
+
+    # LD1D
+    smeLd1xInst('ld1d', 'Ld1d', 'MemReadOp', ['uint64_t'])
+
+    # LD1H
+    smeLd1xInst('ld1h', 'Ld1h', 'MemReadOp', ['uint16_t'])
+
+    # LD1Q
+    smeLd1xInst('ld1q', 'Ld1q', 'MemReadOp', ['__uint128_t'])
+
+    # LD1W
+    smeLd1xInst('ld1w', 'Ld1w', 'MemReadOp', ['uint32_t'])
+
+    # LDR
+    smeLdrInst("ldr", "Ldr", 'MemReadOp')
+
+    # MOV (tile to vector) - ALIAS; see MOVA
+    # MOV (vector to tile) - ALIAS; see MOVA
+    # MOVA (tile to vector)
+    smeMovaExtractInst("mova", "MovaExtract", 'MatrixMovOp',
+                      ["uint8_t", "uint16_t", "uint32_t", "uint64_t",
+                      "__uint128_t"])
+
+    # MOVA (vector to tile)
+    smeMovaInsertInst("mova", "MovaInsert", 'MatrixMovOp',
+                      ["uint8_t", "uint16_t", "uint32_t", "uint64_t",
+                      "__uint128_t"])
+
+    # RDSVL
+    smeRdsvlInst('rdsvl', 'Rdsvl', 'SimdAddOp')
+
+    # SMOPA
+    intMopxCode = '''
+        auto tile = getTile<TPDElem>(ZA, imm);
+
+        size_t shift = 8 * sizeof(TPS1Elem);
+        size_t mask = (1 << shift) - 1;
+
+        for (int j = 0; j < eCount; ++j) {
+            for (int i = 0; i < eCount; ++i) {
+                for (int k = 0; k < 4; ++k) {
+                    if (!GpOp1_xs1[4 * j + k]) {
+                        continue;
+                    }
+
+                    if (!GpOp2_xs2[4 * i + k]) {
+                        continue;
+                    }
+
+                    TPS1Elem temp1 =
+                        (TPS1Elem)(AA64FpOp1_xd[j] >> (shift * k)) & mask;
+                    TPS2Elem temp2 =
+                        (TPS2Elem)(AA64FpOp2_xd[i] >> (shift * k)) & mask;
+
+                    tile[j][i] %s= (TPDElem)temp1 * (TPDElem)temp2;
+                }
+            }
+        }
+        '''
+    smeIntOPInst('smopa', 'Smopa', 'MatrixOPOp', ['int8_t', 'int16_t'],
+                 ['int8_t', 'int16_t'], ['int32_t', 'int64_t'],
+                 intMopxCode % "+")
+
+    # SMOPS
+    smeIntOPInst('smops', 'Smops', 'MatrixOPOp', ['int8_t', 'int16_t'],
+                 ['int8_t', 'int16_t'], ['int32_t', 'int64_t'],
+                 intMopxCode % "-")
+
+    # SMSTART
+    smstartSmstopCode = '''
+        // Bit 0 of imm determines if we are setting or clearing
+        // (smstart vs smstop)
+        // Bit 1 means that we are applying this to SM
+        // Bit 2 means that we are applying this to ZA
+        bool new_state    = imm & 0x1;
+        bool sm_affected  = imm & 0x2;
+        bool za_affected  = imm & 0x4;
+        bool old_sm_state = Svcr & 0x1;
+        bool old_za_state = Svcr & 0x2;
+
+        bool sm_changed = sm_affected && old_sm_state != new_state;
+        bool za_changed = za_affected && old_za_state != new_state;
+
+        if (sm_changed) {
+            // We need to zero the SVE Z, P, FFR registers on SM change. Also,
+            // set FPSR to a default value. Note that we use the max SVE len
+            // instead of the actual vector length.
+            //
+            // For the Z, P registers we are directly setting these to zero
+            // without going through the ISA parser (which generates the
+            // dependencies) as otherwise the O3 CPU can deadlock when there
+            // are too few free physical registers. We therefore rely on this
+            // instruction being a barrier (IsSerialiseAfter).
+
+            // Z Registers, including special and interleave registers
+            ArmISA::VecRegContainer zeroed_z_reg;
+            zeroed_z_reg.zero();
+
+            for (int reg_idx = 0; reg_idx < NumVecRegs; ++reg_idx) {
+                auto reg_id = ArmISA::vecRegClass[reg_idx];
+                xc->tcBase()->setReg(reg_id, &zeroed_z_reg);
+            }
+
+            // P Registers, including the FFR
+            ArmISA::VecPredRegContainer zeroed_p_reg;
+            zeroed_p_reg.reset();
+
+            for (int reg_idx = 0; reg_idx < NumVecPredRegs; ++reg_idx) {
+                auto reg_id = ArmISA::vecPredRegClass[reg_idx];
+                xc->tcBase()->setReg(reg_id, &zeroed_p_reg);
+            }
+
+            // FPSR
+            Fpsr = 0x0800009f;
+        }
+
+        if (za_changed) {
+            // ZA write
+            ZA = ZA;
+            ZA.zero();
+        }
+
+        // Now that we've handled the zeroing of the appropriate registers,
+        // we update the pstate accordingly.
+
+        if (sm_changed) {
+            if (new_state == 1) {
+                Svcr = Svcr | 0x1; // Set SM
+            } else {
+                Svcr = Svcr & ~(uint64_t)0x1; // Clear SM
+            }
+        }
+
+        if (za_changed) {
+            if (new_state == 1) {
+                Svcr = Svcr | 0x2; // Set ZA
+            } else {
+                Svcr = Svcr & ~(uint64_t)0x2; // Clear ZA
+            }
+        }
+    '''
+
+    smeMsrInst('smstart', 'Smstart', 'IntAluOp',
+               smstartSmstopCode)
+
+    # SMSTOP
+    smeMsrInst('smstop', 'Smstop', 'IntAluOp',
+               smstartSmstopCode)
+
+    # ST1B
+    smeSt1xInst('st1b', 'St1b', 'MemWriteOp', ['uint8_t'])
+
+    # ST1D
+    smeSt1xInst('st1d', 'St1d', 'MemWriteOp', ['uint64_t'])
+
+    # ST1H
+    smeSt1xInst('st1h', 'St1h', 'MemWriteOp', ['uint16_t'])
+
+    # ST1Q
+    smeSt1xInst('st1q', 'St1q', 'MemWriteOp', ['__uint128_t'])
+
+    # ST1W
+    smeSt1xInst('st1w', 'St1w', 'MemWriteOp', ['uint32_t'])
+
+    # STR
+    smeStrInst("str", "Str", "MemWriteOp")
+
+    # SUMOPA
+    smeIntOPInst('sumopa', 'Sumopa', 'MatrixOPOp', ['int8_t', 'int16_t'],
+                 ['uint8_t', 'uint16_t'], ['int32_t', 'int64_t'],
+                 intMopxCode % "+")
+
+    # SUMOPS
+    smeIntOPInst('sumops', 'Sumops', 'MatrixOPOp', ['int8_t', 'int16_t'],
+                 ['uint8_t', 'uint16_t'], ['int32_t', 'int64_t'],
+                 intMopxCode % "-")
+
+    # UMOPA
+    smeIntOPInst('umopa', 'Umopa', 'MatrixOPOp', ['uint8_t', 'uint16_t'],
+                 ['uint8_t', 'uint16_t'], ['int32_t', 'int64_t'],
+                 intMopxCode % "+")
+
+    # UMOPS
+    smeIntOPInst('umops', 'Umops', 'MatrixOPOp', ['uint8_t', 'uint16_t'],
+                 ['uint8_t', 'uint16_t'], ['int32_t', 'int64_t'],
+                 intMopxCode % "-")
+
+    # USMOPA
+    smeIntOPInst('usmopa', 'Usmopa', 'MatrixOPOp', ['uint8_t', 'uint16_t'],
+                 ['int8_t', 'int16_t'], ['int32_t', 'int64_t'],
+                 intMopxCode % "+")
+
+    # USMOPS
+    smeIntOPInst('usmops', 'Usmops', 'MatrixOPOp', ['uint8_t', 'uint16_t'],
+                 ['int8_t', 'int16_t'], ['int32_t', 'int64_t'],
+                 intMopxCode % "-")
+
+    # ZERO
+    smeZeroInst("zero", "Zero", "MatrixOp", ["uint64_t"])
+
+}};
diff --git a/src/arch/arm/isa/insts/sve.isa b/src/arch/arm/isa/insts/sve.isa
index 7cb733100f..97d4ec7e56 100644
--- a/src/arch/arm/isa/insts/sve.isa
+++ b/src/arch/arm/isa/insts/sve.isa
@@ -1310,6 +1310,34 @@ let {{
             substDict = {'targs' : type, 'class_name' : 'Sve' + Name}
             exec_output += SveOpExecDeclare.subst(substDict);
 
+    # Generates definition for SVE psel predicate selection instructions
+    def svePselInst(name, Name, opClass, types):
+        global header_output, exec_output, decoders
+        code = sveEnabledCheckCode + '''
+        unsigned eCount = ArmStaticInst::getCurSveVecLen<TPElem>(
+                xc->tcBase());
+
+        uint8_t index = ((uint32_t)Op2 + imm) % eCount;
+
+        bool copy = POp1_x[index];
+        if (copy) {
+            for (int i = 0; i < eCount; ++i) {
+                PDest_x[i] = GpOp_x[i];
+            }
+        } else {
+            for (int i = 0; i < eCount; ++i) {
+                PDest_x[i] = false;
+            }
+        }
+        '''
+        iop = ArmInstObjParams(name, 'Sve' + Name, 'SvePselOp',
+                {'code': code, 'op_class': opClass}, [])
+        header_output += SvePselOpDeclare.subst(iop)
+        exec_output += SveOpExecute.subst(iop)
+        for type in types:
+            substDict = {'targs' : type, 'class_name' : 'Sve' + Name}
+            exec_output += SveOpExecDeclare.subst(substDict);
+
     # Generate definition for SVE compare & terminate instructions
     def sveCompTermInst(name, Name, opClass, types, op):
         global header_output, exec_output, decoders
@@ -3096,6 +3124,31 @@ let {{
                     'class_name' : 'Sve' + Name}
             exec_output += SveOpExecDeclare.subst(substDict)
 
+    # Generate definitions for clamp to min/max instructions
+    def sveClampInst(name, Name, opClass, types,
+            decoder = 'Generic'):
+        global header_output, exec_output, decoders
+        code = sveEnabledCheckCode + '''
+        unsigned eCount = ArmStaticInst::getCurSveVecLen<TPElem>(
+                xc->tcBase());
+
+        for (int i = 0 ; i < eCount ; ++i) {
+            if (AA64FpDestMerge_x[i] < AA64FpOp2_x[i]) {
+                AA64FpDest_x[i] = AA64FpOp2_x[i];
+            } else if (AA64FpDestMerge_x[i] > AA64FpOp1_x[i]) {
+                AA64FpDest_x[i] = AA64FpOp1_x[i];
+            }
+        }
+        '''
+        iop = ArmInstObjParams(name, 'Sve' + Name, 'SveClampOp',
+                {'code': code, 'op_class': opClass}, [])
+        header_output += SveClampOpDeclare.subst(iop)
+        exec_output += SveOpExecute.subst(iop)
+        for type in types:
+            substDict = {'targs' : type,
+                         'class_name' : 'Sve' + Name}
+            exec_output += SveOpExecDeclare.subst(substDict)
+
     fpTypes = ('uint16_t', 'uint32_t', 'uint64_t')
     signedTypes = ('int8_t', 'int16_t', 'int32_t', 'int64_t')
     unsignedTypes = ('uint8_t', 'uint16_t', 'uint32_t', 'uint64_t')
@@ -4071,6 +4124,8 @@ let {{
     svePFirstInst('pfirst', 'Pfirst', 'SimdPredAluOp')
     # PNEXT
     svePNextInst('pnext', 'Pnext', 'SimdPredAluOp', unsignedTypes)
+    # PSEL
+    svePselInst('psel', 'Psel', 'SimdPredAluOp', unsignedTypes)
     # PTEST
     svePredTestInst('ptest', 'Ptest', 'SimdPredAluOp')
     # PTRUE
@@ -4140,6 +4195,10 @@ let {{
             ['uint16_t', 'uint32_t', 'uint64_t'],
             revCode % {'revtype' : 'uint8_t'}, predType=PredType.MERGE,
             srcRegType=SrcRegType.Vector, decoder='Generic')
+    # REVD
+    sveUnaryInst('revd', 'Revd', 'SimdAluOp', ['__uint128_t'],
+            revCode % {'revtype' : 'uint64_t'}, predType=PredType.MERGE,
+            srcRegType=SrcRegType.Vector, decoder='Generic')
     # REVH
     sveUnaryInst('revh', 'Revh', 'SimdAluOp', ['uint32_t', 'uint64_t'],
             revCode % {'revtype' : 'uint16_t'}, predType=PredType.MERGE,
@@ -4160,6 +4219,8 @@ let {{
     sveWideningAssocReducInst('saddv', 'Saddv', 'SimdReduceAddOp',
             ['int8_t, int64_t', 'int16_t, int64_t', 'int32_t, int64_t'],
             addvCode, '0')
+    # SCLAMP
+    sveClampInst('sclamp', 'Sclamp', 'SimdAluOp', signedTypes)
     # SCVTF
     scvtfCode = fpOp % ('fplibFixedToFP<DElement>('
                         'sext<sizeof(SElement) * 8>(srcElem1), 0,'
@@ -4545,6 +4606,8 @@ let {{
             ['uint8_t, uint64_t', 'uint16_t, uint64_t', 'uint32_t, uint64_t',
              'uint64_t, uint64_t'],
             addvCode, '0')
+    # UCLAMP
+    sveClampInst('uclamp', 'Uclamp', 'SimdAluOp', unsignedTypes)
     # UCVTF
     ucvtfCode = fpOp % ('fplibFixedToFP<DElement>(srcElem1, 0, true,'
                         ' FPCRRounding(fpscr), fpscr)')
diff --git a/src/arch/arm/isa/operands.isa b/src/arch/arm/isa/operands.isa
index 5919ae974e..24a0af9155 100644
--- a/src/arch/arm/isa/operands.isa
+++ b/src/arch/arm/isa/operands.isa
@@ -57,6 +57,8 @@ def operand_types {{
     # For operations that are implemented as a template
     'x' : 'TPElem',
     'xs' : 'TPSElem',
+    'xs1' : 'TPS1Elem',
+    'xs2' : 'TPS2Elem',
     'xd' : 'TPDElem',
     'pc' : 'ArmISA::VecPredRegContainer',
     'pb' : 'uint8_t'
@@ -451,6 +453,8 @@ def operands {{
 
     # Predicate register operands
     'GpOp': VecPredReg('gp'),
+    'GpOp1': VecPredReg('gp1'),
+    'GpOp2': VecPredReg('gp2'),
     'POp1': VecPredReg('op1'),
     'POp2': VecPredReg('op2'),
     'PDest': VecPredReg('dest'),
@@ -496,6 +500,7 @@ def operands {{
     'LLSCLock': CntrlRegNC('MISCREG_LOCKFLAG'),
     'Dczid' : CntrlRegNC('MISCREG_DCZID_EL0'),
     'PendingDvm': CntrlRegNC('MISCREG_TLBINEEDSYNC'),
+    'Svcr' : CntrlReg('MISCREG_SVCR'),
 
     #Register fields for microops
     'URa' : IntReg('ura'),
diff --git a/src/arch/arm/isa/templates/sme.isa b/src/arch/arm/isa/templates/sme.isa
new file mode 100644
index 0000000000..1bec2a3a71
--- /dev/null
+++ b/src/arch/arm/isa/templates/sme.isa
@@ -0,0 +1,773 @@
+// Copyright (c) 2022 ARM Limited
+// All rights reserved
+//
+// The license below extends only to copyright in the software and shall
+// not be construed as granting a license to any other intellectual
+// property including but not limited to intellectual property relating
+// to a hardware implementation of the functionality of the software
+// licensed hereunder.  You may use the software subject to the license
+// terms below provided that you ensure that this notice is replicated
+// unmodified and in its entirety in all distributions of the software,
+// modified or unmodified, in source code or in binary form.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met: redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer;
+// redistributions in binary form must reproduce the above copyright
+// notice, this list of conditions and the following disclaimer in the
+// documentation and/or other materials provided with the distribution;
+// neither the name of the copyright holders nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+// @file Definition of SME instruction templates.
+
+let {{
+    # All SME instructions should be checking if Streaming Mode is
+    # enabled in the PSTATE. The following call checks both the SME and
+    # the FP enable flags in the relevant registers depending on the
+    # current EL.
+    smEnCheckCodeNoPstate = '''
+        if (FullSystem) {
+            fault = this->checkSmeEnabled(xc->tcBase(), Cpsr, Cpacr64);
+            if (fault != NoFault) {
+                return fault;
+            }
+        }
+    '''
+
+    smPreamble = '''
+        CPSR cpsr = (CPSR) Cpsr;
+        ExceptionLevel target_el = (ExceptionLevel) (uint8_t) cpsr.el;
+        if (target_el == EL0) {
+            target_el = EL1;
+        }
+    '''
+
+    smCheckCode = '''
+        // Check streaming mode first
+        if ((Svcr & 1) != 0b1) {
+            fault = smeAccessTrap(target_el, 0b10);
+            return fault;
+        }
+    '''
+
+    zaCheckCode = '''
+        // Check if ZA is enabled
+        if ((Svcr & 2) >> 1 != 0b1) {
+            fault = smeAccessTrap(target_el, 0b11);
+            return fault;
+        }
+    '''
+
+    # If streaming mode is disabled or ZA is disabled we trap
+    smEnCheckCode = smPreamble + smCheckCode + zaCheckCode + \
+                    smEnCheckCodeNoPstate
+
+    # If ZA is disabled we trap
+    smEnCheckCodeNoSM = smPreamble + zaCheckCode + smEnCheckCodeNoPstate
+
+    # If streaming mode is disabled we trap
+    smEnCheckCodeNoZA = smPreamble + smCheckCode + smEnCheckCodeNoPstate
+
+    smeZaWrite = '''
+        // Force the ISA parser to see the access to ZA as a write,
+        // not a read.
+        ZA = ZA;
+        '''
+}};
+
+def template SmeAddDeclare {{
+    template <typename TPElem>
+    class %(class_name)s : public %(base_class)s
+    {
+      private:
+        %(reg_idx_arr_decl)s;
+
+      public:
+        /// Constructor.
+        %(class_name)s(ExtMachInst machInst, uint64_t imm,
+                       RegIndex op1, RegIndex gp1,
+                       RegIndex gp2)
+            : %(base_class)s("%(mnemonic)s", machInst, %(op_class)s,
+                             imm, op1, gp1, gp2)
+        {
+            %(set_reg_idx_arr)s;
+            %(constructor)s;
+        }
+
+        Fault execute(ExecContext *, trace::InstRecord *) const override;
+    };
+}};
+
+def template SmeAddVlDeclare {{
+    class %(class_name)s : public %(base_class)s
+    {
+      private:
+        %(reg_idx_arr_decl)s;
+
+      public:
+        /// Constructor.
+        %(class_name)s(ExtMachInst machInst,
+                       RegIndex dest, RegIndex op1,
+                       int8_t imm)
+            : %(base_class)s("%(mnemonic)s", machInst, %(op_class)s,
+                             dest, op1, imm)
+        {
+            %(set_reg_idx_arr)s;
+            %(constructor)s;
+        }
+
+        Fault execute(ExecContext *, trace::InstRecord *) const override;
+    };
+}};
+
+def template SmeLd1xDeclare {{
+    template <typename TPElem>
+    class %(class_name)s : public %(base_class)s
+    {
+      private:
+        %(reg_idx_arr_decl)s;
+
+      public:
+        /// Constructor.
+        %(class_name)s(ExtMachInst machInst, uint64_t imm,
+                       RegIndex op1, RegIndex mpop1,
+                       RegIndex op2, RegIndex op3,
+                       bool V)
+            : %(base_class)s("%(mnemonic)s", machInst, %(op_class)s,
+                             imm, op1, mpop1, op2, op3, V)
+        {
+            %(set_reg_idx_arr)s;
+            %(constructor)s;
+        }
+
+        Fault execute(ExecContext *, trace::InstRecord *) const override;
+        Fault initiateAcc(ExecContext *, trace::InstRecord *) const override;
+        Fault completeAcc(PacketPtr, ExecContext *,
+                          trace::InstRecord *) const override;
+    };
+}};
+
+def template SmeLd1xExecute {{
+    template <typename TPElem>
+    Fault %(class_name)s<TPElem>::execute(ExecContext *xc,
+                                          trace::InstRecord *traceData) const
+    {
+        Fault fault = NoFault;
+        Request::Flags flags = 0;
+
+        %(op_decl)s;
+        %(op_rd)s;
+        %(code)s;
+
+        // We need a buffer in which to store the data:
+        TPElem data[MaxSmeVecLenInBytes / sizeof(TPElem)];
+
+        if (fault == NoFault) {
+            // The size of the access is controlled by the type of data, and
+            // the number of elements.
+            fault = xc->readMem(EA, (uint8_t*)data, eCount * sizeof(TPElem),
+                                flags, rdEn);
+        }
+
+        if (fault == NoFault) {
+            %(za_write)s
+
+            // Write back the changes to the actual tile
+            %(op_wb)s;
+        }
+
+        return fault;
+    }
+}};
+
+def template SmeLd1xInitiateAcc {{
+    template <typename TPElem>
+    Fault %(class_name)s<TPElem>::initiateAcc(ExecContext *xc,
+                                            trace::InstRecord *traceData) const
+    {
+        Fault fault = NoFault;
+        Request::Flags flags = 0;
+
+        %(op_decl)s;
+        %(op_rd)s;
+        %(code)s;
+
+        if (fault == NoFault) {
+            fault = xc->initiateMemRead(EA, eCount * sizeof(TPElem),
+                                        flags, rdEn);
+        }
+
+        return fault;
+    }
+}};
+
+def template SmeLd1xCompleteAcc {{
+    template <typename TPElem>
+    Fault %(class_name)s<TPElem>::completeAcc(PacketPtr pkt, ExecContext *xc,
+                                            trace::InstRecord *traceData) const
+    {
+        Fault fault = NoFault;
+
+        %(op_decl)s;
+        %(op_rd)s;
+        %(code)s;
+
+        // The O3 CPU will call this with a NULL-pointer if the access was
+        // disabled. Just return.
+        if (pkt == NULL) {
+            return fault;
+        }
+
+        if (fault == NoFault) {
+            // We need a buffer in which to store the data:
+            TPElem data[MaxSmeVecLenInBytes / sizeof(TPElem)];
+
+            // The size for the amount of data returned here should
+            // have been set in initiateAcc.
+            memcpy((uint8_t*)data, pkt->getPtr<uint8_t>(), pkt->getSize());
+
+            %(za_write)s
+
+            // Write back the changes to the tile
+            %(op_wb)s;
+        }
+        return fault;
+    }
+}};
+
+def template SmeLd1xExecDeclare {{
+    template
+    Fault %(class_name)s<%(targs)s>::execute(
+            ExecContext *, trace::InstRecord *) const;
+    template
+    Fault %(class_name)s<%(targs)s>::initiateAcc(
+            ExecContext *, trace::InstRecord *) const;
+    template
+    Fault %(class_name)s<%(targs)s>::completeAcc(
+            PacketPtr, ExecContext *, trace::InstRecord *) const;
+}};
+
+def template SmeLdrDeclare {{
+    class %(class_name)s : public %(base_class)s
+    {
+      private:
+        %(reg_idx_arr_decl)s;
+
+      public:
+        /// Constructor.
+        %(class_name)s(ExtMachInst machInst, uint64_t imm,
+                       RegIndex op1, RegIndex op2)
+            : %(base_class)s("%(mnemonic)s", machInst, %(op_class)s,
+                             imm, op1, op2)
+        {
+            %(set_reg_idx_arr)s;
+            %(constructor)s;
+        }
+
+        Fault execute(ExecContext *, trace::InstRecord *) const override;
+        Fault initiateAcc(ExecContext *, trace::InstRecord *) const override;
+        Fault completeAcc(PacketPtr, ExecContext *,
+                          trace::InstRecord *) const override;
+    };
+}};
+
+def template SmeLdrExecute {{
+    Fault %(class_name)s::execute(ExecContext *xc,
+                                  trace::InstRecord *traceData) const
+    {
+        Fault fault = NoFault;
+        Request::Flags flags = 0;
+
+        %(op_decl)s;
+        %(op_rd)s;
+        %(code)s;
+
+        auto rdEn = std::vector<bool>(eCount, true);
+
+        // We need a buffer in which to store the data:
+        uint8_t data[MaxSmeVecLenInBytes];
+
+        if (fault == NoFault) {
+            fault = xc->readMem(EA, (uint8_t*)data, eCount, flags, rdEn);
+        }
+
+        if (fault == NoFault) {
+            auto row = getTileHSlice<uint8_t>(ZA, 0, vec_index);
+            for (int i = 0; i < eCount; ++i) {
+                row[i] = data[i];
+            }
+
+            %(op_wb)s;
+        }
+
+        return fault;
+    }
+}};
+
+def template SmeLdrInitiateAcc {{
+    Fault %(class_name)s::initiateAcc(ExecContext *xc,
+                                  trace::InstRecord *traceData) const
+    {
+        Fault fault = NoFault;
+        Request::Flags flags = 0;
+
+        %(op_decl)s;
+        %(op_rd)s;
+        %(code)s;
+
+        auto rdEn = std::vector<bool>(eCount, true);
+
+        if (fault == NoFault) {
+            fault = xc->initiateMemRead(EA, eCount, flags, rdEn);
+        }
+
+        return fault;
+    }
+}};
+
+def template SmeLdrCompleteAcc {{
+        Fault %(class_name)s::completeAcc(PacketPtr pkt, ExecContext *xc,
+                                  trace::InstRecord *traceData) const
+    {
+        Fault fault = NoFault;
+
+        %(op_decl)s;
+        %(op_rd)s;
+        %(code)s;
+
+        // The O3 CPU will call this with a NULL-pointer if the access was
+        // disabled. Just return.
+        if (pkt == NULL) {
+            return fault;
+        }
+
+        if (fault == NoFault) {
+            // Get the data out of the packet
+            auto row = getTileHSlice<uint8_t>(ZA, 0, vec_index);
+            for (int i = 0; i < eCount; ++i) {
+                row[i] = pkt->getPtr<uint8_t>()[i];
+            }
+
+            %(op_wb)s;
+        }
+
+        return fault;
+    }
+}};
+
+def template SMEMgmtDeclare {{
+    class %(class_name)s : public %(base_class)s
+    {
+      private:
+        %(reg_idx_arr_decl)s;
+
+      public:
+        /// Constructor.
+        %(class_name)s(ExtMachInst machInst, uint64_t imm)
+            : %(base_class)s("%(mnemonic)s", machInst, %(op_class)s, imm)
+        {
+            %(set_reg_idx_arr)s;
+            %(constructor)s;
+        }
+
+        Fault execute(ExecContext *, trace::InstRecord *) const override;
+    };
+}};
+
+def template SmeMovaExtractDeclare {{
+    template <typename TPElem>
+    class %(class_name)s : public %(base_class)s
+    {
+      private:
+        %(reg_idx_arr_decl)s;
+
+      public:
+        /// Constructor.
+        %(class_name)s(ExtMachInst machInst, RegIndex op1,
+                       uint8_t imm, RegIndex gp,
+                       RegIndex op2, bool v)
+            : %(base_class)s("%(mnemonic)s", machInst, %(op_class)s,
+                             op1, imm, gp, op2, v)
+        {
+            %(set_reg_idx_arr)s;
+            %(constructor)s;
+        }
+
+        Fault execute(ExecContext *, trace::InstRecord *) const override;
+    };
+}};
+
+def template SmeMovaInsertDeclare {{
+    template <typename TPElem>
+    class %(class_name)s : public %(base_class)s
+    {
+      private:
+        %(reg_idx_arr_decl)s;
+
+      public:
+        /// Constructor.
+        %(class_name)s(ExtMachInst machInst, uint8_t imm,
+                       RegIndex op1, RegIndex gp,
+                       RegIndex op2, bool v)
+            : %(base_class)s("%(mnemonic)s", machInst, %(op_class)s,
+                             imm, op1, gp, op2, v)
+        {
+            %(set_reg_idx_arr)s;
+            %(constructor)s;
+        }
+
+        Fault execute(ExecContext *, trace::InstRecord *) const override;
+    };
+}};
+
+def template SmeFPOPDeclare {{
+    template <typename TPSElem, typename TPDElem>
+    class %(class_name)s : public %(base_class)s
+    {
+      private:
+        %(reg_idx_arr_decl)s;
+
+      public:
+        /// Constructor.
+        %(class_name)s(ExtMachInst machInst, uint64_t imm,
+                                   RegIndex op1, RegIndex gp1,
+                                   RegIndex gp2, RegIndex op2)
+            : %(base_class)s("%(mnemonic)s", machInst, %(op_class)s,
+                             imm, op1, gp1, gp2, op2)
+        {
+            %(set_reg_idx_arr)s;
+            %(constructor)s;
+        }
+
+        Fault execute(ExecContext *, trace::InstRecord *) const override;
+    };
+}};
+
+def template SmeIntOPDeclare {{
+    template <typename TPS1Elem, typename TPS2Elem, typename TPDElem>
+    class %(class_name)s : public %(base_class)s
+    {
+      private:
+        %(reg_idx_arr_decl)s;
+
+      public:
+        /// Constructor.
+        %(class_name)s(ExtMachInst machInst, uint64_t imm,
+                                   RegIndex op1, RegIndex gp1,
+                                   RegIndex gp2, RegIndex op2)
+            : %(base_class)s("%(mnemonic)s", machInst, %(op_class)s,
+                             imm, op1, gp1, gp2, op2)
+        {
+            %(set_reg_idx_arr)s;
+            %(constructor)s;
+        }
+
+        Fault execute(ExecContext *, trace::InstRecord *) const override;
+    };
+}};
+
+def template SmeRdsvlDeclare {{
+    class %(class_name)s : public %(base_class)s
+    {
+      private:
+        %(reg_idx_arr_decl)s;
+
+      public:
+        /// Constructor.
+        %(class_name)s(ExtMachInst machInst,
+                       RegIndex dest, int8_t imm)
+            : %(base_class)s("%(mnemonic)s", machInst, %(op_class)s,
+                             dest, imm)
+        {
+            %(set_reg_idx_arr)s;
+            %(constructor)s;
+        }
+
+        Fault execute(ExecContext *, trace::InstRecord *) const override;
+    };
+}};
+
+def template SmeSt1xDeclare {{
+    template <typename TPElem>
+    class %(class_name)s : public %(base_class)s
+    {
+      private:
+        %(reg_idx_arr_decl)s;
+
+      public:
+        /// Constructor.
+        %(class_name)s(ExtMachInst machInst, uint64_t imm,
+                       RegIndex op1, RegIndex mpop1,
+                       RegIndex op2, RegIndex op3, bool V)
+            : %(base_class)s("%(mnemonic)s", machInst, %(op_class)s,
+                             imm, op1, mpop1, op2, op3, V)
+        {
+            %(set_reg_idx_arr)s;
+            %(constructor)s;
+        }
+
+        Fault execute(ExecContext *, trace::InstRecord *) const override;
+        Fault initiateAcc(ExecContext *, trace::InstRecord *) const override;
+        Fault completeAcc(PacketPtr, ExecContext *,
+                          trace::InstRecord *) const override;
+    };
+}};
+
+def template SmeSt1xExecute {{
+    template <typename TPElem>
+    Fault %(class_name)s<TPElem>::execute(ExecContext *xc,
+                                  trace::InstRecord *traceData) const
+    {
+        Fault fault = NoFault;
+        Request::Flags flags = 0;
+
+        %(op_decl)s;
+        %(op_rd)s;
+        %(code)s;
+
+        if (fault == NoFault) {
+            fault = xc->writeMem((uint8_t*)data, eCount * sizeof(TPElem), EA,
+                                 flags, NULL, wrEn);
+        }
+
+        return fault;
+    }
+}};
+
+def template SmeSt1xInitiateAcc {{
+    template <typename TPElem>
+    Fault %(class_name)s<TPElem>::initiateAcc(ExecContext *xc,
+                                  trace::InstRecord *traceData) const
+    {
+        Fault fault = NoFault;
+        Request::Flags flags = 0;
+
+        %(op_decl)s;
+        %(op_rd)s;
+        %(code)s;
+
+        if (fault == NoFault) {
+            fault = xc->writeMem((uint8_t*)data, eCount * sizeof(TPElem), EA,
+                                 flags, NULL, wrEn);
+        }
+
+        return fault;
+    }
+}};
+
+def template SmeSt1xCompleteAcc {{
+    template <typename TPElem>
+    Fault %(class_name)s<TPElem>::completeAcc(PacketPtr pkt, ExecContext *xc,
+                                  trace::InstRecord *traceData) const
+    {
+        return NoFault;
+    }
+}};
+
+def template SmeStrDeclare {{
+    class %(class_name)s : public %(base_class)s
+    {
+      private:
+        %(reg_idx_arr_decl)s;
+
+      public:
+        /// Constructor.
+        %(class_name)s(ExtMachInst machInst, uint64_t imm,
+                       RegIndex op1, RegIndex op2)
+            : %(base_class)s("%(mnemonic)s", machInst, %(op_class)s,
+                              imm, op1, op2)
+        {
+            %(set_reg_idx_arr)s;
+            %(constructor)s;
+        }
+
+        Fault execute(ExecContext *, trace::InstRecord *) const override;
+        Fault initiateAcc(ExecContext *, trace::InstRecord *) const override;
+        Fault completeAcc(PacketPtr, ExecContext *,
+                          trace::InstRecord *) const override;
+    };
+}};
+
+def template SmeStrExecute {{
+    Fault %(class_name)s::execute(ExecContext *xc,
+                                  trace::InstRecord *traceData) const
+    {
+        Fault fault = NoFault;
+        Request::Flags flags = 0;
+
+        %(op_decl)s;
+        %(op_rd)s;
+        %(code)s;
+
+
+        if (fault == NoFault) {
+            auto wrEn = std::vector<bool>(eCount, true);
+            fault = xc->writeMem((uint8_t*)data, eCount, EA,
+                                 flags, NULL, wrEn);
+        }
+
+        return fault;
+    }
+}};
+
+def template SmeStrInitiateAcc {{
+    Fault %(class_name)s::initiateAcc(ExecContext *xc,
+                                  trace::InstRecord *traceData) const
+    {
+        Fault fault = NoFault;
+        Request::Flags flags = 0;
+
+        %(op_decl)s;
+        %(op_rd)s;
+        %(code)s;
+
+        if (fault == NoFault) {
+            auto wrEn = std::vector<bool>(eCount, true);
+            fault = xc->writeMem((uint8_t*)data, eCount, EA,
+                                 flags, NULL, wrEn);
+        }
+
+        return fault;
+    }
+}};
+
+def template SmeStrCompleteAcc {{
+        Fault %(class_name)s::completeAcc(PacketPtr pkt, ExecContext *xc,
+                                  trace::InstRecord *traceData) const
+    {
+        // TODO-SME: Can this fail?
+        return NoFault;
+    }
+}};
+
+def template SmeSt1xExecDeclare {{
+    template
+    Fault %(class_name)s<%(targs)s>::execute(
+            ExecContext *, trace::InstRecord *) const;
+    template
+    Fault %(class_name)s<%(targs)s>::initiateAcc(
+            ExecContext *, trace::InstRecord *) const;
+    template
+    Fault %(class_name)s<%(targs)s>::completeAcc(
+            PacketPtr, ExecContext *, trace::InstRecord *) const;
+}};
+
+def template SmeZeroDeclare {{
+    template <typename TPElem>
+    class %(class_name)s : public %(base_class)s
+    {
+      private:
+        %(reg_idx_arr_decl)s;
+
+      public:
+        /// Constructor.
+        %(class_name)s(ExtMachInst machInst, uint8_t imm)
+         : %(base_class)s("%(mnemonic)s", machInst, %(op_class)s, imm)
+        {
+            %(set_reg_idx_arr)s;
+            %(constructor)s;
+        }
+
+        Fault execute(ExecContext *, trace::InstRecord *) const override;
+    };
+}};
+
+def template SmeExecute {{
+    Fault
+    %(class_name)s::execute(ExecContext *xc,
+                            trace::InstRecord *traceData) const
+    {
+        Fault fault = NoFault;
+
+        %(op_decl)s;
+        %(op_rd)s;
+        %(code)s;
+
+        if (fault == NoFault) {
+            %(op_wb)s;
+        }
+
+        return fault;
+    }
+}};
+
+def template SmeTemplatedExecute {{
+    template <typename TPElem>
+    Fault
+    %(class_name)s<TPElem>::execute(ExecContext *xc,
+                                    trace::InstRecord *traceData) const
+    {
+        Fault fault = NoFault;
+
+        %(op_decl)s;
+        %(op_rd)s;
+        %(code)s;
+
+        if (fault == NoFault) {
+            %(op_wb)s;
+        }
+
+        return fault;
+    }
+}};
+
+def template SmeDualTemplatedExecute {{
+    template <typename TPSElem, typename TPDElem>
+    Fault
+    %(class_name)s<TPSElem, TPDElem>::execute(ExecContext *xc,
+                                        trace::InstRecord *traceData) const
+    {
+        Fault fault = NoFault;
+
+        %(op_decl)s;
+        %(op_rd)s;
+        %(code)s;
+
+        if (fault == NoFault) {
+            %(op_wb)s;
+        }
+
+        return fault;
+    }
+}};
+
+def template SmeTripleTemplatedExecute {{
+    template <typename TPS1Elem, typename TPS2Elem, typename TPDElem>
+    Fault
+    %(class_name)s<TPS1Elem, TPS2Elem, TPDElem>::execute(ExecContext *xc,
+                                        trace::InstRecord *traceData) const
+    {
+        Fault fault = NoFault;
+
+        %(op_decl)s;
+        %(op_rd)s;
+        %(code)s;
+
+        if (fault == NoFault) {
+            %(op_wb)s;
+        }
+
+        return fault;
+    }
+}};
+
+def template SmeOpExecDeclare {{
+    template
+    Fault %(class_name)s<%(targs)s>::execute(
+            ExecContext *, trace::InstRecord *) const;
+}};
diff --git a/src/arch/arm/isa/templates/sve.isa b/src/arch/arm/isa/templates/sve.isa
index fc38a2b979..9260441c2c 100644
--- a/src/arch/arm/isa/templates/sve.isa
+++ b/src/arch/arm/isa/templates/sve.isa
@@ -800,6 +800,33 @@ class %(class_name)s : public %(base_class)s
 };
 }};
 
+def template SvePselOpDeclare {{
+template <class _Element>
+class %(class_name)s : public %(base_class)s
+{
+  private:
+    %(reg_idx_arr_decl)s;
+
+  protected:
+    typedef _Element Element;
+    typedef _Element TPElem;
+
+  public:
+    %(class_name)s(ExtMachInst machInst,
+                   RegIndex dest, RegIndex op1,
+                   RegIndex gp, RegIndex op2,
+                   uint64_t imm)
+        : %(base_class)s("%(mnemonic)s", machInst, %(op_class)s,
+                          dest, op1, gp, op2, imm)
+    {
+        %(set_reg_idx_arr)s;
+        %(constructor)s;
+    }
+
+    Fault execute(ExecContext *, trace::InstRecord *) const override;
+};
+}};
+
 def template SveCompTermOpDeclare {{
 template <class _Element>
 class %(class_name)s : public %(base_class)s
@@ -1170,6 +1197,32 @@ class %(class_name)s : public %(base_class)s
 };
 }};
 
+def template SveClampOpDeclare {{
+template <class _Element>
+class %(class_name)s : public %(base_class)s
+{
+  private:
+    %(reg_idx_arr_decl)s;
+
+  protected:
+    typedef _Element Element;
+    typedef _Element TPElem;
+
+  public:
+    // Constructor
+    %(class_name)s(ExtMachInst machInst,
+                   RegIndex dest, RegIndex op1, RegIndex op2)
+        : %(base_class)s("%(mnemonic)s", machInst, %(op_class)s,
+                         dest, op1, op2)
+    {
+        %(set_reg_idx_arr)s;
+        %(constructor)s;
+    }
+
+    Fault execute(ExecContext *, trace::InstRecord *) const override;
+};
+}};
+
 def template SveWideningOpExecute {{
     template <class SElement, class DElement>
     Fault
diff --git a/src/arch/arm/isa/templates/templates.isa b/src/arch/arm/isa/templates/templates.isa
index 0b4abfcce4..047cd1ef79 100644
--- a/src/arch/arm/isa/templates/templates.isa
+++ b/src/arch/arm/isa/templates/templates.isa
@@ -82,3 +82,6 @@
 //Templates for SVE instructions
 ##include "sve.isa"
 ##include "sve_mem.isa"
+
+//Templates for SME instructions
+##include "sme.isa"

From b860e2039baa3bd14386b8d294dca3cc763e106b Mon Sep 17 00:00:00 2001
From: Sascha Bischoff <sascha.bischoff@arm.com>
Date: Wed, 3 Aug 2022 17:21:48 +0100
Subject: [PATCH 151/492] system-arm: Enable SME in the bootloader

In addition to SVE (which was already being enabled by the bootloader)
we also enable SME to allow lower ELs to use it.

Jira Issue: https://gem5.atlassian.net/browse/GEM5-1289

Change-Id: I7078a80e9a857c7cf91e3c1e52fe3812fa422394
Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/64341
Maintainer: Jason Lowe-Power <power.jg@gmail.com>
Tested-by: kokoro <noreply+kokoro@google.com>
Reviewed-by: Jason Lowe-Power <power.jg@gmail.com>
---
 system/arm/bootloader/arm64/boot.S | 1 +
 1 file changed, 1 insertion(+)

diff --git a/system/arm/bootloader/arm64/boot.S b/system/arm/bootloader/arm64/boot.S
index 50415615d2..4f26953318 100644
--- a/system/arm/bootloader/arm64/boot.S
+++ b/system/arm/bootloader/arm64/boot.S
@@ -64,6 +64,7 @@ _start:
         msr	scr_el3, x0
 
         mov	x0, #(1 << 8)			// Disable SVE trap to EL3
+        orr	x0, x0, #(1 << 12)		// Disable SME trap to EL3
         msr	cptr_el3, x0			// Disable copro. traps to EL3
 
         /*

From 0bce2e56d9faa3e88554487233c60f5fef98d0c4 Mon Sep 17 00:00:00 2001
From: Matthew Poremba <matthew.poremba@amd.com>
Date: Thu, 15 Dec 2022 13:11:44 -0800
Subject: [PATCH 152/492] dev: Ignore MC146818 UIP bit / Fix x86 Linux 5.11+
 boot

As of Linux 5.11, the MC146818 code was changed to avoid reading garbage
data that may occur if the is a read while the registers are being
updated:

github.com/torvalds/linux/commit/05a0302c35481e9b47fb90ba40922b0a4cae40d8

Previously toggling this bit was fine as Linux would check twice. It now
checks before and after reading time information, causing it to retry
infinitely until eventually Linux bootup fails due to watchdog timeout.

This changeset always sets update in progress to false. Since this is a
simulation, the updates probably will not be occurring at the same time
a read is occurring.

Change-Id: If0f440de9f9a6bc5a773fc935d1d5af5b98a9a4b
Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/66731
Reviewed-by: Matt Sinclair <mattdsinclair@gmail.com>
Tested-by: kokoro <noreply+kokoro@google.com>
Maintainer: Bobby Bruce <bbruce@ucdavis.edu>
Reviewed-by: Jason Lowe-Power <power.jg@gmail.com>
---
 src/dev/mc146818.cc | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/src/dev/mc146818.cc b/src/dev/mc146818.cc
index 919efb06f7..2bfe877b95 100644
--- a/src/dev/mc146818.cc
+++ b/src/dev/mc146818.cc
@@ -233,8 +233,9 @@ MC146818::readData(uint8_t addr)
     else {
         switch (addr) {
           case RTC_STAT_REGA:
-            // toggle UIP bit for linux
-            stat_regA.uip = !stat_regA.uip;
+            // Linux after v5.10 checks this multiple times so toggling
+            // leads to a deadlock on bootup.
+            stat_regA.uip = 0;
             return stat_regA;
             break;
           case RTC_STAT_REGB:

From 5a1414d78233357d361d47aa3a74c16eb240a28c Mon Sep 17 00:00:00 2001
From: "Daniel R. Carvalho" <odanrc@yahoo.com.br>
Date: Sat, 14 Jan 2023 10:16:09 -0300
Subject: [PATCH 153/492] arch: Remove a couple of deprecated namespaces

These namespaces have gone through the deprecation period
and can now be removed: X86Macroops, SMBios, RomLabels,
DeliveryMode, ConditionTests.

Change-Id: I6ff5e98319d92e27743a9fbeeab054497a2392e0
Signed-off-by: Daniel R. Carvalho <odanrc@yahoo.com.br>
Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/67375
Tested-by: kokoro <noreply+kokoro@google.com>
Maintainer: Jason Lowe-Power <power.jg@gmail.com>
Reviewed-by: Jason Lowe-Power <power.jg@gmail.com>
---
 src/arch/x86/bios/smbios.hh   | 1 -
 src/arch/x86/fs_workload.hh   | 1 -
 src/arch/x86/insts/microop.hh | 1 -
 src/arch/x86/intmessage.hh    | 1 -
 src/arch/x86/isa/macroop.isa  | 1 -
 src/arch/x86/isa/rom.isa      | 4 +---
 6 files changed, 1 insertion(+), 8 deletions(-)

diff --git a/src/arch/x86/bios/smbios.hh b/src/arch/x86/bios/smbios.hh
index dc38676445..88d3344cbb 100644
--- a/src/arch/x86/bios/smbios.hh
+++ b/src/arch/x86/bios/smbios.hh
@@ -61,7 +61,6 @@ struct X86SMBiosSMBiosTableParams;
 namespace X86ISA
 {
 
-GEM5_DEPRECATED_NAMESPACE(SMBios, smbios);
 namespace smbios
 {
 
diff --git a/src/arch/x86/fs_workload.hh b/src/arch/x86/fs_workload.hh
index 5c1187cda4..9d14f91bb5 100644
--- a/src/arch/x86/fs_workload.hh
+++ b/src/arch/x86/fs_workload.hh
@@ -55,7 +55,6 @@ namespace gem5
 namespace X86ISA
 {
 
-GEM5_DEPRECATED_NAMESPACE(SMBios, smbios);
 namespace smbios
 {
 
diff --git a/src/arch/x86/insts/microop.hh b/src/arch/x86/insts/microop.hh
index 9cbdec87d1..384e15e7e4 100644
--- a/src/arch/x86/insts/microop.hh
+++ b/src/arch/x86/insts/microop.hh
@@ -48,7 +48,6 @@ namespace gem5
 namespace X86ISA
 {
 
-GEM5_DEPRECATED_NAMESPACE(ConditionTests, condition_tests);
 namespace condition_tests
 {
 
diff --git a/src/arch/x86/intmessage.hh b/src/arch/x86/intmessage.hh
index f7692e25a3..71e4765c76 100644
--- a/src/arch/x86/intmessage.hh
+++ b/src/arch/x86/intmessage.hh
@@ -52,7 +52,6 @@ namespace X86ISA
         Bitfield<21> trigger;
     EndBitUnion(TriggerIntMessage)
 
-    GEM5_DEPRECATED_NAMESPACE(DeliveryMode, delivery_mode);
     namespace delivery_mode
     {
         enum IntDeliveryMode
diff --git a/src/arch/x86/isa/macroop.isa b/src/arch/x86/isa/macroop.isa
index 691e8d011c..d1b9e22032 100644
--- a/src/arch/x86/isa/macroop.isa
+++ b/src/arch/x86/isa/macroop.isa
@@ -76,7 +76,6 @@ output header {{
 
 // Basic instruction class declaration template.
 def template MacroDeclare {{
-    GEM5_DEPRECATED_NAMESPACE(X86Macroop, x86_macroop);
     namespace x86_macroop
     {
         /**
diff --git a/src/arch/x86/isa/rom.isa b/src/arch/x86/isa/rom.isa
index 9aef3ba3eb..bf2f9ff8e2 100644
--- a/src/arch/x86/isa/rom.isa
+++ b/src/arch/x86/isa/rom.isa
@@ -42,9 +42,7 @@ let {{
 
     class X86MicrocodeRom(Rom):
         def getDeclaration(self):
-            declareLabels = \
-                "GEM5_DEPRECATED_NAMESPACE(RomLabels, rom_labels);\n"
-            declareLabels += "namespace rom_labels\n{\n"
+            declareLabels = "namespace rom_labels\n{\n"
             for (label, microop) in self.labels.items():
                 declareLabels += "const static uint64_t label_%s = %d;\n" \
                                   % (label, microop.micropc)

From 8110a422665f8a40dc639aab8db7a0fe33fc23ca Mon Sep 17 00:00:00 2001
From: Giacomo Travaglini <giacomo.travaglini@arm.com>
Date: Wed, 18 Jan 2023 08:53:02 +0000
Subject: [PATCH 154/492] arch-arm: Replace Loader with loader namespace in SME
 code

This is fixing our nightly tests [1].
There was a merge conflict between the removal of the Loader namespace
and the SME patches which were still using the old capitalized version

[1]: https://jenkins.gem5.org/job/nightly/491/

Change-Id: I9f709b2fff252ed6fcc76cc984592e713ab53766
Signed-off-by: Giacomo Travaglini <giacomo.travaglini@arm.com>
Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/67333
Reviewed-by: Daniel Carvalho <odanrc@yahoo.com.br>
Maintainer: Daniel Carvalho <odanrc@yahoo.com.br>
Reviewed-by: Richard Cooper <richard.cooper@arm.com>
Tested-by: kokoro <noreply+kokoro@google.com>
---
 src/arch/arm/insts/sme.cc | 18 +++++++++---------
 src/arch/arm/insts/sme.hh | 18 +++++++++---------
 src/arch/arm/insts/sve.cc |  4 ++--
 src/arch/arm/insts/sve.hh |  4 ++--
 4 files changed, 22 insertions(+), 22 deletions(-)

diff --git a/src/arch/arm/insts/sme.cc b/src/arch/arm/insts/sme.cc
index 305d332514..43f4579842 100644
--- a/src/arch/arm/insts/sme.cc
+++ b/src/arch/arm/insts/sme.cc
@@ -45,7 +45,7 @@ namespace ArmISA
 
 std::string
 SmeAddOp::generateDisassembly(Addr pc,
-                              const Loader::SymbolTable *symtab) const
+                              const loader::SymbolTable *symtab) const
 {
     std::stringstream ss;
     printMnemonic(ss, "", false);
@@ -61,7 +61,7 @@ SmeAddOp::generateDisassembly(Addr pc,
 
 std::string
 SmeAddVlOp::generateDisassembly(Addr pc,
-                                const Loader::SymbolTable *symtab) const
+                                const loader::SymbolTable *symtab) const
 {
     std::stringstream ss;
     printMnemonic(ss, "", false);
@@ -76,7 +76,7 @@ SmeAddVlOp::generateDisassembly(Addr pc,
 
 std::string
 SmeLd1xSt1xOp::generateDisassembly(Addr pc,
-                                   const Loader::SymbolTable *symtab) const
+                                   const loader::SymbolTable *symtab) const
 {
     std::stringstream ss;
     printMnemonic(ss, "", false);
@@ -94,7 +94,7 @@ SmeLd1xSt1xOp::generateDisassembly(Addr pc,
 
 std::string
 SmeLdrStrOp::generateDisassembly(Addr pc,
-                                 const Loader::SymbolTable *symtab) const
+                                 const loader::SymbolTable *symtab) const
 {
     std::stringstream ss;
     printMnemonic(ss, "", false);
@@ -108,7 +108,7 @@ SmeLdrStrOp::generateDisassembly(Addr pc,
 
 std::string
 SmeMovExtractOp::generateDisassembly(Addr pc,
-                                     const Loader::SymbolTable *symtab) const
+                                     const loader::SymbolTable *symtab) const
 {
     std::stringstream ss;
     printMnemonic(ss, "", false);
@@ -124,7 +124,7 @@ SmeMovExtractOp::generateDisassembly(Addr pc,
 
 std::string
 SmeMovInsertOp::generateDisassembly(Addr pc,
-                                    const Loader::SymbolTable *symtab) const
+                                    const loader::SymbolTable *symtab) const
 {
     std::stringstream ss;
     printMnemonic(ss, "", false);
@@ -140,7 +140,7 @@ SmeMovInsertOp::generateDisassembly(Addr pc,
 
 std::string
 SmeOPOp::generateDisassembly(Addr pc,
-                             const Loader::SymbolTable *symtab) const
+                             const loader::SymbolTable *symtab) const
 {
     std::stringstream ss;
     printMnemonic(ss, "", false);
@@ -158,7 +158,7 @@ SmeOPOp::generateDisassembly(Addr pc,
 
 std::string
 SmeRdsvlOp::generateDisassembly(Addr pc,
-                                const Loader::SymbolTable *symtab) const
+                                const loader::SymbolTable *symtab) const
 {
     std::stringstream ss;
     printMnemonic(ss, "", false);
@@ -171,7 +171,7 @@ SmeRdsvlOp::generateDisassembly(Addr pc,
 
 std::string
 SmeZeroOp::generateDisassembly(Addr pc,
-                               const Loader::SymbolTable *symtab) const
+                               const loader::SymbolTable *symtab) const
 {
     std::stringstream ss;
     ArmStaticInst::printMnemonic(ss, "", false);
diff --git a/src/arch/arm/insts/sme.hh b/src/arch/arm/insts/sme.hh
index d6cbdde5a7..198ce52f77 100644
--- a/src/arch/arm/insts/sme.hh
+++ b/src/arch/arm/insts/sme.hh
@@ -63,7 +63,7 @@ class SmeAddOp : public ArmStaticInst
     {}
 
     std::string generateDisassembly(
-            Addr pc, const Loader::SymbolTable *symtab) const override;
+            Addr pc, const loader::SymbolTable *symtab) const override;
 };
 
 // Used for the SME ADDSPL/ADDSVL instructions
@@ -82,7 +82,7 @@ class SmeAddVlOp : public ArmStaticInst
     {}
 
     std::string generateDisassembly(
-            Addr pc, const Loader::SymbolTable *symtab) const override;
+            Addr pc, const loader::SymbolTable *symtab) const override;
 };
 
 // Used for SME LD1x/ST1x instrucions
@@ -105,7 +105,7 @@ class SmeLd1xSt1xOp : public ArmStaticInst
     {}
 
     std::string generateDisassembly(
-            Addr pc, const Loader::SymbolTable *symtab) const override;
+            Addr pc, const loader::SymbolTable *symtab) const override;
 };
 
 // Used for SME LDR/STR instructions
@@ -124,7 +124,7 @@ class SmeLdrStrOp : public ArmStaticInst
     {}
 
     std::string generateDisassembly(
-            Addr pc, const Loader::SymbolTable *symtab) const override;
+            Addr pc, const loader::SymbolTable *symtab) const override;
 };
 
 // Used for SME MOVA (Tile to Vector)
@@ -145,7 +145,7 @@ class SmeMovExtractOp : public ArmStaticInst
     {}
 
     std::string generateDisassembly(
-            Addr pc, const Loader::SymbolTable *symtab) const override;
+            Addr pc, const loader::SymbolTable *symtab) const override;
 };
 
 // Used for SME MOVA (Vector to Tile)
@@ -166,7 +166,7 @@ class SmeMovInsertOp : public ArmStaticInst
     {}
 
     std::string generateDisassembly(
-            Addr pc, const Loader::SymbolTable *symtab) const override;
+            Addr pc, const loader::SymbolTable *symtab) const override;
 };
 
 // Used for SME output product instructions
@@ -187,7 +187,7 @@ class SmeOPOp : public ArmStaticInst
     {}
 
     std::string generateDisassembly(
-            Addr pc, const Loader::SymbolTable *symtab) const override;
+            Addr pc, const loader::SymbolTable *symtab) const override;
 };
 
 // Used for the SME RDSVL instruction
@@ -204,7 +204,7 @@ class SmeRdsvlOp : public ArmStaticInst
     {}
 
     std::string generateDisassembly(
-            Addr pc, const Loader::SymbolTable *symtab) const override;
+            Addr pc, const loader::SymbolTable *symtab) const override;
 };
 
 // Used for SME ZERO
@@ -220,7 +220,7 @@ class SmeZeroOp : public ArmStaticInst
     {}
 
     std::string generateDisassembly(
-            Addr pc, const Loader::SymbolTable *symtab) const override;
+            Addr pc, const loader::SymbolTable *symtab) const override;
 };
 
 } // namespace ArmISA
diff --git a/src/arch/arm/insts/sve.cc b/src/arch/arm/insts/sve.cc
index 9d9c2bcb1c..546074c8fd 100644
--- a/src/arch/arm/insts/sve.cc
+++ b/src/arch/arm/insts/sve.cc
@@ -163,7 +163,7 @@ SveWhileOp::generateDisassembly(
 
 std::string
 SvePselOp::generateDisassembly(Addr pc,
-                                const Loader::SymbolTable *symtab) const
+                               const loader::SymbolTable *symtab) const
 {
     std::stringstream ss;
     printMnemonic(ss, "", false);
@@ -851,7 +851,7 @@ SveComplexIdxOp::generateDisassembly(
 
 std::string
 SveClampOp::generateDisassembly(
-        Addr pc, const Loader::SymbolTable *symtab) const
+        Addr pc, const loader::SymbolTable *symtab) const
 {
     std::stringstream ss;
     printMnemonic(ss, "", false);
diff --git a/src/arch/arm/insts/sve.hh b/src/arch/arm/insts/sve.hh
index 63a59d493a..66d82f0a3f 100644
--- a/src/arch/arm/insts/sve.hh
+++ b/src/arch/arm/insts/sve.hh
@@ -199,7 +199,7 @@ class SvePselOp : public ArmStaticInst
     {}
 
     std::string generateDisassembly(
-            Addr pc, const Loader::SymbolTable *symtab) const override;
+            Addr pc, const loader::SymbolTable *symtab) const override;
 };
 
 /// Compare and terminate loop SVE instruction.
@@ -989,7 +989,7 @@ class SveClampOp : public ArmStaticInst
     {}
 
     std::string generateDisassembly(
-            Addr pc, const Loader::SymbolTable *symtab) const override;
+            Addr pc, const loader::SymbolTable *symtab) const override;
 };
 
 

From 8d0fde19612e5e3502947a324cdf102664685e7d Mon Sep 17 00:00:00 2001
From: Gabriel Busnot <gabriel.busnot@arteris.com>
Date: Thu, 19 Jan 2023 11:08:51 +0000
Subject: [PATCH 155/492] python: Fix deprecated decorator

The deprecation message was firing during the decoration process instead of firing upon first call to deprecated function. The message now fires only if the deprected function is called.

Change-Id: I2d510eb24884fdba0123e71e8472db68ae9d2ce4
Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/67334
Maintainer: Jason Lowe-Power <power.jg@gmail.com>
Reviewed-by: Jason Lowe-Power <power.jg@gmail.com>
Reviewed-by: Daniel Carvalho <odanrc@yahoo.com.br>
Tested-by: kokoro <noreply+kokoro@google.com>
Reviewed-by: Richard Cooper <richard.cooper@arm.com>
---
 src/python/m5/util/__init__.py | 8 ++++++--
 1 file changed, 6 insertions(+), 2 deletions(-)

diff --git a/src/python/m5/util/__init__.py b/src/python/m5/util/__init__.py
index bc4ab4a0f5..5ae48754ab 100644
--- a/src/python/m5/util/__init__.py
+++ b/src/python/m5/util/__init__.py
@@ -108,8 +108,12 @@ def deprecated(replacement=None, logger=warn):
                     message += f" Prefer {replacement} instead."
             logger(message)
 
-        notifyDeprecation()
-        return func
+        @wraps(func)
+        def wrapper(*args, **kwargs):
+            notifyDeprecation()
+            return func(*args, **kwargs)
+
+        return wrapper
 
     return decorator
 

From 534d9dea10847ed0efbc289c3591e0f671c05765 Mon Sep 17 00:00:00 2001
From: Gabriel Busnot <gabriel.busnot@arteris.com>
Date: Tue, 24 Jan 2023 08:28:48 +0000
Subject: [PATCH 156/492] scons: Raise bin size limit for sanitized builds.

Sanitizers can enlarge binary size drammatically, north of 2GB. This
can prevent successful linkage due to symbol relocation outside from
the 2GB region allocated by the small x86_64 code model that is
enabled by default (32-bit relative offset limitation). Switching to
the medium model in x86_64 enables 64-bit relative offset for large
objects (>64KB by default) while sticking to 32-bit relative
addressing for code and smaller objects. Note this comes at a
potential performance cost so it should not be enabled in all cases.
This should still be a very happy medium for non-perf-critical
sanitized builds.

Jira issue: https://gem5.atlassian.net/browse/GEM5-1313

Change-Id: I9aceacfcda99cc29c8fb24b7c69aaab019ce97fd
Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/67451
Tested-by: kokoro <noreply+kokoro@google.com>
Reviewed-by: Jason Lowe-Power <power.jg@gmail.com>
Maintainer: Jason Lowe-Power <power.jg@gmail.com>
---
 SConstruct | 40 ++++++++++++++++++++++++++++++++++++++++
 1 file changed, 40 insertions(+)

diff --git a/SConstruct b/SConstruct
index e08c2984e5..43ca3ac118 100755
--- a/SConstruct
+++ b/SConstruct
@@ -290,6 +290,17 @@ main['CLANG'] = CXX_version and CXX_version.find('clang') >= 0
 if main['GCC'] + main['CLANG'] > 1:
     error('Two compilers enabled at once?')
 
+# Find the gem5 binary target architecture (usually host architecture). The
+# "Target: <target>" is consistent accross gcc and clang at the time of
+# writting this.
+bin_target_arch = readCommand([main['CXX'], '--verbose'], exception=False)
+main["BIN_TARGET_ARCH"] = (
+    "x86_64"
+    if bin_target_arch.find("Target: x86_64") != -1
+    else "aarch64"
+    if bin_target_arch.find("Target: aarch64") != -1
+    else "unknown"
+)
 
 ########################################################################
 #
@@ -516,6 +527,35 @@ for variant_path in variant_paths:
             env.Append(CCFLAGS=['-fsanitize=%s' % sanitizers,
                                  '-fno-omit-frame-pointer'],
                         LINKFLAGS='-fsanitize=%s' % sanitizers)
+            if main["BIN_TARGET_ARCH"] == "x86_64":
+                # Sanitizers can enlarge binary size drammatically, north of
+                # 2GB.  This can prevent successful linkage due to symbol
+                # relocation outside from the 2GB region allocated by the small
+                # x86_64 code model that is enabled by default (32-bit relative
+                # offset limitation).  Switching to the medium model in x86_64
+                # enables 64-bit relative offset for large objects (>64KB by
+                # default) while sticking to 32-bit relative addressing for
+                # code and smaller objects. Note this comes at a potential
+                # performance cost so it should not be enabled in all cases.
+                # This should still be a very happy medium for
+                # non-perf-critical sanitized builds.
+                env.Append(CCFLAGS='-mcmodel=medium')
+                env.Append(LINKFLAGS='-mcmodel=medium')
+            elif main["BIN_TARGET_ARCH"] == "aarch64":
+                # aarch64 default code model is small but with different
+                # constrains than for x86_64. With aarch64, the small code
+                # model enables 4GB distance between symbols. This is
+                # sufficient for the largest ALL/gem5.debug target with all
+                # sanitizers enabled at the time of writting this. Note that
+                # the next aarch64 code model is "large" which prevents dynamic
+                # linkage so it should be avoided when possible.
+                pass
+            else:
+                warning(
+                    "Unknown code model options for your architecture. "
+                    "Linkage might fail for larger binaries "
+                    "(e.g., ALL/gem5.debug with sanitizers enabled)."
+                )
         else:
             warning("Don't know how to enable %s sanitizer(s) for your "
                     "compiler." % sanitizers)

From d48e53e0a2cece73856b8ae91cd537654ac6581b Mon Sep 17 00:00:00 2001
From: Johnny <johnnyko@google.com>
Date: Wed, 18 Jan 2023 13:43:49 +0800
Subject: [PATCH 157/492] scons: force libasan to static linking

The asan(enable with --with-asan) sanitizer interpret calls to dlopen().
That replaces the RUNPATH of an executable with RUNPATH of libasan.so
after libasan.so is loaded by loader. Then it may cause some shared
libraries missing, i.e. the error messages is like
"cannot open shared object file: No such file or directory" since the
RUNPATH is no longer correct. Force the libasan to static linking
can avoid this issue since libasan.a does not have a RUNPATH, thus
the replacement will never happen.

Change-Id: I8e5ff4d1fbe4644a258054be6e9f6d4db9062e56
Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/67298
Reviewed-by: Earl Ou <shunhsingou@google.com>
Tested-by: kokoro <noreply+kokoro@google.com>
Maintainer: Gabe Black <gabe.black@gmail.com>
---
 SConstruct | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/SConstruct b/SConstruct
index 43ca3ac118..2e525a3407 100755
--- a/SConstruct
+++ b/SConstruct
@@ -526,7 +526,9 @@ for variant_path in variant_paths:
         if env['GCC'] or env['CLANG']:
             env.Append(CCFLAGS=['-fsanitize=%s' % sanitizers,
                                  '-fno-omit-frame-pointer'],
-                        LINKFLAGS='-fsanitize=%s' % sanitizers)
+                        LINKFLAGS=['-fsanitize=%s' % sanitizers,
+                                   '-static-libasan'])
+
             if main["BIN_TARGET_ARCH"] == "x86_64":
                 # Sanitizers can enlarge binary size drammatically, north of
                 # 2GB.  This can prevent successful linkage due to symbol

From a33b4931d7270e0497fdc882f901e0a9fda5edf1 Mon Sep 17 00:00:00 2001
From: Nathanael Premillieu <nathanael.premillieu@huawei.com>
Date: Fri, 27 Jan 2023 15:11:19 +0100
Subject: [PATCH 158/492] mem-cache: schedule already ready pf next cycle

Fix a bug where a ready prefetch request was not
sent directly because the schedMemSideSendEvent
was not called with the right time.
This fix mimics what is done in recvTimingResp.

Change-Id: Ib11f8003ca1b006d976c8cc8ea541434b8902beb
Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/67473
Reviewed-by: Jason Lowe-Power <power.jg@gmail.com>
Maintainer: Jason Lowe-Power <power.jg@gmail.com>
Tested-by: kokoro <noreply+kokoro@google.com>
Reviewed-by: Bobby Bruce <bbruce@ucdavis.edu>
---
 src/mem/cache/base.cc | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/src/mem/cache/base.cc b/src/mem/cache/base.cc
index cf6c9fe226..639d02610e 100644
--- a/src/mem/cache/base.cc
+++ b/src/mem/cache/base.cc
@@ -464,7 +464,8 @@ BaseCache::recvTimingReq(PacketPtr pkt)
 
     if (prefetcher) {
         // track time of availability of next prefetch, if any
-        Tick next_pf_time = prefetcher->nextPrefetchReadyTime();
+        Tick next_pf_time = std::max(
+                            prefetcher->nextPrefetchReadyTime(), clockEdge());
         if (next_pf_time != MaxTick) {
             schedMemSideSendEvent(next_pf_time);
         }

From 13dca0ebcbc7562e4b26ce40dd91f3bd65052feb Mon Sep 17 00:00:00 2001
From: Gabriel Busnot <gabriel.busnot@arteris.com>
Date: Wed, 18 Jan 2023 15:22:11 +0000
Subject: [PATCH 159/492] scons: Link tcmalloc_minimal by default instead of
 tcmalloc

tcmalloc triggers asan while tcmalloc_minimal does not. The feature
difference is not significant for regular gem5 use.

Jira issue: https://gem5.atlassian.net/browse/GEM5-1312

Change-Id: I410a26d2ecdf422c456d44276d9e7ec60582b8cc
Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/67431
Maintainer: Bobby Bruce <bbruce@ucdavis.edu>
Tested-by: kokoro <noreply+kokoro@google.com>
Reviewed-by: Bobby Bruce <bbruce@ucdavis.edu>
---
 SConstruct | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/SConstruct b/SConstruct
index 2e525a3407..6abbb51e00 100755
--- a/SConstruct
+++ b/SConstruct
@@ -609,9 +609,9 @@ for variant_path in variant_paths:
 
     if not GetOption('without_tcmalloc'):
         with gem5_scons.Configure(env) as conf:
-            if conf.CheckLib('tcmalloc'):
+            if conf.CheckLib('tcmalloc_minimal'):
                 conf.env.Append(CCFLAGS=conf.env['TCMALLOC_CCFLAGS'])
-            elif conf.CheckLib('tcmalloc_minimal'):
+            elif conf.CheckLib('tcmalloc'):
                 conf.env.Append(CCFLAGS=conf.env['TCMALLOC_CCFLAGS'])
             else:
                 warning("You can get a 12% performance improvement by "

From e4be93b55f2f71aeaf2cc0a4ef69ed9eed7be152 Mon Sep 17 00:00:00 2001
From: Gabe Black <gabeblack@google.com>
Date: Tue, 31 Jan 2023 09:38:02 -0800
Subject: [PATCH 160/492] sim: Add some helpers for setting up Signal*Ports in
 python.

The only difference between these types in python are the compatibility
strings which restrict what can connect to what. For ports which are
generally useful like interrupts or resets, they should have port types
with special names and even more restrictive compatibility. For other
ports which are one off signals between components, that would be
overkill, and these helpers will let you make a signal port which is
only restricted to ports which carry the same type of data.

The helpers are intended to look similar to their C++ counterpart
templates, and are functions which take a type signature as a string
as their argument, and return a class which is specialized to use that
type signature. The class itself can be stored, or used immediately.

foo = SignalSourcePort('bool')('A port for the foo signal')

Change-Id: If6359b2c69f34ff775cd9aa01272ac487db08bf7
Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/67511
Maintainer: Gabe Black <gabe.black@gmail.com>
Tested-by: kokoro <noreply+kokoro@google.com>
Reviewed-by: Yu-hsin Wang <yuhsingw@google.com>
---
 src/sim/SConscript    |  1 +
 src/sim/SignalPort.py | 77 +++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 78 insertions(+)
 create mode 100644 src/sim/SignalPort.py

diff --git a/src/sim/SConscript b/src/sim/SConscript
index 75b595b8a1..e26676c00a 100644
--- a/src/sim/SConscript
+++ b/src/sim/SConscript
@@ -42,6 +42,7 @@ SimObject('SubSystem.py', sim_objects=['SubSystem'])
 SimObject('RedirectPath.py', sim_objects=['RedirectPath'])
 SimObject('PowerState.py', sim_objects=['PowerState'], enums=['PwrState'])
 SimObject('PowerDomain.py', sim_objects=['PowerDomain'])
+SimObject('SignalPort.py', sim_objects=[])
 
 Source('async.cc')
 Source('backtrace_%s.cc' % env['BACKTRACE_IMPL'], add_tags='gem5 trace')
diff --git a/src/sim/SignalPort.py b/src/sim/SignalPort.py
new file mode 100644
index 0000000000..fc529a8b45
--- /dev/null
+++ b/src/sim/SignalPort.py
@@ -0,0 +1,77 @@
+# Copyright 2023 Google, Inc.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are
+# met: redistributions of source code must retain the above copyright
+# notice, this list of conditions and the following disclaimer;
+# redistributions in binary form must reproduce the above copyright
+# notice, this list of conditions and the following disclaimer in the
+# documentation and/or other materials provided with the distribution;
+# neither the name of the copyright holders nor the names of its
+# contributors may be used to endorse or promote products derived from
+# this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+from m5.params import Port, VectorPort
+
+SIGNAL_SOURCE_ROLE_TEMPLATE = "Signal source <%s>"
+SIGNAL_SINK_ROLE_TEMPLATE = "Signal sink <%s>"
+
+
+def SignalSourcePort(type_signature):
+    source_role = SIGNAL_SOURCE_ROLE_TEMPLATE % type_signature
+    sink_role = SIGNAL_SINK_ROLE_TEMPLATE % type_signature
+    Port.compat(source_role, sink_role)
+
+    class SignalSourcePort(Port):
+        def __init__(self, desc):
+            super().__init__(source_role, desc, is_source=True)
+
+    return SignalSourcePort
+
+
+def VectorSignalSourcePort(type_signature):
+    source_role = SIGNAL_SOURCE_ROLE_TEMPLATE % type_signature
+    sink_role = SIGNAL_SINK_ROLE_TEMPLATE % type_signature
+    Port.compat(source_role, sink_role)
+
+    class VectorSignalSourcePort(VectorPort):
+        def __init__(self, desc):
+            super().__init__(source_role, desc, is_source=True)
+
+    return VectorSignalSourcePort
+
+
+def SignalSinkPort(type_signature):
+    source_role = SIGNAL_SOURCE_ROLE_TEMPLATE % type_signature
+    sink_role = SIGNAL_SINK_ROLE_TEMPLATE % type_signature
+    Port.compat(source_role, sink_role)
+
+    class SignalSinkPort(Port):
+        def __init__(self, desc):
+            super().__init__(sink_role, desc)
+
+    return SignalSinkPort
+
+
+def VectorSignalSinkPort(type_signature):
+    source_role = SIGNAL_SOURCE_ROLE_TEMPLATE % type_signature
+    sink_role = SIGNAL_SINK_ROLE_TEMPLATE % type_signature
+    Port.compat(source_role, sink_role)
+
+    class VectorSignalSinkPort(VectorPort):
+        def __init__(self, desc):
+            super().__init__(sink_role, desc)
+
+    return VectorSignalSinkPort

From f2562152e800a8a4af3633e64ca83733cf024abb Mon Sep 17 00:00:00 2001
From: Roger Chang <rogerycchang@google.com>
Date: Tue, 15 Nov 2022 13:48:53 +0000
Subject: [PATCH 161/492] arch-riscv,sim-se: Support RV32 register ABI call

1. Add RegABI32, SyscallABI32
2. Support parse function arguments to host and save result to registers
3. Add write to ThreadPointerReg in archClone
4. Support RV32 M5Op syscall

Change-Id: Ie327b517f41b5d633d2741b6abb5be955281c838
Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/65532
Tested-by: kokoro <noreply+kokoro@google.com>
Maintainer: Jason Lowe-Power <power.jg@gmail.com>
Reviewed-by: Yu-hsin Wang <yuhsingw@google.com>
Reviewed-by: Jason Lowe-Power <power.jg@gmail.com>
---
 src/arch/riscv/isa/formats/m5ops.isa |  8 ++++++--
 src/arch/riscv/linux/linux.hh        |  4 +++-
 src/arch/riscv/linux/se_workload.cc  |  4 ++--
 src/arch/riscv/linux/se_workload.hh  |  4 ++--
 src/arch/riscv/reg_abi.cc            |  6 ++++++
 src/arch/riscv/reg_abi.hh            | 29 ++++++++++++++++++++++++++++
 src/arch/riscv/se_workload.hh        | 22 ++++++++++++++++++---
 7 files changed, 67 insertions(+), 10 deletions(-)

diff --git a/src/arch/riscv/isa/formats/m5ops.isa b/src/arch/riscv/isa/formats/m5ops.isa
index edc965a7f7..034a0dd2b5 100644
--- a/src/arch/riscv/isa/formats/m5ops.isa
+++ b/src/arch/riscv/isa/formats/m5ops.isa
@@ -38,8 +38,12 @@
 def format M5Op() {{
     iop = InstObjParams(name, Name, 'PseudoOp', '''
             uint64_t result;
-            pseudo_inst::pseudoInst<RegABI64>(xc->tcBase(), M5FUNC, result);
-            a0 = result''',
+            if (machInst.rv_type == RV32) {
+                pseudo_inst::pseudoInst<RegABI32>(xc->tcBase(), M5FUNC, result);
+            } else {
+                pseudo_inst::pseudoInst<RegABI64>(xc->tcBase(), M5FUNC, result);
+            }
+            a0 = rvSext(result)''',
             ['IsNonSpeculative', 'IsSerializeAfter'])
     header_output = BasicDeclare.subst(iop)
     decoder_output = BasicConstructor.subst(iop)
diff --git a/src/arch/riscv/linux/linux.hh b/src/arch/riscv/linux/linux.hh
index b0721836a5..de8bccc85e 100644
--- a/src/arch/riscv/linux/linux.hh
+++ b/src/arch/riscv/linux/linux.hh
@@ -371,8 +371,10 @@ class RiscvLinux32 : public RiscvLinux, public OpenFlagTable<RiscvLinux32>
               uint64_t stack, uint64_t tls)
     {
         ctc->getIsaPtr()->copyRegsFrom(ptc);
+        if (flags & TGT_CLONE_SETTLS)
+            ctc->setReg(RiscvISA::ThreadPointerReg, sext<32>(tls));
         if (stack)
-            ctc->setReg(RiscvISA::StackPointerReg, stack);
+            ctc->setReg(RiscvISA::StackPointerReg, sext<32>(stack));
     }
 };
 
diff --git a/src/arch/riscv/linux/se_workload.cc b/src/arch/riscv/linux/se_workload.cc
index dac28071f4..952fe7140f 100644
--- a/src/arch/riscv/linux/se_workload.cc
+++ b/src/arch/riscv/linux/se_workload.cc
@@ -123,7 +123,7 @@ unameFunc32(SyscallDesc *desc, ThreadContext *tc, VPtr<Linux::utsname> name)
     return 0;
 }
 
-SyscallDescTable<SEWorkload::SyscallABI> EmuLinux::syscallDescs64 = {
+SyscallDescTable<SEWorkload::SyscallABI64> EmuLinux::syscallDescs64 = {
     { 0,    "io_setup" },
     { 1,    "io_destroy" },
     { 2,    "io_submit" },
@@ -462,7 +462,7 @@ SyscallDescTable<SEWorkload::SyscallABI> EmuLinux::syscallDescs64 = {
     { 2011, "getmainvars" }
 };
 
-SyscallDescTable<SEWorkload::SyscallABI> EmuLinux::syscallDescs32 = {
+SyscallDescTable<SEWorkload::SyscallABI32> EmuLinux::syscallDescs32 = {
     { 0,    "io_setup" },
     { 1,    "io_destroy" },
     { 2,    "io_submit" },
diff --git a/src/arch/riscv/linux/se_workload.hh b/src/arch/riscv/linux/se_workload.hh
index 41a3d41f61..4ec818b2ab 100644
--- a/src/arch/riscv/linux/se_workload.hh
+++ b/src/arch/riscv/linux/se_workload.hh
@@ -47,10 +47,10 @@ class EmuLinux : public SEWorkload
   protected:
 
     /// 64 bit syscall descriptors, indexed by call number.
-    static SyscallDescTable<SEWorkload::SyscallABI> syscallDescs64;
+    static SyscallDescTable<SEWorkload::SyscallABI64> syscallDescs64;
 
     /// 32 bit syscall descriptors, indexed by call number.
-    static SyscallDescTable<SEWorkload::SyscallABI> syscallDescs32;
+    static SyscallDescTable<SEWorkload::SyscallABI32> syscallDescs32;
 
   public:
     using Params = RiscvEmuLinuxParams;
diff --git a/src/arch/riscv/reg_abi.cc b/src/arch/riscv/reg_abi.cc
index b9827f74cf..3d48056b14 100644
--- a/src/arch/riscv/reg_abi.cc
+++ b/src/arch/riscv/reg_abi.cc
@@ -39,5 +39,11 @@ const std::vector<RegId> RegABI64::ArgumentRegs = {
     int_reg::A4, int_reg::A5, int_reg::A6
 };
 
+const std::vector<RegId> RegABI32::ArgumentRegs = {
+    int_reg::A0, int_reg::A1, int_reg::A2, int_reg::A3,
+    int_reg::A4, int_reg::A5, int_reg::A6
+};
+
+
 } // namespace RiscvISA
 } // namespace gem5
diff --git a/src/arch/riscv/reg_abi.hh b/src/arch/riscv/reg_abi.hh
index 3419c31222..4c965321f7 100644
--- a/src/arch/riscv/reg_abi.hh
+++ b/src/arch/riscv/reg_abi.hh
@@ -44,7 +44,36 @@ struct RegABI64 : public GenericSyscallABI64
     static const std::vector<RegId> ArgumentRegs;
 };
 
+struct RegABI32 : public GenericSyscallABI32
+{
+    static const std::vector<RegId> ArgumentRegs;
+};
+
 } // namespace RiscvISA
+
+namespace guest_abi
+{
+
+// This method will be used if the size of argument type of function is
+// greater than 4 for Riscv 32.
+template <typename ABI, typename Arg>
+struct Argument<ABI, Arg,
+    typename std::enable_if_t<
+        std::is_base_of_v<RiscvISA::RegABI32, ABI> &&
+        std::is_integral_v<Arg> &&
+        ABI::template IsWideV<Arg>>>
+{
+    static Arg
+    get(ThreadContext *tc, typename ABI::State &state)
+    {
+        panic_if(state >= ABI::ArgumentRegs.size(),
+                "Ran out of syscall argument registers.");
+        return bits(tc->getReg(ABI::ArgumentRegs[state++]), 31, 0);
+    }
+};
+
+}
+
 } // namespace gem5
 
 #endif // __ARCH_RISCV_REG_ABI_HH__
diff --git a/src/arch/riscv/se_workload.hh b/src/arch/riscv/se_workload.hh
index 9ae3be4c05..dd18a92905 100644
--- a/src/arch/riscv/se_workload.hh
+++ b/src/arch/riscv/se_workload.hh
@@ -60,8 +60,8 @@ class SEWorkload : public gem5::SEWorkload
 
     loader::Arch getArch() const override { return loader::Riscv64; }
 
-    //FIXME RISCV needs to handle 64 bit arguments in its 32 bit ISA.
-    using SyscallABI = RegABI64;
+    using SyscallABI64 = RegABI64;
+    using SyscallABI32 = RegABI32;
 };
 
 } // namespace RiscvISA
@@ -70,7 +70,7 @@ namespace guest_abi
 {
 
 template <>
-struct Result<RiscvISA::SEWorkload::SyscallABI, SyscallReturn>
+struct Result<RiscvISA::SEWorkload::SyscallABI64, SyscallReturn>
 {
     static void
     store(ThreadContext *tc, const SyscallReturn &ret)
@@ -85,6 +85,22 @@ struct Result<RiscvISA::SEWorkload::SyscallABI, SyscallReturn>
     }
 };
 
+template <>
+struct Result<RiscvISA::SEWorkload::SyscallABI32, SyscallReturn>
+{
+    static void
+    store(ThreadContext *tc, const SyscallReturn &ret)
+    {
+        if (ret.successful()) {
+            // no error
+            tc->setReg(RiscvISA::ReturnValueReg, sext<32>(ret.returnValue()));
+        } else {
+            // got an error, return details
+            tc->setReg(RiscvISA::ReturnValueReg, sext<32>(ret.encodedValue()));
+        }
+    }
+};
+
 } // namespace guest_abi
 } // namespace gem5
 

From 1b949e975922b03e5366011a646c3a41b298dc51 Mon Sep 17 00:00:00 2001
From: Earl Ou <shunhsingou@google.com>
Date: Wed, 1 Feb 2023 21:55:05 -0800
Subject: [PATCH 162/492] dev: terminal: run pollevent in terminal eventq

Change-Id: Idefda0ca1cd71d3e790d470458fa1cd370393c4a
Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/67532
Reviewed-by: Gabe Black <gabe.black@gmail.com>
Maintainer: Gabe Black <gabe.black@gmail.com>
Tested-by: kokoro <noreply+kokoro@google.com>
---
 src/dev/serial/terminal.cc | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/src/dev/serial/terminal.cc b/src/dev/serial/terminal.cc
index 52dbb9ede6..d4108a372d 100644
--- a/src/dev/serial/terminal.cc
+++ b/src/dev/serial/terminal.cc
@@ -87,6 +87,10 @@ Terminal::ListenEvent::ListenEvent(Terminal *t, int fd, int e)
 void
 Terminal::ListenEvent::process(int revent)
 {
+    // As a consequence of being called from the PollQueue, we might
+    // have been called from a different thread. Migrate to "our"
+    // thread.
+    EventQueue::ScopedMigration migrate(term->eventQueue());
     term->accept();
 }
 

From 3bdbe482c23369f2c19c4074d252858e35552341 Mon Sep 17 00:00:00 2001
From: Gabriel Busnot <gabriel.busnot@arteris.com>
Date: Tue, 24 Jan 2023 09:59:30 +0000
Subject: [PATCH 163/492] base: Strengthen safe_cast and make it work for
 reference types

safe_cast now supports the exact same types as dynamic_cast would. In
particular, it now supports l-value references and rejects r-value
references.

The non-debug version has also been updated to make it build only in
the same cases as the debug version of safe_cast would.

Change-Id: I86692561c169b1ad063000c990a52ea80c6637ca
Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/67453
Reviewed-by: Giacomo Travaglini <giacomo.travaglini@arm.com>
Maintainer: Giacomo Travaglini <giacomo.travaglini@arm.com>
Tested-by: kokoro <noreply+kokoro@google.com>
---
 src/base/cast.hh | 32 +++++++++++++++++++++++++++-----
 1 file changed, 27 insertions(+), 5 deletions(-)

diff --git a/src/base/cast.hh b/src/base/cast.hh
index cdc3c624a7..01464d9b2c 100644
--- a/src/base/cast.hh
+++ b/src/base/cast.hh
@@ -30,6 +30,8 @@
 #define __BASE_CAST_HH__
 
 #include <cassert>
+#include <type_traits>
+#include "base/logging.hh"
 
 namespace gem5
 {
@@ -44,10 +46,20 @@ namespace gem5
 
 template <class T, class U>
 inline T
-safe_cast(U ptr)
+safe_cast(U&& ref_or_ptr)
 {
-    T ret = dynamic_cast<T>(ptr);
-    assert(ret);
+    /*
+     * srd::forward used in conjunction with forwarding references (template T
+     * + T&&) ensures that dynamic_cast will see the exact same type that was
+     * passed to safe_cast (a.k.a., perfect forwarding).
+     *
+     * Not using std::forward would make safe_cast compile with references to
+     * temporary objects and thus return a dangling reference.
+     */
+    T ret = dynamic_cast<T>(std::forward<U>(ref_or_ptr));
+    if constexpr (std::is_pointer_v<T>) {
+        gem5_assert(ret);
+    }
     return ret;
 }
 
@@ -59,9 +71,19 @@ safe_cast(U ptr)
 
 template <class T, class U>
 inline T
-safe_cast(U ptr)
+safe_cast(U&& ref_or_ptr)
 {
-    return static_cast<T>(ptr);
+    /*
+     * safe_cast should be reserved to polymorphic types while static_cast is
+     * also allowed for non-polymorphic types. It could make safe_cast able to
+     * compile in a non-debug build and fail in a debug build.
+     */
+    static_assert(std::is_polymorphic_v<
+        std::remove_pointer_t<
+        std::remove_reference_t<
+        U>>
+    >);
+    return static_cast<T>(std::forward<U>(ref_or_ptr));
 }
 
 #endif

From c1b1a702f96adcb12eb1a58ebe6170a229033ad0 Mon Sep 17 00:00:00 2001
From: Gabriel Busnot <gabriel.busnot@arteris.com>
Date: Wed, 25 Jan 2023 07:28:39 +0000
Subject: [PATCH 164/492] tests: Make the GTestException type accessible to
 unit tests

Change-Id: I654589a3d90377657393d98e75c0697ba0e72c76
Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/67455
Reviewed-by: Bobby Bruce <bbruce@ucdavis.edu>
Reviewed-by: Jason Lowe-Power <power.jg@gmail.com>
Tested-by: kokoro <noreply+kokoro@google.com>
Maintainer: Jason Lowe-Power <power.jg@gmail.com>
---
 src/base/gtest/logging.hh      | 8 ++++++++
 src/base/gtest/logging_mock.cc | 8 --------
 2 files changed, 8 insertions(+), 8 deletions(-)

diff --git a/src/base/gtest/logging.hh b/src/base/gtest/logging.hh
index 12d4e5afb4..1d5a39c6bb 100644
--- a/src/base/gtest/logging.hh
+++ b/src/base/gtest/logging.hh
@@ -32,6 +32,14 @@
 namespace gem5
 {
 
+// This custom exception type will help prevent fatal exceptions from being
+// caught by other code in gem5 and let them escape to the gtest framework.
+// Unfortunately that results in a somewhat confusing message about an unknown
+// exception being thrown after the panic/fatal message has been printed, but
+// there will at least be some indication what went wrong.
+struct GTestException
+{};
+
 class GTestLogOutput : public std::ostringstream
 {
   private:
diff --git a/src/base/gtest/logging_mock.cc b/src/base/gtest/logging_mock.cc
index 101374eecc..07a20ea1ec 100644
--- a/src/base/gtest/logging_mock.cc
+++ b/src/base/gtest/logging_mock.cc
@@ -36,14 +36,6 @@ namespace gem5
 
 namespace {
 
-// This custom exception type will help prevent fatal exceptions from being
-// caught by other code in gem5 and let them escape to the gtest framework.
-// Unfortunately that results in a somewhat confusing message about an unknown
-// exception being thrown after the panic/fatal message has been printed, but
-// there will at least be some indication what went wrong.
-struct GTestException
-{};
-
 class GTestLogger : public Logger
 {
   public:

From d40ed0f82614fdc5bf70b6dadeb5658e08cd6d9f Mon Sep 17 00:00:00 2001
From: Gabriel Busnot <gabriel.busnot@arteris.com>
Date: Thu, 2 Feb 2023 08:31:07 +0000
Subject: [PATCH 165/492] mem: Deprecate RequestPort and ResponsePort owner ref
 member

The reference can be bound to an invalid object (*nullptr) in
situations where no proper owner SimObject can be provided to the port
constructor. This rightfully triggers a UBSAN warning.

Also, these two classes do not make use of the owner reference member
themselves and expose it as a protected member reference to
subclasses. This desing has several drawbacks: requires the reference
to owner to travel the class hierarchy up and down, loosing its true
static type in the process ; non-private member variable should not be
part of the API of such fundamental classes, if only for
maintainability ; a reference bound from a nullable pointer is a lying
API as it hides the optional aspect of ownership.

Note that the reference to invalid object can't be properly fixed until
the complete removal of the owner reference. This patch lays the path
toward that fix.

Change-Id: I8b42bc57d7826656726f7708492c43366f20633a
Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/67551
Reviewed-by: Bobby Bruce <bbruce@ucdavis.edu>
Tested-by: kokoro <noreply+kokoro@google.com>
Maintainer: Bobby Bruce <bbruce@ucdavis.edu>
Reviewed-by: Jason Lowe-Power <power.jg@gmail.com>
Maintainer: Jason Lowe-Power <power.jg@gmail.com>
---
 src/mem/port.cc | 47 +++++++++++++++++++++++++++++++++++++++++------
 src/mem/port.hh | 18 +++++++++++++-----
 2 files changed, 54 insertions(+), 11 deletions(-)

diff --git a/src/mem/port.cc b/src/mem/port.cc
index 18793d487b..e36323fb74 100644
--- a/src/mem/port.cc
+++ b/src/mem/port.cc
@@ -120,9 +120,23 @@ DefaultResponsePort defaultResponsePort;
 /**
  * Request port
  */
-RequestPort::RequestPort(const std::string& name, SimObject* _owner,
-    PortID _id) : Port(name, _id), _responsePort(&defaultResponsePort),
-    owner(*_owner)
+[[deprecated]]
+RequestPort::RequestPort(const std::string& name,
+                         SimObject* _owner,
+                         PortID _id):
+    Port(name, _id), _responsePort(&defaultResponsePort), owner{*_owner}
+{
+}
+
+/*** FIXME:
+ * The owner reference member is going through a deprecation path. In the
+ * meantime, it must be initialized but no valid reference is available here.
+ * Using 1 instead of nullptr prevents warning upon dereference. It should be
+ * OK until definitive removal of owner.
+ */
+RequestPort::RequestPort(const std::string& name, PortID _id) :
+    Port(name, _id), _responsePort(&defaultResponsePort),
+    owner{*reinterpret_cast<SimObject*>(1)}
 {
 }
 
@@ -175,9 +189,30 @@ RequestPort::printAddr(Addr a)
 /**
  * Response port
  */
-ResponsePort::ResponsePort(const std::string& name, SimObject* _owner,
-    PortID id) : Port(name, id), _requestPort(&defaultRequestPort),
-    defaultBackdoorWarned(false), owner(*_owner)
+
+[[deprecated]]
+ResponsePort::ResponsePort(const std::string& name,
+                           SimObject* _owner,
+                           PortID _id):
+    Port(name, _id),
+    _requestPort(&defaultRequestPort),
+    defaultBackdoorWarned(false),
+    owner{*_owner}
+{
+}
+
+
+/*** FIXME:
+ * The owner reference member is going through a deprecation path. In the
+ * meantime, it must be initialized but no valid reference is available here.
+ * Using 1 instead of nullptr prevents warning upon dereference. It should be
+ * OK until definitive removal of owner.
+ */
+ResponsePort::ResponsePort(const std::string& name, PortID id) :
+    Port(name, id),
+    _requestPort(&defaultRequestPort),
+    defaultBackdoorWarned(false),
+    owner{*reinterpret_cast<SimObject*>(1)}
 {
 }
 
diff --git a/src/mem/port.hh b/src/mem/port.hh
index fb0f4b8812..0d61787f62 100644
--- a/src/mem/port.hh
+++ b/src/mem/port.hh
@@ -86,8 +86,13 @@ class RequestPort: public Port, public AtomicRequestProtocol,
     SimObject &owner;
 
   public:
+    [[deprecated("RequestPort ownership is deprecated. "
+                 "Owner should now be registered in derived classes.")]]
     RequestPort(const std::string& name, SimObject* _owner,
-               PortID id=InvalidPortID);
+                PortID id=InvalidPortID);
+
+    RequestPort(const std::string& name, PortID id=InvalidPortID);
+
     virtual ~RequestPort();
 
     /**
@@ -266,9 +271,7 @@ class RequestPort: public Port, public AtomicRequestProtocol,
 class [[deprecated]] MasterPort : public RequestPort
 {
   public:
-    MasterPort(const std::string& name, SimObject* _owner,
-               PortID id=InvalidPortID) : RequestPort(name, _owner, id)
-               {}
+    using RequestPort::RequestPort;
 };
 
 /**
@@ -294,8 +297,13 @@ class ResponsePort : public Port, public AtomicResponseProtocol,
     SimObject& owner;
 
   public:
+    [[deprecated("ResponsePort ownership is deprecated. "
+                 "Owner should now be registered in derived classes.")]]
     ResponsePort(const std::string& name, SimObject* _owner,
-              PortID id=InvalidPortID);
+                 PortID id=InvalidPortID);
+
+    ResponsePort(const std::string& name, PortID id=InvalidPortID);
+
     virtual ~ResponsePort();
 
     /**

From 7f4c92c91032a2e5938e0f4a650ae79c1bf45479 Mon Sep 17 00:00:00 2001
From: Gabriel Busnot <gabriel.busnot@arteris.com>
Date: Fri, 6 Jan 2023 11:22:26 +0000
Subject: [PATCH 166/492] mem,arch-arm,mem-ruby,cpu: Remove use of deprecated
 base port owner

Change-Id: I29214278c3dd4829c89a6f7c93214b8123912e74
Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/67452
Reviewed-by: Bobby Bruce <bbruce@ucdavis.edu>
Tested-by: kokoro <noreply+kokoro@google.com>
Maintainer: Daniel Carvalho <odanrc@yahoo.com.br>
Maintainer: Bobby Bruce <bbruce@ucdavis.edu>
Reviewed-by: Daniel Carvalho <odanrc@yahoo.com.br>
---
 src/arch/amdgpu/common/tlb.hh                 |   4 +-
 src/arch/amdgpu/common/tlb_coalescer.hh       |   2 +-
 src/arch/amdgpu/vega/pagetable_walker.hh      |   2 +-
 src/arch/amdgpu/vega/tlb.hh                   |   4 +-
 src/arch/amdgpu/vega/tlb_coalescer.hh         |   4 +-
 src/arch/arm/table_walker.cc                  |  11 +-
 src/arch/arm/table_walker.hh                  |   4 +-
 src/arch/riscv/pagetable_walker.hh            |   2 +-
 src/arch/x86/pagetable_walker.hh              |   2 +-
 src/cpu/kvm/base.hh                           |   2 +-
 src/cpu/minor/cpu.hh                          |   2 +-
 src/cpu/o3/fetch.cc                           |   2 +-
 src/cpu/o3/lsq.cc                             |   2 +-
 src/cpu/simple/atomic.cc                      |   5 +-
 src/cpu/simple/atomic.hh                      |   6 +-
 src/cpu/simple/timing.hh                      |   2 +-
 .../directedtest/RubyDirectedTester.hh        |   2 +-
 .../GarnetSyntheticTraffic.hh                 |   2 +-
 .../testers/gpu_ruby_test/protocol_tester.hh  |   2 +-
 src/cpu/testers/memtest/memtest.hh            |   2 +-
 src/cpu/testers/rubytest/RubyTester.hh        |   2 +-
 src/cpu/testers/traffic_gen/base.hh           |   2 +-
 src/cpu/testers/traffic_gen/gups_gen.hh       |   2 +-
 src/cpu/trace/trace_cpu.hh                    |   4 +-
 src/dev/arm/gic_v3_its.hh                     |   2 +-
 src/dev/arm/smmu_v3_ports.cc                  |  10 +-
 src/dev/dma_device.cc                         |   2 +-
 src/dev/x86/intdev.hh                         |   2 +-
 src/gpu-compute/compute_unit.hh               |  14 +-
 src/gpu-compute/lds_state.hh                  |   2 +-
 src/learning_gem5/part2/simple_cache.hh       |   4 +-
 src/learning_gem5/part2/simple_memobj.hh      |   4 +-
 src/mem/addr_mapper.hh                        |   4 +-
 src/mem/bridge.cc                             |   4 +-
 src/mem/cache/base.cc                         |  45 ++++---
 src/mem/cache/base.hh                         |  15 +--
 src/mem/cfi_mem.cc                            |   2 +-
 src/mem/coherent_xbar.hh                      |   6 +-
 src/mem/comm_monitor.hh                       |   4 +-
 src/mem/dramsim2.cc                           |   2 +-
 src/mem/dramsim3.cc                           |   2 +-
 src/mem/external_master.hh                    |   2 +-
 src/mem/external_slave.hh                     |   2 +-
 src/mem/mem_checker_monitor.hh                |   4 +-
 src/mem/mem_ctrl.cc                           |   2 +-
 src/mem/mem_delay.cc                          |   5 +-
 src/mem/noncoherent_xbar.hh                   |   4 +-
 src/mem/port.cc                               |   4 +-
 src/mem/port.hh                               |   4 +-
 src/mem/port_terminator.cc                    |   4 +-
 src/mem/port_terminator.hh                    |   8 +-
 src/mem/port_wrapper.cc                       |  10 +-
 src/mem/port_wrapper.hh                       |   6 +-
 src/mem/qos/mem_sink.cc                       |   2 +-
 src/mem/qport.hh                              |  13 +-
 .../slicc_interface/AbstractController.cc     |   2 +-
 src/mem/ruby/system/RubyPort.cc               | 126 +++++++++---------
 src/mem/ruby/system/RubyPort.hh               |  17 ++-
 src/mem/serial_link.cc                        |   4 +-
 src/mem/simple_mem.cc                         |   2 +-
 src/mem/sys_bridge.cc                         |   4 +-
 src/mem/sys_bridge.hh                         |   8 +-
 src/mem/thread_bridge.cc                      |   4 +-
 src/mem/token_port.hh                         |   6 +-
 src/mem/tport.cc                              |   2 +-
 src/sim/system.cc                             |   2 +-
 src/sim/system.hh                             |   4 +-
 src/sst/outgoing_request_bridge.cc            |   2 +-
 src/systemc/tlm_bridge/gem5_to_tlm.hh         |   2 +-
 src/systemc/tlm_bridge/tlm_to_gem5.hh         |   2 +-
 70 files changed, 223 insertions(+), 232 deletions(-)

diff --git a/src/arch/amdgpu/common/tlb.hh b/src/arch/amdgpu/common/tlb.hh
index 6e9014e8aa..9bd0441340 100644
--- a/src/arch/amdgpu/common/tlb.hh
+++ b/src/arch/amdgpu/common/tlb.hh
@@ -214,7 +214,7 @@ namespace X86ISA
           public:
             CpuSidePort(const std::string &_name, GpuTLB * gpu_TLB,
                         PortID _index)
-                : ResponsePort(_name, gpu_TLB), tlb(gpu_TLB), index(_index) { }
+                : ResponsePort(_name), tlb(gpu_TLB), index(_index) { }
 
           protected:
             GpuTLB *tlb;
@@ -241,7 +241,7 @@ namespace X86ISA
           public:
             MemSidePort(const std::string &_name, GpuTLB * gpu_TLB,
                         PortID _index)
-                : RequestPort(_name, gpu_TLB), tlb(gpu_TLB), index(_index) { }
+                : RequestPort(_name), tlb(gpu_TLB), index(_index) { }
 
             std::deque<PacketPtr> retries;
 
diff --git a/src/arch/amdgpu/common/tlb_coalescer.hh b/src/arch/amdgpu/common/tlb_coalescer.hh
index 6c940b1dd1..59d8ebe888 100644
--- a/src/arch/amdgpu/common/tlb_coalescer.hh
+++ b/src/arch/amdgpu/common/tlb_coalescer.hh
@@ -124,7 +124,7 @@ class TLBCoalescer : public ClockedObject
       public:
         CpuSidePort(const std::string &_name, TLBCoalescer *tlb_coalescer,
                     PortID _index)
-            : ResponsePort(_name, tlb_coalescer), coalescer(tlb_coalescer),
+            : ResponsePort(_name), coalescer(tlb_coalescer),
               index(_index) { }
 
       protected:
diff --git a/src/arch/amdgpu/vega/pagetable_walker.hh b/src/arch/amdgpu/vega/pagetable_walker.hh
index b00c0a0003..2ad0748c14 100644
--- a/src/arch/amdgpu/vega/pagetable_walker.hh
+++ b/src/arch/amdgpu/vega/pagetable_walker.hh
@@ -59,7 +59,7 @@ class Walker : public ClockedObject
     {
       public:
         WalkerPort(const std::string &_name, Walker * _walker) :
-            RequestPort(_name, _walker), walker(_walker)
+            RequestPort(_name), walker(_walker)
         {}
 
       protected:
diff --git a/src/arch/amdgpu/vega/tlb.hh b/src/arch/amdgpu/vega/tlb.hh
index c38f5914e0..e48962108c 100644
--- a/src/arch/amdgpu/vega/tlb.hh
+++ b/src/arch/amdgpu/vega/tlb.hh
@@ -215,7 +215,7 @@ class GpuTLB : public ClockedObject
       public:
         CpuSidePort(const std::string &_name, GpuTLB * gpu_TLB,
                     PortID _index)
-            : ResponsePort(_name, gpu_TLB), tlb(gpu_TLB), index(_index) { }
+            : ResponsePort(_name), tlb(gpu_TLB), index(_index) { }
 
       protected:
         GpuTLB *tlb;
@@ -242,7 +242,7 @@ class GpuTLB : public ClockedObject
       public:
         MemSidePort(const std::string &_name, GpuTLB * gpu_TLB,
                     PortID _index)
-            : RequestPort(_name, gpu_TLB), tlb(gpu_TLB), index(_index) { }
+            : RequestPort(_name), tlb(gpu_TLB), index(_index) { }
 
         std::deque<PacketPtr> retries;
 
diff --git a/src/arch/amdgpu/vega/tlb_coalescer.hh b/src/arch/amdgpu/vega/tlb_coalescer.hh
index 4ff9324715..6b7af60381 100644
--- a/src/arch/amdgpu/vega/tlb_coalescer.hh
+++ b/src/arch/amdgpu/vega/tlb_coalescer.hh
@@ -137,7 +137,7 @@ class VegaTLBCoalescer : public ClockedObject
       public:
         CpuSidePort(const std::string &_name, VegaTLBCoalescer *tlb_coalescer,
                     PortID _index)
-            : ResponsePort(_name, tlb_coalescer), coalescer(tlb_coalescer),
+            : ResponsePort(_name), coalescer(tlb_coalescer),
               index(_index) { }
 
       protected:
@@ -165,7 +165,7 @@ class VegaTLBCoalescer : public ClockedObject
       public:
         MemSidePort(const std::string &_name, VegaTLBCoalescer *tlb_coalescer,
                     PortID _index)
-            : RequestPort(_name, tlb_coalescer), coalescer(tlb_coalescer),
+            : RequestPort(_name), coalescer(tlb_coalescer),
               index(_index) { }
 
         std::deque<PacketPtr> retries;
diff --git a/src/arch/arm/table_walker.cc b/src/arch/arm/table_walker.cc
index bbf102dad7..60f9e3f76e 100644
--- a/src/arch/arm/table_walker.cc
+++ b/src/arch/arm/table_walker.cc
@@ -62,7 +62,7 @@ using namespace ArmISA;
 TableWalker::TableWalker(const Params &p)
     : ClockedObject(p),
       requestorId(p.sys->getRequestorId(this)),
-      port(new Port(this, requestorId)),
+      port(new Port(*this, requestorId)),
       isStage2(p.is_stage2), tlb(NULL),
       currState(NULL), pending(false),
       numSquashable(p.num_squash_per_cycle),
@@ -138,10 +138,11 @@ TableWalker::WalkerState::WalkerState() :
 {
 }
 
-TableWalker::Port::Port(TableWalker *_walker, RequestorID id)
-  : QueuedRequestPort(_walker->name() + ".port", _walker,
-        reqQueue, snoopRespQueue),
-    reqQueue(*_walker, *this), snoopRespQueue(*_walker, *this),
+TableWalker::Port::Port(TableWalker& _walker, RequestorID id)
+  : QueuedRequestPort(_walker.name() + ".port", reqQueue, snoopRespQueue),
+    owner{_walker},
+    reqQueue(_walker, *this),
+    snoopRespQueue(_walker, *this),
     requestorId(id)
 {
 }
diff --git a/src/arch/arm/table_walker.hh b/src/arch/arm/table_walker.hh
index 6ba7ffcd73..b511fd44d0 100644
--- a/src/arch/arm/table_walker.hh
+++ b/src/arch/arm/table_walker.hh
@@ -941,7 +941,7 @@ class TableWalker : public ClockedObject
     class Port : public QueuedRequestPort
     {
       public:
-        Port(TableWalker* _walker, RequestorID id);
+        Port(TableWalker& _walker, RequestorID id);
 
         void sendFunctionalReq(Addr desc_addr, int size,
             uint8_t *data, Request::Flags flag);
@@ -963,6 +963,8 @@ class TableWalker : public ClockedObject
                                Tick delay, Event *event);
 
       private:
+        TableWalker& owner;
+
         /** Packet queue used to store outgoing requests. */
         ReqPacketQueue reqQueue;
 
diff --git a/src/arch/riscv/pagetable_walker.hh b/src/arch/riscv/pagetable_walker.hh
index 55db814471..b12b263403 100644
--- a/src/arch/riscv/pagetable_walker.hh
+++ b/src/arch/riscv/pagetable_walker.hh
@@ -68,7 +68,7 @@ namespace RiscvISA
         {
           public:
             WalkerPort(const std::string &_name, Walker * _walker) :
-                  RequestPort(_name, _walker), walker(_walker)
+                  RequestPort(_name), walker(_walker)
             {}
 
           protected:
diff --git a/src/arch/x86/pagetable_walker.hh b/src/arch/x86/pagetable_walker.hh
index 469be6641c..14e7c9976f 100644
--- a/src/arch/x86/pagetable_walker.hh
+++ b/src/arch/x86/pagetable_walker.hh
@@ -65,7 +65,7 @@ namespace X86ISA
         {
           public:
             WalkerPort(const std::string &_name, Walker * _walker) :
-                  RequestPort(_name, _walker), walker(_walker)
+                  RequestPort(_name), walker(_walker)
             {}
 
           protected:
diff --git a/src/cpu/kvm/base.hh b/src/cpu/kvm/base.hh
index 6b4b88af49..2d81c7c7eb 100644
--- a/src/cpu/kvm/base.hh
+++ b/src/cpu/kvm/base.hh
@@ -601,7 +601,7 @@ class BaseKvmCPU : public BaseCPU
 
       public:
         KVMCpuPort(const std::string &_name, BaseKvmCPU *_cpu)
-            : RequestPort(_name, _cpu), cpu(_cpu), activeMMIOReqs(0)
+            : RequestPort(_name), cpu(_cpu), activeMMIOReqs(0)
         { }
         /**
          * Interface to send Atomic or Timing IO request.  Assumes that the pkt
diff --git a/src/cpu/minor/cpu.hh b/src/cpu/minor/cpu.hh
index acf4295ac9..a966519c56 100644
--- a/src/cpu/minor/cpu.hh
+++ b/src/cpu/minor/cpu.hh
@@ -110,7 +110,7 @@ class MinorCPU : public BaseCPU
 
       public:
         MinorCPUPort(const std::string& name_, MinorCPU &cpu_)
-            : RequestPort(name_, &cpu_), cpu(cpu_)
+            : RequestPort(name_), cpu(cpu_)
         { }
 
     };
diff --git a/src/cpu/o3/fetch.cc b/src/cpu/o3/fetch.cc
index 49416bf754..d3cdd2c761 100644
--- a/src/cpu/o3/fetch.cc
+++ b/src/cpu/o3/fetch.cc
@@ -76,7 +76,7 @@ namespace o3
 {
 
 Fetch::IcachePort::IcachePort(Fetch *_fetch, CPU *_cpu) :
-        RequestPort(_cpu->name() + ".icache_port", _cpu), fetch(_fetch)
+        RequestPort(_cpu->name() + ".icache_port"), fetch(_fetch)
 {}
 
 
diff --git a/src/cpu/o3/lsq.cc b/src/cpu/o3/lsq.cc
index 654fd67d41..d30a399f92 100644
--- a/src/cpu/o3/lsq.cc
+++ b/src/cpu/o3/lsq.cc
@@ -65,7 +65,7 @@ namespace o3
 {
 
 LSQ::DcachePort::DcachePort(LSQ *_lsq, CPU *_cpu) :
-    RequestPort(_cpu->name() + ".dcache_port", _cpu), lsq(_lsq), cpu(_cpu)
+    RequestPort(_cpu->name() + ".dcache_port"), lsq(_lsq), cpu(_cpu)
 {}
 
 LSQ::LSQ(CPU *cpu_ptr, IEW *iew_ptr, const BaseO3CPUParams &params)
diff --git a/src/cpu/simple/atomic.cc b/src/cpu/simple/atomic.cc
index d6638b3654..5c9fc29b64 100644
--- a/src/cpu/simple/atomic.cc
+++ b/src/cpu/simple/atomic.cc
@@ -78,7 +78,7 @@ AtomicSimpleCPU::AtomicSimpleCPU(const BaseAtomicSimpleCPUParams &p)
       width(p.width), locked(false),
       simulate_data_stalls(p.simulate_data_stalls),
       simulate_inst_stalls(p.simulate_inst_stalls),
-      icachePort(name() + ".icache_port", this),
+      icachePort(name() + ".icache_port"),
       dcachePort(name() + ".dcache_port", this),
       dcache_access(false), dcache_latency(0),
       ppCommit(nullptr)
@@ -281,8 +281,6 @@ AtomicSimpleCPU::AtomicCPUDPort::recvAtomicSnoop(PacketPtr pkt)
             __func__, pkt->getAddr(), pkt->cmdString());
 
     // X86 ISA: Snooping an invalidation for monitor/mwait
-    AtomicSimpleCPU *cpu = (AtomicSimpleCPU *)(&owner);
-
     for (ThreadID tid = 0; tid < cpu->numThreads; tid++) {
         if (cpu->getCpuAddrMonitor(tid)->doMonitor(pkt)) {
             cpu->wakeup(tid);
@@ -312,7 +310,6 @@ AtomicSimpleCPU::AtomicCPUDPort::recvFunctionalSnoop(PacketPtr pkt)
             __func__, pkt->getAddr(), pkt->cmdString());
 
     // X86 ISA: Snooping an invalidation for monitor/mwait
-    AtomicSimpleCPU *cpu = (AtomicSimpleCPU *)(&owner);
     for (ThreadID tid = 0; tid < cpu->numThreads; tid++) {
         if (cpu->getCpuAddrMonitor(tid)->doMonitor(pkt)) {
             cpu->wakeup(tid);
diff --git a/src/cpu/simple/atomic.hh b/src/cpu/simple/atomic.hh
index 6fd790ee2f..a194f107d3 100644
--- a/src/cpu/simple/atomic.hh
+++ b/src/cpu/simple/atomic.hh
@@ -117,8 +117,8 @@ class AtomicSimpleCPU : public BaseSimpleCPU
 
       public:
 
-        AtomicCPUPort(const std::string &_name, BaseSimpleCPU* _cpu)
-            : RequestPort(_name, _cpu)
+        AtomicCPUPort(const std::string &_name)
+            : RequestPort(_name)
         { }
 
       protected:
@@ -142,7 +142,7 @@ class AtomicSimpleCPU : public BaseSimpleCPU
 
       public:
         AtomicCPUDPort(const std::string &_name, BaseSimpleCPU *_cpu)
-            : AtomicCPUPort(_name, _cpu), cpu(_cpu)
+            : AtomicCPUPort(_name), cpu(_cpu)
         {
             cacheBlockMask = ~(cpu->cacheLineSize() - 1);
         }
diff --git a/src/cpu/simple/timing.hh b/src/cpu/simple/timing.hh
index ca6c0e26a3..86ac7b9358 100644
--- a/src/cpu/simple/timing.hh
+++ b/src/cpu/simple/timing.hh
@@ -164,7 +164,7 @@ class TimingSimpleCPU : public BaseSimpleCPU
       public:
 
         TimingCPUPort(const std::string& _name, TimingSimpleCPU* _cpu)
-            : RequestPort(_name, _cpu), cpu(_cpu),
+            : RequestPort(_name), cpu(_cpu),
               retryRespEvent([this]{ sendRetryResp(); }, name())
         { }
 
diff --git a/src/cpu/testers/directedtest/RubyDirectedTester.hh b/src/cpu/testers/directedtest/RubyDirectedTester.hh
index 83f8863573..16d21b8cc0 100644
--- a/src/cpu/testers/directedtest/RubyDirectedTester.hh
+++ b/src/cpu/testers/directedtest/RubyDirectedTester.hh
@@ -58,7 +58,7 @@ class RubyDirectedTester : public ClockedObject
       public:
         CpuPort(const std::string &_name, RubyDirectedTester *_tester,
                 PortID _id)
-            : RequestPort(_name, _tester, _id), tester(_tester)
+            : RequestPort(_name, _id), tester(_tester)
         {}
 
       protected:
diff --git a/src/cpu/testers/garnet_synthetic_traffic/GarnetSyntheticTraffic.hh b/src/cpu/testers/garnet_synthetic_traffic/GarnetSyntheticTraffic.hh
index 1667da1afe..def3ed29a2 100644
--- a/src/cpu/testers/garnet_synthetic_traffic/GarnetSyntheticTraffic.hh
+++ b/src/cpu/testers/garnet_synthetic_traffic/GarnetSyntheticTraffic.hh
@@ -84,7 +84,7 @@ class GarnetSyntheticTraffic : public ClockedObject
       public:
 
         CpuPort(const std::string &_name, GarnetSyntheticTraffic *_tester)
-            : RequestPort(_name, _tester), tester(_tester)
+            : RequestPort(_name), tester(_tester)
         { }
 
       protected:
diff --git a/src/cpu/testers/gpu_ruby_test/protocol_tester.hh b/src/cpu/testers/gpu_ruby_test/protocol_tester.hh
index 758ece50f1..dcd5b35018 100644
--- a/src/cpu/testers/gpu_ruby_test/protocol_tester.hh
+++ b/src/cpu/testers/gpu_ruby_test/protocol_tester.hh
@@ -74,7 +74,7 @@ class ProtocolTester : public ClockedObject
       public:
         SeqPort(const std::string &_name, ProtocolTester *_tester, PortID _id,
                 PortID _index)
-            : RequestPort(_name, _tester, _id)
+            : RequestPort(_name, _id)
         {}
 
       protected:
diff --git a/src/cpu/testers/memtest/memtest.hh b/src/cpu/testers/memtest/memtest.hh
index 2dc1f13dd1..3fd1674191 100644
--- a/src/cpu/testers/memtest/memtest.hh
+++ b/src/cpu/testers/memtest/memtest.hh
@@ -100,7 +100,7 @@ class MemTest : public ClockedObject
       public:
 
         CpuPort(const std::string &_name, MemTest &_memtest)
-            : RequestPort(_name, &_memtest), memtest(_memtest)
+            : RequestPort(_name), memtest(_memtest)
         { }
 
       protected:
diff --git a/src/cpu/testers/rubytest/RubyTester.hh b/src/cpu/testers/rubytest/RubyTester.hh
index 1a8b993e0a..9397126180 100644
--- a/src/cpu/testers/rubytest/RubyTester.hh
+++ b/src/cpu/testers/rubytest/RubyTester.hh
@@ -76,7 +76,7 @@ class RubyTester : public ClockedObject
 
         CpuPort(const std::string &_name, RubyTester *_tester, PortID _id,
                 PortID _index)
-            : RequestPort(_name, _tester, _id), tester(_tester),
+            : RequestPort(_name, _id), tester(_tester),
               globalIdx(_index)
         {}
 
diff --git a/src/cpu/testers/traffic_gen/base.hh b/src/cpu/testers/traffic_gen/base.hh
index 5a9af61009..530da6d718 100644
--- a/src/cpu/testers/traffic_gen/base.hh
+++ b/src/cpu/testers/traffic_gen/base.hh
@@ -132,7 +132,7 @@ class BaseTrafficGen : public ClockedObject
       public:
 
         TrafficGenPort(const std::string& name, BaseTrafficGen& traffic_gen)
-            : RequestPort(name, &traffic_gen), trafficGen(traffic_gen)
+            : RequestPort(name), trafficGen(traffic_gen)
         { }
 
       protected:
diff --git a/src/cpu/testers/traffic_gen/gups_gen.hh b/src/cpu/testers/traffic_gen/gups_gen.hh
index f33f7dae1e..38865b5480 100644
--- a/src/cpu/testers/traffic_gen/gups_gen.hh
+++ b/src/cpu/testers/traffic_gen/gups_gen.hh
@@ -87,7 +87,7 @@ class GUPSGen : public ClockedObject
       public:
 
         GenPort(const std::string& name, GUPSGen *owner) :
-            RequestPort(name, owner), owner(owner), _blocked(false),
+            RequestPort(name), owner(owner), _blocked(false),
             blockedPacket(nullptr)
         {}
 
diff --git a/src/cpu/trace/trace_cpu.hh b/src/cpu/trace/trace_cpu.hh
index 9d3ae527d7..87f820fe6d 100644
--- a/src/cpu/trace/trace_cpu.hh
+++ b/src/cpu/trace/trace_cpu.hh
@@ -218,7 +218,7 @@ class TraceCPU : public BaseCPU
       public:
         /** Default constructor. */
         IcachePort(TraceCPU* _cpu) :
-            RequestPort(_cpu->name() + ".icache_port", _cpu), owner(_cpu)
+            RequestPort(_cpu->name() + ".icache_port"), owner(_cpu)
         {}
 
       public:
@@ -258,7 +258,7 @@ class TraceCPU : public BaseCPU
       public:
         /** Default constructor. */
         DcachePort(TraceCPU* _cpu) :
-            RequestPort(_cpu->name() + ".dcache_port", _cpu), owner(_cpu)
+            RequestPort(_cpu->name() + ".dcache_port"), owner(_cpu)
         {}
 
       public:
diff --git a/src/dev/arm/gic_v3_its.hh b/src/dev/arm/gic_v3_its.hh
index 27293941c9..2e2fc29609 100644
--- a/src/dev/arm/gic_v3_its.hh
+++ b/src/dev/arm/gic_v3_its.hh
@@ -94,7 +94,7 @@ class Gicv3Its : public BasicPioDevice
 
       public:
         DataPort(const std::string &_name, Gicv3Its &_its) :
-            RequestPort(_name, &_its),
+            RequestPort(_name),
             its(_its)
         {}
 
diff --git a/src/dev/arm/smmu_v3_ports.cc b/src/dev/arm/smmu_v3_ports.cc
index 95915b2bcf..4059be7df7 100644
--- a/src/dev/arm/smmu_v3_ports.cc
+++ b/src/dev/arm/smmu_v3_ports.cc
@@ -45,7 +45,7 @@ namespace gem5
 {
 
 SMMURequestPort::SMMURequestPort(const std::string &_name, SMMUv3 &_smmu) :
-    RequestPort(_name, &_smmu),
+    RequestPort(_name),
     smmu(_smmu)
 {}
 
@@ -63,7 +63,7 @@ SMMURequestPort::recvReqRetry()
 
 SMMUTableWalkPort::SMMUTableWalkPort(const std::string &_name,
                                                  SMMUv3 &_smmu) :
-    RequestPort(_name, &_smmu),
+    RequestPort(_name),
     smmu(_smmu)
 {}
 
@@ -83,7 +83,7 @@ SMMUDevicePort::SMMUDevicePort(const std::string &_name,
                              SMMUv3DeviceInterface &_ifc,
                              PortID _id)
 :
-    QueuedResponsePort(_name, &_ifc, respQueue, _id),
+    QueuedResponsePort(_name, respQueue, _id),
     ifc(_ifc),
     respQueue(_ifc, *this)
 {}
@@ -141,7 +141,7 @@ SMMUControlPort::getAddrRanges() const
 
 SMMUATSMemoryPort::SMMUATSMemoryPort(const std::string &_name,
                                      SMMUv3DeviceInterface &_ifc) :
-    QueuedRequestPort(_name, &_ifc, reqQueue, snoopRespQueue),
+    QueuedRequestPort(_name, reqQueue, snoopRespQueue),
     ifc(_ifc),
     reqQueue(_ifc, *this),
     snoopRespQueue(_ifc, *this)
@@ -155,7 +155,7 @@ SMMUATSMemoryPort::recvTimingResp(PacketPtr pkt)
 
 SMMUATSDevicePort::SMMUATSDevicePort(const std::string &_name,
                                    SMMUv3DeviceInterface &_ifc) :
-    QueuedResponsePort(_name, &_ifc, respQueue),
+    QueuedResponsePort(_name, respQueue),
     ifc(_ifc),
     respQueue(_ifc, *this)
 {}
diff --git a/src/dev/dma_device.cc b/src/dev/dma_device.cc
index ee871aa8c3..ebda635442 100644
--- a/src/dev/dma_device.cc
+++ b/src/dev/dma_device.cc
@@ -57,7 +57,7 @@ namespace gem5
 
 DmaPort::DmaPort(ClockedObject *dev, System *s,
                  uint32_t sid, uint32_t ssid)
-    : RequestPort(dev->name() + ".dma", dev),
+    : RequestPort(dev->name() + ".dma"),
       device(dev), sys(s), requestorId(s->getRequestorId(dev)),
       sendEvent([this]{ sendDma(); }, dev->name()),
       defaultSid(sid), defaultSSid(ssid), cacheLineSize(s->cacheLineSize())
diff --git a/src/dev/x86/intdev.hh b/src/dev/x86/intdev.hh
index 0c30ef5b57..f410ae4c2e 100644
--- a/src/dev/x86/intdev.hh
+++ b/src/dev/x86/intdev.hh
@@ -118,7 +118,7 @@ class IntRequestPort : public QueuedRequestPort
   public:
     IntRequestPort(const std::string& _name, SimObject* _parent,
                   Device* dev, Tick _latency) :
-        QueuedRequestPort(_name, _parent, reqQueue, snoopRespQueue),
+        QueuedRequestPort(_name, reqQueue, snoopRespQueue),
         reqQueue(*_parent, *this), snoopRespQueue(*_parent, *this),
         device(dev), latency(_latency)
     {
diff --git a/src/gpu-compute/compute_unit.hh b/src/gpu-compute/compute_unit.hh
index fcc4468ec1..cf73aa2723 100644
--- a/src/gpu-compute/compute_unit.hh
+++ b/src/gpu-compute/compute_unit.hh
@@ -514,7 +514,7 @@ class ComputeUnit : public ClockedObject
     {
       public:
         DataPort(const std::string &_name, ComputeUnit *_cu, PortID id)
-            : RequestPort(_name, _cu, id), computeUnit(_cu) { }
+            : RequestPort(_name, id), computeUnit(_cu) { }
 
         bool snoopRangeSent;
 
@@ -586,7 +586,7 @@ class ComputeUnit : public ClockedObject
     {
       public:
         ScalarDataPort(const std::string &_name, ComputeUnit *_cu)
-            : RequestPort(_name, _cu), computeUnit(_cu)
+            : RequestPort(_name), computeUnit(_cu)
         {
         }
 
@@ -657,7 +657,7 @@ class ComputeUnit : public ClockedObject
     {
       public:
         SQCPort(const std::string &_name, ComputeUnit *_cu)
-            : RequestPort(_name, _cu), computeUnit(_cu) { }
+            : RequestPort(_name), computeUnit(_cu) { }
 
         bool snoopRangeSent;
 
@@ -698,7 +698,7 @@ class ComputeUnit : public ClockedObject
     {
       public:
         DTLBPort(const std::string &_name, ComputeUnit *_cu, PortID id)
-            : RequestPort(_name, _cu, id), computeUnit(_cu),
+            : RequestPort(_name, id), computeUnit(_cu),
               stalled(false)
         { }
 
@@ -745,7 +745,7 @@ class ComputeUnit : public ClockedObject
     {
       public:
         ScalarDTLBPort(const std::string &_name, ComputeUnit *_cu)
-            : RequestPort(_name, _cu), computeUnit(_cu), stalled(false)
+            : RequestPort(_name), computeUnit(_cu), stalled(false)
         {
         }
 
@@ -773,7 +773,7 @@ class ComputeUnit : public ClockedObject
     {
       public:
         ITLBPort(const std::string &_name, ComputeUnit *_cu)
-            : RequestPort(_name, _cu), computeUnit(_cu), stalled(false) { }
+            : RequestPort(_name), computeUnit(_cu), stalled(false) { }
 
 
         bool isStalled() { return stalled; }
@@ -815,7 +815,7 @@ class ComputeUnit : public ClockedObject
     {
       public:
         LDSPort(const std::string &_name, ComputeUnit *_cu)
-        : RequestPort(_name, _cu), computeUnit(_cu)
+        : RequestPort(_name), computeUnit(_cu)
         {
         }
 
diff --git a/src/gpu-compute/lds_state.hh b/src/gpu-compute/lds_state.hh
index 5fe259506b..3228b7822c 100644
--- a/src/gpu-compute/lds_state.hh
+++ b/src/gpu-compute/lds_state.hh
@@ -183,7 +183,7 @@ class LdsState: public ClockedObject
     {
       public:
         CuSidePort(const std::string &_name, LdsState *_ownerLds) :
-                ResponsePort(_name, _ownerLds), ownerLds(_ownerLds)
+                ResponsePort(_name), ownerLds(_ownerLds)
         {
         }
 
diff --git a/src/learning_gem5/part2/simple_cache.hh b/src/learning_gem5/part2/simple_cache.hh
index 8869985ffd..25d195d4f1 100644
--- a/src/learning_gem5/part2/simple_cache.hh
+++ b/src/learning_gem5/part2/simple_cache.hh
@@ -74,7 +74,7 @@ class SimpleCache : public ClockedObject
          * Constructor. Just calls the superclass constructor.
          */
         CPUSidePort(const std::string& name, int id, SimpleCache *owner) :
-            ResponsePort(name, owner), id(id), owner(owner), needRetry(false),
+            ResponsePort(name), id(id), owner(owner), needRetry(false),
             blockedPacket(nullptr)
         { }
 
@@ -154,7 +154,7 @@ class SimpleCache : public ClockedObject
          * Constructor. Just calls the superclass constructor.
          */
         MemSidePort(const std::string& name, SimpleCache *owner) :
-            RequestPort(name, owner), owner(owner), blockedPacket(nullptr)
+            RequestPort(name), owner(owner), blockedPacket(nullptr)
         { }
 
         /**
diff --git a/src/learning_gem5/part2/simple_memobj.hh b/src/learning_gem5/part2/simple_memobj.hh
index 37afeb161f..9f09d96cd3 100644
--- a/src/learning_gem5/part2/simple_memobj.hh
+++ b/src/learning_gem5/part2/simple_memobj.hh
@@ -68,7 +68,7 @@ class SimpleMemobj : public SimObject
          * Constructor. Just calls the superclass constructor.
          */
         CPUSidePort(const std::string& name, SimpleMemobj *owner) :
-            ResponsePort(name, owner), owner(owner), needRetry(false),
+            ResponsePort(name), owner(owner), needRetry(false),
             blockedPacket(nullptr)
         { }
 
@@ -147,7 +147,7 @@ class SimpleMemobj : public SimObject
          * Constructor. Just calls the superclass constructor.
          */
         MemSidePort(const std::string& name, SimpleMemobj *owner) :
-            RequestPort(name, owner), owner(owner), blockedPacket(nullptr)
+            RequestPort(name), owner(owner), blockedPacket(nullptr)
         { }
 
         /**
diff --git a/src/mem/addr_mapper.hh b/src/mem/addr_mapper.hh
index 2f37bbaf2e..40a0bb033b 100644
--- a/src/mem/addr_mapper.hh
+++ b/src/mem/addr_mapper.hh
@@ -101,7 +101,7 @@ class AddrMapper : public SimObject
     {
       public:
         MapperRequestPort(const std::string& _name, AddrMapper& _mapper)
-            : RequestPort(_name, &_mapper), mapper(_mapper)
+            : RequestPort(_name), mapper(_mapper)
         { }
 
       protected:
@@ -158,7 +158,7 @@ class AddrMapper : public SimObject
     {
       public:
         MapperResponsePort(const std::string& _name, AddrMapper& _mapper)
-            : ResponsePort(_name, &_mapper), mapper(_mapper)
+            : ResponsePort(_name), mapper(_mapper)
         {}
 
       protected:
diff --git a/src/mem/bridge.cc b/src/mem/bridge.cc
index 36832ebfc4..fe15de7036 100644
--- a/src/mem/bridge.cc
+++ b/src/mem/bridge.cc
@@ -58,7 +58,7 @@ Bridge::BridgeResponsePort::BridgeResponsePort(const std::string& _name,
                                          BridgeRequestPort& _memSidePort,
                                          Cycles _delay, int _resp_limit,
                                          std::vector<AddrRange> _ranges)
-    : ResponsePort(_name, &_bridge), bridge(_bridge),
+    : ResponsePort(_name), bridge(_bridge),
       memSidePort(_memSidePort), delay(_delay),
       ranges(_ranges.begin(), _ranges.end()),
       outstandingResponses(0), retryReq(false), respQueueLimit(_resp_limit),
@@ -70,7 +70,7 @@ Bridge::BridgeRequestPort::BridgeRequestPort(const std::string& _name,
                                            Bridge& _bridge,
                                            BridgeResponsePort& _cpuSidePort,
                                            Cycles _delay, int _req_limit)
-    : RequestPort(_name, &_bridge), bridge(_bridge),
+    : RequestPort(_name), bridge(_bridge),
       cpuSidePort(_cpuSidePort),
       delay(_delay), reqQueueLimit(_req_limit),
       sendEvent([this]{ trySendTiming(); }, _name)
diff --git a/src/mem/cache/base.cc b/src/mem/cache/base.cc
index 639d02610e..87c44cefb7 100644
--- a/src/mem/cache/base.cc
+++ b/src/mem/cache/base.cc
@@ -67,10 +67,11 @@ namespace gem5
 {
 
 BaseCache::CacheResponsePort::CacheResponsePort(const std::string &_name,
-                                          BaseCache *_cache,
+                                          BaseCache& _cache,
                                           const std::string &_label)
-    : QueuedResponsePort(_name, _cache, queue),
-      queue(*_cache, *this, true, _label),
+    : QueuedResponsePort(_name, queue),
+      cache{_cache},
+      queue(_cache, *this, true, _label),
       blocked(false), mustSendRetry(false),
       sendRetryEvent([this]{ processSendRetry(); }, _name)
 {
@@ -78,7 +79,7 @@ BaseCache::CacheResponsePort::CacheResponsePort(const std::string &_name,
 
 BaseCache::BaseCache(const BaseCacheParams &p, unsigned blk_size)
     : ClockedObject(p),
-      cpuSidePort (p.name + ".cpu_side_port", this, "CpuSidePort"),
+      cpuSidePort (p.name + ".cpu_side_port", *this, "CpuSidePort"),
       memSidePort(p.name + ".mem_side_port", this, "MemSidePort"),
       mshrQueue("MSHRs", p.mshrs, 0, p.demand_mshr_reserve, p.name),
       writeBuffer("write buffer", p.write_buffers, p.mshrs, p.name),
@@ -150,7 +151,7 @@ BaseCache::CacheResponsePort::setBlocked()
     // if we already scheduled a retry in this cycle, but it has not yet
     // happened, cancel it
     if (sendRetryEvent.scheduled()) {
-        owner.deschedule(sendRetryEvent);
+        cache.deschedule(sendRetryEvent);
         DPRINTF(CachePort, "Port descheduled retry\n");
         mustSendRetry = true;
     }
@@ -164,7 +165,7 @@ BaseCache::CacheResponsePort::clearBlocked()
     blocked = false;
     if (mustSendRetry) {
         // @TODO: need to find a better time (next cycle?)
-        owner.schedule(sendRetryEvent, curTick() + 1);
+        cache.schedule(sendRetryEvent, curTick() + 1);
     }
 }
 
@@ -2522,12 +2523,12 @@ bool
 BaseCache::CpuSidePort::recvTimingSnoopResp(PacketPtr pkt)
 {
     // Snoops shouldn't happen when bypassing caches
-    assert(!cache->system->bypassCaches());
+    assert(!cache.system->bypassCaches());
 
     assert(pkt->isResponse());
 
     // Express snoop responses from requestor to responder, e.g., from L1 to L2
-    cache->recvTimingSnoopResp(pkt);
+    cache.recvTimingSnoopResp(pkt);
     return true;
 }
 
@@ -2535,7 +2536,7 @@ BaseCache::CpuSidePort::recvTimingSnoopResp(PacketPtr pkt)
 bool
 BaseCache::CpuSidePort::tryTiming(PacketPtr pkt)
 {
-    if (cache->system->bypassCaches() || pkt->isExpressSnoop()) {
+    if (cache.system->bypassCaches() || pkt->isExpressSnoop()) {
         // always let express snoop packets through even if blocked
         return true;
     } else if (blocked || mustSendRetry) {
@@ -2552,14 +2553,14 @@ BaseCache::CpuSidePort::recvTimingReq(PacketPtr pkt)
 {
     assert(pkt->isRequest());
 
-    if (cache->system->bypassCaches()) {
+    if (cache.system->bypassCaches()) {
         // Just forward the packet if caches are disabled.
         // @todo This should really enqueue the packet rather
-        [[maybe_unused]] bool success = cache->memSidePort.sendTimingReq(pkt);
+        [[maybe_unused]] bool success = cache.memSidePort.sendTimingReq(pkt);
         assert(success);
         return true;
     } else if (tryTiming(pkt)) {
-        cache->recvTimingReq(pkt);
+        cache.recvTimingReq(pkt);
         return true;
     }
     return false;
@@ -2568,39 +2569,39 @@ BaseCache::CpuSidePort::recvTimingReq(PacketPtr pkt)
 Tick
 BaseCache::CpuSidePort::recvAtomic(PacketPtr pkt)
 {
-    if (cache->system->bypassCaches()) {
+    if (cache.system->bypassCaches()) {
         // Forward the request if the system is in cache bypass mode.
-        return cache->memSidePort.sendAtomic(pkt);
+        return cache.memSidePort.sendAtomic(pkt);
     } else {
-        return cache->recvAtomic(pkt);
+        return cache.recvAtomic(pkt);
     }
 }
 
 void
 BaseCache::CpuSidePort::recvFunctional(PacketPtr pkt)
 {
-    if (cache->system->bypassCaches()) {
+    if (cache.system->bypassCaches()) {
         // The cache should be flushed if we are in cache bypass mode,
         // so we don't need to check if we need to update anything.
-        cache->memSidePort.sendFunctional(pkt);
+        cache.memSidePort.sendFunctional(pkt);
         return;
     }
 
     // functional request
-    cache->functionalAccess(pkt, true);
+    cache.functionalAccess(pkt, true);
 }
 
 AddrRangeList
 BaseCache::CpuSidePort::getAddrRanges() const
 {
-    return cache->getAddrRanges();
+    return cache.getAddrRanges();
 }
 
 
 BaseCache::
-CpuSidePort::CpuSidePort(const std::string &_name, BaseCache *_cache,
+CpuSidePort::CpuSidePort(const std::string &_name, BaseCache& _cache,
                          const std::string &_label)
-    : CacheResponsePort(_name, _cache, _label), cache(_cache)
+    : CacheResponsePort(_name, _cache, _label)
 {
 }
 
@@ -2687,7 +2688,7 @@ BaseCache::CacheReqPacketQueue::sendDeferredPacket()
 BaseCache::MemSidePort::MemSidePort(const std::string &_name,
                                     BaseCache *_cache,
                                     const std::string &_label)
-    : CacheRequestPort(_name, _cache, _reqQueue, _snoopRespQueue),
+    : CacheRequestPort(_name, _reqQueue, _snoopRespQueue),
       _reqQueue(*_cache, *this, _snoopRespQueue, _label),
       _snoopRespQueue(*_cache, *this, true, _label), cache(_cache)
 {
diff --git a/src/mem/cache/base.hh b/src/mem/cache/base.hh
index 78571ceb3c..8a06ec2c42 100644
--- a/src/mem/cache/base.hh
+++ b/src/mem/cache/base.hh
@@ -165,10 +165,10 @@ class BaseCache : public ClockedObject
 
       protected:
 
-        CacheRequestPort(const std::string &_name, BaseCache *_cache,
+        CacheRequestPort(const std::string &_name,
                         ReqPacketQueue &_reqQueue,
                         SnoopRespPacketQueue &_snoopRespQueue) :
-            QueuedRequestPort(_name, _cache, _reqQueue, _snoopRespQueue)
+            QueuedRequestPort(_name, _reqQueue, _snoopRespQueue)
         { }
 
         /**
@@ -285,9 +285,11 @@ class BaseCache : public ClockedObject
 
       protected:
 
-        CacheResponsePort(const std::string &_name, BaseCache *_cache,
+        CacheResponsePort(const std::string &_name, BaseCache& _cache,
                        const std::string &_label);
 
+        BaseCache& cache;
+
         /** A normal packet queue used to store responses. */
         RespPacketQueue queue;
 
@@ -309,11 +311,6 @@ class BaseCache : public ClockedObject
      */
     class CpuSidePort : public CacheResponsePort
     {
-      private:
-
-        // a pointer to our specific cache implementation
-        BaseCache *cache;
-
       protected:
         virtual bool recvTimingSnoopResp(PacketPtr pkt) override;
 
@@ -329,7 +326,7 @@ class BaseCache : public ClockedObject
 
       public:
 
-        CpuSidePort(const std::string &_name, BaseCache *_cache,
+        CpuSidePort(const std::string &_name, BaseCache& _cache,
                     const std::string &_label);
 
     };
diff --git a/src/mem/cfi_mem.cc b/src/mem/cfi_mem.cc
index f8c1084700..b5354ffcbd 100644
--- a/src/mem/cfi_mem.cc
+++ b/src/mem/cfi_mem.cc
@@ -464,7 +464,7 @@ CfiMemory::unserialize(CheckpointIn &cp)
 
 CfiMemory::MemoryPort::MemoryPort(const std::string& _name,
                                      CfiMemory& _memory)
-    : ResponsePort(_name, &_memory), mem(_memory)
+    : ResponsePort(_name), mem(_memory)
 { }
 
 AddrRangeList
diff --git a/src/mem/coherent_xbar.hh b/src/mem/coherent_xbar.hh
index 9693d9225e..454012db8f 100644
--- a/src/mem/coherent_xbar.hh
+++ b/src/mem/coherent_xbar.hh
@@ -100,7 +100,7 @@ class CoherentXBar : public BaseXBar
 
         CoherentXBarResponsePort(const std::string &_name,
                              CoherentXBar &_xbar, PortID _id)
-            : QueuedResponsePort(_name, &_xbar, queue, _id), xbar(_xbar),
+            : QueuedResponsePort(_name, queue, _id), xbar(_xbar),
               queue(_xbar, *this)
         { }
 
@@ -166,7 +166,7 @@ class CoherentXBar : public BaseXBar
 
         CoherentXBarRequestPort(const std::string &_name,
                               CoherentXBar &_xbar, PortID _id)
-            : RequestPort(_name, &_xbar, _id), xbar(_xbar)
+            : RequestPort(_name, _id), xbar(_xbar)
         { }
 
       protected:
@@ -228,7 +228,7 @@ class CoherentXBar : public BaseXBar
          */
         SnoopRespPort(QueuedResponsePort& cpu_side_port,
                       CoherentXBar& _xbar) :
-            RequestPort(cpu_side_port.name() + ".snoopRespPort", &_xbar),
+            RequestPort(cpu_side_port.name() + ".snoopRespPort"),
             cpuSidePort(cpu_side_port) { }
 
         /**
diff --git a/src/mem/comm_monitor.hh b/src/mem/comm_monitor.hh
index b9241090fb..fbaca86c57 100644
--- a/src/mem/comm_monitor.hh
+++ b/src/mem/comm_monitor.hh
@@ -124,7 +124,7 @@ class CommMonitor : public SimObject
       public:
 
         MonitorRequestPort(const std::string& _name, CommMonitor& _mon)
-            : RequestPort(_name, &_mon), mon(_mon)
+            : RequestPort(_name), mon(_mon)
         { }
 
       protected:
@@ -190,7 +190,7 @@ class CommMonitor : public SimObject
       public:
 
         MonitorResponsePort(const std::string& _name, CommMonitor& _mon)
-            : ResponsePort(_name, &_mon), mon(_mon)
+            : ResponsePort(_name), mon(_mon)
         { }
 
       protected:
diff --git a/src/mem/dramsim2.cc b/src/mem/dramsim2.cc
index 028ed433fb..9753d690ce 100644
--- a/src/mem/dramsim2.cc
+++ b/src/mem/dramsim2.cc
@@ -359,7 +359,7 @@ DRAMSim2::drain()
 
 DRAMSim2::MemoryPort::MemoryPort(const std::string& _name,
                                  DRAMSim2& _memory)
-    : ResponsePort(_name, &_memory), mem(_memory)
+    : ResponsePort(_name), mem(_memory)
 { }
 
 AddrRangeList
diff --git a/src/mem/dramsim3.cc b/src/mem/dramsim3.cc
index fbffc7b579..c07a32a3c1 100644
--- a/src/mem/dramsim3.cc
+++ b/src/mem/dramsim3.cc
@@ -357,7 +357,7 @@ DRAMsim3::drain()
 
 DRAMsim3::MemoryPort::MemoryPort(const std::string& _name,
                                  DRAMsim3& _memory)
-    : ResponsePort(_name, &_memory), mem(_memory)
+    : ResponsePort(_name), mem(_memory)
 { }
 
 AddrRangeList
diff --git a/src/mem/external_master.hh b/src/mem/external_master.hh
index 61c41661b4..aad873b35a 100644
--- a/src/mem/external_master.hh
+++ b/src/mem/external_master.hh
@@ -75,7 +75,7 @@ class ExternalMaster : public SimObject
       public:
         ExternalPort(const std::string &name_,
             ExternalMaster &owner_) :
-            RequestPort(name_, &owner_), owner(owner_)
+            RequestPort(name_), owner(owner_)
         { }
 
         ~ExternalPort() { }
diff --git a/src/mem/external_slave.hh b/src/mem/external_slave.hh
index 17ab42a0b4..404319b557 100644
--- a/src/mem/external_slave.hh
+++ b/src/mem/external_slave.hh
@@ -77,7 +77,7 @@ class ExternalSlave : public SimObject
       public:
         ExternalPort(const std::string &name_,
             ExternalSlave &owner_) :
-            ResponsePort(name_, &owner_), owner(owner_)
+            ResponsePort(name_), owner(owner_)
         { }
 
         ~ExternalPort() { }
diff --git a/src/mem/mem_checker_monitor.hh b/src/mem/mem_checker_monitor.hh
index 17fd8eec35..808c3f6f45 100644
--- a/src/mem/mem_checker_monitor.hh
+++ b/src/mem/mem_checker_monitor.hh
@@ -95,7 +95,7 @@ class MemCheckerMonitor : public SimObject
       public:
 
         MonitorRequestPort(const std::string& _name, MemCheckerMonitor& _mon)
-            : RequestPort(_name, &_mon), mon(_mon)
+            : RequestPort(_name), mon(_mon)
         { }
 
       protected:
@@ -156,7 +156,7 @@ class MemCheckerMonitor : public SimObject
       public:
 
         MonitorResponsePort(const std::string& _name, MemCheckerMonitor& _mon)
-            : ResponsePort(_name, &_mon), mon(_mon)
+            : ResponsePort(_name), mon(_mon)
         { }
 
       protected:
diff --git a/src/mem/mem_ctrl.cc b/src/mem/mem_ctrl.cc
index beaace1cbf..543d6373d9 100644
--- a/src/mem/mem_ctrl.cc
+++ b/src/mem/mem_ctrl.cc
@@ -1460,7 +1460,7 @@ MemCtrl::getAddrRanges()
 
 MemCtrl::MemoryPort::
 MemoryPort(const std::string& name, MemCtrl& _ctrl)
-    : QueuedResponsePort(name, &_ctrl, queue), queue(_ctrl, *this, true),
+    : QueuedResponsePort(name, queue), queue(_ctrl, *this, true),
       ctrl(_ctrl)
 { }
 
diff --git a/src/mem/mem_delay.cc b/src/mem/mem_delay.cc
index 81d40c7053..6aff1a40ab 100644
--- a/src/mem/mem_delay.cc
+++ b/src/mem/mem_delay.cc
@@ -81,8 +81,7 @@ MemDelay::trySatisfyFunctional(PacketPtr pkt)
 }
 
 MemDelay::RequestPort::RequestPort(const std::string &_name, MemDelay &_parent)
-    : QueuedRequestPort(_name, &_parent,
-                       _parent.reqQueue, _parent.snoopRespQueue),
+    : QueuedRequestPort(_name, _parent.reqQueue, _parent.snoopRespQueue),
       parent(_parent)
 {
 }
@@ -129,7 +128,7 @@ MemDelay::RequestPort::recvTimingSnoopReq(PacketPtr pkt)
 
 MemDelay::ResponsePort::
 ResponsePort(const std::string &_name, MemDelay &_parent)
-    : QueuedResponsePort(_name, &_parent, _parent.respQueue),
+    : QueuedResponsePort(_name, _parent.respQueue),
       parent(_parent)
 {
 }
diff --git a/src/mem/noncoherent_xbar.hh b/src/mem/noncoherent_xbar.hh
index 03f751b77d..3a7e386fbf 100644
--- a/src/mem/noncoherent_xbar.hh
+++ b/src/mem/noncoherent_xbar.hh
@@ -96,7 +96,7 @@ class NoncoherentXBar : public BaseXBar
 
         NoncoherentXBarResponsePort(const std::string &_name,
                                 NoncoherentXBar &_xbar, PortID _id)
-            : QueuedResponsePort(_name, &_xbar, queue, _id), xbar(_xbar),
+            : QueuedResponsePort(_name, queue, _id), xbar(_xbar),
               queue(_xbar, *this)
         { }
 
@@ -156,7 +156,7 @@ class NoncoherentXBar : public BaseXBar
 
         NoncoherentXBarRequestPort(const std::string &_name,
                                  NoncoherentXBar &_xbar, PortID _id)
-            : RequestPort(_name, &_xbar, _id), xbar(_xbar)
+            : RequestPort(_name, _id), xbar(_xbar)
         { }
 
       protected:
diff --git a/src/mem/port.cc b/src/mem/port.cc
index e36323fb74..2a253b91a3 100644
--- a/src/mem/port.cc
+++ b/src/mem/port.cc
@@ -64,7 +64,7 @@ class DefaultRequestPort : public RequestPort
     }
 
   public:
-    DefaultRequestPort() : RequestPort("default_request_port", nullptr) {}
+    DefaultRequestPort() : RequestPort("default_request_port") {}
 
     // Atomic protocol.
     Tick recvAtomicSnoop(PacketPtr) override { blowUp(); }
@@ -89,7 +89,7 @@ class DefaultResponsePort : public ResponsePort
     }
 
   public:
-    DefaultResponsePort() : ResponsePort("default_response_port", nullptr) {}
+    DefaultResponsePort() : ResponsePort("default_response_port") {}
 
     // Atomic protocol.
     Tick recvAtomic(PacketPtr) override { blowUp(); }
diff --git a/src/mem/port.hh b/src/mem/port.hh
index 0d61787f62..a3acffc427 100644
--- a/src/mem/port.hh
+++ b/src/mem/port.hh
@@ -480,9 +480,7 @@ class ResponsePort : public Port, public AtomicResponseProtocol,
 class [[deprecated]] SlavePort : public ResponsePort
 {
   public:
-    SlavePort(const std::string& name, SimObject* _owner,
-              PortID id=InvalidPortID) : ResponsePort(name, _owner, id)
-              {}
+    using ResponsePort::ResponsePort;
 };
 
 inline Tick
diff --git a/src/mem/port_terminator.cc b/src/mem/port_terminator.cc
index 725acdb2d8..6606a8ff8c 100644
--- a/src/mem/port_terminator.cc
+++ b/src/mem/port_terminator.cc
@@ -34,11 +34,11 @@ PortTerminator::PortTerminator(const PortTerminatorParams &params):
     SimObject(params)
 {
     for (int i = 0; i < params.port_req_ports_connection_count; ++i) {
-        reqPorts.emplace_back(name() + ".req_ports" + std::to_string(i), this);
+        reqPorts.emplace_back(name() + ".req_ports" + std::to_string(i));
     }
     for (int j = 0; j < params.port_resp_ports_connection_count; ++j) {
         reqPorts.emplace_back(name() + ".resp_ports" +
-                                std::to_string(j), this);
+                                std::to_string(j));
     }
 }
 
diff --git a/src/mem/port_terminator.hh b/src/mem/port_terminator.hh
index 233b66d1ac..e95598a29f 100644
--- a/src/mem/port_terminator.hh
+++ b/src/mem/port_terminator.hh
@@ -66,8 +66,8 @@ class PortTerminator : public SimObject
     class ReqPort : public RequestPort
     {
       public:
-        ReqPort(const std::string &name, PortTerminator *owner):
-            RequestPort(name, owner)
+        ReqPort(const std::string &name):
+            RequestPort(name)
         {}
       protected:
         bool recvTimingResp(PacketPtr pkt) override
@@ -97,8 +97,8 @@ class PortTerminator : public SimObject
     class RespPort : public ResponsePort
     {
       public:
-        RespPort(const std::string &name, PortTerminator *owner):
-            ResponsePort(name, owner)
+        RespPort(const std::string &name):
+            ResponsePort(name)
         {}
     };
 
diff --git a/src/mem/port_wrapper.cc b/src/mem/port_wrapper.cc
index fd5ebbd614..3b61fb2e97 100644
--- a/src/mem/port_wrapper.cc
+++ b/src/mem/port_wrapper.cc
@@ -30,9 +30,8 @@
 namespace gem5
 {
 
-RequestPortWrapper::RequestPortWrapper(const std::string& name,
-                                       SimObject* _owner, PortID id)
-    : RequestPort(name, _owner, id)
+RequestPortWrapper::RequestPortWrapper(const std::string& name, PortID id)
+    : RequestPort(name, id)
 {
 }
 
@@ -74,9 +73,8 @@ RequestPortWrapper::setTimingCallbacks(RecvTimingRespCallback resp_cb,
     recvReqRetryCb = std::move(retry_cb);
 }
 
-ResponsePortWrapper::ResponsePortWrapper(const std::string& name,
-                                         SimObject* _owner, PortID id)
-    : ResponsePort(name, _owner, id)
+ResponsePortWrapper::ResponsePortWrapper(const std::string& name, PortID id)
+    : ResponsePort(name, id)
 {
 }
 
diff --git a/src/mem/port_wrapper.hh b/src/mem/port_wrapper.hh
index 5dcdd5dc9b..9da118f25d 100644
--- a/src/mem/port_wrapper.hh
+++ b/src/mem/port_wrapper.hh
@@ -80,8 +80,7 @@ class RequestPortWrapper : public RequestPort
     using RecvTimingRespCallback = std::function<bool(PacketPtr)>;
     using RecvReqRetryCallback = std::function<void()>;
 
-    RequestPortWrapper(const std::string& name, SimObject* _owner,
-                       PortID id = InvalidPortID);
+    RequestPortWrapper(const std::string& name, PortID id = InvalidPortID);
 
     void recvRangeChange() override;
 
@@ -120,8 +119,7 @@ class ResponsePortWrapper : public ResponsePort
 
     using RecvRespRetryCallback = std::function<void()>;
 
-    ResponsePortWrapper(const std::string& name, SimObject* _owner,
-                        PortID id = InvalidPortID);
+    ResponsePortWrapper(const std::string& name, PortID id = InvalidPortID);
 
     AddrRangeList getAddrRanges() const override;
 
diff --git a/src/mem/qos/mem_sink.cc b/src/mem/qos/mem_sink.cc
index 3ffe7f4d61..b6b77ca9df 100644
--- a/src/mem/qos/mem_sink.cc
+++ b/src/mem/qos/mem_sink.cc
@@ -352,7 +352,7 @@ MemSinkCtrl::MemSinkCtrlStats::MemSinkCtrlStats(statistics::Group *parent)
 
 MemSinkCtrl::MemoryPort::MemoryPort(const std::string& n,
                                     MemSinkCtrl& m)
-  : QueuedResponsePort(n, &m, queue, true),
+  : QueuedResponsePort(n, queue, true),
    mem(m), queue(mem, *this, true)
 {}
 
diff --git a/src/mem/qport.hh b/src/mem/qport.hh
index 4758f6699b..02282a0172 100644
--- a/src/mem/qport.hh
+++ b/src/mem/qport.hh
@@ -77,9 +77,10 @@ class QueuedResponsePort : public ResponsePort
      * behaviuor in a subclass, and provide the latter to the
      * QueuePort constructor.
      */
-    QueuedResponsePort(const std::string& name, SimObject* owner,
-                    RespPacketQueue &resp_queue, PortID id = InvalidPortID) :
-        ResponsePort(name, owner, id), respQueue(resp_queue)
+    QueuedResponsePort(const std::string& name,
+                       RespPacketQueue &resp_queue,
+                       PortID id = InvalidPortID) :
+        ResponsePort(name, id), respQueue(resp_queue)
     { }
 
     virtual ~QueuedResponsePort() { }
@@ -124,17 +125,17 @@ class QueuedRequestPort : public RequestPort
   public:
 
     /**
-     * Create a QueuedPort with a given name, owner, and a supplied
+     * Create a QueuedPort with a given name, and a supplied
      * implementation of two packet queues. The external definition of
      * the queues enables e.g. the cache to implement a specific queue
      * behaviuor in a subclass, and provide the latter to the
      * QueuePort constructor.
      */
-    QueuedRequestPort(const std::string& name, SimObject* owner,
+    QueuedRequestPort(const std::string& name,
                      ReqPacketQueue &req_queue,
                      SnoopRespPacketQueue &snoop_resp_queue,
                      PortID id = InvalidPortID) :
-        RequestPort(name, owner, id), reqQueue(req_queue),
+        RequestPort(name, id), reqQueue(req_queue),
         snoopRespQueue(snoop_resp_queue)
     { }
 
diff --git a/src/mem/ruby/slicc_interface/AbstractController.cc b/src/mem/ruby/slicc_interface/AbstractController.cc
index 2d13a5a9b6..2d10422487 100644
--- a/src/mem/ruby/slicc_interface/AbstractController.cc
+++ b/src/mem/ruby/slicc_interface/AbstractController.cc
@@ -455,7 +455,7 @@ AbstractController::MemoryPort::recvReqRetry()
 AbstractController::MemoryPort::MemoryPort(const std::string &_name,
                                            AbstractController *_controller,
                                            PortID id)
-    : RequestPort(_name, _controller, id), controller(_controller)
+    : RequestPort(_name, id), controller(_controller)
 {
 }
 
diff --git a/src/mem/ruby/system/RubyPort.cc b/src/mem/ruby/system/RubyPort.cc
index 48f655d007..ae21dc95ad 100644
--- a/src/mem/ruby/system/RubyPort.cc
+++ b/src/mem/ruby/system/RubyPort.cc
@@ -62,10 +62,10 @@ RubyPort::RubyPort(const Params &p)
     : ClockedObject(p), m_ruby_system(p.ruby_system), m_version(p.version),
       m_controller(NULL), m_mandatory_q_ptr(NULL),
       m_usingRubyTester(p.using_ruby_tester), system(p.system),
-      pioRequestPort(csprintf("%s.pio-request-port", name()), this),
-      pioResponsePort(csprintf("%s.pio-response-port", name()), this),
-      memRequestPort(csprintf("%s.mem-request-port", name()), this),
-      memResponsePort(csprintf("%s-mem-response-port", name()), this,
+      pioRequestPort(csprintf("%s.pio-request-port", name()), *this),
+      pioResponsePort(csprintf("%s.pio-response-port", name()), *this),
+      memRequestPort(csprintf("%s.mem-request-port", name()), *this),
+      memResponsePort(csprintf("%s-mem-response-port", name()), *this,
                    p.ruby_system->getAccessBackingStore(), -1,
                    p.no_retry_on_stall),
       gotAddrRanges(p.port_interrupt_out_port_connection_count),
@@ -76,7 +76,7 @@ RubyPort::RubyPort(const Params &p)
     // create the response ports based on the number of connected ports
     for (size_t i = 0; i < p.port_in_ports_connection_count; ++i) {
         response_ports.push_back(new MemResponsePort(csprintf
-            ("%s.response_ports%d", name(), i), this,
+            ("%s.response_ports%d", name(), i), *this,
             p.ruby_system->getAccessBackingStore(),
             i, p.no_retry_on_stall));
     }
@@ -84,7 +84,7 @@ RubyPort::RubyPort(const Params &p)
     // create the request ports based on the number of connected ports
     for (size_t i = 0; i < p.port_interrupt_out_port_connection_count; ++i) {
         request_ports.push_back(new PioRequestPort(csprintf(
-                    "%s.request_ports%d", name(), i), this));
+                    "%s.request_ports%d", name(), i), *this));
     }
 }
 
@@ -134,35 +134,41 @@ RubyPort::getPort(const std::string &if_name, PortID idx)
 }
 
 RubyPort::PioRequestPort::PioRequestPort(const std::string &_name,
-                           RubyPort *_port)
-    : QueuedRequestPort(_name, _port, reqQueue, snoopRespQueue),
-      reqQueue(*_port, *this), snoopRespQueue(*_port, *this)
+                                         RubyPort& _port) :
+    QueuedRequestPort(_name, reqQueue, snoopRespQueue),
+    owner{_port},
+    reqQueue(_port, *this),
+    snoopRespQueue(_port, *this)
 {
     DPRINTF(RubyPort, "Created request pioport on sequencer %s\n", _name);
 }
 
 RubyPort::PioResponsePort::PioResponsePort(const std::string &_name,
-                           RubyPort *_port)
-    : QueuedResponsePort(_name, _port, queue), queue(*_port, *this)
+                                           RubyPort& _port)
+    : QueuedResponsePort(_name, queue), owner{_port}, queue(_port, *this)
 {
     DPRINTF(RubyPort, "Created response pioport on sequencer %s\n", _name);
 }
 
 RubyPort::MemRequestPort::MemRequestPort(const std::string &_name,
-                           RubyPort *_port)
-    : QueuedRequestPort(_name, _port, reqQueue, snoopRespQueue),
-      reqQueue(*_port, *this), snoopRespQueue(*_port, *this)
+                                         RubyPort& _port):
+    QueuedRequestPort(_name, reqQueue, snoopRespQueue),
+    owner{_port},
+    reqQueue(_port, *this),
+    snoopRespQueue(_port, *this)
 {
     DPRINTF(RubyPort, "Created request memport on ruby sequencer %s\n", _name);
 }
 
 RubyPort::
-MemResponsePort::MemResponsePort(const std::string &_name, RubyPort *_port,
-                                     bool _access_backing_store, PortID id,
-                                     bool _no_retry_on_stall)
-    : QueuedResponsePort(_name, _port, queue, id), queue(*_port, *this),
-      access_backing_store(_access_backing_store),
-      no_retry_on_stall(_no_retry_on_stall)
+MemResponsePort::MemResponsePort(const std::string &_name, RubyPort& _port,
+                                 bool _access_backing_store, PortID id,
+                                 bool _no_retry_on_stall):
+    QueuedResponsePort(_name, queue, id),
+    owner{_port},
+    queue(_port, *this),
+    access_backing_store(_access_backing_store),
+    no_retry_on_stall(_no_retry_on_stall)
 {
     DPRINTF(RubyPort, "Created response memport on ruby sequencer %s\n",
             _name);
@@ -171,12 +177,11 @@ MemResponsePort::MemResponsePort(const std::string &_name, RubyPort *_port,
 bool
 RubyPort::PioRequestPort::recvTimingResp(PacketPtr pkt)
 {
-    RubyPort *rp = static_cast<RubyPort *>(&owner);
     DPRINTF(RubyPort, "Response for address: 0x%#x\n", pkt->getAddr());
 
     // send next cycle
-    rp->pioResponsePort.schedTimingResp(
-            pkt, curTick() + rp->m_ruby_system->clockPeriod());
+    owner.pioResponsePort.schedTimingResp(
+            pkt, curTick() + owner.m_ruby_system->clockPeriod());
     return true;
 }
 
@@ -199,8 +204,7 @@ bool RubyPort::MemRequestPort::recvTimingResp(PacketPtr pkt)
             pkt->getAddr(), port->name());
 
     // attempt to send the response in the next cycle
-    RubyPort *rp = static_cast<RubyPort *>(&owner);
-    port->schedTimingResp(pkt, curTick() + rp->m_ruby_system->clockPeriod());
+    port->schedTimingResp(pkt, curTick() + owner.m_ruby_system->clockPeriod());
 
     return true;
 }
@@ -208,16 +212,15 @@ bool RubyPort::MemRequestPort::recvTimingResp(PacketPtr pkt)
 bool
 RubyPort::PioResponsePort::recvTimingReq(PacketPtr pkt)
 {
-    RubyPort *ruby_port = static_cast<RubyPort *>(&owner);
 
-    for (size_t i = 0; i < ruby_port->request_ports.size(); ++i) {
-        AddrRangeList l = ruby_port->request_ports[i]->getAddrRanges();
+    for (size_t i = 0; i < owner.request_ports.size(); ++i) {
+        AddrRangeList l = owner.request_ports[i]->getAddrRanges();
         for (auto it = l.begin(); it != l.end(); ++it) {
             if (it->contains(pkt->getAddr())) {
                 // generally it is not safe to assume success here as
                 // the port could be blocked
                 [[maybe_unused]] bool success =
-                    ruby_port->request_ports[i]->sendTimingReq(pkt);
+                    owner.request_ports[i]->sendTimingReq(pkt);
                 assert(success);
                 return true;
             }
@@ -229,17 +232,16 @@ RubyPort::PioResponsePort::recvTimingReq(PacketPtr pkt)
 Tick
 RubyPort::PioResponsePort::recvAtomic(PacketPtr pkt)
 {
-    RubyPort *ruby_port = static_cast<RubyPort *>(&owner);
     // Only atomic_noncaching mode supported!
-    if (!ruby_port->system->bypassCaches()) {
+    if (!owner.system->bypassCaches()) {
         panic("Ruby supports atomic accesses only in noncaching mode\n");
     }
 
-    for (size_t i = 0; i < ruby_port->request_ports.size(); ++i) {
-        AddrRangeList l = ruby_port->request_ports[i]->getAddrRanges();
+    for (size_t i = 0; i < owner.request_ports.size(); ++i) {
+        AddrRangeList l = owner.request_ports[i]->getAddrRanges();
         for (auto it = l.begin(); it != l.end(); ++it) {
             if (it->contains(pkt->getAddr())) {
-                return ruby_port->request_ports[i]->sendAtomic(pkt);
+                return owner.request_ports[i]->sendAtomic(pkt);
             }
         }
     }
@@ -251,7 +253,6 @@ RubyPort::MemResponsePort::recvTimingReq(PacketPtr pkt)
 {
     DPRINTF(RubyPort, "Timing request for address %#x on port %d\n",
             pkt->getAddr(), id);
-    RubyPort *ruby_port = static_cast<RubyPort *>(&owner);
 
     if (pkt->cacheResponding())
         panic("RubyPort should never see request with the "
@@ -269,7 +270,7 @@ RubyPort::MemResponsePort::recvTimingReq(PacketPtr pkt)
     // pio port.
     if (pkt->cmd != MemCmd::MemSyncReq) {
         if (!pkt->req->isMemMgmt() && !isPhysMemAddress(pkt)) {
-            assert(ruby_port->memRequestPort.isConnected());
+            assert(owner.memRequestPort.isConnected());
             DPRINTF(RubyPort, "Request address %#x assumed to be a "
                     "pio address\n", pkt->getAddr());
 
@@ -278,8 +279,8 @@ RubyPort::MemResponsePort::recvTimingReq(PacketPtr pkt)
             pkt->pushSenderState(new SenderState(this));
 
             // send next cycle
-            RubySystem *rs = ruby_port->m_ruby_system;
-            ruby_port->memRequestPort.schedTimingReq(pkt,
+            RubySystem *rs = owner.m_ruby_system;
+            owner.memRequestPort.schedTimingReq(pkt,
                 curTick() + rs->clockPeriod());
             return true;
         }
@@ -290,7 +291,7 @@ RubyPort::MemResponsePort::recvTimingReq(PacketPtr pkt)
     pkt->pushSenderState(new SenderState(this));
 
     // Submit the ruby request
-    RequestStatus requestStatus = ruby_port->makeRequest(pkt);
+    RequestStatus requestStatus = owner.makeRequest(pkt);
 
     // If the request successfully issued then we should return true.
     // Otherwise, we need to tell the port to retry at a later point
@@ -320,9 +321,8 @@ RubyPort::MemResponsePort::recvTimingReq(PacketPtr pkt)
 Tick
 RubyPort::MemResponsePort::recvAtomic(PacketPtr pkt)
 {
-    RubyPort *ruby_port = static_cast<RubyPort *>(&owner);
     // Only atomic_noncaching mode supported!
-    if (!ruby_port->system->bypassCaches()) {
+    if (!owner.system->bypassCaches()) {
         panic("Ruby supports atomic accesses only in noncaching mode\n");
     }
 
@@ -330,7 +330,7 @@ RubyPort::MemResponsePort::recvAtomic(PacketPtr pkt)
     // pio port.
     if (pkt->cmd != MemCmd::MemSyncReq) {
         if (!isPhysMemAddress(pkt)) {
-            assert(ruby_port->memRequestPort.isConnected());
+            assert(owner.memRequestPort.isConnected());
             DPRINTF(RubyPort, "Request address %#x assumed to be a "
                     "pio address\n", pkt->getAddr());
 
@@ -339,8 +339,8 @@ RubyPort::MemResponsePort::recvAtomic(PacketPtr pkt)
             pkt->pushSenderState(new SenderState(this));
 
             // send next cycle
-            Tick req_ticks = ruby_port->memRequestPort.sendAtomic(pkt);
-            return ruby_port->ticksToCycles(req_ticks);
+            Tick req_ticks = owner.memRequestPort.sendAtomic(pkt);
+            return owner.ticksToCycles(req_ticks);
         }
 
         assert(getOffset(pkt->getAddr()) + pkt->getSize() <=
@@ -348,7 +348,7 @@ RubyPort::MemResponsePort::recvAtomic(PacketPtr pkt)
     }
 
     // Find the machine type of memory controller interface
-    RubySystem *rs = ruby_port->m_ruby_system;
+    RubySystem *rs = owner.m_ruby_system;
     static int mem_interface_type = -1;
     if (mem_interface_type == -1) {
         if (rs->m_abstract_controls[MachineType_Directory].size() != 0) {
@@ -363,7 +363,7 @@ RubyPort::MemResponsePort::recvAtomic(PacketPtr pkt)
     }
 
     // Find the controller for the target address
-    MachineID id = ruby_port->m_controller->mapAddressToMachine(
+    MachineID id = owner.m_controller->mapAddressToMachine(
                     pkt->getAddr(), (MachineType)mem_interface_type);
     AbstractController *mem_interface =
         rs->m_abstract_controls[mem_interface_type][id.getNum()];
@@ -376,15 +376,14 @@ RubyPort::MemResponsePort::recvAtomic(PacketPtr pkt)
 void
 RubyPort::MemResponsePort::addToRetryList()
 {
-    RubyPort *ruby_port = static_cast<RubyPort *>(&owner);
 
     //
     // Unless the request port do not want retries (e.g., the Ruby tester),
     // record the stalled M5 port for later retry when the sequencer
     // becomes free.
     //
-    if (!no_retry_on_stall && !ruby_port->onRetryList(this)) {
-        ruby_port->addToRetryList(this);
+    if (!no_retry_on_stall && !owner.onRetryList(this)) {
+        owner.addToRetryList(this);
     }
 }
 
@@ -393,15 +392,14 @@ RubyPort::MemResponsePort::recvFunctional(PacketPtr pkt)
 {
     DPRINTF(RubyPort, "Functional access for address: %#x\n", pkt->getAddr());
 
-    [[maybe_unused]] RubyPort *rp = static_cast<RubyPort *>(&owner);
-    RubySystem *rs = rp->m_ruby_system;
+    RubySystem *rs = owner.m_ruby_system;
 
     // Check for pio requests and directly send them to the dedicated
     // pio port.
     if (!isPhysMemAddress(pkt)) {
         DPRINTF(RubyPort, "Pio Request for address: 0x%#x\n", pkt->getAddr());
-        assert(rp->pioRequestPort.isConnected());
-        rp->pioRequestPort.sendFunctional(pkt);
+        assert(owner.pioRequestPort.isConnected());
+        owner.pioRequestPort.sendFunctional(pkt);
         return;
     }
 
@@ -626,15 +624,14 @@ RubyPort::MemResponsePort::hitCallback(PacketPtr pkt)
 
     DPRINTF(RubyPort, "Hit callback needs response %d\n", needsResponse);
 
-    RubyPort *ruby_port = static_cast<RubyPort *>(&owner);
-    RubySystem *rs = ruby_port->m_ruby_system;
+    RubySystem *rs = owner.m_ruby_system;
     if (accessPhysMem) {
         // We must check device memory first in case it overlaps with the
         // system memory range.
-        if (ruby_port->system->isDeviceMemAddr(pkt)) {
-            auto dmem = ruby_port->system->getDeviceMemory(pkt);
+        if (owner.system->isDeviceMemAddr(pkt)) {
+            auto dmem = owner.system->getDeviceMemory(pkt);
             dmem->access(pkt);
-        } else if (ruby_port->system->isMemAddr(pkt->getAddr())) {
+        } else if (owner.system->isMemAddr(pkt->getAddr())) {
             rs->getPhysMem()->access(pkt);
         } else {
             panic("Packet is in neither device nor system memory!");
@@ -662,11 +659,10 @@ RubyPort::PioResponsePort::getAddrRanges() const
 {
     // at the moment the assumption is that the request port does not care
     AddrRangeList ranges;
-    RubyPort *ruby_port = static_cast<RubyPort *>(&owner);
 
-    for (size_t i = 0; i < ruby_port->request_ports.size(); ++i) {
+    for (size_t i = 0; i < owner.request_ports.size(); ++i) {
         ranges.splice(ranges.begin(),
-                ruby_port->request_ports[i]->getAddrRanges());
+                owner.request_ports[i]->getAddrRanges());
     }
     for ([[maybe_unused]] const auto &r : ranges)
         DPRINTF(RubyPort, "%s\n", r.to_string());
@@ -676,8 +672,7 @@ RubyPort::PioResponsePort::getAddrRanges() const
 bool
 RubyPort::MemResponsePort::isShadowRomAddress(Addr addr) const
 {
-    RubyPort *ruby_port = static_cast<RubyPort *>(&owner);
-    AddrRangeList ranges = ruby_port->system->getShadowRomRanges();
+    AddrRangeList ranges = owner.system->getShadowRomRanges();
 
     for (auto it = ranges.begin(); it != ranges.end(); ++it) {
         if (it->contains(addr)) {
@@ -691,10 +686,9 @@ RubyPort::MemResponsePort::isShadowRomAddress(Addr addr) const
 bool
 RubyPort::MemResponsePort::isPhysMemAddress(PacketPtr pkt) const
 {
-    RubyPort *ruby_port = static_cast<RubyPort *>(&owner);
     Addr addr = pkt->getAddr();
-    return (ruby_port->system->isMemAddr(addr) && !isShadowRomAddress(addr))
-           || ruby_port->system->isDeviceMemAddr(pkt);
+    return (owner.system->isMemAddr(addr) && !isShadowRomAddress(addr))
+           || owner.system->isDeviceMemAddr(pkt);
 }
 
 void
diff --git a/src/mem/ruby/system/RubyPort.hh b/src/mem/ruby/system/RubyPort.hh
index e9d073e998..66fe0a7686 100644
--- a/src/mem/ruby/system/RubyPort.hh
+++ b/src/mem/ruby/system/RubyPort.hh
@@ -67,11 +67,12 @@ class RubyPort : public ClockedObject
     class MemRequestPort : public QueuedRequestPort
     {
       private:
+        RubyPort& owner;
         ReqPacketQueue reqQueue;
         SnoopRespPacketQueue snoopRespQueue;
 
       public:
-        MemRequestPort(const std::string &_name, RubyPort *_port);
+        MemRequestPort(const std::string &_name, RubyPort& _port);
 
       protected:
         bool recvTimingResp(PacketPtr pkt);
@@ -81,14 +82,16 @@ class RubyPort : public ClockedObject
     class MemResponsePort : public QueuedResponsePort
     {
       private:
+        RubyPort& owner;
         RespPacketQueue queue;
         bool access_backing_store;
         bool no_retry_on_stall;
 
       public:
-        MemResponsePort(const std::string &_name, RubyPort *_port,
-                     bool _access_backing_store,
-                     PortID id, bool _no_retry_on_stall);
+        MemResponsePort(const std::string &_name,
+                        RubyPort& _port,
+                        bool _access_backing_store,
+                        PortID id, bool _no_retry_on_stall);
         void hitCallback(PacketPtr pkt);
         void evictionCallback(Addr address);
 
@@ -112,11 +115,12 @@ class RubyPort : public ClockedObject
     class PioRequestPort : public QueuedRequestPort
     {
       private:
+        RubyPort& owner;
         ReqPacketQueue reqQueue;
         SnoopRespPacketQueue snoopRespQueue;
 
       public:
-        PioRequestPort(const std::string &_name, RubyPort *_port);
+        PioRequestPort(const std::string &_name, RubyPort& _port);
 
       protected:
         bool recvTimingResp(PacketPtr pkt);
@@ -126,10 +130,11 @@ class RubyPort : public ClockedObject
     class PioResponsePort : public QueuedResponsePort
     {
       private:
+        RubyPort& owner;
         RespPacketQueue queue;
 
       public:
-        PioResponsePort(const std::string &_name, RubyPort *_port);
+        PioResponsePort(const std::string &_name, RubyPort& _port);
 
       protected:
         bool recvTimingReq(PacketPtr pkt);
diff --git a/src/mem/serial_link.cc b/src/mem/serial_link.cc
index 7847e4a26b..3c84a769bf 100644
--- a/src/mem/serial_link.cc
+++ b/src/mem/serial_link.cc
@@ -61,7 +61,7 @@ SerialLinkResponsePort(const std::string& _name,
                                          Cycles _delay, int _resp_limit,
                                          const std::vector<AddrRange>&
                                          _ranges)
-    : ResponsePort(_name, &_serial_link), serial_link(_serial_link),
+    : ResponsePort(_name), serial_link(_serial_link),
       mem_side_port(_mem_side_port), delay(_delay),
       ranges(_ranges.begin(), _ranges.end()),
       outstandingResponses(0), retryReq(false),
@@ -75,7 +75,7 @@ SerialLink::SerialLinkRequestPort::SerialLinkRequestPort(const std::string&
                                            SerialLinkResponsePort&
                                            _cpu_side_port, Cycles _delay,
                                            int _req_limit)
-    : RequestPort(_name, &_serial_link), serial_link(_serial_link),
+    : RequestPort(_name), serial_link(_serial_link),
       cpu_side_port(_cpu_side_port), delay(_delay), reqQueueLimit(_req_limit),
       sendEvent([this]{ trySendTiming(); }, _name)
 {
diff --git a/src/mem/simple_mem.cc b/src/mem/simple_mem.cc
index 27fcac1183..a3809c5379 100644
--- a/src/mem/simple_mem.cc
+++ b/src/mem/simple_mem.cc
@@ -271,7 +271,7 @@ SimpleMemory::drain()
 
 SimpleMemory::MemoryPort::MemoryPort(const std::string& _name,
                                      SimpleMemory& _memory)
-    : ResponsePort(_name, &_memory), mem(_memory)
+    : ResponsePort(_name), mem(_memory)
 { }
 
 AddrRangeList
diff --git a/src/mem/sys_bridge.cc b/src/mem/sys_bridge.cc
index 3037a1d287..6c05ade942 100644
--- a/src/mem/sys_bridge.cc
+++ b/src/mem/sys_bridge.cc
@@ -43,9 +43,9 @@ SysBridge::BridgingPort::replaceReqID(PacketPtr pkt)
 }
 
 SysBridge::SysBridge(const SysBridgeParams &p) : SimObject(p),
-    sourcePort(p.name + ".source_port", this, &targetPort,
+    sourcePort(p.name + ".source_port", &targetPort,
             p.target->getRequestorId(this)),
-    targetPort(p.name + ".target_port", this, &sourcePort,
+    targetPort(p.name + ".target_port", &sourcePort,
             p.source->getRequestorId(this))
 {}
 
diff --git a/src/mem/sys_bridge.hh b/src/mem/sys_bridge.hh
index 15a3fc8270..d26139fc0e 100644
--- a/src/mem/sys_bridge.hh
+++ b/src/mem/sys_bridge.hh
@@ -130,9 +130,9 @@ class SysBridge : public SimObject
         SysBridgeSourcePort *sourcePort;
 
       public:
-        SysBridgeTargetPort(const std::string &_name, SimObject *owner,
+        SysBridgeTargetPort(const std::string &_name,
                 SysBridgeSourcePort *source_port, RequestorID _id) :
-            RequestPort(_name, owner), BridgingPort(_id),
+            RequestPort(_name), BridgingPort(_id),
             sourcePort(source_port)
         {
             DPRINTF(SysBridge, "Target side requestor ID = %s.\n", _id);
@@ -223,9 +223,9 @@ class SysBridge : public SimObject
         SysBridgeTargetPort *targetPort;
 
       public:
-        SysBridgeSourcePort(const std::string &_name, SimObject *owner,
+        SysBridgeSourcePort(const std::string &_name,
                 SysBridgeTargetPort *target_port, RequestorID _id) :
-            ResponsePort(_name, owner), BridgingPort(_id),
+            ResponsePort(_name), BridgingPort(_id),
             targetPort(target_port)
         {
             DPRINTF(SysBridge, "Source side requestor ID = %s.\n", _id);
diff --git a/src/mem/thread_bridge.cc b/src/mem/thread_bridge.cc
index efaf19a0e2..0090e4217c 100644
--- a/src/mem/thread_bridge.cc
+++ b/src/mem/thread_bridge.cc
@@ -40,7 +40,7 @@ ThreadBridge::ThreadBridge(const ThreadBridgeParams &p)
 
 ThreadBridge::IncomingPort::IncomingPort(const std::string &name,
                                          ThreadBridge &device)
-    : ResponsePort(name, &device), device_(device)
+    : ResponsePort(name), device_(device)
 {
 }
 
@@ -94,7 +94,7 @@ ThreadBridge::IncomingPort::recvMemBackdoorReq(const MemBackdoorReq &req,
 
 ThreadBridge::OutgoingPort::OutgoingPort(const std::string &name,
                                          ThreadBridge &device)
-    : RequestPort(name, &device), device_(device)
+    : RequestPort(name), device_(device)
 {
 }
 
diff --git a/src/mem/token_port.hh b/src/mem/token_port.hh
index 8a2d15dce5..1bb8707eae 100644
--- a/src/mem/token_port.hh
+++ b/src/mem/token_port.hh
@@ -50,7 +50,7 @@ class TokenRequestPort : public RequestPort
   public:
     TokenRequestPort(const std::string& name, SimObject* owner,
                     PortID id = InvalidPortID) :
-        RequestPort(name, owner, id), tokenManager(nullptr)
+        RequestPort(name, id), tokenManager(nullptr)
     { }
 
     /**
@@ -98,9 +98,9 @@ class TokenResponsePort : public ResponsePort
     void recvRespRetry() override;
 
   public:
-    TokenResponsePort(const std::string& name, ClockedObject *owner,
+    TokenResponsePort(const std::string& name,
                    PortID id = InvalidPortID) :
-        ResponsePort(name, owner, id), tokenRequestPort(nullptr)
+        ResponsePort(name, id), tokenRequestPort(nullptr)
     { }
     ~TokenResponsePort() { }
 
diff --git a/src/mem/tport.cc b/src/mem/tport.cc
index ad8512ce8a..7e8a1093de 100644
--- a/src/mem/tport.cc
+++ b/src/mem/tport.cc
@@ -46,7 +46,7 @@ namespace gem5
 
 SimpleTimingPort::SimpleTimingPort(const std::string& _name,
                                    SimObject* _owner) :
-    QueuedResponsePort(_name, _owner, queueImpl), queueImpl(*_owner, *this)
+    QueuedResponsePort(_name, queueImpl), queueImpl(*_owner, *this)
 {
 }
 
diff --git a/src/sim/system.cc b/src/sim/system.cc
index ee6c70a5d3..806eca3ddf 100644
--- a/src/sim/system.cc
+++ b/src/sim/system.cc
@@ -165,7 +165,7 @@ System::Threads::quiesceTick(ContextID id, Tick when)
 int System::numSystemsRunning = 0;
 
 System::System(const Params &p)
-    : SimObject(p), _systemPort("system_port", this),
+    : SimObject(p), _systemPort("system_port"),
       multiThread(p.multi_thread),
       init_param(p.init_param),
       physProxy(_systemPort, p.cache_line_size),
diff --git a/src/sim/system.hh b/src/sim/system.hh
index d691fb8bf8..1d179e962a 100644
--- a/src/sim/system.hh
+++ b/src/sim/system.hh
@@ -87,8 +87,8 @@ class System : public SimObject, public PCEventScope
         /**
          * Create a system port with a name and an owner.
          */
-        SystemPort(const std::string &_name, SimObject *_owner)
-            : RequestPort(_name, _owner)
+        SystemPort(const std::string &_name)
+            : RequestPort(_name)
         { }
 
         bool
diff --git a/src/sst/outgoing_request_bridge.cc b/src/sst/outgoing_request_bridge.cc
index 5f342665fe..58abfbad46 100644
--- a/src/sst/outgoing_request_bridge.cc
+++ b/src/sst/outgoing_request_bridge.cc
@@ -52,7 +52,7 @@ OutgoingRequestBridge::~OutgoingRequestBridge()
 OutgoingRequestBridge::
 OutgoingRequestPort::OutgoingRequestPort(const std::string &name_,
                                          OutgoingRequestBridge* owner_) :
-    ResponsePort(name_, owner_)
+    ResponsePort(name_)
 {
     owner = owner_;
 }
diff --git a/src/systemc/tlm_bridge/gem5_to_tlm.hh b/src/systemc/tlm_bridge/gem5_to_tlm.hh
index 35d6ba3b4d..565a1648bc 100644
--- a/src/systemc/tlm_bridge/gem5_to_tlm.hh
+++ b/src/systemc/tlm_bridge/gem5_to_tlm.hh
@@ -145,7 +145,7 @@ class Gem5ToTlmBridge : public Gem5ToTlmBridgeBase
       public:
         BridgeResponsePort(const std::string &name_,
                         Gem5ToTlmBridge<BITWIDTH> &bridge_) :
-            ResponsePort(name_, nullptr), bridge(bridge_)
+            ResponsePort(name_), bridge(bridge_)
         {}
     };
 
diff --git a/src/systemc/tlm_bridge/tlm_to_gem5.hh b/src/systemc/tlm_bridge/tlm_to_gem5.hh
index ca5f681c9c..32c477e6f2 100644
--- a/src/systemc/tlm_bridge/tlm_to_gem5.hh
+++ b/src/systemc/tlm_bridge/tlm_to_gem5.hh
@@ -109,7 +109,7 @@ class TlmToGem5Bridge : public TlmToGem5BridgeBase
       public:
         BridgeRequestPort(const std::string &name_,
                          TlmToGem5Bridge<BITWIDTH> &bridge_) :
-            RequestPort(name_, nullptr), bridge(bridge_)
+            RequestPort(name_), bridge(bridge_)
         {}
     };
 

From a0f6f85ad14062c8f08a40a64ae0fb6dbd35d551 Mon Sep 17 00:00:00 2001
From: Gabriel Busnot <gabriel.busnot@arteris.com>
Date: Tue, 24 Jan 2023 10:02:17 +0000
Subject: [PATCH 167/492] sim: Suppress deleted operator= warn in
 Sys::Threads::const_it

Swapping the reference member to threads for a pointer restores
trivial copiablity and movability.

Change-Id: I18d3a5b908d8575aef198f457b85060aa202bd5f
Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/67454
Reviewed-by: Giacomo Travaglini <giacomo.travaglini@arm.com>
Maintainer: Giacomo Travaglini <giacomo.travaglini@arm.com>
Tested-by: kokoro <noreply+kokoro@google.com>
---
 src/sim/system.hh | 15 ++++++---------
 1 file changed, 6 insertions(+), 9 deletions(-)

diff --git a/src/sim/system.hh b/src/sim/system.hh
index 1d179e962a..d2725c32a9 100644
--- a/src/sim/system.hh
+++ b/src/sim/system.hh
@@ -152,19 +152,16 @@ class System : public SimObject, public PCEventScope
         class const_iterator
         {
           private:
-            const Threads &threads;
+            Threads const* threads;
             int pos;
 
             friend class Threads;
 
             const_iterator(const Threads &_threads, int _pos) :
-                threads(_threads), pos(_pos)
+                threads(&_threads), pos(_pos)
             {}
 
           public:
-            const_iterator(const const_iterator &) = default;
-            const_iterator &operator = (const const_iterator &) = default;
-
             using iterator_category = std::forward_iterator_tag;
             using value_type = ThreadContext *;
             using difference_type = int;
@@ -181,16 +178,16 @@ class System : public SimObject, public PCEventScope
             const_iterator
             operator ++ (int)
             {
-                return const_iterator(threads, pos++);
+                return const_iterator(*threads, pos++);
             }
 
-            reference operator * () { return threads.thread(pos).context; }
-            pointer operator -> () { return &threads.thread(pos).context; }
+            reference operator * () { return threads->thread(pos).context; }
+            pointer operator -> () { return &threads->thread(pos).context; }
 
             bool
             operator == (const const_iterator &other) const
             {
-                return &threads == &other.threads && pos == other.pos;
+                return threads == other.threads && pos == other.pos;
             }
 
             bool

From cd2f8b3e6f091857b11f663fec7a0515107377d7 Mon Sep 17 00:00:00 2001
From: Gabriel Busnot <gabriel.busnot@arteris.com>
Date: Fri, 20 Jan 2023 09:37:30 +0000
Subject: [PATCH 168/492] base: Enable non-copiable types in gem5_assert
 message formatting

Previous implementation was taking string formatting arguments by value,
which requires copiability or movability. Took the oportunity to scope
the helper functions inside the macro using lambdas.

Change-Id: I2cefc18df1e99b70e60e64588df61eb72a3e5166
Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/67335
Tested-by: kokoro <noreply+kokoro@google.com>
Maintainer: Bobby Bruce <bbruce@ucdavis.edu>
Reviewed-by: Bobby Bruce <bbruce@ucdavis.edu>
---
 src/base/logging.hh      | 33 ++++++++++++---------------------
 src/base/logging.test.cc |  3 +++
 2 files changed, 15 insertions(+), 21 deletions(-)

diff --git a/src/base/logging.hh b/src/base/logging.hh
index 8949b0cced..22fd2a84d2 100644
--- a/src/base/logging.hh
+++ b/src/base/logging.hh
@@ -43,7 +43,6 @@
 
 #include <cassert>
 #include <sstream>
-#include <tuple>
 #include <utility>
 
 #include "base/compiler.hh"
@@ -289,24 +288,10 @@ class Logger
 #define NDEBUG_DEFINED 0
 #endif
 
-template <typename ...Args>
-inline std::string
-_assertMsg(const std::string &format, Args... args)
-{
-    return std::string(": ") + csprintf(format, args...);
-}
-
-inline const char *
-_assertMsg()
-{
-    return "";
-}
-
 /**
  * The assert macro will function like a normal assert, but will use panic
  * instead of straight abort(). This allows to perform some cleaning up in
- * ExitLogger::exit() before calling abort(). This macro will not check its
- * condition in fast builds, but it must still be valid code.
+ * ExitLogger::exit() before calling abort().
  *
  * @param cond Condition that is checked; if false -> panic
  * @param ...  Printf-based format string with arguments, extends printout.
@@ -315,11 +300,17 @@ _assertMsg()
  *
  * @ingroup api_logger
  */
-#define gem5_assert(cond, ...) \
-    do { \
-        if (GEM5_UNLIKELY(!NDEBUG_DEFINED && !static_cast<bool>(cond))) { \
-            panic("assert(" #cond ") failed%s", _assertMsg(__VA_ARGS__)); \
-        } \
+#define gem5_assert(cond, ...)                                      \
+    do {                                                            \
+        GEM5_UNLIKELY(NDEBUG_DEFINED || static_cast<bool>(cond)) ?  \
+        void(0) :                                                   \
+        [](const auto&... args) {                                   \
+            auto msg = [&]{                                         \
+                if constexpr (sizeof...(args) == 0) return "";      \
+                else return std::string(": ") + csprintf(args...);  \
+            };                                                      \
+            panic("assert(" #cond ") failed%s", msg());             \
+        }(__VA_ARGS__);                                             \
     } while (0)
 /** @} */ // end of api_logger
 
diff --git a/src/base/logging.test.cc b/src/base/logging.test.cc
index 38cc6059db..5d10f6e33a 100644
--- a/src/base/logging.test.cc
+++ b/src/base/logging.test.cc
@@ -553,6 +553,9 @@ TEST(LoggingDeathTest, gem5Assert)
     gem5_assert(true, "message\n");
     ASSERT_DEATH(gem5_assert(false, "message\n"), ::testing::HasSubstr(
         "panic: assert(false) failed: message\nMemory Usage:"));
+    ASSERT_DEATH(gem5_assert(false, "%s, %s!\n", "Hello", "World"),
+        ::testing::HasSubstr(
+        "panic: assert(false) failed: Hello, World!\nMemory Usage:"));
     gem5_assert(true);
     ASSERT_DEATH(gem5_assert(false), ::testing::HasSubstr(
         "panic: assert(false) failed\nMemory Usage:"));

From d7cb6ac2b1b309095820738f5fca3e84a3868b21 Mon Sep 17 00:00:00 2001
From: Gabriel Busnot <gabriel.busnot@arteris.com>
Date: Fri, 20 Jan 2023 13:01:46 +0000
Subject: [PATCH 169/492] base: Turn all logging.hh macros into expression kind

In the previous version, the body of several macros was a statement
(do{...} while(0);) and not an expression. In the new version, all
macros are expressions. Expressions can be used everywhere a statement
is expected and in other locations as well.

For instance, expressions can be used with the comma operator. When
doing generic programming, the comma operator helps manipulating
parameter packs.  With a statement-based implementation,
(gem5_assert(args > 0), ...) could not be written while perfectly
sound.

Also, (c1 ?  a : c2 ?  b : (gem5_assert(c3), c)) is a usefull
expression to assert completeness of cascaded conditions that cannot
be easily and efficiently achieved without an expression kind of
assertion.

Change-Id: Ia0efeb15e6deda6b90529a6f0e00ebe2e9b5d2a0
Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/67336
Maintainer: Bobby Bruce <bbruce@ucdavis.edu>
Tested-by: kokoro <noreply+kokoro@google.com>
Reviewed-by: Bobby Bruce <bbruce@ucdavis.edu>
---
 src/base/logging.hh | 125 +++++++++++++++++++++++++-------------------
 1 file changed, 70 insertions(+), 55 deletions(-)

diff --git a/src/base/logging.hh b/src/base/logging.hh
index 22fd2a84d2..f66423f652 100644
--- a/src/base/logging.hh
+++ b/src/base/logging.hh
@@ -138,9 +138,10 @@ class Logger
     const char *prefix;
 };
 
-
-#define base_message(logger, ...) \
-    logger.print(::gem5::Logger::Loc(__FILE__, __LINE__), __VA_ARGS__)
+#define base_message(logger, ...)                                       \
+    [&log = logger](const auto&... args) {                              \
+        log.print(::gem5::Logger::Loc(__FILE__, __LINE__), args...);    \
+    }(__VA_ARGS__)
 
 /*
  * Only print the message the first time this expression is
@@ -150,19 +151,29 @@ class Logger
  * would have resulted in a different message thoes messages would be
  * supressed.
  */
-#define base_message_once(...) do {                     \
-        static bool once = false;                       \
-        if (!once) {                                    \
-            base_message(__VA_ARGS__);                  \
-            once = true;                                \
-        }                                               \
-    } while (0)
+#define base_message_once(logger, ...)          \
+    [&log = logger](const auto&... args) {      \
+        static bool once{false};                \
+        if (GEM5_UNLIKELY(!once)) {             \
+            once = true;                        \
+            base_message(log, args...);         \
+        }                                       \
+    }(__VA_ARGS__)
 
-#define exit_message(logger, ...)                       \
-    do {                                                \
-        base_message(logger, __VA_ARGS__);              \
-        logger.exit_helper();                           \
-    } while (0)
+/*
+ * logger.exit_helper() can't be called inside the lambda for now as the
+ * lambda's operator() can't be [[noreturn]]. As a result, exit_message and it'
+ * s derivative cannot be used in functions without also specifying a return
+ * value, which is inconvenient if not impossible.
+ */
+
+#define exit_message(logger, ...)               \
+    (                                           \
+        [&log = logger](const auto&... args) {  \
+            base_message(log, args...);         \
+        }(__VA_ARGS__),                         \
+        logger.exit_helper()                    \
+    )
 
 /**
  * This implements a cprintf based panic() function.  panic() should
@@ -200,13 +211,13 @@ class Logger
  *
  * @ingroup api_logger
  */
-#define panic_if(cond, ...)                                  \
-    do {                                                     \
-        if (GEM5_UNLIKELY(cond)) {                             \
-            panic("panic condition " # cond " occurred: %s", \
-                  ::gem5::csprintf(__VA_ARGS__));                    \
-        }                                                    \
-    } while (0)
+#define panic_if(cond, ...)                             \
+    (                                                   \
+    GEM5_UNLIKELY(static_cast<bool>(cond)) ?            \
+    panic("panic condition " # cond " occurred: %s",    \
+        ::gem5::csprintf(__VA_ARGS__)) :                \
+    void(0)                                             \
+    )
 
 
 /**
@@ -222,13 +233,13 @@ class Logger
  *
  * @ingroup api_logger
  */
-#define fatal_if(cond, ...)                                     \
-    do {                                                        \
-        if (GEM5_UNLIKELY(cond)) {                                \
-            fatal("fatal condition " # cond " occurred: %s",    \
-                  ::gem5::csprintf(__VA_ARGS__));                       \
-        }                                                       \
-    } while (0)
+#define fatal_if(cond, ...)                             \
+    (                                                   \
+    GEM5_UNLIKELY(static_cast<bool>(cond)) ?            \
+    fatal("fatal condition " # cond " occurred: %s",    \
+        ::gem5::csprintf(__VA_ARGS__)) :                \
+    void(0)                                             \
+    )
 
 
 /**
@@ -269,17 +280,20 @@ class Logger
  * @ingroup api_logger
  * @{
  */
-#define warn_if(cond, ...) \
-    do { \
-        if (GEM5_UNLIKELY(cond)) \
-            warn(__VA_ARGS__); \
-    } while (0)
+#define warn_if(cond, ...)      \
+    (                           \
+    static_cast<bool>(cond) ?   \
+    warn(__VA_ARGS__) :         \
+    void(0)                     \
+    )
 
 #define warn_if_once(cond, ...) \
-    do { \
-        if (GEM5_UNLIKELY(cond)) \
-            warn_once(__VA_ARGS__); \
-    } while (0)
+    (                           \
+    static_cast<bool>(cond) ?   \
+    warn_once(__VA_ARGS__) :    \
+    void(0)                     \
+    )
+
 /** @} */ // end of api_logger
 
 #ifdef NDEBUG
@@ -300,25 +314,26 @@ class Logger
  *
  * @ingroup api_logger
  */
-#define gem5_assert(cond, ...)                                      \
-    do {                                                            \
-        GEM5_UNLIKELY(NDEBUG_DEFINED || static_cast<bool>(cond)) ?  \
-        void(0) :                                                   \
-        [](const auto&... args) {                                   \
-            auto msg = [&]{                                         \
-                if constexpr (sizeof...(args) == 0) return "";      \
-                else return std::string(": ") + csprintf(args...);  \
-            };                                                      \
-            panic("assert(" #cond ") failed%s", msg());             \
-        }(__VA_ARGS__);                                             \
-    } while (0)
+#define gem5_assert(cond, ...)                                  \
+    (                                                           \
+    GEM5_UNLIKELY(NDEBUG_DEFINED || static_cast<bool>(cond)) ?  \
+    void(0) :                                                   \
+    [](const auto&... args) {                                   \
+        auto msg = [&]{                                         \
+            if constexpr (sizeof...(args) == 0) return "";      \
+            else return std::string(": ") + csprintf(args...);  \
+        };                                                      \
+        panic("assert(" #cond ") failed%s", msg());             \
+    }(__VA_ARGS__)                                              \
+    )
+
 /** @} */ // end of api_logger
 
-#define chatty_assert(...) \
-    do { \
-        gem5_assert(__VA_ARGS__); \
-        GEM5_DEPRECATED_MACRO(chatty_assert, {}, "Please use gem5_assert()"); \
-    } while(0)
+#define chatty_assert(...)                                                   \
+    (                                                                        \
+        gem5_assert(args...),                                                \
+        GEM5_DEPRECATED_MACRO(chatty_assert, {}, "Please use gem5_assert()") \
+    )
 
 } // namespace gem5
 #endif // __BASE_LOGGING_HH__

From de3dba971c0071540ffac24dd1aa5ffa6baa696b Mon Sep 17 00:00:00 2001
From: Gabe Black <gabeblack@google.com>
Date: Thu, 2 Feb 2023 03:26:42 -0800
Subject: [PATCH 170/492] arch-riscv: Get rid of redundant reset fault
 invocation.

It was added in one change, another pending change which also added it
was rebased on top of it, and the redundant addition was left in when
the second change was submitted.

Change-Id: I3faf53bca983d8568af45ec7174c2a064eadc0a6
Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/67571
Maintainer: Gabe Black <gabeblack@google.com>
Tested-by: kokoro <noreply+kokoro@google.com>
Reviewed-by: Roger Chang <rogerycchang@google.com>
---
 src/arch/riscv/bare_metal/fs_workload.cc | 5 -----
 1 file changed, 5 deletions(-)

diff --git a/src/arch/riscv/bare_metal/fs_workload.cc b/src/arch/riscv/bare_metal/fs_workload.cc
index 83f541157a..4f7adb31b6 100644
--- a/src/arch/riscv/bare_metal/fs_workload.cc
+++ b/src/arch/riscv/bare_metal/fs_workload.cc
@@ -59,11 +59,6 @@ BareMetal::initState()
 {
     Workload::initState();
 
-    for (auto *tc: system->threads) {
-        RiscvISA::Reset().invoke(tc);
-        tc->activate();
-    }
-
     warn_if(!bootloader->buildImage().write(system->physProxy),
             "Could not load sections to memory.");
 

From c853187273bb88118704b1af53cfc3b6e1ea0d29 Mon Sep 17 00:00:00 2001
From: Gabe Black <gabeblack@google.com>
Date: Thu, 2 Feb 2023 04:50:19 -0800
Subject: [PATCH 171/492] arch: Add a virtual method to the BaseISA to reset
 its ThreadContext.

This will be used as part of a generic CPU reset mechanism.

Change-Id: I010f6bdaca0cbb6be1799ccdc345c4828515209d
Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/67572
Reviewed-by: Giacomo Travaglini <giacomo.travaglini@arm.com>
Maintainer: Gabe Black <gabeblack@google.com>
Tested-by: kokoro <noreply+kokoro@google.com>
---
 src/arch/generic/isa.hh | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/src/arch/generic/isa.hh b/src/arch/generic/isa.hh
index e4e7929385..e9e4d95d7b 100644
--- a/src/arch/generic/isa.hh
+++ b/src/arch/generic/isa.hh
@@ -43,6 +43,7 @@
 #include <vector>
 
 #include "arch/generic/pcstate.hh"
+#include "base/logging.hh"
 #include "cpu/reg_class.hh"
 #include "mem/packet.hh"
 #include "mem/request.hh"
@@ -83,6 +84,8 @@ class BaseISA : public SimObject
     virtual bool inUserMode() const = 0;
     virtual void copyRegsFrom(ThreadContext *src) = 0;
 
+    virtual void resetThread() { panic("Thread reset not implemented."); }
+
     const RegClasses &regClasses() const { return _regClasses; }
 
     // Locked memory handling functions.

From c9719b44a3ce69b65012ce180142c6014127f718 Mon Sep 17 00:00:00 2001
From: Gabe Black <gabeblack@google.com>
Date: Thu, 2 Feb 2023 04:53:50 -0800
Subject: [PATCH 172/492] arch-riscv: Implement the resetThread method on the
 ISA object.

This method invokes a Reset fault on the associated ThreadContext.

Change-Id: Ie0725b06e0b506640b9038a986a9c56d9eed7011
Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/67573
Maintainer: Gabe Black <gabeblack@google.com>
Tested-by: kokoro <noreply+kokoro@google.com>
Reviewed-by: Roger Chang <rogerycchang@google.com>
---
 src/arch/riscv/isa.cc | 7 +++++++
 src/arch/riscv/isa.hh | 2 ++
 2 files changed, 9 insertions(+)

diff --git a/src/arch/riscv/isa.cc b/src/arch/riscv/isa.cc
index 6e4c380d98..3809c61d63 100644
--- a/src/arch/riscv/isa.cc
+++ b/src/arch/riscv/isa.cc
@@ -34,6 +34,7 @@
 #include <set>
 #include <sstream>
 
+#include "arch/riscv/faults.hh"
 #include "arch/riscv/interrupts.hh"
 #include "arch/riscv/mmu.hh"
 #include "arch/riscv/pagetable.hh"
@@ -723,6 +724,12 @@ ISA::globalClearExclusive()
     tc->getCpuPtr()->wakeup(tc->threadId());
 }
 
+void
+ISA::resetThread()
+{
+    Reset().invoke(tc);
+}
+
 } // namespace RiscvISA
 } // namespace gem5
 
diff --git a/src/arch/riscv/isa.hh b/src/arch/riscv/isa.hh
index e332956972..5a2a610479 100644
--- a/src/arch/riscv/isa.hh
+++ b/src/arch/riscv/isa.hh
@@ -127,6 +127,8 @@ class ISA : public BaseISA
 
     void globalClearExclusive() override;
 
+    void resetThread() override;
+
     RiscvType rvType() const { return rv_type; }
 };
 

From a2d321d47591615aca2a54ce0b770f8ea4888ba1 Mon Sep 17 00:00:00 2001
From: Wei-Han Chen <weihanchen@google.com>
Date: Tue, 17 Jan 2023 02:52:52 +0000
Subject: [PATCH 173/492] fastmodel: change the constructor of bridges

This CL changes the construction of bridges between amba and tlm. This
enables us to add parameters when using this bridge.

Change-Id: I4bbbe8fb1c2573a796a3a0a7976adf3553bbaa86
Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/67297
Maintainer: Gabe Black <gabeblack@google.com>
Tested-by: kokoro <noreply+kokoro@google.com>
Reviewed-by: Yu-hsin Wang <yuhsingw@google.com>
---
 src/arch/arm/fastmodel/amba_from_tlm_bridge.cc | 11 +++--------
 src/arch/arm/fastmodel/amba_from_tlm_bridge.hh |  7 ++++++-
 src/arch/arm/fastmodel/amba_to_tlm_bridge.cc   | 10 ++--------
 src/arch/arm/fastmodel/amba_to_tlm_bridge.hh   |  7 ++++++-
 4 files changed, 17 insertions(+), 18 deletions(-)

diff --git a/src/arch/arm/fastmodel/amba_from_tlm_bridge.cc b/src/arch/arm/fastmodel/amba_from_tlm_bridge.cc
index 4baf0ef7aa..8db0d6af63 100644
--- a/src/arch/arm/fastmodel/amba_from_tlm_bridge.cc
+++ b/src/arch/arm/fastmodel/amba_from_tlm_bridge.cc
@@ -37,7 +37,9 @@ namespace gem5
 namespace fastmodel
 {
 
-AmbaFromTlmBridge64::AmbaFromTlmBridge64(const sc_core::sc_module_name& name) :
+AmbaFromTlmBridge64::AmbaFromTlmBridge64(
+    const AmbaFromTlmBridge64Params &params,
+    const sc_core::sc_module_name& name) :
     amba_pv::amba_pv_from_tlm_bridge<64>(name),
     targetProxy("target_proxy"),
     initiatorProxy("initiator_proxy"),
@@ -116,11 +118,4 @@ AmbaFromTlmBridge64::syncControlExtension(amba_pv::amba_pv_transaction &trans)
 }
 
 } // namespace fastmodel
-
-fastmodel::AmbaFromTlmBridge64 *
-AmbaFromTlmBridge64Params::create() const
-{
-    return new fastmodel::AmbaFromTlmBridge64(name.c_str());
-}
-
 } // namespace gem5
diff --git a/src/arch/arm/fastmodel/amba_from_tlm_bridge.hh b/src/arch/arm/fastmodel/amba_from_tlm_bridge.hh
index 8ea8b8a731..11f7b5d400 100644
--- a/src/arch/arm/fastmodel/amba_from_tlm_bridge.hh
+++ b/src/arch/arm/fastmodel/amba_from_tlm_bridge.hh
@@ -33,6 +33,7 @@
 #include "amba_pv.h"
 #pragma GCC diagnostic pop
 #include "arch/arm/fastmodel/amba_ports.hh"
+#include "params/AmbaFromTlmBridge64.hh"
 #include "systemc/tlm_port_wrapper.hh"
 
 namespace gem5
@@ -46,7 +47,11 @@ namespace fastmodel
 class AmbaFromTlmBridge64 : public amba_pv::amba_pv_from_tlm_bridge<64>
 {
   public:
-    AmbaFromTlmBridge64(const sc_core::sc_module_name &name);
+    AmbaFromTlmBridge64(const AmbaFromTlmBridge64Params &params,
+                        const sc_core::sc_module_name &name);
+    AmbaFromTlmBridge64(const AmbaFromTlmBridge64Params &params) :
+      AmbaFromTlmBridge64(params, params.name.c_str())
+    {}
 
     gem5::Port &gem5_getPort(const std::string &if_name, int idx=-1) override;
 
diff --git a/src/arch/arm/fastmodel/amba_to_tlm_bridge.cc b/src/arch/arm/fastmodel/amba_to_tlm_bridge.cc
index 58f6eeab6b..888e077386 100644
--- a/src/arch/arm/fastmodel/amba_to_tlm_bridge.cc
+++ b/src/arch/arm/fastmodel/amba_to_tlm_bridge.cc
@@ -71,7 +71,8 @@ struct FarAtomicOpFunctor : public AtomicOpFunctor
 namespace fastmodel
 {
 
-AmbaToTlmBridge64::AmbaToTlmBridge64(const sc_core::sc_module_name& name) :
+AmbaToTlmBridge64::AmbaToTlmBridge64(const AmbaToTlmBridge64Params &params,
+                                     const sc_core::sc_module_name& name) :
     amba_pv::amba_pv_to_tlm_bridge<64>(name),
     targetProxy("target_proxy"),
     initiatorProxy("initiator_proxy"),
@@ -198,11 +199,4 @@ AmbaToTlmBridge64::setupControlExtension(amba_pv::amba_pv_transaction &trans)
 }
 
 } // namespace fastmodel
-
-fastmodel::AmbaToTlmBridge64 *
-AmbaToTlmBridge64Params::create() const
-{
-    return new fastmodel::AmbaToTlmBridge64(name.c_str());
-}
-
 } // namespace gem5
diff --git a/src/arch/arm/fastmodel/amba_to_tlm_bridge.hh b/src/arch/arm/fastmodel/amba_to_tlm_bridge.hh
index addaac67f9..176d31fbf1 100644
--- a/src/arch/arm/fastmodel/amba_to_tlm_bridge.hh
+++ b/src/arch/arm/fastmodel/amba_to_tlm_bridge.hh
@@ -33,6 +33,7 @@
 #include "amba_pv.h"
 #pragma GCC diagnostic pop
 #include "arch/arm/fastmodel/amba_ports.hh"
+#include "params/AmbaToTlmBridge64.hh"
 #include "systemc/tlm_port_wrapper.hh"
 
 namespace gem5
@@ -46,7 +47,11 @@ namespace fastmodel
 class AmbaToTlmBridge64 : public amba_pv::amba_pv_to_tlm_bridge<64>
 {
   public:
-    AmbaToTlmBridge64(const sc_core::sc_module_name &name);
+    AmbaToTlmBridge64(const AmbaToTlmBridge64Params &params,
+                      const sc_core::sc_module_name &name);
+    AmbaToTlmBridge64(const AmbaToTlmBridge64Params &params) :
+      AmbaToTlmBridge64(params, params.name.c_str())
+    {}
 
     gem5::Port &gem5_getPort(const std::string &if_name, int idx=-1) override;
 

From 59e16b5695e3af14324916d98597b6574819dd1f Mon Sep 17 00:00:00 2001
From: Wei-Han Chen <weihanchen@google.com>
Date: Fri, 3 Feb 2023 03:21:27 +0000
Subject: [PATCH 174/492] fastmodel: forward stream ID to gem5

This CL enables forwarding stream ID from amba_pv to gem5 world.

The stream ID information is originally stored in master_id of
pv::TransactionAtrribute, then it will be stored to m_id of
amba_pv::amba_pv_extension.

This CL brings the information to stream ID field of
Gem5SystemC::ControlExtension. Then the information can be set to stream
ID of the gem5 packet's request.

After bringing the information to gem5, we can identify the packet's
stream ID from gem5 side. One example usage is PL330. In PL330_DMAC, each
transaction is associated with a stream ID. If we can identitfy the
stream ID, we can, for example, set attribute to specific DMAC channel.

Change-Id: I943ce49fde57b0bcfc18b58c7566eec61cc676f4
Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/67591
Reviewed-by: Yu-hsin Wang <yuhsingw@google.com>
Tested-by: kokoro <noreply+kokoro@google.com>
Maintainer: Gabe Black <gabeblack@google.com>
---
 src/arch/arm/fastmodel/FastModel.py          | 4 ++++
 src/arch/arm/fastmodel/amba_to_tlm_bridge.cc | 7 ++++++-
 src/arch/arm/fastmodel/amba_to_tlm_bridge.hh | 1 +
 3 files changed, 11 insertions(+), 1 deletion(-)

diff --git a/src/arch/arm/fastmodel/FastModel.py b/src/arch/arm/fastmodel/FastModel.py
index 8a28522db4..1ea3c5e8d7 100644
--- a/src/arch/arm/fastmodel/FastModel.py
+++ b/src/arch/arm/fastmodel/FastModel.py
@@ -108,6 +108,10 @@ class AmbaToTlmBridge64(SystemC_ScModule):
     amba = AmbaTargetSocket(64, "AMBA PV target socket")
     tlm = TlmInitiatorSocket(64, "TLM initiator socket")
 
+    set_stream_id = Param.Bool(
+        False, "Set this true to forward stream ID to gem5 world"
+    )
+
 
 class AmbaFromTlmBridge64(SystemC_ScModule):
     type = "AmbaFromTlmBridge64"
diff --git a/src/arch/arm/fastmodel/amba_to_tlm_bridge.cc b/src/arch/arm/fastmodel/amba_to_tlm_bridge.cc
index 888e077386..2f065fcdfe 100644
--- a/src/arch/arm/fastmodel/amba_to_tlm_bridge.cc
+++ b/src/arch/arm/fastmodel/amba_to_tlm_bridge.cc
@@ -77,7 +77,8 @@ AmbaToTlmBridge64::AmbaToTlmBridge64(const AmbaToTlmBridge64Params &params,
     targetProxy("target_proxy"),
     initiatorProxy("initiator_proxy"),
     tlmWrapper(initiatorProxy, std::string(name) + ".tlm", -1),
-    ambaWrapper(amba_pv_s, std::string(name) + ".amba", -1)
+    ambaWrapper(amba_pv_s, std::string(name) + ".amba", -1),
+    setStreamId(params.set_stream_id)
 {
     targetProxy.register_b_transport(this, &AmbaToTlmBridge64::bTransport);
     targetProxy.register_get_direct_mem_ptr(
@@ -191,6 +192,10 @@ AmbaToTlmBridge64::setupControlExtension(amba_pv::amba_pv_transaction &trans)
     control_ex->setSecure(!amba_ex->is_non_secure());
     control_ex->setInstruction(amba_ex->is_instruction());
 
+    if (setStreamId) {
+        control_ex->setStreamId(amba_ex->get_id());
+    }
+
     if (trans.has_mm()) {
         trans.set_auto_extension(control_ex);
     } else {
diff --git a/src/arch/arm/fastmodel/amba_to_tlm_bridge.hh b/src/arch/arm/fastmodel/amba_to_tlm_bridge.hh
index 176d31fbf1..6729604f90 100644
--- a/src/arch/arm/fastmodel/amba_to_tlm_bridge.hh
+++ b/src/arch/arm/fastmodel/amba_to_tlm_bridge.hh
@@ -71,6 +71,7 @@ class AmbaToTlmBridge64 : public amba_pv::amba_pv_to_tlm_bridge<64>
         AmbaToTlmBridge64, 64, tlm::tlm_base_protocol_types> initiatorProxy;
     sc_gem5::TlmInitiatorWrapper<64> tlmWrapper;
     AmbaTarget ambaWrapper;
+    bool setStreamId;
 };
 
 } // namespace fastmodel

From a513e06a1baf762d65d1d8c6dc0297542460e8f6 Mon Sep 17 00:00:00 2001
From: Gabe Black <gabeblack@google.com>
Date: Thu, 2 Feb 2023 07:03:47 -0800
Subject: [PATCH 175/492] fastmodel: Export the reset signals of the GIC.

These are the "reset" and "po_reset" lines. It seems reasonable that
these are the normal reset and the power on reset signals, but that's
not spelled out in the fast model "lisa" file, nor does it explain
exactly what the difference is between them.

Change-Id: I686b4d973fc3cfff8a3ec05f8c95ee2cb6ff6698
Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/67575
Reviewed-by: Jui-min Lee <fcrh@google.com>
Maintainer: Gabe Black <gabeblack@google.com>
Tested-by: kokoro <noreply+kokoro@google.com>
---
 src/arch/arm/fastmodel/GIC/FastModelGIC.py |  4 ++++
 src/arch/arm/fastmodel/GIC/GIC.lisa        |  7 +++++++
 src/arch/arm/fastmodel/GIC/gic.cc          | 21 ++++++++++++++++++++-
 src/arch/arm/fastmodel/GIC/gic.hh          |  5 +++++
 4 files changed, 36 insertions(+), 1 deletion(-)

diff --git a/src/arch/arm/fastmodel/GIC/FastModelGIC.py b/src/arch/arm/fastmodel/GIC/FastModelGIC.py
index ce0a8c5fb4..b1a9a3c8a1 100644
--- a/src/arch/arm/fastmodel/GIC/FastModelGIC.py
+++ b/src/arch/arm/fastmodel/GIC/FastModelGIC.py
@@ -42,6 +42,7 @@ from m5.SimObject import SimObject
 from m5.objects.FastModel import AmbaInitiatorSocket, AmbaTargetSocket
 from m5.objects.Gic import BaseGic
 from m5.objects.IntPin import VectorIntSourcePin
+from m5.objects.ResetPort import ResetResponsePort
 from m5.objects.SystemC import SystemC_ScModule
 
 GICV3_COMMS_TARGET_ROLE = "GICV3 COMMS TARGET"
@@ -850,6 +851,9 @@ class FastModelGIC(BaseGic):
 
     wake_request = VectorIntSourcePin("GIC wake request initiator")
 
+    reset = ResetResponsePort("Reset")
+    po_reset = ResetResponsePort("Power on reset")
+
     # Used for DTB autogeneration
     _state = FdtState(addr_cells=2, size_cells=2, interrupt_cells=3)
 
diff --git a/src/arch/arm/fastmodel/GIC/GIC.lisa b/src/arch/arm/fastmodel/GIC/GIC.lisa
index 34b09c8366..5443b55e06 100644
--- a/src/arch/arm/fastmodel/GIC/GIC.lisa
+++ b/src/arch/arm/fastmodel/GIC/GIC.lisa
@@ -56,6 +56,10 @@ component GIC
         // Outgoing wake requests.
         gic.wake_request => self.wake_request;
 
+        // Reset signals.
+        self.normal_reset => gic.reset;
+        self.po_reset => gic.po_reset;
+
         // Internal ports for PPI and SPI programmatic access.
         self.ppi_0 => gic.ppi_in_0;
         self.ppi_1 => gic.ppi_in_1;
@@ -405,6 +409,9 @@ component GIC
         }
     }
 
+    slave port<Signal> normal_reset;
+    slave port<Signal> po_reset;
+
     internal slave port<Signal> spi[988];
 
     internal slave port<Signal> ppi_0[16];
diff --git a/src/arch/arm/fastmodel/GIC/gic.cc b/src/arch/arm/fastmodel/GIC/gic.cc
index 493aa81fcd..5f01cfb8bb 100644
--- a/src/arch/arm/fastmodel/GIC/gic.cc
+++ b/src/arch/arm/fastmodel/GIC/gic.cc
@@ -72,10 +72,15 @@ SCGIC::Terminator::sendTowardsCPU(uint8_t len, const uint8_t *data)
 
 SCGIC::SCGIC(const SCFastModelGICParams &params,
              sc_core::sc_module_name _name)
-    : scx_evs_GIC(_name), _params(params)
+    : scx_evs_GIC(_name), _params(params),
+      resetPort(params.name + ".reset", 0),
+      poResetPort(params.name + ".po_reset", 0)
 {
     signalInterrupt.bind(signal_interrupt);
 
+    resetPort.signal_out.bind(scx_evs_GIC::normal_reset);
+    poResetPort.signal_out.bind(scx_evs_GIC::po_reset);
+
     for (int i = 0; i < wake_request.size(); i++) {
         wakeRequests.emplace_back(
             new SignalReceiver(csprintf("%s.wakerequest[%d]", name(), i)));
@@ -298,6 +303,18 @@ SCGIC::SCGIC(const SCFastModelGICParams &params,
     set_parameter("gic.consolidators", params.consolidators);
 }
 
+Port &
+SCGIC::gem5_getPort(const std::string &if_name, int idx)
+{
+    if (if_name == "reset") {
+        return resetPort;
+    } else if (if_name == "po_reset") {
+        return poResetPort;
+    } else {
+        return scx_evs_GIC::gem5_getPort(if_name, idx);
+    }
+}
+
 void
 SCGIC::before_end_of_elaboration()
 {
@@ -341,6 +358,8 @@ GIC::getPort(const std::string &if_name, PortID idx)
         return *ptr;
     } else if (if_name == "wake_request") {
         return *wakeRequestPorts.at(idx);
+    } else if (if_name == "reset" || if_name == "po_reset") {
+        return scGIC->gem5_getPort(if_name, idx);
     } else {
         return BaseGic::getPort(if_name, idx);
     }
diff --git a/src/arch/arm/fastmodel/GIC/gic.hh b/src/arch/arm/fastmodel/GIC/gic.hh
index 0e502fc633..070fe3bb77 100644
--- a/src/arch/arm/fastmodel/GIC/gic.hh
+++ b/src/arch/arm/fastmodel/GIC/gic.hh
@@ -37,6 +37,7 @@
 
 #include "arch/arm/fastmodel/amba_ports.hh"
 #include "arch/arm/fastmodel/common/signal_receiver.hh"
+#include "arch/arm/fastmodel/common/signal_sender.hh"
 #include "dev/arm/base_gic.hh"
 #include "dev/intpin.hh"
 #include "params/FastModelGIC.hh"
@@ -91,9 +92,13 @@ class SCGIC : public scx_evs_GIC
     SCGIC(const SCFastModelGICParams &p) : SCGIC(p, p.name.c_str()) {}
     SCGIC(const SCFastModelGICParams &params, sc_core::sc_module_name _name);
 
+    Port &gem5_getPort(const std::string &if_name, int idx) override;
+
     SignalInterruptInitiatorSocket signalInterrupt;
 
     std::vector<std::unique_ptr<SignalReceiver>> wakeRequests;
+    SignalSender resetPort;
+    SignalSender poResetPort;
 
     void before_end_of_elaboration() override;
 

From 8a774e07b259c2d0e5507d39a1c234727c9de5ce Mon Sep 17 00:00:00 2001
From: Gabriel Busnot <gabriel.busnot@arteris.com>
Date: Mon, 6 Feb 2023 12:51:22 +0000
Subject: [PATCH 176/492] dev-amdgpu: Patch forgotten port after mem port owner
 deprecation

Change-Id: I82f88b8962d9f04521e549ca1383c42f2b5b3ffc
Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/67631
Maintainer: Bobby Bruce <bbruce@ucdavis.edu>
Reviewed-by: Jason Lowe-Power <power.jg@gmail.com>
Maintainer: Jason Lowe-Power <power.jg@gmail.com>
Tested-by: kokoro <noreply+kokoro@google.com>
Reviewed-by: Bobby Bruce <bbruce@ucdavis.edu>
---
 src/mem/ruby/system/GPUCoalescer.cc | 2 +-
 src/mem/ruby/system/GPUCoalescer.hh | 4 ++--
 2 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/src/mem/ruby/system/GPUCoalescer.cc b/src/mem/ruby/system/GPUCoalescer.cc
index a0808faa2b..8bde3f7bc8 100644
--- a/src/mem/ruby/system/GPUCoalescer.cc
+++ b/src/mem/ruby/system/GPUCoalescer.cc
@@ -190,7 +190,7 @@ GPUCoalescer::GPUCoalescer(const Params &p)
                  false, Event::Progress_Event_Pri),
       uncoalescedTable(this),
       deadlockCheckEvent([this]{ wakeup(); }, "GPUCoalescer deadlock check"),
-      gmTokenPort(name() + ".gmTokenPort", this)
+      gmTokenPort(name() + ".gmTokenPort")
 {
     m_store_waiting_on_load_cycles = 0;
     m_store_waiting_on_store_cycles = 0;
diff --git a/src/mem/ruby/system/GPUCoalescer.hh b/src/mem/ruby/system/GPUCoalescer.hh
index 1120947fd5..dd28855547 100644
--- a/src/mem/ruby/system/GPUCoalescer.hh
+++ b/src/mem/ruby/system/GPUCoalescer.hh
@@ -216,9 +216,9 @@ class GPUCoalescer : public RubyPort
     class GMTokenPort : public TokenResponsePort
     {
       public:
-        GMTokenPort(const std::string& name, ClockedObject *owner,
+        GMTokenPort(const std::string& name,
                     PortID id = InvalidPortID)
-            : TokenResponsePort(name, owner, id)
+            : TokenResponsePort(name, id)
         { }
         ~GMTokenPort() { }
 

From 7371e468225cfacc871e5b965f99b130a2fcc123 Mon Sep 17 00:00:00 2001
From: Earl Ou <shunhsingou@google.com>
Date: Wed, 1 Feb 2023 21:54:05 -0800
Subject: [PATCH 177/492] mem: use default backdoor behavior for thread_bridge

The original backdoor implementation is incorrect. We use simply
fallback to default (disable backdoor) as backdoor across threads is not
thread-safe in most of cases.

Change-Id: Ia39be0dda4f16917cc3565eb5b012270e6d7697a
Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/67531
Maintainer: Gabe Black <gabe.black@gmail.com>
Tested-by: kokoro <noreply+kokoro@google.com>
Reviewed-by: Yu-hsin Wang <yuhsingw@google.com>
---
 src/mem/thread_bridge.cc | 14 --------------
 src/mem/thread_bridge.hh |  4 ----
 2 files changed, 18 deletions(-)

diff --git a/src/mem/thread_bridge.cc b/src/mem/thread_bridge.cc
index 0090e4217c..5af2a59de3 100644
--- a/src/mem/thread_bridge.cc
+++ b/src/mem/thread_bridge.cc
@@ -64,12 +64,6 @@ ThreadBridge::IncomingPort::recvRespRetry()
 
 // AtomicResponseProtocol
 Tick
-ThreadBridge::IncomingPort::recvAtomicBackdoor(PacketPtr pkt,
-                                               MemBackdoorPtr &backdoor)
-{
-    panic("ThreadBridge only supports atomic/functional access.");
-}
-Tick
 ThreadBridge::IncomingPort::recvAtomic(PacketPtr pkt)
 {
     EventQueue::ScopedMigration migrate(device_.eventQueue());
@@ -84,14 +78,6 @@ ThreadBridge::IncomingPort::recvFunctional(PacketPtr pkt)
     device_.out_port_.sendFunctional(pkt);
 }
 
-void
-ThreadBridge::IncomingPort::recvMemBackdoorReq(const MemBackdoorReq &req,
-                                               MemBackdoorPtr &backdoor)
-{
-    EventQueue::ScopedMigration migrate(device_.eventQueue());
-    device_.out_port_.sendMemBackdoorReq(req, backdoor);
-}
-
 ThreadBridge::OutgoingPort::OutgoingPort(const std::string &name,
                                          ThreadBridge &device)
     : RequestPort(name), device_(device)
diff --git a/src/mem/thread_bridge.hh b/src/mem/thread_bridge.hh
index 92cb078dd1..8a253fdd55 100644
--- a/src/mem/thread_bridge.hh
+++ b/src/mem/thread_bridge.hh
@@ -55,14 +55,10 @@ class ThreadBridge : public SimObject
         void recvRespRetry() override;
 
         // AtomicResponseProtocol
-        Tick recvAtomicBackdoor(PacketPtr pkt,
-                                MemBackdoorPtr &backdoor) override;
         Tick recvAtomic(PacketPtr pkt) override;
 
         // FunctionalResponseProtocol
         void recvFunctional(PacketPtr pkt) override;
-        void recvMemBackdoorReq(const MemBackdoorReq &req,
-                                MemBackdoorPtr &backdoor) override;
 
       private:
         ThreadBridge &device_;

From e44cbe724b7c1746eea17e2e3b71b3806270ec7d Mon Sep 17 00:00:00 2001
From: Earl Ou <shunhsingou@google.com>
Date: Wed, 1 Feb 2023 21:55:51 -0800
Subject: [PATCH 178/492] sim: handle async events in main thread only

In the current implementation pollqueue is not thread safe. The design
of multi threads handle async events is thus causing issue in parallel
environment. Given the low rate of async events, it should be OK to only
handle them in the main thread to avoid unexpected racing issues.

Change-Id: Iddd512235e84e9d77f60985bb1771aa4cc693004
Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/67533
Reviewed-by: Gabe Black <gabe.black@gmail.com>
Reviewed-by: Yu-hsin Wang <yuhsingw@google.com>
Maintainer: Gabe Black <gabe.black@gmail.com>
Tested-by: kokoro <noreply+kokoro@google.com>
---
 src/sim/simulate.cc | 28 ++++------------------------
 1 file changed, 4 insertions(+), 24 deletions(-)

diff --git a/src/sim/simulate.cc b/src/sim/simulate.cc
index 86d516d39a..abd2b1d391 100644
--- a/src/sim/simulate.cc
+++ b/src/sim/simulate.cc
@@ -43,7 +43,6 @@
 #include "sim/simulate.hh"
 
 #include <atomic>
-#include <mutex>
 #include <thread>
 
 #include "base/logging.hh"
@@ -273,28 +272,6 @@ terminateEventQueueThreads()
 }
 
 
-/**
- * Test and clear the global async_event flag, such that each time the
- * flag is cleared, only one thread returns true (and thus is assigned
- * to handle the corresponding async event(s)).
- */
-static bool
-testAndClearAsyncEvent()
-{
-    static std::mutex mutex;
-
-    bool was_set = false;
-    mutex.lock();
-
-    if (async_event) {
-        was_set = true;
-        async_event = false;
-    }
-
-    mutex.unlock();
-    return was_set;
-}
-
 /**
  * The main per-thread simulation loop. This loop is executed by all
  * simulation threads (the main thread and the subordinate threads) in
@@ -307,6 +284,8 @@ doSimLoop(EventQueue *eventq)
     curEventQueue(eventq);
     eventq->handleAsyncInsertions();
 
+    bool mainQueue = eventq == getEventQueue(0);
+
     while (1) {
         // there should always be at least one event (the SimLoopExitEvent
         // we just scheduled) in the queue
@@ -314,7 +293,8 @@ doSimLoop(EventQueue *eventq)
         assert(curTick() <= eventq->nextTick() &&
                "event scheduled in the past");
 
-        if (async_event && testAndClearAsyncEvent()) {
+        if (mainQueue && async_event) {
+            async_event = false;
             // Take the event queue lock in case any of the service
             // routines want to schedule new events.
             std::lock_guard<EventQueue> lock(*eventq);

From aee282b79f1efd556fb6fdda1165c0991f4457e6 Mon Sep 17 00:00:00 2001
From: Melissa Jost <melissakjost@gmail.com>
Date: Wed, 16 Nov 2022 15:26:33 -0800
Subject: [PATCH 179/492] tests: Update testing documentation

This edits the documentation regarding the usage of the --isa
tag, as this has fallen out of date in regards to the new
'ALL' isa.

Change-Id: I3b672ac2c03dd109bba458db688af05ed4135a91
Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/65651
Tested-by: kokoro <noreply+kokoro@google.com>
Reviewed-by: Bobby Bruce <bbruce@ucdavis.edu>
Maintainer: Bobby Bruce <bbruce@ucdavis.edu>
---
 TESTING.md            | 16 ++++++++++++----
 tests/gem5/fixture.py |  3 +--
 2 files changed, 13 insertions(+), 6 deletions(-)

diff --git a/TESTING.md b/TESTING.md
index 2273e31ea7..146aeac8b1 100644
--- a/TESTING.md
+++ b/TESTING.md
@@ -86,10 +86,10 @@ For instance, if you want to run only with `gem5.opt`, you can use
 ./main.py run --variant opt
 ```
 
-Or, if you want to just run X86 tests with the `gem5.opt` binary:
+Or, if you want to just run quick tests with the `gem5.opt` binary:
 
 ```shell
-./main.py run --length quick --variant opt --isa X86
+./main.py run --length quick --variant opt
 ```
 
 
@@ -102,6 +102,14 @@ To view all of the available tags, use
 The output is split into tag *types* (e.g., isa, variant, length) and the
 tags for each type are listed after the type name.
 
+Note that when using the isa tag type, tests were traditionally sorted based
+on what compilation it required. However, as tests have switched to all be
+compiled under the ALL compilation, which includes all ISAs so one doesn't
+need to compile each one individually, using the isa tag for ISAs other than
+ALL has become a less optimal way of searching for tests.  It would instead
+be better to run subsets of tests based on their directories, as described
+above.
+
 You can specify "or" between tags within the same type by using the tag flag
 multiple times. For instance, to run everything that is tagged "opt" or "fast"
 use
@@ -112,10 +120,10 @@ use
 
 You can also specify "and" between different types of tags by specifying more
 than one type on the command line. For instance, this will only run tests with
-both the "X86" and "opt" tags.
+both the "ALL" and "opt" tags.
 
 ```shell
-./main.py run --isa X86 --variant opt
+./main.py run --isa All --variant opt
 ```
 
 ## Running tests in batch
diff --git a/tests/gem5/fixture.py b/tests/gem5/fixture.py
index c8bc79ff64..6f5dd616ab 100644
--- a/tests/gem5/fixture.py
+++ b/tests/gem5/fixture.py
@@ -174,8 +174,7 @@ class SConsFixture(UniqueFixture):
             )
             log.test_log.message("%s" % (", ".join(self.targets)))
             log.test_log.message(
-                "You may want to run with only a single ISA"
-                "(--isa=), use --skip-build, or use 'rerun'."
+                "You may want to use --skip-build, or use 'rerun'."
             )
 
         command.extend(self.targets)

From d1f76741c6df9a832b15b4c8a7e68d0397d0253c Mon Sep 17 00:00:00 2001
From: Gabe Black <gabeblack@google.com>
Date: Thu, 2 Feb 2023 07:50:12 -0800
Subject: [PATCH 180/492] dev: Add a definition for VectorResetResponsePort.

This is just a simple extension of the regular ResetResponsePort, and
is useful if there is a collection of reset pins on a device.

Change-Id: I6ccb21e949d3a51bf8b788ffd23e4b2b02706da9
Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/67576
Tested-by: kokoro <noreply+kokoro@google.com>
Reviewed-by: Yu-hsin Wang <yuhsingw@google.com>
Maintainer: Gabe Black <gabeblack@google.com>
---
 src/dev/ResetPort.py | 9 ++++++++-
 1 file changed, 8 insertions(+), 1 deletion(-)

diff --git a/src/dev/ResetPort.py b/src/dev/ResetPort.py
index f35bc117c0..15caa476ec 100644
--- a/src/dev/ResetPort.py
+++ b/src/dev/ResetPort.py
@@ -42,8 +42,15 @@ class ResetResponsePort(Port):
         super().__init__(RESET_RESPONSE_ROLE, desc)
 
 
-# VectorResetRequestPort presents a bank of artifact reset request
+# VectorResetRequestPort represents a bank of artifact reset request
 # ports.
 class VectorResetRequestPort(VectorPort):
     def __init__(self, desc):
         super().__init__(RESET_REQUEST_ROLE, desc, is_source=True)
+
+
+# VectorResetResponsePort represents a bank of artifact reset request
+# ports.
+class VectorResetResponsePort(VectorPort):
+    def __init__(self, desc):
+        super().__init__(RESET_RESPONSE_ROLE, desc)

From bd9e126d5e8c7d4e37833c57d96e078f7c1c273c Mon Sep 17 00:00:00 2001
From: Gabe Black <gabeblack@google.com>
Date: Thu, 2 Feb 2023 04:59:26 -0800
Subject: [PATCH 181/492] cpu: Add a generic model_reset port on the BaseCPU.

This port will stop execution on the CPU when raised. When lowered, it
will allow execution to reset the state of the CPU and allow execution
to resume. The state could theoretically be reset when the reset state
starts, but then it wouldn't reflect the most up to date condition of
the CPU when resuming. For instance, if a reset vector was set
somehow, that wouldn't be updated if it was changed while reset was
asserted. The tradeoff is that the state won't look like it will when
execution resumes while reset is held (to GDB for instance), but that
seems like a more obvious and less common sort of problem.

This signal is managed by the BaseCPU itself, but is backed by a
virtual method which can be overridden by other CPU types which may
not work the same way or have the same components. For instance, a
fast model CPU could toggle reset lines on the underlying model and
let it handle resetting all the state.

The fast models in particular already have a generic reset line with
the same name, but they have it at the level of the fast model which
may have multiple cores within it, each represented by a gem5 CPU.

It isn't implemented here, but there could be some sort of cooperation
between these signals where the reset at the core level is considered
an "or" of the cluster level reset and the individual core level
resets. At least in the A76 model, there are resets for each individual
core within the cluster as well, which the generic reset toggles.

Another option would be to get rid of the whole cluster reset pin, and
make the user gang the resets for each of the cores together to
whatever reset signal they're using. That's effectively what the
cluster level reset is doing, but within the C++ of the model wrapper
instead of in the python config.

Change-Id: Ie6b4769298ea224ec5dc88360cbb52ee8fbbf69c
Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/67574
Tested-by: kokoro <noreply+kokoro@google.com>
Reviewed-by: Roger Chang <rogerycchang@google.com>
Maintainer: Gabe Black <gabeblack@google.com>
Reviewed-by: Yu-hsin Wang <yuhsingw@google.com>
---
 src/cpu/BaseCPU.py |  3 +++
 src/cpu/base.cc    | 41 +++++++++++++++++++++++++++++++++++++++++
 src/cpu/base.hh    | 16 ++++++++++++++++
 3 files changed, 60 insertions(+)

diff --git a/src/cpu/BaseCPU.py b/src/cpu/BaseCPU.py
index 438d4f45df..d77036a480 100644
--- a/src/cpu/BaseCPU.py
+++ b/src/cpu/BaseCPU.py
@@ -53,6 +53,7 @@ from m5.objects.CPUTracers import ExeTracer
 from m5.objects.SubSystem import SubSystem
 from m5.objects.ClockDomain import *
 from m5.objects.Platform import Platform
+from m5.objects.ResetPort import ResetResponsePort
 
 default_tracer = ExeTracer()
 
@@ -153,6 +154,8 @@ class BaseCPU(ClockedObject):
         "between CPU models)",
     )
 
+    model_reset = ResetResponsePort("Generic reset for the CPU")
+
     tracer = Param.InstTracer(default_tracer, "Instruction tracer")
 
     icache_port = RequestPort("Instruction Port")
diff --git a/src/cpu/base.cc b/src/cpu/base.cc
index 98c53d4895..60d443af8c 100644
--- a/src/cpu/base.cc
+++ b/src/cpu/base.cc
@@ -47,6 +47,8 @@
 #include <sstream>
 #include <string>
 
+#include "arch/generic/decoder.hh"
+#include "arch/generic/isa.hh"
 #include "arch/generic/tlb.hh"
 #include "base/cprintf.hh"
 #include "base/loader/symtab.hh"
@@ -130,6 +132,7 @@ BaseCPU::BaseCPU(const Params &p, bool is_checker)
       _dataRequestorId(p.system->getRequestorId(this, "data")),
       _taskId(context_switch_task_id::Unknown), _pid(invldPid),
       _switchedOut(p.switched_out), _cacheLineSize(p.system->cacheLineSize()),
+      modelResetPort(p.name + ".model_reset"),
       interrupts(p.interrupts), numThreads(p.numThreads), system(p.system),
       previousCycle(0), previousState(CPU_STATE_SLEEP),
       functionTraceStream(nullptr), currentFunctionStart(0),
@@ -178,6 +181,10 @@ BaseCPU::BaseCPU(const Params &p, bool is_checker)
         fatal("Number of ISAs (%i) assigned to the CPU does not equal number "
               "of threads (%i).\n", params().isa.size(), numThreads);
     }
+
+    modelResetPort.onChange([this](const bool &new_val) {
+        setReset(new_val);
+    });
 }
 
 void
@@ -413,6 +420,8 @@ BaseCPU::getPort(const std::string &if_name, PortID idx)
         return getDataPort();
     else if (if_name == "icache_port")
         return getInstPort();
+    else if (if_name == "model_reset")
+        return modelResetPort;
     else
         return ClockedObject::getPort(if_name, idx);
 }
@@ -479,6 +488,12 @@ BaseCPU::findContext(ThreadContext *tc)
 void
 BaseCPU::activateContext(ThreadID thread_num)
 {
+    if (modelResetPort.state()) {
+        DPRINTF(Thread, "CPU in reset, not activating context %d\n",
+                threadContexts[thread_num]->contextId());
+        return;
+    }
+
     DPRINTF(Thread, "activate contextId %d\n",
             threadContexts[thread_num]->contextId());
     // Squash enter power gating event while cpu gets activated
@@ -602,6 +617,32 @@ BaseCPU::takeOverFrom(BaseCPU *oldCPU)
     // we are switching to.
     getInstPort().takeOverFrom(&oldCPU->getInstPort());
     getDataPort().takeOverFrom(&oldCPU->getDataPort());
+
+    // Switch over the reset line as well, if necessary.
+    if (oldCPU->modelResetPort.isConnected())
+        modelResetPort.takeOverFrom(&oldCPU->modelResetPort);
+}
+
+void
+BaseCPU::setReset(bool state)
+{
+    for (auto tc: threadContexts) {
+        if (state) {
+            // As we enter reset, stop execution.
+            tc->quiesce();
+        } else {
+            // As we leave reset, first reset thread state,
+            tc->getIsaPtr()->resetThread();
+            // reset the decoder in case it had partially decoded something,
+            tc->getDecoderPtr()->reset();
+            // flush the TLBs,
+            tc->getMMUPtr()->flushAll();
+            // Clear any interrupts,
+            interrupts[tc->threadId()]->clearAll();
+            // and finally reenable execution.
+            tc->activate();
+        }
+    }
 }
 
 void
diff --git a/src/cpu/base.hh b/src/cpu/base.hh
index 0d56fbad89..084d9b9305 100644
--- a/src/cpu/base.hh
+++ b/src/cpu/base.hh
@@ -55,6 +55,7 @@
 #include "sim/insttracer.hh"
 #include "sim/probe/pmu.hh"
 #include "sim/probe/probe.hh"
+#include "sim/signal.hh"
 #include "sim/system.hh"
 
 namespace gem5
@@ -161,6 +162,8 @@ class BaseCPU : public ClockedObject
      * group. */
     static std::unique_ptr<GlobalStats> globalStats;
 
+    SignalSinkPort<bool> modelResetPort;
+
   public:
 
     /**
@@ -337,6 +340,19 @@ class BaseCPU : public ClockedObject
      */
     virtual void takeOverFrom(BaseCPU *cpu);
 
+    /**
+     * Set the reset of the CPU to be either asserted or deasserted.
+     *
+     * When asserted, the CPU should be stopped and waiting. When deasserted,
+     * the CPU should start running again, unless some other condition would
+     * also prevent it. At the point the reset is deasserted, it should be
+     * reinitialized as defined by the ISA it's running and any other relevant
+     * part of its configuration (reset address, etc).
+     *
+     * @param state The new state of the reset signal to this CPU.
+     */
+    virtual void setReset(bool state);
+
     /**
      * Flush all TLBs in the CPU.
      *

From 89c49d1ab06ea5364ab1f80586f8b01c0297cb12 Mon Sep 17 00:00:00 2001
From: zhongchengyong <zhongcy93@gmail.com>
Date: Tue, 7 Feb 2023 22:21:53 +0800
Subject: [PATCH 182/492] arch-riscv: Fix the CSR instruction behavior.

The RISC-V spec clarifies the CSR instruction operation, some of them
shall not read or write CSR by the hints of RD/RS1/uimm, but the
original version use the 'data != oldData' condition to determine
whether write or not, and always read CSR first.
See CSR instruction in spec:
Section 9.1 Page 56 of https://github.com/riscv/riscv-isa-manual/releases/download/Ratified-IMAFDQC/riscv-spec-20191213.pdf

Change-Id: I5e7a43cf639474ae76c19a1f430d314b4634ce62
Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/67717
Reviewed-by: Hoa Nguyen <hoanguyen@ucdavis.edu>
Tested-by: kokoro <noreply+kokoro@google.com>
Reviewed-by: Jason Lowe-Power <power.jg@gmail.com>
Maintainer: Jason Lowe-Power <power.jg@gmail.com>
---
 src/arch/riscv/insts/standard.hh        | 19 +++++++++++++++++--
 src/arch/riscv/isa/formats/standard.isa | 12 +++++++-----
 2 files changed, 24 insertions(+), 7 deletions(-)

diff --git a/src/arch/riscv/insts/standard.hh b/src/arch/riscv/insts/standard.hh
index 5b0e8c2c22..afcfd7a915 100644
--- a/src/arch/riscv/insts/standard.hh
+++ b/src/arch/riscv/insts/standard.hh
@@ -91,18 +91,33 @@ class CSROp : public RiscvStaticInst
   protected:
     uint64_t csr;
     uint64_t uimm;
+    bool read;
+    bool write;
 
     /// Constructor
     CSROp(const char *mnem, ExtMachInst _machInst, OpClass __opClass)
         : RiscvStaticInst(mnem, _machInst, __opClass),
-            csr(FUNCT12), uimm(CSRIMM)
+            csr(FUNCT12), uimm(CSRIMM), read(true), write(true)
     {
         if (csr == CSR_SATP) {
             flags[IsSquashAfter] = true;
         }
+        if (strcmp(mnemonic, "csrrw") == 0 ||
+            strcmp(mnemonic, "csrrwi") == 0) {
+          if (RD == 0){
+            read = false;
+          }
+        } else if (strcmp(mnemonic, "csrrs") == 0 ||
+                   strcmp(mnemonic, "csrrc") == 0 ||
+                   strcmp(mnemonic, "csrrsi") == 0 ||
+                   strcmp(mnemonic, "csrrci") == 0 ){
+          if (RS1 == 0) {
+            write = false;
+          }
+        }
     }
 
-    std::string generateDisassembly(
+  std::string generateDisassembly(
         Addr pc, const loader::SymbolTable *symtab) const override;
 };
 
diff --git a/src/arch/riscv/isa/formats/standard.isa b/src/arch/riscv/isa/formats/standard.isa
index bb500f5f49..1bd431ac4d 100644
--- a/src/arch/riscv/isa/formats/standard.isa
+++ b/src/arch/riscv/isa/formats/standard.isa
@@ -358,7 +358,7 @@ def template CSRExecute {{
         %(op_decl)s;
         %(op_rd)s;
 
-        RegVal data, olddata;
+        RegVal data = 0, olddata = 0;
         auto lowestAllowedMode = (PrivilegeMode)bits(csr, 9, 8);
         auto pm = (PrivilegeMode)xc->readMiscReg(MISCREG_PRV);
         if (pm < lowestAllowedMode) {
@@ -380,11 +380,13 @@ def template CSRExecute {{
             break;
         }
 
-        if (csr == CSR_FCSR) {
+        if (read) {
+          if (csr == CSR_FCSR) {
             olddata = xc->readMiscReg(MISCREG_FFLAGS) |
-                      (xc->readMiscReg(MISCREG_FRM) << FRM_OFFSET);
-        } else {
+              (xc->readMiscReg(MISCREG_FRM) << FRM_OFFSET);
+          } else {
             olddata = xc->readMiscReg(midx);
+          }
         }
         olddata = rvZext(olddata);
         auto olddata_all = olddata;
@@ -396,7 +398,7 @@ def template CSRExecute {{
         %(code)s;
 
         data &= maskVal;
-        if (data != olddata) {
+        if (write) {
             if (bits(csr, 11, 10) == 0x3) {
                 return std::make_shared<IllegalInstFault>(
                         csprintf("CSR %s is read-only\n", csrName), machInst);

From 905b8ebd2235b730840d5392f605b5cf5de2840f Mon Sep 17 00:00:00 2001
From: Matthew Poremba <matthew.poremba@amd.com>
Date: Tue, 3 Jan 2023 08:08:05 -0800
Subject: [PATCH 183/492] arch-vega: Implement ds_write_b8_d16_hi

Writes a byte to the upper 16-bit input word to an address.

Change-Id: I0bfd573526b9c46585d0008cde07c769b1d29ebd
Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/67411
Maintainer: Matt Sinclair <mattdsinclair@gmail.com>
Reviewed-by: Matt Sinclair <mattdsinclair@gmail.com>
Tested-by: kokoro <noreply+kokoro@google.com>
---
 src/arch/amdgpu/vega/decoder.cc            |  3 +-
 src/arch/amdgpu/vega/insts/instructions.cc | 62 ++++++++++++++++++++++
 src/arch/amdgpu/vega/insts/instructions.hh | 34 ++++++++++++
 3 files changed, 97 insertions(+), 2 deletions(-)

diff --git a/src/arch/amdgpu/vega/decoder.cc b/src/arch/amdgpu/vega/decoder.cc
index 18c72a4382..291dd6924a 100644
--- a/src/arch/amdgpu/vega/decoder.cc
+++ b/src/arch/amdgpu/vega/decoder.cc
@@ -7706,8 +7706,7 @@ namespace VegaISA
     GPUStaticInst*
     Decoder::decode_OP_DS__DS_WRITE_B8_D16_HI(MachInst iFmt)
     {
-        fatal("Trying to decode instruction without a class\n");
-        return nullptr;
+        return new Inst_DS__DS_WRITE_B8_D16_HI(&iFmt->iFmt_DS);
     }
 
     GPUStaticInst*
diff --git a/src/arch/amdgpu/vega/insts/instructions.cc b/src/arch/amdgpu/vega/insts/instructions.cc
index 6cf01fb8f9..f019dfd75e 100644
--- a/src/arch/amdgpu/vega/insts/instructions.cc
+++ b/src/arch/amdgpu/vega/insts/instructions.cc
@@ -34877,6 +34877,68 @@ namespace VegaISA
     Inst_DS__DS_WRITE_B8::completeAcc(GPUDynInstPtr gpuDynInst)
     {
     } // completeAcc
+    // --- Inst_DS__DS_WRITE_B8_D16_HI class methods ---
+
+    Inst_DS__DS_WRITE_B8_D16_HI::Inst_DS__DS_WRITE_B8_D16_HI(InFmt_DS *iFmt)
+        : Inst_DS(iFmt, "ds_write_b8_d16_hi")
+    {
+        setFlag(MemoryRef);
+        setFlag(Store);
+    } // Inst_DS__DS_WRITE_B8_D16_HI
+
+    Inst_DS__DS_WRITE_B8_D16_HI::~Inst_DS__DS_WRITE_B8_D16_HI()
+    {
+    } // ~Inst_DS__DS_WRITE_B8_D16_HI
+
+    // --- description from .arch file ---
+    // MEM[ADDR] = DATA[23:16].
+    // Byte write in to high word.
+    void
+    Inst_DS__DS_WRITE_B8_D16_HI::execute(GPUDynInstPtr gpuDynInst)
+    {
+        Wavefront *wf = gpuDynInst->wavefront();
+
+        if (gpuDynInst->exec_mask.none()) {
+            wf->decLGKMInstsIssued();
+            return;
+        }
+
+        gpuDynInst->execUnitId = wf->execUnitId;
+        gpuDynInst->latency.init(gpuDynInst->computeUnit());
+        gpuDynInst->latency.set(
+                gpuDynInst->computeUnit()->cyclesToTicks(Cycles(24)));
+        ConstVecOperandU32 addr(gpuDynInst, extData.ADDR);
+        ConstVecOperandU8 data(gpuDynInst, extData.DATA0);
+
+        addr.read();
+        data.read();
+
+        calcAddr(gpuDynInst, addr);
+
+        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
+            if (gpuDynInst->exec_mask[lane]) {
+                (reinterpret_cast<VecElemU8*>(gpuDynInst->d_data))[lane]
+                    = bits(data[lane], 23, 16);
+            }
+        }
+
+        gpuDynInst->computeUnit()->localMemoryPipe.issueRequest(gpuDynInst);
+    } // execute
+
+    void
+    Inst_DS__DS_WRITE_B8_D16_HI::initiateAcc(GPUDynInstPtr gpuDynInst)
+    {
+        Addr offset0 = instData.OFFSET0;
+        Addr offset1 = instData.OFFSET1;
+        Addr offset = (offset1 << 8) | offset0;
+
+        initMemWrite<VecElemU8>(gpuDynInst, offset);
+    } // initiateAcc
+
+    void
+    Inst_DS__DS_WRITE_B8_D16_HI::completeAcc(GPUDynInstPtr gpuDynInst)
+    {
+    } // completeAcc
     // --- Inst_DS__DS_WRITE_B16 class methods ---
 
     Inst_DS__DS_WRITE_B16::Inst_DS__DS_WRITE_B16(InFmt_DS *iFmt)
diff --git a/src/arch/amdgpu/vega/insts/instructions.hh b/src/arch/amdgpu/vega/insts/instructions.hh
index 289673232b..dc2ee08f08 100644
--- a/src/arch/amdgpu/vega/insts/instructions.hh
+++ b/src/arch/amdgpu/vega/insts/instructions.hh
@@ -31934,6 +31934,40 @@ namespace VegaISA
         void completeAcc(GPUDynInstPtr) override;
     }; // Inst_DS__DS_WRITE_B8
 
+    class Inst_DS__DS_WRITE_B8_D16_HI : public Inst_DS
+    {
+      public:
+        Inst_DS__DS_WRITE_B8_D16_HI(InFmt_DS*);
+        ~Inst_DS__DS_WRITE_B8_D16_HI();
+
+        int
+        getNumOperands() override
+        {
+            return numDstRegOperands() + numSrcRegOperands();
+        } // getNumOperands
+
+        int numDstRegOperands() override { return 0; }
+        int numSrcRegOperands() override { return 2; }
+
+        int
+        getOperandSize(int opIdx) override
+        {
+            switch (opIdx) {
+              case 0: //vgpr_a
+                return 4;
+              case 1: //vgpr_d0
+                return 1;
+              default:
+                fatal("op idx %i out of bounds\n", opIdx);
+                return -1;
+            }
+        } // getOperandSize
+
+        void execute(GPUDynInstPtr) override;
+        void initiateAcc(GPUDynInstPtr) override;
+        void completeAcc(GPUDynInstPtr) override;
+    }; // Inst_DS__DS_WRITE_B8_D16_HI
+
     class Inst_DS__DS_WRITE_B16 : public Inst_DS
     {
       public:

From bc9e90d65e3c2813d2eed70b45abd7c62702851c Mon Sep 17 00:00:00 2001
From: Matthew Poremba <matthew.poremba@amd.com>
Date: Tue, 3 Jan 2023 11:40:45 -0800
Subject: [PATCH 184/492] arch-vega: Make VGPR-offset for global SGPR-base
 signed

The VGPR-offset used when SGPR-base addressing is used can be signed in
Vega. These are global instructions of the format:
`global_load_dword v0, v1, s[0:1]`. This is not explicitly stated in the
ISA manual however based on compiler output the offset can be negative.

This changeset assigns the offset to a signed 32-bit integer and the
compiler takes care of the signedness in the expression which calculates
the final address. This fixes a bad address calculation in a rocPRIM
unit test.

Change-Id: I271edfbb4c6344cb1a6a69a0fd3df58a6198d599
Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/67412
Reviewed-by: Bobby Bruce <bbruce@ucdavis.edu>
Maintainer: Bobby Bruce <bbruce@ucdavis.edu>
Tested-by: kokoro <noreply+kokoro@google.com>
---
 src/arch/amdgpu/vega/insts/op_encodings.hh | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/src/arch/amdgpu/vega/insts/op_encodings.hh b/src/arch/amdgpu/vega/insts/op_encodings.hh
index 34f6040495..1071eada0e 100644
--- a/src/arch/amdgpu/vega/insts/op_encodings.hh
+++ b/src/arch/amdgpu/vega/insts/op_encodings.hh
@@ -1007,8 +1007,9 @@ namespace VegaISA
             // mask any upper bits from the vaddr.
             for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
                 if (gpuDynInst->exec_mask[lane]) {
+                    ScalarRegI32 voffset = vaddr[lane];
                     gpuDynInst->addr.at(lane) =
-                        saddr.rawData() + (vaddr[lane] & 0xffffffff) + offset;
+                        saddr.rawData() + voffset + offset;
                 }
             }
         }

From 39e813374cab3d9a04e84b4474997b7a6c71ed45 Mon Sep 17 00:00:00 2001
From: Zhengrong Wang <seanyukigeek@gmail.com>
Date: Wed, 8 Feb 2023 20:41:26 -0800
Subject: [PATCH 185/492] ext: Fix typo in DRAMSIM2 Sconscript

ClockDoenv should be ClockDomain.

Change-Id: Ibcf3d0dc969624a4e20d86924ef834781b5bbf21
Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/67759
Tested-by: kokoro <noreply+kokoro@google.com>
Reviewed-by: Jason Lowe-Power <power.jg@gmail.com>
Maintainer: Jason Lowe-Power <power.jg@gmail.com>
---
 ext/dramsim2/SConscript | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/ext/dramsim2/SConscript b/ext/dramsim2/SConscript
index 95b999dc4c..7eb178d626 100644
--- a/ext/dramsim2/SConscript
+++ b/ext/dramsim2/SConscript
@@ -59,7 +59,7 @@ DRAMFile('AddressMapping.cpp')
 DRAMFile('Bank.cpp')
 DRAMFile('BankState.cpp')
 DRAMFile('BusPacket.cpp')
-DRAMFile('ClockDoenv.cpp')
+DRAMFile('ClockDomain.cpp')
 DRAMFile('CommandQueue.cpp')
 DRAMFile('IniReader.cpp')
 DRAMFile('MemoryController.cpp')

From b6a591e20385f7ea2f08b1a349620cd802f30cd4 Mon Sep 17 00:00:00 2001
From: Zhengrong Wang <seanyukigeek@gmail.com>
Date: Fri, 10 Feb 2023 22:34:28 -0800
Subject: [PATCH 186/492] mem-dram: Make sure SHOW_SIM_OUTPUT is in global
 namespace.

As stated in the comment, SHOW_SIM_OUTPUT is declared extern
in the DRAMSim2 print macros. Therefore, it should be defined
in the global namespace, not in gem5 namespace.

Change-Id: I05245a48ac706b46085ffa8d00db3725ce16a89e
Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/67859
Maintainer: Jason Lowe-Power <power.jg@gmail.com>
Reviewed-by: Jason Lowe-Power <power.jg@gmail.com>
Tested-by: kokoro <noreply+kokoro@google.com>
---
 src/mem/dramsim2_wrapper.cc | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/src/mem/dramsim2_wrapper.cc b/src/mem/dramsim2_wrapper.cc
index c622c1cb57..b9bcf14c08 100644
--- a/src/mem/dramsim2_wrapper.cc
+++ b/src/mem/dramsim2_wrapper.cc
@@ -54,12 +54,6 @@
 #include "base/compiler.hh"
 #include "base/logging.hh"
 
-namespace gem5
-{
-
-namespace memory
-{
-
 /**
  * DRAMSim2 requires SHOW_SIM_OUTPUT to be defined (declared extern in
  * the DRAMSim2 print macros), otherwise we get linking errors due to
@@ -67,6 +61,12 @@ namespace memory
  */
 int SHOW_SIM_OUTPUT = 0;
 
+namespace gem5
+{
+
+namespace memory
+{
+
 DRAMSim2Wrapper::DRAMSim2Wrapper(const std::string& config_file,
                                  const std::string& system_file,
                                  const std::string& working_dir,

From 39b5b5e5113579ec49e81b14124e97bc8e7e1a6b Mon Sep 17 00:00:00 2001
From: Matthew Poremba <matthew.poremba@amd.com>
Date: Mon, 13 Feb 2023 10:58:12 -0600
Subject: [PATCH 187/492] dev-amdgpu: Fix address in POLL_REGMEM SDMA packet

The address for the POLL_REGMEM packet should not be shifted when the
mode is 1 (memory). Relevant driver code below is not shifting the
address. The shift is causing a page fault due to the incorrect address.

This changeset removes the shift so the correct address is translated.

https://github.com/RadeonOpenCompute/ROCK-Kernel-Driver/blob/
    roc-4.3.x/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c#L903

Change-Id: I7a0ec3245ca14376670df24c5d3773958c08d751
Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/67877
Reviewed-by: Matt Sinclair <mattdsinclair@gmail.com>
Maintainer: Matt Sinclair <mattdsinclair@gmail.com>
Tested-by: kokoro <noreply+kokoro@google.com>
---
 src/dev/amdgpu/sdma_engine.cc | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/dev/amdgpu/sdma_engine.cc b/src/dev/amdgpu/sdma_engine.cc
index 4c03bf57b2..736df45d9d 100644
--- a/src/dev/amdgpu/sdma_engine.cc
+++ b/src/dev/amdgpu/sdma_engine.cc
@@ -832,7 +832,7 @@ SDMAEngine::pollRegMem(SDMAQueue *q, sdmaPollRegMemHeader *header,
             auto cb = new DmaVirtCallback<uint32_t>(
                 [ = ] (const uint32_t &dma_buffer) {
                     pollRegMemRead(q, header, pkt, dma_buffer, 0); });
-            dmaReadVirt(pkt->address >> 3, sizeof(uint32_t), cb,
+            dmaReadVirt(pkt->address, sizeof(uint32_t), cb,
                         (void *)&cb->dmaBuffer);
         } else {
             panic("SDMA poll mem operation not implemented.");

From d7516a26dc004892ce03c9784222da5944ea2489 Mon Sep 17 00:00:00 2001
From: Alexandru Dutu <alexandru.dutu@amd.com>
Date: Fri, 7 Oct 2022 17:11:35 -0700
Subject: [PATCH 188/492] arch-vega: Implementing global_atomic_or

Change-Id: I13065186313ca784054956e1165b1b2fd8ce4a19
Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/64511
Maintainer: Matt Sinclair <mattdsinclair@gmail.com>
Tested-by: kokoro <noreply+kokoro@google.com>
Reviewed-by: Matt Sinclair <mattdsinclair@gmail.com>
---
 src/arch/amdgpu/vega/insts/instructions.cc | 54 +++++++++++++++++++++-
 src/arch/amdgpu/vega/insts/instructions.hh |  2 +
 2 files changed, 55 insertions(+), 1 deletion(-)

diff --git a/src/arch/amdgpu/vega/insts/instructions.cc b/src/arch/amdgpu/vega/insts/instructions.cc
index f019dfd75e..987474fbfb 100644
--- a/src/arch/amdgpu/vega/insts/instructions.cc
+++ b/src/arch/amdgpu/vega/insts/instructions.cc
@@ -45112,8 +45112,60 @@ namespace VegaISA
     void
     Inst_FLAT__FLAT_ATOMIC_OR::execute(GPUDynInstPtr gpuDynInst)
     {
-        panicUnimplemented();
+        Wavefront *wf = gpuDynInst->wavefront();
+
+        if (gpuDynInst->exec_mask.none()) {
+            wf->decVMemInstsIssued();
+            wf->decLGKMInstsIssued();
+            return;
+        }
+
+        gpuDynInst->execUnitId = wf->execUnitId;
+        gpuDynInst->latency.init(gpuDynInst->computeUnit());
+        gpuDynInst->latency.set(gpuDynInst->computeUnit()->clockPeriod());
+
+        ConstVecOperandU64 addr(gpuDynInst, extData.ADDR);
+        ConstVecOperandU32 data(gpuDynInst, extData.DATA);
+
+        addr.read();
+        data.read();
+
+        calcAddr(gpuDynInst, addr, extData.SADDR, instData.OFFSET);
+
+        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
+            if (gpuDynInst->exec_mask[lane]) {
+                (reinterpret_cast<VecElemU32*>(gpuDynInst->a_data))[lane]
+                    = data[lane];
+            }
+        }
+
+        gpuDynInst->computeUnit()->globalMemoryPipe.
+            issueRequest(gpuDynInst);
     } // execute
+
+    void
+    Inst_FLAT__FLAT_ATOMIC_OR::initiateAcc(GPUDynInstPtr gpuDynInst)
+    {
+        initAtomicAccess<VecElemU32>(gpuDynInst);
+    } // initiateAcc
+
+    void
+    Inst_FLAT__FLAT_ATOMIC_OR::completeAcc(GPUDynInstPtr gpuDynInst)
+    {
+        if (isAtomicRet()) {
+            VecOperandU32 vdst(gpuDynInst, extData.VDST);
+
+            for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
+                if (gpuDynInst->exec_mask[lane]) {
+                    vdst[lane] = (reinterpret_cast<VecElemU32*>(
+                        gpuDynInst->d_data))[lane];
+                }
+            }
+
+            vdst.write();
+        }
+    } // completeAcc
+
     // --- Inst_FLAT__FLAT_ATOMIC_XOR class methods ---
 
     Inst_FLAT__FLAT_ATOMIC_XOR::Inst_FLAT__FLAT_ATOMIC_XOR(InFmt_FLAT *iFmt)
diff --git a/src/arch/amdgpu/vega/insts/instructions.hh b/src/arch/amdgpu/vega/insts/instructions.hh
index dc2ee08f08..ddf228a76a 100644
--- a/src/arch/amdgpu/vega/insts/instructions.hh
+++ b/src/arch/amdgpu/vega/insts/instructions.hh
@@ -42800,6 +42800,8 @@ namespace VegaISA
         } // getOperandSize
 
         void execute(GPUDynInstPtr) override;
+        void initiateAcc(GPUDynInstPtr) override;
+        void completeAcc(GPUDynInstPtr) override;
     }; // Inst_FLAT__FLAT_ATOMIC_OR
 
     class Inst_FLAT__FLAT_ATOMIC_XOR : public Inst_FLAT

From 8375058e73dfeefe433f89c9fe00e675d9ad095a Mon Sep 17 00:00:00 2001
From: Alexandru Dutu <alexandru.dutu@amd.com>
Date: Fri, 7 Oct 2022 17:22:12 -0700
Subject: [PATCH 189/492] arch-vega: Implementing global_atomic_smin

Change-Id: Iffb366190f9e3f7ffbacde5dbb3abc97226926d4
Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/64512
Reviewed-by: Matt Sinclair <mattdsinclair@gmail.com>
Tested-by: kokoro <noreply+kokoro@google.com>
Maintainer: Matt Sinclair <mattdsinclair@gmail.com>
---
 src/arch/amdgpu/vega/insts/instructions.cc | 53 +++++++++++++++++++++-
 src/arch/amdgpu/vega/insts/instructions.hh |  2 +
 2 files changed, 54 insertions(+), 1 deletion(-)

diff --git a/src/arch/amdgpu/vega/insts/instructions.cc b/src/arch/amdgpu/vega/insts/instructions.cc
index 987474fbfb..e3639a5901 100644
--- a/src/arch/amdgpu/vega/insts/instructions.cc
+++ b/src/arch/amdgpu/vega/insts/instructions.cc
@@ -44972,8 +44972,59 @@ namespace VegaISA
     void
     Inst_FLAT__FLAT_ATOMIC_SMIN::execute(GPUDynInstPtr gpuDynInst)
     {
-        panicUnimplemented();
+        Wavefront *wf = gpuDynInst->wavefront();
+
+        if (gpuDynInst->exec_mask.none()) {
+            wf->decVMemInstsIssued();
+            wf->decLGKMInstsIssued();
+            return;
+        }
+
+        gpuDynInst->execUnitId = wf->execUnitId;
+        gpuDynInst->latency.init(gpuDynInst->computeUnit());
+        gpuDynInst->latency.set(gpuDynInst->computeUnit()->clockPeriod());
+
+        ConstVecOperandU64 addr(gpuDynInst, extData.ADDR);
+        ConstVecOperandU32 data(gpuDynInst, extData.DATA);
+
+        addr.read();
+        data.read();
+
+        calcAddr(gpuDynInst, addr, extData.SADDR, instData.OFFSET);
+
+        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
+            if (gpuDynInst->exec_mask[lane]) {
+                (reinterpret_cast<VecElemU32*>(gpuDynInst->a_data))[lane]
+                    = data[lane];
+            }
+        }
+
+        gpuDynInst->computeUnit()->globalMemoryPipe.
+            issueRequest(gpuDynInst);
     } // execute
+
+    void
+    Inst_FLAT__FLAT_ATOMIC_SMIN::initiateAcc(GPUDynInstPtr gpuDynInst)
+    {
+        initAtomicAccess<VecElemU32>(gpuDynInst);
+    } // initiateAcc
+
+    void
+    Inst_FLAT__FLAT_ATOMIC_SMIN::completeAcc(GPUDynInstPtr gpuDynInst)
+    {
+        if (isAtomicRet()) {
+            VecOperandU32 vdst(gpuDynInst, extData.VDST);
+
+            for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
+                if (gpuDynInst->exec_mask[lane]) {
+                    vdst[lane] = (reinterpret_cast<VecElemU32*>(
+                        gpuDynInst->d_data))[lane];
+                }
+            }
+
+            vdst.write();
+        }
+    } // completeAcc
     // --- Inst_FLAT__FLAT_ATOMIC_UMIN class methods ---
 
     Inst_FLAT__FLAT_ATOMIC_UMIN::Inst_FLAT__FLAT_ATOMIC_UMIN(InFmt_FLAT *iFmt)
diff --git a/src/arch/amdgpu/vega/insts/instructions.hh b/src/arch/amdgpu/vega/insts/instructions.hh
index ddf228a76a..8b0c8c43de 100644
--- a/src/arch/amdgpu/vega/insts/instructions.hh
+++ b/src/arch/amdgpu/vega/insts/instructions.hh
@@ -42615,6 +42615,8 @@ namespace VegaISA
         } // getOperandSize
 
         void execute(GPUDynInstPtr) override;
+        void initiateAcc(GPUDynInstPtr) override;
+        void completeAcc(GPUDynInstPtr) override;
     }; // Inst_FLAT__FLAT_ATOMIC_SMIN
 
     class Inst_FLAT__FLAT_ATOMIC_UMIN : public Inst_FLAT

From bb8f370e4d36ada08afa82e51d4b3b934bf105c7 Mon Sep 17 00:00:00 2001
From: Alexandru Dutu <alexandru.dutu@amd.com>
Date: Fri, 7 Oct 2022 17:33:50 -0700
Subject: [PATCH 190/492] arch-vega: Implementing global_atomic_smax

Change-Id: Id4053424c98eec1e98eb555bb35b48f0b5d2407b
Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/64513
Reviewed-by: Matt Sinclair <mattdsinclair@gmail.com>
Maintainer: Matt Sinclair <mattdsinclair@gmail.com>
Tested-by: kokoro <noreply+kokoro@google.com>
---
 src/arch/amdgpu/vega/insts/instructions.cc | 53 +++++++++++++++++++++-
 src/arch/amdgpu/vega/insts/instructions.hh |  2 +
 2 files changed, 54 insertions(+), 1 deletion(-)

diff --git a/src/arch/amdgpu/vega/insts/instructions.cc b/src/arch/amdgpu/vega/insts/instructions.cc
index e3639a5901..b6a78b26e4 100644
--- a/src/arch/amdgpu/vega/insts/instructions.cc
+++ b/src/arch/amdgpu/vega/insts/instructions.cc
@@ -45079,8 +45079,59 @@ namespace VegaISA
     void
     Inst_FLAT__FLAT_ATOMIC_SMAX::execute(GPUDynInstPtr gpuDynInst)
     {
-        panicUnimplemented();
+        Wavefront *wf = gpuDynInst->wavefront();
+
+        if (gpuDynInst->exec_mask.none()) {
+            wf->decVMemInstsIssued();
+            wf->decLGKMInstsIssued();
+            return;
+        }
+
+        gpuDynInst->execUnitId = wf->execUnitId;
+        gpuDynInst->latency.init(gpuDynInst->computeUnit());
+        gpuDynInst->latency.set(gpuDynInst->computeUnit()->clockPeriod());
+
+        ConstVecOperandU64 addr(gpuDynInst, extData.ADDR);
+        ConstVecOperandU32 data(gpuDynInst, extData.DATA);
+
+        addr.read();
+        data.read();
+
+        calcAddr(gpuDynInst, addr, extData.SADDR, instData.OFFSET);
+
+        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
+            if (gpuDynInst->exec_mask[lane]) {
+                (reinterpret_cast<VecElemU32*>(gpuDynInst->a_data))[lane]
+                    = data[lane];
+            }
+        }
+
+        gpuDynInst->computeUnit()->globalMemoryPipe.
+            issueRequest(gpuDynInst);
     } // execute
+
+    void
+    Inst_FLAT__FLAT_ATOMIC_SMAX::initiateAcc(GPUDynInstPtr gpuDynInst)
+    {
+        initAtomicAccess<VecElemU32>(gpuDynInst);
+    } // initiateAcc
+
+    void
+    Inst_FLAT__FLAT_ATOMIC_SMAX::completeAcc(GPUDynInstPtr gpuDynInst)
+    {
+        if (isAtomicRet()) {
+            VecOperandU32 vdst(gpuDynInst, extData.VDST);
+
+            for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
+                if (gpuDynInst->exec_mask[lane]) {
+                    vdst[lane] = (reinterpret_cast<VecElemU32*>(
+                        gpuDynInst->d_data))[lane];
+                }
+            }
+
+            vdst.write();
+        }
+    } // completeAcc
     // --- Inst_FLAT__FLAT_ATOMIC_UMAX class methods ---
 
     Inst_FLAT__FLAT_ATOMIC_UMAX::Inst_FLAT__FLAT_ATOMIC_UMAX(InFmt_FLAT *iFmt)
diff --git a/src/arch/amdgpu/vega/insts/instructions.hh b/src/arch/amdgpu/vega/insts/instructions.hh
index 8b0c8c43de..d45a84c7b8 100644
--- a/src/arch/amdgpu/vega/insts/instructions.hh
+++ b/src/arch/amdgpu/vega/insts/instructions.hh
@@ -42691,6 +42691,8 @@ namespace VegaISA
         } // getOperandSize
 
         void execute(GPUDynInstPtr) override;
+        void initiateAcc(GPUDynInstPtr) override;
+        void completeAcc(GPUDynInstPtr) override;
     }; // Inst_FLAT__FLAT_ATOMIC_SMAX
 
     class Inst_FLAT__FLAT_ATOMIC_UMAX : public Inst_FLAT

From ea9239ae092a919a0505ba15aef1595bb0ceeb49 Mon Sep 17 00:00:00 2001
From: Matthew Poremba <matthew.poremba@amd.com>
Date: Thu, 9 Feb 2023 12:03:38 -0600
Subject: [PATCH 191/492] dev-amdgpu: Update deprecated ports

Change-Id: Icbc5636c33b437c7396ee27363eed1cf006f8882
Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/67837
Maintainer: Matt Sinclair <mattdsinclair@gmail.com>
Tested-by: kokoro <noreply+kokoro@google.com>
Reviewed-by: Matt Sinclair <mattdsinclair@gmail.com>
---
 src/arch/amdgpu/common/tlb_coalescer.hh | 2 +-
 src/dev/amdgpu/memory_manager.hh        | 4 ++--
 2 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/src/arch/amdgpu/common/tlb_coalescer.hh b/src/arch/amdgpu/common/tlb_coalescer.hh
index 59d8ebe888..56d72d7abb 100644
--- a/src/arch/amdgpu/common/tlb_coalescer.hh
+++ b/src/arch/amdgpu/common/tlb_coalescer.hh
@@ -152,7 +152,7 @@ class TLBCoalescer : public ClockedObject
       public:
         MemSidePort(const std::string &_name, TLBCoalescer *tlb_coalescer,
                     PortID _index)
-            : RequestPort(_name, tlb_coalescer), coalescer(tlb_coalescer),
+            : RequestPort(_name), coalescer(tlb_coalescer),
               index(_index) { }
 
         std::deque<PacketPtr> retries;
diff --git a/src/dev/amdgpu/memory_manager.hh b/src/dev/amdgpu/memory_manager.hh
index e18ec643a6..0bd08d6ff9 100644
--- a/src/dev/amdgpu/memory_manager.hh
+++ b/src/dev/amdgpu/memory_manager.hh
@@ -45,11 +45,11 @@ namespace gem5
 
 class AMDGPUMemoryManager : public ClockedObject
 {
-    class GPUMemPort : public MasterPort
+    class GPUMemPort : public RequestPort
     {
       public:
         GPUMemPort(const std::string &_name, AMDGPUMemoryManager &_gpuMemMgr)
-            : MasterPort(_name, &_gpuMemMgr), gpu_mem(_gpuMemMgr)
+            : RequestPort(_name), gpu_mem(_gpuMemMgr)
         {
         }
 

From e10be09dcf919d50f03547924dde0157692cc8f8 Mon Sep 17 00:00:00 2001
From: hungweihsu <hungweihsu@google.com>
Date: Thu, 9 Feb 2023 05:39:15 +0000
Subject: [PATCH 192/492] dev: add method to set initial register value out of
 constructor.

The initial value of register is set in constructor but there is no
standard way to assign the initial value and default value at the same
time out of that. So we decided to add an extra method to set the
initialValue to current register value. The usecase would be:

reg.get().field1 = val1;
reg.get().field2 = val2;
reg.resetInitialValue();

Change-Id: Ibc5454e2945cc6aff943e6599043edd8ca442f5f
Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/67917
Tested-by: kokoro <noreply+kokoro@google.com>
Reviewed-by: Gabe Black <gabe.black@gmail.com>
Maintainer: Gabe Black <gabe.black@gmail.com>
---
 src/dev/reg_bank.hh      |  3 +++
 src/dev/reg_bank.test.cc | 22 ++++++++++++++++++++++
 2 files changed, 25 insertions(+)

diff --git a/src/dev/reg_bank.hh b/src/dev/reg_bank.hh
index 3d8dc576cb..3a89a00ab6 100644
--- a/src/dev/reg_bank.hh
+++ b/src/dev/reg_bank.hh
@@ -759,6 +759,9 @@ class RegisterBank : public RegisterBankBase
         // constructor. This is intended to be used in a resetter function.
         const Data &initialValue() const { return _resetData; }
 
+        // Reset the initial value, which is normally set in the constructor,
+        // to the register's current value.
+        void resetInitialValue() { _resetData = _data; }
 
         /*
          * Interface for accessing the register's state, for use by the
diff --git a/src/dev/reg_bank.test.cc b/src/dev/reg_bank.test.cc
index 4439526e35..c618ef16d4 100644
--- a/src/dev/reg_bank.test.cc
+++ b/src/dev/reg_bank.test.cc
@@ -881,6 +881,28 @@ TEST_F(TypedRegisterTest, DefaultResetter)
     EXPECT_EQ(reg.get(), initial_value);
 }
 
+// Set initial value later than constructor
+TEST_F(TypedRegisterTest, LateInitialValueAssignment)
+{
+    BackingType initial_value = reg.get();
+    BackingType new_initial_value = initial_value + 1;
+
+    reg.get() = new_initial_value;
+    reg.resetInitialValue();
+
+    EXPECT_EQ(reg.get(), new_initial_value);
+    EXPECT_EQ(reg.initialValue(), new_initial_value);
+
+    reg.get() = new_initial_value + 1;
+    EXPECT_EQ(reg.get(), new_initial_value + 1);
+    EXPECT_EQ(reg.initialValue(), new_initial_value);
+
+    reg.reset();
+
+    EXPECT_EQ(reg.get(), new_initial_value);
+    EXPECT_EQ(reg.initialValue(), new_initial_value);
+}
+
 // Set a custom resetter for a register.
 TEST_F(TypedRegisterTest, Resetter)
 {

From 4b1c24542065380c6cff7ab2baa25e216a0ad38e Mon Sep 17 00:00:00 2001
From: Roger Chang <rogerycchang@google.com>
Date: Tue, 14 Feb 2023 10:26:09 +0800
Subject: [PATCH 193/492] arch-riscv: Fix the behavior of write to status CSR

According to RISC V spec Volumn I, Section 11.1, the CSR will be
written only if RS1 != 0 or imm != 0. However, after the change
of CL(https://gem5-review.googlesource.com/c/public/gem5/+/67717),
it will cause IllegalInstFault to write status CSR if we don't
change the data.

Example of Instruction Fault for mstatus

```
addi a5, zero, 8
csrc mstatus, a5
```

It will cause instruction fault if mstatus value is 0 due to
"newdata_all == olddata_all". We can just simply check if
the data value is changed out of mask.

Change-Id: Iab4ce7ac646a9105dc04e69c24d084572e28ebab
Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/67897
Reviewed-by: Yu-hsin Wang <yuhsingw@google.com>
Maintainer: Jason Lowe-Power <power.jg@gmail.com>
Tested-by: kokoro <noreply+kokoro@google.com>
---
 src/arch/riscv/insts/standard.hh        | 2 +-
 src/arch/riscv/isa/formats/standard.isa | 5 +++--
 2 files changed, 4 insertions(+), 3 deletions(-)

diff --git a/src/arch/riscv/insts/standard.hh b/src/arch/riscv/insts/standard.hh
index afcfd7a915..2dfe73aedf 100644
--- a/src/arch/riscv/insts/standard.hh
+++ b/src/arch/riscv/insts/standard.hh
@@ -111,7 +111,7 @@ class CSROp : public RiscvStaticInst
                    strcmp(mnemonic, "csrrc") == 0 ||
                    strcmp(mnemonic, "csrrsi") == 0 ||
                    strcmp(mnemonic, "csrrci") == 0 ){
-          if (RS1 == 0) {
+          if (RS1 == 0 || uimm == 0) {
             write = false;
           }
         }
diff --git a/src/arch/riscv/isa/formats/standard.isa b/src/arch/riscv/isa/formats/standard.isa
index 1bd431ac4d..c94a0bcdbd 100644
--- a/src/arch/riscv/isa/formats/standard.isa
+++ b/src/arch/riscv/isa/formats/standard.isa
@@ -358,7 +358,7 @@ def template CSRExecute {{
         %(op_decl)s;
         %(op_rd)s;
 
-        RegVal data = 0, olddata = 0;
+        RegVal data = 0, olddata = 0, nonmaskdata = 0;
         auto lowestAllowedMode = (PrivilegeMode)bits(csr, 9, 8);
         auto pm = (PrivilegeMode)xc->readMiscReg(MISCREG_PRV);
         if (pm < lowestAllowedMode) {
@@ -397,6 +397,7 @@ def template CSRExecute {{
 
         %(code)s;
 
+        nonmaskdata = data & ~maskVal;
         data &= maskVal;
         if (write) {
             if (bits(csr, 11, 10) == 0x3) {
@@ -419,7 +420,7 @@ def template CSRExecute {{
               case CSR_SIP: case CSR_SIE:
               case CSR_UIP: case CSR_UIE:
               case CSR_MSTATUS: case CSR_SSTATUS: case CSR_USTATUS:
-                if (newdata_all != olddata_all) {
+                if (nonmaskdata == 0) {
                     xc->setMiscReg(midx, newdata_all);
                 } else {
                     return std::make_shared<IllegalInstFault>(

From 3b4f241fb5a05f29e2235d0dea33475ebbbc3185 Mon Sep 17 00:00:00 2001
From: Roger Chang <rogerycchang@google.com>
Date: Tue, 17 Jan 2023 10:38:11 +0800
Subject: [PATCH 194/492] arch-riscv: Fix incorrect trap value of instruction
 fault

As we add rv_type bit in machInst at 62, It will get the machine
code with rv_type specification if we just return machInst. We
only need return machine code for handling instruction fault.

Change-Id: I9dd7a25047d4a13df5b47dc9e422345ba44b7b09
Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/67677
Reviewed-by: Yu-hsin Wang <yuhsingw@google.com>
Maintainer: Jason Lowe-Power <power.jg@gmail.com>
Tested-by: kokoro <noreply+kokoro@google.com>
---
 src/arch/riscv/faults.hh | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/arch/riscv/faults.hh b/src/arch/riscv/faults.hh
index e66476727c..f687fd6f20 100644
--- a/src/arch/riscv/faults.hh
+++ b/src/arch/riscv/faults.hh
@@ -173,7 +173,7 @@ class InstFault : public RiscvFault
         : RiscvFault(n, FaultType::OTHERS, INST_ILLEGAL), _inst(inst)
     {}
 
-    RegVal trap_value() const override { return _inst; }
+    RegVal trap_value() const override { return bits(_inst, 31, 0); }
 };
 
 class UnknownInstFault : public InstFault

From f028bd55e0ef74c350d0aa75e1523e7139c4f207 Mon Sep 17 00:00:00 2001
From: Matthew Poremba <matthew.poremba@amd.com>
Date: Wed, 15 Feb 2023 07:45:57 -0800
Subject: [PATCH 195/492] arch-vega: Update API for some flat atomics

Some recently submitted atomic instructions were using two older APIs.
Update these to use the newer APIs to support all apertures and avoid
compilation issue.

Change-Id: Ibd6bc00177d33236946f54ef8e5c7544af322852
Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/67977
Maintainer: Matt Sinclair <mattdsinclair@gmail.com>
Tested-by: kokoro <noreply+kokoro@google.com>
Reviewed-by: Matt Sinclair <mattdsinclair@gmail.com>
---
 src/arch/amdgpu/vega/insts/instructions.cc | 21 ++++++---------------
 1 file changed, 6 insertions(+), 15 deletions(-)

diff --git a/src/arch/amdgpu/vega/insts/instructions.cc b/src/arch/amdgpu/vega/insts/instructions.cc
index b6a78b26e4..45c84910f2 100644
--- a/src/arch/amdgpu/vega/insts/instructions.cc
+++ b/src/arch/amdgpu/vega/insts/instructions.cc
@@ -44984,13 +44984,11 @@ namespace VegaISA
         gpuDynInst->latency.init(gpuDynInst->computeUnit());
         gpuDynInst->latency.set(gpuDynInst->computeUnit()->clockPeriod());
 
-        ConstVecOperandU64 addr(gpuDynInst, extData.ADDR);
         ConstVecOperandU32 data(gpuDynInst, extData.DATA);
 
-        addr.read();
         data.read();
 
-        calcAddr(gpuDynInst, addr, extData.SADDR, instData.OFFSET);
+        calcAddr(gpuDynInst, extData.ADDR, extData.SADDR, instData.OFFSET);
 
         for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
             if (gpuDynInst->exec_mask[lane]) {
@@ -44999,8 +44997,7 @@ namespace VegaISA
             }
         }
 
-        gpuDynInst->computeUnit()->globalMemoryPipe.
-            issueRequest(gpuDynInst);
+        issueRequestHelper(gpuDynInst);
     } // execute
 
     void
@@ -45091,13 +45088,11 @@ namespace VegaISA
         gpuDynInst->latency.init(gpuDynInst->computeUnit());
         gpuDynInst->latency.set(gpuDynInst->computeUnit()->clockPeriod());
 
-        ConstVecOperandU64 addr(gpuDynInst, extData.ADDR);
         ConstVecOperandU32 data(gpuDynInst, extData.DATA);
 
-        addr.read();
         data.read();
 
-        calcAddr(gpuDynInst, addr, extData.SADDR, instData.OFFSET);
+        calcAddr(gpuDynInst, extData.ADDR, extData.SADDR, instData.OFFSET);
 
         for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
             if (gpuDynInst->exec_mask[lane]) {
@@ -45106,8 +45101,7 @@ namespace VegaISA
             }
         }
 
-        gpuDynInst->computeUnit()->globalMemoryPipe.
-            issueRequest(gpuDynInst);
+        issueRequestHelper(gpuDynInst);
     } // execute
 
     void
@@ -45226,13 +45220,11 @@ namespace VegaISA
         gpuDynInst->latency.init(gpuDynInst->computeUnit());
         gpuDynInst->latency.set(gpuDynInst->computeUnit()->clockPeriod());
 
-        ConstVecOperandU64 addr(gpuDynInst, extData.ADDR);
         ConstVecOperandU32 data(gpuDynInst, extData.DATA);
 
-        addr.read();
         data.read();
 
-        calcAddr(gpuDynInst, addr, extData.SADDR, instData.OFFSET);
+        calcAddr(gpuDynInst, extData.ADDR, extData.SADDR, instData.OFFSET);
 
         for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
             if (gpuDynInst->exec_mask[lane]) {
@@ -45241,8 +45233,7 @@ namespace VegaISA
             }
         }
 
-        gpuDynInst->computeUnit()->globalMemoryPipe.
-            issueRequest(gpuDynInst);
+        issueRequestHelper(gpuDynInst);
     } // execute
 
     void

From 109c327209de4e6f5a7f621f59e333f61530cb66 Mon Sep 17 00:00:00 2001
From: Yan Lee <yanlee@google.com>
Date: Tue, 14 Feb 2023 19:47:38 -0800
Subject: [PATCH 196/492] base: add extensible type

Extensible is for carrying additional user-defined
information. Each type of the extension will have a unique
extension ID and there is a linked list of extension in every
Extensible object. There will be most one extension with the same type in
the linked list. With the shared_ptr, the extension will be
deleted automatically. That is, the caller should allocate
the extension and add into the packet.

Change-Id: I54729536a305c91c751d5fb059bd2f9a3db05523
Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/62892
Tested-by: kokoro <noreply+kokoro@google.com>
Reviewed-by: Giacomo Travaglini <giacomo.travaglini@arm.com>
Maintainer: Giacomo Travaglini <giacomo.travaglini@arm.com>
---
 src/base/SConscript         |   1 +
 src/base/extensible.hh      | 187 ++++++++++++++++++++++++++++++++++++
 src/base/extensible.test.cc | 111 +++++++++++++++++++++
 3 files changed, 299 insertions(+)
 create mode 100644 src/base/extensible.hh
 create mode 100644 src/base/extensible.test.cc

diff --git a/src/base/SConscript b/src/base/SConscript
index 4a6b65fa72..29f106a0b2 100644
--- a/src/base/SConscript
+++ b/src/base/SConscript
@@ -90,6 +90,7 @@ GTest('bitunion.test', 'bitunion.test.cc')
 GTest('channel_addr.test', 'channel_addr.test.cc', 'channel_addr.cc')
 GTest('circlebuf.test', 'circlebuf.test.cc')
 GTest('circular_queue.test', 'circular_queue.test.cc')
+GTest('extensible.test', 'extensible.test.cc')
 GTest('sat_counter.test', 'sat_counter.test.cc')
 GTest('refcnt.test','refcnt.test.cc')
 GTest('condcodes.test', 'condcodes.test.cc')
diff --git a/src/base/extensible.hh b/src/base/extensible.hh
new file mode 100644
index 0000000000..eb79c71be3
--- /dev/null
+++ b/src/base/extensible.hh
@@ -0,0 +1,187 @@
+/*
+ * Copyright 2023 Google, Inc.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are
+ * met: redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer;
+ * redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution;
+ * neither the name of the copyright holders nor the names of its
+ * contributors may be used to endorse or promote products derived from
+ * this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/* @file
+ * Extensible Object Base Class Declaration
+ */
+
+#ifndef __BASE_EXTENSIBLE_HH__
+#define __BASE_EXTENSIBLE_HH__
+
+#include <list>
+#include <memory>
+#include <utility>
+
+namespace gem5
+{
+
+/**
+ * This is base of every extension.
+ */
+class ExtensionBase
+{
+  public:
+    explicit ExtensionBase(const unsigned int id)
+        : extID(id) {}
+
+    virtual ~ExtensionBase() = default;
+
+    virtual std::unique_ptr<ExtensionBase> clone() const = 0;
+
+    static unsigned int
+    maxNumExtensions()
+    {
+        static unsigned int max_num = 0;
+        return ++max_num;
+    }
+
+    unsigned int getExtensionID() const { return extID; }
+
+  private:
+    const unsigned int extID;
+};
+
+/**
+ * This is the extension for carrying additional information.
+ * Each type of extension will have a unique extensionID.
+ * This extensionID will assign to base class for comparsion.
+ */
+template <typename Target, typename T>
+class Extension : public ExtensionBase
+{
+  public:
+    Extension() : ExtensionBase(extensionID) {}
+
+    const static unsigned int extensionID;
+};
+
+template <typename Target, typename T>
+const unsigned int Extension<Target, T>::extensionID =
+        ExtensionBase::maxNumExtensions() - 1;
+
+template <typename Target>
+class Extensible
+{
+  public:
+     Extensible() = default;
+     Extensible(const Extensible& other)
+     {
+        // Clone every extension from other.
+        for (auto& ext : other.extensions) {
+            extensions.emplace_back(ext->clone());
+        }
+     }
+     virtual ~Extensible() = default;
+
+    /**
+     * Set a new extension to the packet and replace the old one, if there
+     * already exists the same type of extension in this packet. This new
+     * extension will be deleted automatically with the shared_ptr<>.
+     *
+     * @param ext Extension to set
+     */
+    template <typename T>
+    void
+    setExtension(std::shared_ptr<T> ext)
+    {
+        static_assert(std::is_base_of<ExtensionBase, T>::value,
+                      "Extension should inherit from ExtensionBase.");
+        assert(ext.get() != nullptr);
+
+        auto it = findExtension<T>();
+
+        if (it != extensions.end()) {
+            // There exists the same type of extension in the list.
+            // Replace it to the new one.
+            *it = std::move(ext);
+        } else {
+            // Add ext into the linked list.
+            extensions.emplace_back(std::move(ext));
+        }
+    }
+
+    /**
+     * Remove the extension based on its type.
+     *
+     * @param ext Extension to remove
+     */
+    template <typename T>
+    void
+    removeExtension(void)
+    {
+        static_assert(std::is_base_of<ExtensionBase, T>::value,
+                      "Extension should inherit from ExtensionBase.");
+
+        auto it = findExtension<T>();
+        if (it != extensions.end())
+            extensions.erase(it);
+    }
+
+    /**
+     * Get the extension pointer by linear search with the extensionID.
+     */
+    template <typename T>
+    std::shared_ptr<T>
+    getExtension()
+    {
+        static_assert(std::is_base_of<ExtensionBase, T>::value,
+                      "Extension should inherit from ExtensionBase.");
+        auto it = findExtension<T>();
+        if (it == extensions.end())
+            return nullptr;
+        return std::static_pointer_cast<T>(*it);
+    }
+
+  protected:
+
+    /**
+     * Go through the extension list and return the iterator to the instance of
+     * the type of extension. If there is no such an extension, return the end
+     * iterator of the list.
+     *
+     *  @return The iterator to the extension type T if there exists.
+     */
+    template <typename T>
+    std::list<std::shared_ptr<ExtensionBase>>::iterator
+    findExtension()
+    {
+        auto it = extensions.begin();
+        while (it != extensions.end()) {
+            if ((*it)->getExtensionID() == T::extensionID)
+                break;
+            it++;
+        }
+        return it;
+    }
+
+    // Linked list of extensions.
+    std::list<std::shared_ptr<ExtensionBase>> extensions;
+};
+
+} // namespace gem5
+
+#endif //__BASE_EXTENSIBLE_HH__
diff --git a/src/base/extensible.test.cc b/src/base/extensible.test.cc
new file mode 100644
index 0000000000..66cbbda527
--- /dev/null
+++ b/src/base/extensible.test.cc
@@ -0,0 +1,111 @@
+/*
+ * Copyright 2023 Google, Inc.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are
+ * met: redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer;
+ * redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution;
+ * neither the name of the copyright holders nor the names of its
+ * contributors may be used to endorse or promote products derived from
+ * this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <gtest/gtest.h>
+
+#include <memory>
+
+#include "base/extensible.hh"
+
+using namespace gem5;
+
+namespace {
+
+class TestTarget : public Extensible<TestTarget>
+{
+};
+
+class IntegerExtension : public Extension<TestTarget, IntegerExtension>
+{
+  public:
+    explicit IntegerExtension(uint32_t data)
+        : data_(data) {}
+
+    std::unique_ptr<gem5::ExtensionBase> clone() const override
+    {
+        return std::unique_ptr<IntegerExtension>(new IntegerExtension(data_));
+    }
+
+    uint32_t getData() const { return data_; }
+
+  private:
+    uint32_t data_;
+};
+
+class BoolExtension : public Extension<TestTarget, BoolExtension>
+{
+  public:
+    explicit BoolExtension(bool data)
+        : data_(data) {}
+
+    std::unique_ptr<gem5::ExtensionBase> clone() const override
+    {
+        return std::unique_ptr<BoolExtension>(new BoolExtension(data_));
+    }
+
+    bool getData() const { return data_; }
+
+  private:
+    bool data_;
+};
+
+} // namespace
+
+TEST(ExtensibleTest, ExtensionID)
+{
+    std::shared_ptr<IntegerExtension> ext1(new IntegerExtension(0xabcd));
+    EXPECT_EQ(0, ext1->getExtensionID());
+
+    std::shared_ptr<BoolExtension> ext2(new BoolExtension(true));
+    EXPECT_EQ(1, ext2->getExtensionID());
+}
+
+TEST(ExtensibleTest, SetAndRemoveExtension)
+{
+    const uint32_t data = 0xbeef;
+    std::shared_ptr<IntegerExtension> ext(new IntegerExtension(data));
+    std::unique_ptr<TestTarget> target(new TestTarget);
+    target->setExtension(ext);
+    EXPECT_EQ(data, target->getExtension<IntegerExtension>()->getData());
+
+    target->removeExtension<IntegerExtension>();
+    EXPECT_EQ(nullptr, target->getExtension<IntegerExtension>());
+}
+
+TEST(ExtensibleTest, ReplaceExtension)
+{
+    const uint32_t data = 0xbeef;
+    std::shared_ptr<IntegerExtension> ext(new IntegerExtension(data));
+    std::unique_ptr<TestTarget> target(new TestTarget);
+    target->setExtension(ext);
+    EXPECT_EQ(data, target->getExtension<IntegerExtension>()->getData());
+
+    const uint32_t new_data = 0xa5a5;
+    std::shared_ptr<IntegerExtension> new_ext(new IntegerExtension(new_data));
+    target->setExtension(new_ext);
+    EXPECT_EQ(new_data, target->getExtension<IntegerExtension>()->getData());
+}

From df0bed6858a4b78c1148337695a07e2aeb4125af Mon Sep 17 00:00:00 2001
From: Nikos Nikoleris <nikos.nikoleris@arm.com>
Date: Thu, 9 Feb 2023 09:33:17 +0000
Subject: [PATCH 197/492] python: Ensure that m5.internal.params is available

Add an import to m5.internal.params which became necessary after:

95f9017c2e configs,python: Clean some cruft out of m5.objects.

This import is necessary but also causes problems when scons calls
build_tools/sim_object_param_struct_hh.py to generate
params/SimObject.hh. m5.internal.params itself imports _m5 and _m5 is
unavalailable resulting in an ImportError. This is bening and we can
safely ignore it.

Change-Id: I3809e81284e730fb9c9e0e7e91bd61b801d73f90
Signed-off-by: Nikos Nikoleris <nikos.nikoleris@arm.com>
Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/67797
Maintainer: Giacomo Travaglini <giacomo.travaglini@arm.com>
Tested-by: kokoro <noreply+kokoro@google.com>
Reviewed-by: Giacomo Travaglini <giacomo.travaglini@arm.com>
---
 src/python/m5/SimObject.py       |  3 +++
 src/python/m5/internal/params.py | 17 +++++++++++++----
 2 files changed, 16 insertions(+), 4 deletions(-)

diff --git a/src/python/m5/SimObject.py b/src/python/m5/SimObject.py
index b5dfca9752..6caa532897 100644
--- a/src/python/m5/SimObject.py
+++ b/src/python/m5/SimObject.py
@@ -445,6 +445,9 @@ class MetaSimObject(type):
         return cls.__name__
 
     def getCCClass(cls):
+        # Ensure that m5.internal.params is available.
+        import m5.internal.params
+
         return getattr(m5.internal.params, cls.pybind_class)
 
     # See ParamValue.cxx_predecls for description.
diff --git a/src/python/m5/internal/params.py b/src/python/m5/internal/params.py
index 8762a69e61..8225d0b059 100644
--- a/src/python/m5/internal/params.py
+++ b/src/python/m5/internal/params.py
@@ -37,8 +37,17 @@
 # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 
 import inspect
-import _m5
 
-for name, module in inspect.getmembers(_m5):
-    if name.startswith("param_") or name.startswith("enum_"):
-        exec("from _m5.%s import *" % name)
+try:
+    # Avoid ImportErrors at build time when _m5 is not available
+    import _m5
+
+    in_gem5 = True
+except ImportError:
+    # The import failed, we're being called from the build system
+    in_gem5 = False
+
+if in_gem5:
+    for name, module in inspect.getmembers(_m5):
+        if name.startswith("param_") or name.startswith("enum_"):
+            exec("from _m5.%s import *" % name)

From c913c098a6a6d5afb9dc3911f2fc953510fe328a Mon Sep 17 00:00:00 2001
From: Yan Lee <yanlee@google.com>
Date: Tue, 14 Feb 2023 19:50:10 -0800
Subject: [PATCH 198/492] mem: add extension mechanism into Packet

Change-Id: Ieda941f73078d98ad7896a376d95dd1573c938e6
Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/67957
Reviewed-by: Yu-hsin Wang <yuhsingw@google.com>
Reviewed-by: Jason Lowe-Power <power.jg@gmail.com>
Maintainer: Jason Lowe-Power <power.jg@gmail.com>
Tested-by: kokoro <noreply+kokoro@google.com>
---
 src/mem/packet.hh | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/src/mem/packet.hh b/src/mem/packet.hh
index 9d720fb9a0..ed7a94f4fb 100644
--- a/src/mem/packet.hh
+++ b/src/mem/packet.hh
@@ -55,6 +55,7 @@
 #include "base/addr_range.hh"
 #include "base/cast.hh"
 #include "base/compiler.hh"
+#include "base/extensible.hh"
 #include "base/flags.hh"
 #include "base/logging.hh"
 #include "base/printable.hh"
@@ -290,7 +291,7 @@ class MemCmd
  * ultimate destination and back, possibly being conveyed by several
  * different Packets along the way.)
  */
-class Packet : public Printable
+class Packet : public Printable, public Extensible<Packet>
 {
   public:
     typedef uint32_t FlagsType;
@@ -941,7 +942,8 @@ class Packet : public Printable
      * packet should allocate its own data.
      */
     Packet(const PacketPtr pkt, bool clear_flags, bool alloc_data)
-        :  cmd(pkt->cmd), id(pkt->id), req(pkt->req),
+        :  Extensible<Packet>(*pkt),
+           cmd(pkt->cmd), id(pkt->id), req(pkt->req),
            data(nullptr),
            addr(pkt->addr), _isSecure(pkt->_isSecure), size(pkt->size),
            bytesValid(pkt->bytesValid),

From 4c9253761ff6ea4bbd4b207d6dca79eedcab903e Mon Sep 17 00:00:00 2001
From: Yan Lee <yanlee@google.com>
Date: Tue, 14 Feb 2023 19:50:49 -0800
Subject: [PATCH 199/492] mem: add extension mechanism into Request

Change-Id: Ie144e0cf243bab6d9ddbea79caf559c7e774a787
Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/67958
Maintainer: Jason Lowe-Power <power.jg@gmail.com>
Reviewed-by: Yu-hsin Wang <yuhsingw@google.com>
Tested-by: kokoro <noreply+kokoro@google.com>
Reviewed-by: Jason Lowe-Power <power.jg@gmail.com>
---
 src/mem/request.hh | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/src/mem/request.hh b/src/mem/request.hh
index be91c71cc0..491aad0241 100644
--- a/src/mem/request.hh
+++ b/src/mem/request.hh
@@ -58,6 +58,7 @@
 
 #include "base/amo.hh"
 #include "base/compiler.hh"
+#include "base/extensible.hh"
 #include "base/flags.hh"
 #include "base/types.hh"
 #include "cpu/inst_seq.hh"
@@ -93,7 +94,7 @@ class ThreadContext;
 typedef std::shared_ptr<Request> RequestPtr;
 typedef uint16_t RequestorID;
 
-class Request
+class Request : public Extensible<Request>
 {
   public:
     typedef uint64_t FlagsType;
@@ -501,7 +502,8 @@ class Request
     }
 
     Request(const Request& other)
-        : _paddr(other._paddr), _size(other._size),
+        : Extensible<Request>(other),
+          _paddr(other._paddr), _size(other._size),
           _byteEnable(other._byteEnable),
           _requestorId(other._requestorId),
           _flags(other._flags),

From 4dfc312d6df91129a4007e588678d076073d390d Mon Sep 17 00:00:00 2001
From: Yan Lee <yanlee@google.com>
Date: Thu, 16 Feb 2023 20:56:19 -0800
Subject: [PATCH 200/492] base: extensible: add example codes of extension

Change-Id: Iaab1f2998a3f621b86d63bed7274373ba433d71c
Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/68017
Maintainer: Jason Lowe-Power <power.jg@gmail.com>
Reviewed-by: Jason Lowe-Power <power.jg@gmail.com>
Tested-by: kokoro <noreply+kokoro@google.com>
---
 src/base/extensible.hh | 31 +++++++++++++++++++++++++++++++
 1 file changed, 31 insertions(+)

diff --git a/src/base/extensible.hh b/src/base/extensible.hh
index eb79c71be3..e80103c577 100644
--- a/src/base/extensible.hh
+++ b/src/base/extensible.hh
@@ -27,6 +27,10 @@
 
 /* @file
  * Extensible Object Base Class Declaration
+ *
+ * This class can be used to add an "extension" field to packet/request which
+ * will be passed along with the original packet/request pointer. This allows
+ * developers to extend packet/request without modifying the original class.
  */
 
 #ifndef __BASE_EXTENSIBLE_HH__
@@ -69,6 +73,33 @@ class ExtensionBase
  * This is the extension for carrying additional information.
  * Each type of extension will have a unique extensionID.
  * This extensionID will assign to base class for comparsion.
+ *
+ * Example usage:
+ *
+ *   class MyTarget : Extensible<MyTarget> {};
+ *
+ *   class MyExtension : public Extension<MyTarget, MyExtension>
+ *   {
+ *     public:
+ *       MyExtension();
+ *       std::unique_ptr<ExtensionBase> clone() const override;
+ *       uint32_t getData();
+ *
+ *     private:
+ *       uint32_t data_;;
+ *   };
+ *
+ *   std::unique_ptr<MyTarget> mytarget(new MyTarget);
+ *   std::shared_ptr<MyExtension> myext(new MyExtension);
+ *   mytarget->setExtension(myext);
+ *
+ *   std::shared_ptr<MyExtension> ext = mytarget->getExtension<MyExtension>();
+ *   uint32_t data = ext->getData();
+ *   mytarget->removeExtension<MyExtension>();
+ *
+ *   In the example above, MyTarget can carry an extension named MyExtension,
+ *   which contains an additional data field. This could be applicated to any
+ *   debug information or any data field in any protocol.
  */
 template <typename Target, typename T>
 class Extension : public ExtensionBase

From c995d969568be1890717093a0722bf3cd77b8207 Mon Sep 17 00:00:00 2001
From: Ivan Turasov <turasov.ivan@gmail.com>
Date: Tue, 21 Feb 2023 14:46:02 +0100
Subject: [PATCH 201/492] arch-arm: Add missing <array> header in regs/misc.hh

Adding the header avoids "error: implicit instantiation of undefined template 'std::array..."
error that halted the build process on macOS.

Relevant discussion on Slack with Gabriel Bunsot

Change-Id: I935d7045f4b2c01ecef7c663de7c1e9408eead57
Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/68217
Maintainer: Giacomo Travaglini <giacomo.travaglini@arm.com>
Tested-by: kokoro <noreply+kokoro@google.com>
Reviewed-by: Giacomo Travaglini <giacomo.travaglini@arm.com>
---
 src/arch/arm/regs/misc.hh | 1 +
 1 file changed, 1 insertion(+)

diff --git a/src/arch/arm/regs/misc.hh b/src/arch/arm/regs/misc.hh
index 999993b0de..bf25ea3144 100644
--- a/src/arch/arm/regs/misc.hh
+++ b/src/arch/arm/regs/misc.hh
@@ -41,6 +41,7 @@
 #ifndef __ARCH_ARM_REGS_MISC_HH__
 #define __ARCH_ARM_REGS_MISC_HH__
 
+#include <array>
 #include <bitset>
 #include <tuple>
 

From 3892ee029aa8814284cbdca352862ccc5ead0e49 Mon Sep 17 00:00:00 2001
From: "Bobby R. Bruce" <bbruce@ucdavis.edu>
Date: Sun, 19 Feb 2023 23:49:28 +0000
Subject: [PATCH 202/492] configs: Deprecate fs.py and se.py scripts

Ideally, 'configs/common' should also be deprecated, but some tests still
depend on this directory.

Change-Id: I7c0cbf1f854e1dec9308b6802d6fb70c9af97fc0
Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/68157
Tested-by: kokoro <noreply+kokoro@google.com>
Reviewed-by: Giacomo Travaglini <giacomo.travaglini@arm.com>
Reviewed-by: Daniel Carvalho <odanrc@yahoo.com.br>
Maintainer: Jason Lowe-Power <power.jg@gmail.com>
Reviewed-by: Jason Lowe-Power <power.jg@gmail.com>
---
 configs/deprecated/example/fs.py | 444 +++++++++++++++++++++++++++++++
 configs/deprecated/example/se.py | 293 ++++++++++++++++++++
 configs/example/fs.py            | 420 +----------------------------
 configs/example/se.py            | 267 +------------------
 4 files changed, 750 insertions(+), 674 deletions(-)
 create mode 100644 configs/deprecated/example/fs.py
 create mode 100644 configs/deprecated/example/se.py

diff --git a/configs/deprecated/example/fs.py b/configs/deprecated/example/fs.py
new file mode 100644
index 0000000000..59c35925fc
--- /dev/null
+++ b/configs/deprecated/example/fs.py
@@ -0,0 +1,444 @@
+# Copyright (c) 2010-2013, 2016, 2019-2020 ARM Limited
+# Copyright (c) 2020 Barkhausen Institut
+# All rights reserved.
+#
+# The license below extends only to copyright in the software and shall
+# not be construed as granting a license to any other intellectual
+# property including but not limited to intellectual property relating
+# to a hardware implementation of the functionality of the software
+# licensed hereunder.  You may use the software subject to the license
+# terms below provided that you ensure that this notice is replicated
+# unmodified and in its entirety in all distributions of the software,
+# modified or unmodified, in source code or in binary form.
+#
+# Copyright (c) 2012-2014 Mark D. Hill and David A. Wood
+# Copyright (c) 2009-2011 Advanced Micro Devices, Inc.
+# Copyright (c) 2006-2007 The Regents of The University of Michigan
+# All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are
+# met: redistributions of source code must retain the above copyright
+# notice, this list of conditions and the following disclaimer;
+# redistributions in binary form must reproduce the above copyright
+# notice, this list of conditions and the following disclaimer in the
+# documentation and/or other materials provided with the distribution;
+# neither the name of the copyright holders nor the names of its
+# contributors may be used to endorse or promote products derived from
+# this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+import argparse
+import sys
+
+import m5
+from m5.defines import buildEnv
+from m5.objects import *
+from m5.util import addToPath, fatal, warn
+from m5.util.fdthelper import *
+from gem5.isas import ISA
+from gem5.runtime import get_runtime_isa
+
+addToPath("../../")
+
+from ruby import Ruby
+
+from common.FSConfig import *
+from common.SysPaths import *
+from common.Benchmarks import *
+from common import Simulation
+from common import CacheConfig
+from common import CpuConfig
+from common import MemConfig
+from common import ObjectList
+from common.Caches import *
+from common import Options
+
+
+def cmd_line_template():
+    if args.command_line and args.command_line_file:
+        print(
+            "Error: --command-line and --command-line-file are "
+            "mutually exclusive"
+        )
+        sys.exit(1)
+    if args.command_line:
+        return args.command_line
+    if args.command_line_file:
+        return open(args.command_line_file).read().strip()
+    return None
+
+
+def build_test_system(np):
+    cmdline = cmd_line_template()
+    isa = get_runtime_isa()
+    if isa == ISA.MIPS:
+        test_sys = makeLinuxMipsSystem(test_mem_mode, bm[0], cmdline=cmdline)
+    elif isa == ISA.SPARC:
+        test_sys = makeSparcSystem(test_mem_mode, bm[0], cmdline=cmdline)
+    elif isa == ISA.RISCV:
+        test_sys = makeBareMetalRiscvSystem(
+            test_mem_mode, bm[0], cmdline=cmdline
+        )
+    elif isa == ISA.X86:
+        test_sys = makeLinuxX86System(
+            test_mem_mode, np, bm[0], args.ruby, cmdline=cmdline
+        )
+    elif isa == ISA.ARM:
+        test_sys = makeArmSystem(
+            test_mem_mode,
+            args.machine_type,
+            np,
+            bm[0],
+            args.dtb_filename,
+            bare_metal=args.bare_metal,
+            cmdline=cmdline,
+            external_memory=args.external_memory_system,
+            ruby=args.ruby,
+            vio_9p=args.vio_9p,
+            bootloader=args.bootloader,
+        )
+        if args.enable_context_switch_stats_dump:
+            test_sys.enable_context_switch_stats_dump = True
+    else:
+        fatal("Incapable of building %s full system!", isa.name)
+
+    # Set the cache line size for the entire system
+    test_sys.cache_line_size = args.cacheline_size
+
+    # Create a top-level voltage domain
+    test_sys.voltage_domain = VoltageDomain(voltage=args.sys_voltage)
+
+    # Create a source clock for the system and set the clock period
+    test_sys.clk_domain = SrcClockDomain(
+        clock=args.sys_clock, voltage_domain=test_sys.voltage_domain
+    )
+
+    # Create a CPU voltage domain
+    test_sys.cpu_voltage_domain = VoltageDomain()
+
+    # Create a source clock for the CPUs and set the clock period
+    test_sys.cpu_clk_domain = SrcClockDomain(
+        clock=args.cpu_clock, voltage_domain=test_sys.cpu_voltage_domain
+    )
+
+    if buildEnv["USE_RISCV_ISA"]:
+        test_sys.workload.bootloader = args.kernel
+    elif args.kernel is not None:
+        test_sys.workload.object_file = binary(args.kernel)
+
+    if args.script is not None:
+        test_sys.readfile = args.script
+
+    test_sys.init_param = args.init_param
+
+    # For now, assign all the CPUs to the same clock domain
+    test_sys.cpu = [
+        TestCPUClass(clk_domain=test_sys.cpu_clk_domain, cpu_id=i)
+        for i in range(np)
+    ]
+
+    if args.ruby:
+        bootmem = getattr(test_sys, "_bootmem", None)
+        Ruby.create_system(
+            args, True, test_sys, test_sys.iobus, test_sys._dma_ports, bootmem
+        )
+
+        # Create a seperate clock domain for Ruby
+        test_sys.ruby.clk_domain = SrcClockDomain(
+            clock=args.ruby_clock, voltage_domain=test_sys.voltage_domain
+        )
+
+        # Connect the ruby io port to the PIO bus,
+        # assuming that there is just one such port.
+        test_sys.iobus.mem_side_ports = test_sys.ruby._io_port.in_ports
+
+        for (i, cpu) in enumerate(test_sys.cpu):
+            #
+            # Tie the cpu ports to the correct ruby system ports
+            #
+            cpu.clk_domain = test_sys.cpu_clk_domain
+            cpu.createThreads()
+            cpu.createInterruptController()
+
+            test_sys.ruby._cpu_ports[i].connectCpuPorts(cpu)
+
+    else:
+        if args.caches or args.l2cache:
+            # By default the IOCache runs at the system clock
+            test_sys.iocache = IOCache(addr_ranges=test_sys.mem_ranges)
+            test_sys.iocache.cpu_side = test_sys.iobus.mem_side_ports
+            test_sys.iocache.mem_side = test_sys.membus.cpu_side_ports
+        elif not args.external_memory_system:
+            test_sys.iobridge = Bridge(
+                delay="50ns", ranges=test_sys.mem_ranges
+            )
+            test_sys.iobridge.cpu_side_port = test_sys.iobus.mem_side_ports
+            test_sys.iobridge.mem_side_port = test_sys.membus.cpu_side_ports
+
+        # Sanity check
+        if args.simpoint_profile:
+            if not ObjectList.is_noncaching_cpu(TestCPUClass):
+                fatal("SimPoint generation should be done with atomic cpu")
+            if np > 1:
+                fatal(
+                    "SimPoint generation not supported with more than one CPUs"
+                )
+
+        for i in range(np):
+            if args.simpoint_profile:
+                test_sys.cpu[i].addSimPointProbe(args.simpoint_interval)
+            if args.checker:
+                test_sys.cpu[i].addCheckerCpu()
+            if not ObjectList.is_kvm_cpu(TestCPUClass):
+                if args.bp_type:
+                    bpClass = ObjectList.bp_list.get(args.bp_type)
+                    test_sys.cpu[i].branchPred = bpClass()
+                if args.indirect_bp_type:
+                    IndirectBPClass = ObjectList.indirect_bp_list.get(
+                        args.indirect_bp_type
+                    )
+                    test_sys.cpu[
+                        i
+                    ].branchPred.indirectBranchPred = IndirectBPClass()
+            test_sys.cpu[i].createThreads()
+
+        # If elastic tracing is enabled when not restoring from checkpoint and
+        # when not fast forwarding using the atomic cpu, then check that the
+        # TestCPUClass is DerivO3CPU or inherits from DerivO3CPU. If the check
+        # passes then attach the elastic trace probe.
+        # If restoring from checkpoint or fast forwarding, the code that does this for
+        # FutureCPUClass is in the Simulation module. If the check passes then the
+        # elastic trace probe is attached to the switch CPUs.
+        if (
+            args.elastic_trace_en
+            and args.checkpoint_restore == None
+            and not args.fast_forward
+        ):
+            CpuConfig.config_etrace(TestCPUClass, test_sys.cpu, args)
+
+        CacheConfig.config_cache(args, test_sys)
+
+        MemConfig.config_mem(args, test_sys)
+
+    if ObjectList.is_kvm_cpu(TestCPUClass) or ObjectList.is_kvm_cpu(
+        FutureClass
+    ):
+        # Assign KVM CPUs to their own event queues / threads. This
+        # has to be done after creating caches and other child objects
+        # since these mustn't inherit the CPU event queue.
+        for i, cpu in enumerate(test_sys.cpu):
+            # Child objects usually inherit the parent's event
+            # queue. Override that and use the same event queue for
+            # all devices.
+            for obj in cpu.descendants():
+                obj.eventq_index = 0
+            cpu.eventq_index = i + 1
+        test_sys.kvm_vm = KvmVM()
+
+    return test_sys
+
+
+def build_drive_system(np):
+    # driver system CPU is always simple, so is the memory
+    # Note this is an assignment of a class, not an instance.
+    DriveCPUClass = AtomicSimpleCPU
+    drive_mem_mode = "atomic"
+    DriveMemClass = SimpleMemory
+
+    cmdline = cmd_line_template()
+    if buildEnv["USE_MIPS_ISA"]:
+        drive_sys = makeLinuxMipsSystem(drive_mem_mode, bm[1], cmdline=cmdline)
+    elif buildEnv["USE_SPARC_ISA"]:
+        drive_sys = makeSparcSystem(drive_mem_mode, bm[1], cmdline=cmdline)
+    elif buildEnv["USE_X86_ISA"]:
+        drive_sys = makeLinuxX86System(
+            drive_mem_mode, np, bm[1], cmdline=cmdline
+        )
+    elif buildEnv["USE_ARM_ISA"]:
+        drive_sys = makeArmSystem(
+            drive_mem_mode,
+            args.machine_type,
+            np,
+            bm[1],
+            args.dtb_filename,
+            cmdline=cmdline,
+        )
+
+    # Create a top-level voltage domain
+    drive_sys.voltage_domain = VoltageDomain(voltage=args.sys_voltage)
+
+    # Create a source clock for the system and set the clock period
+    drive_sys.clk_domain = SrcClockDomain(
+        clock=args.sys_clock, voltage_domain=drive_sys.voltage_domain
+    )
+
+    # Create a CPU voltage domain
+    drive_sys.cpu_voltage_domain = VoltageDomain()
+
+    # Create a source clock for the CPUs and set the clock period
+    drive_sys.cpu_clk_domain = SrcClockDomain(
+        clock=args.cpu_clock, voltage_domain=drive_sys.cpu_voltage_domain
+    )
+
+    drive_sys.cpu = DriveCPUClass(
+        clk_domain=drive_sys.cpu_clk_domain, cpu_id=0
+    )
+    drive_sys.cpu.createThreads()
+    drive_sys.cpu.createInterruptController()
+    drive_sys.cpu.connectBus(drive_sys.membus)
+    if args.kernel is not None:
+        drive_sys.workload.object_file = binary(args.kernel)
+
+    if ObjectList.is_kvm_cpu(DriveCPUClass):
+        drive_sys.kvm_vm = KvmVM()
+
+    drive_sys.iobridge = Bridge(delay="50ns", ranges=drive_sys.mem_ranges)
+    drive_sys.iobridge.cpu_side_port = drive_sys.iobus.mem_side_ports
+    drive_sys.iobridge.mem_side_port = drive_sys.membus.cpu_side_ports
+
+    # Create the appropriate memory controllers and connect them to the
+    # memory bus
+    drive_sys.mem_ctrls = [
+        DriveMemClass(range=r) for r in drive_sys.mem_ranges
+    ]
+    for i in range(len(drive_sys.mem_ctrls)):
+        drive_sys.mem_ctrls[i].port = drive_sys.membus.mem_side_ports
+
+    drive_sys.init_param = args.init_param
+
+    return drive_sys
+
+
+warn(
+    "The fs.py script is deprecated. It will be removed in future releases of "
+    " gem5."
+)
+
+# Add args
+parser = argparse.ArgumentParser()
+Options.addCommonOptions(parser)
+Options.addFSOptions(parser)
+
+# Add the ruby specific and protocol specific args
+if "--ruby" in sys.argv:
+    Ruby.define_options(parser)
+
+args = parser.parse_args()
+
+# system under test can be any CPU
+(TestCPUClass, test_mem_mode, FutureClass) = Simulation.setCPUClass(args)
+
+# Match the memories with the CPUs, based on the options for the test system
+TestMemClass = Simulation.setMemClass(args)
+
+if args.benchmark:
+    try:
+        bm = Benchmarks[args.benchmark]
+    except KeyError:
+        print("Error benchmark %s has not been defined." % args.benchmark)
+        print("Valid benchmarks are: %s" % DefinedBenchmarks)
+        sys.exit(1)
+else:
+    if args.dual:
+        bm = [
+            SysConfig(
+                disks=args.disk_image,
+                rootdev=args.root_device,
+                mem=args.mem_size,
+                os_type=args.os_type,
+            ),
+            SysConfig(
+                disks=args.disk_image,
+                rootdev=args.root_device,
+                mem=args.mem_size,
+                os_type=args.os_type,
+            ),
+        ]
+    else:
+        bm = [
+            SysConfig(
+                disks=args.disk_image,
+                rootdev=args.root_device,
+                mem=args.mem_size,
+                os_type=args.os_type,
+            )
+        ]
+
+np = args.num_cpus
+
+test_sys = build_test_system(np)
+
+if len(bm) == 2:
+    drive_sys = build_drive_system(np)
+    root = makeDualRoot(True, test_sys, drive_sys, args.etherdump)
+elif len(bm) == 1 and args.dist:
+    # This system is part of a dist-gem5 simulation
+    root = makeDistRoot(
+        test_sys,
+        args.dist_rank,
+        args.dist_size,
+        args.dist_server_name,
+        args.dist_server_port,
+        args.dist_sync_repeat,
+        args.dist_sync_start,
+        args.ethernet_linkspeed,
+        args.ethernet_linkdelay,
+        args.etherdump,
+    )
+elif len(bm) == 1:
+    root = Root(full_system=True, system=test_sys)
+else:
+    print("Error I don't know how to create more than 2 systems.")
+    sys.exit(1)
+
+if ObjectList.is_kvm_cpu(TestCPUClass) or ObjectList.is_kvm_cpu(FutureClass):
+    # Required for running kvm on multiple host cores.
+    # Uses gem5's parallel event queue feature
+    # Note: The simulator is quite picky about this number!
+    root.sim_quantum = int(1e9)  # 1 ms
+
+if args.timesync:
+    root.time_sync_enable = True
+
+if args.frame_capture:
+    VncServer.frame_capture = True
+
+if buildEnv["USE_ARM_ISA"] and not args.bare_metal and not args.dtb_filename:
+    if args.machine_type not in [
+        "VExpress_GEM5",
+        "VExpress_GEM5_V1",
+        "VExpress_GEM5_V2",
+        "VExpress_GEM5_Foundation",
+    ]:
+        warn(
+            "Can only correctly generate a dtb for VExpress_GEM5_* "
+            "platforms, unless custom hardware models have been equipped "
+            "with generation functionality."
+        )
+
+    # Generate a Device Tree
+    for sysname in ("system", "testsys", "drivesys"):
+        if hasattr(root, sysname):
+            sys = getattr(root, sysname)
+            sys.workload.dtb_filename = os.path.join(
+                m5.options.outdir, "%s.dtb" % sysname
+            )
+            sys.generateDtb(sys.workload.dtb_filename)
+
+if args.wait_gdb:
+    test_sys.workload.wait_for_remote_gdb = True
+
+Simulation.setWorkCountOptions(test_sys, args)
+Simulation.run(args, root, test_sys, FutureClass)
diff --git a/configs/deprecated/example/se.py b/configs/deprecated/example/se.py
new file mode 100644
index 0000000000..4732839874
--- /dev/null
+++ b/configs/deprecated/example/se.py
@@ -0,0 +1,293 @@
+# Copyright (c) 2012-2013 ARM Limited
+# All rights reserved.
+#
+# The license below extends only to copyright in the software and shall
+# not be construed as granting a license to any other intellectual
+# property including but not limited to intellectual property relating
+# to a hardware implementation of the functionality of the software
+# licensed hereunder.  You may use the software subject to the license
+# terms below provided that you ensure that this notice is replicated
+# unmodified and in its entirety in all distributions of the software,
+# modified or unmodified, in source code or in binary form.
+#
+# Copyright (c) 2006-2008 The Regents of The University of Michigan
+# All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are
+# met: redistributions of source code must retain the above copyright
+# notice, this list of conditions and the following disclaimer;
+# redistributions in binary form must reproduce the above copyright
+# notice, this list of conditions and the following disclaimer in the
+# documentation and/or other materials provided with the distribution;
+# neither the name of the copyright holders nor the names of its
+# contributors may be used to endorse or promote products derived from
+# this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+# Simple test script
+#
+# "m5 test.py"
+
+import argparse
+import sys
+import os
+
+import m5
+from m5.defines import buildEnv
+from m5.objects import *
+from m5.params import NULL
+from m5.util import addToPath, fatal, warn
+from gem5.isas import ISA
+from gem5.runtime import get_runtime_isa
+
+addToPath("../../")
+
+from ruby import Ruby
+
+from common import Options
+from common import Simulation
+from common import CacheConfig
+from common import CpuConfig
+from common import ObjectList
+from common import MemConfig
+from common.FileSystemConfig import config_filesystem
+from common.Caches import *
+from common.cpu2000 import *
+
+
+def get_processes(args):
+    """Interprets provided args and returns a list of processes"""
+
+    multiprocesses = []
+    inputs = []
+    outputs = []
+    errouts = []
+    pargs = []
+
+    workloads = args.cmd.split(";")
+    if args.input != "":
+        inputs = args.input.split(";")
+    if args.output != "":
+        outputs = args.output.split(";")
+    if args.errout != "":
+        errouts = args.errout.split(";")
+    if args.options != "":
+        pargs = args.options.split(";")
+
+    idx = 0
+    for wrkld in workloads:
+        process = Process(pid=100 + idx)
+        process.executable = wrkld
+        process.cwd = os.getcwd()
+        process.gid = os.getgid()
+
+        if args.env:
+            with open(args.env, "r") as f:
+                process.env = [line.rstrip() for line in f]
+
+        if len(pargs) > idx:
+            process.cmd = [wrkld] + pargs[idx].split()
+        else:
+            process.cmd = [wrkld]
+
+        if len(inputs) > idx:
+            process.input = inputs[idx]
+        if len(outputs) > idx:
+            process.output = outputs[idx]
+        if len(errouts) > idx:
+            process.errout = errouts[idx]
+
+        multiprocesses.append(process)
+        idx += 1
+
+    if args.smt:
+        assert args.cpu_type == "DerivO3CPU"
+        return multiprocesses, idx
+    else:
+        return multiprocesses, 1
+
+
+warn(
+    "The se.py script is deprecated. It will be removed in future releases of "
+    " gem5."
+)
+
+parser = argparse.ArgumentParser()
+Options.addCommonOptions(parser)
+Options.addSEOptions(parser)
+
+if "--ruby" in sys.argv:
+    Ruby.define_options(parser)
+
+args = parser.parse_args()
+
+multiprocesses = []
+numThreads = 1
+
+if args.bench:
+    apps = args.bench.split("-")
+    if len(apps) != args.num_cpus:
+        print("number of benchmarks not equal to set num_cpus!")
+        sys.exit(1)
+
+    for app in apps:
+        try:
+            if get_runtime_isa() == ISA.ARM:
+                exec(
+                    "workload = %s('arm_%s', 'linux', '%s')"
+                    % (app, args.arm_iset, args.spec_input)
+                )
+            else:
+                # TARGET_ISA has been removed, but this is missing a ], so it
+                # has incorrect syntax and wasn't being used anyway.
+                exec(
+                    "workload = %s(buildEnv['TARGET_ISA', 'linux', '%s')"
+                    % (app, args.spec_input)
+                )
+            multiprocesses.append(workload.makeProcess())
+        except:
+            print(
+                "Unable to find workload for %s: %s"
+                % (get_runtime_isa().name(), app),
+                file=sys.stderr,
+            )
+            sys.exit(1)
+elif args.cmd:
+    multiprocesses, numThreads = get_processes(args)
+else:
+    print("No workload specified. Exiting!\n", file=sys.stderr)
+    sys.exit(1)
+
+
+(CPUClass, test_mem_mode, FutureClass) = Simulation.setCPUClass(args)
+CPUClass.numThreads = numThreads
+
+# Check -- do not allow SMT with multiple CPUs
+if args.smt and args.num_cpus > 1:
+    fatal("You cannot use SMT with multiple CPUs!")
+
+np = args.num_cpus
+mp0_path = multiprocesses[0].executable
+system = System(
+    cpu=[CPUClass(cpu_id=i) for i in range(np)],
+    mem_mode=test_mem_mode,
+    mem_ranges=[AddrRange(args.mem_size)],
+    cache_line_size=args.cacheline_size,
+)
+
+if numThreads > 1:
+    system.multi_thread = True
+
+# Create a top-level voltage domain
+system.voltage_domain = VoltageDomain(voltage=args.sys_voltage)
+
+# Create a source clock for the system and set the clock period
+system.clk_domain = SrcClockDomain(
+    clock=args.sys_clock, voltage_domain=system.voltage_domain
+)
+
+# Create a CPU voltage domain
+system.cpu_voltage_domain = VoltageDomain()
+
+# Create a separate clock domain for the CPUs
+system.cpu_clk_domain = SrcClockDomain(
+    clock=args.cpu_clock, voltage_domain=system.cpu_voltage_domain
+)
+
+# If elastic tracing is enabled, then configure the cpu and attach the elastic
+# trace probe
+if args.elastic_trace_en:
+    CpuConfig.config_etrace(CPUClass, system.cpu, args)
+
+# All cpus belong to a common cpu_clk_domain, therefore running at a common
+# frequency.
+for cpu in system.cpu:
+    cpu.clk_domain = system.cpu_clk_domain
+
+if ObjectList.is_kvm_cpu(CPUClass) or ObjectList.is_kvm_cpu(FutureClass):
+    if buildEnv["USE_X86_ISA"]:
+        system.kvm_vm = KvmVM()
+        system.m5ops_base = 0xFFFF0000
+        for process in multiprocesses:
+            process.useArchPT = True
+            process.kvmInSE = True
+    else:
+        fatal("KvmCPU can only be used in SE mode with x86")
+
+# Sanity check
+if args.simpoint_profile:
+    if not ObjectList.is_noncaching_cpu(CPUClass):
+        fatal("SimPoint/BPProbe should be done with an atomic cpu")
+    if np > 1:
+        fatal("SimPoint generation not supported with more than one CPUs")
+
+for i in range(np):
+    if args.smt:
+        system.cpu[i].workload = multiprocesses
+    elif len(multiprocesses) == 1:
+        system.cpu[i].workload = multiprocesses[0]
+    else:
+        system.cpu[i].workload = multiprocesses[i]
+
+    if args.simpoint_profile:
+        system.cpu[i].addSimPointProbe(args.simpoint_interval)
+
+    if args.checker:
+        system.cpu[i].addCheckerCpu()
+
+    if args.bp_type:
+        bpClass = ObjectList.bp_list.get(args.bp_type)
+        system.cpu[i].branchPred = bpClass()
+
+    if args.indirect_bp_type:
+        indirectBPClass = ObjectList.indirect_bp_list.get(
+            args.indirect_bp_type
+        )
+        system.cpu[i].branchPred.indirectBranchPred = indirectBPClass()
+
+    system.cpu[i].createThreads()
+
+if args.ruby:
+    Ruby.create_system(args, False, system)
+    assert args.num_cpus == len(system.ruby._cpu_ports)
+
+    system.ruby.clk_domain = SrcClockDomain(
+        clock=args.ruby_clock, voltage_domain=system.voltage_domain
+    )
+    for i in range(np):
+        ruby_port = system.ruby._cpu_ports[i]
+
+        # Create the interrupt controller and connect its ports to Ruby
+        # Note that the interrupt controller is always present but only
+        # in x86 does it have message ports that need to be connected
+        system.cpu[i].createInterruptController()
+
+        # Connect the cpu's cache ports to Ruby
+        ruby_port.connectCpuPorts(system.cpu[i])
+else:
+    MemClass = Simulation.setMemClass(args)
+    system.membus = SystemXBar()
+    system.system_port = system.membus.cpu_side_ports
+    CacheConfig.config_cache(args, system)
+    MemConfig.config_mem(args, system)
+    config_filesystem(system, args)
+
+system.workload = SEWorkload.init_compatible(mp0_path)
+
+if args.wait_gdb:
+    system.workload.wait_for_remote_gdb = True
+
+root = Root(full_system=False, system=system)
+Simulation.run(args, root, system, FutureClass)
diff --git a/configs/example/fs.py b/configs/example/fs.py
index 0e31cfccac..30b4f19553 100644
--- a/configs/example/fs.py
+++ b/configs/example/fs.py
@@ -1,19 +1,4 @@
-# Copyright (c) 2010-2013, 2016, 2019-2020 ARM Limited
-# Copyright (c) 2020 Barkhausen Institut
-# All rights reserved.
-#
-# The license below extends only to copyright in the software and shall
-# not be construed as granting a license to any other intellectual
-# property including but not limited to intellectual property relating
-# to a hardware implementation of the functionality of the software
-# licensed hereunder.  You may use the software subject to the license
-# terms below provided that you ensure that this notice is replicated
-# unmodified and in its entirety in all distributions of the software,
-# modified or unmodified, in source code or in binary form.
-#
-# Copyright (c) 2012-2014 Mark D. Hill and David A. Wood
-# Copyright (c) 2009-2011 Advanced Micro Devices, Inc.
-# Copyright (c) 2006-2007 The Regents of The University of Michigan
+# Copyright (c) 2023 The Regents of the University of California
 # All rights reserved.
 #
 # Redistribution and use in source and binary forms, with or without
@@ -39,401 +24,10 @@
 # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 
-import argparse
-import sys
+from m5.util import fatal
 
-import m5
-from m5.defines import buildEnv
-from m5.objects import *
-from m5.util import addToPath, fatal, warn
-from m5.util.fdthelper import *
-from gem5.isas import ISA
-from gem5.runtime import get_runtime_isa
-
-addToPath("../")
-
-from ruby import Ruby
-
-from common.FSConfig import *
-from common.SysPaths import *
-from common.Benchmarks import *
-from common import Simulation
-from common import CacheConfig
-from common import CpuConfig
-from common import MemConfig
-from common import ObjectList
-from common.Caches import *
-from common import Options
-
-
-def cmd_line_template():
-    if args.command_line and args.command_line_file:
-        print(
-            "Error: --command-line and --command-line-file are "
-            "mutually exclusive"
-        )
-        sys.exit(1)
-    if args.command_line:
-        return args.command_line
-    if args.command_line_file:
-        return open(args.command_line_file).read().strip()
-    return None
-
-
-def build_test_system(np):
-    cmdline = cmd_line_template()
-    isa = get_runtime_isa()
-    if isa == ISA.MIPS:
-        test_sys = makeLinuxMipsSystem(test_mem_mode, bm[0], cmdline=cmdline)
-    elif isa == ISA.SPARC:
-        test_sys = makeSparcSystem(test_mem_mode, bm[0], cmdline=cmdline)
-    elif isa == ISA.RISCV:
-        test_sys = makeBareMetalRiscvSystem(
-            test_mem_mode, bm[0], cmdline=cmdline
-        )
-    elif isa == ISA.X86:
-        test_sys = makeLinuxX86System(
-            test_mem_mode, np, bm[0], args.ruby, cmdline=cmdline
-        )
-    elif isa == ISA.ARM:
-        test_sys = makeArmSystem(
-            test_mem_mode,
-            args.machine_type,
-            np,
-            bm[0],
-            args.dtb_filename,
-            bare_metal=args.bare_metal,
-            cmdline=cmdline,
-            external_memory=args.external_memory_system,
-            ruby=args.ruby,
-            vio_9p=args.vio_9p,
-            bootloader=args.bootloader,
-        )
-        if args.enable_context_switch_stats_dump:
-            test_sys.enable_context_switch_stats_dump = True
-    else:
-        fatal("Incapable of building %s full system!", isa.name)
-
-    # Set the cache line size for the entire system
-    test_sys.cache_line_size = args.cacheline_size
-
-    # Create a top-level voltage domain
-    test_sys.voltage_domain = VoltageDomain(voltage=args.sys_voltage)
-
-    # Create a source clock for the system and set the clock period
-    test_sys.clk_domain = SrcClockDomain(
-        clock=args.sys_clock, voltage_domain=test_sys.voltage_domain
-    )
-
-    # Create a CPU voltage domain
-    test_sys.cpu_voltage_domain = VoltageDomain()
-
-    # Create a source clock for the CPUs and set the clock period
-    test_sys.cpu_clk_domain = SrcClockDomain(
-        clock=args.cpu_clock, voltage_domain=test_sys.cpu_voltage_domain
-    )
-
-    if buildEnv["USE_RISCV_ISA"]:
-        test_sys.workload.bootloader = args.kernel
-    elif args.kernel is not None:
-        test_sys.workload.object_file = binary(args.kernel)
-
-    if args.script is not None:
-        test_sys.readfile = args.script
-
-    test_sys.init_param = args.init_param
-
-    # For now, assign all the CPUs to the same clock domain
-    test_sys.cpu = [
-        TestCPUClass(clk_domain=test_sys.cpu_clk_domain, cpu_id=i)
-        for i in range(np)
-    ]
-
-    if args.ruby:
-        bootmem = getattr(test_sys, "_bootmem", None)
-        Ruby.create_system(
-            args, True, test_sys, test_sys.iobus, test_sys._dma_ports, bootmem
-        )
-
-        # Create a seperate clock domain for Ruby
-        test_sys.ruby.clk_domain = SrcClockDomain(
-            clock=args.ruby_clock, voltage_domain=test_sys.voltage_domain
-        )
-
-        # Connect the ruby io port to the PIO bus,
-        # assuming that there is just one such port.
-        test_sys.iobus.mem_side_ports = test_sys.ruby._io_port.in_ports
-
-        for (i, cpu) in enumerate(test_sys.cpu):
-            #
-            # Tie the cpu ports to the correct ruby system ports
-            #
-            cpu.clk_domain = test_sys.cpu_clk_domain
-            cpu.createThreads()
-            cpu.createInterruptController()
-
-            test_sys.ruby._cpu_ports[i].connectCpuPorts(cpu)
-
-    else:
-        if args.caches or args.l2cache:
-            # By default the IOCache runs at the system clock
-            test_sys.iocache = IOCache(addr_ranges=test_sys.mem_ranges)
-            test_sys.iocache.cpu_side = test_sys.iobus.mem_side_ports
-            test_sys.iocache.mem_side = test_sys.membus.cpu_side_ports
-        elif not args.external_memory_system:
-            test_sys.iobridge = Bridge(
-                delay="50ns", ranges=test_sys.mem_ranges
-            )
-            test_sys.iobridge.cpu_side_port = test_sys.iobus.mem_side_ports
-            test_sys.iobridge.mem_side_port = test_sys.membus.cpu_side_ports
-
-        # Sanity check
-        if args.simpoint_profile:
-            if not ObjectList.is_noncaching_cpu(TestCPUClass):
-                fatal("SimPoint generation should be done with atomic cpu")
-            if np > 1:
-                fatal(
-                    "SimPoint generation not supported with more than one CPUs"
-                )
-
-        for i in range(np):
-            if args.simpoint_profile:
-                test_sys.cpu[i].addSimPointProbe(args.simpoint_interval)
-            if args.checker:
-                test_sys.cpu[i].addCheckerCpu()
-            if not ObjectList.is_kvm_cpu(TestCPUClass):
-                if args.bp_type:
-                    bpClass = ObjectList.bp_list.get(args.bp_type)
-                    test_sys.cpu[i].branchPred = bpClass()
-                if args.indirect_bp_type:
-                    IndirectBPClass = ObjectList.indirect_bp_list.get(
-                        args.indirect_bp_type
-                    )
-                    test_sys.cpu[
-                        i
-                    ].branchPred.indirectBranchPred = IndirectBPClass()
-            test_sys.cpu[i].createThreads()
-
-        # If elastic tracing is enabled when not restoring from checkpoint and
-        # when not fast forwarding using the atomic cpu, then check that the
-        # TestCPUClass is DerivO3CPU or inherits from DerivO3CPU. If the check
-        # passes then attach the elastic trace probe.
-        # If restoring from checkpoint or fast forwarding, the code that does this for
-        # FutureCPUClass is in the Simulation module. If the check passes then the
-        # elastic trace probe is attached to the switch CPUs.
-        if (
-            args.elastic_trace_en
-            and args.checkpoint_restore == None
-            and not args.fast_forward
-        ):
-            CpuConfig.config_etrace(TestCPUClass, test_sys.cpu, args)
-
-        CacheConfig.config_cache(args, test_sys)
-
-        MemConfig.config_mem(args, test_sys)
-
-    if ObjectList.is_kvm_cpu(TestCPUClass) or ObjectList.is_kvm_cpu(
-        FutureClass
-    ):
-        # Assign KVM CPUs to their own event queues / threads. This
-        # has to be done after creating caches and other child objects
-        # since these mustn't inherit the CPU event queue.
-        for i, cpu in enumerate(test_sys.cpu):
-            # Child objects usually inherit the parent's event
-            # queue. Override that and use the same event queue for
-            # all devices.
-            for obj in cpu.descendants():
-                obj.eventq_index = 0
-            cpu.eventq_index = i + 1
-        test_sys.kvm_vm = KvmVM()
-
-    return test_sys
-
-
-def build_drive_system(np):
-    # driver system CPU is always simple, so is the memory
-    # Note this is an assignment of a class, not an instance.
-    DriveCPUClass = AtomicSimpleCPU
-    drive_mem_mode = "atomic"
-    DriveMemClass = SimpleMemory
-
-    cmdline = cmd_line_template()
-    if buildEnv["USE_MIPS_ISA"]:
-        drive_sys = makeLinuxMipsSystem(drive_mem_mode, bm[1], cmdline=cmdline)
-    elif buildEnv["USE_SPARC_ISA"]:
-        drive_sys = makeSparcSystem(drive_mem_mode, bm[1], cmdline=cmdline)
-    elif buildEnv["USE_X86_ISA"]:
-        drive_sys = makeLinuxX86System(
-            drive_mem_mode, np, bm[1], cmdline=cmdline
-        )
-    elif buildEnv["USE_ARM_ISA"]:
-        drive_sys = makeArmSystem(
-            drive_mem_mode,
-            args.machine_type,
-            np,
-            bm[1],
-            args.dtb_filename,
-            cmdline=cmdline,
-        )
-
-    # Create a top-level voltage domain
-    drive_sys.voltage_domain = VoltageDomain(voltage=args.sys_voltage)
-
-    # Create a source clock for the system and set the clock period
-    drive_sys.clk_domain = SrcClockDomain(
-        clock=args.sys_clock, voltage_domain=drive_sys.voltage_domain
-    )
-
-    # Create a CPU voltage domain
-    drive_sys.cpu_voltage_domain = VoltageDomain()
-
-    # Create a source clock for the CPUs and set the clock period
-    drive_sys.cpu_clk_domain = SrcClockDomain(
-        clock=args.cpu_clock, voltage_domain=drive_sys.cpu_voltage_domain
-    )
-
-    drive_sys.cpu = DriveCPUClass(
-        clk_domain=drive_sys.cpu_clk_domain, cpu_id=0
-    )
-    drive_sys.cpu.createThreads()
-    drive_sys.cpu.createInterruptController()
-    drive_sys.cpu.connectBus(drive_sys.membus)
-    if args.kernel is not None:
-        drive_sys.workload.object_file = binary(args.kernel)
-
-    if ObjectList.is_kvm_cpu(DriveCPUClass):
-        drive_sys.kvm_vm = KvmVM()
-
-    drive_sys.iobridge = Bridge(delay="50ns", ranges=drive_sys.mem_ranges)
-    drive_sys.iobridge.cpu_side_port = drive_sys.iobus.mem_side_ports
-    drive_sys.iobridge.mem_side_port = drive_sys.membus.cpu_side_ports
-
-    # Create the appropriate memory controllers and connect them to the
-    # memory bus
-    drive_sys.mem_ctrls = [
-        DriveMemClass(range=r) for r in drive_sys.mem_ranges
-    ]
-    for i in range(len(drive_sys.mem_ctrls)):
-        drive_sys.mem_ctrls[i].port = drive_sys.membus.mem_side_ports
-
-    drive_sys.init_param = args.init_param
-
-    return drive_sys
-
-
-# Add args
-parser = argparse.ArgumentParser()
-Options.addCommonOptions(parser)
-Options.addFSOptions(parser)
-
-# Add the ruby specific and protocol specific args
-if "--ruby" in sys.argv:
-    Ruby.define_options(parser)
-
-args = parser.parse_args()
-
-# system under test can be any CPU
-(TestCPUClass, test_mem_mode, FutureClass) = Simulation.setCPUClass(args)
-
-# Match the memories with the CPUs, based on the options for the test system
-TestMemClass = Simulation.setMemClass(args)
-
-if args.benchmark:
-    try:
-        bm = Benchmarks[args.benchmark]
-    except KeyError:
-        print("Error benchmark %s has not been defined." % args.benchmark)
-        print("Valid benchmarks are: %s" % DefinedBenchmarks)
-        sys.exit(1)
-else:
-    if args.dual:
-        bm = [
-            SysConfig(
-                disks=args.disk_image,
-                rootdev=args.root_device,
-                mem=args.mem_size,
-                os_type=args.os_type,
-            ),
-            SysConfig(
-                disks=args.disk_image,
-                rootdev=args.root_device,
-                mem=args.mem_size,
-                os_type=args.os_type,
-            ),
-        ]
-    else:
-        bm = [
-            SysConfig(
-                disks=args.disk_image,
-                rootdev=args.root_device,
-                mem=args.mem_size,
-                os_type=args.os_type,
-            )
-        ]
-
-np = args.num_cpus
-
-test_sys = build_test_system(np)
-
-if len(bm) == 2:
-    drive_sys = build_drive_system(np)
-    root = makeDualRoot(True, test_sys, drive_sys, args.etherdump)
-elif len(bm) == 1 and args.dist:
-    # This system is part of a dist-gem5 simulation
-    root = makeDistRoot(
-        test_sys,
-        args.dist_rank,
-        args.dist_size,
-        args.dist_server_name,
-        args.dist_server_port,
-        args.dist_sync_repeat,
-        args.dist_sync_start,
-        args.ethernet_linkspeed,
-        args.ethernet_linkdelay,
-        args.etherdump,
-    )
-elif len(bm) == 1:
-    root = Root(full_system=True, system=test_sys)
-else:
-    print("Error I don't know how to create more than 2 systems.")
-    sys.exit(1)
-
-if ObjectList.is_kvm_cpu(TestCPUClass) or ObjectList.is_kvm_cpu(FutureClass):
-    # Required for running kvm on multiple host cores.
-    # Uses gem5's parallel event queue feature
-    # Note: The simulator is quite picky about this number!
-    root.sim_quantum = int(1e9)  # 1 ms
-
-if args.timesync:
-    root.time_sync_enable = True
-
-if args.frame_capture:
-    VncServer.frame_capture = True
-
-if buildEnv["USE_ARM_ISA"] and not args.bare_metal and not args.dtb_filename:
-    if args.machine_type not in [
-        "VExpress_GEM5",
-        "VExpress_GEM5_V1",
-        "VExpress_GEM5_V2",
-        "VExpress_GEM5_Foundation",
-    ]:
-        warn(
-            "Can only correctly generate a dtb for VExpress_GEM5_* "
-            "platforms, unless custom hardware models have been equipped "
-            "with generation functionality."
-        )
-
-    # Generate a Device Tree
-    for sysname in ("system", "testsys", "drivesys"):
-        if hasattr(root, sysname):
-            sys = getattr(root, sysname)
-            sys.workload.dtb_filename = os.path.join(
-                m5.options.outdir, "%s.dtb" % sysname
-            )
-            sys.generateDtb(sys.workload.dtb_filename)
-
-if args.wait_gdb:
-    test_sys.workload.wait_for_remote_gdb = True
-
-Simulation.setWorkCountOptions(test_sys, args)
-Simulation.run(args, root, test_sys, FutureClass)
+fatal(
+    "The 'configs/example/fs.py' script has been deprecated. It can be "
+    "found in 'configs/deprecated/example' if required. Its usage should be "
+    "avoided as it will be removed in future releases of gem5."
+)
diff --git a/configs/example/se.py b/configs/example/se.py
index 2372cf0efe..c185f09e5a 100644
--- a/configs/example/se.py
+++ b/configs/example/se.py
@@ -1,16 +1,4 @@
-# Copyright (c) 2012-2013 ARM Limited
-# All rights reserved.
-#
-# The license below extends only to copyright in the software and shall
-# not be construed as granting a license to any other intellectual
-# property including but not limited to intellectual property relating
-# to a hardware implementation of the functionality of the software
-# licensed hereunder.  You may use the software subject to the license
-# terms below provided that you ensure that this notice is replicated
-# unmodified and in its entirety in all distributions of the software,
-# modified or unmodified, in source code or in binary form.
-#
-# Copyright (c) 2006-2008 The Regents of The University of Michigan
+# Copyright (c) 2023 The Regents of the University of California
 # All rights reserved.
 #
 # Redistribution and use in source and binary forms, with or without
@@ -36,253 +24,10 @@
 # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 
-# Simple test script
-#
-# "m5 test.py"
+from m5.util import fatal
 
-import argparse
-import sys
-import os
-
-import m5
-from m5.defines import buildEnv
-from m5.objects import *
-from m5.params import NULL
-from m5.util import addToPath, fatal, warn
-from gem5.isas import ISA
-from gem5.runtime import get_runtime_isa
-
-addToPath("../")
-
-from ruby import Ruby
-
-from common import Options
-from common import Simulation
-from common import CacheConfig
-from common import CpuConfig
-from common import ObjectList
-from common import MemConfig
-from common.FileSystemConfig import config_filesystem
-from common.Caches import *
-from common.cpu2000 import *
-
-
-def get_processes(args):
-    """Interprets provided args and returns a list of processes"""
-
-    multiprocesses = []
-    inputs = []
-    outputs = []
-    errouts = []
-    pargs = []
-
-    workloads = args.cmd.split(";")
-    if args.input != "":
-        inputs = args.input.split(";")
-    if args.output != "":
-        outputs = args.output.split(";")
-    if args.errout != "":
-        errouts = args.errout.split(";")
-    if args.options != "":
-        pargs = args.options.split(";")
-
-    idx = 0
-    for wrkld in workloads:
-        process = Process(pid=100 + idx)
-        process.executable = wrkld
-        process.cwd = os.getcwd()
-        process.gid = os.getgid()
-
-        if args.env:
-            with open(args.env, "r") as f:
-                process.env = [line.rstrip() for line in f]
-
-        if len(pargs) > idx:
-            process.cmd = [wrkld] + pargs[idx].split()
-        else:
-            process.cmd = [wrkld]
-
-        if len(inputs) > idx:
-            process.input = inputs[idx]
-        if len(outputs) > idx:
-            process.output = outputs[idx]
-        if len(errouts) > idx:
-            process.errout = errouts[idx]
-
-        multiprocesses.append(process)
-        idx += 1
-
-    if args.smt:
-        assert args.cpu_type == "DerivO3CPU"
-        return multiprocesses, idx
-    else:
-        return multiprocesses, 1
-
-
-parser = argparse.ArgumentParser()
-Options.addCommonOptions(parser)
-Options.addSEOptions(parser)
-
-if "--ruby" in sys.argv:
-    Ruby.define_options(parser)
-
-args = parser.parse_args()
-
-multiprocesses = []
-numThreads = 1
-
-if args.bench:
-    apps = args.bench.split("-")
-    if len(apps) != args.num_cpus:
-        print("number of benchmarks not equal to set num_cpus!")
-        sys.exit(1)
-
-    for app in apps:
-        try:
-            if get_runtime_isa() == ISA.ARM:
-                exec(
-                    "workload = %s('arm_%s', 'linux', '%s')"
-                    % (app, args.arm_iset, args.spec_input)
-                )
-            else:
-                # TARGET_ISA has been removed, but this is missing a ], so it
-                # has incorrect syntax and wasn't being used anyway.
-                exec(
-                    "workload = %s(buildEnv['TARGET_ISA', 'linux', '%s')"
-                    % (app, args.spec_input)
-                )
-            multiprocesses.append(workload.makeProcess())
-        except:
-            print(
-                "Unable to find workload for %s: %s"
-                % (get_runtime_isa().name(), app),
-                file=sys.stderr,
-            )
-            sys.exit(1)
-elif args.cmd:
-    multiprocesses, numThreads = get_processes(args)
-else:
-    print("No workload specified. Exiting!\n", file=sys.stderr)
-    sys.exit(1)
-
-
-(CPUClass, test_mem_mode, FutureClass) = Simulation.setCPUClass(args)
-CPUClass.numThreads = numThreads
-
-# Check -- do not allow SMT with multiple CPUs
-if args.smt and args.num_cpus > 1:
-    fatal("You cannot use SMT with multiple CPUs!")
-
-np = args.num_cpus
-mp0_path = multiprocesses[0].executable
-system = System(
-    cpu=[CPUClass(cpu_id=i) for i in range(np)],
-    mem_mode=test_mem_mode,
-    mem_ranges=[AddrRange(args.mem_size)],
-    cache_line_size=args.cacheline_size,
+fatal(
+    "The 'configs/example/se.py' script has been deprecated. It can be "
+    "found in 'configs/deprecated/example' if required. Its usage should be "
+    "avoided as it will be removed in future releases of gem5."
 )
-
-if numThreads > 1:
-    system.multi_thread = True
-
-# Create a top-level voltage domain
-system.voltage_domain = VoltageDomain(voltage=args.sys_voltage)
-
-# Create a source clock for the system and set the clock period
-system.clk_domain = SrcClockDomain(
-    clock=args.sys_clock, voltage_domain=system.voltage_domain
-)
-
-# Create a CPU voltage domain
-system.cpu_voltage_domain = VoltageDomain()
-
-# Create a separate clock domain for the CPUs
-system.cpu_clk_domain = SrcClockDomain(
-    clock=args.cpu_clock, voltage_domain=system.cpu_voltage_domain
-)
-
-# If elastic tracing is enabled, then configure the cpu and attach the elastic
-# trace probe
-if args.elastic_trace_en:
-    CpuConfig.config_etrace(CPUClass, system.cpu, args)
-
-# All cpus belong to a common cpu_clk_domain, therefore running at a common
-# frequency.
-for cpu in system.cpu:
-    cpu.clk_domain = system.cpu_clk_domain
-
-if ObjectList.is_kvm_cpu(CPUClass) or ObjectList.is_kvm_cpu(FutureClass):
-    if buildEnv["USE_X86_ISA"]:
-        system.kvm_vm = KvmVM()
-        system.m5ops_base = 0xFFFF0000
-        for process in multiprocesses:
-            process.useArchPT = True
-            process.kvmInSE = True
-    else:
-        fatal("KvmCPU can only be used in SE mode with x86")
-
-# Sanity check
-if args.simpoint_profile:
-    if not ObjectList.is_noncaching_cpu(CPUClass):
-        fatal("SimPoint/BPProbe should be done with an atomic cpu")
-    if np > 1:
-        fatal("SimPoint generation not supported with more than one CPUs")
-
-for i in range(np):
-    if args.smt:
-        system.cpu[i].workload = multiprocesses
-    elif len(multiprocesses) == 1:
-        system.cpu[i].workload = multiprocesses[0]
-    else:
-        system.cpu[i].workload = multiprocesses[i]
-
-    if args.simpoint_profile:
-        system.cpu[i].addSimPointProbe(args.simpoint_interval)
-
-    if args.checker:
-        system.cpu[i].addCheckerCpu()
-
-    if args.bp_type:
-        bpClass = ObjectList.bp_list.get(args.bp_type)
-        system.cpu[i].branchPred = bpClass()
-
-    if args.indirect_bp_type:
-        indirectBPClass = ObjectList.indirect_bp_list.get(
-            args.indirect_bp_type
-        )
-        system.cpu[i].branchPred.indirectBranchPred = indirectBPClass()
-
-    system.cpu[i].createThreads()
-
-if args.ruby:
-    Ruby.create_system(args, False, system)
-    assert args.num_cpus == len(system.ruby._cpu_ports)
-
-    system.ruby.clk_domain = SrcClockDomain(
-        clock=args.ruby_clock, voltage_domain=system.voltage_domain
-    )
-    for i in range(np):
-        ruby_port = system.ruby._cpu_ports[i]
-
-        # Create the interrupt controller and connect its ports to Ruby
-        # Note that the interrupt controller is always present but only
-        # in x86 does it have message ports that need to be connected
-        system.cpu[i].createInterruptController()
-
-        # Connect the cpu's cache ports to Ruby
-        ruby_port.connectCpuPorts(system.cpu[i])
-else:
-    MemClass = Simulation.setMemClass(args)
-    system.membus = SystemXBar()
-    system.system_port = system.membus.cpu_side_ports
-    CacheConfig.config_cache(args, system)
-    MemConfig.config_mem(args, system)
-    config_filesystem(system, args)
-
-system.workload = SEWorkload.init_compatible(mp0_path)
-
-if args.wait_gdb:
-    system.workload.wait_for_remote_gdb = True
-
-root = Root(full_system=False, system=system)
-Simulation.run(args, root, system, FutureClass)

From 4ee724e054f9cf19685bd81f6e74e019040013cc Mon Sep 17 00:00:00 2001
From: "Bobby R. Bruce" <bbruce@ucdavis.edu>
Date: Fri, 6 Jan 2023 16:58:16 +0000
Subject: [PATCH 203/492] stdlib: Specialize the gem5-resources

This commit specializes the Resource class into specific sub-types.

The `Resource`, `CustomResource` and `CustomDiskImageResource` classes
have been deprecated in favor of the `AbstractResource` subclasses.
Custom Resources can be created via the resource specialization
constructor. Resources can be obtained via the gem5-resource
infrastructure with the `obtain_resource` function.

Fully implemented:

- DiskImageResource
- BinaryResource
- KernelResource
- BootloaderResource
- FileResource
- DirectoryResource

Partially implemented:

- SimpointResource
- CheckpointResource

While the schema of the resource.json file has changed, efforts have
been made to ensure backwards compatibility is maintained during this
transition.

Tests are included in this commit to verify this feature works as
expected.

**Note:** The Simpoint tests are disabled in this commit, to be
reenabled when Simpoint resource specialization is fully incorporated
here:
https://gem5-review.googlesource.com/c/public/gem5/+/67339

Change-Id: I77277ecaffc7abc86db08526aacc0b606ef04fe8
Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/67175
Tested-by: kokoro <noreply+kokoro@google.com>
Reviewed-by: Bobby Bruce <bbruce@ucdavis.edu>
Maintainer: Bobby Bruce <bbruce@ucdavis.edu>
---
 .../components/boards/kernel_disk_workload.py |  36 +-
 .../components/boards/se_binary_workload.py   |  22 +-
 src/python/gem5/resources/downloader.py       |  12 +-
 src/python/gem5/resources/resource.py         | 565 ++++++++++++++----
 src/python/gem5/resources/workload.py         |   4 +-
 .../test_gem5_library_examples.py             |  74 +--
 .../pyunit_resource_specialization.py         | 196 ++++++
 .../resources/pyunit_workload_checks.py       |  26 +-
 .../refs/resource-specialization.json         |  99 +++
 9 files changed, 837 insertions(+), 197 deletions(-)
 create mode 100644 tests/pyunit/stdlib/resources/pyunit_resource_specialization.py
 create mode 100644 tests/pyunit/stdlib/resources/refs/resource-specialization.json

diff --git a/src/python/gem5/components/boards/kernel_disk_workload.py b/src/python/gem5/components/boards/kernel_disk_workload.py
index 29d38baa7b..15e0cdf303 100644
--- a/src/python/gem5/components/boards/kernel_disk_workload.py
+++ b/src/python/gem5/components/boards/kernel_disk_workload.py
@@ -1,4 +1,4 @@
-# Copyright (c) 2021 The Regents of the University of California
+# Copyright (c) 2021, 2023 The Regents of the University of California
 # All rights reserved.
 #
 # Redistribution and use in source and binary forms, with or without
@@ -27,7 +27,12 @@
 from abc import abstractmethod
 
 from .abstract_board import AbstractBoard
-from ...resources.resource import AbstractResource
+from ...resources.resource import (
+    DiskImageResource,
+    BootloaderResource,
+    CheckpointResource,
+    KernelResource,
+)
 
 from typing import List, Optional, Union
 import os
@@ -89,7 +94,7 @@ class KernelDiskWorkload:
         raise NotImplementedError
 
     @abstractmethod
-    def _add_disk_to_board(self, disk_image: AbstractResource) -> None:
+    def _add_disk_to_board(self, disk_image: DiskImageResource) -> None:
         """
         Sets the configuration needed to add the disk image to the board.
 
@@ -101,7 +106,7 @@ class KernelDiskWorkload:
         raise NotImplementedError
 
     def get_disk_root_partition(
-        cls, disk_image: AbstractResource
+        cls, disk_image: DiskImageResource
     ) -> Optional[str]:
         """
         Obtains the root partition of a disk image by inspecting the resource's
@@ -109,14 +114,11 @@ class KernelDiskWorkload:
 
         :returns: The disk image's root partition.
         """
-        try:
-            return disk_image.get_metadata()["additional_metadata"][
-                "root_partition"
-            ]
-        except KeyError:
-            return None
+        return disk_image.get_root_partition()
 
-    def get_default_kernel_root_val(self, disk_image: AbstractResource) -> str:
+    def get_default_kernel_root_val(
+        self, disk_image: DiskImageResource
+    ) -> str:
         """
         Get the default kernel root value to be passed to the kernel. This is
         determined by the value implemented in the `get_disk_device()`
@@ -134,14 +136,14 @@ class KernelDiskWorkload:
 
     def set_kernel_disk_workload(
         self,
-        kernel: AbstractResource,
-        disk_image: AbstractResource,
-        bootloader: Optional[AbstractResource] = None,
+        kernel: KernelResource,
+        disk_image: DiskImageResource,
+        bootloader: Optional[BootloaderResource] = None,
         readfile: Optional[str] = None,
         readfile_contents: Optional[str] = None,
         kernel_args: Optional[List[str]] = None,
         exit_on_work_items: bool = True,
-        checkpoint: Optional[Union[Path, AbstractResource]] = None,
+        checkpoint: Optional[Union[Path, CheckpointResource]] = None,
     ) -> None:
         """
         This function allows the setting of a full-system run with a Kernel
@@ -212,11 +214,11 @@ class KernelDiskWorkload:
         if checkpoint:
             if isinstance(checkpoint, Path):
                 self._checkpoint = checkpoint
-            elif isinstance(checkpoint, AbstractResource):
+            elif isinstance(checkpoint, CheckpointResource):
                 self._checkpoint = Path(checkpoint.get_local_path())
             else:
                 # The checkpoint_dir must be None, Path, Or AbstractResource.
                 raise Exception(
                     "Checkpoints must be passed as a Path or an "
-                    "AbstractResource."
+                    "CheckpointResource."
                 )
diff --git a/src/python/gem5/components/boards/se_binary_workload.py b/src/python/gem5/components/boards/se_binary_workload.py
index 8ec112ee13..acedfaf9a9 100644
--- a/src/python/gem5/components/boards/se_binary_workload.py
+++ b/src/python/gem5/components/boards/se_binary_workload.py
@@ -25,7 +25,13 @@
 # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 
 from .abstract_board import AbstractBoard
-from ...resources.resource import AbstractResource
+from ...resources.resource import (
+    FileResource,
+    AbstractResource,
+    BinaryResource,
+    CheckpointResource,
+    SimpointResource,
+)
 from gem5.utils.simpoint import SimPoint
 
 from m5.objects import SEWorkload, Process
@@ -51,13 +57,13 @@ class SEBinaryWorkload:
 
     def set_se_binary_workload(
         self,
-        binary: AbstractResource,
+        binary: BinaryResource,
         exit_on_work_items: bool = True,
-        stdin_file: Optional[AbstractResource] = None,
+        stdin_file: Optional[FileResource] = None,
         stdout_file: Optional[Path] = None,
         stderr_file: Optional[Path] = None,
         arguments: List[str] = [],
-        checkpoint: Optional[Union[Path, AbstractResource]] = None,
+        checkpoint: Optional[Union[Path, CheckpointResource]] = None,
     ) -> None:
         """Set up the system to run a specific binary.
 
@@ -117,10 +123,10 @@ class SEBinaryWorkload:
 
     def set_se_simpoint_workload(
         self,
-        binary: AbstractResource,
+        binary: BinaryResource,
         arguments: List[str] = [],
-        simpoint: Union[AbstractResource, SimPoint] = None,
-        checkpoint: Optional[Union[Path, AbstractResource]] = None,
+        simpoint: Union[SimpointResource, SimPoint] = None,
+        checkpoint: Optional[Union[Path, CheckpointResource]] = None,
     ) -> None:
         """Set up the system to run a SimPoint workload.
 
@@ -141,7 +147,7 @@ class SEBinaryWorkload:
         """
 
         # convert input to SimPoint if necessary
-        if isinstance(simpoint, AbstractResource):
+        if isinstance(simpoint, SimpointResource):
             self._simpoint_object = SimPoint(simpoint)
         else:
             assert isinstance(simpoint, SimPoint)
diff --git a/src/python/gem5/resources/downloader.py b/src/python/gem5/resources/downloader.py
index 1fda8d86b6..0b67ecdebd 100644
--- a/src/python/gem5/resources/downloader.py
+++ b/src/python/gem5/resources/downloader.py
@@ -323,7 +323,11 @@ def list_resources() -> List[str]:
 
     :returns: A list of resources by name.
     """
-    return _get_resources(valid_types={"resource"}).keys()
+    from .resource import _get_resource_json_type_map
+
+    return _get_resources(
+        valid_types=_get_resource_json_type_map.keys()
+    ).keys()
 
 
 def get_workload_json_obj(workload_name: str) -> Dict:
@@ -356,7 +360,11 @@ def get_resources_json_obj(resource_name: str) -> Dict:
     :raises Exception: An exception is raised if the specified resources does
     not exist.
     """
-    resource_map = _get_resources(valid_types={"resource"})
+    from .resource import _get_resource_json_type_map
+
+    resource_map = _get_resources(
+        valid_types=_get_resource_json_type_map.keys()
+    )
 
     if resource_name not in resource_map:
         raise Exception(
diff --git a/src/python/gem5/resources/resource.py b/src/python/gem5/resources/resource.py
index 1f7305def7..e4873d689d 100644
--- a/src/python/gem5/resources/resource.py
+++ b/src/python/gem5/resources/resource.py
@@ -1,4 +1,4 @@
-# Copyright (c) 2021 The Regents of the University of California
+# Copyright (c) 2021-2023 The Regents of the University of California
 # All rights reserved.
 #
 # Redistribution and use in source and binary forms, with or without
@@ -27,43 +27,401 @@
 from abc import ABCMeta
 import os
 from pathlib import Path
+from m5.util import warn
 
 from .downloader import get_resource, get_resources_json_obj
 
-from typing import Optional, Dict
+from ..isas import ISA, get_isa_from_str
+
+from typing import Optional, Dict, Union, Type
 
 """
-A Resource object encapsulates a gem5 resource. Resources are items needed to
-run a simulation, such as a disk image, kernel, or binary. The gem5 project
-provides pre-built resources, with sources, at <resources.gem5.org>.
+Resources are items needed to run a simulation, such as a disk image, kernel,
+or binary. The gem5 project provides pre-built resources, with sources, at
+<resources.gem5.org>. Here we provide the `AbstractResource` class and its
+various implementations which are designed to encapsulate a resource for use
+in the gem5 Standard Library.
 
-The purpose of this encapsulation is two fold:
+These classes may be contructed directly. E.g.:
 
-1. It allows automatic retrieval of gem5 resources. E.g., specifying a resource
-   which is not local will initiate a download.
-2. It provides a location where code may be added to record the resources used
-   within a simulation. At present this is a TODO work-item.
+```python
+binary = BinaryResource(local_path="/path/to/binary")
+```
+
+or obtained via the gem5-resources infrastructure with the `obtain_resource`
+function:
+
+```python
+binary = obtain_resource("resource name here")
+```
 """
 
 
 class AbstractResource:
+    """
+    An abstract class which all Resource classes inherit from.
+    """
 
     __metaclass__ = ABCMeta
 
-    def __init__(self, local_path: str, metadata: Dict = {}):
+    def __init__(
+        self,
+        local_path: str,
+        documentation: Optional[str] = None,
+        source: Optional[str] = None,
+    ):
+        """
+        :param local_path: The path on the host system where this resource is
+        located
+        :param documentation: Documentation describing this resource. Not a
+        required parameter. By default is None.
+        :param source: The source (as in "source code") for this resource. This
+        string should navigate users to where the source for this resource
+        may be found. Not a required parameter. By default is None.
+        """
+
+        if not os.path.exists(local_path):
+            raise Exception(
+                f"Local path specified for resource, '{local_path}', does not "
+                "exist."
+            )
+
         self._local_path = local_path
-        self._metadata = metadata
+        self._documentation = documentation
+        self._source = source
 
     def get_local_path(self) -> str:
+        """Returns the local path of the resource."""
         return self._local_path
 
-    def get_metadata(self) -> Dict:
+    def get_documentation(self) -> Optional[str]:
+        """Returns documentation associated with this resource."""
+        return self._documentation
+
+    def get_source(self) -> Optional[str]:
+        """Returns information as to where the source for this resource may be
+        found.
         """
-        Returns the raw data from this resource, as seen in the
-        `resources.json` file. A user may specify the metadata of a local
-        resource.
-        """
-        return self._metadata
+        return self._source
+
+
+class FileResource(AbstractResource):
+    """A resource consisting of a single file."""
+
+    def __init__(
+        self,
+        local_path: str,
+        documentation: Optional[str] = None,
+        source: Optional[str] = None,
+        **kwargs,
+    ):
+        if not os.path.isfile(local_path):
+            raise Exception(
+                f"FileResource path specified, '{local_path}', is not a file."
+            )
+
+        super().__init__(
+            local_path=local_path,
+            documentation=documentation,
+            source=source,
+        )
+
+
+class DirectoryResource(AbstractResource):
+    """A resource consisting of a directory."""
+
+    def __init__(
+        self,
+        local_path: str,
+        documentation: Optional[str] = None,
+        source: Optional[str] = None,
+        **kwargs,
+    ):
+
+        if not os.path.isdir(local_path):
+            raise Exception(
+                f"DirectoryResource path specified, {local_path}, is not a "
+                "directory."
+            )
+
+        super().__init__(
+            local_path=local_path,
+            documentation=documentation,
+            source=source,
+        )
+
+
+class DiskImageResource(FileResource):
+    """A Disk Image resource."""
+
+    def __init__(
+        self,
+        local_path: str,
+        documentation: Optional[str] = None,
+        source: Optional[str] = None,
+        root_partition: Optional[str] = None,
+        **kwargs,
+    ):
+        super().__init__(
+            local_path=local_path,
+            documentation=documentation,
+            source=source,
+        )
+        self._root_partition = root_partition
+
+    def get_root_partition(self) -> Optional[str]:
+        """Returns, if applicable, the Root Partition of the disk image."""
+        return self._root_partition
+
+
+class BinaryResource(FileResource):
+    """A binary resource."""
+
+    def __init__(
+        self,
+        local_path: str,
+        documentation: Optional[str] = None,
+        source: Optional[str] = None,
+        architecture: Optional[Union[ISA, str]] = None,
+        **kwargs,
+    ):
+        super().__init__(
+            local_path=local_path,
+            documentation=documentation,
+            source=source,
+        )
+
+        self._architecture = None
+        if architecture:
+            if isinstance(architecture, str):
+                self._architecture = get_isa_from_str(architecture)
+            elif isinstance(architecture, ISA):
+                self._architecture = architecture
+
+    def get_architecture(self) -> Optional[ISA]:
+        """Returns the ISA this binary is compiled to."""
+        return self._architecture
+
+
+class BootloaderResource(BinaryResource):
+    """A bootloader resource."""
+
+    def __init__(
+        self,
+        local_path: str,
+        documentation: Optional[str] = None,
+        source: Optional[str] = None,
+        architecture: Optional[Union[ISA, str]] = None,
+        **kwargs,
+    ):
+        super().__init__(
+            local_path=local_path,
+            documentation=documentation,
+            architecture=architecture,
+            source=source,
+        )
+
+
+class GitResource(DirectoryResource):
+    """A git resource."""
+
+    def __init__(
+        self,
+        local_path: str,
+        documentation: Optional[str] = None,
+        source: Optional[str] = None,
+        **kwargs,
+    ):
+        super().__init__(
+            local_path=local_path,
+            documentation=documentation,
+            source=source,
+        )
+
+
+class KernelResource(BinaryResource):
+    """A kernel resource."""
+
+    def __init__(
+        self,
+        local_path: str,
+        documentation: Optional[str] = None,
+        source: Optional[str] = None,
+        architecture: Optional[Union[ISA, str]] = None,
+        **kwargs,
+    ):
+        super().__init__(
+            local_path=local_path,
+            documentation=documentation,
+            source=source,
+            architecture=architecture,
+        )
+
+
+class CheckpointResource(DirectoryResource):
+    """A checkpoint resource. The following directory structure is expected:
+
+    <local_path>:
+       - board.physmem.store0.pmem
+       - m5.cpt
+    """
+
+    def __init__(
+        self,
+        local_path: str,
+        documentation: Optional[str] = None,
+        source: Optional[str] = None,
+        **kwargs,
+    ):
+        super().__init__(
+            local_path=local_path,
+            documentation=documentation,
+            source=source,
+        )
+
+
+class SimpointResource(DirectoryResource):
+    """A simpoint resource."""
+
+    def __init__(
+        self,
+        local_path: str,
+        documentation: Optional[str] = None,
+        source: Optional[str] = None,
+        **kwargs,
+    ):
+        super().__init__(
+            local_path=local_path,
+            documentation=documentation,
+            source=source,
+        )
+
+
+def obtain_resource(
+    resource_name: str,
+    resource_directory: Optional[str] = None,
+    download_md5_mismatch: bool = True,
+) -> AbstractResource:
+    """
+    This function primarily serves as a factory for resources. It will return
+    the correct `AbstractResource` implementation based on the resource
+    requested, by referencing the "resource.json" file (by default, that hosted
+    at https://resources.gem5.org/resources.json). In addition to this, this
+    function will download the resource if not detected in the
+    `resource_directory`.
+
+    :param resource_name: The name of the gem5 resource as it appears under the
+    "name" field in the `resource.json` file.
+    :param resource_directory: The location of the directory in which the
+    resource is to be stored. If this parameter is not set, it will set to
+    the environment variable `GEM5_RESOURCE_DIR`. If the environment is not
+    set it will default to `~/.cache/gem5` if available, otherwise the CWD.
+    :param download_md5_mismatch: If the resource is present, but does not
+    have the correct md5 value, the resoruce will be deleted and
+    re-downloaded if this value is True. Otherwise an exception will be
+    thrown. True by default.
+    """
+
+    # If the `resource_directory` parameter is not set via this function, we
+    # check the "GEM5_RESOURCE_DIR" environment variable. If this too is not
+    # set we call `_get_default_resource_dir()` to determine where the
+    # resource directory is, or should be, located.
+    if resource_directory == None:
+        resource_directory = os.getenv(
+            "GEM5_RESOURCE_DIR", _get_default_resource_dir()
+        )
+
+    # Small checks here to ensure the resource directory is valid.
+    if os.path.exists(resource_directory):
+        if not os.path.isdir(resource_directory):
+            raise Exception(
+                "gem5 resource directory, "
+                "'{}', exists but is not a directory".format(
+                    resource_directory
+                )
+            )
+    else:
+        # `exist_ok=True` here as, occasionally, if multiple instance of
+        # gem5 are started simultaneously, a race condition can exist to
+        # create the resource directory. Without `exit_ok=True`, threads
+        # which lose this race will thrown a `FileExistsError` exception.
+        # `exit_ok=True` ensures no exception is thrown.
+        os.makedirs(resource_directory, exist_ok=True)
+
+    # This is the path to which the resource is to be stored.
+    to_path = os.path.join(resource_directory, resource_name)
+
+    # Download the resource if it does not already exist.
+    get_resource(
+        resource_name=resource_name,
+        to_path=os.path.join(resource_directory, resource_name),
+        download_md5_mismatch=download_md5_mismatch,
+    )
+
+    # Obtain the JSON resource entry for this resource
+    resource_json = get_resources_json_obj(resource_name)
+
+    # Obtain the type from the JSON. From this we will determine what subclass
+    # of `AbstractResource` we are to create and return.
+    resources_type = resource_json["type"]
+
+    if resources_type == "resource":
+        # This is a stop-gap measure to ensure to work with older versions of
+        # the "resource.json" file. These should be replaced with their
+        # respective specializations ASAP and this case removed.
+        if (
+            "additional_metadata" in resource_json
+            and "root_partition" in resource_json["additional_metadata"]
+        ):
+            # In this case we should return a DiskImageResource.
+            root_partition = resource_json["additional_metadata"][
+                "root_partition"
+            ]
+            return DiskImageResource(
+                local_path=to_path, root_partition=root_partition
+            )
+        return CustomResource(local_path=to_path)
+
+    assert resources_type in _get_resource_json_type_map
+    resource_class = _get_resource_json_type_map[resources_type]
+
+    # Once we know what AbstractResource subclass we are using, we create it.
+    # The fields in the JSON object are assumed to map like-for-like to the
+    # subclass contructor, so we can pass the resource_json map directly.
+    return resource_class(local_path=to_path, **resource_json)
+
+
+def _get_default_resource_dir() -> str:
+    """
+    Obtain the default gem5 resources directory on the host system. This
+    function will iterate through sensible targets until it finds one that
+    works on the host system.
+
+    :returns: The default gem5 resources directory.
+    """
+    test_list = [
+        # First try `~/.cache/gem5`.
+        os.path.join(Path.home(), ".cache", "gem5"),
+        # Last resort, just put things in the cwd.
+        os.path.join(Path.cwd(), "resources"),
+    ]
+
+    for path in test_list:
+        if os.path.exists(path):  # If the path already exists...
+            if os.path.isdir(path):  # Check to see the path is a directory.
+                return path  # If so, the path is valid and can be used.
+        else:  # If the path does not exist, try to create it.
+            try:
+                os.makedirs(path, exist_ok=False)
+                return path
+            except OSError:
+                continue  # If the path cannot be created, then try another.
+
+    raise Exception("Cannot find a valid location to download resources")
+
+
+# The following classes exist to preserve backwards functionality between the
+# API for obtaining resources in v21.1.0 and prior.
 
 
 class CustomResource(AbstractResource):
@@ -71,134 +429,101 @@ class CustomResource(AbstractResource):
     A custom gem5 resource. This can be used to encapsulate a resource provided
     by a gem5 user as opposed to one available within the gem5 resources
     repository.
+
+    **Warning**: This class is deprecated and will be removed in future
+    releases of gem5. Please use the correct `AbstractResource` subclass
+    instead.
     """
 
     def __init__(self, local_path: str, metadata: Dict = {}):
         """
         :param local_path: The path of the resource on the host system.
-        :param metadata: Add metadata for the custom resource.
+        :param metadata: Add metadata for the custom resource. **Warning:**
+        As of v22.1.1, this parameter is not used.
         """
-        super().__init__(local_path=local_path, metadata=metadata)
+        warn(
+            "The `CustomResource` class is deprecated. Please use an "
+            "`AbstractResource` subclass instead."
+        )
+        if bool(metadata):  # Empty dicts cast to False
+            warn(
+                "the `metadata` parameter was set via the `CustomResource` "
+                "constructor. This parameter is not used."
+            )
+        super().__init__(local_path=local_path)
 
 
-class CustomDiskImageResource(CustomResource):
+class CustomDiskImageResource(DiskImageResource):
     """
     A custom disk image gem5 resource. It can be used to specify a custom,
     local disk image.
+
+    **Warning**: This class is deprecated and will be removed in future
+    releases of gem5. Please use the `DiskImageResource` class instead. This
+    class is merely a wrapper for it.
     """
 
     def __init__(
         self,
         local_path: str,
-        disk_root_partition: Optional[str] = None,
+        root_partition: Optional[str] = None,
         metadata: Dict = {},
     ):
         """
         :param local_path: The path of the disk image on the host system.
-        :param disk_root_partition: The root disk partition to use.
-        :param metadata: Metadata for the resource.
+        :param root_partition: The root disk partition to use.
+        :param metadata: Metadata for the resource. **Warning:** As of "
+        "v22.1.1, this parameter is not used.
         """
-
-        # Behind the scenes, we set the the root partition via the metadata.
-        # For a traditional, non-custom, resource it is the metadata that is
-        # used to specify the disk image partition root. Therefore, when the
-        # root disk partition specified during the construction, we apply it as
-        # metadata.
-        if disk_root_partition:
-            disk_root_partition_dict = {
-                "additional_metadata": {"root_partition": disk_root_partition}
-            }
-            metadata.update(disk_root_partition_dict)
-
-        super().__init__(local_path=local_path, metadata=metadata)
-
-
-class Resource(AbstractResource):
-    """
-    An official gem5 resources as hosted within our gem5 resources repository
-    (<resources.gem5.org>).
-
-    A user need only specify the name of the resource during construction. The
-    resource will be downloaded if needed. A list of available resources can
-    be obtained via `downloader.list_resources()`.
-    """
-
-    def __init__(
-        self,
-        resource_name: str,
-        resource_directory: Optional[str] = None,
-        download_md5_mismatch: bool = True,
-    ):
-        """
-        :param resource_name: The name of the gem5 resource.
-        :param resource_directory: The location of the directory in which the
-        resource is to be stored. If this parameter is not set, it will set to
-        the environment variable `GEM5_RESOURCE_DIR`. If the environment is not
-        set it will default to `~/.cache/gem5` if available, otherwise the CWD.
-        :param download_md5_mismatch: If the resource is present, but does not
-        have the correct md5 value, the resoruce will be deleted and
-        re-downloaded if this value is True. Otherwise an exception will be
-        thrown. True by default.
-        """
-
-        if resource_directory == None:
-            resource_directory = os.getenv(
-                "GEM5_RESOURCE_DIR", self._get_default_resource_dir()
+        warn(
+            "The `CustomDiskImageResource` class is deprecated. Please use "
+            "`DiskImageResource` instead."
+        )
+        if bool(metadata):  # Empty dicts cast to False
+            warn(
+                "the `metadata` parameter was set via the "
+                "`CustomDiskImageResource` constructor. This parameter is not "
+                "used."
             )
+        super().__init__(local_path=local_path, root_partition=root_partition)
 
-        if os.path.exists(resource_directory):
-            if not os.path.isdir(resource_directory):
-                raise Exception(
-                    "gem5 resource directory, "
-                    "'{}', exists but is not a directory".format(
-                        resource_directory
-                    )
-                )
-        else:
-            # `exist_ok=True` here as, occasionally, if multiple instance of
-            # gem5 are started simultaneously, a race condition can exist to
-            # create the resource directory. Without `exit_ok=True`, threads
-            # which lose this race will thrown a `FileExistsError` exception.
-            # `exit_ok=True` ensures no exception is thrown.
-            os.makedirs(resource_directory, exist_ok=True)
 
-        to_path = os.path.join(resource_directory, resource_name)
+def Resource(
+    resource_name: str,
+    resource_directory: Optional[str] = None,
+    download_md5_mismatch: bool = True,
+) -> AbstractResource:
+    """
+    This function was created to maintain backwards compability for v21.1.0
+    and prior releases of gem5 where `Resource` was a class.
 
-        super().__init__(
-            local_path=to_path, metadata=get_resources_json_obj(resource_name)
-        )
-        get_resource(
-            resource_name=resource_name,
-            to_path=to_path,
-            download_md5_mismatch=download_md5_mismatch,
-        )
+    In the interests of gem5-resource specialization, the `Resource` class
+    has been dropped. Instead users are advized to use the `obtain_resource`
+    function which will return the correct `AbstractResource` implementation.
+    This function (disguised as a class) wraps this function.
+    """
 
-    def _get_default_resource_dir(cls) -> str:
-        """
-        Obtain the default gem5 resources directory on the host system. This
-        function will iterate through sensible targets until it finds one that
-        works on the host system.
+    warn(
+        "`Resource` has been deprecated. Please use the `obtain_resource` "
+        "function instead."
+    )
 
-        :returns: The default gem5 resources directory.
-        """
-        test_list = [
-            # First try `~/.cache/gem5`.
-            os.path.join(Path.home(), ".cache", "gem5"),
-            # Last resort, just put things in the cwd.
-            os.path.join(Path.cwd(), "resources"),
-        ]
+    return obtain_resource(
+        resource_name=resource_name,
+        resource_directory=resource_directory,
+        download_md5_mismatch=download_md5_mismatch,
+    )
 
-        for path in test_list:
-            if os.path.exists(path):  # If the path already exists...
-                if os.path.isdir(
-                    path
-                ):  # Check to see the path is a directory.
-                    return path  # If so, the path is valid and can be used.
-            else:  # If the path does not exist, try to create it.
-                try:
-                    os.makedirs(path, exist_ok=False)
-                    return path
-                except OSError:
-                    continue  # If the path cannot be created, then try another.
 
-        raise Exception("Cannot find a valid location to download resources")
+_get_resource_json_type_map = {
+    "disk-image": DiskImageResource,
+    "binary": BinaryResource,
+    "kernel": KernelResource,
+    "checkpoint": CheckpointResource,
+    "git": GitResource,
+    "bootloader": BootloaderResource,
+    "file": FileResource,
+    "directory": DirectoryResource,
+    "simpoint": SimpointResource,
+    "resource": Resource,
+}
diff --git a/src/python/gem5/resources/workload.py b/src/python/gem5/resources/workload.py
index 2ae89655e8..e0a19d0792 100644
--- a/src/python/gem5/resources/workload.py
+++ b/src/python/gem5/resources/workload.py
@@ -25,7 +25,7 @@
 # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 
 from .downloader import get_workload_json_obj
-from .resource import Resource
+from .resource import obtain_resource
 
 from typing import Dict, Any, Optional
 
@@ -209,7 +209,7 @@ class Workload(AbstractWorkload):
                 assert isinstance(key, str)
                 value = workload_json["resources"][key]
                 assert isinstance(value, str)
-                params[key] = Resource(
+                params[key] = obtain_resource(
                     value, resource_directory=resource_directory
                 )
 
diff --git a/tests/gem5/gem5_library_example_tests/test_gem5_library_examples.py b/tests/gem5/gem5_library_example_tests/test_gem5_library_examples.py
index 9b5c2c67ff..514894f8d2 100644
--- a/tests/gem5/gem5_library_example_tests/test_gem5_library_examples.py
+++ b/tests/gem5/gem5_library_example_tests/test_gem5_library_examples.py
@@ -94,44 +94,44 @@ gem5_verify_config(
     length=constants.quick_tag,
 )
 
-gem5_verify_config(
-    name="test-simpoints-se-checkpoint",
-    fixtures=(),
-    verifiers=(),
-    config=joinpath(
-        config.base_dir,
-        "configs",
-        "example",
-        "gem5_library",
-        "checkpoints",
-        "simpoints-se-checkpoint.py",
-    ),
-    config_args=[
-        "--checkpoint-path",
-        joinpath(resource_path, "se_checkpoint_folder-save"),
-    ],
-    valid_isas=(constants.all_compiled_tag,),
-    valid_hosts=constants.supported_hosts,
-    length=constants.quick_tag,
-)
+# gem5_verify_config(
+#    name="test-simpoints-se-checkpoint",
+#    fixtures=(),
+#    verifiers=(),
+#    config=joinpath(
+#        config.base_dir,
+#        "configs",
+#        "example",
+#        "gem5_library",
+#        "checkpoints",
+#        "simpoints-se-checkpoint.py",
+#    ),
+#    config_args=[
+#        "--checkpoint-path",
+#        joinpath(resource_path, "se_checkpoint_folder-save"),
+#    ],
+#    valid_isas=(constants.all_compiled_tag,),
+#    valid_hosts=constants.supported_hosts,
+#    length=constants.quick_tag,
+# )
 
-gem5_verify_config(
-    name="test-simpoints-se-restore",
-    fixtures=(),
-    verifiers=(),
-    config=joinpath(
-        config.base_dir,
-        "configs",
-        "example",
-        "gem5_library",
-        "checkpoints",
-        "simpoints-se-restore.py",
-    ),
-    config_args=[],
-    valid_isas=(constants.all_compiled_tag,),
-    valid_hosts=constants.supported_hosts,
-    length=constants.quick_tag,
-)
+# gem5_verify_config(
+#    name="test-simpoints-se-restore",
+#    fixtures=(),
+#    verifiers=(),
+#    config=joinpath(
+#        config.base_dir,
+#        "configs",
+#        "example",
+#        "gem5_library",
+#        "checkpoints",
+#        "simpoints-se-restore.py",
+#    ),
+#    config_args=[],
+#    valid_isas=(constants.all_compiled_tag,),
+#    valid_hosts=constants.supported_hosts,
+#    length=constants.quick_tag,
+# )
 
 if os.access("/dev/kvm", mode=os.R_OK | os.W_OK):
     # The x86-ubuntu-run uses KVM cores, this test will therefore only be run
diff --git a/tests/pyunit/stdlib/resources/pyunit_resource_specialization.py b/tests/pyunit/stdlib/resources/pyunit_resource_specialization.py
new file mode 100644
index 0000000000..e0a8dddd07
--- /dev/null
+++ b/tests/pyunit/stdlib/resources/pyunit_resource_specialization.py
@@ -0,0 +1,196 @@
+# Copyright (c) 2023 The Regents of the University of California
+# All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are
+# met: redistributions of source code must retain the above copyright
+# notice, this list of conditions and the following disclaimer;
+# redistributions in binary form must reproduce the above copyright
+# notice, this list of conditions and the following disclaimer in the
+# documentation and/or other materials provided with the distribution;
+# neither the name of the copyright holders nor the names of its
+# contributors may be used to endorse or promote products derived from
+# this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+import os
+import unittest
+
+from gem5.resources.resource import *
+from gem5.isas import ISA
+
+
+class ResourceSpecializationSuite(unittest.TestCase):
+    """This suite tests that `gem5.resource.resource` casts to the correct
+    `AbstractResource` specialization when using the `obtain_resource`
+    function.
+    """
+
+    @classmethod
+    def setUpClass(cls):
+        """Prior to running the suite we set the resource directory to
+        "ref/resource-specialization.json"
+        """
+        os.environ["GEM5_RESOURCE_JSON"] = os.path.join(
+            os.path.realpath(os.path.dirname(__file__)),
+            "refs",
+            "resource-specialization.json",
+        )
+
+    @classmethod
+    def tearDownClass(cls) -> None:
+        """After running the suite we unset the gem5-resource JSON file, as to
+        not interfere with others tests.
+        """
+        del os.environ["GEM5_RESOURCE_JSON"]
+
+    def get_resource_dir(cls) -> str:
+        """To ensure the resources are cached to the same directory as all
+        other tests, this function returns the location of the testing
+        directories "resources" directory.
+        """
+        return os.path.join(
+            os.path.realpath(os.path.dirname(__file__)),
+            os.pardir,
+            os.pardir,
+            os.pardir,
+            "gem5",
+            "resources",
+        )
+
+    def test_binary_resource(self) -> None:
+        """Tests the loading of of a BinaryResource"""
+        resource = obtain_resource(
+            resource_name="binary-example",
+            resource_directory=self.get_resource_dir(),
+        )
+
+        self.assertIsInstance(resource, BinaryResource)
+
+        self.assertEquals(
+            "binary-example documentation.", resource.get_documentation()
+        )
+        self.assertEquals("src/simple", resource.get_source())
+        self.assertEquals(ISA.ARM, resource.get_architecture())
+
+    def test_kernel_resource(self) -> None:
+        """Tests the loading of a KernelResource."""
+        resource = obtain_resource(
+            resource_name="kernel-example",
+            resource_directory=self.get_resource_dir(),
+        )
+
+        self.assertIsInstance(resource, KernelResource)
+
+        self.assertEquals(
+            "kernel-example documentation.", resource.get_documentation()
+        )
+        self.assertEquals("src/linux-kernel", resource.get_source())
+        self.assertEquals(ISA.RISCV, resource.get_architecture())
+
+    def test_bootloader_resource(self) -> None:
+        """Tests the loading of a BootloaderResource."""
+        resource = obtain_resource(
+            resource_name="bootloader-example",
+            resource_directory=self.get_resource_dir(),
+        )
+
+        self.assertIsInstance(resource, BootloaderResource)
+
+        self.assertEquals(
+            "bootloader documentation.", resource.get_documentation()
+        )
+        self.assertIsNone(resource.get_source())
+        self.assertIsNone(resource.get_architecture())
+
+    def test_disk_image_resource(self) -> None:
+        """Tests the loading of a DiskImageResource."""
+        resource = obtain_resource(
+            resource_name="disk-image-example",
+            resource_directory=self.get_resource_dir(),
+        )
+
+        self.assertIsInstance(resource, DiskImageResource)
+
+        self.assertEquals(
+            "disk-image documentation.", resource.get_documentation()
+        )
+        self.assertEquals("src/x86-ubuntu", resource.get_source())
+        self.assertEquals("1", resource.get_root_partition())
+
+    def test_checkpoint_resource(self) -> None:
+        """Tests the loading of a CheckpointResource."""
+        resource = obtain_resource(
+            resource_name="checkpoint-example",
+            resource_directory=self.get_resource_dir(),
+        )
+
+        self.assertIsInstance(resource, CheckpointResource)
+
+        self.assertEquals(
+            "checkpoint-example documentation.", resource.get_documentation()
+        )
+        self.assertIsNone(resource.get_source())
+
+    def test_git_resource(self) -> None:
+        """Tests the loading of a GitResource."""
+        resource = obtain_resource(
+            resource_name="git-example",
+            resource_directory=self.get_resource_dir(),
+        )
+
+        self.assertIsInstance(resource, GitResource)
+
+        self.assertIsNone(resource.get_documentation())
+        self.assertIsNone(resource.get_source())
+
+    def test_simpoint_resource(self) -> None:
+        """Tests the loading of a Simpoint resource."""
+        resource = obtain_resource(
+            resource_name="simpoint-example",
+            resource_directory=self.get_resource_dir(),
+        )
+
+        self.assertIsInstance(resource, SimpointResource)
+
+        self.assertEquals(
+            "simpoint documentation.", resource.get_documentation()
+        )
+        self.assertIsNone(resource.get_source())
+
+    def test_file_resource(self) -> None:
+        """Tests the loading of a FileResource."""
+        resource = obtain_resource(
+            resource_name="file-example",
+            resource_directory=self.get_resource_dir(),
+        )
+
+        self.assertIsInstance(resource, FileResource)
+
+        self.assertIsNone(resource.get_documentation())
+        self.assertIsNone(resource.get_source())
+
+    def test_directory_resource(self) -> None:
+        """Tests the loading of a DirectoryResource."""
+        resource = obtain_resource(
+            resource_name="directory-example",
+            resource_directory=self.get_resource_dir(),
+        )
+
+        self.assertIsInstance(resource, DirectoryResource)
+
+        self.assertEquals(
+            "directory-example documentation.", resource.get_documentation()
+        )
+        self.assertIsNone(resource.get_source())
diff --git a/tests/pyunit/stdlib/resources/pyunit_workload_checks.py b/tests/pyunit/stdlib/resources/pyunit_workload_checks.py
index 9620289446..fab0bbfbf1 100644
--- a/tests/pyunit/stdlib/resources/pyunit_workload_checks.py
+++ b/tests/pyunit/stdlib/resources/pyunit_workload_checks.py
@@ -29,7 +29,11 @@ import tempfile
 import os
 
 from gem5.resources.workload import Workload, CustomWorkload
-from gem5.resources.resource import Resource
+from gem5.resources.resource import (
+    BinaryResource,
+    DiskImageResource,
+    obtain_resource,
+)
 from gem5.resources.downloader import _resources_json_version_required
 
 from typing import Dict
@@ -50,7 +54,7 @@ class CustomWorkloadTestSuite(unittest.TestCase):
         "previous-versions" : {},
         "resources": [
         {
-            "type" : "resource",
+            "type" : "binary",
             "name" : "x86-hello64-static",
             "documentation" : "A 'Hello World!' binary.",
             "architecture" : "X86",
@@ -73,7 +77,7 @@ class CustomWorkloadTestSuite(unittest.TestCase):
         cls.custom_workload = CustomWorkload(
             function="set_se_binary_workload",
             parameters={
-                "binary": Resource("x86-hello64-static"),
+                "binary": obtain_resource("x86-hello64-static"),
                 "arguments": ["hello", 6],
             },
         )
@@ -100,7 +104,7 @@ class CustomWorkloadTestSuite(unittest.TestCase):
         self.assertEquals(2, len(parameters))
 
         self.assertTrue("binary" in parameters)
-        self.assertTrue(isinstance(parameters["binary"], Resource))
+        self.assertTrue(isinstance(parameters["binary"], BinaryResource))
 
         self.assertTrue("arguments" in parameters)
         self.assertTrue(isinstance(parameters["arguments"], list))
@@ -156,7 +160,7 @@ class WorkloadTestSuite(unittest.TestCase):
         "previous-versions" : {},
         "resources": [
         {
-            "type" : "resource",
+            "type" : "kernel",
             "name" : "x86-linux-kernel-5.2.3",
             "documentation" : "The linux kernel (v5.2.3), compiled to X86.",
             "architecture" : "X86",
@@ -166,7 +170,7 @@ class WorkloadTestSuite(unittest.TestCase):
             "source" : "src/linux-kernel"
         },
         {
-            "type" : "resource",
+            "type" : "disk-image",
             "name" : "x86-ubuntu-18.04-img",
             "documentation" : "A disk image containing Ubuntu 18.04 for x86..",
             "architecture" : "X86",
@@ -174,9 +178,7 @@ class WorkloadTestSuite(unittest.TestCase):
             "md5sum" : "90e363abf0ddf22eefa2c7c5c9391c49",
             "url" : "{url_base}/images/x86/ubuntu-18-04/x86-ubuntu.img.gz",
             "source" : "src/x86-ubuntu",
-            "additional_metadata" : {
-                "root_partition": "1"
-            }
+            "root_partition": "1"
         },
         {
             "type" : "workload",
@@ -226,10 +228,12 @@ class WorkloadTestSuite(unittest.TestCase):
         self.assertEqual(3, len(parameters))
 
         self.assertTrue("kernel" in parameters)
-        self.assertTrue(isinstance(parameters["kernel"], Resource))
+        self.assertTrue(isinstance(parameters["kernel"], BinaryResource))
 
         self.assertTrue("disk_image" in parameters)
-        self.assertTrue(isinstance(parameters["disk_image"], Resource))
+        self.assertTrue(
+            isinstance(parameters["disk_image"], DiskImageResource)
+        )
 
         self.assertTrue("readfile_contents" in parameters)
         self.assertTrue(
diff --git a/tests/pyunit/stdlib/resources/refs/resource-specialization.json b/tests/pyunit/stdlib/resources/refs/resource-specialization.json
new file mode 100644
index 0000000000..77ffc10705
--- /dev/null
+++ b/tests/pyunit/stdlib/resources/refs/resource-specialization.json
@@ -0,0 +1,99 @@
+
+{
+    "version" : "develop",
+    "url_base" : "http://dist.gem5.org/dist/v22-1",
+    "previous-versions" : {
+        "develop" : "https://gem5.googlesource.com/public/gem5-resources/+/refs/heads/develop/resources.json?format=TEXT",
+        "21.2" : "http://resources.gem5.org/prev-resources-json/resources-21-2.json"
+    },
+    "resources": [
+        {
+            "type" : "kernel",
+            "name" : "kernel-example",
+            "documentation" : "kernel-example documentation.",
+            "architecture" : "RISCV",
+            "is_zipped" : false,
+            "md5sum" : "60a53c7d47d7057436bf4b9df707a841",
+            "url" : "{url_base}/kernels/x86/static/vmlinux-5.4.49",
+            "source" : "src/linux-kernel"
+        },
+        {
+            "type" : "disk-image",
+            "name" : "disk-image-example",
+            "documentation" : "disk-image documentation.",
+            "architecture" : "X86",
+            "is_zipped" : true,
+            "md5sum" : "90e363abf0ddf22eefa2c7c5c9391c49",
+            "url" : "{url_base}/images/x86/ubuntu-18-04/x86-ubuntu.img.gz",
+            "source" : "src/x86-ubuntu",
+            "root_partition": "1"
+        },
+        {
+            "type" : "binary",
+            "name" : "binary-example",
+            "documentation" : "binary-example documentation.",
+            "architecture" : "ARM",
+            "is_zipped" :  false,
+            "md5sum" : "71b2cb004fe2cda4556f0b1a38638af6",
+            "url" : "{url_base}/test-progs/hello/bin/arm/linux/hello64-static",
+            "source" : "src/simple"
+        },
+        {
+            "type" : "bootloader",
+            "name" : "bootloader-example",
+            "documentation" : "bootloader documentation.",
+            "is_zipped" :  false,
+            "md5sum" : "71b2cb004fe2cda4556f0b1a38638af6",
+            "url" : "{url_base}/test-progs/hello/bin/arm/linux/hello64-static"
+        },
+        {
+            "type" : "checkpoint",
+            "name" : "checkpoint-example",
+            "documentation" : "checkpoint-example documentation.",
+            "architecture": "RISCV",
+            "is_zipped" : false,
+            "md5sum" : "3a57c1bb1077176c4587b8a3bf4f8ace",
+            "source" : null,
+            "is_tar_archive" : true,
+            "url": "{url_base}/checkpoints/riscv-hello-example-checkpoint.tar"
+        },
+        {
+            "type" : "git",
+            "name" : "git-example",
+            "documentation" : null,
+            "is_zipped" :  false,
+            "is_tar_archive" : true,
+            "md5sum" : "71b2cb004fe2cda4556f0b1a38638af6",
+            "url": "{url_base}/checkpoints/riscv-hello-example-checkpoint.tar"
+        },
+        {
+            "type" : "file",
+            "name" : "file-example",
+            "documentation" : null,
+            "is_zipped" :  false,
+            "md5sum" : "71b2cb004fe2cda4556f0b1a38638af6",
+            "url": "{url_base}/checkpoints/riscv-hello-example-checkpoint.tar",
+            "source" : null
+        },
+        {
+            "type" : "directory",
+            "name" : "directory-example",
+            "documentation" : "directory-example documentation.",
+            "is_zipped" : false,
+            "md5sum" : "3a57c1bb1077176c4587b8a3bf4f8ace",
+            "source" : null,
+            "is_tar_archive" : true,
+            "url": "{url_base}/checkpoints/riscv-hello-example-checkpoint.tar"
+        },
+        {
+            "type" : "simpoint",
+            "name" : "simpoint-example",
+            "documentation" : "simpoint documentation.",
+            "is_zipped" : false,
+            "md5sum" : "3a57c1bb1077176c4587b8a3bf4f8ace",
+            "source" : null,
+            "is_tar_archive" : true,
+            "url": "{url_base}/checkpoints/riscv-hello-example-checkpoint.tar"
+         }
+    ]
+}

From a9b69ee055d7b82ba5da0e4dbfaecd42be41de6b Mon Sep 17 00:00:00 2001
From: "Bobby R. Bruce" <bbruce@ucdavis.edu>
Date: Thu, 19 Jan 2023 14:30:19 +0000
Subject: [PATCH 204/492] stdlib: Add null/None versioning in resources.json

This patch allows for the "version" field in the resources.json file to
be `null` (translated to `None` in the Python JSON package) or not
declared. In this case the resources.json file will be used regardless
as to what version the gem5 binary is set. This is useful for testing
purposes.

Tests have been updated to utilize this where possible.

Change-Id: I9d8ae18cb3e61d58bc822bad30853fa3442cb33f
Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/67337
Maintainer: Jason Lowe-Power <power.jg@gmail.com>
Tested-by: kokoro <noreply+kokoro@google.com>
Reviewed-by: Jason Lowe-Power <power.jg@gmail.com>
---
 src/python/gem5/resources/downloader.py       |  7 +-
 .../resources/pyunit_workload_checks.py       | 99 +++----------------
 .../refs/workload-checks-custom-workload.json | 17 ++++
 .../resources/refs/workload-checks.json       | 40 ++++++++
 4 files changed, 76 insertions(+), 87 deletions(-)
 create mode 100644 tests/pyunit/stdlib/resources/refs/workload-checks-custom-workload.json
 create mode 100644 tests/pyunit/stdlib/resources/refs/workload-checks.json

diff --git a/src/python/gem5/resources/downloader.py b/src/python/gem5/resources/downloader.py
index 0b67ecdebd..4a2ed5d332 100644
--- a/src/python/gem5/resources/downloader.py
+++ b/src/python/gem5/resources/downloader.py
@@ -154,8 +154,13 @@ def _get_resources_json() -> Dict:
 
     # If the current version pulled is not correct, look up the
     # "previous-versions" field to find the correct one.
+    # If the resource JSON file does not have a "version" field or it's
+    # null/None, then we will use this resource JSON file (this is usefull for
+    # testing purposes).
     version = _resources_json_version_required()
-    if to_return["version"] != version:
+    json_version = None if "version" not in to_return else to_return["version"]
+
+    if json_version and json_version != version:
         if version in to_return["previous-versions"].keys():
             to_return = _get_resources_json_at_path(
                 path=to_return["previous-versions"][version]
diff --git a/tests/pyunit/stdlib/resources/pyunit_workload_checks.py b/tests/pyunit/stdlib/resources/pyunit_workload_checks.py
index fab0bbfbf1..2bc31f5a3f 100644
--- a/tests/pyunit/stdlib/resources/pyunit_workload_checks.py
+++ b/tests/pyunit/stdlib/resources/pyunit_workload_checks.py
@@ -25,7 +25,6 @@
 # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 
 import unittest
-import tempfile
 import os
 
 from gem5.resources.workload import Workload, CustomWorkload
@@ -34,7 +33,6 @@ from gem5.resources.resource import (
     DiskImageResource,
     obtain_resource,
 )
-from gem5.resources.downloader import _resources_json_version_required
 
 from typing import Dict
 
@@ -46,33 +44,12 @@ class CustomWorkloadTestSuite(unittest.TestCase):
 
     @classmethod
     def setUpClass(cls) -> None:
-        file_contents = (
-            "{"
-            + f'"version" : "{_resources_json_version_required()}",'
-            + """
-        "url_base" : "http://dist.gem5.org/dist/v22-0",
-        "previous-versions" : {},
-        "resources": [
-        {
-            "type" : "binary",
-            "name" : "x86-hello64-static",
-            "documentation" : "A 'Hello World!' binary.",
-            "architecture" : "X86",
-            "is_zipped" :  false,
-            "md5sum" : "dbf120338b37153e3334603970cebd8c",
-            "url" : "{url_base}/test-progs/hello/bin/x86/linux/hello64-static",
-            "source" : "src/simple"
-        }
-    ]
-}
-        """
-        )
-        file = tempfile.NamedTemporaryFile(mode="w", delete=False)
-        file.write(file_contents)
-        file.close()
 
-        cls.test_json = file.name
-        os.environ["GEM5_RESOURCE_JSON"] = cls.test_json
+        os.environ["GEM5_RESOURCE_JSON"] = os.path.join(
+            os.path.realpath(os.path.dirname(__file__)),
+            "refs",
+            "workload-checks-custom-workload.json",
+        )
 
         cls.custom_workload = CustomWorkload(
             function="set_se_binary_workload",
@@ -84,9 +61,8 @@ class CustomWorkloadTestSuite(unittest.TestCase):
 
     @classmethod
     def tearDownClass(cls):
-        # Remove the test json file and unset the environment variable so this
-        # test does not interfere with others.
-        os.remove(cls.test_json)
+        # Unset the environment variable so this test does not interfere with
+        # others.
         os.environ["GEM5_RESOURCE_JSON"]
 
     def test_get_function_str(self) -> None:
@@ -149,67 +125,18 @@ class WorkloadTestSuite(unittest.TestCase):
 
     @classmethod
     def setUpClass(cls):
-        # In this constructor we create a json file to load then create a test
-        # workload.
 
-        file_contents = (
-            "{"
-            + f'"version" : "{_resources_json_version_required()}",'
-            + """
-        "url_base" : "http://dist.gem5.org/dist/v22-0",
-        "previous-versions" : {},
-        "resources": [
-        {
-            "type" : "kernel",
-            "name" : "x86-linux-kernel-5.2.3",
-            "documentation" : "The linux kernel (v5.2.3), compiled to X86.",
-            "architecture" : "X86",
-            "is_zipped" : false,
-            "md5sum" : "4838c99b77d33c8307b939c16624e4ac",
-            "url" : "{url_base}/kernels/x86/static/vmlinux-5.2.3",
-            "source" : "src/linux-kernel"
-        },
-        {
-            "type" : "disk-image",
-            "name" : "x86-ubuntu-18.04-img",
-            "documentation" : "A disk image containing Ubuntu 18.04 for x86..",
-            "architecture" : "X86",
-            "is_zipped" : true,
-            "md5sum" : "90e363abf0ddf22eefa2c7c5c9391c49",
-            "url" : "{url_base}/images/x86/ubuntu-18-04/x86-ubuntu.img.gz",
-            "source" : "src/x86-ubuntu",
-            "root_partition": "1"
-        },
-        {
-            "type" : "workload",
-            "name" : "simple-boot",
-            "documentation" : "Description of workload here",
-            "function" : "set_kernel_disk_workload",
-            "resources" : {
-                "kernel" : "x86-linux-kernel-5.2.3",
-                "disk_image" : "x86-ubuntu-18.04-img"
-            },
-            "additional_params" : {
-                "readfile_contents" : "echo 'Boot successful'; m5 exit"
-            }
-        }
-    ]
-}
-        """
+        os.environ["GEM5_RESOURCE_JSON"] = os.path.join(
+            os.path.realpath(os.path.dirname(__file__)),
+            "refs",
+            "workload-checks.json",
         )
-        file = tempfile.NamedTemporaryFile(mode="w", delete=False)
-        file.write(file_contents)
-        file.close()
-
-        cls.test_json = file.name
-        os.environ["GEM5_RESOURCE_JSON"] = cls.test_json
         cls.workload = Workload("simple-boot")
 
     @classmethod
     def tearDownClass(cls):
-        # Remove the test json file and unset the environment variable so this
-        # test does not interfere with others.
-        os.remove(cls.test_json)
+        # Unset the environment variable so this test does not interfere with
+        # others.
         os.environ["GEM5_RESOURCE_JSON"]
 
     def test_get_function_str(self) -> None:
diff --git a/tests/pyunit/stdlib/resources/refs/workload-checks-custom-workload.json b/tests/pyunit/stdlib/resources/refs/workload-checks-custom-workload.json
new file mode 100644
index 0000000000..a9dd2aaa46
--- /dev/null
+++ b/tests/pyunit/stdlib/resources/refs/workload-checks-custom-workload.json
@@ -0,0 +1,17 @@
+{
+        "version" : null,
+        "url_base" : "http://dist.gem5.org/dist/v22-0",
+        "previous-versions" : {},
+        "resources": [
+        {
+            "type" : "binary",
+            "name" : "x86-hello64-static",
+            "documentation" : "A 'Hello World!' binary.",
+            "architecture" : "X86",
+            "is_zipped" :  false,
+            "md5sum" : "dbf120338b37153e3334603970cebd8c",
+            "url" : "{url_base}/test-progs/hello/bin/x86/linux/hello64-static",
+            "source" : "src/simple"
+        }
+    ]
+}
diff --git a/tests/pyunit/stdlib/resources/refs/workload-checks.json b/tests/pyunit/stdlib/resources/refs/workload-checks.json
new file mode 100644
index 0000000000..4f7e76bfb5
--- /dev/null
+++ b/tests/pyunit/stdlib/resources/refs/workload-checks.json
@@ -0,0 +1,40 @@
+{
+    "url_base" : "http://dist.gem5.org/dist/v22-0",
+    "previous-versions" : {},
+    "resources": [
+        {
+            "type" : "kernel",
+            "name" : "x86-linux-kernel-5.2.3",
+            "documentation" : "The linux kernel (v5.2.3), compiled to X86.",
+            "architecture" : "X86",
+            "is_zipped" : false,
+            "md5sum" : "4838c99b77d33c8307b939c16624e4ac",
+            "url" : "{url_base}/kernels/x86/static/vmlinux-5.2.3",
+            "source" : "src/linux-kernel"
+        },
+        {
+            "type" : "disk-image",
+            "name" : "x86-ubuntu-18.04-img",
+            "documentation" : "A disk image containing Ubuntu 18.04 for x86..",
+            "architecture" : "X86",
+            "is_zipped" : true,
+            "md5sum" : "90e363abf0ddf22eefa2c7c5c9391c49",
+            "url" : "{url_base}/images/x86/ubuntu-18-04/x86-ubuntu.img.gz",
+            "source" : "src/x86-ubuntu",
+            "root_partition": "1"
+        },
+        {
+            "type" : "workload",
+            "name" : "simple-boot",
+            "documentation" : "Description of workload here",
+            "function" : "set_kernel_disk_workload",
+            "resources" : {
+                "kernel" : "x86-linux-kernel-5.2.3",
+                "disk_image" : "x86-ubuntu-18.04-img"
+            },
+            "additional_params" : {
+                "readfile_contents" : "echo 'Boot successful'; m5 exit"
+            }
+        }
+    ]
+}

From cc838d72a6aad837397a4e3c6f8fac9943267767 Mon Sep 17 00:00:00 2001
From: "Bobby R. Bruce" <bbruce@ucdavis.edu>
Date: Fri, 20 Jan 2023 00:34:09 +0000
Subject: [PATCH 205/492] stdlib: Update resources to have downloads optional

With this patch, when a resource entry does not specify a "url" field,
there is no file downloaded. This is necessary infrastructure for
gem5-resources which do not have specific files/directories to be
downloaded but exist solely in the resources.json file.

Change-Id: I0d92e830bfcef750119078b8c226b0659ba7f6cb
Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/67338
Tested-by: kokoro <noreply+kokoro@google.com>
Reviewed-by: Bobby Bruce <bbruce@ucdavis.edu>
Maintainer: Bobby Bruce <bbruce@ucdavis.edu>
---
 src/python/gem5/resources/resource.py | 82 ++++++++++++++-------------
 1 file changed, 43 insertions(+), 39 deletions(-)

diff --git a/src/python/gem5/resources/resource.py b/src/python/gem5/resources/resource.py
index e4873d689d..0987453c9a 100644
--- a/src/python/gem5/resources/resource.py
+++ b/src/python/gem5/resources/resource.py
@@ -66,7 +66,7 @@ class AbstractResource:
 
     def __init__(
         self,
-        local_path: str,
+        local_path: Optional[str] = None,
         documentation: Optional[str] = None,
         source: Optional[str] = None,
     ):
@@ -80,7 +80,7 @@ class AbstractResource:
         may be found. Not a required parameter. By default is None.
         """
 
-        if not os.path.exists(local_path):
+        if local_path and not os.path.exists(local_path):
             raise Exception(
                 f"Local path specified for resource, '{local_path}', does not "
                 "exist."
@@ -90,7 +90,7 @@ class AbstractResource:
         self._documentation = documentation
         self._source = source
 
-    def get_local_path(self) -> str:
+    def get_local_path(self) -> Optional[str]:
         """Returns the local path of the resource."""
         return self._local_path
 
@@ -322,45 +322,49 @@ def obtain_resource(
     thrown. True by default.
     """
 
-    # If the `resource_directory` parameter is not set via this function, we
-    # check the "GEM5_RESOURCE_DIR" environment variable. If this too is not
-    # set we call `_get_default_resource_dir()` to determine where the
-    # resource directory is, or should be, located.
-    if resource_directory == None:
-        resource_directory = os.getenv(
-            "GEM5_RESOURCE_DIR", _get_default_resource_dir()
-        )
-
-    # Small checks here to ensure the resource directory is valid.
-    if os.path.exists(resource_directory):
-        if not os.path.isdir(resource_directory):
-            raise Exception(
-                "gem5 resource directory, "
-                "'{}', exists but is not a directory".format(
-                    resource_directory
-                )
-            )
-    else:
-        # `exist_ok=True` here as, occasionally, if multiple instance of
-        # gem5 are started simultaneously, a race condition can exist to
-        # create the resource directory. Without `exit_ok=True`, threads
-        # which lose this race will thrown a `FileExistsError` exception.
-        # `exit_ok=True` ensures no exception is thrown.
-        os.makedirs(resource_directory, exist_ok=True)
-
-    # This is the path to which the resource is to be stored.
-    to_path = os.path.join(resource_directory, resource_name)
-
-    # Download the resource if it does not already exist.
-    get_resource(
-        resource_name=resource_name,
-        to_path=os.path.join(resource_directory, resource_name),
-        download_md5_mismatch=download_md5_mismatch,
-    )
-
     # Obtain the JSON resource entry for this resource
     resource_json = get_resources_json_obj(resource_name)
 
+    to_path = None
+    # If the "url" field is specified, the resoruce must be downloaded.
+    if "url" in resource_json and resource_json["url"]:
+
+        # If the `resource_directory` parameter is not set via this function, we
+        # check the "GEM5_RESOURCE_DIR" environment variable. If this too is not
+        # set we call `_get_default_resource_dir()` to determine where the
+        # resource directory is, or should be, located.
+        if resource_directory == None:
+            resource_directory = os.getenv(
+                "GEM5_RESOURCE_DIR", _get_default_resource_dir()
+            )
+
+        # Small checks here to ensure the resource directory is valid.
+        if os.path.exists(resource_directory):
+            if not os.path.isdir(resource_directory):
+                raise Exception(
+                    "gem5 resource directory, "
+                    "'{}', exists but is not a directory".format(
+                        resource_directory
+                    )
+                )
+        else:
+            # `exist_ok=True` here as, occasionally, if multiple instance of
+            # gem5 are started simultaneously, a race condition can exist to
+            # create the resource directory. Without `exit_ok=True`, threads
+            # which lose this race will thrown a `FileExistsError` exception.
+            # `exit_ok=True` ensures no exception is thrown.
+            os.makedirs(resource_directory, exist_ok=True)
+
+        # This is the path to which the resource is to be stored.
+        to_path = os.path.join(resource_directory, resource_name)
+
+        # Download the resource if it does not already exist.
+        get_resource(
+            resource_name=resource_name,
+            to_path=os.path.join(resource_directory, resource_name),
+            download_md5_mismatch=download_md5_mismatch,
+        )
+
     # Obtain the type from the JSON. From this we will determine what subclass
     # of `AbstractResource` we are to create and return.
     resources_type = resource_json["type"]

From e1601954f052ec51d40ff46f1f14d4b33f4ca556 Mon Sep 17 00:00:00 2001
From: "Bobby R. Bruce" <bbruce@ucdavis.edu>
Date: Fri, 20 Jan 2023 13:40:22 +0000
Subject: [PATCH 206/492] stdlib: Implement Simpoint Resources

This patches does the following:
- Adds 'SimpointResource' which encapsulates Simpoint data and
  functionality. It replaces the old 'gem5.util.simpoint.SimPoint'
  class. Simpoints can be loaded from gem5-resources using the
  `obtain_resource` function.
- Adds 'SimpointDirectoryResource'. This inherits form
  'SimpointResource'. While 'SimpointResource' takes raw Simpoint data
  via parameters, 'SimpointDirectoryResource' assumes the data exists
  in files, in a directory.
- Updates the
  "configs/example/gem5_library/checkpoints/simpoints-se-checkpoint.py"
  and
  "configs/example/gem5_library/checkpoints/simpoints-se-restory.py"
  example files to utilize this new Simpoint resource classes.

**Note**: While the old "SimPoint" class
("src/python/gem5/util/simpoint.py") is marked as deprecated, it may be
difficult to utilize given updates to the APIs in the gem5 stdlib Cores
and Simulator modules.

Change-Id: I9bed5c643ffc735838c9f22a58c53547941010e7
Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/67339
Tested-by: kokoro <noreply+kokoro@google.com>
Reviewed-by: Bobby Bruce <bbruce@ucdavis.edu>
Maintainer: Bobby Bruce <bbruce@ucdavis.edu>
---
 .../checkpoints/simpoints-se-checkpoint.py    |  19 +-
 .../checkpoints/simpoints-se-restore.py       |  27 ++-
 .../components/boards/se_binary_workload.py   |  27 +--
 src/python/gem5/resources/resource.py         | 196 +++++++++++++++++-
 .../gem5/simulate/exit_event_generators.py    |   4 +-
 src/python/gem5/utils/simpoint.py             |  17 +-
 .../test_gem5_library_examples.py             |  74 +++----
 .../pyunit_resource_specialization.py         |  41 ++++
 .../refs/resource-specialization.json         |  26 ++-
 9 files changed, 354 insertions(+), 77 deletions(-)

diff --git a/configs/example/gem5_library/checkpoints/simpoints-se-checkpoint.py b/configs/example/gem5_library/checkpoints/simpoints-se-checkpoint.py
index d2d1af730f..b5eb7e9912 100644
--- a/configs/example/gem5_library/checkpoints/simpoints-se-checkpoint.py
+++ b/configs/example/gem5_library/checkpoints/simpoints-se-checkpoint.py
@@ -58,6 +58,7 @@ from gem5.components.processors.simple_processor import SimpleProcessor
 from gem5.components.processors.cpu_types import CPUTypes
 from gem5.isas import ISA
 from gem5.resources.workload import Workload
+from gem5.resources.resource import obtain_resource, SimpointResource
 from pathlib import Path
 from gem5.components.cachehierarchies.classic.no_cache import NoCache
 from gem5.simulate.exit_event_generators import (
@@ -108,7 +109,23 @@ board = SimpleBoard(
     cache_hierarchy=cache_hierarchy,
 )
 
-board.set_workload(Workload("x86-print-this-15000-with-simpoints"))
+# board.set_workload(
+#    Workload("x86-print-this-15000-with-simpoints")
+#
+# **Note: This has been removed until we update the resources.json file to
+# encapsulate the new Simpoint format.
+# Below we set the simpount manually.
+
+board.set_se_simpoint_workload(
+    binary=obtain_resource("x86-print-this"),
+    arguments=["print this", 15000],
+    simpoint=SimpointResource(
+        simpoint_interval=1000000,
+        simpoint_list=[2, 3, 4, 15],
+        weight_list=[0.1, 0.2, 0.4, 0.3],
+        warmup_interval=1000000,
+    ),
+)
 
 dir = Path(args.checkpoint_path)
 dir.mkdir(exist_ok=True)
diff --git a/configs/example/gem5_library/checkpoints/simpoints-se-restore.py b/configs/example/gem5_library/checkpoints/simpoints-se-restore.py
index f8f48d0ec1..5ff82dba04 100644
--- a/configs/example/gem5_library/checkpoints/simpoints-se-restore.py
+++ b/configs/example/gem5_library/checkpoints/simpoints-se-restore.py
@@ -63,8 +63,9 @@ from gem5.components.memory import DualChannelDDR4_2400
 from gem5.components.processors.simple_processor import SimpleProcessor
 from gem5.components.processors.cpu_types import CPUTypes
 from gem5.isas import ISA
-from gem5.resources.resource import Resource
+from gem5.resources.resource import SimpointResource, obtain_resource
 from gem5.resources.workload import Workload
+from gem5.resources.resource import SimpointResource
 
 from pathlib import Path
 from m5.stats import reset, dump
@@ -96,11 +97,29 @@ board = SimpleBoard(
     cache_hierarchy=cache_hierarchy,
 )
 
-# Here we obtain the workloadfrom gem5 resources, the checkpoint in this
+# Here we obtain the workload from gem5 resources, the checkpoint in this
 # workload was generated from
 # `configs/example/gem5_library/checkpoints/simpoints-se-checkpoint.py`.
-board.set_workload(
-    Workload("x86-print-this-15000-with-simpoints-and-checkpoint")
+# board.set_workload(
+#    Workload("x86-print-this-15000-with-simpoints-and-checkpoint")
+#
+# **Note: This has been removed until we update the resources.json file to
+# encapsulate the new Simpoint format.
+# Below we set the simpount manually.
+#
+# This loads a single checkpoint as an example of using simpoints to simulate
+# the function of a single simpoint region.
+
+board.set_se_simpoint_workload(
+    binary=obtain_resource("x86-print-this"),
+    arguments=["print this", 15000],
+    simpoint=SimpointResource(
+        simpoint_interval=1000000,
+        simpoint_list=[2, 3, 4, 15],
+        weight_list=[0.1, 0.2, 0.4, 0.3],
+        warmup_interval=1000000,
+    ),
+    checkpoint=obtain_resource("simpoints-se-checkpoints-v22-1-v2"),
 )
 
 
diff --git a/src/python/gem5/components/boards/se_binary_workload.py b/src/python/gem5/components/boards/se_binary_workload.py
index acedfaf9a9..31931106c9 100644
--- a/src/python/gem5/components/boards/se_binary_workload.py
+++ b/src/python/gem5/components/boards/se_binary_workload.py
@@ -31,8 +31,8 @@ from ...resources.resource import (
     BinaryResource,
     CheckpointResource,
     SimpointResource,
+    SimpointDirectoryResource,
 )
-from gem5.utils.simpoint import SimPoint
 
 from m5.objects import SEWorkload, Process
 
@@ -125,7 +125,7 @@ class SEBinaryWorkload:
         self,
         binary: BinaryResource,
         arguments: List[str] = [],
-        simpoint: Union[SimpointResource, SimPoint] = None,
+        simpoint: SimpointResource = None,
         checkpoint: Optional[Union[Path, CheckpointResource]] = None,
     ) -> None:
         """Set up the system to run a SimPoint workload.
@@ -135,28 +135,23 @@ class SEBinaryWorkload:
         * Dynamically linked executables are partially supported when the host
           ISA and the simulated ISA are the same.
 
-        **Warning:** SimPoints only works with one core
+        **Warning:** Simpoints only works with one core
 
         :param binary: The resource encapsulating the binary to be run.
         :param arguments: The input arguments for the binary
-        :param simpoint: The SimPoint object or Resource that contains the list of
+        :param simpoint: The SimpointResource that contains the list of
         SimPoints starting instructions, the list of weights, and the SimPoints
         interval
         :param checkpoint: The checkpoint directory. Used to restore the
         simulation to that checkpoint.
         """
 
-        # convert input to SimPoint if necessary
-        if isinstance(simpoint, SimpointResource):
-            self._simpoint_object = SimPoint(simpoint)
-        else:
-            assert isinstance(simpoint, SimPoint)
-            self._simpoint_object = simpoint
+        self._simpoint_resource = simpoint
 
         if self.get_processor().get_num_cores() > 1:
             warn("SimPoints only works with one core")
         self.get_processor().get_cores()[0]._set_simpoint(
-            inst_starts=self._simpoint_object.get_simpoint_start_insts(),
+            inst_starts=self._simpoint_resource.get_simpoint_start_insts(),
             board_initialized=False,
         )
 
@@ -167,11 +162,11 @@ class SEBinaryWorkload:
             checkpoint=checkpoint,
         )
 
-    def get_simpoint(self) -> SimPoint:
+    def get_simpoint(self) -> SimpointResource:
         """
-        Returns the SimPoint object set. If no SimPoint object has been set an
-        exception is thrown.
+        Returns the SimpointResorce object set. If no SimpointResource object
+        has been set an exception is thrown.
         """
-        if getattr(self, "_simpoint_object", None):
-            return self._simpoint_object
+        if getattr(self, "_simpoint_resource", None):
+            return self._simpoint_resource
         raise Exception("This board does not have a simpoint set.")
diff --git a/src/python/gem5/resources/resource.py b/src/python/gem5/resources/resource.py
index 0987453c9a..678497eaa7 100644
--- a/src/python/gem5/resources/resource.py
+++ b/src/python/gem5/resources/resource.py
@@ -27,13 +27,13 @@
 from abc import ABCMeta
 import os
 from pathlib import Path
-from m5.util import warn
+from m5.util import warn, fatal
 
 from .downloader import get_resource, get_resources_json_obj
 
 from ..isas import ISA, get_isa_from_str
 
-from typing import Optional, Dict, Union, Type
+from typing import Optional, Dict, Union, Type, Tuple, List
 
 """
 Resources are items needed to run a simulation, such as a disk image, kernel,
@@ -72,7 +72,7 @@ class AbstractResource:
     ):
         """
         :param local_path: The path on the host system where this resource is
-        located
+        located.
         :param documentation: Documentation describing this resource. Not a
         required parameter. By default is None.
         :param source: The source (as in "source code") for this resource. This
@@ -280,22 +280,205 @@ class CheckpointResource(DirectoryResource):
         )
 
 
-class SimpointResource(DirectoryResource):
-    """A simpoint resource."""
+class SimpointResource(AbstractResource):
+    """A simpoint resource. This resource stores all information required to
+    perform a Simpoint creation and restore. It contains the Simpoint, the
+    Simpoint interval, the weight for each Simpoint, the full warmup length,
+    and the warmup length for each Simpoint.
+    """
 
     def __init__(
         self,
-        local_path: str,
+        simpoint_interval: int = None,
+        simpoint_list: List[int] = None,
+        weight_list: List[float] = None,
+        warmup_interval: int = 0,
+        workload_name: Optional[str] = None,
         documentation: Optional[str] = None,
         source: Optional[str] = None,
+        local_path: Optional[str] = None,
         **kwargs,
     ):
+        """
+        :param simpoint_interval: The simpoint interval.
+        :param simpoint_list: The simpoint list.
+        :param weight_list: The weight list.
+        :param warmup_interval: The warmup interval. Default to zero (a value
+        of zero means effectively not set).
+        :param workload_name: Simpoints are typically associated with a
+        particular workload due to their dependency on chosen input parameters.
+        This field helps backtrack to that resource if required. This should
+        relate to a workload "name" field in the resource.json file.
+        """
+
         super().__init__(
             local_path=local_path,
             documentation=documentation,
             source=source,
         )
 
+        self._weight_list = weight_list
+        self._simpoint_list = simpoint_list
+        self._simpoint_interval = simpoint_interval
+        self._warmup_interval = warmup_interval
+        self._workload_name = workload_name
+
+        self._simpoint_start_insts = list(
+            inst * simpoint_interval for inst in self.get_simpoint_list()
+        )
+
+        if self._warmup_interval != 0:
+            self._warmup_list = self._set_warmup_list()
+        else:
+            self._warmup_list = [0] * len(self.get_simpoint_start_insts)
+
+    def get_simpoint_list(self) -> List[int]:
+        """Returns the a list containing all the Simpoints for the workload."""
+        return self._simpoint_list
+
+    def get_simpoint_start_insts(self) -> List[int]:
+        """Returns a lst containing all the Simpoint starting instrunction
+        points for the workload. This was calculated by multiplying the
+        Simpoint with the Simpoint interval when it was generated."""
+        return self._simpoint_start_insts
+
+    def get_warmup_interval(self) -> int:
+        """Returns the instruction length of the warmup interval."""
+        return self._warmup_interval
+
+    def get_weight_list(self) -> List[float]:
+        """Returns the list that contains the weight for each Simpoint. The
+        order of the weights matches that of the list returned by
+        `get_simpoint_list(). I.e. `get_weight_list()[3]` is the weight for
+        simpoint `get_simpoint_list()[3]`."""
+        return self._weight_list
+
+    def get_simpoint_interval(self) -> int:
+        """Returns the Simpoint interval value."""
+        return self._simpoint_interval
+
+    def get_warmup_list(self) -> List[int]:
+        """Returns the a list containing the warmup length for each Simpoint.
+        Each warmup length in this list corresponds to the Simpoint at the same
+        index in `get_simpoint_list()`. I.e., `get_warmup_list()[4]` is the
+        warmup length for Simpoint `get_simpoint_list()[4]`."""
+        return self._warmup_list
+
+    def get_workload_name(self) -> Optional[str]:
+        """Return the workload name this Simpoint is associated with."""
+        return self._workload_name
+
+    def _set_warmup_list(self) -> List[int]:
+        """
+        This function uses the warmup_interval, fits it into the
+        simpoint_start_insts, and outputs a list of warmup instruction lengths
+        for each SimPoint.
+
+        The warmup instruction length is calculated using the starting
+        instruction of a SimPoint to minus the warmup_interval and the ending
+        instruction of the last SimPoint. If it is less than 0, then the warmup
+        instruction length is the gap between the starting instruction of a
+        SimPoint and the ending instruction of the last SimPoint.
+        """
+        warmup_list = []
+        for index, start_inst in enumerate(self.get_simpoint_start_insts()):
+            warmup_inst = start_inst - self.get_warmup_interval()
+            if warmup_inst < 0:
+                warmup_inst = start_inst
+            else:
+                warmup_inst = self.get_warmup_interval()
+            warmup_list.append(warmup_inst)
+            # change the starting instruction of a SimPoint to include the
+            # warmup instruction length
+            self._simpoint_start_insts[index] = start_inst - warmup_inst
+        return warmup_list
+
+
+class SimpointDirectoryResource(SimpointResource):
+    """A Simpoint diretory resource. This Simpoint Resource assumes the
+    existance of a directory containing a simpoint file and a weight file."""
+
+    def __init__(
+        self,
+        local_path: str,
+        simpoint_file: str,
+        weight_file: str,
+        simpoint_interval: int,
+        warmup_interval: int,
+        workload_name: Optional[str] = None,
+        documentation: Optional[str] = None,
+        source: Optional[str] = None,
+        **kwargs,
+    ):
+        """
+        :param simpoint_file: The Simpoint file. This file is a list of
+        Simpoints, each on its own line. It should map 1-to-1 to the weights
+        file.
+        :param weight_file: The Simpoint weights file. This file is a list of
+        weights, each on its own line.
+        """
+        self._simpoint_file = simpoint_file
+        self._weight_file = weight_file
+
+        # This is a little hack. The functions `get_simpoint_file` and
+        # `get_weight_file` use the local path, so we set it here despite it
+        # also being set in the `AbstractResource` constructor. This isn't
+        # elegant but does not harm.
+        self._local_path = local_path
+        (
+            simpoint_list,
+            weight_list,
+        ) = self._get_weights_and_simpoints_from_file()
+
+        super().__init__(
+            simpoint_interval=simpoint_interval,
+            simpoint_list=simpoint_list,
+            weight_list=weight_list,
+            warmup_interval=warmup_interval,
+            workload_name=workload_name,
+            local_path=local_path,
+            documentation=documentation,
+            source=source,
+        )
+
+    def get_simpoint_file(self) -> Path:
+        """Return the Simpoint File path."""
+        return Path(Path(self._local_path) / self._simpoint_file)
+
+    def get_weight_file(self) -> Path:
+        """Returns the Weight File path."""
+        return Path(Path(self._local_path) / self._weight_file)
+
+    def _get_weights_and_simpoints_from_file(
+        self,
+    ) -> Tuple[List[int], List[int]]:
+        """This is a helper function to extract the weights and simpoints from
+        the files.
+        """
+        simpoint_weight_pair = []
+        with open(self.get_simpoint_file()) as simpoint_file, open(
+            self.get_weight_file()
+        ) as weight_file:
+            while True:
+                line = simpoint_file.readline()
+                if not line:
+                    break
+                interval = int(line.split(" ", 1)[0])
+                line = weight_file.readline()
+                if not line:
+                    fatal("not engough weights")
+                weight = float(line.split(" ", 1)[0])
+                simpoint_weight_pair.append((interval, weight))
+        simpoint_weight_pair.sort(key=lambda obj: obj[0])
+        # use simpoint to sort
+
+        weight_list = []
+        simpoint_list = []
+        for simpoint, weight in simpoint_weight_pair:
+            simpoint_list.append(simpoint)
+            weight_list.append(weight)
+        return simpoint_list, weight_list
+
 
 def obtain_resource(
     resource_name: str,
@@ -529,5 +712,6 @@ _get_resource_json_type_map = {
     "file": FileResource,
     "directory": DirectoryResource,
     "simpoint": SimpointResource,
+    "simpoint-directory": SimpointDirectoryResource,
     "resource": Resource,
 }
diff --git a/src/python/gem5/simulate/exit_event_generators.py b/src/python/gem5/simulate/exit_event_generators.py
index d6732bb49d..738e1281d9 100644
--- a/src/python/gem5/simulate/exit_event_generators.py
+++ b/src/python/gem5/simulate/exit_event_generators.py
@@ -28,7 +28,7 @@ from typing import Generator, Optional
 import m5.stats
 from ..components.processors.abstract_processor import AbstractProcessor
 from ..components.processors.switchable_processor import SwitchableProcessor
-from ..utils.simpoint import SimPoint
+from ..resources.resource import SimpointResource
 from m5.util import warn
 from pathlib import Path
 
@@ -134,7 +134,7 @@ def skip_generator():
 
 
 def simpoints_save_checkpoint_generator(
-    checkpoint_dir: Path, simpoint: SimPoint
+    checkpoint_dir: Path, simpoint: SimpointResource
 ):
     """
     A generator for taking multiple checkpoints for SimPoints. It will save the
diff --git a/src/python/gem5/utils/simpoint.py b/src/python/gem5/utils/simpoint.py
index 9e861cc0a5..eab92e2291 100644
--- a/src/python/gem5/utils/simpoint.py
+++ b/src/python/gem5/utils/simpoint.py
@@ -24,10 +24,10 @@
 # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 
-from m5.util import fatal
+from m5.util import fatal, warn
 from pathlib import Path
 from typing import List, Tuple
-from gem5.resources.resource import Resource, CustomResource
+from gem5.resources.resource import SimpointResource
 
 
 class SimPoint:
@@ -39,7 +39,7 @@ class SimPoint:
 
     def __init__(
         self,
-        simpoint_resource: CustomResource = None,
+        simpoint_resource: SimpointResource = None,
         simpoint_interval: int = None,
         simpoint_file_path: Path = None,
         weight_file_path: Path = None,
@@ -70,12 +70,19 @@ class SimPoint:
         The warmup_list only works correctly with sorted simpoint_list.
         """
 
+        warn(
+            "This `SimPoint` class has been deprecated in favor of "
+            "`SimpointResource` and `SimpointDirectory` resource which may be "
+            "found in `gem5.resources.resource`. Please utilize these. This "
+            "`SimPoint` class will be removed in future releases of gem5."
+        )
+
         # initalize input if you're passing in a CustomResource
         if simpoint_resource is not None:
             simpoint_directory = str(simpoint_resource.get_local_path())
 
-            simpoint_file_path = Path(simpoint_directory + "/simpoint.simpt")
-            weight_file_path = Path(simpoint_directory + "/simpoint.weight")
+            simpoint_file_path = simpoint_directory.get_simpoint_file()
+            weight_file_path = simpoint_resource.get_weight_file()
             simpoint_interval = (
                 simpoint_resource.get_metadata()
                 .get("additional_metadata")
diff --git a/tests/gem5/gem5_library_example_tests/test_gem5_library_examples.py b/tests/gem5/gem5_library_example_tests/test_gem5_library_examples.py
index 514894f8d2..9b5c2c67ff 100644
--- a/tests/gem5/gem5_library_example_tests/test_gem5_library_examples.py
+++ b/tests/gem5/gem5_library_example_tests/test_gem5_library_examples.py
@@ -94,44 +94,44 @@ gem5_verify_config(
     length=constants.quick_tag,
 )
 
-# gem5_verify_config(
-#    name="test-simpoints-se-checkpoint",
-#    fixtures=(),
-#    verifiers=(),
-#    config=joinpath(
-#        config.base_dir,
-#        "configs",
-#        "example",
-#        "gem5_library",
-#        "checkpoints",
-#        "simpoints-se-checkpoint.py",
-#    ),
-#    config_args=[
-#        "--checkpoint-path",
-#        joinpath(resource_path, "se_checkpoint_folder-save"),
-#    ],
-#    valid_isas=(constants.all_compiled_tag,),
-#    valid_hosts=constants.supported_hosts,
-#    length=constants.quick_tag,
-# )
+gem5_verify_config(
+    name="test-simpoints-se-checkpoint",
+    fixtures=(),
+    verifiers=(),
+    config=joinpath(
+        config.base_dir,
+        "configs",
+        "example",
+        "gem5_library",
+        "checkpoints",
+        "simpoints-se-checkpoint.py",
+    ),
+    config_args=[
+        "--checkpoint-path",
+        joinpath(resource_path, "se_checkpoint_folder-save"),
+    ],
+    valid_isas=(constants.all_compiled_tag,),
+    valid_hosts=constants.supported_hosts,
+    length=constants.quick_tag,
+)
 
-# gem5_verify_config(
-#    name="test-simpoints-se-restore",
-#    fixtures=(),
-#    verifiers=(),
-#    config=joinpath(
-#        config.base_dir,
-#        "configs",
-#        "example",
-#        "gem5_library",
-#        "checkpoints",
-#        "simpoints-se-restore.py",
-#    ),
-#    config_args=[],
-#    valid_isas=(constants.all_compiled_tag,),
-#    valid_hosts=constants.supported_hosts,
-#    length=constants.quick_tag,
-# )
+gem5_verify_config(
+    name="test-simpoints-se-restore",
+    fixtures=(),
+    verifiers=(),
+    config=joinpath(
+        config.base_dir,
+        "configs",
+        "example",
+        "gem5_library",
+        "checkpoints",
+        "simpoints-se-restore.py",
+    ),
+    config_args=[],
+    valid_isas=(constants.all_compiled_tag,),
+    valid_hosts=constants.supported_hosts,
+    length=constants.quick_tag,
+)
 
 if os.access("/dev/kvm", mode=os.R_OK | os.W_OK):
     # The x86-ubuntu-run uses KVM cores, this test will therefore only be run
diff --git a/tests/pyunit/stdlib/resources/pyunit_resource_specialization.py b/tests/pyunit/stdlib/resources/pyunit_resource_specialization.py
index e0a8dddd07..f31e35d719 100644
--- a/tests/pyunit/stdlib/resources/pyunit_resource_specialization.py
+++ b/tests/pyunit/stdlib/resources/pyunit_resource_specialization.py
@@ -26,6 +26,7 @@
 
 import os
 import unittest
+from pathlib import Path
 
 from gem5.resources.resource import *
 from gem5.isas import ISA
@@ -155,6 +156,40 @@ class ResourceSpecializationSuite(unittest.TestCase):
         self.assertIsNone(resource.get_documentation())
         self.assertIsNone(resource.get_source())
 
+    def test_simpoint_directory_resource(self) -> None:
+        """Tests the loading of a Simpoint directory resource."""
+        resource = obtain_resource(
+            resource_name="simpoint-directory-example",
+            resource_directory=self.get_resource_dir(),
+        )
+
+        self.assertIsInstance(resource, SimpointDirectoryResource)
+
+        self.assertEquals(
+            "simpoint directory documentation.", resource.get_documentation()
+        )
+        self.assertIsNone(resource.get_source())
+
+        self.assertEquals(1000000, resource.get_simpoint_interval())
+        self.assertEquals(1000000, resource.get_warmup_interval())
+        self.assertEquals(
+            Path(
+                Path(self.get_resource_dir())
+                / "simpoint-directory-example"
+                / "simpoint.simpt"
+            ),
+            resource.get_simpoint_file(),
+        )
+        self.assertEquals(
+            Path(
+                Path(self.get_resource_dir())
+                / "simpoint-directory-example"
+                / "simpoint.weight"
+            ),
+            resource.get_weight_file(),
+        )
+        self.assertEquals("Example Workload", resource.get_workload_name())
+
     def test_simpoint_resource(self) -> None:
         """Tests the loading of a Simpoint resource."""
         resource = obtain_resource(
@@ -168,6 +203,12 @@ class ResourceSpecializationSuite(unittest.TestCase):
             "simpoint documentation.", resource.get_documentation()
         )
         self.assertIsNone(resource.get_source())
+        self.assertIsNone(resource.get_local_path())
+
+        self.assertEquals(1000000, resource.get_simpoint_interval())
+        self.assertEquals(23445, resource.get_warmup_interval())
+        self.assertEquals([2, 3, 4, 15], resource.get_simpoint_list())
+        self.assertEquals([0.1, 0.2, 0.4, 0.3], resource.get_weight_list())
 
     def test_file_resource(self) -> None:
         """Tests the loading of a FileResource."""
diff --git a/tests/pyunit/stdlib/resources/refs/resource-specialization.json b/tests/pyunit/stdlib/resources/refs/resource-specialization.json
index 77ffc10705..01671b564b 100644
--- a/tests/pyunit/stdlib/resources/refs/resource-specialization.json
+++ b/tests/pyunit/stdlib/resources/refs/resource-specialization.json
@@ -86,14 +86,28 @@
             "url": "{url_base}/checkpoints/riscv-hello-example-checkpoint.tar"
         },
         {
-            "type" : "simpoint",
-            "name" : "simpoint-example",
-            "documentation" : "simpoint documentation.",
+            "type": "simpoint-directory",
+            "name": "simpoint-directory-example",
+            "documentation": "simpoint directory documentation.",
             "is_zipped" : false,
-            "md5sum" : "3a57c1bb1077176c4587b8a3bf4f8ace",
+            "md5sum" : "3fcffe3956c8a95e3fb82e232e2b41fb",
             "source" : null,
             "is_tar_archive" : true,
-            "url": "{url_base}/checkpoints/riscv-hello-example-checkpoint.tar"
-         }
+            "url": "{url_base}/simpoints/x86-print-this-15000-simpoints-20221013.tar",
+            "simpoint_interval": 1000000,
+            "warmup_interval": 1000000,
+            "simpoint_file": "simpoint.simpt",
+            "weight_file": "simpoint.weight",
+            "workload_name": "Example Workload"
+        },
+        {
+            "type": "simpoint",
+            "name": "simpoint-example",
+            "documentation": "simpoint documentation.",
+            "simpoint_interval": 1000000,
+            "warmup_interval": 23445,
+            "simpoint_list" : [2,3,4,15],
+            "weight_list" : [0.1, 0.2, 0.4, 0.3]
+        }
     ]
 }

From 717d3b239cbc7a8371bfd69b4d67fc7c57b3f104 Mon Sep 17 00:00:00 2001
From: Zhantong Qiu <ztqiu@ucdavis.edu>
Date: Fri, 6 Jan 2023 16:11:57 -0800
Subject: [PATCH 207/492] base,python: Added PcCountPair type and parameter

This commit introduces a PcCountPair type that stores a Program Counter
address and an integer of counts for the Program Counter address.
The PcCountPair can be used in the same way and hashable in both C++
and Python.

Change-Id: I66d93e2c6a1d286cb9dd795ba97f8d887f67d503
Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/67193
Tested-by: kokoro <noreply+kokoro@google.com>
Reviewed-by: Bobby Bruce <bbruce@ucdavis.edu>
Maintainer: Bobby Bruce <bbruce@ucdavis.edu>
---
 src/cpu/probes/pc_count_pair.hh | 99 +++++++++++++++++++++++++++++++++
 src/python/m5/params.py         | 41 ++++++++++++++
 src/python/pybind11/core.cc     | 27 +++++++++
 3 files changed, 167 insertions(+)
 create mode 100644 src/cpu/probes/pc_count_pair.hh

diff --git a/src/cpu/probes/pc_count_pair.hh b/src/cpu/probes/pc_count_pair.hh
new file mode 100644
index 0000000000..fd6bc639fe
--- /dev/null
+++ b/src/cpu/probes/pc_count_pair.hh
@@ -0,0 +1,99 @@
+/*
+ * Copyright (c) 2023 The Regents of the University of California.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are
+ * met: redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer;
+ * redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution;
+ * neither the name of the copyright holders nor the names of its
+ * contributors may be used to endorse or promote products derived from
+ * this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef __PC_COUNT_PAIR_HH__
+#define __PC_COUNT_PAIR_HH__
+
+#include "base/types.hh"
+
+namespace gem5
+{
+
+class PcCountPair
+{
+
+  private:
+
+    /** The Program Counter address */
+    Addr pc;
+    /** The count of the Program Counter address */
+    int count;
+
+  public:
+
+    /** Explicit constructor assigning the pc and count values */
+    explicit constexpr PcCountPair(Addr _pc, int _count) :
+        pc(_pc), count(_count) {}
+
+    /** Default constructor for parameter classes */
+    PcCountPair() : pc(0), count(0) {}
+
+    /** Returns the Program Counter address */
+    constexpr Addr getPC() const { return pc; }
+    /** Returns the count of the Program */
+    constexpr int getCount() const { return count; }
+
+    /** Greater than comparison */
+    constexpr bool
+    operator>(const PcCountPair& cc) const
+    {
+        return count > cc.getCount();
+    }
+
+    /** Equal comparison */
+    constexpr bool
+    operator==(const PcCountPair& cc) const
+    {
+        return (pc == cc.getPC() && count == cc.getCount());
+    }
+
+    /** String format */
+    std::string
+    to_string() const
+    {
+        std::string s = "(" + std::to_string(pc)
+                                    + "," + std::to_string(count) + ")";
+        return s;
+    }
+
+    /** Enable hashing for this parameter */
+    struct HashFunction
+    {
+        size_t operator()(const PcCountPair& item) const
+        {
+            size_t xHash = std::hash<int>()(item.pc);
+            size_t yHash = std::hash<int>()(item.count);
+            return xHash * 2 + yHash;
+        }
+    };
+
+};
+
+} // namespace gem5
+
+#endif // __PC_COUNT_PAIR_HH__
diff --git a/src/python/m5/params.py b/src/python/m5/params.py
index e76380bc40..92e913b2f0 100644
--- a/src/python/m5/params.py
+++ b/src/python/m5/params.py
@@ -854,6 +854,46 @@ class Addr(CheckedInt):
         return "0x%x" % int(val)
 
 
+class PcCountPair(ParamValue):
+    # This parameter stores a Program Counter address and the a count value for
+    # the Program Counter address
+    cxx_type = "PcCountPair"
+    cmd_line_settable = True
+
+    def __init__(self, _pc, _count):
+        self.pc = _pc
+        self.count = _count
+
+    def get_pc(self):
+        return self.pc
+
+    def get_count(self):
+        return self.count
+
+    def getValue(self):
+        #  convert Python PcCountPair into C++ PcCountPair
+        from _m5.pc import PcCountPair
+
+        return PcCountPair(self.pc, self.count)
+
+    def __str__(self):
+        return "(%i,%i)" % (self.pc, self.count)
+
+    def __eq__(self, other):
+        return self.pc == other.get_pc() and self.count == other.get_count()
+
+    def __hash__(self):
+        return hash((int(self.pc), int(self.count)))
+
+    @classmethod
+    def cxx_predecls(cls, code):
+        code('#include "cpu/probes/pc_count_pair.hh"')
+
+    @classmethod
+    def pybind_predecls(cls, code):
+        code('#include "cpu/probes/pc_count_pair.hh"')
+
+
 class AddrRange(ParamValue):
     cxx_type = "AddrRange"
 
@@ -2426,4 +2466,5 @@ __all__ = [
     "VectorMasterPort",
     "VectorSlavePort",
     "DeprecatedParam",
+    "PcCountPair",
 ]
diff --git a/src/python/pybind11/core.cc b/src/python/pybind11/core.cc
index 89466750d0..bd83a74331 100644
--- a/src/python/pybind11/core.cc
+++ b/src/python/pybind11/core.cc
@@ -58,6 +58,7 @@
 #include "sim/drain.hh"
 #include "sim/serialize.hh"
 #include "sim/sim_object.hh"
+#include "cpu/probes/pc_count_pair.hh"
 
 namespace py = pybind11;
 
@@ -163,6 +164,31 @@ init_range(py::module_ &m_native)
     m.def("RangeSize", &RangeSize);
 }
 
+static void
+init_pc(py::module_ &m_native)
+{
+    py::module_ m = m_native.def_submodule("pc");
+    py::class_<PcCountPair>(m, "PcCountPair")
+        .def(py::init<>())
+        .def(py::init<Addr, int>())
+        .def("__eq__", [](const PcCountPair& self, py::object other) {
+            py::int_ pyPC = other.attr("get_pc")();
+            py::int_ pyCount = other.attr("get_count")();
+            uint64_t cPC = pyPC.cast<uint64_t>();
+            int cCount = pyCount.cast<int>();
+            return (self.getPC() == cPC && self.getCount() == cCount);
+        })
+        .def("__hash__", [](const PcCountPair& self){
+            py::int_ pyPC = py::cast(self.getPC());
+            py::int_ pyCount = py::cast(self.getCount());
+            return py::hash(py::make_tuple(pyPC, pyCount));
+        })
+        .def("__str__", &PcCountPair::to_string)
+        .def("get_pc", &PcCountPair::getPC)
+        .def("get_count", &PcCountPair::getCount)
+        ;
+}
+
 static void
 init_net(py::module_ &m_native)
 {
@@ -307,6 +333,7 @@ pybind_init_core(py::module_ &m_native)
     init_range(m_native);
     init_net(m_native);
     init_loader(m_native);
+    init_pc(m_native);
 }
 
 } // namespace gem5

From 0d129a6bf2e6b9f7d3ec102376151e8752a3afde Mon Sep 17 00:00:00 2001
From: Zhantong Qiu <ztqiu@ucdavis.edu>
Date: Fri, 6 Jan 2023 16:58:06 -0800
Subject: [PATCH 208/492] sim: Added PcCountTracker and PcCountTrackerManager

PcCountTracker is a probelistener that connects to one core and listens
for a list of Program Counter addresses(PCs). It notifys the
PcCountTrackerManager every time it encounters a Program Counter
address in the list.
PcCountTrackerManager is a SimObject that is responsible for keeping
track of a list of PC-count pairs and the number of time a particular
PC has been executed globally.

This patch adds a way to track the number of times a set of specific
PCs have been executed.

Change-Id: I8f47bfa7e29aa2bb6ab817417266033439b85d51
Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/67194
Maintainer: Bobby Bruce <bbruce@ucdavis.edu>
Reviewed-by: Bobby Bruce <bbruce@ucdavis.edu>
Tested-by: kokoro <noreply+kokoro@google.com>
---
 src/cpu/probes/PcCountTracker.py           |  64 +++++++++++
 src/cpu/probes/SConscript                  |  37 ++++++
 src/cpu/probes/pc_count_tracker.cc         |  70 ++++++++++++
 src/cpu/probes/pc_count_tracker.hh         |  72 ++++++++++++
 src/cpu/probes/pc_count_tracker_manager.cc |  88 +++++++++++++++
 src/cpu/probes/pc_count_tracker_manager.hh | 124 +++++++++++++++++++++
 6 files changed, 455 insertions(+)
 create mode 100644 src/cpu/probes/PcCountTracker.py
 create mode 100644 src/cpu/probes/SConscript
 create mode 100644 src/cpu/probes/pc_count_tracker.cc
 create mode 100644 src/cpu/probes/pc_count_tracker.hh
 create mode 100644 src/cpu/probes/pc_count_tracker_manager.cc
 create mode 100644 src/cpu/probes/pc_count_tracker_manager.hh

diff --git a/src/cpu/probes/PcCountTracker.py b/src/cpu/probes/PcCountTracker.py
new file mode 100644
index 0000000000..259ec68f8e
--- /dev/null
+++ b/src/cpu/probes/PcCountTracker.py
@@ -0,0 +1,64 @@
+# Copyright (c) 2023 The Regents of the University of California
+# All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are
+# met: redistributions of source code must retain the above copyright
+# notice, this list of conditions and the following disclaimer;
+# redistributions in binary form must reproduce the above copyright
+# notice, this list of conditions and the following disclaimer in the
+# documentation and/or other materials provided with the distribution;
+# neither the name of the copyright holders nor the names of its
+# contributors may be used to endorse or promote products derived from
+# this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+from m5.params import *
+from m5.util.pybind import *
+from m5.objects.Probe import ProbeListenerObject
+from m5.objects import SimObject
+
+
+class PcCountTrackerManager(SimObject):
+    """This class manages global PC-count pair tracking.
+    It keeps the global counters for all target PC-count pairs and raises exit
+    events when a PC executed a target number of times.
+    It gets called every time a PcCountTracker encounters a target PC.
+    """
+
+    type = "PcCountTrackerManager"
+    cxx_header = "cpu/probes/pc_count_tracker_manager.hh"
+    cxx_class = "gem5::PcCountTrackerManager"
+
+    cxx_exports = [
+        PyBindMethod("getPcCount"),
+        PyBindMethod("getCurrentPcCountPair"),
+    ]
+
+    targets = VectorParam.PcCountPair("the target PC Count pairs")
+
+
+class PcCountTracker(ProbeListenerObject):
+    """This probe listener tracks the number of times a particular pc has been
+    executed. It needs to be connected to a manager to track the global
+    information.
+    """
+
+    type = "PcCountTracker"
+    cxx_header = "cpu/probes/pc_count_tracker.hh"
+    cxx_class = "gem5::PcCountTracker"
+
+    targets = VectorParam.PcCountPair("the target PC Count pairs")
+    core = Param.BaseCPU("the connected cpu")
+    ptmanager = Param.PcCountTrackerManager("the PcCountTracker manager")
diff --git a/src/cpu/probes/SConscript b/src/cpu/probes/SConscript
new file mode 100644
index 0000000000..c96ca78a0c
--- /dev/null
+++ b/src/cpu/probes/SConscript
@@ -0,0 +1,37 @@
+# Copyright (c) 2022 The Regents of the University of California
+# All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are
+# met: redistributions of source code must retain the above copyright
+# notice, this list of conditions and the following disclaimer;
+# redistributions in binary form must reproduce the above copyright
+# notice, this list of conditions and the following disclaimer in the
+# documentation and/or other materials provided with the distribution;
+# neither the name of the copyright holders nor the names of its
+# contributors may be used to endorse or promote products derived from
+# this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+Import("*")
+
+if not env["CONF"]["USE_NULL_ISA"]:
+    SimObject(
+        "PcCountTracker.py",
+        sim_objects=["PcCountTracker", "PcCountTrackerManager"],
+    )
+    Source("pc_count_tracker.cc")
+    Source("pc_count_tracker_manager.cc")
+
+    DebugFlag("PcCountTracker")
diff --git a/src/cpu/probes/pc_count_tracker.cc b/src/cpu/probes/pc_count_tracker.cc
new file mode 100644
index 0000000000..184db9a6a3
--- /dev/null
+++ b/src/cpu/probes/pc_count_tracker.cc
@@ -0,0 +1,70 @@
+/*
+ * Copyright (c) 2022 The Regents of the University of California.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are
+ * met: redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer;
+ * redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution;
+ * neither the name of the copyright holders nor the names of its
+ * contributors may be used to endorse or promote products derived from
+ * this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include "cpu/probes/pc_count_tracker.hh"
+
+
+namespace gem5
+{
+
+PcCountTracker::PcCountTracker(const PcCountTrackerParams &p)
+    : ProbeListenerObject(p),
+      cpuptr(p.core),
+      manager(p.ptmanager)
+{
+    if (!cpuptr || !manager) {
+        fatal("%s is NULL", !cpuptr ? "CPU": "PcCountTrackerManager");
+    }
+    for (int i = 0; i < p.targets.size(); i++) {
+        // initialize the set of targeting Program Counter addresses
+        targetPC.insert(p.targets[i].getPC());
+    }
+}
+
+void
+PcCountTracker::regProbeListeners()
+{
+    // connect the probe listener with the probe "RetriedInstsPC" in the
+    // corresponding core.
+    // when "RetiredInstsPC" notifies the probe listener, then the function
+    // 'check_pc' is automatically called
+    typedef ProbeListenerArg<PcCountTracker, Addr> PcCountTrackerListener;
+    listeners.push_back(new PcCountTrackerListener(this, "RetiredInstsPC",
+                                            &PcCountTracker::checkPc));
+}
+
+void
+PcCountTracker::checkPc(const Addr& pc) {
+    if (targetPC.find(pc) != targetPC.end()) {
+        // if the PC is one of the target PCs, then notify the
+        // PcCounterTrackerManager by calling its `check_count` function
+        manager->checkCount(pc);
+    }
+}
+
+} // namespace gem5
diff --git a/src/cpu/probes/pc_count_tracker.hh b/src/cpu/probes/pc_count_tracker.hh
new file mode 100644
index 0000000000..8f54e1ad72
--- /dev/null
+++ b/src/cpu/probes/pc_count_tracker.hh
@@ -0,0 +1,72 @@
+/*
+ * Copyright (c) 2022 The Regents of the University of California.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are
+ * met: redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer;
+ * redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution;
+ * neither the name of the copyright holders nor the names of its
+ * contributors may be used to endorse or promote products derived from
+ * this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef __CPU_PROBES_PC_COUNT_TRACKER_HH__
+#define __CPU_PROBES_PC_COUNT_TRACKER_HH__
+
+#include <unordered_set>
+
+#include "cpu/probes/pc_count_tracker_manager.hh"
+#include "params/PcCountTracker.hh"
+#include "sim/probe/probe.hh"
+
+namespace gem5
+{
+
+class PcCountTracker : public ProbeListenerObject
+{
+  public:
+    PcCountTracker(const PcCountTrackerParams &params);
+
+    /** setup the probelistener */
+    virtual void regProbeListeners();
+
+    /**
+     * this function is called when the probelistener receives signal from the
+     * probe
+     *
+     * @param pc the targeting Program Counter address
+     */
+    void checkPc(const Addr& pc);
+
+  private:
+    /**
+     * a set of Program Counter addresses that should notify the
+     * PcCounterTrackerManager for
+     */
+    std::unordered_set<Addr> targetPC;
+
+    /** the core this PcCountTracker is tracking at */
+    BaseCPU *cpuptr;
+
+    /** the PcCounterTrackerManager */
+    PcCountTrackerManager *manager;
+};
+}
+
+#endif // __CPU_PROBES_PC_COUNT_TRACKER_HH__
diff --git a/src/cpu/probes/pc_count_tracker_manager.cc b/src/cpu/probes/pc_count_tracker_manager.cc
new file mode 100644
index 0000000000..88d7dda568
--- /dev/null
+++ b/src/cpu/probes/pc_count_tracker_manager.cc
@@ -0,0 +1,88 @@
+/*
+ * Copyright (c) 2022 The Regents of the University of California.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are
+ * met: redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer;
+ * redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution;
+ * neither the name of the copyright holders nor the names of its
+ * contributors may be used to endorse or promote products derived from
+ * this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include "cpu/probes/pc_count_tracker_manager.hh"
+
+namespace gem5
+{
+
+PcCountTrackerManager::PcCountTrackerManager(
+    const PcCountTrackerManagerParams &p)
+    : SimObject(p)
+{
+    currentPair = PcCountPair(0,0);
+    ifListNotEmpty = true;
+
+    for (int i = 0 ; i < p.targets.size() ; i++) {
+        // initialize the counter for the inputted PC Count pair
+        // unordered_map does not allow duplicate, so counter won't
+        // have duplicates
+        counter.insert(std::make_pair(p.targets[i].getPC(),0));
+        // store all the PC Count pair into the targetPair set
+        targetPair.insert(p.targets[i]);
+    }
+    DPRINTF(PcCountTracker,
+            "total %i PCs in counter\n", counter.size());
+    DPRINTF(PcCountTracker,
+            "all targets: \n%s", printAllTargets());
+}
+
+void
+PcCountTrackerManager::checkCount(Addr pc)
+{
+
+    if(ifListNotEmpty) {
+        int count = ++counter.find(pc)->second;
+        // increment the counter of the encountered PC address by 1
+
+        currentPair = PcCountPair(pc,count);
+        // update the current PC Count pair
+        if(targetPair.find(currentPair) != targetPair.end()) {
+            // if the current PC Count pair is one of the target pairs
+            DPRINTF(PcCountTracker,
+                "pc:%s encountered\n", currentPair.to_string());
+
+            exitSimLoopNow("simpoint starting point found");
+            // raise the SIMPOINT_BEGIN exit event
+
+            targetPair.erase(currentPair);
+            // erase the encountered PC Count pair from the target pairs
+            DPRINTF(PcCountTracker,
+                "There are %i targets remained\n", targetPair.size());
+        }
+
+        if(targetPair.empty()) {
+            // if all target PC Count pairs are encountered
+            DPRINTF(PcCountTracker,
+                    "all targets are encountered.\n");
+            ifListNotEmpty = false;
+        }
+    }
+}
+
+}
diff --git a/src/cpu/probes/pc_count_tracker_manager.hh b/src/cpu/probes/pc_count_tracker_manager.hh
new file mode 100644
index 0000000000..00742ff239
--- /dev/null
+++ b/src/cpu/probes/pc_count_tracker_manager.hh
@@ -0,0 +1,124 @@
+/*
+ * Copyright (c) 2022 The Regents of the University of California.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are
+ * met: redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer;
+ * redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution;
+ * neither the name of the copyright holders nor the names of its
+ * contributors may be used to endorse or promote products derived from
+ * this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef __CPU_PROBES_PC_COUNT_TRACKER_MANAGER_HH__
+#define __CPU_PROBES_PC_COUNT_TRACKER_MANAGER_HH__
+
+#include <unordered_map>
+#include <unordered_set>
+
+#include "cpu/base.hh"
+#include "params/PcCountTrackerManager.hh"
+#include "sim/sim_exit.hh"
+#include "debug/PcCountTracker.hh"
+
+namespace gem5
+{
+
+
+class PcCountTrackerManager : public SimObject {
+  public:
+    PcCountTrackerManager(const PcCountTrackerManagerParams &params);
+
+    /** this function is called when PcCountTrackerProbeListener finds a target
+     * PC
+     */
+    void checkCount(Addr pc);
+
+  private:
+    /** a counter that stores all the target PC addresses and the number
+     * of times the target PC has been executed
+     */
+    std::unordered_map<Addr, int> counter;
+
+    /** a set that stores all the PC Count pairs that should raise an
+     * exit event at
+     */
+    std::unordered_set<PcCountPair,
+                        PcCountPair::HashFunction> targetPair;
+
+    /** the current PC Count pair */
+    PcCountPair currentPair;
+
+    /** when all the PC Count pairs in the `targetPair` are encountered,
+     * and the PCCOUNTTRACK_END exit event is raised, this boolean
+     * variable becomes false and is used to stop the `check_count`
+     * from functioning. This is default as true.
+     */
+    bool ifListNotEmpty;
+
+  public:
+
+    /** this function returns the corresponding value of count for the
+     * inputted Program Counter address. If the PC address does not
+     * exist in the counter, then it returns a -1.
+     *
+     * @param pc the targeting Program Counter address
+     * @return the corresponding value of count for the inputted Program
+     * Counter address
+     */
+    int
+    getPcCount(Addr pc) const
+    {
+        if (counter.find(pc) != counter.end()) {
+            return counter.find(pc)->second;
+        }
+        return -1;
+    }
+
+    /** this function returns the current PC Count pair
+     *
+     * @return current PC Count pair
+     */
+    PcCountPair
+    getCurrentPcCountPair() const
+    {
+        return currentPair;
+    }
+
+    /** this function print all targets
+     *
+     * @return formatted string that contains all targets
+     */
+    std::string
+    printAllTargets() const
+    {
+        std::string s;
+        for(auto itr = targetPair.begin();
+            itr != targetPair.end();
+            ++itr) {
+            s += itr->to_string();
+            s += "\n";
+        }
+        return s;
+    }
+};
+
+}
+
+#endif // __CPU_PROBES_PC_COUNT_TRACKER_MANAGER_HH__

From 41816bf0307c8ef6bc40f032e2e65c22bed26262 Mon Sep 17 00:00:00 2001
From: Zhantong Qiu <ztqiu@ucdavis.edu>
Date: Fri, 6 Jan 2023 17:13:27 -0800
Subject: [PATCH 209/492] stdlib: Added stdlib LoopPoint classes

LoopPoint is a multithreaded workload sampling method that targets
PCs and PC execution counts.
The main idea for LoopPoint is to base the beginning and end of the
simjulation sample on the number of times a particular loop (PC) has
been executed globally across all threads in a region that partitioned
with a set length of instruction counts. This in some senses
generalizes SimPoint which use the instruction count of a single
thread.
The link to the paper: https://ieeexplore.ieee.org/document/9773236

The LoopPointCheckpoint is designed to take in LoopPoint data file
and generate the information needed to take checkpoints for LoopPoint
regions(warmup regions+simulation region)
The LoopPointRestore is designed to take in the LoopPOint data file
and generate information needed to to restore a checkpoint taken by
the LoopPOintCheckpoint.
The LoopPoint is the parent class for LoopPointCheckpoint and
LoopPointRestore.

Change-Id: I595b0ff9d350c7c496639748a9c63ecc61fbaec9
Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/67195
Tested-by: kokoro <noreply+kokoro@google.com>
Reviewed-by: Bobby Bruce <bbruce@ucdavis.edu>
Maintainer: Bobby Bruce <bbruce@ucdavis.edu>
---
 src/python/SConscript                         |   1 +
 .../components/processors/abstract_core.py    |   9 +-
 .../components/processors/base_cpu_core.py    |  20 +-
 src/python/gem5/utils/looppoint.py            | 401 ++++++++++++++++++
 4 files changed, 429 insertions(+), 2 deletions(-)
 create mode 100644 src/python/gem5/utils/looppoint.py

diff --git a/src/python/SConscript b/src/python/SConscript
index aeeb8925a3..68b5e1d926 100644
--- a/src/python/SConscript
+++ b/src/python/SConscript
@@ -240,6 +240,7 @@ PySource('gem5.components.processors',
 PySource('gem5.components.processors',
     'gem5/components/processors/switchable_processor.py')
 PySource('gem5.utils', 'gem5/utils/simpoint.py')
+PySource('gem5.utils', 'gem5/utils/looppoint.py')
 PySource('gem5.components.processors',
     'gem5/components/processors/traffic_generator_core.py')
 PySource('gem5.components.processors',
diff --git a/src/python/gem5/components/processors/abstract_core.py b/src/python/gem5/components/processors/abstract_core.py
index 58296bca3b..8259df8a8b 100644
--- a/src/python/gem5/components/processors/abstract_core.py
+++ b/src/python/gem5/components/processors/abstract_core.py
@@ -29,7 +29,8 @@ from typing import Optional, List
 
 from ...isas import ISA
 
-from m5.objects import BaseMMU, Port, SubSystem
+from m5.objects import BaseMMU, Port, SubSystem, PcCountTrackerManager
+from m5.params import PcCountPair
 
 
 class AbstractCore(SubSystem):
@@ -155,3 +156,9 @@ class AbstractCore(SubSystem):
         instruction stop is setup differently dependent on this.
         """
         raise NotImplementedError("This core type does not support MAX_INSTS")
+
+    @abstractmethod
+    def add_pc_tracker_probe(
+        self, target_pair: List[PcCountPair], manager: PcCountTrackerManager
+    ) -> None:
+        raise NotImplementedError
diff --git a/src/python/gem5/components/processors/base_cpu_core.py b/src/python/gem5/components/processors/base_cpu_core.py
index 631fd0ad0e..c75c0029cf 100644
--- a/src/python/gem5/components/processors/base_cpu_core.py
+++ b/src/python/gem5/components/processors/base_cpu_core.py
@@ -33,7 +33,15 @@ from ...runtime import get_runtime_isa
 from ...utils.override import overrides
 from ...utils.requires import requires
 
-from m5.objects import BaseMMU, Port, BaseCPU, Process
+from m5.objects import (
+    BaseMMU,
+    Port,
+    BaseCPU,
+    Process,
+    PcCountTracker,
+    PcCountTrackerManager,
+)
+from m5.params import PcCountPair
 
 
 class BaseCPUCore(AbstractCore):
@@ -169,3 +177,13 @@ class BaseCPUCore(AbstractCore):
             self.core.scheduleInstStopAnyThread(inst)
         else:
             self.core.max_insts_any_thread = inst
+
+    @overrides(AbstractCore)
+    def add_pc_tracker_probe(
+        self, target_pair: List[PcCountPair], manager: PcCountTrackerManager
+    ) -> None:
+        pair_tracker = PcCountTracker()
+        pair_tracker.targets = target_pair
+        pair_tracker.core = self.core
+        pair_tracker.ptmanager = manager
+        self.core.probeListener = pair_tracker
diff --git a/src/python/gem5/utils/looppoint.py b/src/python/gem5/utils/looppoint.py
new file mode 100644
index 0000000000..b681e75fee
--- /dev/null
+++ b/src/python/gem5/utils/looppoint.py
@@ -0,0 +1,401 @@
+# Copyright (c) 2022 The Regents of the University of California
+# All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are
+# met: redistributions of source code must retain the above copyright
+# notice, this list of conditions and the following disclaimer;
+# redistributions in binary form must reproduce the above copyright
+# notice, this list of conditions and the following disclaimer in the
+# documentation and/or other materials provided with the distribution;
+# neither the name of the copyright holders nor the names of its
+# contributors may be used to endorse or promote products derived from
+# this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+
+from m5.util import fatal
+from m5.params import PcCountPair
+from pathlib import Path
+from typing import List, Dict
+from gem5.components.processors.abstract_processor import AbstractProcessor
+from m5.objects import PcCountTrackerManager
+import csv
+import re
+import json
+
+
+class LoopPoint:
+    """
+    This LoopPoint class is used to manage the information needed for LoopPoint
+    in workload
+    """
+
+    def __init__(
+        self,
+        targets: List[PcCountPair],
+        regions: Dict[PcCountPair, int],
+        json_file: Dict[int, Dict],
+    ) -> None:
+        """
+        :param targets: a list of PcCountPair that are used to generate exit
+        event at when the PcCountTrackerManager encounter this PcCountPair in
+        execution
+        :param regions: a dictionary used to find the corresponding region id
+        for the significant PcCountPair. This is mainly used to ensure
+        checkpoints are taken in the correct PcCountPair or relative counts are
+        updated at the correct count
+        :param json_file: all the LoopPoint data including relative counts and
+        multiplier are stored in this parameter. It can be outputted as a json
+        file.
+        """
+
+        self._manager = PcCountTrackerManager()
+        self._manager.targets = targets
+        self._targets = targets
+        self._regions = regions
+        self._json_file = json_file
+
+    def setup_processor(
+        self,
+        processor: AbstractProcessor,
+    ) -> None:
+        """
+        This function is used to setup a PC tracker in all the cores and
+        connect all the tracker to the PC tracker manager to perform
+        multithread PC tracking
+        :param processor: the processor used in the simulation configuration
+        """
+        for core in processor.get_cores():
+            core.add_pc_tracker_probe(self._targets, self._manager)
+
+    def update_relatives_counts(self) -> None:
+        """
+        This function is used to update the relative count for restore used.
+        The new relative count will be stored in the _json_file and can be
+        outputted into a json file by calling the output_json_file function.
+        """
+        current_pair = self._manager.getCurrentPcCountPair()
+        if current_pair in self._regions:
+            rid = self._regions[current_pair]
+            region = self._json_file[rid]["simulation"]
+            if "warmup" in self._json_file[rid]:
+                # if this region has a warmup interval,
+                # then update the relative count for the
+                # start of the simulation region
+                start = region["start"]["pc"]
+                temp = region["start"]["global"] - self._manager.getPcCount(
+                    start
+                )
+                self._json_file[rid]["simulation"]["start"]["relative"] = int(
+                    temp
+                )
+            end = region["end"]["pc"]
+            temp = region["end"]["global"] - self._manager.getPcCount(end)
+            self._json_file[rid]["simulation"]["end"]["relative"] = int(temp)
+
+    def output_json_file(
+        self, input_indent: int = 4, filename: str = "outdir.json"
+    ) -> Dict[int, Dict]:
+        """
+        This function is used to output the _json_file into a json file
+        :param input_indent: the indent value of the json file
+        :param filename: the name of the output file
+        """
+        with open(filename, "w") as file:
+            json.dump(self._json_file, file, indent=input_indent)
+
+    def get_current_region(self) -> int:
+        """
+        This function returns the region id if the current PC Count pair is
+        significant(e.x. beginning of the checkpoint), otherwise, it returns
+        a '-1' to indicate the current PC Count pair is not significant
+        """
+        current_pair = self._manager.getCurrentPcCountPair()
+        if current_pair in self._regions:
+            return self._regions[current_pair]
+        return -1
+
+    def get_current_pair(self) -> PcCountPair:
+        """
+        This function returns the current PC Count pair
+        """
+        return self._manager.getCurrentPcCountPair()
+
+    def get_regions(self) -> Dict[PcCountPair, int]:
+        """
+        This function returns the complete dictionary of _regions
+        """
+        return self._regions
+
+    def get_targets(self) -> List[PcCountPair]:
+        """
+        This function returns the complete list of _targets
+        """
+        return self._targets
+
+
+class LoopPointCheckpoint(LoopPoint):
+    def __init__(self, looppoint_file: Path, if_csv: bool) -> None:
+        """
+        This class is specifically designed to take in the LoopPoint data file
+        and generate the information needed to take checkpoints for LoopPoint
+        regions(warmup region+simulation region)
+        :param looppoint_file: the director of the LoopPoint data file
+        :param if_csv: if the file is a csv file, then it is True. If the file
+        is a json file, then it is False
+        """
+
+        _json_file = {}
+        _targets = []
+        _region_id = {}
+
+        if if_csv:
+            self.profile_csv(looppoint_file, _targets, _json_file, _region_id)
+        else:
+            self.profile_json(looppoint_file, _targets, _json_file, _region_id)
+
+        super().__init__(
+            _targets,
+            _region_id,
+            _json_file,
+        )
+
+    def profile_csv(
+        self,
+        looppoint_file_path: Path,
+        targets: List[PcCountPair],
+        json_file: Dict[int, Dict],
+        region_id: Dict[PcCountPair, int],
+    ) -> None:
+        """
+        This function profiles the csv LoopPoint data file into three variables
+        to take correct checkpoints for LoopPoint
+        :param looppoint_file_path: the director of the LoopPoint data file
+        :param targets: a list of PcCountPair
+        :param json_file: a dictionary for all the LoopPoint data
+        :param region_id: a dictionary for all the significant PcCountPair and
+        its corresponding region id
+        """
+
+        # This section is hard-coded to parse the data in the csv file.
+        # The csv file is assumed to have a constant format.
+        with open(looppoint_file_path, newline="") as csvfile:
+            reader = csv.reader(csvfile, delimiter=" ", quotechar="|")
+            for row in reader:
+                if len(row) > 1:
+                    if row[0] == "cluster":
+                        # if it is a simulation region
+                        line = row[4].split(",")
+                        start = PcCountPair(int(line[3], 16), int(line[6]))
+                        end = PcCountPair(int(line[7], 16), int(line[10]))
+                        if int(line[2]) in json_file:
+                            #  if this region was created in the json_file
+                            json_file[int(line[2])]["simulation"] = {
+                                "start": {"pc": int(line[3], 16)}
+                            }
+                        else:
+                            json_file[int(line[2])] = {
+                                "simulation": {
+                                    "start": {"pc": int(line[3], 16)}
+                                }
+                            }
+                        json_file[int(line[2])]["simulation"]["start"][
+                            "global"
+                        ] = int(line[6])
+                        json_file[int(line[2])]["simulation"]["end"] = {
+                            "pc": int(line[7], 16)
+                        }
+                        json_file[int(line[2])]["simulation"]["end"][
+                            "global"
+                        ] = int(line[10])
+                        json_file[int(line[2])]["multiplier"] = float(line[14])
+                        targets.append(start)
+                        targets.append(end)
+                        # store all the PC Count pairs from the file to the
+                        # targets list
+                    elif row[0] == "Warmup":
+                        line = row[3].split(",")
+                        start = PcCountPair(int(line[3], 16), int(line[6]))
+                        end = PcCountPair(int(line[7], 16), int(line[10]))
+                        if int(line[0]) in json_file:
+                            json_file[int(line[0])]["warmup"] = {
+                                "start": {"pc": int(line[3], 16)}
+                            }
+                        else:
+                            json_file[int(line[0])] = {
+                                "warmup": {"start": {"pc": int(line[3], 16)}}
+                            }
+                        json_file[int(line[0])]["warmup"]["start"][
+                            "count"
+                        ] = int(line[6])
+                        json_file[int(line[0])]["warmup"]["end"] = {
+                            "pc": int(line[7], 16)
+                        }
+                        json_file[int(line[0])]["warmup"]["end"][
+                            "count"
+                        ] = int(line[10])
+                        targets.append(start)
+                        targets.append(end)
+                        # store all the PC Count pairs from the file to the
+                        # targets list
+
+        for rid, region in json_file.items():
+            # this loop iterates all the regions and find the significant PC
+            # Count pair for the region
+            if "warmup" in region:
+                # if the region has a warmup interval, then the checkpoint
+                # should be taken at the start of the warmup interval
+                start = PcCountPair(
+                    region["warmup"]["start"]["pc"],
+                    region["warmup"]["start"]["count"],
+                )
+            else:
+                # if the region does not have a warmup interval, then the
+                # checkpoint should be taken at the start of the simulation
+                # region
+                start = PcCountPair(
+                    region["simulation"]["start"]["pc"],
+                    region["simulation"]["start"]["global"],
+                )
+            region_id[start] = rid
+
+    def profile_json(
+        self,
+        looppoint_file_path: Path,
+        targets: List[PcCountPair],
+        json_file: Dict[int, Dict],
+        region_id: Dict[PcCountPair, int],
+    ) -> None:
+        """
+        This function profiles the json LoopPoint data file into three
+        variables to take correct checkpoints for LoopPoint
+        :param looppoint_file_path: the director of the LoopPoint data file
+        :param targets: a list of PcCountPair
+        :param json_file: a dictionary for all the LoopPoint data
+        :param region_id: a dictionary for all the significant PcCountPair and
+        its corresponding region id
+        """
+
+        with open(looppoint_file_path) as file:
+            json_file = json.load(file)
+            # load all json information into the json_file variable
+            for rid, region in json_file.items():
+                # iterates all regions
+                sim_start = PcCountPair(
+                    region["simulation"]["start"]["pc"],
+                    region["simulation"]["start"]["global"],
+                )
+                targets.append(sim_start)
+                # store all PC Count pairs in the file into targets list
+                end = PcCountPair(
+                    region["simulation"]["end"]["pc"],
+                    region["simulation"]["end"]["global"],
+                )
+                targets.append(end)
+                if "warmup" in region:
+                    # if there is a warmup in the region, then the checkpoint
+                    # should be taken at the start of the warmup interval
+                    start = PcCountPair(
+                        region["warmup"]["start"]["pc"],
+                        region["warmup"]["start"]["count"],
+                    )
+                    targets.append(start)
+                    end = PcCountPair(
+                        region["warmup"]["end"]["pc"],
+                        region["warmup"]["end"]["count"],
+                    )
+                    targets.append(end)
+                else:
+                    # if there is not a warmup interval in the region, then the
+                    # checkpoint should be taken at the start of the simulation
+                    # region
+                    start = sim_start
+                region_id[start] = rid
+
+
+class LoopPointRestore(LoopPoint):
+    def __init__(self, looppoint_file: Path, checkpoint_path: Path) -> None:
+        """
+        This class is specifically designed to take in the LoopPoint data file and
+        generator information needed to restore a checkpoint taken by the
+        LoopPointCheckPoint.
+        :param looppoint_file: a json file generated by gem5 that has all the
+        LoopPoint data information
+        :param checkpoint_path: the director of the checkpoint taken by the gem5
+        standard library looppoint_save_checkpoint_generator
+
+        """
+
+        _json_file = {}
+        _targets = []
+        _region_id = {}
+
+        self.profile_restore(
+            looppoint_file, checkpoint_path, _targets, _json_file, _region_id
+        )
+
+        super().__init__(
+            _targets,
+            _region_id,
+            _json_file,
+        )
+
+    def profile_restore(
+        self,
+        looppoint_file_path: Path,
+        checkpoint_dir: Path,
+        targets: List[PcCountPair],
+        json_file: Dict[int, Dict],
+        region_id: Dict[PcCountPair, int],
+    ) -> None:
+        """
+        This function is used to profile data from the LoopPoint data file to
+        information needed to restore the LoopPoint checkpoint
+        :param looppoint_file_path: the director of the LoopPoint data file
+        :param targets: a list of PcCountPair
+        :param json_file: a dictionary for all the LoopPoint data
+        :param region_id: a dictionary for all the significant PcCountPair and
+        its corresponding region id
+        """
+        regex = re.compile(r"cpt.Region([0-9]+)")
+        rid = regex.findall(checkpoint_dir.as_posix())[0]
+        # finds out the region id from the directory name
+        with open(looppoint_file_path) as file:
+            json_file = json.load(file)
+            if rid not in json_file:
+                # if the region id does not exist in the LoopPoint data file
+                # raise a fatal message
+                fatal(f"{rid} is not a valid region\n")
+            region = json_file[rid]
+            if "warmup" in region:
+                if "relative" not in region["simulation"]["start"]:
+                    # if there are not relative counts for the PC Count pair
+                    # then it means there is not enough information to restore
+                    # this checkpoint
+                    fatal(f"region {rid} doesn't have relative count info\n")
+                start = PcCountPair(
+                    region["simulation"]["start"]["pc"],
+                    region["simulation"]["start"]["relative"],
+                )
+                region_id[start] = rid
+                targets.append(start)
+            if "relative" not in region["simulation"]["end"]:
+                fatal(f"region {rid} doesn't have relative count info\n")
+            end = PcCountPair(
+                region["simulation"]["end"]["pc"],
+                region["simulation"]["end"]["relative"],
+            )
+            region_id[end] = rid
+            targets.append(end)

From 82c587bd8834fb0700414cb6d81e76f46d588d8f Mon Sep 17 00:00:00 2001
From: Zhantong Qiu <ztqiu@ucdavis.edu>
Date: Fri, 6 Jan 2023 17:25:26 -0800
Subject: [PATCH 210/492] stdlib: Allow se_binary_workload to setup LoopPoints

Added a set_se_looppoint_workload function to take in information for
workload and a stdlib LoopPoint object that stores all the information
the workload needed to run the LoopPoint sampling method.
Added a get_looppoint function to return the stdlib LoopPoint object.

Change-Id: I7afc5c4c743256f7df97345f331b6f72b7a5fd07
Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/67196
Maintainer: Bobby Bruce <bbruce@ucdavis.edu>
Reviewed-by: Bobby Bruce <bbruce@ucdavis.edu>
Tested-by: kokoro <noreply+kokoro@google.com>
---
 .../components/boards/se_binary_workload.py   | 47 +++++++++++++++++++
 1 file changed, 47 insertions(+)

diff --git a/src/python/gem5/components/boards/se_binary_workload.py b/src/python/gem5/components/boards/se_binary_workload.py
index 31931106c9..404a78458f 100644
--- a/src/python/gem5/components/boards/se_binary_workload.py
+++ b/src/python/gem5/components/boards/se_binary_workload.py
@@ -25,6 +25,7 @@
 # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 
 from .abstract_board import AbstractBoard
+
 from ...resources.resource import (
     FileResource,
     AbstractResource,
@@ -34,6 +35,8 @@ from ...resources.resource import (
     SimpointDirectoryResource,
 )
 
+from gem5.utils.looppoint import LoopPoint
+
 from m5.objects import SEWorkload, Process
 
 from typing import Optional, List, Union
@@ -170,3 +173,47 @@ class SEBinaryWorkload:
         if getattr(self, "_simpoint_resource", None):
             return self._simpoint_resource
         raise Exception("This board does not have a simpoint set.")
+
+    def set_se_looppoint_workload(
+        self,
+        binary: AbstractResource,
+        arguments: List[str] = [],
+        looppoint: Optional[Union[AbstractResource, LoopPoint]] = None,
+        checkpoint: Optional[Union[Path, AbstractResource]] = None,
+    ) -> None:
+        """Set up the system to run a LoopPoint workload.
+
+        **Limitations**
+        * Dynamically linked executables are partially supported when the host
+          ISA and the simulated ISA are the same.
+
+        :param binary: The resource encapsulating the binary to be run.
+        :param arguments: The input arguments for the binary
+        :param looppoint: The LoopPoint object that contain all the information
+        gather from the LoopPoint files and a LoopPointManager that will raise
+        exit events for LoopPoints
+        """
+
+        if isinstance(looppoint, AbstractResource):
+            self._looppoint_object = LoopPoint(looppoint)
+        else:
+            assert isinstance(looppoint, LoopPoint)
+            self._looppoint_object = looppoint
+
+        self._looppoint_object.setup_processor(self.get_processor())
+
+        # Call set_se_binary_workload after LoopPoint setup is complete
+        self.set_se_binary_workload(
+            binary=binary,
+            arguments=arguments,
+            checkpoint=checkpoint,
+        )
+
+    def get_looppoint(self) -> LoopPoint:
+        """
+        Returns the LoopPoint object set. If no LoopPoint object has been set
+        an exception is thrown.
+        """
+        if getattr(self, "_looppoint_object", None):
+            return self._looppoint_object
+        raise Exception("This board does not have a looppoint set.")

From afbca3b6e7a7a4eb919d9b919e292de3090efd60 Mon Sep 17 00:00:00 2001
From: Zhantong Qiu <ztqiu@ucdavis.edu>
Date: Fri, 6 Jan 2023 17:28:00 -0800
Subject: [PATCH 211/492] stdlib: Added LoopPoint checkpoint specific generator

Added looppoint_save_checkpoint_generator to take checkpoints for
LoopPoint methodology.
Users can decide to update the relative counts storing in the LoopPoint
module and exit when all the target PC-count pairs are encountered or
not.

Change-Id: Id1cf1516f4fa838e20a67530e94b361e42ca09f3
Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/67197
Reviewed-by: Bobby Bruce <bbruce@ucdavis.edu>
Maintainer: Bobby Bruce <bbruce@ucdavis.edu>
Tested-by: kokoro <noreply+kokoro@google.com>
---
 .../gem5/simulate/exit_event_generators.py    | 44 +++++++++++++++++++
 1 file changed, 44 insertions(+)

diff --git a/src/python/gem5/simulate/exit_event_generators.py b/src/python/gem5/simulate/exit_event_generators.py
index 738e1281d9..82eba17543 100644
--- a/src/python/gem5/simulate/exit_event_generators.py
+++ b/src/python/gem5/simulate/exit_event_generators.py
@@ -29,6 +29,7 @@ import m5.stats
 from ..components.processors.abstract_processor import AbstractProcessor
 from ..components.processors.switchable_processor import SwitchableProcessor
 from ..resources.resource import SimpointResource
+from gem5.utils.looppoint import LoopPoint
 from m5.util import warn
 from pathlib import Path
 
@@ -167,3 +168,46 @@ def simpoints_save_checkpoint_generator(
             yield False
         else:
             yield True
+
+
+def looppoint_save_checkpoint_generator(
+    checkpoint_dir: Path,
+    looppoint: LoopPoint,
+    update_relatives: bool = True,
+    exit_when_empty: bool = True,
+):
+    """
+    A generator for taking a checkpoint for LoopPoint. It will save the
+    checkpoints in the checkpoint_dir path with the Region id.
+    (i.e. "cpt.Region10) It only takes a checkpoint if the current PC Count
+    pair is a significant PC Count Pair. This is determined in the LoopPoint
+    module. The simulation loop continues after exiting this generator.
+    :param checkpoint_dir: where to save the checkpoints
+    :param loopoint: the looppoint object used in the configuration script
+    :param update_relative: if the generator should update the relative count
+    information in the output json file, then it should be True. It is default
+    as True.
+    :param exit_when_empty: if the generator should exit the simulation loop if
+    all PC paris have been discovered, then it should be True. It is default as
+    True.
+    """
+    if exit_when_empty:
+        total_pairs = len(looppoint.get_targets())
+    else:
+        total_pairs = -1
+        # it will never equal to 0 if exit_when_empty is false
+
+    while total_pairs != 0:
+        region = looppoint.get_current_region()
+        # if it is a significant PC Count pair, then the get_current_region()
+        # will return an integer greater than 0. By significant PC Count pair,
+        # it means the PC Count pair that indicates where to take the
+        # checkpoint at. This is determined in the LoopPoint module.
+        if region != -1:
+            if update_relatives:
+                looppoint.update_relatives_counts()
+            m5.checkpoint((checkpoint_dir / f"cpt.Region{region}").as_posix())
+        total_pairs -= 1
+        yield False
+
+    yield True

From de5044cfe088449bba6d74d2f51540174880dac1 Mon Sep 17 00:00:00 2001
From: "Bobby R. Bruce" <bbruce@ucdavis.edu>
Date: Mon, 30 Jan 2023 01:09:45 +0000
Subject: [PATCH 212/492] stdlib: Change the default Looppoint JSON output to
 m5out

Change-Id: I6ac9aa55dfd4a0250c487ae743a1f4ea0b1bc154
Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/67491
Reviewed-by: Bobby Bruce <bbruce@ucdavis.edu>
Tested-by: kokoro <noreply+kokoro@google.com>
Maintainer: Bobby Bruce <bbruce@ucdavis.edu>
---
 src/python/gem5/utils/looppoint.py | 10 +++++++---
 1 file changed, 7 insertions(+), 3 deletions(-)

diff --git a/src/python/gem5/utils/looppoint.py b/src/python/gem5/utils/looppoint.py
index b681e75fee..d1851a8478 100644
--- a/src/python/gem5/utils/looppoint.py
+++ b/src/python/gem5/utils/looppoint.py
@@ -34,6 +34,8 @@ from m5.objects import PcCountTrackerManager
 import csv
 import re
 import json
+import m5
+import os
 
 
 class LoopPoint:
@@ -106,14 +108,16 @@ class LoopPoint:
             self._json_file[rid]["simulation"]["end"]["relative"] = int(temp)
 
     def output_json_file(
-        self, input_indent: int = 4, filename: str = "outdir.json"
+        self,
+        input_indent: int = 4,
+        filepath: str = os.path.join(m5.options.outdir, "outdir.json"),
     ) -> Dict[int, Dict]:
         """
         This function is used to output the _json_file into a json file
         :param input_indent: the indent value of the json file
-        :param filename: the name of the output file
+        :param filepath: the path of the output file
         """
-        with open(filename, "w") as file:
+        with open(filepath, "w") as file:
             json.dump(self._json_file, file, indent=input_indent)
 
     def get_current_region(self) -> int:

From 82aec1c19be55ac64864fb08099dba075db90a83 Mon Sep 17 00:00:00 2001
From: "Bobby R. Bruce" <bbruce@ucdavis.edu>
Date: Mon, 30 Jan 2023 01:16:27 +0000
Subject: [PATCH 213/492] stdlib: Update LoopPointRestore to take singular
 region_id

This change has been introduced as we only restore to one checkpoint at
a time. The change updates LoopPointRestore to take a sigular region_id
and use it to generate the LoopPpoint from the LoopPoint JSON file.

Change-Id: I0e88d5ba03b164bdd5da098397f44e16af591134
Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/67492
Reviewed-by: Bobby Bruce <bbruce@ucdavis.edu>
Maintainer: Bobby Bruce <bbruce@ucdavis.edu>
Tested-by: kokoro <noreply+kokoro@google.com>
---
 src/python/gem5/utils/looppoint.py | 111 ++++++++++++-----------------
 1 file changed, 45 insertions(+), 66 deletions(-)

diff --git a/src/python/gem5/utils/looppoint.py b/src/python/gem5/utils/looppoint.py
index d1851a8478..8e01e3030f 100644
--- a/src/python/gem5/utils/looppoint.py
+++ b/src/python/gem5/utils/looppoint.py
@@ -28,7 +28,7 @@
 from m5.util import fatal
 from m5.params import PcCountPair
 from pathlib import Path
-from typing import List, Dict
+from typing import List, Dict, Tuple
 from gem5.components.processors.abstract_processor import AbstractProcessor
 from m5.objects import PcCountTrackerManager
 import csv
@@ -330,76 +330,55 @@ class LoopPointCheckpoint(LoopPoint):
 
 
 class LoopPointRestore(LoopPoint):
-    def __init__(self, looppoint_file: Path, checkpoint_path: Path) -> None:
+    def __init__(self, looppoint_file: Path, region_id: int) -> None:
         """
-        This class is specifically designed to take in the LoopPoint data file and
-        generator information needed to restore a checkpoint taken by the
+        This class is specifically designed to take in the LoopPoint data file
+        and generator information needed to restore a checkpoint taken by the
         LoopPointCheckPoint.
         :param looppoint_file: a json file generated by gem5 that has all the
         LoopPoint data information
-        :param checkpoint_path: the director of the checkpoint taken by the gem5
-        standard library looppoint_save_checkpoint_generator
-
+        :param region_id: The region ID we will be restoring to.
         """
 
-        _json_file = {}
-        _targets = []
-        _region_id = {}
-
-        self.profile_restore(
-            looppoint_file, checkpoint_path, _targets, _json_file, _region_id
-        )
-
-        super().__init__(
-            _targets,
-            _region_id,
-            _json_file,
-        )
-
-    def profile_restore(
-        self,
-        looppoint_file_path: Path,
-        checkpoint_dir: Path,
-        targets: List[PcCountPair],
-        json_file: Dict[int, Dict],
-        region_id: Dict[PcCountPair, int],
-    ) -> None:
-        """
-        This function is used to profile data from the LoopPoint data file to
-        information needed to restore the LoopPoint checkpoint
-        :param looppoint_file_path: the director of the LoopPoint data file
-        :param targets: a list of PcCountPair
-        :param json_file: a dictionary for all the LoopPoint data
-        :param region_id: a dictionary for all the significant PcCountPair and
-        its corresponding region id
-        """
-        regex = re.compile(r"cpt.Region([0-9]+)")
-        rid = regex.findall(checkpoint_dir.as_posix())[0]
-        # finds out the region id from the directory name
-        with open(looppoint_file_path) as file:
+        with open(looppoint_file) as file:
             json_file = json.load(file)
-            if rid not in json_file:
-                # if the region id does not exist in the LoopPoint data file
-                # raise a fatal message
-                fatal(f"{rid} is not a valid region\n")
-            region = json_file[rid]
-            if "warmup" in region:
-                if "relative" not in region["simulation"]["start"]:
-                    # if there are not relative counts for the PC Count pair
-                    # then it means there is not enough information to restore
-                    # this checkpoint
-                    fatal(f"region {rid} doesn't have relative count info\n")
-                start = PcCountPair(
-                    region["simulation"]["start"]["pc"],
-                    region["simulation"]["start"]["relative"],
-                )
-                region_id[start] = rid
-                targets.append(start)
-            if "relative" not in region["simulation"]["end"]:
-                fatal(f"region {rid} doesn't have relative count info\n")
-            end = PcCountPair(
-                region["simulation"]["end"]["pc"],
-                region["simulation"]["end"]["relative"],
+
+        targets, regions = self.get_region(
+            json_file=json_file, region_id=region_id
+        )
+
+        super().__init__(targets=targets, regions=regions, json_file=json_file)
+
+    def get_region(
+        self, json_file: Dict[int, Dict], region_id: int
+    ) -> Tuple[List[PcCountPair], Dict[PcCountPair, int]]:
+        to_return_region = {}
+        to_return_targets = []
+
+        if region_id not in json_file:
+            # if the region id does not exist in the LoopPoint data
+            # file raise a fatal message
+            fatal(f"{region_id} is not a valid region\n")
+        region = json_file[region_id]
+        if "warmup" in region:
+            if "relative" not in region["simulation"]["start"]:
+                # if there are not relative counts for the PC Count
+                # pair then it means there is not enough information to
+                # restore this checkpoint
+                fatal(f"region {region_id} doesn't have relative count info\n")
+            start = PcCountPair(
+                region["simulation"]["start"]["pc"],
+                region["simulation"]["start"]["relative"],
             )
-            region_id[end] = rid
-            targets.append(end)
+            to_return_region[start] = region_id
+            to_return_targets.append(start)
+        if "relative" not in region["simulation"]["end"]:
+            fatal(f"region {region_id} doesn't have relative count info\n")
+        end = PcCountPair(
+            region["simulation"]["end"]["pc"],
+            region["simulation"]["end"]["relative"],
+        )
+        to_return_region[end] = region_id
+        to_return_targets.append(end)
+
+        return to_return_targets, to_return_region

From f59d860e5104a4f073c7b108b576b88afec1e5cb Mon Sep 17 00:00:00 2001
From: "Bobby R. Bruce" <bbruce@ucdavis.edu>
Date: Mon, 30 Jan 2023 01:29:45 +0000
Subject: [PATCH 214/492] stdlib: Add looppoint example scripts

Change-Id: If9827af9ba7958af492a6c09cf83e4f6dac9a2eb
Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/67493
Tested-by: kokoro <noreply+kokoro@google.com>
Maintainer: Bobby Bruce <bbruce@ucdavis.edu>
Reviewed-by: Bobby Bruce <bbruce@ucdavis.edu>
---
 .../create-looppoint-checkpoints.py           | 155 ++++++++++++++++++
 .../restore-looppoint-checkpoint.py           | 150 +++++++++++++++++
 2 files changed, 305 insertions(+)
 create mode 100644 configs/example/gem5_library/looppoints/create-looppoint-checkpoints.py
 create mode 100644 configs/example/gem5_library/looppoints/restore-looppoint-checkpoint.py

diff --git a/configs/example/gem5_library/looppoints/create-looppoint-checkpoints.py b/configs/example/gem5_library/looppoints/create-looppoint-checkpoints.py
new file mode 100644
index 0000000000..1d8525fe77
--- /dev/null
+++ b/configs/example/gem5_library/looppoints/create-looppoint-checkpoints.py
@@ -0,0 +1,155 @@
+# Copyright (c) 2023 The Regents of the University of California
+# All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are
+# met: redistributions of source code must retain the above copyright
+# notice, this list of conditions and the following disclaimer;
+# redistributions in binary form must reproduce the above copyright
+# notice, this list of conditions and the following disclaimer in the
+# documentation and/or other materials provided with the distribution;
+# neither the name of the copyright holders nor the names of its
+# contributors may be used to endorse or promote products derived from
+# this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+"""
+This configuration script shows an example of how to take checkpoints for
+LoopPoint using the gem5 stdlib. To take checkpoints for LoopPoint simulation
+regions, there must be a LoopPoint data file generated by Pin or the gem5
+simulator. With the information in the LoopPoint data file, the stdlib
+modules will take checkpoints at the beginning of the simulation regions
+(warmup region included if it exists) and record all restore needed information
+into a JSON file. The JSON file is needed for later restoring, so please call
+`looppoint.output_json_file()` at the end of the simulation.
+
+This script builds a simple board with the gem5 stdlib with no cache and a
+simple memory structure to take checkpoints. Some of the components, such as
+cache hierarchy, can be changed when restoring checkpoints.
+
+Usage
+-----
+```
+scons build/X86/gem5.opt
+./build/X86/gem5.opt \
+    configs/example/gem5_library/looppoints/create-looppoint-checkpoint.py
+```
+"""
+
+from gem5.simulate.exit_event import ExitEvent
+from gem5.simulate.simulator import Simulator
+from gem5.utils.requires import requires
+from gem5.components.cachehierarchies.classic.no_cache import NoCache
+from gem5.components.boards.simple_board import SimpleBoard
+from gem5.components.memory.single_channel import SingleChannelDDR3_1600
+from gem5.components.processors.simple_processor import SimpleProcessor
+from gem5.components.processors.cpu_types import CPUTypes
+from gem5.isas import ISA
+from gem5.resources.resource import obtain_resource
+from pathlib import Path
+from gem5.simulate.exit_event_generators import (
+    looppoint_save_checkpoint_generator,
+)
+from gem5.utils.looppoint import LoopPointCheckpoint
+import argparse
+
+requires(isa_required=ISA.X86)
+
+parser = argparse.ArgumentParser(
+    description="An example looppoint workload file path"
+)
+
+# The lone arguments is a file path to a directory to store the checkpoints.
+
+parser.add_argument(
+    "--checkpoint-path",
+    type=str,
+    required=False,
+    default="looppoint_checkpoints_folder",
+    help="The directory to store the checkpoints.",
+)
+
+args = parser.parse_args()
+
+# When taking a checkpoint, the cache state is not saved, so the cache
+# hierarchy can be changed completely when restoring from a checkpoint.
+# By using NoCache() to take checkpoints, it can slightly improve the
+# performance when running in atomic mode, and it will not put any restrictions
+# on what people can do with the checkpoints.
+cache_hierarchy = NoCache()
+
+
+# Using simple memory to take checkpoints might slightly imporve the
+# performance in atomic mode. The memory structure can be changed when
+# restoring from a checkpoint, but the size of the memory must be equal or
+# greater to that taken when creating the checkpoint.
+memory = SingleChannelDDR3_1600(size="2GB")
+
+processor = SimpleProcessor(
+    cpu_type=CPUTypes.ATOMIC,
+    isa=ISA.X86,
+    # LoopPoint can work with multicore workloads
+    num_cores=9,
+)
+
+looppoint = LoopPointCheckpoint(
+    # Pass in the LoopPoint data file
+    looppoint_file=Path(
+        obtain_resource(
+            "x86-matrix-multiply-omp-100-8-global-pinpoints"
+        ).get_local_path()
+    ),
+    # True if the LoopPoint data file is a csv generated by Pin.
+    # False if it is a JSON file generated by the gem5 simulator.
+    if_csv=True,
+)
+
+board = SimpleBoard(
+    clk_freq="3GHz",
+    processor=processor,
+    memory=memory,
+    cache_hierarchy=cache_hierarchy,
+)
+
+board.set_se_looppoint_workload(
+    binary=obtain_resource("x86-matrix-multiply-omp"),
+    arguments=[100, 8],
+    # Pass LoopPoint module into the board
+    looppoint=looppoint,
+)
+
+dir = Path(args.checkpoint_path)
+dir.mkdir(exist_ok=True)
+
+simulator = Simulator(
+    board=board,
+    on_exit_event={
+        ExitEvent.SIMPOINT_BEGIN: looppoint_save_checkpoint_generator(
+            checkpoint_dir=dir,
+            looppoint=looppoint,
+            # True if the relative PC count pairs should be updated during the
+            # simulation. Default as True.
+            update_relatives=True,
+            # True if the simulation loop should exit after all the PC count
+            # pairs in the LoopPoint data file have been encountered. Default
+            # as True.
+            exit_when_empty=True,
+        )
+    },
+)
+
+simulator.run()
+
+# Output the JSON file
+looppoint.output_json_file()
diff --git a/configs/example/gem5_library/looppoints/restore-looppoint-checkpoint.py b/configs/example/gem5_library/looppoints/restore-looppoint-checkpoint.py
new file mode 100644
index 0000000000..28645259d0
--- /dev/null
+++ b/configs/example/gem5_library/looppoints/restore-looppoint-checkpoint.py
@@ -0,0 +1,150 @@
+# Copyright (c) 2023 The Regents of the University of California
+# All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are
+# met: redistributions of source code must retain the above copyright
+# notice, this list of conditions and the following disclaimer;
+# redistributions in binary form must reproduce the above copyright
+# notice, this list of conditions and the following disclaimer in the
+# documentation and/or other materials provided with the distribution;
+# neither the name of the copyright holders nor the names of its
+# contributors may be used to endorse or promote products derived from
+# this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+"""
+This configuration script shows an example of how to restore a checkpoint that
+was taken for a LoopPoint simulation region in the example-restore.py.
+All the LoopPoint information should be passed in through the JSON file
+generated by the gem5 simulator when all the checkpoints were taken.
+
+This script builds a more complex board than the board used for taking
+checkpoints.
+
+Usage
+-----
+```
+./build/X86/gem5.opt \
+    configs/example/gem5_library/looppoints/restore-looppoint-checkpoint.py
+```
+"""
+import argparse
+
+from gem5.simulate.exit_event import ExitEvent
+from gem5.simulate.simulator import Simulator
+from gem5.utils.requires import requires
+from gem5.components.cachehierarchies.classic.private_l1_private_l2_cache_hierarchy import (
+    PrivateL1PrivateL2CacheHierarchy,
+)
+from gem5.components.boards.simple_board import SimpleBoard
+from gem5.components.memory import DualChannelDDR4_2400
+from gem5.components.processors.simple_processor import SimpleProcessor
+from gem5.components.processors.cpu_types import CPUTypes
+from gem5.isas import ISA
+from gem5.resources.resource import obtain_resource
+from pathlib import Path
+from gem5.utils.looppoint import LoopPointRestore
+from m5.stats import reset, dump
+
+requires(isa_required=ISA.X86)
+
+parser = argparse.ArgumentParser(description="An restore checkpoint script.")
+
+parser.add_argument(
+    "--checkpoint-region",
+    type=str,
+    required=False,
+    choices=(
+        "1",
+        "2",
+        "3",
+        "5",
+        "6",
+        "7",
+        "8",
+        "9",
+        "10",
+        "11",
+        "12",
+        "13",
+        "14",
+    ),
+    default="1",
+    help="The checkpoint region to restore from.",
+)
+args = parser.parse_args()
+
+# The cache hierarchy can be different from the cache hierarchy used in taking
+# the checkpoints
+cache_hierarchy = PrivateL1PrivateL2CacheHierarchy(
+    l1d_size="32kB",
+    l1i_size="32kB",
+    l2_size="256kB",
+)
+
+# The memory structure can be different from the memory structure used in
+# taking the checkpoints, but the size of the memory must be equal or larger.
+memory = DualChannelDDR4_2400(size="2GB")
+
+processor = SimpleProcessor(
+    cpu_type=CPUTypes.TIMING,
+    isa=ISA.X86,
+    # The number of cores must be equal or greater than that used when taking
+    # the checkpoint.
+    num_cores=9,
+)
+
+board = SimpleBoard(
+    clk_freq="3GHz",
+    processor=processor,
+    memory=memory,
+    cache_hierarchy=cache_hierarchy,
+)
+
+looppoint = LoopPointRestore(
+    looppoint_file=Path(
+        obtain_resource(
+            "x86-matrix-multiply-omp-100-8-looppoint"
+        ).get_local_path()
+    ),
+    region_id=args.checkpoint_region,
+)
+
+board.set_se_looppoint_workload(
+    binary=obtain_resource("x86-matrix-multiply-omp"), looppoint=looppoint
+)
+
+# This generator will dump the stats and exit the simulation loop when the
+# simulation region reaches its end. In the case there is a warmup interval,
+# the simulation stats are reset after the warmup is complete.
+def reset_and_dump():
+    if len(looppoint.get_targets()) > 1:
+        print("Warmup region ended. Resetting stats.")
+        reset()
+        yield False
+    print("Region ended. Dumping stats.")
+    dump()
+    yield True
+
+
+simulator = Simulator(
+    board=board,
+    checkpoint_path=obtain_resource(
+        f"x86-matrix-multiply-omp-100-8-looppoint-checkpoint-region-{args.checkpoint_region}"
+    ).get_local_path(),
+    on_exit_event={ExitEvent.SIMPOINT_BEGIN: reset_and_dump()},
+)
+
+simulator.run()

From aae34302814df270748d542b7fb795f03dd66fad Mon Sep 17 00:00:00 2001
From: "Bobby R. Bruce" <bbruce@ucdavis.edu>
Date: Sun, 5 Feb 2023 20:20:30 +0000
Subject: [PATCH 215/492] stdlib: Refactor Looppoint

This change refactors the Looppoint files. While functionally
equivalent, this classes have been moved and altered to be easier to
handle going forward. The following changes have been made:

- New classes have been added to represent the data structure of the
  Looppoint JSON. This simplifies the parsing of JSON files and makes it
  handle Looppoint data structures. Ultimately this is hidden from the
  user via the new 'gem5.resources.Looppoint' class which will be the
  front-facing class for Looppoint interactions.
- The `LooppointCheckpoint` class has been replaced with
  `LooppointCsvLoader`. This new class takes in a CSV pintpoints file
  to load necessary looppoint data.
- The `LoopPointRestore` class has been replaced by
  `LooppointJsonLoader`.
- All Looppoint classes have been moved to `gem5.resources`. This will
  make it easier when we add Looppoints as specific gem5 resources.

Change-Id: I11dd1fe8f76658db220320584270d57cb37a3c62
Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/67611
Tested-by: kokoro <noreply+kokoro@google.com>
Reviewed-by: Bobby Bruce <bbruce@ucdavis.edu>
Maintainer: Bobby Bruce <bbruce@ucdavis.edu>
---
 .../create-looppoint-checkpoints.py           |   9 +-
 .../restore-looppoint-checkpoint.py           |   4 +-
 src/python/SConscript                         |   2 +-
 .../components/boards/se_binary_workload.py   |  21 +-
 src/python/gem5/resources/looppoint.py        | 544 ++++++++++++++++++
 .../gem5/simulate/exit_event_generators.py    |   6 +-
 src/python/gem5/utils/looppoint.py            | 384 -------------
 7 files changed, 564 insertions(+), 406 deletions(-)
 create mode 100644 src/python/gem5/resources/looppoint.py
 delete mode 100644 src/python/gem5/utils/looppoint.py

diff --git a/configs/example/gem5_library/looppoints/create-looppoint-checkpoints.py b/configs/example/gem5_library/looppoints/create-looppoint-checkpoints.py
index 1d8525fe77..f967aa56e4 100644
--- a/configs/example/gem5_library/looppoints/create-looppoint-checkpoints.py
+++ b/configs/example/gem5_library/looppoints/create-looppoint-checkpoints.py
@@ -61,7 +61,7 @@ from pathlib import Path
 from gem5.simulate.exit_event_generators import (
     looppoint_save_checkpoint_generator,
 )
-from gem5.utils.looppoint import LoopPointCheckpoint
+from gem5.resources.looppoint import LooppointCsvLoader
 import argparse
 
 requires(isa_required=ISA.X86)
@@ -103,16 +103,13 @@ processor = SimpleProcessor(
     num_cores=9,
 )
 
-looppoint = LoopPointCheckpoint(
+looppoint = LooppointCsvLoader(
     # Pass in the LoopPoint data file
     looppoint_file=Path(
         obtain_resource(
             "x86-matrix-multiply-omp-100-8-global-pinpoints"
         ).get_local_path()
-    ),
-    # True if the LoopPoint data file is a csv generated by Pin.
-    # False if it is a JSON file generated by the gem5 simulator.
-    if_csv=True,
+    )
 )
 
 board = SimpleBoard(
diff --git a/configs/example/gem5_library/looppoints/restore-looppoint-checkpoint.py b/configs/example/gem5_library/looppoints/restore-looppoint-checkpoint.py
index 28645259d0..c54fdabca1 100644
--- a/configs/example/gem5_library/looppoints/restore-looppoint-checkpoint.py
+++ b/configs/example/gem5_library/looppoints/restore-looppoint-checkpoint.py
@@ -55,7 +55,7 @@ from gem5.components.processors.cpu_types import CPUTypes
 from gem5.isas import ISA
 from gem5.resources.resource import obtain_resource
 from pathlib import Path
-from gem5.utils.looppoint import LoopPointRestore
+from gem5.utils.resource import LooppointJsonLoader
 from m5.stats import reset, dump
 
 requires(isa_required=ISA.X86)
@@ -113,7 +113,7 @@ board = SimpleBoard(
     cache_hierarchy=cache_hierarchy,
 )
 
-looppoint = LoopPointRestore(
+looppoint = LooppointJsonLoader(
     looppoint_file=Path(
         obtain_resource(
             "x86-matrix-multiply-omp-100-8-looppoint"
diff --git a/src/python/SConscript b/src/python/SConscript
index 68b5e1d926..f401c03468 100644
--- a/src/python/SConscript
+++ b/src/python/SConscript
@@ -240,7 +240,6 @@ PySource('gem5.components.processors',
 PySource('gem5.components.processors',
     'gem5/components/processors/switchable_processor.py')
 PySource('gem5.utils', 'gem5/utils/simpoint.py')
-PySource('gem5.utils', 'gem5/utils/looppoint.py')
 PySource('gem5.components.processors',
     'gem5/components/processors/traffic_generator_core.py')
 PySource('gem5.components.processors',
@@ -263,6 +262,7 @@ PySource('gem5.resources', 'gem5/resources/downloader.py')
 PySource('gem5.resources', 'gem5/resources/md5_utils.py')
 PySource('gem5.resources', 'gem5/resources/resource.py')
 PySource('gem5.resources', 'gem5/resources/workload.py')
+PySource('gem5.resources', 'gem5/resources/looppoint.py')
 PySource('gem5.utils', 'gem5/utils/__init__.py')
 PySource('gem5.utils', 'gem5/utils/filelock.py')
 PySource('gem5.utils', 'gem5/utils/override.py')
diff --git a/src/python/gem5/components/boards/se_binary_workload.py b/src/python/gem5/components/boards/se_binary_workload.py
index 404a78458f..dc5425754e 100644
--- a/src/python/gem5/components/boards/se_binary_workload.py
+++ b/src/python/gem5/components/boards/se_binary_workload.py
@@ -35,7 +35,7 @@ from ...resources.resource import (
     SimpointDirectoryResource,
 )
 
-from gem5.utils.looppoint import LoopPoint
+from gem5.resources.looppoint import Looppoint
 
 from m5.objects import SEWorkload, Process
 
@@ -177,9 +177,10 @@ class SEBinaryWorkload:
     def set_se_looppoint_workload(
         self,
         binary: AbstractResource,
+        looppoint: Looppoint,
         arguments: List[str] = [],
-        looppoint: Optional[Union[AbstractResource, LoopPoint]] = None,
         checkpoint: Optional[Union[Path, AbstractResource]] = None,
+        region_id: Optional[Union[int, str]] = None,
     ) -> None:
         """Set up the system to run a LoopPoint workload.
 
@@ -188,18 +189,18 @@ class SEBinaryWorkload:
           ISA and the simulated ISA are the same.
 
         :param binary: The resource encapsulating the binary to be run.
-        :param arguments: The input arguments for the binary
         :param looppoint: The LoopPoint object that contain all the information
         gather from the LoopPoint files and a LoopPointManager that will raise
         exit events for LoopPoints
+        :param arguments: The input arguments for the binary
+        :param region_id: If set, will only load the Looppoint region
+        corresponding to that ID.
         """
 
-        if isinstance(looppoint, AbstractResource):
-            self._looppoint_object = LoopPoint(looppoint)
-        else:
-            assert isinstance(looppoint, LoopPoint)
-            self._looppoint_object = looppoint
-
+        assert isinstance(looppoint, Looppoint)
+        self._looppoint_object = looppoint
+        if region_id:
+            self._looppoint_object.set_target_region_id(region_id=region_id)
         self._looppoint_object.setup_processor(self.get_processor())
 
         # Call set_se_binary_workload after LoopPoint setup is complete
@@ -209,7 +210,7 @@ class SEBinaryWorkload:
             checkpoint=checkpoint,
         )
 
-    def get_looppoint(self) -> LoopPoint:
+    def get_looppoint(self) -> Looppoint:
         """
         Returns the LoopPoint object set. If no LoopPoint object has been set
         an exception is thrown.
diff --git a/src/python/gem5/resources/looppoint.py b/src/python/gem5/resources/looppoint.py
new file mode 100644
index 0000000000..684faef37d
--- /dev/null
+++ b/src/python/gem5/resources/looppoint.py
@@ -0,0 +1,544 @@
+# Copyright (c) 2023 The Regents of the University of California
+# All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are
+# met: redistributions of source code must retain the above copyright
+# notice, this list of conditions and the following disclaimer;
+# redistributions in binary form must reproduce the above copyright
+# notice, this list of conditions and the following disclaimer in the
+# documentation and/or other materials provided with the distribution;
+# neither the name of the copyright holders nor the names of its
+# contributors may be used to endorse or promote products derived from
+# this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+from m5.params import PcCountPair
+from m5.objects import PcCountTrackerManager
+import m5
+
+import os
+import csv
+import json
+from pathlib import Path
+from typing import List, Optional, Dict, Union
+
+
+class LooppointRegionPC:
+    """A data structure for storing the Looppoint region's PC information.
+
+    **Note**: This is not intended to be a user-facing class. The classes
+    `LooppointJsonLoader` and `LooppointCSVLoader` can be used to load
+    and restore Simpoint data.
+    """
+
+    def __init__(self, pc: int, globl: int, relative: Optional[int] = None):
+        """
+        :param pc: The Program Counter value of this region.
+        :param globl: The global value of this region.
+        :param relative: The relative program counter value. Optional.
+        """
+        self._pc = pc
+        self._global = globl
+        self._relative = relative
+
+    def get_pc(self) -> int:
+        """Returns the Program counter value."""
+        return self._pc
+
+    def get_global(self) -> int:
+        """Returns the global value."""
+        return self._global
+
+    def get_relative(self) -> Optional[int]:
+        """If specified, returns the relative Program counter value, otherwise
+        returns None."""
+        return self._relative
+
+    def get_pc_count_pair(self) -> PcCountPair:
+        """Returns the PcCountPair for this Region PC value."""
+        return PcCountPair(self.get_pc(), self.get_global())
+
+    def update_relative_count(self, manager: PcCountTrackerManager) -> None:
+        """Updates the relative count."""
+        self._relative = int(
+            self.get_global() - manager.getPcCount(self.get_pc())
+        )
+
+    def to_json(self) -> Dict[str, int]:
+        """Returns this class in a JSON structure which can then be serialized
+        and later be restored from."""
+        to_return = {
+            "pc": self.get_pc(),
+            "global": self.get_global(),
+        }
+        if self._relative:
+            to_return["relative"] = self.get_relative()
+
+        return to_return
+
+
+class LooppointRegionWarmup:
+    """A data structure for storing a Looppoint region's warmup data.
+
+    **Note**: This is not intended to be a user-facing class. The classes
+    `LooppointJsonLoader` and `LooppointCSVLoader` can be used to load
+    and restore Simpoint data.
+    """
+
+    def __init__(self, start: PcCountPair, end: PcCountPair):
+        """
+        :param start: The starting PcCountPair.
+        :param end: The ending PcCountPair.
+        """
+        self._start = start
+        self._end = end
+
+    def get_start(self) -> PcCountPair:
+        """Returns the PcCountPair for the start of the region warmup."""
+        return self._start
+
+    def get_end(self) -> PcCountPair:
+        """Returns the PcCountPair for the end of the region warmup."""
+        return self._end
+
+    def get_pc_count_pairs(self) -> List[PcCountPair]:
+        """Returns the start and end PC count pairs."""
+        return [self.get_start(), self.get_end()]
+
+    def to_json(self) -> Dict[str, Dict[str, int]]:
+        """Returns this class in a JSON structure which can then be
+        serialized."""
+        return {
+            "start": {
+                "pc": self.get_start().pc,
+                "count": self.get_start().count,
+            },
+            "end": {
+                "pc": self.get_end().pc,
+                "count": self.get_end().count,
+            },
+        }
+
+
+class LooppointSimulation:
+    """A data structure to store the simulation region start and end region.
+
+    **Note**: This is not intended to be a user-facing class. The classes
+    `LooppointJsonLoader` and `LooppointCSVLoader` can be used to load
+    and restore Simpoint data.
+    """
+
+    def __init__(self, start: LooppointRegionPC, end: LooppointRegionPC):
+        """
+        :param start: The starting LooppointRegionPC.
+        :param end: The ending LoopppointRegionPC.
+        """
+        self._start = start
+        self._end = end
+
+    def get_start(self) -> LooppointRegionPC:
+        """Returns the starting LooppointRegionPC data structure."""
+        return self._start
+
+    def get_end(self) -> LooppointRegionPC:
+        """Returns the ending LooppointRegionPC data structure."""
+        return self._end
+
+    def get_pc_count_pairs(self) -> List[PcCountPair]:
+        """Returns the PC count pairs for the start and end
+        LoopointRegionPCs."""
+        return [
+            self.get_start().get_pc_count_pair(),
+            self.get_end().get_pc_count_pair(),
+        ]
+
+    def update_relatives_counts(
+        self, manager: PcCountTrackerManager, include_start: bool = False
+    ) -> None:
+        """Updates the relative counts for this simulation region."""
+        if include_start:
+            # if this region has a warmup interval,
+            # then update the relative count for the
+            # start of the simulation region
+            self.get_start().update_relative_count(manager=manager)
+
+        self.get_end().update_relative_count(manager=manager)
+
+    def to_json(self) -> Dict:
+        """Returns this class in a JSON structure which can then be serialized
+        and later be restored from."""
+        return {
+            "start": self.get_start().to_json(),
+            "end": self.get_end().to_json(),
+        }
+
+
+class LooppointRegion:
+    """A data structure to store Looppoint region information.
+
+    **Note**: This is not intended to be a user-facing class. The classes
+    `LooppointJsonLoader` and `LooppointCSVLoader` can be used to load
+    and restore Simpoint data.
+    """
+
+    def __init__(
+        self,
+        simulation: LooppointSimulation,
+        multiplier: float,
+        warmup: Optional[LooppointRegionWarmup] = None,
+    ):
+        """
+        :param simulation: The simulation information for this Looppoint
+        region.
+        :param multiplier: The multiplier for this Looppoint region.
+        :param warmup: The warmup information for this Looppoint region.
+        Optional.
+        """
+        self._simulation = simulation
+        self._multiplier = multiplier
+        self._warmup = warmup
+
+    def get_simulation(self) -> LooppointSimulation:
+        """Returns the simulation region information."""
+        return self._simulation
+
+    def get_multiplier(self) -> float:
+        """Returns the multiplier."""
+        return self._multiplier
+
+    def get_warmup(self) -> Optional[LooppointRegionWarmup]:
+        """If set, returns the warmup region information. Otherwise None."""
+        return self._warmup
+
+    def get_pc_count_pairs(self) -> List[PcCountPair]:
+        """Returns the PC count pairs for this Looppoint region."""
+        pc_count_pairs = self.get_simulation().get_pc_count_pairs()
+        if self.get_warmup():
+            pc_count_pairs.extend(self.get_warmup().get_pc_count_pairs())
+        return pc_count_pairs
+
+    def update_relatives_counts(self, manager: PcCountTrackerManager) -> None:
+        """Updates the relative counds of this Looppoint region."""
+        self.get_simulation().update_relatives_counts(
+            manager=manager, include_start=bool(self.get_warmup())
+        )
+
+    def get_start(self) -> PcCountPair:
+        """Returns the correct starting PcCountPair for this Looppoint
+        region."""
+        if self.get_warmup():
+            return self.get_warmup().get_start()
+        return self.get_simulation().get_start().get_pc_count_pair()
+
+    def to_json(self) -> Dict:
+        """Returns this class in a JSON structure which can then be serialized
+        and later be restored from."""
+        to_return = {
+            "simulation": self.get_simulation().to_json(),
+            "multiplier": self.get_multiplier(),
+        }
+        if self.get_warmup():
+            to_return["warmup"] = self.get_warmup().to_json()
+        return to_return
+
+
+class Looppoint:
+    """Stores all the Looppoint information for a gem5 workload."""
+
+    def __init__(self, regions: Dict[Union[str, int], LooppointRegion]):
+        """
+        :param regions: A dictionary mapping the region_ids with the
+        LooppointRegions.
+        """
+        self._regions = regions
+        self._manager = PcCountTrackerManager()
+        self._manager.targets = self.get_targets()
+
+    def set_target_region_id(self, region_id: Union[str, int]) -> None:
+        """There are use-cases where we want to obtain a looppoint data
+        structure containing a single target region via its ID. This function
+        will remove all irrelevant regions."""
+
+        if region_id not in self._regions:
+            raise Exception(f"Region ID '{region_id}' cannot be found.")
+
+        to_remove = [rid for rid in self._regions if rid is not region_id]
+        for rid in to_remove:
+            del self._regions[rid]
+
+        self._manager.targets = self.get_targets()
+
+    def get_manager(self) -> PcCountTrackerManager:
+        """Returns the PcCountTrackerManager for this Looppoint data
+        structure."""
+        return self._manager
+
+    def get_regions(self) -> Dict[Union[int, str], LooppointRegion]:
+        """Returns the regions for this Looppoint data structure."""
+        return self._regions
+
+    def setup_processor(
+        self,
+        processor: "AbstractProcessor",
+    ) -> None:
+        """
+        A function is used to setup a PC tracker in all the cores and
+        connect all the tracker to the PC tracker manager to perform
+        multithread PC tracking.
+
+        :param processor: The processor used in the simulation configuration.
+        """
+        for core in processor.get_cores():
+            core.add_pc_tracker_probe(self.get_targets(), self.get_manager())
+
+    def update_relatives_counts(self) -> None:
+        """
+        Updates the relative count for restore usage. The new relative count
+        will be stored in relevant data structures.
+        """
+        current_pair = self.get_current_pair()
+        region_start_map = self.get_region_start_id_map()
+        if current_pair in region_start_map:
+            region_id = region_start_map[current_pair]
+            self.get_regions()[region_id].update_relatives_counts(
+                manager=self.get_manager()
+            )
+
+    def get_current_region(self) -> Optional[Union[str, int]]:
+        """Returns the region id if the current PC Count pair if significant
+        (e.g. beginning of the checkpoint), otherwise, it returns None to
+        indicate the current PC Count pair is not significant.
+        """
+        current_pair = self.get_current_pair()
+        region_start_map = self.get_region_start_id_map()
+        if current_pair in region_start_map:
+            return region_start_map[current_pair]
+        return None
+
+    def get_current_pair(self) -> PcCountPair:
+        """This function returns the current PC Count pair."""
+        return self.get_manager().getCurrentPcCountPair()
+
+    def get_region_start_id_map(self) -> Dict[PcCountPair, Union[int, str]]:
+        """Returns the starting PcCountPairs mapped to the corresponding region
+        IDs. This is a helper function for quick mapping of PcCountPairs to
+        region IDs."""
+
+        regions = {}
+        for rid in self.get_regions():
+            regions[self.get_regions()[rid].get_start()] = rid
+
+        return regions
+
+    def get_targets(self) -> List[PcCountPair]:
+        """Returns the complete list of target PcCountPairs. That is, the
+        PcCountPairs each region starts with as well as the relevant warmup
+        intervals."""
+        targets = []
+        for rid in self.get_regions():
+            targets.extend(self.get_regions()[rid].get_pc_count_pairs())
+
+        return targets
+
+    def to_json(self) -> Dict[Union[int, str], Dict]:
+        """Returns this data-structure as a dictionary for serialization via
+        the `output_json_file` function."""
+        to_return = {}
+        for region_id in self.get_regions():
+            to_return[region_id] = self.get_regions()[region_id].to_json()
+        return to_return
+
+    def output_json_file(
+        self,
+        input_indent: int = 4,
+        filepath: str = os.path.join(m5.options.outdir, "looppoint.json"),
+    ) -> Dict[int, Dict]:
+        """
+        This function is used to output the _json_file into a json file
+
+        :param input_indent: the indent value of the json file
+        :param filepath: the path of the output json file
+        """
+        with open(filepath, "w") as file:
+            json.dump(self.to_json(), file, indent=input_indent)
+
+
+class LooppointCsvLoader(Looppoint):
+    """This class will create a Looppoint data structure from data extracted
+    from a Looppoint pinpoints file."""
+
+    def __init__(
+        self,
+        pinpoints_file: Union[Path, str],
+        region_id: Optional[Union[str, int]] = None,
+    ):
+        """
+        :params pinpoints_file: The pinpoints file in which the data is to be
+        expected.
+        :params region_id: If set, will only load the specified region data.
+        Otherwise, all region info is loaded. Is used when restoring to a
+        particular region.
+        """
+
+        regions = {}
+        warmups = {}
+
+        _path = (
+            pinpoints_file
+            if isinstance(pinpoints_file, Path)
+            else Path(pinpoints_file)
+        )
+
+        # This section is hard-coded to parse the data in the csv file.
+        # The csv file is assumed to have a constant format.
+        with open(_path, newline="") as csvfile:
+            reader = csv.reader(csvfile, delimiter=" ", quotechar="|")
+            for row in reader:
+                if len(row) > 1:
+                    if row[0] == "cluster":
+                        # if it is a simulation region
+                        line = row[4].split(",")
+
+                        rid = int(line[2])
+
+                        region_start = LooppointRegionPC(
+                            pc=int(line[3], 16),
+                            globl=int(line[6]),
+                            # From the CSV's I've observed, the start relative
+                            # value is never set, while the end is always set.
+                            # Given limited information, I can only determine
+                            # this is a rule of how the CSV is setup.
+                            relative=None,
+                        )
+
+                        region_end = LooppointRegionPC(
+                            pc=int(line[7], 16),
+                            globl=int(line[10]),
+                            relative=int(line[11]),
+                        )
+
+                        simulation = LooppointSimulation(
+                            start=region_start, end=region_end
+                        )
+
+                        multiplier = float(line[14])
+
+                        region = LooppointRegion(
+                            simulation=simulation, multiplier=multiplier
+                        )
+
+                        regions[rid] = region
+
+                    elif row[0] == "Warmup":
+                        line = row[3].split(",")
+                        rid = int(line[0])
+                        start = PcCountPair(int(line[3], 16), int(line[6]))
+                        end = PcCountPair(int(line[7], 16), int(line[10]))
+
+                        warmup = LooppointRegionWarmup(start=start, end=end)
+                        warmups[rid] = warmup
+
+        for rid in warmups:
+            if rid not in regions:
+                raise Exception(
+                    "Warmup region ID '{rid}' does not have a "
+                    "corresponding region."
+                )
+            regions[rid]._warmup = warmups[rid]
+
+        super().__init__(regions=regions)
+
+        if region_id:
+            self.set_target_region_id(region_id=region_id)
+
+
+class LooppointJsonLoader(Looppoint):
+    """This class will create a generate a Looppoint data structure from data
+    extracted from a Looppoint json file."""
+
+    def __init__(
+        self,
+        looppoint_file: Union[str, Path],
+        region_id: Optional[Union[str, int]] = None,
+    ) -> None:
+        """
+        :param looppoint_file: a json file generated by gem5 that has all the
+        LoopPoint data information
+        :params region_id: If set, will only load the specified region data.
+        Otherwise, all region info is loaded. Is used when restoring to a
+        particular region.
+        """
+
+        _path = (
+            looppoint_file
+            if isinstance(looppoint_file, Path)
+            else Path(looppoint_file)
+        )
+
+        regions = {}
+        with open(_path) as file:
+            json_contents = json.load(file)
+            for rid in json_contents:
+
+                start_pc = int(json_contents[rid]["simulation"]["start"]["pc"])
+                start_globl = int(
+                    json_contents[rid]["simulation"]["start"]["global"]
+                )
+                start_relative = (
+                    int(json_contents[rid]["simulation"]["start"]["relative"])
+                    if "relative" in json_contents[rid]["simulation"]["start"]
+                    else None
+                )
+                start = LooppointRegionPC(
+                    pc=start_pc,
+                    globl=start_globl,
+                    relative=start_relative,
+                )
+
+                end_pc = int(json_contents[rid]["simulation"]["end"]["pc"])
+                end_globl = int(
+                    json_contents[rid]["simulation"]["end"]["global"]
+                )
+                end_relative = (
+                    int(json_contents[rid]["simulation"]["end"]["relative"])
+                    if "relative" in json_contents[rid]["simulation"]["end"]
+                    else None
+                )
+                end = LooppointRegionPC(
+                    pc=end_pc,
+                    globl=end_globl,
+                    relative=end_relative,
+                )
+                simulation = LooppointSimulation(start=start, end=end)
+                multiplier = float(json_contents[rid]["multiplier"])
+                warmup = None
+                if "warmup" in json_contents[rid]:
+                    start = PcCountPair(
+                        json_contents[rid]["warmup"]["start"]["pc"],
+                        json_contents[rid]["warmup"]["start"]["count"],
+                    )
+                    end = PcCountPair(
+                        json_contents[rid]["warmup"]["end"]["pc"],
+                        json_contents[rid]["warmup"]["end"]["count"],
+                    )
+                    warmup = LooppointRegionWarmup(start=start, end=end)
+
+                regions[rid] = LooppointRegion(
+                    simulation=simulation, multiplier=multiplier, warmup=warmup
+                )
+
+        super().__init__(regions=regions)
+        if region_id:
+            self.set_target_region_id(region_id=region_id)
diff --git a/src/python/gem5/simulate/exit_event_generators.py b/src/python/gem5/simulate/exit_event_generators.py
index 82eba17543..37998d3a9b 100644
--- a/src/python/gem5/simulate/exit_event_generators.py
+++ b/src/python/gem5/simulate/exit_event_generators.py
@@ -29,7 +29,7 @@ import m5.stats
 from ..components.processors.abstract_processor import AbstractProcessor
 from ..components.processors.switchable_processor import SwitchableProcessor
 from ..resources.resource import SimpointResource
-from gem5.utils.looppoint import LoopPoint
+from gem5.resources.looppoint import Looppoint
 from m5.util import warn
 from pathlib import Path
 
@@ -172,7 +172,7 @@ def simpoints_save_checkpoint_generator(
 
 def looppoint_save_checkpoint_generator(
     checkpoint_dir: Path,
-    looppoint: LoopPoint,
+    looppoint: Looppoint,
     update_relatives: bool = True,
     exit_when_empty: bool = True,
 ):
@@ -203,7 +203,7 @@ def looppoint_save_checkpoint_generator(
         # will return an integer greater than 0. By significant PC Count pair,
         # it means the PC Count pair that indicates where to take the
         # checkpoint at. This is determined in the LoopPoint module.
-        if region != -1:
+        if region:
             if update_relatives:
                 looppoint.update_relatives_counts()
             m5.checkpoint((checkpoint_dir / f"cpt.Region{region}").as_posix())
diff --git a/src/python/gem5/utils/looppoint.py b/src/python/gem5/utils/looppoint.py
deleted file mode 100644
index 8e01e3030f..0000000000
--- a/src/python/gem5/utils/looppoint.py
+++ /dev/null
@@ -1,384 +0,0 @@
-# Copyright (c) 2022 The Regents of the University of California
-# All rights reserved.
-#
-# Redistribution and use in source and binary forms, with or without
-# modification, are permitted provided that the following conditions are
-# met: redistributions of source code must retain the above copyright
-# notice, this list of conditions and the following disclaimer;
-# redistributions in binary form must reproduce the above copyright
-# notice, this list of conditions and the following disclaimer in the
-# documentation and/or other materials provided with the distribution;
-# neither the name of the copyright holders nor the names of its
-# contributors may be used to endorse or promote products derived from
-# this software without specific prior written permission.
-#
-# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
-# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
-# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
-# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
-# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
-# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
-# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
-# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
-# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
-# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
-# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-
-
-from m5.util import fatal
-from m5.params import PcCountPair
-from pathlib import Path
-from typing import List, Dict, Tuple
-from gem5.components.processors.abstract_processor import AbstractProcessor
-from m5.objects import PcCountTrackerManager
-import csv
-import re
-import json
-import m5
-import os
-
-
-class LoopPoint:
-    """
-    This LoopPoint class is used to manage the information needed for LoopPoint
-    in workload
-    """
-
-    def __init__(
-        self,
-        targets: List[PcCountPair],
-        regions: Dict[PcCountPair, int],
-        json_file: Dict[int, Dict],
-    ) -> None:
-        """
-        :param targets: a list of PcCountPair that are used to generate exit
-        event at when the PcCountTrackerManager encounter this PcCountPair in
-        execution
-        :param regions: a dictionary used to find the corresponding region id
-        for the significant PcCountPair. This is mainly used to ensure
-        checkpoints are taken in the correct PcCountPair or relative counts are
-        updated at the correct count
-        :param json_file: all the LoopPoint data including relative counts and
-        multiplier are stored in this parameter. It can be outputted as a json
-        file.
-        """
-
-        self._manager = PcCountTrackerManager()
-        self._manager.targets = targets
-        self._targets = targets
-        self._regions = regions
-        self._json_file = json_file
-
-    def setup_processor(
-        self,
-        processor: AbstractProcessor,
-    ) -> None:
-        """
-        This function is used to setup a PC tracker in all the cores and
-        connect all the tracker to the PC tracker manager to perform
-        multithread PC tracking
-        :param processor: the processor used in the simulation configuration
-        """
-        for core in processor.get_cores():
-            core.add_pc_tracker_probe(self._targets, self._manager)
-
-    def update_relatives_counts(self) -> None:
-        """
-        This function is used to update the relative count for restore used.
-        The new relative count will be stored in the _json_file and can be
-        outputted into a json file by calling the output_json_file function.
-        """
-        current_pair = self._manager.getCurrentPcCountPair()
-        if current_pair in self._regions:
-            rid = self._regions[current_pair]
-            region = self._json_file[rid]["simulation"]
-            if "warmup" in self._json_file[rid]:
-                # if this region has a warmup interval,
-                # then update the relative count for the
-                # start of the simulation region
-                start = region["start"]["pc"]
-                temp = region["start"]["global"] - self._manager.getPcCount(
-                    start
-                )
-                self._json_file[rid]["simulation"]["start"]["relative"] = int(
-                    temp
-                )
-            end = region["end"]["pc"]
-            temp = region["end"]["global"] - self._manager.getPcCount(end)
-            self._json_file[rid]["simulation"]["end"]["relative"] = int(temp)
-
-    def output_json_file(
-        self,
-        input_indent: int = 4,
-        filepath: str = os.path.join(m5.options.outdir, "outdir.json"),
-    ) -> Dict[int, Dict]:
-        """
-        This function is used to output the _json_file into a json file
-        :param input_indent: the indent value of the json file
-        :param filepath: the path of the output file
-        """
-        with open(filepath, "w") as file:
-            json.dump(self._json_file, file, indent=input_indent)
-
-    def get_current_region(self) -> int:
-        """
-        This function returns the region id if the current PC Count pair is
-        significant(e.x. beginning of the checkpoint), otherwise, it returns
-        a '-1' to indicate the current PC Count pair is not significant
-        """
-        current_pair = self._manager.getCurrentPcCountPair()
-        if current_pair in self._regions:
-            return self._regions[current_pair]
-        return -1
-
-    def get_current_pair(self) -> PcCountPair:
-        """
-        This function returns the current PC Count pair
-        """
-        return self._manager.getCurrentPcCountPair()
-
-    def get_regions(self) -> Dict[PcCountPair, int]:
-        """
-        This function returns the complete dictionary of _regions
-        """
-        return self._regions
-
-    def get_targets(self) -> List[PcCountPair]:
-        """
-        This function returns the complete list of _targets
-        """
-        return self._targets
-
-
-class LoopPointCheckpoint(LoopPoint):
-    def __init__(self, looppoint_file: Path, if_csv: bool) -> None:
-        """
-        This class is specifically designed to take in the LoopPoint data file
-        and generate the information needed to take checkpoints for LoopPoint
-        regions(warmup region+simulation region)
-        :param looppoint_file: the director of the LoopPoint data file
-        :param if_csv: if the file is a csv file, then it is True. If the file
-        is a json file, then it is False
-        """
-
-        _json_file = {}
-        _targets = []
-        _region_id = {}
-
-        if if_csv:
-            self.profile_csv(looppoint_file, _targets, _json_file, _region_id)
-        else:
-            self.profile_json(looppoint_file, _targets, _json_file, _region_id)
-
-        super().__init__(
-            _targets,
-            _region_id,
-            _json_file,
-        )
-
-    def profile_csv(
-        self,
-        looppoint_file_path: Path,
-        targets: List[PcCountPair],
-        json_file: Dict[int, Dict],
-        region_id: Dict[PcCountPair, int],
-    ) -> None:
-        """
-        This function profiles the csv LoopPoint data file into three variables
-        to take correct checkpoints for LoopPoint
-        :param looppoint_file_path: the director of the LoopPoint data file
-        :param targets: a list of PcCountPair
-        :param json_file: a dictionary for all the LoopPoint data
-        :param region_id: a dictionary for all the significant PcCountPair and
-        its corresponding region id
-        """
-
-        # This section is hard-coded to parse the data in the csv file.
-        # The csv file is assumed to have a constant format.
-        with open(looppoint_file_path, newline="") as csvfile:
-            reader = csv.reader(csvfile, delimiter=" ", quotechar="|")
-            for row in reader:
-                if len(row) > 1:
-                    if row[0] == "cluster":
-                        # if it is a simulation region
-                        line = row[4].split(",")
-                        start = PcCountPair(int(line[3], 16), int(line[6]))
-                        end = PcCountPair(int(line[7], 16), int(line[10]))
-                        if int(line[2]) in json_file:
-                            #  if this region was created in the json_file
-                            json_file[int(line[2])]["simulation"] = {
-                                "start": {"pc": int(line[3], 16)}
-                            }
-                        else:
-                            json_file[int(line[2])] = {
-                                "simulation": {
-                                    "start": {"pc": int(line[3], 16)}
-                                }
-                            }
-                        json_file[int(line[2])]["simulation"]["start"][
-                            "global"
-                        ] = int(line[6])
-                        json_file[int(line[2])]["simulation"]["end"] = {
-                            "pc": int(line[7], 16)
-                        }
-                        json_file[int(line[2])]["simulation"]["end"][
-                            "global"
-                        ] = int(line[10])
-                        json_file[int(line[2])]["multiplier"] = float(line[14])
-                        targets.append(start)
-                        targets.append(end)
-                        # store all the PC Count pairs from the file to the
-                        # targets list
-                    elif row[0] == "Warmup":
-                        line = row[3].split(",")
-                        start = PcCountPair(int(line[3], 16), int(line[6]))
-                        end = PcCountPair(int(line[7], 16), int(line[10]))
-                        if int(line[0]) in json_file:
-                            json_file[int(line[0])]["warmup"] = {
-                                "start": {"pc": int(line[3], 16)}
-                            }
-                        else:
-                            json_file[int(line[0])] = {
-                                "warmup": {"start": {"pc": int(line[3], 16)}}
-                            }
-                        json_file[int(line[0])]["warmup"]["start"][
-                            "count"
-                        ] = int(line[6])
-                        json_file[int(line[0])]["warmup"]["end"] = {
-                            "pc": int(line[7], 16)
-                        }
-                        json_file[int(line[0])]["warmup"]["end"][
-                            "count"
-                        ] = int(line[10])
-                        targets.append(start)
-                        targets.append(end)
-                        # store all the PC Count pairs from the file to the
-                        # targets list
-
-        for rid, region in json_file.items():
-            # this loop iterates all the regions and find the significant PC
-            # Count pair for the region
-            if "warmup" in region:
-                # if the region has a warmup interval, then the checkpoint
-                # should be taken at the start of the warmup interval
-                start = PcCountPair(
-                    region["warmup"]["start"]["pc"],
-                    region["warmup"]["start"]["count"],
-                )
-            else:
-                # if the region does not have a warmup interval, then the
-                # checkpoint should be taken at the start of the simulation
-                # region
-                start = PcCountPair(
-                    region["simulation"]["start"]["pc"],
-                    region["simulation"]["start"]["global"],
-                )
-            region_id[start] = rid
-
-    def profile_json(
-        self,
-        looppoint_file_path: Path,
-        targets: List[PcCountPair],
-        json_file: Dict[int, Dict],
-        region_id: Dict[PcCountPair, int],
-    ) -> None:
-        """
-        This function profiles the json LoopPoint data file into three
-        variables to take correct checkpoints for LoopPoint
-        :param looppoint_file_path: the director of the LoopPoint data file
-        :param targets: a list of PcCountPair
-        :param json_file: a dictionary for all the LoopPoint data
-        :param region_id: a dictionary for all the significant PcCountPair and
-        its corresponding region id
-        """
-
-        with open(looppoint_file_path) as file:
-            json_file = json.load(file)
-            # load all json information into the json_file variable
-            for rid, region in json_file.items():
-                # iterates all regions
-                sim_start = PcCountPair(
-                    region["simulation"]["start"]["pc"],
-                    region["simulation"]["start"]["global"],
-                )
-                targets.append(sim_start)
-                # store all PC Count pairs in the file into targets list
-                end = PcCountPair(
-                    region["simulation"]["end"]["pc"],
-                    region["simulation"]["end"]["global"],
-                )
-                targets.append(end)
-                if "warmup" in region:
-                    # if there is a warmup in the region, then the checkpoint
-                    # should be taken at the start of the warmup interval
-                    start = PcCountPair(
-                        region["warmup"]["start"]["pc"],
-                        region["warmup"]["start"]["count"],
-                    )
-                    targets.append(start)
-                    end = PcCountPair(
-                        region["warmup"]["end"]["pc"],
-                        region["warmup"]["end"]["count"],
-                    )
-                    targets.append(end)
-                else:
-                    # if there is not a warmup interval in the region, then the
-                    # checkpoint should be taken at the start of the simulation
-                    # region
-                    start = sim_start
-                region_id[start] = rid
-
-
-class LoopPointRestore(LoopPoint):
-    def __init__(self, looppoint_file: Path, region_id: int) -> None:
-        """
-        This class is specifically designed to take in the LoopPoint data file
-        and generator information needed to restore a checkpoint taken by the
-        LoopPointCheckPoint.
-        :param looppoint_file: a json file generated by gem5 that has all the
-        LoopPoint data information
-        :param region_id: The region ID we will be restoring to.
-        """
-
-        with open(looppoint_file) as file:
-            json_file = json.load(file)
-
-        targets, regions = self.get_region(
-            json_file=json_file, region_id=region_id
-        )
-
-        super().__init__(targets=targets, regions=regions, json_file=json_file)
-
-    def get_region(
-        self, json_file: Dict[int, Dict], region_id: int
-    ) -> Tuple[List[PcCountPair], Dict[PcCountPair, int]]:
-        to_return_region = {}
-        to_return_targets = []
-
-        if region_id not in json_file:
-            # if the region id does not exist in the LoopPoint data
-            # file raise a fatal message
-            fatal(f"{region_id} is not a valid region\n")
-        region = json_file[region_id]
-        if "warmup" in region:
-            if "relative" not in region["simulation"]["start"]:
-                # if there are not relative counts for the PC Count
-                # pair then it means there is not enough information to
-                # restore this checkpoint
-                fatal(f"region {region_id} doesn't have relative count info\n")
-            start = PcCountPair(
-                region["simulation"]["start"]["pc"],
-                region["simulation"]["start"]["relative"],
-            )
-            to_return_region[start] = region_id
-            to_return_targets.append(start)
-        if "relative" not in region["simulation"]["end"]:
-            fatal(f"region {region_id} doesn't have relative count info\n")
-        end = PcCountPair(
-            region["simulation"]["end"]["pc"],
-            region["simulation"]["end"]["relative"],
-        )
-        to_return_region[end] = region_id
-        to_return_targets.append(end)
-
-        return to_return_targets, to_return_region

From 52194c87b0ca80c480e5b5c8b26868204d601f19 Mon Sep 17 00:00:00 2001
From: "Bobby R. Bruce" <bbruce@ucdavis.edu>
Date: Wed, 8 Feb 2023 20:06:16 +0000
Subject: [PATCH 216/492] tests: Add pyunit tests for Looppoint

Change-Id: Ie275e50bbcf5fb3d38ee98d7ada27c3afe4ec1b0
Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/67757
Maintainer: Bobby Bruce <bbruce@ucdavis.edu>
Reviewed-by: Bobby Bruce <bbruce@ucdavis.edu>
Tested-by: kokoro <noreply+kokoro@google.com>
---
 tests/pyunit/stdlib/pyunit_looppoint.py       | 600 ++++++++++++++++++
 .../matrix.1_92.global.pinpoints_reduced.csv  |  28 +
 tests/pyunit/stdlib/refs/output.json          |  40 ++
 3 files changed, 668 insertions(+)
 create mode 100644 tests/pyunit/stdlib/pyunit_looppoint.py
 create mode 100644 tests/pyunit/stdlib/refs/matrix.1_92.global.pinpoints_reduced.csv
 create mode 100644 tests/pyunit/stdlib/refs/output.json

diff --git a/tests/pyunit/stdlib/pyunit_looppoint.py b/tests/pyunit/stdlib/pyunit_looppoint.py
new file mode 100644
index 0000000000..0cb708e8ac
--- /dev/null
+++ b/tests/pyunit/stdlib/pyunit_looppoint.py
@@ -0,0 +1,600 @@
+# Copyright (c) 2023 The Regents of the University of California
+# All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are
+# met: redistributions of source code must retain the above copyright
+# notice, this list of conditions and the following disclaimer;
+# redistributions in binary form must reproduce the above copyright
+# notice, this list of conditions and the following disclaimer in the
+# documentation and/or other materials provided with the distribution;
+# neither the name of the copyright holders nor the names of its
+# contributors may be used to endorse or promote products derived from
+# this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+import unittest
+
+from m5.params import PcCountPair
+
+from gem5.resources.looppoint import (
+    Looppoint,
+    LooppointRegionPC,
+    LooppointRegionWarmup,
+    LooppointSimulation,
+    LooppointRegion,
+    LooppointCsvLoader,
+    LooppointJsonLoader,
+)
+
+import os
+
+
+class LooppointRegionPCTestSuite(unittest.TestCase):
+    """Tests the resources.looppoint.LooppointRegionPC class."""
+
+    def test_construction_with_relative(self) -> None:
+        region_pc = LooppointRegionPC(pc=444, globl=65, relative=454)
+
+        self.assertEquals(444, region_pc.get_pc())
+        self.assertEquals(65, region_pc.get_global())
+        self.assertEquals(454, region_pc.get_relative())
+
+    def test_construction_without_relative(self) -> None:
+        region_pc = LooppointRegionPC(pc=43454, globl=653434)
+
+        self.assertEquals(43454, region_pc.get_pc())
+        self.assertEquals(653434, region_pc.get_global())
+        self.assertIsNone(region_pc.get_relative())
+
+    def test_get_pc_count_pair(self) -> None:
+        region_pc = LooppointRegionPC(pc=1, globl=2)
+        expected = PcCountPair(1, 2)
+        self.assertEquals(expected, region_pc.get_pc_count_pair())
+
+    def update_relative_count(self) -> None:
+        pass  # Not really sure what to do here...
+
+    def test_to_json_with_relative(self) -> None:
+        region_pc = LooppointRegionPC(pc=100, globl=200, relative=300)
+        json_contents = region_pc.to_json()
+
+        self.assertEquals(3, len(json_contents))
+        self.assertTrue("pc" in json_contents)
+        self.assertEquals(100, json_contents["pc"])
+        self.assertTrue("global" in json_contents)
+        self.assertEquals(200, json_contents["global"])
+        self.assertTrue("relative" in json_contents)
+        self.assertEquals(300, json_contents["relative"])
+
+    def test_to_json_without_relative(self) -> None:
+        region_pc = LooppointRegionPC(pc=1111, globl=2222)
+        json_contents = region_pc.to_json()
+
+        self.assertEquals(2, len(json_contents))
+        self.assertTrue("pc" in json_contents)
+        self.assertEquals(1111, json_contents["pc"])
+        self.assertTrue("global" in json_contents)
+        self.assertEquals(2222, json_contents["global"])
+        self.assertFalse("relative" in json_contents)
+
+
+class LooppointRegionWarmupTestSuite(unittest.TestCase):
+    """Tests the resources.looppoint.LooppointWarmup class."""
+
+    def test_construction(self) -> None:
+        region_warmup = LooppointRegionWarmup(
+            start=PcCountPair(123, 456), end=PcCountPair(789, 1011)
+        )
+
+        self.assertEquals(PcCountPair(123, 456), region_warmup.get_start())
+        self.assertEquals(PcCountPair(789, 1011), region_warmup.get_end())
+
+    def test_get_pc_count_pairs(self) -> None:
+        region_warmup = LooppointRegionWarmup(
+            start=PcCountPair(1, 1), end=PcCountPair(2, 2)
+        )
+
+        output = region_warmup.get_pc_count_pairs()
+        self.assertEquals(2, len(output))
+        self.assertEquals(PcCountPair(1, 1), output[0])
+        self.assertEquals(PcCountPair(2, 2), output[1])
+
+    def test_to_json(self) -> None:
+        region_warmup = LooppointRegionWarmup(
+            start=PcCountPair(100, 200), end=PcCountPair(101, 202)
+        )
+
+        expected = {
+            "start": {"pc": 100, "count": 200},
+            "end": {"pc": 101, "count": 202},
+        }
+
+        self.assertDictEqual(expected, region_warmup.to_json())
+
+
+class LooppointSimulationTestSuite(unittest.TestCase):
+    """Tests the resources.looppoint.LooppointSimulation class."""
+
+    def test_construction_with(self) -> None:
+        sim = LooppointSimulation(
+            start=LooppointRegionPC(pc=444, globl=65, relative=454),
+            end=LooppointRegionPC(pc=555, globl=699),
+        )
+
+        sim_start = sim.get_start()
+
+        self.assertEquals(444, sim_start.get_pc())
+        self.assertEquals(65, sim_start.get_global())
+        self.assertEquals(454, sim_start.get_relative())
+
+        sim_end = sim.get_end()
+
+        self.assertEquals(555, sim_end.get_pc())
+        self.assertEquals(699, sim_end.get_global())
+        self.assertIsNone(sim_end.get_relative())
+
+    def test_get_pc_count_pairs(self) -> None:
+        sim = LooppointSimulation(
+            start=LooppointRegionPC(pc=56, globl=45, relative=34),
+            end=LooppointRegionPC(pc=23, globl=12),
+        )
+
+        sim_pc_count_pairs = sim.get_pc_count_pairs()
+        self.assertEquals(2, len(sim_pc_count_pairs))
+        self.assertEquals(PcCountPair(56, 45), sim_pc_count_pairs[0])
+        self.assertEquals(PcCountPair(23, 12), sim_pc_count_pairs[1])
+
+    def test_get_json(self) -> None:
+        sim = LooppointSimulation(
+            start=LooppointRegionPC(pc=1, globl=2, relative=3),
+            end=LooppointRegionPC(pc=4, globl=5),
+        )
+        expected = {
+            "start": {
+                "pc": 1,
+                "global": 2,
+                "relative": 3,
+            },
+            "end": {
+                "pc": 4,
+                "global": 5,
+            },
+        }
+        self.assertDictEqual(expected, sim.to_json())
+
+
+class LooppointRegionTestSuite(unittest.TestCase):
+    """Tests the resources.looppoint.LooppointRegion class."""
+
+    def test_construction_with_warmup(self):
+        region = LooppointRegion(
+            simulation=LooppointSimulation(
+                start=LooppointRegionPC(pc=1, globl=2, relative=3),
+                end=LooppointRegionPC(pc=6, globl=7),
+            ),
+            multiplier=5.6,
+            warmup=LooppointRegionWarmup(
+                start=PcCountPair(100, 200), end=PcCountPair(101, 202)
+            ),
+        )
+
+        self.assertTrue(
+            isinstance(region.get_simulation(), LooppointSimulation)
+        )
+        self.assertEquals(5.6, region.get_multiplier())
+        self.assertIsNotNone(region.get_warmup())
+        self.assertTrue(isinstance(region.get_warmup(), LooppointRegionWarmup))
+
+    def test_construction_without_warmup(self):
+        region = LooppointRegion(
+            simulation=LooppointSimulation(
+                start=LooppointRegionPC(pc=56, globl=2345, relative=344),
+                end=LooppointRegionPC(pc=645, globl=457),
+            ),
+            multiplier=5444.4,
+        )
+
+        self.assertTrue(
+            isinstance(region.get_simulation(), LooppointSimulation)
+        )
+        self.assertEquals(5444.4, region.get_multiplier())
+        self.assertIsNone(region.get_warmup())
+
+    def test_get_pc_count_pairs_with_warmup(self):
+        region = LooppointRegion(
+            simulation=LooppointSimulation(
+                start=LooppointRegionPC(pc=1, globl=2, relative=3),
+                end=LooppointRegionPC(pc=6, globl=7),
+            ),
+            multiplier=5.6,
+            warmup=LooppointRegionWarmup(
+                start=PcCountPair(100, 200), end=PcCountPair(101, 202)
+            ),
+        )
+        pc_count_pairs = region.get_pc_count_pairs()
+
+        self.assertEquals(4, len(pc_count_pairs))
+        self.assertEquals(PcCountPair(1, 2), pc_count_pairs[0])
+        self.assertEquals(PcCountPair(6, 7), pc_count_pairs[1])
+        self.assertEquals(PcCountPair(100, 200), pc_count_pairs[2])
+        self.assertEquals(PcCountPair(101, 202), pc_count_pairs[3])
+
+    def test_get_pc_count_pairs_without_warmup(self):
+        region = LooppointRegion(
+            simulation=LooppointSimulation(
+                start=LooppointRegionPC(pc=56, globl=2345, relative=344),
+                end=LooppointRegionPC(pc=645, globl=457),
+            ),
+            multiplier=5444.4,
+        )
+
+        pc_count_pairs = region.get_pc_count_pairs()
+
+        self.assertEquals(2, len(pc_count_pairs))
+        self.assertEquals(PcCountPair(56, 2345), pc_count_pairs[0])
+        self.assertEquals(PcCountPair(645, 457), pc_count_pairs[1])
+
+
+class LooppointTestSuite(unittest.TestCase):
+    """Tests the resources.looppoint.Looppoint class."""
+
+    def test_construction(self):
+        region1 = LooppointRegion(
+            simulation=LooppointSimulation(
+                start=LooppointRegionPC(pc=56, globl=2345, relative=344),
+                end=LooppointRegionPC(pc=645, globl=457),
+            ),
+            multiplier=5444.4,
+        )
+        region2 = LooppointRegion(
+            simulation=LooppointSimulation(
+                start=LooppointRegionPC(pc=67, globl=254, relative=3345),
+                end=LooppointRegionPC(pc=64554, globl=7454),
+            ),
+            multiplier=5.6,
+            warmup=LooppointRegionWarmup(
+                start=PcCountPair(100, 200), end=PcCountPair(101, 202)
+            ),
+        )
+
+        looppoint = Looppoint(
+            regions={
+                1: region1,
+                3: region2,
+            }
+        )
+
+        self.assertEquals(2, len(looppoint.get_regions()))
+        self.assertTrue(1 in looppoint.get_regions())
+        self.assertEquals(region1, looppoint.get_regions()[1])
+        self.assertTrue(3 in looppoint.get_regions())
+        self.assertEquals(region2, looppoint.get_regions()[3])
+
+    def test_get_targets(self):
+        region1 = LooppointRegion(
+            simulation=LooppointSimulation(
+                start=LooppointRegionPC(pc=56, globl=2345, relative=344),
+                end=LooppointRegionPC(pc=645, globl=457),
+            ),
+            multiplier=5444.4,
+        )
+        region2 = LooppointRegion(
+            simulation=LooppointSimulation(
+                start=LooppointRegionPC(pc=67, globl=254, relative=3345),
+                end=LooppointRegionPC(pc=64554, globl=7454),
+            ),
+            multiplier=5.6,
+            warmup=LooppointRegionWarmup(
+                start=PcCountPair(100, 200), end=PcCountPair(101, 202)
+            ),
+        )
+
+        looppoint = Looppoint(
+            regions={
+                1: region1,
+                3: region2,
+            }
+        )
+
+        targets = looppoint.get_targets()
+        self.assertEquals(6, len(targets))
+        self.assertEquals(PcCountPair(56, 2345), targets[0])
+        self.assertEquals(PcCountPair(645, 457), targets[1])
+        self.assertEquals(PcCountPair(67, 254), targets[2])
+        self.assertEquals(PcCountPair(64554, 7454), targets[3])
+        self.assertEquals(PcCountPair(100, 200), targets[4])
+        self.assertEquals(PcCountPair(101, 202), targets[5])
+
+    def test_get_region_start_id_map(self):
+
+        region1 = LooppointRegion(
+            simulation=LooppointSimulation(
+                start=LooppointRegionPC(pc=56, globl=2345, relative=344),
+                end=LooppointRegionPC(pc=645, globl=457),
+            ),
+            multiplier=5444.4,
+        )
+        region2 = LooppointRegion(
+            simulation=LooppointSimulation(
+                start=LooppointRegionPC(pc=67, globl=254, relative=3345),
+                end=LooppointRegionPC(pc=64554, globl=7454),
+            ),
+            multiplier=5.6,
+            warmup=LooppointRegionWarmup(
+                start=PcCountPair(100, 200), end=PcCountPair(101, 202)
+            ),
+        )
+
+        looppoint = Looppoint(
+            regions={
+                1: region1,
+                3: region2,
+            }
+        )
+
+        region_start_id_map = looppoint.get_region_start_id_map()
+
+        self.assertEquals(2, len(region_start_id_map))
+
+        # The start of region1.
+        self.assertTrue(PcCountPair(56, 2345) in region_start_id_map)
+        self.assertEquals(1, region_start_id_map[PcCountPair(56, 2345)])
+
+        # The start of region2.  Since this has a warmup, it's the warmup.
+        self.assertTrue(PcCountPair(100, 200) in region_start_id_map)
+        self.assertEquals(3, region_start_id_map[PcCountPair(100, 200)])
+
+    def test_to_json(self) -> None:
+        region1 = LooppointRegion(
+            simulation=LooppointSimulation(
+                start=LooppointRegionPC(pc=56, globl=2345, relative=344),
+                end=LooppointRegionPC(pc=645, globl=457),
+            ),
+            multiplier=5444.4,
+        )
+        region2 = LooppointRegion(
+            simulation=LooppointSimulation(
+                start=LooppointRegionPC(pc=67, globl=254, relative=3345),
+                end=LooppointRegionPC(pc=64554, globl=7454),
+            ),
+            multiplier=5.6,
+            warmup=LooppointRegionWarmup(
+                start=PcCountPair(100, 200), end=PcCountPair(101, 202)
+            ),
+        )
+
+        looppoint = Looppoint(
+            regions={
+                1: region1,
+                3: region2,
+            }
+        )
+
+        expected = {
+            1: {
+                "simulation": {
+                    "start": {
+                        "pc": 56,
+                        "global": 2345,
+                        "relative": 344,
+                    },
+                    "end": {
+                        "pc": 645,
+                        "global": 457,
+                    },
+                },
+                "multiplier": 5444.4,
+            },
+            3: {
+                "simulation": {
+                    "start": {
+                        "pc": 67,
+                        "global": 254,
+                        "relative": 3345,
+                    },
+                    "end": {
+                        "pc": 64554,
+                        "global": 7454,
+                    },
+                },
+                "multiplier": 5.6,
+                "warmup": {
+                    "start": {
+                        "pc": 100,
+                        "count": 200,
+                    },
+                    "end": {
+                        "pc": 101,
+                        "count": 202,
+                    },
+                },
+            },
+        }
+
+        # Need to increase the max for if there is an error.
+        self.maxDiff = 2056
+        self.assertDictEqual(expected, looppoint.to_json())
+
+
+class LooppointCSVLoaderTestSuite(unittest.TestCase):
+    """Tests the resources.looppoint.LooppointCsvLoader class."""
+
+    def test_load_pinpoints_matrix(self):
+        looppoint = LooppointCsvLoader(
+            pinpoints_file=os.path.join(
+                os.path.realpath(os.path.dirname(__file__)),
+                "refs",
+                "matrix.1_92.global.pinpoints_reduced.csv",
+            )
+        )
+
+        regions = looppoint.get_regions()
+        self.assertEquals(3, len(regions))
+
+        region1 = regions[1]
+        self.assertEquals(4.0, region1.get_multiplier())
+
+        region1start = region1.get_simulation().get_start()
+        self.assertEquals(0x4069D0, region1start.get_pc())
+        self.assertEquals(211076617, region1start.get_global())
+        self.assertIsNone(region1start.get_relative())
+
+        region1end = region1.get_simulation().get_end()
+        self.assertEquals(0x4069D0, region1end.get_pc())
+        self.assertEquals(219060252, region1end.get_global())
+        self.assertIsNotNone(region1end.get_relative())
+        self.assertEquals(1060676, region1end.get_relative())
+
+        self.assertIsNone(region1.get_warmup())
+
+        region2 = regions[2]
+        self.assertEquals(5.001, region2.get_multiplier())
+
+        region2start = region2.get_simulation().get_start()
+        self.assertEquals(0x4069D0, region2start.get_pc())
+        self.assertEquals(407294228, region2start.get_global())
+        self.assertIsNone(region2start.get_relative())
+
+        region2end = region2.get_simulation().get_end()
+        self.assertEquals(0x4069D0, region2end.get_pc())
+        self.assertEquals(415282447, region2end.get_global())
+        self.assertIsNotNone(region2end.get_relative())
+        self.assertEquals(1035231, region2end.get_relative())
+
+        region2warmup = region2.get_warmup()
+        self.assertIsNotNone(region2warmup)
+        self.assertEquals(
+            PcCountPair(0x406880, 48111518), region2warmup.get_start()
+        )
+        self.assertEquals(
+            PcCountPair(0x4069D0, 407294228), region2warmup.get_end()
+        )
+
+        region3 = regions[3]
+        self.assertEquals(4.0, region3.get_multiplier())
+
+        region3start = region3.get_simulation().get_start()
+        self.assertEquals(0x4069D0, region3start.get_pc())
+        self.assertEquals(187978221, region3start.get_global())
+        self.assertIsNone(region3start.get_relative())
+
+        region3end = region3.get_simulation().get_end()
+        self.assertEquals(0x406880, region3end.get_pc())
+        self.assertEquals(23520614, region3end.get_global())
+        self.assertIsNotNone(region3end.get_relative())
+        self.assertEquals(144352, region3end.get_relative())
+
+        self.assertIsNone(region3.get_warmup())
+
+    def test_load_pinpoints_matrix_region_1(self):
+        looppoint = LooppointCsvLoader(
+            pinpoints_file=os.path.join(
+                os.path.realpath(os.path.dirname(__file__)),
+                "refs",
+                "matrix.1_92.global.pinpoints_reduced.csv",
+            ),
+            region_id=1,
+        )
+
+        regions = looppoint.get_regions()
+        self.assertEquals(1, len(regions))
+
+        self.assertTrue(1 in regions)
+        region1 = regions[1]
+        self.assertEquals(4.0, region1.get_multiplier())
+
+        region1start = region1.get_simulation().get_start()
+        self.assertEquals(0x4069D0, region1start.get_pc())
+        self.assertEquals(211076617, region1start.get_global())
+        self.assertIsNone(region1start.get_relative())
+
+        region1end = region1.get_simulation().get_end()
+        self.assertEquals(0x4069D0, region1end.get_pc())
+        self.assertEquals(219060252, region1end.get_global())
+        self.assertIsNotNone(region1end.get_relative())
+        self.assertEquals(1060676, region1end.get_relative())
+
+        self.assertIsNone(region1.get_warmup())
+
+
+class LooppointJsonLoaderTestSuite(unittest.TestCase):
+    """Tests the resources.looppoint.LooppointJsonLoader class."""
+
+    def test_load_pinpoints_matrix_region_1(self):
+        looppoint = LooppointJsonLoader(
+            looppoint_file=os.path.join(
+                os.path.realpath(os.path.dirname(__file__)),
+                "refs",
+                "output.json",
+            ),
+            region_id="1",
+        )
+
+        self.assertEquals(1, len(looppoint.get_regions()))
+        self.assertTrue("1" in looppoint.get_regions())
+        region = looppoint.get_regions()["1"]
+
+        self.assertEquals(4.0, region.get_multiplier())
+
+        region_start = region.get_simulation().get_start()
+        self.assertEquals(4221392, region_start.get_pc())
+        self.assertEquals(211076617, region_start.get_global())
+        self.assertIsNotNone(region_start.get_relative())
+        self.assertEquals(15326617, region_start.get_relative())
+
+        region_end = region.get_simulation().get_end()
+        self.assertEquals(4221392, region_end.get_pc())
+        self.assertEquals(219060252, region_end.get_global())
+        self.assertIsNotNone(region_end.get_relative())
+        self.assertEquals(23310252, region_end.get_relative())
+
+        region_warmup = region.get_warmup()
+        self.assertIsNotNone(region_warmup)
+
+        self.assertEquals(
+            PcCountPair(4221056, 23520614), region_warmup.get_start()
+        )
+        self.assertEquals(
+            PcCountPair(4221392, 211076617), region_warmup.get_end()
+        )
+
+    def test_load_pinpoints_matrix_region_2(self):
+        looppoint = LooppointJsonLoader(
+            looppoint_file=os.path.join(
+                os.path.realpath(os.path.dirname(__file__)),
+                "refs",
+                "output.json",
+            ),
+            region_id="2",
+        )
+
+        self.assertEquals(1, len(looppoint.get_regions()))
+        self.assertTrue("2" in looppoint.get_regions())
+        region = looppoint.get_regions()["2"]
+
+        self.assertEquals(5.001, region.get_multiplier())
+
+        region_start = region.get_simulation().get_start()
+        self.assertEquals(4221392, region_start.get_pc())
+        self.assertEquals(407294228, region_start.get_global())
+        self.assertIsNone(region_start.get_relative())
+
+        region_end = region.get_simulation().get_end()
+        self.assertEquals(4221392, region_end.get_pc())
+        self.assertEquals(415282447, region_end.get_global())
+        self.assertIsNone(region_end.get_relative())
+
+        region_warmup = region.get_warmup()
+        self.assertIsNone(region_warmup)
diff --git a/tests/pyunit/stdlib/refs/matrix.1_92.global.pinpoints_reduced.csv b/tests/pyunit/stdlib/refs/matrix.1_92.global.pinpoints_reduced.csv
new file mode 100644
index 0000000000..56e3fc57bf
--- /dev/null
+++ b/tests/pyunit/stdlib/refs/matrix.1_92.global.pinpoints_reduced.csv
@@ -0,0 +1,28 @@
+# Regions based on: /home/alen/isca2022/looppoint/tools/sde-external-9.0.0-2021-11-07-lin/pinplay-scripts/pcregions.py --label_file t.labels --warmup_factor 2 --tid global --bbv_file t.bb --region_file t.simpoints --weight_file t.weights
+
+# comment,thread-id,region-id,start-pc, start-image-name, start-image-offset, start-pc-count,end-pc, end-image-name, end-image-offset, end-pc-count,end-pc-relative-count, region-length, region-weight, region-multiplier, region-type
+
+# RegionId = 1 Slice = 27 Icount = 2160042521 Length = 79958388 Weight = 0.04651 Multiplier = 4.000 ClusterSlicecount = 4 ClusterIcount = 320005167
+#Start: pc : 0x4069d0 image: matrix-omp offset: 0x69d0 absolute_count: 211076617  source-info: matrix-omp.cpp:75
+#End: pc : 0x4069d0 image: matrix-omp offset: 0x69d0 absolute_count: 219060252  relative_count: 1060676.0 source-info: matrix-omp.cpp:75
+cluster 0 from slice 27,global,1,0x4069d0,matrix-omp,0x69d0,211076617,0x4069d0,matrix-omp,0x69d0,219060252,1060676,79958388,0.04651,4.000,simulation
+
+# RegionId = 2 Slice = 52 Icount = 4160001603 Length = 80000011 Weight = 0.05814 Multiplier = 5.001 ClusterSlicecount = 5 ClusterIcount = 400040399
+#Start: pc : 0x4069d0 image: matrix-omp offset: 0x69d0 absolute_count: 407294228  source-info: matrix-omp.cpp:75
+#End: pc : 0x4069d0 image: matrix-omp offset: 0x69d0 absolute_count: 415282447  relative_count: 1035231.0 source-info: matrix-omp.cpp:75
+cluster 1 from slice 52,global,2,0x4069d0,matrix-omp,0x69d0,407294228,0x4069d0,matrix-omp,0x69d0,415282447,1035231,80000011,0.05814,5.001,simulation
+
+# RegionId = 3 Slice = 24 Icount = 1920000792 Length = 80027459 Weight = 0.04651 Multiplier = 4.000 ClusterSlicecount = 4 ClusterIcount = 320021091
+#Start: pc : 0x4069d0 image: matrix-omp offset: 0x69d0 absolute_count: 187978221  source-info: matrix-omp.cpp:75
+#End: pc : 0x406880 image: matrix-omp offset: 0x6880 absolute_count: 23520614  relative_count: 144352.0 source-info: matrix-omp.cpp:95
+cluster 2 from slice 24,global,3,0x4069d0,matrix-omp,0x69d0,187978221,0x406880,matrix-omp,0x6880,23520614,144352,80027459,0.04651,4.000,simulation
+
+# RegionId = 16 Slice = 50 Icount = 4000001542 Length = 160000061 WarmupFactor = 2
+#Start: pc : 0x406880 image: matrix-omp offset: 0x6880 absolute_count: 48111518 source-info: matrix-omp.cpp:95
+#End: pc : 0x4069d0 image: matrix-omp offset: 0x69d0 absolute_count: 407294228  relative_count: 2004125.0 source-info: matrix-omp.cpp:75
+Warmup for regionid 2,global,16,0x406880,matrix-omp,0x6880,48111518,0x4069d0,matrix-omp,0x69d0,407294228,2004125,160000061,0.00000,0.000,warmup:2
+
+# First PC, 0x403050
+# Total instructions in 13 regions = 1039968792
+# Total instructions in workload = 6880002703
+# Total slices in workload = 86
diff --git a/tests/pyunit/stdlib/refs/output.json b/tests/pyunit/stdlib/refs/output.json
new file mode 100644
index 0000000000..4324bac3de
--- /dev/null
+++ b/tests/pyunit/stdlib/refs/output.json
@@ -0,0 +1,40 @@
+{
+    "1": {
+        "simulation": {
+            "start": {
+                "pc": 4221392,
+                "global": 211076617,
+                "relative": 15326617
+            },
+            "end": {
+                "pc": 4221392,
+                "global": 219060252,
+                "relative": 23310252
+            }
+        },
+        "multiplier": 4.0,
+        "warmup": {
+            "start": {
+                "pc": 4221056,
+                "count": 23520614
+            },
+            "end": {
+                "pc": 4221392,
+                "count": 211076617
+            }
+        }
+    },
+    "2": {
+        "simulation": {
+            "start": {
+                "pc": 4221392,
+                "global": 407294228
+            },
+            "end": {
+                "pc": 4221392,
+                "global": 415282447
+            }
+        },
+        "multiplier": 5.001
+    }
+}

From 29b19530ce6dbca9fa3d00b987cdde592e1ab0d0 Mon Sep 17 00:00:00 2001
From: "Bobby R. Bruce" <bbruce@ucdavis.edu>
Date: Wed, 8 Feb 2023 20:07:25 +0000
Subject: [PATCH 217/492] tests: Incorporate Looppoint example scripts into
 TestLib

Change-Id: I97d89d3cc80ce8d8991ca8d3cb4aab8019324d76
Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/67758
Maintainer: Bobby Bruce <bbruce@ucdavis.edu>
Tested-by: kokoro <noreply+kokoro@google.com>
Reviewed-by: Bobby Bruce <bbruce@ucdavis.edu>
---
 .../test_gem5_library_examples.py             | 56 ++++++++++++++++++-
 1 file changed, 55 insertions(+), 1 deletion(-)

diff --git a/tests/gem5/gem5_library_example_tests/test_gem5_library_examples.py b/tests/gem5/gem5_library_example_tests/test_gem5_library_examples.py
index 9b5c2c67ff..7db46b0770 100644
--- a/tests/gem5/gem5_library_example_tests/test_gem5_library_examples.py
+++ b/tests/gem5/gem5_library_example_tests/test_gem5_library_examples.py
@@ -1,4 +1,4 @@
-# Copyright (c) 2021 The Regents of the University of California
+# Copyright (c) 2021-2023 The Regents of the University of California
 # All rights reserved.
 #
 # Redistribution and use in source and binary forms, with or without
@@ -324,3 +324,57 @@ gem5_verify_config(
     valid_hosts=constants.supported_hosts,
     length=constants.very_long_tag,
 )
+
+gem5_verify_config(
+    name="test-gem5-library-create-looppoint-checkpoints",
+    fixtures=(),
+    verifiers=(),
+    config=joinpath(
+        config.base_dir,
+        "configs",
+        "example",
+        "gem5_library",
+        "looppoints",
+        "create-looppoint-checkpoint.py",
+    ),
+    config_args=[
+        "--checkpoint-path",
+        joinpath(resource_path, "looppoint-checkpoint-save"),
+    ],
+    valid_isas=(constants.all_compiled_tag,),
+    valid_hosts=constants.supported_hosts,
+    length=constants.very_long_tag,
+)
+
+for region in (
+    "1",
+    "2",
+    "3",
+    "5",
+    "6",
+    "7",
+    "8",
+    "9",
+    "10",
+    "11",
+    "12",
+    "13",
+    "14",
+):
+    gem5_verify_config(
+        name=f"test-gem5-library-restore-looppoint-checkpoint-region-f{region}",
+        fixtures=(),
+        verifiers=(),
+        config=joinpath(
+            config.base_dir,
+            "configs",
+            "example",
+            "gem5_library",
+            "looppoints",
+            "restore-looppoint-checkpoint.py",
+        ),
+        config_args=["--checkpoint-region", region],
+        valid_isas=(constants.all_compiled_tag,),
+        valid_hosts=constants.supported_hosts,
+        length=constants.very_long_tag,
+    )

From 4ad1150372aa1b3ecced8438c50b625ae6433f55 Mon Sep 17 00:00:00 2001
From: "Bobby R. Bruce" <bbruce@ucdavis.edu>
Date: Fri, 10 Feb 2023 15:15:42 +0000
Subject: [PATCH 218/492] stdlib: Add the LooppointCsvResource resource

This resource wraps the LooppointCsvLoader class so it may be obtained
as a specialized resource via gem5 resources.

Relevant tests and config scripts have been updated.

Change-Id: Ib8e5ff5500fb1560951c9c0110e3c3aec8ca3c42
Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/67857
Maintainer: Bobby Bruce <bbruce@ucdavis.edu>
Reviewed-by: Bobby Bruce <bbruce@ucdavis.edu>
Tested-by: kokoro <noreply+kokoro@google.com>
---
 .../create-looppoint-checkpoints.py           | 12 ++--------
 src/python/gem5/resources/resource.py         | 22 +++++++++++++++++++
 .../pyunit_resource_specialization.py         | 21 ++++++++++++++++++
 .../refs/resource-specialization.json         |  9 ++++++++
 4 files changed, 54 insertions(+), 10 deletions(-)

diff --git a/configs/example/gem5_library/looppoints/create-looppoint-checkpoints.py b/configs/example/gem5_library/looppoints/create-looppoint-checkpoints.py
index f967aa56e4..6c23d38c7d 100644
--- a/configs/example/gem5_library/looppoints/create-looppoint-checkpoints.py
+++ b/configs/example/gem5_library/looppoints/create-looppoint-checkpoints.py
@@ -61,7 +61,7 @@ from pathlib import Path
 from gem5.simulate.exit_event_generators import (
     looppoint_save_checkpoint_generator,
 )
-from gem5.resources.looppoint import LooppointCsvLoader
+
 import argparse
 
 requires(isa_required=ISA.X86)
@@ -103,15 +103,6 @@ processor = SimpleProcessor(
     num_cores=9,
 )
 
-looppoint = LooppointCsvLoader(
-    # Pass in the LoopPoint data file
-    looppoint_file=Path(
-        obtain_resource(
-            "x86-matrix-multiply-omp-100-8-global-pinpoints"
-        ).get_local_path()
-    )
-)
-
 board = SimpleBoard(
     clk_freq="3GHz",
     processor=processor,
@@ -119,6 +110,7 @@ board = SimpleBoard(
     cache_hierarchy=cache_hierarchy,
 )
 
+looppoint = obtain_resource("x86-matrix-multiply-omp-100-8-global-pinpoints")
 board.set_se_looppoint_workload(
     binary=obtain_resource("x86-matrix-multiply-omp"),
     arguments=[100, 8],
diff --git a/src/python/gem5/resources/resource.py b/src/python/gem5/resources/resource.py
index 678497eaa7..0cf58800f2 100644
--- a/src/python/gem5/resources/resource.py
+++ b/src/python/gem5/resources/resource.py
@@ -31,6 +31,7 @@ from m5.util import warn, fatal
 
 from .downloader import get_resource, get_resources_json_obj
 
+from .looppoint import LooppointCsvLoader
 from ..isas import ISA, get_isa_from_str
 
 from typing import Optional, Dict, Union, Type, Tuple, List
@@ -394,6 +395,26 @@ class SimpointResource(AbstractResource):
         return warmup_list
 
 
+class LooppointCsvResource(FileResource, LooppointCsvLoader):
+    """This Looppoint resource used to create a Looppoint resource from a
+    pinpoints CSV file"""
+
+    def __init__(
+        self,
+        local_path: str,
+        documentation: Optional[str] = None,
+        source: Optional[str] = None,
+        **kwargs,
+    ):
+        FileResource.__init__(
+            self,
+            local_path=local_path,
+            documentation=documentation,
+            source=source,
+        )
+        LooppointCsvLoader.__init__(self, pinpoints_file=Path(local_path))
+
+
 class SimpointDirectoryResource(SimpointResource):
     """A Simpoint diretory resource. This Simpoint Resource assumes the
     existance of a directory containing a simpoint file and a weight file."""
@@ -714,4 +735,5 @@ _get_resource_json_type_map = {
     "simpoint": SimpointResource,
     "simpoint-directory": SimpointDirectoryResource,
     "resource": Resource,
+    "looppoint-pinpoint-csv": LooppointCsvResource,
 }
diff --git a/tests/pyunit/stdlib/resources/pyunit_resource_specialization.py b/tests/pyunit/stdlib/resources/pyunit_resource_specialization.py
index f31e35d719..5c60eb5c4a 100644
--- a/tests/pyunit/stdlib/resources/pyunit_resource_specialization.py
+++ b/tests/pyunit/stdlib/resources/pyunit_resource_specialization.py
@@ -29,6 +29,7 @@ import unittest
 from pathlib import Path
 
 from gem5.resources.resource import *
+from gem5.resources.looppoint import LooppointCsvLoader
 from gem5.isas import ISA
 
 
@@ -235,3 +236,23 @@ class ResourceSpecializationSuite(unittest.TestCase):
             "directory-example documentation.", resource.get_documentation()
         )
         self.assertIsNone(resource.get_source())
+
+    def test_looppoint_pinpoints_resource(self) -> None:
+        """Tests the creation of LooppointCreatorCSVResource via a Looppoint
+        pinpoints csv file."""
+
+        resource = obtain_resource(
+            resource_name="looppoint-pinpoint-csv-resource",
+            resource_directory=self.get_resource_dir(),
+        )
+
+        self.assertIsInstance(resource, LooppointCsvResource)
+
+        # The LooppointCreatorCSVResource should be a subtype of
+        # LooppointCsvLoader.
+        self.assertIsInstance(resource, LooppointCsvLoader)
+
+        self.assertEquals(
+            "A looppoint pinpoints csv file.", resource.get_documentation()
+        )
+        self.assertIsNone(resource.get_source())
diff --git a/tests/pyunit/stdlib/resources/refs/resource-specialization.json b/tests/pyunit/stdlib/resources/refs/resource-specialization.json
index 01671b564b..bfe0d4a448 100644
--- a/tests/pyunit/stdlib/resources/refs/resource-specialization.json
+++ b/tests/pyunit/stdlib/resources/refs/resource-specialization.json
@@ -108,6 +108,15 @@
             "warmup_interval": 23445,
             "simpoint_list" : [2,3,4,15],
             "weight_list" : [0.1, 0.2, 0.4, 0.3]
+        },
+        {
+            "type": "looppoint-pinpoint-csv",
+            "name": "looppoint-pinpoint-csv-resource",
+            "documentation" : "A looppoint pinpoints csv file.",
+            "is_zipped" :  false,
+            "md5sum" : "199ab22dd463dc70ee2d034bfe045082",
+            "url": "http://dist.gem5.org/dist/develop/pinpoints/x86-matrix-multiply-omp-100-8-global-pinpoints-20230127",
+            "source" : null
         }
     ]
 }

From 5d0dd10cfa04967d8e6b9814f03a09b4b901a362 Mon Sep 17 00:00:00 2001
From: "Bobby R. Bruce" <bbruce@ucdavis.edu>
Date: Fri, 10 Feb 2023 21:01:34 +0000
Subject: [PATCH 219/492] stdlib: Add LooppointJsonResource resource

This resource wraps the LooppointJsonLoader class for use with gem5
resources.

Change-Id: Ic00d689c289330bab8564abc4c68a9047d5096e0
Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/67858
Reviewed-by: Bobby Bruce <bbruce@ucdavis.edu>
Tested-by: kokoro <noreply+kokoro@google.com>
Maintainer: Bobby Bruce <bbruce@ucdavis.edu>
---
 src/python/gem5/resources/resource.py         | 23 +++++++++++++++-
 .../pyunit_resource_specialization.py         | 27 ++++++++++++++++++-
 .../refs/resource-specialization.json         | 10 +++++++
 3 files changed, 58 insertions(+), 2 deletions(-)

diff --git a/src/python/gem5/resources/resource.py b/src/python/gem5/resources/resource.py
index 0cf58800f2..9cba9cf88f 100644
--- a/src/python/gem5/resources/resource.py
+++ b/src/python/gem5/resources/resource.py
@@ -31,7 +31,7 @@ from m5.util import warn, fatal
 
 from .downloader import get_resource, get_resources_json_obj
 
-from .looppoint import LooppointCsvLoader
+from .looppoint import LooppointCsvLoader, LooppointJsonLoader
 from ..isas import ISA, get_isa_from_str
 
 from typing import Optional, Dict, Union, Type, Tuple, List
@@ -415,6 +415,26 @@ class LooppointCsvResource(FileResource, LooppointCsvLoader):
         LooppointCsvLoader.__init__(self, pinpoints_file=Path(local_path))
 
 
+class LooppointJsonResource(FileResource, LooppointJsonLoader):
+    def __init__(
+        self,
+        local_path: str,
+        region_id: Optional[Union[str, int]] = None,
+        documentation: Optional[str] = None,
+        source: Optional[str] = None,
+        **kwargs,
+    ):
+        FileResource.__init__(
+            self,
+            local_path=local_path,
+            documentation=documentation,
+            source=source,
+        )
+        LooppointJsonLoader.__init__(
+            self, looppoint_file=local_path, region_id=region_id
+        )
+
+
 class SimpointDirectoryResource(SimpointResource):
     """A Simpoint diretory resource. This Simpoint Resource assumes the
     existance of a directory containing a simpoint file and a weight file."""
@@ -736,4 +756,5 @@ _get_resource_json_type_map = {
     "simpoint-directory": SimpointDirectoryResource,
     "resource": Resource,
     "looppoint-pinpoint-csv": LooppointCsvResource,
+    "looppoint-json": LooppointJsonResource,
 }
diff --git a/tests/pyunit/stdlib/resources/pyunit_resource_specialization.py b/tests/pyunit/stdlib/resources/pyunit_resource_specialization.py
index 5c60eb5c4a..660bf5f38f 100644
--- a/tests/pyunit/stdlib/resources/pyunit_resource_specialization.py
+++ b/tests/pyunit/stdlib/resources/pyunit_resource_specialization.py
@@ -29,7 +29,12 @@ import unittest
 from pathlib import Path
 
 from gem5.resources.resource import *
-from gem5.resources.looppoint import LooppointCsvLoader
+
+from gem5.resources.looppoint import (
+    LooppointCsvLoader,
+    LooppointJsonLoader,
+)
+
 from gem5.isas import ISA
 
 
@@ -256,3 +261,23 @@ class ResourceSpecializationSuite(unittest.TestCase):
             "A looppoint pinpoints csv file.", resource.get_documentation()
         )
         self.assertIsNone(resource.get_source())
+
+    def test_looppoint_json_restore_resource(self) -> None:
+        """Tests the creation of LooppointJsonResource via a
+        Looppoint JSON file."""
+
+        resource = obtain_resource(
+            resource_name="looppoint-json-restore-resource-region-1",
+            resource_directory=self.get_resource_dir(),
+        )
+
+        self.assertIsInstance(resource, LooppointJsonResource)
+        self.assertIsInstance(resource, LooppointJsonLoader)
+
+        self.assertEquals(1, len(resource.get_regions()))
+        self.assertTrue("1" in resource.get_regions())
+
+        self.assertEquals(
+            "A looppoint json file resource.", resource.get_documentation()
+        )
+        self.assertIsNone(resource.get_source())
diff --git a/tests/pyunit/stdlib/resources/refs/resource-specialization.json b/tests/pyunit/stdlib/resources/refs/resource-specialization.json
index bfe0d4a448..c4d5eb4714 100644
--- a/tests/pyunit/stdlib/resources/refs/resource-specialization.json
+++ b/tests/pyunit/stdlib/resources/refs/resource-specialization.json
@@ -117,6 +117,16 @@
             "md5sum" : "199ab22dd463dc70ee2d034bfe045082",
             "url": "http://dist.gem5.org/dist/develop/pinpoints/x86-matrix-multiply-omp-100-8-global-pinpoints-20230127",
             "source" : null
+        },
+        {
+            "type": "looppoint-json",
+            "name": "looppoint-json-restore-resource-region-1",
+            "documentation" : "A looppoint json file resource.",
+            "is_zipped" :  false,
+            "region_id" : "1",
+            "md5sum" : "a71ed64908b082ea619b26b940a643c1",
+            "url": "http://dist.gem5.org/dist/develop/looppoints/x86-matrix-multiply-omp-100-8-looppoint-json-20230128",
+            "source" : null
         }
     ]
 }

From ce516397dac6171ba06ffdd0bb8a581647ec321a Mon Sep 17 00:00:00 2001
From: "Bobby R. Bruce" <bbruce@ucdavis.edu>
Date: Wed, 22 Feb 2023 08:41:11 -0800
Subject: [PATCH 220/492] configs stdlib: Update checkpoint resource for
 riscv-hello

This change updates the riscv-hello-restore-checkpoint.py script's
checkpoint for one compatible with v23

Change-Id: Idee262491db45049d9afe69190bc8890d75c8cdf
Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/68337
Maintainer: Bobby Bruce <bbruce@ucdavis.edu>
Reviewed-by: Bobby Bruce <bbruce@ucdavis.edu>
Tested-by: kokoro <noreply+kokoro@google.com>
---
 .../gem5_library/checkpoints/riscv-hello-restore-checkpoint.py  | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/configs/example/gem5_library/checkpoints/riscv-hello-restore-checkpoint.py b/configs/example/gem5_library/checkpoints/riscv-hello-restore-checkpoint.py
index e112b76ddb..60a7dd0f59 100644
--- a/configs/example/gem5_library/checkpoints/riscv-hello-restore-checkpoint.py
+++ b/configs/example/gem5_library/checkpoints/riscv-hello-restore-checkpoint.py
@@ -90,7 +90,7 @@ board = SimpleBoard(
 board.set_se_binary_workload(
     # the workload should be the same as the save-checkpoint script
     Resource("riscv-hello"),
-    checkpoint=Resource("riscv-hello-example-checkpoint-v22-1"),
+    checkpoint=Resource("riscv-hello-example-checkpoint-v23"),
 )
 
 simulator = Simulator(

From 55348d062c9ff36538c9fee0c2e0867e0ccef4d9 Mon Sep 17 00:00:00 2001
From: "Bobby R. Bruce" <bbruce@ucdavis.edu>
Date: Wed, 22 Feb 2023 17:03:04 -0800
Subject: [PATCH 221/492] configs,stdlib: Update simpoint-se-restore checkpoint

This patch fixes the checkpoint resource for the simpoints-se-restore.py
script.

Change-Id: I29698844023c54fdc645c99da4a19c77bae58729
Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/68338
Reviewed-by: Bobby Bruce <bbruce@ucdavis.edu>
Tested-by: kokoro <noreply+kokoro@google.com>
Maintainer: Bobby Bruce <bbruce@ucdavis.edu>
---
 .../example/gem5_library/checkpoints/simpoints-se-restore.py    | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/configs/example/gem5_library/checkpoints/simpoints-se-restore.py b/configs/example/gem5_library/checkpoints/simpoints-se-restore.py
index 5ff82dba04..d063c143a7 100644
--- a/configs/example/gem5_library/checkpoints/simpoints-se-restore.py
+++ b/configs/example/gem5_library/checkpoints/simpoints-se-restore.py
@@ -119,7 +119,7 @@ board.set_se_simpoint_workload(
         weight_list=[0.1, 0.2, 0.4, 0.3],
         warmup_interval=1000000,
     ),
-    checkpoint=obtain_resource("simpoints-se-checkpoints-v22-1-v2"),
+    checkpoint=obtain_resource("simpoints-se-checkpoints-v23-0-v1"),
 )
 
 

From b4b024808ec21b006155f04852eb3f93877b2de9 Mon Sep 17 00:00:00 2001
From: "Bobby R. Bruce" <bbruce@ucdavis.edu>
Date: Wed, 22 Feb 2023 03:17:09 -0800
Subject: [PATCH 222/492] stdlib: Fix checkpoint setting through set_workload
 func

Due to a typo ('checkpoint_dir' instead of 'checkpoint'), setting
checkpoints via the set_workload functions was not working. This patch
fixes this error.

Change-Id: I5720406f2a01f166666e80079c1f84651f750fe2
Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/68277
Maintainer: Bobby Bruce <bbruce@ucdavis.edu>
Reviewed-by: Bobby Bruce <bbruce@ucdavis.edu>
Reviewed-by: Jason Lowe-Power <power.jg@gmail.com>
Tested-by: kokoro <noreply+kokoro@google.com>
---
 src/python/gem5/components/boards/se_binary_workload.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/src/python/gem5/components/boards/se_binary_workload.py b/src/python/gem5/components/boards/se_binary_workload.py
index dc5425754e..2558ce3cf2 100644
--- a/src/python/gem5/components/boards/se_binary_workload.py
+++ b/src/python/gem5/components/boards/se_binary_workload.py
@@ -111,16 +111,16 @@ class SEBinaryWorkload:
         # Set whether to exit on work items for the se_workload
         self.exit_on_work_items = exit_on_work_items
 
-        # Here we set `self._checkpoint_dir`. This is then used by the
+        # Here we set `self._checkpoint`. This is then used by the
         # Simulator module to setup checkpoints.
         if checkpoint:
             if isinstance(checkpoint, Path):
                 self._checkpoint = checkpoint
             elif isinstance(checkpoint, AbstractResource):
-                self._checkpoint_dir = Path(checkpoint.get_local_path())
+                self._checkpoint = Path(checkpoint.get_local_path())
             else:
                 raise Exception(
-                    "The checkpoint_dir must be None, Path, or "
+                    "The checkpoint must be None, Path, or "
                     "AbstractResource."
                 )
 

From 3bb19be083d38249d1e1652184d05b0e6406a660 Mon Sep 17 00:00:00 2001
From: "Bobby R. Bruce" <bbruce@ucdavis.edu>
Date: Sun, 19 Feb 2023 13:44:04 +0000
Subject: [PATCH 223/492] configs,stdlib: Add Workloads to Looppoint examples

Change-Id: I6a0eebb127ad8a6796c96390594868668424c9b4
Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/68117
Maintainer: Bobby Bruce <bbruce@ucdavis.edu>
Reviewed-by: Bobby Bruce <bbruce@ucdavis.edu>
Tested-by: kokoro <noreply+kokoro@google.com>
---
 .../create-looppoint-checkpoints.py           | 14 ++++-------
 .../restore-looppoint-checkpoint.py           | 23 +++++--------------
 2 files changed, 10 insertions(+), 27 deletions(-)

diff --git a/configs/example/gem5_library/looppoints/create-looppoint-checkpoints.py b/configs/example/gem5_library/looppoints/create-looppoint-checkpoints.py
index 6c23d38c7d..abb15fb7f8 100644
--- a/configs/example/gem5_library/looppoints/create-looppoint-checkpoints.py
+++ b/configs/example/gem5_library/looppoints/create-looppoint-checkpoints.py
@@ -56,7 +56,7 @@ from gem5.components.memory.single_channel import SingleChannelDDR3_1600
 from gem5.components.processors.simple_processor import SimpleProcessor
 from gem5.components.processors.cpu_types import CPUTypes
 from gem5.isas import ISA
-from gem5.resources.resource import obtain_resource
+from gem5.resources.workload import Workload
 from pathlib import Path
 from gem5.simulate.exit_event_generators import (
     looppoint_save_checkpoint_generator,
@@ -110,13 +110,7 @@ board = SimpleBoard(
     cache_hierarchy=cache_hierarchy,
 )
 
-looppoint = obtain_resource("x86-matrix-multiply-omp-100-8-global-pinpoints")
-board.set_se_looppoint_workload(
-    binary=obtain_resource("x86-matrix-multiply-omp"),
-    arguments=[100, 8],
-    # Pass LoopPoint module into the board
-    looppoint=looppoint,
-)
+board.set_workload(Workload("x86-matrix-multiply-omp-100-8-looppoint-csv"))
 
 dir = Path(args.checkpoint_path)
 dir.mkdir(exist_ok=True)
@@ -126,7 +120,7 @@ simulator = Simulator(
     on_exit_event={
         ExitEvent.SIMPOINT_BEGIN: looppoint_save_checkpoint_generator(
             checkpoint_dir=dir,
-            looppoint=looppoint,
+            looppoint=board.get_looppoint(),
             # True if the relative PC count pairs should be updated during the
             # simulation. Default as True.
             update_relatives=True,
@@ -141,4 +135,4 @@ simulator = Simulator(
 simulator.run()
 
 # Output the JSON file
-looppoint.output_json_file()
+board.get_looppoint().output_json_file()
diff --git a/configs/example/gem5_library/looppoints/restore-looppoint-checkpoint.py b/configs/example/gem5_library/looppoints/restore-looppoint-checkpoint.py
index c54fdabca1..21353a34a1 100644
--- a/configs/example/gem5_library/looppoints/restore-looppoint-checkpoint.py
+++ b/configs/example/gem5_library/looppoints/restore-looppoint-checkpoint.py
@@ -54,8 +54,7 @@ from gem5.components.processors.simple_processor import SimpleProcessor
 from gem5.components.processors.cpu_types import CPUTypes
 from gem5.isas import ISA
 from gem5.resources.resource import obtain_resource
-from pathlib import Path
-from gem5.utils.resource import LooppointJsonLoader
+from gem5.resources.workload import Workload
 from m5.stats import reset, dump
 
 requires(isa_required=ISA.X86)
@@ -113,24 +112,17 @@ board = SimpleBoard(
     cache_hierarchy=cache_hierarchy,
 )
 
-looppoint = LooppointJsonLoader(
-    looppoint_file=Path(
-        obtain_resource(
-            "x86-matrix-multiply-omp-100-8-looppoint"
-        ).get_local_path()
-    ),
-    region_id=args.checkpoint_region,
-)
-
-board.set_se_looppoint_workload(
-    binary=obtain_resource("x86-matrix-multiply-omp"), looppoint=looppoint
+board.set_workload(
+    Workload(
+        f"x86-matrix-multiply-omp-100-8-looppoint-region-{args.checkpoint_region}"
+    )
 )
 
 # This generator will dump the stats and exit the simulation loop when the
 # simulation region reaches its end. In the case there is a warmup interval,
 # the simulation stats are reset after the warmup is complete.
 def reset_and_dump():
-    if len(looppoint.get_targets()) > 1:
+    if len(board.get_looppoint().get_targets()) > 1:
         print("Warmup region ended. Resetting stats.")
         reset()
         yield False
@@ -141,9 +133,6 @@ def reset_and_dump():
 
 simulator = Simulator(
     board=board,
-    checkpoint_path=obtain_resource(
-        f"x86-matrix-multiply-omp-100-8-looppoint-checkpoint-region-{args.checkpoint_region}"
-    ).get_local_path(),
     on_exit_event={ExitEvent.SIMPOINT_BEGIN: reset_and_dump()},
 )
 

From 9fb5ce5cd3a425dffcde18caed36f428afdf3cbd Mon Sep 17 00:00:00 2001
From: Roger Chang <rogerycchang@google.com>
Date: Tue, 21 Feb 2023 11:45:44 +0800
Subject: [PATCH 224/492] arch-riscv,dev: Fix behavior issues of PLIC

1. Fix reserved size between enable memory map and threshold memory
map. The number of enablePadding should be the number of context in
PLIC
2. writePriority to memory should update

Change-Id: Ib4b7e5ecd183863e140c4f3382a75057902d446d
Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/68198
Reviewed-by: Ayaz Akram <yazakram@ucdavis.edu>
Tested-by: kokoro <noreply+kokoro@google.com>
Reviewed-by: Yu-hsin Wang <yuhsingw@google.com>
Maintainer: Jason Lowe-Power <power.jg@gmail.com>
---
 src/dev/riscv/plic.cc | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/src/dev/riscv/plic.cc b/src/dev/riscv/plic.cc
index b8f765a17c..371af9e78a 100644
--- a/src/dev/riscv/plic.cc
+++ b/src/dev/riscv/plic.cc
@@ -203,7 +203,7 @@ Plic::PlicRegisters::init()
         - plic->nSrc32 * 4;
     reserved.emplace_back("reserved1", reserve1_size);
     const size_t reserve2_size = thresholdStart - enableStart
-        - plic->nSrc32 * plic->nContext * enablePadding;
+        - plic->nContext * enablePadding;
     reserved.emplace_back("reserved2", reserve2_size);
     const size_t reserve3_size = plic->pioSize - thresholdStart
         - plic->nContext * thresholdPadding;
@@ -333,6 +333,8 @@ void
 Plic::writeThreshold(Register32& reg, const uint32_t& data,
     const int context_id)
 {
+    reg.update(data);
+
     DPRINTF(Plic,
         "Threshold updated - context: %d, val: %d\n",
         context_id, reg.get());

From 379da2474b42c6aedfcc0a34367d41206efe1d4e Mon Sep 17 00:00:00 2001
From: Jason Lowe-Power <jason@lowepower.com>
Date: Tue, 14 Feb 2023 10:48:38 -0800
Subject: [PATCH 225/492] cpu: Add fatal in BaseCPU for wrong workloads

The CPU models assume that the number of workloads (Processes) is equal
to the number of threads when using SE mode. This wasn't checked leading
to a segfault if there were no workloads. This change makes the error
more clear.

Change-Id: I9a7b21112b8f819c6eeca944ee0d73ae9ce9a57b
Signed-off-by: Jason Lowe-Power <jason@lowepower.com>
Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/67937
Reviewed-by: Ayaz Akram <yazakram@ucdavis.edu>
Maintainer: Jason Lowe-Power <power.jg@gmail.com>
Tested-by: kokoro <noreply+kokoro@google.com>
---
 src/cpu/base.cc | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/src/cpu/base.cc b/src/cpu/base.cc
index 60d443af8c..d2c0a78d44 100644
--- a/src/cpu/base.cc
+++ b/src/cpu/base.cc
@@ -182,6 +182,12 @@ BaseCPU::BaseCPU(const Params &p, bool is_checker)
               "of threads (%i).\n", params().isa.size(), numThreads);
     }
 
+    if (!FullSystem && params().workload.size() != numThreads) {
+        fatal("Number of processes (cpu.workload) (%i) assigned to the CPU "
+              "does not equal number of threads (%i).\n",
+              params().workload.size(), numThreads);
+    }
+
     modelResetPort.onChange([this](const bool &new_val) {
         setReset(new_val);
     });

From 30200051a300beb36e6e7842c93576771420a35a Mon Sep 17 00:00:00 2001
From: Jason Lowe-Power <jason@lowepower.com>
Date: Wed, 15 Feb 2023 18:23:24 -0800
Subject: [PATCH 226/492] arch-x86,sim-se: Ignore the some mem syscalls

This makes the Linux SE mode ignore mlockall and modify_ldt. It is
needed to get ELFies working.

Change-Id: I9fce3c6a5531e5f1bb094c2d0587fa330d2892a9
Signed-off-by: Jason Lowe-Power <jason@lowepower.com>
Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/68037
Maintainer: Jason Lowe-Power <power.jg@gmail.com>
Reviewed-by: Bobby Bruce <bbruce@ucdavis.edu>
Tested-by: kokoro <noreply+kokoro@google.com>
---
 src/arch/x86/linux/syscall_tbl64.cc | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/src/arch/x86/linux/syscall_tbl64.cc b/src/arch/x86/linux/syscall_tbl64.cc
index 1e7274cc42..26299d884b 100644
--- a/src/arch/x86/linux/syscall_tbl64.cc
+++ b/src/arch/x86/linux/syscall_tbl64.cc
@@ -194,10 +194,10 @@ SyscallDescTable<EmuLinux::SyscallABI64> EmuLinux::syscallDescs64 = {
     { 148, "sched_rr_get_interval" },
     { 149, "mlock" },
     { 150, "munlock" },
-    { 151, "mlockall" },
+    { 151, "mlockall", ignoreFunc },
     { 152, "munlockall" },
     { 153, "vhangup" },
-    { 154, "modify_ldt" },
+    { 154, "modify_ldt", ignoreFunc },
     { 155, "pivot_root" },
     { 156, "_sysctl" },
     { 157, "prctl", ignoreFunc },

From cd35c9a6194451952735f58fb09cb7983e5861ba Mon Sep 17 00:00:00 2001
From: Jason Lowe-Power <jason@lowepower.com>
Date: Fri, 17 Feb 2023 17:11:36 -0800
Subject: [PATCH 227/492] stdlib: Add support for ELFies

This enables the stdlib to load and run ELFie-based binaries

See https://github.com/intel/pinball2elf for more details on ELFies

Change-Id: Ic1b624df64da1c77afc0907257a9e989488912ec
Signed-off-by: Jason Lowe-Power <jason@lowepower.com>
Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/68038
Reviewed-by: Bobby Bruce <bbruce@ucdavis.edu>
Maintainer: Jason Lowe-Power <power.jg@gmail.com>
Tested-by: kokoro <noreply+kokoro@google.com>
---
 src/python/SConscript                         |  1 +
 .../components/boards/se_binary_workload.py   | 32 +++++++++
 src/python/gem5/resources/elfie.py            | 67 +++++++++++++++++++
 3 files changed, 100 insertions(+)
 create mode 100644 src/python/gem5/resources/elfie.py

diff --git a/src/python/SConscript b/src/python/SConscript
index f401c03468..900723b0cf 100644
--- a/src/python/SConscript
+++ b/src/python/SConscript
@@ -263,6 +263,7 @@ PySource('gem5.resources', 'gem5/resources/md5_utils.py')
 PySource('gem5.resources', 'gem5/resources/resource.py')
 PySource('gem5.resources', 'gem5/resources/workload.py')
 PySource('gem5.resources', 'gem5/resources/looppoint.py')
+PySource('gem5.resources', 'gem5/resources/elfie.py')
 PySource('gem5.utils', 'gem5/utils/__init__.py')
 PySource('gem5.utils', 'gem5/utils/filelock.py')
 PySource('gem5.utils', 'gem5/utils/override.py')
diff --git a/src/python/gem5/components/boards/se_binary_workload.py b/src/python/gem5/components/boards/se_binary_workload.py
index 2558ce3cf2..23be81b03d 100644
--- a/src/python/gem5/components/boards/se_binary_workload.py
+++ b/src/python/gem5/components/boards/se_binary_workload.py
@@ -35,6 +35,7 @@ from ...resources.resource import (
     SimpointDirectoryResource,
 )
 
+from gem5.resources.elfie import ELFieInfo
 from gem5.resources.looppoint import Looppoint
 
 from m5.objects import SEWorkload, Process
@@ -210,6 +211,37 @@ class SEBinaryWorkload:
             checkpoint=checkpoint,
         )
 
+    def set_se_elfie_workload(
+        self,
+        elfie: AbstractResource,
+        elfie_info: ELFieInfo,
+        arguments: List[str] = [],
+        checkpoint: Optional[Union[Path, AbstractResource]] = None,
+    ) -> None:
+        """Set up the system to run a ELFie workload.
+
+        **Limitations**
+        * Dynamically linked executables are partially supported when the host
+          ISA and the simulated ISA are the same.
+
+        :param elfie: The resource encapsulating the binary elfie to be run.
+        :param elfie_info: The ELFieInfo object that contain all the
+        information for the ELFie
+        :param arguments: The input arguments for the binary
+        """
+
+        assert isinstance(elfie_info, ELFieInfo)
+        self._elfie_info_object = elfie_info
+
+        self._elfie_info_object.setup_processor(self.get_processor())
+
+        # Call set_se_binary_workload after LoopPoint setup is complete
+        self.set_se_binary_workload(
+            binary=elfie,
+            arguments=arguments,
+            checkpoint=checkpoint,
+        )
+
     def get_looppoint(self) -> Looppoint:
         """
         Returns the LoopPoint object set. If no LoopPoint object has been set
diff --git a/src/python/gem5/resources/elfie.py b/src/python/gem5/resources/elfie.py
new file mode 100644
index 0000000000..ae51388d62
--- /dev/null
+++ b/src/python/gem5/resources/elfie.py
@@ -0,0 +1,67 @@
+# Copyright (c) 2023 The Regents of the University of California
+# All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are
+# met: redistributions of source code must retain the above copyright
+# notice, this list of conditions and the following disclaimer;
+# redistributions in binary form must reproduce the above copyright
+# notice, this list of conditions and the following disclaimer in the
+# documentation and/or other materials provided with the distribution;
+# neither the name of the copyright holders nor the names of its
+# contributors may be used to endorse or promote products derived from
+# this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+from m5.params import PcCountPair
+from m5.objects import PcCountTrackerManager
+
+from typing import List
+
+
+class ELFieInfo:
+    """Stores information to load/run ELFies
+
+    See https://github.com/intel/pinball2elf for more information
+    """
+
+    def __init__(self, start: PcCountPair, end: PcCountPair):
+        self._start = start
+        self._end = end
+        self._manager = PcCountTrackerManager()
+        self._manager.targets = self.get_targets()
+
+    def setup_processor(
+        self,
+        processor: "AbstractProcessor",
+    ) -> None:
+        """
+        A function is used to setup a PC tracker in all the cores and
+        connect all the tracker to the PC tracker manager to perform
+        multithread PC tracking.
+        :param processor: The processor used in the simulation configuration.
+        """
+        for core in processor.get_cores():
+            core.add_pc_tracker_probe(self.get_targets(), self.get_manager())
+
+    def get_targets(self) -> List[PcCountPair]:
+        """Returns the complete list of targets PcCountPairs. That is, the
+        PcCountPairs each region starts with as well as the relevant warmup
+        intervals."""
+        return [self._start, self._end]
+
+    def get_manager(self) -> PcCountTrackerManager:
+        """Returns the PcCountTrackerManager for this ELFie data
+        structure."""
+        return self._manager

From a6048f2fe209f29c07238d0fca406fe0de82585e Mon Sep 17 00:00:00 2001
From: Jason Lowe-Power <jason@lowepower.com>
Date: Thu, 26 Jan 2023 18:00:24 -0800
Subject: [PATCH 228/492] stdlib: Add progress bars for long functions

This adds a progress bar for downloading large files and computing
md5sums on large files.

Change-Id: Iddc9faf61e861837cc1e2e3b3dbdbeebd6ccf529
Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/67472
Tested-by: kokoro <noreply+kokoro@google.com>
Reviewed-by: Melissa Jost <melissakjost@gmail.com>
Maintainer: Jason Lowe-Power <power.jg@gmail.com>
---
 optional-requirements.txt               |  1 +
 src/python/SConscript                   |  1 +
 src/python/gem5/resources/downloader.py | 25 ++++++--
 src/python/gem5/resources/md5_utils.py  | 19 ++++++-
 src/python/gem5/utils/progress_bar.py   | 76 +++++++++++++++++++++++++
 5 files changed, 116 insertions(+), 6 deletions(-)
 create mode 100644 optional-requirements.txt
 create mode 100644 src/python/gem5/utils/progress_bar.py

diff --git a/optional-requirements.txt b/optional-requirements.txt
new file mode 100644
index 0000000000..f88787df1f
--- /dev/null
+++ b/optional-requirements.txt
@@ -0,0 +1 @@
+tqdm==4.64.1
diff --git a/src/python/SConscript b/src/python/SConscript
index 900723b0cf..b0f11ddc73 100644
--- a/src/python/SConscript
+++ b/src/python/SConscript
@@ -267,6 +267,7 @@ PySource('gem5.resources', 'gem5/resources/elfie.py')
 PySource('gem5.utils', 'gem5/utils/__init__.py')
 PySource('gem5.utils', 'gem5/utils/filelock.py')
 PySource('gem5.utils', 'gem5/utils/override.py')
+PySource('gem5.utils', 'gem5/utils/progress_bar.py')
 PySource('gem5.utils', 'gem5/utils/requires.py')
 PySource('gem5.utils.multiprocessing',
     'gem5/utils/multiprocessing/__init__.py')
diff --git a/src/python/gem5/resources/downloader.py b/src/python/gem5/resources/downloader.py
index 4a2ed5d332..24b8970cc0 100644
--- a/src/python/gem5/resources/downloader.py
+++ b/src/python/gem5/resources/downloader.py
@@ -1,4 +1,4 @@
-# Copyright (c) 2021 The Regents of the University of California
+# Copyright (c) 2021-2023 The Regents of the University of California
 # All rights reserved.
 #
 # Redistribution and use in source and binary forms, with or without
@@ -42,6 +42,7 @@ from urllib.error import HTTPError
 from typing import List, Dict, Set, Optional
 
 from .md5_utils import md5_file, md5_dir
+from ..utils.progress_bar import tqdm, progress_hook
 
 from ..utils.filelock import FileLock
 
@@ -286,10 +287,26 @@ def _download(url: str, download_to: str, max_attempts: int = 6) -> None:
                 # get the file as a bytes blob
                 request = urllib.request.Request(url)
                 with urllib.request.urlopen(request, context=ctx) as fr:
-                    with open(download_to, "wb") as fw:
-                        fw.write(fr.read())
+                    with tqdm.wrapattr(
+                        open(download_to, "wb"),
+                        "write",
+                        miniters=1,
+                        desc="Downloading {download_to}",
+                        total=getattr(fr, "length", None),
+                    ) as fw:
+                        for chunk in fr:
+                            fw.write(chunk)
             else:
-                urllib.request.urlretrieve(url, download_to)
+                with tqdm(
+                    unit="B",
+                    unit_scale=True,
+                    unit_divisor=1024,
+                    miniters=1,
+                    desc=f"Downloading {download_to}",
+                ) as t:
+                    urllib.request.urlretrieve(
+                        url, download_to, reporthook=progress_hook(t)
+                    )
             return
         except HTTPError as e:
             # If the error code retrieved is retryable, we retry using a
diff --git a/src/python/gem5/resources/md5_utils.py b/src/python/gem5/resources/md5_utils.py
index d7212ab83f..f4a1a87df5 100644
--- a/src/python/gem5/resources/md5_utils.py
+++ b/src/python/gem5/resources/md5_utils.py
@@ -1,4 +1,4 @@
-# Copyright (c) 2022 The Regents of the University of California
+# Copyright (c) 2022-2023 The Regents of the University of California
 # All rights reserved.
 #
 # Redistribution and use in source and binary forms, with or without
@@ -31,7 +31,22 @@ from _hashlib import HASH as Hash
 
 def _md5_update_from_file(filename: Path, hash: Hash) -> Hash:
     assert filename.is_file()
-    with open(str(filename), "rb") as f:
+
+    if filename.stat().st_size < 1024 * 1024 * 100:
+        from ..utils.progress_bar import FakeTQDM
+
+        # if the file is less than 100MB, no need to show a progress bar.
+        tqdm = FakeTQDM()
+    else:
+        from ..utils.progress_bar import tqdm
+
+    with tqdm.wrapattr(
+        open(str(filename), "rb"),
+        "read",
+        miniters=1,
+        desc=f"Computing md5sum on {filename}",
+        total=filename.stat().st_size,
+    ) as f:
         for chunk in iter(lambda: f.read(4096), b""):
             hash.update(chunk)
     return hash
diff --git a/src/python/gem5/utils/progress_bar.py b/src/python/gem5/utils/progress_bar.py
new file mode 100644
index 0000000000..0ac13200b9
--- /dev/null
+++ b/src/python/gem5/utils/progress_bar.py
@@ -0,0 +1,76 @@
+# Copyright (c) 2023 The Regents of the University of California
+# All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are
+# met: redistributions of source code must retain the above copyright
+# notice, this list of conditions and the following disclaimer;
+# redistributions in binary form must reproduce the above copyright
+# notice, this list of conditions and the following disclaimer in the
+# documentation and/or other materials provided with the distribution;
+# neither the name of the copyright holders nor the names of its
+# contributors may be used to endorse or promote products derived from
+# this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+
+class FakeTQDM:
+    """This is a fake wrapper so that the tqdm calls work whether or not it
+    has been installed.
+    """
+
+    def __call__(*args, **kwargs):
+        if args:
+            return args[0]
+        return kwargs.get("iterable", None)
+
+    def wrapattr(self, *args, **kwargs):
+        if args:
+            return args[0]
+        return kwargs.get("iterable", None)
+
+    def __enter__(self):
+        pass
+
+    def __exit__(self, exc_type, exc_val, exc_tb):
+        pass
+
+
+try:
+    from tqdm.auto import tqdm
+
+    _have_tqdm = True
+except ImportError:
+    tqdm = FakeTQDM()
+    _have_tqdm = False
+
+# Hook for the progress bar
+def progress_hook(t):
+    if not _have_tqdm:
+        # Takes 3 arguments
+        return lambda a, b, c: None
+
+    last_b = [0]
+
+    def update_to(b=1, bsize=1, tsize=None):
+        if tsize not in (None, -1):
+            t.total = tsize
+        displayed = t.update((b - last_b[0]) * bsize)
+        last_b[0] = b
+        return displayed
+
+    return update_to
+
+
+__all__ = [tqdm, progress_hook, FakeTQDM]

From 65a678c75b2536f2cb5069fdc54aaf0f10528955 Mon Sep 17 00:00:00 2001
From: Tom Rollet <tom.rollet@huawei.com>
Date: Fri, 24 Feb 2023 16:29:37 +0100
Subject: [PATCH 229/492] cpu-o3: fix false positive in AddressSanitizer

AddressSanitizer found a new-delete-type-mismatch because of
the custom new operator for DynInst.
Adding a custom delete operator for DynInstPtr fixes this issue.
It has been fixed the same way in Mozilla:
https://bugzilla.mozilla.org/show_bug.cgi?id=1391500

Change-Id: I0ab4cb6d79cac88069cc2374a1deb499cdb15f02
Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/68357
Maintainer: Jason Lowe-Power <power.jg@gmail.com>
Reviewed-by: Jason Lowe-Power <power.jg@gmail.com>
Tested-by: kokoro <noreply+kokoro@google.com>
---
 src/cpu/o3/dyn_inst.cc | 9 +++++++++
 src/cpu/o3/dyn_inst.hh | 1 +
 2 files changed, 10 insertions(+)

diff --git a/src/cpu/o3/dyn_inst.cc b/src/cpu/o3/dyn_inst.cc
index 0b9a900446..94433cf433 100644
--- a/src/cpu/o3/dyn_inst.cc
+++ b/src/cpu/o3/dyn_inst.cc
@@ -187,6 +187,15 @@ DynInst::operator new(size_t count, Arrays &arrays)
     return buf;
 }
 
+// Because of the custom "new" operator that allocates more bytes than the
+// size of the DynInst object, AddressSanitizer throw new-delete-type-mismatch.
+// Adding a custom delete function is enough to shut down this false positive
+void
+DynInst::operator delete(void *ptr)
+{
+    ::operator delete(ptr);
+}
+
 DynInst::~DynInst()
 {
     /*
diff --git a/src/cpu/o3/dyn_inst.hh b/src/cpu/o3/dyn_inst.hh
index ab165bbcd5..54c0385374 100644
--- a/src/cpu/o3/dyn_inst.hh
+++ b/src/cpu/o3/dyn_inst.hh
@@ -95,6 +95,7 @@ class DynInst : public ExecContext, public RefCounted
     };
 
     static void *operator new(size_t count, Arrays &arrays);
+    static void  operator delete(void* ptr);
 
     /** BaseDynInst constructor given a binary instruction. */
     DynInst(const Arrays &arrays, const StaticInstPtr &staticInst,

From 220995725616b2605692994f660a248ce6044aa2 Mon Sep 17 00:00:00 2001
From: Roger Chang <rogerycchang@google.com>
Date: Tue, 21 Feb 2023 11:27:23 +0800
Subject: [PATCH 230/492] arch-riscv,dev: Add PLIC abstract class to support
 multiple PLIC implementation

We should create PLIC abstract and have common interface to let
HiFive platform send and clear interrupt to variable type of PLIC

Change-Id: Ic3a2ffc2a2a002540b400c70c85c3495fa838f2a
Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/68197
Maintainer: Jason Lowe-Power <power.jg@gmail.com>
Reviewed-by: Yu-hsin Wang <yuhsingw@google.com>
Tested-by: kokoro <noreply+kokoro@google.com>
---
 src/dev/riscv/Plic.py    | 20 ++++++++++++++++++--
 src/dev/riscv/SConscript |  3 ++-
 src/dev/riscv/plic.cc    |  3 ++-
 src/dev/riscv/plic.hh    | 22 +++++++++++++++++++---
 4 files changed, 41 insertions(+), 7 deletions(-)

diff --git a/src/dev/riscv/Plic.py b/src/dev/riscv/Plic.py
index 33b6940c3f..b4486b9350 100644
--- a/src/dev/riscv/Plic.py
+++ b/src/dev/riscv/Plic.py
@@ -1,4 +1,5 @@
 # Copyright (c) 2021 Huawei International
+# Copyright (c) 2023 Google LLC
 # All rights reserved.
 #
 # The license below extends only to copyright in the software and shall
@@ -39,7 +40,22 @@ from m5.proxy import *
 from m5.util.fdthelper import *
 
 
-class Plic(BasicPioDevice):
+class PlicBase(BasicPioDevice):
+    """
+    This is abstract class of PLIC and
+    define interface to handle received
+    interrupt singal from device
+    """
+
+    type = "PlicBase"
+    cxx_header = "dev/riscv/plic.hh"
+    cxx_class = "gem5::PlicBase"
+    abstract = True
+
+    pio_size = Param.Addr("PIO Size")
+
+
+class Plic(PlicBase):
     """
     This implementation of PLIC is based on
     the SiFive U54MC datasheet:
@@ -51,7 +67,7 @@ class Plic(BasicPioDevice):
     type = "Plic"
     cxx_header = "dev/riscv/plic.hh"
     cxx_class = "gem5::Plic"
-    pio_size = Param.Addr(0x4000000, "PIO Size")
+    pio_size = 0x4000000
     n_src = Param.Int("Number of interrupt sources")
     n_contexts = Param.Int(
         "Number of interrupt contexts. Usually the number "
diff --git a/src/dev/riscv/SConscript b/src/dev/riscv/SConscript
index af0b96b88e..6e3376bb02 100755
--- a/src/dev/riscv/SConscript
+++ b/src/dev/riscv/SConscript
@@ -2,6 +2,7 @@
 
 # Copyright (c) 2021 Huawei International
 # Copyright (c) 2022 EXAscale Performance SYStems (EXAPSYS)
+# Copyright (c) 2023 Google LLC
 # All rights reserved.
 #
 # Redistribution and use in source and binary forms, with or without
@@ -34,7 +35,7 @@ SimObject('HiFive.py', sim_objects=['HiFive', 'GenericRiscvPciHost'],
 SimObject('LupV.py', sim_objects=['LupV'], tags='riscv isa')
 SimObject('Clint.py', sim_objects=['Clint'], tags='riscv isa')
 SimObject('PlicDevice.py', sim_objects=['PlicIntDevice'], tags='riscv isa')
-SimObject('Plic.py', sim_objects=['Plic'], tags='riscv isa')
+SimObject('Plic.py', sim_objects=['PlicBase', 'Plic'], tags='riscv isa')
 SimObject('RTC.py', sim_objects=['RiscvRTC'], tags='riscv isa')
 SimObject('RiscvVirtIOMMIO.py', sim_objects=['RiscvMmioVirtIO'],
     tags='riscv isa')
diff --git a/src/dev/riscv/plic.cc b/src/dev/riscv/plic.cc
index 371af9e78a..fd42920dc5 100644
--- a/src/dev/riscv/plic.cc
+++ b/src/dev/riscv/plic.cc
@@ -45,6 +45,7 @@
 #include "mem/packet.hh"
 #include "mem/packet_access.hh"
 #include "params/Plic.hh"
+#include "params/PlicBase.hh"
 #include "sim/system.hh"
 
 namespace gem5
@@ -53,7 +54,7 @@ namespace gem5
 using namespace RiscvISA;
 
 Plic::Plic(const Params &params) :
-    BasicPioDevice(params, params.pio_size),
+    PlicBase(params),
     system(params.system),
     nSrc(params.n_src),
     nContext(params.n_contexts),
diff --git a/src/dev/riscv/plic.hh b/src/dev/riscv/plic.hh
index d077e73617..00128ee56c 100644
--- a/src/dev/riscv/plic.hh
+++ b/src/dev/riscv/plic.hh
@@ -1,5 +1,6 @@
 /*
  * Copyright (c) 2021 Huawei International
+ * Copyright (c) 2023 Google LLC
  * All rights reserved
  *
  * The license below extends only to copyright in the software and shall
@@ -47,6 +48,7 @@
 #include "mem/packet.hh"
 #include "mem/packet_access.hh"
 #include "params/Plic.hh"
+#include "params/PlicBase.hh"
 #include "sim/system.hh"
 
 namespace gem5
@@ -94,7 +96,21 @@ struct PlicOutput
   std::vector<uint32_t> maxPriority;
 };
 
-class Plic : public BasicPioDevice
+class PlicBase : public BasicPioDevice
+{
+  public:
+    typedef PlicBaseParams Params;
+    PlicBase(const Params &params) :
+      BasicPioDevice(params, params.pio_size)
+    {}
+
+    // Interrupt interface to send signal to PLIC
+    virtual void post(int src_id) = 0;
+    // Interrupt interface to clear signal to PLIC
+    virtual void clear(int src_id) = 0;
+};
+
+class Plic : public PlicBase
 {
   // Params
   protected:
@@ -125,8 +141,8 @@ class Plic : public BasicPioDevice
     /**
      * Interrupt interface
      */
-    void post(int src_id);
-    void clear(int src_id);
+    void post(int src_id) override;
+    void clear(int src_id) override;
 
     /**
      * SimObject functions

From e6604bf1097ea7af4bfc54dbda9c2db1b5561ed8 Mon Sep 17 00:00:00 2001
From: Roger Chang <rogerycchang@google.com>
Date: Tue, 21 Feb 2023 12:02:37 +0800
Subject: [PATCH 231/492] arch-riscv,dev: Add HiFive Base Platform

This is basic abstract platform and all of RISC-V system should
use platform inherit from HiFiveBase, HiFiveBase declared the common
way to handle interrupt.

Change-Id: I52122e1c82c200d7e6012433c2535c07d427f637
Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/68199
Maintainer: Jason Lowe-Power <power.jg@gmail.com>
Reviewed-by: Yu-hsin Wang <yuhsingw@google.com>
Tested-by: kokoro <noreply+kokoro@google.com>
---
 src/dev/riscv/HiFive.py  | 154 ++++++++++++++++++++++-----------------
 src/dev/riscv/SConscript |   2 +-
 src/dev/riscv/hifive.cc  |  19 +++--
 src/dev/riscv/hifive.hh  |  11 +--
 4 files changed, 107 insertions(+), 79 deletions(-)

diff --git a/src/dev/riscv/HiFive.py b/src/dev/riscv/HiFive.py
index 466968602b..5bd6363363 100755
--- a/src/dev/riscv/HiFive.py
+++ b/src/dev/riscv/HiFive.py
@@ -1,5 +1,6 @@
 # Copyright (c) 2021 Huawei International
 # Copyright (c) 2022 EXAscale Performance SYStems (EXAPSYS)
+# Copyright (c) 2023 Google LLC
 # All rights reserved.
 #
 # The license below extends only to copyright in the software and shall
@@ -63,24 +64,14 @@ class GenericRiscvPciHost(GenericPciHost):
     _dma_coherent = True
 
 
-class HiFive(Platform):
-    """HiFive Platform
+class HiFiveBase(Platform):
+    """HiFive Base Abstract Platform
 
     Implementation:
         This is the base class for SiFive's HiFive
         board series. It contains the CLINT and PLIC
         interrupt controllers, Uart and Disk.
 
-        Implementation details are based on SiFive
-        FU540-C000. https://sifive.cdn.prismic.io/
-        sifive/b5e7a29c-d3c2-44ea-85fb-acc1df282e2
-        1_FU540-C000-v1p3.pdf
-
-    Setup:
-        The following sections outline the required
-        setup for a RISC-V HiFive platform. See
-        configs/example/riscv/fs_linux.py for example.
-
     Driving CLINT:
         CLINT has an interrupt pin which increments
         mtime. It can be connected to any interrupt
@@ -88,7 +79,7 @@ class HiFive(Platform):
         abstract RTC wrapper called RiscvRTC can be
         used.
 
-    Attaching PLIC devices:
+    Driving PLIC:
         PLIC handles external interrupts. Interrupt
         PioDevices should inherit from PlicIntDevice
         (PCI and DMA not yet implemented). It contains
@@ -96,63 +87,30 @@ class HiFive(Platform):
         to call platform->postPciInt(id).
 
         All PLIC interrupt devices should be returned
-        by _off_chip_devices(). Calling attachPlic sets
-        up the PLIC interrupt source count.
-
-    Uart:
-        The HiFive platform also has an uart_int_id.
-        This is because Uart8250 uses postConsoleInt
-        instead of postPciInt. In the future if a Uart
-        that inherits PlicIntDevice is implemented,
-        this can be removed.
-
-    Disk:
-        See fs_linux.py for setup example.
-
-    PMAChecker:
-        The PMAChecker will be attached to the MMU of
-        each CPU (which allows them to differ). See
-        fs_linux.py for setup example.
+        by _off_chip_devices().
     """
 
-    type = "HiFive"
+    type = "HiFiveBase"
     cxx_header = "dev/riscv/hifive.hh"
-    cxx_class = "gem5::HiFive"
+    cxx_class = "gem5::HiFiveBase"
 
     # CLINT
-    clint = Param.Clint(Clint(pio_addr=0x2000000), "CLINT")
+    clint = Param.Clint(NULL, "CLINT")
 
     # PLIC
-    plic = Param.Plic(Plic(pio_addr=0xC000000), "PLIC")
+    plic = Param.PlicBase(NULL, "PLIC")
 
-    # PCI
-    pci_host = GenericRiscvPciHost(
-        conf_base=0x30000000,
-        conf_size="256MB",
-        conf_device_bits=12,
-        pci_pio_base=0x2F000000,
-        pci_mem_base=0x40000000,
-    )
-
-    # Uart
-    uart = RiscvUart8250(pio_addr=0x10000000)
     # Int source ID to redirect console interrupts to
     # Set to 0 if using a pci interrupt for Uart instead
-    uart_int_id = Param.Int(0xA, "PLIC Uart interrupt ID")
-    terminal = Terminal()
+    uart_int_id = Param.Int(0, "PLIC Uart interrupt ID")
 
     def _on_chip_devices(self):
         """Returns a list of on-chip peripherals"""
-        return [self.clint, self.plic]
+        return []
 
     def _off_chip_devices(self):
         """Returns a list of off-chip peripherals"""
-        devices = [self.uart]
-        if hasattr(self, "disk"):
-            devices.append(self.disk)
-        if hasattr(self, "rng"):
-            devices.append(self.rng)
-        return devices
+        return []
 
     def _on_chip_ranges(self):
         """Returns a list of on-chip peripherals
@@ -172,17 +130,6 @@ class HiFive(Platform):
             for dev in self._off_chip_devices()
         ]
 
-    def attachPlic(self):
-        """Count number of PLIC interrupt sources"""
-        plic_srcs = [
-            self.uart_int_id,
-            self.pci_host.int_base + self.pci_host.int_count,
-        ]
-        for device in self._off_chip_devices():
-            if hasattr(device, "interrupt_id"):
-                plic_srcs.append(device.interrupt_id)
-        self.plic.n_src = max(plic_srcs) + 1
-
     def attachOnChipIO(self, bus):
         """Attach on-chip IO devices, needs modification
         to support DMA
@@ -197,6 +144,83 @@ class HiFive(Platform):
         for device in self._off_chip_devices():
             device.pio = bus.mem_side_ports
 
+
+class HiFive(HiFiveBase):
+    """HiFive Platform
+
+    Implementation:
+        Implementation details are based on SiFive
+        FU540-C000. https://sifive.cdn.prismic.io/
+        sifive/b5e7a29c-d3c2-44ea-85fb-acc1df282e2
+        1_FU540-C000-v1p3.pdf
+
+    Setup:
+        The following sections outline the required
+        setup for a RISC-V HiFive platform. See
+        configs/example/riscv/fs_linux.py for example.
+
+    Uart:
+        The HiFive platform also has an uart_int_id.
+        This is because Uart8250 uses postConsoleInt
+        instead of postPciInt. In the future if a Uart
+        that inherits PlicIntDevice is implemented,
+        this can be removed.
+
+    Disk:
+        See fs_linux.py for setup example.
+
+    PMAChecker:
+        The PMAChecker will be attached to the MMU of
+        each CPU (which allows them to differ). See
+        fs_linux.py for setup example.
+    """
+
+    # CLINT
+    clint = Clint(pio_addr=0x2000000)
+
+    # PLIC
+    plic = Plic(pio_addr=0xC000000)
+
+    # PCI
+    pci_host = GenericRiscvPciHost(
+        conf_base=0x30000000,
+        conf_size="256MB",
+        conf_device_bits=12,
+        pci_pio_base=0x2F000000,
+        pci_mem_base=0x40000000,
+    )
+
+    # Uart
+    uart = RiscvUart8250(pio_addr=0x10000000)
+    # Int source ID to redirect console interrupts to
+    # Set to 0 if using a pci interrupt for Uart instead
+    uart_int_id = 0xA
+    terminal = Terminal()
+
+    def _on_chip_devices(self):
+        """Returns a list of on-chip peripherals"""
+        return [self.clint, self.plic]
+
+    def _off_chip_devices(self):
+        """Returns a list of off-chip peripherals"""
+        devices = [self.uart]
+        if hasattr(self, "disk"):
+            devices.append(self.disk)
+        if hasattr(self, "rng"):
+            devices.append(self.rng)
+        return devices
+
+    def attachPlic(self):
+        """Count and set number of PLIC interrupt sources"""
+        plic_srcs = [
+            self.uart_int_id,
+            self.pci_host.int_base + self.pci_host.int_count,
+        ]
+        for device in self._off_chip_devices():
+            if hasattr(device, "interrupt_id"):
+                plic_srcs.append(device.interrupt_id)
+        self.plic.n_src = max(plic_srcs) + 1
+
     def setNumCores(self, num_cpu):
         """Sets the PLIC and CLINT to have the right number of threads and
         contexts. Assumes that the cores have a single hardware thread.
diff --git a/src/dev/riscv/SConscript b/src/dev/riscv/SConscript
index 6e3376bb02..be5ff8defe 100755
--- a/src/dev/riscv/SConscript
+++ b/src/dev/riscv/SConscript
@@ -30,7 +30,7 @@
 
 Import('*')
 
-SimObject('HiFive.py', sim_objects=['HiFive', 'GenericRiscvPciHost'],
+SimObject('HiFive.py', sim_objects=['HiFiveBase', 'GenericRiscvPciHost'],
           tags='riscv isa')
 SimObject('LupV.py', sim_objects=['LupV'], tags='riscv isa')
 SimObject('Clint.py', sim_objects=['Clint'], tags='riscv isa')
diff --git a/src/dev/riscv/hifive.cc b/src/dev/riscv/hifive.cc
index 74ae346f1f..0487eabba2 100644
--- a/src/dev/riscv/hifive.cc
+++ b/src/dev/riscv/hifive.cc
@@ -1,5 +1,6 @@
 /*
  * Copyright (c) 2021 Huawei International
+ * Copyright (c) 2023 Google LLC
  * All rights reserved
  *
  * The license below extends only to copyright in the software and shall
@@ -39,7 +40,7 @@
 
 #include "dev/riscv/clint.hh"
 #include "dev/riscv/plic.hh"
-#include "params/HiFive.hh"
+#include "params/HiFiveBase.hh"
 #include "sim/system.hh"
 
 namespace gem5
@@ -47,44 +48,46 @@ namespace gem5
 
 using namespace RiscvISA;
 
-HiFive::HiFive(const Params &params) :
+HiFiveBase::HiFiveBase(const Params &params) :
     Platform(params),
     clint(params.clint), plic(params.plic),
     uartIntID(params.uart_int_id)
 {
+    fatal_if(clint == nullptr, "CLINT should not be NULL");
+    fatal_if(plic == nullptr, "PLIC should not be NULL");
 }
 
 void
-HiFive::postConsoleInt()
+HiFiveBase::postConsoleInt()
 {
     plic->post(uartIntID);
 }
 
 void
-HiFive::clearConsoleInt()
+HiFiveBase::clearConsoleInt()
 {
     plic->clear(uartIntID);
 }
 
 void
-HiFive::postPciInt(int line)
+HiFiveBase::postPciInt(int line)
 {
     plic->post(line);
 }
 
 void
-HiFive::clearPciInt(int line)
+HiFiveBase::clearPciInt(int line)
 {
     plic->clear(line);
 }
 
 void
-HiFive::serialize(CheckpointOut &cp) const
+HiFiveBase::serialize(CheckpointOut &cp) const
 {
 }
 
 void
-HiFive::unserialize(CheckpointIn &cp)
+HiFiveBase::unserialize(CheckpointIn &cp)
 {
 }
 
diff --git a/src/dev/riscv/hifive.hh b/src/dev/riscv/hifive.hh
index 78d45046b1..99d7ae67b3 100644
--- a/src/dev/riscv/hifive.hh
+++ b/src/dev/riscv/hifive.hh
@@ -1,5 +1,6 @@
 /*
  * Copyright (c) 2021 Huawei International
+ * Copyright (c) 2023 Google LLC
  * All rights reserved
  *
  * The license below extends only to copyright in the software and shall
@@ -41,23 +42,23 @@
 #include "dev/platform.hh"
 #include "dev/riscv/clint.hh"
 #include "dev/riscv/plic.hh"
-#include "params/HiFive.hh"
+#include "params/HiFiveBase.hh"
 
 namespace gem5
 {
 
 using namespace RiscvISA;
 
-class HiFive : public Platform
+class HiFiveBase : public Platform
 {
   public:
     Clint *clint;
-    Plic *plic;
+    PlicBase *plic;
     int uartIntID;
 
   public:
-    typedef HiFiveParams Params;
-    HiFive(const Params &params);
+    typedef HiFiveBaseParams Params;
+    HiFiveBase(const Params &params);
 
     void postConsoleInt() override;
 

From 75001363923d0f64a8b1454890db575e7c7ca099 Mon Sep 17 00:00:00 2001
From: Gabe Black <gabeblack@google.com>
Date: Thu, 2 Mar 2023 09:25:17 -0800
Subject: [PATCH 232/492] mem: Add a parameter which will make a memory truly a
 ROM.

This piggy-backs on the writeOK method which already exists. It also
modifies the flags returned as part of the memory's backdoor
descriptor which doesn't enforce that the memory is read only, but will
let the other party know it's expected not to write to it.

Change-Id: Ib95e619c76c327d302e62a88515a92af11815981
Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/68557
Maintainer: Gabe Black <gabeblack@google.com>
Tested-by: kokoro <noreply+kokoro@google.com>
Reviewed-by: Matthew Poremba <matthew.poremba@amd.com>
---
 src/mem/AbstractMemory.py | 2 ++
 src/mem/abstract_mem.cc   | 7 ++++---
 src/mem/abstract_mem.hh   | 9 ++++++++-
 3 files changed, 14 insertions(+), 4 deletions(-)

diff --git a/src/mem/AbstractMemory.py b/src/mem/AbstractMemory.py
index ea88fd879c..7ab24bc118 100644
--- a/src/mem/AbstractMemory.py
+++ b/src/mem/AbstractMemory.py
@@ -74,3 +74,5 @@ class AbstractMemory(ClockedObject):
     image_file = Param.String(
         "", "Image to load into memory as its initial contents"
     )
+
+    writeable = Param.Bool(True, "Allow writes to this memory")
diff --git a/src/mem/abstract_mem.cc b/src/mem/abstract_mem.cc
index 03f2557d63..9340f7e96f 100644
--- a/src/mem/abstract_mem.cc
+++ b/src/mem/abstract_mem.cc
@@ -59,10 +59,11 @@ namespace memory
 AbstractMemory::AbstractMemory(const Params &p) :
     ClockedObject(p), range(p.range), pmemAddr(NULL),
     backdoor(params().range, nullptr,
-             (MemBackdoor::Flags)(MemBackdoor::Readable |
-                                  MemBackdoor::Writeable)),
+             (MemBackdoor::Flags)(p.writeable ?
+                 MemBackdoor::Readable | MemBackdoor::Writeable :
+                 MemBackdoor::Readable)),
     confTableReported(p.conf_table_reported), inAddrMap(p.in_addr_map),
-    kvmMap(p.kvm_map), _system(NULL),
+    kvmMap(p.kvm_map), writeable(p.writeable), _system(NULL),
     stats(*this)
 {
     panic_if(!range.valid() || !range.size(),
diff --git a/src/mem/abstract_mem.hh b/src/mem/abstract_mem.hh
index 53b794012d..7f12487421 100644
--- a/src/mem/abstract_mem.hh
+++ b/src/mem/abstract_mem.hh
@@ -129,6 +129,9 @@ class AbstractMemory : public ClockedObject
     // Should KVM map this memory for the guest
     const bool kvmMap;
 
+    // Are writes allowed to this memory
+    const bool writeable;
+
     std::list<LockedAddr> lockedAddrList;
 
     // helper function for checkLockedAddrs(): we really want to
@@ -149,8 +152,12 @@ class AbstractMemory : public ClockedObject
     // requesting execution context), 'true' otherwise.  Note that
     // this method must be called on *all* stores since even
     // non-conditional stores must clear any matching lock addresses.
-    bool writeOK(PacketPtr pkt) {
+    bool
+    writeOK(PacketPtr pkt)
+    {
         const RequestPtr &req = pkt->req;
+        if (!writeable)
+            return false;
         if (lockedAddrList.empty()) {
             // no locked addrs: nothing to check, store_conditional fails
             bool isLLSC = pkt->isLLSC();

From e3f51e595c0a49d2d3fca5d720f350f50b89c3dd Mon Sep 17 00:00:00 2001
From: Melissa Jost <melissakjost@gmail.com>
Date: Thu, 2 Mar 2023 16:34:22 -0800
Subject: [PATCH 233/492] cpu: Allow PcCountTracker to compile in NULL ISA

While the PcCountTracker isn't necessary in the NULL ISA, the
structure of the standard library requires us to have it built
when running the replacement policy tests, which should fix
these tests failing within the nightlies at the moment.

Change-Id: I225b7923f2a11d351c24bdceba3ded4ed2b3bc87
Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/68597
Maintainer: Jason Lowe-Power <power.jg@gmail.com>
Reviewed-by: Jason Lowe-Power <power.jg@gmail.com>
Tested-by: kokoro <noreply+kokoro@google.com>
---
 src/cpu/probes/SConscript | 15 +++++++--------
 1 file changed, 7 insertions(+), 8 deletions(-)

diff --git a/src/cpu/probes/SConscript b/src/cpu/probes/SConscript
index c96ca78a0c..9f43317284 100644
--- a/src/cpu/probes/SConscript
+++ b/src/cpu/probes/SConscript
@@ -26,12 +26,11 @@
 
 Import("*")
 
-if not env["CONF"]["USE_NULL_ISA"]:
-    SimObject(
-        "PcCountTracker.py",
-        sim_objects=["PcCountTracker", "PcCountTrackerManager"],
-    )
-    Source("pc_count_tracker.cc")
-    Source("pc_count_tracker_manager.cc")
+SimObject(
+    "PcCountTracker.py",
+    sim_objects=["PcCountTracker", "PcCountTrackerManager"],
+)
+Source("pc_count_tracker.cc")
+Source("pc_count_tracker_manager.cc")
 
-    DebugFlag("PcCountTracker")
+DebugFlag("PcCountTracker")

From 6884aeb86a73125969c293b9d3fedd242e104985 Mon Sep 17 00:00:00 2001
From: Melissa Jost <melissakjost@gmail.com>
Date: Tue, 28 Feb 2023 23:24:49 -0800
Subject: [PATCH 234/492] base: Fix gcc-13 build error

This change adds relevant errors that allow building with
gcc-13.

Change-Id: Ib97a90ef647a9cd9ec1bf1f2bde61daca85de427
Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/68497
Maintainer: Bobby Bruce <bbruce@ucdavis.edu>
Tested-by: kokoro <noreply+kokoro@google.com>
Reviewed-by: Bobby Bruce <bbruce@ucdavis.edu>
---
 src/arch/arm/insts/crypto.hh     | 2 ++
 src/base/cprintf_formats.hh      | 1 +
 src/mem/ruby/common/Histogram.hh | 1 +
 3 files changed, 4 insertions(+)

diff --git a/src/arch/arm/insts/crypto.hh b/src/arch/arm/insts/crypto.hh
index 23eda5b8ab..089cbe5726 100644
--- a/src/arch/arm/insts/crypto.hh
+++ b/src/arch/arm/insts/crypto.hh
@@ -38,6 +38,8 @@
 #ifndef __ARCH_ARM_INSTS_CRYPTO_HH__
 #define __ARCH_ARM_INSTS_CRYPTO_HH__
 
+#include <cstdint>
+
 namespace gem5
 {
 
diff --git a/src/base/cprintf_formats.hh b/src/base/cprintf_formats.hh
index 02ba49699e..4a64780c4a 100644
--- a/src/base/cprintf_formats.hh
+++ b/src/base/cprintf_formats.hh
@@ -29,6 +29,7 @@
 #ifndef __BASE_CPRINTF_FORMATS_HH__
 #define __BASE_CPRINTF_FORMATS_HH__
 
+#include <cstdint>
 #include <cstring>
 #include <ostream>
 #include <sstream>
diff --git a/src/mem/ruby/common/Histogram.hh b/src/mem/ruby/common/Histogram.hh
index bfd3ae0aba..cdc27af11f 100644
--- a/src/mem/ruby/common/Histogram.hh
+++ b/src/mem/ruby/common/Histogram.hh
@@ -29,6 +29,7 @@
 #ifndef __MEM_RUBY_COMMON_HISTOGRAM_HH__
 #define __MEM_RUBY_COMMON_HISTOGRAM_HH__
 
+#include <cstdint>
 #include <iostream>
 #include <vector>
 

From da050eeddea5b3281d63e4f647a19c80880ebeae Mon Sep 17 00:00:00 2001
From: handsomeliu <handsomeliu@google.com>
Date: Fri, 3 Mar 2023 16:32:01 +0800
Subject: [PATCH 235/492] base: support calculating the intersection of two
 AddrRange

Change-Id: I2f089039c709fe4c3f7086263fb56470c7713bad
Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/68617
Maintainer: Jason Lowe-Power <power.jg@gmail.com>
Reviewed-by: Jason Lowe-Power <power.jg@gmail.com>
Tested-by: kokoro <noreply+kokoro@google.com>
Reviewed-by: Bobby Bruce <bbruce@ucdavis.edu>
---
 src/base/addr_range.hh | 16 ++++++++++++++++
 1 file changed, 16 insertions(+)

diff --git a/src/base/addr_range.hh b/src/base/addr_range.hh
index 07bd255d26..11fb1cd668 100644
--- a/src/base/addr_range.hh
+++ b/src/base/addr_range.hh
@@ -732,6 +732,22 @@ class AddrRange
     {
         return !(*this == r);
     }
+
+    /**
+     * @ingroup api_addr_range
+     */
+    AddrRange
+    operator&(const AddrRange& r) const
+    {
+        panic_if(this->interleaved() || r.interleaved(),
+                 "Cannot calculate intersection of interleaved ranges.");
+        Addr start = std::max(this->_start, r._start);
+        Addr end = std::min(this->_end, r._end);
+        if (end <= start) {
+            return AddrRange(0, 0);
+        }
+        return AddrRange(start, end);
+    }
 };
 
 static inline AddrRangeList

From fd7006f4f1a34d6bbfe0c7d62be72ced43281462 Mon Sep 17 00:00:00 2001
From: Melissa Jost <melissakjost@gmail.com>
Date: Fri, 3 Mar 2023 17:24:58 -0800
Subject: [PATCH 236/492] arch-riscv: Revert CSR instruction fixes

This reverts commit 4b1c24542065380c6cff7ab2baa25e216a0ad38e
and commit 89c49d1ab06ea5364ab1f80586f8b01c0297cb12 because
they are causing the RISC-V Ubuntu boot test within the
nightly tests to hang and time out.

Change-Id: Ia4d8098ec940cb5900256c8cede0146256c851e5
Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/68637
Tested-by: kokoro <noreply+kokoro@google.com>
Reviewed-by: Roger Chang <rogerycchang@google.com>
Maintainer: Jason Lowe-Power <power.jg@gmail.com>
Reviewed-by: Jason Lowe-Power <power.jg@gmail.com>
---
 src/arch/riscv/insts/standard.hh        | 19 ++-----------------
 src/arch/riscv/isa/formats/standard.isa | 15 ++++++---------
 2 files changed, 8 insertions(+), 26 deletions(-)

diff --git a/src/arch/riscv/insts/standard.hh b/src/arch/riscv/insts/standard.hh
index 2dfe73aedf..5b0e8c2c22 100644
--- a/src/arch/riscv/insts/standard.hh
+++ b/src/arch/riscv/insts/standard.hh
@@ -91,33 +91,18 @@ class CSROp : public RiscvStaticInst
   protected:
     uint64_t csr;
     uint64_t uimm;
-    bool read;
-    bool write;
 
     /// Constructor
     CSROp(const char *mnem, ExtMachInst _machInst, OpClass __opClass)
         : RiscvStaticInst(mnem, _machInst, __opClass),
-            csr(FUNCT12), uimm(CSRIMM), read(true), write(true)
+            csr(FUNCT12), uimm(CSRIMM)
     {
         if (csr == CSR_SATP) {
             flags[IsSquashAfter] = true;
         }
-        if (strcmp(mnemonic, "csrrw") == 0 ||
-            strcmp(mnemonic, "csrrwi") == 0) {
-          if (RD == 0){
-            read = false;
-          }
-        } else if (strcmp(mnemonic, "csrrs") == 0 ||
-                   strcmp(mnemonic, "csrrc") == 0 ||
-                   strcmp(mnemonic, "csrrsi") == 0 ||
-                   strcmp(mnemonic, "csrrci") == 0 ){
-          if (RS1 == 0 || uimm == 0) {
-            write = false;
-          }
-        }
     }
 
-  std::string generateDisassembly(
+    std::string generateDisassembly(
         Addr pc, const loader::SymbolTable *symtab) const override;
 };
 
diff --git a/src/arch/riscv/isa/formats/standard.isa b/src/arch/riscv/isa/formats/standard.isa
index c94a0bcdbd..bb500f5f49 100644
--- a/src/arch/riscv/isa/formats/standard.isa
+++ b/src/arch/riscv/isa/formats/standard.isa
@@ -358,7 +358,7 @@ def template CSRExecute {{
         %(op_decl)s;
         %(op_rd)s;
 
-        RegVal data = 0, olddata = 0, nonmaskdata = 0;
+        RegVal data, olddata;
         auto lowestAllowedMode = (PrivilegeMode)bits(csr, 9, 8);
         auto pm = (PrivilegeMode)xc->readMiscReg(MISCREG_PRV);
         if (pm < lowestAllowedMode) {
@@ -380,13 +380,11 @@ def template CSRExecute {{
             break;
         }
 
-        if (read) {
-          if (csr == CSR_FCSR) {
+        if (csr == CSR_FCSR) {
             olddata = xc->readMiscReg(MISCREG_FFLAGS) |
-              (xc->readMiscReg(MISCREG_FRM) << FRM_OFFSET);
-          } else {
+                      (xc->readMiscReg(MISCREG_FRM) << FRM_OFFSET);
+        } else {
             olddata = xc->readMiscReg(midx);
-          }
         }
         olddata = rvZext(olddata);
         auto olddata_all = olddata;
@@ -397,9 +395,8 @@ def template CSRExecute {{
 
         %(code)s;
 
-        nonmaskdata = data & ~maskVal;
         data &= maskVal;
-        if (write) {
+        if (data != olddata) {
             if (bits(csr, 11, 10) == 0x3) {
                 return std::make_shared<IllegalInstFault>(
                         csprintf("CSR %s is read-only\n", csrName), machInst);
@@ -420,7 +417,7 @@ def template CSRExecute {{
               case CSR_SIP: case CSR_SIE:
               case CSR_UIP: case CSR_UIE:
               case CSR_MSTATUS: case CSR_SSTATUS: case CSR_USTATUS:
-                if (nonmaskdata == 0) {
+                if (newdata_all != olddata_all) {
                     xc->setMiscReg(midx, newdata_all);
                 } else {
                     return std::make_shared<IllegalInstFault>(

From 8a11b39c41353ec5345250a0ca370b89d14e97bd Mon Sep 17 00:00:00 2001
From: Jasjeet Rangi <jasrangi@ucdavis.edu>
Date: Wed, 23 Nov 2022 12:31:12 -0800
Subject: [PATCH 237/492] cpu: Move fetch stats from simple and minor to base

This summarizes a series of changes to move general Simple, Minor,
O3 CPU stats to BaseCPU. This commit focuses on moving numBranches
from SimpleCPU to the FetchCPUStats in the BaseCPU, and
numFetchSuspends from MinorCPU into FetchCPUStats.  More general
information about this relation chain is below

1. Summary:
Moved general CPU stats found across Simple, Minor, and O3 CPU models
into BaseCPU through new stat groups. The stat groups are
FetchCPUStats, ExecuteCPUStats, and CommitCPUStats. Implemented the
committedControl stat vector found in MinorCPU for Simple and O3 CPU.
Implemented the numStoreInsts stat found in SimpleCPU for O3CPU. IPC
and CPI stats are now tracked at the core and thread level in BaseCPU
and are made universal for simple, minor, o3, and kvm CPUs. Duplicate
stats across the models are merged into a single stat in BaseCPU under
the same stat name. This change does not implement every general level
stat moved to BaseCPU for every model.

2. Stat API Changes
a. SimpleCPU:
statExecutedInstType vector unified into committedInstType
numCondCtrlInsts unified into committedControl::isControl

b. O3CPU:
i. Fetch Stage
branches in fetch unified into with numBranches
rate renamed to fetchRate
insts unified into with numInsts

ii. Execute Stage
Regfile stats unified into base with use of Simple's stat naming
numRefs in IEW unified into numMemRefs
numRate from IEW renamed to instRate

iii. Commit Stage
committedInsts is renamed to numInstsNotNOP
committedOps is renamed to numOpsNotNOP
instsCommitted is unified into numInsts
opsCommitted is unified into numOps
branches is unified into committedControl::isControl
floating is unified into numFpInsts
integer is unified into numIntInsts
loads is unified into numLoadInsts
memRefs is renamed to numMemRefs
vectorInstructions is unified into numVecInsts

3. Details:
Created three stat groups in BaseCPU. FetchCPUStats track statistics
related to the fetch stage. ExecuteCPUStats track statistics related
to the execute stage. CommitCPUStats track statistics related to the
commit stage.

There are three vectors in Base that store unique pointers to per
thread instances of these stat groups. The stat group pointer for
thread i is accessible at index i of one of these vectors. For example,
stat numCCRegReads of the execute stage for thread 0 can be accessed
with executeStats[0]->numCCRegReads. The stats.txt output will print the
thread ID of the stat group. For example, numVecRegReads on thread 0
of a single core prints as
"board.processor.cores.core.executeStats0.numVecRegReads".
NOTE: Multithreading in gem5 is untested. Therefore per thread stats
output in stats.txt is not currently guaranteed to be correctly
formatted.

For FetchCPUStats, the stats moved from  SimpleCPU are numBranches
and numInsts. From MinorCPU, the stat moved is numFetchSuspends. From
O3CPU, the stats moved are from the O3 fetch stage: Stat branches is
unified into numBranches, stat rate is renamed to fetchRate in Base,
stat insts is unified into numInsts, stat icacheStallCycles keeps the
same name in Base.

For ExecuteCPUStats, the stats moved from SimpleCPU are
dcacheStallCycles, numCCRegReads, numCCRegWrites,
numFpAluAccesses, numFpRegReads, numFpRegWrites, numIntAluAccesses,
numIntRegReads, numIntRegWrites, numMemRefs, numMiscRegReads,
numMiscRegWrites, numVecAluAccesses, numVecPredRegReads,
numVecPredRegWrites, numVecRegReads, numVecRegWrites. The stat moved
from MinorCPU is numDiscardedOps. From O3, the Regfile stats in CPU are
unified into the reg stats in Base and use the names found originally
in SimpleCPU. From O3 IEW stage, numInsts keeps the same name in
Base, numBranches is unified into numBranches in base, numNop keeps
the same name in Base, numRefs is unified into numMemRefs in Base,
numLoadInsts and numStoreInsts are moved into Base, numRate is renamed
to instRate in base.

For CommitCPUStats, the stats moved from SimpleCPU are
numCondCtrlInsts, numFpInsts, numIntInsts, numLoadInsts, numStoreInsts,
numVecInsts. The stats moved from MinorCPU are numInsts,
committedInstType, and committedControl. statExecutedInstType of
SimpleCPU is unified with committedInstType of MinorCPU. Implemented
committedControl stats from MinorCPU in Simple and O3 CPU. In MinorCPU,
this stat was a 2D vector, where the first dimension is the thread ID.
In base it is now a 1D vector that is tied to a thread ID via the
commitStats vector that the object is accessible through. From the O3
commit stage, committedInsts is renamed to numInstsNotNOP, committedOps
is renamed to numOpsNotNOP, instsCommitted is unified into numInsts,
opsCommitted is renamed to numOps, committedInstType is unified into
committedInstType from Minor, branches is removed because it duplicates
committedControl::IsControl, floating is unified into numFpInsts,
interger is unified into numIntInsts, loads is unified into
numLoadInsts, numStoreInsts is implemented for tracking in O3, memRefs
is renamed to numMemRefs, vectorInstructions is unified into
numVecInsts. Note that numCondCtrlInsts of Simple is unified into
committedControl::IsCondCtrl.

Implemented IPC and CPI tracking inside BaseCPU.
In BaseCPU::BaseCPUStats, numInsts and numOps track per CPU core
committed instructions and operations.
In BaseCPU::FetchCPUStats, numInsts and numOps track per thread
fetched instructions and operations.
In BaseCPU::CommitCPUStats, numInsts tracks per thread executed
instructions.
In BaseCPU::CommitCPUStats, numInsts and numOps track per thread
committed instructions and operations.
In BaseSimpleCPU, the countInst() function has been split into
countInst(), countFetchInst(), and countCommitInst(). The stat count
incrementation step of countInst() has been removed and delegated to the
other two functions. countFetchInst() increments numInsts and numOps
of the FetchCPUStats group for a thread. countCommitInst() increments
the numInsts and numOps of the CommitCPUStats group for a thread and
of the BaseCPUStats group for a CPU core. These functions are called
in the appropriate stage within timing.cc and atomic.cc. The call to
countInst() is left unchanged. countFetchInst() is called in
preExecute(). countCommitInst() is called in postExecute().
For MinorCPU, only the commit level numInsts and numOps stats have been
implemented.
IPC and CPI stats have been added to BaseCPUStats (core level) and
CommitCPUStats (thread level). The formulas for the IPC and CPI stats
in CommitCPUStats are set in the BaseCPU constructor, after the
CommitCPUStats stat group object has been created. These replace IPC,
CPI, totalIpc, and totalCpi stats in O3.

Replaced committedInsts stats of KVM CPU with commitStats.numInsts
of BaseCPU. This results in IPC and CPI printing in stats.txt for
KVM simulations.

This change does not implement most general stats found in one or two
model for all others.

Jira Ticket: https://gem5.atlassian.net/browse/GEM5-1304

Change-Id: I3c852f8dba3268c71b7a3415480fb63d8dc30cb7
Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/66031
Maintainer: Bobby Bruce <bbruce@ucdavis.edu>
Reviewed-by: Bobby Bruce <bbruce@ucdavis.edu>
Tested-by: kokoro <noreply+kokoro@google.com>
---
 src/cpu/base.cc                | 19 +++++++++++++++++++
 src/cpu/base.hh                | 16 ++++++++++++++++
 src/cpu/minor/execute.cc       |  2 +-
 src/cpu/minor/stats.cc         |  2 --
 src/cpu/minor/stats.hh         |  3 ---
 src/cpu/simple/base.cc         |  2 +-
 src/cpu/simple/exec_context.hh |  7 -------
 7 files changed, 37 insertions(+), 14 deletions(-)

diff --git a/src/cpu/base.cc b/src/cpu/base.cc
index d2c0a78d44..1d293397e5 100644
--- a/src/cpu/base.cc
+++ b/src/cpu/base.cc
@@ -191,6 +191,11 @@ BaseCPU::BaseCPU(const Params &p, bool is_checker)
     modelResetPort.onChange([this](const bool &new_val) {
         setReset(new_val);
     });
+    // create a stat group object for each thread on this core
+    fetchStats.reserve(numThreads);
+    for (int i = 0; i < numThreads; i++) {
+        fetchStats.emplace_back(new FetchCPUStats(this, i));
+    }
 }
 
 void
@@ -827,4 +832,18 @@ BaseCPU::GlobalStats::GlobalStats(statistics::Group *parent)
     hostOpRate = simOps / hostSeconds;
 }
 
+BaseCPU::
+FetchCPUStats::FetchCPUStats(statistics::Group *parent, int thread_id)
+    : statistics::Group(parent, csprintf("fetchStats%i", thread_id).c_str()),
+    ADD_STAT(numBranches, statistics::units::Count::get(),
+             "Number of branches fetched"),
+    ADD_STAT(numFetchSuspends, statistics::units::Count::get(),
+             "Number of times Execute suspended instruction fetching")
+
+{
+    numBranches
+        .prereq(numBranches);
+
+}
+
 } // namespace gem5
diff --git a/src/cpu/base.hh b/src/cpu/base.hh
index 084d9b9305..d6e5d38838 100644
--- a/src/cpu/base.hh
+++ b/src/cpu/base.hh
@@ -43,6 +43,7 @@
 #define __CPU_BASE_HH__
 
 #include <vector>
+#include <memory>
 
 #include "arch/generic/interrupts.hh"
 #include "base/statistics.hh"
@@ -676,6 +677,21 @@ class BaseCPU : public ClockedObject
     const Cycles pwrGatingLatency;
     const bool powerGatingOnIdle;
     EventFunctionWrapper enterPwrGatingEvent;
+
+  public:
+    struct FetchCPUStats : public statistics::Group
+    {
+        FetchCPUStats(statistics::Group *parent, int thread_id);
+
+        /* Total number of branches fetched */
+        statistics::Scalar numBranches;
+
+        /* Number of times fetch was asked to suspend by Execute */
+        statistics::Scalar numFetchSuspends;
+
+    };
+
+    std::vector<std::unique_ptr<FetchCPUStats>> fetchStats;
 };
 
 } // namespace gem5
diff --git a/src/cpu/minor/execute.cc b/src/cpu/minor/execute.cc
index 5eaaf5804e..323ae2982b 100644
--- a/src/cpu/minor/execute.cc
+++ b/src/cpu/minor/execute.cc
@@ -1054,7 +1054,7 @@ Execute::commitInst(MinorDynInstPtr inst, bool early_memory_issue,
             DPRINTF(MinorInterrupt, "Suspending thread: %d from Execute"
                 " inst: %s\n", thread_id, *inst);
 
-            cpu.stats.numFetchSuspends++;
+            cpu.fetchStats[thread_id]->numFetchSuspends++;
 
             updateBranchData(thread_id, BranchData::SuspendThread, inst,
                 resume_pc, branch);
diff --git a/src/cpu/minor/stats.cc b/src/cpu/minor/stats.cc
index 64d4c475e0..e9ca562c16 100644
--- a/src/cpu/minor/stats.cc
+++ b/src/cpu/minor/stats.cc
@@ -52,8 +52,6 @@ MinorStats::MinorStats(BaseCPU *base_cpu)
     ADD_STAT(numDiscardedOps, statistics::units::Count::get(),
              "Number of ops (including micro ops) which were discarded before "
              "commit"),
-    ADD_STAT(numFetchSuspends, statistics::units::Count::get(),
-             "Number of times Execute suspended instruction fetching"),
     ADD_STAT(quiesceCycles, statistics::units::Cycle::get(),
              "Total number of cycles that CPU has spent quiesced or waiting "
              "for an interrupt"),
diff --git a/src/cpu/minor/stats.hh b/src/cpu/minor/stats.hh
index 1ab81f4407..524d20f85d 100644
--- a/src/cpu/minor/stats.hh
+++ b/src/cpu/minor/stats.hh
@@ -68,9 +68,6 @@ struct MinorStats : public statistics::Group
     /** Number of ops discarded before committing */
     statistics::Scalar numDiscardedOps;
 
-    /** Number of times fetch was asked to suspend by Execute */
-    statistics::Scalar numFetchSuspends;
-
     /** Number of cycles in quiescent state */
     statistics::Scalar quiesceCycles;
 
diff --git a/src/cpu/simple/base.cc b/src/cpu/simple/base.cc
index 768f63ede5..b2a11fd84b 100644
--- a/src/cpu/simple/base.cc
+++ b/src/cpu/simple/base.cc
@@ -396,7 +396,7 @@ BaseSimpleCPU::postExecute()
     }
 
     if (curStaticInst->isControl()) {
-        ++t_info.execContextStats.numBranches;
+        ++fetchStats[t_info.thread->threadId()]->numBranches;
     }
 
     /* Power model statistics */
diff --git a/src/cpu/simple/exec_context.hh b/src/cpu/simple/exec_context.hh
index 0f20763f28..d4bb017481 100644
--- a/src/cpu/simple/exec_context.hh
+++ b/src/cpu/simple/exec_context.hh
@@ -152,8 +152,6 @@ class SimpleExecContext : public ExecContext
                        "ICache total stall cycles"),
               ADD_STAT(dcacheStallCycles, statistics::units::Cycle::get(),
                        "DCache total stall cycles"),
-              ADD_STAT(numBranches, statistics::units::Count::get(),
-                       "Number of branches fetched"),
               ADD_STAT(numPredictedBranches, statistics::units::Count::get(),
                        "Number of branches predicted as taken"),
               ADD_STAT(numBranchMispred, statistics::units::Count::get(),
@@ -203,9 +201,6 @@ class SimpleExecContext : public ExecContext
             numIdleCycles = idleFraction * cpu->baseStats.numCycles;
             numBusyCycles = notIdleFraction * cpu->baseStats.numCycles;
 
-            numBranches
-                .prereq(numBranches);
-
             numPredictedBranches
                 .prereq(numPredictedBranches);
 
@@ -297,8 +292,6 @@ class SimpleExecContext : public ExecContext
         statistics::Scalar dcacheStallCycles;
 
         /// @{
-        /// Total number of branches fetched
-        statistics::Scalar numBranches;
         /// Number of branches predicted as taken
         statistics::Scalar numPredictedBranches;
         /// Number of misprediced branches

From fd2d80baa39645842985a489edd20e0fab15b9d1 Mon Sep 17 00:00:00 2001
From: Melissa Jost <melissakjost@gmail.com>
Date: Thu, 19 Jan 2023 00:25:26 -0800
Subject: [PATCH 238/492] cpu: Move execute stats from simple and minor to base

Created stat group ExecuteCPUStats in BaseCPU and moved stats from the
simple and minor cpu models.

The stats moved from SimpleCPU are dcacheStallCycles,
icacheStallCycles, numCCRegReads, numCCRegWrites, numFpAluAccesses,
numFpRegReads, numFpRegWrites, numIntAluAccesses, numIntRegReads,
numIntRegWrites, numMemRefs, numMiscRegReads, numMiscRegWrites,
numVecAluAccesses, numVecPredRegReads, numVecPredRegWrites,
numVecRegReads, numVecRegWrites.

The stat moved from MinorCPU is numDiscardedOps.

Also, ccRegfileReads, ccRegfileWrites, fpRegfileReads, fpRegfileWrites,
intRegfileReads, intRegfileWrites, miscRegfileReads, miscRegfileWrites,
vecPredRegfileReads, vecPredRegfileWrites, vecRegfileReads,
and vecRegfileWrites are removed from cpu.hh and cpu.cc in O3CPU. The
corresponding stats in BaseCPU::ExecuteCPUStats are used instead.
Changed the getReg, getWritableReg, and setReg functions in the O3 CPU
object to take the thread ID as a parameter. This is because the stats
in base are stored in vectors that are indexed by thread ID.

Change-Id: I801c5ceb4c70b7b281127569f11c6ee98f614b27
Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/67390
Maintainer: Bobby Bruce <bbruce@ucdavis.edu>
Reviewed-by: Bobby Bruce <bbruce@ucdavis.edu>
Tested-by: kokoro <noreply+kokoro@google.com>
---
 src/cpu/base.cc                |  76 ++++++++++++++++++++
 src/cpu/base.hh                |  48 +++++++++++++
 src/cpu/minor/execute.cc       |   2 +-
 src/cpu/minor/stats.cc         |   3 -
 src/cpu/minor/stats.hh         |   3 -
 src/cpu/o3/cpu.cc              | 120 ++++++++-----------------------
 src/cpu/o3/cpu.hh              |  28 ++------
 src/cpu/o3/dyn_inst.hh         |  14 ++--
 src/cpu/simple/base.cc         |   8 +--
 src/cpu/simple/exec_context.hh | 125 +++++----------------------------
 10 files changed, 190 insertions(+), 237 deletions(-)

diff --git a/src/cpu/base.cc b/src/cpu/base.cc
index 1d293397e5..b10c731e17 100644
--- a/src/cpu/base.cc
+++ b/src/cpu/base.cc
@@ -193,8 +193,10 @@ BaseCPU::BaseCPU(const Params &p, bool is_checker)
     });
     // create a stat group object for each thread on this core
     fetchStats.reserve(numThreads);
+    executeStats.reserve(numThreads);
     for (int i = 0; i < numThreads; i++) {
         fetchStats.emplace_back(new FetchCPUStats(this, i));
+        executeStats.emplace_back(new ExecuteCPUStats(this, i));
     }
 }
 
@@ -846,4 +848,78 @@ FetchCPUStats::FetchCPUStats(statistics::Group *parent, int thread_id)
 
 }
 
+// means it is incremented in a vector indexing and not directly
+BaseCPU::
+ExecuteCPUStats::ExecuteCPUStats(statistics::Group *parent, int thread_id)
+    : statistics::Group(parent, csprintf("executeStats%i", thread_id).c_str()),
+    ADD_STAT(dcacheStallCycles, statistics::units::Cycle::get(),
+             "DCache total stall cycles"),
+    ADD_STAT(numCCRegReads, statistics::units::Count::get(),
+             "Number of times the CC registers were read"),
+    ADD_STAT(numCCRegWrites, statistics::units::Count::get(),
+             "Number of times the CC registers were written"),
+    ADD_STAT(numFpAluAccesses, statistics::units::Count::get(),
+             "Number of float alu accesses"),
+    ADD_STAT(numFpRegReads, statistics::units::Count::get(),
+             "Number of times the floating registers were read"),
+    ADD_STAT(numFpRegWrites, statistics::units::Count::get(),
+             "Number of times the floating registers were written"),
+    ADD_STAT(numIntAluAccesses, statistics::units::Count::get(),
+             "Number of integer alu accesses"),
+    ADD_STAT(numIntRegReads, statistics::units::Count::get(),
+             "Number of times the integer registers were read"),
+    ADD_STAT(numIntRegWrites, statistics::units::Count::get(),
+             "Number of times the integer registers were written"),
+    ADD_STAT(numMemRefs, statistics::units::Count::get(),
+             "Number of memory refs"),
+    ADD_STAT(numMiscRegReads, statistics::units::Count::get(),
+             "Number of times the Misc registers were read"),
+    ADD_STAT(numMiscRegWrites, statistics::units::Count::get(),
+             "Number of times the Misc registers were written"),
+    ADD_STAT(numVecAluAccesses, statistics::units::Count::get(),
+             "Number of vector alu accesses"),
+    ADD_STAT(numVecPredRegReads, statistics::units::Count::get(),
+             "Number of times the predicate registers were read"),
+    ADD_STAT(numVecPredRegWrites, statistics::units::Count::get(),
+             "Number of times the predicate registers were written"),
+    ADD_STAT(numVecRegReads, statistics::units::Count::get(),
+             "Number of times the vector registers were read"),
+    ADD_STAT(numVecRegWrites, statistics::units::Count::get(),
+             "Number of times the vector registers were written"),
+    ADD_STAT(numDiscardedOps, statistics::units::Count::get(),
+             "Number of ops (including micro ops) which were discarded before "
+             "commit")
+{
+    dcacheStallCycles
+                .prereq(dcacheStallCycles);
+    numCCRegReads
+                .prereq(numCCRegReads)
+                .flags(statistics::nozero);
+    numCCRegWrites
+                .prereq(numCCRegWrites)
+                .flags(statistics::nozero);
+    numFpAluAccesses
+                .prereq(numFpAluAccesses);
+    numFpRegReads
+                .prereq(numFpRegReads);
+    numIntAluAccesses
+                .prereq(numIntAluAccesses);
+    numIntRegReads
+                .prereq(numIntRegReads);
+    numIntRegWrites
+                .prereq(numIntRegWrites);
+    numMiscRegReads
+                .prereq(numMiscRegReads);
+    numMiscRegWrites
+                .prereq(numMiscRegWrites);
+    numVecPredRegReads
+                .prereq(numVecPredRegReads);
+    numVecPredRegWrites
+                .prereq(numVecPredRegWrites);
+    numVecRegReads
+                .prereq(numVecRegReads);
+    numVecRegWrites
+                .prereq(numVecRegWrites);
+}
+
 } // namespace gem5
diff --git a/src/cpu/base.hh b/src/cpu/base.hh
index d6e5d38838..ad6fa469a3 100644
--- a/src/cpu/base.hh
+++ b/src/cpu/base.hh
@@ -691,7 +691,55 @@ class BaseCPU : public ClockedObject
 
     };
 
+    struct ExecuteCPUStats: public statistics::Group
+    {
+        ExecuteCPUStats(statistics::Group *parent, int thread_id);
+
+        /* Number of cycles stalled for D-cache responses */
+        statistics::Scalar dcacheStallCycles;
+
+        /* Number of condition code register file accesses */
+        statistics::Scalar numCCRegReads;
+        statistics::Scalar numCCRegWrites;
+
+        /* number of float alu accesses */
+        statistics::Scalar numFpAluAccesses;
+
+        /* Number of float register file accesses */
+        statistics::Scalar numFpRegReads;
+        statistics::Scalar numFpRegWrites;
+
+        /* Number of integer alu accesses */
+        statistics::Scalar numIntAluAccesses;
+
+        /* Number of integer register file accesses */
+        statistics::Scalar numIntRegReads;
+        statistics::Scalar numIntRegWrites;
+
+        /* number of simulated memory references */
+        statistics::Scalar numMemRefs;
+
+        /* Number of misc register file accesses */
+        statistics::Scalar numMiscRegReads;
+        statistics::Scalar numMiscRegWrites;
+
+        /* Number of vector alu accesses */
+        statistics::Scalar numVecAluAccesses;
+
+        /* Number of predicate register file accesses */
+        mutable statistics::Scalar numVecPredRegReads;
+        statistics::Scalar numVecPredRegWrites;
+
+        /* Number of vector register file accesses */
+        mutable statistics::Scalar numVecRegReads;
+        statistics::Scalar numVecRegWrites;
+
+        /* Number of ops discarded before committing */
+        statistics::Scalar numDiscardedOps;
+    };
+
     std::vector<std::unique_ptr<FetchCPUStats>> fetchStats;
+    std::vector<std::unique_ptr<ExecuteCPUStats>> executeStats;
 };
 
 } // namespace gem5
diff --git a/src/cpu/minor/execute.cc b/src/cpu/minor/execute.cc
index 323ae2982b..d657de5225 100644
--- a/src/cpu/minor/execute.cc
+++ b/src/cpu/minor/execute.cc
@@ -1368,7 +1368,7 @@ Execute::commit(ThreadID thread_id, bool only_commit_microops, bool discard,
                 *inst, ex_info.streamSeqNum);
 
             if (fault == NoFault)
-                cpu.stats.numDiscardedOps++;
+                cpu.executeStats[thread_id]->numDiscardedOps++;
         }
 
         /* Mark the mem inst as being in the LSQ */
diff --git a/src/cpu/minor/stats.cc b/src/cpu/minor/stats.cc
index e9ca562c16..10e7573afd 100644
--- a/src/cpu/minor/stats.cc
+++ b/src/cpu/minor/stats.cc
@@ -49,9 +49,6 @@ MinorStats::MinorStats(BaseCPU *base_cpu)
              "Number of instructions committed"),
     ADD_STAT(numOps, statistics::units::Count::get(),
              "Number of ops (including micro ops) committed"),
-    ADD_STAT(numDiscardedOps, statistics::units::Count::get(),
-             "Number of ops (including micro ops) which were discarded before "
-             "commit"),
     ADD_STAT(quiesceCycles, statistics::units::Cycle::get(),
              "Total number of cycles that CPU has spent quiesced or waiting "
              "for an interrupt"),
diff --git a/src/cpu/minor/stats.hh b/src/cpu/minor/stats.hh
index 524d20f85d..e5d018679d 100644
--- a/src/cpu/minor/stats.hh
+++ b/src/cpu/minor/stats.hh
@@ -65,9 +65,6 @@ struct MinorStats : public statistics::Group
     /** Number of simulated insts and microops */
     statistics::Scalar numOps;
 
-    /** Number of ops discarded before committing */
-    statistics::Scalar numDiscardedOps;
-
     /** Number of cycles in quiescent state */
     statistics::Scalar quiesceCycles;
 
diff --git a/src/cpu/o3/cpu.cc b/src/cpu/o3/cpu.cc
index d2bacaa523..90df3b349e 100644
--- a/src/cpu/o3/cpu.cc
+++ b/src/cpu/o3/cpu.cc
@@ -344,31 +344,7 @@ CPU::CPUStats::CPUStats(CPU *cpu)
                "IPC: Instructions Per Cycle"),
       ADD_STAT(totalIpc, statistics::units::Rate<
                     statistics::units::Count, statistics::units::Cycle>::get(),
-               "IPC: Total IPC of All Threads"),
-      ADD_STAT(intRegfileReads, statistics::units::Count::get(),
-               "Number of integer regfile reads"),
-      ADD_STAT(intRegfileWrites, statistics::units::Count::get(),
-               "Number of integer regfile writes"),
-      ADD_STAT(fpRegfileReads, statistics::units::Count::get(),
-               "Number of floating regfile reads"),
-      ADD_STAT(fpRegfileWrites, statistics::units::Count::get(),
-               "Number of floating regfile writes"),
-      ADD_STAT(vecRegfileReads, statistics::units::Count::get(),
-               "number of vector regfile reads"),
-      ADD_STAT(vecRegfileWrites, statistics::units::Count::get(),
-               "number of vector regfile writes"),
-      ADD_STAT(vecPredRegfileReads, statistics::units::Count::get(),
-               "number of predicate regfile reads"),
-      ADD_STAT(vecPredRegfileWrites, statistics::units::Count::get(),
-               "number of predicate regfile writes"),
-      ADD_STAT(ccRegfileReads, statistics::units::Count::get(),
-               "number of cc regfile reads"),
-      ADD_STAT(ccRegfileWrites, statistics::units::Count::get(),
-               "number of cc regfile writes"),
-      ADD_STAT(miscRegfileReads, statistics::units::Count::get(),
-               "number of misc regfile reads"),
-      ADD_STAT(miscRegfileWrites, statistics::units::Count::get(),
-               "number of misc regfile writes")
+               "IPC: Total IPC of All Threads")
 {
     // Register any of the O3CPU's stats here.
     timesIdled
@@ -407,42 +383,6 @@ CPU::CPUStats::CPUStats(CPU *cpu)
     totalIpc
         .precision(6);
     totalIpc = sum(committedInsts) / cpu->baseStats.numCycles;
-
-    intRegfileReads
-        .prereq(intRegfileReads);
-
-    intRegfileWrites
-        .prereq(intRegfileWrites);
-
-    fpRegfileReads
-        .prereq(fpRegfileReads);
-
-    fpRegfileWrites
-        .prereq(fpRegfileWrites);
-
-    vecRegfileReads
-        .prereq(vecRegfileReads);
-
-    vecRegfileWrites
-        .prereq(vecRegfileWrites);
-
-    vecPredRegfileReads
-        .prereq(vecPredRegfileReads);
-
-    vecPredRegfileWrites
-        .prereq(vecPredRegfileWrites);
-
-    ccRegfileReads
-        .prereq(ccRegfileReads);
-
-    ccRegfileWrites
-        .prereq(ccRegfileWrites);
-
-    miscRegfileReads
-        .prereq(miscRegfileReads);
-
-    miscRegfileWrites
-        .prereq(miscRegfileWrites);
 }
 
 void
@@ -1019,7 +959,7 @@ CPU::readMiscRegNoEffect(int misc_reg, ThreadID tid) const
 RegVal
 CPU::readMiscReg(int misc_reg, ThreadID tid)
 {
-    cpuStats.miscRegfileReads++;
+    executeStats[tid]->numMiscRegReads++;
     return isa[tid]->readMiscReg(misc_reg);
 }
 
@@ -1032,29 +972,29 @@ CPU::setMiscRegNoEffect(int misc_reg, RegVal val, ThreadID tid)
 void
 CPU::setMiscReg(int misc_reg, RegVal val, ThreadID tid)
 {
-    cpuStats.miscRegfileWrites++;
+    executeStats[tid]->numMiscRegWrites++;
     isa[tid]->setMiscReg(misc_reg, val);
 }
 
 RegVal
-CPU::getReg(PhysRegIdPtr phys_reg)
+CPU::getReg(PhysRegIdPtr phys_reg, ThreadID tid)
 {
     switch (phys_reg->classValue()) {
       case IntRegClass:
-        cpuStats.intRegfileReads++;
+        executeStats[tid]->numIntRegReads++;
         break;
       case FloatRegClass:
-        cpuStats.fpRegfileReads++;
+        executeStats[tid]->numFpRegReads++;
         break;
       case CCRegClass:
-        cpuStats.ccRegfileReads++;
+        executeStats[tid]->numCCRegReads++;
         break;
       case VecRegClass:
       case VecElemClass:
-        cpuStats.vecRegfileReads++;
+        executeStats[tid]->numVecRegReads++;
         break;
       case VecPredRegClass:
-        cpuStats.vecPredRegfileReads++;
+        executeStats[tid]->numVecPredRegReads++;
         break;
       default:
         break;
@@ -1063,24 +1003,24 @@ CPU::getReg(PhysRegIdPtr phys_reg)
 }
 
 void
-CPU::getReg(PhysRegIdPtr phys_reg, void *val)
+CPU::getReg(PhysRegIdPtr phys_reg, void *val, ThreadID tid)
 {
     switch (phys_reg->classValue()) {
       case IntRegClass:
-        cpuStats.intRegfileReads++;
+        executeStats[tid]->numIntRegReads++;
         break;
       case FloatRegClass:
-        cpuStats.fpRegfileReads++;
+        executeStats[tid]->numFpRegReads++;
         break;
       case CCRegClass:
-        cpuStats.ccRegfileReads++;
+        executeStats[tid]->numCCRegReads++;
         break;
       case VecRegClass:
       case VecElemClass:
-        cpuStats.vecRegfileReads++;
+        executeStats[tid]->numVecRegReads++;
         break;
       case VecPredRegClass:
-        cpuStats.vecPredRegfileReads++;
+        executeStats[tid]->numVecPredRegReads++;
         break;
       default:
         break;
@@ -1089,14 +1029,14 @@ CPU::getReg(PhysRegIdPtr phys_reg, void *val)
 }
 
 void *
-CPU::getWritableReg(PhysRegIdPtr phys_reg)
+CPU::getWritableReg(PhysRegIdPtr phys_reg, ThreadID tid)
 {
     switch (phys_reg->classValue()) {
       case VecRegClass:
-        cpuStats.vecRegfileReads++;
+        executeStats[tid]->numVecRegReads++;
         break;
       case VecPredRegClass:
-        cpuStats.vecPredRegfileReads++;
+        executeStats[tid]->numVecPredRegReads++;
         break;
       default:
         break;
@@ -1105,24 +1045,24 @@ CPU::getWritableReg(PhysRegIdPtr phys_reg)
 }
 
 void
-CPU::setReg(PhysRegIdPtr phys_reg, RegVal val)
+CPU::setReg(PhysRegIdPtr phys_reg, RegVal val, ThreadID tid)
 {
     switch (phys_reg->classValue()) {
       case IntRegClass:
-        cpuStats.intRegfileWrites++;
+        executeStats[tid]->numIntRegWrites++;
         break;
       case FloatRegClass:
-        cpuStats.fpRegfileWrites++;
+        executeStats[tid]->numFpRegWrites++;
         break;
       case CCRegClass:
-        cpuStats.ccRegfileWrites++;
+        executeStats[tid]->numCCRegWrites++;
         break;
       case VecRegClass:
       case VecElemClass:
-        cpuStats.vecRegfileWrites++;
+        executeStats[tid]->numVecRegWrites++;
         break;
       case VecPredRegClass:
-        cpuStats.vecPredRegfileWrites++;
+        executeStats[tid]->numVecPredRegWrites++;
         break;
       default:
         break;
@@ -1131,24 +1071,24 @@ CPU::setReg(PhysRegIdPtr phys_reg, RegVal val)
 }
 
 void
-CPU::setReg(PhysRegIdPtr phys_reg, const void *val)
+CPU::setReg(PhysRegIdPtr phys_reg, const void *val, ThreadID tid)
 {
     switch (phys_reg->classValue()) {
       case IntRegClass:
-        cpuStats.intRegfileWrites++;
+        executeStats[tid]->numIntRegWrites++;
         break;
       case FloatRegClass:
-        cpuStats.fpRegfileWrites++;
+        executeStats[tid]->numFpRegWrites++;
         break;
       case CCRegClass:
-        cpuStats.ccRegfileWrites++;
+        executeStats[tid]->numCCRegWrites++;
         break;
       case VecRegClass:
       case VecElemClass:
-        cpuStats.vecRegfileWrites++;
+        executeStats[tid]->numVecRegWrites++;
         break;
       case VecPredRegClass:
-        cpuStats.vecPredRegfileWrites++;
+        executeStats[tid]->numVecPredRegWrites++;
         break;
       default:
         break;
diff --git a/src/cpu/o3/cpu.hh b/src/cpu/o3/cpu.hh
index 08a1312e73..07775298af 100644
--- a/src/cpu/o3/cpu.hh
+++ b/src/cpu/o3/cpu.hh
@@ -310,12 +310,12 @@ class CPU : public BaseCPU
      */
     void setMiscReg(int misc_reg, RegVal val, ThreadID tid);
 
-    RegVal getReg(PhysRegIdPtr phys_reg);
-    void getReg(PhysRegIdPtr phys_reg, void *val);
-    void *getWritableReg(PhysRegIdPtr phys_reg);
+    RegVal getReg(PhysRegIdPtr phys_reg, ThreadID tid);
+    void getReg(PhysRegIdPtr phys_reg, void *val, ThreadID tid);
+    void *getWritableReg(PhysRegIdPtr phys_reg, ThreadID tid);
 
-    void setReg(PhysRegIdPtr phys_reg, RegVal val);
-    void setReg(PhysRegIdPtr phys_reg, const void *val);
+    void setReg(PhysRegIdPtr phys_reg, RegVal val, ThreadID tid);
+    void setReg(PhysRegIdPtr phys_reg, const void *val, ThreadID tid);
 
     /** Architectural register accessors.  Looks up in the commit
      * rename table to obtain the true physical index of the
@@ -595,24 +595,6 @@ class CPU : public BaseCPU
         /** Stat for the total IPC. */
         statistics::Formula totalIpc;
 
-        //number of integer register file accesses
-        statistics::Scalar intRegfileReads;
-        statistics::Scalar intRegfileWrites;
-        //number of float register file accesses
-        statistics::Scalar fpRegfileReads;
-        statistics::Scalar fpRegfileWrites;
-        //number of vector register file accesses
-        mutable statistics::Scalar vecRegfileReads;
-        statistics::Scalar vecRegfileWrites;
-        //number of predicate register file accesses
-        mutable statistics::Scalar vecPredRegfileReads;
-        statistics::Scalar vecPredRegfileWrites;
-        //number of CC register file accesses
-        statistics::Scalar ccRegfileReads;
-        statistics::Scalar ccRegfileWrites;
-        //number of misc
-        statistics::Scalar miscRegfileReads;
-        statistics::Scalar miscRegfileWrites;
     } cpuStats;
 
   public:
diff --git a/src/cpu/o3/dyn_inst.hh b/src/cpu/o3/dyn_inst.hh
index 54c0385374..c759c5eb38 100644
--- a/src/cpu/o3/dyn_inst.hh
+++ b/src/cpu/o3/dyn_inst.hh
@@ -1086,10 +1086,10 @@ class DynInst : public ExecContext, public RefCounted
 
             if (bytes == sizeof(RegVal)) {
                 setRegOperand(staticInst.get(), idx,
-                        cpu->getReg(prev_phys_reg));
+                        cpu->getReg(prev_phys_reg, threadNumber));
             } else {
                 uint8_t val[original_dest_reg.regClass().regBytes()];
-                cpu->getReg(prev_phys_reg, val);
+                cpu->getReg(prev_phys_reg, val, threadNumber);
                 setRegOperand(staticInst.get(), idx, val);
             }
         }
@@ -1116,7 +1116,7 @@ class DynInst : public ExecContext, public RefCounted
         const PhysRegIdPtr reg = renamedSrcIdx(idx);
         if (reg->is(InvalidRegClass))
             return 0;
-        return cpu->getReg(reg);
+        return cpu->getReg(reg, threadNumber);
     }
 
     void
@@ -1125,13 +1125,13 @@ class DynInst : public ExecContext, public RefCounted
         const PhysRegIdPtr reg = renamedSrcIdx(idx);
         if (reg->is(InvalidRegClass))
             return;
-        cpu->getReg(reg, val);
+        cpu->getReg(reg, val, threadNumber);
     }
 
     void *
     getWritableRegOperand(const StaticInst *si, int idx) override
     {
-        return cpu->getWritableReg(renamedDestIdx(idx));
+        return cpu->getWritableReg(renamedDestIdx(idx), threadNumber);
     }
 
     /** @todo: Make results into arrays so they can handle multiple dest
@@ -1143,7 +1143,7 @@ class DynInst : public ExecContext, public RefCounted
         const PhysRegIdPtr reg = renamedDestIdx(idx);
         if (reg->is(InvalidRegClass))
             return;
-        cpu->setReg(reg, val);
+        cpu->setReg(reg, val, threadNumber);
         setResult(reg->regClass(), val);
     }
 
@@ -1153,7 +1153,7 @@ class DynInst : public ExecContext, public RefCounted
         const PhysRegIdPtr reg = renamedDestIdx(idx);
         if (reg->is(InvalidRegClass))
             return;
-        cpu->setReg(reg, val);
+        cpu->setReg(reg, val, threadNumber);
         setResult(reg->regClass(), val);
     }
 };
diff --git a/src/cpu/simple/base.cc b/src/cpu/simple/base.cc
index b2a11fd84b..c8d9aeeb86 100644
--- a/src/cpu/simple/base.cc
+++ b/src/cpu/simple/base.cc
@@ -388,7 +388,7 @@ BaseSimpleCPU::postExecute()
     Addr instAddr = threadContexts[curThread]->pcState().instAddr();
 
     if (curStaticInst->isMemRef()) {
-        t_info.execContextStats.numMemRefs++;
+        executeStats[t_info.thread->threadId()]->numMemRefs++;
     }
 
     if (curStaticInst->isLoad()) {
@@ -402,19 +402,19 @@ BaseSimpleCPU::postExecute()
     /* Power model statistics */
     //integer alu accesses
     if (curStaticInst->isInteger()){
-        t_info.execContextStats.numIntAluAccesses++;
+        executeStats[t_info.thread->threadId()]->numIntAluAccesses++;
         t_info.execContextStats.numIntInsts++;
     }
 
     //float alu accesses
     if (curStaticInst->isFloating()){
-        t_info.execContextStats.numFpAluAccesses++;
+        executeStats[t_info.thread->threadId()]->numFpAluAccesses++;
         t_info.execContextStats.numFpInsts++;
     }
 
     //vector alu accesses
     if (curStaticInst->isVector()){
-        t_info.execContextStats.numVecAluAccesses++;
+        executeStats[t_info.thread->threadId()]->numVecAluAccesses++;
         t_info.execContextStats.numVecInsts++;
     }
 
diff --git a/src/cpu/simple/exec_context.hh b/src/cpu/simple/exec_context.hh
index d4bb017481..00efd8593c 100644
--- a/src/cpu/simple/exec_context.hh
+++ b/src/cpu/simple/exec_context.hh
@@ -90,12 +90,6 @@ class SimpleExecContext : public ExecContext
                        "Number of instructions committed"),
               ADD_STAT(numOps, statistics::units::Count::get(),
                        "Number of ops (including micro ops) committed"),
-              ADD_STAT(numIntAluAccesses, statistics::units::Count::get(),
-                       "Number of integer alu accesses"),
-              ADD_STAT(numFpAluAccesses, statistics::units::Count::get(),
-                       "Number of float alu accesses"),
-              ADD_STAT(numVecAluAccesses, statistics::units::Count::get(),
-                       "Number of vector alu accesses"),
               ADD_STAT(numMatAluAccesses, statistics::units::Count::get(),
                        "Number of matrix alu accesses"),
               ADD_STAT(numCallsReturns, statistics::units::Count::get(),
@@ -110,32 +104,6 @@ class SimpleExecContext : public ExecContext
                        "Number of vector instructions"),
               ADD_STAT(numMatInsts, statistics::units::Count::get(),
                        "Number of matrix instructions"),
-              ADD_STAT(numIntRegReads, statistics::units::Count::get(),
-                       "Number of times the integer registers were read"),
-              ADD_STAT(numIntRegWrites, statistics::units::Count::get(),
-                       "Number of times the integer registers were written"),
-              ADD_STAT(numFpRegReads, statistics::units::Count::get(),
-                       "Number of times the floating registers were read"),
-              ADD_STAT(numFpRegWrites, statistics::units::Count::get(),
-                       "Number of times the floating registers were written"),
-              ADD_STAT(numVecRegReads, statistics::units::Count::get(),
-                       "Number of times the vector registers were read"),
-              ADD_STAT(numVecRegWrites, statistics::units::Count::get(),
-                       "Number of times the vector registers were written"),
-              ADD_STAT(numVecPredRegReads, statistics::units::Count::get(),
-                       "Number of times the predicate registers were read"),
-              ADD_STAT(numVecPredRegWrites, statistics::units::Count::get(),
-                       "Number of times the predicate registers were written"),
-              ADD_STAT(numCCRegReads, statistics::units::Count::get(),
-                       "Number of times the CC registers were read"),
-              ADD_STAT(numCCRegWrites, statistics::units::Count::get(),
-                       "Number of times the CC registers were written"),
-              ADD_STAT(numMiscRegReads, statistics::units::Count::get(),
-                       "Number of times the Misc registers were read"),
-              ADD_STAT(numMiscRegWrites, statistics::units::Count::get(),
-                       "Number of times the Misc registers were written"),
-              ADD_STAT(numMemRefs, statistics::units::Count::get(),
-                       "Number of memory refs"),
               ADD_STAT(numLoadInsts, statistics::units::Count::get(),
                        "Number of load instructions"),
               ADD_STAT(numStoreInsts, statistics::units::Count::get(),
@@ -148,10 +116,6 @@ class SimpleExecContext : public ExecContext
                        "Percentage of non-idle cycles"),
               ADD_STAT(idleFraction, statistics::units::Ratio::get(),
                        "Percentage of idle cycles"),
-              ADD_STAT(icacheStallCycles, statistics::units::Cycle::get(),
-                       "ICache total stall cycles"),
-              ADD_STAT(dcacheStallCycles, statistics::units::Cycle::get(),
-                       "DCache total stall cycles"),
               ADD_STAT(numPredictedBranches, statistics::units::Count::get(),
                        "Number of branches predicted as taken"),
               ADD_STAT(numBranchMispred, statistics::units::Count::get(),
@@ -159,36 +123,25 @@ class SimpleExecContext : public ExecContext
               ADD_STAT(statExecutedInstType, statistics::units::Count::get(),
                        "Class of executed instruction."),
               numRegReads{
-                  &numIntRegReads,
-                  &numFpRegReads,
-                  &numVecRegReads,
-                  &numVecRegReads,
-                  &numVecPredRegReads,
-                  &numMatRegReads,
-                  &numCCRegReads
+                  &(cpu->executeStats[thread->threadId()]->numIntRegReads),
+                  &(cpu->executeStats[thread->threadId()]->numFpRegReads),
+                  &(cpu->executeStats[thread->threadId()]->numVecRegReads),
+                  &(cpu->executeStats[thread->threadId()]->numVecRegReads),
+                  &(cpu->executeStats[thread->threadId()]->numVecPredRegReads),
+                  &(cpu->executeStats[thread->threadId()]->numCCRegReads),
+                  &numMatRegReads
               },
               numRegWrites{
-                  &numIntRegWrites,
-                  &numFpRegWrites,
-                  &numVecRegWrites,
-                  &numVecRegWrites,
-                  &numVecPredRegWrites,
-                  &numMatRegWrites,
-                  &numCCRegWrites
+                  &(cpu->executeStats[thread->threadId()]->numIntRegWrites),
+                  &(cpu->executeStats[thread->threadId()]->numFpRegWrites),
+                  &(cpu->executeStats[thread->threadId()]->numVecRegWrites),
+                  &(cpu->executeStats[thread->threadId()]->numVecRegWrites),
+                  &(cpu->executeStats[thread->threadId()]
+                        ->numVecPredRegWrites),
+                  &(cpu->executeStats[thread->threadId()]->numCCRegWrites),
+                  &numMatRegWrites
               }
         {
-            numCCRegReads
-                .flags(statistics::nozero);
-
-            numCCRegWrites
-                .flags(statistics::nozero);
-
-            icacheStallCycles
-                .prereq(icacheStallCycles);
-
-            dcacheStallCycles
-                .prereq(dcacheStallCycles);
-
             statExecutedInstType
                 .init(enums::Num_OpClass)
                 .flags(statistics::total | statistics::pdf | statistics::dist);
@@ -212,15 +165,6 @@ class SimpleExecContext : public ExecContext
         statistics::Scalar numInsts;
         statistics::Scalar numOps;
 
-        // Number of integer alu accesses
-        statistics::Scalar numIntAluAccesses;
-
-        // Number of float alu accesses
-        statistics::Scalar numFpAluAccesses;
-
-        // Number of vector alu accesses
-        statistics::Scalar numVecAluAccesses;
-
         // Number of matrix alu accesses
         statistics::Scalar numMatAluAccesses;
 
@@ -242,36 +186,11 @@ class SimpleExecContext : public ExecContext
         // Number of matrix instructions
         statistics::Scalar numMatInsts;
 
-        // Number of integer register file accesses
-        statistics::Scalar numIntRegReads;
-        statistics::Scalar numIntRegWrites;
-
-        // Number of float register file accesses
-        statistics::Scalar numFpRegReads;
-        statistics::Scalar numFpRegWrites;
-
-        // Number of vector register file accesses
-        mutable statistics::Scalar numVecRegReads;
-        statistics::Scalar numVecRegWrites;
-
-        // Number of predicate register file accesses
-        mutable statistics::Scalar numVecPredRegReads;
-        statistics::Scalar numVecPredRegWrites;
-
         // Number of matrix register file accesses
         mutable statistics::Scalar numMatRegReads;
         statistics::Scalar numMatRegWrites;
 
-        // Number of condition code register file accesses
-        statistics::Scalar numCCRegReads;
-        statistics::Scalar numCCRegWrites;
-
-        // Number of misc register file accesses
-        statistics::Scalar numMiscRegReads;
-        statistics::Scalar numMiscRegWrites;
-
         // Number of simulated memory references
-        statistics::Scalar numMemRefs;
         statistics::Scalar numLoadInsts;
         statistics::Scalar numStoreInsts;
 
@@ -285,12 +204,6 @@ class SimpleExecContext : public ExecContext
         statistics::Average notIdleFraction;
         statistics::Formula idleFraction;
 
-        // Number of cycles stalled for I-cache responses
-        statistics::Scalar icacheStallCycles;
-
-        // Number of cycles stalled for D-cache responses
-        statistics::Scalar dcacheStallCycles;
-
         /// @{
         /// Number of branches predicted as taken
         statistics::Scalar numPredictedBranches;
@@ -361,7 +274,7 @@ class SimpleExecContext : public ExecContext
     RegVal
     readMiscRegOperand(const StaticInst *si, int idx) override
     {
-        execContextStats.numMiscRegReads++;
+        cpu->executeStats[thread->threadId()]->numMiscRegReads++;
         const RegId& reg = si->srcRegIdx(idx);
         assert(reg.is(MiscRegClass));
         return thread->readMiscReg(reg.index());
@@ -370,7 +283,7 @@ class SimpleExecContext : public ExecContext
     void
     setMiscRegOperand(const StaticInst *si, int idx, RegVal val) override
     {
-        execContextStats.numMiscRegWrites++;
+        cpu->executeStats[thread->threadId()]->numMiscRegWrites++;
         const RegId& reg = si->destRegIdx(idx);
         assert(reg.is(MiscRegClass));
         thread->setMiscReg(reg.index(), val);
@@ -383,7 +296,7 @@ class SimpleExecContext : public ExecContext
     RegVal
     readMiscReg(int misc_reg) override
     {
-        execContextStats.numMiscRegReads++;
+        cpu->executeStats[thread->threadId()]->numMiscRegReads++;
         return thread->readMiscReg(misc_reg);
     }
 
@@ -394,7 +307,7 @@ class SimpleExecContext : public ExecContext
     void
     setMiscReg(int misc_reg, RegVal val) override
     {
-        execContextStats.numMiscRegWrites++;
+        cpu->executeStats[thread->threadId()]->numMiscRegWrites++;
         thread->setMiscReg(misc_reg, val);
     }
 

From e85cf4f717ddd764a7c84000427ae56bac084855 Mon Sep 17 00:00:00 2001
From: Melissa Jost <melissakjost@gmail.com>
Date: Thu, 19 Jan 2023 00:52:58 -0800
Subject: [PATCH 239/492] cpu: Move commit stats from simple to base cpu

Created stat group CommitCPUStats in BaseCPU and moved stats from the
simple cpu model.

The stats moved from SImpleCPU are numCondCtrlInsts, numFpInsts,
numIntInsts, numLoadInsts, numStoreInsts, numVecInsts.

Moved committedControl of MinorCPU to BaseCPU::CommittedCPUStats. In
MinorCPU, this stat was a 2D vector, where the first dimension is the
thread ID. In base it is now  a 1D vector that is tied to a thread ID
via the commitStats vector.

The committedControl stat vector in CommitCPUStats is updated in the
same way in all CPU models. The function updateComCtrlStats will
update committedControl and the CPU models will call this function
instead of updating committedControl directly. This function takes
a StaticInstPtr as input, which Simple, Minor, and O3 CPU models are
able to provide.

Removed stat "branches" from O3 commit stage. This stat duplicates
BaseCPU::CommittedCPUStats::committedControl::IsControl.

O3 commit stats floating, integer, loads, memRefs, vectorInstructions
are replaced by numFpInsts, numIntInsts, numLoadInsts, numMemRefs,
numVecInsts from BaseCPU::CommitCPUStats respectively. Implemented
numStoreInsts from BaseCPU::commitCPUStats for O3 commit stage.

Change-Id: I362cec51513a404de56a02b450d7663327be20f5
Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/67391
Tested-by: kokoro <noreply+kokoro@google.com>
Maintainer: Bobby Bruce <bbruce@ucdavis.edu>
Reviewed-by: Bobby Bruce <bbruce@ucdavis.edu>
---
 src/cpu/base.cc                | 70 ++++++++++++++++++++++++++++++++++
 src/cpu/base.hh                | 32 ++++++++++++++++
 src/cpu/minor/execute.cc       | 37 +-----------------
 src/cpu/minor/stats.cc         | 15 +-------
 src/cpu/minor/stats.hh         |  6 ---
 src/cpu/o3/commit.cc           | 52 ++++---------------------
 src/cpu/o3/commit.hh           | 12 ------
 src/cpu/simple/base.cc         | 19 ++++-----
 src/cpu/simple/exec_context.hh | 40 -------------------
 9 files changed, 121 insertions(+), 162 deletions(-)

diff --git a/src/cpu/base.cc b/src/cpu/base.cc
index b10c731e17..8121307d50 100644
--- a/src/cpu/base.cc
+++ b/src/cpu/base.cc
@@ -194,9 +194,11 @@ BaseCPU::BaseCPU(const Params &p, bool is_checker)
     // create a stat group object for each thread on this core
     fetchStats.reserve(numThreads);
     executeStats.reserve(numThreads);
+    commitStats.reserve(numThreads);
     for (int i = 0; i < numThreads; i++) {
         fetchStats.emplace_back(new FetchCPUStats(this, i));
         executeStats.emplace_back(new ExecuteCPUStats(this, i));
+        commitStats.emplace_back(new CommitCPUStats(this, i));
     }
 }
 
@@ -922,4 +924,72 @@ ExecuteCPUStats::ExecuteCPUStats(statistics::Group *parent, int thread_id)
                 .prereq(numVecRegWrites);
 }
 
+BaseCPU::
+CommitCPUStats::CommitCPUStats(statistics::Group *parent, int thread_id)
+    : statistics::Group(parent, csprintf("commitStats%i", thread_id).c_str()),
+    ADD_STAT(numMemRefs, statistics::units::Count::get(),
+            "Number of memory references committed"),
+    ADD_STAT(numFpInsts, statistics::units::Count::get(),
+            "Number of float instructions"),
+    ADD_STAT(numIntInsts, statistics::units::Count::get(),
+            "Number of integer instructions"),
+    ADD_STAT(numLoadInsts, statistics::units::Count::get(),
+            "Number of load instructions"),
+    ADD_STAT(numStoreInsts, statistics::units::Count::get(),
+            "Number of store instructions"),
+    ADD_STAT(numVecInsts, statistics::units::Count::get(),
+            "Number of vector instructions"),
+    ADD_STAT(committedInstType, statistics::units::Count::get(),
+            "Class of committed instruction."),
+    ADD_STAT(committedControl, statistics::units::Count::get(),
+             "Class of control type instructions committed")
+{
+    committedInstType
+        .init(enums::Num_OpClass)
+        .flags(statistics::total | statistics::pdf | statistics::dist);
+
+    for (unsigned i = 0; i < Num_OpClasses; ++i) {
+        committedInstType.subname(i, enums::OpClassStrings[i]);
+    }
+
+    committedControl
+        .init(StaticInstFlags::Flags::Num_Flags)
+        .flags(statistics::nozero);
+
+    for (unsigned i = 0; i < StaticInstFlags::Flags::Num_Flags; i++) {
+        committedControl.subname(i, StaticInstFlags::FlagsStrings[i]);
+    }
+}
+
+
+void
+BaseCPU::
+CommitCPUStats::updateComCtrlStats(const StaticInstPtr staticInst)
+{
+    /* Add a count for every control instruction type */
+    if (staticInst->isControl()) {
+        if (staticInst->isReturn()) {
+            committedControl[gem5::StaticInstFlags::Flags::IsReturn]++;
+        }
+        if (staticInst->isCall()) {
+            committedControl[gem5::StaticInstFlags::Flags::IsCall]++;
+        }
+        if (staticInst->isDirectCtrl()) {
+            committedControl[gem5::StaticInstFlags::Flags::IsDirectControl]++;
+        }
+        if (staticInst->isIndirectCtrl()) {
+            committedControl
+                [gem5::StaticInstFlags::Flags::IsIndirectControl]++;
+        }
+        if (staticInst->isCondCtrl()) {
+            committedControl[gem5::StaticInstFlags::Flags::IsCondControl]++;
+        }
+        if (staticInst->isUncondCtrl()) {
+            committedControl[gem5::StaticInstFlags::Flags::IsUncondControl]++;
+        }
+        committedControl[gem5::StaticInstFlags::Flags::IsControl]++;
+    }
+
+}
+
 } // namespace gem5
diff --git a/src/cpu/base.hh b/src/cpu/base.hh
index ad6fa469a3..5b2e97f8b0 100644
--- a/src/cpu/base.hh
+++ b/src/cpu/base.hh
@@ -738,8 +738,40 @@ class BaseCPU : public ClockedObject
         statistics::Scalar numDiscardedOps;
     };
 
+    struct CommitCPUStats: public statistics::Group
+    {
+        CommitCPUStats(statistics::Group *parent, int thread_id);
+
+        /* Number of committed memory references. */
+        statistics::Scalar numMemRefs;
+
+        /* Number of float instructions */
+        statistics::Scalar numFpInsts;
+
+        /* Number of int instructions */
+        statistics::Scalar numIntInsts;
+
+        /* number of load instructions */
+        statistics::Scalar numLoadInsts;
+
+        /* Number of store instructions */
+        statistics::Scalar numStoreInsts;
+
+        /* Number of vector instructions */
+        statistics::Scalar numVecInsts;
+
+        /* Number of instructions committed by type (OpClass) */
+        statistics::Vector committedInstType;
+
+        /* number of control instructions committed by control inst type */
+        statistics::Vector committedControl;
+        void updateComCtrlStats(const StaticInstPtr staticInst);
+
+    };
+
     std::vector<std::unique_ptr<FetchCPUStats>> fetchStats;
     std::vector<std::unique_ptr<ExecuteCPUStats>> executeStats;
+    std::vector<std::unique_ptr<CommitCPUStats>> commitStats;
 };
 
 } // namespace gem5
diff --git a/src/cpu/minor/execute.cc b/src/cpu/minor/execute.cc
index d657de5225..5c0354bb8a 100644
--- a/src/cpu/minor/execute.cc
+++ b/src/cpu/minor/execute.cc
@@ -879,41 +879,8 @@ Execute::doInstCommitAccounting(MinorDynInstPtr inst)
     thread->numOp++;
     thread->threadStats.numOps++;
     cpu.stats.numOps++;
-    cpu.stats.committedInstType[inst->id.threadId]
-                               [inst->staticInst->opClass()]++;
-
-    /** Add a count for every control instruction */
-    if (inst->staticInst->isControl()) {
-        if (inst->staticInst->isReturn()) {
-            cpu.stats.committedControl[inst->id.threadId]
-                        [gem5::StaticInstFlags::Flags::IsReturn]++;
-        }
-        if (inst->staticInst->isCall()) {
-            cpu.stats.committedControl[inst->id.threadId]
-                        [gem5::StaticInstFlags::Flags::IsCall]++;
-        }
-        if (inst->staticInst->isDirectCtrl()) {
-            cpu.stats.committedControl[inst->id.threadId]
-                        [gem5::StaticInstFlags::Flags::IsDirectControl]++;
-        }
-        if (inst->staticInst->isIndirectCtrl()) {
-            cpu.stats.committedControl[inst->id.threadId]
-                        [gem5::StaticInstFlags::Flags::IsIndirectControl]++;
-        }
-        if (inst->staticInst->isCondCtrl()) {
-            cpu.stats.committedControl[inst->id.threadId]
-                        [gem5::StaticInstFlags::Flags::IsCondControl]++;
-        }
-        if (inst->staticInst->isUncondCtrl()) {
-            cpu.stats.committedControl[inst->id.threadId]
-                        [gem5::StaticInstFlags::Flags::IsUncondControl]++;
-
-        }
-        cpu.stats.committedControl[inst->id.threadId]
-                        [gem5::StaticInstFlags::Flags::IsControl]++;
-    }
-
-
+    cpu.commitStats[inst->id.threadId]
+        ->committedInstType[inst->staticInst->opClass()]++;
 
     /* Set the CP SeqNum to the numOps commit number */
     if (inst->traceData)
diff --git a/src/cpu/minor/stats.cc b/src/cpu/minor/stats.cc
index 10e7573afd..b20ce95ec8 100644
--- a/src/cpu/minor/stats.cc
+++ b/src/cpu/minor/stats.cc
@@ -57,11 +57,7 @@ MinorStats::MinorStats(BaseCPU *base_cpu)
              "CPI: cycles per instruction"),
     ADD_STAT(ipc, statistics::units::Rate<
                 statistics::units::Count, statistics::units::Cycle>::get(),
-             "IPC: instructions per cycle"),
-    ADD_STAT(committedInstType, statistics::units::Count::get(),
-             "Class of committed instruction"),
-    ADD_STAT(committedControl, statistics::units::Count::get(),
-             "Class of control type instructions committed")
+             "IPC: instructions per cycle")
 
 {
     quiesceCycles.prereq(quiesceCycles);
@@ -72,15 +68,6 @@ MinorStats::MinorStats(BaseCPU *base_cpu)
     ipc.precision(6);
     ipc = numInsts / base_cpu->baseStats.numCycles;
 
-    committedInstType
-        .init(base_cpu->numThreads, enums::Num_OpClass)
-        .flags(statistics::total | statistics::pdf | statistics::dist);
-    committedInstType.ysubnames(enums::OpClassStrings);
-
-    committedControl
-        .init(base_cpu->numThreads, StaticInstFlags::Flags::Num_Flags)
-        .flags(statistics::nozero);
-    committedControl.ysubnames(StaticInstFlags::FlagsStrings);
 }
 
 } // namespace minor
diff --git a/src/cpu/minor/stats.hh b/src/cpu/minor/stats.hh
index e5d018679d..f7d5e71dfa 100644
--- a/src/cpu/minor/stats.hh
+++ b/src/cpu/minor/stats.hh
@@ -72,12 +72,6 @@ struct MinorStats : public statistics::Group
     statistics::Formula cpi;
     statistics::Formula ipc;
 
-    /** Number of instructions by type (OpClass) */
-    statistics::Vector2d committedInstType;
-
-    /** Number of branches commited */
-    statistics::Vector2d committedControl;
-
 };
 
 } // namespace minor
diff --git a/src/cpu/o3/commit.cc b/src/cpu/o3/commit.cc
index 38dce831b1..7419b2a2f9 100644
--- a/src/cpu/o3/commit.cc
+++ b/src/cpu/o3/commit.cc
@@ -160,21 +160,10 @@ Commit::CommitStats::CommitStats(CPU *cpu, Commit *commit)
                "Number of instructions committed"),
       ADD_STAT(opsCommitted, statistics::units::Count::get(),
                "Number of ops (including micro ops) committed"),
-      ADD_STAT(memRefs, statistics::units::Count::get(),
-               "Number of memory references committed"),
-      ADD_STAT(loads, statistics::units::Count::get(), "Number of loads committed"),
       ADD_STAT(amos, statistics::units::Count::get(),
                "Number of atomic instructions committed"),
       ADD_STAT(membars, statistics::units::Count::get(),
                "Number of memory barriers committed"),
-      ADD_STAT(branches, statistics::units::Count::get(),
-               "Number of branches committed"),
-      ADD_STAT(vectorInstructions, statistics::units::Count::get(),
-               "Number of committed Vector instructions."),
-      ADD_STAT(floating, statistics::units::Count::get(),
-               "Number of committed floating point instructions."),
-      ADD_STAT(integer, statistics::units::Count::get(),
-               "Number of committed integer instructions."),
       ADD_STAT(functionCalls, statistics::units::Count::get(),
                "Number of function calls committed."),
       ADD_STAT(committedInstType, statistics::units::Count::get(),
@@ -200,14 +189,6 @@ Commit::CommitStats::CommitStats(CPU *cpu, Commit *commit)
         .init(cpu->numThreads)
         .flags(total);
 
-    memRefs
-        .init(cpu->numThreads)
-        .flags(total);
-
-    loads
-        .init(cpu->numThreads)
-        .flags(total);
-
     amos
         .init(cpu->numThreads)
         .flags(total);
@@ -216,22 +197,6 @@ Commit::CommitStats::CommitStats(CPU *cpu, Commit *commit)
         .init(cpu->numThreads)
         .flags(total);
 
-    branches
-        .init(cpu->numThreads)
-        .flags(total);
-
-    vectorInstructions
-        .init(cpu->numThreads)
-        .flags(total);
-
-    floating
-        .init(cpu->numThreads)
-        .flags(total);
-
-    integer
-        .init(cpu->numThreads)
-        .flags(total);
-
     functionCalls
         .init(commit->numThreads)
         .flags(total);
@@ -1396,21 +1361,20 @@ Commit::updateComInstStats(const DynInstPtr &inst)
     //
     //  Control Instructions
     //
-    if (inst->isControl())
-        stats.branches[tid]++;
+    cpu->commitStats[tid]->updateComCtrlStats(inst->staticInst);
 
     //
     //  Memory references
     //
     if (inst->isMemRef()) {
-        stats.memRefs[tid]++;
+        cpu->commitStats[tid]->numMemRefs++;
 
         if (inst->isLoad()) {
-            stats.loads[tid]++;
+            cpu->commitStats[tid]->numLoadInsts++;
         }
 
-        if (inst->isAtomic()) {
-            stats.amos[tid]++;
+        if (inst->isStore()) {
+            cpu->commitStats[tid]->numStoreInsts++;
         }
     }
 
@@ -1420,14 +1384,14 @@ Commit::updateComInstStats(const DynInstPtr &inst)
 
     // Integer Instruction
     if (inst->isInteger())
-        stats.integer[tid]++;
+        cpu->commitStats[tid]->numIntInsts++;
 
     // Floating Point Instruction
     if (inst->isFloating())
-        stats.floating[tid]++;
+        cpu->commitStats[tid]->numFpInsts++;
     // Vector Instruction
     if (inst->isVector())
-        stats.vectorInstructions[tid]++;
+        cpu->commitStats[tid]->numVecInsts++;
 
     // Function Calls
     if (inst->isCall())
diff --git a/src/cpu/o3/commit.hh b/src/cpu/o3/commit.hh
index cf4eaf5d92..6591360197 100644
--- a/src/cpu/o3/commit.hh
+++ b/src/cpu/o3/commit.hh
@@ -483,22 +483,10 @@ class Commit
         statistics::Vector instsCommitted;
         /** Total number of ops (including micro ops) committed. */
         statistics::Vector opsCommitted;
-        /** Stat for the total number of committed memory references. */
-        statistics::Vector memRefs;
-        /** Stat for the total number of committed loads. */
-        statistics::Vector loads;
         /** Stat for the total number of committed atomics. */
         statistics::Vector amos;
         /** Total number of committed memory barriers. */
         statistics::Vector membars;
-        /** Total number of committed branches. */
-        statistics::Vector branches;
-        /** Total number of vector instructions */
-        statistics::Vector vectorInstructions;
-        /** Total number of floating point instructions */
-        statistics::Vector floating;
-        /** Total number of integer instructions */
-        statistics::Vector integer;
         /** Total number of function calls */
         statistics::Vector functionCalls;
         /** Committed instructions by instruction type (OpClass) */
diff --git a/src/cpu/simple/base.cc b/src/cpu/simple/base.cc
index c8d9aeeb86..70da65953b 100644
--- a/src/cpu/simple/base.cc
+++ b/src/cpu/simple/base.cc
@@ -403,19 +403,19 @@ BaseSimpleCPU::postExecute()
     //integer alu accesses
     if (curStaticInst->isInteger()){
         executeStats[t_info.thread->threadId()]->numIntAluAccesses++;
-        t_info.execContextStats.numIntInsts++;
+        commitStats[t_info.thread->threadId()]->numIntInsts++;
     }
 
     //float alu accesses
     if (curStaticInst->isFloating()){
         executeStats[t_info.thread->threadId()]->numFpAluAccesses++;
-        t_info.execContextStats.numFpInsts++;
+        commitStats[t_info.thread->threadId()]->numFpInsts++;
     }
 
     //vector alu accesses
     if (curStaticInst->isVector()){
         executeStats[t_info.thread->threadId()]->numVecAluAccesses++;
-        t_info.execContextStats.numVecInsts++;
+        commitStats[t_info.thread->threadId()]->numVecInsts++;
     }
 
     //Matrix alu accesses
@@ -429,22 +429,19 @@ BaseSimpleCPU::postExecute()
         t_info.execContextStats.numCallsReturns++;
     }
 
-    //the number of branch predictions that will be made
-    if (curStaticInst->isCondCtrl()){
-        t_info.execContextStats.numCondCtrlInsts++;
-    }
-
     //result bus acceses
     if (curStaticInst->isLoad()){
-        t_info.execContextStats.numLoadInsts++;
+        commitStats[t_info.thread->threadId()]->numLoadInsts++;
     }
 
     if (curStaticInst->isStore() || curStaticInst->isAtomic()){
-        t_info.execContextStats.numStoreInsts++;
+        commitStats[t_info.thread->threadId()]->numStoreInsts++;
     }
     /* End power model statistics */
 
-    t_info.execContextStats.statExecutedInstType[curStaticInst->opClass()]++;
+    commitStats[t_info.thread->threadId()]
+        ->committedInstType[curStaticInst->opClass()]++;
+    commitStats[t_info.thread->threadId()]->updateComCtrlStats(curStaticInst);
 
     if (FullSystem)
         traceFunctions(instAddr);
diff --git a/src/cpu/simple/exec_context.hh b/src/cpu/simple/exec_context.hh
index 00efd8593c..42d6181cf2 100644
--- a/src/cpu/simple/exec_context.hh
+++ b/src/cpu/simple/exec_context.hh
@@ -94,20 +94,8 @@ class SimpleExecContext : public ExecContext
                        "Number of matrix alu accesses"),
               ADD_STAT(numCallsReturns, statistics::units::Count::get(),
                        "Number of times a function call or return occured"),
-              ADD_STAT(numCondCtrlInsts, statistics::units::Count::get(),
-                       "Number of instructions that are conditional controls"),
-              ADD_STAT(numIntInsts, statistics::units::Count::get(),
-                       "Number of integer instructions"),
-              ADD_STAT(numFpInsts, statistics::units::Count::get(),
-                       "Number of float instructions"),
-              ADD_STAT(numVecInsts, statistics::units::Count::get(),
-                       "Number of vector instructions"),
               ADD_STAT(numMatInsts, statistics::units::Count::get(),
                        "Number of matrix instructions"),
-              ADD_STAT(numLoadInsts, statistics::units::Count::get(),
-                       "Number of load instructions"),
-              ADD_STAT(numStoreInsts, statistics::units::Count::get(),
-                       "Number of store instructions"),
               ADD_STAT(numIdleCycles, statistics::units::Cycle::get(),
                        "Number of idle cycles"),
               ADD_STAT(numBusyCycles, statistics::units::Cycle::get(),
@@ -120,8 +108,6 @@ class SimpleExecContext : public ExecContext
                        "Number of branches predicted as taken"),
               ADD_STAT(numBranchMispred, statistics::units::Count::get(),
                        "Number of branch mispredictions"),
-              ADD_STAT(statExecutedInstType, statistics::units::Count::get(),
-                       "Class of executed instruction."),
               numRegReads{
                   &(cpu->executeStats[thread->threadId()]->numIntRegReads),
                   &(cpu->executeStats[thread->threadId()]->numFpRegReads),
@@ -142,13 +128,6 @@ class SimpleExecContext : public ExecContext
                   &numMatRegWrites
               }
         {
-            statExecutedInstType
-                .init(enums::Num_OpClass)
-                .flags(statistics::total | statistics::pdf | statistics::dist);
-
-            for (unsigned i = 0; i < Num_OpClasses; ++i) {
-                statExecutedInstType.subname(i, enums::OpClassStrings[i]);
-            }
 
             idleFraction = statistics::constant(1.0) - notIdleFraction;
             numIdleCycles = idleFraction * cpu->baseStats.numCycles;
@@ -171,18 +150,6 @@ class SimpleExecContext : public ExecContext
         // Number of function calls/returns
         statistics::Scalar numCallsReturns;
 
-        // Conditional control instructions;
-        statistics::Scalar numCondCtrlInsts;
-
-        // Number of int instructions
-        statistics::Scalar numIntInsts;
-
-        // Number of float instructions
-        statistics::Scalar numFpInsts;
-
-        // Number of vector instructions
-        statistics::Scalar numVecInsts;
-
         // Number of matrix instructions
         statistics::Scalar numMatInsts;
 
@@ -190,10 +157,6 @@ class SimpleExecContext : public ExecContext
         mutable statistics::Scalar numMatRegReads;
         statistics::Scalar numMatRegWrites;
 
-        // Number of simulated memory references
-        statistics::Scalar numLoadInsts;
-        statistics::Scalar numStoreInsts;
-
         // Number of idle cycles
         statistics::Formula numIdleCycles;
 
@@ -211,9 +174,6 @@ class SimpleExecContext : public ExecContext
         statistics::Scalar numBranchMispred;
         /// @}
 
-        // Instruction mix histogram by OpClass
-        statistics::Vector statExecutedInstType;
-
         std::array<statistics::Scalar *, CCRegClass + 1> numRegReads;
         std::array<statistics::Scalar *, CCRegClass + 1> numRegWrites;
 

From c7b6e7809933d0d4d63506ef58f87d7265e0fb51 Mon Sep 17 00:00:00 2001
From: Melissa Jost <melissakjost@gmail.com>
Date: Thu, 19 Jan 2023 01:40:36 -0800
Subject: [PATCH 240/492] cpu: Move numInsts, numOps, ipc, cpi to BaseCPU

In BaseCPU::BaseCPUStats, numInsts and numOps track per CPU core
committed instructions and operations.

In BaseCPU::FetchCPUStats, numInsts and numOps track per thread
fetched instructions and operations.

In BaseCPU::CommitCPUStats, numInsts and numOps track per thread
committed instructions and operations.

In BaseSimpleCPU, the countInst() function has been split into
countInst(), countFetchInst(), and countCommitInst(). The stat count
incrementation of countInst() has been removed and delegated to the
other two functions. countFetchInst() increments numInsts and numOps
of the FetchCPUStats group for a thread. countCommitInst() increments
the numInsts and numOps of the CommitCPUStats group for a thread and
of the BaseCPUStats group for a CPU core. These functions are called
in the appropriate stage within timing.cc and atomic.cc. The call to
countInst() is left unchanged. countFetchInst() is called in
preExecute(). countCommitInst() is called in postExecute().

For MinorCPU, only the commit level numInsts and numOps stats have been
implemented.

IPC and CPI stats have been added to BaseCPUStats (core level) and
CommitCPUStats (thread level). The formulas for the IPC and CPI stats
in CommitCPUStats are set in the BaseCPU constructor, after the
CommitCPUStats stat group object has been created.

Change-Id: If893b331fe4a6908e4b4caf4a30f1b0aeb4c4266
Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/67392
Tested-by: kokoro <noreply+kokoro@google.com>
Reviewed-by: Bobby Bruce <bbruce@ucdavis.edu>
Maintainer: Bobby Bruce <bbruce@ucdavis.edu>
---
 src/cpu/base.cc                | 38 +++++++++++++++++++++++++++++++++-
 src/cpu/base.hh                | 20 ++++++++++++++++++
 src/cpu/minor/execute.cc       |  6 ++++--
 src/cpu/minor/stats.cc         | 18 +---------------
 src/cpu/minor/stats.hh         | 10 ---------
 src/cpu/simple/base.cc         | 38 ++++++++++++++++++++++++++++++++--
 src/cpu/simple/base.hh         |  2 ++
 src/cpu/simple/exec_context.hh |  8 -------
 8 files changed, 100 insertions(+), 40 deletions(-)

diff --git a/src/cpu/base.cc b/src/cpu/base.cc
index 8121307d50..67f8e7bfc0 100644
--- a/src/cpu/base.cc
+++ b/src/cpu/base.cc
@@ -198,7 +198,11 @@ BaseCPU::BaseCPU(const Params &p, bool is_checker)
     for (int i = 0; i < numThreads; i++) {
         fetchStats.emplace_back(new FetchCPUStats(this, i));
         executeStats.emplace_back(new ExecuteCPUStats(this, i));
-        commitStats.emplace_back(new CommitCPUStats(this, i));
+        // create commitStat object for thread i and set ipc, cpi formulas
+        CommitCPUStats* commitStatptr = new CommitCPUStats(this, i);
+        commitStatptr->ipc = commitStatptr->numInsts / baseStats.numCycles;
+        commitStatptr->cpi = baseStats.numCycles / commitStatptr->numInsts;
+        commitStats.emplace_back(commitStatptr);
     }
 }
 
@@ -392,13 +396,28 @@ BaseCPU::probeInstCommit(const StaticInstPtr &inst, Addr pc)
 BaseCPU::
 BaseCPUStats::BaseCPUStats(statistics::Group *parent)
     : statistics::Group(parent),
+      ADD_STAT(numInsts, statistics::units::Count::get(),
+               "Number of instructions committed (core level)"),
+      ADD_STAT(numOps, statistics::units::Count::get(),
+               "Number of ops (including micro ops) committed (core level)"),
       ADD_STAT(numCycles, statistics::units::Cycle::get(),
                "Number of cpu cycles simulated"),
+      ADD_STAT(cpi, statistics::units::Rate<
+                statistics::units::Cycle, statistics::units::Count>::get(),
+               "CPI: cycles per instruction (core level)"),
+      ADD_STAT(ipc, statistics::units::Rate<
+                statistics::units::Count, statistics::units::Cycle>::get(),
+               "IPC: instructions per cycle (core level)"),
       ADD_STAT(numWorkItemsStarted, statistics::units::Count::get(),
                "Number of work items this cpu started"),
       ADD_STAT(numWorkItemsCompleted, statistics::units::Count::get(),
                "Number of work items this cpu completed")
 {
+    cpi.precision(6);
+    cpi = numCycles / numInsts;
+
+    ipc.precision(6);
+    ipc = numInsts / numCycles;
 }
 
 void
@@ -839,6 +858,10 @@ BaseCPU::GlobalStats::GlobalStats(statistics::Group *parent)
 BaseCPU::
 FetchCPUStats::FetchCPUStats(statistics::Group *parent, int thread_id)
     : statistics::Group(parent, csprintf("fetchStats%i", thread_id).c_str()),
+    ADD_STAT(numInsts, statistics::units::Count::get(),
+             "Number of instructions fetched (thread level)"),
+    ADD_STAT(numOps, statistics::units::Count::get(),
+             "Number of ops (including micro ops) fetched (thread level)"),
     ADD_STAT(numBranches, statistics::units::Count::get(),
              "Number of branches fetched"),
     ADD_STAT(numFetchSuspends, statistics::units::Count::get(),
@@ -927,6 +950,16 @@ ExecuteCPUStats::ExecuteCPUStats(statistics::Group *parent, int thread_id)
 BaseCPU::
 CommitCPUStats::CommitCPUStats(statistics::Group *parent, int thread_id)
     : statistics::Group(parent, csprintf("commitStats%i", thread_id).c_str()),
+    ADD_STAT(numInsts, statistics::units::Count::get(),
+             "Number of instructions committed (thread level)"),
+    ADD_STAT(numOps, statistics::units::Count::get(),
+             "Number of ops (including micro ops) committed (thread level)"),
+    ADD_STAT(cpi, statistics::units::Rate<
+                statistics::units::Cycle, statistics::units::Count>::get(),
+             "CPI: cycles per instruction (thread level)"),
+    ADD_STAT(ipc, statistics::units::Rate<
+                statistics::units::Count, statistics::units::Cycle>::get(),
+             "IPC: instructions per cycle (thread level)"),
     ADD_STAT(numMemRefs, statistics::units::Count::get(),
             "Number of memory references committed"),
     ADD_STAT(numFpInsts, statistics::units::Count::get(),
@@ -944,6 +977,9 @@ CommitCPUStats::CommitCPUStats(statistics::Group *parent, int thread_id)
     ADD_STAT(committedControl, statistics::units::Count::get(),
              "Class of control type instructions committed")
 {
+    cpi.precision(6);
+    ipc.precision(6);
+
     committedInstType
         .init(enums::Num_OpClass)
         .flags(statistics::total | statistics::pdf | statistics::dist);
diff --git a/src/cpu/base.hh b/src/cpu/base.hh
index 5b2e97f8b0..06fc2a391d 100644
--- a/src/cpu/base.hh
+++ b/src/cpu/base.hh
@@ -633,8 +633,14 @@ class BaseCPU : public ClockedObject
     struct BaseCPUStats : public statistics::Group
     {
         BaseCPUStats(statistics::Group *parent);
+        // Number of CPU insts and ops committed at CPU core level
+        statistics::Scalar numInsts;
+        statistics::Scalar numOps;
         // Number of CPU cycles simulated
         statistics::Scalar numCycles;
+        /* CPI/IPC for total cycle counts and macro insts */
+        statistics::Formula cpi;
+        statistics::Formula ipc;
         statistics::Scalar numWorkItemsStarted;
         statistics::Scalar numWorkItemsCompleted;
     } baseStats;
@@ -683,6 +689,12 @@ class BaseCPU : public ClockedObject
     {
         FetchCPUStats(statistics::Group *parent, int thread_id);
 
+        /* Total number of instructions fetched */
+        statistics::Scalar numInsts;
+
+        /* Total number of operations fetched */
+        statistics::Scalar numOps;
+
         /* Total number of branches fetched */
         statistics::Scalar numBranches;
 
@@ -742,6 +754,14 @@ class BaseCPU : public ClockedObject
     {
         CommitCPUStats(statistics::Group *parent, int thread_id);
 
+        /* Number of simulated instructions committed */
+        statistics::Scalar numInsts;
+        statistics::Scalar numOps;
+
+        /* CPI/IPC for total cycle counts and macro insts */
+        statistics::Formula cpi;
+        statistics::Formula ipc;
+
         /* Number of committed memory references. */
         statistics::Scalar numMemRefs;
 
diff --git a/src/cpu/minor/execute.cc b/src/cpu/minor/execute.cc
index 5c0354bb8a..2908c2266f 100644
--- a/src/cpu/minor/execute.cc
+++ b/src/cpu/minor/execute.cc
@@ -871,14 +871,16 @@ Execute::doInstCommitAccounting(MinorDynInstPtr inst)
     {
         thread->numInst++;
         thread->threadStats.numInsts++;
-        cpu.stats.numInsts++;
+        cpu.commitStats[inst->id.threadId]->numInsts++;
+        cpu.baseStats.numInsts++;
 
         /* Act on events related to instruction counts */
         thread->comInstEventQueue.serviceEvents(thread->numInst);
     }
     thread->numOp++;
     thread->threadStats.numOps++;
-    cpu.stats.numOps++;
+    cpu.commitStats[inst->id.threadId]->numOps++;
+    cpu.baseStats.numOps++;
     cpu.commitStats[inst->id.threadId]
         ->committedInstType[inst->staticInst->opClass()]++;
 
diff --git a/src/cpu/minor/stats.cc b/src/cpu/minor/stats.cc
index b20ce95ec8..e31cbe93a1 100644
--- a/src/cpu/minor/stats.cc
+++ b/src/cpu/minor/stats.cc
@@ -45,29 +45,13 @@ namespace minor
 
 MinorStats::MinorStats(BaseCPU *base_cpu)
     : statistics::Group(base_cpu),
-    ADD_STAT(numInsts, statistics::units::Count::get(),
-             "Number of instructions committed"),
-    ADD_STAT(numOps, statistics::units::Count::get(),
-             "Number of ops (including micro ops) committed"),
     ADD_STAT(quiesceCycles, statistics::units::Cycle::get(),
              "Total number of cycles that CPU has spent quiesced or waiting "
-             "for an interrupt"),
-    ADD_STAT(cpi, statistics::units::Rate<
-                statistics::units::Cycle, statistics::units::Count>::get(),
-             "CPI: cycles per instruction"),
-    ADD_STAT(ipc, statistics::units::Rate<
-                statistics::units::Count, statistics::units::Cycle>::get(),
-             "IPC: instructions per cycle")
+             "for an interrupt")
 
 {
     quiesceCycles.prereq(quiesceCycles);
 
-    cpi.precision(6);
-    cpi = base_cpu->baseStats.numCycles / numInsts;
-
-    ipc.precision(6);
-    ipc = numInsts / base_cpu->baseStats.numCycles;
-
 }
 
 } // namespace minor
diff --git a/src/cpu/minor/stats.hh b/src/cpu/minor/stats.hh
index f7d5e71dfa..98ac80f15c 100644
--- a/src/cpu/minor/stats.hh
+++ b/src/cpu/minor/stats.hh
@@ -59,19 +59,9 @@ struct MinorStats : public statistics::Group
 {
     MinorStats(BaseCPU *parent);
 
-    /** Number of simulated instructions */
-    statistics::Scalar numInsts;
-
-    /** Number of simulated insts and microops */
-    statistics::Scalar numOps;
-
     /** Number of cycles in quiescent state */
     statistics::Scalar quiesceCycles;
 
-    /** CPI/IPC for total cycle counts and macro insts */
-    statistics::Formula cpi;
-    statistics::Formula ipc;
-
 };
 
 } // namespace minor
diff --git a/src/cpu/simple/base.cc b/src/cpu/simple/base.cc
index 70da65953b..35d149097c 100644
--- a/src/cpu/simple/base.cc
+++ b/src/cpu/simple/base.cc
@@ -154,10 +154,36 @@ BaseSimpleCPU::countInst()
 
     if (!curStaticInst->isMicroop() || curStaticInst->isLastMicroop()) {
         t_info.numInst++;
-        t_info.execContextStats.numInsts++;
     }
     t_info.numOp++;
-    t_info.execContextStats.numOps++;
+}
+
+void
+BaseSimpleCPU::countFetchInst()
+{
+    SimpleExecContext& t_info = *threadInfo[curThread];
+
+    if (!curStaticInst->isMicroop() || curStaticInst->isLastMicroop()) {
+        // increment thread level numInsts fetched count
+        fetchStats[t_info.thread->threadId()]->numInsts++;
+    }
+    // increment thread level numOps fetched count
+    fetchStats[t_info.thread->threadId()]->numOps++;
+}
+
+void
+BaseSimpleCPU::countCommitInst()
+{
+    SimpleExecContext& t_info = *threadInfo[curThread];
+
+    if (!curStaticInst->isMicroop() || curStaticInst->isLastMicroop()) {
+        // increment thread level and core level numInsts count
+        commitStats[t_info.thread->threadId()]->numInsts++;
+        baseStats.numInsts++;
+    }
+    // increment thread level and core level numOps count
+    commitStats[t_info.thread->threadId()]->numOps++;
+    baseStats.numOps++;
 }
 
 Counter
@@ -376,6 +402,11 @@ BaseSimpleCPU::preExecute()
         if (predict_taken)
             ++t_info.execContextStats.numPredictedBranches;
     }
+
+    // increment the fetch instruction stat counters
+    if (curStaticInst) {
+        countFetchInst();
+    }
 }
 
 void
@@ -443,6 +474,9 @@ BaseSimpleCPU::postExecute()
         ->committedInstType[curStaticInst->opClass()]++;
     commitStats[t_info.thread->threadId()]->updateComCtrlStats(curStaticInst);
 
+    /* increment the committed numInsts and numOps stats */
+    countCommitInst();
+
     if (FullSystem)
         traceFunctions(instAddr);
 
diff --git a/src/cpu/simple/base.hh b/src/cpu/simple/base.hh
index df5290cf3c..46a25a0a42 100644
--- a/src/cpu/simple/base.hh
+++ b/src/cpu/simple/base.hh
@@ -182,6 +182,8 @@ class BaseSimpleCPU : public BaseCPU
     }
 
     void countInst();
+    void countFetchInst();
+    void countCommitInst();
     Counter totalInsts() const override;
     Counter totalOps() const override;
 
diff --git a/src/cpu/simple/exec_context.hh b/src/cpu/simple/exec_context.hh
index 42d6181cf2..c0927fcadd 100644
--- a/src/cpu/simple/exec_context.hh
+++ b/src/cpu/simple/exec_context.hh
@@ -86,10 +86,6 @@ class SimpleExecContext : public ExecContext
             : statistics::Group(cpu,
                            csprintf("exec_context.thread_%i",
                                     thread->threadId()).c_str()),
-              ADD_STAT(numInsts, statistics::units::Count::get(),
-                       "Number of instructions committed"),
-              ADD_STAT(numOps, statistics::units::Count::get(),
-                       "Number of ops (including micro ops) committed"),
               ADD_STAT(numMatAluAccesses, statistics::units::Count::get(),
                        "Number of matrix alu accesses"),
               ADD_STAT(numCallsReturns, statistics::units::Count::get(),
@@ -140,10 +136,6 @@ class SimpleExecContext : public ExecContext
                 .prereq(numBranchMispred);
         }
 
-        // Number of simulated instructions
-        statistics::Scalar numInsts;
-        statistics::Scalar numOps;
-
         // Number of matrix alu accesses
         statistics::Scalar numMatAluAccesses;
 

From d943e42bdd0c4d7c0c3c70258306149ee341bb5a Mon Sep 17 00:00:00 2001
From: Melissa Jost <melissakjost@gmail.com>
Date: Thu, 19 Jan 2023 01:48:32 -0800
Subject: [PATCH 241/492] cpu-o3: Use base instructions committed counters in
 O3CPU

Moved committedInsts from O3 cpu.* to BaseCPU as numInstsNotNOP because
it tracks the instructions committed that are not NOPs or prefetches.
This change also does the same for commitedOps. InstsCommitted from O3
commit.*, which tracks all instructions committed, has been removed.
CommitCPUStats::numInsts replaces it in O3. The same has been done for
opsCommitted. Because IPC and CPI calculations are handled in BaseCPU,
removed IPC and CPI stats from O3 cpu.*.

Change-Id: I9f122c9a9dafccd5342f18056f282f3dad8b1b1e
Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/67393
Tested-by: kokoro <noreply+kokoro@google.com>
Reviewed-by: Bobby Bruce <bbruce@ucdavis.edu>
Maintainer: Bobby Bruce <bbruce@ucdavis.edu>
---
 src/cpu/base.cc      |  4 ++++
 src/cpu/base.hh      |  4 ++++
 src/cpu/o3/commit.cc | 21 ++++++-------------
 src/cpu/o3/commit.hh |  4 ----
 src/cpu/o3/cpu.cc    | 49 +++-----------------------------------------
 src/cpu/o3/cpu.hh    | 13 ------------
 6 files changed, 17 insertions(+), 78 deletions(-)

diff --git a/src/cpu/base.cc b/src/cpu/base.cc
index 67f8e7bfc0..fa30e4b5e6 100644
--- a/src/cpu/base.cc
+++ b/src/cpu/base.cc
@@ -954,6 +954,10 @@ CommitCPUStats::CommitCPUStats(statistics::Group *parent, int thread_id)
              "Number of instructions committed (thread level)"),
     ADD_STAT(numOps, statistics::units::Count::get(),
              "Number of ops (including micro ops) committed (thread level)"),
+    ADD_STAT(numInstsNotNOP, statistics::units::Count::get(),
+             "Number of instructions committed excluding NOPs or prefetches"),
+    ADD_STAT(numOpsNotNOP, statistics::units::Count::get(),
+             "Number of Ops (including micro ops) Simulated"),
     ADD_STAT(cpi, statistics::units::Rate<
                 statistics::units::Cycle, statistics::units::Count>::get(),
              "CPI: cycles per instruction (thread level)"),
diff --git a/src/cpu/base.hh b/src/cpu/base.hh
index 06fc2a391d..a9af865da0 100644
--- a/src/cpu/base.hh
+++ b/src/cpu/base.hh
@@ -758,6 +758,10 @@ class BaseCPU : public ClockedObject
         statistics::Scalar numInsts;
         statistics::Scalar numOps;
 
+        /* Number of instructions committed that are not NOP or prefetches */
+        statistics::Scalar numInstsNotNOP;
+        statistics::Scalar numOpsNotNOP;
+
         /* CPI/IPC for total cycle counts and macro insts */
         statistics::Formula cpi;
         statistics::Formula ipc;
diff --git a/src/cpu/o3/commit.cc b/src/cpu/o3/commit.cc
index 7419b2a2f9..e1f01680ca 100644
--- a/src/cpu/o3/commit.cc
+++ b/src/cpu/o3/commit.cc
@@ -156,10 +156,6 @@ Commit::CommitStats::CommitStats(CPU *cpu, Commit *commit)
                "The number of times a branch was mispredicted"),
       ADD_STAT(numCommittedDist, statistics::units::Count::get(),
                "Number of insts commited each cycle"),
-      ADD_STAT(instsCommitted, statistics::units::Count::get(),
-               "Number of instructions committed"),
-      ADD_STAT(opsCommitted, statistics::units::Count::get(),
-               "Number of ops (including micro ops) committed"),
       ADD_STAT(amos, statistics::units::Count::get(),
                "Number of atomic instructions committed"),
       ADD_STAT(membars, statistics::units::Count::get(),
@@ -181,14 +177,6 @@ Commit::CommitStats::CommitStats(CPU *cpu, Commit *commit)
         .init(0,commit->commitWidth,1)
         .flags(statistics::pdf);
 
-    instsCommitted
-        .init(cpu->numThreads)
-        .flags(total);
-
-    opsCommitted
-        .init(cpu->numThreads)
-        .flags(total);
-
     amos
         .init(cpu->numThreads)
         .flags(total);
@@ -1348,9 +1336,12 @@ Commit::updateComInstStats(const DynInstPtr &inst)
 {
     ThreadID tid = inst->threadNumber;
 
-    if (!inst->isMicroop() || inst->isLastMicroop())
-        stats.instsCommitted[tid]++;
-    stats.opsCommitted[tid]++;
+    if (!inst->isMicroop() || inst->isLastMicroop()) {
+        cpu->commitStats[tid]->numInsts++;
+        cpu->baseStats.numInsts++;
+    }
+    cpu->commitStats[tid]->numOps++;
+    cpu->baseStats.numOps++;
 
     // To match the old model, don't count nops and instruction
     // prefetches towards the total commit count.
diff --git a/src/cpu/o3/commit.hh b/src/cpu/o3/commit.hh
index 6591360197..eccd023d45 100644
--- a/src/cpu/o3/commit.hh
+++ b/src/cpu/o3/commit.hh
@@ -479,10 +479,6 @@ class Commit
         /** Distribution of the number of committed instructions each cycle. */
         statistics::Distribution numCommittedDist;
 
-        /** Total number of instructions committed. */
-        statistics::Vector instsCommitted;
-        /** Total number of ops (including micro ops) committed. */
-        statistics::Vector opsCommitted;
         /** Stat for the total number of committed atomics. */
         statistics::Vector amos;
         /** Total number of committed memory barriers. */
diff --git a/src/cpu/o3/cpu.cc b/src/cpu/o3/cpu.cc
index 90df3b349e..93c58fef63 100644
--- a/src/cpu/o3/cpu.cc
+++ b/src/cpu/o3/cpu.cc
@@ -328,23 +328,7 @@ CPU::CPUStats::CPUStats(CPU *cpu)
                "to idling"),
       ADD_STAT(quiesceCycles, statistics::units::Cycle::get(),
                "Total number of cycles that CPU has spent quiesced or waiting "
-               "for an interrupt"),
-      ADD_STAT(committedInsts, statistics::units::Count::get(),
-               "Number of Instructions Simulated"),
-      ADD_STAT(committedOps, statistics::units::Count::get(),
-               "Number of Ops (including micro ops) Simulated"),
-      ADD_STAT(cpi, statistics::units::Rate<
-                    statistics::units::Cycle, statistics::units::Count>::get(),
-               "CPI: Cycles Per Instruction"),
-      ADD_STAT(totalCpi, statistics::units::Rate<
-                    statistics::units::Cycle, statistics::units::Count>::get(),
-               "CPI: Total CPI of All Threads"),
-      ADD_STAT(ipc, statistics::units::Rate<
-                    statistics::units::Count, statistics::units::Cycle>::get(),
-               "IPC: Instructions Per Cycle"),
-      ADD_STAT(totalIpc, statistics::units::Rate<
-                    statistics::units::Count, statistics::units::Cycle>::get(),
-               "IPC: Total IPC of All Threads")
+               "for an interrupt")
 {
     // Register any of the O3CPU's stats here.
     timesIdled
@@ -356,33 +340,6 @@ CPU::CPUStats::CPUStats(CPU *cpu)
     quiesceCycles
         .prereq(quiesceCycles);
 
-    // Number of Instructions simulated
-    // --------------------------------
-    // Should probably be in Base CPU but need templated
-    // MaxThreads so put in here instead
-    committedInsts
-        .init(cpu->numThreads)
-        .flags(statistics::total);
-
-    committedOps
-        .init(cpu->numThreads)
-        .flags(statistics::total);
-
-    cpi
-        .precision(6);
-    cpi = cpu->baseStats.numCycles / committedInsts;
-
-    totalCpi
-        .precision(6);
-    totalCpi = cpu->baseStats.numCycles / sum(committedInsts);
-
-    ipc
-        .precision(6);
-    ipc = committedInsts / cpu->baseStats.numCycles;
-
-    totalIpc
-        .precision(6);
-    totalIpc = sum(committedInsts) / cpu->baseStats.numCycles;
 }
 
 void
@@ -1170,14 +1127,14 @@ CPU::instDone(ThreadID tid, const DynInstPtr &inst)
     if (!inst->isMicroop() || inst->isLastMicroop()) {
         thread[tid]->numInst++;
         thread[tid]->threadStats.numInsts++;
-        cpuStats.committedInsts[tid]++;
+        commitStats[tid]->numInstsNotNOP++;
 
         // Check for instruction-count-based events.
         thread[tid]->comInstEventQueue.serviceEvents(thread[tid]->numInst);
     }
     thread[tid]->numOp++;
     thread[tid]->threadStats.numOps++;
-    cpuStats.committedOps[tid]++;
+    commitStats[tid]->numOpsNotNOP++;
 
     probeInstCommit(inst->staticInst, inst->pcState().instAddr());
 }
diff --git a/src/cpu/o3/cpu.hh b/src/cpu/o3/cpu.hh
index 07775298af..7dc378428b 100644
--- a/src/cpu/o3/cpu.hh
+++ b/src/cpu/o3/cpu.hh
@@ -581,19 +581,6 @@ class CPU : public BaseCPU
         /** Stat for total number of cycles the CPU spends descheduled due to a
          * quiesce operation or waiting for an interrupt. */
         statistics::Scalar quiesceCycles;
-        /** Stat for the number of committed instructions per thread. */
-        statistics::Vector committedInsts;
-        /** Stat for the number of committed ops (including micro ops) per
-         *  thread. */
-        statistics::Vector committedOps;
-        /** Stat for the CPI per thread. */
-        statistics::Formula cpi;
-        /** Stat for the total CPI. */
-        statistics::Formula totalCpi;
-        /** Stat for the IPC per thread. */
-        statistics::Formula ipc;
-        /** Stat for the total IPC. */
-        statistics::Formula totalIpc;
 
     } cpuStats;
 

From 1c4cc8dbd04b6ae875ca920fcd2ce0ef00cd6b38 Mon Sep 17 00:00:00 2001
From: Melissa Jost <melissakjost@gmail.com>
Date: Thu, 19 Jan 2023 02:04:39 -0800
Subject: [PATCH 242/492] cpu-o3: Move general fetch stats to
 BaseCPU::FetchCPUStats

The stats moved are from fetch.hh and fetch.cc of O3. Stat branches is
now tracked by numBranches. Stat branchRate is now tracked by
branchRate in FetchCPUStats. Stat rate is tracked by fetchRate. Stat
insts is tracked by numInsts. Stat icacheStallCycles is tracked by
icacheStallCycles in FetchCPUStats.

Change-Id: I48313614edd078631df4ef6b00982c335798fcb1
Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/67394
Maintainer: Jason Lowe-Power <power.jg@gmail.com>
Tested-by: kokoro <noreply+kokoro@google.com>
Reviewed-by: Bobby Bruce <bbruce@ucdavis.edu>
---
 src/cpu/base.cc     | 28 +++++++++++++++++++++++++++-
 src/cpu/base.hh     |  9 +++++++++
 src/cpu/o3/fetch.cc | 33 +++++----------------------------
 src/cpu/o3/fetch.hh | 10 ----------
 4 files changed, 41 insertions(+), 39 deletions(-)

diff --git a/src/cpu/base.cc b/src/cpu/base.cc
index fa30e4b5e6..490e48938a 100644
--- a/src/cpu/base.cc
+++ b/src/cpu/base.cc
@@ -196,8 +196,15 @@ BaseCPU::BaseCPU(const Params &p, bool is_checker)
     executeStats.reserve(numThreads);
     commitStats.reserve(numThreads);
     for (int i = 0; i < numThreads; i++) {
-        fetchStats.emplace_back(new FetchCPUStats(this, i));
+        // create fetchStat object for thread i and set rate formulas
+        FetchCPUStats* fetchStatptr = new FetchCPUStats(this, i);
+        fetchStatptr->fetchRate = fetchStatptr->numInsts / baseStats.numCycles;
+        fetchStatptr->branchRate = fetchStatptr->numBranches /
+            baseStats.numCycles;
+        fetchStats.emplace_back(fetchStatptr);
+
         executeStats.emplace_back(new ExecuteCPUStats(this, i));
+
         // create commitStat object for thread i and set ipc, cpi formulas
         CommitCPUStats* commitStatptr = new CommitCPUStats(this, i);
         commitStatptr->ipc = commitStatptr->numInsts / baseStats.numCycles;
@@ -862,15 +869,31 @@ FetchCPUStats::FetchCPUStats(statistics::Group *parent, int thread_id)
              "Number of instructions fetched (thread level)"),
     ADD_STAT(numOps, statistics::units::Count::get(),
              "Number of ops (including micro ops) fetched (thread level)"),
+    ADD_STAT(fetchRate, statistics::units::Rate<
+             statistics::units::Count, statistics::units::Cycle>::get(),
+             "Number of inst fetches per cycle"),
     ADD_STAT(numBranches, statistics::units::Count::get(),
              "Number of branches fetched"),
+    ADD_STAT(branchRate, statistics::units::Ratio::get(),
+             "Number of branch fetches per cycle"),
+    ADD_STAT(icacheStallCycles, statistics::units::Cycle::get(),
+             "ICache total stall cycles"),
     ADD_STAT(numFetchSuspends, statistics::units::Count::get(),
              "Number of times Execute suspended instruction fetching")
 
 {
+    fetchRate
+        .flags(statistics::total);
+
     numBranches
         .prereq(numBranches);
 
+    branchRate
+        .flags(statistics::total);
+
+    icacheStallCycles
+        .prereq(icacheStallCycles);
+
 }
 
 // means it is incremented in a vector indexing and not directly
@@ -981,6 +1004,9 @@ CommitCPUStats::CommitCPUStats(statistics::Group *parent, int thread_id)
     ADD_STAT(committedControl, statistics::units::Count::get(),
              "Class of control type instructions committed")
 {
+    numInsts
+        .prereq(numInsts);
+
     cpi.precision(6);
     ipc.precision(6);
 
diff --git a/src/cpu/base.hh b/src/cpu/base.hh
index a9af865da0..5d0d3cab01 100644
--- a/src/cpu/base.hh
+++ b/src/cpu/base.hh
@@ -695,9 +695,18 @@ class BaseCPU : public ClockedObject
         /* Total number of operations fetched */
         statistics::Scalar numOps;
 
+        /* Number of instruction fetched per cycle. */
+        statistics::Formula fetchRate;
+
         /* Total number of branches fetched */
         statistics::Scalar numBranches;
 
+        /* Number of branch fetches per cycle. */
+        statistics::Formula branchRate;
+
+        /* Number of cycles stalled due to an icache miss */
+        statistics::Scalar icacheStallCycles;
+
         /* Number of times fetch was asked to suspend by Execute */
         statistics::Scalar numFetchSuspends;
 
diff --git a/src/cpu/o3/fetch.cc b/src/cpu/o3/fetch.cc
index d3cdd2c761..f5fc6c62ec 100644
--- a/src/cpu/o3/fetch.cc
+++ b/src/cpu/o3/fetch.cc
@@ -158,12 +158,6 @@ Fetch::regProbePoints()
 
 Fetch::FetchStatGroup::FetchStatGroup(CPU *cpu, Fetch *fetch)
     : statistics::Group(cpu, "fetch"),
-    ADD_STAT(icacheStallCycles, statistics::units::Cycle::get(),
-             "Number of cycles fetch is stalled on an Icache miss"),
-    ADD_STAT(insts, statistics::units::Count::get(),
-             "Number of instructions fetch has processed"),
-    ADD_STAT(branches, statistics::units::Count::get(),
-             "Number of branches that fetch encountered"),
     ADD_STAT(predictedBranches, statistics::units::Count::get(),
              "Number of branches that fetch has predicted taken"),
     ADD_STAT(cycles, statistics::units::Cycle::get(),
@@ -200,21 +194,8 @@ Fetch::FetchStatGroup::FetchStatGroup(CPU *cpu, Fetch *fetch)
              "Number of instructions fetched each cycle (Total)"),
     ADD_STAT(idleRate, statistics::units::Ratio::get(),
              "Ratio of cycles fetch was idle",
-             idleCycles / cpu->baseStats.numCycles),
-    ADD_STAT(branchRate, statistics::units::Ratio::get(),
-             "Number of branch fetches per cycle",
-             branches / cpu->baseStats.numCycles),
-    ADD_STAT(rate, statistics::units::Rate<
-                    statistics::units::Count, statistics::units::Cycle>::get(),
-             "Number of inst fetches per cycle",
-             insts / cpu->baseStats.numCycles)
+             idleCycles / cpu->baseStats.numCycles)
 {
-        icacheStallCycles
-            .prereq(icacheStallCycles);
-        insts
-            .prereq(insts);
-        branches
-            .prereq(branches);
         predictedBranches
             .prereq(predictedBranches);
         cycles
@@ -252,10 +233,6 @@ Fetch::FetchStatGroup::FetchStatGroup(CPU *cpu, Fetch *fetch)
             .flags(statistics::pdf);
         idleRate
             .prereq(idleRate);
-        branchRate
-            .flags(statistics::total);
-        rate
-            .flags(statistics::total);
 }
 void
 Fetch::setTimeBuffer(TimeBuffer<TimeStruct> *time_buffer)
@@ -540,7 +517,7 @@ Fetch::lookupAndUpdateNextPC(const DynInstPtr &inst, PCStateBase &next_pc)
     inst->setPredTarg(next_pc);
     inst->setPredTaken(predict_taken);
 
-    ++fetchStats.branches;
+    cpu->fetchStats[tid]->numBranches++;
 
     if (predict_taken) {
         ++fetchStats.predictedBranches;
@@ -1146,7 +1123,7 @@ Fetch::fetch(bool &status_change)
             fetchCacheLine(fetchAddr, tid, this_pc.instAddr());
 
             if (fetchStatus[tid] == IcacheWaitResponse)
-                ++fetchStats.icacheStallCycles;
+                cpu->fetchStats[tid]->icacheStallCycles++;
             else if (fetchStatus[tid] == ItlbWait)
                 ++fetchStats.tlbCycles;
             else
@@ -1242,7 +1219,7 @@ Fetch::fetch(bool &status_change)
                     staticInst = dec_ptr->decode(this_pc);
 
                     // Increment stat of fetched instructions.
-                    ++fetchStats.insts;
+                    cpu->fetchStats[tid]->numInsts++;
 
                     if (staticInst->isMacroop()) {
                         curMacroop = staticInst;
@@ -1572,7 +1549,7 @@ Fetch::profileStall(ThreadID tid)
         ++fetchStats.squashCycles;
         DPRINTF(Fetch, "[tid:%i] Fetch is squashing!\n", tid);
     } else if (fetchStatus[tid] == IcacheWaitResponse) {
-        ++fetchStats.icacheStallCycles;
+        cpu->fetchStats[tid]->icacheStallCycles++;
         DPRINTF(Fetch, "[tid:%i] Fetch is waiting cache response!\n",
                 tid);
     } else if (fetchStatus[tid] == ItlbWait) {
diff --git a/src/cpu/o3/fetch.hh b/src/cpu/o3/fetch.hh
index cd311913f5..6add31444d 100644
--- a/src/cpu/o3/fetch.hh
+++ b/src/cpu/o3/fetch.hh
@@ -533,12 +533,6 @@ class Fetch
         FetchStatGroup(CPU *cpu, Fetch *fetch);
         // @todo: Consider making these
         // vectors and tracking on a per thread basis.
-        /** Stat for total number of cycles stalled due to an icache miss. */
-        statistics::Scalar icacheStallCycles;
-        /** Stat for total number of fetched instructions. */
-        statistics::Scalar insts;
-        /** Total number of fetched branches. */
-        statistics::Scalar branches;
         /** Stat for total number of predicted branches. */
         statistics::Scalar predictedBranches;
         /** Stat for total number of cycles spent fetching. */
@@ -581,10 +575,6 @@ class Fetch
         statistics::Distribution nisnDist;
         /** Rate of how often fetch was idle. */
         statistics::Formula idleRate;
-        /** Number of branch fetches per cycle. */
-        statistics::Formula branchRate;
-        /** Number of instruction fetched per cycle. */
-        statistics::Formula rate;
     } fetchStats;
 };
 

From 0974fe6f24ce748057b5b1a3002ebac75d11b397 Mon Sep 17 00:00:00 2001
From: Melissa Jost <melissakjost@gmail.com>
Date: Thu, 19 Jan 2023 02:11:11 -0800
Subject: [PATCH 243/492] cpu-o3: Move O3 IEW stats to BaseCPU::ExecuteCPUStats

Moved numInsts, numBranches, numNop, numRefs, numLoadInsts, numRate to
Base. Merged numRefs into numMemRefs of ExecuteCPUStats. Renamed
numRate to instRate. Updated formatting in ExecuteCPUStats group.

Change-Id: I1fd3a989d917eb2ffaa865b067b80e266d6f55bc
Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/67395
Tested-by: kokoro <noreply+kokoro@google.com>
Reviewed-by: Bobby Bruce <bbruce@ucdavis.edu>
Maintainer: Bobby Bruce <bbruce@ucdavis.edu>
---
 src/cpu/base.cc   | 53 ++++++++++++++++++++++++++++++++---------------
 src/cpu/base.hh   | 13 ++++++++++++
 src/cpu/o3/iew.cc | 50 ++++++--------------------------------------
 src/cpu/o3/iew.hh | 14 -------------
 4 files changed, 55 insertions(+), 75 deletions(-)

diff --git a/src/cpu/base.cc b/src/cpu/base.cc
index 490e48938a..cee76472f5 100644
--- a/src/cpu/base.cc
+++ b/src/cpu/base.cc
@@ -203,7 +203,11 @@ BaseCPU::BaseCPU(const Params &p, bool is_checker)
             baseStats.numCycles;
         fetchStats.emplace_back(fetchStatptr);
 
-        executeStats.emplace_back(new ExecuteCPUStats(this, i));
+        // create executeStat object for thread i and set rate formulas
+        ExecuteCPUStats* executeStatptr = new ExecuteCPUStats(this, i);
+        executeStatptr->instRate = executeStatptr->numInsts /
+            baseStats.numCycles;
+        executeStats.emplace_back(executeStatptr);
 
         // create commitStat object for thread i and set ipc, cpi formulas
         CommitCPUStats* commitStatptr = new CommitCPUStats(this, i);
@@ -900,6 +904,19 @@ FetchCPUStats::FetchCPUStats(statistics::Group *parent, int thread_id)
 BaseCPU::
 ExecuteCPUStats::ExecuteCPUStats(statistics::Group *parent, int thread_id)
     : statistics::Group(parent, csprintf("executeStats%i", thread_id).c_str()),
+    ADD_STAT(numInsts, statistics::units::Count::get(),
+             "Number of executed instructions"),
+    ADD_STAT(numNop, statistics::units::Count::get(),
+             "Number of nop insts executed"),
+    ADD_STAT(numBranches, statistics::units::Count::get(),
+             "Number of branches executed"),
+    ADD_STAT(numLoadInsts, statistics::units::Count::get(),
+             "Number of load instructions executed"),
+    ADD_STAT(numStoreInsts, statistics::units::Count::get(),
+             "Number of stores executed"),
+    ADD_STAT(instRate, statistics::units::Rate<
+                statistics::units::Count, statistics::units::Cycle>::get(),
+             "Inst execution rate"),
     ADD_STAT(dcacheStallCycles, statistics::units::Cycle::get(),
              "DCache total stall cycles"),
     ADD_STAT(numCCRegReads, statistics::units::Count::get(),
@@ -938,36 +955,38 @@ ExecuteCPUStats::ExecuteCPUStats(statistics::Group *parent, int thread_id)
              "Number of ops (including micro ops) which were discarded before "
              "commit")
 {
+    numStoreInsts = numMemRefs - numLoadInsts;
+
     dcacheStallCycles
-                .prereq(dcacheStallCycles);
+        .prereq(dcacheStallCycles);
     numCCRegReads
-                .prereq(numCCRegReads)
-                .flags(statistics::nozero);
+        .prereq(numCCRegReads)
+        .flags(statistics::nozero);
     numCCRegWrites
-                .prereq(numCCRegWrites)
-                .flags(statistics::nozero);
+        .prereq(numCCRegWrites)
+        .flags(statistics::nozero);
     numFpAluAccesses
-                .prereq(numFpAluAccesses);
+        .prereq(numFpAluAccesses);
     numFpRegReads
-                .prereq(numFpRegReads);
+        .prereq(numFpRegReads);
     numIntAluAccesses
-                .prereq(numIntAluAccesses);
+        .prereq(numIntAluAccesses);
     numIntRegReads
-                .prereq(numIntRegReads);
+        .prereq(numIntRegReads);
     numIntRegWrites
-                .prereq(numIntRegWrites);
+        .prereq(numIntRegWrites);
     numMiscRegReads
-                .prereq(numMiscRegReads);
+        .prereq(numMiscRegReads);
     numMiscRegWrites
-                .prereq(numMiscRegWrites);
+        .prereq(numMiscRegWrites);
     numVecPredRegReads
-                .prereq(numVecPredRegReads);
+        .prereq(numVecPredRegReads);
     numVecPredRegWrites
-                .prereq(numVecPredRegWrites);
+        .prereq(numVecPredRegWrites);
     numVecRegReads
-                .prereq(numVecRegReads);
+        .prereq(numVecRegReads);
     numVecRegWrites
-                .prereq(numVecRegWrites);
+        .prereq(numVecRegWrites);
 }
 
 BaseCPU::
diff --git a/src/cpu/base.hh b/src/cpu/base.hh
index 5d0d3cab01..fc22abc5aa 100644
--- a/src/cpu/base.hh
+++ b/src/cpu/base.hh
@@ -716,6 +716,19 @@ class BaseCPU : public ClockedObject
     {
         ExecuteCPUStats(statistics::Group *parent, int thread_id);
 
+        /* Stat for total number of executed instructions */
+        statistics::Scalar numInsts;
+        /* Number of executed nops */
+        statistics::Scalar numNop;
+        /* Number of executed branches */
+        statistics::Scalar numBranches;
+        /* Stat for total number of executed load instructions */
+        statistics::Scalar numLoadInsts;
+        /* Number of executed store instructions */
+        statistics::Formula numStoreInsts;
+        /* Number of instructions executed per cycle */
+        statistics::Formula instRate;
+
         /* Number of cycles stalled for D-cache responses */
         statistics::Scalar dcacheStallCycles;
 
diff --git a/src/cpu/o3/iew.cc b/src/cpu/o3/iew.cc
index 7cf6c54542..92d281ce93 100644
--- a/src/cpu/o3/iew.cc
+++ b/src/cpu/o3/iew.cc
@@ -217,52 +217,14 @@ IEW::IEWStats::IEWStats(CPU *cpu)
 
 IEW::IEWStats::ExecutedInstStats::ExecutedInstStats(CPU *cpu)
     : statistics::Group(cpu),
-    ADD_STAT(numInsts, statistics::units::Count::get(),
-             "Number of executed instructions"),
-    ADD_STAT(numLoadInsts, statistics::units::Count::get(),
-             "Number of load instructions executed"),
     ADD_STAT(numSquashedInsts, statistics::units::Count::get(),
              "Number of squashed instructions skipped in execute"),
     ADD_STAT(numSwp, statistics::units::Count::get(),
-             "Number of swp insts executed"),
-    ADD_STAT(numNop, statistics::units::Count::get(),
-             "Number of nop insts executed"),
-    ADD_STAT(numRefs, statistics::units::Count::get(),
-             "Number of memory reference insts executed"),
-    ADD_STAT(numBranches, statistics::units::Count::get(),
-             "Number of branches executed"),
-    ADD_STAT(numStoreInsts, statistics::units::Count::get(),
-             "Number of stores executed"),
-    ADD_STAT(numRate, statistics::units::Rate<
-                statistics::units::Count, statistics::units::Cycle>::get(),
-             "Inst execution rate", numInsts / cpu->baseStats.numCycles)
+             "Number of swp insts executed")
 {
-    numLoadInsts
-        .init(cpu->numThreads)
-        .flags(statistics::total);
-
     numSwp
         .init(cpu->numThreads)
         .flags(statistics::total);
-
-    numNop
-        .init(cpu->numThreads)
-        .flags(statistics::total);
-
-    numRefs
-        .init(cpu->numThreads)
-        .flags(statistics::total);
-
-    numBranches
-        .init(cpu->numThreads)
-        .flags(statistics::total);
-
-    numStoreInsts
-        .flags(statistics::total);
-    numStoreInsts = numRefs - numLoadInsts;
-
-    numRate
-        .flags(statistics::total);
 }
 
 void
@@ -1053,7 +1015,7 @@ IEW::dispatchInsts(ThreadID tid)
 
             instQueue.recordProducer(inst);
 
-            iewStats.executedInstStats.numNop[tid]++;
+            cpu->executeStats[tid]->numNop++;
 
             add_to_iq = false;
         } else {
@@ -1561,7 +1523,7 @@ IEW::updateExeInstStats(const DynInstPtr& inst)
 {
     ThreadID tid = inst->threadNumber;
 
-    iewStats.executedInstStats.numInsts++;
+    cpu->executeStats[tid]->numInsts++;
 
 #if TRACING_ON
     if (debug::O3PipeView) {
@@ -1573,16 +1535,16 @@ IEW::updateExeInstStats(const DynInstPtr& inst)
     //  Control operations
     //
     if (inst->isControl())
-        iewStats.executedInstStats.numBranches[tid]++;
+        cpu->executeStats[tid]->numBranches++;
 
     //
     //  Memory operations
     //
     if (inst->isMemRef()) {
-        iewStats.executedInstStats.numRefs[tid]++;
+        cpu->executeStats[tid]->numMemRefs++;
 
         if (inst->isLoad()) {
-            iewStats.executedInstStats.numLoadInsts[tid]++;
+            cpu->executeStats[tid]->numLoadInsts++;
         }
     }
 }
diff --git a/src/cpu/o3/iew.hh b/src/cpu/o3/iew.hh
index 80fed295df..4fe8227dcc 100644
--- a/src/cpu/o3/iew.hh
+++ b/src/cpu/o3/iew.hh
@@ -455,25 +455,11 @@ class IEW
         {
             ExecutedInstStats(CPU *cpu);
 
-            /** Stat for total number of executed instructions. */
-            statistics::Scalar numInsts;
-            /** Stat for total number of executed load instructions. */
-            statistics::Vector numLoadInsts;
             /** Stat for total number of squashed instructions skipped at
              *  execute. */
             statistics::Scalar numSquashedInsts;
             /** Number of executed software prefetches. */
             statistics::Vector numSwp;
-            /** Number of executed nops. */
-            statistics::Vector numNop;
-            /** Number of executed meomory references. */
-            statistics::Vector numRefs;
-            /** Number of executed branches. */
-            statistics::Vector numBranches;
-            /** Number of executed store instructions. */
-            statistics::Formula numStoreInsts;
-            /** Number of instructions executed per cycle. */
-            statistics::Formula numRate;
         } executedInstStats;
 
         /** Number of instructions sent to commit. */

From 457d70df626a8cb0a7fa0ce63b3d3e0886a2bbda Mon Sep 17 00:00:00 2001
From: Melissa Jost <melissakjost@gmail.com>
Date: Thu, 19 Jan 2023 02:12:49 -0800
Subject: [PATCH 244/492] cpu-kvm: Implement IPC and CPI base stats for KVM CPU

Replaced committedInsts stats of KVM CPU with commitStats.numInsts
of BaseCPU. This results in IPC and CPI printing in stats.txt for
KVM simulations.

Change-Id: I02395630fc50a69adebf11f4ed39d9cefb852e1f
Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/67396
Reviewed-by: Andreas Sandberg <andreas.sandberg@arm.com>
Maintainer: Andreas Sandberg <andreas.sandberg@arm.com>
Tested-by: kokoro <noreply+kokoro@google.com>
---
 src/cpu/kvm/base.cc | 5 ++---
 src/cpu/kvm/base.hh | 1 -
 2 files changed, 2 insertions(+), 4 deletions(-)

diff --git a/src/cpu/kvm/base.cc b/src/cpu/kvm/base.cc
index b76bddc2fd..e22e1628d2 100644
--- a/src/cpu/kvm/base.cc
+++ b/src/cpu/kvm/base.cc
@@ -261,8 +261,6 @@ BaseKvmCPU::restartEqThread()
 
 BaseKvmCPU::StatGroup::StatGroup(statistics::Group *parent)
     : statistics::Group(parent),
-    ADD_STAT(committedInsts, statistics::units::Count::get(),
-             "Number of instructions committed"),
     ADD_STAT(numVMExits, statistics::units::Count::get(),
              "total number of KVM exits"),
     ADD_STAT(numVMHalfEntries, statistics::units::Count::get(),
@@ -778,7 +776,8 @@ BaseKvmCPU::kvmRun(Tick ticks)
 
         /* Update statistics */
         baseStats.numCycles += simCyclesExecuted;;
-        stats.committedInsts += instsExecuted;
+        commitStats[thread->threadId()]->numInsts += instsExecuted;
+        baseStats.numInsts += instsExecuted;
         ctrInsts += instsExecuted;
 
         DPRINTF(KvmRun,
diff --git a/src/cpu/kvm/base.hh b/src/cpu/kvm/base.hh
index 2d81c7c7eb..7bbf393f9b 100644
--- a/src/cpu/kvm/base.hh
+++ b/src/cpu/kvm/base.hh
@@ -804,7 +804,6 @@ class BaseKvmCPU : public BaseCPU
     struct StatGroup : public statistics::Group
     {
         StatGroup(statistics::Group *parent);
-        statistics::Scalar committedInsts;
         statistics::Scalar numVMExits;
         statistics::Scalar numVMHalfEntries;
         statistics::Scalar numExitSignal;

From 8a9a629bdb346b49d592d11367c2b6ba76702d52 Mon Sep 17 00:00:00 2001
From: Roger Chang <rogerycchang@google.com>
Date: Fri, 17 Feb 2023 20:20:36 +0800
Subject: [PATCH 245/492] arch-riscv: Support PMP lock feature

The lock feature will let M mode do memory permission check before
R/W/X data. If the lock bit of pmpicfg set, then the pmpicfg and
pmpaddri will ignore the update value later until CPU reset, and
pmpaddri-1 will ignore if the TOR A field is set.

The following is add in CL:
1. Add condition to run PMP check when any lock bit of pmp tables
   is set
2. Add PMP_LOCK bit check when try to update pmpaddr and pmpcfg
3. If there is no PMP entry matches and priviledge mode is M,
   no fault generated
4. If the address matches PMP entry, return no fault if priviledge
mode is M and lock bit is not set

For more details about PMP, please see RISC-V Spec Volumn II,
Priviledge Archtecture, Ver 1.12, Section 3.7 Physical Memory
Protection

Change-Id: I3e7c5824d6c05f2ea928ee9ec7714f7271e4c58c
Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/68057
Reviewed-by: Ayaz Akram <yazakram@ucdavis.edu>
Tested-by: kokoro <noreply+kokoro@google.com>
Reviewed-by: Yu-hsin Wang <yuhsingw@google.com>
Maintainer: Bobby Bruce <bbruce@ucdavis.edu>
---
 src/arch/riscv/faults.cc | 10 ++++++
 src/arch/riscv/isa.cc    | 18 +++++++---
 src/arch/riscv/pmp.cc    | 75 ++++++++++++++++++++++++++++------------
 src/arch/riscv/pmp.hh    | 19 ++++++++--
 4 files changed, 93 insertions(+), 29 deletions(-)

diff --git a/src/arch/riscv/faults.cc b/src/arch/riscv/faults.cc
index 3469c71252..940f7107ba 100644
--- a/src/arch/riscv/faults.cc
+++ b/src/arch/riscv/faults.cc
@@ -33,6 +33,8 @@
 
 #include "arch/riscv/insts/static_inst.hh"
 #include "arch/riscv/isa.hh"
+#include "arch/riscv/mmu.hh"
+#include "arch/riscv/pmp.hh"
 #include "arch/riscv/regs/misc.hh"
 #include "arch/riscv/utility.hh"
 #include "cpu/base.hh"
@@ -180,6 +182,14 @@ Reset::invoke(ThreadContext *tc, const StaticInstPtr &inst)
         tc->getIsaPtr()->newPCState(workload->getEntry())));
     panic_if(!new_pc, "Failed create new PCState from ISA pointer");
     tc->pcState(*new_pc);
+
+    // Reset PMP Cfg
+    auto* mmu = dynamic_cast<RiscvISA::MMU*>(tc->getMMUPtr());
+    if (mmu == nullptr) {
+        warn("MMU is not Riscv MMU instance, we can't reset PMP");
+        return;
+    }
+    mmu->getPMP()->pmpReset();
 }
 
 void
diff --git a/src/arch/riscv/isa.cc b/src/arch/riscv/isa.cc
index 3809c61d63..d778957b9e 100644
--- a/src/arch/riscv/isa.cc
+++ b/src/arch/riscv/isa.cc
@@ -2,6 +2,7 @@
  * Copyright (c) 2016 RISC-V Foundation
  * Copyright (c) 2016 The University of Virginia
  * Copyright (c) 2020 Barkhausen Institut
+ * Copyright (c) 2022 Google LLC
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
@@ -544,6 +545,8 @@ ISA::setMiscReg(RegIndex idx, RegVal val)
                 // qemu seems to update the tables when
                 // pmp addr regs are written (with the assumption
                 // that cfg regs are already written)
+                RegVal res = 0;
+                RegVal old_val = readMiscRegNoEffect(idx);
 
                 for (int i=0; i < regSize; i++) {
 
@@ -554,10 +557,15 @@ ISA::setMiscReg(RegIndex idx, RegVal val)
                     // Form pmp_index using the index i and
                     // PMPCFG register number
                     uint32_t pmp_index = i+(4*(idx-MISCREG_PMPCFG0));
-                    mmu->getPMP()->pmpUpdateCfg(pmp_index,cfg_val);
+                    bool result = mmu->getPMP()->pmpUpdateCfg(pmp_index,cfg_val);
+                    if (result) {
+                        res |= ((RegVal)cfg_val << (8*i));
+                    } else {
+                        res |= (old_val & (0xFF << (8*i)));
+                    }
                 }
 
-                setMiscRegNoEffect(idx, val);
+                setMiscRegNoEffect(idx, res);
             }
             break;
           case MISCREG_PMPADDR00 ... MISCREG_PMPADDR15:
@@ -568,9 +576,9 @@ ISA::setMiscReg(RegIndex idx, RegVal val)
                 auto mmu = dynamic_cast<RiscvISA::MMU *>
                               (tc->getMMUPtr());
                 uint32_t pmp_index = idx-MISCREG_PMPADDR00;
-                mmu->getPMP()->pmpUpdateAddr(pmp_index, val);
-
-                setMiscRegNoEffect(idx, val);
+                if (mmu->getPMP()->pmpUpdateAddr(pmp_index, val)) {
+                    setMiscRegNoEffect(idx, val);
+                }
             }
             break;
 
diff --git a/src/arch/riscv/pmp.cc b/src/arch/riscv/pmp.cc
index 77ef98f2d0..940af47686 100644
--- a/src/arch/riscv/pmp.cc
+++ b/src/arch/riscv/pmp.cc
@@ -1,5 +1,6 @@
 /*
  * Copyright (c) 2021 The Regents of the University of California
+ * Copyright (c) 2023 Google LLC
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
@@ -27,7 +28,6 @@
  */
 
 #include "arch/riscv/pmp.hh"
-
 #include "arch/generic/tlb.hh"
 #include "arch/riscv/faults.hh"
 #include "arch/riscv/isa.hh"
@@ -47,7 +47,8 @@ namespace gem5
 PMP::PMP(const Params &params) :
     SimObject(params),
     pmpEntries(params.pmp_entries),
-    numRules(0)
+    numRules(0),
+    hasLockEntry(false)
 {
     pmpTable.resize(pmpEntries);
 }
@@ -70,10 +71,7 @@ PMP::pmpCheck(const RequestPtr &req, BaseMMU::Mode mode,
                 req->getPaddr());
     }
 
-    // An access should be successful if there are
-    // no rules defined yet or we are in M mode (based
-    // on specs v1.10)
-    if (numRules == 0 || (pmode == RiscvISA::PrivilegeMode::PRV_M))
+    if (numRules == 0)
         return NoFault;
 
     // match_index will be used to identify the pmp entry
@@ -94,20 +92,19 @@ PMP::pmpCheck(const RequestPtr &req, BaseMMU::Mode mode,
 
         if ((match_index > -1)
             && (PMP_OFF != pmpGetAField(pmpTable[match_index].pmpCfg))) {
-            // check the RWX permissions from the pmp entry
-            uint8_t allowed_privs = PMP_READ | PMP_WRITE | PMP_EXEC;
+            uint8_t this_cfg = pmpTable[match_index].pmpCfg;
 
-            // i is the index of pmp table which matched
-            allowed_privs &= pmpTable[match_index].pmpCfg;
-
-            if ((mode == BaseMMU::Mode::Read) &&
-                                        (PMP_READ & allowed_privs)) {
+            if ((pmode == RiscvISA::PrivilegeMode::PRV_M) &&
+                                    (PMP_LOCK & this_cfg) == 0) {
+                return NoFault;
+            } else if ((mode == BaseMMU::Mode::Read) &&
+                                        (PMP_READ & this_cfg)) {
                 return NoFault;
             } else if ((mode == BaseMMU::Mode::Write) &&
-                                        (PMP_WRITE & allowed_privs)) {
+                                        (PMP_WRITE & this_cfg)) {
                 return NoFault;
             } else if ((mode == BaseMMU::Mode::Execute) &&
-                                        (PMP_EXEC & allowed_privs)) {
+                                        (PMP_EXEC & this_cfg)) {
                 return NoFault;
             } else {
                 if (req->hasVaddr()) {
@@ -119,7 +116,9 @@ PMP::pmpCheck(const RequestPtr &req, BaseMMU::Mode mode,
         }
     }
     // if no entry matched and we are not in M mode return fault
-    if (req->hasVaddr()) {
+    if (pmode == RiscvISA::PrivilegeMode::PRV_M) {
+        return NoFault;
+    } else if (req->hasVaddr()) {
         return createAddrfault(req->getVaddr(), mode);
     } else {
         return createAddrfault(vaddr, mode);
@@ -150,17 +149,19 @@ PMP::pmpGetAField(uint8_t cfg)
 }
 
 
-void
+bool
 PMP::pmpUpdateCfg(uint32_t pmp_index, uint8_t this_cfg)
 {
     DPRINTF(PMP, "Update pmp config with %u for pmp entry: %u \n",
                                     (unsigned)this_cfg, pmp_index);
-
-    warn_if((PMP_LOCK & this_cfg), "pmp lock feature is not supported.\n");
-
+    if (pmpTable[pmp_index].pmpCfg & PMP_LOCK) {
+        DPRINTF(PMP, "Update pmp entry config %u failed because it locked\n",
+                pmp_index);
+        return false;
+    }
     pmpTable[pmp_index].pmpCfg = this_cfg;
     pmpUpdateRule(pmp_index);
-
+    return true;
 }
 
 void
@@ -170,6 +171,7 @@ PMP::pmpUpdateRule(uint32_t pmp_index)
     // pmpaddr/pmpcfg is written
 
     numRules = 0;
+    hasLockEntry = false;
     Addr prevAddr = 0;
 
     if (pmp_index >= 1) {
@@ -209,15 +211,42 @@ PMP::pmpUpdateRule(uint32_t pmp_index)
       if (PMP_OFF != a_field) {
           numRules++;
       }
+      hasLockEntry |= ((pmpTable[i].pmpCfg & PMP_LOCK) != 0);
+    }
+
+    if (hasLockEntry) {
+        DPRINTF(PMP, "Find lock entry\n");
     }
 }
 
 void
+PMP::pmpReset()
+{
+    for (uint32_t i = 0; i < pmpTable.size(); i++) {
+        pmpTable[i].pmpCfg &= ~(PMP_A_MASK | PMP_LOCK);
+        pmpUpdateRule(i);
+    }
+}
+
+bool
 PMP::pmpUpdateAddr(uint32_t pmp_index, Addr this_addr)
 {
     DPRINTF(PMP, "Update pmp addr %#x for pmp entry %u \n",
                                       this_addr, pmp_index);
 
+    if (pmpTable[pmp_index].pmpCfg & PMP_LOCK) {
+        DPRINTF(PMP, "Update pmp entry %u failed because the lock bit set\n",
+                pmp_index);
+        return false;
+    } else if (pmp_index < pmpTable.size() - 1 &&
+               ((pmpTable[pmp_index+1].pmpCfg & PMP_LOCK) != 0) &&
+               pmpGetAField(pmpTable[pmp_index+1].pmpCfg) == PMP_TOR) {
+        DPRINTF(PMP, "Update pmp entry %u failed because the entry %u lock bit set"
+                "and A field is TOR\n",
+                pmp_index, pmp_index+1);
+        return false;
+    }
+
     // just writing the raw addr in the pmp table
     // will convert it into a range, once cfg
     // reg is written
@@ -225,6 +254,8 @@ PMP::pmpUpdateAddr(uint32_t pmp_index, Addr this_addr)
     for (int index = 0; index < pmpEntries; index++) {
         pmpUpdateRule(index);
     }
+
+    return true;
 }
 
 bool
@@ -247,7 +278,7 @@ PMP::shouldCheckPMP(RiscvISA::PrivilegeMode pmode,
     bool cond3 = (mode != BaseMMU::Execute && (status.mprv)
     && (status.mpp != RiscvISA::PrivilegeMode::PRV_M));
 
-    return (cond1 || cond2 || cond3);
+    return (cond1 || cond2 || cond3 || hasLockEntry);
 }
 
 AddrRange
diff --git a/src/arch/riscv/pmp.hh b/src/arch/riscv/pmp.hh
index 1509646850..24cb4ad1ca 100644
--- a/src/arch/riscv/pmp.hh
+++ b/src/arch/riscv/pmp.hh
@@ -1,5 +1,6 @@
 /*
  * Copyright (c) 2021 The Regents of the University of California
+ * Copyright (c) 2023 Google LLC
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
@@ -85,12 +86,18 @@ class PMP : public SimObject
     /** pmpcfg address range execute permission mask */
     const uint8_t PMP_EXEC = 1 << 2;
 
+    /** pmpcfg A field mask */
+    const uint8_t PMP_A_MASK = 3 << 3;
+
     /** pmpcfg address range locked mask */
     const uint8_t PMP_LOCK = 1 << 7;
 
     /** variable to keep track of active number of rules any time */
     int numRules;
 
+    /** variable to keep track of any lock of entry */
+    bool hasLockEntry;
+
     /** single pmp entry struct*/
     struct PmpEntry
     {
@@ -127,8 +134,9 @@ class PMP : public SimObject
      * rule of corresponding pmp entry.
      * @param pmp_index pmp entry index.
      * @param this_cfg value to be written to pmpcfg.
+     * @returns true if update pmpicfg success
      */
-    void pmpUpdateCfg(uint32_t pmp_index, uint8_t this_cfg);
+    bool pmpUpdateCfg(uint32_t pmp_index, uint8_t this_cfg);
 
     /**
      * pmpUpdateAddr updates the pmpaddr for a pmp
@@ -136,8 +144,15 @@ class PMP : public SimObject
      * rule of corresponding pmp entry.
      * @param pmp_index pmp entry index.
      * @param this_addr value to be written to pmpaddr.
+     * @returns true if update pmpaddri success
      */
-    void pmpUpdateAddr(uint32_t pmp_index, Addr this_addr);
+    bool pmpUpdateAddr(uint32_t pmp_index, Addr this_addr);
+
+    /**
+     * pmpReset reset when reset signal in trigger from
+     * CPU.
+     */
+    void pmpReset();
 
   private:
     /**

From b440355cbce50c189ed7a3d42586f6eb0fc5887f Mon Sep 17 00:00:00 2001
From: Matt Sinclair <mattdsinclair.wisc@gmail.com>
Date: Sat, 7 Jan 2023 16:08:11 -0600
Subject: [PATCH 246/492] tests: cleanup m5out directly in weekly

The weekly test script was implicitly assuming that no m5out
directory existed in the folder where the script was run.
However, if a prior test ran and failed, it would not clean up
its m5out directory, causing the weekly tests to fail.

This commit resolves this by removing the m5out directory before
trying to run any tests in the weekly script.  Moreover, we also
update the weekly script to explicitly remove this m5out directory
at the end of the script.

Change-Id: If10c59034528e171cc2c5dacb928b3a81d6b8c50
Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/67198
Reviewed-by: Bobby Bruce <bbruce@ucdavis.edu>
Maintainer: Bobby Bruce <bbruce@ucdavis.edu>
Tested-by: kokoro <noreply+kokoro@google.com>
---
 tests/weekly.sh | 12 ++++++++----
 1 file changed, 8 insertions(+), 4 deletions(-)

diff --git a/tests/weekly.sh b/tests/weekly.sh
index c7f834b7a5..f9d3e4bc04 100755
--- a/tests/weekly.sh
+++ b/tests/weekly.sh
@@ -70,13 +70,14 @@ mkdir -p tests/testing-results
 
 # GPU weekly tests start here
 # before pulling gem5 resources, make sure it doesn't exist already
-docker run --rm --volume "${gem5_root}":"${gem5_root}" -w \
+docker run -u $UID:$GID --rm --volume "${gem5_root}":"${gem5_root}" -w \
        "${gem5_root}" --memory="${docker_mem_limit}" \
        gcr.io/gem5-test/gcn-gpu:${tag} bash -c \
        "rm -rf ${gem5_root}/gem5-resources"
-# delete Pannotia datasets and output files in case a failed regression run left
-# them around
-rm -f coAuthorsDBLP.graph 1k_128k.gr result.out
+
+# delete m5out, Pannotia datasets, and output files in case a failed regression
+# run left them around
+rm -rf ${gem5_root}/m5out coAuthorsDBLP.graph 1k_128k.gr result.out
 
 # Pull gem5 resources to the root of the gem5 directory -- currently the
 # pre-built binares for LULESH are out-of-date and won't run correctly with
@@ -383,5 +384,8 @@ docker run --rm --volume "${gem5_root}":"${gem5_root}" -w \
        "${gem5_root}" --memory="${docker_mem_limit}" hacc-test-weekly bash -c \
        "rm -rf ${gem5_root}/gem5-resources"
 
+# Delete the gem5 m5out folder we created
+rm -rf ${gem5_root}/m5out
+
 # delete Pannotia datasets we downloaded and output files it created
 rm -f coAuthorsDBLP.graph 1k_128k.gr result.out

From 18ba4e12788c2e7d39d204961e95007ad8a236d7 Mon Sep 17 00:00:00 2001
From: Alex Richardson <alexrichardson@google.com>
Date: Thu, 2 Mar 2023 11:35:46 +0000
Subject: [PATCH 247/492] tests: Fix GCC -W(maybe-)uninitialized warnings

These all look like valid (but harmless) diagnostics to me and are
all simple to fix. Most of them can be fixed by using ASSERT_* variants
of the GTest checkers to ensure that the remainder of the function is
not executed and the uninitialized result isn't touched.

Change-Id: Ib5fe2ac2ec539c880d670ebc3321ce98940c7e38
Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/68517
Tested-by: kokoro <noreply+kokoro@google.com>
Maintainer: Bobby Bruce <bbruce@ucdavis.edu>
Reviewed-by: Bobby Bruce <bbruce@ucdavis.edu>
Reviewed-by: Daniel Carvalho <odanrc@yahoo.com.br>
---
 src/base/circlebuf.test.cc |  2 +-
 src/base/str.test.cc       | 28 ++++++++++++++--------------
 2 files changed, 15 insertions(+), 15 deletions(-)

diff --git a/src/base/circlebuf.test.cc b/src/base/circlebuf.test.cc
index 02fe3961d4..c7913f7a27 100644
--- a/src/base/circlebuf.test.cc
+++ b/src/base/circlebuf.test.cc
@@ -136,7 +136,7 @@ TEST(CircleBufTest, PointerWrapAround)
 TEST(CircleBufTest, ProduceConsumeEmpty)
 {
     CircleBuf<char> buf(8);
-    char foo[1];
+    char foo[1] = {'a'};
 
     // buf is empty to begin with.
     EXPECT_TRUE(buf.empty());
diff --git a/src/base/str.test.cc b/src/base/str.test.cc
index f999c98825..a08f984416 100644
--- a/src/base/str.test.cc
+++ b/src/base/str.test.cc
@@ -254,7 +254,7 @@ TEST(StrTest, ToNumber8BitInt)
 {
     int8_t output;
     std::string input = "-128";
-    EXPECT_TRUE(to_number(input, output));
+    ASSERT_TRUE(to_number(input, output));
     EXPECT_EQ(-128, output);
 }
 
@@ -276,7 +276,7 @@ TEST(StrTest, ToNumberUnsigned8BitInt)
 {
     uint8_t output;
     std::string input = "255";
-    EXPECT_TRUE(to_number(input, output));
+    ASSERT_TRUE(to_number(input, output));
     EXPECT_EQ(255, output);
 }
 
@@ -292,11 +292,11 @@ TEST(StrTest, ToNumberUnsigned8BitIntRoundDown)
 {
     uint8_t output;
     std::string input_1 = "2.99";
-    EXPECT_TRUE(to_number(input_1, output));
+    ASSERT_TRUE(to_number(input_1, output));
     EXPECT_EQ(2, output);
 
     std::string input_2 = "3.99";
-    EXPECT_TRUE(to_number(input_2, output));
+    ASSERT_TRUE(to_number(input_2, output));
     EXPECT_EQ(3, output);
 }
 
@@ -308,7 +308,7 @@ TEST(StrTest, ToNumber8BitUnsignedLimit)
 {
     uint8_t output;
     std::string input = "255.99";
-    EXPECT_TRUE(to_number(input, output));
+    ASSERT_TRUE(to_number(input, output));
     EXPECT_EQ(255, output);
 }
 
@@ -344,7 +344,7 @@ TEST(StrTest, ToNumber64BitInt)
     int64_t output;
     int64_t input_number = 0xFFFFFFFFFFFFFFFF;
     std::string input = std::to_string(input_number);
-    EXPECT_TRUE(to_number(input, output));
+    ASSERT_TRUE(to_number(input, output));
     EXPECT_EQ(input_number, output);
 }
 
@@ -363,7 +363,7 @@ TEST(StrTest, ToNumberEnum)
     };
     Number output;
     std::string input = "2";
-    EXPECT_TRUE(to_number(input, output));
+    ASSERT_TRUE(to_number(input, output));
     EXPECT_EQ(TWO, output);
 }
 
@@ -384,7 +384,7 @@ TEST(StrTest, ToNumberFloat)
     float output;
     std::string input = "0.1";
     float expected_output = 0.1;
-    EXPECT_TRUE(to_number(input, output));
+    ASSERT_TRUE(to_number(input, output));
     EXPECT_EQ(expected_output, output);
 }
 
@@ -393,7 +393,7 @@ TEST(StrTest, ToNumberFloatIntegerString)
     float output;
     std::string input = "10";
     float expected_output = 10.0;
-    EXPECT_TRUE(to_number(input, output));
+    ASSERT_TRUE(to_number(input, output));
     EXPECT_EQ(expected_output, output);
 }
 
@@ -402,7 +402,7 @@ TEST(StrTest, ToNumberFloatNegative)
     float output;
     std::string input = "-0.1";
     float expected_output = -0.1;
-    EXPECT_TRUE(to_number(input, output));
+    ASSERT_TRUE(to_number(input, output));
     EXPECT_EQ(expected_output, output);
 }
 
@@ -411,7 +411,7 @@ TEST(StrTest, ToNumberDouble)
     double output;
     std::string input = "0.0001";
     double expected_output = 0.0001;
-    EXPECT_TRUE(to_number(input, output));
+    ASSERT_TRUE(to_number(input, output));
     EXPECT_EQ(expected_output, output);
 }
 
@@ -420,7 +420,7 @@ TEST(StrTest, ToNumberDoubleIntegerString)
     double output;
     std::string input = "12345";
     double expected_output = 12345.0;
-    EXPECT_TRUE(to_number(input, output));
+    ASSERT_TRUE(to_number(input, output));
     EXPECT_EQ(expected_output, output);
 }
 
@@ -429,7 +429,7 @@ TEST(StrTest, ToNumberDoubleNegative)
     double output;
     std::string input = "-1.2345";
     double expected_output = -1.2345;
-    EXPECT_TRUE(to_number(input, output));
+    ASSERT_TRUE(to_number(input, output));
     EXPECT_EQ(expected_output, output);
 }
 
@@ -439,7 +439,7 @@ TEST(StrTest, ToNumberScientific)
     double output;
     std::string input = "8.234e+08";
     double expected_output = 823400000;
-    EXPECT_TRUE(to_number(input, output));
+    ASSERT_TRUE(to_number(input, output));
     EXPECT_EQ(expected_output, output);
 }
 

From 85342dbb0eca5b05029bf3376a8af1e598cfd840 Mon Sep 17 00:00:00 2001
From: "Bobby R. Bruce" <bbruce@ucdavis.edu>
Date: Tue, 7 Mar 2023 09:21:26 -0800
Subject: [PATCH 248/492] cpu: Revert CPU stats changes

This reverts this relationchain:
https://gem5-review.googlesource.com/c/public/gem5/+/67396/6

This was pre-maturely submitted before all testing and checking was
done. To be safe this has been reverted. When all testing and checks are
completed, this revert will be undone.

Change-Id: I2a88cadfee03c1fc81932e6548938db108786dd2
Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/68717
Reviewed-by: Jason Lowe-Power <power.jg@gmail.com>
Maintainer: Bobby Bruce <bbruce@ucdavis.edu>
Reviewed-by: Bobby Bruce <bbruce@ucdavis.edu>
Tested-by: kokoro <noreply+kokoro@google.com>
---
 src/cpu/base.cc                | 250 ---------------------------------
 src/cpu/base.hh                | 142 -------------------
 src/cpu/kvm/base.cc            |   5 +-
 src/cpu/kvm/base.hh            |   1 +
 src/cpu/minor/execute.cc       |  47 +++++--
 src/cpu/minor/stats.cc         |  36 ++++-
 src/cpu/minor/stats.hh         |  22 +++
 src/cpu/o3/commit.cc           |  73 ++++++++--
 src/cpu/o3/commit.hh           |  16 +++
 src/cpu/o3/cpu.cc              | 167 +++++++++++++++++-----
 src/cpu/o3/cpu.hh              |  41 +++++-
 src/cpu/o3/dyn_inst.hh         |  14 +-
 src/cpu/o3/fetch.cc            |  33 ++++-
 src/cpu/o3/fetch.hh            |  10 ++
 src/cpu/o3/iew.cc              |  50 ++++++-
 src/cpu/o3/iew.hh              |  14 ++
 src/cpu/simple/base.cc         |  67 +++------
 src/cpu/simple/base.hh         |   2 -
 src/cpu/simple/exec_context.hh | 180 +++++++++++++++++++++---
 19 files changed, 628 insertions(+), 542 deletions(-)

diff --git a/src/cpu/base.cc b/src/cpu/base.cc
index cee76472f5..d2c0a78d44 100644
--- a/src/cpu/base.cc
+++ b/src/cpu/base.cc
@@ -191,30 +191,6 @@ BaseCPU::BaseCPU(const Params &p, bool is_checker)
     modelResetPort.onChange([this](const bool &new_val) {
         setReset(new_val);
     });
-    // create a stat group object for each thread on this core
-    fetchStats.reserve(numThreads);
-    executeStats.reserve(numThreads);
-    commitStats.reserve(numThreads);
-    for (int i = 0; i < numThreads; i++) {
-        // create fetchStat object for thread i and set rate formulas
-        FetchCPUStats* fetchStatptr = new FetchCPUStats(this, i);
-        fetchStatptr->fetchRate = fetchStatptr->numInsts / baseStats.numCycles;
-        fetchStatptr->branchRate = fetchStatptr->numBranches /
-            baseStats.numCycles;
-        fetchStats.emplace_back(fetchStatptr);
-
-        // create executeStat object for thread i and set rate formulas
-        ExecuteCPUStats* executeStatptr = new ExecuteCPUStats(this, i);
-        executeStatptr->instRate = executeStatptr->numInsts /
-            baseStats.numCycles;
-        executeStats.emplace_back(executeStatptr);
-
-        // create commitStat object for thread i and set ipc, cpi formulas
-        CommitCPUStats* commitStatptr = new CommitCPUStats(this, i);
-        commitStatptr->ipc = commitStatptr->numInsts / baseStats.numCycles;
-        commitStatptr->cpi = baseStats.numCycles / commitStatptr->numInsts;
-        commitStats.emplace_back(commitStatptr);
-    }
 }
 
 void
@@ -407,28 +383,13 @@ BaseCPU::probeInstCommit(const StaticInstPtr &inst, Addr pc)
 BaseCPU::
 BaseCPUStats::BaseCPUStats(statistics::Group *parent)
     : statistics::Group(parent),
-      ADD_STAT(numInsts, statistics::units::Count::get(),
-               "Number of instructions committed (core level)"),
-      ADD_STAT(numOps, statistics::units::Count::get(),
-               "Number of ops (including micro ops) committed (core level)"),
       ADD_STAT(numCycles, statistics::units::Cycle::get(),
                "Number of cpu cycles simulated"),
-      ADD_STAT(cpi, statistics::units::Rate<
-                statistics::units::Cycle, statistics::units::Count>::get(),
-               "CPI: cycles per instruction (core level)"),
-      ADD_STAT(ipc, statistics::units::Rate<
-                statistics::units::Count, statistics::units::Cycle>::get(),
-               "IPC: instructions per cycle (core level)"),
       ADD_STAT(numWorkItemsStarted, statistics::units::Count::get(),
                "Number of work items this cpu started"),
       ADD_STAT(numWorkItemsCompleted, statistics::units::Count::get(),
                "Number of work items this cpu completed")
 {
-    cpi.precision(6);
-    cpi = numCycles / numInsts;
-
-    ipc.precision(6);
-    ipc = numInsts / numCycles;
 }
 
 void
@@ -866,215 +827,4 @@ BaseCPU::GlobalStats::GlobalStats(statistics::Group *parent)
     hostOpRate = simOps / hostSeconds;
 }
 
-BaseCPU::
-FetchCPUStats::FetchCPUStats(statistics::Group *parent, int thread_id)
-    : statistics::Group(parent, csprintf("fetchStats%i", thread_id).c_str()),
-    ADD_STAT(numInsts, statistics::units::Count::get(),
-             "Number of instructions fetched (thread level)"),
-    ADD_STAT(numOps, statistics::units::Count::get(),
-             "Number of ops (including micro ops) fetched (thread level)"),
-    ADD_STAT(fetchRate, statistics::units::Rate<
-             statistics::units::Count, statistics::units::Cycle>::get(),
-             "Number of inst fetches per cycle"),
-    ADD_STAT(numBranches, statistics::units::Count::get(),
-             "Number of branches fetched"),
-    ADD_STAT(branchRate, statistics::units::Ratio::get(),
-             "Number of branch fetches per cycle"),
-    ADD_STAT(icacheStallCycles, statistics::units::Cycle::get(),
-             "ICache total stall cycles"),
-    ADD_STAT(numFetchSuspends, statistics::units::Count::get(),
-             "Number of times Execute suspended instruction fetching")
-
-{
-    fetchRate
-        .flags(statistics::total);
-
-    numBranches
-        .prereq(numBranches);
-
-    branchRate
-        .flags(statistics::total);
-
-    icacheStallCycles
-        .prereq(icacheStallCycles);
-
-}
-
-// means it is incremented in a vector indexing and not directly
-BaseCPU::
-ExecuteCPUStats::ExecuteCPUStats(statistics::Group *parent, int thread_id)
-    : statistics::Group(parent, csprintf("executeStats%i", thread_id).c_str()),
-    ADD_STAT(numInsts, statistics::units::Count::get(),
-             "Number of executed instructions"),
-    ADD_STAT(numNop, statistics::units::Count::get(),
-             "Number of nop insts executed"),
-    ADD_STAT(numBranches, statistics::units::Count::get(),
-             "Number of branches executed"),
-    ADD_STAT(numLoadInsts, statistics::units::Count::get(),
-             "Number of load instructions executed"),
-    ADD_STAT(numStoreInsts, statistics::units::Count::get(),
-             "Number of stores executed"),
-    ADD_STAT(instRate, statistics::units::Rate<
-                statistics::units::Count, statistics::units::Cycle>::get(),
-             "Inst execution rate"),
-    ADD_STAT(dcacheStallCycles, statistics::units::Cycle::get(),
-             "DCache total stall cycles"),
-    ADD_STAT(numCCRegReads, statistics::units::Count::get(),
-             "Number of times the CC registers were read"),
-    ADD_STAT(numCCRegWrites, statistics::units::Count::get(),
-             "Number of times the CC registers were written"),
-    ADD_STAT(numFpAluAccesses, statistics::units::Count::get(),
-             "Number of float alu accesses"),
-    ADD_STAT(numFpRegReads, statistics::units::Count::get(),
-             "Number of times the floating registers were read"),
-    ADD_STAT(numFpRegWrites, statistics::units::Count::get(),
-             "Number of times the floating registers were written"),
-    ADD_STAT(numIntAluAccesses, statistics::units::Count::get(),
-             "Number of integer alu accesses"),
-    ADD_STAT(numIntRegReads, statistics::units::Count::get(),
-             "Number of times the integer registers were read"),
-    ADD_STAT(numIntRegWrites, statistics::units::Count::get(),
-             "Number of times the integer registers were written"),
-    ADD_STAT(numMemRefs, statistics::units::Count::get(),
-             "Number of memory refs"),
-    ADD_STAT(numMiscRegReads, statistics::units::Count::get(),
-             "Number of times the Misc registers were read"),
-    ADD_STAT(numMiscRegWrites, statistics::units::Count::get(),
-             "Number of times the Misc registers were written"),
-    ADD_STAT(numVecAluAccesses, statistics::units::Count::get(),
-             "Number of vector alu accesses"),
-    ADD_STAT(numVecPredRegReads, statistics::units::Count::get(),
-             "Number of times the predicate registers were read"),
-    ADD_STAT(numVecPredRegWrites, statistics::units::Count::get(),
-             "Number of times the predicate registers were written"),
-    ADD_STAT(numVecRegReads, statistics::units::Count::get(),
-             "Number of times the vector registers were read"),
-    ADD_STAT(numVecRegWrites, statistics::units::Count::get(),
-             "Number of times the vector registers were written"),
-    ADD_STAT(numDiscardedOps, statistics::units::Count::get(),
-             "Number of ops (including micro ops) which were discarded before "
-             "commit")
-{
-    numStoreInsts = numMemRefs - numLoadInsts;
-
-    dcacheStallCycles
-        .prereq(dcacheStallCycles);
-    numCCRegReads
-        .prereq(numCCRegReads)
-        .flags(statistics::nozero);
-    numCCRegWrites
-        .prereq(numCCRegWrites)
-        .flags(statistics::nozero);
-    numFpAluAccesses
-        .prereq(numFpAluAccesses);
-    numFpRegReads
-        .prereq(numFpRegReads);
-    numIntAluAccesses
-        .prereq(numIntAluAccesses);
-    numIntRegReads
-        .prereq(numIntRegReads);
-    numIntRegWrites
-        .prereq(numIntRegWrites);
-    numMiscRegReads
-        .prereq(numMiscRegReads);
-    numMiscRegWrites
-        .prereq(numMiscRegWrites);
-    numVecPredRegReads
-        .prereq(numVecPredRegReads);
-    numVecPredRegWrites
-        .prereq(numVecPredRegWrites);
-    numVecRegReads
-        .prereq(numVecRegReads);
-    numVecRegWrites
-        .prereq(numVecRegWrites);
-}
-
-BaseCPU::
-CommitCPUStats::CommitCPUStats(statistics::Group *parent, int thread_id)
-    : statistics::Group(parent, csprintf("commitStats%i", thread_id).c_str()),
-    ADD_STAT(numInsts, statistics::units::Count::get(),
-             "Number of instructions committed (thread level)"),
-    ADD_STAT(numOps, statistics::units::Count::get(),
-             "Number of ops (including micro ops) committed (thread level)"),
-    ADD_STAT(numInstsNotNOP, statistics::units::Count::get(),
-             "Number of instructions committed excluding NOPs or prefetches"),
-    ADD_STAT(numOpsNotNOP, statistics::units::Count::get(),
-             "Number of Ops (including micro ops) Simulated"),
-    ADD_STAT(cpi, statistics::units::Rate<
-                statistics::units::Cycle, statistics::units::Count>::get(),
-             "CPI: cycles per instruction (thread level)"),
-    ADD_STAT(ipc, statistics::units::Rate<
-                statistics::units::Count, statistics::units::Cycle>::get(),
-             "IPC: instructions per cycle (thread level)"),
-    ADD_STAT(numMemRefs, statistics::units::Count::get(),
-            "Number of memory references committed"),
-    ADD_STAT(numFpInsts, statistics::units::Count::get(),
-            "Number of float instructions"),
-    ADD_STAT(numIntInsts, statistics::units::Count::get(),
-            "Number of integer instructions"),
-    ADD_STAT(numLoadInsts, statistics::units::Count::get(),
-            "Number of load instructions"),
-    ADD_STAT(numStoreInsts, statistics::units::Count::get(),
-            "Number of store instructions"),
-    ADD_STAT(numVecInsts, statistics::units::Count::get(),
-            "Number of vector instructions"),
-    ADD_STAT(committedInstType, statistics::units::Count::get(),
-            "Class of committed instruction."),
-    ADD_STAT(committedControl, statistics::units::Count::get(),
-             "Class of control type instructions committed")
-{
-    numInsts
-        .prereq(numInsts);
-
-    cpi.precision(6);
-    ipc.precision(6);
-
-    committedInstType
-        .init(enums::Num_OpClass)
-        .flags(statistics::total | statistics::pdf | statistics::dist);
-
-    for (unsigned i = 0; i < Num_OpClasses; ++i) {
-        committedInstType.subname(i, enums::OpClassStrings[i]);
-    }
-
-    committedControl
-        .init(StaticInstFlags::Flags::Num_Flags)
-        .flags(statistics::nozero);
-
-    for (unsigned i = 0; i < StaticInstFlags::Flags::Num_Flags; i++) {
-        committedControl.subname(i, StaticInstFlags::FlagsStrings[i]);
-    }
-}
-
-
-void
-BaseCPU::
-CommitCPUStats::updateComCtrlStats(const StaticInstPtr staticInst)
-{
-    /* Add a count for every control instruction type */
-    if (staticInst->isControl()) {
-        if (staticInst->isReturn()) {
-            committedControl[gem5::StaticInstFlags::Flags::IsReturn]++;
-        }
-        if (staticInst->isCall()) {
-            committedControl[gem5::StaticInstFlags::Flags::IsCall]++;
-        }
-        if (staticInst->isDirectCtrl()) {
-            committedControl[gem5::StaticInstFlags::Flags::IsDirectControl]++;
-        }
-        if (staticInst->isIndirectCtrl()) {
-            committedControl
-                [gem5::StaticInstFlags::Flags::IsIndirectControl]++;
-        }
-        if (staticInst->isCondCtrl()) {
-            committedControl[gem5::StaticInstFlags::Flags::IsCondControl]++;
-        }
-        if (staticInst->isUncondCtrl()) {
-            committedControl[gem5::StaticInstFlags::Flags::IsUncondControl]++;
-        }
-        committedControl[gem5::StaticInstFlags::Flags::IsControl]++;
-    }
-
-}
-
 } // namespace gem5
diff --git a/src/cpu/base.hh b/src/cpu/base.hh
index fc22abc5aa..084d9b9305 100644
--- a/src/cpu/base.hh
+++ b/src/cpu/base.hh
@@ -43,7 +43,6 @@
 #define __CPU_BASE_HH__
 
 #include <vector>
-#include <memory>
 
 #include "arch/generic/interrupts.hh"
 #include "base/statistics.hh"
@@ -633,14 +632,8 @@ class BaseCPU : public ClockedObject
     struct BaseCPUStats : public statistics::Group
     {
         BaseCPUStats(statistics::Group *parent);
-        // Number of CPU insts and ops committed at CPU core level
-        statistics::Scalar numInsts;
-        statistics::Scalar numOps;
         // Number of CPU cycles simulated
         statistics::Scalar numCycles;
-        /* CPI/IPC for total cycle counts and macro insts */
-        statistics::Formula cpi;
-        statistics::Formula ipc;
         statistics::Scalar numWorkItemsStarted;
         statistics::Scalar numWorkItemsCompleted;
     } baseStats;
@@ -683,141 +676,6 @@ class BaseCPU : public ClockedObject
     const Cycles pwrGatingLatency;
     const bool powerGatingOnIdle;
     EventFunctionWrapper enterPwrGatingEvent;
-
-  public:
-    struct FetchCPUStats : public statistics::Group
-    {
-        FetchCPUStats(statistics::Group *parent, int thread_id);
-
-        /* Total number of instructions fetched */
-        statistics::Scalar numInsts;
-
-        /* Total number of operations fetched */
-        statistics::Scalar numOps;
-
-        /* Number of instruction fetched per cycle. */
-        statistics::Formula fetchRate;
-
-        /* Total number of branches fetched */
-        statistics::Scalar numBranches;
-
-        /* Number of branch fetches per cycle. */
-        statistics::Formula branchRate;
-
-        /* Number of cycles stalled due to an icache miss */
-        statistics::Scalar icacheStallCycles;
-
-        /* Number of times fetch was asked to suspend by Execute */
-        statistics::Scalar numFetchSuspends;
-
-    };
-
-    struct ExecuteCPUStats: public statistics::Group
-    {
-        ExecuteCPUStats(statistics::Group *parent, int thread_id);
-
-        /* Stat for total number of executed instructions */
-        statistics::Scalar numInsts;
-        /* Number of executed nops */
-        statistics::Scalar numNop;
-        /* Number of executed branches */
-        statistics::Scalar numBranches;
-        /* Stat for total number of executed load instructions */
-        statistics::Scalar numLoadInsts;
-        /* Number of executed store instructions */
-        statistics::Formula numStoreInsts;
-        /* Number of instructions executed per cycle */
-        statistics::Formula instRate;
-
-        /* Number of cycles stalled for D-cache responses */
-        statistics::Scalar dcacheStallCycles;
-
-        /* Number of condition code register file accesses */
-        statistics::Scalar numCCRegReads;
-        statistics::Scalar numCCRegWrites;
-
-        /* number of float alu accesses */
-        statistics::Scalar numFpAluAccesses;
-
-        /* Number of float register file accesses */
-        statistics::Scalar numFpRegReads;
-        statistics::Scalar numFpRegWrites;
-
-        /* Number of integer alu accesses */
-        statistics::Scalar numIntAluAccesses;
-
-        /* Number of integer register file accesses */
-        statistics::Scalar numIntRegReads;
-        statistics::Scalar numIntRegWrites;
-
-        /* number of simulated memory references */
-        statistics::Scalar numMemRefs;
-
-        /* Number of misc register file accesses */
-        statistics::Scalar numMiscRegReads;
-        statistics::Scalar numMiscRegWrites;
-
-        /* Number of vector alu accesses */
-        statistics::Scalar numVecAluAccesses;
-
-        /* Number of predicate register file accesses */
-        mutable statistics::Scalar numVecPredRegReads;
-        statistics::Scalar numVecPredRegWrites;
-
-        /* Number of vector register file accesses */
-        mutable statistics::Scalar numVecRegReads;
-        statistics::Scalar numVecRegWrites;
-
-        /* Number of ops discarded before committing */
-        statistics::Scalar numDiscardedOps;
-    };
-
-    struct CommitCPUStats: public statistics::Group
-    {
-        CommitCPUStats(statistics::Group *parent, int thread_id);
-
-        /* Number of simulated instructions committed */
-        statistics::Scalar numInsts;
-        statistics::Scalar numOps;
-
-        /* Number of instructions committed that are not NOP or prefetches */
-        statistics::Scalar numInstsNotNOP;
-        statistics::Scalar numOpsNotNOP;
-
-        /* CPI/IPC for total cycle counts and macro insts */
-        statistics::Formula cpi;
-        statistics::Formula ipc;
-
-        /* Number of committed memory references. */
-        statistics::Scalar numMemRefs;
-
-        /* Number of float instructions */
-        statistics::Scalar numFpInsts;
-
-        /* Number of int instructions */
-        statistics::Scalar numIntInsts;
-
-        /* number of load instructions */
-        statistics::Scalar numLoadInsts;
-
-        /* Number of store instructions */
-        statistics::Scalar numStoreInsts;
-
-        /* Number of vector instructions */
-        statistics::Scalar numVecInsts;
-
-        /* Number of instructions committed by type (OpClass) */
-        statistics::Vector committedInstType;
-
-        /* number of control instructions committed by control inst type */
-        statistics::Vector committedControl;
-        void updateComCtrlStats(const StaticInstPtr staticInst);
-
-    };
-
-    std::vector<std::unique_ptr<FetchCPUStats>> fetchStats;
-    std::vector<std::unique_ptr<ExecuteCPUStats>> executeStats;
-    std::vector<std::unique_ptr<CommitCPUStats>> commitStats;
 };
 
 } // namespace gem5
diff --git a/src/cpu/kvm/base.cc b/src/cpu/kvm/base.cc
index e22e1628d2..b76bddc2fd 100644
--- a/src/cpu/kvm/base.cc
+++ b/src/cpu/kvm/base.cc
@@ -261,6 +261,8 @@ BaseKvmCPU::restartEqThread()
 
 BaseKvmCPU::StatGroup::StatGroup(statistics::Group *parent)
     : statistics::Group(parent),
+    ADD_STAT(committedInsts, statistics::units::Count::get(),
+             "Number of instructions committed"),
     ADD_STAT(numVMExits, statistics::units::Count::get(),
              "total number of KVM exits"),
     ADD_STAT(numVMHalfEntries, statistics::units::Count::get(),
@@ -776,8 +778,7 @@ BaseKvmCPU::kvmRun(Tick ticks)
 
         /* Update statistics */
         baseStats.numCycles += simCyclesExecuted;;
-        commitStats[thread->threadId()]->numInsts += instsExecuted;
-        baseStats.numInsts += instsExecuted;
+        stats.committedInsts += instsExecuted;
         ctrInsts += instsExecuted;
 
         DPRINTF(KvmRun,
diff --git a/src/cpu/kvm/base.hh b/src/cpu/kvm/base.hh
index 7bbf393f9b..2d81c7c7eb 100644
--- a/src/cpu/kvm/base.hh
+++ b/src/cpu/kvm/base.hh
@@ -804,6 +804,7 @@ class BaseKvmCPU : public BaseCPU
     struct StatGroup : public statistics::Group
     {
         StatGroup(statistics::Group *parent);
+        statistics::Scalar committedInsts;
         statistics::Scalar numVMExits;
         statistics::Scalar numVMHalfEntries;
         statistics::Scalar numExitSignal;
diff --git a/src/cpu/minor/execute.cc b/src/cpu/minor/execute.cc
index 2908c2266f..5eaaf5804e 100644
--- a/src/cpu/minor/execute.cc
+++ b/src/cpu/minor/execute.cc
@@ -871,18 +871,49 @@ Execute::doInstCommitAccounting(MinorDynInstPtr inst)
     {
         thread->numInst++;
         thread->threadStats.numInsts++;
-        cpu.commitStats[inst->id.threadId]->numInsts++;
-        cpu.baseStats.numInsts++;
+        cpu.stats.numInsts++;
 
         /* Act on events related to instruction counts */
         thread->comInstEventQueue.serviceEvents(thread->numInst);
     }
     thread->numOp++;
     thread->threadStats.numOps++;
-    cpu.commitStats[inst->id.threadId]->numOps++;
-    cpu.baseStats.numOps++;
-    cpu.commitStats[inst->id.threadId]
-        ->committedInstType[inst->staticInst->opClass()]++;
+    cpu.stats.numOps++;
+    cpu.stats.committedInstType[inst->id.threadId]
+                               [inst->staticInst->opClass()]++;
+
+    /** Add a count for every control instruction */
+    if (inst->staticInst->isControl()) {
+        if (inst->staticInst->isReturn()) {
+            cpu.stats.committedControl[inst->id.threadId]
+                        [gem5::StaticInstFlags::Flags::IsReturn]++;
+        }
+        if (inst->staticInst->isCall()) {
+            cpu.stats.committedControl[inst->id.threadId]
+                        [gem5::StaticInstFlags::Flags::IsCall]++;
+        }
+        if (inst->staticInst->isDirectCtrl()) {
+            cpu.stats.committedControl[inst->id.threadId]
+                        [gem5::StaticInstFlags::Flags::IsDirectControl]++;
+        }
+        if (inst->staticInst->isIndirectCtrl()) {
+            cpu.stats.committedControl[inst->id.threadId]
+                        [gem5::StaticInstFlags::Flags::IsIndirectControl]++;
+        }
+        if (inst->staticInst->isCondCtrl()) {
+            cpu.stats.committedControl[inst->id.threadId]
+                        [gem5::StaticInstFlags::Flags::IsCondControl]++;
+        }
+        if (inst->staticInst->isUncondCtrl()) {
+            cpu.stats.committedControl[inst->id.threadId]
+                        [gem5::StaticInstFlags::Flags::IsUncondControl]++;
+
+        }
+        cpu.stats.committedControl[inst->id.threadId]
+                        [gem5::StaticInstFlags::Flags::IsControl]++;
+    }
+
+
 
     /* Set the CP SeqNum to the numOps commit number */
     if (inst->traceData)
@@ -1023,7 +1054,7 @@ Execute::commitInst(MinorDynInstPtr inst, bool early_memory_issue,
             DPRINTF(MinorInterrupt, "Suspending thread: %d from Execute"
                 " inst: %s\n", thread_id, *inst);
 
-            cpu.fetchStats[thread_id]->numFetchSuspends++;
+            cpu.stats.numFetchSuspends++;
 
             updateBranchData(thread_id, BranchData::SuspendThread, inst,
                 resume_pc, branch);
@@ -1337,7 +1368,7 @@ Execute::commit(ThreadID thread_id, bool only_commit_microops, bool discard,
                 *inst, ex_info.streamSeqNum);
 
             if (fault == NoFault)
-                cpu.executeStats[thread_id]->numDiscardedOps++;
+                cpu.stats.numDiscardedOps++;
         }
 
         /* Mark the mem inst as being in the LSQ */
diff --git a/src/cpu/minor/stats.cc b/src/cpu/minor/stats.cc
index e31cbe93a1..64d4c475e0 100644
--- a/src/cpu/minor/stats.cc
+++ b/src/cpu/minor/stats.cc
@@ -45,13 +45,47 @@ namespace minor
 
 MinorStats::MinorStats(BaseCPU *base_cpu)
     : statistics::Group(base_cpu),
+    ADD_STAT(numInsts, statistics::units::Count::get(),
+             "Number of instructions committed"),
+    ADD_STAT(numOps, statistics::units::Count::get(),
+             "Number of ops (including micro ops) committed"),
+    ADD_STAT(numDiscardedOps, statistics::units::Count::get(),
+             "Number of ops (including micro ops) which were discarded before "
+             "commit"),
+    ADD_STAT(numFetchSuspends, statistics::units::Count::get(),
+             "Number of times Execute suspended instruction fetching"),
     ADD_STAT(quiesceCycles, statistics::units::Cycle::get(),
              "Total number of cycles that CPU has spent quiesced or waiting "
-             "for an interrupt")
+             "for an interrupt"),
+    ADD_STAT(cpi, statistics::units::Rate<
+                statistics::units::Cycle, statistics::units::Count>::get(),
+             "CPI: cycles per instruction"),
+    ADD_STAT(ipc, statistics::units::Rate<
+                statistics::units::Count, statistics::units::Cycle>::get(),
+             "IPC: instructions per cycle"),
+    ADD_STAT(committedInstType, statistics::units::Count::get(),
+             "Class of committed instruction"),
+    ADD_STAT(committedControl, statistics::units::Count::get(),
+             "Class of control type instructions committed")
 
 {
     quiesceCycles.prereq(quiesceCycles);
 
+    cpi.precision(6);
+    cpi = base_cpu->baseStats.numCycles / numInsts;
+
+    ipc.precision(6);
+    ipc = numInsts / base_cpu->baseStats.numCycles;
+
+    committedInstType
+        .init(base_cpu->numThreads, enums::Num_OpClass)
+        .flags(statistics::total | statistics::pdf | statistics::dist);
+    committedInstType.ysubnames(enums::OpClassStrings);
+
+    committedControl
+        .init(base_cpu->numThreads, StaticInstFlags::Flags::Num_Flags)
+        .flags(statistics::nozero);
+    committedControl.ysubnames(StaticInstFlags::FlagsStrings);
 }
 
 } // namespace minor
diff --git a/src/cpu/minor/stats.hh b/src/cpu/minor/stats.hh
index 98ac80f15c..1ab81f4407 100644
--- a/src/cpu/minor/stats.hh
+++ b/src/cpu/minor/stats.hh
@@ -59,9 +59,31 @@ struct MinorStats : public statistics::Group
 {
     MinorStats(BaseCPU *parent);
 
+    /** Number of simulated instructions */
+    statistics::Scalar numInsts;
+
+    /** Number of simulated insts and microops */
+    statistics::Scalar numOps;
+
+    /** Number of ops discarded before committing */
+    statistics::Scalar numDiscardedOps;
+
+    /** Number of times fetch was asked to suspend by Execute */
+    statistics::Scalar numFetchSuspends;
+
     /** Number of cycles in quiescent state */
     statistics::Scalar quiesceCycles;
 
+    /** CPI/IPC for total cycle counts and macro insts */
+    statistics::Formula cpi;
+    statistics::Formula ipc;
+
+    /** Number of instructions by type (OpClass) */
+    statistics::Vector2d committedInstType;
+
+    /** Number of branches commited */
+    statistics::Vector2d committedControl;
+
 };
 
 } // namespace minor
diff --git a/src/cpu/o3/commit.cc b/src/cpu/o3/commit.cc
index e1f01680ca..38dce831b1 100644
--- a/src/cpu/o3/commit.cc
+++ b/src/cpu/o3/commit.cc
@@ -156,10 +156,25 @@ Commit::CommitStats::CommitStats(CPU *cpu, Commit *commit)
                "The number of times a branch was mispredicted"),
       ADD_STAT(numCommittedDist, statistics::units::Count::get(),
                "Number of insts commited each cycle"),
+      ADD_STAT(instsCommitted, statistics::units::Count::get(),
+               "Number of instructions committed"),
+      ADD_STAT(opsCommitted, statistics::units::Count::get(),
+               "Number of ops (including micro ops) committed"),
+      ADD_STAT(memRefs, statistics::units::Count::get(),
+               "Number of memory references committed"),
+      ADD_STAT(loads, statistics::units::Count::get(), "Number of loads committed"),
       ADD_STAT(amos, statistics::units::Count::get(),
                "Number of atomic instructions committed"),
       ADD_STAT(membars, statistics::units::Count::get(),
                "Number of memory barriers committed"),
+      ADD_STAT(branches, statistics::units::Count::get(),
+               "Number of branches committed"),
+      ADD_STAT(vectorInstructions, statistics::units::Count::get(),
+               "Number of committed Vector instructions."),
+      ADD_STAT(floating, statistics::units::Count::get(),
+               "Number of committed floating point instructions."),
+      ADD_STAT(integer, statistics::units::Count::get(),
+               "Number of committed integer instructions."),
       ADD_STAT(functionCalls, statistics::units::Count::get(),
                "Number of function calls committed."),
       ADD_STAT(committedInstType, statistics::units::Count::get(),
@@ -177,6 +192,22 @@ Commit::CommitStats::CommitStats(CPU *cpu, Commit *commit)
         .init(0,commit->commitWidth,1)
         .flags(statistics::pdf);
 
+    instsCommitted
+        .init(cpu->numThreads)
+        .flags(total);
+
+    opsCommitted
+        .init(cpu->numThreads)
+        .flags(total);
+
+    memRefs
+        .init(cpu->numThreads)
+        .flags(total);
+
+    loads
+        .init(cpu->numThreads)
+        .flags(total);
+
     amos
         .init(cpu->numThreads)
         .flags(total);
@@ -185,6 +216,22 @@ Commit::CommitStats::CommitStats(CPU *cpu, Commit *commit)
         .init(cpu->numThreads)
         .flags(total);
 
+    branches
+        .init(cpu->numThreads)
+        .flags(total);
+
+    vectorInstructions
+        .init(cpu->numThreads)
+        .flags(total);
+
+    floating
+        .init(cpu->numThreads)
+        .flags(total);
+
+    integer
+        .init(cpu->numThreads)
+        .flags(total);
+
     functionCalls
         .init(commit->numThreads)
         .flags(total);
@@ -1336,12 +1383,9 @@ Commit::updateComInstStats(const DynInstPtr &inst)
 {
     ThreadID tid = inst->threadNumber;
 
-    if (!inst->isMicroop() || inst->isLastMicroop()) {
-        cpu->commitStats[tid]->numInsts++;
-        cpu->baseStats.numInsts++;
-    }
-    cpu->commitStats[tid]->numOps++;
-    cpu->baseStats.numOps++;
+    if (!inst->isMicroop() || inst->isLastMicroop())
+        stats.instsCommitted[tid]++;
+    stats.opsCommitted[tid]++;
 
     // To match the old model, don't count nops and instruction
     // prefetches towards the total commit count.
@@ -1352,20 +1396,21 @@ Commit::updateComInstStats(const DynInstPtr &inst)
     //
     //  Control Instructions
     //
-    cpu->commitStats[tid]->updateComCtrlStats(inst->staticInst);
+    if (inst->isControl())
+        stats.branches[tid]++;
 
     //
     //  Memory references
     //
     if (inst->isMemRef()) {
-        cpu->commitStats[tid]->numMemRefs++;
+        stats.memRefs[tid]++;
 
         if (inst->isLoad()) {
-            cpu->commitStats[tid]->numLoadInsts++;
+            stats.loads[tid]++;
         }
 
-        if (inst->isStore()) {
-            cpu->commitStats[tid]->numStoreInsts++;
+        if (inst->isAtomic()) {
+            stats.amos[tid]++;
         }
     }
 
@@ -1375,14 +1420,14 @@ Commit::updateComInstStats(const DynInstPtr &inst)
 
     // Integer Instruction
     if (inst->isInteger())
-        cpu->commitStats[tid]->numIntInsts++;
+        stats.integer[tid]++;
 
     // Floating Point Instruction
     if (inst->isFloating())
-        cpu->commitStats[tid]->numFpInsts++;
+        stats.floating[tid]++;
     // Vector Instruction
     if (inst->isVector())
-        cpu->commitStats[tid]->numVecInsts++;
+        stats.vectorInstructions[tid]++;
 
     // Function Calls
     if (inst->isCall())
diff --git a/src/cpu/o3/commit.hh b/src/cpu/o3/commit.hh
index eccd023d45..cf4eaf5d92 100644
--- a/src/cpu/o3/commit.hh
+++ b/src/cpu/o3/commit.hh
@@ -479,10 +479,26 @@ class Commit
         /** Distribution of the number of committed instructions each cycle. */
         statistics::Distribution numCommittedDist;
 
+        /** Total number of instructions committed. */
+        statistics::Vector instsCommitted;
+        /** Total number of ops (including micro ops) committed. */
+        statistics::Vector opsCommitted;
+        /** Stat for the total number of committed memory references. */
+        statistics::Vector memRefs;
+        /** Stat for the total number of committed loads. */
+        statistics::Vector loads;
         /** Stat for the total number of committed atomics. */
         statistics::Vector amos;
         /** Total number of committed memory barriers. */
         statistics::Vector membars;
+        /** Total number of committed branches. */
+        statistics::Vector branches;
+        /** Total number of vector instructions */
+        statistics::Vector vectorInstructions;
+        /** Total number of floating point instructions */
+        statistics::Vector floating;
+        /** Total number of integer instructions */
+        statistics::Vector integer;
         /** Total number of function calls */
         statistics::Vector functionCalls;
         /** Committed instructions by instruction type (OpClass) */
diff --git a/src/cpu/o3/cpu.cc b/src/cpu/o3/cpu.cc
index 93c58fef63..d2bacaa523 100644
--- a/src/cpu/o3/cpu.cc
+++ b/src/cpu/o3/cpu.cc
@@ -328,7 +328,47 @@ CPU::CPUStats::CPUStats(CPU *cpu)
                "to idling"),
       ADD_STAT(quiesceCycles, statistics::units::Cycle::get(),
                "Total number of cycles that CPU has spent quiesced or waiting "
-               "for an interrupt")
+               "for an interrupt"),
+      ADD_STAT(committedInsts, statistics::units::Count::get(),
+               "Number of Instructions Simulated"),
+      ADD_STAT(committedOps, statistics::units::Count::get(),
+               "Number of Ops (including micro ops) Simulated"),
+      ADD_STAT(cpi, statistics::units::Rate<
+                    statistics::units::Cycle, statistics::units::Count>::get(),
+               "CPI: Cycles Per Instruction"),
+      ADD_STAT(totalCpi, statistics::units::Rate<
+                    statistics::units::Cycle, statistics::units::Count>::get(),
+               "CPI: Total CPI of All Threads"),
+      ADD_STAT(ipc, statistics::units::Rate<
+                    statistics::units::Count, statistics::units::Cycle>::get(),
+               "IPC: Instructions Per Cycle"),
+      ADD_STAT(totalIpc, statistics::units::Rate<
+                    statistics::units::Count, statistics::units::Cycle>::get(),
+               "IPC: Total IPC of All Threads"),
+      ADD_STAT(intRegfileReads, statistics::units::Count::get(),
+               "Number of integer regfile reads"),
+      ADD_STAT(intRegfileWrites, statistics::units::Count::get(),
+               "Number of integer regfile writes"),
+      ADD_STAT(fpRegfileReads, statistics::units::Count::get(),
+               "Number of floating regfile reads"),
+      ADD_STAT(fpRegfileWrites, statistics::units::Count::get(),
+               "Number of floating regfile writes"),
+      ADD_STAT(vecRegfileReads, statistics::units::Count::get(),
+               "number of vector regfile reads"),
+      ADD_STAT(vecRegfileWrites, statistics::units::Count::get(),
+               "number of vector regfile writes"),
+      ADD_STAT(vecPredRegfileReads, statistics::units::Count::get(),
+               "number of predicate regfile reads"),
+      ADD_STAT(vecPredRegfileWrites, statistics::units::Count::get(),
+               "number of predicate regfile writes"),
+      ADD_STAT(ccRegfileReads, statistics::units::Count::get(),
+               "number of cc regfile reads"),
+      ADD_STAT(ccRegfileWrites, statistics::units::Count::get(),
+               "number of cc regfile writes"),
+      ADD_STAT(miscRegfileReads, statistics::units::Count::get(),
+               "number of misc regfile reads"),
+      ADD_STAT(miscRegfileWrites, statistics::units::Count::get(),
+               "number of misc regfile writes")
 {
     // Register any of the O3CPU's stats here.
     timesIdled
@@ -340,6 +380,69 @@ CPU::CPUStats::CPUStats(CPU *cpu)
     quiesceCycles
         .prereq(quiesceCycles);
 
+    // Number of Instructions simulated
+    // --------------------------------
+    // Should probably be in Base CPU but need templated
+    // MaxThreads so put in here instead
+    committedInsts
+        .init(cpu->numThreads)
+        .flags(statistics::total);
+
+    committedOps
+        .init(cpu->numThreads)
+        .flags(statistics::total);
+
+    cpi
+        .precision(6);
+    cpi = cpu->baseStats.numCycles / committedInsts;
+
+    totalCpi
+        .precision(6);
+    totalCpi = cpu->baseStats.numCycles / sum(committedInsts);
+
+    ipc
+        .precision(6);
+    ipc = committedInsts / cpu->baseStats.numCycles;
+
+    totalIpc
+        .precision(6);
+    totalIpc = sum(committedInsts) / cpu->baseStats.numCycles;
+
+    intRegfileReads
+        .prereq(intRegfileReads);
+
+    intRegfileWrites
+        .prereq(intRegfileWrites);
+
+    fpRegfileReads
+        .prereq(fpRegfileReads);
+
+    fpRegfileWrites
+        .prereq(fpRegfileWrites);
+
+    vecRegfileReads
+        .prereq(vecRegfileReads);
+
+    vecRegfileWrites
+        .prereq(vecRegfileWrites);
+
+    vecPredRegfileReads
+        .prereq(vecPredRegfileReads);
+
+    vecPredRegfileWrites
+        .prereq(vecPredRegfileWrites);
+
+    ccRegfileReads
+        .prereq(ccRegfileReads);
+
+    ccRegfileWrites
+        .prereq(ccRegfileWrites);
+
+    miscRegfileReads
+        .prereq(miscRegfileReads);
+
+    miscRegfileWrites
+        .prereq(miscRegfileWrites);
 }
 
 void
@@ -916,7 +1019,7 @@ CPU::readMiscRegNoEffect(int misc_reg, ThreadID tid) const
 RegVal
 CPU::readMiscReg(int misc_reg, ThreadID tid)
 {
-    executeStats[tid]->numMiscRegReads++;
+    cpuStats.miscRegfileReads++;
     return isa[tid]->readMiscReg(misc_reg);
 }
 
@@ -929,29 +1032,29 @@ CPU::setMiscRegNoEffect(int misc_reg, RegVal val, ThreadID tid)
 void
 CPU::setMiscReg(int misc_reg, RegVal val, ThreadID tid)
 {
-    executeStats[tid]->numMiscRegWrites++;
+    cpuStats.miscRegfileWrites++;
     isa[tid]->setMiscReg(misc_reg, val);
 }
 
 RegVal
-CPU::getReg(PhysRegIdPtr phys_reg, ThreadID tid)
+CPU::getReg(PhysRegIdPtr phys_reg)
 {
     switch (phys_reg->classValue()) {
       case IntRegClass:
-        executeStats[tid]->numIntRegReads++;
+        cpuStats.intRegfileReads++;
         break;
       case FloatRegClass:
-        executeStats[tid]->numFpRegReads++;
+        cpuStats.fpRegfileReads++;
         break;
       case CCRegClass:
-        executeStats[tid]->numCCRegReads++;
+        cpuStats.ccRegfileReads++;
         break;
       case VecRegClass:
       case VecElemClass:
-        executeStats[tid]->numVecRegReads++;
+        cpuStats.vecRegfileReads++;
         break;
       case VecPredRegClass:
-        executeStats[tid]->numVecPredRegReads++;
+        cpuStats.vecPredRegfileReads++;
         break;
       default:
         break;
@@ -960,24 +1063,24 @@ CPU::getReg(PhysRegIdPtr phys_reg, ThreadID tid)
 }
 
 void
-CPU::getReg(PhysRegIdPtr phys_reg, void *val, ThreadID tid)
+CPU::getReg(PhysRegIdPtr phys_reg, void *val)
 {
     switch (phys_reg->classValue()) {
       case IntRegClass:
-        executeStats[tid]->numIntRegReads++;
+        cpuStats.intRegfileReads++;
         break;
       case FloatRegClass:
-        executeStats[tid]->numFpRegReads++;
+        cpuStats.fpRegfileReads++;
         break;
       case CCRegClass:
-        executeStats[tid]->numCCRegReads++;
+        cpuStats.ccRegfileReads++;
         break;
       case VecRegClass:
       case VecElemClass:
-        executeStats[tid]->numVecRegReads++;
+        cpuStats.vecRegfileReads++;
         break;
       case VecPredRegClass:
-        executeStats[tid]->numVecPredRegReads++;
+        cpuStats.vecPredRegfileReads++;
         break;
       default:
         break;
@@ -986,14 +1089,14 @@ CPU::getReg(PhysRegIdPtr phys_reg, void *val, ThreadID tid)
 }
 
 void *
-CPU::getWritableReg(PhysRegIdPtr phys_reg, ThreadID tid)
+CPU::getWritableReg(PhysRegIdPtr phys_reg)
 {
     switch (phys_reg->classValue()) {
       case VecRegClass:
-        executeStats[tid]->numVecRegReads++;
+        cpuStats.vecRegfileReads++;
         break;
       case VecPredRegClass:
-        executeStats[tid]->numVecPredRegReads++;
+        cpuStats.vecPredRegfileReads++;
         break;
       default:
         break;
@@ -1002,24 +1105,24 @@ CPU::getWritableReg(PhysRegIdPtr phys_reg, ThreadID tid)
 }
 
 void
-CPU::setReg(PhysRegIdPtr phys_reg, RegVal val, ThreadID tid)
+CPU::setReg(PhysRegIdPtr phys_reg, RegVal val)
 {
     switch (phys_reg->classValue()) {
       case IntRegClass:
-        executeStats[tid]->numIntRegWrites++;
+        cpuStats.intRegfileWrites++;
         break;
       case FloatRegClass:
-        executeStats[tid]->numFpRegWrites++;
+        cpuStats.fpRegfileWrites++;
         break;
       case CCRegClass:
-        executeStats[tid]->numCCRegWrites++;
+        cpuStats.ccRegfileWrites++;
         break;
       case VecRegClass:
       case VecElemClass:
-        executeStats[tid]->numVecRegWrites++;
+        cpuStats.vecRegfileWrites++;
         break;
       case VecPredRegClass:
-        executeStats[tid]->numVecPredRegWrites++;
+        cpuStats.vecPredRegfileWrites++;
         break;
       default:
         break;
@@ -1028,24 +1131,24 @@ CPU::setReg(PhysRegIdPtr phys_reg, RegVal val, ThreadID tid)
 }
 
 void
-CPU::setReg(PhysRegIdPtr phys_reg, const void *val, ThreadID tid)
+CPU::setReg(PhysRegIdPtr phys_reg, const void *val)
 {
     switch (phys_reg->classValue()) {
       case IntRegClass:
-        executeStats[tid]->numIntRegWrites++;
+        cpuStats.intRegfileWrites++;
         break;
       case FloatRegClass:
-        executeStats[tid]->numFpRegWrites++;
+        cpuStats.fpRegfileWrites++;
         break;
       case CCRegClass:
-        executeStats[tid]->numCCRegWrites++;
+        cpuStats.ccRegfileWrites++;
         break;
       case VecRegClass:
       case VecElemClass:
-        executeStats[tid]->numVecRegWrites++;
+        cpuStats.vecRegfileWrites++;
         break;
       case VecPredRegClass:
-        executeStats[tid]->numVecPredRegWrites++;
+        cpuStats.vecPredRegfileWrites++;
         break;
       default:
         break;
@@ -1127,14 +1230,14 @@ CPU::instDone(ThreadID tid, const DynInstPtr &inst)
     if (!inst->isMicroop() || inst->isLastMicroop()) {
         thread[tid]->numInst++;
         thread[tid]->threadStats.numInsts++;
-        commitStats[tid]->numInstsNotNOP++;
+        cpuStats.committedInsts[tid]++;
 
         // Check for instruction-count-based events.
         thread[tid]->comInstEventQueue.serviceEvents(thread[tid]->numInst);
     }
     thread[tid]->numOp++;
     thread[tid]->threadStats.numOps++;
-    commitStats[tid]->numOpsNotNOP++;
+    cpuStats.committedOps[tid]++;
 
     probeInstCommit(inst->staticInst, inst->pcState().instAddr());
 }
diff --git a/src/cpu/o3/cpu.hh b/src/cpu/o3/cpu.hh
index 7dc378428b..08a1312e73 100644
--- a/src/cpu/o3/cpu.hh
+++ b/src/cpu/o3/cpu.hh
@@ -310,12 +310,12 @@ class CPU : public BaseCPU
      */
     void setMiscReg(int misc_reg, RegVal val, ThreadID tid);
 
-    RegVal getReg(PhysRegIdPtr phys_reg, ThreadID tid);
-    void getReg(PhysRegIdPtr phys_reg, void *val, ThreadID tid);
-    void *getWritableReg(PhysRegIdPtr phys_reg, ThreadID tid);
+    RegVal getReg(PhysRegIdPtr phys_reg);
+    void getReg(PhysRegIdPtr phys_reg, void *val);
+    void *getWritableReg(PhysRegIdPtr phys_reg);
 
-    void setReg(PhysRegIdPtr phys_reg, RegVal val, ThreadID tid);
-    void setReg(PhysRegIdPtr phys_reg, const void *val, ThreadID tid);
+    void setReg(PhysRegIdPtr phys_reg, RegVal val);
+    void setReg(PhysRegIdPtr phys_reg, const void *val);
 
     /** Architectural register accessors.  Looks up in the commit
      * rename table to obtain the true physical index of the
@@ -581,7 +581,38 @@ class CPU : public BaseCPU
         /** Stat for total number of cycles the CPU spends descheduled due to a
          * quiesce operation or waiting for an interrupt. */
         statistics::Scalar quiesceCycles;
+        /** Stat for the number of committed instructions per thread. */
+        statistics::Vector committedInsts;
+        /** Stat for the number of committed ops (including micro ops) per
+         *  thread. */
+        statistics::Vector committedOps;
+        /** Stat for the CPI per thread. */
+        statistics::Formula cpi;
+        /** Stat for the total CPI. */
+        statistics::Formula totalCpi;
+        /** Stat for the IPC per thread. */
+        statistics::Formula ipc;
+        /** Stat for the total IPC. */
+        statistics::Formula totalIpc;
 
+        //number of integer register file accesses
+        statistics::Scalar intRegfileReads;
+        statistics::Scalar intRegfileWrites;
+        //number of float register file accesses
+        statistics::Scalar fpRegfileReads;
+        statistics::Scalar fpRegfileWrites;
+        //number of vector register file accesses
+        mutable statistics::Scalar vecRegfileReads;
+        statistics::Scalar vecRegfileWrites;
+        //number of predicate register file accesses
+        mutable statistics::Scalar vecPredRegfileReads;
+        statistics::Scalar vecPredRegfileWrites;
+        //number of CC register file accesses
+        statistics::Scalar ccRegfileReads;
+        statistics::Scalar ccRegfileWrites;
+        //number of misc
+        statistics::Scalar miscRegfileReads;
+        statistics::Scalar miscRegfileWrites;
     } cpuStats;
 
   public:
diff --git a/src/cpu/o3/dyn_inst.hh b/src/cpu/o3/dyn_inst.hh
index c759c5eb38..54c0385374 100644
--- a/src/cpu/o3/dyn_inst.hh
+++ b/src/cpu/o3/dyn_inst.hh
@@ -1086,10 +1086,10 @@ class DynInst : public ExecContext, public RefCounted
 
             if (bytes == sizeof(RegVal)) {
                 setRegOperand(staticInst.get(), idx,
-                        cpu->getReg(prev_phys_reg, threadNumber));
+                        cpu->getReg(prev_phys_reg));
             } else {
                 uint8_t val[original_dest_reg.regClass().regBytes()];
-                cpu->getReg(prev_phys_reg, val, threadNumber);
+                cpu->getReg(prev_phys_reg, val);
                 setRegOperand(staticInst.get(), idx, val);
             }
         }
@@ -1116,7 +1116,7 @@ class DynInst : public ExecContext, public RefCounted
         const PhysRegIdPtr reg = renamedSrcIdx(idx);
         if (reg->is(InvalidRegClass))
             return 0;
-        return cpu->getReg(reg, threadNumber);
+        return cpu->getReg(reg);
     }
 
     void
@@ -1125,13 +1125,13 @@ class DynInst : public ExecContext, public RefCounted
         const PhysRegIdPtr reg = renamedSrcIdx(idx);
         if (reg->is(InvalidRegClass))
             return;
-        cpu->getReg(reg, val, threadNumber);
+        cpu->getReg(reg, val);
     }
 
     void *
     getWritableRegOperand(const StaticInst *si, int idx) override
     {
-        return cpu->getWritableReg(renamedDestIdx(idx), threadNumber);
+        return cpu->getWritableReg(renamedDestIdx(idx));
     }
 
     /** @todo: Make results into arrays so they can handle multiple dest
@@ -1143,7 +1143,7 @@ class DynInst : public ExecContext, public RefCounted
         const PhysRegIdPtr reg = renamedDestIdx(idx);
         if (reg->is(InvalidRegClass))
             return;
-        cpu->setReg(reg, val, threadNumber);
+        cpu->setReg(reg, val);
         setResult(reg->regClass(), val);
     }
 
@@ -1153,7 +1153,7 @@ class DynInst : public ExecContext, public RefCounted
         const PhysRegIdPtr reg = renamedDestIdx(idx);
         if (reg->is(InvalidRegClass))
             return;
-        cpu->setReg(reg, val, threadNumber);
+        cpu->setReg(reg, val);
         setResult(reg->regClass(), val);
     }
 };
diff --git a/src/cpu/o3/fetch.cc b/src/cpu/o3/fetch.cc
index f5fc6c62ec..d3cdd2c761 100644
--- a/src/cpu/o3/fetch.cc
+++ b/src/cpu/o3/fetch.cc
@@ -158,6 +158,12 @@ Fetch::regProbePoints()
 
 Fetch::FetchStatGroup::FetchStatGroup(CPU *cpu, Fetch *fetch)
     : statistics::Group(cpu, "fetch"),
+    ADD_STAT(icacheStallCycles, statistics::units::Cycle::get(),
+             "Number of cycles fetch is stalled on an Icache miss"),
+    ADD_STAT(insts, statistics::units::Count::get(),
+             "Number of instructions fetch has processed"),
+    ADD_STAT(branches, statistics::units::Count::get(),
+             "Number of branches that fetch encountered"),
     ADD_STAT(predictedBranches, statistics::units::Count::get(),
              "Number of branches that fetch has predicted taken"),
     ADD_STAT(cycles, statistics::units::Cycle::get(),
@@ -194,8 +200,21 @@ Fetch::FetchStatGroup::FetchStatGroup(CPU *cpu, Fetch *fetch)
              "Number of instructions fetched each cycle (Total)"),
     ADD_STAT(idleRate, statistics::units::Ratio::get(),
              "Ratio of cycles fetch was idle",
-             idleCycles / cpu->baseStats.numCycles)
+             idleCycles / cpu->baseStats.numCycles),
+    ADD_STAT(branchRate, statistics::units::Ratio::get(),
+             "Number of branch fetches per cycle",
+             branches / cpu->baseStats.numCycles),
+    ADD_STAT(rate, statistics::units::Rate<
+                    statistics::units::Count, statistics::units::Cycle>::get(),
+             "Number of inst fetches per cycle",
+             insts / cpu->baseStats.numCycles)
 {
+        icacheStallCycles
+            .prereq(icacheStallCycles);
+        insts
+            .prereq(insts);
+        branches
+            .prereq(branches);
         predictedBranches
             .prereq(predictedBranches);
         cycles
@@ -233,6 +252,10 @@ Fetch::FetchStatGroup::FetchStatGroup(CPU *cpu, Fetch *fetch)
             .flags(statistics::pdf);
         idleRate
             .prereq(idleRate);
+        branchRate
+            .flags(statistics::total);
+        rate
+            .flags(statistics::total);
 }
 void
 Fetch::setTimeBuffer(TimeBuffer<TimeStruct> *time_buffer)
@@ -517,7 +540,7 @@ Fetch::lookupAndUpdateNextPC(const DynInstPtr &inst, PCStateBase &next_pc)
     inst->setPredTarg(next_pc);
     inst->setPredTaken(predict_taken);
 
-    cpu->fetchStats[tid]->numBranches++;
+    ++fetchStats.branches;
 
     if (predict_taken) {
         ++fetchStats.predictedBranches;
@@ -1123,7 +1146,7 @@ Fetch::fetch(bool &status_change)
             fetchCacheLine(fetchAddr, tid, this_pc.instAddr());
 
             if (fetchStatus[tid] == IcacheWaitResponse)
-                cpu->fetchStats[tid]->icacheStallCycles++;
+                ++fetchStats.icacheStallCycles;
             else if (fetchStatus[tid] == ItlbWait)
                 ++fetchStats.tlbCycles;
             else
@@ -1219,7 +1242,7 @@ Fetch::fetch(bool &status_change)
                     staticInst = dec_ptr->decode(this_pc);
 
                     // Increment stat of fetched instructions.
-                    cpu->fetchStats[tid]->numInsts++;
+                    ++fetchStats.insts;
 
                     if (staticInst->isMacroop()) {
                         curMacroop = staticInst;
@@ -1549,7 +1572,7 @@ Fetch::profileStall(ThreadID tid)
         ++fetchStats.squashCycles;
         DPRINTF(Fetch, "[tid:%i] Fetch is squashing!\n", tid);
     } else if (fetchStatus[tid] == IcacheWaitResponse) {
-        cpu->fetchStats[tid]->icacheStallCycles++;
+        ++fetchStats.icacheStallCycles;
         DPRINTF(Fetch, "[tid:%i] Fetch is waiting cache response!\n",
                 tid);
     } else if (fetchStatus[tid] == ItlbWait) {
diff --git a/src/cpu/o3/fetch.hh b/src/cpu/o3/fetch.hh
index 6add31444d..cd311913f5 100644
--- a/src/cpu/o3/fetch.hh
+++ b/src/cpu/o3/fetch.hh
@@ -533,6 +533,12 @@ class Fetch
         FetchStatGroup(CPU *cpu, Fetch *fetch);
         // @todo: Consider making these
         // vectors and tracking on a per thread basis.
+        /** Stat for total number of cycles stalled due to an icache miss. */
+        statistics::Scalar icacheStallCycles;
+        /** Stat for total number of fetched instructions. */
+        statistics::Scalar insts;
+        /** Total number of fetched branches. */
+        statistics::Scalar branches;
         /** Stat for total number of predicted branches. */
         statistics::Scalar predictedBranches;
         /** Stat for total number of cycles spent fetching. */
@@ -575,6 +581,10 @@ class Fetch
         statistics::Distribution nisnDist;
         /** Rate of how often fetch was idle. */
         statistics::Formula idleRate;
+        /** Number of branch fetches per cycle. */
+        statistics::Formula branchRate;
+        /** Number of instruction fetched per cycle. */
+        statistics::Formula rate;
     } fetchStats;
 };
 
diff --git a/src/cpu/o3/iew.cc b/src/cpu/o3/iew.cc
index 92d281ce93..7cf6c54542 100644
--- a/src/cpu/o3/iew.cc
+++ b/src/cpu/o3/iew.cc
@@ -217,14 +217,52 @@ IEW::IEWStats::IEWStats(CPU *cpu)
 
 IEW::IEWStats::ExecutedInstStats::ExecutedInstStats(CPU *cpu)
     : statistics::Group(cpu),
+    ADD_STAT(numInsts, statistics::units::Count::get(),
+             "Number of executed instructions"),
+    ADD_STAT(numLoadInsts, statistics::units::Count::get(),
+             "Number of load instructions executed"),
     ADD_STAT(numSquashedInsts, statistics::units::Count::get(),
              "Number of squashed instructions skipped in execute"),
     ADD_STAT(numSwp, statistics::units::Count::get(),
-             "Number of swp insts executed")
+             "Number of swp insts executed"),
+    ADD_STAT(numNop, statistics::units::Count::get(),
+             "Number of nop insts executed"),
+    ADD_STAT(numRefs, statistics::units::Count::get(),
+             "Number of memory reference insts executed"),
+    ADD_STAT(numBranches, statistics::units::Count::get(),
+             "Number of branches executed"),
+    ADD_STAT(numStoreInsts, statistics::units::Count::get(),
+             "Number of stores executed"),
+    ADD_STAT(numRate, statistics::units::Rate<
+                statistics::units::Count, statistics::units::Cycle>::get(),
+             "Inst execution rate", numInsts / cpu->baseStats.numCycles)
 {
+    numLoadInsts
+        .init(cpu->numThreads)
+        .flags(statistics::total);
+
     numSwp
         .init(cpu->numThreads)
         .flags(statistics::total);
+
+    numNop
+        .init(cpu->numThreads)
+        .flags(statistics::total);
+
+    numRefs
+        .init(cpu->numThreads)
+        .flags(statistics::total);
+
+    numBranches
+        .init(cpu->numThreads)
+        .flags(statistics::total);
+
+    numStoreInsts
+        .flags(statistics::total);
+    numStoreInsts = numRefs - numLoadInsts;
+
+    numRate
+        .flags(statistics::total);
 }
 
 void
@@ -1015,7 +1053,7 @@ IEW::dispatchInsts(ThreadID tid)
 
             instQueue.recordProducer(inst);
 
-            cpu->executeStats[tid]->numNop++;
+            iewStats.executedInstStats.numNop[tid]++;
 
             add_to_iq = false;
         } else {
@@ -1523,7 +1561,7 @@ IEW::updateExeInstStats(const DynInstPtr& inst)
 {
     ThreadID tid = inst->threadNumber;
 
-    cpu->executeStats[tid]->numInsts++;
+    iewStats.executedInstStats.numInsts++;
 
 #if TRACING_ON
     if (debug::O3PipeView) {
@@ -1535,16 +1573,16 @@ IEW::updateExeInstStats(const DynInstPtr& inst)
     //  Control operations
     //
     if (inst->isControl())
-        cpu->executeStats[tid]->numBranches++;
+        iewStats.executedInstStats.numBranches[tid]++;
 
     //
     //  Memory operations
     //
     if (inst->isMemRef()) {
-        cpu->executeStats[tid]->numMemRefs++;
+        iewStats.executedInstStats.numRefs[tid]++;
 
         if (inst->isLoad()) {
-            cpu->executeStats[tid]->numLoadInsts++;
+            iewStats.executedInstStats.numLoadInsts[tid]++;
         }
     }
 }
diff --git a/src/cpu/o3/iew.hh b/src/cpu/o3/iew.hh
index 4fe8227dcc..80fed295df 100644
--- a/src/cpu/o3/iew.hh
+++ b/src/cpu/o3/iew.hh
@@ -455,11 +455,25 @@ class IEW
         {
             ExecutedInstStats(CPU *cpu);
 
+            /** Stat for total number of executed instructions. */
+            statistics::Scalar numInsts;
+            /** Stat for total number of executed load instructions. */
+            statistics::Vector numLoadInsts;
             /** Stat for total number of squashed instructions skipped at
              *  execute. */
             statistics::Scalar numSquashedInsts;
             /** Number of executed software prefetches. */
             statistics::Vector numSwp;
+            /** Number of executed nops. */
+            statistics::Vector numNop;
+            /** Number of executed meomory references. */
+            statistics::Vector numRefs;
+            /** Number of executed branches. */
+            statistics::Vector numBranches;
+            /** Number of executed store instructions. */
+            statistics::Formula numStoreInsts;
+            /** Number of instructions executed per cycle. */
+            statistics::Formula numRate;
         } executedInstStats;
 
         /** Number of instructions sent to commit. */
diff --git a/src/cpu/simple/base.cc b/src/cpu/simple/base.cc
index 35d149097c..768f63ede5 100644
--- a/src/cpu/simple/base.cc
+++ b/src/cpu/simple/base.cc
@@ -154,36 +154,10 @@ BaseSimpleCPU::countInst()
 
     if (!curStaticInst->isMicroop() || curStaticInst->isLastMicroop()) {
         t_info.numInst++;
+        t_info.execContextStats.numInsts++;
     }
     t_info.numOp++;
-}
-
-void
-BaseSimpleCPU::countFetchInst()
-{
-    SimpleExecContext& t_info = *threadInfo[curThread];
-
-    if (!curStaticInst->isMicroop() || curStaticInst->isLastMicroop()) {
-        // increment thread level numInsts fetched count
-        fetchStats[t_info.thread->threadId()]->numInsts++;
-    }
-    // increment thread level numOps fetched count
-    fetchStats[t_info.thread->threadId()]->numOps++;
-}
-
-void
-BaseSimpleCPU::countCommitInst()
-{
-    SimpleExecContext& t_info = *threadInfo[curThread];
-
-    if (!curStaticInst->isMicroop() || curStaticInst->isLastMicroop()) {
-        // increment thread level and core level numInsts count
-        commitStats[t_info.thread->threadId()]->numInsts++;
-        baseStats.numInsts++;
-    }
-    // increment thread level and core level numOps count
-    commitStats[t_info.thread->threadId()]->numOps++;
-    baseStats.numOps++;
+    t_info.execContextStats.numOps++;
 }
 
 Counter
@@ -402,11 +376,6 @@ BaseSimpleCPU::preExecute()
         if (predict_taken)
             ++t_info.execContextStats.numPredictedBranches;
     }
-
-    // increment the fetch instruction stat counters
-    if (curStaticInst) {
-        countFetchInst();
-    }
 }
 
 void
@@ -419,7 +388,7 @@ BaseSimpleCPU::postExecute()
     Addr instAddr = threadContexts[curThread]->pcState().instAddr();
 
     if (curStaticInst->isMemRef()) {
-        executeStats[t_info.thread->threadId()]->numMemRefs++;
+        t_info.execContextStats.numMemRefs++;
     }
 
     if (curStaticInst->isLoad()) {
@@ -427,26 +396,26 @@ BaseSimpleCPU::postExecute()
     }
 
     if (curStaticInst->isControl()) {
-        ++fetchStats[t_info.thread->threadId()]->numBranches;
+        ++t_info.execContextStats.numBranches;
     }
 
     /* Power model statistics */
     //integer alu accesses
     if (curStaticInst->isInteger()){
-        executeStats[t_info.thread->threadId()]->numIntAluAccesses++;
-        commitStats[t_info.thread->threadId()]->numIntInsts++;
+        t_info.execContextStats.numIntAluAccesses++;
+        t_info.execContextStats.numIntInsts++;
     }
 
     //float alu accesses
     if (curStaticInst->isFloating()){
-        executeStats[t_info.thread->threadId()]->numFpAluAccesses++;
-        commitStats[t_info.thread->threadId()]->numFpInsts++;
+        t_info.execContextStats.numFpAluAccesses++;
+        t_info.execContextStats.numFpInsts++;
     }
 
     //vector alu accesses
     if (curStaticInst->isVector()){
-        executeStats[t_info.thread->threadId()]->numVecAluAccesses++;
-        commitStats[t_info.thread->threadId()]->numVecInsts++;
+        t_info.execContextStats.numVecAluAccesses++;
+        t_info.execContextStats.numVecInsts++;
     }
 
     //Matrix alu accesses
@@ -460,22 +429,22 @@ BaseSimpleCPU::postExecute()
         t_info.execContextStats.numCallsReturns++;
     }
 
+    //the number of branch predictions that will be made
+    if (curStaticInst->isCondCtrl()){
+        t_info.execContextStats.numCondCtrlInsts++;
+    }
+
     //result bus acceses
     if (curStaticInst->isLoad()){
-        commitStats[t_info.thread->threadId()]->numLoadInsts++;
+        t_info.execContextStats.numLoadInsts++;
     }
 
     if (curStaticInst->isStore() || curStaticInst->isAtomic()){
-        commitStats[t_info.thread->threadId()]->numStoreInsts++;
+        t_info.execContextStats.numStoreInsts++;
     }
     /* End power model statistics */
 
-    commitStats[t_info.thread->threadId()]
-        ->committedInstType[curStaticInst->opClass()]++;
-    commitStats[t_info.thread->threadId()]->updateComCtrlStats(curStaticInst);
-
-    /* increment the committed numInsts and numOps stats */
-    countCommitInst();
+    t_info.execContextStats.statExecutedInstType[curStaticInst->opClass()]++;
 
     if (FullSystem)
         traceFunctions(instAddr);
diff --git a/src/cpu/simple/base.hh b/src/cpu/simple/base.hh
index 46a25a0a42..df5290cf3c 100644
--- a/src/cpu/simple/base.hh
+++ b/src/cpu/simple/base.hh
@@ -182,8 +182,6 @@ class BaseSimpleCPU : public BaseCPU
     }
 
     void countInst();
-    void countFetchInst();
-    void countCommitInst();
     Counter totalInsts() const override;
     Counter totalOps() const override;
 
diff --git a/src/cpu/simple/exec_context.hh b/src/cpu/simple/exec_context.hh
index c0927fcadd..0f20763f28 100644
--- a/src/cpu/simple/exec_context.hh
+++ b/src/cpu/simple/exec_context.hh
@@ -86,12 +86,60 @@ class SimpleExecContext : public ExecContext
             : statistics::Group(cpu,
                            csprintf("exec_context.thread_%i",
                                     thread->threadId()).c_str()),
+              ADD_STAT(numInsts, statistics::units::Count::get(),
+                       "Number of instructions committed"),
+              ADD_STAT(numOps, statistics::units::Count::get(),
+                       "Number of ops (including micro ops) committed"),
+              ADD_STAT(numIntAluAccesses, statistics::units::Count::get(),
+                       "Number of integer alu accesses"),
+              ADD_STAT(numFpAluAccesses, statistics::units::Count::get(),
+                       "Number of float alu accesses"),
+              ADD_STAT(numVecAluAccesses, statistics::units::Count::get(),
+                       "Number of vector alu accesses"),
               ADD_STAT(numMatAluAccesses, statistics::units::Count::get(),
                        "Number of matrix alu accesses"),
               ADD_STAT(numCallsReturns, statistics::units::Count::get(),
                        "Number of times a function call or return occured"),
+              ADD_STAT(numCondCtrlInsts, statistics::units::Count::get(),
+                       "Number of instructions that are conditional controls"),
+              ADD_STAT(numIntInsts, statistics::units::Count::get(),
+                       "Number of integer instructions"),
+              ADD_STAT(numFpInsts, statistics::units::Count::get(),
+                       "Number of float instructions"),
+              ADD_STAT(numVecInsts, statistics::units::Count::get(),
+                       "Number of vector instructions"),
               ADD_STAT(numMatInsts, statistics::units::Count::get(),
                        "Number of matrix instructions"),
+              ADD_STAT(numIntRegReads, statistics::units::Count::get(),
+                       "Number of times the integer registers were read"),
+              ADD_STAT(numIntRegWrites, statistics::units::Count::get(),
+                       "Number of times the integer registers were written"),
+              ADD_STAT(numFpRegReads, statistics::units::Count::get(),
+                       "Number of times the floating registers were read"),
+              ADD_STAT(numFpRegWrites, statistics::units::Count::get(),
+                       "Number of times the floating registers were written"),
+              ADD_STAT(numVecRegReads, statistics::units::Count::get(),
+                       "Number of times the vector registers were read"),
+              ADD_STAT(numVecRegWrites, statistics::units::Count::get(),
+                       "Number of times the vector registers were written"),
+              ADD_STAT(numVecPredRegReads, statistics::units::Count::get(),
+                       "Number of times the predicate registers were read"),
+              ADD_STAT(numVecPredRegWrites, statistics::units::Count::get(),
+                       "Number of times the predicate registers were written"),
+              ADD_STAT(numCCRegReads, statistics::units::Count::get(),
+                       "Number of times the CC registers were read"),
+              ADD_STAT(numCCRegWrites, statistics::units::Count::get(),
+                       "Number of times the CC registers were written"),
+              ADD_STAT(numMiscRegReads, statistics::units::Count::get(),
+                       "Number of times the Misc registers were read"),
+              ADD_STAT(numMiscRegWrites, statistics::units::Count::get(),
+                       "Number of times the Misc registers were written"),
+              ADD_STAT(numMemRefs, statistics::units::Count::get(),
+                       "Number of memory refs"),
+              ADD_STAT(numLoadInsts, statistics::units::Count::get(),
+                       "Number of load instructions"),
+              ADD_STAT(numStoreInsts, statistics::units::Count::get(),
+                       "Number of store instructions"),
               ADD_STAT(numIdleCycles, statistics::units::Cycle::get(),
                        "Number of idle cycles"),
               ADD_STAT(numBusyCycles, statistics::units::Cycle::get(),
@@ -100,35 +148,64 @@ class SimpleExecContext : public ExecContext
                        "Percentage of non-idle cycles"),
               ADD_STAT(idleFraction, statistics::units::Ratio::get(),
                        "Percentage of idle cycles"),
+              ADD_STAT(icacheStallCycles, statistics::units::Cycle::get(),
+                       "ICache total stall cycles"),
+              ADD_STAT(dcacheStallCycles, statistics::units::Cycle::get(),
+                       "DCache total stall cycles"),
+              ADD_STAT(numBranches, statistics::units::Count::get(),
+                       "Number of branches fetched"),
               ADD_STAT(numPredictedBranches, statistics::units::Count::get(),
                        "Number of branches predicted as taken"),
               ADD_STAT(numBranchMispred, statistics::units::Count::get(),
                        "Number of branch mispredictions"),
+              ADD_STAT(statExecutedInstType, statistics::units::Count::get(),
+                       "Class of executed instruction."),
               numRegReads{
-                  &(cpu->executeStats[thread->threadId()]->numIntRegReads),
-                  &(cpu->executeStats[thread->threadId()]->numFpRegReads),
-                  &(cpu->executeStats[thread->threadId()]->numVecRegReads),
-                  &(cpu->executeStats[thread->threadId()]->numVecRegReads),
-                  &(cpu->executeStats[thread->threadId()]->numVecPredRegReads),
-                  &(cpu->executeStats[thread->threadId()]->numCCRegReads),
-                  &numMatRegReads
+                  &numIntRegReads,
+                  &numFpRegReads,
+                  &numVecRegReads,
+                  &numVecRegReads,
+                  &numVecPredRegReads,
+                  &numMatRegReads,
+                  &numCCRegReads
               },
               numRegWrites{
-                  &(cpu->executeStats[thread->threadId()]->numIntRegWrites),
-                  &(cpu->executeStats[thread->threadId()]->numFpRegWrites),
-                  &(cpu->executeStats[thread->threadId()]->numVecRegWrites),
-                  &(cpu->executeStats[thread->threadId()]->numVecRegWrites),
-                  &(cpu->executeStats[thread->threadId()]
-                        ->numVecPredRegWrites),
-                  &(cpu->executeStats[thread->threadId()]->numCCRegWrites),
-                  &numMatRegWrites
+                  &numIntRegWrites,
+                  &numFpRegWrites,
+                  &numVecRegWrites,
+                  &numVecRegWrites,
+                  &numVecPredRegWrites,
+                  &numMatRegWrites,
+                  &numCCRegWrites
               }
         {
+            numCCRegReads
+                .flags(statistics::nozero);
+
+            numCCRegWrites
+                .flags(statistics::nozero);
+
+            icacheStallCycles
+                .prereq(icacheStallCycles);
+
+            dcacheStallCycles
+                .prereq(dcacheStallCycles);
+
+            statExecutedInstType
+                .init(enums::Num_OpClass)
+                .flags(statistics::total | statistics::pdf | statistics::dist);
+
+            for (unsigned i = 0; i < Num_OpClasses; ++i) {
+                statExecutedInstType.subname(i, enums::OpClassStrings[i]);
+            }
 
             idleFraction = statistics::constant(1.0) - notIdleFraction;
             numIdleCycles = idleFraction * cpu->baseStats.numCycles;
             numBusyCycles = notIdleFraction * cpu->baseStats.numCycles;
 
+            numBranches
+                .prereq(numBranches);
+
             numPredictedBranches
                 .prereq(numPredictedBranches);
 
@@ -136,19 +213,73 @@ class SimpleExecContext : public ExecContext
                 .prereq(numBranchMispred);
         }
 
+        // Number of simulated instructions
+        statistics::Scalar numInsts;
+        statistics::Scalar numOps;
+
+        // Number of integer alu accesses
+        statistics::Scalar numIntAluAccesses;
+
+        // Number of float alu accesses
+        statistics::Scalar numFpAluAccesses;
+
+        // Number of vector alu accesses
+        statistics::Scalar numVecAluAccesses;
+
         // Number of matrix alu accesses
         statistics::Scalar numMatAluAccesses;
 
         // Number of function calls/returns
         statistics::Scalar numCallsReturns;
 
+        // Conditional control instructions;
+        statistics::Scalar numCondCtrlInsts;
+
+        // Number of int instructions
+        statistics::Scalar numIntInsts;
+
+        // Number of float instructions
+        statistics::Scalar numFpInsts;
+
+        // Number of vector instructions
+        statistics::Scalar numVecInsts;
+
         // Number of matrix instructions
         statistics::Scalar numMatInsts;
 
+        // Number of integer register file accesses
+        statistics::Scalar numIntRegReads;
+        statistics::Scalar numIntRegWrites;
+
+        // Number of float register file accesses
+        statistics::Scalar numFpRegReads;
+        statistics::Scalar numFpRegWrites;
+
+        // Number of vector register file accesses
+        mutable statistics::Scalar numVecRegReads;
+        statistics::Scalar numVecRegWrites;
+
+        // Number of predicate register file accesses
+        mutable statistics::Scalar numVecPredRegReads;
+        statistics::Scalar numVecPredRegWrites;
+
         // Number of matrix register file accesses
         mutable statistics::Scalar numMatRegReads;
         statistics::Scalar numMatRegWrites;
 
+        // Number of condition code register file accesses
+        statistics::Scalar numCCRegReads;
+        statistics::Scalar numCCRegWrites;
+
+        // Number of misc register file accesses
+        statistics::Scalar numMiscRegReads;
+        statistics::Scalar numMiscRegWrites;
+
+        // Number of simulated memory references
+        statistics::Scalar numMemRefs;
+        statistics::Scalar numLoadInsts;
+        statistics::Scalar numStoreInsts;
+
         // Number of idle cycles
         statistics::Formula numIdleCycles;
 
@@ -159,13 +290,24 @@ class SimpleExecContext : public ExecContext
         statistics::Average notIdleFraction;
         statistics::Formula idleFraction;
 
+        // Number of cycles stalled for I-cache responses
+        statistics::Scalar icacheStallCycles;
+
+        // Number of cycles stalled for D-cache responses
+        statistics::Scalar dcacheStallCycles;
+
         /// @{
+        /// Total number of branches fetched
+        statistics::Scalar numBranches;
         /// Number of branches predicted as taken
         statistics::Scalar numPredictedBranches;
         /// Number of misprediced branches
         statistics::Scalar numBranchMispred;
         /// @}
 
+        // Instruction mix histogram by OpClass
+        statistics::Vector statExecutedInstType;
+
         std::array<statistics::Scalar *, CCRegClass + 1> numRegReads;
         std::array<statistics::Scalar *, CCRegClass + 1> numRegWrites;
 
@@ -226,7 +368,7 @@ class SimpleExecContext : public ExecContext
     RegVal
     readMiscRegOperand(const StaticInst *si, int idx) override
     {
-        cpu->executeStats[thread->threadId()]->numMiscRegReads++;
+        execContextStats.numMiscRegReads++;
         const RegId& reg = si->srcRegIdx(idx);
         assert(reg.is(MiscRegClass));
         return thread->readMiscReg(reg.index());
@@ -235,7 +377,7 @@ class SimpleExecContext : public ExecContext
     void
     setMiscRegOperand(const StaticInst *si, int idx, RegVal val) override
     {
-        cpu->executeStats[thread->threadId()]->numMiscRegWrites++;
+        execContextStats.numMiscRegWrites++;
         const RegId& reg = si->destRegIdx(idx);
         assert(reg.is(MiscRegClass));
         thread->setMiscReg(reg.index(), val);
@@ -248,7 +390,7 @@ class SimpleExecContext : public ExecContext
     RegVal
     readMiscReg(int misc_reg) override
     {
-        cpu->executeStats[thread->threadId()]->numMiscRegReads++;
+        execContextStats.numMiscRegReads++;
         return thread->readMiscReg(misc_reg);
     }
 
@@ -259,7 +401,7 @@ class SimpleExecContext : public ExecContext
     void
     setMiscReg(int misc_reg, RegVal val) override
     {
-        cpu->executeStats[thread->threadId()]->numMiscRegWrites++;
+        execContextStats.numMiscRegWrites++;
         thread->setMiscReg(misc_reg, val);
     }
 

From 4c8ad56072d66974485fbe94e92ef1a1a890c291 Mon Sep 17 00:00:00 2001
From: Nicolas Boichat <drinkcat@google.com>
Date: Tue, 21 Feb 2023 05:08:34 +0000
Subject: [PATCH 249/492] fastmodel: Check early for license server issue

We have a setup that requires manual startup of an ssh proxy to
access license server, and without that, gem5 takes about a minute
until the license checkout times out (until then, it's unclear
why nothing is happening).

We asked ARM for a way to decrease timeouts, but that doesn't
seem to be easy to do.

Change-Id: I37b84fd52cb7fb221a9e48dcb52a33a11f4d1580
Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/68177
Reviewed-by: Gabe Black <gabe.black@gmail.com>
Maintainer: Gabe Black <gabe.black@gmail.com>
Tested-by: kokoro <noreply+kokoro@google.com>
---
 src/arch/arm/fastmodel/arm_fast_model.py | 70 ++++++++++++++++++++++--
 1 file changed, 65 insertions(+), 5 deletions(-)

diff --git a/src/arch/arm/fastmodel/arm_fast_model.py b/src/arch/arm/fastmodel/arm_fast_model.py
index d2d911f5b4..81b2cfe04b 100644
--- a/src/arch/arm/fastmodel/arm_fast_model.py
+++ b/src/arch/arm/fastmodel/arm_fast_model.py
@@ -23,21 +23,75 @@
 # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 
+import logging
 import os
+import socket
 
 from m5.defines import buildEnv
 import _m5.arm_fast_model
 
+ARM_LICENSE_ENV = "ARMLMD_LICENSE_FILE"
+LM_LICENSE_ENV = "LM_LICENSE_FILE"
+
 
 def set_armlmd_license_file(force=False):
     """Set the ARMLMD_LICENSE_FILE environment variable. If "force" is
     False, then it will only be set if it wasn't already set in the
     environment. The value it's set to is the one gem5 was built with.
     """
-    key = "ARMLMD_LICENSE_FILE"
-    license_file = buildEnv[key]
-    if force or key not in os.environ:
-        os.environ[key] = license_file
+    license_file = buildEnv[ARM_LICENSE_ENV]
+    if force or ARM_LICENSE_ENV not in os.environ:
+        os.environ[ARM_LICENSE_ENV] = license_file
+
+
+def check_armlmd_license(timeout):
+    """Check if any of the provided license server can be reached, or
+    if a license file is provided. This allows to fail early and fast,
+    as fastmodel code makes multiple lengthy attempts to connect to
+    license server. "timeout" is in seconds.
+    """
+    servers = os.environ[ARM_LICENSE_ENV].split(":")
+
+    extras = list()
+    # Add LM_LICENSE_ENV to the list, if set.
+    if LM_LICENSE_ENV in os.environ and os.environ[LM_LICENSE_ENV]:
+        extras += os.environ[LM_LICENSE_ENV].split(":")
+    # Fastmodel appears to always add this file.
+    extras.append("/opt/arm/licenses/license.dat")
+    for extra in extras:
+        if extra not in servers:
+            servers.append(extra)
+
+    for server in servers:
+        if os.path.exists(server):
+            logging.debug("License file %s exists." % server)
+            break
+
+        tuple = server.split("@")
+        if len(tuple) != 2:
+            # Probably not a server, and we know the file doesn't exist.
+            logging.debug('License file "%s" does not exist.' % server)
+            continue
+
+        try:
+            # Try to connect to license server. This doesn't attempt to
+            # communicate with it, just checking reachability.
+            s = socket.create_connection(
+                (tuple[1], int(tuple[0])), timeout=timeout
+            )
+            s.close()
+            logging.debug("License server %s is reachable." % server)
+            break
+        except Exception as e:
+            logging.debug(
+                "Cannot connect to license server %s (%s: %s)."
+                % (server, type(e).__name__, e)
+            )
+    else:
+        raise ConnectionError(
+            "Cannot connect to any of the license servers (%s)."
+            % ", ".join(servers)
+        )
 
 
 # These methods wrap much of the SystemC Export API described in section
@@ -142,9 +196,15 @@ def scx_get_min_sync_latency(arg=None):
 
 # This should be called once per simulation
 def setup_simulation(
-    sim_name, min_sync_latency=100.0 / 100000000, exit_on_dmi_warning=False
+    sim_name,
+    min_sync_latency=100.0 / 100000000,
+    exit_on_dmi_warning=False,
+    license_precheck=False,
+    license_precheck_timeout=1,
 ):
     set_armlmd_license_file()
+    if license_precheck:
+        check_armlmd_license(license_precheck_timeout)
     scx_initialize(sim_name)
     scx_set_min_sync_latency(min_sync_latency)
     if exit_on_dmi_warning:

From cdab011373f74ec8a8810af09c5b2ee52a83242c Mon Sep 17 00:00:00 2001
From: paikunal <kunpai@ucdavis.edu>
Date: Tue, 7 Mar 2023 10:22:48 -0800
Subject: [PATCH 250/492] configs: Adds an example script for POWER Hello

Used the "power-hello" resource to make an
stdlib example script for that resource

Change-Id: Ia8a051330e263617aa0e2ef08321d01cfa1093c4
Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/68737
Reviewed-by: Boris Shingarov <shingarov@labware.com>
Maintainer: Boris Shingarov <shingarov@labware.com>
Tested-by: kokoro <noreply+kokoro@google.com>
---
 configs/example/gem5_library/power-hello.py | 89 +++++++++++++++++++++
 1 file changed, 89 insertions(+)
 create mode 100644 configs/example/gem5_library/power-hello.py

diff --git a/configs/example/gem5_library/power-hello.py b/configs/example/gem5_library/power-hello.py
new file mode 100644
index 0000000000..cf31778945
--- /dev/null
+++ b/configs/example/gem5_library/power-hello.py
@@ -0,0 +1,89 @@
+# Copyright (c) 2023 The Regents of the University of California
+# All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are
+# met: redistributions of source code must retain the above copyright
+# notice, this list of conditions and the following disclaimer;
+# redistributions in binary form must reproduce the above copyright
+# notice, this list of conditions and the following disclaimer in the
+# documentation and/or other materials provided with the distribution;
+# neither the name of the copyright holders nor the names of its
+# contributors may be used to endorse or promote products derived from
+# this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+"""
+This gem5 configuation script creates a simple board to run a POWER
+"hello world" binary.
+
+This is setup is the close to the simplest setup possible using the gem5
+library. It does not contain any kind of caching, IO, or any non-essential
+components.
+
+Usage
+-----
+
+```
+scons build/POWER/gem5.opt
+./build/POWER/gem5.opt configs/example/gem5_library/power-hello.py
+```
+"""
+
+from gem5.isas import ISA
+from gem5.utils.requires import requires
+from gem5.resources.resource import Resource
+from gem5.components.memory import SingleChannelDDR4_2400
+from gem5.components.processors.cpu_types import CPUTypes
+from gem5.components.boards.simple_board import SimpleBoard
+from gem5.components.cachehierarchies.classic.no_cache import NoCache
+from gem5.components.processors.simple_processor import SimpleProcessor
+from gem5.simulate.simulator import Simulator
+
+# This check ensures the gem5 binary is compiled to the POWER ISA target.
+# If not, an exception will be thrown.
+requires(isa_required=ISA.POWER)
+
+# In this setup we don't have a cache. `NoCache` can be used for such setups.
+cache_hierarchy = NoCache()
+
+# We use a single channel DDR4_2400 memory system
+memory = SingleChannelDDR4_2400(size="32MB")
+
+# We use a simple ATOMIC processor with one core.
+processor = SimpleProcessor(
+    cpu_type=CPUTypes.ATOMIC, isa=ISA.POWER, num_cores=1
+)
+
+# The gem5 library simple board which can be used to run simple SE-mode
+# simulations.
+board = SimpleBoard(
+    clk_freq="3GHz",
+    processor=processor,
+    memory=memory,
+    cache_hierarchy=cache_hierarchy,
+)
+
+board.set_se_binary_workload(Resource("power-hello"))
+
+# Lastly we run the simulation.
+simulator = Simulator(board=board)
+simulator.run()
+
+print(
+    "Exiting @ tick {} because {}.".format(
+        simulator.get_current_tick(),
+        simulator.get_last_exit_event_cause(),
+    )
+)

From 54e06f88c25a2284e4cd053a27bb868b1260fe19 Mon Sep 17 00:00:00 2001
From: Alex Richardson <alexrichardson@google.com>
Date: Wed, 8 Mar 2023 09:19:14 +0000
Subject: [PATCH 251/492] tests: Fix import path in simple_binary_run.py

We should be using gem5.components instead of python.gem5.components.
In https://gem5-review.git.corp.google.com/c/public/gem5/+/68518 I was
seeing the RISC-V tests fail with
`ModuleNotFoundError: No module named 'python.gem5.components.processors.base_cpu_core'`.

This fixes the issue for me with the RISC-V tests. I also searched for
other similar imports and I've removed a similar (unused) one in
x86_boot_exit_run.py.

Change-Id: I61a0c4c27724854956f778f14e1fcfafea927ffd
Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/68757
Reviewed-by: Bobby Bruce <bbruce@ucdavis.edu>
Maintainer: Bobby Bruce <bbruce@ucdavis.edu>
Tested-by: kokoro <noreply+kokoro@google.com>
---
 tests/gem5/configs/simple_binary_run.py | 3 +--
 tests/gem5/configs/x86_boot_exit_run.py | 3 ---
 2 files changed, 1 insertion(+), 5 deletions(-)

diff --git a/tests/gem5/configs/simple_binary_run.py b/tests/gem5/configs/simple_binary_run.py
index fbb0313f93..5540e806ba 100644
--- a/tests/gem5/configs/simple_binary_run.py
+++ b/tests/gem5/configs/simple_binary_run.py
@@ -40,6 +40,7 @@ from gem5.components.memory import SingleChannelDDR3_1600
 from gem5.components.boards.simple_board import SimpleBoard
 from gem5.components.cachehierarchies.classic.no_cache import NoCache
 from gem5.components.processors.simple_processor import SimpleProcessor
+from gem5.components.processors.base_cpu_core import BaseCPUCore
 from gem5.components.processors.base_cpu_processor import BaseCPUProcessor
 from gem5.components.processors.simple_core import SimpleCore
 from gem5.components.boards.mem_mode import MemMode
@@ -52,8 +53,6 @@ from m5.util import fatal
 import argparse
 import importlib
 
-from python.gem5.components.processors.base_cpu_core import BaseCPUCore
-
 cpu_types_string_map = {
     CPUTypes.ATOMIC: "AtomicSimpleCPU",
     CPUTypes.O3: "O3CPU",
diff --git a/tests/gem5/configs/x86_boot_exit_run.py b/tests/gem5/configs/x86_boot_exit_run.py
index b1cbc647b2..5458b6db6c 100644
--- a/tests/gem5/configs/x86_boot_exit_run.py
+++ b/tests/gem5/configs/x86_boot_exit_run.py
@@ -46,9 +46,6 @@ from gem5.resources.workload import Workload
 import argparse
 import importlib
 
-from python.gem5.components.boards.kernel_disk_workload import (
-    KernelDiskWorkload,
-)
 
 parser = argparse.ArgumentParser(
     description="A script to run the gem5 boot test. This test boots the "

From 79d407280ca0d7553b1f29a9533f64664b8ecb65 Mon Sep 17 00:00:00 2001
From: Alex Richardson <alexrichardson@google.com>
Date: Mon, 27 Feb 2023 16:58:44 +0000
Subject: [PATCH 252/492] arch-riscv: Fix invalid std::map access

The CSRData map uses a RISC-V CSR number as the key rather than one of
the MISCREG_* enumerators. Use MiscRegNames[] instead to stringify the
argument for the debug message.

Change-Id: I2533bc29d148d3b34c01022eeaeedf64c39a99b9
Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/68759
Reviewed-by: Jason Lowe-Power <power.jg@gmail.com>
Maintainer: Jason Lowe-Power <power.jg@gmail.com>
Tested-by: kokoro <noreply+kokoro@google.com>
---
 src/arch/riscv/isa.cc | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/arch/riscv/isa.cc b/src/arch/riscv/isa.cc
index d778957b9e..7964de51ec 100644
--- a/src/arch/riscv/isa.cc
+++ b/src/arch/riscv/isa.cc
@@ -510,7 +510,7 @@ ISA::setMiscReg(RegIndex idx, RegVal val)
 {
     if (idx >= MISCREG_CYCLE && idx <= MISCREG_HPMCOUNTER31) {
         // Ignore writes to HPM counters for now
-        warn("Ignoring write to %s.\n", CSRData.at(idx).name);
+        warn("Ignoring write to miscreg %s.\n", MiscRegNames[idx]);
     } else {
         switch (idx) {
 

From 6841e1aa5a1738961940fece2b35baf77c8c224d Mon Sep 17 00:00:00 2001
From: Hoa Nguyen <hoanguyen@ucdavis.edu>
Date: Fri, 10 Mar 2023 13:53:08 -0800
Subject: [PATCH 253/492] stdlib: Fix bug in MESI_Three_Level_Cache
 initialization

Change-Id: I2d06c842955aa1868053a0d852fc523392480154
Signed-off-by: Hoa Nguyen <hoanguyen@ucdavis.edu>
Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/68857
Tested-by: kokoro <noreply+kokoro@google.com>
Reviewed-by: Jason Lowe-Power <power.jg@gmail.com>
Maintainer: Jason Lowe-Power <power.jg@gmail.com>
---
 .../cachehierarchies/ruby/caches/mesi_three_level/l1_cache.py | 4 ++--
 .../cachehierarchies/ruby/caches/mesi_three_level/l2_cache.py | 2 +-
 2 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/src/python/gem5/components/cachehierarchies/ruby/caches/mesi_three_level/l1_cache.py b/src/python/gem5/components/cachehierarchies/ruby/caches/mesi_three_level/l1_cache.py
index 9f47e411f8..b4854816fb 100644
--- a/src/python/gem5/components/cachehierarchies/ruby/caches/mesi_three_level/l1_cache.py
+++ b/src/python/gem5/components/cachehierarchies/ruby/caches/mesi_three_level/l1_cache.py
@@ -68,14 +68,14 @@ class L1Cache(L0Cache_Controller):
         self.Icache = RubyCache(
             size=l1i_size,
             assoc=l1i_assoc,
-            start_index_bit=self.getBlockSizeBits(cache_line_size.value),
+            start_index_bit=self.getBlockSizeBits(cache_line_size),
             is_icache=True,
             replacement_policy=LRURP(),
         )
         self.Dcache = RubyCache(
             size=l1d_size,
             assoc=l1d_assoc,
-            start_index_bit=self.getBlockSizeBits(cache_line_size.value),
+            start_index_bit=self.getBlockSizeBits(cache_line_size),
             is_icache=False,
             replacement_policy=LRURP(),
         )
diff --git a/src/python/gem5/components/cachehierarchies/ruby/caches/mesi_three_level/l2_cache.py b/src/python/gem5/components/cachehierarchies/ruby/caches/mesi_three_level/l2_cache.py
index d8c965924e..d54e1ab8dc 100644
--- a/src/python/gem5/components/cachehierarchies/ruby/caches/mesi_three_level/l2_cache.py
+++ b/src/python/gem5/components/cachehierarchies/ruby/caches/mesi_three_level/l2_cache.py
@@ -67,7 +67,7 @@ class L2Cache(L1Cache_Controller):
         self.cache = RubyCache(
             size=l2_size,
             assoc=l2_assoc,
-            start_index_bit=self.getBlockSizeBits(cache_line_size.value),
+            start_index_bit=self.getBlockSizeBits(cache_line_size),
             is_icache=False,
         )
         # l2_select_num_bits is ruby backend terminology.

From 3fe129e8ea375f45879de414c02444c68659ad1d Mon Sep 17 00:00:00 2001
From: Hoa Nguyen <hoanguyen@ucdavis.edu>
Date: Fri, 10 Mar 2023 23:52:57 -0800
Subject: [PATCH 254/492] stdlib: use atomic_noncaching when using
 AtomicSimpleCPU with Ruby

mem_mode is supposed to be atomic_noncaching when running
AtomicSimpleCPU with Ruby cache.

Change-Id: Icb419f9370038f5c1f80dd879b187338279a5b93
Signed-off-by: Hoa Nguyen <hoanguyen@ucdavis.edu>
Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/68877
Reviewed-by: Jason Lowe-Power <power.jg@gmail.com>
Maintainer: Jason Lowe-Power <power.jg@gmail.com>
Tested-by: kokoro <noreply+kokoro@google.com>
---
 .../gem5/components/processors/base_cpu_processor.py   |  1 +
 .../processors/simple_switchable_processor.py          | 10 ++++++++++
 2 files changed, 11 insertions(+)

diff --git a/src/python/gem5/components/processors/base_cpu_processor.py b/src/python/gem5/components/processors/base_cpu_processor.py
index d311a0fdc0..9a7561587a 100644
--- a/src/python/gem5/components/processors/base_cpu_processor.py
+++ b/src/python/gem5/components/processors/base_cpu_processor.py
@@ -97,6 +97,7 @@ class BaseCPUProcessor(AbstractProcessor):
                     "'atomic_noncaching' memory mode. This will skip caching "
                     "completely."
                 )
+                board.set_mem_mode(MemMode.ATOMIC_NONCACHING)
             else:
                 board.set_mem_mode(MemMode.ATOMIC)
         else:
diff --git a/src/python/gem5/components/processors/simple_switchable_processor.py b/src/python/gem5/components/processors/simple_switchable_processor.py
index 56603fa98b..e3978412c3 100644
--- a/src/python/gem5/components/processors/simple_switchable_processor.py
+++ b/src/python/gem5/components/processors/simple_switchable_processor.py
@@ -103,6 +103,16 @@ class SimpleSwitchableProcessor(SwitchableProcessor):
     def incorporate_processor(self, board: AbstractBoard) -> None:
         super().incorporate_processor(board=board)
 
+        if (
+            board.get_cache_hierarchy().is_ruby()
+            and self._mem_mode == MemMode.ATOMIC
+        ):
+            warn(
+                "Using an atomic core with Ruby will result in "
+                "'atomic_noncaching' memory mode. This will skip caching "
+                "completely."
+            )
+            self._mem_mode = MemMode.ATOMIC_NONCACHING
         board.set_mem_mode(self._mem_mode)
 
     def switch(self):

From 9fe9b2853c27c846206a2c6b48f186cb8ed5097d Mon Sep 17 00:00:00 2001
From: Gabriel Busnot <gabriel.busnot@arteris.com>
Date: Mon, 20 Jun 2022 16:52:15 +0200
Subject: [PATCH 255/492] base: Create a gem5 type_traits.hh header

That header currently contains type traits that derive the class, the
return type and the arguments of a member function from a pointer to
that member function.

Change-Id: I41dd41056f507016219d6111d25c8cb4c2ad3439
Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/67652
Reviewed-by: Daniel Carvalho <odanrc@yahoo.com.br>
Maintainer: Daniel Carvalho <odanrc@yahoo.com.br>
Tested-by: kokoro <noreply+kokoro@google.com>
---
 src/base/type_traits.hh | 97 +++++++++++++++++++++++++++++++++++++++++
 1 file changed, 97 insertions(+)
 create mode 100644 src/base/type_traits.hh

diff --git a/src/base/type_traits.hh b/src/base/type_traits.hh
new file mode 100644
index 0000000000..1fec93d9d1
--- /dev/null
+++ b/src/base/type_traits.hh
@@ -0,0 +1,97 @@
+/*
+ * Copyright (c) 2022 Arteris, Inc. and its applicable licensors and
+ * affiliates.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are
+ * met: redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer;
+ * redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution;
+ * neither the name of the copyright holders nor the names of its
+ * contributors may be used to endorse or promote products derived from
+ * this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef __BASE_TYPETRAITS_HH__
+#define __BASE_TYPETRAITS_HH__
+
+#include <tuple>
+#include <type_traits>
+
+namespace gem5
+{
+
+/*
+ * Type traits that enable inspecting the signature of a member function based
+ * on a pointer to that function. Specifically, these type traits provide a
+ * class_t, a return_t and  a argsTuple_t alias that correspond respectively to
+ * the class that the function is a member of, the return type of the member
+ * function and the list of parameters types packed in a tuple. Convenience
+ * Convenience template aliases are also provided.
+ *
+ * Example, assuming "struct Struct {void foo(int, bool);};":
+ *    - MemberFunctionClass_t<&Struct::foo> is Struct.
+ *    - MemberFunctionReturn_t<&Struct::foo> is void.
+ *    - MemberFunctionArgsTuple_t<&Struct::foo> is std::tuple<int, bool>.
+ */
+
+template<typename F>
+struct MemberFunctionSignature;
+template<typename C, typename R, class... A>
+struct MemberFunctionSignature<R(C::*)(A...)>
+{
+    using class_t = C;
+    using return_t = R;
+    using argsTuple_t = std::tuple<A...>;
+};
+template<typename C, typename R, class... A>
+struct MemberFunctionSignature<R(C::*)(A...) const>
+{
+    using class_t = std::add_const_t<C>;
+    using return_t = R;
+    using argsTuple_t = std::tuple<A...>;
+};
+template<typename C, typename R, class... A>
+struct MemberFunctionSignature<R(C::*)(A...) volatile>
+{
+    using class_t = std::add_volatile_t<C>;
+    using return_t = R;
+    using argsTuple_t = std::tuple<A...>;
+};
+template<typename C, typename R, class... A>
+struct MemberFunctionSignature<R(C::*)(A...) const volatile>
+{
+    using class_t = std::add_cv_t<C>;
+    using return_t = R;
+    using argsTuple_t = std::tuple<A...>;
+};
+template<auto F>
+using MemberFunctionClass_t =
+    typename MemberFunctionSignature<decltype(F)>::class_t;
+
+template<auto F>
+using MemberFunctionReturn_t =
+    typename MemberFunctionSignature<decltype(F)>::return_t;
+
+template<auto F>
+using MemberFunctionArgsTuple_t =
+    typename MemberFunctionSignature<decltype(F)>::argsTuple_t;
+
+} // namespace gem5
+
+#endif // __BASE_TYPETRAITS_HH__

From 4ec432caa435e69946fcf6735838027e6cd5789a Mon Sep 17 00:00:00 2001
From: Gabriel Busnot <gabriel.busnot@arteris.com>
Date: Thu, 2 Feb 2023 16:09:57 +0000
Subject: [PATCH 256/492] sim: Define a new MemberEventWrapper event class

This new event class simplifies the use of EventWrapper and aims at
superseeding it. EventWrapper has been redefined in terms of
MemberEventWrapper.

MemberEventWrapper makes use of the new type traits to simplify
template parameterization and encourage its use over SimpleEvent that
often wraps a lambda that merely calls a member function.

Change-Id: Ie59e4c51705b9c2b2faa27097678d7d85f5b99c6
Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/67653
Reviewed-by: Jason Lowe-Power <power.jg@gmail.com>
Tested-by: kokoro <noreply+kokoro@google.com>
Maintainer: Daniel Carvalho <odanrc@yahoo.com.br>
Reviewed-by: Daniel Carvalho <odanrc@yahoo.com.br>
---
 src/sim/eventq.hh | 74 ++++++++++++++++++++++++++++++++---------------
 1 file changed, 51 insertions(+), 23 deletions(-)

diff --git a/src/sim/eventq.hh b/src/sim/eventq.hh
index 62495bf86d..b46a25bf38 100644
--- a/src/sim/eventq.hh
+++ b/src/sim/eventq.hh
@@ -46,7 +46,9 @@
 
 #include "base/debug.hh"
 #include "base/flags.hh"
+#include "base/named.hh"
 #include "base/trace.hh"
+#include "base/type_traits.hh"
 #include "base/types.hh"
 #include "base/uncontended_mutex.hh"
 #include "debug/Event.hh"
@@ -1071,38 +1073,64 @@ class EventManager
     void setCurTick(Tick newVal) { eventq->setCurTick(newVal); }
 };
 
-template <class T, void (T::* F)()>
-class EventWrapper : public Event
+/**
+ * @brief Wrap a member function inside MemberEventWrapper to use it as an
+ * event callback. This wrapper should be prefered over EventFunctionWrapper
+ * for better performance and type safety.
+ *
+ * Wrapping a function *process* member of a class *klass* can be done by
+ * adding a member variable of the following type:
+ * MemberEventWrapper<&klass::process>.
+ *
+ * It is required that klass::process takes no explicit argument and returns no
+ * value as these could not be handled by the event scheduler.
+ *
+ * @tparam F Pointer to the member function wrapped in this event.
+ */
+template <auto F>
+class MemberEventWrapper final: public Event, public Named
 {
-  private:
-    T *object;
+    using CLASS = MemberFunctionClass_t<F>;
+    static_assert(std::is_same_v<void, MemberFunctionReturn_t<F>>);
+    static_assert(std::is_same_v<MemberFunctionArgsTuple_t<F>, std::tuple<>>);
 
-  public:
-    EventWrapper(T *obj, bool del = false, Priority p = Default_Pri)
-        : Event(p), object(obj)
+public:
+    MemberEventWrapper(CLASS *object,
+                       bool del = false,
+                       Priority p = Default_Pri):
+        Event(p),
+        Named(object->name() + ".wrapped_event"),
+        mObject(object)
     {
-        if (del)
-            setFlags(AutoDelete);
+        gem5_assert(mObject);
+        if (del) setFlags(AutoDelete);
     }
 
-    EventWrapper(T &obj, bool del = false, Priority p = Default_Pri)
-        : Event(p), object(&obj)
-    {
-        if (del)
-            setFlags(AutoDelete);
+    /**
+     * @brief Construct a new MemberEventWrapper object
+     *
+     * @param object instance of the object to call the wrapped member func on
+     * @param del if true, flag this event as AutoDelete
+     * @param p priority of this event
+     */
+    MemberEventWrapper(CLASS &object,
+                       bool del = false,
+                       Priority p = Default_Pri):
+        MemberEventWrapper(&object, del, p)
+    {}
+
+    void process() override {
+        (mObject->*F)();
     }
 
-    void process() { (object->*F)(); }
-
-    const std::string
-    name() const
-    {
-        return object->name() + ".wrapped_event";
-    }
-
-    const char *description() const { return "EventWrapped"; }
+    const char *description() const override { return "EventWrapped"; }
+private:
+    CLASS *mObject;
 };
 
+template <class T, void (T::* F)()>
+using EventWrapper = MemberEventWrapper<F>;
+
 class EventFunctionWrapper : public Event
 {
   private:

From 1bb8cd3d44c563877d486953f0534c4dc9daa9e1 Mon Sep 17 00:00:00 2001
From: Gabriel Busnot <gabriel.busnot@arteris.com>
Date: Thu, 2 Feb 2023 16:16:24 +0000
Subject: [PATCH 257/492] sim: Switch from EventWrapper to MemberEventWrapper
 before deprec

Change-Id: I25c81787d522a0dd063112b6727669da46e0f0e7
Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/67655
Reviewed-by: Daniel Carvalho <odanrc@yahoo.com.br>
Tested-by: kokoro <noreply+kokoro@google.com>
Maintainer: Jason Lowe-Power <power.jg@gmail.com>
---
 src/arch/sparc/isa.hh                                |  6 +++---
 src/base/remote_gdb.hh                               |  6 +++---
 src/dev/arm/smmu_v3.hh                               |  2 +-
 src/dev/arm/smmu_v3_deviceifc.hh                     |  4 +---
 src/dev/arm/smmu_v3_proc.cc                          |  3 +--
 src/mem/qos/mem_sink.hh                              |  4 +---
 src/sim/power_domain.hh                              |  2 +-
 src/systemc/core/kernel.hh                           |  2 +-
 src/systemc/core/scheduler.hh                        | 12 ++++++------
 .../systemc_simple_object/feeder.hh                  |  2 +-
 10 files changed, 19 insertions(+), 24 deletions(-)

diff --git a/src/arch/sparc/isa.hh b/src/arch/sparc/isa.hh
index 22bfba4f06..dee71d6e0e 100644
--- a/src/arch/sparc/isa.hh
+++ b/src/arch/sparc/isa.hh
@@ -133,13 +133,13 @@ class ISA : public BaseISA
     void processSTickCompare();
     void processHSTickCompare();
 
-    typedef EventWrapper<ISA, &ISA::processTickCompare> TickCompareEvent;
+    typedef MemberEventWrapper<&ISA::processTickCompare> TickCompareEvent;
     TickCompareEvent *tickCompare = nullptr;
 
-    typedef EventWrapper<ISA, &ISA::processSTickCompare> STickCompareEvent;
+    typedef MemberEventWrapper<&ISA::processSTickCompare> STickCompareEvent;
     STickCompareEvent *sTickCompare = nullptr;
 
-    typedef EventWrapper<ISA, &ISA::processHSTickCompare> HSTickCompareEvent;
+    typedef MemberEventWrapper<&ISA::processHSTickCompare> HSTickCompareEvent;
     HSTickCompareEvent *hSTickCompare = nullptr;
 
     static const int NumGlobalRegs = 8;
diff --git a/src/base/remote_gdb.hh b/src/base/remote_gdb.hh
index 1c5cd9c7af..7981a13064 100644
--- a/src/base/remote_gdb.hh
+++ b/src/base/remote_gdb.hh
@@ -274,8 +274,8 @@ class BaseRemoteGDB
 
     BaseGdbRegCache *regCachePtr = nullptr;
 
-    EventWrapper<BaseRemoteGDB, &BaseRemoteGDB::connect> connectEvent;
-    EventWrapper<BaseRemoteGDB, &BaseRemoteGDB::detach>  disconnectEvent;
+    MemberEventWrapper<&BaseRemoteGDB::connect> connectEvent;
+    MemberEventWrapper<&BaseRemoteGDB::detach>  disconnectEvent;
 
     class TrapEvent : public Event
     {
@@ -308,7 +308,7 @@ class BaseRemoteGDB
 
     // Single step.
     void singleStep();
-    EventWrapper<BaseRemoteGDB, &BaseRemoteGDB::singleStep> singleStepEvent;
+    MemberEventWrapper<&BaseRemoteGDB::singleStep> singleStepEvent;
 
     void clearSingleStep();
     void setSingleStep();
diff --git a/src/dev/arm/smmu_v3.hh b/src/dev/arm/smmu_v3.hh
index 25b91ff620..8721352c47 100644
--- a/src/dev/arm/smmu_v3.hh
+++ b/src/dev/arm/smmu_v3.hh
@@ -167,7 +167,7 @@ class SMMUv3 : public ClockedObject
     SMMUAction runProcessTiming(SMMUProcess *proc, PacketPtr pkt);
 
     void processCommands();
-    EventWrapper<SMMUv3, &SMMUv3::processCommands> processCommandsEvent;
+    MemberEventWrapper<&SMMUv3::processCommands> processCommandsEvent;
 
     void processCommand(const SMMUCommand &cmd);
 
diff --git a/src/dev/arm/smmu_v3_deviceifc.hh b/src/dev/arm/smmu_v3_deviceifc.hh
index c4ffa379f6..3152f8a698 100644
--- a/src/dev/arm/smmu_v3_deviceifc.hh
+++ b/src/dev/arm/smmu_v3_deviceifc.hh
@@ -114,9 +114,7 @@ class SMMUv3DeviceInterface : public ClockedObject
     bool atsDeviceNeedsRetry;
 
     SMMUDeviceRetryEvent sendDeviceRetryEvent;
-    EventWrapper<
-        SMMUv3DeviceInterface,
-        &SMMUv3DeviceInterface::atsSendDeviceRetry> atsSendDeviceRetryEvent;
+    MemberEventWrapper<&SMMUv3DeviceInterface::atsSendDeviceRetry> atsSendDeviceRetryEvent;
 
     Port& getPort(const std::string &name, PortID id) override;
 
diff --git a/src/dev/arm/smmu_v3_proc.cc b/src/dev/arm/smmu_v3_proc.cc
index 2cf2cf9e98..ef3db5dd4d 100644
--- a/src/dev/arm/smmu_v3_proc.cc
+++ b/src/dev/arm/smmu_v3_proc.cc
@@ -199,8 +199,7 @@ SMMUProcess::doBroadcastSignal(SMMUSignal &sig)
 void
 SMMUProcess::scheduleWakeup(Tick when)
 {
-    auto *ep = new EventWrapper<
-        SMMUProcess, &SMMUProcess::wakeup> (this, true);
+    auto *ep = new MemberEventWrapper<&SMMUProcess::wakeup> (this, true);
 
     smmu.schedule(ep, when);
 }
diff --git a/src/mem/qos/mem_sink.hh b/src/mem/qos/mem_sink.hh
index d2310c65fe..bd42a9a2ed 100644
--- a/src/mem/qos/mem_sink.hh
+++ b/src/mem/qos/mem_sink.hh
@@ -222,9 +222,7 @@ class MemSinkCtrl : public MemCtrl
     void processNextReqEvent();
 
     /** Event wrapper to schedule next request handler function */
-    EventWrapper<
-        MemSinkCtrl,
-        &MemSinkCtrl::processNextReqEvent> nextReqEvent;
+    MemberEventWrapper<&MemSinkCtrl::processNextReqEvent> nextReqEvent;
 
     /**
      * Check if the read queue has room for more entries
diff --git a/src/sim/power_domain.hh b/src/sim/power_domain.hh
index 96233e436b..1264d8f1ba 100644
--- a/src/sim/power_domain.hh
+++ b/src/sim/power_domain.hh
@@ -151,7 +151,7 @@ class PowerDomain : public PowerState
     /**
      * Event to update the power states of the followers
      */
-    EventWrapper<PowerDomain, &PowerDomain::setFollowerPowerStates>
+    MemberEventWrapper<&PowerDomain::setFollowerPowerStates>
                 pwrStateUpdateEvent;
 
   protected:
diff --git a/src/systemc/core/kernel.hh b/src/systemc/core/kernel.hh
index 9dba9030be..ec47569279 100644
--- a/src/systemc/core/kernel.hh
+++ b/src/systemc/core/kernel.hh
@@ -65,7 +65,7 @@ class Kernel : public gem5::SimObject
   private:
     static void stopWork();
 
-    gem5::EventWrapper<Kernel, &Kernel::t0Handler> t0Event;
+    gem5::MemberEventWrapper<&Kernel::t0Handler> t0Event;
 };
 
 extern Kernel *kernel;
diff --git a/src/systemc/core/scheduler.hh b/src/systemc/core/scheduler.hh
index 6eabb5606c..49ad6c6f0d 100644
--- a/src/systemc/core/scheduler.hh
+++ b/src/systemc/core/scheduler.hh
@@ -465,13 +465,13 @@ class Scheduler
     }
 
     void runReady();
-    gem5::EventWrapper<Scheduler, &Scheduler::runReady> readyEvent;
+    gem5::MemberEventWrapper<&Scheduler::runReady> readyEvent;
     void scheduleReadyEvent();
 
     void pause();
     void stop();
-    gem5::EventWrapper<Scheduler, &Scheduler::pause> pauseEvent;
-    gem5::EventWrapper<Scheduler, &Scheduler::stop> stopEvent;
+    gem5::MemberEventWrapper<&Scheduler::pause> pauseEvent;
+    gem5::MemberEventWrapper<&Scheduler::stop> stopEvent;
 
     const ::sc_core::sc_report *_throwUp;
 
@@ -484,7 +484,7 @@ class Scheduler
                  timeSlots.front()->targeted_when > maxTick) &&
                 initList.empty());
     }
-    gem5::EventWrapper<Scheduler, &Scheduler::pause> starvationEvent;
+    gem5::MemberEventWrapper<&Scheduler::pause> starvationEvent;
     void scheduleStarvationEvent();
 
     bool _elaborationDone;
@@ -502,10 +502,10 @@ class Scheduler
             _changeStamp++;
         pause();
     }
-    gem5::EventWrapper<Scheduler, &Scheduler::maxTickFunc> maxTickEvent;
+    gem5::MemberEventWrapper<&Scheduler::maxTickFunc> maxTickEvent;
 
     void timeAdvances() { trace(false); }
-    gem5::EventWrapper<Scheduler, &Scheduler::timeAdvances> timeAdvancesEvent;
+    gem5::MemberEventWrapper<&Scheduler::timeAdvances> timeAdvancesEvent;
     void
     scheduleTimeAdvancesEvent()
     {
diff --git a/util/systemc/systemc_within_gem5/systemc_simple_object/feeder.hh b/util/systemc/systemc_within_gem5/systemc_simple_object/feeder.hh
index c843c83e2b..865362cf8f 100644
--- a/util/systemc/systemc_within_gem5/systemc_simple_object/feeder.hh
+++ b/util/systemc/systemc_within_gem5/systemc_simple_object/feeder.hh
@@ -64,7 +64,7 @@ class Feeder : public gem5::SimObject
     // except to help interact with systemc objects/models.
     sc_core::sc_buffer<const char *> buf;
 
-    gem5::EventWrapper<Feeder, &Feeder::feed> event;
+    gem5::MemberEventWrapper<&Feeder::feed> event;
 
     void startup() override;
 };

From 7813e294ff9fc83511f0d6f1d8fb433c1b56db7b Mon Sep 17 00:00:00 2001
From: Gabriel Busnot <gabriel.busnot@arteris.com>
Date: Thu, 2 Feb 2023 16:14:04 +0000
Subject: [PATCH 258/492] sim: Deprecate EventWrapper in favour of
 MemberEventWrapper

Change-Id: I87363fb36cd998e7f0afeb25381e5b230a15b493
Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/67654
Reviewed-by: Jason Lowe-Power <power.jg@gmail.com>
Maintainer: Jason Lowe-Power <power.jg@gmail.com>
Reviewed-by: Daniel Carvalho <odanrc@yahoo.com.br>
Tested-by: kokoro <noreply+kokoro@google.com>
---
 src/sim/eventq.hh | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/src/sim/eventq.hh b/src/sim/eventq.hh
index b46a25bf38..ff9d4bfa08 100644
--- a/src/sim/eventq.hh
+++ b/src/sim/eventq.hh
@@ -1129,7 +1129,8 @@ private:
 };
 
 template <class T, void (T::* F)()>
-using EventWrapper = MemberEventWrapper<F>;
+using EventWrapper [[deprecated("Use MemberEventWrapper instead")]]
+    = MemberEventWrapper<F>;
 
 class EventFunctionWrapper : public Event
 {

From ba19f967d7529542f790bcd15a2746e399591fdf Mon Sep 17 00:00:00 2001
From: Gabriel Busnot <gabriel.busnot@arteris.com>
Date: Thu, 2 Feb 2023 16:54:33 +0000
Subject: [PATCH 259/492] sim: Use ref constructor of MemberEventWrapper
 everywhere

Change-Id: I77989aa7318142634c771c558293138e7b1e8e51
Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/67657
Maintainer: Bobby Bruce <bbruce@ucdavis.edu>
Tested-by: kokoro <noreply+kokoro@google.com>
Reviewed-by: Daniel Carvalho <odanrc@yahoo.com.br>
---
 src/arch/sparc/isa.cc            |  6 +++---
 src/arch/sparc/ua2005.cc         |  6 +++---
 src/base/remote_gdb.cc           |  2 +-
 src/dev/arm/smmu_v3.cc           |  2 +-
 src/dev/arm/smmu_v3_deviceifc.cc |  2 +-
 src/dev/arm/smmu_v3_proc.cc      |  2 +-
 src/mem/qos/mem_sink.cc          |  2 +-
 src/systemc/core/kernel.cc       |  2 +-
 src/systemc/core/scheduler.cc    | 12 ++++++------
 9 files changed, 18 insertions(+), 18 deletions(-)

diff --git a/src/arch/sparc/isa.cc b/src/arch/sparc/isa.cc
index 38b3d1c3e2..e7807c2b0a 100644
--- a/src/arch/sparc/isa.cc
+++ b/src/arch/sparc/isa.cc
@@ -953,15 +953,15 @@ ISA::unserialize(CheckpointIn &cp)
     UNSERIALIZE_SCALAR(hstick_cmp);
 
     if (tick_cmp) {
-        tickCompare = new TickCompareEvent(this);
+        tickCompare = new TickCompareEvent(*this);
         schedule(tickCompare, tick_cmp);
     }
     if (stick_cmp)  {
-        sTickCompare = new STickCompareEvent(this);
+        sTickCompare = new STickCompareEvent(*this);
         schedule(sTickCompare, stick_cmp);
     }
     if (hstick_cmp)  {
-        hSTickCompare = new HSTickCompareEvent(this);
+        hSTickCompare = new HSTickCompareEvent(*this);
         schedule(hSTickCompare, hstick_cmp);
     }
 }
diff --git a/src/arch/sparc/ua2005.cc b/src/arch/sparc/ua2005.cc
index 3f5372af53..45cc9d75c4 100644
--- a/src/arch/sparc/ua2005.cc
+++ b/src/arch/sparc/ua2005.cc
@@ -107,7 +107,7 @@ ISA::setFSReg(int miscReg, RegVal val)
 
       case MISCREG_TICK_CMPR:
         if (tickCompare == NULL)
-            tickCompare = new TickCompareEvent(this);
+            tickCompare = new TickCompareEvent(*this);
         setMiscRegNoEffect(miscReg, val);
         if ((tick_cmpr & ~mask(63)) && tickCompare->scheduled())
             cpu->deschedule(tickCompare);
@@ -122,7 +122,7 @@ ISA::setFSReg(int miscReg, RegVal val)
 
       case MISCREG_STICK_CMPR:
         if (sTickCompare == NULL)
-            sTickCompare = new STickCompareEvent(this);
+            sTickCompare = new STickCompareEvent(*this);
         setMiscRegNoEffect(miscReg, val);
         if ((stick_cmpr & ~mask(63)) && sTickCompare->scheduled())
             cpu->deschedule(sTickCompare);
@@ -193,7 +193,7 @@ ISA::setFSReg(int miscReg, RegVal val)
 
       case MISCREG_HSTICK_CMPR:
         if (hSTickCompare == NULL)
-            hSTickCompare = new HSTickCompareEvent(this);
+            hSTickCompare = new HSTickCompareEvent(*this);
         setMiscRegNoEffect(miscReg, val);
         if ((hstick_cmpr & ~mask(63)) && hSTickCompare->scheduled())
             cpu->deschedule(hSTickCompare);
diff --git a/src/base/remote_gdb.cc b/src/base/remote_gdb.cc
index 43f53d1247..dd37a3503a 100644
--- a/src/base/remote_gdb.cc
+++ b/src/base/remote_gdb.cc
@@ -393,7 +393,7 @@ std::map<Addr, HardBreakpoint *> hardBreakMap;
 BaseRemoteGDB::BaseRemoteGDB(System *_system, int _port) :
         incomingConnectionEvent(nullptr), incomingDataEvent(nullptr),
         _port(_port), fd(-1), sys(_system),
-        connectEvent(this), disconnectEvent(this), trapEvent(this),
+        connectEvent(*this), disconnectEvent(*this), trapEvent(this),
         singleStepEvent(*this)
 {}
 
diff --git a/src/dev/arm/smmu_v3.cc b/src/dev/arm/smmu_v3.cc
index 41f7424e77..8ce8bd92b2 100644
--- a/src/dev/arm/smmu_v3.cc
+++ b/src/dev/arm/smmu_v3.cc
@@ -100,7 +100,7 @@ SMMUv3::SMMUv3(const SMMUv3Params &params) :
     deviceInterfaces(params.device_interfaces),
     commandExecutor(name() + ".cmd_exec", *this),
     regsMap(params.reg_map),
-    processCommandsEvent(this)
+    processCommandsEvent(*this)
 {
     fatal_if(regsMap.size() != SMMU_REG_SIZE,
         "Invalid register map size: %#x different than SMMU_REG_SIZE = %#x\n",
diff --git a/src/dev/arm/smmu_v3_deviceifc.cc b/src/dev/arm/smmu_v3_deviceifc.cc
index 166b85d727..0966150541 100644
--- a/src/dev/arm/smmu_v3_deviceifc.cc
+++ b/src/dev/arm/smmu_v3_deviceifc.cc
@@ -78,7 +78,7 @@ SMMUv3DeviceInterface::SMMUv3DeviceInterface(
     deviceNeedsRetry(false),
     atsDeviceNeedsRetry(false),
     sendDeviceRetryEvent(*this),
-    atsSendDeviceRetryEvent(this)
+    atsSendDeviceRetryEvent(*this)
 {}
 
 void
diff --git a/src/dev/arm/smmu_v3_proc.cc b/src/dev/arm/smmu_v3_proc.cc
index ef3db5dd4d..f0c2633cf5 100644
--- a/src/dev/arm/smmu_v3_proc.cc
+++ b/src/dev/arm/smmu_v3_proc.cc
@@ -199,7 +199,7 @@ SMMUProcess::doBroadcastSignal(SMMUSignal &sig)
 void
 SMMUProcess::scheduleWakeup(Tick when)
 {
-    auto *ep = new MemberEventWrapper<&SMMUProcess::wakeup> (this, true);
+    auto *ep = new MemberEventWrapper<&SMMUProcess::wakeup> (*this, true);
 
     smmu.schedule(ep, when);
 }
diff --git a/src/mem/qos/mem_sink.cc b/src/mem/qos/mem_sink.cc
index b6b77ca9df..66b945153b 100644
--- a/src/mem/qos/mem_sink.cc
+++ b/src/mem/qos/mem_sink.cc
@@ -60,7 +60,7 @@ MemSinkCtrl::MemSinkCtrl(const QoSMemSinkCtrlParams &p)
     readBufferSize(p.read_buffer_size),
     writeBufferSize(p.write_buffer_size), port(name() + ".port", *this),
     interface(p.interface),
-    retryRdReq(false), retryWrReq(false), nextRequest(0), nextReqEvent(this),
+    retryRdReq(false), retryWrReq(false), nextRequest(0), nextReqEvent(*this),
     stats(this)
 {
     // Resize read and write queue to allocate space
diff --git a/src/systemc/core/kernel.cc b/src/systemc/core/kernel.cc
index ae67e4676c..f6c96e5b2d 100644
--- a/src/systemc/core/kernel.cc
+++ b/src/systemc/core/kernel.cc
@@ -56,7 +56,7 @@ void Kernel::status(sc_core::sc_status s) { _status = s; }
 
 Kernel::Kernel(const Params &params, int) :
     gem5::SimObject(params),
-    t0Event(this, false, gem5::EventBase::Default_Pri - 1)
+    t0Event(*this, false, gem5::EventBase::Default_Pri - 1)
 {
     // Install ourselves as the scheduler's event manager.
     ::sc_gem5::scheduler.setEventQueue(eventQueue());
diff --git a/src/systemc/core/scheduler.cc b/src/systemc/core/scheduler.cc
index 42a2ca43b6..bcbc262119 100644
--- a/src/systemc/core/scheduler.cc
+++ b/src/systemc/core/scheduler.cc
@@ -44,14 +44,14 @@ namespace sc_gem5
 {
 
 Scheduler::Scheduler() :
-    eq(nullptr), readyEvent(this, false, ReadyPriority),
-    pauseEvent(this, false, PausePriority),
-    stopEvent(this, false, StopPriority), _throwUp(nullptr),
-    starvationEvent(this, false, StarvationPriority),
+    eq(nullptr), readyEvent(*this, false, ReadyPriority),
+    pauseEvent(*this, false, PausePriority),
+    stopEvent(*this, false, StopPriority), _throwUp(nullptr),
+    starvationEvent(*this, false, StarvationPriority),
     _elaborationDone(false), _started(false), _stopNow(false),
     _status(StatusOther), maxTick(gem5::MaxTick),
-    maxTickEvent(this, false, MaxTickPriority),
-    timeAdvancesEvent(this, false, TimeAdvancesPriority), _numCycles(0),
+    maxTickEvent(*this, false, MaxTickPriority),
+    timeAdvancesEvent(*this, false, TimeAdvancesPriority), _numCycles(0),
     _changeStamp(0), _current(nullptr), initDone(false), runToTime(true),
     runOnce(false)
 {}

From 99852d56876eb3b0e26ec2a15752321a4a047ebd Mon Sep 17 00:00:00 2001
From: Gabriel Busnot <gabriel.busnot@arteris.com>
Date: Thu, 2 Feb 2023 16:21:57 +0000
Subject: [PATCH 260/492] sim: Deprecate pointer version of MemberEventWrapper
 constructor

It makes no sense to initialize such event with nullptr. Favor the
reference version for safer behavior.

Change-Id: I695f41362a56aca98ceb52d49cf84be43f1465a2
Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/67656
Reviewed-by: Daniel Carvalho <odanrc@yahoo.com.br>
Tested-by: kokoro <noreply+kokoro@google.com>
Maintainer: Daniel Carvalho <odanrc@yahoo.com.br>
---
 src/sim/eventq.hh | 19 ++++++++++---------
 1 file changed, 10 insertions(+), 9 deletions(-)

diff --git a/src/sim/eventq.hh b/src/sim/eventq.hh
index ff9d4bfa08..cf1734b992 100644
--- a/src/sim/eventq.hh
+++ b/src/sim/eventq.hh
@@ -1095,16 +1095,12 @@ class MemberEventWrapper final: public Event, public Named
     static_assert(std::is_same_v<MemberFunctionArgsTuple_t<F>, std::tuple<>>);
 
 public:
+    [[deprecated("Use reference version of this constructor instead")]]
     MemberEventWrapper(CLASS *object,
                        bool del = false,
                        Priority p = Default_Pri):
-        Event(p),
-        Named(object->name() + ".wrapped_event"),
-        mObject(object)
-    {
-        gem5_assert(mObject);
-        if (del) setFlags(AutoDelete);
-    }
+        MemberEventWrapper{*object, del, p}
+    {}
 
     /**
      * @brief Construct a new MemberEventWrapper object
@@ -1116,8 +1112,13 @@ public:
     MemberEventWrapper(CLASS &object,
                        bool del = false,
                        Priority p = Default_Pri):
-        MemberEventWrapper(&object, del, p)
-    {}
+        Event(p),
+        Named(object.name() + ".wrapped_event"),
+        mObject(&object)
+    {
+        if (del) setFlags(AutoDelete);
+        gem5_assert(mObject);
+    }
 
     void process() override {
         (mObject->*F)();

From a589d7b5697b3fbe61e1842e1831aef50aa96f32 Mon Sep 17 00:00:00 2001
From: Razeza <borisov.dn@phystech.edu>
Date: Wed, 8 Feb 2023 13:22:50 +0300
Subject: [PATCH 261/492] arch-x86: Add instructions from SSE4.1 set.

The following instructions were implemented: PHMINPOSUW, ROUNDSS, ROUNDSD, EXTRACTPS, INSERTPS, PMULLD, PMULDQ,
PCMPGTQ, PMINUW, PMINUD, PMINSB, MINSD, PMAXUW, PMAXUD, PMAXSB, PMAXSD, PEXTRB, PEXTRW for memory, PEXTRD, PEXTRQ,
PINSRB, PINSRD, PINSRQ, PACKUSDW, PBLENDW, BLENDPS, BLENDPD, BLENDVPD, BLENDVPS, PBLENDVB, PMOVSXDQ, PMOVSXWQ,
PMOVSXWD, PMOVSXBQ, PMOVSXBD, PMOVSXBW, PMOVZXDQ, PMOVZXWQ, PMOVZXWD, PMOVZXWD, PMOVZXBQ, PMOVZXBD, PMOVZXBW.

Also fix bug in PACKUSWB_XMM_M, it was marked as sign operation, though it is unsigned.

Jira Issue: https://gem5.atlassian.net/browse/GEM5-1308

Change-Id: I1a8d26c0426690841dcc80a6fa5dcffb8cbc5d9a
Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/67737
Maintainer: Bobby Bruce <bbruce@ucdavis.edu>
Reviewed-by: Bobby Bruce <bbruce@ucdavis.edu>
Tested-by: kokoro <noreply+kokoro@google.com>
---
 src/arch/x86/insts/micromediaop.hh            |   7 +
 src/arch/x86/insts/microop_args.hh            |  15 +
 .../isa/decoder/three_byte_0f38_opcodes.isa   | 124 +++++--
 .../isa/decoder/three_byte_0f3a_opcodes.isa   |  61 +++-
 .../compare_and_write_minimum_or_maximum.py   |  20 ++
 .../data_conversion/__init__.py               |   1 +
 .../floating_point/data_conversion/round.py   |  72 ++++
 .../floating_point/data_transfer/move.py      |  39 ++
 .../simd128/integer/arithmetic/__init__.py    |   1 +
 .../simd128/integer/arithmetic/absolute.py    |  96 +++++
 .../integer/arithmetic/multiplication.py      |  40 +++
 .../integer/compare/compare_and_write_mask.py |  20 ++
 .../compare_and_write_minimum_or_maximum.py   | 160 +++++++++
 .../data_reordering/extract_and_insert.py     | 121 +++++++
 .../data_reordering/pack_with_saturation.py   |  25 +-
 .../integer/data_reordering/shuffle.py        | 120 +++++++
 .../simd128/integer/data_transfer/__init__.py |   2 +-
 .../simd128/integer/data_transfer/move.py     | 229 ++++++++++++
 .../integer/data_transfer/move_with_shift.py  |  59 +++
 src/arch/x86/isa/microasm.isa                 |   2 +-
 src/arch/x86/isa/microops/base.isa            |  12 +
 src/arch/x86/isa/microops/mediaop.isa         | 337 ++++++++++++++++++
 src/arch/x86/isa/operands.isa                 |   5 +-
 src/base/bitfield.hh                          |  16 +
 24 files changed, 1534 insertions(+), 50 deletions(-)
 create mode 100644 src/arch/x86/isa/insts/simd128/floating_point/data_conversion/round.py
 create mode 100644 src/arch/x86/isa/insts/simd128/integer/arithmetic/absolute.py
 create mode 100644 src/arch/x86/isa/insts/simd128/integer/data_transfer/move_with_shift.py

diff --git a/src/arch/x86/insts/micromediaop.hh b/src/arch/x86/insts/micromediaop.hh
index bd897f90e2..be82429f15 100644
--- a/src/arch/x86/insts/micromediaop.hh
+++ b/src/arch/x86/insts/micromediaop.hh
@@ -40,6 +40,7 @@ namespace X86ISA
 enum MediaFlag
 {
     MediaMultHiOp = 1,
+    MediaPartHiOp = 32,
     MediaSignedOp = 64,
     MediaScalarOp = 128
 };
@@ -77,6 +78,12 @@ class MediaOpBase : public X86MicroopBase
         return ext & MediaMultHiOp;
     }
 
+    bool
+    partHi() const
+    {
+        return ext & MediaPartHiOp;
+    }
+
     bool
     signedOp() const
     {
diff --git a/src/arch/x86/insts/microop_args.hh b/src/arch/x86/insts/microop_args.hh
index 9dd121b3b1..c9850ca171 100644
--- a/src/arch/x86/insts/microop_args.hh
+++ b/src/arch/x86/insts/microop_args.hh
@@ -91,6 +91,19 @@ struct Src2Op
     {}
 };
 
+struct Src3Op
+{
+    const RegIndex src3;
+    const size_t size;
+    RegIndex opIndex() const { return src3; }
+
+    Src3Op(RegIndex _src3, size_t _size) : src3(_src3), size(_size) {}
+    template <class InstType>
+    Src3Op(RegIndex _src3, InstType *inst) : src3(_src3),
+        size(inst->getSrcSize())
+    {}
+};
+
 struct DataOp
 {
     const RegIndex data;
@@ -271,6 +284,8 @@ using FoldedSrc2Op = FoldedOp<Src2Op>;
 using FloatSrc2Op = FloatOp<Src2Op>;
 using IntSrc2Op = IntOp<Src2Op>;
 
+using FloatSrc3Op = FloatOp<Src3Op>;
+
 using FoldedDataOp = FoldedOp<DataOp>;
 using FloatDataOp = FloatOp<DataOp>;
 using FoldedDataHiOp = FoldedOp<DataHiOp>;
diff --git a/src/arch/x86/isa/decoder/three_byte_0f38_opcodes.isa b/src/arch/x86/isa/decoder/three_byte_0f38_opcodes.isa
index 0f4330bf7c..ea54e1578d 100644
--- a/src/arch/x86/isa/decoder/three_byte_0f38_opcodes.isa
+++ b/src/arch/x86/isa/decoder/three_byte_0f38_opcodes.isa
@@ -43,40 +43,102 @@
             0x09: psignw_Vdq_Wdq();
             0x0A: psignd_Vdq_Wdq();
             0x0B: pmulhrsw_Vdq_Wdq();
-            0x10: pblendvb_Vdq_Wdq();
-            0x14: blendvps_Vdq_Wdq();
-            0x15: blendvpd_Vdq_Wdq();
+            0x10: decode MODRM_MOD {
+                default: Inst::PBLENDVB(Vdq, Wdq);
+            }
+            0x14: decode MODRM_MOD {
+                default: Inst::BLENDVPS(Vdq, Wdq);
+            }
+            0x15: decode MODRM_MOD {
+                default: Inst::BLENDVPD(Vdq, Wdq);
+            }
             0x17: ptest_Vdq_Wdq();
-            0x1C: pabsb_Vdq_Wdq();
-            0x1D: pabsw_Vdq_Wdq();
-            0x1E: pabsd_Vdq_Wdq();
-            0x20: pmovsxbw_Vdq_Udq_or_Mq();
-            0x21: pmovsxbd_Vdq_Udq_or_Md();
-            0x22: pmovsxbq_Vdq_Udq_or_Mw();
-            0x23: pmovsxwd_Vdq_Udq_or_Mq();
-            0x24: pmovsxwq_Vdq_Udq_or_Md();
-            0x25: pmovsxdq_Vdq_Udq_or_Mq();
-            0x28: pmuldq_Vdq_Wdq();
+            0x1C: decode MODRM_MOD {
+                default: Inst::PABSB(Vdq, Wdq);
+            }
+            0x1D: decode MODRM_MOD {
+                default: Inst::PABSW(Vdq, Wdq);
+            }
+            0x1E: decode MODRM_MOD {
+                default: Inst::PABSD(Vdq, Wdq);
+            }
+            0x20: decode MODRM_MOD {
+                default: Inst::PMOVSXBW(Vdq, Wdq);
+            }
+            0x21: decode MODRM_MOD  {
+                default: Inst::PMOVSXBD(Vdq, Wdq);
+            }
+            0x22: decode MODRM_MOD {
+                default: Inst::PMOVSXBQ(Vdq, Wdq);
+            }
+            0x23: decode MODRM_MOD {
+                default: Inst::PMOVSXWD(Vdq, Wdq);
+            }
+            0x24: decode MODRM_MOD {
+                default: Inst::PMOVSXWQ(Vdq, Wdq);
+            }
+            0x25: decode MODRM_MOD {
+                default: Inst::PMOVSXDQ(Vdq, Wdq);
+            }
+            0x28: decode MODRM_MOD {
+                default: Inst::PMULDQ(Vdq, Wdq);
+            }
             0x29: pcmpeqq_Vdq_Wdq();
             0x2A: movntdqa_Vdq_Mdq();
-            0x2B: packusdw_Vdq_Wdq();
-            0x30: pmovzxbw_Vdq_Udq_or_Mq();
-            0x31: pmovzxbd_Vdq_Udq_or_Md();
-            0x32: pmovzxbq_Vdq_Udq_or_Mw();
-            0x33: pmovzxwd_Vdq_Udq_or_Mq();
-            0x34: pmovzxwq_Vdq_Udq_or_Md();
-            0x35: pmovzxdq_Vdq_Udq_or_Mq();
-            0x37: pcmpgtq_Vdq_Wdq();
-            0x38: pminsb_Vdq_Wdq();
-            0x39: pminsd_Vdq_Wdq();
-            0x3A: pminuw_Vdq_Wdq();
-            0x3B: pminud_Vdq_Wdq();
-            0x3C: pmaxsb_Vdq_Wdq();
-            0x3D: pmaxsd_Vdq_Wdq();
-            0x3E: pmaxuw_Vdq_Wdq();
-            0x3F: pmaxud_Vdq_Wdq();
-            0x40: pmulld_Vdq_Wdq();
-            0x41: phminposuw_Vdq_Wdq();
+            0x2B: decode MODRM_MOD {
+                default: Inst::PACKUSDW(Vdq, Wdq);
+            }
+            0x30: decode MODRM_MOD {
+                default: Inst::PMOVZXBW(Vdq, Wdq);
+            }
+            0x31: decode MODRM_MOD  {
+                default: Inst::PMOVZXBD(Vdq, Wdq);
+            }
+            0x32: decode MODRM_MOD {
+                default: Inst::PMOVZXBQ(Vdq, Wdq);
+            }
+            0x33: decode MODRM_MOD {
+                default: Inst::PMOVZXWD(Vdq, Wdq);
+            }
+            0x34: decode MODRM_MOD {
+                default: Inst::PMOVZXWQ(Vdq, Wdq);
+            }
+            0x35: decode MODRM_MOD {
+                default: Inst::PMOVZXDQ(Vdq, Wdq);
+            }
+            0x37: decode MODRM_MOD {
+                default: Inst::PCMPGTQ(Vdq, Wdq);
+            }
+            0x38: decode MODRM_MOD {
+                default: Inst::PMINSB(Vdq, Wdq);
+            }
+            0x39: decode MODRM_MOD {
+                default: Inst::PMINSD(Vdq, Wdq);
+            }
+            0x3A: decode MODRM_MOD {
+                default: Inst::PMINUW(Vdq, Wdq);
+            }
+            0x3B: decode MODRM_MOD {
+                default: Inst::PMINUD(Vdq, Wdq);
+            }
+            0x3C: decode MODRM_MOD {
+                default: Inst::PMAXSB(Vdq, Wdq);
+            }
+            0x3D: decode MODRM_MOD {
+                default: Inst::PMAXSD(Vdq, Wdq);
+            }
+            0x3E: decode MODRM_MOD {
+                default: Inst::PMAXUW(Vdq, Wdq);
+            }
+            0x3F: decode MODRM_MOD {
+                default: Inst::PMAXUD(Vdq, Wdq);
+            }
+            0x40: decode MODRM_MOD {
+                default: Inst::PMULLD(Vdq, Wdq);
+            }
+            0x41: decode MODRM_MOD {
+                default: Inst::PHMINPOSUW(Vdq, Wdq);
+            }
             default: Inst::UD2();
         }
         default: decode LEGACY_REPNE {
diff --git a/src/arch/x86/isa/decoder/three_byte_0f3a_opcodes.isa b/src/arch/x86/isa/decoder/three_byte_0f3a_opcodes.isa
index 0c66fa5cff..b1e84733fa 100644
--- a/src/arch/x86/isa/decoder/three_byte_0f3a_opcodes.isa
+++ b/src/arch/x86/isa/decoder/three_byte_0f3a_opcodes.isa
@@ -33,22 +33,57 @@ format WarnUnimpl {
         1: decode OPCODE_OP {
             0x08: roundps_Vdq_Wdq_Ib();
             0x09: roundpd_Vdq_Wdq_Ib();
-            0x0A: roundss_Vss_Wss_Ib();
-            0x0B: roundsd_Vsd_Wsd_Ib();
-            0x0C: blendps_Vdq_Wdq_Ib();
-            0x0D: blendpd_Vdq_Wdq_Ib();
-            0x0E: pblendw_Vdq_Wdq_Ib();
-            0x0F: palignr_Vdq_Wdq_Ib();
-            0x14: pextrb_Rd_or_Mb_Vdq_Ib();
+            0x0A: decode MODRM_MOD {
+                0x3: Inst::ROUNDSS(Vdq, Wdq, Ib);
+                default: Inst::ROUNDSS(Vss, Md, Ib);
+            }
+            0x0B: decode MODRM_MOD {
+                0x3: Inst::ROUNDSD(Vss, Wdq, Ib);
+                default: Inst::ROUNDSD(Vss, Mq, Ib);
+            }
+            0x0C: decode MODRM_MOD {
+                default: Inst::BLENDPS(Vdq, Wdq, Ib);
+            }
+            0x0D: decode MODRM_MOD {
+                default: Inst::BLENDPD(Vdq, Wdq, Ib);
+            }
+            0x0E: decode MODRM_MOD {
+                default: Inst::PBLENDW(Vdq, Wdq, Ib);
+            }
+            0x0F: decode MODRM_MOD {
+                default: Inst::PALIGNR(Vdq, Wdq, Ib);
+            }
+            0x14: decode MODRM_MOD {
+                0x3: Inst::PEXTRB(Rd, Vdq, Ib);
+                default: Inst::PEXTRB(Mb, Vdq, Ib);
+            }
             0x15: decode MODRM_MOD {
                 0x3: Inst::PEXTRW(Rd,Vdq,Ib);
-                default: pextrw_Mw_Vdq_Ib();
+                default: Inst::PEXTRW(Mw,Vdq,Ib);
+            }
+            0x16: decode MODRM_MOD {
+                default: decode REX_W {
+                    0x0: Inst::PEXTRD(Ed, Vdq, Ib);
+                    0x1: Inst::PEXTRQ(Eq, Vdq, Ib);
+                }
+            }
+            0x17: decode MODRM_MOD {
+                default: Inst::EXTRACTPS(Ed, Vdq, Ib);
+            }
+            0x20: decode MODRM_MOD {
+                0x3: Inst::PINSRB(Vdq, Rq, Ib);
+                default: Inst::PINSRB(Vdq, Mb, Ib);
+            }
+            0x21: decode MODRM_MOD {
+                0x3: Inst::INSERTPS(Vdq, Wdq, Ib);
+                default: Inst::INSERTPS(Vdq, Md, Ib);
+            }
+            0x22: decode MODRM_MOD {
+                default: decode REX_W {
+                    0x0: Inst::PINSRD(Vdq, Ed, Ib);
+                    0x1: Inst::PINSRQ(Vdq, Eq, Ib);
+                }
             }
-            0x16: pextrd_pextrq_Ed_or_Eq_Vdq_Ib();
-            0x17: extractps_Ed_Vdq_Ib();
-            0x20: pinsrb_Vdq_Rd_or_Rq_or_Mb_Ib();
-            0x21: insertps_Vdq_Udq_or_Md_Ib();
-            0x22: pinsrd_pinsrq_Vdq_Ed_or_Eq_Ib();
             0x40: dpps_Vdq_Wdq_Ib();
             0x41: dppd_Vdq_Wdq_Ib();
             0x42: pcmpistrm_Vdq_Wdq_Ib();
diff --git a/src/arch/x86/isa/insts/simd128/floating_point/compare/compare_and_write_minimum_or_maximum.py b/src/arch/x86/isa/insts/simd128/floating_point/compare/compare_and_write_minimum_or_maximum.py
index ec9bf0e06c..e5aaf694b9 100644
--- a/src/arch/x86/isa/insts/simd128/floating_point/compare/compare_and_write_minimum_or_maximum.py
+++ b/src/arch/x86/isa/insts/simd128/floating_point/compare/compare_and_write_minimum_or_maximum.py
@@ -173,4 +173,24 @@ def macroop MAXSD_XMM_P {
     ldfp ufp1, seg, riprel, "DISPLACEMENT", dataSize=8
     mmaxf xmml, xmml, ufp1, ext=Scalar, size=8
 };
+
+def macroop PHMINPOSUW_XMM_XMM {
+    phminposuw xmml, xmmlm, xmmhm, size=2
+    xorfp xmmh, xmmh, xmmh
+};
+
+def macroop PHMINPOSUW_XMM_M {
+    ldfp ufp1, seg, riprel, "DISPLACEMENT", dataSize=8
+    ldfp ufp2, seg, riprel, "DISPLACEMENT + 8", dataSize=8
+    phminposuw xmml, ufp1, ufp2, size=2
+    xorfp xmmh, xmmh, xmmh
+};
+
+def macroop PHMINPOSUW_XMM_P {
+    rdip t7
+    ldfp ufp1, seg, riprel, "DISPLACEMENT", dataSize=8
+    ldfp ufp2, seg, riprel, "DISPLACEMENT + 8", dataSize=8
+    phminposuw xmml, ufp1, ufp2, size=2
+    xorfp xmmh, xmmh, xmmh
+};
 """
diff --git a/src/arch/x86/isa/insts/simd128/floating_point/data_conversion/__init__.py b/src/arch/x86/isa/insts/simd128/floating_point/data_conversion/__init__.py
index c8a2d2f2b3..6661dc8120 100644
--- a/src/arch/x86/isa/insts/simd128/floating_point/data_conversion/__init__.py
+++ b/src/arch/x86/isa/insts/simd128/floating_point/data_conversion/__init__.py
@@ -38,6 +38,7 @@ categories = [
     "convert_floating_point_to_xmm_integer",
     "convert_floating_point_to_mmx_integer",
     "convert_floating_point_to_gpr_integer",
+    "round",
 ]
 
 microcode = """
diff --git a/src/arch/x86/isa/insts/simd128/floating_point/data_conversion/round.py b/src/arch/x86/isa/insts/simd128/floating_point/data_conversion/round.py
new file mode 100644
index 0000000000..ea2a7341d6
--- /dev/null
+++ b/src/arch/x86/isa/insts/simd128/floating_point/data_conversion/round.py
@@ -0,0 +1,72 @@
+# Copyright (c) 2007 The Hewlett-Packard Development Company
+# All rights reserved.
+#
+# The license below extends only to copyright in the software and shall
+# not be construed as granting a license to any other intellectual
+# property including but not limited to intellectual property relating
+# to a hardware implementation of the functionality of the software
+# licensed hereunder.  You may use the software subject to the license
+# terms below provided that you ensure that this notice is replicated
+# unmodified and in its entirety in all distributions of the software,
+# modified or unmodified, in source code or in binary form.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are
+# met: redistributions of source code must retain the above copyright
+# notice, this list of conditions and the following disclaimer;
+# redistributions in binary form must reproduce the above copyright
+# notice, this list of conditions and the following disclaimer in the
+# documentation and/or other materials provided with the distribution;
+# neither the name of the copyright holders nor the names of its
+# contributors may be used to endorse or promote products derived from
+# this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+microcode = """
+def macroop ROUNDSS_XMM_XMM_I {
+    rdval t1, ctrlRegIdx("misc_reg::Mxcsr")
+    rounds xmml, xmmlm, t1, "IMMEDIATE", size=4
+};
+
+def macroop ROUNDSS_XMM_M_I {
+    rdval t1, ctrlRegIdx("misc_reg::Mxcsr")
+    ldfp ufp1, seg, sib, "DISPLACEMENT", dataSize=4
+    rounds xmml, ufp1, t1, "IMMEDIATE", size=4
+};
+
+def macroop ROUNDSS_XMM_P_I {
+    rdval t1, ctrlRegIdx("misc_reg::Mxcsr")
+    rdip t7
+    ldfp ufp1, seg, riprel, "DISPLACEMENT", dataSize=4
+    rounds xmml, ufp1, t1, "IMMEDIATE", size=4
+};
+
+def macroop ROUNDSD_XMM_XMM_I {
+    rdval t1, ctrlRegIdx("misc_reg::Mxcsr")
+    rounds xmml, xmmlm, t1, "IMMEDIATE", size=8
+};
+
+def macroop ROUNDSD_XMM_M_I {
+    rdval t1, ctrlRegIdx("misc_reg::Mxcsr")
+    ldfp ufp1, seg, sib, "DISPLACEMENT", dataSize=8
+    rounds xmml, ufp1, t1, "IMMEDIATE", size=8
+};
+
+def macroop ROUNDSD_XMM_P_I {
+    rdval t1, ctrlRegIdx("misc_reg::Mxcsr")
+    rdip t7
+    ldfp ufp1, seg, riprel, "DISPLACEMENT", dataSize=8
+    rounds xmml, ufp1, t1, "IMMEDIATE", size=8
+};
+"""
diff --git a/src/arch/x86/isa/insts/simd128/floating_point/data_transfer/move.py b/src/arch/x86/isa/insts/simd128/floating_point/data_transfer/move.py
index 2a4a152c9f..607a53d828 100644
--- a/src/arch/x86/isa/insts/simd128/floating_point/data_transfer/move.py
+++ b/src/arch/x86/isa/insts/simd128/floating_point/data_transfer/move.py
@@ -276,4 +276,43 @@ def macroop MOVSD_P_XMM {
 def macroop MOVSD_XMM_XMM {
     movfp xmml, xmmlm, dataSize=8
 };
+
+def macroop EXTRACTPS_R_XMM_I {
+    extractps reg, xmmlm, "IMMEDIATE & mask(2)", size=8
+    extractps reg, xmmhm, "IMMEDIATE & mask(2)", size=8, ext=PartHi
+};
+
+def macroop EXTRACTPS_M_XMM_I {
+    extractps t1, xmmlm, "IMMEDIATE & mask(2)", size=8
+    extractps t1, xmmhm, "IMMEDIATE & mask(2)", size=8, ext=PartHi
+    st t1, seg, sib, disp
+};
+
+def macroop EXTRACTPS_P_XMM_I {
+    rdip t7
+    extractps t1, xmmlm, "IMMEDIATE & mask(2)", size=8
+    extractps t1, xmmhm, "IMMEDIATE & mask(2)", size=8, ext=PartHi
+    st t1, seg, riprel, disp
+};
+
+def macroop INSERTPS_XMM_XMM_I {
+    movfp ufp1, xmml, dataSize=8
+    insertps xmml, xmmh, xmmlm, xmmhm, "IMMEDIATE", size=8
+    insertps xmmh, ufp1, xmmlm, xmmhm, "IMMEDIATE", size=8, ext=PartHi
+};
+
+def macroop INSERTPS_XMM_M_I {
+    movfp ufp1, xmml, dataSize=8
+    ldfp ufp2, seg, sib, "DISPLACEMENT", dataSize=4
+    insertps xmml, xmmh, ufp2, ufp3, "IMMEDIATE & mask(6)", size=8
+    insertps xmmh, ufp1, ufp2, ufp3, "IMMEDIATE & mask(6)", size=8, ext=PartHi
+};
+
+def macroop INSERTPS_XMM_P_I {
+    rdip t7
+    movfp ufp1, xmml, dataSize=8
+    ldfp ufp2, seg, riprel, "DISPLACEMENT", dataSize=4
+    insertps xmml, xmmh, ufp2, ufp3, "IMMEDIATE & mask(6)", size=8
+    insertps xmmh, ufp1, ufp2, ufp3, "IMMEDIATE & mask(6)", size=8, ext=PartHi
+};
 """
diff --git a/src/arch/x86/isa/insts/simd128/integer/arithmetic/__init__.py b/src/arch/x86/isa/insts/simd128/integer/arithmetic/__init__.py
index 01ae49f88e..c2de13b845 100644
--- a/src/arch/x86/isa/insts/simd128/integer/arithmetic/__init__.py
+++ b/src/arch/x86/isa/insts/simd128/integer/arithmetic/__init__.py
@@ -35,6 +35,7 @@
 
 categories = [
     "addition",
+    "absolute",
     "subtraction",
     "multiplication",
     "multiply_add",
diff --git a/src/arch/x86/isa/insts/simd128/integer/arithmetic/absolute.py b/src/arch/x86/isa/insts/simd128/integer/arithmetic/absolute.py
new file mode 100644
index 0000000000..daea1b7902
--- /dev/null
+++ b/src/arch/x86/isa/insts/simd128/integer/arithmetic/absolute.py
@@ -0,0 +1,96 @@
+# Copyright (c) 2007 The Hewlett-Packard Development Company
+# All rights reserved.
+#
+# The license below extends only to copyright in the software and shall
+# not be construed as granting a license to any other intellectual
+# property including but not limited to intellectual property relating
+# to a hardware implementation of the functionality of the software
+# licensed hereunder.  You may use the software subject to the license
+# terms below provided that you ensure that this notice is replicated
+# unmodified and in its entirety in all distributions of the software,
+# modified or unmodified, in source code or in binary form.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are
+# met: redistributions of source code must retain the above copyright
+# notice, this list of conditions and the following disclaimer;
+# redistributions in binary form must reproduce the above copyright
+# notice, this list of conditions and the following disclaimer in the
+# documentation and/or other materials provided with the distribution;
+# neither the name of the copyright holders nor the names of its
+# contributors may be used to endorse or promote products derived from
+# this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+microcode = """
+def macroop PABSB_XMM_XMM {
+    pabs xmml, xmmlm, size=1
+    pabs xmmh, xmmhm, size=1
+};
+
+def macroop PABSB_XMM_M {
+    ldfp ufp1, seg, sib, "DISPLACEMENT", dataSize=8
+    ldfp ufp2, seg, sib, "DISPLACEMENT + 8", dataSize=8
+    pabs xmml, ufp1, size=1
+    pabs xmmh, ufp2, size=1
+};
+
+def macroop PABSB_XMM_P {
+    rdip t7
+    ldfp ufp1, seg, riprel, "DISPLACEMENT", dataSize=8
+    ldfp ufp2, seg, riprel, "DISPLACEMENT + 8", dataSize=8
+    pabs xmml, ufp1, size=1
+    pabs xmmh, ufp2, size=1
+};
+
+def macroop PABSW_XMM_XMM {
+    pabs xmml, xmmlm, size=2
+    pabs xmmh, xmmhm, size=2
+};
+
+def macroop PABSW_XMM_M {
+    ldfp ufp1, seg, sib, "DISPLACEMENT", dataSize=8
+    ldfp ufp2, seg, sib, "DISPLACEMENT + 8", dataSize=8
+    pabs xmml, ufp1, size=2
+    pabs xmmh, ufp2, size=2
+};
+
+def macroop PABSW_XMM_P {
+    rdip t7
+    ldfp ufp1, seg, riprel, "DISPLACEMENT", dataSize=8
+    ldfp ufp2, seg, riprel, "DISPLACEMENT + 8", dataSize=8
+    pabs xmml, ufp1, size=2
+    pabs xmmh, ufp2, size=2
+};
+
+def macroop PABSD_XMM_XMM {
+    pabs xmml, xmmlm, size=4
+    pabs xmmh, xmmhm, size=4
+};
+
+def macroop PABSD_XMM_M {
+    ldfp ufp1, seg, sib, "DISPLACEMENT", dataSize=8
+    ldfp ufp2, seg, sib, "DISPLACEMENT + 8", dataSize=8
+    pabs xmml, ufp1, size=4
+    pabs xmmh, ufp2, size=4
+};
+
+def macroop PABSD_XMM_P {
+    rdip t7
+    ldfp ufp1, seg, riprel, "DISPLACEMENT", dataSize=8
+    ldfp ufp2, seg, riprel, "DISPLACEMENT + 8", dataSize=8
+    pabs xmml, ufp1, size=4
+    pabs xmmh, ufp2, size=4
+};
+"""
diff --git a/src/arch/x86/isa/insts/simd128/integer/arithmetic/multiplication.py b/src/arch/x86/isa/insts/simd128/integer/arithmetic/multiplication.py
index 3246686d2c..6cdde2af57 100644
--- a/src/arch/x86/isa/insts/simd128/integer/arithmetic/multiplication.py
+++ b/src/arch/x86/isa/insts/simd128/integer/arithmetic/multiplication.py
@@ -74,6 +74,26 @@ def macroop PMULLW_XMM_P {
     mmuli xmmh, xmmh, ufp2, size=2, ext=Signed
 };
 
+def macroop PMULLD_XMM_XMM {
+    mmuli xmml, xmml, xmmlm, size=4, ext=Signed
+    mmuli xmmh, xmmh, xmmhm, size=4, ext=Signed
+};
+
+def macroop PMULLD_XMM_M {
+    ldfp ufp1, seg, sib, "DISPLACEMENT", dataSize=8
+    ldfp ufp2, seg, sib, "DISPLACEMENT + 8", dataSize=8
+    mmuli xmml, xmml, ufp1, size=4, ext=Signed
+    mmuli xmmh, xmmh, ufp2, size=4, ext=Signed
+};
+
+def macroop PMULLD_XMM_P {
+    rdip t7
+    ldfp ufp1, seg, riprel, "DISPLACEMENT", dataSize=8
+    ldfp ufp2, seg, riprel, "DISPLACEMENT + 8", dataSize=8
+    mmuli xmml, xmml, ufp1, size=4, ext=Signed
+    mmuli xmmh, xmmh, ufp2, size=4, ext=Signed
+};
+
 def macroop PMULHUW_XMM_XMM {
     mmuli xmml, xmml, xmmlm, size=2, ext = MultHi
     mmuli xmmh, xmmh, xmmhm, size=2, ext = MultHi
@@ -113,4 +133,24 @@ def macroop PMULUDQ_XMM_P {
     mmuli xmml, xmml, ufp1, srcSize=4, destSize=8, ext=Scalar
     mmuli xmmh, xmmh, ufp2, srcSize=4, destSize=8, ext=Scalar
 };
+
+def macroop PMULDQ_XMM_XMM {
+    mmuli xmml, xmml, xmmlm, srcSize=4, destSize=8, ext=Scalar + "|" + Signed
+    mmuli xmmh, xmmh, xmmhm, srcSize=4, destSize=8, ext=Scalar + "|" + Signed
+};
+
+def macroop PMULDQ_XMM_M {
+    ldfp ufp1, seg, sib, "DISPLACEMENT", dataSize=8
+    ldfp ufp2, seg, sib, "DISPLACEMENT + 8", dataSize=8
+    mmuli xmml, xmml, ufp1, srcSize=4, destSize=8, ext=Scalar + "|" + Signed
+    mmuli xmmh, xmmh, ufp2, srcSize=4, destSize=8, ext=Scalar + "|" + Signed
+};
+
+def macroop PMULDQ_XMM_P {
+    rdip t7
+    ldfp ufp1, seg, riprel, "DISPLACEMENT", dataSize=8
+    ldfp ufp2, seg, riprel, "DISPLACEMENT + 8", dataSize=8
+    mmuli xmml, xmml, ufp1, srcSize=4, destSize=8, ext=Scalar + "|" + Signed
+    mmuli xmmh, xmmh, ufp2, srcSize=4, destSize=8, ext=Scalar + "|" + Signed
+};
 """
diff --git a/src/arch/x86/isa/insts/simd128/integer/compare/compare_and_write_mask.py b/src/arch/x86/isa/insts/simd128/integer/compare/compare_and_write_mask.py
index 7fb4fe621f..548a00e93f 100644
--- a/src/arch/x86/isa/insts/simd128/integer/compare/compare_and_write_mask.py
+++ b/src/arch/x86/isa/insts/simd128/integer/compare/compare_and_write_mask.py
@@ -153,4 +153,24 @@ def macroop PCMPGTD_XMM_P {
     mcmpi2r xmml, xmml, ufp1, size=4, ext=2
     mcmpi2r xmmh, xmmh, ufp2, size=4, ext=2
 };
+
+def macroop PCMPGTQ_XMM_XMM {
+    mcmpi2r xmml, xmml, xmmlm, size=8, ext=2
+    mcmpi2r xmmh, xmmh, xmmhm, size=8, ext=2
+};
+
+def macroop PCMPGTQ_XMM_M {
+    ldfp ufp1, seg, sib, "DISPLACEMENT", dataSize=8
+    ldfp ufp2, seg, sib, "DISPLACEMENT + 8", dataSize=8
+    mcmpi2r xmml, xmml, ufp1, size=8, ext=2
+    mcmpi2r xmmh, xmmh, ufp2, size=8, ext=2
+};
+
+def macroop PCMPGTQ_XMM_P {
+    rdip t7
+    ldfp ufp1, seg, riprel, "DISPLACEMENT", dataSize=8
+    ldfp ufp2, seg, riprel, "DISPLACEMENT + 8", dataSize=8
+    mcmpi2r xmml, xmml, ufp1, size=8, ext=2
+    mcmpi2r xmmh, xmmh, ufp2, size=8, ext=2
+};
 """
diff --git a/src/arch/x86/isa/insts/simd128/integer/compare/compare_and_write_minimum_or_maximum.py b/src/arch/x86/isa/insts/simd128/integer/compare/compare_and_write_minimum_or_maximum.py
index 7e863091a0..5793118e01 100644
--- a/src/arch/x86/isa/insts/simd128/integer/compare/compare_and_write_minimum_or_maximum.py
+++ b/src/arch/x86/isa/insts/simd128/integer/compare/compare_and_write_minimum_or_maximum.py
@@ -54,6 +54,66 @@ def macroop PMINUB_XMM_P {
     mmini xmmh, xmmh, ufp2, size=1, ext=0
 };
 
+def macroop PMINUW_XMM_XMM {
+    mmini xmml, xmml, xmmlm, size=2, ext=0
+    mmini xmmh, xmmh, xmmhm, size=2, ext=0
+};
+
+def macroop PMINUW_XMM_M {
+    ldfp ufp1, seg, sib, "DISPLACEMENT", dataSize=8
+    ldfp ufp2, seg, sib, "DISPLACEMENT + 8", dataSize=8
+    mmini xmml, xmml, ufp1, size=2, ext=0
+    mmini xmmh, xmmh, ufp2, size=2, ext=0
+};
+
+def macroop PMINUW_XMM_P {
+    rdip t7
+    ldfp ufp1, seg, riprel, "DISPLACEMENT", dataSize=8
+    ldfp ufp2, seg, riprel, "DISPLACEMENT + 8", dataSize=8
+    mmini xmml, xmml, ufp1, size=2, ext=0
+    mmini xmmh, xmmh, ufp2, size=2, ext=0
+};
+
+def macroop PMINUD_XMM_XMM {
+    mmini xmml, xmml, xmmlm, size=4, ext=0
+    mmini xmmh, xmmh, xmmhm, size=4, ext=0
+};
+
+def macroop PMINUD_XMM_M {
+    ldfp ufp1, seg, sib, "DISPLACEMENT", dataSize=8
+    ldfp ufp2, seg, sib, "DISPLACEMENT + 8", dataSize=8
+    mmini xmml, xmml, ufp1, size=4, ext=0
+    mmini xmmh, xmmh, ufp2, size=4, ext=0
+};
+
+def macroop PMINUD_XMM_P {
+    rdip t7
+    ldfp ufp1, seg, riprel, "DISPLACEMENT", dataSize=8
+    ldfp ufp2, seg, riprel, "DISPLACEMENT + 8", dataSize=8
+    mmini xmml, xmml, ufp1, size=4, ext=0
+    mmini xmmh, xmmh, ufp2, size=4, ext=0
+};
+
+def macroop PMINSB_XMM_XMM {
+    mmini xmml, xmml, xmmlm, size=1, ext=Signed
+    mmini xmmh, xmmh, xmmhm, size=1, ext=Signed
+};
+
+def macroop PMINSB_XMM_M {
+    ldfp ufp1, seg, sib, "DISPLACEMENT", dataSize=8
+    ldfp ufp2, seg, sib, "DISPLACEMENT + 8", dataSize=8
+    mmini xmml, xmml, ufp1, size=1, ext=Signed
+    mmini xmmh, xmmh, ufp2, size=1, ext=Signed
+};
+
+def macroop PMINSB_XMM_P {
+    rdip t7
+    ldfp ufp1, seg, riprel, "DISPLACEMENT", dataSize=8
+    ldfp ufp2, seg, riprel, "DISPLACEMENT + 8", dataSize=8
+    mmini xmml, xmml, ufp1, size=1, ext=Signed
+    mmini xmmh, xmmh, ufp2, size=1, ext=Signed
+};
+
 def macroop PMINSW_XMM_XMM {
     mmini xmml, xmml, xmmlm, size=2, ext=Signed
     mmini xmmh, xmmh, xmmhm, size=2, ext=Signed
@@ -74,6 +134,26 @@ def macroop PMINSW_XMM_P {
     mmini xmmh, xmmh, ufp2, size=2, ext=Signed
 };
 
+def macroop PMINSD_XMM_XMM {
+    mmini xmml, xmml, xmmlm, size=4, ext=Signed
+    mmini xmmh, xmmh, xmmhm, size=4, ext=Signed
+};
+
+def macroop PMINSD_XMM_M {
+    ldfp ufp1, seg, sib, "DISPLACEMENT", dataSize=8
+    ldfp ufp2, seg, sib, "DISPLACEMENT + 8", dataSize=8
+    mmini xmml, xmml, ufp1, size=4, ext=Signed
+    mmini xmmh, xmmh, ufp2, size=4, ext=Signed
+};
+
+def macroop PMINSD_XMM_P {
+    rdip t7
+    ldfp ufp1, seg, riprel, "DISPLACEMENT", dataSize=8
+    ldfp ufp2, seg, riprel, "DISPLACEMENT + 8", dataSize=8
+    mmini xmml, xmml, ufp1, size=4, ext=Signed
+    mmini xmmh, xmmh, ufp2, size=4, ext=Signed
+};
+
 def macroop PMAXUB_XMM_XMM {
     mmaxi xmml, xmml, xmmlm, size=1, ext=0
     mmaxi xmmh, xmmh, xmmhm, size=1, ext=0
@@ -94,6 +174,66 @@ def macroop PMAXUB_XMM_P {
     mmaxi xmmh, xmmh, ufp2, size=1, ext=0
 };
 
+def macroop PMAXUW_XMM_XMM {
+    mmaxi xmml, xmml, xmmlm, size=2, ext=0
+    mmaxi xmmh, xmmh, xmmhm, size=2, ext=0
+};
+
+def macroop PMAXUW_XMM_M {
+    ldfp ufp1, seg, sib, "DISPLACEMENT", dataSize=8
+    ldfp ufp2, seg, sib, "DISPLACEMENT + 8", dataSize=8
+    mmaxi xmml, xmml, ufp1, size=2, ext=0
+    mmaxi xmmh, xmmh, ufp2, size=2, ext=0
+};
+
+def macroop PMAXUW_XMM_P {
+    rdip t7
+    ldfp ufp1, seg, riprel, "DISPLACEMENT", dataSize=8
+    ldfp ufp2, seg, riprel, "DISPLACEMENT + 8", dataSize=8
+    mmaxi xmml, xmml, ufp1, size=2, ext=0
+    mmaxi xmmh, xmmh, ufp2, size=2, ext=0
+};
+
+def macroop PMAXUD_XMM_XMM {
+    mmaxi xmml, xmml, xmmlm, size=4, ext=0
+    mmaxi xmmh, xmmh, xmmhm, size=4, ext=0
+};
+
+def macroop PMAXUD_XMM_M {
+    ldfp ufp1, seg, sib, "DISPLACEMENT", dataSize=8
+    ldfp ufp2, seg, sib, "DISPLACEMENT + 8", dataSize=8
+    mmaxi xmml, xmml, ufp1, size=4, ext=0
+    mmaxi xmmh, xmmh, ufp2, size=4, ext=0
+};
+
+def macroop PMAXUD_XMM_P {
+    rdip t7
+    ldfp ufp1, seg, riprel, "DISPLACEMENT", dataSize=8
+    ldfp ufp2, seg, riprel, "DISPLACEMENT + 8", dataSize=8
+    mmaxi xmml, xmml, ufp1, size=4, ext=0
+    mmaxi xmmh, xmmh, ufp2, size=4, ext=0
+};
+
+def macroop PMAXSB_XMM_XMM {
+    mmaxi xmml, xmml, xmmlm, size=1, ext=Signed
+    mmaxi xmmh, xmmh, xmmhm, size=1, ext=Signed
+};
+
+def macroop PMAXSB_XMM_M {
+    ldfp ufp1, seg, sib, "DISPLACEMENT", dataSize=8
+    ldfp ufp2, seg, sib, "DISPLACEMENT + 8", dataSize=8
+    mmaxi xmml, xmml, ufp1, size=1, ext=Signed
+    mmaxi xmmh, xmmh, ufp2, size=1, ext=Signed
+};
+
+def macroop PMAXSB_XMM_P {
+    rdip t7
+    ldfp ufp1, seg, riprel, "DISPLACEMENT", dataSize=8
+    ldfp ufp2, seg, riprel, "DISPLACEMENT + 8", dataSize=8
+    mmaxi xmml, xmml, ufp1, size=1, ext=Signed
+    mmaxi xmmh, xmmh, ufp2, size=1, ext=Signed
+};
+
 def macroop PMAXSW_XMM_XMM {
     mmaxi xmml, xmml, xmmlm, size=2, ext=Signed
     mmaxi xmmh, xmmh, xmmhm, size=2, ext=Signed
@@ -113,4 +253,24 @@ def macroop PMAXSW_XMM_P {
     mmaxi xmml, xmml, ufp1, size=2, ext=Signed
     mmaxi xmmh, xmmh, ufp2, size=2, ext=Signed
 };
+
+def macroop PMAXSD_XMM_XMM {
+    mmaxi xmml, xmml, xmmlm, size=4, ext=Signed
+    mmaxi xmmh, xmmh, xmmhm, size=4, ext=Signed
+};
+
+def macroop PMAXSD_XMM_M {
+    ldfp ufp1, seg, sib, "DISPLACEMENT", dataSize=8
+    ldfp ufp2, seg, sib, "DISPLACEMENT + 8", dataSize=8
+    mmaxi xmml, xmml, ufp1, size=4, ext=Signed
+    mmaxi xmmh, xmmh, ufp2, size=4, ext=Signed
+};
+
+def macroop PMAXSD_XMM_P {
+    rdip t7
+    ldfp ufp1, seg, riprel, "DISPLACEMENT", dataSize=8
+    ldfp ufp2, seg, riprel, "DISPLACEMENT + 8", dataSize=8
+    mmaxi xmml, xmml, ufp1, size=4, ext=Signed
+    mmaxi xmmh, xmmh, ufp2, size=4, ext=Signed
+};
 """
diff --git a/src/arch/x86/isa/insts/simd128/integer/data_reordering/extract_and_insert.py b/src/arch/x86/isa/insts/simd128/integer/data_reordering/extract_and_insert.py
index f955cbaa16..8d14aa296e 100644
--- a/src/arch/x86/isa/insts/simd128/integer/data_reordering/extract_and_insert.py
+++ b/src/arch/x86/isa/insts/simd128/integer/data_reordering/extract_and_insert.py
@@ -34,11 +34,96 @@
 # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 
 microcode = """
+def macroop PEXTRB_R_XMM_I {
+    mov2int reg, xmmlm, "IMMEDIATE & mask(4)", size=1, ext=1
+    mov2int reg, xmmhm, "IMMEDIATE & mask(4)", size=1, ext=1
+};
+
+def macroop PEXTRB_M_XMM_I {
+    mov2int t1, xmmlm, "IMMEDIATE & mask(4)", size=1, ext=1
+    mov2int t1, xmmhm, "IMMEDIATE & mask(4)", size=1, ext=1
+    st t1, seg, sib, disp, dataSize=1
+};
+
+def macroop PEXTRB_P_XMM_I {
+    rdip t7
+    mov2int t1, xmmlm, "IMMEDIATE & mask(4)", size=1, ext=1
+    mov2int t1, xmmhm, "IMMEDIATE & mask(4)", size=1, ext=1
+    st t1, seg, riprel, disp, dataSize=1
+};
+
 def macroop PEXTRW_R_XMM_I {
     mov2int reg, xmmlm, "IMMEDIATE & mask(3)", size=2, ext=1
     mov2int reg, xmmhm, "IMMEDIATE & mask(3)", size=2, ext=1
 };
 
+def macroop PEXTRW_M_XMM_I {
+    mov2int t1, xmmlm, "IMMEDIATE & mask(3)", size=2, ext=1
+    mov2int t1, xmmhm, "IMMEDIATE & mask(3)", size=2, ext=1
+    st t1, seg, sib, disp, dataSize=2
+};
+
+def macroop PEXTRW_P_XMM_I {
+    rdip t7
+    mov2int t1, xmmlm, "IMMEDIATE & mask(2)", size=2, ext=1
+    mov2int t1, xmmhm, "IMMEDIATE & mask(2)", size=2, ext=1
+    st t1, seg, riprel, disp, dataSize=2
+};
+
+def macroop PEXTRD_R_XMM_I {
+    mov2int reg, xmmlm, "IMMEDIATE & mask(2)", size=4, ext=1
+    mov2int reg, xmmhm, "IMMEDIATE & mask(2)", size=4, ext=1
+};
+
+def macroop PEXTRD_M_XMM_I {
+    mov2int t1, xmmlm, "IMMEDIATE & mask(2)", size=4, ext=1
+    mov2int t1, xmmhm, "IMMEDIATE & mask(2)", size=4, ext=1
+    st t1, seg, sib, disp, dataSize=4
+};
+
+def macroop PEXTRD_P_XMM_I {
+    rdip t7
+    mov2int t1, xmmlm, "IMMEDIATE & mask(2)", size=4, ext=1
+    mov2int t1, xmmhm, "IMMEDIATE & mask(2)", size=4, ext=1
+    st t1, seg, riprel, disp, dataSize=4
+};
+
+def macroop PEXTRQ_R_XMM_I {
+    mov2int reg, xmmlm, "IMMEDIATE & mask(1)", size=8, ext=1
+    mov2int reg, xmmhm, "IMMEDIATE & mask(1)", size=8, ext=1
+};
+
+def macroop PEXTRQ_M_XMM_I {
+    mov2int t1, xmmlm, "IMMEDIATE & mask(1)", size=8, ext=1
+    mov2int t1, xmmhm, "IMMEDIATE & mask(1)", size=8, ext=1
+    st t1, seg, sib, disp, dataSize=8
+};
+
+def macroop PEXTRQ_P_XMM_I {
+    rdip t7
+    mov2int t1, xmmlm, "IMMEDIATE & mask(1)", size=8, ext=1
+    mov2int t1, xmmhm, "IMMEDIATE & mask(1)", size=8, ext=1
+    st t1, seg, riprel, disp, dataSize=8
+};
+
+def macroop PINSRB_XMM_R_I {
+    mov2fp xmml, regm, "IMMEDIATE & mask(4)", size=1, ext=1
+    mov2fp xmmh, regm, "IMMEDIATE & mask(4)", size=1, ext=1
+};
+
+def macroop PINSRB_XMM_M_I {
+    ld t1, seg, sib, disp, dataSize=1
+    mov2fp xmml, t1, "IMMEDIATE & mask(4)", size=1, ext=1
+    mov2fp xmmh, t1, "IMMEDIATE & mask(4)", size=1, ext=1
+};
+
+def macroop PINSRB_XMM_P_I {
+    rdip t7
+    ld t1, seg, riprel, disp, dataSize=1
+    mov2fp xmml, t1, "IMMEDIATE & mask(4)", size=1, ext=1
+    mov2fp xmmh, t1, "IMMEDIATE & mask(4)", size=1, ext=1
+};
+
 def macroop PINSRW_XMM_R_I {
     mov2fp xmml, regm, "IMMEDIATE & mask(3)", size=2, ext=1
     mov2fp xmmh, regm, "IMMEDIATE & mask(3)", size=2, ext=1
@@ -56,4 +141,40 @@ def macroop PINSRW_XMM_P_I {
     mov2fp xmml, t1, "IMMEDIATE & mask(3)", size=2, ext=1
     mov2fp xmmh, t1, "IMMEDIATE & mask(3)", size=2, ext=1
 };
+
+def macroop PINSRD_XMM_R_I {
+    mov2fp xmml, regm, "IMMEDIATE & mask(2)", size=4, ext=1
+    mov2fp xmmh, regm, "IMMEDIATE & mask(2)", size=4, ext=1
+};
+
+def macroop PINSRD_XMM_M_I {
+    ld t1, seg, sib, disp, dataSize=4
+    mov2fp xmml, t1, "IMMEDIATE & mask(2)", size=4, ext=1
+    mov2fp xmmh, t1, "IMMEDIATE & mask(2)", size=4, ext=1
+};
+
+def macroop PINSRD_XMM_P_I {
+    rdip t7
+    ld t1, seg, riprel, disp, dataSize=4
+    mov2fp xmml, t1, "IMMEDIATE & mask(2)", size=4, ext=1
+    mov2fp xmmh, t1, "IMMEDIATE & mask(2)", size=4, ext=1
+};
+
+def macroop PINSRQ_XMM_R_I {
+    mov2fp xmml, regm, "IMMEDIATE & mask(1)", size=8, ext=1
+    mov2fp xmmh, regm, "IMMEDIATE & mask(1)", size=8, ext=1
+};
+
+def macroop PINSRQ_XMM_M_I {
+    ld t1, seg, sib, disp, dataSize=8
+    mov2fp xmml, t1, "IMMEDIATE & mask(1)", size=8, ext=1
+    mov2fp xmmh, t1, "IMMEDIATE & mask(1)", size=8, ext=1
+};
+
+def macroop PINSRQ_XMM_P_I {
+    rdip t7
+    ld t1, seg, riprel, disp, dataSize=8
+    mov2fp xmml, t1, "IMMEDIATE & mask(1)", size=8, ext=1
+    mov2fp xmmh, t1, "IMMEDIATE & mask(1)", size=8, ext=1
+};
 """
diff --git a/src/arch/x86/isa/insts/simd128/integer/data_reordering/pack_with_saturation.py b/src/arch/x86/isa/insts/simd128/integer/data_reordering/pack_with_saturation.py
index 2307ecfcef..7457e1fa85 100644
--- a/src/arch/x86/isa/insts/simd128/integer/data_reordering/pack_with_saturation.py
+++ b/src/arch/x86/isa/insts/simd128/integer/data_reordering/pack_with_saturation.py
@@ -76,6 +76,27 @@ def macroop PACKSSWB_XMM_P {
     pack xmmh, ufp1, ufp2, ext=Signed, srcSize=2, destSize=1
 };
 
+def macroop PACKUSDW_XMM_XMM {
+    pack ufp1, xmml, xmmh, ext=0, srcSize=4, destSize=2
+    pack xmmh, xmmlm, xmmhm, ext=0, srcSize=4, destSize=2
+    movfp xmml, ufp1, dataSize=8
+};
+
+def macroop PACKUSDW_XMM_M {
+    ldfp ufp1, seg, sib, "DISPLACEMENT", dataSize=8
+    ldfp ufp2, seg, sib, "DISPLACEMENT + 8", dataSize=8
+    pack xmml, xmml, xmmh, ext=0, srcSize=4, destSize=2
+    pack xmmh, ufp1, ufp2, ext=0, srcSize=4, destSize=2
+};
+
+def macroop PACKUSDW_XMM_P {
+    rdip t7
+    ldfp ufp1, seg, riprel, "DISPLACEMENT", dataSize=8
+    ldfp ufp2, seg, riprel, "DISPLACEMENT + 8", dataSize=8
+    pack xmml, xmml, xmmh, ext=0, srcSize=4, destSize=2
+    pack xmmh, ufp1, ufp2, ext=0, srcSize=4, destSize=2
+};
+
 def macroop PACKUSWB_XMM_XMM {
     pack ufp1, xmml, xmmh, ext=0, srcSize=2, destSize=1
     pack xmmh, xmmlm, xmmhm, ext=0, srcSize=2, destSize=1
@@ -85,8 +106,8 @@ def macroop PACKUSWB_XMM_XMM {
 def macroop PACKUSWB_XMM_M {
     ldfp ufp1, seg, sib, "DISPLACEMENT", dataSize=8
     ldfp ufp2, seg, sib, "DISPLACEMENT + 8", dataSize=8
-    pack xmml, xmml, xmmh, ext=Signed, srcSize=2, destSize=1
-    pack xmmh, ufp1, ufp2, ext=Signed, srcSize=2, destSize=1
+    pack xmml, xmml, xmmh, ext=0, srcSize=2, destSize=1
+    pack xmmh, ufp1, ufp2, ext=0, srcSize=2, destSize=1
 };
 
 def macroop PACKUSWB_XMM_P {
diff --git a/src/arch/x86/isa/insts/simd128/integer/data_reordering/shuffle.py b/src/arch/x86/isa/insts/simd128/integer/data_reordering/shuffle.py
index 0fe09b6d37..946d59f6b5 100644
--- a/src/arch/x86/isa/insts/simd128/integer/data_reordering/shuffle.py
+++ b/src/arch/x86/isa/insts/simd128/integer/data_reordering/shuffle.py
@@ -112,4 +112,124 @@ def macroop PSHUFB_XMM_P {
     movfp xmml, ufp1, dataSize=8
     movfp xmmh, ufp2, dataSize=8
 };
+
+def macroop PBLENDW_XMM_XMM_I {
+    blend xmml, xmmlm, "IMMEDIATE & mask(8)", size=2, ext=0
+    blend xmmh, xmmhm, "IMMEDIATE & mask(8)", size=2, ext=1
+};
+
+def macroop PBLENDW_XMM_M_I {
+    ldfp ufp1, seg, sib, "DISPLACEMENT", dataSize=8
+    ldfp ufp2, seg, sib, "DISPLACEMENT + 8", dataSize=8
+    blend xmml, ufp1, "IMMEDIATE & mask(8)", size=2, ext=0
+    blend xmmh, ufp2, "IMMEDIATE & mask(8)", size=2, ext=1
+};
+
+def macroop PBLENDW_XMM_P_I {
+    rdip t7
+    ldfp ufp1, seg, riprel, "DISPLACEMENT", dataSize=8
+    ldfp ufp2, seg, riprel, "DISPLACEMENT + 8", dataSize=8
+    blend xmml, ufp1, "IMMEDIATE & mask(8)", size=2, ext=0
+    blend xmmh, ufp2, "IMMEDIATE & mask(8)", size=2, ext=1
+};
+
+def macroop BLENDPS_XMM_XMM_I {
+    blend xmml, xmmlm, "IMMEDIATE & mask(4)", size=4, ext=0
+    blend xmmh, xmmhm, "IMMEDIATE & mask(4)", size=4, ext=1
+};
+
+def macroop BLENDPS_XMM_M_I {
+    ldfp ufp1, seg, sib, "DISPLACEMENT", dataSize=8
+    ldfp ufp2, seg, sib, "DISPLACEMENT + 8", dataSize=8
+    blend xmml, ufp1, "IMMEDIATE & mask(4)", size=4, ext=0
+    blend xmmh, ufp2, "IMMEDIATE & mask(4)", size=4, ext=1
+};
+
+def macroop BLENDPS_XMM_P_I {
+    rdip t7
+    ldfp ufp1, seg, riprel, "DISPLACEMENT", dataSize=8
+    ldfp ufp2, seg, riprel, "DISPLACEMENT + 8", dataSize=8
+    blend xmml, ufp1, "IMMEDIATE & mask(4)", size=4, ext=0
+    blend xmmh, ufp2, "IMMEDIATE & mask(4)", size=4, ext=1
+};
+
+def macroop BLENDPD_XMM_XMM_I {
+    blend xmml, xmmlm, "IMMEDIATE & mask(2)", size=8, ext=0
+    blend xmmh, xmmhm, "IMMEDIATE & mask(2)", size=8, ext=1
+};
+
+def macroop BLENDPD_XMM_M_I {
+    ldfp ufp1, seg, sib, "DISPLACEMENT", dataSize=8
+    ldfp ufp2, seg, sib, "DISPLACEMENT + 8", dataSize=8
+    blend xmml, ufp1, "IMMEDIATE & mask(2)", size=8, ext=0
+    blend xmmh, ufp2, "IMMEDIATE & mask(2)", size=8, ext=1
+};
+
+def macroop BLENDPD_XMM_P_I {
+    rdip t7
+    ldfp ufp1, seg, riprel, "DISPLACEMENT", dataSize=8
+    ldfp ufp2, seg, riprel, "DISPLACEMENT + 8", dataSize=8
+    blend xmml, ufp1, "IMMEDIATE & mask(2)", size=8, ext=0
+    blend xmmh, ufp2, "IMMEDIATE & mask(2)", size=8, ext=1
+};
+
+def macroop BLENDVPD_XMM_XMM {
+    blendxmm xmml, xmmlm, fpRegIdx("float_reg::xmmLow(0)"), size=8
+    blendxmm xmmh, xmmhm, fpRegIdx("float_reg::xmmHigh(0)"), size=8
+};
+
+def macroop BLENDVPD_XMM_M {
+    ldfp ufp1, seg, sib, "DISPLACEMENT", dataSize=8
+    ldfp ufp2, seg, sib, "DISPLACEMENT + 8", dataSize=8
+    blendxmm xmml, ufp1, fpRegIdx("float_reg::xmmLow(0)"), size=8
+    blendxmm xmmh, ufp2, fpRegIdx("float_reg::xmmHigh(0)"), size=8
+};
+
+def macroop BLENDVPD_XMM_P {
+    rdip t7
+    ldfp ufp1, seg, riprel, "DISPLACEMENT", dataSize=8
+    ldfp ufp2, seg, riprel, "DISPLACEMENT + 8", dataSize=8
+    blendxmm xmml, ufp1, fpRegIdx("float_reg::xmmLow(0)"), size=8
+    blendxmm xmmh, ufp2, fpRegIdx("float_reg::xmmHigh(0)"), size=8
+};
+
+def macroop BLENDVPS_XMM_XMM {
+    blendxmm xmml, xmmlm, fpRegIdx("float_reg::xmmLow(0)"), size=4
+    blendxmm xmmh, xmmhm, fpRegIdx("float_reg::xmmHigh(0)"), size=4
+};
+
+def macroop BLENDVPS_XMM_M {
+    ldfp ufp1, seg, sib, "DISPLACEMENT", dataSize=8
+    ldfp ufp2, seg, sib, "DISPLACEMENT + 8", dataSize=8
+    blendxmm xmml, ufp1, fpRegIdx("float_reg::xmmLow(0)"), size=4
+    blendxmm xmmh, ufp2, fpRegIdx("float_reg::xmmHigh(0)"), size=4
+};
+
+def macroop BLENDVPS_XMM_P {
+    rdip t7
+    ldfp ufp1, seg, riprel, "DISPLACEMENT", dataSize=8
+    ldfp ufp2, seg, riprel, "DISPLACEMENT + 8", dataSize=8
+    blendxmm xmml, ufp1, fpRegIdx("float_reg::xmmLow(0)"), size=4
+    blendxmm xmmh, ufp2, fpRegIdx("float_reg::xmmHigh(0)"), size=4
+};
+
+def macroop PBLENDVB_XMM_XMM {
+    blendxmm xmml, xmmlm, fpRegIdx("float_reg::xmmLow(0)"), size=1
+    blendxmm xmmh, xmmhm, fpRegIdx("float_reg::xmmHigh(0)"), size=1
+};
+
+def macroop PBLENDVB_XMM_M {
+    ldfp ufp1, seg, sib, "DISPLACEMENT", dataSize=8
+    ldfp ufp2, seg, sib, "DISPLACEMENT + 8", dataSize=8
+    blendxmm xmml, ufp1, fpRegIdx("float_reg::xmmLow(0)"), size=1
+    blendxmm xmmh, ufp2, fpRegIdx("float_reg::xmmHigh(0)"), size=1
+};
+
+def macroop PBLENDVB_XMM_P {
+    rdip t7
+    ldfp ufp1, seg, riprel, "DISPLACEMENT", dataSize=8
+    ldfp ufp2, seg, riprel, "DISPLACEMENT + 8", dataSize=8
+    blendxmm xmml, ufp1, fpRegIdx("float_reg::xmmLow(0)"), size=1
+    blendxmm xmmh, ufp2, fpRegIdx("float_reg::xmmHigh(0)"), size=1
+};
 """
diff --git a/src/arch/x86/isa/insts/simd128/integer/data_transfer/__init__.py b/src/arch/x86/isa/insts/simd128/integer/data_transfer/__init__.py
index a539b156fc..4117b59325 100644
--- a/src/arch/x86/isa/insts/simd128/integer/data_transfer/__init__.py
+++ b/src/arch/x86/isa/insts/simd128/integer/data_transfer/__init__.py
@@ -33,7 +33,7 @@
 # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 
-categories = ["move", "move_non_temporal", "move_mask"]
+categories = ["move", "move_non_temporal", "move_mask", "move_with_shift"]
 
 microcode = """
 # 128 bit multimedia and scientific data transfer instructions
diff --git a/src/arch/x86/isa/insts/simd128/integer/data_transfer/move.py b/src/arch/x86/isa/insts/simd128/integer/data_transfer/move.py
index 576b5dc81c..fb2055ec97 100644
--- a/src/arch/x86/isa/insts/simd128/integer/data_transfer/move.py
+++ b/src/arch/x86/isa/insts/simd128/integer/data_transfer/move.py
@@ -132,4 +132,233 @@ def macroop LDDQU_XMM_P {
     ldfp xmml, seg, sib, "DISPLACEMENT", dataSize=8
     ldfp xmmh, seg, sib, "DISPLACEMENT + 8", dataSize=8
 };
+
+def macroop PMOVSXDQ_XMM_XMM {
+    movfp ufp1, xmmlm, dataSize=8
+    extmove xmml, ufp1, destSize=8, srcSize=4, ext = Signed
+    extmove xmmh, ufp1, destSize=8, srcSize=4, ext = Signed + "|" + PartHi
+};
+
+def macroop PMOVSXDQ_XMM_M {
+    ldfp ufp1, seg, sib, "DISPLACEMENT", dataSize=8
+    extmove xmml, ufp1, destSize=8, srcSize=4, ext = Signed
+    extmove xmmh, ufp1, destSize=8, srcSize=4, ext = Signed + "|" + PartHi
+};
+
+def macroop PMOVSXDQ_XMM_P {
+    rdip t7
+    ldfp ufp1, seg, riprel, "DISPLACEMENT", dataSize=8
+    extmove xmml, ufp1, destSize=8, srcSize=4, ext = Signed
+    extmove xmmh, ufp1, destSize=8, srcSize=4, ext = Signed + "|" + PartHi
+};
+
+def macroop PMOVSXWQ_XMM_XMM {
+    movfp ufp1, xmmlm, dataSize=8
+    extmove xmml, ufp1, destSize=8, srcSize=2, ext = Signed
+    extmove xmmh, ufp1, destSize=8, srcSize=2, ext = Signed + "|" + PartHi
+};
+
+def macroop PMOVSXWQ_XMM_M {
+    ldfp ufp1, seg, sib, "DISPLACEMENT", dataSize=4
+    extmove xmml, ufp1, destSize=8, srcSize=2, ext = Signed
+    extmove xmmh, ufp1, destSize=8, srcSize=2, ext = Signed + "|" + PartHi
+};
+
+def macroop PMOVSXWQ_XMM_P {
+    rdip t7
+    ldfp ufp1, seg, riprel, "DISPLACEMENT", dataSize=4
+    extmove xmml, ufp1, destSize=8, srcSize=2, ext = Signed
+    extmove xmmh, ufp1, destSize=8, srcSize=2, ext = Signed + "|" + PartHi
+};
+
+def macroop PMOVSXWD_XMM_XMM {
+    movfp ufp1, xmmlm, dataSize=8
+    extmove xmml, ufp1, destSize=4, srcSize=2, ext = Signed
+    extmove xmmh, ufp1, destSize=4, srcSize=2, ext = Signed + "|" + PartHi
+};
+
+def macroop PMOVSXWD_XMM_M {
+    ldfp ufp1, seg, sib, "DISPLACEMENT", dataSize=8
+    extmove xmml, ufp1, destSize=4, srcSize=2, ext = Signed
+    extmove xmmh, ufp1, destSize=4, srcSize=2, ext = Signed + "|" + PartHi
+};
+
+def macroop PMOVSXWD_XMM_P {
+    rdip t7
+    ldfp ufp1, seg, riprel, "DISPLACEMENT", dataSize=8
+    extmove xmml, ufp1, destSize=4, srcSize=2, ext = Signed
+    extmove xmmh, ufp1, destSize=4, srcSize=2, ext = Signed + "|" + PartHi
+};
+
+def macroop PMOVSXBQ_XMM_XMM {
+    movfp ufp1, xmmlm, dataSize=8
+    extmove xmml, ufp1, destSize=8, srcSize=1, ext = Signed
+    extmove xmmh, ufp1, destSize=8, srcSize=1, ext = Signed + "|" + PartHi
+};
+
+def macroop PMOVSXBQ_XMM_M {
+    ldfp ufp1, seg, sib, "DISPLACEMENT", dataSize=2
+    extmove xmml, ufp1, destSize=8, srcSize=1, ext = Signed
+    extmove xmmh, ufp1, destSize=8, srcSize=1, ext = Signed + "|" + PartHi
+};
+
+def macroop PMOVSXBQ_XMM_P {
+    rdip t7
+    ldfp ufp1, seg, riprel, "DISPLACEMENT", dataSize=2
+    extmove xmml, ufp1, destSize=8, srcSize=1, ext = Signed
+    extmove xmmh, ufp1, destSize=8, srcSize=1, ext = Signed + "|" + PartHi
+};
+
+def macroop PMOVSXBD_XMM_XMM {
+    movfp ufp1, xmmlm, dataSize=8
+    extmove xmml, ufp1, destSize=4, srcSize=1, ext = Signed
+    extmove xmmh, ufp1, destSize=4, srcSize=1, ext = Signed + "|" + PartHi
+};
+
+def macroop PMOVSXBD_XMM_M {
+    ldfp ufp1, seg, sib, "DISPLACEMENT", dataSize=4
+    extmove xmml, ufp1, destSize=4, srcSize=1, ext = Signed
+    extmove xmmh, ufp1, destSize=4, srcSize=1, ext = Signed + "|" + PartHi
+};
+
+def macroop PMOVSXBD_XMM_P {
+    rdip t7
+    ldfp ufp1, seg, riprel, "DISPLACEMENT", dataSize=4
+    extmove xmml, ufp1, destSize=4, srcSize=1, ext = Signed
+    extmove xmmh, ufp1, destSize=4, srcSize=1, ext = Signed + "|" + PartHi
+};
+
+def macroop PMOVSXBW_XMM_XMM {
+    movfp ufp1, xmmlm, dataSize=8
+    extmove xmml, ufp1, destSize=2, srcSize=1, ext = Signed
+    extmove xmmh, ufp1, destSize=2, srcSize=1, ext = Signed + "|" + PartHi
+};
+
+def macroop PMOVSXBW_XMM_M {
+    ldfp ufp1, seg, sib, "DISPLACEMENT", dataSize=8
+    extmove xmml, ufp1, destSize=2, srcSize=1, ext = Signed
+    extmove xmmh, ufp1, destSize=2, srcSize=1, ext = Signed + "|" + PartHi
+};
+
+def macroop PMOVSXBW_XMM_P {
+    rdip t7
+    ldfp ufp1, seg, riprel, "DISPLACEMENT", dataSize=8
+    extmove xmml, ufp1, destSize=2, srcSize=1, ext = Signed
+    extmove xmmh, ufp1, destSize=2, srcSize=1, ext = Signed + "|" + PartHi
+};
+
+
+def macroop PMOVZXDQ_XMM_XMM {
+    movfp ufp1, xmmlm, dataSize=8
+    extmove xmml, ufp1, destSize=8, srcSize=4
+    extmove xmmh, ufp1, destSize=8, srcSize=4, ext=PartHi
+};
+
+def macroop PMOVZXDQ_XMM_M {
+    ldfp ufp1, seg, sib, "DISPLACEMENT", dataSize=8
+    extmove xmml, ufp1, destSize=8, srcSize=4
+    extmove xmmh, ufp1, destSize=8, srcSize=4, ext=PartHi
+};
+
+def macroop PMOVZXDQ_XMM_P {
+    rdip t7
+    ldfp ufp1, seg, riprel, "DISPLACEMENT", dataSize=8
+    extmove xmml, ufp1, destSize=8, srcSize=4
+    extmove xmmh, ufp1, destSize=8, srcSize=4, ext=PartHi
+};
+
+def macroop PMOVZXWQ_XMM_XMM {
+    movfp ufp1, xmmlm, dataSize=8
+    extmove xmml, ufp1, destSize=8, srcSize=2
+    extmove xmmh, ufp1, destSize=8, srcSize=2, ext=PartHi
+};
+
+def macroop PMOVZXWQ_XMM_M {
+    ldfp ufp1, seg, sib, "DISPLACEMENT", dataSize=4
+    extmove xmml, ufp1, destSize=8, srcSize=2
+    extmove xmmh, ufp1, destSize=8, srcSize=2, ext=PartHi
+};
+
+def macroop PMOVZXWQ_XMM_P {
+    rdip t7
+    ldfp ufp1, seg, riprel, "DISPLACEMENT", dataSize=4
+    extmove xmml, ufp1, destSize=8, srcSize=2
+    extmove xmmh, ufp1, destSize=8, srcSize=2, ext=PartHi
+};
+
+def macroop PMOVZXWD_XMM_XMM {
+    movfp ufp1, xmmlm, dataSize=8
+    extmove xmml, ufp1, destSize=4, srcSize=2
+    extmove xmmh, ufp1, destSize=4, srcSize=2, ext=PartHi
+};
+
+def macroop PMOVZXWD_XMM_M {
+    ldfp ufp1, seg, sib, "DISPLACEMENT", dataSize=8
+    extmove xmml, ufp1, destSize=4, srcSize=2
+    extmove xmmh, ufp1, destSize=4, srcSize=2, ext=PartHi
+};
+
+def macroop PMOVZXWD_XMM_P {
+    rdip t7
+    ldfp ufp1, seg, riprel, "DISPLACEMENT", dataSize=8
+    extmove xmml, ufp1, destSize=4, srcSize=2
+    extmove xmmh, ufp1, destSize=4, srcSize=2, ext=PartHi
+};
+
+def macroop PMOVZXBQ_XMM_XMM {
+    movfp ufp1, xmmlm, dataSize=8
+    extmove xmml, ufp1, destSize=8, srcSize=1
+    extmove xmmh, ufp1, destSize=8, srcSize=1, ext=PartHi
+};
+
+def macroop PMOVZXBQ_XMM_M {
+    ldfp ufp1, seg, sib, "DISPLACEMENT", dataSize=2
+    extmove xmml, ufp1, destSize=8, srcSize=1
+    extmove xmmh, ufp1, destSize=8, srcSize=1, ext=PartHi
+};
+
+def macroop PMOVZXBQ_XMM_P {
+    rdip t7
+    ldfp ufp1, seg, riprel, "DISPLACEMENT", dataSize=2
+    extmove xmml, ufp1, destSize=8, srcSize=1
+    extmove xmmh, ufp1, destSize=8, srcSize=1, ext=PartHi
+};
+
+def macroop PMOVZXBD_XMM_XMM {
+    movfp ufp1, xmmlm, dataSize=8
+    extmove xmml, ufp1, destSize=4, srcSize=1
+    extmove xmmh, ufp1, destSize=4, srcSize=1, ext=PartHi
+};
+
+def macroop PMOVZXBD_XMM_M {
+    ldfp ufp1, seg, sib, "DISPLACEMENT", dataSize=4
+    extmove xmml, ufp1, destSize=4, srcSize=1
+    extmove xmmh, ufp1, destSize=4, srcSize=1, ext=PartHi
+};
+
+def macroop PMOVZXBD_XMM_P {
+    rdip t7
+    ldfp ufp1, seg, riprel, "DISPLACEMENT", dataSize=4
+    extmove xmml, ufp1, destSize=4, srcSize=1
+    extmove xmmh, ufp1, destSize=4, srcSize=1, ext=PartHi
+};
+
+def macroop PMOVZXBW_XMM_XMM {
+    movfp ufp1, xmmlm, dataSize=8
+    extmove xmml, ufp1, destSize=2, srcSize=1
+    extmove xmmh, ufp1, destSize=2, srcSize=1, ext=PartHi
+};
+
+def macroop PMOVZXBW_XMM_M {
+    ldfp ufp1, seg, sib, "DISPLACEMENT", dataSize=8
+    extmove xmml, ufp1, destSize=2, srcSize=1
+    extmove xmmh, ufp1, destSize=2, srcSize=1, ext=PartHi
+};
+
+def macroop PMOVZXBW_XMM_P {
+    rdip t7
+    ldfp ufp1, seg, riprel, "DISPLACEMENT", dataSize=8
+    extmove xmml, ufp1, destSize=2, srcSize=1
+    extmove xmmh, ufp1, destSize=2, srcSize=1, ext=PartHi
+};
 """
diff --git a/src/arch/x86/isa/insts/simd128/integer/data_transfer/move_with_shift.py b/src/arch/x86/isa/insts/simd128/integer/data_transfer/move_with_shift.py
new file mode 100644
index 0000000000..155e46b2ea
--- /dev/null
+++ b/src/arch/x86/isa/insts/simd128/integer/data_transfer/move_with_shift.py
@@ -0,0 +1,59 @@
+# Copyright (c) 2007 The Hewlett-Packard Development Company
+# All rights reserved.
+#
+# The license below extends only to copyright in the software and shall
+# not be construed as granting a license to any other intellectual
+# property including but not limited to intellectual property relating
+# to a hardware implementation of the functionality of the software
+# licensed hereunder.  You may use the software subject to the license
+# terms below provided that you ensure that this notice is replicated
+# unmodified and in its entirety in all distributions of the software,
+# modified or unmodified, in source code or in binary form.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are
+# met: redistributions of source code must retain the above copyright
+# notice, this list of conditions and the following disclaimer;
+# redistributions in binary form must reproduce the above copyright
+# notice, this list of conditions and the following disclaimer in the
+# documentation and/or other materials provided with the distribution;
+# neither the name of the copyright holders nor the names of its
+# contributors may be used to endorse or promote products derived from
+# this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.1
+
+microcode = """
+def macroop PALIGNR_XMM_XMM_I {
+    movfp ufp1, xmml, dataSize=8
+    palignr xmml, xmmh, xmmlm, xmmhm, "IMMEDIATE", size=8
+    palignr xmmh, ufp1, xmmlm, xmmhm, "IMMEDIATE", size=8, ext=PartHi
+};
+
+def macroop PALIGNR_XMM_M_I {
+    ldfp ufp2, seg, sib, "DISPLACEMENT", dataSize=8
+    ldfp ufp3, seg, sib, "DISPLACEMENT + 8", dataSize=8
+    movfp ufp1, xmml, dataSize=8
+    palignr xmml, xmmh, ufp2, ufp3, "IMMEDIATE", size=8
+    palignr xmmh, ufp1, ufp2, ufp3, "IMMEDIATE", size=8, ext=PartHi
+};
+
+def macroop PALIGNR_XMM_P_I {
+    rdip t7
+    ldfp ufp2, seg, riprel, "DISPLACEMENT", dataSize=8
+    ldfp ufp3, seg, riprel, "DISPLACEMENT + 8", dataSize=8
+    movfp ufp1, xmml, dataSize=8
+    palignr xmml, xmmh, ufp2, ufp3, "IMMEDIATE", size=8
+    palignr xmmh, ufp1, ufp2, ufp3, "IMMEDIATE", size=8, ext=PartHi
+};
+"""
diff --git a/src/arch/x86/isa/microasm.isa b/src/arch/x86/isa/microasm.isa
index 632cb07c92..a5ff1d179f 100644
--- a/src/arch/x86/isa/microasm.isa
+++ b/src/arch/x86/isa/microasm.isa
@@ -187,7 +187,7 @@ let {{
             assembler.symbols[reg] = \
                 ctrlRegIdx(f"misc_reg::{reg.capitalize()}")
 
-    for flag in ('Scalar', 'MultHi', 'Signed'):
+    for flag in ('Scalar', 'MultHi', 'Signed', 'PartHi'):
         assembler.symbols[flag] = 'Media%sOp' % flag
 
     # Code literal which forces a default 64 bit operand size in 64 bit mode.
diff --git a/src/arch/x86/isa/microops/base.isa b/src/arch/x86/isa/microops/base.isa
index aded50b472..a4fee547b6 100644
--- a/src/arch/x86/isa/microops/base.isa
+++ b/src/arch/x86/isa/microops/base.isa
@@ -105,6 +105,8 @@ let {{
         idx_name = 'Src1'
     class Src2Op(object):
         idx_name = 'Src2'
+    class Src3Op(object):
+        idx_name = 'Src3'
 
     class RegisterOp(object):
         def __init__(self, it):
@@ -189,6 +191,9 @@ let {{
     class IntSrc2Op(IntOp, Src2Op, Operand):
         pass
 
+    class FloatSrc3Op(FloatOp, Src3Op, Operand):
+        pass
+
     class Op2(object):
         @classmethod
         def isDual(cls):
@@ -198,6 +203,13 @@ let {{
         FloatType = FloatSrc2Op
         ImmType = Imm8Op
 
+    class Op3(object):
+        @classmethod
+        def isDual(cls):
+            return False
+
+        FloatType = FloatSrc3Op
+
     class X86Microop(object):
 
         generatorNameTemplate = "generate_%s_%d"
diff --git a/src/arch/x86/isa/microops/mediaop.isa b/src/arch/x86/isa/microops/mediaop.isa
index 5fcf1d378b..599b5faef5 100644
--- a/src/arch/x86/isa/microops/mediaop.isa
+++ b/src/arch/x86/isa/microops/mediaop.isa
@@ -1554,4 +1554,341 @@ let {{
             super().__init__(size=2)
         op_class = 'FloatMiscOp'
         code = 'FTW = 0xFFFF;'
+
+    class Blend(Media3Op):
+        def __init__(self, dest, src1, src2=0, **kwargs):
+            super().__init__(dest, src1, src2, **kwargs)
+        operand_types = (FloatDestOp, FloatSrc1Op, Imm8Op)
+        op_class = 'SimdMiscOp'
+        code = '''
+            assert(srcSize == destSize);
+            int size = srcSize;
+            int sizeBits = size * 8;
+            int items = sizeof(double) / size;
+            int offset = ext ? items : 0;
+
+            for (int i = 0; i < items; i++) {
+                int hiIndex = (i + 1) * sizeBits - 1;
+                int loIndex = (i + 0) * sizeBits;
+
+                if (bits(imm8, i + offset)) {
+                    uint64_t resBits = bits(FpSrcReg1_uqw, hiIndex, loIndex);
+                    FpDestReg_uqw =
+                        insertBits(FpDestReg_uqw, hiIndex, loIndex, resBits);
+                } else {
+                    // do nothing
+                }
+            }
+        '''
+
+    class Extmove(Media2Op):
+        op_class = 'SimdMiscOp'
+        code = '''
+            int items = sizeof(double) / destSize;
+            int offset = partHi() ? items : 0;
+            int srcBits = srcSize * 8;
+            int destBits = destSize * 8;
+
+            for (int i = 0; i < items; i++) {
+                int hiIndexSrc = (i + offset + 1) * srcBits - 1;
+                int loIndexSrc = (i + offset + 0) * srcBits;
+                uint64_t resBits = bits(FpSrcReg1_uqw, hiIndexSrc, loIndexSrc);
+                if (signedOp()) {
+                    resBits = sext(resBits, srcBits);
+                } else {
+                    // do nothing, already zero-extended
+                }
+
+                int hiIndexDest = (i + 1) * destBits - 1;
+                int loIndexDest = (i + 0) * destBits;
+                FpDestReg_uqw =
+                    insertBits(FpDestReg_uqw, hiIndexDest,
+                               loIndexDest, resBits);
+            }
+        '''
+
+    class Blendxmm(Media3Op):
+        op_class = 'SimdMiscOp'
+        operand_types = (FloatDestOp, FloatSrc1Op, FloatSrc2Op)
+        code = '''
+            int size = srcSize;
+            int sizeBits = size * 8;
+            int items = sizeof(double) / size;
+
+            for (int i = 0; i < items; i++) {
+
+                int maskBit = bits(FpSrcReg2_uqw, (i + 1) * sizeBits - 1);
+
+                if (maskBit) {
+                    int hiIndex = (i + 1) * sizeBits - 1;
+                    int loIndex = (i + 0) * sizeBits;
+                    uint64_t resBits = bits(FpSrcReg1_uqw, hiIndex, loIndex);
+                    FpDestReg_uqw =
+                        insertBits(FpDestReg_uqw, hiIndex, loIndex, resBits);
+                } else {
+                    // do nothing
+                }
+            }
+        '''
+
+
+    class Palignr(MediaOp):
+        operand_types = (FloatDestOp, FloatSrc1Op,
+            FloatSrc2Op, FloatSrc3Op, Imm8Op)
+        def __init__(self, op1, op2, op3, op4, op5, **kwargs):
+            super().__init__(op1, op2, op3, op4, op5, **kwargs)
+        op_class = 'SimdMiscOp'
+        code = '''
+            int zeroRegisterBytes = partHi() ? 24 : 32;
+            if (imm8 >= zeroRegisterBytes) {
+                FpDestReg_uqw = 0;
+            } else if (imm8 == 16) {
+                FpDestReg_uqw = FpDestReg_uqw;
+            } else if (imm8 == 8) {
+                FpDestReg_uqw = partHi() ? FpSrcReg1_uqw : FpSrcReg3_uqw;
+            } else if (imm8 == 0) {
+                FpDestReg_uqw = partHi() ? FpSrcReg3_uqw : FpSrcReg2_uqw;
+            } else {
+                int shift = imm8 * 8;
+                uint64_t firstFpSrcReg;
+                uint64_t secondFpSrcReg;
+                if (imm8 < 8) {
+                    firstFpSrcReg = partHi() ? FpSrcReg3_uqw : FpSrcReg2_uqw;
+                    secondFpSrcReg = partHi() ? FpSrcReg1_uqw : FpSrcReg3_uqw;
+                } else if (imm8 < 16) {
+                    shift -=64;
+                    firstFpSrcReg = partHi() ? FpSrcReg1_uqw : FpSrcReg3_uqw;
+                    secondFpSrcReg = partHi() ? FpDestReg_uqw : FpDestReg_uqw;
+                } else if (imm8 < 24) {
+                    shift = partHi() ? 192 - shift : shift - 128;
+                    firstFpSrcReg = partHi() ? 0 : FpDestReg_uqw;
+                    secondFpSrcReg = partHi() ?
+                        FpDestReg_uqw >> (64 - shift) : FpSrcReg1_uqw;
+                } else { // < 32
+                    shift = 256 - shift;
+                    firstFpSrcReg = 0;
+                    secondFpSrcReg = FpSrcReg1_uqw >> (64 - shift);
+                }
+
+                FpDestReg_uqw = firstFpSrcReg >> shift;
+                int hiIndex = 63;
+                int loIndex = 64 - shift;
+                FpDestReg_uqw = insertBits(FpDestReg_uqw, hiIndex,
+                                           loIndex, secondFpSrcReg);
+            }
+        '''
+
+    class Extractps(Media3Op):
+        op_class = 'SimdMiscOp'
+        operand_types = (IntDestOp, FloatSrc1Op, Imm8Op)
+        code = '''
+            if (imm8 < 2 && !partHi()) {
+                DestReg = (FpSrcReg1_uqw >> (32 * imm8)) & 0xFFFFFFFF;
+            } else if (imm8 >= 2 && partHi()) {
+                int shift = imm8 - 2;
+                DestReg = (FpSrcReg1_uqw >> (32 * shift)) & 0xFFFFFFFF;
+            }
+        '''
+
+    class Phminposuw(Media3Op):
+        op_class = 'SimdMiscOp'
+        code = '''
+            int minIndex = 0;
+            uint64_t min = 0xFFFFF;
+            int destBits = destSize * 8;
+
+            for (int i = 0; i < 128; i += destBits) {
+                uint64_t FpSrcReg = i < 64 ? FpSrcReg1_uqw : FpSrcReg2_uqw;
+                uint16_t value = bits(FpSrcReg, i + destBits - 1, i);
+                if (value < min) {
+                    min = value;
+                    minIndex = i / destBits;
+                }
+            }
+
+            FpDestReg_uqw = min;
+            FpDestReg_uqw = insertBits(FpDestReg_uqw, 64, destBits, minIndex);
+        '''
+
+    class Insertps(MediaOp):
+        operand_types = (FloatDestOp, FloatSrc1Op,
+            FloatSrc2Op, FloatSrc3Op, Imm8Op)
+        def __init__(self, op1, op2, op3, op4, op5, **kwargs):
+            super().__init__(op1, op2, op3, op4, op5, **kwargs)
+        op_class = 'SimdMiscOp'
+        code = '''
+            int countS = bits(imm8, 7, 6);
+            int countD = bits(imm8, 5, 4);
+            int zmask = bits(imm8, 3, 0);
+
+            uint64_t tmp = 0;
+            switch (countS) {
+                case 0: {
+                    tmp = bits(FpSrcReg2_uqw, 31, 0);
+                    break;
+                }
+                case 1: {
+                    tmp = bits(FpSrcReg2_uqw, 63, 32);
+                    break;
+                }
+                case 2: {
+                    tmp = bits(FpSrcReg3_uqw, 31, 0);
+                    break;
+                }
+                case 3: {
+                    tmp = bits(FpSrcReg3_uqw, 63, 32);
+                    break;
+                }
+            }
+
+            uint64_t tmp2l = partHi() ? FpSrcReg1_uqw : FpDestReg_uqw;
+            uint64_t tmp2h = partHi() ? FpDestReg_uqw : FpSrcReg1_uqw;
+
+            switch (countD) {
+                case 0: {
+                    tmp2l = insertBits(tmp2l, 31, 0, tmp);
+                    break;
+                }
+                case 1: {
+                    tmp2l = insertBits(tmp2l, 63, 32, tmp);
+                    break;
+                }
+                case 2: {
+                    tmp2h = insertBits(tmp2h, 31, 0, tmp);
+                    break;
+                }
+                case 3: {
+                    tmp2h = insertBits(tmp2h, 63, 32, tmp);
+                    break;
+                }
+            }
+
+            if (!partHi()) {
+                if (bits(zmask, 0)) {
+                    tmp2l = insertBits(tmp2l, 31, 0, 0);
+                }
+                if (bits(zmask, 1)) {
+                    tmp2l = insertBits(tmp2l, 63, 32, 0);
+                }
+                FpDestReg_uqw = tmp2l;
+            } else {
+                if (bits(zmask, 2)) {
+                    tmp2h = insertBits(tmp2h, 31, 0, 0);
+                }
+                if (bits(zmask, 3)) {
+                    tmp2h = insertBits(tmp2h, 63, 32, 0);
+                }
+                FpDestReg_uqw = tmp2h;
+            }
+
+        '''
+
+    class Rounds(MediaOp):
+        operand_types = (FloatDestOp, FloatSrc1Op, IntSrc2Op, Imm8Op)
+        def __init__(self, op1, op2, op3, op4, **kwargs):
+            super().__init__(op1, op2, op3, op4, **kwargs)
+        op_class = 'SimdMiscOp'
+        code = '''
+            bool isMXCSR = bits(imm8, 2);
+            int roundingMode = 0;
+            if (isMXCSR) {
+                roundingMode = bits(imm8, 14, 13);
+            } else {
+                roundingMode = bits(imm8, 1, 0);
+            }
+
+            union floatInt
+            {
+                float f;
+                uint32_t i;
+            };
+            union doubleInt
+            {
+                double d;
+                uint64_t i;
+            };
+
+            double arg;
+            if (srcSize == 4) {
+                floatInt fi;
+                fi.i = bits(FpSrcReg1_uqw, 31, 0);
+                arg = fi.f;
+            } else {
+                doubleInt di;
+                di.i = bits(FpSrcReg1_uqw, 63, 0);
+                arg = di.d;
+            }
+
+            switch (roundingMode) {
+                case 0: {
+                    // to nearest
+                    arg = std::round(arg);
+                    break;
+                }
+                case 1: {
+                    // down
+                    arg = std::floor(arg);
+                    break;
+                }
+                case 2: {
+                    // up
+                    arg = std::ceil(arg);
+                    break;
+                }
+                case 3: {
+                    // to 0
+                    arg = std::trunc(arg);
+                    break;
+                }
+            }
+
+            int destHiIndex = 0;
+            uint64_t argBits = 0;
+            if (destSize == 4) {
+                floatInt convertBack;
+                convertBack.f = arg;
+                argBits = convertBack.i;
+                destHiIndex = 31;
+            } else {
+                doubleInt convertBack;
+                convertBack.d = arg;
+                argBits  = convertBack.i;
+                destHiIndex = 63;
+            }
+            FpDestReg_uqw = insertBits(FpDestReg_uqw, destHiIndex, 0, argBits);
+        '''
+
+    class Pabs(Media2Op):
+        op_class = 'SimdMiscOp'
+        code = '''
+            int size = srcSize;
+            int sizeBits = size * 8;
+            int items = numItems(size);
+
+            for (int i = 0; i < items; i++) {
+                int hiIndex = (i + 1) * sizeBits - 1;
+                int loIndex = (i + 0) * sizeBits;
+                uint64_t arg1Bits = bits(FpSrcReg1_uqw, hiIndex, loIndex);
+                if (bits(arg1Bits, sizeBits - 1)) {
+                    if (size == 1) {
+                        uint8_t tmpBits = (uint8_t)arg1Bits;
+                        tmpBits = ~tmpBits + 1;
+                        arg1Bits = tmpBits;
+                    } else if (size == 2) {
+                        uint16_t tmpBits = (uint16_t)arg1Bits;
+                        tmpBits = ~tmpBits + 1;
+                        arg1Bits = tmpBits;
+                    } else if (size == 4) {
+                        uint32_t tmpBits = (uint32_t)arg1Bits;
+                        tmpBits = ~tmpBits + 1;
+                        arg1Bits = tmpBits;
+                    } else {
+                        assert(false);
+                    }
+                }
+
+                FpDestReg_uqw =
+                    insertBits(FpDestReg_uqw, hiIndex, loIndex, arg1Bits);
+            }
+        '''
 }};
diff --git a/src/arch/x86/isa/operands.isa b/src/arch/x86/isa/operands.isa
index d8bc947085..ef635c50cc 100644
--- a/src/arch/x86/isa/operands.isa
+++ b/src/arch/x86/isa/operands.isa
@@ -165,8 +165,9 @@ def operands {{
         'R9':            IntReg('X86ISA::int_reg::R9', 21),
         'FpSrcReg1':     FloatReg('src1', 22),
         'FpSrcReg2':     FloatReg('src2', 23),
-        'FpDestReg':     FloatReg('dest', 24),
-        'FpData':        FloatReg('data', 25),
+        'FpSrcReg3':     FloatReg('src3', 24),
+        'FpDestReg':     FloatReg('dest', 25),
+        'FpData':        FloatReg('data', 26),
         'RIP':           PCStateOp('uqw', 'pc',
                           (None, None, 'IsControl'), 50),
         'NRIP':          PCStateOp('uqw', 'npc',
diff --git a/src/base/bitfield.hh b/src/base/bitfield.hh
index 288c5cabe9..eecea02981 100644
--- a/src/base/bitfield.hh
+++ b/src/base/bitfield.hh
@@ -131,6 +131,22 @@ sext(uint64_t val)
     return val;
 }
 
+/**
+ * Sign-extend an N-bit value to 64 bits. Assumes all bits past the sign are
+ * currently zero. For true sign extension regardless of the value of the sign
+ * bit, see szext.
+ *
+ * @ingroup api_bitfield
+ */
+constexpr uint64_t
+sext(uint64_t val, int N)
+{
+    bool sign_bit = bits(val, N - 1);
+    if (sign_bit)
+        val |= ~mask(N);
+    return val;
+}
+
 /**
  * Sign-extend an N-bit value to 64 bits. Zero any bits past the sign if
  * necessary.

From fa0795ff5b923e753823abaff1ef46d11f4cc518 Mon Sep 17 00:00:00 2001
From: Roger Chang <rogerycchang@google.com>
Date: Tue, 14 Mar 2023 11:19:51 +0800
Subject: [PATCH 262/492] arch-riscv: Add new misa bit union

The new misa bit union type can help get and set misa CSR more
clearily

Change-Id: Id48b140968a0e8021b09782815aa612b409ac75b
Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/68917
Reviewed-by: Bobby Bruce <bbruce@ucdavis.edu>
Maintainer: Jason Lowe-Power <power.jg@gmail.com>
Tested-by: kokoro <noreply+kokoro@google.com>
Reviewed-by: Hoa Nguyen <hoanguyen@ucdavis.edu>
---
 src/arch/riscv/isa.cc       | 43 ++++++++++++++++++++++++-------------
 src/arch/riscv/regs/misc.hh | 31 ++++++++++++++++++++++++++
 2 files changed, 59 insertions(+), 15 deletions(-)

diff --git a/src/arch/riscv/isa.cc b/src/arch/riscv/isa.cc
index 7964de51ec..d744fe369b 100644
--- a/src/arch/riscv/isa.cc
+++ b/src/arch/riscv/isa.cc
@@ -287,21 +287,33 @@ void ISA::clear()
     miscRegFile[MISCREG_VENDORID] = 0;
     miscRegFile[MISCREG_ARCHID] = 0;
     miscRegFile[MISCREG_IMPID] = 0;
+
+    MISA misa = 0;
+    STATUS status = 0;
+
+    // default config arch isa string is rv64(32)imafdc
+    misa.rvi = misa.rvm = misa.rva = misa.rvf = misa.rvd = misa.rvc = 1;
+    // default privlege modes if MSU
+    misa.rvs = misa.rvu = 1;
+
+    // mark FS is initial
+    status.fs = INITIAL;
+
     // rv_type dependent init.
     switch (rv_type) {
         case RV32:
-            miscRegFile[MISCREG_ISA] = (1ULL << MXL_OFFSETS[RV32]) | 0x14112D;
-            miscRegFile[MISCREG_STATUS] = (1ULL << FS_OFFSET);
-            break;
+          misa.rv32_mxl = 1;
+          break;
         case RV64:
-            miscRegFile[MISCREG_ISA] = (2ULL << MXL_OFFSETS[RV64]) | 0x14112D;
-            miscRegFile[MISCREG_STATUS] = (2ULL << UXL_OFFSET) |
-                                          (2ULL << SXL_OFFSET) |
-                                          (1ULL << FS_OFFSET);
-            break;
+          misa.rv64_mxl = 2;
+          status.uxl = status.sxl = 2;
+          break;
         default:
-            panic("%s: Unknown rv_type: %d", name(), (int)rv_type);
+          panic("%s: Unknown rv_type: %d", name(), (int)rv_type);
     }
+
+    miscRegFile[MISCREG_ISA] = misa;
+    miscRegFile[MISCREG_STATUS] = status;
     miscRegFile[MISCREG_MCOUNTEREN] = 0x7;
     miscRegFile[MISCREG_SCOUNTEREN] = 0x7;
     // don't set it to zero; software may try to determine the supported
@@ -425,10 +437,10 @@ ISA::readMiscReg(RegIndex idx)
       case MISCREG_SEPC:
       case MISCREG_MEPC:
         {
-            auto misa = readMiscRegNoEffect(MISCREG_ISA);
+            MISA misa = readMiscRegNoEffect(MISCREG_ISA);
             auto val = readMiscRegNoEffect(idx);
             // if compressed instructions are disabled, epc[1] is set to 0
-            if ((misa & ISA_EXT_C_MASK) == 0)
+            if (misa.rvc == 0)
                 return mbits(val, 63, 2);
             // epc[0] is always 0
             else
@@ -617,15 +629,16 @@ ISA::setMiscReg(RegIndex idx, RegVal val)
             break;
           case MISCREG_ISA:
             {
-                auto cur_val = readMiscRegNoEffect(idx);
+                MISA cur_misa = (MISA)readMiscRegNoEffect(MISCREG_ISA);
+                MISA new_misa = (MISA)val;
                 // only allow to disable compressed instructions
                 // if the following instruction is 4-byte aligned
-                if ((val & ISA_EXT_C_MASK) == 0 &&
+                if (new_misa.rvc == 0 &&
                         bits(tc->pcState().as<RiscvISA::PCState>().npc(),
                             2, 0) != 0) {
-                    val |= cur_val & ISA_EXT_C_MASK;
+                    new_misa.rvc = new_misa.rvc | cur_misa.rvc;
                 }
-                setMiscRegNoEffect(idx, val);
+                setMiscRegNoEffect(idx, new_misa);
             }
             break;
           case MISCREG_STATUS:
diff --git a/src/arch/riscv/regs/misc.hh b/src/arch/riscv/regs/misc.hh
index 8cb4ca0f91..5ea3536141 100644
--- a/src/arch/riscv/regs/misc.hh
+++ b/src/arch/riscv/regs/misc.hh
@@ -752,6 +752,37 @@ BitUnion64(STATUS)
     Bitfield<0> uie;
 EndBitUnion(STATUS)
 
+/**
+ * These fields are specified in the RISC-V Instruction Set Manual, Volume II,
+ * v1.10, v1.11 and v1.12 in Figure 3.1, accessible at www.riscv.org. The register
+ * is used to control instruction extensions.
+ */
+BitUnion64(MISA)
+    Bitfield<63, 62> rv64_mxl;
+    Bitfield<31, 30> rv32_mxl;
+    Bitfield<23> rvx;
+    Bitfield<21> rvv;
+    Bitfield<20> rvu;
+    Bitfield<19> rvt;
+    Bitfield<18> rvs;
+    Bitfield<16> rvq;
+    Bitfield<15> rvp;
+    Bitfield<13> rvn;
+    Bitfield<12> rvm;
+    Bitfield<11> rvl;
+    Bitfield<10> rvk;
+    Bitfield<9> rvj;
+    Bitfield<8> rvi;
+    Bitfield<7> rvh;
+    Bitfield<6> rvg;
+    Bitfield<5> rvf;
+    Bitfield<4> rve;
+    Bitfield<3> rvd;
+    Bitfield<2> rvc;
+    Bitfield<1> rvb;
+    Bitfield<0> rva;
+EndBitUnion(MISA)
+
 /**
  * These fields are specified in the RISC-V Instruction Set Manual, Volume II,
  * v1.10 in Figures 3.11 and 3.12, accessible at www.riscv.org. Both the MIP

From b305019ac4420063dee4aaf15162fa779b81dea6 Mon Sep 17 00:00:00 2001
From: "Bobby R. Bruce" <bbruce@ucdavis.edu>
Date: Thu, 9 Mar 2023 10:13:21 -0800
Subject: [PATCH 263/492] python: Replace 'getargspec' with 'signature' in
 SimObject.py

In Python 3.11 'inspect.getargspec' has been removed. It has been
marked for deprecation since 3.5. The SimObject.py class has therefore
been rewritten to use 'inspect.signature' instead.

Change-Id: I9efd831e05e0b1619f366ffe722abb0a072fd519
Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/68817
Reviewed-by: Jason Lowe-Power <power.jg@gmail.com>
Maintainer: Jason Lowe-Power <power.jg@gmail.com>
Tested-by: kokoro <noreply+kokoro@google.com>
---
 src/python/m5/SimObject.py | 28 +++++++++++++++-------------
 1 file changed, 15 insertions(+), 13 deletions(-)

diff --git a/src/python/m5/SimObject.py b/src/python/m5/SimObject.py
index 6caa532897..354a8288cd 100644
--- a/src/python/m5/SimObject.py
+++ b/src/python/m5/SimObject.py
@@ -478,19 +478,21 @@ def cxxMethod(*args, **kwargs):
         return_value_policy = kwargs.get("return_value_policy", None)
         static = kwargs.get("static", False)
 
-        args, varargs, keywords, defaults = inspect.getargspec(func)
-        if varargs or keywords:
-            raise ValueError(
-                "Wrapped methods must not contain variable arguments"
-            )
-
-        # Create tuples of (argument, default)
-        if defaults:
-            args = args[: -len(defaults)] + list(
-                zip(args[-len(defaults) :], defaults)
-            )
-        # Don't include self in the argument list to PyBind
-        args = args[1:]
+        # Create a list of tuples of (argument, default). The `PyBindMethod`
+        # class expects the `args` argument to be a list of either argument
+        # names, in the case that argument does not have a default value, and
+        # a tuple of (argument, default) in the casae where an argument does.
+        args = []
+        sig = inspect.signature(func)
+        for param_name in sig.parameters.keys():
+            if param_name == "self":
+                # We don't cound 'self' as an argument in this case.
+                continue
+            param = sig.parameters[param_name]
+            if param.default is param.empty:
+                args.append(param_name)
+            else:
+                args.append((param_name, param.default))
 
         @wraps(func)
         def cxx_call(self, *args, **kwargs):

From 07fca546e6e85ed5a5c5d729f12cda2cc6428ce0 Mon Sep 17 00:00:00 2001
From: "Bobby R. Bruce" <bbruce@ucdavis.edu>
Date: Thu, 9 Mar 2023 10:23:23 -0800
Subject: [PATCH 264/492] ext: Update Pybind to Version 2.10.3

Updating Pybind11 is necessary for gem5 to compile correctly with
Python 3.11. As of March 9th 2023, 2.10.3 is the latest version of
Pybind11.

Change-Id: I32c68c507770040d3fac2de442d88a8f46b48896
Issue-on: https://gem5.atlassian.net/browse/GEM5-1295
Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/68818
Maintainer: Jason Lowe-Power <power.jg@gmail.com>
Tested-by: kokoro <noreply+kokoro@google.com>
Reviewed-by: Jason Lowe-Power <power.jg@gmail.com>
---
 ext/pybind11/.appveyor.yml                    |    6 +-
 ext/pybind11/.clang-format                    |   21 +-
 ext/pybind11/.clang-tidy                      |   82 +-
 ext/pybind11/.codespell-ignore-lines          |   24 +
 ext/pybind11/.gitattributes                   |    1 +
 ext/pybind11/.github/CODEOWNERS               |    9 +
 ext/pybind11/.github/CONTRIBUTING.md          |   60 +-
 .../.github/ISSUE_TEMPLATE/bug-report.md      |   28 -
 .../.github/ISSUE_TEMPLATE/bug-report.yml     |   61 +
 .../.github/ISSUE_TEMPLATE/config.yml         |    3 +
 .../.github/ISSUE_TEMPLATE/feature-request.md |   16 -
 .../.github/ISSUE_TEMPLATE/question.md        |   21 -
 ext/pybind11/.github/dependabot.yml           |    9 -
 ext/pybind11/.github/matchers/pylint.json     |   32 +
 ext/pybind11/.github/pull_request_template.md |    4 +
 ext/pybind11/.github/workflows/ci.yml         |  638 ++++--
 ext/pybind11/.github/workflows/configure.yml  |   24 +-
 ext/pybind11/.github/workflows/format.yml     |   25 +-
 ext/pybind11/.github/workflows/labeler.yml    |    6 +-
 ext/pybind11/.github/workflows/pip.yml        |   39 +-
 ext/pybind11/.github/workflows/upstream.yml   |  114 +
 ext/pybind11/.gitignore                       |    3 +
 ext/pybind11/.pre-commit-config.yaml          |  137 +-
 ext/pybind11/CMakeLists.txt                   |   35 +-
 ext/pybind11/MANIFEST.in                      |    1 -
 ext/pybind11/README.rst                       |   18 +-
 ext/pybind11/docs/Doxyfile                    |    4 +-
 ext/pybind11/docs/_static/css/custom.css      |    3 +
 ext/pybind11/docs/_static/theme_overrides.css |   11 -
 ext/pybind11/docs/advanced/cast/custom.rst    |    6 +-
 ext/pybind11/docs/advanced/cast/eigen.rst     |    2 +-
 ext/pybind11/docs/advanced/cast/overview.rst  |  183 +-
 ext/pybind11/docs/advanced/cast/stl.rst       |    8 +-
 ext/pybind11/docs/advanced/cast/strings.rst   |   19 +-
 ext/pybind11/docs/advanced/classes.rst        |   98 +-
 ext/pybind11/docs/advanced/exceptions.rst     |   17 +-
 ext/pybind11/docs/advanced/functions.rst      |   60 +-
 ext/pybind11/docs/advanced/misc.rst           |   77 +-
 ext/pybind11/docs/advanced/pycpp/numpy.rst    |   16 +-
 ext/pybind11/docs/advanced/smart_ptrs.rst     |    2 +-
 ext/pybind11/docs/basics.rst                  |   11 +-
 ext/pybind11/docs/benchmark.py                |   28 +-
 ext/pybind11/docs/changelog.rst               |  610 +++++-
 ext/pybind11/docs/classes.rst                 |   19 +-
 ext/pybind11/docs/compiling.rst               |   30 +-
 ext/pybind11/docs/conf.py                     |   28 +-
 ext/pybind11/docs/faq.rst                     |   42 +-
 ext/pybind11/docs/pybind11-logo.png           |  Bin 58510 -> 61034 bytes
 ext/pybind11/docs/release.rst                 |    9 +-
 ext/pybind11/docs/requirements.txt            |   14 +-
 ext/pybind11/docs/upgrade.rst                 |    6 +-
 ext/pybind11/include/pybind11/attr.h          |  285 ++-
 ext/pybind11/include/pybind11/buffer_info.h   |  121 +-
 ext/pybind11/include/pybind11/cast.h          | 1081 ++++++----
 ext/pybind11/include/pybind11/chrono.h        |  148 +-
 ext/pybind11/include/pybind11/complex.h       |   31 +-
 ext/pybind11/include/pybind11/detail/class.h  |  312 +--
 ext/pybind11/include/pybind11/detail/common.h |  992 +++++----
 ext/pybind11/include/pybind11/detail/descr.h  |  100 +-
 ext/pybind11/include/pybind11/detail/init.h   |  282 ++-
 .../include/pybind11/detail/internals.h       |  302 ++-
 .../pybind11/detail/type_caster_base.h        |  590 +++---
 ext/pybind11/include/pybind11/detail/typeid.h |   28 +-
 ext/pybind11/include/pybind11/eigen.h         |  596 +-----
 ext/pybind11/include/pybind11/eigen/matrix.h  |  699 ++++++
 ext/pybind11/include/pybind11/eigen/tensor.h  |  509 +++++
 ext/pybind11/include/pybind11/embed.h         |  155 +-
 ext/pybind11/include/pybind11/eval.h          |  103 +-
 ext/pybind11/include/pybind11/functional.h    |   46 +-
 ext/pybind11/include/pybind11/gil.h           |  118 +-
 ext/pybind11/include/pybind11/iostream.h      |   68 +-
 ext/pybind11/include/pybind11/numpy.h         | 1309 +++++++-----
 ext/pybind11/include/pybind11/operators.h     |  231 +-
 ext/pybind11/include/pybind11/options.h       |   57 +-
 ext/pybind11/include/pybind11/pybind11.h      | 1878 ++++++++++-------
 ext/pybind11/include/pybind11/pytypes.h       | 1598 +++++++++-----
 ext/pybind11/include/pybind11/stl.h           |  243 ++-
 .../include/pybind11/stl/filesystem.h         |   75 +-
 ext/pybind11/include/pybind11/stl_bind.h      |  676 +++---
 ext/pybind11/noxfile.py                       |   40 +-
 ext/pybind11/pybind11/__init__.py             |   10 +-
 ext/pybind11/pybind11/__main__.py             |   18 +-
 ext/pybind11/pybind11/_version.py             |    6 +-
 ext/pybind11/pybind11/_version.pyi            |    6 -
 ext/pybind11/pybind11/commands.py             |   32 +-
 ext/pybind11/pybind11/setup_helpers.py        |  232 +-
 ext/pybind11/pybind11/setup_helpers.pyi       |   63 -
 ext/pybind11/pyproject.toml                   |   54 +-
 ext/pybind11/setup.cfg                        |   34 +-
 ext/pybind11/setup.py                         |  141 +-
 ext/pybind11/tests/CMakeLists.txt             |  232 +-
 ext/pybind11/tests/conftest.py                |   50 +-
 ext/pybind11/tests/constructor_stats.h        |  143 +-
 ext/pybind11/tests/cross_module_gil_utils.cpp |  111 +-
 ...s_module_interleaved_error_already_set.cpp |   51 +
 .../tests/eigen_tensor_avoid_stl_array.cpp    |   14 +
 ext/pybind11/tests/env.py                     |    5 -
 .../tests/extra_python_package/test_files.py  |  161 +-
 .../extra_setuptools/test_setuphelper.py      |   28 +-
 ext/pybind11/tests/local_bindings.h           |   41 +-
 ext/pybind11/tests/object.h                   |  100 +-
 .../tests/pybind11_cross_module_tests.cpp     |   86 +-
 ext/pybind11/tests/pybind11_tests.cpp         |   60 +-
 ext/pybind11/tests/pybind11_tests.h           |   34 +-
 ext/pybind11/tests/pytest.ini                 |    9 +-
 ext/pybind11/tests/requirements.txt           |   17 +-
 ext/pybind11/tests/test_async.cpp             |    5 +-
 ext/pybind11/tests/test_async.py              |    1 -
 ext/pybind11/tests/test_buffers.cpp           |   82 +-
 ext/pybind11/tests/test_buffers.py            |   16 +-
 ext/pybind11/tests/test_builtin_casters.cpp   |  328 ++-
 ext/pybind11/tests/test_builtin_casters.py    |  220 +-
 ext/pybind11/tests/test_call_policies.cpp     |   44 +-
 ext/pybind11/tests/test_call_policies.py      |    1 -
 ext/pybind11/tests/test_callbacks.cpp         |  117 +-
 ext/pybind11/tests/test_callbacks.py          |   26 +-
 ext/pybind11/tests/test_chrono.cpp            |   33 +-
 ext/pybind11/tests/test_chrono.py             |    3 +-
 ext/pybind11/tests/test_class.cpp             |  369 ++--
 ext/pybind11/tests/test_class.py              |   30 +-
 ext/pybind11/tests/test_cmake_build/embed.cpp |    8 +-
 ext/pybind11/tests/test_cmake_build/test.py   |    6 +-
 ext/pybind11/tests/test_const_name.cpp        |   55 +
 ext/pybind11/tests/test_const_name.py         |   29 +
 .../tests/test_constants_and_functions.cpp    |   98 +-
 .../tests/test_constants_and_functions.py     |    1 -
 ext/pybind11/tests/test_copy_move.cpp         |  191 +-
 ext/pybind11/tests/test_copy_move.py          |    8 +-
 .../tests/test_custom_type_casters.cpp        |  146 +-
 .../tests/test_custom_type_casters.py         |   11 +-
 ext/pybind11/tests/test_custom_type_setup.py  |    2 -
 ext/pybind11/tests/test_docstring_options.cpp |  102 +-
 ext/pybind11/tests/test_docstring_options.py  |   24 +-
 .../{test_eigen.cpp => test_eigen_matrix.cpp} |  246 ++-
 .../{test_eigen.py => test_eigen_matrix.py}   |  107 +-
 ext/pybind11/tests/test_eigen_tensor.cpp      |   18 +
 ext/pybind11/tests/test_eigen_tensor.inl      |  333 +++
 ext/pybind11/tests/test_eigen_tensor.py       |  296 +++
 ext/pybind11/tests/test_embed/CMakeLists.txt  |    4 +-
 ext/pybind11/tests/test_embed/catch.cpp       |   29 +-
 .../tests/test_embed/external_module.cpp      |    9 +-
 .../tests/test_embed/test_interpreter.cpp     |  216 +-
 .../tests/test_embed/test_interpreter.py      |    3 +-
 .../tests/test_embed/test_trampoline.py       |   16 +
 ext/pybind11/tests/test_enum.cpp              |   81 +-
 ext/pybind11/tests/test_enum.py               |   12 +-
 ext/pybind11/tests/test_eval.cpp              |   31 +-
 ext/pybind11/tests/test_eval.py               |    3 +-
 ext/pybind11/tests/test_eval_call.py          |    1 -
 ext/pybind11/tests/test_exceptions.cpp        |  185 +-
 ext/pybind11/tests/test_exceptions.h          |    1 +
 ext/pybind11/tests/test_exceptions.py         |  150 +-
 .../tests/test_factory_constructors.cpp       |  109 +-
 .../tests/test_factory_constructors.py        |   10 +-
 ext/pybind11/tests/test_gil_scoped.cpp        |  139 +-
 ext/pybind11/tests/test_gil_scoped.py         |  228 +-
 ext/pybind11/tests/test_iostream.cpp          |   33 +-
 ext/pybind11/tests/test_iostream.py           |   40 +-
 .../tests/test_kwargs_and_defaults.cpp        |  240 ++-
 .../tests/test_kwargs_and_defaults.py         |  122 +-
 ext/pybind11/tests/test_local_bindings.cpp    |   41 +-
 ext/pybind11/tests/test_local_bindings.py     |    3 +-
 .../tests/test_methods_and_attributes.cpp     |  199 +-
 .../tests/test_methods_and_attributes.py      |   76 +-
 ext/pybind11/tests/test_modules.cpp           |   67 +-
 ext/pybind11/tests/test_modules.py            |   31 +-
 .../tests/test_multiple_inheritance.cpp       |  220 +-
 .../tests/test_multiple_inheritance.py        |  147 +-
 ext/pybind11/tests/test_numpy_array.cpp       |  308 +--
 ext/pybind11/tests/test_numpy_array.py        |   20 +-
 ext/pybind11/tests/test_numpy_dtypes.cpp      |  368 ++--
 ext/pybind11/tests/test_numpy_dtypes.py       |   55 +-
 ext/pybind11/tests/test_numpy_vectorize.cpp   |   32 +-
 ext/pybind11/tests/test_numpy_vectorize.py    |    1 -
 ext/pybind11/tests/test_opaque_types.cpp      |   18 +-
 ext/pybind11/tests/test_opaque_types.py       |    5 +-
 .../tests/test_operator_overloading.cpp       |  184 +-
 .../tests/test_operator_overloading.py        |   12 +-
 ext/pybind11/tests/test_pickling.cpp          |   45 +-
 ext/pybind11/tests/test_pickling.py           |   21 +-
 ext/pybind11/tests/test_pytypes.cpp           |  468 +++-
 ext/pybind11/tests/test_pytypes.py            |  448 +++-
 .../tests/test_sequences_and_iterators.cpp    |  289 ++-
 .../tests/test_sequences_and_iterators.py     |   30 +-
 ext/pybind11/tests/test_smart_ptr.cpp         |  104 +-
 ext/pybind11/tests/test_smart_ptr.py          |   13 +-
 ext/pybind11/tests/test_stl.cpp               |  278 +--
 ext/pybind11/tests/test_stl.py                |   25 +-
 ext/pybind11/tests/test_stl_binders.cpp       |   93 +-
 ext/pybind11/tests/test_stl_binders.py        |   43 +-
 .../tests/test_tagbased_polymorphic.cpp       |   97 +-
 .../tests/test_tagbased_polymorphic.py        |    1 -
 ext/pybind11/tests/test_thread.cpp            |   10 +-
 ext/pybind11/tests/test_thread.py             |    6 +-
 ext/pybind11/tests/test_union.py              |    1 -
 ext/pybind11/tests/test_virtual_functions.cpp |  334 +--
 ext/pybind11/tests/test_virtual_functions.py  |   44 +-
 ext/pybind11/tools/FindCatch.cmake            |    2 +
 ext/pybind11/tools/FindPythonLibsNew.cmake    |   66 +-
 ext/pybind11/tools/JoinPaths.cmake            |   23 +
 .../codespell_ignore_lines_from_errors.py     |   35 +
 ext/pybind11/tools/libsize.py                 |    7 +-
 ext/pybind11/tools/make_changelog.py          |   10 +-
 ext/pybind11/tools/pybind11.pc.in             |    7 +
 ext/pybind11/tools/pybind11Common.cmake       |   50 +-
 ext/pybind11/tools/pybind11Config.cmake.in    |   10 +-
 ext/pybind11/tools/pybind11NewTools.cmake     |   40 +-
 ext/pybind11/tools/pybind11Tools.cmake        |   48 +-
 ext/pybind11/tools/setup_global.py.in         |   14 +-
 ext/pybind11/tools/setup_main.py.in           |    9 +-
 210 files changed, 17229 insertions(+), 8978 deletions(-)
 create mode 100644 ext/pybind11/.codespell-ignore-lines
 create mode 100644 ext/pybind11/.gitattributes
 create mode 100644 ext/pybind11/.github/CODEOWNERS
 delete mode 100644 ext/pybind11/.github/ISSUE_TEMPLATE/bug-report.md
 create mode 100644 ext/pybind11/.github/ISSUE_TEMPLATE/bug-report.yml
 delete mode 100644 ext/pybind11/.github/ISSUE_TEMPLATE/feature-request.md
 delete mode 100644 ext/pybind11/.github/ISSUE_TEMPLATE/question.md
 create mode 100644 ext/pybind11/.github/matchers/pylint.json
 create mode 100644 ext/pybind11/.github/workflows/upstream.yml
 create mode 100644 ext/pybind11/docs/_static/css/custom.css
 delete mode 100644 ext/pybind11/docs/_static/theme_overrides.css
 create mode 100644 ext/pybind11/include/pybind11/eigen/matrix.h
 create mode 100644 ext/pybind11/include/pybind11/eigen/tensor.h
 delete mode 100644 ext/pybind11/pybind11/_version.pyi
 delete mode 100644 ext/pybind11/pybind11/setup_helpers.pyi
 create mode 100644 ext/pybind11/tests/cross_module_interleaved_error_already_set.cpp
 create mode 100644 ext/pybind11/tests/eigen_tensor_avoid_stl_array.cpp
 create mode 100644 ext/pybind11/tests/test_const_name.cpp
 create mode 100644 ext/pybind11/tests/test_const_name.py
 rename ext/pybind11/tests/{test_eigen.cpp => test_eigen_matrix.cpp} (66%)
 rename ext/pybind11/tests/{test_eigen.py => test_eigen_matrix.py} (90%)
 create mode 100644 ext/pybind11/tests/test_eigen_tensor.cpp
 create mode 100644 ext/pybind11/tests/test_eigen_tensor.inl
 create mode 100644 ext/pybind11/tests/test_eigen_tensor.py
 create mode 100644 ext/pybind11/tests/test_embed/test_trampoline.py
 create mode 100644 ext/pybind11/tools/JoinPaths.cmake
 create mode 100644 ext/pybind11/tools/codespell_ignore_lines_from_errors.py
 create mode 100644 ext/pybind11/tools/pybind11.pc.in

diff --git a/ext/pybind11/.appveyor.yml b/ext/pybind11/.appveyor.yml
index 85445d41a2..360760ac8d 100644
--- a/ext/pybind11/.appveyor.yml
+++ b/ext/pybind11/.appveyor.yml
@@ -1,6 +1,6 @@
 version: 1.0.{build}
 image:
-- Visual Studio 2015
+- Visual Studio 2017
 test: off
 skip_branch_with_pr: true
 build:
@@ -11,11 +11,9 @@ environment:
   matrix:
   - PYTHON: 36
     CONFIG: Debug
-  - PYTHON: 27
-    CONFIG: Debug
 install:
 - ps: |
-    $env:CMAKE_GENERATOR = "Visual Studio 14 2015"
+    $env:CMAKE_GENERATOR = "Visual Studio 15 2017"
     if ($env:PLATFORM -eq "x64") { $env:PYTHON = "$env:PYTHON-x64" }
     $env:PATH = "C:\Python$env:PYTHON\;C:\Python$env:PYTHON\Scripts\;$env:PATH"
     python -W ignore -m pip install --upgrade pip wheel
diff --git a/ext/pybind11/.clang-format b/ext/pybind11/.clang-format
index 8700fca84d..b477a16037 100644
--- a/ext/pybind11/.clang-format
+++ b/ext/pybind11/.clang-format
@@ -3,19 +3,36 @@
 # clang-format --style=llvm --dump-config
 BasedOnStyle: LLVM
 AccessModifierOffset: -4
-AlignConsecutiveAssignments: true
+AllowShortLambdasOnASingleLine: true
 AlwaysBreakTemplateDeclarations: Yes
 BinPackArguments: false
 BinPackParameters: false
 BreakBeforeBinaryOperators: All
 BreakConstructorInitializers: BeforeColon
 ColumnLimit: 99
+CommentPragmas: 'NOLINT:.*|^ IWYU pragma:'
+IncludeBlocks: Regroup
 IndentCaseLabels: true
 IndentPPDirectives: AfterHash
 IndentWidth: 4
 Language: Cpp
 SpaceAfterCStyleCast: true
-# SpaceInEmptyBlock: true # too new
 Standard: Cpp11
+StatementMacros: ['PyObject_HEAD']
 TabWidth: 4
+IncludeCategories:
+  - Regex:           '<pybind11/.*'
+    Priority:        -1
+  - Regex:           'pybind11.h"$'
+    Priority:        1
+  - Regex:           '^".*/?detail/'
+    Priority:        1
+    SortPriority:    2
+  - Regex:           '^"'
+    Priority:        1
+    SortPriority:    3
+  - Regex:           '<[[:alnum:]._]+>'
+    Priority:        4
+  - Regex:           '.*'
+    Priority:        5
 ...
diff --git a/ext/pybind11/.clang-tidy b/ext/pybind11/.clang-tidy
index e29d929897..23018386c1 100644
--- a/ext/pybind11/.clang-tidy
+++ b/ext/pybind11/.clang-tidy
@@ -1,13 +1,77 @@
 FormatStyle: file
 
-Checks: '
-llvm-namespace-comment,
-modernize-use-override,
-readability-container-size-empty,
-modernize-use-using,
-modernize-use-equals-default,
-modernize-use-auto,
-modernize-use-emplace,
-'
+Checks: |
+  *bugprone*,
+  *performance*,
+  clang-analyzer-optin.cplusplus.VirtualCall,
+  clang-analyzer-optin.performance.Padding,
+  cppcoreguidelines-init-variables,
+  cppcoreguidelines-prefer-member-initializer,
+  cppcoreguidelines-pro-type-static-cast-downcast,
+  cppcoreguidelines-slicing,
+  google-explicit-constructor,
+  llvm-namespace-comment,
+  misc-definitions-in-headers,
+  misc-misplaced-const,
+  misc-non-copyable-objects,
+  misc-static-assert,
+  misc-throw-by-value-catch-by-reference,
+  misc-uniqueptr-reset-release,
+  misc-unused-parameters,
+  modernize-avoid-bind,
+  modernize-loop-convert,
+  modernize-make-shared,
+  modernize-redundant-void-arg,
+  modernize-replace-auto-ptr,
+  modernize-replace-disallow-copy-and-assign-macro,
+  modernize-replace-random-shuffle,
+  modernize-shrink-to-fit,
+  modernize-use-auto,
+  modernize-use-bool-literals,
+  modernize-use-default-member-init,
+  modernize-use-emplace,
+  modernize-use-equals-default,
+  modernize-use-equals-delete,
+  modernize-use-noexcept,
+  modernize-use-nullptr,
+  modernize-use-override,
+  modernize-use-using,
+  readability-avoid-const-params-in-decls,
+  readability-braces-around-statements,
+  readability-const-return-type,
+  readability-container-size-empty,
+  readability-delete-null-pointer,
+  readability-else-after-return,
+  readability-implicit-bool-conversion,
+  readability-inconsistent-declaration-parameter-name,
+  readability-make-member-function-const,
+  readability-misplaced-array-index,
+  readability-non-const-parameter,
+  readability-qualified-auto,
+  readability-redundant-function-ptr-dereference,
+  readability-redundant-smartptr-get,
+  readability-redundant-string-cstr,
+  readability-simplify-subscript-expr,
+  readability-static-accessed-through-instance,
+  readability-static-definition-in-anonymous-namespace,
+  readability-string-compare,
+  readability-suspicious-call-argument,
+  readability-uniqueptr-delete-release,
+  -bugprone-easily-swappable-parameters,
+  -bugprone-exception-escape,
+  -bugprone-reserved-identifier,
+  -bugprone-unused-raii,
+
+CheckOptions:
+- key:             modernize-use-equals-default.IgnoreMacros
+  value:           false
+- key:             performance-for-range-copy.WarnOnAllAutoCopies
+  value:           true
+- key:             performance-inefficient-string-concatenation.StrictMode
+  value:           true
+- key:             performance-unnecessary-value-param.AllowedTypes
+  value:           'exception_ptr$;'
+- key:             readability-implicit-bool-conversion.AllowPointerConditions
+  value:           true
 
 HeaderFilterRegex: 'pybind11/.*h'
diff --git a/ext/pybind11/.codespell-ignore-lines b/ext/pybind11/.codespell-ignore-lines
new file mode 100644
index 0000000000..2a01d63ebb
--- /dev/null
+++ b/ext/pybind11/.codespell-ignore-lines
@@ -0,0 +1,24 @@
+template <op_id id, op_type ot, typename L = undefined_t, typename R = undefined_t>
+    template <typename ThisT>
+        auto &this_ = static_cast<ThisT &>(*this);
+                if (load_impl<ThisT>(temp, false)) {
+        ssize_t nd = 0;
+        auto trivial = broadcast(buffers, nd, shape);
+        auto ndim = (size_t) nd;
+    int nd;
+    ssize_t ndim() const { return detail::array_proxy(m_ptr)->nd; }
+        using op = op_impl<id, ot, Base, L_type, R_type>;
+template <op_id id, op_type ot, typename L, typename R>
+    template <detail::op_id id, detail::op_type ot, typename L, typename R, typename... Extra>
+    class_ &def(const detail::op_<id, ot, L, R> &op, const Extra &...extra) {
+    class_ &def_cast(const detail::op_<id, ot, L, R> &op, const Extra &...extra) {
+@pytest.mark.parametrize("access", ["ro", "rw", "static_ro", "static_rw"])
+struct IntStruct {
+    explicit IntStruct(int v) : value(v){};
+    ~IntStruct() { value = -value; }
+    IntStruct(const IntStruct &) = default;
+    IntStruct &operator=(const IntStruct &) = default;
+    py::class_<IntStruct>(m, "IntStruct").def(py::init([](const int i) { return IntStruct(i); }));
+    py::implicitly_convertible<int, IntStruct>();
+    m.def("test", [](int expected, const IntStruct &in) {
+        [](int expected, const IntStruct &in) {
diff --git a/ext/pybind11/.gitattributes b/ext/pybind11/.gitattributes
new file mode 100644
index 0000000000..d611e1496d
--- /dev/null
+++ b/ext/pybind11/.gitattributes
@@ -0,0 +1 @@
+docs/*.svg binary
diff --git a/ext/pybind11/.github/CODEOWNERS b/ext/pybind11/.github/CODEOWNERS
new file mode 100644
index 0000000000..4e2c66902e
--- /dev/null
+++ b/ext/pybind11/.github/CODEOWNERS
@@ -0,0 +1,9 @@
+*.cmake @henryiii
+CMakeLists.txt @henryiii
+*.yml @henryiii
+*.yaml @henryiii
+/tools/ @henryiii
+/pybind11/ @henryiii
+noxfile.py @henryiii
+.clang-format @henryiii
+.clang-tidy @henryiii
diff --git a/ext/pybind11/.github/CONTRIBUTING.md b/ext/pybind11/.github/CONTRIBUTING.md
index 08d9e7cb93..00b1fea4cf 100644
--- a/ext/pybind11/.github/CONTRIBUTING.md
+++ b/ext/pybind11/.github/CONTRIBUTING.md
@@ -53,6 +53,33 @@ derivative works thereof, in binary and source code form.
 
 ## Development of pybind11
 
+### Quick setup
+
+To setup a quick development environment, use [`nox`](https://nox.thea.codes).
+This will allow you to do some common tasks with minimal setup effort, but will
+take more time to run and be less flexible than a full development environment.
+If you use [`pipx run nox`](https://pipx.pypa.io), you don't even need to
+install `nox`. Examples:
+
+```bash
+# List all available sessions
+nox -l
+
+# Run linters
+nox -s lint
+
+# Run tests on Python 3.9
+nox -s tests-3.9
+
+# Build and preview docs
+nox -s docs -- serve
+
+# Build SDists and wheels
+nox -s build
+```
+
+### Full setup
+
 To setup an ideal development environment, run the following commands on a
 system with CMake 3.14+:
 
@@ -66,11 +93,10 @@ cmake --build build -j4
 
 Tips:
 
-* You can use `virtualenv` (from PyPI) instead of `venv` (which is Python 3
-  only).
+* You can use `virtualenv` (faster, from PyPI) instead of `venv`.
 * You can select any name for your environment folder; if it contains "env" it
   will be ignored by git.
-* If you don’t have CMake 3.14+, just add “cmake” to the pip install command.
+* If you don't have CMake 3.14+, just add "cmake" to the pip install command.
 * You can use `-DPYBIND11_FINDPYTHON=ON` to use FindPython on CMake 3.12+
 * In classic mode, you may need to set `-DPYTHON_EXECUTABLE=/path/to/python`.
   FindPython uses `-DPython_ROOT_DIR=/path/to` or
@@ -78,7 +104,7 @@ Tips:
 
 ### Configuration options
 
-In CMake, configuration options are given with “-D”. Options are stored in the
+In CMake, configuration options are given with "-D". Options are stored in the
 build directory, in the `CMakeCache.txt` file, so they are remembered for each
 build directory. Two selections are special - the generator, given with `-G`,
 and the compiler, which is selected based on environment variables `CXX` and
@@ -88,12 +114,12 @@ after the initial run.
 The valid options are:
 
 * `-DCMAKE_BUILD_TYPE`: Release, Debug, MinSizeRel, RelWithDebInfo
-* `-DPYBIND11_FINDPYTHON=ON`: Use CMake 3.12+’s FindPython instead of the
+* `-DPYBIND11_FINDPYTHON=ON`: Use CMake 3.12+'s FindPython instead of the
   classic, deprecated, custom FindPythonLibs
 * `-DPYBIND11_NOPYTHON=ON`: Disable all Python searching (disables tests)
 * `-DBUILD_TESTING=ON`: Enable the tests
 * `-DDOWNLOAD_CATCH=ON`: Download catch to build the C++ tests
-* `-DOWNLOAD_EIGEN=ON`: Download Eigen for the NumPy tests
+* `-DDOWNLOAD_EIGEN=ON`: Download Eigen for the NumPy tests
 * `-DPYBIND11_INSTALL=ON/OFF`: Enable the install target (on by default for the
   master project)
 * `-DUSE_PYTHON_INSTALL_DIR=ON`: Try to install into the python dir
@@ -132,8 +158,9 @@ tests with these targets:
 * `test_cmake_build`: Install / subdirectory tests
 
 If you want to build just a subset of tests, use
-`-DPYBIND11_TEST_OVERRIDE="test_callbacks.cpp;test_pickling.cpp"`. If this is
-empty, all tests will be built.
+`-DPYBIND11_TEST_OVERRIDE="test_callbacks;test_pickling"`. If this is
+empty, all tests will be built. Tests are specified without an extension if they need both a .py and
+.cpp file.
 
 You may also pass flags to the `pytest` target by editing `tests/pytest.ini` or
 by using the `PYTEST_ADDOPTS` environment variable
@@ -203,16 +230,19 @@ of the pybind11 repo.
 [`clang-tidy`][clang-tidy] performs deeper static code analyses and is
 more complex to run, compared to `clang-format`, but support for `clang-tidy`
 is built into the pybind11 CMake configuration. To run `clang-tidy`, the
-following recipe should work. Files will be modified in place, so you can
-use git to monitor the changes.
+following recipe should work. Run the `docker` command from the top-level
+directory inside your pybind11 git clone. Files will be modified in place,
+so you can use git to monitor the changes.
 
 ```bash
-docker run --rm -v $PWD:/pybind11 -it silkeh/clang:10
-apt-get update && apt-get install python3-dev python3-pytest
-cmake -S pybind11/ -B build -DCMAKE_CXX_CLANG_TIDY="$(which clang-tidy);-fix"
-cmake --build build
+docker run --rm -v $PWD:/mounted_pybind11 -it silkeh/clang:13
+apt-get update && apt-get install -y python3-dev python3-pytest
+cmake -S /mounted_pybind11/ -B build -DCMAKE_CXX_CLANG_TIDY="$(which clang-tidy);--use-color" -DDOWNLOAD_EIGEN=ON -DDOWNLOAD_CATCH=ON -DCMAKE_CXX_STANDARD=17
+cmake --build build -j 2
 ```
 
+You can add `--fix` to the options list if you want.
+
 ### Include what you use
 
 To run include what you use, install (`brew install include-what-you-use` on
@@ -228,7 +258,7 @@ The report is sent to stderr; you can pipe it into a file if you wish.
 ### Build recipes
 
 This builds with the Intel compiler (assuming it is in your path, along with a
-recent CMake and Python 3):
+recent CMake and Python):
 
 ```bash
 python3 -m venv venv
diff --git a/ext/pybind11/.github/ISSUE_TEMPLATE/bug-report.md b/ext/pybind11/.github/ISSUE_TEMPLATE/bug-report.md
deleted file mode 100644
index ae36ea6508..0000000000
--- a/ext/pybind11/.github/ISSUE_TEMPLATE/bug-report.md
+++ /dev/null
@@ -1,28 +0,0 @@
----
-name: Bug Report
-about: File an issue about a bug
-title: "[BUG] "
----
-
-
-Make sure you've completed the following steps before submitting your issue -- thank you!
-
-1. Make sure you've read the [documentation][]. Your issue may be addressed there.
-2. Search the [issue tracker][] to verify that this hasn't already been reported. +1 or comment there if it has.
-3. Consider asking first in the [Gitter chat room][].
-4. Include a self-contained and minimal piece of code that reproduces the problem. If that's not possible, try to make the description as clear as possible.
-    a. If possible, make a PR with a new, failing test to give us a starting point to work on!
-
-[documentation]: https://pybind11.readthedocs.io
-[issue tracker]: https://github.com/pybind/pybind11/issues
-[Gitter chat room]: https://gitter.im/pybind/Lobby
-
-*After reading, remove this checklist and the template text in parentheses below.*
-
-## Issue description
-
-(Provide a short description, state the expected behavior and what actually happens.)
-
-## Reproducible example code
-
-(The code should be minimal, have no external dependencies, isolate the function(s) that cause breakage. Submit matched and complete C++ and Python snippets that can be easily compiled and run to diagnose the issue.)
diff --git a/ext/pybind11/.github/ISSUE_TEMPLATE/bug-report.yml b/ext/pybind11/.github/ISSUE_TEMPLATE/bug-report.yml
new file mode 100644
index 0000000000..4f1e78f33c
--- /dev/null
+++ b/ext/pybind11/.github/ISSUE_TEMPLATE/bug-report.yml
@@ -0,0 +1,61 @@
+name: Bug Report
+description: File an issue about a bug
+title: "[BUG]: "
+labels: [triage]
+body:
+  - type: markdown
+    attributes:
+      value: |
+        Please do your best to make the issue as easy to act on as possible, and only submit here if there is clearly a problem with pybind11 (ask first if unsure). **Note that a reproducer in a PR is much more likely to get immediate attention.**
+
+  - type: checkboxes
+    id: steps
+    attributes:
+      label: Required prerequisites
+      description: Make sure you've completed the following steps before submitting your issue -- thank you!
+      options:
+        - label: Make sure you've read the [documentation](https://pybind11.readthedocs.io). Your issue may be addressed there.
+          required: true
+        - label: Search the [issue tracker](https://github.com/pybind/pybind11/issues) and [Discussions](https:/pybind/pybind11/discussions) to verify that this hasn't already been reported. +1 or comment there if it has.
+          required: true
+        - label: Consider asking first in the [Gitter chat room](https://gitter.im/pybind/Lobby) or in a [Discussion](https:/pybind/pybind11/discussions/new).
+          required: false
+
+  - type: input
+    id: version
+    attributes:
+      label: What version (or hash if on master) of pybind11 are you using?
+    validations:
+      required: true
+
+  - type: textarea
+    id: description
+    attributes:
+      label: Problem description
+      placeholder: >-
+        Provide a short description, state the expected behavior and what
+        actually happens. Include relevant information like what version of
+        pybind11 you are using, what system you are on, and any useful commands
+        / output.
+    validations:
+      required: true
+
+  - type: textarea
+    id: code
+    attributes:
+      label: Reproducible example code
+      placeholder: >-
+        The code should be minimal, have no external dependencies, isolate the
+        function(s) that cause breakage. Submit matched and complete C++ and
+        Python snippets that can be easily compiled and run to diagnose the
+        issue. — Note that a reproducer in a PR is much more likely to get
+        immediate attention: failing tests in the pybind11 CI are the best
+        starting point for working out fixes.
+      render: text
+
+  - type: input
+    id: regression
+    attributes:
+      label: Is this a regression? Put the last known working version here if it is.
+      description: Put the last known working version here if this is a regression.
+      value: Not a regression
diff --git a/ext/pybind11/.github/ISSUE_TEMPLATE/config.yml b/ext/pybind11/.github/ISSUE_TEMPLATE/config.yml
index 20e743136f..27f9a80441 100644
--- a/ext/pybind11/.github/ISSUE_TEMPLATE/config.yml
+++ b/ext/pybind11/.github/ISSUE_TEMPLATE/config.yml
@@ -1,5 +1,8 @@
 blank_issues_enabled: false
 contact_links:
+  - name: Ask a question
+    url: https://github.com/pybind/pybind11/discussions/new
+    about: Please ask and answer questions here, or propose new ideas.
   - name: Gitter room
     url: https://gitter.im/pybind/Lobby
     about: A room for discussing pybind11 with an active community
diff --git a/ext/pybind11/.github/ISSUE_TEMPLATE/feature-request.md b/ext/pybind11/.github/ISSUE_TEMPLATE/feature-request.md
deleted file mode 100644
index 5f6ec81ec9..0000000000
--- a/ext/pybind11/.github/ISSUE_TEMPLATE/feature-request.md
+++ /dev/null
@@ -1,16 +0,0 @@
----
-name: Feature Request
-about: File an issue about adding a feature
-title: "[FEAT] "
----
-
-
-Make sure you've completed the following steps before submitting your issue -- thank you!
-
-1. Check if your feature has already been mentioned / rejected / planned in other issues.
-2. If those resources didn't help, consider asking in the [Gitter chat room][] to see if this is interesting / useful to a larger audience and possible to implement reasonably,
-4. If you have a useful feature that passes the previous items (or not suitable for chat), please fill in the details below.
-
-[Gitter chat room]: https://gitter.im/pybind/Lobby
-
-*After reading, remove this checklist.*
diff --git a/ext/pybind11/.github/ISSUE_TEMPLATE/question.md b/ext/pybind11/.github/ISSUE_TEMPLATE/question.md
deleted file mode 100644
index b199b6ee8a..0000000000
--- a/ext/pybind11/.github/ISSUE_TEMPLATE/question.md
+++ /dev/null
@@ -1,21 +0,0 @@
----
-name: Question
-about: File an issue about unexplained behavior
-title: "[QUESTION] "
----
-
-If you have a question, please check the following first:
-
-1. Check if your question has already been answered in the [FAQ][] section.
-2. Make sure you've read the [documentation][]. Your issue may be addressed there.
-3. If those resources didn't help and you only have a short question (not a bug report), consider asking in the [Gitter chat room][]
-4. Search the [issue tracker][], including the closed issues, to see if your question has already been asked/answered. +1 or comment if it has been asked but has no answer.
-5. If you have a more complex question which is not answered in the previous items (or not suitable for chat), please fill in the details below.
-6. Include a self-contained and minimal piece of code that illustrates your question. If that's not possible, try to make the description as clear as possible.
-
-[FAQ]: http://pybind11.readthedocs.io/en/latest/faq.html
-[documentation]: https://pybind11.readthedocs.io
-[issue tracker]: https://github.com/pybind/pybind11/issues
-[Gitter chat room]: https://gitter.im/pybind/Lobby
-
-*After reading, remove this checklist.*
diff --git a/ext/pybind11/.github/dependabot.yml b/ext/pybind11/.github/dependabot.yml
index 73273365c0..2c7d170839 100644
--- a/ext/pybind11/.github/dependabot.yml
+++ b/ext/pybind11/.github/dependabot.yml
@@ -5,12 +5,3 @@ updates:
     directory: "/"
     schedule:
       interval: "daily"
-    ignore:
-      # Official actions have moving tags like v1
-      # that are used, so they don't need updates here
-      - dependency-name: "actions/checkout"
-      - dependency-name: "actions/setup-python"
-      - dependency-name: "actions/cache"
-      - dependency-name: "actions/upload-artifact"
-      - dependency-name: "actions/download-artifact"
-      - dependency-name: "actions/labeler"
diff --git a/ext/pybind11/.github/matchers/pylint.json b/ext/pybind11/.github/matchers/pylint.json
new file mode 100644
index 0000000000..e3a6bd16b0
--- /dev/null
+++ b/ext/pybind11/.github/matchers/pylint.json
@@ -0,0 +1,32 @@
+{
+  "problemMatcher": [
+    {
+      "severity": "warning",
+      "pattern": [
+        {
+          "regexp": "^([^:]+):(\\d+):(\\d+): ([A-DF-Z]\\d+): \\033\\[[\\d;]+m([^\\033]+).*$",
+          "file": 1,
+          "line": 2,
+          "column": 3,
+          "code": 4,
+          "message": 5
+        }
+      ],
+      "owner": "pylint-warning"
+    },
+    {
+      "severity": "error",
+      "pattern": [
+        {
+          "regexp": "^([^:]+):(\\d+):(\\d+): (E\\d+): \\033\\[[\\d;]+m([^\\033]+).*$",
+          "file": 1,
+          "line": 2,
+          "column": 3,
+          "code": 4,
+          "message": 5
+        }
+      ],
+      "owner": "pylint-error"
+    }
+  ]
+}
diff --git a/ext/pybind11/.github/pull_request_template.md b/ext/pybind11/.github/pull_request_template.md
index 97a6ff7dda..54b7f5100d 100644
--- a/ext/pybind11/.github/pull_request_template.md
+++ b/ext/pybind11/.github/pull_request_template.md
@@ -1,3 +1,7 @@
+<!--
+Title (above): please place [branch_name] at the beginning if you are targeting a branch other than master. *Do not target stable*.
+It is recommended to use conventional commit format, see conventionalcommits.org, but not required.
+-->
 ## Description
 
 <!-- Include relevant issues or PRs here, describe what changed and why -->
diff --git a/ext/pybind11/.github/workflows/ci.yml b/ext/pybind11/.github/workflows/ci.yml
index f90c199526..b36bbfe1b9 100644
--- a/ext/pybind11/.github/workflows/ci.yml
+++ b/ext/pybind11/.github/workflows/ci.yml
@@ -9,6 +9,17 @@ on:
       - stable
       - v*
 
+concurrency:
+  group: test-${{ github.ref }}
+  cancel-in-progress: true
+
+env:
+  PIP_ONLY_BINARY: numpy
+  FORCE_COLOR: 3
+  PYTEST_TIMEOUT: 300
+  # For cmake:
+  VERBOSE: 1
+
 jobs:
   # This is the "main" test suite, which tests a large number of different
   # versions of default compilers and Python versions in GitHub Actions.
@@ -16,66 +27,66 @@ jobs:
     strategy:
       fail-fast: false
       matrix:
-        runs-on: [ubuntu-latest, windows-latest, macos-latest]
+        runs-on: [ubuntu-20.04, windows-2022, macos-latest]
         python:
-        - 2.7
-        - 3.5
-        - 3.6
-        - 3.9
-        # - 3.10-dev  # Re-enable once 3.10.0a5 is released
-        - pypy2
-        - pypy3
+        - '3.6'
+        - '3.9'
+        - '3.10'
+        - '3.11'
+        - 'pypy-3.7'
+        - 'pypy-3.8'
+        - 'pypy-3.9'
 
         # Items in here will either be added to the build matrix (if not
         # present), or add new keys to an existing matrix element if all the
         # existing keys match.
         #
-        # We support three optional keys: args (both build), args1 (first
-        # build), and args2 (second build).
+        # We support an optional key: args, for cmake args
         include:
           # Just add a key
-          - runs-on: ubuntu-latest
-            python: 3.6
+          - runs-on: ubuntu-20.04
+            python: '3.6'
             args: >
               -DPYBIND11_FINDPYTHON=ON
-          - runs-on: windows-latest
-            python: 3.6
+              -DCMAKE_CXX_FLAGS="-D_=1"
+          - runs-on: ubuntu-20.04
+            python: 'pypy-3.8'
             args: >
               -DPYBIND11_FINDPYTHON=ON
-
-        # These items will be removed from the build matrix, keys must match.
-        exclude:
-            # Currently 32bit only, and we build 64bit
-          - runs-on: windows-latest
-            python: pypy2
-          - runs-on: windows-latest
-            python: pypy3
-
-          # TODO: PyPy2 7.3.3 segfaults, while 7.3.2 was fine.
-          - runs-on: ubuntu-latest
-            python: pypy2
+          - runs-on: windows-2019
+            python: '3.6'
+            args: >
+              -DPYBIND11_FINDPYTHON=ON
+          # Inject a couple Windows 2019 runs
+          - runs-on: windows-2019
+            python: '3.9'
 
     name: "🐍 ${{ matrix.python }} • ${{ matrix.runs-on }} • x64 ${{ matrix.args }}"
     runs-on: ${{ matrix.runs-on }}
 
     steps:
-    - uses: actions/checkout@v2
+    - uses: actions/checkout@v3
 
     - name: Setup Python ${{ matrix.python }}
-      uses: actions/setup-python@v2
+      uses: actions/setup-python@v4
       with:
         python-version: ${{ matrix.python }}
 
-    - name: Setup Boost (Windows / Linux latest)
-      shell: bash
-      run: echo "BOOST_ROOT=$BOOST_ROOT_1_72_0" >> $GITHUB_ENV
+    - name: Setup Boost (Linux)
+      # Can't use boost + define _
+      if: runner.os == 'Linux' && matrix.python != '3.6'
+      run: sudo apt-get install libboost-dev
+
+    - name: Setup Boost (macOS)
+      if: runner.os == 'macOS'
+      run: brew install boost
 
     - name: Update CMake
-      uses: jwlawson/actions-setup-cmake@v1.7
+      uses: jwlawson/actions-setup-cmake@v1.13
 
     - name: Cache wheels
       if: runner.os == 'macOS'
-      uses: actions/cache@v2
+      uses: actions/cache@v3
       with:
         # This path is specific to macOS - we really only need it for PyPy NumPy wheels
         # See https://github.com/actions/cache/blob/master/examples.md#python---pip
@@ -85,17 +96,20 @@ jobs:
         key: ${{ runner.os }}-pip-${{ matrix.python }}-x64-${{ hashFiles('tests/requirements.txt') }}
 
     - name: Prepare env
-      run: python -m pip install -r tests/requirements.txt --prefer-binary
+      run: |
+        python -m pip install -r tests/requirements.txt
 
     - name: Setup annotations on Linux
       if: runner.os == 'Linux'
       run: python -m pip install pytest-github-actions-annotate-failures
 
     # First build - C++11 mode and inplace
+    # More-or-less randomly adding -DPYBIND11_SIMPLE_GIL_MANAGEMENT=ON here.
     - name: Configure C++11 ${{ matrix.args }}
       run: >
         cmake -S . -B .
         -DPYBIND11_WERROR=ON
+        -DPYBIND11_SIMPLE_GIL_MANAGEMENT=ON
         -DDOWNLOAD_CATCH=ON
         -DDOWNLOAD_EIGEN=ON
         -DCMAKE_CXX_STANDARD=11
@@ -109,7 +123,7 @@ jobs:
 
     - name: C++11 tests
       # TODO: Figure out how to load the DLL on Python 3.8+
-      if: "!(runner.os == 'Windows' && (matrix.python == 3.8 || matrix.python == 3.9 || matrix.python == '3.10-dev'))"
+      if: "!(runner.os == 'Windows' && (matrix.python == 3.8 || matrix.python == 3.9 || matrix.python == '3.10' || matrix.python == '3.11' || matrix.python == 'pypy-3.8'))"
       run: cmake --build .  --target cpptest -j 2
 
     - name: Interface test C++11
@@ -119,15 +133,16 @@ jobs:
       run: git clean -fdx
 
     # Second build - C++17 mode and in a build directory
-    - name: Configure ${{ matrix.args2 }}
+    # More-or-less randomly adding -DPYBIND11_SIMPLE_GIL_MANAGEMENT=OFF here.
+    - name: Configure C++17
       run: >
         cmake -S . -B build2
         -DPYBIND11_WERROR=ON
+        -DPYBIND11_SIMPLE_GIL_MANAGEMENT=OFF
         -DDOWNLOAD_CATCH=ON
         -DDOWNLOAD_EIGEN=ON
         -DCMAKE_CXX_STANDARD=17
         ${{ matrix.args }}
-        ${{ matrix.args2 }}
 
     - name: Build
       run: cmake --build build2 -j 2
@@ -137,32 +152,35 @@ jobs:
 
     - name: C++ tests
       # TODO: Figure out how to load the DLL on Python 3.8+
-      if: "!(runner.os == 'Windows' && (matrix.python == 3.8 || matrix.python == 3.9 || matrix.python == '3.10-dev'))"
+      if: "!(runner.os == 'Windows' && (matrix.python == 3.8 || matrix.python == 3.9 || matrix.python == '3.10' || matrix.python == '3.11' || matrix.python == 'pypy-3.8'))"
       run: cmake --build build2 --target cpptest
 
+    # Third build - C++17 mode with unstable ABI
+    - name: Configure (unstable ABI)
+      run: >
+        cmake -S . -B build3
+        -DPYBIND11_WERROR=ON
+        -DDOWNLOAD_CATCH=ON
+        -DDOWNLOAD_EIGEN=ON
+        -DCMAKE_CXX_STANDARD=17
+        -DPYBIND11_INTERNALS_VERSION=10000000
+        "-DPYBIND11_TEST_OVERRIDE=test_call_policies.cpp;test_gil_scoped.cpp;test_thread.cpp"
+        ${{ matrix.args }}
+
+    - name: Build (unstable ABI)
+      run: cmake --build build3 -j 2
+
+    - name: Python tests (unstable ABI)
+      run: cmake --build build3 --target pytest
+
     - name: Interface test
       run: cmake --build build2 --target test_cmake_build
 
-    # Eventually Microsoft might have an action for setting up
-    # MSVC, but for now, this action works:
-    - name: Prepare compiler environment for Windows 🐍 2.7
-      if: matrix.python == 2.7 && runner.os == 'Windows'
-      uses: ilammy/msvc-dev-cmd@v1
-      with:
-        arch: x64
-
-    # This makes two environment variables available in the following step(s)
-    - name: Set Windows 🐍 2.7 environment variables
-      if: matrix.python == 2.7 && runner.os == 'Windows'
-      shell: bash
-      run: |
-        echo "DISTUTILS_USE_SDK=1" >> $GITHUB_ENV
-        echo "MSSdk=1" >> $GITHUB_ENV
-
     # This makes sure the setup_helpers module can build packages using
     # setuptools
     - name: Setuptools helpers test
       run: pytest tests/extra_setuptools
+      if: "!(matrix.runs-on == 'windows-2022')"
 
 
   deadsnakes:
@@ -170,30 +188,31 @@ jobs:
       fail-fast: false
       matrix:
         include:
-        - python-version: 3.9
+        # TODO: Fails on 3.10, investigate
+        - python-version: "3.9"
           python-debug: true
           valgrind: true
-        - python-version: 3.10-dev
+        - python-version: "3.11"
           python-debug: false
 
     name: "🐍 ${{ matrix.python-version }}${{ matrix.python-debug && '-dbg' || '' }} (deadsnakes)${{ matrix.valgrind && ' • Valgrind' || '' }} • x64"
-    runs-on: ubuntu-latest
+    runs-on: ubuntu-20.04
 
     steps:
-    - uses: actions/checkout@v2
+    - uses: actions/checkout@v3
 
     - name: Setup Python ${{ matrix.python-version }} (deadsnakes)
-      uses: deadsnakes/action@v2.1.1
+      uses: deadsnakes/action@v3.0.0
       with:
         python-version: ${{ matrix.python-version }}
         debug: ${{ matrix.python-debug }}
 
     - name: Update CMake
-      uses: jwlawson/actions-setup-cmake@v1.7
+      uses: jwlawson/actions-setup-cmake@v1.13
 
     - name: Valgrind cache
       if: matrix.valgrind
-      uses: actions/cache@v2
+      uses: actions/cache@v3
       id: cache-valgrind
       with:
         path: valgrind
@@ -218,9 +237,12 @@ jobs:
         sudo apt-get install libc6-dbg  # Needed by Valgrind
 
     - name: Prepare env
-      run: python -m pip install -r tests/requirements.txt --prefer-binary
+      run: |
+        python -m pip install -r tests/requirements.txt
 
     - name: Configure
+      env:
+        SETUPTOOLS_USE_DISTUTILS: stdlib
       run: >
         cmake -S . -B build
         -DCMAKE_BUILD_TYPE=Debug
@@ -261,16 +283,22 @@ jobs:
         include:
           - clang: 5
             std: 14
-          - clang: 10
-            std: 20
           - clang: 10
             std: 17
+          - clang: 11
+            std: 20
+          - clang: 12
+            std: 20
+          - clang: 13
+            std: 20
+          - clang: 14
+            std: 20
 
     name: "🐍 3 • Clang ${{ matrix.clang }} • C++${{ matrix.std }} • x64"
     container: "silkeh/clang:${{ matrix.clang }}"
 
     steps:
-    - uses: actions/checkout@v2
+    - uses: actions/checkout@v3
 
     - name: Add wget and python3
       run: apt-get update && apt-get install -y python3-dev python3-numpy python3-pytest libeigen3-dev
@@ -300,11 +328,11 @@ jobs:
   # Testing NVCC; forces sources to behave like .cu files
   cuda:
     runs-on: ubuntu-latest
-    name: "🐍 3.8 • CUDA 11 • Ubuntu 20.04"
-    container: nvidia/cuda:11.0-devel-ubuntu20.04
+    name: "🐍 3.10 • CUDA 11.7 • Ubuntu 22.04"
+    container: nvidia/cuda:11.7.0-devel-ubuntu22.04
 
     steps:
-    - uses: actions/checkout@v2
+    - uses: actions/checkout@v3
 
     # tzdata will try to ask for the timezone, so set the DEBIAN_FRONTEND
     - name: Install 🐍 3
@@ -328,7 +356,7 @@ jobs:
 #    container: centos:8
 #
 #    steps:
-#    - uses: actions/checkout@v2
+#    - uses: actions/checkout@v3
 #
 #    - name: Add Python 3 and a few requirements
 #      run: yum update -y && yum install -y git python3-devel python3-numpy python3-pytest make environment-modules
@@ -367,32 +395,32 @@ jobs:
   # Testing on CentOS 7 + PGI compilers, which seems to require more workarounds
   centos-nvhpc7:
     runs-on: ubuntu-latest
-    name: "🐍 3 • CentOS7 / PGI 20.9 • x64"
+    name: "🐍 3 • CentOS7 / PGI 22.9 • x64"
     container: centos:7
 
     steps:
-    - uses: actions/checkout@v2
+    - uses: actions/checkout@v3
 
     - name: Add Python 3 and a few requirements
-      run: yum update -y && yum install -y epel-release && yum install -y git python3-devel make environment-modules cmake3
+      run: yum update -y && yum install -y epel-release && yum install -y git python3-devel make environment-modules cmake3 yum-utils
 
     - name: Install NVidia HPC SDK
-      run:  yum -y install https://developer.download.nvidia.com/hpc-sdk/20.9/nvhpc-20-9-20.9-1.x86_64.rpm https://developer.download.nvidia.com/hpc-sdk/20.9/nvhpc-2020-20.9-1.x86_64.rpm
+      run: yum-config-manager --add-repo https://developer.download.nvidia.com/hpc-sdk/rhel/nvhpc.repo && yum -y install nvhpc-22.9
 
     # On CentOS 7, we have to filter a few tests (compiler internal error)
-    # and allow deeper templete recursion (not needed on CentOS 8 with a newer
+    # and allow deeper template recursion (not needed on CentOS 8 with a newer
     # standard library). On some systems, you many need further workarounds:
     # https://github.com/pybind/pybind11/pull/2475
     - name: Configure
       shell: bash
       run: |
         source /etc/profile.d/modules.sh
-        module load /opt/nvidia/hpc_sdk/modulefiles/nvhpc/20.9
+        module load /opt/nvidia/hpc_sdk/modulefiles/nvhpc/22.9
         cmake3 -S . -B build -DDOWNLOAD_CATCH=ON \
                             -DCMAKE_CXX_STANDARD=11 \
                             -DPYTHON_EXECUTABLE=$(python3 -c "import sys; print(sys.executable)") \
                             -DCMAKE_CXX_FLAGS="-Wc,--pending_instantiations=0" \
-                            -DPYBIND11_TEST_FILTER="test_smart_ptr.cpp;test_virtual_functions.cpp"
+                            -DPYBIND11_TEST_FILTER="test_smart_ptr.cpp"
 
     # Building before installing Pip should produce a warning but not an error
     - name: Build
@@ -419,20 +447,20 @@ jobs:
     strategy:
       fail-fast: false
       matrix:
-        gcc:
-          - 7
-          - latest
-        std:
-          - 11
         include:
-          - gcc: 10
-            std: 20
+          - { gcc: 7, std: 11 }
+          - { gcc: 7, std: 17 }
+          - { gcc: 8, std: 14 }
+          - { gcc: 8, std: 17 }
+          - { gcc: 10, std: 17 }
+          - { gcc: 11, std: 20 }
+          - { gcc: 12, std: 20 }
 
     name: "🐍 3 • GCC ${{ matrix.gcc }} • C++${{ matrix.std }}• x64"
     container: "gcc:${{ matrix.gcc }}"
 
     steps:
-    - uses: actions/checkout@v1
+    - uses: actions/checkout@v3
 
     - name: Add Python 3
       run: apt-get update; apt-get install -y python3-dev python3-numpy python3-pytest python3-pip libeigen3-dev
@@ -441,7 +469,7 @@ jobs:
       run: python3 -m pip install --upgrade pip
 
     - name: Update CMake
-      uses: jwlawson/actions-setup-cmake@v1.7
+      uses: jwlawson/actions-setup-cmake@v1.13
 
     - name: Configure
       shell: bash
@@ -474,7 +502,7 @@ jobs:
     name: "🐍 3 • ICC latest • x64"
 
     steps:
-    - uses: actions/checkout@v2
+    - uses: actions/checkout@v3
 
     - name: Add apt repo
       run: |
@@ -495,7 +523,7 @@ jobs:
     - name: Install dependencies
       run: |
         set +e; source /opt/intel/oneapi/setvars.sh; set -e
-        python3 -m pip install -r tests/requirements.txt --prefer-binary
+        python3 -m pip install -r tests/requirements.txt
 
     - name: Configure C++11
       run: |
@@ -569,29 +597,37 @@ jobs:
     strategy:
       fail-fast: false
       matrix:
-        centos:
-          - 7  # GCC 4.8
-          - 8
+        container:
+          - "centos:7"  # GCC 4.8
+          - "almalinux:8"
+          - "almalinux:9"
 
-    name: "🐍 3 • CentOS ${{ matrix.centos }} • x64"
-    container: "centos:${{ matrix.centos }}"
+    name: "🐍 3 • ${{ matrix.container }} • x64"
+    container: "${{ matrix.container }}"
 
     steps:
-    - uses: actions/checkout@v2
+    - uses: actions/checkout@v3
 
-    - name: Add Python 3
+    - name: Add Python 3 (RHEL 7)
+      if: matrix.container == 'centos:7'
       run: yum update -y && yum install -y python3-devel gcc-c++ make git
 
+    - name: Add Python 3 (RHEL 8+)
+      if: matrix.container != 'centos:7'
+      run: dnf update -y && dnf install -y python3-devel gcc-c++ make git
+
     - name: Update pip
       run: python3 -m pip install --upgrade pip
 
     - name: Install dependencies
-      run: python3 -m pip install cmake -r tests/requirements.txt --prefer-binary
+      run: |
+        python3 -m pip install cmake -r tests/requirements.txt
 
     - name: Configure
       shell: bash
       run: >
         cmake -S . -B build
+        -DCMAKE_BUILD_TYPE=MinSizeRel
         -DPYBIND11_WERROR=ON
         -DDOWNLOAD_CATCH=ON
         -DDOWNLOAD_EIGEN=ON
@@ -613,18 +649,18 @@ jobs:
 
   # This tests an "install" with the CMake tools
   install-classic:
-    name: "🐍 3.5 • Debian • x86 •  Install"
+    name: "🐍 3.7 • Debian • x86 •  Install"
     runs-on: ubuntu-latest
-    container: i386/debian:stretch
+    container: i386/debian:buster
 
     steps:
-    - uses: actions/checkout@v1
+    - uses: actions/checkout@v1  # Required to run inside docker
 
     - name: Install requirements
       run: |
         apt-get update
         apt-get install -y git make cmake g++ libeigen3-dev python3-dev python3-pip
-        pip3 install "pytest==3.1.*"
+        pip3 install "pytest==6.*"
 
     - name: Configure for install
       run: >
@@ -649,33 +685,32 @@ jobs:
         -DPYTHON_EXECUTABLE=$(python3 -c "import sys; print(sys.executable)")
       working-directory: /build-tests
 
-    - name: Run tests
+    - name: Python tests
       run: make pytest -j 2
       working-directory: /build-tests
 
 
   # This verifies that the documentation is not horribly broken, and does a
-  # basic sanity check on the SDist.
+  # basic validation check on the SDist.
   doxygen:
     name: "Documentation build test"
     runs-on: ubuntu-latest
 
     steps:
-    - uses: actions/checkout@v2
+    - uses: actions/checkout@v3
 
-    - uses: actions/setup-python@v2
+    - uses: actions/setup-python@v4
+      with:
+        python-version: "3.x"
 
     - name: Install Doxygen
       run: sudo apt-get install -y doxygen librsvg2-bin # Changed to rsvg-convert in 20.04
 
-    - name: Install docs & setup requirements
-      run: python3 -m pip install -r docs/requirements.txt
-
     - name: Build docs
-      run: python3 -m sphinx -W -b html docs docs/.build
+      run: pipx run nox -s docs
 
     - name: Make SDist
-      run: python3 setup.py sdist
+      run: pipx run nox -s build -- --sdist
 
     - run: git status --ignored
 
@@ -687,7 +722,7 @@ jobs:
     - name: Compare Dists (headers only)
       working-directory: include
       run: |
-        python3 -m pip install --user -U ../dist/*
+        python3 -m pip install --user -U ../dist/*.tar.gz
         installed=$(python3 -c "import pybind11; print(pybind11.get_include() + '/pybind11')")
         diff -rq $installed ./pybind11
 
@@ -696,42 +731,43 @@ jobs:
       fail-fast: false
       matrix:
         python:
-        - 3.5
         - 3.6
         - 3.7
         - 3.8
         - 3.9
-        - pypy3
-        # TODO: fix hang on pypy2
 
         include:
           - python: 3.9
-            args: -DCMAKE_CXX_STANDARD=20 -DDOWNLOAD_EIGEN=OFF
+            args: -DCMAKE_CXX_STANDARD=20
           - python: 3.8
             args: -DCMAKE_CXX_STANDARD=17
+          - python: 3.7
+            args: -DCMAKE_CXX_STANDARD=14
+
 
     name: "🐍 ${{ matrix.python }} • MSVC 2019 • x86 ${{ matrix.args }}"
-    runs-on: windows-latest
+    runs-on: windows-2019
 
     steps:
-    - uses: actions/checkout@v2
+    - uses: actions/checkout@v3
 
     - name: Setup Python ${{ matrix.python }}
-      uses: actions/setup-python@v2
+      uses: actions/setup-python@v4
       with:
         python-version: ${{ matrix.python }}
         architecture: x86
 
     - name: Update CMake
-      uses: jwlawson/actions-setup-cmake@v1.7
+      uses: jwlawson/actions-setup-cmake@v1.13
 
     - name: Prepare MSVC
-      uses: ilammy/msvc-dev-cmd@v1
+      uses: ilammy/msvc-dev-cmd@v1.12.0
       with:
         arch: x86
 
     - name: Prepare env
-      run: python -m pip install -r tests/requirements.txt --prefer-binary
+      run: |
+        python -m pip install -r tests/requirements.txt
 
     # First build - C++11 mode and inplace
     - name: Configure ${{ matrix.args }}
@@ -745,102 +781,324 @@ jobs:
     - name: Build C++11
       run: cmake --build build -j 2
 
-    - name: Run tests
+    - name: Python tests
       run: cmake --build build -t pytest
 
-  win32-msvc2015:
-    name: "🐍 ${{ matrix.python }} • MSVC 2015 • x64"
-    runs-on: windows-latest
+  win32-debug:
     strategy:
       fail-fast: false
       matrix:
         python:
-          - 2.7
-          - 3.6
-          - 3.7
-          # todo: check/cpptest does not support 3.8+ yet
-
-    steps:
-    - uses: actions/checkout@v2
-
-    - name: Setup 🐍 ${{ matrix.python }}
-      uses: actions/setup-python@v2
-      with:
-        python-version: ${{ matrix.python }}
-
-    - name: Update CMake
-      uses: jwlawson/actions-setup-cmake@v1.7
-
-    - name: Prepare MSVC
-      uses: ilammy/msvc-dev-cmd@v1
-      with:
-        toolset: 14.0
-
-    - name: Prepare env
-      run: python -m pip install -r tests/requirements.txt --prefer-binary
-
-    # First build - C++11 mode and inplace
-    - name: Configure
-      run: >
-        cmake -S . -B build
-        -G "Visual Studio 14 2015" -A x64
-        -DPYBIND11_WERROR=ON
-        -DDOWNLOAD_CATCH=ON
-        -DDOWNLOAD_EIGEN=ON
-
-    - name: Build C++14
-      run: cmake --build build -j 2
-
-    - name: Run all checks
-      run: cmake --build build -t check
-
-
-  win32-msvc2017:
-    name: "🐍 ${{ matrix.python }} • MSVC 2017 • x64"
-    runs-on: windows-2016
-    strategy:
-      fail-fast: false
-      matrix:
-        python:
-          - 2.7
-          - 3.5
-          - 3.7
-        std:
-          - 14
+        - 3.8
+        - 3.9
 
         include:
-          - python: 2.7
-            std: 17
-            args: >
-              -DCMAKE_CXX_FLAGS="/permissive- /EHsc /GR"
+          - python: 3.9
+            args: -DCMAKE_CXX_STANDARD=20
+          - python: 3.8
+            args: -DCMAKE_CXX_STANDARD=17
+
+    name: "🐍 ${{ matrix.python }} • MSVC 2019 (Debug) • x86 ${{ matrix.args }}"
+    runs-on: windows-2019
 
     steps:
-    - uses: actions/checkout@v2
+    - uses: actions/checkout@v3
 
-    - name: Setup 🐍 ${{ matrix.python }}
-      uses: actions/setup-python@v2
+    - name: Setup Python ${{ matrix.python }}
+      uses: actions/setup-python@v4
       with:
         python-version: ${{ matrix.python }}
+        architecture: x86
 
     - name: Update CMake
-      uses: jwlawson/actions-setup-cmake@v1.7
+      uses: jwlawson/actions-setup-cmake@v1.13
+
+    - name: Prepare MSVC
+      uses: ilammy/msvc-dev-cmd@v1.12.0
+      with:
+        arch: x86
 
     - name: Prepare env
-      run: python -m pip install -r tests/requirements.txt --prefer-binary
+      run: |
+        python -m pip install -r tests/requirements.txt
 
     # First build - C++11 mode and inplace
-    - name: Configure
+    - name: Configure ${{ matrix.args }}
       run: >
         cmake -S . -B build
-        -G "Visual Studio 15 2017" -A x64
+        -G "Visual Studio 16 2019" -A Win32
+        -DCMAKE_BUILD_TYPE=Debug
         -DPYBIND11_WERROR=ON
         -DDOWNLOAD_CATCH=ON
         -DDOWNLOAD_EIGEN=ON
-        -DCMAKE_CXX_STANDARD=${{ matrix.std }}
         ${{ matrix.args }}
+    - name: Build C++11
+      run: cmake --build build --config Debug -j 2
 
-    - name: Build ${{ matrix.std }}
+    - name: Python tests
+      run: cmake --build build --config Debug -t pytest
+
+
+  windows-2022:
+    strategy:
+      fail-fast: false
+      matrix:
+        python:
+        - 3.9
+
+    name: "🐍 ${{ matrix.python }} • MSVC 2022 C++20 • x64"
+    runs-on: windows-2022
+
+    steps:
+    - uses: actions/checkout@v3
+
+    - name: Setup Python ${{ matrix.python }}
+      uses: actions/setup-python@v4
+      with:
+        python-version: ${{ matrix.python }}
+
+    - name: Prepare env
+      run: |
+        python3 -m pip install -r tests/requirements.txt
+
+    - name: Update CMake
+      uses: jwlawson/actions-setup-cmake@v1.13
+
+    - name: Configure C++20
+      run: >
+        cmake -S . -B build
+        -DPYBIND11_WERROR=ON
+        -DDOWNLOAD_CATCH=ON
+        -DDOWNLOAD_EIGEN=ON
+        -DCMAKE_CXX_STANDARD=20
+
+    - name: Build C++20
       run: cmake --build build -j 2
 
-    - name: Run all checks
-      run: cmake --build build -t check
+    - name: Python tests
+      run: cmake --build build --target pytest
+
+    - name: C++20 tests
+      run: cmake --build build --target cpptest -j 2
+
+    - name: Interface test C++20
+      run: cmake --build build --target test_cmake_build
+
+  mingw:
+    name: "🐍 3 • windows-latest • ${{ matrix.sys }}"
+    runs-on: windows-latest
+    defaults:
+      run:
+        shell: msys2 {0}
+    strategy:
+      fail-fast: false
+      matrix:
+        include:
+          - { sys: mingw64, env: x86_64 }
+          - { sys: mingw32, env: i686 }
+    steps:
+    - uses: msys2/setup-msys2@v2
+      with:
+        msystem: ${{matrix.sys}}
+        install: >-
+          git
+          mingw-w64-${{matrix.env}}-gcc
+          mingw-w64-${{matrix.env}}-python-pip
+          mingw-w64-${{matrix.env}}-python-numpy
+          mingw-w64-${{matrix.env}}-python-scipy
+          mingw-w64-${{matrix.env}}-cmake
+          mingw-w64-${{matrix.env}}-make
+          mingw-w64-${{matrix.env}}-python-pytest
+          mingw-w64-${{matrix.env}}-eigen3
+          mingw-w64-${{matrix.env}}-boost
+          mingw-w64-${{matrix.env}}-catch
+
+    - uses: actions/checkout@v3
+
+    - name: Configure C++11
+      # LTO leads to many undefined reference like
+      # `pybind11::detail::function_call::function_call(pybind11::detail::function_call&&)
+      run: cmake -G "MinGW Makefiles" -DCMAKE_CXX_STANDARD=11 -DPYBIND11_WERROR=ON -DDOWNLOAD_CATCH=ON -S . -B build
+
+    - name: Build C++11
+      run: cmake --build build -j 2
+
+    - name: Python tests C++11
+      run: cmake --build build --target pytest -j 2
+
+    - name: C++11 tests
+      run: PYTHONHOME=/${{matrix.sys}} PYTHONPATH=/${{matrix.sys}} cmake --build build --target cpptest -j 2
+
+    - name: Interface test C++11
+      run: PYTHONHOME=/${{matrix.sys}} PYTHONPATH=/${{matrix.sys}} cmake --build build --target test_cmake_build
+
+    - name: Clean directory
+      run: git clean -fdx
+
+    - name: Configure C++14
+      run: cmake -G "MinGW Makefiles" -DCMAKE_CXX_STANDARD=14 -DPYBIND11_WERROR=ON -DDOWNLOAD_CATCH=ON -S . -B build2
+
+    - name: Build C++14
+      run: cmake --build build2 -j 2
+
+    - name: Python tests C++14
+      run: cmake --build build2 --target pytest -j 2
+
+    - name: C++14 tests
+      run: PYTHONHOME=/${{matrix.sys}} PYTHONPATH=/${{matrix.sys}} cmake --build build2 --target cpptest -j 2
+
+    - name: Interface test C++14
+      run: PYTHONHOME=/${{matrix.sys}} PYTHONPATH=/${{matrix.sys}} cmake --build build2 --target test_cmake_build
+
+    - name: Clean directory
+      run: git clean -fdx
+
+    - name: Configure C++17
+      run: cmake -G "MinGW Makefiles" -DCMAKE_CXX_STANDARD=17 -DPYBIND11_WERROR=ON -DDOWNLOAD_CATCH=ON -S . -B build3
+
+    - name: Build C++17
+      run: cmake --build build3 -j 2
+
+    - name: Python tests C++17
+      run: cmake --build build3 --target pytest -j 2
+
+    - name: C++17 tests
+      run: PYTHONHOME=/${{matrix.sys}} PYTHONPATH=/${{matrix.sys}} cmake --build build3 --target cpptest -j 2
+
+    - name: Interface test C++17
+      run: PYTHONHOME=/${{matrix.sys}} PYTHONPATH=/${{matrix.sys}} cmake --build build3 --target test_cmake_build
+
+  windows_clang:
+
+    strategy:
+      matrix:
+        os: [windows-latest]
+        python: ['3.10']
+
+    runs-on: "${{ matrix.os }}"
+
+    name: "🐍 ${{ matrix.python }} • ${{ matrix.os }} • clang-latest"
+
+    steps:
+      - name: Show env
+        run: env
+
+      - name: Checkout
+        uses: actions/checkout@v3
+
+      - name: Set up Clang
+        uses: egor-tensin/setup-clang@v1
+
+      - name: Setup Python ${{ matrix.python }}
+        uses: actions/setup-python@v4
+        with:
+          python-version: ${{ matrix.python }}
+
+      - name: Update CMake
+        uses: jwlawson/actions-setup-cmake@v1.13
+
+      - name: Install ninja-build tool
+        uses: seanmiddleditch/gha-setup-ninja@v3
+
+      - name: Run pip installs
+        run: |
+          python -m pip install --upgrade pip
+          python -m pip install -r tests/requirements.txt
+
+      - name: Show Clang++ version
+        run: clang++ --version
+
+      - name: Show CMake version
+        run: cmake --version
+
+      # TODO: WERROR=ON
+      - name: Configure Clang
+        run: >
+          cmake -G Ninja -S . -B .
+          -DPYBIND11_WERROR=OFF
+          -DPYBIND11_SIMPLE_GIL_MANAGEMENT=OFF
+          -DDOWNLOAD_CATCH=ON
+          -DDOWNLOAD_EIGEN=ON
+          -DCMAKE_CXX_COMPILER=clang++
+          -DCMAKE_CXX_STANDARD=17
+
+      - name: Build
+        run: cmake --build . -j 2
+
+      - name: Python tests
+        run: cmake --build . --target pytest -j 2
+
+      - name: C++ tests
+        run: cmake --build . --target cpptest -j 2
+
+      - name: Interface test
+        run: cmake --build . --target test_cmake_build -j 2
+
+      - name: Clean directory
+        run: git clean -fdx
+
+  macos_brew_install_llvm:
+    name: "macos-latest • brew install llvm"
+    runs-on: macos-latest
+
+    env:
+      # https://apple.stackexchange.com/questions/227026/how-to-install-recent-clang-with-homebrew
+      LDFLAGS: '-L/usr/local/opt/llvm/lib -Wl,-rpath,/usr/local/opt/llvm/lib'
+
+    steps:
+      - name: Update PATH
+        run: echo "/usr/local/opt/llvm/bin" >> $GITHUB_PATH
+
+      - name: Show env
+        run: env
+
+      - name: Checkout
+        uses: actions/checkout@v3
+
+      - name: Show Clang++ version before brew install llvm
+        run: clang++ --version
+
+      - name: brew install llvm
+        run: brew install llvm
+
+      - name: Show Clang++ version after brew install llvm
+        run: clang++ --version
+
+      - name: Update CMake
+        uses: jwlawson/actions-setup-cmake@v1.13
+
+      - name: Run pip installs
+        run: |
+          python3 -m pip install --upgrade pip
+          python3 -m pip install -r tests/requirements.txt
+          python3 -m pip install numpy
+          python3 -m pip install scipy
+
+      - name: Show CMake version
+        run: cmake --version
+
+      - name: CMake Configure
+        run: >
+          cmake -S . -B .
+          -DPYBIND11_WERROR=ON
+          -DPYBIND11_SIMPLE_GIL_MANAGEMENT=OFF
+          -DDOWNLOAD_CATCH=ON
+          -DDOWNLOAD_EIGEN=ON
+          -DCMAKE_CXX_COMPILER=clang++
+          -DCMAKE_CXX_STANDARD=17
+          -DPYTHON_EXECUTABLE=$(python3 -c "import sys; print(sys.executable)")
+
+      - name: Build
+        run: cmake --build . -j 2
+
+      - name: Python tests
+        run: cmake --build . --target pytest -j 2
+
+      - name: C++ tests
+        run: cmake --build . --target cpptest -j 2
+
+      - name: Interface test
+        run: cmake --build . --target test_cmake_build -j 2
+
+      - name: Clean directory
+        run: git clean -fdx
diff --git a/ext/pybind11/.github/workflows/configure.yml b/ext/pybind11/.github/workflows/configure.yml
index 578dba630e..29b041168e 100644
--- a/ext/pybind11/.github/workflows/configure.yml
+++ b/ext/pybind11/.github/workflows/configure.yml
@@ -9,6 +9,10 @@ on:
       - stable
       - v*
 
+env:
+  # For cmake:
+  VERBOSE: 1
+
 jobs:
   # This tests various versions of CMake in various combinations, to make sure
   # the configure step passes.
@@ -16,12 +20,12 @@ jobs:
     strategy:
       fail-fast: false
       matrix:
-        runs-on: [ubuntu-latest, macos-latest, windows-latest]
+        runs-on: [ubuntu-20.04, macos-latest, windows-latest]
         arch: [x64]
-        cmake: [3.18]
+        cmake: ["3.23"]
 
         include:
-        - runs-on: ubuntu-latest
+        - runs-on: ubuntu-20.04
           arch: x64
           cmake: 3.4
 
@@ -29,22 +33,18 @@ jobs:
           arch: x64
           cmake: 3.7
 
-        - runs-on: windows-2016
-          arch: x86
-          cmake: 3.8
-
-        - runs-on: windows-2016
-          arch: x86
+        - runs-on: windows-2019
+          arch: x64 # x86 compilers seem to be missing on 2019 image
           cmake: 3.18
 
     name: 🐍 3.7 • CMake ${{ matrix.cmake }} • ${{ matrix.runs-on }}
     runs-on: ${{ matrix.runs-on }}
 
     steps:
-    - uses: actions/checkout@v2
+    - uses: actions/checkout@v3
 
     - name: Setup Python 3.7
-      uses: actions/setup-python@v2
+      uses: actions/setup-python@v4
       with:
         python-version: 3.7
         architecture: ${{ matrix.arch }}
@@ -55,7 +55,7 @@ jobs:
     # An action for adding a specific version of CMake:
     #   https://github.com/jwlawson/actions-setup-cmake
     - name: Setup CMake ${{ matrix.cmake }}
-      uses: jwlawson/actions-setup-cmake@v1.7
+      uses: jwlawson/actions-setup-cmake@v1.13
       with:
         cmake-version: ${{ matrix.cmake }}
 
diff --git a/ext/pybind11/.github/workflows/format.yml b/ext/pybind11/.github/workflows/format.yml
index 5cebed17da..b18474bc3d 100644
--- a/ext/pybind11/.github/workflows/format.yml
+++ b/ext/pybind11/.github/workflows/format.yml
@@ -12,24 +12,35 @@ on:
     - stable
     - "v*"
 
+env:
+  FORCE_COLOR: 3
+  # For cmake:
+  VERBOSE: 1
+
 jobs:
   pre-commit:
     name: Format
     runs-on: ubuntu-latest
     steps:
-    - uses: actions/checkout@v2
-    - uses: actions/setup-python@v2
-    - uses: pre-commit/action@v2.0.0
+    - uses: actions/checkout@v3
+    - uses: actions/setup-python@v4
+      with:
+        python-version: "3.x"
+    - name: Add matchers
+      run: echo "::add-matcher::$GITHUB_WORKSPACE/.github/matchers/pylint.json"
+    - uses: pre-commit/action@v3.0.0
       with:
         # Slow hooks are marked with manual - slow is okay here, run them too
         extra_args: --hook-stage manual --all-files
 
   clang-tidy:
+    # When making changes here, please also review the "Clang-Tidy" section
+    # in .github/CONTRIBUTING.md and update as needed.
     name: Clang-Tidy
     runs-on: ubuntu-latest
-    container: silkeh/clang:10
+    container: silkeh/clang:13
     steps:
-    - uses: actions/checkout@v2
+    - uses: actions/checkout@v3
 
     - name: Install requirements
       run: apt-get update && apt-get install -y python3-dev python3-pytest
@@ -37,10 +48,10 @@ jobs:
     - name: Configure
       run: >
         cmake -S . -B build
-        -DCMAKE_CXX_CLANG_TIDY="$(which clang-tidy);--warnings-as-errors=*"
+        -DCMAKE_CXX_CLANG_TIDY="$(which clang-tidy);--use-color;--warnings-as-errors=*"
         -DDOWNLOAD_EIGEN=ON
         -DDOWNLOAD_CATCH=ON
         -DCMAKE_CXX_STANDARD=17
 
     - name: Build
-      run: cmake --build build -j 2
+      run: cmake --build build -j 2 -- --keep-going
diff --git a/ext/pybind11/.github/workflows/labeler.yml b/ext/pybind11/.github/workflows/labeler.yml
index d2b5979681..165a2fd87b 100644
--- a/ext/pybind11/.github/workflows/labeler.yml
+++ b/ext/pybind11/.github/workflows/labeler.yml
@@ -10,7 +10,11 @@ jobs:
     steps:
 
     - uses: actions/labeler@main
-      if: github.event.pull_request.merged == true
+      if: >
+        github.event.pull_request.merged == true &&
+        !startsWith(github.event.pull_request.title, 'chore(deps):') &&
+        !startsWith(github.event.pull_request.title, 'ci(fix):') &&
+        !startsWith(github.event.pull_request.title, 'docs(changelog):')
       with:
         repo-token: ${{ secrets.GITHUB_TOKEN }}
         configuration-path: .github/labeler_merged.yml
diff --git a/ext/pybind11/.github/workflows/pip.yml b/ext/pybind11/.github/workflows/pip.yml
index 4414a12ee4..7c6fc67a3e 100644
--- a/ext/pybind11/.github/workflows/pip.yml
+++ b/ext/pybind11/.github/workflows/pip.yml
@@ -12,24 +12,28 @@ on:
     types:
     - published
 
+env:
+  PIP_ONLY_BINARY: numpy
+
 jobs:
   # This builds the sdists and wheels and makes sure the files are exactly as
-  # expected. Using Windows and Python 2.7, since that is often the most
+  # expected. Using Windows and Python 3.6, since that is often the most
   # challenging matrix element.
   test-packaging:
-    name: 🐍 2.7 • 📦 tests • windows-latest
+    name: 🐍 3.6 • 📦 tests • windows-latest
     runs-on: windows-latest
 
     steps:
-    - uses: actions/checkout@v2
+    - uses: actions/checkout@v3
 
-    - name: Setup 🐍 2.7
-      uses: actions/setup-python@v2
+    - name: Setup 🐍 3.6
+      uses: actions/setup-python@v4
       with:
-        python-version: 2.7
+        python-version: 3.6
 
     - name: Prepare env
-      run: python -m pip install -r tests/requirements.txt --prefer-binary
+      run: |
+        python -m pip install -r tests/requirements.txt
 
     - name: Python Packaging tests
       run: pytest tests/extra_python_package/
@@ -42,15 +46,16 @@ jobs:
     runs-on: ubuntu-latest
 
     steps:
-    - uses: actions/checkout@v2
+    - uses: actions/checkout@v3
 
     - name: Setup 🐍 3.8
-      uses: actions/setup-python@v2
+      uses: actions/setup-python@v4
       with:
         python-version: 3.8
 
     - name: Prepare env
-      run: python -m pip install -r tests/requirements.txt build twine --prefer-binary
+      run: |
+        python -m pip install -r tests/requirements.txt build twine
 
     - name: Python Packaging tests
       run: pytest tests/extra_python_package/
@@ -64,13 +69,13 @@ jobs:
       run: twine check dist/*
 
     - name: Save standard package
-      uses: actions/upload-artifact@v2
+      uses: actions/upload-artifact@v3
       with:
         name: standard
         path: dist/pybind11-*
 
     - name: Save global package
-      uses: actions/upload-artifact@v2
+      uses: actions/upload-artifact@v3
       with:
         name: global
         path: dist/pybind11_global-*
@@ -85,19 +90,21 @@ jobs:
     needs: [packaging]
 
     steps:
-    - uses: actions/setup-python@v2
+    - uses: actions/setup-python@v4
+      with:
+        python-version: "3.x"
 
     # Downloads all to directories matching the artifact names
-    - uses: actions/download-artifact@v2
+    - uses: actions/download-artifact@v3
 
     - name: Publish standard package
-      uses: pypa/gh-action-pypi-publish@v1.4.1
+      uses: pypa/gh-action-pypi-publish@v1.6.4
       with:
         password: ${{ secrets.pypi_password }}
         packages_dir: standard/
 
     - name: Publish global package
-      uses: pypa/gh-action-pypi-publish@v1.4.1
+      uses: pypa/gh-action-pypi-publish@v1.6.4
       with:
         password: ${{ secrets.pypi_password_global }}
         packages_dir: global/
diff --git a/ext/pybind11/.github/workflows/upstream.yml b/ext/pybind11/.github/workflows/upstream.yml
new file mode 100644
index 0000000000..a15861ee47
--- /dev/null
+++ b/ext/pybind11/.github/workflows/upstream.yml
@@ -0,0 +1,114 @@
+
+name: Upstream
+
+on:
+  workflow_dispatch:
+  pull_request:
+
+concurrency:
+  group: upstream-${{ github.ref }}
+  cancel-in-progress: true
+
+env:
+  PIP_ONLY_BINARY: numpy
+  # For cmake:
+  VERBOSE: 1
+
+jobs:
+  standard:
+    name: "🐍 3.11 latest internals • ubuntu-latest • x64"
+    runs-on: ubuntu-latest
+    if: "contains(github.event.pull_request.labels.*.name, 'python dev')"
+
+    steps:
+    - uses: actions/checkout@v3
+
+    - name: Setup Python 3.11
+      uses: actions/setup-python@v4
+      with:
+        python-version: "3.11-dev"
+
+    - name: Setup Boost (Linux)
+      if: runner.os == 'Linux'
+      run: sudo apt-get install libboost-dev
+
+    - name: Update CMake
+      uses: jwlawson/actions-setup-cmake@v1.13
+
+    - name: Prepare env
+      run: |
+        python -m pip install -r tests/requirements.txt
+
+    - name: Setup annotations on Linux
+      if: runner.os == 'Linux'
+      run: python -m pip install pytest-github-actions-annotate-failures
+
+    # First build - C++11 mode and inplace
+    - name: Configure C++11
+      run: >
+        cmake -S . -B .
+        -DPYBIND11_WERROR=ON
+        -DDOWNLOAD_CATCH=ON
+        -DDOWNLOAD_EIGEN=ON
+        -DCMAKE_CXX_STANDARD=11
+
+    - name: Build C++11
+      run: cmake --build . -j 2
+
+    - name: Python tests C++11
+      run: cmake --build . --target pytest -j 2
+
+    - name: C++11 tests
+      run: cmake --build .  --target cpptest -j 2
+
+    - name: Interface test C++11
+      run: cmake --build . --target test_cmake_build
+
+    - name: Clean directory
+      run: git clean -fdx
+
+    # Second build - C++17 mode and in a build directory
+    - name: Configure C++17
+      run: >
+        cmake -S . -B build2
+        -DPYBIND11_WERROR=ON
+        -DDOWNLOAD_CATCH=ON
+        -DDOWNLOAD_EIGEN=ON
+        -DCMAKE_CXX_STANDARD=17
+        ${{ matrix.args }}
+        ${{ matrix.args2 }}
+
+    - name: Build
+      run: cmake --build build2 -j 2
+
+    - name: Python tests
+      run: cmake --build build2 --target pytest
+
+    - name: C++ tests
+      run: cmake --build build2 --target cpptest
+
+    # Third build - C++17 mode with unstable ABI
+    - name: Configure (unstable ABI)
+      run: >
+        cmake -S . -B build3
+        -DPYBIND11_WERROR=ON
+        -DDOWNLOAD_CATCH=ON
+        -DDOWNLOAD_EIGEN=ON
+        -DCMAKE_CXX_STANDARD=17
+        -DPYBIND11_INTERNALS_VERSION=10000000
+        "-DPYBIND11_TEST_OVERRIDE=test_call_policies.cpp;test_gil_scoped.cpp;test_thread.cpp"
+        ${{ matrix.args }}
+
+    - name: Build (unstable ABI)
+      run: cmake --build build3 -j 2
+
+    - name: Python tests (unstable ABI)
+      run: cmake --build build3 --target pytest
+
+    - name: Interface test
+      run: cmake --build build3 --target test_cmake_build
+
+    # This makes sure the setup_helpers module can build packages using
+    # setuptools
+    - name: Setuptools helpers test
+      run: pytest tests/extra_setuptools
diff --git a/ext/pybind11/.gitignore b/ext/pybind11/.gitignore
index 3f36b89e0c..43d5094c96 100644
--- a/ext/pybind11/.gitignore
+++ b/ext/pybind11/.gitignore
@@ -41,3 +41,6 @@ pybind11Targets.cmake
 /.vscode
 /pybind11/include/*
 /pybind11/share/*
+/docs/_build/*
+.ipynb_checkpoints/
+tests/main.cpp
diff --git a/ext/pybind11/.pre-commit-config.yaml b/ext/pybind11/.pre-commit-config.yaml
index 6781ac4f11..d625d5726b 100644
--- a/ext/pybind11/.pre-commit-config.yaml
+++ b/ext/pybind11/.pre-commit-config.yaml
@@ -12,49 +12,118 @@
 #
 # See https://github.com/pre-commit/pre-commit
 
+
+ci:
+  autoupdate_commit_msg: "chore(deps): update pre-commit hooks"
+  autofix_commit_msg: "style: pre-commit fixes"
+  autoupdate_schedule: monthly
+
+# third-party content
+exclude: ^tools/JoinPaths.cmake$
+
 repos:
 # Standard hooks
 - repo: https://github.com/pre-commit/pre-commit-hooks
-  rev: v3.4.0
+  rev: "v4.4.0"
   hooks:
   - id: check-added-large-files
   - id: check-case-conflict
+  - id: check-docstring-first
   - id: check-merge-conflict
   - id: check-symlinks
+  - id: check-toml
   - id: check-yaml
   - id: debug-statements
   - id: end-of-file-fixer
   - id: mixed-line-ending
   - id: requirements-txt-fixer
   - id: trailing-whitespace
-  - id: fix-encoding-pragma
+
+# Upgrade old Python syntax
+- repo: https://github.com/asottile/pyupgrade
+  rev: "v3.3.1"
+  hooks:
+  - id: pyupgrade
+    args: [--py36-plus]
+
+# Nicely sort includes
+- repo: https://github.com/PyCQA/isort
+  rev: "5.11.4"
+  hooks:
+  - id: isort
 
 # Black, the code formatter, natively supports pre-commit
 - repo: https://github.com/psf/black
-  rev: 20.8b1
+  rev: "22.12.0" # Keep in sync with blacken-docs
   hooks:
   - id: black
-    # By default, this ignores pyi files, though black supports them
-    types: [text]
-    files: \.pyi?$
+
+# Also code format the docs
+- repo: https://github.com/asottile/blacken-docs
+  rev: "v1.12.1"
+  hooks:
+  - id: blacken-docs
+    additional_dependencies:
+    - black==22.10.0 # keep in sync with black hook
 
 # Changes tabs to spaces
 - repo: https://github.com/Lucas-C/pre-commit-hooks
-  rev: v1.1.9
+  rev: "v1.3.1"
   hooks:
   - id: remove-tabs
 
+- repo: https://github.com/sirosen/texthooks
+  rev: "0.4.0"
+  hooks:
+  - id: fix-ligatures
+  - id: fix-smartquotes
+
+# Autoremoves unused imports
+- repo: https://github.com/hadialqattan/pycln
+  rev: "v2.1.2"
+  hooks:
+  - id: pycln
+    stages: [manual]
+
+# Checking for common mistakes
+- repo: https://github.com/pre-commit/pygrep-hooks
+  rev: "v1.9.0"
+  hooks:
+  - id: python-check-blanket-noqa
+  - id: python-check-blanket-type-ignore
+  - id: python-no-log-warn
+  - id: python-use-type-annotations
+  - id: rst-backticks
+  - id: rst-directive-colons
+  - id: rst-inline-touching-normal
+
+# Automatically remove noqa that are not used
+- repo: https://github.com/asottile/yesqa
+  rev: "v1.4.0"
+  hooks:
+  - id: yesqa
+    additional_dependencies: &flake8_dependencies
+      - flake8-bugbear
+      - pep8-naming
+
 # Flake8 also supports pre-commit natively (same author)
-- repo: https://gitlab.com/pycqa/flake8
-  rev: 3.8.4
+- repo: https://github.com/PyCQA/flake8
+  rev: "6.0.0"
   hooks:
   - id: flake8
-    additional_dependencies: [flake8-bugbear, pep8-naming]
     exclude: ^(docs/.*|tools/.*)$
+    additional_dependencies: *flake8_dependencies
+
+# PyLint has native support - not always usable, but works for us
+- repo: https://github.com/PyCQA/pylint
+  rev: "v2.15.9"
+  hooks:
+  - id: pylint
+    files: ^pybind11
 
 # CMake formatting
 - repo: https://github.com/cheshirekow/cmake-format-precommit
-  rev: v0.6.13
+  rev: "v0.6.13"
   hooks:
   - id: cmake-format
     additional_dependencies: [pyyaml]
@@ -63,38 +132,50 @@ repos:
 
 # Check static types with mypy
 - repo: https://github.com/pre-commit/mirrors-mypy
-  rev: v0.800
+  rev: "v0.991"
   hooks:
   - id: mypy
-    # The default Python type ignores .pyi files, so let's rerun if detected
-    types: [text]
-    files: ^pybind11.*\.pyi?$
-    # Running per-file misbehaves a bit, so just run on all files, it's fast
-    pass_filenames: false
+    args: []
+    exclude: ^(tests|docs)/
+    additional_dependencies: [nox, rich]
 
 # Checks the manifest for missing files (native support)
 - repo: https://github.com/mgedmin/check-manifest
-  rev: "0.46"
+  rev: "0.49"
   hooks:
   - id: check-manifest
     # This is a slow hook, so only run this if --hook-stage manual is passed
     stages: [manual]
     additional_dependencies: [cmake, ninja]
 
-# The original pybind11 checks for a few C++ style items
+# Check for spelling
+# Use tools/codespell_ignore_lines_from_errors.py
+# to rebuild .codespell-ignore-lines
+- repo: https://github.com/codespell-project/codespell
+  rev: "v2.2.2"
+  hooks:
+  - id: codespell
+    exclude: ".supp$"
+    args: ["-x", ".codespell-ignore-lines"]
+
+# Check for common shell mistakes
+- repo: https://github.com/shellcheck-py/shellcheck-py
+  rev: "v0.9.0.2"
+  hooks:
+  - id: shellcheck
+
+# Disallow some common capitalization mistakes
 - repo: local
   hooks:
   - id: disallow-caps
     name: Disallow improper capitalization
     language: pygrep
-    entry: PyBind|Numpy|Cmake|CCache
-    exclude: .pre-commit-config.yaml
+    entry: PyBind|Numpy|Cmake|CCache|PyTest
+    exclude: ^\.pre-commit-config.yaml$
 
-- repo: local
+# Clang format the codebase automatically
+- repo: https://github.com/pre-commit/mirrors-clang-format
+  rev: "v15.0.6"
   hooks:
-  - id: check-style
-    name: Classic check-style
-    language: system
-    types:
-    - c++
-    entry: ./tools/check-style.sh
+  - id: clang-format
+    types_or: [c++, c, cuda]
diff --git a/ext/pybind11/CMakeLists.txt b/ext/pybind11/CMakeLists.txt
index 2e81869c3f..0d93203881 100644
--- a/ext/pybind11/CMakeLists.txt
+++ b/ext/pybind11/CMakeLists.txt
@@ -16,6 +16,11 @@ else()
   cmake_policy(VERSION 3.22)
 endif()
 
+# Avoid infinite recursion if tests include this as a subdirectory
+if(DEFINED PYBIND11_MASTER_PROJECT)
+  return()
+endif()
+
 # Extract project version from source
 file(STRINGS "${CMAKE_CURRENT_SOURCE_DIR}/include/pybind11/detail/common.h"
      pybind11_version_defines REGEX "#define PYBIND11_VERSION_(MAJOR|MINOR|PATCH) ")
@@ -45,13 +50,8 @@ if(NOT pybind11_FIND_QUIETLY)
   message(STATUS "pybind11 v${pybind11_VERSION} ${pybind11_VERSION_TYPE}")
 endif()
 
-# Avoid infinite recursion if tests include this as a subdirectory
-if(DEFINED PYBIND11_MASTER_PROJECT)
-  set(PYBIND11_TEST OFF)
-endif()
-
 # Check if pybind11 is being used directly or via add_subdirectory
-if(CMAKE_SOURCE_DIR STREQUAL PROJECT_SOURCE_DIR AND NOT DEFINED PYBIND11_MASTER_PROJECT)
+if(CMAKE_SOURCE_DIR STREQUAL PROJECT_SOURCE_DIR)
   ### Warn if not an out-of-source builds
   if(CMAKE_CURRENT_SOURCE_DIR STREQUAL CMAKE_CURRENT_BINARY_DIR)
     set(lines
@@ -80,6 +80,8 @@ if(CMAKE_SOURCE_DIR STREQUAL PROJECT_SOURCE_DIR AND NOT DEFINED PYBIND11_MASTER_
   endif()
 
   set(pybind11_system "")
+
+  set_property(GLOBAL PROPERTY USE_FOLDERS ON)
 else()
   set(PYBIND11_MASTER_PROJECT OFF)
   set(pybind11_system SYSTEM)
@@ -89,10 +91,16 @@ endif()
 option(PYBIND11_INSTALL "Install pybind11 header files?" ${PYBIND11_MASTER_PROJECT})
 option(PYBIND11_TEST "Build pybind11 test suite?" ${PYBIND11_MASTER_PROJECT})
 option(PYBIND11_NOPYTHON "Disable search for Python" OFF)
+option(PYBIND11_SIMPLE_GIL_MANAGEMENT
+       "Use simpler GIL management logic that does not support disassociation" OFF)
 set(PYBIND11_INTERNALS_VERSION
     ""
     CACHE STRING "Override the ABI version, may be used to enable the unstable ABI.")
 
+if(PYBIND11_SIMPLE_GIL_MANAGEMENT)
+  add_compile_definitions(PYBIND11_SIMPLE_GIL_MANAGEMENT)
+endif()
+
 cmake_dependent_option(
   USE_PYTHON_INCLUDE_DIR
   "Install pybind11 headers in Python include directory instead of default installation prefix"
@@ -118,6 +126,8 @@ set(PYBIND11_HEADERS
     include/pybind11/complex.h
     include/pybind11/options.h
     include/pybind11/eigen.h
+    include/pybind11/eigen/matrix.h
+    include/pybind11/eigen/tensor.h
     include/pybind11/embed.h
     include/pybind11/eval.h
     include/pybind11/gil.h
@@ -196,6 +206,9 @@ else()
 endif()
 
 include("${CMAKE_CURRENT_SOURCE_DIR}/tools/pybind11Common.cmake")
+# https://github.com/jtojnar/cmake-snips/#concatenating-paths-when-building-pkg-config-files
+# TODO: cmake 3.20 adds the cmake_path() function, which obsoletes this snippet
+include("${CMAKE_CURRENT_SOURCE_DIR}/tools/JoinPaths.cmake")
 
 # Relative directory setting
 if(USE_PYTHON_INCLUDE_DIR AND DEFINED Python_INCLUDE_DIRS)
@@ -260,6 +273,16 @@ if(PYBIND11_INSTALL)
     NAMESPACE "pybind11::"
     DESTINATION ${PYBIND11_CMAKECONFIG_INSTALL_DIR})
 
+  # pkg-config support
+  if(NOT prefix_for_pc_file)
+    set(prefix_for_pc_file "${CMAKE_INSTALL_PREFIX}")
+  endif()
+  join_paths(includedir_for_pc_file "\${prefix}" "${CMAKE_INSTALL_INCLUDEDIR}")
+  configure_file("${CMAKE_CURRENT_SOURCE_DIR}/tools/pybind11.pc.in"
+                 "${CMAKE_CURRENT_BINARY_DIR}/pybind11.pc" @ONLY)
+  install(FILES "${CMAKE_CURRENT_BINARY_DIR}/pybind11.pc"
+          DESTINATION "${CMAKE_INSTALL_DATAROOTDIR}/pkgconfig/")
+
   # Uninstall target
   if(PYBIND11_MASTER_PROJECT)
     configure_file("${CMAKE_CURRENT_SOURCE_DIR}/tools/cmake_uninstall.cmake.in"
diff --git a/ext/pybind11/MANIFEST.in b/ext/pybind11/MANIFEST.in
index aed183e874..033303a74a 100644
--- a/ext/pybind11/MANIFEST.in
+++ b/ext/pybind11/MANIFEST.in
@@ -1,6 +1,5 @@
 recursive-include pybind11/include/pybind11 *.h
 recursive-include pybind11 *.py
 recursive-include pybind11 py.typed
-recursive-include pybind11 *.pyi
 include pybind11/share/cmake/pybind11/*.cmake
 include LICENSE README.rst pyproject.toml setup.py setup.cfg
diff --git a/ext/pybind11/README.rst b/ext/pybind11/README.rst
index 7ce57b03ae..3c75edb575 100644
--- a/ext/pybind11/README.rst
+++ b/ext/pybind11/README.rst
@@ -32,9 +32,9 @@ this heavy machinery has become an excessively large and unnecessary
 dependency.
 
 Think of this library as a tiny self-contained version of Boost.Python
-with everything stripped away that isn’t relevant for binding
+with everything stripped away that isn't relevant for binding
 generation. Without comments, the core header files only require ~4K
-lines of code and depend on Python (2.7 or 3.5+, or PyPy) and the C++
+lines of code and depend on Python (3.6+, or PyPy) and the C++
 standard library. This compact implementation was possible thanks to
 some of the new C++11 language features (specifically: tuples, lambda
 functions and variadic templates). Since its creation, this library has
@@ -78,8 +78,8 @@ Goodies
 In addition to the core functionality, pybind11 provides some extra
 goodies:
 
-- Python 2.7, 3.5+, and PyPy/PyPy3 7.3 are supported with an
-  implementation-agnostic interface.
+- Python 3.6+, and PyPy3 7.3 are supported with an implementation-agnostic
+  interface (pybind11 2.9 was the last version to support Python 2 and 3.5).
 
 - It is possible to bind C++11 lambda functions with captured
   variables. The lambda capture data is stored inside the resulting
@@ -88,8 +88,8 @@ goodies:
 - pybind11 uses C++11 move constructors and move assignment operators
   whenever possible to efficiently transfer custom data types.
 
-- It’s easy to expose the internal storage of custom data types through
-  Pythons’ buffer protocols. This is handy e.g. for fast conversion
+- It's easy to expose the internal storage of custom data types through
+  Pythons' buffer protocols. This is handy e.g. for fast conversion
   between C++ matrix classes like Eigen and NumPy without expensive
   copy operations.
 
@@ -106,7 +106,7 @@ goodies:
 - Binaries are generally smaller by a factor of at least 2 compared to
   equivalent bindings generated by Boost.Python. A recent pybind11
   conversion of PyRosetta, an enormous Boost.Python binding project,
-  `reported <http://graylab.jhu.edu/RosettaCon2016/PyRosetta-4.pdf>`_
+  `reported <https://graylab.jhu.edu/Sergey/2016.RosettaCon/PyRosetta-4.pdf>`_
   a binary size reduction of **5.4x** and compile time reduction by
   **5.8x**.
 
@@ -119,10 +119,10 @@ goodies:
 Supported compilers
 -------------------
 
-1. Clang/LLVM 3.3 or newer (for Apple Xcode’s clang, this is 5.0.0 or
+1. Clang/LLVM 3.3 or newer (for Apple Xcode's clang, this is 5.0.0 or
    newer)
 2. GCC 4.8 or newer
-3. Microsoft Visual Studio 2015 Update 3 or newer
+3. Microsoft Visual Studio 2017 or newer
 4. Intel classic C++ compiler 18 or newer (ICC 20.2 tested in CI)
 5. Cygwin/GCC (previously tested on 2.5.1)
 6. NVCC (CUDA 11.0 tested in CI)
diff --git a/ext/pybind11/docs/Doxyfile b/ext/pybind11/docs/Doxyfile
index c8562952ef..09138db364 100644
--- a/ext/pybind11/docs/Doxyfile
+++ b/ext/pybind11/docs/Doxyfile
@@ -18,6 +18,4 @@ ALIASES               += "endrst=\endverbatim"
 QUIET                  = YES
 WARNINGS               = YES
 WARN_IF_UNDOCUMENTED   = NO
-PREDEFINED             = DOXYGEN_SHOULD_SKIP_THIS \
-                         PY_MAJOR_VERSION=3 \
-                         PYBIND11_NOINLINE
+PREDEFINED             = PYBIND11_NOINLINE
diff --git a/ext/pybind11/docs/_static/css/custom.css b/ext/pybind11/docs/_static/css/custom.css
new file mode 100644
index 0000000000..7a49a6ac4f
--- /dev/null
+++ b/ext/pybind11/docs/_static/css/custom.css
@@ -0,0 +1,3 @@
+.highlight .go {
+  color: #707070;
+}
diff --git a/ext/pybind11/docs/_static/theme_overrides.css b/ext/pybind11/docs/_static/theme_overrides.css
deleted file mode 100644
index 1071809fa0..0000000000
--- a/ext/pybind11/docs/_static/theme_overrides.css
+++ /dev/null
@@ -1,11 +0,0 @@
-.wy-table-responsive table td,
-.wy-table-responsive table th {
-    white-space: initial !important;
-}
-.rst-content table.docutils td {
-    vertical-align: top !important;
-}
-div[class^='highlight'] pre {
-    white-space: pre;
-    white-space: pre-wrap;
-}
diff --git a/ext/pybind11/docs/advanced/cast/custom.rst b/ext/pybind11/docs/advanced/cast/custom.rst
index 19b9353476..8138cac619 100644
--- a/ext/pybind11/docs/advanced/cast/custom.rst
+++ b/ext/pybind11/docs/advanced/cast/custom.rst
@@ -38,7 +38,7 @@ type is explicitly allowed.
 
 .. code-block:: cpp
 
-    namespace pybind11 { namespace detail {
+    namespace PYBIND11_NAMESPACE { namespace detail {
         template <> struct type_caster<inty> {
         public:
             /**
@@ -46,7 +46,7 @@ type is explicitly allowed.
              * function signatures and declares a local variable
              * 'value' of type inty
              */
-            PYBIND11_TYPE_CASTER(inty, _("inty"));
+            PYBIND11_TYPE_CASTER(inty, const_name("inty"));
 
             /**
              * Conversion part 1 (Python->C++): convert a PyObject into a inty
@@ -78,7 +78,7 @@ type is explicitly allowed.
                 return PyLong_FromLong(src.long_value);
             }
         };
-    }} // namespace pybind11::detail
+    }} // namespace PYBIND11_NAMESPACE::detail
 
 .. note::
 
diff --git a/ext/pybind11/docs/advanced/cast/eigen.rst b/ext/pybind11/docs/advanced/cast/eigen.rst
index 80f1013430..a5c11a3f14 100644
--- a/ext/pybind11/docs/advanced/cast/eigen.rst
+++ b/ext/pybind11/docs/advanced/cast/eigen.rst
@@ -52,7 +52,7 @@ can be mapped *and* if the numpy array is writeable (that is
 the passed variable will be transparently carried out directly on the
 ``numpy.ndarray``.
 
-This means you can can write code such as the following and have it work as
+This means you can write code such as the following and have it work as
 expected:
 
 .. code-block:: cpp
diff --git a/ext/pybind11/docs/advanced/cast/overview.rst b/ext/pybind11/docs/advanced/cast/overview.rst
index 6341fce6d4..011bd4c7a3 100644
--- a/ext/pybind11/docs/advanced/cast/overview.rst
+++ b/ext/pybind11/docs/advanced/cast/overview.rst
@@ -75,97 +75,96 @@ The following basic data types are supported out of the box (some may require
 an additional extension header to be included). To pass other data structures
 as arguments and return values, refer to the section on binding :ref:`classes`.
 
-+------------------------------------+---------------------------+-------------------------------+
-|  Data type                         |  Description              | Header file                   |
-+====================================+===========================+===============================+
-| ``int8_t``, ``uint8_t``            | 8-bit integers            | :file:`pybind11/pybind11.h`   |
-+------------------------------------+---------------------------+-------------------------------+
-| ``int16_t``, ``uint16_t``          | 16-bit integers           | :file:`pybind11/pybind11.h`   |
-+------------------------------------+---------------------------+-------------------------------+
-| ``int32_t``, ``uint32_t``          | 32-bit integers           | :file:`pybind11/pybind11.h`   |
-+------------------------------------+---------------------------+-------------------------------+
-| ``int64_t``, ``uint64_t``          | 64-bit integers           | :file:`pybind11/pybind11.h`   |
-+------------------------------------+---------------------------+-------------------------------+
-| ``ssize_t``, ``size_t``            | Platform-dependent size   | :file:`pybind11/pybind11.h`   |
-+------------------------------------+---------------------------+-------------------------------+
-| ``float``, ``double``              | Floating point types      | :file:`pybind11/pybind11.h`   |
-+------------------------------------+---------------------------+-------------------------------+
-| ``bool``                           | Two-state Boolean type    | :file:`pybind11/pybind11.h`   |
-+------------------------------------+---------------------------+-------------------------------+
-| ``char``                           | Character literal         | :file:`pybind11/pybind11.h`   |
-+------------------------------------+---------------------------+-------------------------------+
-| ``char16_t``                       | UTF-16 character literal  | :file:`pybind11/pybind11.h`   |
-+------------------------------------+---------------------------+-------------------------------+
-| ``char32_t``                       | UTF-32 character literal  | :file:`pybind11/pybind11.h`   |
-+------------------------------------+---------------------------+-------------------------------+
-| ``wchar_t``                        | Wide character literal    | :file:`pybind11/pybind11.h`   |
-+------------------------------------+---------------------------+-------------------------------+
-| ``const char *``                   | UTF-8 string literal      | :file:`pybind11/pybind11.h`   |
-+------------------------------------+---------------------------+-------------------------------+
-| ``const char16_t *``               | UTF-16 string literal     | :file:`pybind11/pybind11.h`   |
-+------------------------------------+---------------------------+-------------------------------+
-| ``const char32_t *``               | UTF-32 string literal     | :file:`pybind11/pybind11.h`   |
-+------------------------------------+---------------------------+-------------------------------+
-| ``const wchar_t *``                | Wide string literal       | :file:`pybind11/pybind11.h`   |
-+------------------------------------+---------------------------+-------------------------------+
-| ``std::string``                    | STL dynamic UTF-8 string  | :file:`pybind11/pybind11.h`   |
-+------------------------------------+---------------------------+-------------------------------+
-| ``std::u16string``                 | STL dynamic UTF-16 string | :file:`pybind11/pybind11.h`   |
-+------------------------------------+---------------------------+-------------------------------+
-| ``std::u32string``                 | STL dynamic UTF-32 string | :file:`pybind11/pybind11.h`   |
-+------------------------------------+---------------------------+-------------------------------+
-| ``std::wstring``                   | STL dynamic wide string   | :file:`pybind11/pybind11.h`   |
-+------------------------------------+---------------------------+-------------------------------+
-| ``std::string_view``,              | STL C++17 string views    | :file:`pybind11/pybind11.h`   |
-| ``std::u16string_view``, etc.      |                           |                               |
-+------------------------------------+---------------------------+-------------------------------+
-| ``std::pair<T1, T2>``              | Pair of two custom types  | :file:`pybind11/pybind11.h`   |
-+------------------------------------+---------------------------+-------------------------------+
-| ``std::tuple<...>``                | Arbitrary tuple of types  | :file:`pybind11/pybind11.h`   |
-+------------------------------------+---------------------------+-------------------------------+
-| ``std::reference_wrapper<...>``    | Reference type wrapper    | :file:`pybind11/pybind11.h`   |
-+------------------------------------+---------------------------+-------------------------------+
-| ``std::complex<T>``                | Complex numbers           | :file:`pybind11/complex.h`    |
-+------------------------------------+---------------------------+-------------------------------+
-| ``std::array<T, Size>``            | STL static array          | :file:`pybind11/stl.h`        |
-+------------------------------------+---------------------------+-------------------------------+
-| ``std::vector<T>``                 | STL dynamic array         | :file:`pybind11/stl.h`        |
-+------------------------------------+---------------------------+-------------------------------+
-| ``std::deque<T>``                  | STL double-ended queue    | :file:`pybind11/stl.h`        |
-+------------------------------------+---------------------------+-------------------------------+
-| ``std::valarray<T>``               | STL value array           | :file:`pybind11/stl.h`        |
-+------------------------------------+---------------------------+-------------------------------+
-| ``std::list<T>``                   | STL linked list           | :file:`pybind11/stl.h`        |
-+------------------------------------+---------------------------+-------------------------------+
-| ``std::map<T1, T2>``               | STL ordered map           | :file:`pybind11/stl.h`        |
-+------------------------------------+---------------------------+-------------------------------+
-| ``std::unordered_map<T1, T2>``     | STL unordered map         | :file:`pybind11/stl.h`        |
-+------------------------------------+---------------------------+-------------------------------+
-| ``std::set<T>``                    | STL ordered set           | :file:`pybind11/stl.h`        |
-+------------------------------------+---------------------------+-------------------------------+
-| ``std::unordered_set<T>``          | STL unordered set         | :file:`pybind11/stl.h`        |
-+------------------------------------+---------------------------+-------------------------------+
-| ``std::optional<T>``               | STL optional type (C++17) | :file:`pybind11/stl.h`        |
-+------------------------------------+---------------------------+-------------------------------+
-| ``std::experimental::optional<T>`` | STL optional type (exp.)  | :file:`pybind11/stl.h`        |
-+------------------------------------+---------------------------+-------------------------------+
-| ``std::variant<...>``              | Type-safe union (C++17)   | :file:`pybind11/stl.h`        |
-+------------------------------------+---------------------------+-------------------------------+
-| ``std::filesystem::path<T>``       | STL path (C++17) [#]_     | :file:`pybind11/stl.h`        |
-+------------------------------------+---------------------------+-------------------------------+
-| ``std::function<...>``             | STL polymorphic function  | :file:`pybind11/functional.h` |
-+------------------------------------+---------------------------+-------------------------------+
-| ``std::chrono::duration<...>``     | STL time duration         | :file:`pybind11/chrono.h`     |
-+------------------------------------+---------------------------+-------------------------------+
-| ``std::chrono::time_point<...>``   | STL date/time             | :file:`pybind11/chrono.h`     |
-+------------------------------------+---------------------------+-------------------------------+
-| ``Eigen::Matrix<...>``             | Eigen: dense matrix       | :file:`pybind11/eigen.h`      |
-+------------------------------------+---------------------------+-------------------------------+
-| ``Eigen::Map<...>``                | Eigen: mapped memory      | :file:`pybind11/eigen.h`      |
-+------------------------------------+---------------------------+-------------------------------+
-| ``Eigen::SparseMatrix<...>``       | Eigen: sparse matrix      | :file:`pybind11/eigen.h`      |
-+------------------------------------+---------------------------+-------------------------------+
++------------------------------------+---------------------------+-----------------------------------+
+|  Data type                         |  Description              | Header file                       |
++====================================+===========================+===================================+
+| ``int8_t``, ``uint8_t``            | 8-bit integers            | :file:`pybind11/pybind11.h`       |
++------------------------------------+---------------------------+-----------------------------------+
+| ``int16_t``, ``uint16_t``          | 16-bit integers           | :file:`pybind11/pybind11.h`       |
++------------------------------------+---------------------------+-----------------------------------+
+| ``int32_t``, ``uint32_t``          | 32-bit integers           | :file:`pybind11/pybind11.h`       |
++------------------------------------+---------------------------+-----------------------------------+
+| ``int64_t``, ``uint64_t``          | 64-bit integers           | :file:`pybind11/pybind11.h`       |
++------------------------------------+---------------------------+-----------------------------------+
+| ``ssize_t``, ``size_t``            | Platform-dependent size   | :file:`pybind11/pybind11.h`       |
++------------------------------------+---------------------------+-----------------------------------+
+| ``float``, ``double``              | Floating point types      | :file:`pybind11/pybind11.h`       |
++------------------------------------+---------------------------+-----------------------------------+
+| ``bool``                           | Two-state Boolean type    | :file:`pybind11/pybind11.h`       |
++------------------------------------+---------------------------+-----------------------------------+
+| ``char``                           | Character literal         | :file:`pybind11/pybind11.h`       |
++------------------------------------+---------------------------+-----------------------------------+
+| ``char16_t``                       | UTF-16 character literal  | :file:`pybind11/pybind11.h`       |
++------------------------------------+---------------------------+-----------------------------------+
+| ``char32_t``                       | UTF-32 character literal  | :file:`pybind11/pybind11.h`       |
++------------------------------------+---------------------------+-----------------------------------+
+| ``wchar_t``                        | Wide character literal    | :file:`pybind11/pybind11.h`       |
++------------------------------------+---------------------------+-----------------------------------+
+| ``const char *``                   | UTF-8 string literal      | :file:`pybind11/pybind11.h`       |
++------------------------------------+---------------------------+-----------------------------------+
+| ``const char16_t *``               | UTF-16 string literal     | :file:`pybind11/pybind11.h`       |
++------------------------------------+---------------------------+-----------------------------------+
+| ``const char32_t *``               | UTF-32 string literal     | :file:`pybind11/pybind11.h`       |
++------------------------------------+---------------------------+-----------------------------------+
+| ``const wchar_t *``                | Wide string literal       | :file:`pybind11/pybind11.h`       |
++------------------------------------+---------------------------+-----------------------------------+
+| ``std::string``                    | STL dynamic UTF-8 string  | :file:`pybind11/pybind11.h`       |
++------------------------------------+---------------------------+-----------------------------------+
+| ``std::u16string``                 | STL dynamic UTF-16 string | :file:`pybind11/pybind11.h`       |
++------------------------------------+---------------------------+-----------------------------------+
+| ``std::u32string``                 | STL dynamic UTF-32 string | :file:`pybind11/pybind11.h`       |
++------------------------------------+---------------------------+-----------------------------------+
+| ``std::wstring``                   | STL dynamic wide string   | :file:`pybind11/pybind11.h`       |
++------------------------------------+---------------------------+-----------------------------------+
+| ``std::string_view``,              | STL C++17 string views    | :file:`pybind11/pybind11.h`       |
+| ``std::u16string_view``, etc.      |                           |                                   |
++------------------------------------+---------------------------+-----------------------------------+
+| ``std::pair<T1, T2>``              | Pair of two custom types  | :file:`pybind11/pybind11.h`       |
++------------------------------------+---------------------------+-----------------------------------+
+| ``std::tuple<...>``                | Arbitrary tuple of types  | :file:`pybind11/pybind11.h`       |
++------------------------------------+---------------------------+-----------------------------------+
+| ``std::reference_wrapper<...>``    | Reference type wrapper    | :file:`pybind11/pybind11.h`       |
++------------------------------------+---------------------------+-----------------------------------+
+| ``std::complex<T>``                | Complex numbers           | :file:`pybind11/complex.h`        |
++------------------------------------+---------------------------+-----------------------------------+
+| ``std::array<T, Size>``            | STL static array          | :file:`pybind11/stl.h`            |
++------------------------------------+---------------------------+-----------------------------------+
+| ``std::vector<T>``                 | STL dynamic array         | :file:`pybind11/stl.h`            |
++------------------------------------+---------------------------+-----------------------------------+
+| ``std::deque<T>``                  | STL double-ended queue    | :file:`pybind11/stl.h`            |
++------------------------------------+---------------------------+-----------------------------------+
+| ``std::valarray<T>``               | STL value array           | :file:`pybind11/stl.h`            |
++------------------------------------+---------------------------+-----------------------------------+
+| ``std::list<T>``                   | STL linked list           | :file:`pybind11/stl.h`            |
++------------------------------------+---------------------------+-----------------------------------+
+| ``std::map<T1, T2>``               | STL ordered map           | :file:`pybind11/stl.h`            |
++------------------------------------+---------------------------+-----------------------------------+
+| ``std::unordered_map<T1, T2>``     | STL unordered map         | :file:`pybind11/stl.h`            |
++------------------------------------+---------------------------+-----------------------------------+
+| ``std::set<T>``                    | STL ordered set           | :file:`pybind11/stl.h`            |
++------------------------------------+---------------------------+-----------------------------------+
+| ``std::unordered_set<T>``          | STL unordered set         | :file:`pybind11/stl.h`            |
++------------------------------------+---------------------------+-----------------------------------+
+| ``std::optional<T>``               | STL optional type (C++17) | :file:`pybind11/stl.h`            |
++------------------------------------+---------------------------+-----------------------------------+
+| ``std::experimental::optional<T>`` | STL optional type (exp.)  | :file:`pybind11/stl.h`            |
++------------------------------------+---------------------------+-----------------------------------+
+| ``std::variant<...>``              | Type-safe union (C++17)   | :file:`pybind11/stl.h`            |
++------------------------------------+---------------------------+-----------------------------------+
+| ``std::filesystem::path<T>``       | STL path (C++17) [#]_     | :file:`pybind11/stl/filesystem.h` |
++------------------------------------+---------------------------+-----------------------------------+
+| ``std::function<...>``             | STL polymorphic function  | :file:`pybind11/functional.h`     |
++------------------------------------+---------------------------+-----------------------------------+
+| ``std::chrono::duration<...>``     | STL time duration         | :file:`pybind11/chrono.h`         |
++------------------------------------+---------------------------+-----------------------------------+
+| ``std::chrono::time_point<...>``   | STL date/time             | :file:`pybind11/chrono.h`         |
++------------------------------------+---------------------------+-----------------------------------+
+| ``Eigen::Matrix<...>``             | Eigen: dense matrix       | :file:`pybind11/eigen.h`          |
++------------------------------------+---------------------------+-----------------------------------+
+| ``Eigen::Map<...>``                | Eigen: mapped memory      | :file:`pybind11/eigen.h`          |
++------------------------------------+---------------------------+-----------------------------------+
+| ``Eigen::SparseMatrix<...>``       | Eigen: sparse matrix      | :file:`pybind11/eigen.h`          |
++------------------------------------+---------------------------+-----------------------------------+
 
 .. [#] ``std::filesystem::path`` is converted to ``pathlib.Path`` and
-   ``os.PathLike`` is converted to ``std::filesystem::path``, but this requires
-   Python 3.6 (for ``__fspath__`` support).
+   ``os.PathLike`` is converted to ``std::filesystem::path``.
diff --git a/ext/pybind11/docs/advanced/cast/stl.rst b/ext/pybind11/docs/advanced/cast/stl.rst
index b8622ee095..03d49b2950 100644
--- a/ext/pybind11/docs/advanced/cast/stl.rst
+++ b/ext/pybind11/docs/advanced/cast/stl.rst
@@ -42,7 +42,7 @@ types:
 .. code-block:: cpp
 
     // `boost::optional` as an example -- can be any `std::optional`-like container
-    namespace pybind11 { namespace detail {
+    namespace PYBIND11_NAMESPACE { namespace detail {
         template <typename T>
         struct type_caster<boost::optional<T>> : optional_caster<boost::optional<T>> {};
     }}
@@ -54,7 +54,7 @@ for custom variant types:
 .. code-block:: cpp
 
     // `boost::variant` as an example -- can be any `std::variant`-like container
-    namespace pybind11 { namespace detail {
+    namespace PYBIND11_NAMESPACE { namespace detail {
         template <typename... Ts>
         struct type_caster<boost::variant<Ts...>> : variant_caster<boost::variant<Ts...>> {};
 
@@ -66,7 +66,7 @@ for custom variant types:
                 return boost::apply_visitor(args...);
             }
         };
-    }} // namespace pybind11::detail
+    }} // namespace PYBIND11_NAMESPACE::detail
 
 The ``visit_helper`` specialization is not required if your ``name::variant`` provides
 a ``name::visit()`` function. For any other function name, the specialization must be
@@ -87,8 +87,6 @@ included to tell pybind11 how to visit the variant.
 
     pybind11 only supports the modern implementation of ``boost::variant``
     which makes use of variadic templates. This requires Boost 1.56 or newer.
-    Additionally, on Windows, MSVC 2017 is required because ``boost::variant``
-    falls back to the old non-variadic implementation on MSVC 2015.
 
 .. _opaque:
 
diff --git a/ext/pybind11/docs/advanced/cast/strings.rst b/ext/pybind11/docs/advanced/cast/strings.rst
index cfd7e7b7a5..e246c5219a 100644
--- a/ext/pybind11/docs/advanced/cast/strings.rst
+++ b/ext/pybind11/docs/advanced/cast/strings.rst
@@ -1,14 +1,6 @@
 Strings, bytes and Unicode conversions
 ######################################
 
-.. note::
-
-    This section discusses string handling in terms of Python 3 strings. For
-    Python 2.7, replace all occurrences of ``str`` with ``unicode`` and
-    ``bytes`` with ``str``.  Python 2.7 users may find it best to use ``from
-    __future__ import unicode_literals`` to avoid unintentionally using ``str``
-    instead of ``unicode``.
-
 Passing Python strings to C++
 =============================
 
@@ -58,9 +50,9 @@ Passing bytes to C++
 --------------------
 
 A Python ``bytes`` object will be passed to C++ functions that accept
-``std::string`` or ``char*`` *without* conversion.  On Python 3, in order to
-make a function *only* accept ``bytes`` (and not ``str``), declare it as taking
-a ``py::bytes`` argument.
+``std::string`` or ``char*`` *without* conversion.  In order to make a function
+*only* accept ``bytes`` (and not ``str``), declare it as taking a ``py::bytes``
+argument.
 
 
 Returning C++ strings to Python
@@ -204,11 +196,6 @@ decoded to Python ``str``.
         }
     );
 
-.. warning::
-
-    Wide character strings may not work as described on Python 2.7 or Python
-    3.3 compiled with ``--enable-unicode=ucs2``.
-
 Strings in multibyte encodings such as Shift-JIS must transcoded to a
 UTF-8/16/32 before being returned to Python.
 
diff --git a/ext/pybind11/docs/advanced/classes.rst b/ext/pybind11/docs/advanced/classes.rst
index 6330af5eb4..01a490b721 100644
--- a/ext/pybind11/docs/advanced/classes.rst
+++ b/ext/pybind11/docs/advanced/classes.rst
@@ -9,7 +9,7 @@ that you are already familiar with the basics from :doc:`/classes`.
 Overriding virtual functions in Python
 ======================================
 
-Suppose that a C++ class or interface has a virtual function that we'd like to
+Suppose that a C++ class or interface has a virtual function that we'd like
 to override from within Python (we'll focus on the class ``Animal``; ``Dog`` is
 given as a specific example of how one would do this with traditional C++
 code).
@@ -133,14 +133,14 @@ a virtual method call.
     >>> from example import *
     >>> d = Dog()
     >>> call_go(d)
-    u'woof! woof! woof! '
+    'woof! woof! woof! '
     >>> class Cat(Animal):
     ...     def go(self, n_times):
     ...         return "meow! " * n_times
     ...
     >>> c = Cat()
     >>> call_go(c)
-    u'meow! meow! meow! '
+    'meow! meow! meow! '
 
 If you are defining a custom constructor in a derived Python class, you *must*
 ensure that you explicitly call the bound C++ constructor using ``__init__``,
@@ -813,26 +813,21 @@ An instance can now be pickled as follows:
 
 .. code-block:: python
 
-    try:
-        import cPickle as pickle  # Use cPickle on Python 2.7
-    except ImportError:
-        import pickle
+    import pickle
 
     p = Pickleable("test_value")
     p.setExtra(15)
-    data = pickle.dumps(p, 2)
+    data = pickle.dumps(p)
 
 
 .. note::
-    Note that only the cPickle module is supported on Python 2.7.
-
-    The second argument to ``dumps`` is also crucial: it selects the pickle
-    protocol version 2, since the older version 1 is not supported. Newer
-    versions are also fine—for instance, specify ``-1`` to always use the
-    latest available version. Beware: failure to follow these instructions
-    will cause important pybind11 memory allocation routines to be skipped
-    during unpickling, which will likely lead to memory corruption and/or
-    segmentation faults.
+    If given, the second argument to ``dumps`` must be 2 or larger - 0 and 1 are
+    not supported. Newer versions are also fine; for instance, specify ``-1`` to
+    always use the latest available version. Beware: failure to follow these
+    instructions will cause important pybind11 memory allocation routines to be
+    skipped during unpickling, which will likely lead to memory corruption
+    and/or segmentation faults. Python defaults to version 3 (Python 3-3.7) and
+    version 4 for Python 3.8+.
 
 .. seealso::
 
@@ -849,11 +844,9 @@ Python normally uses references in assignments. Sometimes a real copy is needed
 to prevent changing all copies. The ``copy`` module [#f5]_ provides these
 capabilities.
 
-On Python 3, a class with pickle support is automatically also (deep)copy
+A class with pickle support is automatically also (deep)copy
 compatible. However, performance can be improved by adding custom
-``__copy__`` and ``__deepcopy__`` methods. With Python 2.7, these custom methods
-are mandatory for (deep)copy compatibility, because pybind11 only supports
-cPickle.
+``__copy__`` and ``__deepcopy__`` methods.
 
 For simple classes (deep)copy can be enabled by using the copy constructor,
 which should look as follows:
@@ -1125,13 +1118,6 @@ described trampoline:
     py::class_<A, Trampoline>(m, "A") // <-- `Trampoline` here
         .def("foo", &Publicist::foo); // <-- `Publicist` here, not `Trampoline`!
 
-.. note::
-
-    MSVC 2015 has a compiler bug (fixed in version 2017) which
-    requires a more explicit function binding in the form of
-    ``.def("foo", static_cast<int (A::*)() const>(&Publicist::foo));``
-    where ``int (A::*)() const`` is the type of ``A::foo``.
-
 Binding final classes
 =====================
 
@@ -1161,6 +1147,58 @@ error:
 
 .. versionadded:: 2.6
 
+Binding classes with template parameters
+========================================
+
+pybind11 can also wrap classes that have template parameters. Consider these classes:
+
+.. code-block:: cpp
+
+    struct Cat {};
+    struct Dog {};
+
+    template <typename PetType>
+    struct Cage {
+        Cage(PetType& pet);
+        PetType& get();
+    };
+
+C++ templates may only be instantiated at compile time, so pybind11 can only
+wrap instantiated templated classes. You cannot wrap a non-instantiated template:
+
+.. code-block:: cpp
+
+    // BROKEN (this will not compile)
+    py::class_<Cage>(m, "Cage");
+        .def("get", &Cage::get);
+
+You must explicitly specify each template/type combination that you want to
+wrap separately.
+
+.. code-block:: cpp
+
+    // ok
+    py::class_<Cage<Cat>>(m, "CatCage")
+        .def("get", &Cage<Cat>::get);
+
+    // ok
+    py::class_<Cage<Dog>>(m, "DogCage")
+        .def("get", &Cage<Dog>::get);
+
+If your class methods have template parameters you can wrap those as well,
+but once again each instantiation must be explicitly specified:
+
+.. code-block:: cpp
+
+    typename <typename T>
+    struct MyClass {
+        template <typename V>
+        T fn(V v);
+    };
+
+    py::class<MyClass<int>>(m, "MyClassT")
+        .def("fn", &MyClass<int>::fn<std::string>);
+
 Custom automatic downcasters
 ============================
 
@@ -1190,7 +1228,7 @@ whether a downcast is safe, you can proceed by specializing the
         std::string bark() const { return sound; }
     };
 
-    namespace pybind11 {
+    namespace PYBIND11_NAMESPACE {
         template<> struct polymorphic_type_hook<Pet> {
             static const void *get(const Pet *src, const std::type_info*& type) {
                 // note that src may be nullptr
@@ -1201,7 +1239,7 @@ whether a downcast is safe, you can proceed by specializing the
                 return src;
             }
         };
-    } // namespace pybind11
+    } // namespace PYBIND11_NAMESPACE
 
 When pybind11 wants to convert a C++ pointer of type ``Base*`` to a
 Python object, it calls ``polymorphic_type_hook<Base>::get()`` to
diff --git a/ext/pybind11/docs/advanced/exceptions.rst b/ext/pybind11/docs/advanced/exceptions.rst
index 40f67d7b83..53981dc08f 100644
--- a/ext/pybind11/docs/advanced/exceptions.rst
+++ b/ext/pybind11/docs/advanced/exceptions.rst
@@ -64,7 +64,7 @@ at its exception handler.
 +--------------------------------------+--------------------------------------+
 
 Exception translation is not bidirectional. That is, *catching* the C++
-exceptions defined above above will not trap exceptions that originate from
+exceptions defined above will not trap exceptions that originate from
 Python. For that, catch :class:`pybind11::error_already_set`. See :ref:`below
 <handling_python_exceptions_cpp>` for further details.
 
@@ -177,9 +177,12 @@ section.
     may be explicitly (re-)thrown to delegate it to the other,
     previously-declared existing exception translators.
 
-    Note that ``libc++`` and ``libstdc++`` `behave differently <https://stackoverflow.com/questions/19496643/using-clang-fvisibility-hidden-and-typeinfo-and-type-erasure/28827430>`_
-    with ``-fvisibility=hidden``. Therefore exceptions that are used across ABI boundaries need to be explicitly exported, as exercised in ``tests/test_exceptions.h``.
-    See also: "Problems with C++ exceptions" under `GCC Wiki <https://gcc.gnu.org/wiki/Visibility>`_.
+    Note that ``libc++`` and ``libstdc++`` `behave differently under macOS
+    <https://stackoverflow.com/questions/19496643/using-clang-fvisibility-hidden-and-typeinfo-and-type-erasure/28827430>`_
+    with ``-fvisibility=hidden``. Therefore exceptions that are used across ABI
+    boundaries need to be explicitly exported, as exercised in
+    ``tests/test_exceptions.h``. See also:
+    "Problems with C++ exceptions" under `GCC Wiki <https://gcc.gnu.org/wiki/Visibility>`_.
 
 
 Local vs Global Exception Translators
@@ -328,8 +331,8 @@ an invalid state.
 Chaining exceptions ('raise from')
 ==================================
 
-In Python 3.3 a mechanism for indicating that exceptions were caused by other
-exceptions was introduced:
+Python has a mechanism for indicating that exceptions were caused by other
+exceptions:
 
 .. code-block:: py
 
@@ -340,7 +343,7 @@ exceptions was introduced:
 
 To do a similar thing in pybind11, you can use the ``py::raise_from`` function. It
 sets the current python error indicator, so to continue propagating the exception
-you should ``throw py::error_already_set()`` (Python 3 only).
+you should ``throw py::error_already_set()``.
 
 .. code-block:: cpp
 
diff --git a/ext/pybind11/docs/advanced/functions.rst b/ext/pybind11/docs/advanced/functions.rst
index abd1084ab5..69e3d8a1df 100644
--- a/ext/pybind11/docs/advanced/functions.rst
+++ b/ext/pybind11/docs/advanced/functions.rst
@@ -120,7 +120,7 @@ targeted arguments can be passed through the :class:`cpp_function` constructor:
 .. code-block:: cpp
 
     class_<MyClass>(m, "MyClass")
-        .def_property("data"
+        .def_property("data",
             py::cpp_function(&MyClass::getData, py::return_value_policy::copy),
             py::cpp_function(&MyClass::setData)
         );
@@ -306,8 +306,9 @@ The class ``py::args`` derives from ``py::tuple`` and ``py::kwargs`` derives
 from ``py::dict``.
 
 You may also use just one or the other, and may combine these with other
-arguments as long as the ``py::args`` and ``py::kwargs`` arguments are the last
-arguments accepted by the function.
+arguments.  Note, however, that ``py::kwargs`` must always be the last argument
+of the function, and ``py::args`` implies that any further arguments are
+keyword-only (see :ref:`keyword_only_arguments`).
 
 Please refer to the other examples for details on how to iterate over these,
 and on how to cast their entries into C++ objects. A demonstration is also
@@ -366,10 +367,12 @@ like so:
     py::class_<MyClass>("MyClass")
         .def("myFunction", py::arg("arg") = static_cast<SomeType *>(nullptr));
 
+.. _keyword_only_arguments:
+
 Keyword-only arguments
 ======================
 
-Python 3 introduced keyword-only arguments by specifying an unnamed ``*``
+Python implements keyword-only arguments by specifying an unnamed ``*``
 argument in a function definition:
 
 .. code-block:: python
@@ -392,11 +395,19 @@ argument annotations when registering the function:
     m.def("f", [](int a, int b) { /* ... */ },
           py::arg("a"), py::kw_only(), py::arg("b"));
 
-Note that you currently cannot combine this with a ``py::args`` argument.  This
-feature does *not* require Python 3 to work.
-
 .. versionadded:: 2.6
 
+A ``py::args`` argument implies that any following arguments are keyword-only,
+as if ``py::kw_only()`` had been specified in the same relative location of the
+argument list as the ``py::args`` argument.  The ``py::kw_only()`` may be
+included to be explicit about this, but is not required.
+
+.. versionchanged:: 2.9
+   This can now be combined with ``py::args``. Before, ``py::args`` could only
+   occur at the end of the argument list, or immediately before a ``py::kwargs``
+   argument at the end.
+
+
 Positional-only arguments
 =========================
 
@@ -566,3 +577,38 @@ prefers earlier-defined overloads to later-defined ones.
 .. versionadded:: 2.6
 
     The ``py::prepend()`` tag.
+
+Binding functions with template parameters
+==========================================
+
+You can bind functions that have template parameters. Here's a function:
+
+.. code-block:: cpp
+
+    template <typename T>
+    void set(T t);
+
+C++ templates cannot be instantiated at runtime, so you cannot bind the
+non-instantiated function:
+
+.. code-block:: cpp
+
+    // BROKEN (this will not compile)
+    m.def("set", &set);
+
+You must bind each instantiated function template separately. You may bind
+each instantiation with the same name, which will be treated the same as
+an overloaded function:
+
+.. code-block:: cpp
+
+    m.def("set", &set<int>);
+    m.def("set", &set<std::string>);
+
+Sometimes it's more clear to bind them with separate names, which is also
+an option:
+
+.. code-block:: cpp
+
+    m.def("setInt", &set<int>);
+    m.def("setString", &set<std::string>);
diff --git a/ext/pybind11/docs/advanced/misc.rst b/ext/pybind11/docs/advanced/misc.rst
index edab15fcb7..805ec838fc 100644
--- a/ext/pybind11/docs/advanced/misc.rst
+++ b/ext/pybind11/docs/advanced/misc.rst
@@ -39,15 +39,42 @@ The ``PYBIND11_MAKE_OPAQUE`` macro does *not* require the above workarounds.
 Global Interpreter Lock (GIL)
 =============================
 
-When calling a C++ function from Python, the GIL is always held.
+The Python C API dictates that the Global Interpreter Lock (GIL) must always
+be held by the current thread to safely access Python objects. As a result,
+when Python calls into C++ via pybind11 the GIL must be held, and pybind11
+will never implicitly release the GIL.
+
+.. code-block:: cpp
+
+    void my_function() {
+        /* GIL is held when this function is called from Python */
+    }
+
+    PYBIND11_MODULE(example, m) {
+        m.def("my_function", &my_function);
+    }
+
+pybind11 will ensure that the GIL is held when it knows that it is calling
+Python code. For example, if a Python callback is passed to C++ code via
+``std::function``, when C++ code calls the function the built-in wrapper
+will acquire the GIL before calling the Python callback. Similarly, the
+``PYBIND11_OVERRIDE`` family of macros will acquire the GIL before calling
+back into Python.
+
+When writing C++ code that is called from other C++ code, if that code accesses
+Python state, it must explicitly acquire and release the GIL.
+
 The classes :class:`gil_scoped_release` and :class:`gil_scoped_acquire` can be
 used to acquire and release the global interpreter lock in the body of a C++
 function call. In this way, long-running C++ code can be parallelized using
-multiple Python threads. Taking :ref:`overriding_virtuals` as an example, this
+multiple Python threads, **but great care must be taken** when any
+:class:`gil_scoped_release` appear: if there is any way that the C++ code
+can access Python objects, :class:`gil_scoped_acquire` should be used to
+reacquire the GIL. Taking :ref:`overriding_virtuals` as an example, this
 could be realized as follows (important changes highlighted):
 
 .. code-block:: cpp
-    :emphasize-lines: 8,9,31,32
+    :emphasize-lines: 8,30,31
 
     class PyAnimal : public Animal {
     public:
@@ -56,9 +83,7 @@ could be realized as follows (important changes highlighted):
 
         /* Trampoline (need one for each virtual function) */
         std::string go(int n_times) {
-            /* Acquire GIL before calling Python code */
-            py::gil_scoped_acquire acquire;
-
+            /* PYBIND11_OVERRIDE_PURE will acquire the GIL before accessing Python state */
             PYBIND11_OVERRIDE_PURE(
                 std::string, /* Return type */
                 Animal,      /* Parent class */
@@ -78,7 +103,8 @@ could be realized as follows (important changes highlighted):
             .def(py::init<>());
 
         m.def("call_go", [](Animal *animal) -> std::string {
-            /* Release GIL before calling into (potentially long-running) C++ code */
+            // GIL is held when called from Python code. Release GIL before
+            // calling into (potentially long-running) C++ code
             py::gil_scoped_release release;
             return call_go(animal);
         });
@@ -92,6 +118,34 @@ The ``call_go`` wrapper can also be simplified using the ``call_guard`` policy
     m.def("call_go", &call_go, py::call_guard<py::gil_scoped_release>());
 
 
+Common Sources Of Global Interpreter Lock Errors
+==================================================================
+
+Failing to properly hold the Global Interpreter Lock (GIL) is one of the
+more common sources of bugs within code that uses pybind11. If you are
+running into GIL related errors, we highly recommend you consult the
+following checklist.
+
+- Do you have any global variables that are pybind11 objects or invoke
+  pybind11 functions in either their constructor or destructor? You are generally
+  not allowed to invoke any Python function in a global static context. We recommend
+  using lazy initialization and then intentionally leaking at the end of the program.
+
+- Do you have any pybind11 objects that are members of other C++ structures? One
+  commonly overlooked requirement is that pybind11 objects have to increase their reference count
+  whenever their copy constructor is called. Thus, you need to be holding the GIL to invoke
+  the copy constructor of any C++ class that has a pybind11 member. This can sometimes be very
+  tricky to track for complicated programs Think carefully when you make a pybind11 object
+  a member in another struct.
+
+- C++ destructors that invoke Python functions can be particularly troublesome as
+  destructors can sometimes get invoked in weird and unexpected circumstances as a result
+  of exceptions.
+
+- You should try running your code in a debug build. That will enable additional assertions
+  within pybind11 that will throw exceptions on certain GIL handling errors
+  (reference counting operations).
+
 Binding sequence data types, iterators, the slicing protocol, etc.
 ==================================================================
 
@@ -298,6 +352,15 @@ The class ``options`` allows you to selectively suppress auto-generated signatur
         m.def("add", [](int a, int b) { return a + b; }, "A function which adds two numbers");
     }
 
+pybind11 also appends all members of an enum to the resulting enum docstring.
+This default behavior can be disabled by using the ``disable_enum_members_docstring()``
+function of the ``options`` class.
+
+With ``disable_user_defined_docstrings()`` all user defined docstrings of
+``module_::def()``, ``class_::def()`` and ``enum_()`` are disabled, but the
+function signatures and enum members are included in the docstring, unless they
+are disabled separately.
+
 Note that changes to the settings affect only function bindings created during the
 lifetime of the ``options`` instance. When it goes out of scope at the end of the module's init function,
 the default settings are restored to prevent unwanted side effects.
diff --git a/ext/pybind11/docs/advanced/pycpp/numpy.rst b/ext/pybind11/docs/advanced/pycpp/numpy.rst
index 30daeefff9..07c969305d 100644
--- a/ext/pybind11/docs/advanced/pycpp/numpy.rst
+++ b/ext/pybind11/docs/advanced/pycpp/numpy.rst
@@ -87,7 +87,7 @@ buffer objects (e.g. a NumPy matrix).
             /* Request a buffer descriptor from Python */
             py::buffer_info info = b.request();
 
-            /* Some sanity checks ... */
+            /* Some basic validation checks ... */
             if (info.format != py::format_descriptor<Scalar>::format())
                 throw std::runtime_error("Incompatible format: expected a double array!");
 
@@ -395,11 +395,9 @@ uses of ``py::array``:
 Ellipsis
 ========
 
-Python 3 provides a convenient ``...`` ellipsis notation that is often used to
+Python provides a convenient ``...`` ellipsis notation that is often used to
 slice multidimensional arrays. For instance, the following snippet extracts the
 middle dimensions of a tensor with the first and last index set to zero.
-In Python 2, the syntactic sugar ``...`` is not available, but the singleton
-``Ellipsis`` (of type ``ellipsis``) can still be used directly.
 
 .. code-block:: python
 
@@ -414,8 +412,6 @@ operation on the C++ side:
    py::array a = /* A NumPy array */;
    py::array b = a[py::make_tuple(0, py::ellipsis(), 0)];
 
-.. versionchanged:: 2.6
-   ``py::ellipsis()`` is now also available in Python 2.
 
 Memory view
 ===========
@@ -437,7 +433,7 @@ following:
             { 2, 4 },                                  // shape (rows, cols)
             { sizeof(uint8_t) * 4, sizeof(uint8_t) }   // strides in bytes
         );
-    })
+    });
 
 This approach is meant for providing a ``memoryview`` for a C/C++ buffer not
 managed by Python. The user is responsible for managing the lifetime of the
@@ -453,11 +449,7 @@ We can also use ``memoryview::from_memory`` for a simple 1D contiguous buffer:
             buffer,               // buffer pointer
             sizeof(uint8_t) * 8   // buffer size
         );
-    })
-
-.. note::
-
-    ``memoryview::from_memory`` is not available in Python 2.
+    });
 
 .. versionchanged:: 2.6
     ``memoryview::from_memory`` added.
diff --git a/ext/pybind11/docs/advanced/smart_ptrs.rst b/ext/pybind11/docs/advanced/smart_ptrs.rst
index 5a22201095..3c40ce1237 100644
--- a/ext/pybind11/docs/advanced/smart_ptrs.rst
+++ b/ext/pybind11/docs/advanced/smart_ptrs.rst
@@ -157,7 +157,7 @@ specialized:
     PYBIND11_DECLARE_HOLDER_TYPE(T, SmartPtr<T>);
 
     // Only needed if the type's `.get()` goes by another name
-    namespace pybind11 { namespace detail {
+    namespace PYBIND11_NAMESPACE { namespace detail {
         template <typename T>
         struct holder_helper<SmartPtr<T>> { // <-- specialization
             static const T *get(const SmartPtr<T> &p) { return p.getPointer(); }
diff --git a/ext/pybind11/docs/basics.rst b/ext/pybind11/docs/basics.rst
index 0b1d85cfd3..e9b24c7fa7 100644
--- a/ext/pybind11/docs/basics.rst
+++ b/ext/pybind11/docs/basics.rst
@@ -32,8 +32,7 @@ The last line will both compile and run the tests.
 Windows
 -------
 
-On Windows, only **Visual Studio 2015** and newer are supported since pybind11 relies
-on various C++11 language features that break older versions of Visual Studio.
+On Windows, only **Visual Studio 2017** and newer are supported.
 
 .. Note::
 
@@ -109,7 +108,7 @@ a file named :file:`example.cpp` with the following contents:
     PYBIND11_MODULE(example, m) {
         m.doc() = "pybind11 example plugin"; // optional module docstring
 
-        m.def("add", &add, "A function which adds two numbers");
+        m.def("add", &add, "A function that adds two numbers");
     }
 
 .. [#f1] In practice, implementation and binding code will generally be located
@@ -166,12 +165,12 @@ load and execute the example:
 .. code-block:: pycon
 
     $ python
-    Python 2.7.10 (default, Aug 22 2015, 20:33:39)
-    [GCC 4.2.1 Compatible Apple LLVM 7.0.0 (clang-700.0.59.1)] on darwin
+    Python 3.9.10 (main, Jan 15 2022, 11:48:04)
+    [Clang 13.0.0 (clang-1300.0.29.3)] on darwin
     Type "help", "copyright", "credits" or "license" for more information.
     >>> import example
     >>> example.add(1, 2)
-    3L
+    3
     >>>
 
 .. _keyword_args:
diff --git a/ext/pybind11/docs/benchmark.py b/ext/pybind11/docs/benchmark.py
index f190793671..2150b6ca78 100644
--- a/ext/pybind11/docs/benchmark.py
+++ b/ext/pybind11/docs/benchmark.py
@@ -1,4 +1,3 @@
-# -*- coding: utf-8 -*-
 import datetime as dt
 import os
 import random
@@ -12,20 +11,20 @@ def generate_dummy_code_pybind11(nclasses=10):
     bindings = ""
 
     for cl in range(nclasses):
-        decl += "class cl%03i;\n" % cl
+        decl += f"class cl{cl:03};\n"
     decl += "\n"
 
     for cl in range(nclasses):
-        decl += "class cl%03i {\n" % cl
+        decl += f"class {cl:03} {{\n"
         decl += "public:\n"
-        bindings += '    py::class_<cl%03i>(m, "cl%03i")\n' % (cl, cl)
+        bindings += f'    py::class_<cl{cl:03}>(m, "cl{cl:03}")\n'
         for fn in range(nfns):
             ret = random.randint(0, nclasses - 1)
             params = [random.randint(0, nclasses - 1) for i in range(nargs)]
-            decl += "    cl%03i *fn_%03i(" % (ret, fn)
-            decl += ", ".join("cl%03i *" % p for p in params)
+            decl += f"    cl{ret:03} *fn_{fn:03}("
+            decl += ", ".join(f"cl{p:03} *" for p in params)
             decl += ");\n"
-            bindings += '        .def("fn_%03i", &cl%03i::fn_%03i)\n' % (fn, cl, fn)
+            bindings += f'        .def("fn_{fn:03}", &cl{cl:03}::fn_{fn:03})\n'
         decl += "};\n\n"
         bindings += "        ;\n"
 
@@ -43,23 +42,20 @@ def generate_dummy_code_boost(nclasses=10):
     bindings = ""
 
     for cl in range(nclasses):
-        decl += "class cl%03i;\n" % cl
+        decl += f"class cl{cl:03};\n"
     decl += "\n"
 
     for cl in range(nclasses):
         decl += "class cl%03i {\n" % cl
         decl += "public:\n"
-        bindings += '    py::class_<cl%03i>("cl%03i")\n' % (cl, cl)
+        bindings += f'    py::class_<cl{cl:03}>("cl{cl:03}")\n'
         for fn in range(nfns):
             ret = random.randint(0, nclasses - 1)
             params = [random.randint(0, nclasses - 1) for i in range(nargs)]
-            decl += "    cl%03i *fn_%03i(" % (ret, fn)
-            decl += ", ".join("cl%03i *" % p for p in params)
+            decl += f"    cl{ret:03} *fn_{fn:03}("
+            decl += ", ".join(f"cl{p:03} *" for p in params)
             decl += ");\n"
-            bindings += (
-                '        .def("fn_%03i", &cl%03i::fn_%03i, py::return_value_policy<py::manage_new_object>())\n'
-                % (fn, cl, fn)
-            )
+            bindings += f'        .def("fn_{fn:03}", &cl{cl:03}::fn_{fn:03}, py::return_value_policy<py::manage_new_object>())\n'
         decl += "};\n\n"
         bindings += "        ;\n"
 
@@ -75,7 +71,7 @@ def generate_dummy_code_boost(nclasses=10):
 for codegen in [generate_dummy_code_pybind11, generate_dummy_code_boost]:
     print("{")
     for i in range(0, 10):
-        nclasses = 2 ** i
+        nclasses = 2**i
         with open("test.cpp", "w") as f:
             f.write(codegen(nclasses))
         n1 = dt.datetime.now()
diff --git a/ext/pybind11/docs/changelog.rst b/ext/pybind11/docs/changelog.rst
index bb5457eec1..bb111c5f26 100644
--- a/ext/pybind11/docs/changelog.rst
+++ b/ext/pybind11/docs/changelog.rst
@@ -6,10 +6,618 @@ Changelog
 Starting with version 1.8.0, pybind11 releases use a `semantic versioning
 <http://semver.org>`_ policy.
 
+Changes will be added here periodically from the "Suggested changelog entry"
+block in pull request descriptions.
+
 
 IN DEVELOPMENT
 --------------
 
+Changes will be summarized here periodically.
+
+Changes:
+
+* ``PyGILState_Check()``'s in ``pybind11::handle``'s ``inc_ref()`` &
+  ``dec_ref()`` are now enabled by default again.
+  `#4246 <https://github.com/pybind/pybind11/pull/4246>`_
+
+Build system improvements:
+
+* Update clang-tidy to 15 in CI.
+  `#4387 <https://github.com/pybind/pybind11/pull/4387>`_
+
+
+Version 2.10.3 (Jan 3, 2023)
+----------------------------
+
+Changes:
+
+* Temporarily made our GIL status assertions (added in 2.10.2) disabled by
+  default (re-enable manually by defining
+  ``PYBIND11_ASSERT_GIL_HELD_INCREF_DECREF``, will be enabled in 2.11).
+  `#4432 <https://github.com/pybind/pybind11/pull/4432>`_
+
+* Improved error messages when ``inc_ref``/``dec_ref`` are called with an
+  invalid GIL state.
+  `#4427 <https://github.com/pybind/pybind11/pull/4427>`_
+  `#4436 <https://github.com/pybind/pybind11/pull/4436>`_
+
+Bug Fixes:
+
+* Some minor touchups found by static analyzers.
+  `#4440 <https://github.com/pybind/pybind11/pull/4440>`_
+
+
+Version 2.10.2 (Dec 20, 2022)
+-----------------------------
+
+Changes:
+
+* ``scoped_interpreter`` constructor taking ``PyConfig``.
+  `#4330 <https://github.com/pybind/pybind11/pull/4330>`_
+
+* ``pybind11/eigen/tensor.h`` adds converters to and from ``Eigen::Tensor`` and
+  ``Eigen::TensorMap``.
+  `#4201 <https://github.com/pybind/pybind11/pull/4201>`_
+
+* ``PyGILState_Check()``'s  were integrated to ``pybind11::handle``
+  ``inc_ref()`` & ``dec_ref()``. The added GIL checks are guarded by
+  ``PYBIND11_ASSERT_GIL_HELD_INCREF_DECREF``, which is the default only if
+  ``NDEBUG`` is not defined. (Made non-default in 2.10.3, will be active in 2.11)
+  `#4246 <https://github.com/pybind/pybind11/pull/4246>`_
+
+* Add option for enable/disable enum members in docstring.
+  `#2768 <https://github.com/pybind/pybind11/pull/2768>`_
+
+* Fixed typing of ``KeysView``, ``ValuesView`` and ``ItemsView`` in ``bind_map``.
+  `#4353 <https://github.com/pybind/pybind11/pull/4353>`_
+
+Bug fixes:
+
+* Bug fix affecting only Python 3.6 under very specific, uncommon conditions:
+  move ``PyEval_InitThreads()`` call to the correct location.
+  `#4350 <https://github.com/pybind/pybind11/pull/4350>`_
+
+* Fix segfault bug when passing foreign native functions to functional.h.
+  `#4254 <https://github.com/pybind/pybind11/pull/4254>`_
+
+Build system improvements:
+
+* Support setting PYTHON_LIBRARIES manually for Windows ARM cross-compilation
+  (classic mode).
+  `#4406 <https://github.com/pybind/pybind11/pull/4406>`_
+
+* Extend IPO/LTO detection for ICX (a.k.a IntelLLVM) compiler.
+  `#4402 <https://github.com/pybind/pybind11/pull/4402>`_
+
+* Allow calling ``find_package(pybind11 CONFIG)`` multiple times from separate
+  directories in the same CMake project and properly link Python (new mode).
+  `#4401 <https://github.com/pybind/pybind11/pull/4401>`_
+
+* ``multiprocessing_set_spawn`` in pytest fixture for added safety.
+  `#4377 <https://github.com/pybind/pybind11/pull/4377>`_
+
+* Fixed a bug in two pybind11/tools cmake scripts causing "Unknown arguments specified" errors.
+  `#4327 <https://github.com/pybind/pybind11/pull/4327>`_
+
+
+
+Version 2.10.1 (Oct 31, 2022)
+-----------------------------
+
+This is the first version to fully support embedding the newly released Python 3.11.
+
+Changes:
+
+* Allow ``pybind11::capsule`` constructor to take null destructor pointers.
+  `#4221 <https://github.com/pybind/pybind11/pull/4221>`_
+
+* ``embed.h`` was changed so that ``PYTHONPATH`` is used also with Python 3.11
+  (established behavior).
+  `#4119 <https://github.com/pybind/pybind11/pull/4119>`_
+
+* A ``PYBIND11_SIMPLE_GIL_MANAGEMENT`` option was added (cmake, C++ define),
+  along with many additional tests in ``test_gil_scoped.py``. The option may be
+  useful to try when debugging GIL-related issues, to determine if the more
+  complex default implementation is or is not to blame. See #4216 for
+  background. WARNING: Please be careful to not create ODR violations when
+  using the option: everything that is linked together with mutual symbol
+  visibility needs to be rebuilt.
+  `#4216 <https://github.com/pybind/pybind11/pull/4216>`_
+
+* ``PYBIND11_EXPORT_EXCEPTION`` was made non-empty only under macOS. This makes
+  Linux builds safer, and enables the removal of warning suppression pragmas for
+  Windows.
+  `#4298 <https://github.com/pybind/pybind11/pull/4298>`_
+
+Bug fixes:
+
+* Fixed a bug where ``UnicodeDecodeError`` was not propagated from various
+  ``py::str`` ctors when decoding surrogate utf characters.
+  `#4294 <https://github.com/pybind/pybind11/pull/4294>`_
+
+* Revert perfect forwarding for ``make_iterator``. This broke at least one
+  valid use case. May revisit later.
+  `#4234 <https://github.com/pybind/pybind11/pull/4234>`_
+
+* Fix support for safe casts to ``void*`` (regression in 2.10.0).
+  `#4275 <https://github.com/pybind/pybind11/pull/4275>`_
+
+* Fix ``char8_t`` support (regression in 2.9).
+  `#4278 <https://github.com/pybind/pybind11/pull/4278>`_
+
+* Unicode surrogate character in Python exception message leads to process
+  termination in ``error_already_set::what()``.
+  `#4297 <https://github.com/pybind/pybind11/pull/4297>`_
+
+* Fix MSVC 2019 v.1924 & C++14 mode error for ``overload_cast``.
+  `#4188 <https://github.com/pybind/pybind11/pull/4188>`_
+
+* Make augmented assignment operators non-const for the object-api. Behavior
+  was previously broken for augmented assignment operators.
+  `#4065 <https://github.com/pybind/pybind11/pull/4065>`_
+
+* Add proper error checking to C++ bindings for Python list append and insert.
+  `#4208 <https://github.com/pybind/pybind11/pull/4208>`_
+
+* Work-around for Nvidia's CUDA nvcc compiler in versions 11.4.0 - 11.8.0.
+  `#4220 <https://github.com/pybind/pybind11/pull/4220>`_
+
+* A workaround for PyPy was added in the ``py::error_already_set``
+  implementation, related to PR `#1895 <https://github.com/pybind/pybind11/pull/1895>`_
+  released with v2.10.0.
+  `#4079 <https://github.com/pybind/pybind11/pull/4079>`_
+
+* Fixed compiler errors when C++23 ``std::forward_like`` is available.
+  `#4136 <https://github.com/pybind/pybind11/pull/4136>`_
+
+* Properly raise exceptions in contains methods (like when an object in unhashable).
+  `#4209 <https://github.com/pybind/pybind11/pull/4209>`_
+
+* Further improve another error in exception handling.
+  `#4232 <https://github.com/pybind/pybind11/pull/4232>`_
+
+* ``get_local_internals()`` was made compatible with
+  ``finalize_interpreter()``, fixing potential freezes during interpreter
+  finalization.
+  `#4192 <https://github.com/pybind/pybind11/pull/4192>`_
+
+Performance and style:
+
+* Reserve space in set and STL map casters if possible. This will prevent
+  unnecessary rehashing / resizing by knowing the number of keys ahead of time
+  for Python to C++ casting. This improvement will greatly speed up the casting
+  of large unordered maps and sets.
+  `#4194 <https://github.com/pybind/pybind11/pull/4194>`_
+
+* GIL RAII scopes are non-copyable to avoid potential bugs.
+  `#4183 <https://github.com/pybind/pybind11/pull/4183>`_
+
+* Explicitly default all relevant ctors for pytypes in the ``PYBIND11_OBJECT``
+  macros and enforce the clang-tidy checks ``modernize-use-equals-default`` in
+  macros as well.
+  `#4017 <https://github.com/pybind/pybind11/pull/4017>`_
+
+* Optimize iterator advancement in C++ bindings.
+  `#4237 <https://github.com/pybind/pybind11/pull/4237>`_
+
+* Use the modern ``PyObject_GenericGetDict`` and ``PyObject_GenericSetDict``
+  for handling dynamic attribute dictionaries.
+  `#4106 <https://github.com/pybind/pybind11/pull/4106>`_
+
+* Document that users should use ``PYBIND11_NAMESPACE`` instead of using ``pybind11`` when
+  opening namespaces. Using namespace declarations and namespace qualification
+  remain the same as ``pybind11``. This is done to ensure consistent symbol
+  visibility.
+  `#4098 <https://github.com/pybind/pybind11/pull/4098>`_
+
+* Mark ``detail::forward_like`` as constexpr.
+  `#4147 <https://github.com/pybind/pybind11/pull/4147>`_
+
+* Optimize unpacking_collector when processing ``arg_v`` arguments.
+  `#4219 <https://github.com/pybind/pybind11/pull/4219>`_
+
+* Optimize casting C++ object to ``None``.
+  `#4269 <https://github.com/pybind/pybind11/pull/4269>`_
+
+
+Build system improvements:
+
+* CMake: revert overwrite behavior, now opt-in with ``PYBIND11_PYTHONLIBS_OVERRWRITE OFF``.
+  `#4195 <https://github.com/pybind/pybind11/pull/4195>`_
+
+* Include a pkg-config file when installing pybind11, such as in the Python
+  package.
+  `#4077 <https://github.com/pybind/pybind11/pull/4077>`_
+
+* Avoid stripping debug symbols when ``CMAKE_BUILD_TYPE`` is set to ``DEBUG``
+  instead of ``Debug``.
+  `#4078 <https://github.com/pybind/pybind11/pull/4078>`_
+
+* Followup to `#3948 <https://github.com/pybind/pybind11/pull/3948>`_, fixing vcpkg again.
+  `#4123 <https://github.com/pybind/pybind11/pull/4123>`_
+
+Version 2.10.0 (Jul 15, 2022)
+-----------------------------
+
+Removed support for Python 2.7, Python 3.5, and MSVC 2015. Support for MSVC
+2017 is limited due to availability of CI runners; we highly recommend MSVC
+2019 or 2022 be used. Initial support added for Python 3.11.
+
+New features:
+
+* ``py::anyset`` & ``py::frozenset`` were added, with copying (cast) to
+  ``std::set`` (similar to ``set``).
+  `#3901 <https://github.com/pybind/pybind11/pull/3901>`_
+
+* Support bytearray casting to string.
+  `#3707 <https://github.com/pybind/pybind11/pull/3707>`_
+
+* ``type_caster<std::monostate>`` was added. ``std::monostate`` is a tag type
+  that allows ``std::variant`` to act as an optional, or allows default
+  construction of a ``std::variant`` holding a non-default constructible type.
+  `#3818 <https://github.com/pybind/pybind11/pull/3818>`_
+
+* ``pybind11::capsule::set_name`` added to mutate the name of the capsule instance.
+  `#3866 <https://github.com/pybind/pybind11/pull/3866>`_
+
+* NumPy: dtype constructor from type number added, accessors corresponding to
+  Python API ``dtype.num``, ``dtype.byteorder``, ``dtype.flags`` and
+  ``dtype.alignment`` added.
+  `#3868 <https://github.com/pybind/pybind11/pull/3868>`_
+
+
+Changes:
+
+* Python 3.6 is now the minimum supported version.
+  `#3688 <https://github.com/pybind/pybind11/pull/3688>`_
+  `#3719 <https://github.com/pybind/pybind11/pull/3719>`_
+
+* The minimum version for MSVC is now 2017.
+  `#3722 <https://github.com/pybind/pybind11/pull/3722>`_
+
+* Fix issues with CPython 3.11 betas and add to supported test matrix.
+  `#3923 <https://github.com/pybind/pybind11/pull/3923>`_
+
+* ``error_already_set`` is now safer and more performant, especially for
+  exceptions with long tracebacks, by delaying computation.
+  `#1895 <https://github.com/pybind/pybind11/pull/1895>`_
+
+* Improve exception handling in python ``str`` bindings.
+  `#3826 <https://github.com/pybind/pybind11/pull/3826>`_
+
+* The bindings for capsules now have more consistent exception handling.
+  `#3825 <https://github.com/pybind/pybind11/pull/3825>`_
+
+* ``PYBIND11_OBJECT_CVT`` and ``PYBIND11_OBJECT_CVT_DEFAULT`` macro can now be
+  used to define classes in namespaces other than pybind11.
+  `#3797 <https://github.com/pybind/pybind11/pull/3797>`_
+
+* Error printing code now uses ``PYBIND11_DETAILED_ERROR_MESSAGES`` instead of
+  requiring ``NDEBUG``, allowing use with release builds if desired.
+  `#3913 <https://github.com/pybind/pybind11/pull/3913>`_
+
+* Implicit conversion of the literal ``0`` to ``pybind11::handle`` is now disabled.
+  `#4008 <https://github.com/pybind/pybind11/pull/4008>`_
+
+
+Bug fixes:
+
+* Fix exception handling when ``pybind11::weakref()`` fails.
+  `#3739 <https://github.com/pybind/pybind11/pull/3739>`_
+
+* ``module_::def_submodule`` was missing proper error handling. This is fixed now.
+  `#3973 <https://github.com/pybind/pybind11/pull/3973>`_
+
+* The behavior or ``error_already_set`` was made safer and the highly opaque
+  "Unknown internal error occurred" message was replaced with a more helpful
+  message.
+  `#3982 <https://github.com/pybind/pybind11/pull/3982>`_
+
+* ``error_already_set::what()`` now handles non-normalized exceptions correctly.
+  `#3971 <https://github.com/pybind/pybind11/pull/3971>`_
+
+* Support older C++ compilers where filesystem is not yet part of the standard
+  library and is instead included in ``std::experimental::filesystem``.
+  `#3840 <https://github.com/pybind/pybind11/pull/3840>`_
+
+* Fix ``-Wfree-nonheap-object`` warnings produced by GCC by avoiding returning
+  pointers to static objects with ``return_value_policy::take_ownership``.
+  `#3946 <https://github.com/pybind/pybind11/pull/3946>`_
+
+* Fix cast from pytype rvalue to another pytype.
+  `#3949 <https://github.com/pybind/pybind11/pull/3949>`_
+
+* Ensure proper behavior when garbage collecting classes with dynamic attributes in Python >=3.9.
+  `#4051 <https://github.com/pybind/pybind11/pull/4051>`_
+
+* A couple long-standing ``PYBIND11_NAMESPACE``
+  ``__attribute__((visibility("hidden")))`` inconsistencies are now fixed
+  (affects only unusual environments).
+  `#4043 <https://github.com/pybind/pybind11/pull/4043>`_
+
+* ``pybind11::detail::get_internals()`` is now resilient to in-flight Python
+  exceptions.
+  `#3981 <https://github.com/pybind/pybind11/pull/3981>`_
+
+* Arrays with a dimension of size 0 are now properly converted to dynamic Eigen
+  matrices (more common in NumPy 1.23).
+  `#4038 <https://github.com/pybind/pybind11/pull/4038>`_
+
+* Avoid catching unrelated errors when importing NumPy.
+  `#3974 <https://github.com/pybind/pybind11/pull/3974>`_
+
+Performance and style:
+
+* Added an accessor overload of ``(object &&key)`` to reference steal the
+  object when using python types as keys. This prevents unnecessary reference
+  count overhead for attr, dictionary, tuple, and sequence look ups. Added
+  additional regression tests. Fixed a performance bug the caused accessor
+  assignments to potentially perform unnecessary copies.
+  `#3970 <https://github.com/pybind/pybind11/pull/3970>`_
+
+* Perfect forward all args of ``make_iterator``.
+  `#3980 <https://github.com/pybind/pybind11/pull/3980>`_
+
+* Avoid potential bug in pycapsule destructor by adding an ``error_guard`` to
+  one of the dtors.
+  `#3958 <https://github.com/pybind/pybind11/pull/3958>`_
+
+* Optimize dictionary access in ``strip_padding`` for numpy.
+  `#3994 <https://github.com/pybind/pybind11/pull/3994>`_
+
+* ``stl_bind.h`` bindings now take slice args as a const-ref.
+  `#3852 <https://github.com/pybind/pybind11/pull/3852>`_
+
+* Made slice constructor more consistent, and improve performance of some
+  casters by allowing reference stealing.
+  `#3845 <https://github.com/pybind/pybind11/pull/3845>`_
+
+* Change numpy dtype from_args method to use const ref.
+  `#3878 <https://github.com/pybind/pybind11/pull/3878>`_
+
+* Follow rule of three to ensure ``PyErr_Restore`` is called only once.
+  `#3872 <https://github.com/pybind/pybind11/pull/3872>`_
+
+* Added missing perfect forwarding for ``make_iterator`` functions.
+  `#3860 <https://github.com/pybind/pybind11/pull/3860>`_
+
+* Optimize c++ to python function casting by using the rvalue caster.
+  `#3966 <https://github.com/pybind/pybind11/pull/3966>`_
+
+* Optimize Eigen sparse matrix casting by removing unnecessary temporary.
+  `#4064 <https://github.com/pybind/pybind11/pull/4064>`_
+
+* Avoid potential implicit copy/assignment constructors causing double free in
+  ``strdup_gaurd``.
+  `#3905 <https://github.com/pybind/pybind11/pull/3905>`_
+
+* Enable clang-tidy checks ``misc-definitions-in-headers``,
+  ``modernize-loop-convert``, and ``modernize-use-nullptr``.
+  `#3881 <https://github.com/pybind/pybind11/pull/3881>`_
+  `#3988 <https://github.com/pybind/pybind11/pull/3988>`_
+
+
+Build system improvements:
+
+* CMake: Fix file extension on Windows with cp36 and cp37 using FindPython.
+  `#3919 <https://github.com/pybind/pybind11/pull/3919>`_
+
+* CMake: Support multiple Python targets (such as on vcpkg).
+  `#3948 <https://github.com/pybind/pybind11/pull/3948>`_
+
+* CMake: Fix issue with NVCC on Windows.
+  `#3947 <https://github.com/pybind/pybind11/pull/3947>`_
+
+* CMake: Drop the bitness check on cross compiles (like targeting WebAssembly
+  via Emscripten).
+  `#3959 <https://github.com/pybind/pybind11/pull/3959>`_
+
+* Add MSVC builds in debug mode to CI.
+  `#3784 <https://github.com/pybind/pybind11/pull/3784>`_
+
+* MSVC 2022 C++20 coverage was added to GitHub Actions, including Eigen.
+  `#3732 <https://github.com/pybind/pybind11/pull/3732>`_,
+  `#3741 <https://github.com/pybind/pybind11/pull/3741>`_
+
+
+Backend and tidying up:
+
+* New theme for the documentation.
+  `#3109 <https://github.com/pybind/pybind11/pull/3109>`_
+
+* Remove idioms in code comments.  Use more inclusive language.
+  `#3809 <https://github.com/pybind/pybind11/pull/3809>`_
+
+* ``#include <iostream>`` was removed from the ``pybind11/stl.h`` header. Your
+  project may break if it has a transitive dependency on this include. The fix
+  is to "Include What You Use".
+  `#3928 <https://github.com/pybind/pybind11/pull/3928>`_
+
+* Avoid ``setup.py <command>`` usage in internal tests.
+  `#3734 <https://github.com/pybind/pybind11/pull/3734>`_
+
+
+Version 2.9.2 (Mar 29, 2022)
+----------------------------
+
+Changes:
+
+* Enum now has an ``__index__`` method on Python <3.8 too.
+  `#3700 <https://github.com/pybind/pybind11/pull/3700>`_
+
+* Local internals are now cleared after finalizing the interpreter.
+  `#3744 <https://github.com/pybind/pybind11/pull/3744>`_
+
+Bug fixes:
+
+* Better support for Python 3.11 alphas.
+  `#3694 <https://github.com/pybind/pybind11/pull/3694>`_
+
+* ``PYBIND11_TYPE_CASTER`` now uses fully qualified symbols, so it can be used
+  outside of ``pybind11::detail``.
+  `#3758 <https://github.com/pybind/pybind11/pull/3758>`_
+
+* Some fixes for PyPy 3.9.
+  `#3768 <https://github.com/pybind/pybind11/pull/3768>`_
+
+* Fixed a potential memleak in PyPy in ``get_type_override``.
+  `#3774 <https://github.com/pybind/pybind11/pull/3774>`_
+
+* Fix usage of ``VISIBILITY_INLINES_HIDDEN``.
+  `#3721 <https://github.com/pybind/pybind11/pull/3721>`_
+
+
+Build system improvements:
+
+* Uses ``sysconfig`` module to determine installation locations on Python >=
+  3.10, instead of ``distutils`` which has been deprecated.
+  `#3764 <https://github.com/pybind/pybind11/pull/3764>`_
+
+* Support Catch 2.13.5+ (supporting GLIBC 2.34+).
+  `#3679 <https://github.com/pybind/pybind11/pull/3679>`_
+
+* Fix test failures with numpy 1.22 by ignoring whitespace when comparing
+  ``str()`` of dtypes.
+  `#3682 <https://github.com/pybind/pybind11/pull/3682>`_
+
+
+Backend and tidying up:
+
+* clang-tidy: added ``readability-qualified-auto``,
+  ``readability-braces-around-statements``,
+  ``cppcoreguidelines-prefer-member-initializer``,
+  ``clang-analyzer-optin.performance.Padding``,
+  ``cppcoreguidelines-pro-type-static-cast-downcast``, and
+  ``readability-inconsistent-declaration-parameter-name``.
+  `#3702 <https://github.com/pybind/pybind11/pull/3702>`_,
+  `#3699 <https://github.com/pybind/pybind11/pull/3699>`_,
+  `#3716 <https://github.com/pybind/pybind11/pull/3716>`_,
+  `#3709 <https://github.com/pybind/pybind11/pull/3709>`_
+
+* clang-format was added to the pre-commit actions, and the entire code base
+  automatically reformatted (after several iterations preparing for this leap).
+  `#3713 <https://github.com/pybind/pybind11/pull/3713>`_
+
+
+Version 2.9.1 (Feb 2, 2022)
+---------------------------
+
+Changes:
+
+* If possible, attach Python exception with ``py::raise_from`` to ``TypeError``
+  when casting from C++ to Python. This will give additional info if Python
+  exceptions occur in the caster. Adds a test case of trying to convert a set
+  from C++ to Python when the hash function is not defined in Python.
+  `#3605 <https://github.com/pybind/pybind11/pull/3605>`_
+
+* Add a mapping of C++11 nested exceptions to their Python exception
+  equivalent using ``py::raise_from``. This attaches the nested exceptions in
+  Python using the ``__cause__`` field.
+  `#3608 <https://github.com/pybind/pybind11/pull/3608>`_
+
+* Propagate Python exception traceback using ``raise_from`` if a pybind11
+  function runs out of overloads.
+  `#3671 <https://github.com/pybind/pybind11/pull/3671>`_
+
+* ``py::multiple_inheritance`` is now only needed when C++ bases are hidden
+  from pybind11.
+  `#3650 <https://github.com/pybind/pybind11/pull/3650>`_ and
+  `#3659 <https://github.com/pybind/pybind11/pull/3659>`_
+
+
+Bug fixes:
+
+* Remove a boolean cast in ``numpy.h`` that causes MSVC C4800 warnings when
+  compiling against Python 3.10 or newer.
+  `#3669 <https://github.com/pybind/pybind11/pull/3669>`_
+
+* Render ``py::bool_`` and ``py::float_`` as ``bool`` and ``float``
+  respectively.
+  `#3622 <https://github.com/pybind/pybind11/pull/3622>`_
+
+Build system improvements:
+
+* Fix CMake extension suffix computation on Python 3.10+.
+  `#3663 <https://github.com/pybind/pybind11/pull/3663>`_
+
+* Allow ``CMAKE_ARGS`` to override CMake args in pybind11's own ``setup.py``.
+  `#3577 <https://github.com/pybind/pybind11/pull/3577>`_
+
+* Remove a few deprecated c-headers.
+  `#3610 <https://github.com/pybind/pybind11/pull/3610>`_
+
+* More uniform handling of test targets.
+  `#3590 <https://github.com/pybind/pybind11/pull/3590>`_
+
+* Add clang-tidy readability check to catch potentially swapped function args.
+  `#3611 <https://github.com/pybind/pybind11/pull/3611>`_
+
+
+Version 2.9.0 (Dec 28, 2021)
+----------------------------
+
+This is the last version to support Python 2.7 and 3.5.
+
+New Features:
+
+* Allow ``py::args`` to be followed by other arguments; the remaining arguments
+  are implicitly keyword-only, as if a ``py::kw_only{}`` annotation had been
+  used.
+  `#3402 <https://github.com/pybind/pybind11/pull/3402>`_
+
+Changes:
+
+* Make str/bytes/memoryview more interoperable with ``std::string_view``.
+  `#3521 <https://github.com/pybind/pybind11/pull/3521>`_
+
+* Replace ``_`` with ``const_name`` in internals, avoid defining ``pybind::_``
+  if ``_`` defined as macro (common gettext usage)
+  `#3423 <https://github.com/pybind/pybind11/pull/3423>`_
+
+
+Bug fixes:
+
+* Fix a rare warning about extra copy in an Eigen constructor.
+  `#3486 <https://github.com/pybind/pybind11/pull/3486>`_
+
+* Fix caching of the C++ overrides.
+  `#3465 <https://github.com/pybind/pybind11/pull/3465>`_
+
+* Add missing ``std::forward`` calls to some ``cpp_function`` overloads.
+  `#3443 <https://github.com/pybind/pybind11/pull/3443>`_
+
+* Support PyPy 7.3.7 and the PyPy3.8 beta. Test python-3.11 on PRs with the
+  ``python dev`` label.
+  `#3419 <https://github.com/pybind/pybind11/pull/3419>`_
+
+* Replace usage of deprecated ``Eigen::MappedSparseMatrix`` with
+  ``Eigen::Map<Eigen::SparseMatrix<...>>`` for Eigen 3.3+.
+  `#3499 <https://github.com/pybind/pybind11/pull/3499>`_
+
+* Tweaks to support Microsoft Visual Studio 2022.
+  `#3497 <https://github.com/pybind/pybind11/pull/3497>`_
+
+Build system improvements:
+
+* Nicer CMake printout and IDE organisation for pybind11's own tests.
+  `#3479 <https://github.com/pybind/pybind11/pull/3479>`_
+
+* CMake: report version type as part of the version string to avoid a spurious
+  space in the package status message.
+  `#3472 <https://github.com/pybind/pybind11/pull/3472>`_
+
+* Flags starting with ``-g`` in ``$CFLAGS`` and ``$CPPFLAGS`` are no longer
+  overridden by ``.Pybind11Extension``.
+  `#3436 <https://github.com/pybind/pybind11/pull/3436>`_
+
+* Ensure ThreadPool is closed in ``setup_helpers``.
+  `#3548 <https://github.com/pybind/pybind11/pull/3548>`_
+
+* Avoid LTS on ``mips64`` and ``ppc64le`` (reported broken).
+  `#3557 <https://github.com/pybind/pybind11/pull/3557>`_
+
+
 v2.8.1 (Oct 27, 2021)
 ---------------------
 
@@ -680,7 +1288,7 @@ Packaging / building improvements:
   `#2338 <https://github.com/pybind/pybind11/pull/2338>`_ and
   `#2370 <https://github.com/pybind/pybind11/pull/2370>`_
 
-  * Full integration with CMake’s C++ standard system and compile features
+  * Full integration with CMake's C++ standard system and compile features
     replaces ``PYBIND11_CPP_STANDARD``.
 
   * Generated config file is now portable to different Python/compiler/CMake
diff --git a/ext/pybind11/docs/classes.rst b/ext/pybind11/docs/classes.rst
index 13fa8b5387..c0c53135b8 100644
--- a/ext/pybind11/docs/classes.rst
+++ b/ext/pybind11/docs/classes.rst
@@ -48,10 +48,10 @@ interactive Python session demonstrating this example is shown below:
     >>> print(p)
     <example.Pet object at 0x10cd98060>
     >>> p.getName()
-    u'Molly'
+    'Molly'
     >>> p.setName("Charly")
     >>> p.getName()
-    u'Charly'
+    'Charly'
 
 .. seealso::
 
@@ -124,10 +124,10 @@ This makes it possible to write
 
     >>> p = example.Pet("Molly")
     >>> p.name
-    u'Molly'
+    'Molly'
     >>> p.name = "Charly"
     >>> p.name
-    u'Charly'
+    'Charly'
 
 Now suppose that ``Pet::name`` was a private internal variable
 that can only be accessed via setters and getters.
@@ -282,9 +282,9 @@ expose fields and methods of both types:
 
     >>> p = example.Dog("Molly")
     >>> p.name
-    u'Molly'
+    'Molly'
     >>> p.bark()
-    u'woof!'
+    'woof!'
 
 The C++ classes defined above are regular non-polymorphic types with an
 inheritance relationship. This is reflected in Python:
@@ -332,7 +332,7 @@ will automatically recognize this:
     >>> type(p)
     PolymorphicDog  # automatically downcast
     >>> p.bark()
-    u'woof!'
+    'woof!'
 
 Given a pointer to a polymorphic base, pybind11 performs automatic downcasting
 to the actual derived type. Note that this goes beyond the usual situation in
@@ -434,8 +434,7 @@ you can use ``py::detail::overload_cast_impl`` with an additional set of parenth
         .def("set", overload_cast_<int>()(&Pet::set), "Set the pet's age")
         .def("set", overload_cast_<const std::string &>()(&Pet::set), "Set the pet's name");
 
-.. [#cpp14] A compiler which supports the ``-std=c++14`` flag
-            or Visual Studio 2015 Update 2 and newer.
+.. [#cpp14] A compiler which supports the ``-std=c++14`` flag.
 
 .. note::
 
@@ -483,7 +482,7 @@ The binding code for this example looks as follows:
         .value("Cat", Pet::Kind::Cat)
         .export_values();
 
-    py::class_<Pet::Attributes> attributes(pet, "Attributes")
+    py::class_<Pet::Attributes>(pet, "Attributes")
         .def(py::init<>())
         .def_readwrite("age", &Pet::Attributes::age);
 
diff --git a/ext/pybind11/docs/compiling.rst b/ext/pybind11/docs/compiling.rst
index 75608bd576..2b543be0be 100644
--- a/ext/pybind11/docs/compiling.rst
+++ b/ext/pybind11/docs/compiling.rst
@@ -417,10 +417,10 @@ existing targets instead:
 
 .. code-block:: cmake
 
-    cmake_minimum_required(VERSION 3.15...3.19)
+    cmake_minimum_required(VERSION 3.15...3.22)
     project(example LANGUAGES CXX)
 
-    find_package(Python COMPONENTS Interpreter Development REQUIRED)
+    find_package(Python 3.6 COMPONENTS Interpreter Development REQUIRED)
     find_package(pybind11 CONFIG REQUIRED)
     # or add_subdirectory(pybind11)
 
@@ -433,9 +433,8 @@ algorithms from the CMake invocation, with ``-DPYBIND11_FINDPYTHON=ON``.
 
 .. warning::
 
-    If you use FindPython2 and FindPython3 to dual-target Python, use the
-    individual targets listed below, and avoid targets that directly include
-    Python parts.
+    If you use FindPython to multi-target Python versions, use the individual
+    targets listed below, and avoid targets that directly include Python parts.
 
 There are `many ways to hint or force a discovery of a specific Python
 installation <https://cmake.org/cmake/help/latest/module/FindPython.html>`_),
@@ -462,11 +461,8 @@ available in all modes. The targets provided are:
    ``pybind11::headers``
      Just the pybind11 headers and minimum compile requirements
 
-   ``pybind11::python2_no_register``
-     Quiets the warning/error when mixing C++14 or higher and Python 2
-
    ``pybind11::pybind11``
-     Python headers + ``pybind11::headers`` + ``pybind11::python2_no_register`` (Python 2 only)
+     Python headers + ``pybind11::headers``
 
    ``pybind11::python_link_helper``
      Just the "linking" part of pybind11:module
@@ -475,7 +471,7 @@ available in all modes. The targets provided are:
      Everything for extension modules - ``pybind11::pybind11`` + ``Python::Module`` (FindPython CMake 3.15+) or ``pybind11::python_link_helper``
 
    ``pybind11::embed``
-     Everything for embedding the Python interpreter - ``pybind11::pybind11`` + ``Python::Embed`` (FindPython) or Python libs
+     Everything for embedding the Python interpreter - ``pybind11::pybind11`` + ``Python::Python`` (FindPython) or Python libs
 
    ``pybind11::lto`` / ``pybind11::thin_lto``
      An alternative to `INTERPROCEDURAL_OPTIMIZATION` for adding link-time optimization.
@@ -509,7 +505,10 @@ You can use these targets to build complex applications. For example, the
     target_link_libraries(example PRIVATE pybind11::module pybind11::lto pybind11::windows_extras)
 
     pybind11_extension(example)
-    pybind11_strip(example)
+    if(NOT MSVC AND NOT ${CMAKE_BUILD_TYPE} MATCHES Debug|RelWithDebInfo)
+        # Strip unnecessary sections of the binary on Linux/macOS
+        pybind11_strip(example)
+    endif()
 
     set_target_properties(example PROPERTIES CXX_VISIBILITY_PRESET "hidden"
                                              CUDA_VISIBILITY_PRESET "hidden")
@@ -577,21 +576,12 @@ On Linux, you can compile an example such as the one given in
 
     $ c++ -O3 -Wall -shared -std=c++11 -fPIC $(python3 -m pybind11 --includes) example.cpp -o example$(python3-config --extension-suffix)
 
-The flags given here assume that you're using Python 3. For Python 2, just
-change the executable appropriately (to ``python`` or ``python2``).
-
 The ``python3 -m pybind11 --includes`` command fetches the include paths for
 both pybind11 and Python headers. This assumes that pybind11 has been installed
 using ``pip`` or ``conda``. If it hasn't, you can also manually specify
 ``-I <path-to-pybind11>/include`` together with the Python includes path
 ``python3-config --includes``.
 
-Note that Python 2.7 modules don't use a special suffix, so you should simply
-use ``example.so`` instead of ``example$(python3-config --extension-suffix)``.
-Besides, the ``--extension-suffix`` option may or may not be available, depending
-on the distribution; in the latter case, the module extension can be manually
-set to ``.so``.
-
 On macOS: the build command is almost the same but it also requires passing
 the ``-undefined dynamic_lookup`` flag so as to ignore missing symbols when
 building the module:
diff --git a/ext/pybind11/docs/conf.py b/ext/pybind11/docs/conf.py
index 092e274e09..2da6773f4f 100644
--- a/ext/pybind11/docs/conf.py
+++ b/ext/pybind11/docs/conf.py
@@ -1,5 +1,4 @@
 #!/usr/bin/env python3
-# -*- coding: utf-8 -*-
 #
 # pybind11 documentation build configuration file, created by
 # sphinx-quickstart on Sun Oct 11 19:23:48 2015.
@@ -36,6 +35,7 @@ DIR = Path(__file__).parent.resolve()
 # ones.
 extensions = [
     "breathe",
+    "sphinx_copybutton",
     "sphinxcontrib.rsvgconverter",
     "sphinxcontrib.moderncmakedomain",
 ]
@@ -126,23 +126,7 @@ todo_include_todos = False
 # The theme to use for HTML and HTML Help pages.  See the documentation for
 # a list of builtin themes.
 
-on_rtd = os.environ.get("READTHEDOCS", None) == "True"
-
-if not on_rtd:  # only import and set the theme if we're building docs locally
-    import sphinx_rtd_theme
-
-    html_theme = "sphinx_rtd_theme"
-    html_theme_path = [sphinx_rtd_theme.get_html_theme_path()]
-
-    html_context = {"css_files": ["_static/theme_overrides.css"]}
-else:
-    html_context = {
-        "css_files": [
-            "//media.readthedocs.org/css/sphinx_rtd_theme.css",
-            "//media.readthedocs.org/css/readthedocs-doc-embed.css",
-            "_static/theme_overrides.css",
-        ]
-    }
+html_theme = "furo"
 
 # Theme options are theme-specific and customize the look and feel of a theme
 # further.  For a list of options available for each theme, see the
@@ -173,6 +157,10 @@ else:
 # so a file named "default.css" will overwrite the builtin "default.css".
 html_static_path = ["_static"]
 
+html_css_files = [
+    "css/custom.css",
+]
+
 # Add any extra paths that contain custom files (such as robots.txt or
 # .htaccess) here, relative to this directory. These files are copied
 # directly to the root of the documentation.
@@ -345,9 +333,9 @@ def generate_doxygen_xml(app):
         subprocess.call(["doxygen", "--version"])
         retcode = subprocess.call(["doxygen"], cwd=app.confdir)
         if retcode < 0:
-            sys.stderr.write("doxygen error code: {}\n".format(-retcode))
+            sys.stderr.write(f"doxygen error code: {-retcode}\n")
     except OSError as e:
-        sys.stderr.write("doxygen execution failed: {}\n".format(e))
+        sys.stderr.write(f"doxygen execution failed: {e}\n")
 
 
 def prepare(app):
diff --git a/ext/pybind11/docs/faq.rst b/ext/pybind11/docs/faq.rst
index e2f477b1f5..28498e7dfc 100644
--- a/ext/pybind11/docs/faq.rst
+++ b/ext/pybind11/docs/faq.rst
@@ -8,9 +8,7 @@ Frequently asked questions
 filename of the extension library (without suffixes such as ``.so``).
 
 2. If the above did not fix the issue, you are likely using an incompatible
-version of Python (for instance, the extension library was compiled against
-Python 2, while the interpreter is running on top of some version of Python
-3, or vice versa).
+version of Python that does not match what you compiled with.
 
 "Symbol not found: ``__Py_ZeroStruct`` / ``_PyInstanceMethod_Type``"
 ========================================================================
@@ -147,7 +145,7 @@ using C++14 template metaprogramming.
 
 .. _`faq:hidden_visibility`:
 
-"‘SomeClass’ declared with greater visibility than the type of its field ‘SomeClass::member’ [-Wattributes]"
+"'SomeClass' declared with greater visibility than the type of its field 'SomeClass::member' [-Wattributes]"
 ============================================================================================================
 
 This error typically indicates that you are compiling without the required
@@ -222,20 +220,6 @@ In addition to decreasing binary size, ``-fvisibility=hidden`` also avoids
 potential serious issues when loading multiple modules and is required for
 proper pybind operation.  See the previous FAQ entry for more details.
 
-Working with ancient Visual Studio 2008 builds on Windows
-=========================================================
-
-The official Windows distributions of Python are compiled using truly
-ancient versions of Visual Studio that lack good C++11 support. Some users
-implicitly assume that it would be impossible to load a plugin built with
-Visual Studio 2015 into a Python distribution that was compiled using Visual
-Studio 2008. However, no such issue exists: it's perfectly legitimate to
-interface DLLs that are built with different compilers and/or C libraries.
-Common gotchas to watch out for involve not ``free()``-ing memory region
-that that were ``malloc()``-ed in another shared library, using data
-structures with incompatible ABIs, and so on. pybind11 is very careful not
-to make these types of mistakes.
-
 How can I properly handle Ctrl-C in long-running functions?
 ===========================================================
 
@@ -289,27 +273,7 @@ Conflicts can arise, however, when using pybind11 in a project that *also* uses
 the CMake Python detection in a system with several Python versions installed.
 
 This difference may cause inconsistencies and errors if *both* mechanisms are
-used in the same project. Consider the following CMake code executed in a
-system with Python 2.7 and 3.x installed:
-
-.. code-block:: cmake
-
-    find_package(PythonInterp)
-    find_package(PythonLibs)
-    find_package(pybind11)
-
-It will detect Python 2.7 and pybind11 will pick it as well.
-
-In contrast this code:
-
-.. code-block:: cmake
-
-    find_package(pybind11)
-    find_package(PythonInterp)
-    find_package(PythonLibs)
-
-will detect Python 3.x for pybind11 and may crash on
-``find_package(PythonLibs)`` afterwards.
+used in the same project.
 
 There are three possible solutions:
 
diff --git a/ext/pybind11/docs/pybind11-logo.png b/ext/pybind11/docs/pybind11-logo.png
index 4cbad54f797d3ced04d4048f282df5e4336d4af4..2d633a4d0c129d6bdd94b4134c9516fdc92bb192 100644
GIT binary patch
literal 61034
zcmeGE<9BA=6D@$ow$(w$9XlPnW81cE8=Z7)vt!#G+jcs(-FN5xopbJga6jEKo+lsn
zSkFS$teRDG?U4w1*&m2-cyJ&fAczv;B8ng&5LO@{pz<(Kz!R8?TV)Us%rBnG>duM=
zZp8MEcBU5ACdAGj_9nz8?iQvXAnvP`>1K6g-yMVh9a6hPl=^eZ3Zz{_@$m6a{l+3q
zZZ{hAoqJbSp+MQ$0sB7nob#`1%I~@)nlRh2a!J8zTMpNR>@bRu&u{S64&m?n<Ga$~
zYgX64{wbo@Ym;541E-I-{O3a_XJ6ay&8s~^hmY$+xqlxN_m?4E-A@j^72K3v%iRp|
zIVDq5M$fwDnV0cD?7A6Til+|meRkRtFSph=rT%q<clr7L6SPpaz^@*q&LVJf|3{aD
zxj&`1H1*8!w{+=x$2*6JFX_#w8e<=M?e+S5@D}`C*w=qgC(lmzV=o^saszT7KB6B=
zbf?W?-q+3D;SY}keo}91F}G8gMf~r(f9_Jwz5i{tzjhwhb2B7GE;8iz-4<|pA~R%K
z(|6mId$WGv=`}wZUCjiAi8{He8{E|HhDV%iQ6{$(4B=0`dJGJpNpY23e=ibsx~tFD
z!n2qmYAcO(R~jR|Tt8luJRC_QD#@{mxbzwF>c-@D+x(SMJUp<wcIJ0|{ifIbMoBc)
zdEyrRjiNF(2Z|Ci)1=htB}Hu}&b`~GzPX2=uj?GS8x+2DR?ur~eI+;Sx%E%O6TH^*
zxzmBm;2{GxkBuKgPhd-PZEEVVKq-?}1ye(!X7qgR9I@G7>J-*v0amFD(c*X~896E5
z<IRqefyc%XMVS|-^}_2BHV#jgy15EPgtvy--Z-r$L(_%y`=L;eiALq}#-?xax5-)-
zi<VZE%`+W`x^4HXT9-b~L>9)J(_&w2?wCToJTvjXFFER7z8M#%nWeFxTGvl8`GDrA
zBv-bro><XtSsWtDoVBf8cv<sbe`g$^9+Krfzhn8SSL1q1w`ALS{=i-J?`*ZX-0%4b
zS|@6&s;F7XQ|EPs&G)6@VGm8g+3*DSixm0nbukjiQDJrnp_If)rYDw|Ar)<{xRPD=
zmjxzGbsVquYqLvVmka)rHKTX_6Rj7wEU)>tf3cIi4HwluuNhS~$LpTl4{1URtj5K$
zTR9hUC-e%pevi-AE%ocVK2s*=vpEZ+X(eHfR*}fw8gdg6dK7MU4ou8$IjlC#O?b=K
zi|6$B{bn0GbtLSYL+Kv(il(E7%jjwoC5s&l5O~Oqwd#d?&i!sbUDg`8oYLZUXkE_a
zk$APsvN;dwqVU+bt!)lr)9S8T)oMEo5rbwkVhNy9qUxRfji}{}Gen#89$)UCpM4Fu
zH<B*JgnH00i{CRcJiET1&kc4mV}axJ(ePG6(ji4^dktSs*F}3cc4onobE76#m1lN!
z7V@4ie{*YU+<xnd6hFRe)MaVdVv|3i5oj8e1&KziHn?aq)vG_qARyQ9^}g;qPx>d1
z2I4go-M=U(U=NuuM9+Txqki2K_GuA+ark+EJcPE)Kon8`sK<M4rTX%BWtW?Htv#DH
zbp7&@kL&S825M8YDTyije$&~$LH&I|z9s>6lg0C<1m?{O+j$?Fr%jrH3FyZ<vn@l+
zE<&7AYS`R2hIo(Tv)s%C6xEh}`Wn-eqV-fkipw-*doOUx_lVnzzJ|dDccNRARm6@g
zTnE!g!)hBJX5Xfk9&L>6ngjOZ11Qz2-kWv8U(;YiW&TI#ZSy;4tg?pq&2D2R6D>^o
z_b;>pqfSJiw8*sBc$41yJQsT;_~Hw%@c{u@n$+!-3I0JZsPNGkX}b+#!w6sIbS!mO
zXxy|R>A_)NSMaj`qz<5~<J`I!c4w)vq{RJN)3^2HUoTdLF?!InHDwgzuV>Q!d*+(f
z_-sjRy}Njg)8vyR7SO@KP{L@NYKK$N&XiJ7?sa%t%PmH2>klOrLnkolPJfWB3mUDQ
zMg=w1!<8ElPSbJNWb{|l3#PVr=y%f27<Ti#Y<*+Snj05mC?-2EZOoV@cT#IJ35fuL
zSQhy5eA1YCgJotdWGR<qK6Ki}ny*;P;k>-5$t%Lb3i^-_a;C!&)J%C_X;CHfvF`y`
z4Et<dz5HN=I7kNLX}=Y2v;*~ushWb)B#!_TG&@X(H#nWYIX8p+AKiE!Lv#Ni7oqtS
zI->CNho&#e1r$`nhF9#uTdYbO$4hY2n>cF0Slme2FZfM`mGIRK+3DLdT<NGqoEW#$
z?h*2MAO}5QJtT!5J=_@G{@7#Etv@A<{UveRu}subo;?X%F-#nwkGO>KSZN>It@X?Y
z0x--GX>r`$kIb07SBf9gdxXn=&F{YRxiH}TJF6Pe!}Jcf=3#esRCS!(-i4)st0XpW
zfMLw+g$c>>u8pv$`Qt5csNDZ5ZX-2I3g(ojoB51ZAo@)jtKD8m{K!(riX;N0dZTbr
z?`Z!C+)98;1Hs0G)~JT^i}CCb$i160)tfZ6bQ`A5M8;NCF@upWCM=lLGj$x68;t2R
zYyqb_ecR7MZT4wLog*yyQP@-Ynf+{vrAum0pS*m>*<mw8__#Z~5!EVd^dQ1{uhj+U
z0FZb78G}5TW9nee2TKi~Cb!MZ#@PEirK|ArreRmzpP`Q&1o$`1(PHF;@=I}5t=?#g
z&4(@<#2`4HrD`I658us*QcG7Mm&e*Q?4qxX!&2~w6d_o*@!-N8^w`0ZOR>R{MC6HC
zzC|kT)=Y`~<nX#5zTBo7KIo~)B{=`m+Sk}S*Og0j$H|!~q58EurUgwL%6&JP8BI2x
ziDCM2V%#)95fzj<G>L+~PgDS2#TX*2Pkm@i{vJ2z$5EI8e1-amGmNZo3H3T(O;20L
zumOQ(I6P7Z9c%DaeYCZe<)gHqdy`mYS!#>$a7;8?DhjePdOhg#a3EPYcC`^B#jbs$
zJsNB|LYKMP$vzteqodjf_>jr0ur&gf|AmntHO&nsd$3rf1#Ei?)vZY^)OVsyP1Z}C
z2cePEaI$+Zc}8pMGxS%!+JO6Ze0J#;q;$wLi6LtG4)p$(Oi+kj>q>saqupxypQ&Y`
z@7Ez2jNmnFly~(8?3RxeS8xyCwjMVQ{A2J+FYfMTjTRYMN+~cFrwN8Tv1Js8)|#c-
zORUIrBl_g*4z6kn(LhtjcsLLYF67UN1<VeNcFspMnFPJeHv@fY^Dq?A$>DZm7+JB{
zZ?p!7D*X$K>nhlC3PZ=dYxTBqh;;_FSN%2n`U>q_&++*~y*PTX0x}_#p(vR)Q#)E#
zl^pHjjosC-4eiKt?~m`RO2Oe^D<3#An|laGs`fh-zGx2wA4h+u-}@ynp-~Gp43-h@
zSORmYxmQ7cPtw=GjV-!lk)nwdrj`zZtD1qnNyWm)DO(eUWth<NN9;Vzea&u-a%>7x
zVnfymGLxKT3wv?iU>%82Uf($#l1Ga9mpym~IH*b7pF9XwH2C=uMR-S0pV{8ZkzVXb
zVI*8w6Z~<4Ko|UPH@4wcIm2J5>~59Q!*~xY)!CA8x!j6nf|g`P;ymnk*`UbAOc<+U
z>4p8jQtrKmTR&;hv*E5+RPK;t59Vyw;?0YLUJqh?9eM0S_5Jzcd|7al9ZM>bi2B%p
zebs5Ganh;}K~RiK@pgJ0VR-fRUnt45AO=3mo&2}K-QE2<YwD+xaIPZ3Y+}E0t-(X1
zUo9l!VH6Zto9fDImu4kkHRA~;LSBPNyB?ZDFTWt)Kpd3w$LM#W^b*!dbi%k~Bitbo
zj!7n~Cfs*P#z;p<0umRM>59o~sKXOypoI7F4nPgG-H$mP=&bQ}`XBUTbN?|h{(#X8
z5K{*o+kky%35|x4NUFgdpYL5vp(3Qn*)4EQnj*&TUUjm6$Cgy{SYn~#LIQm$k}#;t
z>A?MAN%h@8WSSaJZhY=Es*l0g8OvW#Gj&@&qACPpWMYbVQ0ETbC|tr4$tn}p&K@!v
zgVAug<yobTc)i)7RBlJq7*P=`ZJWmhjxSm^vlLzsRk>Ep5T}3<mHq&iVO=Cm>lX%T
z=x}fLO{=|W;8ge<a$3MFCfEy=TaO*4<&v?2zv=S;*L&}Q;J);Cl_1MhY&xGHv5>26
z5XWCesno86h=cmXI1#wLp}EfruYUykLE<pH1qK7k!V*2e6t#Op5+!~-O>KDRP0jPD
zNe-HWRtFf_g1;Y`Q`|-FaGkiGi@R+x(v!xy(S*}IaJr@lD86&pnC1_Qdi0QSWJKo9
z66!jkN12{Ez|5!1lg4JOfETsX@k+uXruV9o_&M^Cw<YVnRpH7{Ao)Wb{7}J02`^8y
zVP=}u#}m}prHD*7q-FP44|fS#g}Rf5Rm-w#5-W=k;d&%qBIe6-#9x~;;Ds82DopL)
z!>(SGczJ?*Gord-K~TZnj6#Q*IUm`sBZNY*jmG{H&GDm7!Cv$#ije3<=-am|s*9h)
z7xeYJT)UKHmdCkSeop;Ci4&b={sErW(mj{Nl-ilwW~qY8zu?dV4B(+0#$M@;N%4hw
ze?@~IBU4eF+UtK!7cNms6@}AQetX;A=+}a^fDpF13!SCmrZ(<PY)LLpXS<;!#FYEp
z(|&ds4;!Z9kY$OUFi9f@1)je|W~!-25uw5oz~M5mFitlb@-)0Y%`orjsUt26`%V&$
zCO@R27y#O`JpWw<N@J5!qBJ2*0G`?&rsKHYmp#^SM$a>9VCA87tc!h<LjNQ0tBAYZ
zFXGcBqm+3oXh!rVd(31J*vkvLrJqdwV5nDpp7!??5OJ60;5&&c5|aO5r6ef9T<tK|
z6-d8ViVJ-$n_d?BC`b9eh_Fj|pZ7hxFv1-()$xaCKZJ(3e@xL8q$#AkS)`DE(0cXy
zq>D<1L73y!)4c&NY7iS&C^NRAK$t8jS6~E!3`OlOA@6k?JFd$YKF+T}LcOt2BflWz
z=%={(b|Ly;D<Z9?;Yr#FYNGrxGa~QFG=yPO3H$}F1}A%azRNscI}WQMH`zFMHBnS8
z!<tF=jArXyIz)h}l{h%;)jYLfyGHSw7Q?<J!J7q&AU&5cLomN<P_lvKc%<$hnI0i7
z2P>p($BYF?97n>6jejo&Ck%(dcSI*)|H?>nOAJLDy^BQJoky&s$XOR4Kny=Htq;+6
zQWMops<}DPwf&pISm_E@+PM%E4EM_{0iCT~MZe7=q`u)PSC5fDO!gHH2#L{k=L7MQ
z?p%Fd8i`))d_FORnn>0LdOb)6q_$l_zI7o6@3{ikmwO0lG!CI}@byJhcxeVb#dE?l
zY<t=k=%!RmlqhUFK}^o~V3$xahLub%(2vWoa^>G#a%8X8XcOmUROH7cQK5gKU#=xK
z#o!eB8>4a6<$y)9kX_S3MNfl>l4viK9O6i78lq;gK==wKXPkwPB|JOJ3(lKdCQdqc
z2~B^CCizYvB8!<hyG<hThVzALdB%!Y1pmp_SvA`>91Cp6@Yo%EyQ$z#ns8v2aOekg
zCUhS0R+#j`dfIcxM(=N3q>M!JdRP0kgln~rXz8y(ow}P4l+&aR#ST83o)1CXAfj{k
z#<I|e8E|4{9+OnOZp5fZBKX~qaL$B0#xU?PM5vEK)p4)$M9Etwhh>c_HKU0JaE(-F
zNPFT;lAGwr0pKi@sY+-}!hK<eEIpw?y#g3T*$aUZ^8q9<4KOna(6t;|Lv&_hOH6E&
zaSZ`jT1TFf>$k)l98l4&eB)e%yRJf7edo-WsiIWqNwJYk1-a`8R2y0X5Ibl+YCS3E
zm4}N6Rv~HG`9qrNdp$jBa>FfJmM{?N!La3{)Qo|J!pm+!jm%zUTo(S>BrXG0VTASG
zF>LT(<OvD`M74)^`hJjJjoW7@V4n`7IG<!*BaWy)$rVyH{)ILj-0xl-oB3#?%sW5%
z^Nop<tMU&!B;!vTPVf*5D)eq*S!QNsL^J}DheKgSa;mR5cQCy|lQ`teva2<Je2GM<
zVSfh43)=3p@B42zVi}f!dq>Y1Jw2#|uO2lXCD~2?OE(M6-uzavl%P-N>>EB&P%?0R
zOFE?m38v^*3!R-MpeR9^V8VdNr-{V2fs!bD=WH9kP>K(|K&2_}&YuUiu1{<qS#$<%
z9lszDC<spli}-ugAF(|gj<i5RB&JgumKfO*Cx-191q<i;o5w^`j))pXHDX<r(qv^4
zI6I>NF~J7O1pObdAwiM(h}mD*Q_S>EP~VL(xW4Pmjo^khpPa=UxzVfo6Z9gE<i6P}
zK(sOv!_Jch`d;@R<*bYCVExLy>j9$@;rH|&+_h0LC5}wApT*H4fDJB(>lzu~LrGRv
zzH6b%kf7eSE~W0ALaHJ>#^hxYkU``ck`-_Fnw?|Fb-gA)fT90^91?Xy(Ad@pMou$D
z!^<U!1#;^hyW&8R@Ju*hAKQ%T&&1S*_O+OECinZCMr6@XYDLeJSU1=~)v}Xb{k`Rw
zQff>8ZdAxK>MCzzutyhK$z&o?J$rl%+H4tiqx4c#^0|8AaZ+vpTm4Xur@rlda%;E5
zG>LVYuxsxPXTrLJDE<>x*ql@t4@_adgSxV4h{1Q>oBm5>buz<z2R3)2RVbQ*=w%dj
zLj&v)VJ=*n1_&)uk6>~5Kf=U)=pd_z)PXEkj^Q$4KF-Vc;{LT_eUCy?CZx=?uY`Q4
zOA&ap_-sG9X?e?tAw_t}(?vc&Z3t<&vPtGs9yy^lBhIu#g@&{jPX9!IZ5w6Epx!o$
zEWRcD%i9o3&sD;o7~=4QbL(FKScTEL>|D^#3>O3In9gjP=86RAc-q~gI#o4wXq5R$
zH`A~U)74<rXg#lEDEy#cYpnHqg!meEwLv|**-YANG5<I{yao6^Fdb$jL1GeQW5}M0
zlJD}2m-#zE`dE3tlreSkprhfvSt=k+eO|n>YsFm{5!NN?RXg*99qEzyG_Qj3F1jqa
zv9iZ8(thgOFD^&WZF!=pcB4BlH)dy3C*&ElY$3`_JVF$TQu8w)27M`cLFk*>jMEtC
zp@lXR*ft#7PNA+BMKYex_?cH$J{ed{h!B2k0Lfai12X(RLm)t<qu(ddr<)Q;GHB8j
z3kUa0@0n&t$Z_Fc!LO1XEu9EvpPWipIplqR9#WWP>3s?WAqK%(Raz?5`WZUj2ko2N
zBbylo_n`}A4gCp%^aa$gA*jnI`Im)bl=MmbL^fyRzI|g+-Q>>VY(WERyT$8{w82M7
ztwwdfXn_cHQ*tAtKKK>LC(JL;Go^yXc^MVeK7<do5ejH<f6Gq`VWO@s#)%@I-C~RX
zT^}A+cl(=N5KjFA%X_oS*`2W~1XR-i3t?e-31Q*?E0hBAXr|9E87dJ4oKT`5abtsc
zV+ZX1NVIrzVTqV@sK0hQo7~-OkLed*R@A?(QN#1&2ZhA2X~An1CFaB@6=BxSIQy8H
zT2kPa$Q~Z^9=^YHJ^C}`u+udgpwM!kiWCQ}?#|XN-bun|J^A}?IJ2Pd*oUW|o@R5o
zY$BFwHg3JQuI4i>1#`)`ua9$T?6B?3OjwIuchzi3pD64&EyR){!tsCGpt{c375Swy
z_LTKJTv(&0E~Q+nFkm`c8~h^}TwyHvrE37~2c{i$(=5Jes~nqw)wph|Qrj6}ahgFg
zBOVlfp?HFcfUFiqO87do)l+-aM@<WYLlNTu(*PHi=S=KN(AG-Z^-i8*(IPQxm;2@q
z2G(;Sl0%QJp=>qIfETltI+~Aro@k%Q>~iw}QuD3QsMAQ(v);c*&b%hhKNuwM>wK;`
zJ90)i=mdx88Y&VUX||kRKyKqzj-Zf`@)$@O)vU%hw6oT_?M9#9LFiB&qf$HPrvdRA
z%24Wu2nZ4gA_$5SK`1rw1lC?$!wCcg9{uwR3X-0Q13ZLwmXHyJ-iN~i1E)i8<U|4i
zAqJ5U5ma_xJ?r#v*SgPo@R2$+*E_E;?xmm!_Rj*NBeS7f#gWviIfe^al+<jSaNeD~
zv)NA9MK?3eF3GbiVPd&u)hQOqmRL(OJIa+deQ`~1QjkP9Cs{^{qkss{j9J#8{l2*`
z>iKVa=6WW=TB6CNVI$|1dHAQb#9_1V<?Z_>zdwo)3`y|+|IhzVZD5N3g#q+)Mb=zY
z0Xh(55DtI5fo>O27_g(bt~U^&4&(o?N9v=X{gEAZ$dmu35c#L1#Qi^486V7p!4P`|
zP%NmSTvrf$x-qUUz(oG@r)e^pAl6F%YJUHQBaDRxJ_`#ASMEGF-H>3Q!9pYuV4%Rq
z+10i9r5ze|ssZ)}4gLSYpl~RrWlLJoR9Z(z=WN9+C&onB%gd{xtc+SESqA2M_S^p(
ziNXxI1jDAHqEZVEjr7N~uS}`L5D^ivOaz~S;rr+DByp1kQH8{x<!K-no{3?Ss}sN3
z{&~09pUX;5PoG|0g=_Y5ZOfQC6n1iA>D#Hf8OlK6V4w(&)7~A~vvTvN+(Z3bE0G6u
z{O8W8bC;(3vs7|nbb1!I+n}Q$u&MFCj%BYZbPW6?WMM70Ew+O_b#ni+191+%Z1#at
zdc(IhkH?FRTJ3nD8dws)6~o}T*`b8P#sn0_!SD`<U=&5HVO$vc6!^x*RylD__p98S
zH6(UH2`DBlv@KRPw%*hGgC_K)#jhwZ`WtO7j&5Qd7?Fm=BxGv01pZGEu7F*?`_#%)
z74jffC#2+jI;ZgNTF8M*uFAsBPR)!zZdRDy?{`^QgvH2Y7PrPX0hk#uen;)yGUK%-
zn^BL6p)omekAg#TV!(L<vPIYKjzmkY&d#Mlo{F08d5jnj&kZru=@}UcYHB5K3h)`h
zDDh*BN>S0uRawC(<QP_feefr>?Kr_Z-pIo^u5-)EBog?Fii?UwpKZRFvrEII>8ykN
zM%%g)gZ>ZVBan`k_kg1uvqyE*)P6Ya%ob^$ek2<3TbazE2$`ZHv(nz5ou3!AGLn$N
znl7RUi618z(`Q-*iTEG%k^iLa0#0l(n?oUNVlJ%^hr*oUzAvi$8m*iTk1j;@IUF}`
z3aymUpEx78Fi!N>;NaT{>9^WqINFe}!5GJ@xG)g$$%?wZLNEh(LNL??h?G_rBee1*
zCVrByrtvYM)W8&`KIes>%M;0009&@@bZXr}xVlNfxG4OolHq3~LXM+S9|1#B&$=|D
zor)5#_TRF{N+l-t6NqLKq;AA2V(<WJ_O;cNloD>S;X{OuPEHg*sCp=fgFmN?V_vbh
zG@-9RvkJ5Zi=RWvrWNpX7X>EtH;ex+7Uhy~&=-sX@@d+_eIpGzj?7+tlpT9c3*=D>
z;#M*YIC7sc8es2Djh_SKl>hIP2YCk{#0>l+Y`yw%f=DD}W>%DAVX!36frf}A!S?+l
zh?FCV+hjIb9Li~I!6?j1=Je7q)G+^T-lCwal~JixDqDY7kdr{wtZ^Um;k(c<t+11U
zkhq{gZ77P+sD6JMrIP{FWKo2*ZyA~-D<uY8UVVLn&(Ht)An|iAJVKvCC9%`t6SPsp
zd9GgHJ$y)iFI_^&5s?OCtgMcl2?z}v|8K*~_Osfl<6pb}1a*2n9xSl@2bszo8DW^T
ziN*w7yWlwQ|7L61*$+)HUYG|l4hmUZT<qw8&6z5B@VKhAfFWTC{xlVUd9uSlXnG~G
zS&@I!+uGVvciB1tIL2_yEYhb)C43YWHaByqs;Cs5+3~T%k{DxMksTzZr{l>SF#pw@
z7?Xoh=o|k;gA6MNwrUmvhB0~~tFQP9+m1cX#Kc5;m%Efy6jyn1hzyX}AbD=SN`9Ip
zJ5OC0hNLK>m|UL6v*8Q|G!8QzIEkey(%;+Xt9d0kYQKgr7|^gJ^&PI34hcd3XQzLE
z&=fYD*sL}E<w<(&2>>5uwYB9OgYM4XFlu+ZoV4J#va^eA)uw-76B4h2p*TnZQro`w
zxwW~u{G{1L$J>9$Es`Uy1@(SdF>~tLS#Pqze-|D_Z)R>0M1ssP%kfdAn-9)yftLjk
z#h5eVByDGBH@PI6^q3qbBk;+!iZAAt^yBS8CNgxpeG%)>rra4)<WP#GB*FV&a)v+4
ze*ew~;QzVMh{P3b&U^u62lp@^bdtW3k`g+wy&MORtdm40I5r?r&cvsh;o;$_^GScs
zEM)q{&JbK(O>L+>NQB0+$e*-~A4p;*=kNC0Jpt-KIJD^vpfI%N%L~JdK=PR%-4EUX
zrsNXQ1p~}Kl8Xzn4J4osLmtmWvNo_?4HY_&CNkgU#l_I3CU#{NmAu+o)W*if(yA)i
zlZ*fX5FjwVeE9;v>?C0TjfedG9EE|6r0~z8>}TQtHk1eoI<1|$j~!b_G<ih>lDvp2
z<cK^G0$(Wz2hjwv#-Mk!67v~lrBXbll@mS#FqcU27-SI1?Vx8_2V2WD?_ED$ynu{H
zV_aJZP?>ErcL@U)T*y3baOR#O#EUnBb+@A(|9>1~K~l~Fr18&u>?at>W()^5o1z65
z%A-i+0$!@XQ;mLoQ;allS+gF2lK;G#7M4LkOGF>o!-W+!;LMfvlTJC)M!n+v$7lSs
z>|~<;ZV$q+68|Oy@-~YNoa6p8akbcDNyN!5`#uMMMs(s)s1kgT$U<m6{CFsFFiKye
zlVs|zbe-I-xw)W%AF-A%zN%H)5x;+bV;`q&&?7_Zb(-QWIeyh>bEa0U)}09bM1nj-
zZeHsZKnzJx5##B=EQ*P>6z24q&BT7+R@c^y?(gsG4ODX$)EIFJXRd&07;(m>UeC9>
zbIyMNhO_6$1cGk#IkX-~`>mW!S2qBSQ#12KxKse^9-f-o*;RAZ80Y2$?(FO=XY+co
zBHy6v_4oH9JeX0<pKW#X+W|+80kU?ULWX*wHhl`k5Xs{F{M7|9kR}a_zt{d`V`bg<
z-r%v{_IKQ%T|+A^t5R@tbF;oZnDBtV<FMNtC+#qs_RW*0YBC!7+0jTODQ;b0_GzAE
zb#%YnU#<3LX0s$%744M&SqN@)Lz`1)+WYCsz<Rlcr26vuC-=p6X{zaK7N-N4-t(wh
zjo-hIj?STp432(d=A=LOi}hnb1#xV{X#u25l3<L@RgKAmzGG?g9PgWv4tO?@8pDA|
zE8jt*_POS*%}y_ey%>%Il~PbiDQhFs&wXr%Dn+#i@k|5)r-YU7Z)EWk<O6YbaoMRI
zA{_II%h5lcEh50wXfTkIL+-lGy4)V2%%Kf`l>E;uf>lMi|HWcFHQ}^2=b@l)Ui!4`
z114O^ky?Vm>)+D`dH~?S%&3AuP6F~E_l*TT0qvmxKL*@DlkJA4y^xHL5AQ?>i+T5_
z3#xS5;i}%{4Jkcp5h8(1{i7m+|9=Z8+FO~mrov+f!JZp;{CmGVEERaEE1}GG2b1G}
zckb=HRAk16`j0F|Y&dWA<8YuRLSO-2IUhGYr<JIs$bvm*nc@lhLFCh*zxB_idBb&L
zYINk1ENl9qT<;wY021WuhtxS4LpBsZb>vMug@RuJ1jeI2vtlD9D*cCAAS+@v$b%;~
zx7t&bxM0@Fga7Yv0adlr)7tK1$#ksP6aYrY-Q)C}4BV(8pOqYrrt<<>@*sf|+IH(R
zs<WQ|_|Fg_p$7+jE-o&k1GT?CvtHqFZma!I@OL5h|Hh(OzWMsiG!RLSYvbtSVgPQ6
z{2yGddV`>ow(35O1G6+qO(p8IQwFwERYmXpJf=68(sfsq|IG8(l%7ZBHKgBw=i_P5
z+?qD~)uG6g9${J^@Pwr0Cr|DWXM8p`OOqq8PzlMS`B+ww8^Ny3qXPi?UR|7>w@>x{
zc?xDPvSEJykg@)gH7)xk{^vtdb9$b!0N>iwX<4!FsqiIM-9-PMhSZyFb!K4MK>jm3
z#?dPLXT<|UlC~QVdeEonuRRZ-l5gvJZzyUo7|X=+bB8W&ZbP3Sm_JvGJm{kh`WhU!
zE7tYp$gx#kM<?qR+xQcpaf%w;x7%uSki^Q?f*;ENy4)@|^KaNCL;>7r)2m6iJ~*Gd
z{V!hfj`$DzLg1yU&ecDixNdI>*!_PX$TaL4VN&_@qLMR3LxRF@U^k=LU5}lYNx||;
zdU|?}2YnzcpCyekd!`2?Hq>YJK@x0b#%^V%5YAR)Da4Hnm?E?49pLY)Wuw?6>DQ#+
z%VM>m;}3l$6}?&3_wc1)yii`j<9+`H)bBZ3q?`ZJjSyh22?2hzi0w49?B0S{K260h
z`HABpQuhs2WmG8%9WWyh$o{4vd=|a11_#CRg2#4^U{~M3<%Ndlt(aL^u{WD+G91^N
z{x@5n5A8%q3CvvybKXXWhm%6abp9XUfn~Qm6~<bv(GQ8HPa8|k5~QcGt)dRkow|zm
zjY^k7PE*m@x_tM{QJGD-xeO~hJxJ*i5?%j@sh>ugBPZ+h{k{h-pLO^c7v(y=uB~no
zUn=^o8GmHg=9~<F!~eu5L;z+)C6{~UNtzAnJE2sLG)sAs4@vl&iRzrTdpxn<Z$Ac1
z^i}1=z*hUV2X2R5h_0KY>O|?4gmgvL&$u*itt}gyvw}9QiSsDhuKQLnNp@tUTF;rk
z)L$5cq*@P?p%JWG9Yl3Rd~}Z!8Ev5Nj=%*{U^J6=u}Lz9;WrDJEBLY+<319CJV3o3
ze6tMA`Qcv-=1#O0<<k$l-*C~T`NGqvn9-Uit~BLuL+!g55>e5KbWAf$A}B8ABvr{u
zU&2DDG*gBX20ERY>9f1)q51qz+Yf7EankUrTte+(eswi+h{&(QnA@%PHLK97TkmZb
zo7@157q`ee9(w@q9IM3}b@=94_YaEzg^s_DEBYB0Do14}=Hp^Fa#V_T7`JVCFAM)G
z!^WkbTMsLNFFuq4gCDLaF9Ou@zkI^{+%VvHy3kVjiC4~U{4Y>qC>KP%#}}ZP$H#K_
zvo5o0G`smSR;ZA=&%QafiI-Uu$zblt2K&Xv;z~h(^clnLIeD})WSbjy<c!Z%{%+Hi
zj4q!FX<s3V|D!x!g0Rch^<rlhZp>wqJN4GbI^-~cvZ34MkTT7{J1&=CVxSl_s5P5P
zKG|er`f>PMPMS>)8snWbF=(oIWmRL?P^L8vh-D_=+}xF?+*{8`*#LRO+jc9m{m*yX
zpdhxossUQbDirT~5mhM3^fmp)D7j1ETu1&EvdF)FIq<x$={H3MYOSJhlxRlFh=DFm
zH5>g^r2HIdjbh)l6)MN~f=ipqkBy0G3**Vl_dfi?;nk*jH^+@XW;jWS5f1`KlvhLn
zSz)@DID$1$7dlH8`Y(q2J6QW0=P!Mawss=QsZfU(A%6PM&?LJNNd2h01&Yo~p{;##
zw$Y%=%Fq!i<GZK=BL#xX6Val6gn9}85vo4(Y2oQp;v*30x@6!LJp%f$up}Qw`B0;&
zf^`tztEwXV)oALRr@<9I+HA8BK@8%f0PCfr=>?ccJ-om%(;ju!$V0tzg%78y7jwQ#
zB-thHWg43geCD55mX_PDLo8<-wvTK%Y&Zy`gF|`pFj-ky$6!H!-cXalK=smo##Ri)
z4Pr=Lb&<<ebI<Xul|GGfqE!F>E%~jrqOvk+sN`+1ur8K$Pa%u99UFb6p3HA=BZ?N%
z*(C=kAiIibIe=EGk^~x=P?|%ILqf0h2%i;O%md(8F{X6*lr;+-C56$0J`5{XEGygA
zysr|e;xL$gPkW*jx|RV8*5cHr#-k%F5^kN`EX!x?q`qbs=`pLL<n$FfssTw-3IEF8
zD+PJZ?)xkodyJWT%~42TJdd2Y);mnu(%Jp6_}^1D1pcM9sFNNM!+0J$4Gfe@^R3;(
zpWQUVAxCc)?O|V^WORl{|D?%TB(q`Fl7BKS$%;S>9HG*lLsmq^#T~kgz1Lqc#I7iI
z*GO9->oG?mE<UDIO$Z-D-)b9kK5HC?zxL){{2i=F{vK4I&a(tnpx{q+lsjbzlFj*L
z@M@UzcXfTgA=>p#gw#zpu5=3zUI4?7oND@5{7LHzBvmP=7`NZKHO0kH|C4V@D=TGi
zAxsJWN(BVm(1=bSDMCQ#DEeIo6l!<h2tZd)RYb8h&Z=iB<~p=$1A~)>j0~k^-FPS<
z76gd>fBr-Q6d#vlLX1Adq5LLgqpv|#lI46PZo8jVynJ>EC8|TB59u!*ULB^T0*o}%
zmk*ZJH}Yy;NTzBT%%iY=1Ml06E!t;m1oD9cxA>|v3qVzQnoa3aW<OVxr1Vgz7(uQJ
z<nQiFzk$8%u*fD`=G+U60U=KA)SsK<PCO3W{)N?E7_wrFVgGv4s@~p#R)v8J!_e)3
zc~A6#Q=v*zfyFXziY^ia>e)%?FN<Z(w|wRBHs$w3EYe>mU$7+TDf@suZNjqq-b@F)
z!K)Rmw$&CW@VbUWK&S+z&5fB6Beby%jqa0Yr%zVU<h^H2bkH)+h<nF~yZhHTU-2n@
zS97wOm1b<jQo+}ahF6lwsJ{}vxMPvMp5zoeJUV*sNJ|YR1{A3rl%l}D_L@pII}1ig
zjPyyfD(DXwJ9braU{zwaARu*_TU!SY?WNsoS~jmC#ttPU?h?X<puj+EKfdlfzAp3{
zV(z-xYvE88>vkg%aK_zY-+Fce@<VzA&M%JIV!)e~m93p-_KzEGavVoe6|pV}kh>Ar
zyVvffI&U%%kE7SiGc%w$Q_$U8)z(fwMQudWB@WF%%+1Y*HFDyxPcTfxM?JVix0WL!
z@SW*caO?k?(`#Q)G?Y=XcPBWT$jR|sLuC8D9M`3fOD<@q8pgPSl7jSDu=H7{cijEW
z;+PN~8j)(L{SC&Ig%rpaYJHj~>xN8LiCSieW3iW@9H`F>)AByp3Vc1N$DF>B?zuNw
zp=2ZT_&>vA_1HG|8P@g~vkGBA9NoQ;?dZQ}HShenM(6Z91MyclR+5%0Rj7B>ARiY)
z<?R%H>L>DIWX4m0!UUBjpwTsVCadyl@iUeL-Bwp)2n`$BX}|?mW5nNEVTE!KDwSB`
z7>5$vxcD2|5uwGk#b-h>m(&Hbd4^=*&<$>7TPpq<)(=kV4tO?{)Tl-Sff4Vn21hhC
z2F&+<_WQF37Pl_`$&`j3t+CUNL=Z^I7-aPleDzZK%Bw0IXG}~?9pgUWq1)#gpuAEJ
zFwAFV^}i9-?f(7yH|w>lJfx{;l12gwAw2ETt)x-yw*__S=rC^_IY98pZ*0V@tgM8A
z0K0njHn+0cQ=b<rv=|^ODwUCu@i;}71RdJD&IJYmov^u$jSBo3{U4dXowI8CTt-dn
z;~{e<Mm)FUEbezRKgMhYFrv0##}*ZpfW!updzfCdA!h3v^8Gx4Cd>3r8&`-GPnZ?T
z*h{EmG_{#@V6a`dQ^2mp1hi|}9i4r>jj>4H2_OnP9jvQ>!7wJj4<Swh*JbPLaXjIC
zh;JeGH6yRo(HdA|IgqKHdl#=^Vzu58=s6!fk`-EP92WnmZ7$#xDqv-`=K`!gYD?T;
ze#mO~MGY~4P4`-G^jSxz@TMVvMMLq5_^dxtCMecDV<soQ*F*=7N7q?`=o`~UuBp-b
z50Gu&vlw)E-=HCNdV*3;1v>ycDrtXppk0od<fz&XS@N_$Q)X}mAz&vR7X!so>d5kZ
z{p$WYoBQS#_HtdKIZg*@O27W~T`OK5&@+NI+TBP2C9APf#VL;h6L1uuCUCl(U^H2;
z<m;YUfe^$6raRJ+?q4@SI>*!@yx<kFnKf&cT9}*n)KBq6$x}h(OsSzz2D|eRK6&%q
zJv<l!x++0bD~Ut>MWQE>x{68&utY&~bF=<Su%q^~Hy=b$ud+{p<O%)<?TeU3I(*64
z;Bu3VmIfS~fWuQ(A^PfPaZrb0=|9VkU94Wvs%|9lx_^qj%!Gp54-lp~Yp6OFSlDlf
zZxxytnuU-un+<eXcKz7JC>EZcVOOii2FkQLVQpT7{&UCvV}FCkj{P@GLsqN<dx=Li
z*vbwXUEJ0~fDDsgi(@#4l0#A8rjNsr(-Dqd(tlOaj+Ep5kGavoS}8fIx!6#~oH%e#
zhs3$d0r%5amp^p{uFlJ+1xI_B;c8ML)F;4b#ejsBs6zT@Y=l`uh?$L%UO^KLd6B+6
zlewINhcWQ_8poMS|C$G1mBD4_3(eu~gUanR2WQ}Rh;L>$Yxqa6LiEp=e&gwWQ>Gqk
z$%bDr<r2XIWcly=!BTM}k+S;_vNgp4&V4;<`A+eVa{*5qvK}0OZX5=vVz~tc;JVe#
z#N1LYE^L}wTBrz6wI*Y1y0uz`Qz}}ad*MTC#U-*Uv8)6^Uz8d&Kziy&?>GNC=9QJf
z0-{CW)+NI-srH5~ho+_`3If#3{CuJ%E!3zp?PQ{<1#-Nz0{X2JBTn$r>B66G6ZV(E
zCY!bSf9r7rJUXrieM*ooVaP;-pUsBq2dgFA#-;J^htrdjzW2v1z1gAPB?XBSA=HcJ
z9V-)m<gq}kO;LBsNj2aJO4^l77h1??gmhOB^1~a;tNOWZGa0@lZ7}#{@t_alV-H@b
zR%2e^Z-zXszoM?UJYXreWR+Gp8_5>o%a(j$FUN%XWT-g6xB-Wqr>KqeSqo?B*_$NV
zhfO<xX&vV7Z96=wkW;FW_6G5_OP$jVe-CiW3k1e=80vk&83-8*!Xd={L5meMVHj7>
zLQ4Uvs-z9Vh(7?EX9}l4iwPe;(A&n0=R|1yOXYU4m9OQ?V2eh5K0`({n5`Bs@@1Qa
zuJ-uM=`Oj2;@a{!)ba$B`ovdYmP2}^?e&3(=(1t`$yn~id2P-ZJsB2%*S{2;4khd;
zFy@Yrj!l69i;Joa&CLZ>^cKU)5>64=e<5R=C$3)1nUGjo_~>RirB6Dvnydu@vriq)
z{aLeY8q5bSXS}?)EiEnQ16;rV95_~_)2Ysq4?kjKW8<{n7EqX&vYa`XFH<TzIDTOD
z=`YYO7+uiD2{$wHzVzDa4@JZu9nGP@95`{SbGy`4R#Qvyc}=QoWgNh>DNQ4D&rSpa
z%b|<`z^S_G3L9^y#E=%HNF)fxan)Fg%Fd9@KmUL0uLxbw^ZYv(gF}pNf4v;uEAQ1^
z*PtE7+VK_Iclg%3HiLejnAo>xr7CFzcsL#E%bGy0+fov^a!Z?%B0w?e#<PU@Z##1_
zy7lfBo~5Tb7Mp=!)x&fmF{VLOsPF!1!*p_3l7X+WUC`d(X%_W)haXVq2zDPt2hkru
z!_J?<5$Pl}74AqJG-Vgp`$`*;kRt+`pPR$E$SXX`Szk<kt$$uOFSdeiWC~-nSK9*G
zq>J4>*k;T@oS@JXs)gNS!S)p|rq>cppC!r>UnBafG!*EROBe4oH`W{NPzLkFL<l7;
z2D-&P_d>4$iP>ynXG>!vz&6XREXPLVlC)Z*4)5O%_iR{j!+z&7QI^V%9Lsmcil}Bt
z3k!qGW^sLr@`017;v87qTwL>M7!kG0<8%j!tdj5fvaPyjo1ORoF^N*n0?J)g@}Y%_
z#k{SwFBg>*H!6eI%S}qRc3cUhh0dr_v@hi^afn#{4|hRRaof!fjoT$!6DNa6w$>lI
zuvzRzs=f78M~s9?e_{~ot$-^G5-M3DFpw#)%RYYZ!|K|D;-u+gXM3)!mVd(d*C|^6
zFp`pNqeaB@8#T9`R~2?#N4`#Uv?isp48_1Tqa(E=n%2XL9Yd5p&CDpl!@y(F4LrFc
z)S7F%rbp<bC(9~G5<pj_=;=|K2#pVvoTLboFVE-M^H}dAy0T*R>sVn6R4*Cu;AK&j
zG08p6;{->{QoghPbW%$S^%ylCH%U}M2P*UoMb#L=>9HXf%9n)ng{5|Vrl{~}F$3m7
znVbnRP^ch6O4+a(4o0r)<}gQBFlAl#%!F_@SSEGZa-fUp3jl|X<l<DGU0;uWcyPTJ
zv&A3XbH<CU(;0h4awdruUU-5x%CA_YiRs6?=e&D;7fp~z_?euHMZjq<z>DP;NGU2K
zMzdRFe*<Y{ZXP$5B^hyWU<wrB$|~d<EH-I{b~O#?Ju?MAOze4>{_nd0@HY*_-CMDC
zKBrpgU8YPgSpZoD(rT?WT48s6+*ez@q!-!JOV^6^$7mO8u)#w3XjeoGGLMx<L`Rp$
z2FXf#biQ_8_T|`lAOLM>cpxz>9lD&Y=iS~@53fs$`!Vykb^7rvt%&DaG$9#CZF<Tx
zB<^w3dgYJAG<k54C@h9S_1${1`4WqpRjp3@wV24gp6UrUCR|ACm+5b`rP6H&GE~v+
z_7RBelj6Ipl{DWBSYdGRV|&e~hq?=`@kk^DhO^7+BqbA*{{%T}urYM|!rF$xiZSV$
zQe<$*?Z7rzmcawfbo5X@3d9$PP9!6so%L<>%KtY|ZyBgLL8rO?1u;bEM9H%XH3%OM
zPjc7xoDI9J2@B{@bRuVzp#;BP3Sd{M&*i}kyU{GYh^T6n#q|HtUYeOv_-Vw1kM_0A
zy6dlF>(yEPRNv)eN^m&e3Qv~77FGM~q_QvNkDmn8r4QxR)kA=SG&4P&yJ@wbX?Vx|
z-RVHOx3fez3<-Fd`|T^eURTBOX>!|Vl0xKgOv{8Q`eTj3NS}gKgw3g}!8U?ELpN5l
z@=Bd5Ly;N{1(Oex#h1;Sn;C1XTaTBs^+pZRKG}2b+IQ~S`!~{TPMr=57!{tMnj}xk
zfaOV=KjW@C;arps@O_PjNO&M26t%o8nP8zOjm%?ZpLU7{o&MjoRC%P`updJSBK{XN
zK!@*);v5;DiA(5qQJ11(8Uu}My1?_#k^`VMF$LrVLF6Dow86V$_z7DL-H7H%9T_wq
zG@&x$uV}b^hmv%0vJPtexd<rDHfyT00sd>}V#Fxom@w|ti;1%GO2&p0Y@i<0CXF&;
z`FG?4>c$fD!Td2DNWd)sK=1|nh(&DaB}qO(MOh%ae((2z7R_I{UG=xReAGKV*+90R
zuJ_Wre1U!)JAS#z<JQ#K&CTfj<sQUesYKS|<>mjPb$u3LA|y$~NPt#2K-dK>aXkI+
z?bR|iVbFqQz<^<BuepEjlANaEo-<d?PSx7=D9w;TZR+QTIZdAthZsgkGd5khLZ=>%
zDTt*!p>jU+;$mo_7gfCog!^JAKOwd@JNn0k259?0?wdlFTzEY8X*P`np>U~&LIlTT
zXi+&v5K@nH34Y3iyzJft<zuTvon;?5>66XS+Gh_T<8W(gl)p|(@?o+K?`)KW574Q;
zyD`F#pL+3r^x@_q?6{zSOzuh@lKxps78Mm~g9mJczX~PZy#NBW1h|n}_MddR6Mr5Y
zgk)oO87w0Q%F#7v&K)f>y~N^T3TJ2ML}}`EgjX?A^;KBn3pYh7a{1TC%b~?eE#QRv
zITk*@6@XyJh+J+mn<DgnY~LDqd%pdf(|I#Wlin4k$N?0h30`&V*s5qzCbsn6ZC=}&
zd8R8oJTGY!%Qo!GHmd5hrZ_QT;rWb3%Z|6{3+i?5B=p)x0kWev)5k83>@rP(f6Iq-
ze>mq-UUag=M7i%e<6zSx_t`T2@zF*=H<W2}Gh~Ky-`<dL8CMyu7Az$gE{9}KQ1G8H
z?9n%7rJtYZQ#kxRp03DuF%Lh$)Ob>nK0M61DyQ35s}DopuJ8cCj0++0mVxrYe7s4Y
zfePMl#Ob(;_Ej-7k=W4K7>Bz7As=+VUmHg<uXz3qxfm7Zsj#Qhc!?n$UXB;3kr6xF
z8i4}rMsc!u#E`ZrpC|TSmSBlKDvaCh0w_gj#$p~ZZk-E~oiy{3U3FXRNiHw*y1EE~
z>qmSJTj>`MIl0(_%L1H<kXyQ@g{7r}&dx8fJU8O;(@+9V3|ap(pB7I9^h!GXm|@IY
z&RyoQ1bvWZy&(yq=RPW}b_<GZ*caD!On{R@5yn#4E4snMvi)H9jEus?jOrbBAMV&o
zDH%vu8Nu2YKoXd~w_wdaMUkm9Z-wHQ$FtXI-L&K-xUHj#;wlHKNa-ATw)-vq<Wj>$
zNV&1?BEJ?$Q~7M${!h2{6${e(;Auq0w160v-7`F&5D+{(bzIY=0qwz$(^cU^mo;Cs
zU?R0xaMw7%uoaeVRG{bbk%q&a9%C}XslLm1Mw|1UbKGSc8vzOxDXjYWIhjstl$}@&
z>hJH`Bbw_*`bznFuSZ)}jubZ)_1z;km96FcB6X$Gl|V|l?D5mn%hg8s*N1Z@d?9d<
zP4_)SnJp;*d;40}Yi%wx4`(Y0jtCa#9Lh-lNpGveGL+>9C)?(OQz~ZW746SZ)~jzC
zwCXh|NMT<df)Ard64}eeM#A`?d^@fTxd#UaQKCePq+`zpf|m67BYt7@{ZY;Y<SwTy
zY?K%<B&j!d<9Y@E5gd-71v7;@Fc9R9c1b$zhJ#wW;d+g4)g<LBj8;FgG7?1l6g&5h
zLcW#{0-6udQ(m<fU{7yw!(@?Fq(+0$4n9R_0X1Ab$z}?~2ocOMz0sNblE{@Y<Bp>-
z$`)-SF{-!)y>^q3OyJ3h<<q;wi!`F-skj}`><twxu%%1W+Su5LwkDv+Nx9R?e#b0A
zvmEuXl}-O+X+??2GS=FlWF;sf(HE?m74ysAm-2X|WclFnwbtkLp|FmU{BSaB?(RJ?
z=#wB~*xLb6f%h!zFlBnbew)<66MtQAo|H??+H7-WBYYn(u;=<>-<oOPo@vi%?a+;<
z5Uk5!tUcP`Jn|I*3BkzO8DVd4#=>hd_@%1PrT+_#3w{5JFTU8|K;o2?u^_;X`0qia
z_A4>hJ*yb}qpQ{!#@%=%QSnI-9J)Eq^_57^EbE9Agr%}qU?8-aM!iMTR}};<eEb?z
zv?|XUo2k`0^=M89de8->VH<}2D=3%TstKQW5fg^tkJJSq1qLrZ)IggEsA*sQ`0+-b
z6*fHJ3l{>u1_eY+d;q{o<X;V)r4Gw)wFlK9{n>kG<na005|NGIG5$$A>eNwzJPRby
zw4Zoz>D_7)6XozHeLs(4=qi1FmCu-#g?g8rO+c=d@z7i{7Qz>(rBe&f9ChmcHMXeD
z$;|wdt+@j|$NkB`*|}kFSjw+wK?*xJN{o?@9jSc+_GoviepQFl=Q|uoi^s$_?~nS}
zKGXWXotpl?&OOHs{icn5=Bz>}!4>bO(iGrdNl6`y3<Z&4R+5~uzIzd3Vq&(q?QNv8
zlz;)XCNvVE*kSn>_uD5TCW4c}?JJ-u{u5-@NyzT>_J<O%jlwq#Ch}ms@8uM)O7V;9
z+k<N<(Dub<0=WB!M#9)}f4&C4R%2Revy%d9nZ7SA6bV)W8ecL0BaR5K*iinc?}~!Z
z-Z;Fd>ckxX6A}<q7$2|vIRgvX&BT$OI1s!DyMI!sj`ZVrVuLLnigJjuIoW572hxUf
zi+>DRTGCgn+q1K=G2zHh&(4AY!rwU0O_F$=IB@k@77AQ0j*b`5X*Jm+$TDGJ{&g<8
z(Ax{^Q;QUmIk<IhO>wn6cdN5nqPJdcuoxxg2Yr`~r-rDtIbE!D-;JPT{>G6HT;dfk
zr#7`-rAhU>Kh+`&U_h=rO^NX=KY<`Xfxo=IUXR!Ri;+M2CQSjfOQ>Q6sQmxVt<*c*
zNxJTqOPJg)2$s)Pmu(g!ZYZ7JTIarx9B-Un_FN)$hP&CdfGYriCd%Ppe#Z|80{$}s
zq6&&AXduetXlwIk(CbAfzS&e~4AEN+qM4klzPB|_pe`l!ER+Irfgo}L&XhkRm7M>q
z%w=NXGbR?T-kD+~n~Mh;0|rHZKXr~i`%!`{SVomHARq?=eckpO<ce0&yydeN8ZzJY
z46@O2yr_DcG3vooP889rZ}803G1)IMX*7#sH`bTqix0Bc(qUr0gjixttJ>nKs;Y{2
zCdFS@qAjl%?MY6N&;tf>88e^sT{e`Le9#Aod9*(1U^!D^E~kssuQk}y%Hm6rs#GFg
zaBRni$3W-I^Ryk5K4B4}xms3KA`aZn0>Z_t>D>NaC;6rxkU`C<o1Wi&Sy@?)-@GGj
zT7fq)TYD4S8NWCg^l~9n6rcjiK|Zl~XwS7#Bmb!nPR`SE7!xBWO#KK%L9hSL4$Q|(
z^#_r%3SIA;N78323)c+)*(N!?eNRh;E9pCS-ad!t#lgq_!HPHW8|8QP<+m4TC?#Jk
zVqMdFj<uYgSH3&gmP6X`*N9XR^}mjoLEe^f`P!oVF;j63=G@WhtV_S&6!^5C_J@t#
z$r@9phEw-7z@^uWJ|l6u6dWFlA}d;1=l|e%sp|#undq~kjdkaTAIr3qxSJaroHrD@
z$_NEog|MHlW3e)-&Xxq)2LswI#l>Rh@PcRwokIn-QOSxL-M}j>z61wcSHQb>Fk$_G
zdY!ADUZr4Z1*kmAl15CJG4eQnnb(;5qZXH`rsn=?ua+g1i1!7vE9HreJtpVkyvsJA
zH6)tIg*<E~Ue*$fe`+~3zsA_h4;rgL-PwIXC=vjmf(B9vl`Mg#qEIK(YXn#NU7G~I
zzA&-!MB9Dbt19bs!#Eu(=jZn!Hjlw($Z)+|KmmLp8il2A?agCLpZWhG>MMilY=UOd
z;Bau)5Zv7@NN@-q+}+)SySrPE;1Vpjy9Jlv8sy+E_u>7%dv6s#Ql#qS?9T2?Pj}BI
zBw$qd;T4-cRYUPX3u1FN&Ib#P(--pWSiRdO(tA!b`Xb3I`U7NPSs0mIjgEvo15O{~
z7=4PcRp9M1RZl<k4Xxf~_uSF$v7qiortyKKBN&==AaZZMm~LU1@IqWYCNXQVauAN>
zp2wvm(nB&;>?F%hpU;!3a<Y8p+eyU&-Irv3YZiEbzI2m<HV>i;pDZO+S4Kbw90u$N
zj{xIBGiUO}+PcCCT0f+vOrK8ma{_I@UxM!j#Aw?u6(q$3LGN2ROEIJ$x7Y<6(q6wn
zzuZoi0)QH2c=K;wtC4h|X|0E?jL6=tffe`ch*)gCoM2qPmMj<Dm_uaxxESJU<=|~O
z&!$)S?6RQA3<cmzD%IJ?sigDKw};TF>*37U-ujJLuS1^BSFEWGSR#46&!8rAz4?!L
zBQ7x^H2n{3m+r3g4PT#IYc<F5eN%7T*FzJAS^OHnr7Y25J5Vu&I<V=jO3?jDs@3`i
zVSQFgxh?bXwkpLzqxJy!zEqyA*DQi7tUHR}0D3|qe(u;da0jVd77A5S0oDKkGu24B
z@?8bykmBYueU}JBt^)f<Ki}Q<=cRw$ojnzqX*L!(lvG^VLV1ephGiBX%l=sbch3O4
z-Vs5{S>z#G00<)@i!@k><REw@h6zyfY7EeS+iU>LOA$`80&Ox{qDJD_o>7x=Q;_;-
zJDs`nmX25C+~VSn_Qs?7%5lZ)k;}KPW>u`B4e4sws+Xrb<DJ16%T;Dlu)I9r)KRIN
zG5DDFv(>@Ggvhs!x2(-tuu@%CfL;=7HWTXYD6Ree4DA&^=57P2s0Tv%F((+IX3n$W
z6{73j6rzTNKLB`nMs#Rqk%faJ`)}yTUjGDl%^<&hO;6aFx4&fGfJ!rDse<d)nsrxz
z9ycFjT+Xj34Gt!N3pf!!7#VxOu!h(q1M;L*otXz^q$b1DRkMQooiW+|ARK{EIFLG+
zN}<8Ojfrocub01KS_7Q$SJQc#heN5ep-FDN5D+geSY_4)((9&*Ub@sBMQ<Q4#fwZ=
zTU)k(>NDUrPBD@X;#deAX!o;pg!`7F1sGA^xp^WTt?e5x;4iUd`lQP75%UOud<h^d
zF1J*RlnB+b;+)n?tYrX3zCKUA2^`xHFs`qfU=_7GRKrv~K6*W$hY5=V8k*$0>C8x;
zHa?}*K@kk{lCo*3xq#$3aa*dyNuMwa{{VeWI~qDC@HicGkM_nq?eHd8&%>TmU+V8|
zM(TGV4Eb#IHh#xZ;6RE>?1vtP+7+JT|E9Qf-+8xL5VsNcbmqH}47N_XQ@P*pUk$Oi
z=ecHgT*wI?wrJ0;FdHB$)|~zBL7SX<bVqLbq+f$^Q;iw}QEU1<1e{79Z{Embjmkb^
z?sb?n7nim^!?0x|bvOcvE?lV(0OOLa09UPFhjS2{8=^p(By&;E{qr^FPsLzho<t}I
zT0bWWMk56tG6i0={wN^128)gK)iyQZ*fkT{HMivpHw^8F%FD+%C7nq)dYJ*1Dl3k=
zm_L!h4kbiMqpv9a4a11Klz=!c2^c+BR#uXY>(U1@t@d>OOiwFuQ0UQMB@TI%`>8Qp
zJ-G}fh>v6;nHz~^DU<=%-)vWGdZo@74v1Z;KoxybG<KvdmUSdT91*3N0h7<;Hppc`
zqUB)y@m_fq)qBl;&?HzCD_;fgQUN=<T1PpC-5e<1^>pxs5V~>@AX)mQ>l@txwA@z&
zzx$%Re6ZD9@|1472}vav`pyh1gFdG)?eWgUqBjc{!RHotkndKHwcnjamnUl1<Ak%O
z^DN!-L{p6+AP>s>L2|DLZ3uW1JT8H*>yBU-R3Zpu4TActzR#M8Y>2GO6!1|dz@w*(
zoR6>@R<_@8W%K)!Bp_+93^5rQ7Gs;KIO+cbl9vZfXhIo6lzj^VxYHK9QXPrNjS%3k
zbeQ0XPzl%DwsJ=GE9e4PsqDYK6+T=6BtPX+%S;d{u{Vj>FErK;CTo=Q=@}#^^%$Ce
zk{>_UO|gE-ULX7mz5^%@Xn-FDjQ<9=GGg*50wibxB#J$-41n=?>6fJnYZT|@V{u5U
z0}a+!`#+XUmGe+@GYK3xv#_#}a@d}zGSNL^?WlS+8ng=><hdCbbbt1(LJFUMD*0Mr
zeDV$uF_66SwzaKVYmTgYuF)9}IX*tVKzln!Q}N0&Avl}`Qwp|%exVZcB`?76CEXVN
zht-CMb$9?w&dn6PNenLke1kH!>BhU6%0^zP9%X{eKsH<J)c450xq1a|@jEgKFQj>z
zm(%!>!M_;uX{@T2u@mBeh$ut9_=(uz6D6Yn`KC;Lyff~14>y_9X*I+7y=G(ZiC<ss
z^Vc*4+{v4fDgN7`#fzCRp|~L~;piGpe@KPWoy<i&Aiu*u|4R!Gjr&+WD>^-)5IF%v
z=+Kp~-H$I?li@@2-yc}k0-hk?5wFrVxLVN*G_xTcpUWLI!}T?p799nE`#rz&f4k%H
z+s9(D>(Ay*nLc(c)8Rwo9@mFZ#xmr}0;pkw=H*Q%zD<XvEr%t8hHr)(1oJgjt~fK5
zXFZsKNwbDS2i*R2Mf%?Wm{*A%#o|R9ZNc#&=X>;!I*@gliMuHMYjBM>dNlQ_^_4$F
z02URYJT#yn<PZok^6~S}EH6it=6WFlzF;7ESy)=qhPS-{f0C&yk+Eusr6ACMxUMJw
ze2EuNFP9oTM7c8?d>-dys3Zd6r>o6Hfa6%N1{XL8u$VgD0wjR{nKmfgCz*dr-9Kp-
z2XW#=ki7fp#m^M~F#6yQFh~OsTw3T<CMt3KE3P$@UOSJqzxDs(@Dk+oh6}X!GHs?Z
zz0&T<-uW<LKUrqOr~zig9326KdihxHHY?|?^KN)Av5(XEM70{~JGoERhRv&6ZoEp$
zTcLoI_}30FPy||l8g6Yegxyhro`(ew1q35#<V9{OIl6@#YcQs7(>6%CWW;KyR)rm9
zkJ2S%e>AV#JukA#UAWA|QNN=*yAj(OP&bHFdM&SeYBqI|M$KmDcYm$uc>nNm>7}nr
zHUqz|>6ZBLd7|zfL8xw-6_<nre+VUts5WDVTA}rX3ZUM)%%H31Nu-vUljCr8x;_+=
z^>>81@U`fm{`&Qd?ZQWV`*K&~aGheT+2SA+EeQ>D89-~nfWv;mMF?Em6E{K|u6Msj
zwB$!|<$hhT7hLY!%h;$80nML_`9zQNC3E4FuP0e9K03`<rQWfk(U9Q-6D90N8bH9q
zhb;u2q(QD9KlarTBV){B0?Of^KZWgWf~F^>c(|jyLsjW2)&(@s(BB<+qJg0NYHS1)
zM_O=U%<8E+A&NKTgd#FML7k);6KIojRG#H8Fol#il{WHss@CY)m1g0mm0a?)Uedzi
z{Co&-2#f)y<Xs)^3FiV1j>o^BrIEUomhu8+6&3X9Hog7NxdrEoKM!RC<PO{+E}!)P
zY4qJQfae@y#pLsRf1&lT%FgIrxG{2_&RA!=Sgi{rmD%XA++-!2B2ATFMiWYMaRxQ`
zfiwmta$%#q&Pv{6h`ilR%asniJNTTA{^*1ZI7mQO*2@vQ>Ua0Zf4)|LdvSKt+((8u
zeS8x>K-Hhy*;#S#c6@iySL&a6?VXb1R_S%Ox%a7J@O5PW>`4!Ze9v+V$aVt{O*-qN
z{_KWixkmM(|I_dk3aQ|mIt;}53aS3k;1kK9|MM!E!&T-k$Ea&#^XA^hZ}g;5_3b?4
zNAM@0MJO1?+y6HKy5>7{?XH)Q4)DYN#R-#w>riZYx=Kz=;^iPH&dRiKcs=5~`HE&M
z{?~gwL|6D87iefiO+yHAqVUxLMllPUt3rY=V2mM)@A&4?+Gv!5SX)o-BcP_vnDl#a
zgjz6!YKD4pZUrR#s3e%dv&JVKTmAc12#>)Ivg9j%C5?ybwHeIT#;77Fe$$}kzVYiK
z1-u#OULBq&AZ(+U(&54MXTPxrk5$?-$%K1JQx?>|NT9EdV9ywrXaw}l$dxAh4W_8i
z4>&mOdNp4wX`AZOHLpz2eu~lfBZi1s*xBWl>${Ts&Dv32c^k#19oz-Mp&0>#U%(k4
z1o}P1-!q8wI`<btq<J%7b8E3Ilxu!zDh;Dmn`A*{n^_XU#uP0nJ<Y7H-`Mlq9nkdl
ze$)#c&VuUezi<>vq3xP60a0u`M<s$c1>2!bw_eGZBYax6l**L<fHwyf85z~c<ed+?
zm<`6r21pFjhuwpuV1kPI;70I#2>9))K%V*8+~tgYgT%l6lyHX^oqna0m1ONMZYFNt
zo}{f{-|lZ#y^>=i>H5$sl&`Nw65Mv81k2LCfdEdDkd%#NjX&VhXtk5oDNBF>HAr_q
z?|$X1a$mnZEdwO%cTWAEHu(FDMQ{&jD)!IuwNl!5;A`2utfV;}{lQ+cQEw$xYteO_
z`dEQYRb*I5qySgh;R%pU2R7uuV{a4Xkl+6irW*SQ5a3#~ov6I`h+3Q<Fqx!2T*!Zj
zP~lLLMY3zz3RNY1V1m$xbqJb!%PdCmZMu^G@+es2B2UsUu8!Kf#lmH~lB`iD`N?T6
z-=kl-nT=<=?r%~g76jrVHyz*<DK3TuMo@kZ8sdp$aoDYW6<MQzU04pECJx^Sw}erG
zG)UqY+;aqab+mm~BC5yQLb*P*VC%g0eL%6^>zHc*oI1deNYVs&wfLdK3uph%><GM8
zmAw(rf_He~QO#P&j0J?arL@#sva8E@2=!#OnVDJ)<wb$bn967sVC-l9{uL}%DoRS9
z+$epo*Xi-@N3uBjVUKrRaQZK!2b(VZpcsiQCq88@bX?2*sjR40XF4E*2hLiCsgche
zmXgm6qm<7AtQXeKjJOhFyW0s2*ZLic-qX(U<rU6`L5vp(vG2e6jlnhG(%xCR2D}IP
zL;$4iSRvIce9_QwupW9>*VVG$%9MsVWAV&BE?&<2_b*z90-W;n0BpRtCw{zrog}IP
zQ%ESQQF>gK{m%eEXBja`2A05`{yNL->A1V6VbHAo$v^X+*2u9y>I+@bnEDcpNPD`d
zx_qDogP)H#l`q|Y#9-#R89~gZH8KV@+HY%X>l#+_6s{F_SwgV-xZi%Jex^HVu-LPW
zwe^^GzNW5j)%vtoq)GIMFN5NKZ-CKn$q<_K4^WVvMCCFnzbQqFzh%n4G%P14I;h07
zkY#b80-tzApjpvBYlI&qZJj%|CkFgxef<j!mYA{e$Ii$38io0qZ%!;-a6h=(2rr_?
z?+cyI4SEo{Lg+;Dg#G<iy4w8T{A_?5sy=3xJPqI}cJx(%#0~;-_f)2E48eae7{&9>
z?J8645{>l1<fC1uB`1^K?`U3Uy6TbO+VqLUsqyh(0tnzj;^E^D+%Mr7&EV<ryFvYT
z{p5ldZKhrApKVMYru;z@S!um9A}6^i%}0L8_pc8dq&>~rUH<Y0#JJYm39dE-k@5gA
zG57jl(12$Ex_0c5>>T84tWjF;&lJuj?iD2Uf^cg?QIEo>N<t_E$cwDsM3YZPaa)UB
zyfC87BFX(Z;`{NJcMDW$8J>|gI;{m|t9c6QC=l6R$MQfk-p`i&>UZ_0e}B+q0DpH8
zoNo{$-nD@iH&mqXa+b)oL=qF|(1pd~LI*vtzyB=fSPoIAdA9HU>%_P`za(kV!i<BP
zd*`eTtZtv3((?<gwQFpHt|K)Y1prDJi{0Pm6$XCYHqxwe>63^ckn5R}5UJl(=wQb0
z&oqf0X$(*;$WiB4*VIU{;Pns25SAien*YNV>q@$old=I+RVlzHLgKlbkd0sqF~Akc
zDZ#K%HmXqjI=SjF8$<8-;KU3-^%FPxN-laiDir}tBLWi6pWy>Wc*Mj5OZPl_zNauk
zkLS3F!I|2Ezh8z>2XOYw*Jue9NFsdgw%GpKV{W{iO9}U6G(;lMQXz27j#mPz_s-pr
z7t4@k;7op4PkvaLhRMQV^+@H<g?o(QwrEeJG(BgRHkv^}>k<k_o2O;x1N0b4Ocndl
z18mlbc$x2JiE756km#qGqcgv30e(j~R<`gdmM`r%KwAq(>Z9b_d_%iC)Xzt$OYQmj
z4VWqH0pD3~XbyOCcey3r<z*exx9a2<EmbQQME3z6_LY@y#oV$K>Fd>oEP{JHCnqNi
zSX;k-e|gNnjjYV{(QOL`)gWHq?f=yOR6SNAcBp|L$r<3&9=3eWzuai8p)i_j?hA$W
zf4u;Q?rehf*sH(*N#r}z)2bjZWg$E7hqX$xfK{d00PJhc!_YhFLa!l)M?w%Pie(6r
z2v0;mw}IzGs57&%Au^xrj0qFEqHdu>;KYeS2e(MQ?Y^I0vEK%`>yezAHjkoYXZlyC
zIU9<P48Z)^{dIh+2_^|%&%3p`dKDSa|3=8VX$IM?uOAVdwbBHSG>-{lgge{g!=4^X
zd|jSGZ`|`cb~vMBV#o<%O2&z_e(&<_HM>f1l&mRbm$3h>B!J*?dv?9Pm0<HaD=7MV
zX`H0S)o&VKw#Mn*F=||n3Az3EtM}fqoB0>77lpm2xyQ4L@|eg^4ODqUZouG0<|5EN
ztLMzSDOruBLV<`(dP=IQ7@3O&%H7Io6YG`=v%4-6)Yv0HT?meOg+3(>_n<Cvvbzc&
z_{${avi;sJz%#+_cUJH@?0kUPkF|6ePv+1)Q4cgCX>;}T!prFW_47LsQAchMvp_eR
zpDb`40q1ak$E=uqN7lB}I_r4k#Qga9RJX_6G$Muqv38wXN3H*AdVK-aZbse%dhyxL
zdV6YkOKh<I-RAwHKIJ4hv&dej=b{?4|9gUk@ZFs%2N+EpDJrNEe7EcIRU)9jq00&I
z_&z>I$J*rq;zSzLoso+TQJuwgW@4S{P!TRdL!#R3qC577B;v7kNAAkSjgX2#E1g4b
zayGWy>vo_{6Y;IYunS@uQ?sSt6{R6DeUWn<m7_*H_;MeQr4$^{vix(wU0=<1S8Quz
zMv)D7Us~yB7UxAi$ft;pj}Msdoa?!E>u>lL2<tS_9&T<ld?m?JcB<{%_SeckNDNm|
zaI`KxZX6xJ#4_RfVZFSM?f8*j+xGYO18KtORGooA=xBfoEA?I9pU85y63aq-%rTLq
zGSUVJ@mNyf*wD}q6TwYiD8=44R+$`Zm&VX(H`?~rT%{eg(KQ!)ap5R#eDOYaIWd~m
z;_`sl`P9MH?sv)fU`}s8`%)$nLI5G+4=;=p(~^Gwx{<pa22c|_E_a?fEo_t{bVmoN
zfN{d3`U+}ru*g9Z1=wUwIjHeLfNmuLhAN0%q)R#`O?o89VzS$<iSdYFKv}mlr=-u_
zPv?Keq*%H50RN`npM>9vj9Narqp#mKA$KcH=AUof{YCzIpY=<T+_SXjdE)s$q$zGH
zcHN>ZrFFp#)$p9J#x`H*9KgPN`=k_xb<x$~ZVSHGTOpEvxqk4m`nWX-g#^MhmObMK
zgvCs_2u@CE?h7Q{%AYyAz6UUwNK?O+lFmVvwB5)1DNu{;@TIgX71N65NvkZR)FQw<
zTG=WM!vvTZcB!T};=6F(hw)MC(&r{6rnwL#+&@C*K;LkG=H6AI#U%;#nDc`g^FM*-
z9%!%KEA=|OMsKrDxyFCkPf!7FaJf1vDMH?qjAPFCQHKw3p{#s-p@$wE#(lFc@1U~l
zb)x2COn-!nMe~<f^WAR3(gEVjc1+CXalPgh_-T5^6B%&sg<bS0`az~tY=ngya|Zgl
z0pL*akq_rY;%P+o{_hOZAt_P7m|gHZL$6UL$O^Eu6_#<V%ln;TEpvXOI2g}G%*~mZ
zIV~hJaIpaZ?GEig7sYsWLW30%`!E0#xfNY|D2XGO5HJ4p70cpmDR!P~J$EU-GX4X<
z@$55d29NooBh%O0eyG`=!lR_4_$MObJH$yohqcXHE<|(I+4ReTaHw1cH*(9a)66$|
zzqf&D&O?bu3(ZRGs{sncQk#1XA9R4RW1tnyXmTS?Q((`giEjmneIrnbNI;~{EOKdc
zJcBk!ziKoW$OnX?{op%bdX-ou60h|!8c@mSuD>N00rGAp5v(frMmybU5+e>Klm5}b
zlE=ULl%w+~eUk1OjrDX8`To}01GiC(gwxAz;SK=I2BW0-YOCEC*IlI;Nj&d&mVVz@
z-ygr-bk&;mLF2LMggN8FIlTTMdkh-{#Zcr*8)F!<0#;seO?k&2D)KpaFXHROpANr$
zbxFADFA!SW%q69oqY6R72y%`#-u0T!x~#BqBU+qUq@jI=c?+bEBV7-5Mp~Ree+@L+
zFv`i%S+19L`}(ijW&+no@`wLO68)!4dZv`{mMLJ-DL({>a&S?}Q|rPq%nf^c?1f5>
zsd}o>Di>@_EUJQjv^r?P%u36nhB*AYdiZ7AvOQ9nu-Fa^>Q6jr85<Y<uB@JZQm4n-
ztNwOZ!C@p%MhYfRA#>UtPDq}TPoK8Zy8RIfh~*h_fO}a`e!!+tzr6EikkH=3i*Xcq
zsR5ukxSW2Dhs^tIlA!7FCDLd|P^c}-H2J*c-5T()Awsa=x$bO5X=;lvGQ&vNK0P#h
z8*mIZkim42@Ermy(fLw|)I}Bb8&2+%P-J8)k|EX&3*k0Y%mhQPS;X6h5rzTVq2U({
ztFLk9DI>KDZ4REI!tSrV?}?V*UExpN=|7=Hm!m=>M3SU>E1?5l53i`T<tscQqEG;s
zFWW>$ae8&Q<fsv5#%ch%95Mt*JX$>Tyj3hq`;vCSqeIU%KHa`(MDF~va$thg$5AUp
z^rR1*9JrGv0mvb9JuY=X)dgsFfVM?E8{UKJp=bV1U_Srr*RQe(M{aD<4Mfod<y8Bk
zEh9GE^#=2}5@2Q{AK+~RztGj$0%vcj**<UIRMAS$=R!ihMv*rY5EwFa|70TRf5d1Z
z!ow%-yZ3Mz6r0$12CHZDZ+!H(uOG~eB#X~fZQ;QdfG+AG!$19(1QRVz)$fZ!I@DWC
zZ@`66n<)<XgDcIT$Eki^OMijqR6!zR$|BFp<4yNCe@2orv*F0$X%ft&&>l-Ux%)bt
z!6^-#E#WH!o}6X#`F9F9*WVXMHkBcoG|Y~xhb$8GgN^;h*&pD9hc}QY+;E2PN4(?H
zmxf6NJd_@a_9+lCULKalD^|*XcMlOxgv@f7Z;N*N6alRCe2)QZOi%ygx*9})Yr%S%
zh1~cJ@Hj-yK>BJZ0?eF{iDH4n*uS}9urr|Gcw$0E0kR@s@i{#`_1I02;$SbxK?Vv<
z=r^0yE`MPF6J_4rMu}5+JRX&DcHsP%_RMbM-J6fbLsF1YQGw>_vOdKo@Q?q)$kZ<Q
z6#M=&P*a$Ck7;V$hF<Ua<Vrjo0LWhL>(uvhx6PjSBA|l=$bNG7vxkOQu<(H-f$=Lv
zCEf4}zl`6a-<Fn^TEw1Z&y={Sy=k$%=vt5<01~S`@1g(KVE^00tME?AJUu{V0)*xd
zl_unAdYOET=gp(Md__J%d5%L#$<VqW(ZV^=AkiDi&Z=;(OX)06#tybi$F(!LC&0Bd
zt6$^;U*gxL@B6rNOF<ZPDK!$NS!C59)}g$WqLwu%DSv>bhNDOwe}@TbD1b&pSF#qS
ze2~W~gQ&u)!7x=+nK1ljVoV7fC$HrH3NbKiz-70l3Hl$Mr7oeawHBjvmeq7wro_kV
zO2eVip9S3__U3LoNy3Xb38!UD!;3t;uov%s<>6qTvqJf;=ZReNpdXgNSLX-%b<6%-
zso2|i&CA6N{tMu{stA|FB>A#<`%%NmO{Tz36Sz(!o@Ia+NsylWYJB<XehA!XxOZs<
z2s%7OT<@BWoFtw&BwxzC`~JxTK<spY=#wim+u<Ta+MlIGH4<!hF^%PJIhAb75rD@b
z&Io=6G0x)&g!c;E(9+k1_8xH_>-AA?ODjk#zwVlLWnQ$avqZ^>fuz%?_bW~!{HSO1
z-U99|bpfjGGoUW%t=N}$7Gv*OF(E5eX54(I!*n(r*yRKkPvvtpQabH<-SXTqgYhIx
zQ~A-QL87{`#p8laoSZ~Qu{ElC_4}uab#z6_gwpatkX1eo>}0l}@Tu^R*`w{D?y(p8
zhYht1&Ja39fY|`ZtT>;`o=U%mp|^iS>%Lw6fcWdcbK=lS<gj#F`;C!>AZGd)_E2@G
z+u!@(Zm*Ic2q8@>RQb!$+5zJiIh%9UJw-h%ea}z%!|T&l>@|p>W5oNFX`PntYUMw{
zOKX+4KWPUI9y1;BP{YuXajw)&)f239-v^eCjR8ZfZ{#00EDcND^?i5F5Cf*{$g?v$
zd3lS506*#s1d4At3n^53#Y?GDGz;UGl1!M+TQC42#LC4LYr&CPZ}aId_*M6q>3M6Q
zFFd+mJ_J@77ypEQh2L&599K$D2vKxeA5*7vyFk$oc_-HNPouc{Vm(BB(H6V;M(!=C
z=iU-4OfG-$>no%sM%yj#<Su4PInoIqrRls*my3`>=YyG~@z!$38)+^8t<WRHYaMz7
zTC!19TXY?GiEEcDSu*565~8DjGFh^OoNonQ!BelYui8cEJ~5(3PWfd9%#lSVpZ4^=
z;pbcYacFEDaIyu4%sJ<Wlk8Q=KuT%CjrlG1gOpY7Sk@-(mqEJ><=KA#78nbvEuTlQ
zM)Sa3Ej8W@tGxC6W1K*uJ21Bm5CiPMjYwad{aq4hl;sw#^U*w{av}tC#>Ix9Sh0qf
zAM(xr6<0EJUGj+@GmBU>x-mAcU@1=X@Ov_9CGNG;h|KTS;UgD=zr^lahdm9`LxcHo
zM^xAhi1B1U_`zZN9tg+acE)4jX#l#Xc~AD7hHpzLc16G<;T9Acp{g_JC%Bwex7h_`
zOHja)hy2pc9WJlSJU0b<pYKO(Y^B6Pw>Y`NEb2DH$fhw2&A*ZZJ9P*q?M|U57nhV&
z(%KsQQylefrLP@gx6)AnL@)k#0{sKJ*NCR3r{JsA)5b^~3Y88?3|2Cl_<Eo?nO}SL
zsP>Q6;e!-aBhl8+&Rf^J6Wry%vrvpqzfwi=kZ7L*aeNg%A!4tP{3M7R=dbF|G+;@W
zfoE8FF;)^BvS2KyJETiJPM@lT0gS2cPs;CnwaqYpjLmom<mSM~)Ay;E^)+7nC=NK(
zS5(2jy0v&xj%Ux<;uOZ=NQ8KFWGZu7TmR+*fZ;I=f%7fznb%%X_yp(he}I`;+ErPi
zZszc~jju}B;k@;r&MJdA#g0E46^X$NTHKMS%m3)$e&_hWfHR9M+$*>*Z9c1C43WT;
zb#uMVK0vQ#D04o_un~_u5{gOhY{)+EjngJp0`D*nFhU3S84eK@L7FuHd^0{^CO{2C
zL2V;;%fbd2Xu#SONG4MAZ4tH00j6t7xNZbswu}L03k9pWs0Dl&Muo68N}P#g?ilk6
z6+-P(IOC^32fKu9-*VQ92(Ch?KwinlwZFe*nQC!%xfk%DR3tTt#)~soDF)L~LVhcq
zJu<R9gTNI#f`3sc+3*XQ4*joJKQ-efbl&P?CJIm{?Va*}Wq;o1#@1}I!B?-=3^-0>
z>b;T0g$_@%$mV_G3e*+N=5s~-*k>5t3r~cD4t(jbR~FV~iJ=R|{KE#T#Na4ne;>i3
zN`g;O)%vkWLsP&UgeWOJ!3Ke+qyJ6gQ3i6?qZKffjztzZQflzm^9gaybqMkKEM#%U
z^%f(Qsk}erOo=>_6S0N>^UPz2?Wo~!&n6xAD*Il%kJw{x2|wH8*mbS_7FDy#3~;V%
zR$VY>O(14nFTIaYe<7N$Qe<$rMDlyjXG2u<d>?~gae4NuU}RF%FeNWGX3|#GKj9?v
zlQ6!QwNhd^Qt^vavc#VWFIJc7YO^{rxT@BoozwNOrHJ=e=)1GvA1$lQ*#FCDB*nCz
z1LF{Nm!Y0zE~^I3S%zFOTbIRY=Rt40F`eh|HeBr2k9aP1hCq2Pr0KT8F<{FuWF>Sg
zpbl07&<C})?5@`bHB1%tu6BbNjBIbl&~(seIa2O5MyMXi*YR0ayw?lGH)y*jn&=Nm
z$b}H1HDL;QIv;M&_%#go210lqSMc|0a3VF6lGS#!`~f`exZO||tKVfh@s*(>a`T|e
z50bQ{1|8t<xU2=D293CSOxSuYqRn>>vGF-aJ6-NWK6<>~N$rAxO=;Ddkd05LP_mJ*
zgzqtlyouWH4RMUYSd>mtn;ozImPnZcRV*Q`8TrtG6git5nU4A;gVPho8Y%?i9M{Qs
z2K>WgCC*6uwV>ym!ETq06Zh=6EpjYvvd;iSHoOeo9q6gxv)ieCe(|(uM2R73;{&2f
zg5;_|P-w$vI(VB}&ys9*%smQ3v)WLCJ*yYbWa;=)A}5_-tF=zy6~)+wIdin@c2M#Z
znQ^4RTzc@0JEt{AxVbMk!`bdpuH<uG>tFWM++TYx{IQt)C3m_dHd0ZhRz+V{6XgO5
zxtO-0FepWhGDy{Ea75x^MCz4bed6L`k;oR%^nEv;gl^iW=nKSW--!9x+zY1}a_PrS
z=$@|5ANhohq60MroM-m~5{`cLM@)!*fY|IEI6)3=``y*OCu>dykHr%U*+)S2+j6j`
zTx<aB|KfJ6>lgMsT~aQY0K3+DDhV%ulxi$8i$yaUG<T*~KfW^qw_v>SRP$M-V_Frq
zTqA%%76b#yFh-RkR9t<e_<R#LVXP;{pY1u%$2^+Jp~qPj(p0}hp46-z2@d6z91md^
zhGSepb9Xy*ME?4v_j`~~=BaMJq?FXmLTdlSFRan}+!}7T^43}v(PxD<G_j^-B(Vhs
zQmrf`;irm?{6Z`Pe1hnN=zk+WhUCP%o~qhvOGd-KP$Je)beBrgT#TGsvT3FcUZ@;Q
zEJjUk(_XS4E@^g=D93%*M3%O##O=G5n?nf5&LAe`nM;_#4D4}izkNK8zh{BTg`Hv@
z^pOD(U6CVJ-jqy13Tq{n?{|9Jhtkw;dg^L@p2>xsR|7Vn$NTvFUqu(}SluQM4#QZW
zA)eBl&V%Z|t33a$dCIR00ukT@O2;1Y3w$h8^mc#9Ki#NCy%C>;`*l1lm(B<|`S>y?
z?^6u+Kn~O9tct&ZVTIL&yJS=)?~&wTVmnUUN$T*8h&It-t|!sqOzag4c1~O&hcf7$
zBHW(3=;}(=>eo`)FW}Z4DvmXsn1TXFnn;Km3Xud)BB2#yp~25v6kiy<F}4T&?NG?r
zS@BUn3|%x(zaa>)e~{iKhtICAEU-)}lOA&9n4_+n%_2VU(8l}`{14ml;FMbgi|1Sb
zX^<i4+x<xWc#&$3#K8z=ICW<OT$C5_C(rqC5A;cn$%&{|2Qm<is>-a7@!W`WbUG5V
zprq&5`Rl|N--j>c2stiHnaDA^Ci2$SiAqk)8N~E_&Ee%#(4a0%_n>&&+n{((daPkn
zBc?U?EsWb!Z$dr%-%j7>Q$Rm+iPYyF=pAk4@vBi4zaQY83q-wiJ=w&jhi{ecntusU
ze%WRFZ8mAr@)RXO>anQ=_w5brAFPr2o#G<?6KY{Mf!S}>L>YSMT@~FOxLSV4ot_%I
zV2TTn1Zw-<+84$E8HvxnD13Z=VHp|t;zgl9tRSC%-Q>xuq4ar~^{L#GOJm?7@80$O
zCC-P(kt)i0Uz&L+R((l|-0WUoA*JJ2u@2b55JdS|8<nPFP>J`*iRO%JOg!h$EuIU0
zS{_jSgdeEhECi#xcs%5ZNvF3<=*3>CpG7=iHU$)FF3+bJJ7;qJZy1KE)Qp7(OMLS=
z{$|P+maF8oKOj|t@EXR+()TYn%U1Bjf?Q-9&LKQDa_r8}yf?yie=zp24}2tph%(F<
z4733A9+rs2-vb+mzgHrEIrE@dNE6+2Al$7t3LS%ua{Z6TX4YRJ&CJcwqXt0Sf>DV>
zV12*WvDSgO*tejd5%|@levSwVgxxe}G5#(I9#&kpB|KQD0M)msCMW^U@qk}N@~)X|
z*rnacf0j%cvc(3wki`a>2^D~G96sP^dW#HmnSXZ)`c{x{%<8-a9maRQ?rq;gFaJ5A
z-{cdFgz4PH;nowu`D5D<I%3EH6@7@xg=UX!li78uyn7Xlf@_|njS~dODR<=|@k<u}
zt0v->3lPg#68_MFo-qRfOdyuPQq{z1>LHZpWb-XT^%C!)Atd&zxP3jJbL7<~CgHda
zu5~fW%9R{<`#s0UCj(8jW*Y`0OJ8r&;A4;bi6%UH*=H~E;wfhmv6~vTYDD4+&W8!K
z54;eyX7x_~i>n*zH-|!zB+Dy-UqO}9yl6QbxlL&z#p5*{WSU&ztqyKve$;1h<f|ue
z<kqQS5)&aoK&n`}<zvczL#_vnX(1$tL{ROdaK~jd@31~+Kf{b1o;)Lycj6dhexyk<
zQ=(vgUJ6K{y+#erxty6GyWHGDeIA?26Xg+zh{9zKG&UDiafD|;5e4Oc5x|o8PJt9p
zi3&9oSU@|CpGfH1m~wd1X&}FJ>3GvLki95`f9KyQR0=kdFmWSd-@-o^K+7!Hs{3WD
zs5U<>*Um}cSc#s0Ds7Z0(g~_L%sOt=)u<=VWdS;n0mC+X)db4f{gy|cim5!lWR9EV
z4w7Q>omG)q29WdC$$OSX5o_9m3$7d?-apH<`tMq<uilY+L7S_KBP4X+EUInw7H^3u
zmOYPUPBR1vzrEfm>8VGaO-*e!OmzZv1ecZbPuSrJa&H4Vd_5XFI$F6kp@Xj)45^P5
zUC|wXKqcgNc6}F-M4n+N6d=Zq95A^dl>bOL6Sqz&91#A*je3xbjErXbybP9<mmJM#
zAB1NzbVXsiHtz8_Uh<r+`B85(ASpTa@BE%Q_kU{3%Pc2Q=pG*D@Doq$m%x<37_MR9
zMR)i|^l`seHZDPzF-2v(1L=b3y2p%SrNFjr;=Zn_X#pn$U28+bH-@@A6ItZ{FIp==
zr23b5UsRVbY~RO^fEqd2-_K$(#Q_tmh5kt#DlC?g(J6#3fvYHWKxXXER(Anva#~to
zp@jzx;%c+)fCjh~8cSlUk$YXreDOdD3!kW>*>m5Mq;%?<=co+~R|^fngciNMPq-+S
z=rODL4NDU~P*05dymICxqW;&<$>g~*IYN$=@YH@$t)GE4f8Av#HwGfh5wVGNaUHxl
ztnL@4&pli&uNRp+&4ldC2!zR`x1U9XigV}@_BF^J+zb+V!0pugy_0xF92!SRj{eOl
zlrTivA515!1X4t>w;%0}A2Q8!KbpuRW^cOJ8}@~I-l02ht%(jdJ%0xhG024sM{KB+
z^t&NVEiy_(X^&%XBrQ1x53j~=D6DF6K6Bmc3ryXmIfsVzOWtDoKZ~T^ApWxs=(or5
zt9%%e|LQ*;35n|;Gi$%}-`4b?m;n}kLJ8AWL}h~>LKu`OHUm6Fj?V|GOh0xtw06QI
z)UM#<-M@3Inzj0*sfG)HVg`|&w|F2FzgF0#4$Uvu?o1J{22Cc5bauoq9s|st4`oCw
zF`5z6$uKKjzcY)a+^JF$kLHYN^~DbuE9}58d4jhl{W?6}OQ-Hei+=jyWWZiP9(`zp
z5*<3#|JgS#2UD1z#&1~;13dfCPyKn8(xZc7mohJNZ4Nq2NKhM@u&+=^1Zk@;4y~xN
z5=LMuz*LKc!#4VA%~*9lO7Lwg&)_tZZhTx)>D%^a1`=7>)Ln%ANnuv}L1Q%A9^lil
z_5vuK;>C5<`kcHhRjYR;O(hb4ID}1X)O~VPQr;i<eD}A$5}at}EGmG*&M!&~z>Xi~
z87Mp2l+tpf<J$H(M$dNUiSXS=rtbj3@!rt6RUv`W?;+b=LD^;bOe0AQ28T|0r}+sb
z0!T{3VH+bWjn*UGHDo}J@)H370vR?*XBQ;e$x?VvfulzUpO#9Y^t391p2T;Cf5i;y
z15!=WM?D{%#bbtoI-&mJB{GGT;WwR~wK*URDY_N4S7Jd;1s!NkBSXhYLr|jKf(0KT
zIOe0a5|m+}d=*w~pd*4N?OvQueH<|P+8#yN`9%AsIV>VaM~`W+)v&FEf@oFLuEM&y
ze3v&qVPj&d@EJmrk-)vyEbywKFmDATC6WQ<<`#G(q~)KLH5u4J4TTkVGElX4jGJzt
zri3^og3*(bHk%DR&fE#ivTCc6No;GVWDT`S07&usUq5OHB`q!K4}KvmTwDNnyn+G1
zpmReNg9iDSKslYN!0S>KkMAuaRz?`Zm1LCNUpbG~e*xl`sDcQ8H4X#T2D8{z=3#<%
z?(YutdR-ei<0&BvX}<69>S{H>)7~ykJ~_I2zyN}vHhA*R%-YQ31zPlKQi^0L7&<ST
zb)F*PvU)F`FlEnb<)M8PsItjvo{n#VH}sdfmFMNf`xiZZfQ|LO%#&XrLXc9LVMS!!
z-kdB?Y~jTHf0L0i2^N_#ja(N&w^KqzIJRz#Ta5&hz?+la#wpZA#R!SpQ_k})wJ>>5
zXCQEJAE1F+gFig~aG<HJwePsmadZECVp@xA{9Gh5JEnn_eR4*BJ;7z!HzBTu319=#
zw+CEJ|JfTQHCR1~0Lq9>&4q2P2A0=D{*C^WS5fMiiicO|&pkgp)&3=+6Xl6<a2*V>
zb!BomtE!bh+}i?`bn&bWc1Dw^4*Q-7;Tq-xC}OfhY3%I+ZNF}J0Q@DfrTYgxY7Me>
zkY^aZHNjQ-dqvrne;#KA9AJ$S%3{Nro+i(JSS3*XIvc0IhXK4gAUYm`gy!q(+tf+Q
zikl0~il=8Lrgl*CMIn%0Rp;o=$ijo@cp(|0<o8GSLMkl89i5t@FCl6h4&dKc3;zS^
zZv9>0kL@di6tBco)I2&6#YG{;>T&e*L*P0po{)|wSi_=w@S@q$bGyGwe^_rkm2AJg
zb>vuYfV^hoqxzKpw4hTvc`%==!Wa=0`e+3Jql7P59H)?b1sdX7&S%eL3U|5U=8M21
zEaKBI%WG=cBaYAL(ijK8Tp)*u>`uV{^#U}Wb#5-E3eZ?*bCvY#EEyV$KU=Ar+d7vO
z2H($D0*MtDxPiAo)Xqft>Z^*1`WB`{#zPK7k~zna`MBn%xP@1I#v1RpPq4+uaeBwR
z8?e;m3z|+82sCUVP31R2WbZAHg?Ci%aQT#XRFuDN2bFjh&|rAvLQeo0KQ(v77ng~j
zHG)N0%7QKX8ZNXLNUM!y)=pB9_7hh6+t+=C<qv1gXJQdl##|T@3<UbO?$TsDtTAEW
zzri^%gburh(pP?Z{ZUMO=9d{?a0gZr!{{H21_XwHZ!H>|u)3e4f~6a?ApR$|v1}<h
z$qi%x0bER+hjV`N{a*U0yAA)1g*%|2ras3Ziz=|xM9<BaaMzS;X$Ou$8<%yKU~yoF
z4MF$sjNMg;8nWM;?bg@%b$FWhirkMeL!T>=sH!K6Dr4Pq)F{lA>3MXp!2nTY3%!Jq
zO%=_NQD5LOZ9wThoOY>iLqow~J1_aoc}R*}Xr@bxml>!hl)Re#l~TwS)FA_0I*vJw
zqJqv3FF~T$xum`Fi#4pHFwSpG5YxhV!R7GzG>G=#<NnR*md@_F+~srC=P)Kj9$R^g
zsC-j*sHWKC!^1KaXip=iY}lXiz$>MJ)YjpxwY~>>k)A$1Q&j>W1E6|9gMhbHjcsg@
z!JSXlRbc7&I1Xo;+B6|JZ@E~M)$|;LNro~JQ8-Qm%<+p!1UsMyK*Nx!Tz{nET(@Uq
zG1D_qgUorq)I-Rt^jr~{Xa$ydZOy7$WFppRgwnuoYJR|yXV!X67SUKdQcdhffY(HT
zr~mhy;&Bk9qS-bWrpbUBDK_am`mZr~UTxt%p|BhhcvsBaos?n*Z>i->tAWUI2aeQr
zXaB|;JvU;rxyILOU2*2Zzq|5DmD97l&7S$1bCpTUMdp&&VVkI-PiLukA2_ofuE2q`
zrLv9`S4+a`1AWS7H*DTOV8zwb0)XFhU_9p-{BA0n<-BUgT#%_n|4$w##>V55P9<c*
zYGGO<VxbsKoq-iAVV+pnvHDF<RDb}gpi$~TY}bR0q!7iDNGdB}qqKt1t=s8FX(sND
zgETO3=!`*?@C3#DGgZ3qICGAaVZzj&I4t?(7_T%>NhT{`1U$er_RKWI@2#v?5@?M7
z%YQ9q<R?A^#Qy^J7DrohKhBDVru!E^ZTQH!HX;t37*9DX;LKi7`R%9Uw6;13jp-n{
z`ob~bJ`VF{`SbLEgVT{}7~ZR)sDO29-7?o4vz}t9CoD2@G~kdS#~G_SmX|?LWk$R!
z&)UeG1N&^^toa0ezY4IkD3APuK{`4nqpYPIds&Z>oNNJP(yy`NC=f2bZYvRl)wHFj
z2*{I9MU;TEuwk5=cg9!9vb=iRo=;INNrZ;O_8z?Z`hGC8&sw46u>qsg#n~&<l|2Wh
zeMsBxNE20r<+GI}lCLefZCiwXOy5wM5~h_AQ)OJh2Z+oP^m;Sw1$Q#$T$2#KYI?-B
z7X@GuXYcssy$qncFzjluOYzMaIC!ja1W$aTJw?EhM@Y_)ILe8<{+a^^?m*2WvXj!%
zLU><+`nN!gybqi+4-v+q2O@JsJjuV#d4BGDhz4!7hG1!;YF|CehAAnRh;1&zLg!-l
zuD3cDc#*WyN<$gdwfw{wy_F3SGi<+km9DI!3+QgJ@ZO;*K~_VdHZTD&Ck2B14BVx(
zbrkVvX0jtnJnw$_%h)?6d-Oa|l1Mu>Hn^yu-EB2x`sbE{29$`Cc(g7|Bc}HH_nHPv
zZe2f=-CFB!c~u?};qVoZpLw*1J}F5Pp4~J$pH7=*4E4ouZwC3=iz!pmM8Zg8^jryO
zrZ)bnn~ZgkrwwrWCT1!LMLSw<x}(u02s2MFmf%_0Rzi7tUt<txBdPGSoB$$xppZ`s
zGmD>@O;ExM3fg1u)VQpQHi*(O9d&ARGofLB!@T4I8h7%|hbls%u4%Ubc~sh3wsRun
zFAI<l&*|c<(X){3M|N79NkgI}`4`=RbRh_&i8i2`U7?9B?6%MjW8|&8@-RvciH*@|
z(^8mDWE;_jz$BTHyt)zzJ?y&Y(-I)};SmQ5@S<}<OsA|Nj9N+uw?33+BEt=pI(hZH
z_K^8qJcBn=V2A4=q_;(sbAKV6A{GEwLqQGtaI`>^!OmmB<hA!bnaxb<RsNjIABMux
z>gFfSW_z2wY(b1FAF{p5ql}<O)kcWKsviWx@vwKwhU>PK^1|pw<p<QLR7cnBRu*N2
zYMN(H0rQA=&IQP8A4yFJS#cFUkXt)ZVbQU9^=8U29@M8O(L@In7}VLcL^XY`xzZ>+
z7*lEC69@)gwOGsYe%SuWB=tBy{*?2?NYw|NaO5~S6Tz@`k2{_f|2urT(RL*)$jI@}
z2_AFynrU2{S_Szo%4+g7Sd#hn!@&DVFr=h>G5VbzpoRwJ<TSc-Iw)0%{(-TNJm(|$
zSB?#aK4TNsvLsLsVz<3)n3R7I=xT~-p8j&_e?gf0mmMTw!(=na<EgZO#rAWPdgmVz
zkgs~7$yYDY{-Dqe8MxCrZ|xQOvxMsd|Ix*<E_I}#j%kE`E%77tUG<Q4O=mRN7p&OL
z!XbbKs~8(R0f01xb%)tZ<h<8QHj4BfS$mRz5}B5x-_{NtU5JsrQ(63i0g%XX3I--F
z7?~R45mcfe9vU{Z8Va#gSQd^PV0Dnc#&f8T)HsLuyzA`b5fohRC}Mex5_lGLRU-NP
zu@~@k7x>zqSa7m?m+mPgC8@X6V1QLW-o<+ad0?DB@;eKYGH;1{{7LyiC5@rM8&92+
zKTI@c#!OKhenzAcmWh;gkgFT4mf-RT&43$vnO{k}>!`~L1n=&9yzL|m5IJPNR5n}%
z$}FNGvzreEN_bCbKA{mxOE#i9atQNZBR{2P47zxH;OP5aNYhVCu$<dV>&K}eTbAf&
zT~Tjl;Q@8GrjC|JC7S3lJ}G&=9+t!74~!9w%0NPFWx#7@29i1DTO#fiBYMP!kk!Uh
z=Iq9sLKb!#UNLfdT+DyG@(aR-m%&w=hv?p%ceGinMg5csqlMG$i2Pf)>pS<ID;Wn#
zNI4=1_OC#u>EW-9&xsx|lW1EFv(8=S9vI$V#8he{uIm?YwlRVG#GX$7jpP6cC_@jd
zf})chfwyX`a4NGL+QS6Ets@3*9l(1wJp(M^GC95TR!_wa&w?+`0n_#86jP4%4oE!-
zxu>PcGa=w#FtYI>$kYf{Ok!c<(_+XH8;mRtMX?5+v-iGV`OM$LhD;-K$3IeT>V9WQ
zO|2^3b<?^o8uSZ6Obtc};j<}3;;`}J<m%?d;U_V3uSaEdx}X9}7G#vvV)Aojn1CRf
zS`aL1;jrkYmID!Vrc8OD&QDoiM&~Y5FE#l$15o66P{B`LFgXOu&7#rlp!Ll1->{^=
zzMGynL}TA~@Zj4;=fZXV+}+=8JWu-Q;#OECCW<>r_)8d?tx24ji-d=d4}RG=v^~Fu
z1eS~<9<nnYf`H$}cfdg2lZ^o6Ry$BVKrL3I@YJ*7`aN%*mSAjG-SM;X3c`9o<ZgA>
zBbSt#v#?_ZP+nbO!C)2~87(_Zz*m<WIjYcV?X&tckv1g2w^(3t$eY@-_|PruEJf__
zWy;vKH@{)owYIL1aqqi#|2BV;znA&UsONc1Q%=YEg#s?uocxl(BOs)^0a%E@7(fe5
zBgafM>sb3{P0(NSGyN?0w11<uSyKXt9MPG%_NHT$8_u$ds(x!QQCK1T`6vZtLJ*}B
zD7Y1TvDL#2;t@6xVit&?YOZ1YaZ}coL<v_RZIffn#EEM+ZY(B09ugH?NiP1=d+`n^
zEZ+^J|Cc15gkZ&+G*lVx*J){JL)$a8O2pVcMzim6C+Q+=>vSX33?0;Tzz`V9{~;bg
zfdHfz4z=JZ>Vkgi9j7;o8QT`uM%g%94z`$#3@vfvx=MZo$nJn3VmmyItHk4gzEWki
zE$B3|Z-?TzmAll#Y;oY{*Bl`ELFSt0dr6d)!>gee;1w$p4ShGXm`;YXM9xgBsJ6`b
zsPm`7N=V3uKn<IKM_uEUg`;uP{UK-w+&y1`X@HNY*N3N->Jc7Tjr{1kYIt?g9*(O^
zB!C|A{I44>3tJY|ZWUN>j-Vv4sM;aUyuY<8QRHrvew+_*0DCO(KjHq1#m0&&^s<L-
zQJ$`j05rLsb<Xr>>uw*#lGv3f^{6dXbADV{Vg&9a4J0Z(R3WHQ?U`OIRVplqQ-lv)
zt7~5{;1PypAx+6%14&9$MIvc4pB9m%=+x~u<Kb#sQvo%LMKUVB6;|v_o^>)YB%i30
zE*o!CHcdUcY?&2MI`P{1P~}thg%m$*?2UCpXr_J`Ix<ye_`K8T9!u>igQU0>77{;=
z#amEoW!*$<%81IzH8J)d@mBQZ8~rL@i6xfqsRG~Kr3sT<GUq@?^&WOVxL=UWgpk}C
z(Mm;gROjBqoS8#S?<<1xK}~;kj<(~)CA?}4ynIsrPYjNl@PM8RBk8hnk&=Q=z9MQG
zwxH!@E%8F-extg|WKP|YPfErPkXk`J<7`x^hSz_XeG#<JN8;AH5P;My>M4*<larr8
z&z+eSCF68ZwI`myxUj*RGJS%8yqQ!aSVnWRf33?G3l}Xxn4X>4<IGIL@ivKY@12(S
z@dvd07CIJnq-SjFw9XP-cl4$7PZ`llTk2sv1LTmZ>|Zz8pZ0r~1MYKLNP}&hKfr4x
z@w#bBr-n>p;&mV8(&N*Un`8{Vujf`r_(!l~dAFg5OQ9x4c_p9CCs2eqIzV(u$nV=s
z{4{Ykq_%I`Hzx$PTKbQm1>PW;UfDo|nby9twr=Qpr)bi&%(&n&ksSV&&c_i+hQe~*
zh2^L|Deu}68359>`le;FAtS@0M-5$EW=WxEB11!13}WVHgl4%W`Ry+1r0cjc`UD1!
z*D>rz>hAYSB;N1MRuuoWG>(VcGg0;+UU8f(sRW0dur?aKbKD=uSF@qT7ED@~nar4o
z5JT#~MN_p+<&^X{-x;o!TKtG7Z%_kEXjnKxs;8K#PP+zJnlh5pAiF(i0yP;TJs48e
z9xx$@_<ZIfdUW{GHTnhvlj1*-K$WIJ6qg(0hyjWV(@h*XMg%!d6!24;<4ylPwZG?f
z)sDq~2mnR7zFQDpPt`{9#UCVa@`uGHH}=NUG~P=oy*)8IxHQ9qe$97p+Rb^3f=$|n
z5+>y3oxV@%pMo-CvN_Te$I`B7s78lc$@qV30WiXtydk;sR&F;0s-G7Q_ng}P50bJg
zjcwpSQ;E$QXyHQ#^#F&A?JI*8c_hX51+SodiO&RmO-GRXhP!-)D*Hbod=ybEo?4<J
zcf~?B<_#VH7lLcFXrsR?U#B@>|8AJa5=kpB>|#iBee>@FgFDdx0Fiz8uW#-eS%w2Y
zv0pSHgtBNyr&BgUaq#WV{)yGH%!&uym|f*wx}cKL4JI3yfwq(T;Vd6-W2<($Nx#ys
zlMZlx-#!0ex3<={3h(#8G}9z=N>r{mA%|~BZv_A-08)9u1b?_##lO9y22+aPyUtC1
zq<#(>jdMM)h#ERvyij`oCu}=wI=_u+Yo;>43&j1l3n?nm<J=rABWHCD;4gSR^M3>L
zc=`xeqqfm0bN-c?y48XFtYU^+D%YIH=l_XGhoGlYnJcxtS~<0*(o&je&E<vQ<f#^k
zm>LRTaRGRMB>^O$8h8nG7mOrhr&lJpxNKJYDLF&5HT}4a^dZU%7)0D3#J}11@mO)0
z*87<^_IrnNI6h>S>;+XKVErGe-U28K=ldR}8|jc1>5`Q0mKG3@?k?$WkS^&4sfTXq
zR6syLx;vzi?sxI``Of^`8OL!P6n3B8yXT&B?!CQ4N<W7(k=NbU$$(TgNAWYAi5tEQ
zdkp||9yec12<2Y*pLZpQoG)`o4rMc{2`GlC@(qp6sFeZUi620dLTCjurn3CQU?q_)
z+K#QOvnK+Up`=1VOBV-tPMYqTE(3Px>HR<v8mL|odj<j2i2PbgZtUEbD1ZQBOJ<vt
zain_pmw}OUU)*i;<SuP=u4N8mijgQQgAj<^Kpd4Lp$eaQNc|Q(&vbYE_W%`f*!Y=}
zX1z@CPlWrcNI^%k{36T!is^+Bi;lZ>2H;P8`TL>#6ZwBvoG<={%7Uviwz)c9(MQ`k
zi1-&Lgm1@VZG42q8ky0+1w-I?MT)Mte(00sX&Et)*QORg;h;=K7Li|Om0Upf&3YmF
zddK_{Pgl&v$5v8GX_v?G1v;^{@}4SqlRm>lI`9O>^j57MH2Wfvpr+>mNuDqiW|V!E
z7eBCTeR79jv2l(}X}G=X=G9{RA<bqFn-dd=opkx$X!1-il*uL>cvQJ2U@X}DI`<*U
zT9rhHJuaOzDr4GVkua!0QGB`6r^9$Ryi!8T>VI|Rmeyq6dz}cA`s6!nbN?}FJ5e&*
zdqL<qS^5o<I!TV>zp|Qme?vOF-DVj!zj;eOD=N_e0niI8iE*~5L0?2oYv>)Ag8ux`
z8XlHdouDd-KmvGL^}<iDVAz;<4;qTFg-?YEIY~AU%x&@ZFhFAxr)`kXz$J5Aal}dQ
zbb6zv-ep6D5@7yHE^r)MS0Y@gXKd=L>+T^7A^F+feU|}da|ASATE7GxC{K@Z7Z4iV
zD6Z8}$k;TCL(Aw~A@{pe3>KR>@r1X41Y;MVw`*WgM7AmFvDe!*9u7Wcy6+K?r}no6
zgXz8~mbQb5%?_`sm~S}W+P;~7_9)b4GukP;x15p)=tVg(hD5zF5$h(=$Pad+d6wou
zWhV81>x|>lf@|F}MUe=Y8p>j#@>fb)y@+U0LKl{v+)Vfa({<hFJRjVy%&o1p&cROL
z>!nW%siKGP-_sNM^D$reb`0O9^TJbJF$5$G77(Ny2GO&3JPQ-YlfhQq*Bp+ltenYb
zk(`q=tG~Cf3PRkx^$;r{KQkw}D#C;Ajw||Y^_$7GiL4w=4bVU?mVEJ-R%O*$5?$Q9
zf?X~b;HIo4CrHT&)g|uyyitlohs@91;fBJ-#>QC%L0S5Uy8h2#_|fIJ^hgW<#<Ok!
z<9>#*?)KhYhQ3(G1+&)H5wn&iQlv6p9^?ed>R3VwfNb2~gRc5UxfcK@1`y{gAyI`%
z&--0uGztL@RLQlA&749}kk~Sh@82iQwhYF0=jGa6mm?7pk@as*MlHakb&(i4zS<!`
z4(XwhO{{eM5Mi%CGxv^zNkgIY?yz;(amAsR3+Ws^z?5(!M=wEYOJ)*kP>(AVQq(^7
z3%Ye6o$?QysWpW#AK#uy4wCi`2s1RuE<!&hyCW*SJzs&T<#sos0!1Xx+yvRz*9$^V
z_fFcvAaleADsC+jU&rHs1}hKei{v~7Y!Lv}U|MZ1{P9vMslqK$)U_tQ@O(SFBu=Ix
zjv?7+6I5}Jyc!hS9&vemx9C0bQRkZgT8!zKo8|1Sd6Uj5`qvn7mfb(3A|oyD+wbYo
zwG-F169=isz}xn4HC+G^%dmz&s_NS`nYF1xPplt4%yhT+(||yV!l^i$Un$&3=H-w7
z5)m@hQF^9RzS4kDc^E>*jaQRjHhv`W3G`<pl!15BIb<NG0kFf+z`#EhwUq=U`C1ev
zBfn=2si8XBt%-k~>p+jPt@0vdj%Rx{??E#b_11-Suw5Idm@yTVgKc(Sn^JNZNynC<
z#CWChL41NnrEbi9zpopOhPHS2_C8-5)`a6`|If(CiM0`Of^__Kz33&3MI#M2vU)V0
zerpx9G(UJ-3O@I5du^Honfn>5Ji9<boEpY3GNScWVQ!#MaD?{L0k8$~RMA(M=ab~f
zyzVsCc48=vtVZ-cPkHA0hrlmpklR83^R)m7j0mD3p~C>SJp(3LvmZ_jiEuFvyno@Q
z<&A`bk^zN_F`Y!}-shj6T)JZ<z{@IdwIB}YoG?mi0cA}GCgbGv!eo)c^}HY24sB%^
zx;fNISPiWf<-=XMeyphrJ^Z+R<B2=0yu68*$Mn9Ua`d1T4<d#)mzKJ*WH`-h!c5Te
zpF`kNt+~ze!b%9VT<gUNuCL~2>*tg-OJtd)6Xc;bOlp7@UT9vg$3JES*;po`Qf-J)
zm{>HqEb<qQ|1@)6jIS<kp&7PWew7XNOlKt(EaD(9oU)d~)bHj-QwWV@$a|*&0|uAX
zJaz35=A`}rN_slA`TK+br|rH}N=HIxeOkJGl394DhoELhx4L*bwW_Eq;l043cX1Lg
zX39^~I5pnQEg7kpaxg*CpROxf@x51d7wq~vFj{L4r}<P}k}U|b$2~AnHcHL6`9x$H
zvNO@})lMwED01@4>D@Oz@xyo+EKeO=8f6*(Ll~ty6l?@<UmVazqc#k?prSS87Fm9^
z8HWbgDFjihkrxA)09fPXn5QfO=S_T^-Ie|hos$%HE3XDyPCUXhzvSNP7BT4h>jqOG
z&IH}K3DA)|cbFxEDN!*c+u<V2cFW)WGBX3vh7y|<IpWvRf%Yp`*Q>l*o$n3%KdWu6
zNvYytd&hX(H<@5|5W9hnNF!@d%>4HgkZQiGaUqX!hRARtyu}x1+SVG=rz4$TK41-(
z3%wYYN7!HvNgvxR#hE-6)7J)*Y!Ap(i)A-$|E()C=!Q@gk+J38wd(ZGdZ?K0$|2=O
zcrA?=NG6{Nlq0BD*6e6}dsaJg^;XkK>p!~)Av4K<DH$W9Ak){QR|6}<<PI%@d)4KH
zCiR2n8CJ9Lz6=e^r;sB7R2L@Pi-<mP6*%BWkzJ>7U?NQaIp23<!ag{DRC~%ER2fc!
z7OLdu0S|!Y%e`cY6n2cGjt!c{x(AwPs8-M|>p&8emiC!zKpy=v9jL#}UW2FRT14@4
zbVf|ZC8O(_5F7L{b<n!mak5Qe{4~d~arLuYez{iG=D>$KO~e$Dx#x%1VLDu&9|cEZ
zJc<2<Yf0q$U!5WWznHZL-9cb5M+Kk?c#lRrD6+XCUk~c=;DNM!x77hJUIwj3(?&}Q
zvMrWU@Ftv?d%<Pqi;;l?_*)Xas?YCp;DC4RB4SMKAxEPIlPgM*$7CPfd3QjI8*#wQ
z!J*QPwTu>ggrcg{4otA20Ci-0FmI6q!MVx4`31EQFV`OhNxAjafGWGb^mi_US62)$
zX0KHwjM@5(5p)x~d7ojFJm8UyY|My4isH|E%{DZ0>7o=y8cacj-HlA#e56MVmXqP9
z7ez7F@m>+L9Y3@&>AF0&!oH)1r=s%Xfb4OJ!X$$OXp3ktFUp!v+MvG*rbbI(MgG9j
zhi=?HPthpnMm^nYN>diRZ5T;K3hA!nGzkyWz*<wKzUF)>q!2p8D5m(gG;cc|T@K{G
zaeJ_n;BzAi`EJ4Y-QQC*Sj;f<_5ctFaQ(jxw1_w*yAz9oK4f)bn^M2CQZ|v~NZy4?
za)qrJ1EHT#Wn*z*^BhQ3>s#DAYFr$uv#S0(0f*Z?sKaGq;&oX80zh&Z8IA)V3BLva
zOM6jefY&)}u_8f&i=c5`Z1bpN783ai$MAeMpWSfJ<R1I1O~rOksR1ET9S4>~0b8Gu
z=S+qUZw4T_Hn>n+omk~^O~U(u=_&7q|H{aS9WC3~g)5pSErLjEv)c)cuz<_6Q9{8;
zp4ok_aKr3Z5bnCAXxsU1afzNb08rg$OoY8V{kc}#N|&M572K)?x!{e0<$uc}qf?X*
zOrMjRQF(krQN!MsA<Z9_m}1MIF7f})8_nR&02c;k>+ns5Y`(597k;48u;D%zy`<wM
zA#jlNkZCL|xE7b6hVXQzj|6R-dBMB^lt117(v|0RA#J`N38@%sTh}R-765W@Ua-M#
z;G^s?npNv}&1+%Kr2sh%DG?R20c;Gd^;alNnlaPi=}dxF<viD=WpcU0JWP%b?68Sz
zJNkUVKT*&FyFgBMLIOKY(Z<esb5RT@;{-{!UrIU+J3>YShxpbber;oAWyN$k8+}6w
zMKEs=M@w~#TG>_C#pRoRbj5SxNN_;N>nXovz+!`Fs1A41YsTL3`PUpC{3*X9Xk@w#
z5)+!B+}Byn)z3F)++uq#z}fvU%UH9ik+Up@mn8_J>!>d}%+*HzcB84ok)N;Cr8dIQ
zqFoM4(sb&A9uH(HSxW@vZVk=O2Qbse^_U=B&fYCbE4O2}p@2n3z4+L|R@d=ZNIs-6
z82puM!d8~#V7{5lSRFmL%wJ7?IJL`rkHD#-#RHjE=iZB#A&(xLGXwFht_vU^P-Ec1
z6UBFG{buYej;`hXrE=|$DkK})4QTK{F@why5T|d=fv$&Ytm0(IILsbPva)-V@*+<n
z@fAv@VXpS<pc=>xKUAFW24y(jPkJPte2mf=fTL&lqW5iv<QLk3m*<3i=(7Ux2%BSk
zNJOqywY(>gr@TH}uErHEL35fh1>-sQ@hwnX08bQCTACI`wlKGpB2gYfTT?7D3M>5A
zT0pDB*y>JwCPm+Ou5_^J@OjhUpc}EsZ<D^lPbL@ME`^H#w*v|UEWmJ%G*eWxrXca)
za}Xe$yZ%h*+fjSOMF|<dG(!Z5v|bgI);fxomNCtWLad#_9hBBKpiwY^lObGbTJS^i
z_zS^8xSt~o=o3uUw5&5&5TFUfYzQL7D2XS#f4MP{!A6vPJD(Lv;vNH^B}!0%`<E%c
zy`Ji5w!w+fByDt0JT^*U^}JaQ)17RqQ1774Pj$Zk0t4YRR@iT}?Q1HZ)&INzBXT@g
z*-ZD0qaK_DEVB)!pm^|xF5pwSJ}|TB0V6oL^cuJTFkqZn*QDl&T*|+P*@F4s4F>{P
zGLkC+SYaGT^QQxoH!VV){(srufFMP68Yj`AJeW?SNwtG<^v$#zQE1W~_0nfH+O}!>
z&r+m7Go!W6axK4YE@nE~#&?Za12Y;J3`x;so-fxXpJ7l{HswdjWj8617Ig?p4*DIG
z*0~d@$Xf9!X7R=7a1plhPGvUmEu`otrNcV9Zsf#2OBK+aS)RE2m-XU&bN7JrAqX4U
z38fuZL$gLznJOntAA!xNY-wzucI>qMXBA+Ty?>7!LCH;^2y<Gl^;VwU7ewvGBoPn*
zz5^75f&r*779_1Y35urFW>%=o+cpf2crBZ!p9<eR9_FcWW{tDjpT7}_TtTf^2%Up6
zt~j#Fx1U5#bv_3DKdAADIO37a%nO`IqdsYR@weU$|B@xc__5(QbC+{YGmyv#Yy7RT
z1?~R`^?MSP*C^$sC5DMVbCPM=E6Xann)Oth&+7WRxyc?;OvD9~|0q3^&dNJgtjjrp
zbk)~;S;V0F-eIp!2v}!=s7_1h&Yns1k)2aYJv1e}`o(YKzo=@E#?T5jr6)KSDqFCf
z3}<k#g3BFmQd`u<CF4lFtFNbKC4Uf%^S0(cGfdF078c=w2MNft|JX8>y|Z;j^ah@Z
ziP)5tU4M$&I4XRB0!A`$?*S?g(g*^$Pbyp~v?$_VqQ3VIDJ={*l0DugrOv`!aok2%
zUq6|wdwo(}0t6F+N?qmTZVcXN3+`U7q&iOQMiV?MEi7^^4JWTLHb;1jQn+!pc~>2p
zMd;7@TrFAgd|Aq{6)$(W4-!Cn`!VTE$Rr*F82fr!*;iA_`oR7|)ONaz?Cj@@yn&;-
z?F2l|hBbhH(tX9`kUH)KW>Yh3)W8OaK%Qh&7=bO~8De+^WJgMjb0Io8jwaUr2<xE>
zu?iLaF*7|qFZBZ!9C7VuQ1$$9!Lt!6#zqZ?#Afl?Ll7HnD|$C#mqM%jY#E8(UOSfh
z;YurA8o8Um1ZnhlvvNWhI01YD;_ZaQmuo7EbU5|#2T9Es*~6czNVjvq{V49^BDE9@
zOc6YOYYdd}ghn-s^LgX~?^%7h&S37r1gJR~Gzidol5$Ha+w@e9AN%$rh(e3%j=m<v
zq<U`#D8xyyl(CpswYlI<S-Wgu3=x`Yh8hjuYPl{pLnFojo+=V0Tgouw`o@sWx8b!c
zlLP0+nLRK&M0=UD5N<EHHzEHkuaw&B$V8N9=fsbsRMgS<09SfCRqPzGc-wTyfdJ&$
zhk)=uo)I0K0&E8+Qx@;jzYZTSMASnfguT5%I~D^R;P38Y=cI~%7DT|@c<<W0>iAW4
zIFN(IOyDfwnno`SrhheI$gmUBlN4bBa5v0#_gR8OgP*`AWeH8gl%iCj!;W3O=A=@w
zA7%c_^jxH|Td36R{QDiV(uM~<#>z^ZdP-d^Mh-Yz2&4PdAJMG8)Ivig2+J>IkRpYd
zLql<HAS)@XB7U!JoK2!)Nf;x58QxuDF*Y>FPB5ubea@Wqob_v&<3V=W7-c=qNRFah
zI)@f?{+kos>vDF6BH@dODPRNs1?f4Ea#D658EyC2x4B_Y&3s{jkOA}c_<fca8y6RB
zwsEe9qy^+rN&WotXMR_j>|03>Wv#rz2PPJP4RiB4JY}EA8P`vF@@q5|A?c0Wu^;V)
zR4KY^zMHrc0lw!opp215rMiDR&({{)Fbb<CaV<1XgSXz~=d`!?HD_pO;E5&nzXA;d
zw9$06H$_0ZPFYF4%d((Z{@5iu+;%o+*O~t1iV^Oxl)TH%DX&IWodc9v%-3dsPcZ<z
z`u6KH7A!0v2QmoR=m$M%wTA`)I!id^1<DKSKQa=mFkn$^Q%uPKYV8CKH$?xvAQaTo
z+9|(-sRJ74Rsa()Xp`la+(kOj(^OkCOx5Rn53vXZcGia{XZ;UKG2hpvkk!P1C}8bW
za`G-0DiW=!?hXd3UE+_)j7Q%(8%0d!B<BZBL2Hw6>4#gFm%>*Yy~X3gm=czy1PKG;
zGI4QcydT&-&Y$rztVO!0yBX2<>QCJBX;dHMX~tYyt*W>jt3hw9g?B&SzsvPp6YyxO
za2hV$LlB$(<}Du%7Gv#QFx08Y!9;*}pf-j3zkmhIX@{%bL5;o$VvVLC@n*ty{(5$T
z6w>o7#p|eUX}=dV_}<i6gT7S!Cdja)`8WLr?=%&E2*q!mbhVrE!O3T&xc5-k^(jA*
ztLB?&fYz-RpnbFUVI&P<;AN(vM4oc>w$*`h!3ZlVVeHt4_MVY|3)`PceWp^nsi#$9
zrn2wm1gwhH*#*Y5kR3OCrrik+Xrxq0CR)OGL--e}oPom9Yh_k8N*2jTc8l_G+%U6A
z-wabA2oByuc487Hjoi<8Gu`b@G>-4cj%Fiqy&nwe?zzMUfD;JD0Qa0ecfGucl15Gj
zsou7+=s)gX&*zS?vVwk8MldcS3&W--PpNZK9aDzJvV63)vXPRD&5`)Bhqd_&HJM*>
zAp`+REYVO`7iToa`})v2C%ylaP>-p)*-{D<Ht1IUI|!wj&-Q_MIwRiXXzI8V^8{)#
z>zOZ1douv9oc?I-{|~bWDFka^<<Yf#l3&$gl@E-P_dTk6L&6%XMW(SPuo64vPww%S
zKoQw}Nq)LMHL@WZlFmu3?3E?Ze;|{tn>q=c4(enYU|q$DoetAig5fUmKpG#s=5&hu
zA8MM_g@MPcW!8?HUP6T<DFqZepV#S4+R(S6cSB&UFF@taB<*V+wh-(b8=Bfu+fqQr
z@yzgB{19?T;roi5T(-ROFr9T4WzErjB12c!e+3ZFcpjOvpX0cvn{~j5lB&;d970xg
zP3rWhIT>6g@z;vqYe=XKO`9VSiMfrOnSzxqh;S7E#LC<!?b|#@3k{<Mn5<_&6RA)L
z9?w({2?kN}H+p^hbNPm>e0thSJmf!8`IwuS<@&?Yw8Z>073fGF9X-7{U(tO}ripYh
z{ozjWwB759<~#Qbd2CxQ0!3M?#RSd_`DaQ=1#F9V&i&xV(e#B6sy}%~<U_V#W0MfA
zMDAIl&$?S}Ehs3@KRlKlXgmPMJ@~+-;ARKF#T*|B!)swz7rnP{&kFZ6c(wTK7JWQ&
zJosiEWcThM?P-zUzO_4j>}~h&jxl)T(zJ14HZ-0oXfk`C1czMmd-}yLA2IBDLEN?0
zqKnr>TmJ<dg2UQh!ceKAwTll%4(><xWGDn1HGvBjvAq;MG*R#0K+U$De$3qEiya}Z
z;<}GrKSS!0f0jDM`R%&9Uz@WD3I<^a9U=7B#ku2`i^{We0_Oy}K))EKrc8q3G*w$F
zTnxOpYVJh+3%`FPb$(!FX?VW!)ND4D8Rjr|ivyZZb%~Yh@v`l{W>FlMB30o3xZFg?
z25X<0sOF=_8q0=X)UE1VmfIir)3gSk>76jAonLdezQH!HO-3T%KczFQf!Y<2Oh#cV
zOyj)QO5I<t|Lkc%Zvlx)n=8>0DipRq8qC<UrjYaQ@hY4~3eD`Wyv}9xAO2hX^p}9&
z$un<`E;5X)4FhkhaEuc#NL<75d{@>vdoPqq0$7b<hEPr*L~VQMC5)yNNCLj6i`GuW
z@6d#qyKXF{u(FnXwzF!7jXgc_<1NshC8zzyi-)DFybTE`uKjl^AM-_2B}s%`$Ou@_
zd-qMd**SS)0=hUb;dFIrP(1G(UX2Sao;bV{qy?rMc_$O7)buWWyp8#UH?=f{0(tm+
z%|s64gz5W{MV5^aE6^b+`{S>5i3Ve1vhdHYpUCi;O95U_NriQ-xZ!s_sD{uI>%QIj
zXrQ7{6t{6hX^n)H`pi^%#l_g{9~{(QaDdC^g*ggN)eDu))jR>ZH|n``e6j_eIN2nT
zWr)c=(Jtz{g2KTvoDr#pxz>U{KnK2Du&)*>oP#%>L5ak@?Y#f~I?^*6TpYS68F?u2
ziCjyfFa1H*Wi#tn<bG&=S4~fNQ!{oVrs~XAl@;QqTSO%g0%~d^n05kIfy}S0E86~N
z6Ze4+u-zVhQBnn={Yu_i_Uy#rfu`s<Ht9V}fD#M>r|U!p3Ap}?B0veGcRHNHvlJ00
zsmHxwW>LrFz*a@1*Mp2Zqh8jQ$R(8XrU9;OP;#S*mMgAFl`JQL=OBzjmPJg0$q@zc
zNd)t4PLTYed!2%UOZ5&eE^6jK(JUF_W;YW!N&@i&`rQ8y=LAeDc0p-qYWk|61`}lA
z0Pz4b#>~4()<09QX}N%4m|Am`$Oo(jTVi$1dLAip1-D!8y2qNx0hiEgs8wSAruAPY
zwe-f`?%xobmOaDb&nHe&muj88uQ15_xiegiEG!7qA^G)vVSj7idshp~hTfo|Ob~aW
zT_T1i3=#na?~MNL!_J4c_G2oBq4rgtA532PbnS)r+~lq30UfUrznwF{fC7P3)_JpC
z_`#td>0dOZRy~Q86coA{R4l=u&z2fYAG!tHNnKyGZmVk{4Ru+>`1rV5tyz_LA}NT)
zubvN#hPlq6=E7(FkscW>0yka(yZWQp!)XZ?@-3xj=QR9(L=``XKb+8rqiiKw8dfEU
zCKl;=WHNtCWvj>r+*j#Q;<a?Oj4iAa9+&^&bE{%nr0`KwY_6*n?bzd;u?OTaK?|xv
zY?6LZhmu8ivKyR=$2DbrO0e=|oO#@eq?HV;1dfn>r+x8{sP5S@TK$XxngTHvRLPTA
z4B7!BXKzTwF6MtXC(tA-vX6eiC2J*{ix?f%K^W!uVN6&Z3S(aLsjE0U?F_Oe*`{eC
z+6aoqLc6{5`+om1o-sq;JWTFyYG}|JV)?PEQ;Q&?HypB*Fq!I<1uV97He3X*8TpLS
zSXU6xldt%QZG`2O73aREbyI1zHs}%dUr|*vY;d_O&cO%uWN;Xyn!^CWXrczVx3&m>
z@S2QkqMdRUDnp$t*)N&4H}@J6SG`FpIi2~kq4cC`*Q^iHnDEh16~8kYnFOTloi6rp
zMeQ^4Du~|wls;#{h6Qc*!<5|i16pJ+e8*t=N`;?;qApH4+H0c7S42}uTyvOQfPxwA
zbVj01FE8tlYCLC>%b=M>1|FZ=3v0)sVtrvmv@O*hH~J;}i{-~kDr#kShy|a>7PYJn
zL+VPqSJOo$ei-6kFT>N)L1~Jd_2>;X%G56+@oYc$=gm@@tL1>bB^;K2C>N?dDB+^0
zCZ?x*!=zSDN%+B8?@9&`U_Ad3U?7KN)~W*~Ey3n5W%Bjbfvrk2Mz4PO^}ZyA2vx27
zrzLOz4x5(Z(&>2es0&tR2Zqb5%yeIV^gUOQp$xN;a}dj8)$=JPZd&riz?BT8LRs}7
zL+kC@ki|6qgLt2Zc+|ab-(kNTGn_xB0mG5L`zK3vTA2(ZzvkfK*~sI*RZ;L<4($mK
z*fmg#b%v%Hs<`l{{Fi?C=_$PgrIW;lQRiV@CpZdNcmmPuiTFE9RW#vQvdO8({v7Sr
z-lYNFU=qgR#?10^_Q#*KDZ^~wCiFg2e@W!38c}6gKZ!)%xHPznSvb(V61Mm81G8`5
z4>_MbXk(cw{L7M_b7fGx{S}$7quH8P(~vZuZI`A84L&`;_UN|0Pd3PcQ>C%iA>kpY
z(;Y5p3*xe2EA5M%L1T==!952`H|}Si?g^;^>byIL^~wRYqp|$w$7L&-f01XNC*D9U
zT)<8jYm51a(Y4>x*2;)+x}<RCT%jp7n-wX4qH_%<cUsJyT$S6ZLw^b#qdJ8gaMRLS
z0|CPPv`m(J=Xlu_hfgt6oGRgDQ1(g4*4{N0LX>_QTugHkWTdB^DKh<4W#%BHuCl{U
zR@eA>zIPkkU;4gj4hNQTi)o>)Zl08Xh0lj3w%Vs`E}vT6&#oG^wGz#iJYjw@xG7+A
zZKpSYnPA$jcg%4A!>NFua|2a(mVryr7u;*Hop-i6yT!oB6Tl<C5|gz46e=ywCmZzL
zg$|UNcRn}}n1m}WCQ;ZNbPyeJFJDn>?nSyCE=G7YRfTocNzvvH>z}xVjjj)bmwh|?
zb+<Orc4El@-1!MqpdMBk7aaqi?4$!V<z$~=XWm8H{Iq~Fwb%@khqdI+;$N+g^vQGw
zj{Dt1BG1JQe6ZMBJ={otnBsFy>w5cx?y4pc_)HAHRwQyI3<QjJiD)N-Qd=oj4&rgS
zo8R{*;GX@TV!?6x0k_gV$T*=-ehhTtQkZx@gG-C~R^f+v=?J&V1UNoKZO1O2J>`}}
z_rAiiR`@SpgL}fa@eZk)3J#w~!$#D}$!Sl(-f7@gDE2eBgKo?7&TrV8{-0!f5jGS}
z^ivo3*eBKY6D=o-Dt0@s=^2JBluvmqFfe1Q&<r{Fh<;wz0Il|Z>**ZoarEgll&jej
zxhrvuy0R<{>Mr_9rUmy+IEn8`3jZ@4L#-n1X_(qNVGxsg<9;%rb&e6@Sn*_5ewNZ(
z?D4N0r&Jji8PvuZzN~{Ye-*N*<aashBmouqxN9TYxUa{G1WaWxP(u7Z$Zkt5WQ9`n
zs+>f<cjT^XpuihfOT?W9CKeI&DhSLa=GOQ^kLivNtfa~Asw#;G>UGzM&82qefFml=
z(zc=-U6<XFe$Lw!LecY|om??bUsGq2y)#g?|8IJ_p1|Yv#&$o)z-P0~Fy*tC`@=ZX
z{AZGrP-m5j$8HXvQk7g_fx{aX=$1o^PVCV5G7pQcWeitGOOaqYTrjU^V4VH%hR!0a
zoQ5zh78gr~b__Hiyt)4o-lhugs#~9HcXo6a>?NPnfm?>Nbm-Toabd!~)d501pC;H&
zerKW}@r_7q;?CQ{S3n34wD&P4=3xPBQ4bQz$Ld;UA|I*W(=S$a7*y&W8i}i~{JhEC
zmVIG6cqU@Kj(l;yI0(XMz_mS`n%0x=O758ZJQw?boB${sAi-x^Uh+e*wa<cPo-`7F
zI4mhCk+2L8{f(WI2Z?_bk-yBI{&BHvyDKk7O?@PXU{HrEsB6rG5H?5@97mVH@5;Rc
z*s;2?cHf`wrTMFm(y@VrQt;3Qr?oyZy^v1RT|8(;Y&Xa{t_P`8g#iad=bJlTyoF8n
z@q3UVbb1?BX7SPX`T4J_{)5;-MMX3MrxMuQwL?5R53YW=uX(_Xh9b-7?$&>$Ccmmo
z)qgX5qNG-qi^Y0{_rcc|)JXjiK7u*x&PMJum9w%OH!~h?+%(K4QKGg2VZ*W?lgrY#
zz%>D%I_NqJm+XIgPT@9_)kO1<*D@bFCj<2_$r`ET9_l7AXEE&0x}dwDS8k_Nbj|AT
zh;7O9lw9hX-4LbYj0UAbD(e(O0-B)+qczu$M|Z$o-tSoHHa2Vxbw7LEMBcJ^;hvlk
zJxUlPwV^-1Ts1TGVRh7XB@866XC2}67)0Fm5|WZHIj2zdRzZVc;bq@r*!!aiA@_D{
zlV+66K9Om{@(rHP`A}NPxpeH{%c?X2@?|h3u>c91|3@;Mh(b%f_a7bZUPZ*HNEko>
z%)^9@oI0q<dEJCma``40P)BpVI32&0BOAH-sxOIh6bDYqD<N2#ATNAoFXa`N<!hNJ
zh@vU+%`s#9M^+Bz2)5-zK!OZnuhRF9MxN+$=<`n{V8X1x*)T^bv=8cs&NG#QQz6;?
z5zqm3e0thUj(1edAs#}rA|b)gixHVz{wku9TIiFJ?GU0CY!_8)=T-YL^$`T#0j+E0
z<^G+Oo``H~2Ou%?D=J1D?_L2b-7~iZ)E<Cd;qjaojZ~(gx-^dGVpi^3h`IHoGa`cA
zNro6gS_D`_0aXv2RtjF#;AA_DUS?pTjI8zs<rsP!5k)WohcGJ|eDYK{k?0uC!P7+*
znPh)BRJPYC1t0;--?YCrH9ly7<2<BanK9>`#=mPrznGeBIQy{Zafd!g>*+>5ZEpcJ
zRAT@#YRPU`3@gaYx>DzrG!p3AG1>Wu%?X++P$R)$$YQ@5N|2qVs|DpTuTBIsgG%y}
zDXg;n*3?M^77dqQC>bfmk7#goweYb+$Cx^sj=&hx0`D;J)SP5!2zB4?w=1{jD~_Eo
z@gyS*QEk6%us}U>PE8A^z*a~_+kYYd5VYa$=a+JmI_EInu;+@d1-tvn19W7j&GBY;
zOhI^H*d=>CYp30vyNsbLTZf3RPb8u-aWGd$n?zSb%Az{e)v}F4Ny!XC_F6obU8>t5
z>trCkspAce4b;QBUY+Rdfo}K3lDXE(&eVg@X&(+43{oQ*lJxu!{?}*$No(URRT`>x
z9z?HC5F0SSnlz!B!^U(NpL+YllnQRs-wSq|q1j_@T;PWE7hu?mg4_EnRk_S02W=3l
zB6pufo%#W0bjJTjEBETasgwIwbit{}igF8vx;659H<L5oA-VR`*5hyEXR9&MC1=G|
zY-l=}J+Q=(?way7HgRe!c*x#&jH~;XdR`lXI;3Qxvf&sJqQ3#sP{E}@5zySYkxLSS
z7GdRy>3jf92%3_x+XUI(J10zC_8tF$K7YFa6mrEV{aad}JK}|tj1=K+jpKPJpupDh
zy|hrDfSSXK&mVY2B<svo01fqYqGbQ;L05EUQbNTYnU_*_7!MTHTG-e~fY-2RgQCV)
zz!vV@_D36kwkgHFbgk8!3dhl-rB*O{BSC{p!Rk}u=e0HL=W1#_&!~S&Gvli+{Kevt
z8%!j)zl|MS+}y@P$;=j3R<$&94P&x5IneHN*KLQq<;T9pw)P(_d{)5hqL13zq+Z>D
zfLi*G>jn1#lpK_A_k3_D_m?rjt|2sbwu^Y3^1cM*uFD;-o}K?O#$BxSdW|qt?RtRp
zUU_O?IQel$Tm5)zz5N9<GyV%P^&4PGw2p3CwT?#5c4}|o9v(bG<yoF1&Ui6uwk7Rs
z>!Nl3V}E9BX66H6hY?&Je@b?)wjP2(9e-Fr<jLOYW{;9gi;r6IIu~tw+`dB!p{cQ3
zB67!jnNI<Vx2w?#7{#LgW)<Q(D&4!F25#WoZkh_#;k-T-X6oU!M!*J=!z&~bo{2iM
zQ|uyX^2MkJM4{exH@mTg-&G=Vw_xIhg__26=D}WX=_pI8Uk0EL;v2qe0fiOJEkO<t
zfnsK6{*Z*8%V_KRdajirz9zN^?kc=|q)B=8NG$<jOt$O`MbR9yQ`DDyrq5FR{UxCD
z)*+9dZb|vekV2E@_I5#gGM(_|9L*JNZS9J$0=aidhqa4BhG|WQe|}M$I6yl96>S!D
zBrSNO_RZODi}e3;M_`fH$xybHrw@3STb@r+$FT#tDRlL%i^LQvziD3%*sm({@2W%4
zDoDxxs=7D8p7S+$)dL|jFFV)bC!JeBn#s%!<3Cfqv^_i&nA=cL>;~m;cR6og@6iD?
zxTGzH=@kcb_m}|4yKl?-$gzmdMJolsCa9M~URbylCxHdJADE>|BB%7b3i;2h%!kRP
z+>Sd8Avg#i#+Zs`JaTF#lz8lCxwc&#=`4!WosvtNMgFdzmX8DcJtjPUres<@p-Gzb
zNXxT5z2+!AVc72+o|T<QOuf*o@#t|Y(y6E?G)379tBM(vMlwQ!_yT4;f+&IrPCjTL
zY;UEU!=k*Bq#oMj!2$Z0jxR1A(D8ax#@DRM62xHv@hOsxa*L0#vk^;jCf`CSjtg{t
z_Mo=L2GK<J;|jsIo(^8(_cTkl%afCvgxEquPK~L?$@Tx4o3LtsG?*xW#!{wvVw~*7
zWCI~39*>3MC6GyZag7*bw49Xha!T9uuXcyueK1Mmj`k;Jx*|Wi#s<Y6fX|@%WRQCB
z`vO?XZ!0An7i_u3k|lMudL2RFCbM#68#Rtyj5W{kLT<9r!H7Mb+j(Rb%I#j|$-dqv
z&j5kKo0l!{N$;!EbQBYd9vLXGL^w@J;{!Dg2=*Or;9Ecn&zYJq*s;<T)3raSE4cqY
zg#-t_!`8t5ziBHLV012M^$*%%IDi2+lfm?_4`yp%wHs~NOs^Hs8KL4n!c3#?cuj3)
ze72z7F&t0e0yd$SmpvyH9yePlpd0nx=Zn#uv*$q`;Ar#s-t&O}>bHlBsr)aSx!UJx
zvLNs{ql>Br1M6}iC~-i1<_Q%<;YpjAa?4+mk%jb7&*Stbwk$J+fH<JBB7~rTmp355
zqD$7DvV(AP@_?SZyZK8HHJ$~P{?(_n?YzP2M^Y%0h4A|<j25UCIP?s=iID(sf9ZBB
z8ZDP}-08g^Y7kG27KpF6E3*s;5pc7#M77Rx9vieC`%aymji_!7utw*oxW+9UumR0(
zqW)`hf+`BWSoxz^29=UPHmM~JunD-CqD|zS8|^bsUEkC>1(3a2|9+x2jwo13ZCx${
zw}uP{0}Xz041jTC3X~q$-7Ochf52WUD5za_k!~>pp)b7f1^8Wk=57A&$NK_tT3>7E
zrD2~@GV=>@8O%Rs0&IL%F>ZOq+T0BP^8%C~%K=5n9P$4gSI_KMZ{T(ha$mk-AC@%H
zr%x&%w{j@iko$6L^3T`ecwhoW?tTA~=(zc1!oXZ*@Ic7G0Xdq{inO`IL}*oRc{xe?
zUI@r(2NDc1hr5igr<g8oA$yXN9=t`p90r70n_!NMe+Y7Q=fjF``s9Ob#pj2uv5<Uj
z|Am~?*4|zmkV8b+c$*)!Js<$HK5>(M1+0SC>gkx)XerJT!HI_7V~&cJR%nj!nKjyr
z=nOwpB|H~II?$CPxP0k_W{`8S7nVsN=a$kNac(tNg&z5LD#>L3WoWp%az<~?kL1k4
zOAEMJaz=G#vZ_0pnwqNic7O|f4glJ3#J~YSUPAfwDEB;*nU<86O#u66>h7~es&Izi
z;Tzb9Bsx$K5<w}F?4h$q{(W;2P0#%XM^!y>aOgu7bN5yN8(;_PBtHpcKM)8lPlaK;
z$nH&gJf+5C1{D1F*1OTwiC-$5HXS&ordxEUrT@PiHw$3EfU4y>^gCg`;JG=Z{Bgqd
zNDJ?0V01s!DP+FPqPuakG!}BLx;dGJN~h@Ki`7gBr^*=usHhr?O9?8dD1;3IUVPOF
z!?v4gp)+5V?g^^Z8g5xIU8Q%q-D^-Q9u~L_%eRz_{jGmq$<Y7&ZBg$O6#;r{)3qe`
z=~*Xy8GXBK*PAWq^}(l223hU0df|A}{&CxWAh(JFV0|In)><=Jr?sYKOX*%_ZAZQn
z-e0;r=s7+4`HPYJ)@8YL0#NNFUyI$kIR6$}66vLdK}tZsvRY7D3lAjVj>ujpe{kJG
zi~8*()kBiU5p97p^V$DdBnEm=IfBIXejF6hx9s0v*A^29rc~wcENUrF^6<nWCeT5X
zVT4K-vvHuRRg{W;CxQ>`5>KYKfDi~Ngp3x0YHK6``}%`l!xTKw>KB2ggu&)!U7wgN
z(qVRQmU(rT^bJNaw1*cy(i4+(u-|!Y)+^9hizTjQe))hVZs<xASfcVnO64?4E)THq
z)xP4eJTc#^{LUD_oEI3V+?Pu3R+Nvhf6r!9sA6Jb@|&6nK;((J40fahkzF+jbA;tx
zwJiNRd}9fjkR<(u&-siS?YA!pm(Ib+CnUo}=u42i0`!SZ?tK!FwZn8{_<j|xGch#u
zxqne-;-a50uoMjt%_MN4rlH10MXgO&aT@IOMxR&`QOS1w;Sh8`ni(~;Fu=^uO~eKK
zMG<|89!OzCa5b|^uslE$HQ9AmHn!-zGN(dH;XA+Oo|UI-j9R(M*DBR$(LBI`@s(a+
zD8EkwCfT&wXW@fP0#vA>!^w$Cy`MX#b0*^G-;$P4=zd4g0T8q|O@MR9dv1j|QzoK9
z^*O)9Ybw=LYt9EhW(V`ds0glmABb;wN=#ieDrY0Y*Bv+R$puCY8I&1{jRdAw#r15V
z%_l^<owXs`cx=SslthuHrkSKDA&y5qxMYjU(P<}Wj}IDe()<e2+^spqd|&e?_DNLF
zEi6QWv1gMBWPy?i0p{5rKRjm=9i(p(aXx=`WtWnao4!8dDY9c$wN!}Xdh+vfBO~wE
zDn`C9ZG{}$>BiX3{XM+?yhRd+%9I$~3!Tr(3TNhKREL@RlH#o)g6oOA&D&Pi)|b^`
zFDa;~@ZmC{WKBL9U}JB_^~D&|x_bAyQOVUX4>o?C-s*r{#_qAWG*Ru>$4s9+c;7oV
zwM<nUL0G2lcCQ|nZ5JAK|CS1FJZ_qel9hjKu#)ie+rBn;)r@xTOnk{<cA`Ko^``y&
z_h%vU3{(u#Vo~agNgW;%J4sb14qWw`yRv~x_6by$oUo^MU%_c#b7mTaY>ZA}BwdX)
zb~SfFK5so(p&V4`PGX@P=ql;3A!QY}J?|eI$9}~xw6hnMj=_9ilU8g>!yZn=w|ILJ
zF0h<}V}}h;Fm~ULth{%$^?Rq}s=7jC;K004-%bLLJPMS*;&e;{(!P3_-MQpKIfyL1
zFTP5c&m9K%Z3jXNTkR{pB2(((9El27F3qKxQr4WH!qvC)SwCbb3nkYIbL#oHf=PBB
z)VEP2h&QtLq*fx_=8QR`dnHO38bjM;BdM+J#a@C%IgV-&EcDJ0-QY?>#vVylunYqE
zga9kMV8D$>rZlT_z0Q6s#WqItXiUu5a{I)d&`$_ukh|P}sd%U{QLNOtnUrtCBEVWO
zer)pd3%&P7m@E}`cQ1ypK>q09b`P3$=%GndF&M_;vHo&6hnpAz5s6n-r^;9LQ5JAv
z?|6vS?^oCG!-Q-H%oOw<JG~-wc(v=+y~71+p;^)JAPgHOOU-tpr)4X){o#9$>Qe5D
zL?0L4n5c=J&jb|gINk&@dipR!opfX9M8+=HqM46oNz)OE`1&Hj-*^n5!egObubp`s
z?0SCMOMr8X6Gey8@jFzvu;7Vwr!)+M8ze|1orLQ8GBolgYt~w@KbFx`pwxng|GN5z
z{Gr@K+fx~Ttl?!Q^_UBH&)Qpoa~TccS6U6zu;q|$B;Z6N9|j%%qiztAjSX3q_4KkG
zACYst+g_yd;uRiWSx_PPKxLrDifF2G#jG?<{mG%eB;2oTZhpzAQR(dYIT|#e%0Rv9
zwK(TrGw{34<aILZ%Sjvwc{z<Xge<Kz5~9CQL!7`QI>BiHNk15&yd+fgUZ2L;fF;)n
z_-5IaQeJ-Q>HJNX!C4^yA`?fAe79V>f7yOWigmwAt3AF<_U8;yuPvAGILnI2fx7QZ
z9g)>-`L}O-atP|S;gsJK+6z)%yQ<sGtDc>Uu0vPR87{gA_T5sYIVR0&`3VnF#1~|L
z@F$0x-(BB?3r>sQms}3q;PiI{H3Tgnl#W%AxBfo!6M~Xs>e?JsU2`RNcRow}Wg8Ld
z6#v3IHiPPHmK}Q}q)w4XMMb4M8;QW9>gUg&wZteX#`hsnHY|%xwejAuG|EnfADq$u
z7Iq!!8q;!@Y1E;xGnUXZG7cdL@WNX;M~C>*g0s~YLng?#{@-cZGU0A*cCcNdZ>#ab
zpiyE7G~5NEZ$u^vkEd_-epNe4-8PeCKRj|eL_`&@Bmzw`nL^g}+o_#Gfs@}zh@a~V
zU#{ruZl%T-1`dM$?>^7Oc5kIcD6>u;&#a$$IBvLTYz&O!fZi(*v#YB3ua$A;JVnGG
z&1S4`Mvk+)^_Rv4OL$urZYhO}+CL%4+dd%O)p9jBpYQt~9%2dTH50=p;v0AlGPwgy
zuEA}!xgIaqC?2^}?WzPFyY4X^qsuOHC_p4f6x{%kYvYtEOH~Eogp5s%@bT&ko^DVr
zk?U+Y@PU=<G$$aMos&#P_0>d=gdo~|z00a@KEeE<DH!}bErNJFCtxHb46&EU<SoIA
zFzwUb?P6v2c0#gPV!IS*g$arwA4EKIeMeL#`W3yGvbevG7<1Qb2oFhhVfSPZJ2ccN
z1+rT@jAv?uU^8&0&`)lnd^N<{Zg1K8LgF~O$Of#nz8R2#^B&x5TUucJ={qf&oU*F6
z8%pR96x4mcpJ0@|_kOFQa=|_d{xaSDXHkWf9im?yY@ca*$7Ra(@5buhkbTc*tdypE
zg8|)%;hRJ7bc`tMcvL`CA~pZ_D<|KBRYH(Iepj}}ujjoOfjD{x{wk&OVSHICx*~yQ
ztvSy1O$X*O!U@WsS-a7oHxwPs$`i>-YVC@a(fXE^=s|H-$KAdC^at_lTW=ed;cBy4
zMLuBf#vl=hA{Rc98_wWI_0^w0Cszmo9fA8guBw!h{9ukrS`C!c-^r=f-u||M^}n^+
zkLmdnct*VU-4>CBXtv%>V)csX!PC*t{STB8lHu+7T#zrb+x-RY>0idG*JP31@~PFK
zrz2Gr{Luzov0P8*x?n3@Nbk86dej-QugKiLgC@t>K9HmNP9ntnkwemP69YsFTen%a
zYW&fTYVsuQSTRS1emuEl#GKJcWmbRxl`!M}veRvC(Ah+qza}EX$QglCFw>?|jvT4l
z4?QzU11$P&^hU~eus7Q55yvX(mE;SA{^%d6e;0{YXw{kZC~CKBD)(;p7>|M=V(7HC
z2L8M;Bca!_z&n>EWQ)Q;_CT7Rq0Q>{3k~jG5AGgNt7PAF5O5^}=-y$Dxpl3bd9WEz
z!_>QF`!mVKNuUOfF_-_bdZBHc6dLh0^z0%Wrt5_b(8c|(#_eK#DoaUO#M^hzj`=ks
z3ywoo*v~!%i`^h0c!Q0CQjR0<nR8M|Ra1kK#WoDbhO!=*;!;u<3R@AnVWJ$b+>5!V
z>}AOGS#zoqvK0IkYxeZZv^6jV754UrhQ2TYU5NW*5|O*}%Nq|NXT8tjADlOL2n>JL
zV_QH_bZ1swTUSrN9gdCeeDCetofk+)^?FUpM6SqTH4>5t+lM{~J(UGj?YJTJB{^Af
zmLUV3_^rW6I@g>&*|8{kasGLQ4He?QSUh&htWmDmks=3`+tx+`#znmTn;nyum|{|J
zRX!E+5$amn=*h@wEr2-MmzLxJgZwQ4StpH^Y4n-4$PAajU04bQl8ECeE*y*yToCm2
zgOIxb%WitI%>d)M7s^<_#$wdf9?s`}oWZ0F|0WLNF0=dORL^FQXkmS4sf9Oo{QgzG
z`vgnrFLLay4r{R20R+%n_Qmb=@f7~Z2cucNON5JyONo2_n(*W`CgHuc*6BFL+KAvC
zXr9}uj*2eDvl!v`aP;7QUh^|5*X*Y}@29PZf!d%d_L7%SJDWx;-7U~Q#;VpZ<Q+HS
z*Tr0J8|l_|5=?8Nr{_0x^l3rL1KCHoDDZJPIbrwrn19d2kq{nco2@Zi4}BLrGG!1o
zTgHcKOmWjuk7bFGcwYCbmwiz9mLv^8vI=<HNGI!|G-FPMO=DG6e$0CHZTGzP^d1Tq
z$36I?toW2(TU%>4vhJjAYD)ba>4VavtIJ}fOrF+pOCpI;|L@AyG_&@mJ9^c6&3zS<
z&(?wKTnSSiJC?sV@myj>gN>lXu)26HdGQ)wsY`xV!Fd7<IDay7Z~X~L>ljEEwyrsv
z{L`sD@mXEp3v*$m8YAW++K>uPSa04Jd$hv=Nxdq&FS06AVr%*}&s0edL3*rYWM%gk
z`tL~aumA|e6QEFHp7d_KoVQlNgO9hE)H7S)M0_Z+_vt|4_Bn8JKUo`jsjq)8YWOsw
zUri(U^!bLsmkRN#4eQGJ{I3nowrig;7jy_@eLB_$>FFeie;f#my<?bYxA7?iVxppC
zD5zosqo}XG=(Ien96i;X@vywfM#%Vk(V`$_#5y7OUEJyK&H$1P7$M62>aED$n@PZ#
z43deYYz;jAETuDMDhKx$OR!@rLU7RdcQDHH7ltj^ceWQ>mX=NU`PDX`%yRtxw~r^Y
zI;n;VidoJ&LVhv2qHzSDdZN{o3mDiXWQivCq=CCioXJX~Ak)%f(1K-gLbiWF{liFf
zPIS`nM~1eP+mBw-a90m^_|Gt*#z%w8?qRk<^Q@t2ZSTqlQ)ME^;L&+FvjOuaB+;uP
z_~b^w0sXYWB0WNeDndfw1xG_*;?xsL1}q}RhH6ebYS~c>qRVlXYX=#UtmdP~X@Jig
z?MTLHns3e{U2t*JIh&2Kx^q?b5PP(O260KcFBmC?A`@ydwNWBnVfNk+asY`Ticf$K
zhXNOEHc88ue9?{|+xJMx{a8)orKvRItcC?HVmKJty43N-wOQMs&iCOqQ;!+~P*j;l
z)sMehLv>F^t<=rBmk#6^tU*KwADbIiw)Qxa>;gu97s@%+V%N5`5ET{{YIgeiT)MWC
zx;7a9z%}|X9mNcg#p8b`@^{V2-{${=+SUCI;v}2wQ<3<?l1phw*hO>3z{06kuE0s|
zH<bR-Gw~Wy$@ptVf6*1H)7^&n@w(?NT|{84-+}n`p2m@cwlzPRcpamqXK662Gid}w
zgs8`wKHKx9VisSJy8!gWfnUMejO)RVrK`or3=MCx1^tyQ$Av)R;M;|cfI`2=h8sLz
zG(=OEy}loZzfZVxIyy$}E$m}gN2@ZselxCMG%Zx8e9}-KNPrWq$AH&=Xib`Z`eg{^
z-+vf5WIBt-lOBOCeA|4cnI1sf#k*X<mpjNeckt)iZ|3UKl9GYJ&Sks#dSq&9>e+9O
z#+M4#25&~Q<dE?|hy*y}&00+~^lTxj4OA;OdrII1h@!dm^_;C$6U&?XmBY)O$xK>3
z;ITmipw|B1yQ`CPpx5lo_E$_4E{tsv>OiU&c|wfRP|q1fEw-~+pVR)ni!|IX^Sx23
zE_pp2nWqVd(8b&v!y%40UF0W*BQu@BKYpCCI#=|+L(-kerTdv%Bvhyku!mxfWp%Nr
z7XFyD`p8D77g5gbf$nt~|51;9wmZ#6V<`t``8T;m_Y-yN_uuE?{6t?UQoNjog0j<g
zq+YbE7|q?j8YS<$;9fT2Mssa7By17nQpDyUQhe%7Iqs|EUJ0d$h@(PIpfc)>CHn-)
zH<A1-#n){*<+aRG0A~L-jlO57cHD+LTxundOJNxaw^RWS1sy$oH$=Uxc4+SkMiZrV
z?UUj^n6F{_hc*(Nr29KE*lIcaSsv;DJUl!)W@geEM#;jMQS0gQK_MY~`Hp2lK_9z0
z9rpup%}>E*znqR*z<JTM5aQuv?#gT}D8D8ZLDg!Yg?~v7AwCITM9mn!vwt*lo=({6
zaa1_|SigUshTy%=|AdH|vK;L7G}E~uZaQQ5#4in{>0qudCR2;%n5q{EkD6?G$@lQ5
ztGoAl=6E^QBQnMCf_uiL>}%Rb5|O!`J`MX@^3#{255p#^^Lbj)n-R!%DWQL3bzR2!
zb4u*G&K6?U9**Ax&1}dX9V0j^gyaukO4Mlg{;{rXfoWawCN8qfFD&FL7>%RT5|%i6
zJ4TXhJR}3=pz(!w&N>#pbaj!Hdbqj0*jIV~o}ESRNHvL5P~O8TKvyAAZX)}~Pye)$
zkh@0`5X}nbu4sD5T`#GwbZJQ|N=v)JT?X&W@<ja%8|m*aNl6#Z-jxAMn?5cWIf>(t
zcU$D>b)#2vIy^ho|2-qbf4y=cC~68J5Qey~YpC2%6ab~wvaW*8fDsMhd+mOWW-GT>
z)50mqf3O5?=Z>bQK?XNqw%E3Q<_`J9*x_{i^!=#mk~ax6pN@txh?S}8`@0KBfUf2<
z){T8SvRw2TZBK1-e`%+8>LGA_Viv5uz}`{i-a<}(Yrnpn(XrUlW>x25{C!@6OW0NX
zJ<RIzWK!#q&a$<-f|nPZpZh6Pj9P!S`=HB-r{Kw5`!Dt*2;{OQ>aGX=%;9p;1C*vd
z|2*|HO=VQ5@w@QV)|#U~r5sr&az>|GHp4$D+!`26LDV-W>q{#uf3&#lXTD1>7lndS
zqY@@3{^U-+q4tFhe;OnPg>0h5rC%9d%~05WoJ?lcW;OnS26Bi`@=AsZ6@Tl!Z~h(}
zBF;{67Vfscv#Re&XUJ}OzVH1QZwY=0{*`!P9ms+chC$ks@cVri^4n0_#cE+ztV}@y
zQE78$8Cp`MM~c&S{^MRHGyQ0L@&(Isq1vHIzTZUn<nB92PzF0Jt+;pdSIplpW^|(^
zL2~)=)PzZg)n2A*qdUwm`WGtJ{du0W<9(kjXwAb^pyKehcZ~H81)X}PUJxsN@-hd{
zTH%qML}>p#0m_-rH;=g<J2xJcC4aY=w-k0(sl3+oi!88uRfygG<RF!>YeG#g5=}0Q
zWZCFAfSoZ~7~(&1kespFtF@|7>eEu^n5WGc85x-~-FkS4Qm2NEc9AI?dl&L{f4V~4
z!-Mxjm7X9aZ0#c)XHrtqY`vxGrCQ=aoEa&N6g_`&KH93FQEn2>-I7FRQ7yFVn-m3^
zG7CsaiHM{mJlNOJBu0(=wl;5xqMM6`3NWcW7(ktD80W4^+>dkpZ%HCmKhuYUeeH~Y
zu~4L#HGgQ0wm>+Iw)fQ-i2MwGcZB#XI!Iax2at3_&xH9F)iGSGW0H!Y$3iZ;!Ks7}
zpB<m^|F697{-^r=|38S#vd4KLjy*zgBC?OnV|MIWw#djzW+6)UI+2litV6c4C1h`o
z5G7mL`*W4v-+$u!yg7b4=XqV%<9b|=>;AZ3kL&T+6h0KuXZ^}o>ReJMxY_)5zkbHM
zv#$pGV9A8ZV%t}mDGT{)q(|En2hR{;cIh}HP`+83uaJ2LlEsPssMir`sw=m4JL!~r
zs@Gxs9(}Fc3WYz{H<B2d+~2$>?g){WQ&zr2iF$7G?sJbodxC>{<|@<_NH^L{XJMbK
z$dUY`MbR@lt@KxAPj>^et4dfPnf4M4%gy_>Oh+=+klLJ&L=~F1_OE<nALF`=zdE#E
z2<k&V#MM&$C+^Z7$E6NPkk<eSQZen|dl3Vx`Te9pMQ%9m<WO7#8;q-1+h5UzHs5O~
zx9(DDRHiAZb5&I(*>8x|p=jvN6ZB2<k~yBPc5bV7p2j+k^5eXwo%`IL<lygNKz?m~
zeRIJ~)%4BM)g`;u$)-H5jDW6vVF1t>!Dp7SIx3Ln#&`Gbwno!{+s0Gb2Rq%Pyk^Fv
zNOeszrmz)scmDZ{{OS`@ie2Z4_x*=9)l?2qR^cms$2k>!`ZyHG9dp${>y{?7^vl<K
zWE{!NU;4tuK{ox9>IJ<e(tgN9dtOWO9y6b3tJnH*S$ll+DS7`12wbh&hmihO(55BC
z@GI)j+M|h*)x$g2!^T&`#utLYrfl_78}%pMLx0Gcvjy-SL-%J6B6fsJ)UL!7Hb8>%
zbe?RGP6XC5?<n8)H8qZ~&P7J^wN%*j1!d;sSbxUr18ja;%VSSZIH+pwQSQoZBV2#B
za%YhLgBsokcx1mU!2?-F0fMb1ou_A(uQ!rW%T_JKCef9VLnOuIMp?+}<>CBym6fyr
z05Ns0v^cuBd;)L8+V!*EGceev%Ovo6VIC1|AX%z7wX;Dr5_d9#Tz-~Q>>w6+G@Rku
zbG_!(PXB;^b9eVEdFLi@`ao5dP+D5LF~)|Tj<)Iz((-~Uwax^7B!*~TBblFeI;RWE
zQq*5$FTqdzz1k^I?lb?i`0U3=37?-6T-VmV`exiC$}G<MbmViuT)JXUK6m0kL$T?Z
zDe3_o3#yypH#MipQlM%qbUXO%_H@N`K5>!6l<s)o)d|~TP170QAD)yrB*?Xv?y5`$
zQ#>iz+^>48pOWN?;XO8h!D?B*Ip@waAVz_KDaLUUhfLV<)~%@TTdtC!Kvv(-DV584
zZlbTc<iA15wK9?bSZV(baar9Iziu)H_aeT8Ka;0weV$kuZccPcVt<S4G9JyL;d|<4
zj(0n_1sm=6(B`Yk%XLncmoskrp2<e``%Bd)dFPe7eq?sg3m5vdxo^ju{9ZBb9&fzN
zK)p(bhE_Fa&7h^?PIvH0AJg6Gm@%i<PgvJ#0AF};^S5-#rH&Svnpp&VFi-w=Qwiy_
zu<Z`RPG3a!;9}eij7?SXNgNpG_4!*bvSV0BAjw!n{e9<b{=<V+$XwsCv08l7h&o<j
zPA=7U+mfK!;2QNW(M>PeeNr=SxlugP;3EXH9cg6ft(hn@xooCpR<^+}XQJ<H%~o2g
zHccW|%hrwYx|g=Uy=~ZR{n05v71b_c^P98QY_HogtW<t@q|_0;7`M|!U{|KKH}!f(
zQ=0%1d*$T1xO@iB!1Ueay6{!e$ux4MTkN8>d~HhnE){LbfT<zhU7B#Z_DDN4!bpC2
zeD1#3tK`To+eYQvo)3Gz?|Wh~R^jlX_@&Cig~w&;l+~4$*39{rsZ#8UR9Q2^R~gNu
z)s82)H#Gf0aqg|-(|1Rru9>opHP<NI&ND2_ya|FD@aDoglxpWARY9h!U1!6?(ZH5h
zseqehsSu=aP%5xETOpxk(7Nm7B(-sL+3xUmlF5)|B7qOTr_JhcQ#;*Dp-s1Q+{v5k
zDdKkAq-5JxjFMu=XRzr}F)z{i#%EB-!HC_ady>Vt>BN7_a(v=#&-KwK2KZ7R_~d2J
zF@99nyMhfpe_Nn_OSUn=vV#TGrDT#Veez1zk*pc}Ij2QGOwt>k7$faRE*FtI)t+*T
z=#N2C^cP!7#l5n9jBjWRKPPk;;L``(Wu>>50t$6+oLQ|YB9JA!#XYa~WOQ8sYd?Qt
zzIjmB)JP$FI4P`i))g%mYr|WSMt-d$VE_IT=S~-%0=yuV&aGXeY&Rz@;ZE#GCfh0F
z8x}PKyL6nvwspn}`w3Lml3LH&g1eD$=fRKS&TJAD%1c&)6bH8lmiFHK<Tf^3I7!SB
zv9`v=h=7Vp)Lwc5Jr(0ytk0FN`H_P9gU?HyBz|qEZZTY=7*xwai9FrjXkgo+;yUYP
z`R?Q1qn{A?jiZa(xituFJL|7ykRhAQpygfZKAUTf;t=(jZ&W~tRa;+PzoR)^%edu{
z*}tw#BT(#yaTHSunJux!*`w(NhugLH)+x>pJ-)oX&B9vqpHOlzV8Vp=dg2O%@p5)C
zhpp)uO1~=k{E38RG8jS$eI>jbN#Y|(*}AANspDJjGELk$YslBx|LU;xL|LG*uU8I%
zK!}wSojhfzJ+<G7p!r#~CqdY4WrP(ZOY(PXzIC$HuVWY!ZOH~H>!hgZX$C%kW)oQ(
z6k0H<qCW~qG2C!82Ax26?Ud3y;Jau0eYX@68hu%Y?b`G3s}jj@0<EB7)Qi8*UEK5H
z;#B=b)@l$cA{A;Os~^!LB%si|8QQ-Z+_`YKw`+fBbF5Wfm_i%!bA@iZ1Le&|xkX_w
zsV5)sHAw$<Ltq<$CMv3!xLhTFSILD4_UFlt6=!dQHt~3nj&vQ}dTKqHh9_+Z*q;1=
zCSU{fHB#@806~C10E1kTgK2mihc^2#&?cDJ7YKy@P!%D6@6S+#lGk>hxQ;W($j;Gq
z+a_oz0ag2wE=`aps7SlMf_}@&0U!e?np5RrmQZ|+?GwuP-<aAEq)zpq@*(n>qvOlW
zZ^?8MAMHu#K_zYzSuc;WtL}4iR%;W7^nDqRf@SBF0?diQqy;qnA$QHo{tL@m*oviD
zp4IO#iz3Rr6g2$Z`?Lv1&@?MPm%GRyloH+<aP0R*Fm-H+-%i%=V4S$7dh#-%(Tlsn
zhO<kyY<~p+Wuf6J=0i5cv4HHM+-eFpQg;2YWxrVNow|`dJyfEkqa!@j%Piz)BbLyc
z1<WZ~n~2C^g~HEzqMB*KvT=f@ezJ^yf|?#e6v}ODU~nhj(uRVfy?!LLP*13hCFAfC
z(KOGO&F#IqZX@TJ!dDomYSmcBLNSK@qV?-ELdASSJJvYYibe{V<ojATf+48L2N+?z
z8~V?Ze(-uVy}uf7XZqSf)YfVxp$B*!z;I+axz?F(`Iv6inr->uT$zZ(%z(EvQNG=~
zlW%3HczS`n{Pbr-v&0_P6gL{&Zup~3DER*lNrL}M-+o7<7MmxMV~2D7VagM!K!m`f
zL^N!%!Nv9(PzN*3UBky)`VXXCjDES*l7qZAZQpOsc`*ULOh_@j)CnMuNZ3NUV&_ms
zNN=bE+pSz7)BQ66w@y2m+m*`iT?bKp;GA$?T<gw)X|&Q7(gpwTh_eUbI!PN|t?2AZ
zKJ8IE<M|<*&vAgbWtXHd6%}C0qw^K%osL}M4}7N08x-<~EL0#SA|O8ScWp4!G~|ec
zUHRa_c{*)`0>cce{Hiu<x}B(g_WZp5Y&N|Ij8BE^<%Me5Kh{XV=9zfRhvjLe-5w`N
z7LrdwsAOA)!#~~$%2P{8Om3!$J|%UjnNB^+S%m^;j<W|u_=d2}S=t)vb<DgvasZo_
z=VlZ2xP{+n%rz0**x0Ib+4@9OiSXm~x8=k#y~sbSAFTf)ZUsZVxPV*{dk{)GH{oY>
z=_5a_(R0C<e=4mI%l$0BEC7+hQcXGi$}j0xNV5K1L2i`97Ey>{_A^=>uin%=zokE>
z9axMoF7WAJ5x#ne*EX-u>%J+<vUxsFV0K(h=GS5wdogDkDa2ODyMITuCgV<z7fp(c
z&E%)PH(6S&3TJ9eY0V5~nSQJ6-^P@ZmwJLZl`g!CO(Rkwpp`R5_1ejJu43<veyDyR
z{-ZB-a=|(9$uHrFvnK`xjLXfA|1$~YRxmL}uU4oI?bgP!{-rzok5OTHY6G-g8%7*?
z{11)-ntSs7e*Mf--in?*vjdT#^0etOOI0jPC^+2G!)j&P5i*=EU~!!N{>iYmeu(7L
z1CaooUm{87zHrjt+nJwl0ysA~pdD>-x_7njREz-wWBGsULEw)llj`TjY<$c}LAzP1
z11J&<OhFUQaQFQ3r{3MoZwj0%JD73=NeF~l!bjrNr5Kadm;f<DT?aQ{nmQJHA}x(h
z$*%bQaiw{pH`*Qtk{??HQuUTl*<)rj8qS{bNJ_rz^XFhO?D%WF!Tfn?6gX%f;$7z`
zC@*qEyeQ<;CSYXNB5{~F`3aJ~-4{WU1%c`h*mDT5+c8MU2_%#Mrp$h-R;w5?zrq&#
z_UZ5a7geK*l$XAwu5`ka8Ja5uiD7?aw-oS)4O<bu(EJ-*<8A1;5jjQ(cc^Kqd{mk-
zq2nBi9Na=(OO1cgv7o|9-8XmCEMIy_=~Vx9WnSyJQ0kSLfmjiwVNyiP5BMM5qY3|7
zvF|+Sl2%ekU21ag#XeMWfGL6iVog}C!qeFGzY$e0J~3PX7vPB~80!Hm=r?Lj1b_DC
z(GaT($or<)DrRL&FZi-Iv~U(5TjU6t=q+2)>{q_SstkKedR~lYFflTg-WuOD`O4?`
zwqLF&UmfR1Ha2RwPP#sdL+t?15T$sb_&>nOR32MFTW%L{C?myoGKnC?hHqHL+83}I
zWYgYjYE5-Sa-!ZhR+PBnP!?#IRmX0J$aOaQmmF5ymgun{0jJh^{gZ>vtJziam+mQ5
zwn1;ed#_IT*HXnA;zQWFfTf;0#*zPR4Dmp#RGd*r-7JOHT^^;&-rMXqvcnDX>AqEI
z7DKMde#Ssok||W)oIM^Mji2`}DsX-cW7Q&rd)ZJNP3{~4TFPh3?rPS@UnyqNNv3h;
zNo$4LgZB6hSJFK<gu4!`V0|M0mY87oF7{JlacaU+h+xFtZye)WFs^UEE>V*-unQW5
zWCs|M%x#P$Dv2r@iiuu=5dWZSfwqt2XUP5*_)xb^hiJ3QEUK7^S53_PYz8u#ee|6w
z#OQd6DGAR_<7oelMNcRk&ZXicWJ)^{E?s~J8P;f4EBzWjT9y~tz+huUG8a3N2$u4j
zQuG?G_I$w2wA!D+<+xq>=xHWE=O%B@IQG{4$(FRPg<IaenVgfn&b7>?BUdXOcFNNy
zHT&PkphvEHlvID7D6=}|F<|?Z*a-;IP(#9QyjVerM9Ryvx6uKj0#3X$L^-GtpeRo_
zK)$(qBoNoiYC`$m)>UED{ms(x8-5$62Pg7h(#1QMi)gRZWG|a?Xr}ovcvivfrA)|-
z=(D<EV!fpWsus(FRh*|8-Wl!z#7b>-hB-y$H~zLy6(vmrD;~T@LNXk}MjZ2eUj5;e
zFsJJY0{(kD#K(WhWLX?=;kgMdR~(aa8e{$hMTFYp%V!YP2(=BMDW$fF`bN+P+a7NQ
zr&z;b(=9J~Nn;Rr6`XSF`U3@#iyHpAHa8yRzMl%a#a!a#_LhgfCBIJfVhi|GZF<G2
z8<0CEl9I3+KIC-SHkaXTH|Q{MTqMse*$@Ns049nhli^4AQN8W-5bZ0L@F4|gvw8Nh
zUXR+%5OmTWZ?+qa?#&le@yB_2YKD?L*JADQk7u0~1m6bnt7~&oMZ&nBL8&l;o+W3T
zlXURH#6ENXhOfS?y`djtuW0sjTx{~I+z6ZAZ$|8ClG72BdbBBR_sl%NcF$fG_92Ma
zAe;UXt88L`XdC+1^_a$U^0_mvuJh!b$NB%!Gt7qxaC;sf1$(~{;ajYD1}%5Wy~%#P
zvd+<5=1Rsx`C4iGGWBu;KixG9)QCEhQxVFye!QO{5e=lV<O^dl>_Vzvtkk{lj)r<6
zm7pB3Jb|4F>;;cT!%8#sN&LL%lB5pp%{5XYD*J~U8clVP4P&+XM017nPq0A2fy)Cs
zm~pI`2CW4v8-V+RoP|!id2hY?eTPr%?N<<|hnfd77(FCMM3*D9yeQa<6W#o>p&lRc
z7n|Qia9&8@syiGYaabf7$S0x$p(Y_qvTmU!G#s1DS4b~@^en5V4<bb`J~;7|{ZxpV
zcIoq(sNo^r4BB9gfxee?mvIOpr>MtcJoS#)7tR}Jzo``x^N<&}-DkuoHh`m7s^y5x
zjv<TVRfyBo%dc1XY_ON^mb8*Y*Dh)Ns|HtmvE&vM`@-R&>@1MEA)9t2S)R>gkvP}o
z8UnfqZtd}b*V!@if>F4M1ga50YaSRR)l>d<NXk<II6frhmChwRF!9;EDHR~HjhAgs
z2iO;0awso3(^kXi%MDWsWV*zjIU{?St-)trnDvd4iXX69>6(e?kDYz*cvBk5yb0kv
ztJB!y0i&g}xDdaE8yoTedZknv2Jf?M;I)^q5Cgp;@vCztFLmmDqud8u9A2|6?Lv94
zK&OW`bvtGpaUQLWT3;p1cr~WOb+71)zY{5V8OE)shsZb32>jr`c1QA^8`!QZ7j>sG
z4-Lj%WDk^`x}S;3zW&JYshCejB|B(G`Y5+Dx1Qpvrf2oMA1>!#C&E)!pRMl%SKAn2
zEh&!VYc|WmO_I|;l}2)cu)Z|sdD;SI-H_q{Wod`a7gr8~O{U%*^n4wD!KLwcllDr&
z=`WUI&}Z#0(ERMN*tNhso69luTPxFc0|O(YvW4#{YTV<g|5}46;W|U3;RQ=6YUD%@
z*Hu_i1cGM5^QR!Lp1dIrW<<jNl9CXQ0!i-Vu2K`AJPD>;VjQZp)X+=pbMFiKTc~=n
z-;B5BSPeUlwJj86Z%m$HG&HbbL|-yPqDDE3JO<8xjBn>1x>X9jn$+{hBJiS+={}wo
z<GM3v_;&_|KA--x@gKT6uzCG0dva;Tj}&`TZyn$YzsJR&TQ_v72)e*<Z*^>RI%fFA
z_cHD^y?fZX+Nw=Q6=IpSu0Y-pofgWk9&@Q#rF3m%idCr<{~HNSJ-<9Hp;SRENC3n#
z^s|dkyFw2oibZvcp+a~khL0sV-0tkUS9!dd=1FsMsBCQQRsAb?v9ra;hVe2JGSlvO
zo9XM#IEMh*0^N+O73b}~m0s;^s@d-~DoWJ`;3R06B*EJIrJG4>Uwh0PaRTaQyerP6
zBcT%HDEB8{>?4WE@W0)l$?vb?_&Lg^2D8Q!5hID?m7`Vyb#{>D1W@EUrY{cR2hVRQ
z*5p=&QRjt%tA>5V6cpE79$oc#@NaIE>lnu#|MyTio21gSJ@YdREczCb8|2ACiaMMJ
zGo+BrZ3TRK!AkE>gG^D1dP+ah<Vw_;+?OZnx#bPk&S0QH+TeJK_2vFvKAwfyijmN7
z*&B&Q;ZDYxPB1Z}FwTgWOMbTLgk(|aH#A7=*ocF8LAexQ_v3op1>!!g#Ph%OdgiFA
z@%8Gdi)LEy0Otq2nODz7k$vMQgg$c*({~wM)Gm<JFTNT9r#P$LhJ2GO2Wnc?=e!Y=
zc$^w!5h~18yCt8mGGddhi4gKZo!V>>;KuWF%!|Gq9^^9AI{)aUScL^rb3+OZYuVHn
zb$NxLvA5rldutJ%Hzog;ml$iQr6ehQckULgoorl=wP8H{#OM3t7_A|;JQ*7&$Edj~
zf4j8RuXjc&#X^QN{%temrescP1Bw8Y|In<36N0+Ev`TDs+zB>#k%9^aoN|zMYo+H2
z%-YjdQPO5y{@rRvYz>Wk51_Qb<d<}!+I7E5jJya5!c$CK+Y+9%oHCE-0*poLs&0Kq
z(_$lx`&?CF+(y1R2u-rjNsY{0=^cLY33=RB3>$n>x(dJ8<_8ZFoUD*FKU)1>zYXoz
zw)#oEXiZ~C7GyIO8`Y^jchi~}Tf~mg!3wZ(N~P&=;<;C(VpGzN5}?*#y1{A8;HjN)
ztkgo-`tj<#w}6+2Wv{vW^c5!vF}_Am2&6f;D`)_fn~46#nR9*>(ndM*@NC!(ai3(#
zDOkbka8j};_07MP^|WX5k5l<BtB@u(CE`woi*Ri5MDR-+BgoqMwB!SaQOdnJV>L|=
zHRJ=;P>JlA+n@U|7;rRcIu$r{#PZR1Q4nMeF70*@2#Elti%A}U;^NcVnfx$_#QZA{
zrqts;ZRxraD22oE?1*n`(<qB=2J}KHOk;omj8+sr7tPz7F%kyMxR4jC4Oo+8Coo?(
z@v@03Rddb9u(Qcg>VdT^nwVR|Cs4KPylepC{Z2g4WDH^CVq%`(z)x8f`wUxDW8<am
z*D9cXO_V`FS{-#;ga9M)q~VD?#6xy_OgrgJ4^(Z^Y0{V%NImp_^CtSQD)5RgKD6mg
z-A+$&OJzW#tM9qH@1{RaBC&T+_mvt(#XVSmxl89@=`o*%^9Hq`VyEca?{k0Xa%3_?
zZYV|)bB$U`KB#lXlaCI2S9KlWT(h`qBEX~Un90Wild$9zZ%5Xt#qwU}ORw@@1Ck)+
z&R5JQZ|pI@9Dm0vkZRPtLp5<IzhuJS3DP{krJ`#96^)4`2Gh%DQ%Gi>jBiA3RiGQ)
zQ=N|~(LGkJ-(kJk#y9$o+kZvxW<hE%^50ucz+2t;Iewo&AEUf5?79SNJ3pEWt-Qo+
z6`0%a(1F+8?mMH|aqP<c@vWzNzj7Lgrm|AG;rg*OE!YZu+)X?`oqfs-yO4=@M4iOM
z6<<%rE*ZF^EYU>l&*ma_P20U9FzL)wf!s?CfCH#~Oc=&3MR%(&fOAI%UMgZgXYbsD
zJ=Jcw0>8ga*2y$-q2)-nI@+;>Tp`*^yNf$KBo(60xl4F^Y4-!YwuXc$((n_Vl5&JJ
zy1d(*d0id$c6fLSVue^o$UQUE#)nut^rkVthBow=^orCTQCrw(UwAPI)9&uj5gT72
z@Z~sl7mJ5M(kU_=V7neJyIIUM3-IF|Aws;esilf_h6UhPlsr=8M1Qt7pRoX)`!-s$
zD!QkxM2Fp>pKtP#aSBL$b3s7x)_hmHfL!t0W-;*svJqXQ=|Z-B_hk0~Km*w7G@!M(
z8_mC=wt%MDp(7%<j~oF__#26dJ~s=8miyQi%m(+K(JcMyo(AQrV?;cYXf51ddLF-*
zg=fs?{ndF`w|^b=n8dSnKEjQFU3%60Lh7e3bYKbqIPw45lq-!ki)!meGfxHrcE?)v
zr^n>pS?!Ym`HWyBz-fc^Nvw)cB`Vk5lGeUDg9Q#cr(RnZ?u*qF!M+4Ini#8ndZsx!
zKjYX=2l8nH%VAfUcNp9Myx+N%33^&RiOrv7w+J+qZh{<ZrG@vG+y~z~JQ|#$XD%UI
zuy<CE9^Rt25LUJ`wF#Ktm%SNS^)c?h3!7;V>q^9aVtU9x7$t9j_`<ihFMtL5kVXv=
zk@mEi;_S!Dwq&QW->G>|By}Z2ehuHX`cR?FKh9Pt;r{j`Y=EgquW_g2zm*@s2f{PM
zbm?MA9||F8m=Y<MCrl;sC5~VBaP9f=oAw`ve#F1WiN3&)59}RYW>?RD6Hu%(qqa>V
z9rPVHk$rr5%Rfx)0u9la2tEJno86fR_Nu?#5ipYG(qXZe-byFB4R9Y*&HD=Oe4vAm
zAE!+0;9{wKb=$bI3lPo4MJ3?G?wd?A2Kd01Di_Lypmd*uClL6MLd2AyN!U-7g3qNm
zu!|Cwk(5Z1NKSba;&XP286Uf=ICPvJ59d{wfzS^QxsRu3OU5hM5*u=Vo(IAYqM9&u
zBo~6?eL$W{P}Y+Ai8_MoD9!z*tL<-#;{5+WbVB%EM%Hg1&RQHyV-O*KBlC#{ttQRN
zOhDF@MNkR02#Yb4|JNsmcrq*GB(IEkzFbrwm-knmMEnVn=bZ(bX9SkF^7mdwR_W7O
z{$ly333y$he0Oqz>Dguwrnwcp7=LT*+w9k;Ur!vy+0rjM_5as@_b=y{<Hsokev68D
PL%^e|q=_h1unhS>A&ROf

literal 58510
zcmeFYWmjC`vNhVcySqEV-5r9v6Wm>b1b2rJf;$QB?ry=|9TGHHaDNxcK4+g_a6jEK
zR@2a{mpwIWR@JN`Qdv<N2_7FF1Og$wmyu8ffgo)`ATVWEXy6mr@=JB#4bDkM#}x!Z
zz<7IsfzmT^K_GY~YjJU9Wh+NFM^`IHC(`%g;-pS4juzH-<{*&gN|u_Xy4oSO;O)kh
zm|R4_S2;&jELc)ivFHHI1ZsLRI8@mPvb<$1l^!%nNl1bpxe*X?aRD({DvXFx@Jld1
z$@1fZzePliJZ^h`vt4Mv-5Y;tSQOeXzs#zegz1JwOqO9+;S4}55hF(13Lfeo-2TZV
z5(rP}1VV+aGb3|-rT_!o`3VRxkaxp$g1|gy5nw^x3K`ujgyF9!M`CIEU;!au-EIjy
za<BoIAdydTVuc_PDX@U7RBAO)E+oik)YN1br2GzKMB~3V3kt}(OY;T;>3t=`0?SPR
zkz$xfNPw*PLFJR0QIa5S77(U|Tt6>p=^cpWy_SUxsJaQ%J%Nf)3xY)iv8Y6Z(t#ko
zK}J6)C_F(SX&_9gKUxA843((+^uS7`)e5vw@=6Bk!M<~b(b8ffrk!|?!+^<U3~rn*
z!qn@PhQ}7jfwFZu1_I?KU;$5i`RqA?Tsbkp71xMt`0l6+`jy<oWcTIQWSNr)2(<0y
zH~qrMT!$aP4;^6tl23ICY4rgi_v3A}MJ<X*11R@!MeEY(ZEYm;KDR6{|N8lJNw!~1
z&v-=3_r<c;uv_cd?%q$}>Gpc7bB8jJ%^*-3@@}hl>`K0XaPkXWh{@Vsy!2BO!s`>!
zEP4NXlNN1y%v}|9=QxS<m`k<-@<_2SbOTf~uVy=&jE@jPo1CC(8zC_7gm=XTrVuxB
zG^pBc-8;7+(4RKPo;g}rhybhL?MaW<GvR0HTq;n2#rrSLAkYU1N+$J@I^iK$5J)07
zkiJ@!;Hn3gu^XDO2WqVc;mL?AM2xb(R}57Q-YkI7#fYvvNQ^dYyqbvKh<Q$gh@)G@
zDlE|nnW<N!8ClQ?<-r&>yN9+t5DrrG2P}p$*-8YMNt8B494t;+=p9*)3?zCqCFyVk
zrV6=S0;deCYLq&uh78dkK^Jh|aDA!P1pXf&wxFl5c4^kHfwd}vbBGP%EydjUAyWAW
zQ)X_g>G9aP8B;Fx_<}K9dHYjkRwyg+LgGU#-3PcZ?EQ8uOoM%5H9U-PiKe49<mHHE
zgZR~&<q&O=*QS#-9+2pPlHG7zkRD>B#>ApB+Va|pOESfzgp?d;D{$O!5FskPG~|iJ
za`n`$X!rfNCTy(X+A@q33+V9}%&6WG;{Du|=#k=VG%cUO-`9LspFy9InsHF2IAkoz
z;E=(mNE}`<v#o{U{mMwn@FVnR*3Z75>^}*9lKs(x&oU8l{(h&nL#sMsBa8P7^%uu4
zX!BGyQH^ius_Vsh>S&ztx?&Z1jjB~D;l&snAJciqgR$Ss6;$LW&Ei|(SlwDz9k{ik
zttSyHrc7zgj2=oKq#Qt8c_1Q%VFeFGSkmHU;KJZq;(6d!rOFrL%|_!5sk3mi9;fc7
zp`<x#Xwx^+`(P!Ez$eKjQ6~}5Gpm=B7M5<5B4}J{jH<_%d{FbPl+s{V+bw~cOD@wW
z%~C#ABT%y};V%v^wJdg27yj<2^Bf7S(<k4jB>r6`x5jX{eUKEv6tB*ck<1pUEbL<-
zXFqk#__B{XeOu}?QCqZNX-OWhIJ+#nR-NkQR|{d7-BjnhOgBZiecGawOTVZM%rm+j
zI)XwD`4(1lecRIHlw|EPnKG3!>EjNr%9En3!VbwcoyS0A(IHtHeHv-Y_z9@2eYIt^
z^&<EpYz0BS>q@3l+X8~THVKa|hoaNe?9LAX+47D>8(tmz4}`wV&+5<i4@K9~h}zI~
zh?LN)h!eO11Dwu|0z}M1s~0Nb27Myp1;$CnDcd0lEz<(u=A~k#P-3hH^>-<E?Fa1#
zzfnU|lTphl94VZpt17e<(iQT{^UDv7kB#S#FQjp12(m7+?Pu&~EM{yr8R>fJGBy@B
zHk-e%{i$21bK2PM5UR_oQ=qM(YfvXukySyp&{ok_gjUp|n5bBmy<pi_ebG4FSki=7
z{c8z&wX)H^(bw401jn*zptA2Yu_Mv9)i?H85C$oH8M%x|m&nhx-UX9SlW!w^BJ)((
zci<}dkl9ju5_NKkeYX)~l&;@8OD$to+7idI#WnacB05qoXp|{Mh$(|$aQ#QePg%EX
zXM^oeGuK%ebs4=U#j91PT&LEj;cGc#965B^I_$1feLuvvRDMkhBjhBp*lw2lDTylu
zS~l!cFq<+bXjLp$Hx+o4-Ws3b2;B&|<m?LV3UF_*ZRi+|ad$WUXc|@ObIdjj-4yqy
zcr|<d^g0bH3ZMeBgoq3j4b%gFy!+%s))gnrw`zV<yL=)VliMPSB^n=85%jC4g3Rj2
z@6Q!vxBICR2@}@Hp~`*-3sW%0d&bdyeuy-o;h|b#!f{R!+W6C~DV!eDYvQh(5)Tr|
z5*rzotV+C9?_@dYg-V%h1Xj3PIa-AWnJje1=C!l5-`mL%@-op^ayi&_Za$q{hVNwU
zAnbbMq%!|7aPrEV=)DRp!Y*M#XXHy3k&jU1`FbvjN?(i}L-hy4gDMW*tCy(PlAL*(
zYdT^{T%Vp;@sA9>!ly646WTewC<oXJ7=N{d(3E^`mq(Ic&*00(PIo7@XA|V>U=99~
z@Yz|cluRM9(elW0&%%AQ+&r}QWxyf2iJ3SFX4tmwb2*gGJNQPi!UJ_(+C_SpT1#^+
zi>~p=5#HpoY=-fZvAU7f&)k`3Ij<+^z3AIt8VkbYwB8YE?{$>h@YV`Ad#%FnVnH#4
zX+oC^G)Fbk+s`YNooJ<0`gKr$Qm_sD&@&R$(*S0BjGzJkE7bRRZSllFNt;<`v%&Zw
zEQ>%0D>AAQa}_5A%YTV>&GQ#QxZ_Ay+S=FplCu65vq_5?i^IK*ciDQ#$)zcKDaZ~;
z%PaLro0|0}*Ef=@%qiovt8KxJ;w|601e)8;i-sr0`GwWLt6!-<sB@jptTz}h-Z@O4
z5_q@YaQvQM)oZ_f78>qc)d15_n75cWe|-N~cPm^OS$cSv{Ah1bp=j@XG6XRL@eD(O
z+_=~>H%~MpsID5nz;G;$JVes@l6B_s4v7m%BQ|qzhr&t1>*wJu+~zGY65on@jCc7q
z%q)pJktGqcjad4hbg2xr^hZ4ty;h|$q3MOAjZaU~t0X9y90EFCvX|<^)+>iWvx$~}
zCS$UavV8rR?$?Y~^BcYQO(!;OP#n)%QQfv@BwwTV`P=y?^#3%w{i$93g`w4~m0rbX
zXn*8(B=C|rt2ES>*_K|}qHo)B`l+MA+v4_+Ae(z){i?(30{eAgKATr?z2owe<L$uh
z^(OYZp9{_N#Aw^H2hC|kyHdO1mB@+cviN%Vys+NO+S1vYzfHGC?&b8F^euE~@Z?Lv
zbv%kVRD4?eXc$u1aW;<-FYk`9%k$w=k@%$9By~2<Oa9AfB@Q;T-`)K2;>2|7bY9Az
zl*BH3pMvM3?qj^F)xq9D;?7}DcGeG9nvW+v9%~*%XWuqalz#e<`qREz-Pc^JO%**R
z;w2`&LPDfoKAEz=TLtn>Qd1dK1rX>H6$lg%3<BN10`G?)&_@;!=*S2J;!6dAa2&rF
z4oHDO%PsFEMAbc4es_5J5bS%MJ!bf_Xups8T0RhBzFgaodWBzw*4`hNCFzz-Z9NmA
zSeJQoMaXJ@YJGOn!ReE{dXt@*E+jYk{u8>IeIkcFp_=o~Qlx!gr2TL-qcVcMTrFhO
zii{1IM-U{-Z9E<#jQD3r5f>IvwxduKm;-3Teq*0|^3oemLqil5^1mAoL~?onDQwXH
zY`wgnwZ;F>7q&@d%E|t_JID!@a^e5%7Uh9OxBWl6NeLk%Iseb;QIUiC@&EVaz%MYO
zCP@FiI%-HTX-(MwTpQTkEBgOm{=duj|M}vd&q4mZIwLn?Up>eQ8WkKBG-dYS&K!Un
zQ2bJ*rZLacfdAKg8f_k`8cDi=Z?=ml*fe92hZSK6zy`g5`}=ZpOcw_0e)+qrbX7Xx
zAE5fbI56QB;)(DF*vp;oe*&v7DP4L0PVNm%67#={{sS|UVJ>V$4CwRb>*LtiSlCdp
zw+%WN_&In>o&dXZ!2|pRw$`>b*XKhfP(^9!U~v-M8^nt51hM`DKE_YtJuF}#G&ApM
zqt!-xD_dJ}IXSrve+K;6{EiNyHusZSKU-Ll=+aU&8|T09r2ph7`5$CPugL#IwZSJK
zn4X?aND@_}t<grbx3^#F@NT-+i%&@ThhMCYF7h|%=G%ZDSy&aFe+|M-s#5>m4Bt&Y
z_j{=Z;^gE67BbnQ`J<a#Zbb!>xVZQpbE1(rY6y|7esKshNkQTTY<|=t$zS&=wdlj^
znZfPE?!zc84J*N+chQ$LMz_iK`SZTMq2ZlxKYA=VCcF+*27!h~tSU1qWR3i>9;o{l
zdS|G2$?rtcQk=hDoNu$F*}GU6(8>mp=AIgoUX<haNhbs!WaZ!>MTs7`ne!3cQv#(B
zFj=zsA>FU&L}hW*c_bwT5mZPOEYXIV?W@OUD~As+&z4$RDfiZ2KjrNVs>60qj9n7(
zyXQ7EeDwN8ZBX5EF-Q=*jhG7|f^ZEhiaIJndzo8PgVtl@q+`fP9G{`ocU-tf6LA#w
z=J^pvxy$(HOf=J0bA-w$35E)iGud8Kb8{haame1#F0Slv&**cRu{q{r&ELmD)(WA3
z+VAp#e|2Rg`JF3^I1eFLG^dJ<Ur@0_F=7<3+#_6>(Y#qShWXRwg4Fgr42WUIy2kYW
zwQ|L*Fbb=U<{H<bC0ejS+7q@#M0h1`CzP<gh<{O0zpa0YWEqm)wPQ6duvB9j{?2qn
z7C61|x&_lU1#kpoD~YD*9`E`AKtm3H;aH3u!vWdKFT=<d0GB?LD9KS1t4wP)UTIyh
zK?Z9A$I6tfprA1NVshb0*wN83=t>V<?JwU&jG$D;A{A|(8M+5c1dN3;WG0UV+pjh*
zvt{q~npmxg8Hl6yqoa=q(Do=Gv*A$x=P3HzmJk9(Hk?WU-58j6@2Gq!uxknvWb<Hy
zXcujZ2JVDq$CXh35v)f2_V;ve6SyUdeNWx<akCULPPJPA7juh>^d!`<`I!HBlwji5
znv+!VJp;_T6NY<SQD`-+l;{x^0065B3RmMGQd#)Fi<5+shJYa#${N6k_YQDKtd@m@
z78X&|f`3l~Ux>Nai(Nx4)gi)YE0Oe0B&=8xSTJxg*K$^OeePlXR8m;b-?*Ue!)Uu7
z#*Q6rY-RPsv$E9YYioeE;?9NVf|?Cl5b4Ot%KN=uNYS7F&P%`Sl$^WQ8J(V^?O;-a
z3&jg5%DCE)I1&yQM2q$Jd{beGT$vJ`{d)(;UyO&}B9S(x8IwF}h}z${kzZRgub-;g
z!T|V%Y1$CrWDDiVUqnpfNF%5Qo3+3EeTI;~ID{Vn+v~;T(V`B!FOa0I$?_D4J3V#4
zcLgp<l{W_QW`z8&e-}-qg=4j4o6>c=l<bSw)bg^<47Z8bvxDm%3cysn`uueKpGhNH
z{#_rQD-*aKMlzx_m^P0yrV>l}H$<I$uLSTxSC#eiS_)Nyj_hxU<I1exru-u{naYNU
zy|TJ0_eY42kFVB;*a$rVaLA~@u~4`mC-i@-$n<tnob9pTn;ZAl*s$G;X3jDToFox6
z$Qnpo9J#+YJNNHq1Hbm_FQHefZRsz&iA`4G;p2<x>19+f6C;OL=x`bzp#!!KP~HOr
z><g)4^AFbY=_hMWGwVG^VgR8BG>%=aw|T5;BF|4x2Nq?28hI+9p)CsoXuZ>Y#W^vU
zO%*OE73D8a(fjo21f_eo@@qpCm#)AWdF<VDcik(vxw$bu^3<7KxRsUS7j6Jv3SOD@
z=Cq_^|88=dK=Dr*S#pAA>R1?w_wO2W3_Cu4el+;dv7tVc!zuvOHCE+&y=G%ds1hZA
zmn)y`&9YpX^N}Br=fM?tz;fZZurD_yF_6PA4!*_fhNge^W5W_f?N>F@XCxpXKm*A3
zdZ5@Rl{6yspJ{q&Lt9uPIp?F9QbUZ;?Xo?0;5fLs^@b#SB*@W1#K;6L`Y`}X1}L!K
z1$9J#^vxSGLI1o#x-N@A5peueNCJwjgVWQ&SniF8g#}I9w6!4M==#RSoel3Z$v2kN
z>mbbo*6J6T)&4)^YQtu~WmNe)EbeK58J9v!OI5;*#irt>H`4AODh=AIZ5Nq1xVZ8Q
z3ZTZv$ARFcW6F*mNs1j!21KX2y4g>R9$`hGkn0%+fIekg-K9$I;ex8g-lCHPlq1jK
zDXB3{8bt?Q*kK6u3hu^xfO-}vnl1|v(q%C6u&H59@T_YuSKhyUA3@Yy8F1$$7{K~e
zk{a_7rKMz`+g`VJkHFv1PF28P=r({$DPEO}fd0CbYwp+^X^*Tpaaj^4wy>z!T!AR_
zNg@emUO6FPyWMaIbno@`NrcfrX!IWBu)R_-wlr_Z+Q$5gtX!o1{OY@Ci4p`@hSu|$
zl2?FNi;rw+l29-p6HxwP`ygx<Iap#etS+VYZ(`Z%K7nHX9Xl+WbIs&(RH|6Oe)xeI
zlsmqc5?fljqH}65{rAa39e9ST{-%n5g2qC#2o9RStzfpA?h7d99^_{H$*go5+;2BA
ziUaIEu;BuPTFm(4jh2E);Ss38KDSi&Jy3PC$ioB_$)O(1W?NW4upk;KMLtxK?_rt0
zWwh!Ok~O@S%@TXPfSUq`jUes5aaG*Pq#L*u<FsCG?dia^A*u+BJU6PGNIm+>7q+*=
z*Q!s4V|{%c%ftBXpo5hb{m(7uL4ke*0T8rV%Hq$p%`djot5sjc6b2&fzv=*~wMyU{
z#RGRp3cGK>U*h9AK`IP-mtm0sv4MOo=lqNVU7h&gc=!**;M|!%wf^SgUZQ)w1t?eI
zw6SpZ12B(V*M{u%wMEoNT?L^dxxmTjVg8*oqE`YOu%&*l5bQTM>Uu1a->Xyp6;lET
z8H87j^cEjfEZ%TruFsgk9{-oPgGm4MHoGTpQA;}}o#_Ql)mo<$-YkTvYs5~^X;;63
z10OW<pPxuF6lBrWy%~TjQxdqBw>;5+|1x|mG*+Zb^7Tmp*~hy+N2mEU{o36nUs+9s
zIO?Chm%{2Gp(DlL-~}l<)lipfYdIeYj@So@8+HgfXUx>NF{2inHQ5kC{B~9Jah!jg
zzNDk%Z>aC(`rn6Fkp<`&9g5IOOb@H)1^vblG{IX@X7dpL*9i2%s_6*Q#ekcTMzA3W
ziKh(}u;5fus{|OQa`8-tG|~q>Q)M!+@)#DEGz-%GHTTz7$@>48yXFmsZ|2s4cipz3
zC2?<tvuFCD@99p`2(hFi<=B#c^Zh@;sLrUhwUz#+SLW}dxR#V~xf%-+e1n*{<|?@p
z@lSZh31L?{PLGem5dHTEz+zFc;l!!n00*GDon&TcGiq|?2RMke!Th3{1F0`$9d>E*
z+}2lmH5Df<;QRxK=>GsDqMg)ry8z`61T6~2N-5k;^g)+9h`;zX6YSqJ2>)AD)O(nM
zyfIV$u~4wwwzjtXU|Aq1QWm1F*TDFVGvz=2knJFi=N}Wbn)u}{6<2w|(faG{z%6;1
z=<~~MX{kAP;<v(`)PbJNpIA#9%k_jSjzJ?;zpWRr8)hED{sIg@I_m!#F#xt!%+ck5
zJBE**^hedmvpin*0w;-hl4mdeKX3X0SbeYeFR8)?Rc1jM@H2>EPAOPX6eH$A7uVK0
zbYQZCjiA~9h}6j&!#AO$@nsJPIF!p1!0y9^@`kM8AW-B#Mb=fUJILRoc{30y8xE6S
z7m0{h#+0T;wXifv7?{8ZV-g0{NM9gxR~dsuIRrCd56FGl6tht$X?hX_9fTI1O0mBJ
zNLIDQ=nwijqo2A*3&2(-?os>AMI=@^#raQ4YQV##>fs0dP<5AQxsJO21oY!SQcHhV
zhtSfPZ=}*r)yp*>VO;#_F!yTD-f@b`kyMW%V#&Kmc7%LyPxa}(xwQ11hR`ctE?jhh
z+3;iDTdI`UUyS676F}!J<DY43NmNtr?F@vt^1w<VoQ(Y?jv;Fy&GFRg%F#Tpppr3d
zT62pAR*PZ%=3GAqh;y@0<lQK4oCVVa<gZO|SM98hRDQJRw!c@GzRLE*Fz)oO!Yh0q
zmJ{6P)zmQai!vu;HOl=Vu*QaxY~JmP$df!EK^|E%Oo<Dc9lnxy{ROuu;6H;>5$D1m
zPy~dG8Jsj1OoyFfd8{`5y&MeHk2$orNHo<xX`B!1*%r@+GMjfesxZQagC8K-@w{2n
zu^<x`iF5JNe;ADUNW|SQ#ispUhBcqmyiH9t&5AoTHEmh6)~m|;m7P$!$#GbI?rT>c
zwC?~C!R?-aKkbUbT9qlU3ExBSbpPHBw+{af!fXPedXu7OcgOiTnc#d%`m`^-L2GLv
zBjY^L(r=>u;)vMV<$w?}oAV`706}!>b+m4a7n^Z3z<g_ld5&jZIBL6w4l!17Tm2RH
z0b0tn<BIFquq~Tz`hyfk-S@LZ<5Ur>&1FRXZF2C6PVeOfY<@&^(;D2cu&|i@Xt>6!
zadB979q-Wu)hq_nDy>a7DRqsL@1trT7DE<P?Xz(*@NCow8d47UNK`a9+a+@HmN{@T
zY6Lt!WL)|4UoDoW{$7U9dK@q0{M0ho?{n-%$u5*^Y`663gwe7_s%v{ba%6mvmi_~M
zo!4662y0-rYr4h}3`NZ#aZGgY-Zke^YbV!n#@X#)jY9#<m+nAwFlfYX%v-U@63pyp
zh4S2<G8hH>hc2Dy8cLVu+MZzBh5KsYo3-8BSH%Q5k25JXwZ>33HsD*@*j={+O1sG!
zj@_HW&%TsQSs}Des9x9tlR^8qu&pwEHHf%jV5Sc-T9sG0@n<uy-lhJSLy9QdbN4EU
zgEZrg!zc<w8^bJPOdxy3&7UQu_@?qCche{^WRdzGy7;r?cRMyli?2sMQz3u(A&#l-
z#0U`wR*OH=U$w~>94(%>$BJ9h7->0f<NY)4<9W+Krm*+Z70bdL1F9!7q3znmXvUok
zxFxfE_ZjwQ`J$TJ^ML!+QBJAYh5|?rn6{|jiJHEbIk$%pK=~9_BtnXsdaYwxlWOck
z7(Z080crsc9(!Q@?kL8uu2Io?pFpGI`om&`%9DA1eMB-=N03VBlKD)eyrzb=>}PFD
zi&yf#bm4E)z<aCl?DdMt0mXE8st$|Bd=ap9KI)*Jt@7G6Pl4=@NQXy9x>i{md9QkL
zH9!Enj}2#`2ZDo5kgspoeB21m%81Q}1npg?8jE8MPMux}sbbH|bx}M0{UCA#-hmh&
z-UV(mzm*Q<fa1lRFjs}2whIq6=DtXkIw{W;5Hy1qp$nR{Q3zisMKL{{2ob`JtG#BW
ze&3Cp?MlC%bFi!wMRF2C#F<`C7DTGt`!q6WeTr$s$Is|pJww_#he=6DRZR1@g9*S6
zGwv-dZRGbMbhG4w{qkcRAEre{zTjMH60}ftA)ZiQ%ciqK3Qu?kH5f^L(adwnp<Lj~
zoat3v*%SN@J8y36;ErRV+({gh_nZ)-F#zuFrG!jgu>iHb7b2y9H|v%ecN2qBKQmpO
zKl$)7DYT{f!_HhG4A{%PM4B>fh!IP~-rioOsL_T>Coyj7>n~XR=d=Kku0#UOdGq74
z+AhE8J>LB#Ci6#$D4#BS1UL?Wl7TFC&}VFH*ks>SN;y`7JVNSQgao;c&L_TAFuS4_
zt4CbqBU{&FAMkP2csNs3Y4V5xlR-Yg@CZ|4f|FJ!48sso^9Xq}CV~Cb1BqXjrt06d
zzioz`iTolQHqdyMHc_x<!EOt~r`ktZbNB6FTDs%t34_SRW?Dkj&;DV%b%axfd>pES
z&bwCY4ArP_fZ!j87~&BZzePdviijy0xbuZ<6x~x4G+GP76|eR(d$hdRQR43|2uI8d
zGtYgM-$pv{h;Dw}<9vJZ`0SvQ!%P_auvsHFXODe9=5Iz8O2$LzGgb4t{&g|iSA048
z84~+TZc^ff#kOsBqg`>*_%E~ZhtQ^Uu3WOZROdw`hJAb6CPvIBe88`fvztTe;XSdy
zzWg9dligi9Od~bp3v_(v@%cdA)!4TWXOJ*k6i~XXtgS_9F~#KMM!$3}ijW2<(|%3b
zA&wtHi<AIL(8!4iMce8n*b`)jRPced#VSGX8^*`zhQ`MHrluw<)_h3Z%j%L6I6!n7
zeK5J*@OD(mP*dYwZEOL*81sP(6_aCnu?X_M^of!Bc5rYYVrf~_GHK1ULhyOAMF^t%
z=43U+Uf%47Mwr=$BG;H$8n6(&Fl**o#Hl+{Gp0;e{rcQm1uIt-RzDgnKV6(i&i-1c
z7!W8&FNAU2;K60}`Z|KRE#Mg6wwnc!M^iHITDj<9sJ0S~cS8xpJE!@6%W3o@1Ms@$
z9d$)lqa!>el0NvBBhfdKbnc1Stj{Eq51YN)U2T`_<g|anz59H-F|?d52r9f*Qpr>{
zH6MM@{XrfdIEn%IBbdBG(YUB&2$0!@JDldHxs%{$rE7r>4m)CohcSH|z`k^gVmH{~
z2j-yJVB-_iex8pHq|fvlq;@a}w%m{}l<R}@nIaiVS7GKd(5O4t@U?><QbI9hO9)w|
z62a7NmLRVEv0czKi7UMSXY0q^kJ?yCJ?_l3E&3dUT_4ea%`<6RTLxV{J$vs>3$UVs
zf&tVppppfY!a$&g-iOCr%}uJ3R4&jy6ItCpF@hT#F}^fM+=)3q_@1kTW8K@rb?Vo(
znTjI!E_rX>X?)zu(NWsln;#G~0WsjMFielWiQqU%-K~6#BjT6kq8<ZR1YiMtC4ZV0
zPz@@;+eo^13p3mjO!gSl5GJEgR<>gx@Mx!)D^dE2vuhj5?Y*c-h(%3eV+mTq@V$CS
zDzAren>hkCKEeYL0RfZJNjt{t8!y)e(SpLNfu7a`-Ha9Z6z$*)u}}Rkg!B9M^G||K
zZ4G1iWNP3^;yj+^*B%%ByV5;zITGo{-K?R}O0KMYtg&T_57jl(@g5sp5fIYMsmxnS
z?1&hu8V&Qtres{Rg{EQLN@0M(4hV4jLYP2z0{PT)GK7?Y!L-#|I^(xAIjXf`_*xUr
z+LEF`{>KSoNs`}9Mq)rCQP{HDM5QiD1Q>5>%LDJy#LWbM_B|FxB&E7WD5cYRt7L=#
z!uvVMhS5H_%r+K>r}apcrvxI~=)|$m>v1sxMvq_V>FHAo3t<bI`|_jmBIwZrQyUv`
zfZBLzX$M{%Zk$?_yS^U(?V*~pm~M90-rn|`Sdh^3si!wng2m&_iIu(mJ8KppL#OO}
zY1ipaqjo^q+;+l!_wHTeS12rwp7e}2TLbaL){f)I)U=6dr5_=!&Wy<}Ce}hd9PT7D
zB?mrrHLG|-&GK8))3$c2&obRYGxpOq!1pZ4GUREB6j_>iV&jjYj8w|vdq%P-xi_iY
z4#)UwcT%I*&BtLHyrJp9%JI0^pmQEO^f$r?!{tmObZx?PO(MkW1u_DH`NogZVe=oK
zb}Kwc(_|?sN|iN4G*SvxnBx_8vP^?LU*}N8xIY9=u6J$N5Vi#yrz-k_xn^o(kjQOy
zZ1A+k8}`54GH=ZX@1yZ^gF}CUPX5tD<*aOm%#;vJd8EUcsi~M8ACOWN=p2eZA+S{2
z>Aq2(o>xQnU6mdeT3hXGaPXtwn%g-1nv1<)*iaEFzi?C?&LAtFQ0$=@{Fm<D6LeC!
zG@+e583BFtc!(hj{UA!H^-&+1r3Dc_)2*`EEZe#v7J1AS10wO_<wkpgo8Qa9RvbsP
zd4uvSxDo);0VUA=H1ii$8-_@I{|2Q7*}~VuNV?Ur-5-OzxLFJbCb-n#TRNB58)vts
zLvchrUVkV77bK#ZONE03hYjxoq6;AJN>{evmdvy?FBw%TYzCWg+F1C;4VVDt{g|oD
zzPRN1bfGg0v5KHwG_3D=vlO9TqpY^j)9L5Rra2%MOR*1h63knd$A^@$L2GRBc?l&!
zP;qL{b#*qlVQ?&nvv6v<ncbI5E+<&chpo`h#lKZ{H)}e{2!(U{BS+!EwR~Z31+M7y
zo6H$(z=h@(pp~@<0w(tZ^;Ae<!L^e|(=-yvuD1CTG#ALSG!rzzj4o`(yMMvyqdDy+
z+Y&fECxQ7mS>WPGQ#-F~_{}{~&bwU1@yJxOGC@<A&hq#>6DF$B6_>&{$py&mSVDr{
zV}Q&O0K(ft8*r&%(<3ZTmu#OZPsgwfLpzU3*Qv8+lp_z%;9l4jUtF{`K^{FwQ*3v*
zVeF<rP&OlGitJ14Va>$Lq!X8qVSaQTYD`C%+}HZ%c!QkE?G1z`&xgYzAPm{Hc(<tc
zhyVscCgcgQU#%^a|JIpYpWn&?jlCW?@h--UQ!|p?_9}x7ru_JMB8wjlaJ}hi<LIV&
z)$swq0SS4WKtRH`Akv1vZ&G8*Q5@}|)JvJq&B<vznJsAEAfzXmkw~Mor(1+F#?AFZ
zpQr2{+lA=D5@}6g`JM-VJ;BACKNez==sRHrSI9|MmtV3d$XU&ZUDA-F#YQ}13=v}y
zxH?06mJ(4B_k#KzooN2|Brk|fYi<zKG&C2hiyOUp6ldbj)>1tJLxKBV5&59^=-V#C
z=gsTqUV`VIYv-TVofzOLbwWtbb>B}2fCo+P4@4!|IvtvkXeWb}F>N#zIvOD|Jh5p*
zf2KqxSa|5JK`bnsJs%-=J-?#@((193O}G=>kuUkBHu}Q!ODG`VJJZ}O@R5rnnSU#q
zb_;68bKOIXq4CV`Nz59VIv8&bCe);IGnSbq!d74dy~GHS9J(>FeBueD5e*nQ=<jLK
zVG<S}qW5)RGu?NV9or_ypW+ZE<>Q!hra#h+g}OpTxfaZyt+ilmJ1tFnSnq4lrf0k*
z4FKsm`dwy7sfjbxAYxnF=qxNUw?brj7n>1l2MwC{xX3Eb)e^gPyIA_X(Vn9tGeMvQ
z;4`sOXn@4pkD;W!Z<33s7|3W&x(FVvQVv8uZ|^qKIzj|8Ttxh1H$dl3krEtdk*_N)
zwZ9x+wqZJPYlb`dJia5oe4q6*>Womr5pGaJGMe+NPF;KAFe<E%Lrv~h+CJ|08C?sc
zKVy|OW()nse7>+CF_KP**AvGVK`sM-B;2sT$^AYWp&QW4lhP|}Lj#yRRbehI)143T
z9OP4RS%v6x6yID`v7~_kh!c#YotWE9C#X0FR6s-|*cDLia?J-2)m**iTtljAG1)U_
z!I<+_`Ncy_He3yO*JDDN3dxTNdSl$eKC-@EZRMHLDE`KB$Wb<#f?0Md&WDRNAb;4h
zkj+^lk8|Zt2}>I?4V`RhueAZv7N1<pc{50yOV8Tc3t3@(w)lRxmGj~RN#vmJoo6+1
zT;h!E`&EolS8(+buFRHx{8@{hziq@C%Sqz;rlyn%Wz#~S69|J$7_gbs;OMg^dK~#~
zk(qC!@#=Dl?KNggY?A!PO1<Ttv8AP@4g#k|?=}lSj;2}BN}7H8j4m&Y;tZke_;!G~
zq*~Odrhy<QLm4zUSbpcUPiWe=U|SSi2c<_yKPLb+ei;Y;d=JN~Uc77&^ipQ!YK(4c
zxAx;#hDKtbDQ69J#ooSdVydi8eDH*<ZToE>NiWIS6aHIe$*YK?YvJcPlp+)zFVu__
zjKh>qy)_ht)nHXT3CqcO{^)z|Dfro3huMEbsTeF^-gbdB?Vb_ux<n=}vpFU}nu7>N
zh<{N6omb$d#<r%Gt8Cokpa8#MP+R%I&G{q@sR0!VdH5^5icy2VKoQY&78NPxgpUpt
zmMqVuQC@kvN8AUIZiaEQVY9J}{R_xe_bd`1iB`42Cmaee#iLBLZue{Uw|Htfk3qsg
zHnp#z2o)rb*Hb!->?4PE-0UQpwyOWFdW2(luxHV4!fH!57)K9_ZjWU1L%;bvR|(}u
z4wvDv$LX3*!X-=Hg5<Ypo6P9Y2oA80-@`1Me;%JbZ<Z~mfsPLv`GfwkTlT+29l$So
zhbj&tp0CSGxA!}d;Wf-5>gU$bum?k}zyJ8H*a?t0tl56KqJPEH`T3bG#tR3Vhf#-9
zXRZCCVCT$3+h;1cFSG%>UM%Iwf=1Y2T#hRdRsO#`(2h^UNw<J4e6z}YmX-;JQUx@5
z2?fWAr>-f%S7APn$uQsUZv*RqgOY7Zb`a=74M)DHw|sHlpuIh5b4x>HByR<IkvLnz
zh?e_01F0PbHk2ysg6*}|vPr>rztcXKF^BqRtaRDCnZzM&QPBmx{tJ7Y=~ZImp5^g;
z1CO%>rQFppYyn}sgbBD|MZL(%CoRLxHC(Wr-FV>-(}6%$E<%Yuj0Ic&MO*DcG?ro@
zT_4Pg%DiD*vha#(NZ*bkFAVO;w$7Qz&c;TJ1OH=rUcJI`y_cP`b&*gK;njy$5Tu62
z31b4PN?cE8;>~3T$blp2`^vgMnmP~V!cu*LUP@6*by_`qu@^jHa$+tbmw`cimG7>t
zGA52b6J8K)fax-?oQ;nV^J(iYHX+{?G4Lr<3gUZUumGGVPH*ZPHI5#a0fNRkmRoI7
zlG^h(T)dp1e3&JHp`##Z_Nw?%9FjFqyc~;v$sCOYUi3(J0b@KFhkfzD#jN-zh7zLy
z$;ZdLburHICS=pG@dOQa4zAEmiOC+SKcA__+jYjiW|;SP3HSt7+9Ga^ZQ)WEeOY?V
zcD57XAwJ<UhXmBbB}Z(VY`+KVkiKmjB*^RqUo!fK2g$u&SLneP^hi$r3NSB5?6Mzr
z>EL@iiSu@R<(m;v=>ri!FP#)WR#TpCjeJN7A;}%zlnS)}RYWka)aFO6W9_3zj0>i1
zCR>RIm=#x!5F$Dtq0zC!bK!7(E4?RK_bv6eYgDf5BsP5bgY3EH;JF45tV=#^(0+^8
zK=OD7m&r-G29LiVC?@+?PjyU!f|}L)iS6ufGVw@O8~l<SJC8+SPA@w=0!*Tp&S)0>
z>7g5ulrDEa*L4=ZOQp)1%LX`$*23F;`O$NN&XS$F5YkG;bF}tFXI8%iLt*nf0g?T9
z>3jJCr>!NZtNxvHZ;3t(8FU_lrwp{0KX@B{O%Z_hc+=$%1}7*Uwad!M8QE29ttR9;
z(736OYmrwt^Mn!w3(0rY-C<H?2-KDs-&B6H;AGn(TY&$OohYO;Vrtadt=V|6jO(T(
z7P|kL2$ZYEBeA#o#~d6v*sBX1Fe&bQ#&L*X&5Je*hC%mwm3zIh$Om0R>KgXrtd%D>
zGeG;0_7htuTRZ|M4p%?^?kjeoBWW9=&1{&)-4jk%ghrX>Aq_9xXLC5QY;$>!buWew
ztB+i&!;~}DAL$g=m>i&iTJ~-UQy>cTehj&?1?owXt=!&ew{@^Q_V<LU{lVCgYp*(V
z()P+nzpHElgW7KszRSy0N$nZjOm}<b=>QOyUSFWvuQy2NIJT0#PVVdzW}yg9BxsV&
zd)PwF*}Sm89B&ZuC%w>&Gq_9}p(BA9K2JTD3K0lY`E}DJBr7HOSYFTMuG^ve+b%GE
zHQHl7Gvi~ScKZ{hNI%9!Tg;i<zw&rD{uCRZRT7SEY-}vu2)2y#Yl?}@a*yKtsl`sy
zmX{<bNqD|7!aC*1baCtf0SgS<ux@VjP}6Vdr}pD#P^+=(a8m5fdcBr&iti4NYi-w5
zJqc-6>O)oe{cPNE5&iNVs?KK2=PBW_=BxrjhB{GszCQa<rP<Tz>VQ@L7b^AME)etX
z>~Fok&ju_G*qSqt!@QpxGg=0h)dX!NdT{H8M&F>vA@!ATrYwFYN07nylZ^Hz<JF_I
ztL%@o7%#nOll;xk7NCWaE&ck-nWeRwJd#j9-+d=pZjbDEsKB*V7*$z&aLK!ay(f`$
zd*^5BLrM}hB{=VRXqjg5Fe)PS^m1CkBnK-ZMrHpoP8tF;;rWt2xcOKQ&@xx&$Geyl
zz)@NGt;1jCD+)~WlzQ1u==P^)>|?T*wb$2u_3m*5nu~0c_>Q;U(O<Wxkdw|Ea)PMm
zB#37GRn)-%eyYaBwqZg9Q6#=6<mV1z#~H+aTE?)n98+{$szn2QFJ<e}Yy0*Q_55+`
zx|OpFKO1J$#VduaLaz;f*EBITTai3T7UzT)gM>KnwN#JwL@$2!n}dA8U8Zo#aFFM-
zld#lK(bjE*uF*SdX`NAW%e#3esITQ_&8H&)_lJ$>#O6e!0@ayYP#|rM{oc^KpI-<+
zZhU@+`M}^-iAQkxE~8S^pbra>e2G&)I#Yvj<QGn(0&7g71Zw0kDztu#NU;jr=aF)Q
zmxukeB=ZKfA}jc`sw^yZ%bP8(MfY;?TFSiYbVJ%-S0eU|@?0-2hDf;mtu5iI&fMWR
z!L0CnDd)z3k*Vm9Sxob-dz31<m?p9FAq#Y86@<ahs#iJAx{Ki4YQOy)(bT~N-u)UC
zPJ`#fQIs#Jwm!DM%VPEcn?3;h?f<9{qtAmvoh3q_7BqAMfqSx#8BbEgR0syj;5LY^
zdVNXRa%TVphx5nw^Lqgx`Ib<D9CC(w9GCeL&_DOpLVm0Uq@~TPwRu=qTvE~_j<Ocr
z+KTTf=<xgS6VZD3x4W$l4_3Uhp*u5lM2L5u7CO!5C;c=DmSjg%460JathF}q!c^9?
z8eo}iJYk)ItgfF8qmT?BTLV|74(w_M&#;|K{SlE$;T1KmL*jR`!l1GT+}N^~w>T%i
zMWpP8HDDE%r=R)XIyybwy?@%VoHQ}pg7nM|?XE$+OY;{P5}6W=Zzdm_xvaV!A47xk
zO*E~W6-`0Y8xI{Pm;hK_nZ<~l5cwO>($Dmo3moD3Sg}k9o2bohyAIbY<!bD@Hw{!t
z*2oj=I10bqPKu3kxD1M|$@2=&Kc>s_+u!mpBjs{V#=QT-@0_hYi<C<>>GI9uNEqFA
zBr<<I^pQz^2f^@>)-$^=7)2;RT2Ba06qSXKFDggi5utyV;xi>$P`5ayq`+$ig?60<
z*2|V{S2z~K(bfg+d2>?Q9~zy<WfMi0*2f9Giz8dd?b9{~$@Kl6S{sYH^iAqITjt8n
zQZ#|dz+H~2%y1#}-aeG|=vm8&?Kb}=gra!K1X;x(Q#^{#*q+$=-C{o*5Mg_Kz%kX<
z=-wl7GU;~=I%3;4Cdst3Flj@i(73(?Fjni(1Y$AF1Cp%>kicgOI(n8t(%WT+vZMIz
z&yozaHk8#koY-)1ly{@>b%r12sp*g-<xtIFdT>0Hyi7C37x(jY+L?Y%{YX}?Ru1R}
zdK!<WKgoBaoz6OL(-*%^5jNbJ|0T_P?(gpD2xm(*!Lm7fdEdT4aBp~iV+{iY&GBDl
z%BMi_j+|(vBsSiA>;}}b#J*Q5KvXG8c3;VK=7J>6l;%>P+aOTx^rLZ)vR49VQ*&%1
zus^*ZcqBL3i<NOuKUM9BERq~j6g27FN>=S&esk932iA3&7H?|jLs$!_|B&*E)aPff
zJFg<9ySf~6i}{s}(41hxe=C*8G#~j;*X>I$=hG+9mtoAALpRY-<-R@&Tq1(WdmT{4
z*paZnh5t@x{4+B6M+Qz|w8(IiuVt`<C4y-#Sd9^rpFd9DL}_=3vHF)(K#KQk>r)rG
zc;CxYeVoM4wTj>FSBJl_X4b46;lrcT7QHN=_nCfJ;O;W^al9;yy)!vJ>(juQlN4TH
zDcoZZGDH(Y0=2tWZ+bG)9W?T?G;zv|K}G>3x~3tl7!5FQ_(-l#_f|D_oD1lmb~6*l
zn_5}Ht~NQ+)VSp-T00B^5+%I_(*&!qo)Ozff!)v(pklKw9MUtN*fk!0WTUP>LH2Pp
zCpcK8wZb!30)8L#E{FbGi)bSvjF<Ax@P|!7kc3i3qDq1hP6iv}f!N6h03=)#`Tq&W
z#U~!<|4I4sXY-W~ze+zA>tjQ42^{IxZO_SqlD}?<a-#9|QIbrdYI=(k7&QHB+ip(v
z_R&^*4iw1mEIMa!-@^wYkbz4inIj%Qr8sMj0-n$_8)^tktI|YLL&L74b>XRih>C}|
zS6jcl$P*7E(KzOYX`!ibe~g3p;b&3J_K}ff6z7iw4O5G^wL72@Mvy9<W~-GqF|08?
z)ZYBeSyni{H%VB}Aj1J|%NPb|jHUee&8Uo`SB-49qgIWsF$pc_y`qL{Y8Y^wUNajT
z4nknXDmd2+lErXh3=NPVd>qLbBw#ww-ZR!9brWD$6NT@YRg<GKIQ6rwd_)^SB!W|i
zZWv1}L3P~*G(oHOMmQt}EgL-6d{CVEHOTd5@^#h!8TaM!^|y%YEVZW~F>Ms0&I}a3
zB7YoDqF{BZ9j1l`s{ls~h4e-^=^q~7JpsiW-jr+hv_BZ2=F6Lmi?vJUygXwleJ0+j
zNf$tSH>GEd)XIKD3z_>&J(W755io$j8Ib(_O>1e7G7@qw8)yg9bpSf*X^*B>b|XAC
zE2y^Gq!rjSGn9p@2fT^B9Vouw#7zWWXw@^d*$p*gKk9e9ZangMA3zoB@{Q+vdcErG
z*6a+pn)WrOv@Ky%T5x3&5fL@QiT{#i!R-ec5jEbor(-EA%AJEA<L>b_Y->Kt3sS&<
zWr9pd|BL{j@BzcTeHcmt3HPc?6UUEivta8C!)Lc@jiz~tnv?B7)?{u9E3!&1i$b@=
z>`U;LRX58&RO#PpQ!en>v;DGV%1o6t)~@Jl<Ub9`Z>}%L%{H9Eyo-Oy#qxh#?~)#+
zAr7L2-MZc!PCY*gwC^F=3N@5;YToy4@AMGPhlw{bF{!OaO56+|&*0u^)Js3F2|t@W
zt_x{g4^5=;cHaBJV3c;4FW`=iDLa!6Tk98#vOdIQ(YdA?G_H>$Pu-_kW8p%Nr19?Q
zsp73<QG>WUxRF-IIPW{V+!1fui12@!u9MQ&A#)vTPW?Gj%6fUI<O{AuER4wHEJdIu
z*$N^Oa;7`Js=PpZT9rllo{^F9zIZsXv5~7zyDrN()hMm-0H4TVh{#ILB6gR1(meyA
zslI;dC;ew&e63D<RvS)86`1Yr`KdMw=-5UqGFb+s?&jpY&GoJ=0+OwHX|U2ncN=@;
zx?$p;ElfP(Q}Q7cfa503`AdqLjI%L^-G3ED($DVxMaNC-a8KDzjItJmP|FyM16j>G
z5?1bWcp1M_l*MfHNJGrgpJp<W?e~<pf8HC7)_#6GaAv(D-sgF8-E_Wk?6^Jr`p|XJ
z?R&d*nez-u?03-VwYg|o(G~rAD||yPctyN4z;iqyYNE@qZ?mFo3B(hi#Zb-rEaY(d
z%J43K<3~Rx$m6B#_Y$U}q5SR$68Fx+ab>unaO?W<wTQd9xku9TDbr?oI-*bxj+Vnx
z5Ic-gu(3u5W|dRok=FPJP9_0%UI#}~2anHIIKYH=P3pqeX)DYN&l?UqU|brgeo0ZC
zt}Ky5#jI>>*c@l{RI;=2fP!T|Y>sJ@w(SS3<sPp!@n%4><epmsDK4T3AVn`V*rs?f
zC0A&Ft9l;Pt@VApsYSckxoBn`a*Zu>(b<enljfT@V$v^$tH?UDpRjjL+mP2m7A=#*
zvDpC@nR4N?c3pe4vCv*`5j0IVZP~?^ILUO}%FQQ^F1+_4-L6LRVeOYt-xlp)^Utny
z-Xu=K$v_oe&{0s(`1v_(=i@WU&ROTJIAVOv#ABW9!h*Q-rA$F$^TSG5d3iia95N|y
zg~`Dmp0Myi%jZ+MswZsbjz4;KtuMMy;X_@a{El5-r@8xqFc%|rMct(sM)N!1$M+Jk
z4NiS!u;(pMi1A5<n6idujS#w;Nol6O;>~+u8P0pZbtoqc#j?VON_C-XzX~QpGbtHa
zi4Cah_NJFr4y|(=B1k&-96R@&-S(W__8x!S?ekc-cxf1fPqi2XT6`;=LXMgWVKBt7
z+unlSJq6);wemgqTedKh1Uj4olkzmwb5*LJwbuqa>gq563jv#ffkEgQbb2}}d?>IH
z;#31e^4m2-#0+{hgAT6So&pX+H|_ddhU0r8b^4J~-*jB3XHe=`%u8`?l`W9@YKVXa
z-nhv)a+vYTNIwc4&IwDBFz<X3wOTDsqy%<DCpR}YfoU1x=xgbZDhpU$E`=h9O}zoA
zhfzu^-#QTtEB?*ZcGD`|vC#MBI(q|7oqziygu=q18AA0)J!K0NTesx7mSVI~a@^2+
zpB=Tm7PFf2cS&BMv?}!=viFg?(oH1Ou^7O$y&J%9(=X5Gjs3+bF6o`<Dw&;}w^SQH
zyTX7@v=8<o(N0&Q_vcn0;t{kA50ac-#;Pw|1<!~FiiBT3`kXKrb~mkbg5^x|A2*z}
zKU+wAc)*$wKO-Aw*-9;VUjxi5KqebIUCp7YYe;reqz$Ai!}a5Xx=UVI*UTTnej5Z%
z`rcd?8>#@^rSOt|%$H;T=PJ*tGs@5CDq30y$>z#!mBd<{<&cw?K(QRP1Ela_oBN@Q
z$AOE-VL&G%D*vMj(%0j{`Am)7%TMRmRZk8dx|)C_j+V2G)z=vIupT~4e8ZK2H3^t;
zFks29Yk%NAa(lm0#TeWVOkV>X0bp)eTdglTzpae`n4$DMDgi2@$LX1wASa&R5pB;v
z)6D{ye>YCulPnC?y27o4<TPVyA9Hhe$A1MTda$pXfN8v``T0<qY)|mF9=g4~BryAI
z8gvXyG7Jt%)xpjOv0YV82j`xo?JKf-Pr{yOWo7NLO-WXr0sjKhYZ@<Tk?C$O{oVpA
zf>X{b;dfYaY>hft-Fme3_src<s$8wJE~~T~Lre`|=6;kX4VX|CH&w%pPznUtY!3Qj
ziza>h!V~q}TSW~6=nMxBeG^HhJO?+dwv_9O;-f4qB#i)ss?ZS^UUt<Fn@uGAuRkwz
zsC$@FLTodz%3F*Nte#sFUg2US_+A7w4nhvb1gxHwd_XddnZm=T2ZE&G8{U@GyL)Gy
zuM}^=C;O%C^$R<@iQJ{afnR|%e~+{QzU%gI31+44NvPg}V&)%j%FH8@tOl?q+jvuA
zoV_82sx`>$U5+!du)xhTNuDPhBV6do=N&3Sfw^FfGJMh~a7fVr%1Eacvo|40&;f9q
z=5=_?6hsxwo=bt#yD5dG$3gx*R(!hId#nk8yOpz72gMr4iF@Y(zD)cBuqtq6&bmXN
z!FgbYg`Bm*T-aHFos+ZV1q&(?0V8?<6*+v|qiH|(S^l!OeGgLJ)`nf)+z7<N!4YE6
z=5D-LY4Fyk8<xxvAvNN}cYv}JW-Yht$Cqmn30bl73OWQT`M1gPR6&RnMX$Mfyts^@
z_Obr-q=m)R)#x{yD+ydc68k?n4@GbRSb4+Gh;Dgcvc@;gr$+m@Vd}B+GcS1m^}-s=
zVGmyzqw+K0e}Tlky}fnDTIgADKDaGoKs`#<sApuXPqF3in6LT&!tcX``iAYghREs>
z%qEZdX?;UMud+Lgp$c-yHC2|axT36YWeb|3+r16z&R16|c8_@{Z1h(U?O5ss_6|S0
zfI!e@=MDC0=OuQtKAUIu<O2ba+uylAdYpS~zN##o%`bd%I$7VvS^O~i<mdr<E!&+;
ztSN9vS$OS-Vb0R(>Uj0qp0!k>jH!BTCw{sb+8o$ZAgJ@}1nA!9tm}o!>?5v#@GYI{
z{g+hPyvIAqa=pVqy*;vM!Y_1Kug_@|dn9cm6ZL#y#xcXUXPw1KpE^A(@N@isqg30N
z3mzTWY+ulRmYC-86P%5Iy=;387#wUUu7K<Q^(Bv%2{8Ryl$>y&9G85SPV#_d=lRDv
z?aKr7f{GQvCS5RrEIe+<j+@J%{&bR;gPe<(&Kyx}URbZV5}-Nk_Ow$>wjcxlQZ>wa
z%`A6yzneDxI}iefu+4g0{QKbob3GbpT5ON7j|I%2-e&qWLt|kdaCM+bwXm@8#rFju
z16Oy2rqa@JbB-aCG_LhJ%gNf!aR;EDK+u!>64x<yf&uBocDXkESR}Bw4X}9no{7Mu
zyVU~s{yP?P-_(6$X=&-<&`>LmC<4d!>AUvQQfOtV)G9BK)1RNiUo24vcb$PKG0t82
zDQ~@AZ|oskXV)^xUijs17ibwPo^fB!;=_0?FuM2X!egxb`s5huqS2Bx&4P#_id@q!
zlKhL$HqO^RpK&+U#Q6OO9haD%m78&4$@7N{r}4d2r(JuGo%G|T?)^)0)bqSXu7I~{
z1?W|gZcU=wERl!ci(CS-4av})8+wU4pDo3*#{;9#bxV~c9A^}{@o0Ejp3>7rzp1xJ
zk>=B=zv9cX3oIvS>SBL*Y4Szyh@IqK_D^Y{cNz*}ptTuhh2bLxB1*j3^Ak`jku@KG
zR@HcRWpp4qi!>OF3P2zn#icPHMh_4Bt;*RJOuy?s2eg&}Vi*`Ca0sQ1LWO+CA7!qs
zE!Pc5>n~S&{t*Z`Og$%>G0y53=eF6>QQ+?}Ek?F+jYjuk3!XCNLTkhsILe?wL0Sf}
z>h!U0V5Ub6!OvmJ6f(`)VtLdNY%Dm);eE|VrDZPmKfS?x)HX>u%Su-YOKmLkGN?2(
zb|20fN=xw}&uJ}cUEX|uh}gUQyGg9%memZzXO|^d=&_#v$JJj)RrS4JpfG)C5b2bZ
zPU&utmhJ{=kVYB-X#wdJMY=l=B^@H&aTG+l8}8!s{k`uP_x`109Ea!Zz1Du#eCC|b
z+|MYMasU&Y{f~?QSe~#WCzqGA9VoHk=L=mt5P=)6xRg{!oA-@^L`kW=mi7#`wzeVM
zG@Ore9;{!)|7=oTNeG9>a{iD%RRzk<o$`m*1%NSnB70z%d4`FyZLpU2bMA5-ul&&I
z<|eSpHvsbL@S8g0tC5eZOak*pJ~ft53<=ka51F4<-XC(G3+s={7Lj$ujrd4^csk;l
za7Ho47kPEpX)o8pTpqHjdWeo_D!y-AUaK2^je3Q3&Qe`aiNebp2qa2IhA$CpQpH?C
zpUpW%MdP%Do_|J=_gfu6#0i3zUlp~#Th1)|?D1D=N{!;*YY3?O3+B3Ho_D_7gaPy&
z8xgptyQ6cUMT?xRx*safJHHK3C~BCfCNA01dSnC&&jeQ{ToMxp;%l~9;+V1UE4uF=
zP#jkmk!0N@lVn7BY@EY(#BaiM_JX!?MJQ!2izZc{O?{wbFN>{IH}4H`FC&oknbA`$
zTRh&sj(vJ;$;G%}W3O_$niv(bh5tG`JLg(UmAV`q%<v!WD|JZz;=iCauIK8T0OdM`
zRUH#0?5*QyD>XTDvYkzA&6{+Lw4ur2F&pZxs1OSqN`u#$ex8!_#TYp`N&*lVF!p_5
z7_=Gd1%|>Tlv%>#dZR6iSzeB0E<I*2#%W*x#@1W{dA6H>&0QXGmmfX@HcVX{zJ{3B
zx@nfkS%6Or1mj6RxzW|=m)@sM`c<d%Eh4|DH+t};YfRMBbS6%=JN@nu#}Rn!&NlCV
zFNbN;{IRt)Q$GWjU4J&fhVbPyoWzUuf$8a2fZLz0kERtpglXp8`lUu4eQ!&DfCP%W
zopW?1+p}m(dmbu^`;3i^UUE-dcGgg>IX{k<cNsi)s2^-ik39vPN&upzYa0?Oc^Mj?
z92t*}>6av9&Q(#-5QzANA~-Y|*-4V<cB~!u+VyoRpqC`=cf$_rdf4sk2UgsXqtGV$
zwoz6s)#TSun9H74%up$OaPmvB;L=j{)6WkkzPIJadT)LfmD)p)qw03{_GB3GYcnC2
z|7cVO<6juzh=pxo-Gi~ApR+od^y7Fh_|k^7Fct*+StV8Y8zwiO+To7NRfqFJF31$c
zNI;q?)Pd{+6Bz(iZQnu~3iF0{7K!*Sok~Q4xux2Fi1-YOR66Rd(fSGQuouOaF+X#N
zINSA^4>@$=l2H%DUkXatK?rCMz2S&v4(rRbCaq4beZK5_JJ$Tr>fn03Y3<ViU;;XR
z6M|w=y)tSlTj?3ofGpP3)D&Q11pI#o_}^fx4dU55qjEvTPVD_=-lYf=^h`EQnHo3b
zUaM?fuLT@N{uK_GJu~;{{ysRG3GS}T8D{?YOG$h^R0ND%Uh7%TI&OzhKK_szpV=w{
z{@uS`x!YaUrwNgxHTy*q*|I`8<AY>^L6xy~VI~Om>E)y$+b5LrMK|(pduO8*eSjxa
zmfUm%e!8QlL9xR|G35M(17)-T9z1TBzj=QDm;iuGH7I9FgRAk4zl4gk{n>jL^0uWR
zQFF_jl~CH<y>3B)zfLizr_yXu2<P1KV|mvib&p*&IZ6hge6*a>o^7%rtUJFYF`)2*
zQh`$R#rk84ztJD2Sph|SMvv~{Gxxrb!wS-{d#R;jGSEH9BVU*zX`=uP`N|C7^i=(|
zcEG*3?YtpRex8i$Ly0gEwk_BzlByp{n*g9XcZSs3j{DVJ+9R)=$Cv&;cy3c+d#2uC
zkN*9JA{->Oa`3JW7QAJ8y8jZIe6G)(!E2zS@{lz4a~^;7FG*K)Fs9<~7ii=%CL`tM
z3mMW`_qpYk6<Mzv)-kbghypxrjlEZ+5hrPZrlU|aCCDQ`ThY;T=V92Q%qUvcKOaYs
zrHu$DdPwo(&xVYN`0ZkMAgtzF{t`SrX*>p=A!wZBS~K8|O;%i9m_%QGgF2HbXOLD@
zR5Y#yG@NO;XDEXaJH3|c8G%WD=Hzp;A^Y7>hSUh9>(#TaMO;U({YCAhH<U`^yeUIQ
zm|1gGZ>(lUsr$sg-;)1O5~vi)U%}<<*JeVx7Nq^X*Yfw9XT>Mv-7LOKp6eV|r$^Cn
zYu5Mgg7q}xfh#yJjFT{%?K?LkPu^pW+U7a`Y6jDxr1^k5M_+hM8kdMrbWYGoGxaoF
z*ZCzK$W@6c5qCET16KF#4`*{c6OY#!d1MQ`_xFf?3$M+k#=2o?M<y=hEC}3I8p79#
z!`X0sHmY#H7MBhQ-J=k_o{fkIc>dcfh<C0kE9-vxf*fT@l_J}e*gry)9m;Qe@vLUR
znJn>1L@#KnL%p&ck=6otTb>@!V*29zuD%FW6npY(U8Bp2v1u27tzV)<394r7$f4ER
zZ9Q`|$bWB(iG{TgcscV+KClBPhgAm2jGL&Dwk~2coM<iZi~1m`vTZxP1$IkklT@3u
zOF3ChdWQ=eE^4|;#D+(*`O0H=dQ9OUc}@0PNB~<$O<!TxmfH-9W{ENAhiHetkUO$x
zH$<jKny^-(?Hj>XREK8{>GC`sP&0koo%l!3IuN6r=w?4dblf+t4<6S1VTA3+7x3TS
zFn!Zzt2Qa6mEbUH{{RJ(7OrRWXfTJl<x&5MC@GbQ#F2kEKF`83w#6TY+`&^#SadV7
zc~^*LtfQ}2{bWQ8TneBZ(jqmH4Q4%YRV26KO4jZLx&h~)SnWa@+x&cEB9AF&fByh{
zv1^MFE>h=uZL_?XAJuB$_KPG?Qi>Xq<I64W_cK{u`-ZSy(3tIM5hZJ%X6df=HNJNq
zC=$DNs}H=!xH=Vo6hD?<yMb=B+)%cA55)i-f1_z-`?x#9d@pE?I9k2kXIxRAR}~dc
zSV*W`t|!C8aKJJ6?9w7O2b^sSlyrQ&*88TR*m=%l?(K)xU>G;esCdZd`3H)5=L$bu
zJ-m%q@<GVtV&k!5tUu7y!8H4#yma*?djK`e9u>~-{d*<*E=OnKNR-dlta@!<R$QZm
zjykkG^Ye=4Q(Meu^*8QyJ-2%3W?83#8`4|GEdZlTOqJDl=7G1PCFIgO75;SdIG`uo
zf#*r)h+#8i=_~SUszEb5acS8-P@cZbAYbgs1IlIZwSnWo?aq+JxJ&Adm5=k)5K#B(
z9?tvNVO=#CKIiMcvGHo%htIwYiWvQhRi{NG_gQsp&$?fn-6PH7dS|27ugdWc6jT^z
zjxrD^|KL9kbGChFa}POTrvVsGOke>+Q5-L4yZ%?Nr-LrcGzfNiocDMmzDb*R#rjXT
zvj4{h5`)SDAB^^@37>8pH%g}5+lWU>W!WxF9*W_(9Do(MA7{x{h%ttp=RhqyKn#T0
z7WL=vUe8L$`x4I~I<V0!cPwRwNVszeGxFg3wuM*dvGUE$M+><c{V)^q3GT}*K>SBN
zZ+f9Md`&EQU87DWT3BlD9wig|X=SRM6d482A=RAQY1Mx}RDAE9VzbW<cT&l+HOr5q
zo~n!W^)E0U8X3{?Y{67#rsRv=Q4zp-ldmpP{fHYgJo;job^L?FpD^gAXsrkOsP>4K
zb#Oz+@PWnE#ud|+Bi7P`Cdz}<fFZ80m;Rlrq6og?b4P9|tO0H_PtA?+3s1fvXV|7s
zXONkOeAB5&Q4W%gP@||}c}sy~=zzXHujw^HyNQ}c&~BvoL0`nVq(Ty^27cFZyYuoW
z@j&L|2J{A9?D2iVm!_beE775FTk?W+VH$;4U`n+cGEnu)VV+Ls^m<|_8ov)4zK%k0
z<s0*g=K^T}cV>%tdh%v?s$lQ#?Khtcm2*}40-2^Up)NI*&_O?EYCuO=`A?-+LK$)&
z>B}etcjW1eEVg}YLK?>TytcU5+9KwEP}EY~r+e461q#c_Bt+Wheb9GD`ZyzNmhh9D
z$NPkIWo1M-q(nt5EDNQUSGkl&Bn|tv6Ic@X4Xmi~*4@D&Yui8OC~xr|pwNc|RM9)T
z>2f_}ptWLTO2ATmn~fKPjPW3fX4F<_ye-JMF+;9Hgt<Jn6JWbt0NJrN-l-|R#B*0C
zP4j3TMN?-?*AS{cO|=8IJq(r%)Zb6miq9SNbX>Og@>ZxsV{48T;ot^<91&6_<mALu
zs5iB^Xpe<wNX|ZO6@SL}n)CHr84@~bi}C#a<1E{kS8_jKZVsHrgi>))u3OyzE|Gix
zu9bev%O1>UJM*E}yiD-G()tBLKRhwIN=aPA$f%KzZpj?ZiSSUCdVH0qq;N&qRETBF
zO+p{(Ky7}@5o==C;gA2iHNQ!zv7oD<B|vlr9~-@yrclNCw)LD{wg2V?dZ=^#hz%NX
zhVsYArXa$yS*KuDH+4G7=uzoJ<iRA9n?CCgnb*j;P(#K)w(-`Kdn-*rlX6oTmP=ED
zwGDR*fgEG16iBn>nN|MR-s`6Xx!O+*LeO22ZH?DH^S*$GerVU}`WZLR?!?2&W#C(p
zZ6A@H57#h!_YGHp*vd-w)3n85Muk1rA2zGMbR{Dv__>M3-K7nf*NTB4;J~S;>du*L
zJhdC#CA9w2cRopf_dXEH8{_87Z<6?NCUTFdD0SvfmrvePbU_BHp4?UsNl=*%q+Py^
z|5b_wdp1{6LYj}G^*)6oO*HcdAcB%q_aAssyPNaCV9(QYbK4vXKjoiM1zyD-FKdSJ
z%d?k>GxmOeRp|vd%5XZn?gob0(T9!wJRelkhx@8t=sQF!E|^-c)NesBAQeBduL0w<
z;JFuQ{VSfT{9Sp|^dfjMd%qCh)itZ$nvaMwfvm#xA*W!g@9}DU4R#Ypw(iyjc#8Ya
zZ1CkJNX)(7&_`*^-J+sn1i(5)K?`1JQ03R8d(uM7V);(=vwVd+xL><1A6WcXg_h+C
zag~LIg)54xL^W*Oe_k<eO;{OjTVLweNiTipJ<SKR9JA-WNk68HH*2Fqe<F{pz9~P}
zfA-BC`EP!U%LZ#u87}e-Mt_>*=tn1MxC75miR~*6cG{U3h@h-r2OAF>&Bsn`=QKIe
zn+6`TJfhHkg=I+a2>Oxau?)!7uhO@JQYDbfPl7n1WBlX+Ob|IibA6q|@UuD-%<H9l
z{!5nT5<5-wPGJ9sgN2juB4^y^#Tcdh!`>dgyt5w2axlH~#{dOJQpyJSA+8P|9f&Gh
zgixyn5Ub{}nl?j$it2+MD|@#q9Kgp?7V&5LCB^3yonJ4W{T$9yKFho<ik&6so$IPC
zVF16>&B=-Z7$>*Q(xlY_h9k${AkC-}sh;nyUI_LjThq&5YqG?Z$yPb2whmg&R>q=L
zB(G+zWMwLl#P5Rv+m)Zgxir}1hK&PCGXT~qwXgrSWOqcT*<v#siiF<n7X1u35{0$=
zee)=v!IB_X@nd~m_9&asXrj%)u?@qaszXtUjw|%27T4a40Qp8W&v-OKO&DeMYH4Au
z$!hkBVv)`%<8sT3<!W=AlI?F=(2-is{QYtjkau6So8a69asb&0{|7Q)DQn4zN4SpG
z0#AhHbmCrYSXfAT{^Gi%$H0T%KWHkvD`~(SVX5P@^K#l^gWu$XG^y0(SjLWqV;JEs
zEtZS%{vlSjt@M`L#=*j(CS=*J7*6^HB0uKm-!2iy{IHnS{G7{J^sDtv<zZcc!geMG
zM8og-BJ!Lz`o;DNV(Ljf`^$3x>_LKxl8D(U7tuH;czUh4!#xbf^c!n`9zLX9JVx=a
z`B6kMjcKLBGWxELVxvqmELi1k*91}HEp3tKTCVQ4#lk*&cdVu|7PC<VKf}7r5z{@R
z5}ck28+0-4{ri_Vc=@I2wem~mot>2U?o4g>KdmU_x9ef;TRnAcK~d0$i#3`=1+$r9
zzpDTf;E)Dh8rL=gg(Rfh?K9R2lDeJFDcj?TiM!=1!zncXn9#mGiOIeLZYKQ<?_?X@
zV7-5qvpGWjz#ki&V@GGf7KE-VLF$vX;$DOKmY34XR)dWN7|U6aE_yutOj;@rQ(38(
zTuWQ34^*ahx;%N|Kmjd!?4cK{V5#arJYPHMnc{t?$T)8#@LZ)#_OlqpjxSY?_^se~
zhVhNAFBxOl17-U|JGJe*PC2nl@WFrdzFJvR!;JGLV#r)N5Fu@<VcN-N+BT})Jt2Mh
zYra7NYfDc$HO2!X;8B|eTv6a%NY`#3AbKtKy!Q70z>dJuck=#0z!Jxb_tx7eyWY@w
z1WDFewuh>5H4JAph#<1yy{nxSx~@?T;qI(ST;e1exJSek6upH?Zh)azH+<gJ>RAS=
zijOFubN?rAfPq1D5xo_?`DMzW<Gvs+N(?-9RTZHDNWt`04^XT1S7{jQPKsLCPqa{Y
zFC#KW!?)KyE$)*MU`N);U_)FMF{qs>F}z|0V;J$h=gBd9KKt6H=>bCg9a@NWOOTc}
zuT5vA{8x=7S?%W?e-`5jDF5nKtVL10hqPS+)E>nfLsi|c`W4sR9L~q#5&mg>1X{rE
zl4zqgS4RBzFkC6SKcg<~lLFlrG_rKpPfY)kIJvn~b%BVc7I;TZK0k-d?uKFq{J>T-
zvVs}u`W{RXm1_q-8g@mCv3%A~Ti272kNnYH+V4eHMg35sSwmp@m{`WI9u0n)V?e1<
zL_7|4s##-H$mS&6qSk$}!-(XMvpP6>_l@IfCQ(okaH{6QgfxO#N7V62fYfhNMP`x!
zp=?4(Q;J}WUKhYFmu-0;uoege-wc&m4}`ogFWkl@j-$BO{`PI7F2C(9m)`2ZXi8jx
z&o4ycGNQI*N57AVHGX-ix79wycSn6nj)}70!fH5sxysF3KE8+|Nz}0?_N){W`K^51
zd-SuB_lIWBwb=j%RQCa~5jZ2}F83n``;YHVC)QJR?W+Ut{fz3&5i5<FNT1|336@EK
zt+8uXiB)q&qDqFSkCyS<*(ur6V&_&xZ0<Td&>v01i?92w_Ixkjf>|^h9aCT4ap)Wb
zjMJAYw^+>*6D9V_NAr+~`8Sj8>bR|6?`c*$L#+`HnmrLsR=n{$ia5+x{uyK+@<%BC
zlyv%u(px6iQ^8*`CywD@)mvVEx#8Wo7A-9r;Pcq0;kf#a1z$D-7%VmFgPfZXnjZ9$
z=srqq6+2gKPKl&;-v9QF9Y_j27~&d0j;BY%3jw%NnsM#SM;d?dtb^Y0Bi|_B6DDLA
zN<UzR;mKG88gS#hPS>8&QN*%udZA{aqK|Z2AILuCIe8#ZLP}j@gGQ7sH5!gXDe}>w
zvQ*xw3oAiBf|cIwxM%6=Af|{@<P~z0A<9Up*|Z`!lXJK82g&IaBa{f;0$;>m{cOHg
zYt&VEZ)0qofiRPrQZ~0a`iy|NNXnYGcZD0*F?PZzw)+St@Jtg)WC|CV2q_nFpw?`D
zkGs0f1xh%FS=rM~8?a@HCA86NSZ6QgZ35BwZ7UY@=jyy7Yt}r#5@7<`jzbmDj0}!?
zXW5Ygw;?<_#Se`33j*a#U;9N;pG33wYx6E5YO%ua>Oi6#mwf)bs@+OT5OBwu+tzD*
z&jChzr&n4mIJ->hUY|ew(OY|;^VjZ;6=xF`V~d`2fkenIGxF_p(6td^Sxe23nGYJo
zv2&~EshId|up)&Z+FIejvFrw1P4^leFQ_?oYgwS$+mk2JbyX-F`C45CpxfUqLIYeV
z_#Y@=uFI#8C=*oH8g+<$e03;kFt%3jyMAJ&y3xlzTEvkprkUP-l$d+zcCg1a$c>@#
zyBni5$~peRQ2(&ENZhULT1hg}!-MRsoHW6(pQ6baQ6KNcM-hQ|<Pq-b+=D+}N<P0I
z;(Y%6G3`1$?KwQ{+CJ~v*Lg9*4YHt_18SoKfqdu&sF)X<Kig&-AyE>lRhbvB4fXdF
zlADAHpyd6a*V-0`sCcC8pozwJ)8O@!pUr~Zu_uJgOquZHLcA+<Eo9v-^fNEY#1Qr|
z#tObuWoEc2d@u}DDAyN3&cb+H&wD^VZwT&kUq3g~!eAhw#l7FOJ?JY<fu~Dm@)6}m
z2t?+E=SK+SM+%WXJ7R(z7JffA{)Z9=Z09sT1{I1DJ!8-P!L)gHbs|pz3_O?<yF?L@
z@<*qpcL>p+gIUo&$(T^7k%U<CvFme+V+s5&SV{E_JQSVlrN{V0$5t2dCta|vY}pwN
zXSt4YLiAD-wE#{7ANc)!fW3D7vzmU;<AAs?yAr-qEQ|{sZyOWeTl*94GH>Ktvq;X#
z{VuN526G0$VE_x@HRuSA_4}(ltWDooVEb|Oe{-<2t8oh8&x6y3H{qoZ9}OR&`S}vq
zg_ecqW$H7RvimZBVCli6vZ`8+c5g2%=>ky-x*VQzF&>TJcUGuc2@R`EV=##1){h5|
z{D5MLx9YRqH~QYpEy8}GPUHMBWfz5;DtTjL<C)0y^qSOWWPUT<U~RGWtZO#)^zUB+
zjm`$Pfs4O`CsuNw(fgYaJrxk6T2w{6E;4I%_s7~LwlA~%ang{sFE(O4VZI&lwe9vs
zKK);a+?K**B$TMC)HSExJoi2!rUlOMrD7R<-VTod(@^8UMuorMTIMZ8suj!aTwEqd
zonHy1dN|6K$ez&1gaqnOIiBuncg)LKjNxI;`;Ug-s^<M*scJ{9A~V<u&bOA*6BQgS
z$KhKa_VjG_vBL*}4@pQK9Tx|q5&Us3n*4e{%kqYmE4kPiBHyp(T|!L3L%ILBAJkPA
z+w)bym4(KrHa}~pR$t!$;|t4gddUgOg+$+nu1Nk~@M(*T!uJ+)wvgId&ncn3wMWwL
z-@ku)y~CvKezbpaNUYD>-USREM{Hm%mj}HkSBn45b0)V`^DLsK)6dpVaT<r_JSa1u
zWxsjT&#BCFMn(XpryEE4l%gCUaq;xVa|-ybHq}&L7)*0_3s#Ob8l=EQvI$^hU;r2u
z1ESh-j+*gYhYC{BzPz9Pee$~+<tq_Kf`z{YdKgi}Ghxr}yZv!=hbgnI5NNRFPab^t
zGtafI;^&0Ow2Wfh15r=L%&UXT2Y3$X8BcQ|m(Mb{2qj50jbtj)k^lE^&JRSbr8S+O
ziBxz%R9moVx2U%>>s1}`sS6BUhUWc~0OTQrHFLx|><oq8uHx7dimTp}L1LbPr&ka`
zc5#ryMoRec<B`pOJjph{4eJ9@4uJq~Y(zaQ-QFQ1I&AUF54}AnZo<Zo$8CzSrlE{U
zfDddyQLk3JPn+O2*~d~Xl4P3<&Y3W}IuE!#{-wmxRi@nYwYipuT92>lc>-mtH%s90
zE@OW`UjH01VdPXvblW^>Ta%RP+%WL8G5&%f1{*E^WH;}qapNMXMOCdjTuSi9ChrRG
zc3;oXXeM|@*@T%lfSh0$F;bW!;U?0Qe*}S}Y_bX@W&9D@U^64)AEc>~N8dXPIJ<vh
z;UznEsG(*)v+36dmq8ylo~fk6d>FX?=Lq;-TwOQi5AiET=)iQO*adNdZ}y+C_-R%c
zLo3UxbiS=G6c%yTltLw<hOrR76bb!)UC2jS02Yv|%HTh(DgNl|oDRB>uo&A+pEL6D
z@j;sKct9vp>%eHrW*2%+BOSLEmv?{C6<htO|FRgPR?y?Gajaq-F=<DvkTp8*K}(R=
z`U#RB1W(m>x1ZtNA=&M*_u5jpj0L&vcy<V*a+sy|AOb``bwh*BM}%9%8LYDuA@UJ9
z2D_S@Wq>6j_IWlRM{5rKRlB*R5oEdfus19EhSer%iZ=bVvSbpSs|cju!?BIGECM!j
z@w=WU4o=CM-Em>iTW9^mtbEREWr#fuiwDf?L#Z1^_)A}h+I=-f4Cy*C)uUj{{C7Gr
zd(Ie1yoP3>@u|gCAL#}-U1HxuvOem(Z%fwGybI>7tOV@r6PW+S0+n>xn@_TmvTD95
zO%VvVcvA?`>XyZN8!`aBxv|vK(2dExjNe!RWMNC4EvH)xPRR}v-S^~AAU%Peq}*va
zx=fd$X4{FDr?pIr%0PQ2UGVwu`P<tsj5N#KklgJT(d1T&!JEHC?D1EYWC?i+sIo2v
z-r3sDetsRxS!-`V+2Si#%yd+juD{ldumbto47(7{M@Q%agXm*#uO59jmq0Vd2zzHC
zb<YDHY%F{`T4?>I%xKBeauYezw#92TroY!+CZFcNX+)pI7HwRb7w6;STTC*S*}bi;
zI@YjH?CBRnzOh!U8>^DHAOpu!61<@yY=B4no`l`FGj=46!`$NxPrwGRc927nK_&v6
z&^+Q5)A)9lbKItO`s`20fkkIJ)LM{}HD<}B4UMYvR@zLTP(0|tnq26FQiPIUMpAf7
zKZ%Z9)T>7^uLGlt4^Ds|ci?=kJfc7$qNZMn@WqG$S`I7P&5m06PWnH8*|>-|CIax9
zIx5B$5S-Pv!oL$1Hr5?Dw+Y$(6(ye!40}V&Umc(2<z2TNyK5$2S)$|Fy8LDvX!&Im
zh!*MsL5Zr{63xI6{u=jgxv;8lw{7y1*T)aRmLvJGHQEJu#jlOloN}rJXL<3Evq|<e
zp{!FPUmxl~?qaG%w_<|%<I?-CQ&yzhFd8>pS>GXUYHspuCS!PJepw;)v)BW%P_jmo
zDvE1IL4uv<RPW)I4z6)Kp}PZS)@Wi?5a*^)rL;{|1N>P2x`$!EZ@=u6)rx*r_dwEE
zC4gO6alQMLRUnt8)M!w%XOz@IXKdR*3`uc<n`Ns`&rMN+Ic0V%TpLb#R%0dfLyTr}
z{G~-rz?9v>pggma#-y4Fd_LIRlAr$0KMmpt*TykBV;d|@$2;OdY&}REL{cQ!dKrF2
zoCpE0GZBTb##8kgC``=ERs|`+@b7oycDZ;{J}Au&=iG@Q!$Zjc#JCGYHmSlmV9m?i
ztc6^31hkW)9JPx{xE}-RU&7cJ3&6w|s9;#tWjiD9_`3b=Ex+lHf7&cuyg4#XDsXnZ
zIhw3Y&-!JpJw!qh0=|)GYARN<BL1M4a<kLt4J}TQBVciUHGMm#*0gX((whM}_cZbU
zy~-AI!>R*vCNfpYUP^X!x&-OyjQ!WYa9!H0-qqt>9(tif!DX9VSnh%|*c|<iob#ZS
zg1Zr+dZDVH2FXk3?>zeAeJDs4o1O>K9J+pQ$>SanQR#6BC|g}v4%Qk;!T@|Yx3^G<
zGYwfQ>e@}*@Ukmp-HJ_+3j5UoKqi~dUrp_XO_b`+Ph?joBCTz}_alRIVJ2(LW~CTP
zhraGZ8A}ak#eXqC)j}1RN0i1s@GRzB&`#r{H6{7K$<?`DoaJC?`J1leOo?81a)DH~
z!QH{(VLwwQ#22(>Tc>%Q0h4(@hX4Nkz%UP|uDuSepRk%yKyS@CNYRLX!G#y4)oZM4
z$bX_bUP|r0(ZlxyqxA3WX^NFIl^EX$STB=MwU?9(rLulvJ}$Fk-I#xZ;V3u7eR?H*
zx0-lMuR(t16Nq~Kw6~fJd%&?kc|SiU;CCk?;*93p^o?q=A_I+s@BNGC?G7<GCM9R#
z{-@{H)=$#eI#tjaYo)yrL^csj<>S(8jFe}{<KNErUT$m_jtIQ3T)X@8#?&zmwJOML
z{%TYH5DH%f^&K4~9Qy3dCb#@nZ(~F-H!TCOztKV*2W{`ClKcsIOiU||eL$(Nxs3CU
z-Hvjr*Z8E@Kz5y<L*^T!H}z+l3viwoO&Vz263h$gH6<VR|9w^o8a!wi6nRw7u&$JB
zGflp;C*;3yDu~JS_pOZK1Hn;3fbV<PRf#j6-q5;&chF(`d{sCQ8)0JiQO|XGvkroN
zai)FtW0q9?Px60!?&EO|bfDj%Dgp#R%)vqmngMS-RA=Mdc^0m=;s2O#8H#$ki(^DQ
zPO94g&mSSr@agr$Rn6Z;6^(MoYW}wVOzqS44#Q8c%M>`eZHHWG_=2tMw{XHAVQa(*
z1RN$`UvTkjZ>E^<+N^E_@_xZX83h}#FeLB#yM(3XS2hxQy4g`YSR3;z#6G>X{njDL
z|9Zq}4iKgQPlCzubS1O4knVG_?8D2=p2BD4`r3%Z3lzZkZ)u5^y+=`A8vJiu*g5xq
z$j=zM5*asFF5h(+zP23(yr4>{AU<)3=<R`=_^;E>>zz;1i)N^PB%@LD!PF?LzDvAU
z*D)F&4mz=yX;hVm^RH*K`4YDOP-+6i1e!pab=^<O-@Du}^&$+DMl+L(P~7S-Utm-C
zLXJli&)S9E{T8oKJ7s{L4{X#82c4U9sd!ei0u?G*`{b73I$hUwOWc3X@_D+-5na!)
z!fu=m@j+@Q{+=!sb(U$uRtmN?Sgh_A^QqPa-Ay*#&3o8(;;VREljRuwTKiTO0k+Y%
zx{u@;ya7cMn@js^-5a@=yk8jBrBzhm0-=C{Mo`4-J|-u&+4!>JeUts1$P*R-@{wvg
zyD?b57ioUq%iOHFnng-IMbxje@4U(o)_{3i3S~$M6|&#dSrJp_^hsYX2S-ZJZB%$;
zlbWU(`FxMvQf5NjDb+K4?J6&>DkXF6m`h>CPF`^)sP|*5N3PmZ*Gr?pT6K{+7#>{U
z@7C8Kbg4E%^Kk*gdKN#@&DJWwQhJ<9uc;Mf8zrLpBHF;U96bVF%kwypx7e|8a=yiJ
zwSMx-qF>@F49CJu4+ZPo^T<a8^{OLN9d-)r*V3uhs0dZIxn|wm#k~s|ysG;67C+VA
z3e8VNRxofKEuNSC6c>o~bg=JsXRSX03E-dQ6ZedQL6tP*ulO;mvitP$swR=^{!G~!
zclV|^2HY7AcJFp#cP*=l{~ufwj@Y*t*-x;3+9hSurhoQD&98(#K68gQV8N4j764RZ
zHI`iz2*vgcTXfuRc5iK|Hg=z|pfo+UEJH#7U92$A|N64LMWpjQ>?q!F96cC%7T{N(
zzWpf1RKBKk6(`=Zi?v`HCUYp!fN5fBdp92Pq-gOoFg*W+^eoyi0{C%(v-QcQX@Ybo
zc6BcXj4RDnBn4ezf0~g;^0>(b-%T?o352BU2Xoem%?F$A3&zfrAt)4SSF=V_YT8uM
zvs&7yRF;!UjO+n6Hqwe2OL9iUflw2@VY^ARIS^(~_dV%qXJU=7bNt=EwPMXbY1*0v
zt+xt=RSR)(7t8Z?NNZd#<GDa8XT{d{-((Hz<RDV|kDYGOURG>FF;b{DQi@V=zs_}e
z*;7&Zvl0YCgn+kF7IQva+2CHRVp@7#u(&)kE%;N6v}jQobw0Yy*e@mdM4bS=4nCkP
zg02{xf+O_3yT?yd!R~n%Ok@3B3kWqE(=3LskEMW^xYEK=LV`oO7x;u&M_#1-8qU{h
zxZ?U@lNQCcO+!8|GH5kp<;sHhYv<Lkm^;=<=?*=O?J&lpkL`<L$5Bi`f8&WNQ9Q32
zc5|YJX43!AGYsv{XF~`liF6W>qQ`s+Rd|gk4#=o6Qd3!rF}}>YKXsL0%|^H@i~&-Y
z&gG3Et~PYPnn^!?Z;gqGNuBbVHi!LSnoJ2z%LSPwPSRcd@#x;r^&W+!$N~(3`U5NK
z(@{tIPZ#`wWp956bV2oKe0#X{!8ZAvX3jny9MwJ)+W<|ss7&>`L86@l!4zJtG`lcL
znRqbhVQHD%`lxzbH3>H8*O%kHhwk7TH|C7xV-MZh7foz-hQP43;GO+uN@wHGnw8(W
zup_?G>EK0BJ_=emdpXB494gihU)I>zU6P>C<07zlj=!l^`@IbI;TX$vQ1vakGe)Xx
z<UI5uyOn4os2#Z^ccS!WK1+~bgm}G4)G0mf64l)p<EJSG7Ln*ygXx*se1`Q%4)>E}
z#$XnyVqd_Do@x$ElV>?NR-p9qh$FJfRa_s}-Kv{zhq0o$JNu-5jL&GP_DXFoVdVa0
zH#3OGd?p<iM}O2a0;%z{#<EhB*gicplbR@fW^JUZTpT0^uj1+QyTT!cB7-;8D{hjQ
zCGQD>>Gn6fzm-edcQQLEE3taSw$x}U^G^rhtLgui$Mk7=EL1i|(=i}qAe?G)QbuRt
z^%wH;P&~$6<ja5k8L*!#fQ1R?Gn-(6ei--)_7=!s0jh!|uJV8$k&culQ^-353F|#1
z)(q{Kt-Qci9I+}4%BByzy3@ZMAL&~KBF$c_G}_F-NeJcq>dUMLBRpG|muh_|M|DIv
z`EJG(__BRg7U=KSgq;0@?D2w(#<NIT!JBa$l+cFrEwD)~;XltBHIw)2)vbh30TY2J
zHUV0elE5YN0+iNj`$l!E$NJgd<d!4u12w16n+agh4~OJue#}>xg(bM-QIQ&IExZQ7
z&crb-+DPtg^hk3beNM;bx@y1-;G7z+MKm7RR0<c0>Z%wtu`$pYD<2=Lf@1M`uyde~
z#|}rW*3B#2__81vMi6n4o+jJ(6H6J&PwvQHcG{QaLytJnt6}PP_hO<l(z~N(YrqQx
zHf&cP2T1mS(L&*2&pXXe&A;cF)%AcJU%}QN*)(F*jCr`nzeIAC*iy~5PwKgN_jPoL
zo?fp<Pzl~TWVciWbbY>;R3P<;49_8&5dYulO}IAJL&1m+=-r8%IUOn6Q~bMgE(Olc
zyHT>Q);ZZtT7x}=DuNoWU}4x)Zr@DD%Hy=5Lsrc}uEc5V34|gMxsRUsiF2#60Xg6t
z!%Q#?l~1hRY2_8<7m1<IE6=Q{L9z%Simfg5_wiiQxI4<|07L8MvK0`M1kmz_^K7%j
z&iNBa#QAf&Y=?cFn7ZXUEjp_I(iQ)TtKsgbqhdBElPxpH)~RW!t{GsGv{9jtL6@|j
zJJHt#3u2vdfz2mrk%cUTRXPaY1~RcNcBshv*B+-_#!UZJPRq^2UBbO3QF5n<415$N
zJ3ig^g4GOkqCYwPSkq~cvpQV%yia;4Cqy{2Fsbg>$&=E1>Y)A@fq9`0RjmlR5ZxYF
zqD@@R^BQ=g=!DI8{^WsG7`%bVwX8j`)b;b`b+3_iLH6wkTQt;((d1=)UI_NN#aJn+
z$K{z+c(lixADJxnHDb+pWoMeR*ym^FZ+#G?X<^OeRFQMEma-M{x8%{^Fx0pMkIOO+
zlJPy%;!_HX6(u*hU4HM-F_Kz_8^KEOYebf&zijpNFV`tmmnJ1Yz)i({0pLXIU0bSq
zHB}+tq<|u3Y6s%029a$6oK~)8ik6b(;N(k{nA<(H@L)A3mo!p=pOGR_LJ*G{>cbZR
zBxkL<K6bc{Bmmvn>CU!i4T5zb_FH2)V1xnb4jo^%2RMc}?zeDNbIAHiTMghOESS+k
zEUDXMT*+ey8YmSC;PeUe88ZhryhdDyXfhkidwC{0c*}57%`}Bvbg<wsO~|{qc4zbZ
ztGf16siQ^wR)ynCJ8P*dX_>D4xs7UVDA|sT#wWfo;#m~2udjUNxz8Y$CqsTkK&HT9
z??j!4%1vZqwvXPUm8Rys!(!pI`@KcCrIUG!EC3L5gf&lgwWi89)uyWs0{^>oPIPV}
z%65TJM9~4a6Ay8JXMQPfV@TCN%Bd~R`5CpBJ+3ru^3i)^)GGp`_ypaNbzZ{IaPHJb
zlO}XyV~&mP1QE;-FqNLUrYI@zbxH2|P9kMy?&bT2(aeW#^D7bV`PeVqa6jnHZ+n}9
z&Fk^LSiZc`(A`Cua09|sXqRNF&tNTT8fzDQ44@(hli&a;U!$-6DTLnda3iC0G)H?%
zTmU>82op6ttI}|d&aHU-j-7asemAbwML0=x68JtJX>jy^g_EYGp9?-5o0{-EIJel$
zfc;!+A+tQQXDcBkV?vDBhpk5SG{}p~T%hWJhzCov9Zy1)3kgI4x2v-wY=sN=3xM4Z
zh_iaG=v+jBXJ3}$ls^)gh<!L7LJNkcGXarf>Dk#m(^`1ZhM6gnoI-s1Y+-Zr-4*3|
zK`O3o^hQFf2|HGJ7R<=;mKc1*V%bjYd(B?j+PuyW%J*TtJ|(-HjYd!%B7U2^V9(3_
zxy}BHxT^(>Lo@3zs%WJTbZD-M2z2^S{5u^8$PC-ocNK~J&Ze7b+GvNirGV;dHPb%r
zNt{(>2o#{McYEcQb4|_$k>_z(SJzbqM?O|QE9{Ky&%po0I^iF`wiiHEqe&FIrWE_N
zaG9US<Ls?3L=k(Op~f>drKH8R_yasIID?|}q^0bxfnw|x7vfr=Cmr|_U>v%}Kf#z0
z02W-QV^ICu0b^38It+a;Dee1-C_6BgsM?VcCcwi;o;3LNdp}|VO)qGXFI#X?8H>@H
z-73XRJc*Xi<1m_>m<6LsO6<P<+6h&|LkOO`Jm84*;$;`8g1X_e>>}dwNMWQ2BD35Q
z0V%=s>Wtz)Uc0Y%+p~Myy?fiO`)it56b%lz)*8%ml(C!1&KPPD+UDQmrM~BJU-k44
zBN^Xjh>Hx2U+arq{*81!-eg3|4cFzD^J2j>s!b(1o5*n?7UyU=QYL{27vkeH3(AVY
zW$~nD`#JIhbH$Dme#KA0w~WsLey^<|D2eEx1%BAnPk_q`dEg%7YO|!W06w<p|J{ZA
zb&)$U-En19&@aEF>O}Ki@xtowFIv~2Q~7pmN2AhJW#(hmZh%mlFPJ}E)UK?e=3t@1
zjmBF+(NIX3KQFLs<fD^6w=x9+BCS!$$wxvb6IRAkw9Xdxn>J(|)j24B)oI9>+!}Z6
zq5eclDy#ibvn6|GiHc!0ll>9P>}V79PRx4i9!#c*eZ-Q$75G@7Lr8x_9tcvyW)<zC
z(EQ8r_;|G08CjPyo#gdnp2w*u@dCSmk9q0ux7LPBujJ))hn8mGw|Y{@+ZTm8!TC^&
zdQoDUYIi7cuPX$ii!8eJ!Kvm~Oca>+zFYYtIKCW%t{Acfy-5IK2!tX))k4;Gd%D&Y
z8j!U5rK&xr7TW{6VOv~)J-1>G_Yg8xw;4;^!z-qS6fKRNwV;2ERb2P2I8$0e8NVK(
z>NrFp7nsHXzP8#-2_8-7S3HEG3ol*RY=}tsF28F36nRG0m35dc5m3=oJW(r7I>q1F
zb=8L-cs6PRyDf-?vY+CGzHt_yovCirs;Mb?{sDEsrSpuH73S#8tX=Z4p(1M$PMRQB
z&~7)I-hCtVmJ#(txFnr7`=-j{lU}m8+-#Ts0@X_kL;=4||J4j!=cW()IBOqZFK@qC
z#cEnVOWXW9gQre{Au0CK(OQ8)Q=#yepPb5r{n&=1yZK8F^#1o`?E(n0lOk_@Oo&Y8
zuLJzRj*WBJ{kg>d7S$l5#+EWHm5;+-!-TPHtLuwN`|&thxxr3k!N(r~sh*wy7sS?=
zT3m9hb<g`Cz;zufTL37f%><y7umS>dT+Od|g0|7{GP)x$Je}@UjO(;3WHpps{`*|i
zYCF53{4iyYOM5&-44A87tKA3lk?A1m;tr*}W_{F_U4$>Siv=1$RHi$N|5<e1OKHB{
z4NLU`JJeGDh&+b&FlVUsL8sRuSIw6ME;g1vQMD-l`xJ7DIDA(p|Ek-j4LgyWr7ouk
zb`uQ<<i&g8;5BOGxKfX8dm9Nle(<joRm%hiR`GDhMVjwZvE;-raIj$zabJp#0HT`G
zZX6&v46qU1>&FMUQcCwe2h|J`TjdU5{FXO%+_6hqiz`6}U4%k@br6VI;6x65ay5Ba
zJhOIWn`o6os6V|OWEedB)_$AN*C?1jm5)wd?Vf>=2X_47(m;)QfV9Q};%*q$)eFAh
z$r?q}P%^}@Ua(SsQfjRF|Gbmbwmq>snQ;ibI5pjv<47COC@C$K<EE5PgIR)e6=dS8
zs#%<h-`m4xK(~xLkU7(^E<l(Goro2%7%FG-d1*IRdVLfB-VKm72ebj{>Df|oYgn?+
zHrz(^<pzHEgST~&J>F8aa#q^Td}PN|QTYr;pqcoxkA2$75$*7~*u$!?i|rw6lkC<Y
zB(_JJ6u`!#JyBIOWmgu#BeaE<P5^y3Zw46{f0};ZEl~M1&j<OeuelyPmU4f*$r5=q
z{>7x+M7yqdFC^hZ<9bdQ7hQ4*>#0^Gi8lL)O{IA_M-}WR{QCXZ03YD6k<0xhwSQBW
z_lT}`7YXg_eoMq|Ldske4&pFH12;u{;(1lwhA-<+o{*>5dy*-?g>>Gw{<^EtOhC3Z
z=vMWxKm|kZSGbNuo-l}yNTvb|m`sibGe0t&$OfeXVv{FVO(nkq;oq?|Jtkh821uDy
z9QdLcp+OA`_y0Ovr;?LCX+1uCaeH5g>tHcc-LL6O0#tjBkv8xA2Nt%cD+{$8mzlqv
zb;}mvMa>&1ME<UnD7x+q8}nBEjXBTaOFiEz2*oP~m^NvM$%s-U^f2%ex9g*sKFOE&
zRIgN*5Q6nwFw;1Ud%C)OU5O(Z;lhY&Yg7D2UYv`dtVp=faABodW8m>~yNbjB16O_5
z?t%g{H@z>`x&EJGWZ4E_(ag#WJ@Pfa8B!5>(ngZjbz~T$zg=@hWjNOLh>zd|p0pn8
zeR6fPf3u&D_0s8g&I@&cJAtz;yvA@CKL|QAN_5E5Jot&S?}!MeDwgQXwC%`qHmV3+
zb5UCI^7Pd1%{CquLl3=E9nNu@4jKnOXatC(zYjS_=Q{-!6}KoojolE|m?K6!5lXf)
z8?FUJ?^Q3X&br-{QjFqC=W?L4UwG&B&5rG=lO1(|bB-QEfez%=^-s9nS&3U|zzx)a
z4JtArh3E4hMs&UEnEKPmD8pmP;q?#Upml#SG@KL;7nyQuis{?8S6B7u)v@LR*)Duj
z<R9W5+U)5`b*|1}J-kqcv~miHgCyW?h11A;&Lh9D{U`;>pHIu~$76q?kSd);n}`E!
z_KC$eW8M527(@)r04tr;I}l8CZ{dx1yzZRT9rgm16ybJ|eelpq@e+HC-beo*R4|{r
z7y+f6@d*d@0~`boMF6^E)c`V7?RI-+M1(L|6ue54JzfVOM44*;vG55jOv&|@a@t8z
zN;L0)zXtVz8<YUTeotlsKdp&oSH7T4{kgj9hQ+Mg-$d@Wc|Kd+UF)tIQds#ZLO?qH
z^twb{+cR}QtrtDdw>|PCn~2)|@qX3zuxoyvuHA$LG#aG$#C}!s5li%ToAeWQHcQGI
z?wf5v(1EA493Ab3x+Fdx@NSpN-wlPI`@bjJQ#|pJM|wNp;;ilF-txw?#Sohgg@#U6
zen#8AyN^==5+>;IqMS}8EL7kTy<~GB?D{{4fe&Az3rGQ;c4EB1v9GDDjt)orHCYhx
z;&l%zdNTnNhx-PRso;kO-DxTOR~-39@&)22i8jlDr!&NP3QrOOz(+?+NAC&9#U~G^
zgYC!fZNa66e<^qz1az6}#j^E1s*C3=ngI)0^l+u!gh=yYu1fQt{<6vThG*N5h;=t5
z5cqxaGL6-qgjYn@HB~<Xn3V*kd6EfIh~*6nE7-gW2X&=MKF`e8537heJzb}Ah-u}2
zbY=f0C{Ie!T%Dl$UI$n`$N$R#tVawAj#&^q2Xx}<b5F6sTDZK4@L*HPxU~vv93Nw)
zB+*aij*hjY?3f@fzZw_p9tCBUURYxNy6JjXt@1eS<Q#;|bqY%o(}Z{yGf$M8-H5dN
zs_*kLk9EO%zYS8%l<Z?-P0a`DU%!O}Mm7!W))7DqEsL_Z3iN70IL-^|+)MEB+&H-F
z^hUgbE{^R@F??-@{aJaYLg5&ETVJNV0NU_DoWBUAcdgooiYHByDW6?7OGw~r*=G}l
zn%Crdes(W?l{Kox%d(1xN*<XB{=1aCtG{u??VJc9+nCRQJIHmYr=Sipnk`B-5sl1T
zZ*0ho0}LI#p%%S{5LMz_#-yQJEB4bZP&18!_U*I9ywM7}z8>z;pEiw1bvJw;NQk44
zz7c-u2dQ>esnB`&EM4(LB%fR*17iI4VBtf``9sFnt}rZODmCH(hhAK^n5^d<o)k4U
zGxh2cn&h)5G3X|B(q7KL+4dJzEX)1*OtCqM;<au%OAL5g%NJz5a1#c~sIRydXDUz3
z0Y~Sv#~%hPFBIHFUf>n3_?%|oCLcXv#QCVxPcoFi^)a2WZQ^(loH5i_FKAZP3}5c@
zDQ-0On%e;|5LZL)E}PNXyy`}Hzm@f|21umFjP@(gAe!Lktq;Dp9rg@!L<#~|6McjD
z72_iqJ{@iu%Kdt1FxIWYP}V;hcIo^>IMM>l2&zxjiM~VMfIM0Zm%43aygv433_mWj
zrlpcbE~sK#dQI#vU*-#rbXQ6YA<#vPG-Fs%&TD`ydaaMzKwnF7bp)S<oUZ-9s)z53
z8ion@T^%}F#Eq|)E5S_#FB|eE1D(MkZ=>2*i*WJ<-DwfLf-!=iQ&5dg2w=hlz*xoY
z>U4dz5>ZF`h~V-$Q(q~++@G!<d6G6j;KW&nMB`iR;_y5%;I@g*At>yau$-`$Tw-D=
zr?-UA{4Rtb_H19*$4HO}K01B1LQI+aq>_Rw({(iYClm@l<+l4DcIM{+LcR!@Z`&Q9
zOZ7~P0cZNNX0x7lH4P6eO32F$dY=j{jqZ8zH)x+vFZuZ-|F2DAgQHl<*$y_Q$`&+O
z+)w@4+=vYtJ-xP8lz(`g>+?PZr3j@)^y)yO@#uJ<l#_eucCL*jH<XcNy`Bx_+|rQ2
z%v23!=9vJ@<38L^alLFW{v74*ZBN*rHA)~o0%vElg?87c7FKI-clKZ4d|SI8&2jJC
z%4sqV(+!*oTXHsWvC6RqQPU2TvGhkP#yl@2n8ZYQ`Lr~$)Ry>9Qb#j=3ai+>9Dj%C
zd)Lv<JXwg-5ybC>%#}sBiDs$hIY=*o9%>(v%l7y3w!|?p^!{~1N@?Mvf}<fEp2G=w
z_3WD*AOLM#n|fPnhf$HIp^Ui3jF(O#oNU;m76aefsD;YoI_K(g5P9#{<l^JDn%eH7
zoc2F%-LTY+cXssZ<P!v1@A!Cgn73uSaTx)LLvS|&8h4;PMzYLJZ!~6s^qVL9DxetS
z=Sw7y$RY6)udB1F#s#u{IZbs`P~t5Ul?$oAfhn^L!Ta!clh-5n5w1fRU^%NvE44Jf
z$h?%4WeISy1Gcd?m7G)>m;Y4X%&@|@nn2e^-Zz1{?~^fm1~e(f7L8^Wy!b!GHyb{R
z*B5*KJgzPw)uib=At<rcUY=NbsmKFxrLE0x7?mz$eqfn_S9EcQ2GgB1i@b_of%-#W
zFf>k9j3%0b&T$NV!vC)c#jrZ+wA(R}_Kl5-jbp1^3@5SJO}DS39`wzG^HBDi{`E>6
z<CrJYCBO@)#~W@RX%b3GtiC6N&gQ#&>5Lg;G(umvDySBLygt!e<sVYk4#BC~b+^|*
z6lE<ED^b-pY6-4@$5Ow};4KuO@9U;lZ%lZynF<`m<a5_}E(;2L9l1L4vLe#oGA)-H
z(1|(jLgAtWZzrRfo!P=`XFVVJC1sJ-`(h$f1XBKE4ly{@c*^^RCTZx^ubs1kSS=n*
z{2iq?@kC32AGR=aeaeQZnhReHMgRxw#BS(X(fz5PvJ_So8g{rJ;{N+xA3;!jWcREN
z%_N~d#YH%Qc?cT+nH1tPJipyZUpoD$7}Eth9p9rp=Rh8YE#>m##+csBy5Z4z2PMhW
z5dNE;SJU<EbHBOd1CNsD;>t-U!@ra$-f1I8qc@U55wy)6?aSFflpTj`Fn-Av7m$Hp
zK`QnQ{^Zg$Rvj}~5HKPjnV(1jd3$>zs0}PFeU9z_fC@4wH3{$zd_=i3!Q^4!l?OYY
zOUX*u<G)g6^NYvnzLPi~3{xSdP?EqCXeK~2Nx*>53k_b23tq#jnnU(ZRl^cia@70y
zye5Q|DxdqbEW}wt(CR_t8nA+08{TR_vyj9WKt+IniL2iNk`4jY@SePsr&SO(_b&t(
z*>EW_5q{z~<0ewC^T@81695Oc_wYwqXKWg2k@@F`DLunIZlYXEBW=<Oa5BJi6Y<eV
zgH`k>{||(f(e~%~KE9o+S*g)0x4(#g8X>!a(?FZvtQ7tK7oI;)+ZR)>uBDnFcbii*
zMTR*=PN*WsPqW;{y|_s2ku1g6jWbQ6g#Zm_EeO#7>WU*b@cfgE<jD797CE`NC3;Cb
z-=mw!zv1Y-8GO7oZOU2W#1KNne}cXjL@t*5{|o>;1D3~KG4WO{p7+%*9l!3TTQShV
z3PPPuvwI3tQqIMcW7T%_ZLgy;#IJ@%|MQ%bmU?NOELzM_=#I#BZmMR~t$H?$RQ1!T
zV+uO<n*X02<II<sI|g6dk=@bXwJm<XvHUnipwl1@Qc-c?*bWLWd1wHzkGFvmG=d09
z5OK8*m}Cm9a`-+i&1O<OWF>WEB<mgZOmfAUoA4ng6=!4bdc;g0#aTAgFcEm>QE7_M
zc#-|4(-&^N^wT6rZmUPb!hF*)AFmby-WFd<r^+^4Jp<?HPvj~QmB6s?W8N&I`n-89
zJcs-NG<<M*bA@9uB0&#-DrrF~D^jPym(v$kwx9Iq$$UYO`EF%d@4da_e;~5P>Gxkg
zHi)~qFjCp;B76&fg0Y3d2w>K=!YX=j$EKV8&1D9f*(^ndDTTt53Ch7A6G5JE4mxv$
z9u}$@#-PLSvdRszTG}c<4BAR=+Eh<q&^ZS4if_Pcdvb29tQ}cHfT9R^c-E<{m<;hL
zBXGujg~>z1C&~bx1f9)265CjMhWVEB2w5$%qWd>8@S1{$BNboU88`#NPZQh2hFy0D
zYZK%%Z2v>Tzkn+5L!!l8(T|vQ(3;e8l<l-}HTHi(l&hTXr`ap|Li*D#_WxhWc!TNn
zFRd`Vj}Ibcp+bqsm)25E@^$p<Akoc}8V2+3rH^S1akhqNVzKxO)}>JT=A%va7GZWf
zh`CWyvFbB+SUY0dI_{~RZC#Z69;ZnE^}81>Hz5d~D5LPq9a6^&53RL_6AfPi$y7Su
zp8o#Cb1^V{Ke<@|*jB&Cz=GU~pfv-npuRsjW$}CCn&%35yEko^3wT(RwLn?>V>idl
zPt=%$PDB`cZyEJEA8enlYZs#5-nHM}N4{}XD0w^yjz2Il>l(LZ$9y}$#1gHgbv-q&
zhmZqZP9TMXsR+y_8XZR)EY9ctO(O3!$Eg5~`|XV#W`Z)%1ZPEb2;InA3z16eVwc!+
zeSxXNokj~BcNg+%fbGL`mX$K@`j0Qz9mr{u0WBMQf@VGIrapronr&zG{YDnxV~{&`
zMx?(A_?@s?DfUDga8Pz0Mr4c60SW8zxFaog`)Q1z=7c+x-G)b#*?r@+iD9GDl1?-p
z<G{@mft^kK8UYAvOxiB&M$yM9w80hW$AyOjB}k>C{|{AP8CK;Mb-QUP3F!tE=}zfZ
z0RidmMnbwvx<sTK6p(I^hD}H#-QA6J-?h&<-*@lj;qix$!MpZ)XN)=K7#IJy4%t8r
z<3`7rz6Rcsc+90_eA6)GryDSA=ED)CVzcVMhvQj085=&|xby$0y?HkSgB#+P&X#La
zfnzlxdlw)LUdJwIMiA%nb^wfOSn!e{FYl{?27#@Vq<kADfaW)FF^4GuH7lVjSve~y
z;I^1~%p7Rq+>*i1=|o1oY~vskCb0_EXAQbJXt{LQ-A4jz>ElRu21LuNKa^lFqPIB<
zace4(j4XFC^EcHhlWGzyn})sFnVIDC>J-@SpYn3PJ|~8MPTycq706IhV+ro%-nTAu
z|9q-U|2HT+!?h+LDnCs|M`&BQykUz^>ptMsi`e5W)noG$vM(SsAJPq`vj?ww`^$)9
zw==oWzQJO4mfB0Zxh8|)ZWWHOot^OkRB5RKZcv9r#>EjKKi_ehxy?KiCsY{1!dU66
zaRwcB&TokXv(F@nOhm-o;%e7p9XV5~QRjR!)+UF7$?nI!A-12jhbj8&7=5ZPy^FCX
z_sWJlUZP(elTY~@5|R1TVWD{5WYOX7?lrlsY)Et;y@d%G>iya2rWOe7d?&*?af$%M
z$m$MC8I#cyo3a1MeN<WesFg>oD`~jr388HzY3#gWNU4&=yiQm7P4fPJiwV#x|GCEi
z@e}>n7C~CavpOB-v+v_0Vv-1v!l63R>Xra^^F?gWa+hIb<a^)<0fgWH9qo&$SpR2x
zS%6CDd^NzQO;OqmZ}F)c)Mz_B5&GqF&j8|z@j{B$y%ST-@`r-vykZj7E(Jydx$dk8
zZsRrOV755^6Z%t)uWoPPv7*h<c`Pp0vH##DhQ^4D>wHn<1Szr81Rj3aAIvMr;^L`m
z0EZj~Ar7~WM`c_N8JReBTrG4Wwx}{&N9ejfVI(uYTpS%W>et=ZU@|+W+$rA70=rr0
zPw!~(UlV>LcYQj%%|USa!DJGsIu(5RP@uML<OL{FAB_Y>H#?*vfBoM=MuIwU1qpfI
zYqJC8X(mS{ifyB%5FoNKUv&oZ8tyrZr5(h8cA&Z5$sG@DN41@jsZHJjY!3iL$G5r#
zsC37-_*a4{t)WzZCzajd@kbK=Wjk#;r;OYfAiPg+2{qw}a%!kw8@X&zW%uE(;<@^%
zLNeCN1=a%Kg^_$R(e=t?1@7ODa#*=VGhV$Y(dk%+5#GXOnYO=GWi}r_oh+T?@I};P
zg^qG|4;-1!kx#9h*F_T#>rx2n_R{*Ot*HwHbfPVh1rJy|^O4nVI5WRgTzHQb4&oS_
z5=`D8Ul$l3Nn=zq237(iMRWac{2!B1UJk%`fACwax{5ptUk}OORhTIP3(+v{Q@~0Y
z2pUdn{Kn~N07$QyzGp68XAh)o!t6-?TVT-vP^Q5T&J9<WVVDb&NOt{;&>VtkEe>n=
zcsx1$Pbpufo<C22|6I4zVH-630Ju(5-lj8-^kYkwYtMZ{M3MY?bu@a*<t@BqEXuI;
z4~4F@@21QOaWCMo7V2uUl!x@Z*XXR^ZQp!V%jrV*pa=UVm4S=;hcW=UgDeA%IeWNB
zX~JNnuGlIWBI9?VsD(z|?vA!%xvx+2Vf#`UabME?xsuW6MoU#&cgm{e<WDWX0qeq#
zA3t75eNln0DX@<DUw{S)aLfe}T!bB0w)9_z0^nsRRj9mjF=Uc;J}@kd{tXRQ>(oEE
z)Tu=eW24ERCzYBBViL`*-{Z`~P|Yd86?GANr0jno&5+K*P?UF?c$AcRSomeIqM{%w
zN?{e0>VerSVEa%Rn~%kOMmbweWO5*7&Q=R6k%>mXW^;l!dbK0S5X^f4()i-<GLUvI
zB7~)u#>~u}SJCQojC`TV+?fX#MHNkY#z;2nfe;rW^eA~v?i06p4-;2&ZdmD_*QSse
zLv8E;wzZAIE@NN{EeUa;bq4!kTwDJXL5!de+axY`3`2)OFfa0!Pu`%LVL{rC1aGL*
zD9PY^IP4ou>B12cWlsrlm}$$4NW6Ze3CjL*w+^B&+T7D`smoeY$X9JfWQfy@YqhE>
z`}-~nGFXKSu`k%-f|N+^EXOLpDY6umQkvm18_3`WTbX%L)_)%s)R*|eAN&o+Cq2M5
z`Av*7O4rjz5Cf~zF%c}HW&W9AAX&@)9-{ga>6}}1>++5BAQ=T{_f?gAANT-2vDbFy
z21I6(&3L)yW~4!XjX?;iTs?~@EHG7O2SrLb(CG=^-2VV(j`)=#KOP(@00**a=l8}6
zJBTTJb`(&2Mg;JzN!R<Y>?DT&I4--p<T<^2BzOGs5hwI+gGgoPXy!>CAndy99c9jM
z8_8>NfBa4GPy(RrOn#*Bu7bjZdwRv>K?^m-X_10>cka>>=(WG$_fdw?h5}VN9$dfT
z;?jeddBh|5d>7MAD1e7wv*?gX>6VaENR2i*n!x3>na7(XFWuTIUnnz#`Klsma@V48
zsWEhoiMB?x3Z2da=2_c`j7uDL>{1PkEN4CchkSPCfP!vn*l<DQaQ@fW1uiQnSL%n~
zRb*!QXlkVZUug6&?yBT5ohwNj$Z)e81nfV?s$clUhk(#=yZ9j5A0S%$#`vMRh@`5o
zOx$!jn0!Q@Wg+~t9bOX9{4ribWp0#amh*zK+A?tXM&0r{y7MXmiHODz8Eiq^l@Z|!
zfvq>kni>;xJjj91b61u7^oAS8P`WG2-M-uK0Z_=3E8SxconyD^0d`H?wo_tuj$2n>
zSP#|%xZ|)ozTJ#A6gL%Jo2O~ebUw|{xT2Q5h*kzwB_ysViCoN)7@1o-_n&?RmcWA1
zw3g;L_L4wGQ;i&Eul=tM`zBuptHU~z$9<7RWn9g%G>qFTQqU>+#v?AH)4|7k?{12<
z-f@tiNNR8F<^&G{6V7j~m>+tSHf_C&V#Y^-91I)41qvyzaZ&$nfV?Ww`gCP<qawhk
z1@LED7hK_mjxyPOe>HamTL_*mOWL&k?f{k*Fwq|vE6<AiCrfBBzOW$nY~<AlQFk|O
z`zNyb@XvA0fkc_0iIL3(5v^?_n|&Nvu%PTzJVDA0Z%}k@4~AUO(Dn=q$L@lUh}D&x
zUC-{d(us^Ssi$RQpd=#@`XSRv+%U@k_LfPA=NB%wd2TW?A=j2=2)q%}nX`s0HmCqc
z;zI#kLD|O9JYSc_#J!tEjOCMoQ=s@T7bE<N+TLM`A~nmV;qRdc?i?CWZQw_?>DGbK
zf&hFNtQ}iXC??<`kh$;wtNygs`X4&w!O$L;vXR-v(V{~?vZLr6{Mbp*)7BAW!-lyg
z=P@lC-}TLa$Dzf#=v<!(X)^u{<Z(VcOA^<`2A9d<^ia&@vtQIY^@6Bnb#Q@+az{a3
z$xel<1c7u;)HW)t=kR`cgX%fAzv;O?wtTo0`CyLfcU=hxmoYYT0pNY=SzpJyXqrV(
z!p(XEkEolp&QHL&cmG8au98VqiK~Kkt?NYuyjwI<;@bA{(ZfB(T19{0^R?&knX1Zw
zu^uz%Re-_!7cQyN%4L7r&{`Xqe7L;QZ!N&<iv6e(o2m@p&fjfA6vIBjhAI7QR93t(
zFB_|@K1G&1i7M^@b?#wMm}x!BEMnM4B(OV1hA?@gd|aj968Pr?(dy2Ynaa7JGoNn|
zXsKiSWRfb6(dT--yKPR^Z!sKu4Y2vO=F#75a&J^sW%RkuL<^tg{mr*~XopJOm+XA=
zixISE-Blm&IJz`E?vQFa9TFpZJ&z#u-q6hiK7b0kcM9E`^E<B*G|}u>dfeXXUQ!&p
zazTn#Sc7*i$REDSnPWEm^oazhCk5#7sM&lPoN}Fy08_SNZRLnr60AF6!kiF=m9rVy
zV-KV!f?%_jS{D3M{Va~XvnMN`Hm~d#puMkO0bGoD^EZSI<j2D9njb1gQ9lH;fuycM
z@((xCy$JAHaTn2&L}~SFzD{p4Wbwzw$#&PM$q6X=sAap^0d$UPxqU(8-Q4e0<d5P9
z-o$LC@>-oIAHuj_psk}^r+?9qxUY-+Vh%gH4czu}47G8O<r52<sNvL;^&}Bb1L{)E
zR@~-%7GPCLT|&B@(ku-o+;vM0?RShNrXwx~n@G2*zQ=#ug%mc8Kx1m61>^4UJ!OI}
z_W>h-9T}0}R}ZU8h6nLe*@mXiCv|^OU}6>l?-eEGHJRc|E*9PJ)JGK=*s&p8U||g3
zm@&kU3i*snHUD<vKoT2~FXtzKF)ApgYmopguKmAxNh8iN<MAz?TQf_&?pL$gK}TuL
zWiLJMuK~9C=wCL-M%HPtpJ>RIYxP+?Uou<xJ(o;jhf3B!#_%Z?oBfdp4RC!JKhE=?
zKP@?F0mSr=th1wEYrco{nL;%CGhKkc7_L4q_%jjPjUSxw{=>$oD`-id6KQx#F*i}b
zs1I<N@U6p0Wts&mj^<eFunjsebrs_PXiGW|A}J^3Ozq5_Q;43?`gm6ZLR<8}se-WA
zv%}v6sed<l?b6x?d41nt-SD$@TWXT%-`(Y2sq{nZ?N@N`gek48;bA*qLb{%VzGmt#
zJrI(*qWsZkjG~73MthW3WYSYX&m0IACW7exO3ys&x031e-8HVGnsjZ)W&!XSMIEAg
zw?*pKCLZf$i?bkO5iPqO0ldi!a4PyUn=-Ng(X^V_i{ljL8#tL|q8Zk+rdE*$&2E1R
z7Le)xo%a=yTa#W-3`Cjg&L6IO=Wg6sI9dJd%Y=Y5y?<Bdz$$m@V|l4wtH5n_-2nhK
z&nk2X7((n#j_=MO*k0XxT!<zp$RF<eH<4zT#->ejSf9<O6d&1vBm_w4&Q5+=b=oL$
z;)C9oHXTzDeD|m$f);oR2$D)*D=0;=%_1Nv875za)$-uUH{ps5)}KJ$g32s%KEClp
z!kVCV^fY}_#PFflX);VPh<5r8AUFV8cgg$ycDZt(3QQE|L_EM&hs)EioqHlsi4jiw
zO-7Wv!&2fh5+K}ExNl_b`6X$~tffMda94b4<Nc}h;^Em9uy1ml>G~p}z{=UJfeMB#
zFALba!4QL$9$0E7YD*_hgnII$A_q=d%N0SVE+Jgm-FA9*2R!&JQxzN4|4#whWq~}7
zScS6R7ff>1#L_%3;Xn0!OLbG}iC#=3KpcNMkyu54po3h2-E1zAg>JS;V^9M@5sSs;
z?HACz4*I*gd|-otR!`K_QqVN~qYZO4@K(FZcX8`<6=rn+1V}zp-i=jtPr1X{sa%O0
zc-20%C-0|JoFD?^^aDL}RUUS>8F(KlMH;cVed-c~@+f6lpQITEq_-a=lv}P@kqP`M
z8-x=&rC)3OR#XON9$({8za<r<Jj;5A`+}u9d8xuU<XYA+#BN={;O|fRF6qV9sIkMd
zJwl{`acQ0*CF74(fD|jo(Lj@o;*uZ+H;ejiGymo_8cJPUHjB{x+1Sfw>97Bkt9aXq
zu8*<PYKQ>%A}R-<<r}$EMIEiM5LvK4Ki_0Z5tIY}hIR|96vkPbv+HGeWyNa_iMJ0V
z7-uz+9<Q^NC-3mRpa%rDpoC?BC>+n1dD!=txI)()BpiCbyg(HM)*~Dp8i&Y_zwcew
z2wS~o+%nR-^;NI<xLH(Qd6;upYXpli=+^5GhD}3!E;<AN8Exgf4>0F-Dj{!IsR3g$
zZfFvP6sU0cn|zYCNGr4RJ!a(u76Vurr5)o&+JT)x<=>O+jEVBP{KX8~8e;A2?CP5D
zX<|cbzWoS<E$$Qkk&Y)Z_*N<3zxuQ!J(=52ZFhFsLS^A*MN~|$btbW<1z3EJY2|6}
zw$Lb1i2!Dc_@w==$QFRlqTz0T9$-m)nWBmhHlrD+TGpstR?gg%P*wf@A-OXCSKz+L
zLtspfVbHMGO4PRx<$n+w3*7w|8!VY07FzF#@HDx+Tkan8K!fjdQA9ECvvm)%V{!*B
z(D=W<KFOF8wBB6vy<ZR^iQ$L42lboU6<<{9+z_^|T!(zi!NWf5#ah%oWmf4VLLCis
zZL*UDU^!gZ6YC_WAnq~)R)Qo)@=0&+{cC`X*_lk(3&tf8by2FH&aNU>(8{>Thr}JW
z?Cxn#lANB6DI}Uax5KG*;n%R>vdVmCkhb?9j5w<XXfveuA6?Dlqn)ku<N9>v#hWV3
z4yG%hlVkUr0iex4?EetS_z$XB%K_*UWT^{-sij><-X~Rj3&YeUg??zzI%E3JB#?SM
z$}vfjfy|Iq7uhS{mKt(fsXj0I=hM~GO>`*d0@TADD*8<TVzVDUAf=WK@WEAj@I9^J
zjb1A`jHq9W29{r*I55{Q55S0j5&%iqXfrrOJ_15!k9PwD4hugm?=<lvm5rbAu@zif
z0FE8iPui0X#s#;NjX=J;MN4o4KcR7z9SVelZDn%6yF|=s%Qg&B;2r(<?E0Xha|a1*
z-07X;P9GRI`v07OcX{9Efqac0i09z%&n+iWu~PzRW8gkkV*RcS?qDJ(=A`=rI(O`w
z&Ib1*`)wl59Fx|i7+~%pHYk__1Njshzd{nEY@&Y|p@T+n(qVDu%Xu>~0kscICs`oR
zGou3779tJ(H|dD^G5zJkBR-sh{FmmLb9W()Iu~xpaxkf__+xV2AH^PjRaJY8tU}r6
z)17O^?vMKb(s(##yj&A`yxbaWpQyrs3L;T8rNNc!{lyFZxsD0}kdDX$G^7GleVKP&
zl%+b2@Om+6Q%m!J&~v8$X1Q<qT;rLJzu!_sN($u8hf<INsq%$Cp<6DZc`0szoyc`M
zF}0|u@29@l?#%vLRKV#&gfw}cF&#F!KesynkOyR`6MY>K!D>HXf5#@h3owNEZ&CH9
zI5>Kzqg1{C5aS9M4d>F%6`4Koemb~O-3DCrhKU-F&Eynrk=MSzC_(R|TVRN*9Ji~l
zy7JcuM0YHIQ3%lbaRchoQFSQH(7YPFfE0>1pW*hoe4ACoG1>MGoe~3MQg}hAuO$7i
zChwni+|Sbo3)SvRxKYv{hYR<&w?*mj`jT?Z5w8`0cGE!FKC-3nZoSCPOY2sPkKkT+
z{jdnMOtoVb&7w@(Y+;*QtLb%1kq;3}Uwc(1=>;NwfWkd6F$4sy%e5MB8Fn{A6FtK<
z*uq8rux_`xusf+FyR7~kDish&aHEg07~c!iK9@_0Q_J4G(B{cnYxN~}oo|3UqTK6Q
za20}AYayvP<<RpwC!;C51WF>_u%iHB4b+J%0C-rN((M_!?qIk7^t^P=iE($2R8f0P
z=A*pmaG?Wq<CCR~bk(WnL{p1hD|gYRzA99d8$}wN9R|izW2@IcP?<b<BDG`7?_*Ps
zZ}n$`!LW}i{@q^BFrz)=SBJCOr825zSJEDP#)hy9@mB(-8lW57;UolOSo3R^bHM)#
z5lQBMulsF6q_nm;<pW|^&<HStNEfK8jy<>u7Pi>BWs6rMvFirL-@q~Y<uj{KCKI3g
zS8k+UDx%lngrUju*hRI2QQj#^g3IlsN%yZG)di@c<A)p`!G_`(P|1>i91{<SvT0=~
zrRbPGnUpK|`~1VnkrD?BG;p3<<h<tI#s{-hd2#HZrjA#QQnn+2-u4U`&5@`6%dA=f
zsD|4{oe;qKQvBJW1~xXhV3?Um1A0xWW3m@YZLl(3|BfLX^n%hl2+IzT7*32aq=Te6
z3u;&Y^Tydh4+G+WpDzXm0P1_y9qm~GP|Y>#+-Df#@~KMt`Y@mOw+Z%#IwE>rmRNe8
zjaYhR@O7RqC4)ldhK-}>IySzp0Mk5RR}G>xu+DSCtJh%(g~v)*+r+o>+b<@0D|WfA
z(d9Xjv@^-82cu1nG@ZW#52Ht@$??(?V*ejKeA~-ML`@%O1d;gdNTrntXt%^+IIR9R
z-ncpD=kcIe|Es7vv0X)ZXVR?#W43uo-p^U-Yp2mpLbUi*1Sxue$YL?+O?hGA0S}_5
z;hx9X`1of~0%`ZTZ{|c+TD@bIl(e8w%hy2Y+wOZ+KCi}5f$T@^B62ZpoXpA%&)3E_
z@C-7W$j>OSb+b4<9SZ(xk1iH{RUX~%qtBfrFr&MD%5rVFWNyguN~2v*>zyN3>9253
zj;AX45ez}EdD%YWJL??_qDLFs-!hZJp3BI_2Ca4GmIcru02&Ks|A6p@&6O>kPy@Mr
zyvmg+Zxslp6sjUzd#1c8(#-5=I3BpEH}Gvxk|~qdz+*r2=pMiv1ap)v#IExx&K&r#
z&5$)`ZR$|afFkr)8nhWK%uZa*8tZrb=e85D#s~aYfdO?6*f?Pf)>w031n#w9RXYov
zdmmnWFvj;U*+omml3NB~QpHE98wWa{{H2rhpW?9Iv=9J<Z$c5zR}UNJc1CLxdZ5&k
z&x!?P6cqpA$d4|I&+8q*Uc4knJUj6uS<(%P%<-owoHg^jXR59$t}~&)my3n+3F7g%
zvVv*LI?D0)j9LohkzXpQ9zxdnNTQ^2EnI#H@?(>eX87-H@&_ud44wwR3H{B#2@mF5
z@19xr8IDo<XE0mPM=;hHAXxBTX#tBol#z<hd!B98EyHN*z$ZD(p3g>>KrI_AC$)Q%
zGqm@AXk}vlM}A$+1Pma-=Di{@Qm+XKe5Um4!R=&}u9IC-nF_Fz%G|c;Y=?b+<SvLy
zas7t#1|<YWOSPYfXQT>mibbV<+w`Chpxs9N?8Z{4_Mf<`nbV_!iWc-=iz>O6aSbJ3
zE`Q}r7A>A<TPLB^e(b(6kQmAgOHXdQpDGEWd(zN@k@^zv%1h6y=z#R<`ji#6Ar8#p
z+HxbffT$J_B9l#^+HP%C+<69pZI-iUT-Cx9DhXv+_-{%tKYfJfR(d)TJwxDjtdn>D
z>2cIt#GUlP^fLA8IDHC;yF3Y?MNX-*td6jMu<P8wjeTeKo_h@f@jb0;O}lPEk?EEx
z?2!G9BW6?qe7&FJp=H<9aIZOh(G<x*9vNbj%NIV(2^uHcTb1K9tGxJR`xHPE{`P!2
zqc$Yg_Hi>%0sbPeD1-hCAJFLCXXAl$R@qp$TWFWjOYJ4#Oa#miB7v^!*~LZ5)a9#e
z)YHcEOHz6xZ>(Kw&wib~TQJ1~?(T=vqyzx^tpERql3<i`XEp^U;L>;|FXGnT<h$Rm
z)8co169*XG$eb%2Y{diYz?`P3)xw+HH-lNb*Uehp@^xZk#mO$C83(nb5Z#olo{45#
zU^cTGTXj7DOp!a3Y@a`BJA3iW#HfKvB!_Wo@<-_}4}4c=V<Dhv6ms4k>@Kog_FTpu
zis=f%6ZKnM#0*|jGn3O38HzSmpKFfp?(Pb+K?dM31KYrTA-?R+{9KZC$&f%1&E?@O
z5%hWP>@t4R&Y%9>ts?tr64J2ti05BSadK8PIek&bo?~%k@146k43ke;_KdQ&$DV-(
z=;yZtO~>OmluuhdXRj7J=^jFPIEMhv9|2AdIDZ4hSH}d*EYsQjkw?=_3tFQXZI-rV
zOi_sYe2v4`HwYFv@$^Y>ev_Ywja9-qUQr9o6_5-D9VE4GBxvRqTBR8h<(ly0hJ70d
zWaPZy+ykC5jhf$3xLPX@^3Iz{If<KMbsSOM=OObg-U73;vt-`6Ib=O*@xXoe--J#I
zG$A<+c(vbUag0xtwVjQ>1)3YjmiTm)c)K(D`#lrt6A}rCy2XFqDSR^BtzY<{M*I22
z3HJFuYJ&0kCqg2z3s6*nmI9Va;rz%+WtSz7AdtcR8hw$?>w#bWpm~ROW2%edx?g#r
zl|JS1Gi=+q<p=Sqo%BggtqT%fBKp@C0c+RpNQp5*ULroA00oZ0l-hMVC`!20Xz+Z{
zOk{L4Zq_@7074(3D!XOG^OK#Qg<K`v@ZZ5~q8<R$^qiK$0IC5!x#4mL+oX<CoNxYc
z`^FWK8qZZ!ZlmwjK;+Nz!<TqJS8KW^sT}B^0Bb%wI~i?lQc#=z`SZtcVSH=0z>bx!
z!`e#R+L{sEZ1VW0nW!dmf~v?ypV`a7?O)-^`IP>n-Ag@zqha&qR~+lt0s^uOlP18D
zMRTuFDvc*$M_e*lXU`K@(Hcu$!Ef1cA8DqM?JH;L!^)<APq_rh)&kjl2L3o9#Yn}|
zHS<eD)4(M*CwwWrOZVi~GZQsh#6<nx@Hu8;={9Y7eP8286xfar7-6pBwx0$C?Sc8n
zwm@bFbDa)C-P{k;$xs>)@1aAP1E&=+iy@SE+nP2rO*oMci{=l5OIbh^YWn(B4C$~C
z_m#o=*PBGu*Je=>lgW;Wpj%+M+ejp@0xA9O?~g$6zi`H9;9p8h_s~{#C-4Ix<?yQb
zb2u7q;#UK<P~dL_iyydQS^qejjgzo#birQk`FWe~E1<oG_|+g56AoP#(%1Y0%cUD(
z(&aMhzs|ay++S=a1=Aj!J$+zQ$44d&zPU)G%+1YN%1WEMk~cTaK8sU-7SFE4u_&}3
zVgc7u+z9iWGK~8IYns{7WKyRAFGOa5Orh8;-J=Q{G#G-Ht8iD|EI`$4D5EhF>;2Q^
z?AzLajY@^XWLc+V#%DQ65KkMyY;R;pP=@3~k&q0e6j|G@UWyatq8`X~%qq>LnXxos
zXI@PsxaFO?ds*E=m)$9t67|J2_of~?>3nftvT%FwpNrN!)evf)b)%r9=mFCMq<BuG
zJG9{7SBlzDVw#vj0@_*7AC0*BJc+ZrSj}U$K53>LusM4{n2}6YcJr;L65o2jglC1=
zsDj9-BB`P`s%K4;M!~Gg)@R=dT)PtgciPF{%E0tM5H>+5`}pk-_fgI3J~lZ!GYnuR
zl~qzQs7d}4eW%-phJtst@x$b05XgoBQxl4)dI>kCmedtdohg+@?;Te#c2wXQRC33k
z>w}f^a=S;A0?c~QMF370%G2)htqE70hFcFhmOmGAPx3<b_dylS*eVokUDk->?98})
z3A~}*R~#4{#MrQ;FN-v{k8Zv6#>+JP!J4O=-ZOWV1O&D^$OYl!!<=Tm$o1}+FW<EK
zIkLTOa)sy4DBWa9sT6e!C#-NUyURnVTdbxa>+v(J59m>G9k#st(|S{9VNn3Rbh+mQ
zatlODvT`ctsER;VEV<f6fX!|+{ZQ%)7Jpx+3ozo*L^d7>Eib?`HT-YxOP^~k_Bt!Q
z#pGMk>ttZPRlN5e_<_X!u57^q@Z9`e9r|AZ&CzBFdRm_IB^rYwuQn3&A)U<*$noYw
zAQ*Ww*=6T~reyfRNMv#P5pLH?5FjBy7Y*z_Roy#8D#)$8YFy%~wEBBpWvxE7Jl}@>
z^=Gw%WMv@%yLR-M;qYP{-;zkBfpk9@D8TxS<BM#*TAl^}h6els)GO@&ueJW$SJ(@y
z{v@XCE)6|ihp8}NI%kk&Qa-Np-z{AFnh2KZb~W)#CqA&qFAy)KK#$VJ*pb_rdW>Ki
z9IEx$sE`vw`~W%wBMyK4mgDdf=R_2RvEZIXstWMRf%cAzF&rlRtn<Ky_)Y#=j~>Ac
z<dEm(TuS(x9pC=cP<40A&&_F%8aMUBeAZ3vSQ-4a=4*Or8-DioH^8{8F9b0)qJ9ko
z>0hZLYS+zw&aJq#|9hr{C<wo+?&jdU_RdWZ7g9pDE_vi<xZdwT)?|w+9D~J@2)6wK
zdYFMk-nM`vD&%-R0%hcHMQTOmV&)X-LW3o_-nBfI+Qe<d&}Z1zi`R~;tHHghn3sJH
z;j1Ct&ak+i_R8nj#K_^Ya74(7t8qyw$ZDlpAvdZNzbg^zw1HN;RNIjR)pFoJo^<!W
z5AQGnI6=YmL@=gv91jufgH%9x=LELi4BuMZs!h8cmf;7pre{9z$<NtTnAr#_spR1g
zS0!b<qxq3oQBj=YN<W^RA#BrR%P6VTCi2+t$me(=2=N1&#+?N>J>x<bNMXREwYj+&
zMDFN|&ki&&m_aUdk^`+g%3taS-M{S_0sy+mzIPG@&mi4%HQV#a1;|qXhqvXQ`57G<
zLZ16+RpabK7#H*-tVa((U<pWHnY9g>vUpwp#kOs6V5IP&`|P)bB^B!cTqy3gGkrg!
zBLv#_ro73n2m8<HnNvtg%PU`z<O>|$D&nDr(-=89;S#b8eG3=77SV6h7kT*A5wK^1
z20tujIuRh7f;$Fp1<#F*NKv=s@(fQ3dH59Ob{BeQ#>%G}@ZybzI}(->9(6dpiDqyP
zP<#{wnAra|T!Tg3bo~tb&7ogIC5_NnG{yh5&VFw3ggf(Qekv-HP?BfCP2z9xp^&Qk
z%~%s*S=`M}{h-y$hU3R7*R2zWG}DYWUuX66g_8aXX6+Vo%8E3EPyVL3rHG;R!EO8R
zO$gvQq1ef6=mKV;(DwvpynaHQWYpBL)a^h+la~7P=NBC7_L1qoPjHqJn7V4$B|hx!
z7g;?P9Db-{(#(J9ymxu;b?0!NZR^}@r#^AJ?Via?pg?7q0%b^Bm?3TlKAH3NZnQbE
zbQC^kZ@1Xr*79^3+vJ{*y~tzX+1uNzFoXuTD0^(f2l~sW^88q;VJFQKk^d?xiFx^F
z9Agqfm)z!^QZlB(^F4rDyUw<GTiLVnVKuLoz<RQnc<S_3AUw3g<JC+_<UyitC;h{9
zdX=beR}gf?rMkLSTnWFiCESLUvw3|~s($OR@ub9gsiS%)V&xJgdyhdU7_CeuWZ1fG
z%aMB76}rW_e7<c@%s`@$*6(&NWXEgN&nXac*Uu1cV=n$eE@|^PPn^8OIyety=$6UP
zZ?E(h?q17@Lj9)=)9o<HWtYa4OV3zBv>Ymt&m93?>sv@T7&pLwdzoY3@7p$0qtIP-
zv_V@;aQCy&o@Z?GoW?XN42p3Kbj6mq;jA^n4SO%EGW0b=9NTBd$tX@DI@j`q@CQk5
zR0>3z>anJDPW6a_qhHkuMnp#snPf?aN_rj(@eFn%T0<IX#XZE{?CQ(QM{yFpnYGeX
zGfnWuARuh<YjMsc8X>tgCgp4hrsYR!yWZnK*d??*QfW;Q^1P55#xr$TF{D)Qr_Ir}
zAJ$JhrJ0LqPbqGgJ}9d{E~E4%i69&dDP&MwS0M=7Hlf5DPK&mCI8}G(ia!b}bO(oZ
ze5>;WS?9@(?y)<jo8W5<iJ-zhzD2i-rc1Z=y0R4uV0QRi&=&I`U5GdtTcYN2gkPs&
zy%G-S_%vM#sjo>K=?4Md25LMEDiM*-0e6DyK><&Y@4)S6a}wfrejC8oA&Lpn@jS=3
zuoVt;qcKE5{Ysf<mxT;5)3Ne7c~f$Y(OSBtdj|(e8IBaxFwVOf#m;=}yMQLO8X!3>
zMA_FPU<r<EE4)!FX>_cYhK4Ex6MBlq2`5=vTc@5OF};EsWHmJj-j{knHZ&)D(%?Ar
zb1*1Wh+@Pj+g3Z)u#128uHLI9Kj<1oRh-t;*5>5obo`{|8PeotVi6oTdgRkPnzXMo
zx4UK6fEo-Y>Fi&q>s(EI6rk?K<&qD~E9{+RGHPuPi&?ybYuak&rM4wGe#vRwL_uzw
zh^DKY#efUgl!tYs!?!a-rB47U#r)3<G4YwJUdwl)qcjU*7urroZqEw)KYi5R2|r{B
z3<rH5xh^OLjmmD;FFRnGy)I%?+;rg_t3MUC$o&l4Ipnt-_ZM7`n?wf#7(ZuHx{rDJ
z=}-9ZCrdY8az~!Mcw!y8S1)oos;6NDm#2`4S~u4QP0h5YQ07WI*r{&mr|lIaj+Sv)
z5hhHVuC{z;O`~I=9}ig4%yc>#L1NuuWNcOS_9A(T4u)n<QTsaD=xxcanwpxuOuvQw
zQz|PfBmG{1b&ql^aP=&<x*UWr{?N&745W;nL8KbMwZGaM(vr(mLxhfGTQcDB<1cSN
zc9k~Ypbco3<LV0Ga;f^~F(~2_hck(N-CypmxEUKd<tBW1S>s*&zy&*<5cYZsY1+=c
zkd?m!l5|8Qf6W0ZQ2&s^=x{uuR8R=7TD3Rz=7ZfA^J;$*Ncq6x@ZY^9;V6Yg51*{f
z<)^3UcPUSQ6SFyEP)|>(%W6-3$(RD+$#_CQiR~k*S>ixo&ziK1Ibp_IZ8!#k<XKr~
zg>|ZmVf|CM=^#gbY`({iCWS2Ias=)fH|iB9ruS0BUX<(wY&=PSxUJ&L5juf8nuL@T
z64t=Bh$1IN4-Wz1P<XpA$_?R86W@Cc9XAX%jlhA`jum?Wip9GZ@5~6nPzZX!k!49@
z;(jdJC1dL$!bHEMNBkCVG=nkgT8=1Pw;5qK<2`6jUUnG&oSSnYqu?@dtS%)M&SYPg
z3su(>iLA%|gggzsW|B>066c%ux37Qh9G{?bf@+LgU}+%;2Q`I29@b>AR;`z=qMsG~
zaYm9YBYcSSPsz^gjE>IwZO?^E`hoK19zCRdztS3~uV13=C=htBJl?h|q-_--?=tll
z8aYNjQdQV>OIT4U>gx~ti)P-BD`c6;6XdHam{8GZUugEaVcy)#$ozWUmh{^v@qA3>
z>x|lvzk7cI^K`i(^qVjb7G6QJA1qJ@8Z^hvzYeatuS5yt-ezWLa$SPEyK2!l`qSZl
zd^?)=Xu$vgE~=*#->)REWlk<uC_D>4Xi5H1&`l|U*zYE}Td&x(^Gh_BJ0Bq-@TBYF
zTs3!p5uT?65^F@_NkX0vp(6byzNhSW*BetfF<l0<avH|<r=%nbG(^Y72ak78VTbYh
z(mLfcqwTx-nl08%_3Lvo6<xc>*FyW0hx_5Q2riJ(2I!$T=GFbFr{euSHC^dUy-imm
zU;*HuA%lX^{^||+qMKvP>751dWH~*pndsPX*XGTcKV)yJP1F5Hrl`CsfiLWK%!i}S
z%W(hlu`!0&n0#d%;vdVNEY%e-%`efTQ-rQ|kZo7#b`u1CoUvwLT{zI$AIjiHNTqmS
z*3zCUw{Isyyn~5}xiwSN>mcJv`P03_W^?m{Zk-wpHND*47e>=epb8^ETxl|h8gozG
z9T;u0{B-&&PZG`m#Rjr$?faO0s&{K6`${L+qy2KJxeP8OI!KA*^22BG>nF%w&mWi|
zieD%@lB<}L)KtKdgkcyi#Cm*&TR}}Ea7pmQQ!=M{hWuJ8Se37JtBO^`R=B%t^Z02%
zx~>rPrXAco47wn<!wnBNC*o^&VW-PZYR1=pPb1ij)lj)qgG#OK8+8F}>@qUE)fL#f
z{gGlYNLMJG>49gY*S=}2p1FVaOIo2lDa2;J9_Po8AId!aX3$_vjYgM`ON~{p6`?=r
zBxYqy*Z8*)z<VPYZO30t$ryM02?_EoF)=qU&rIiH=ld_Ww*($#Wy2clRhWLhf;WT_
z=rM0*+*+w4(Dl>Scmug-Oy8MJe%IgMZ3td^T#<SLuc#x4;O~dkZUE%=uqa~C8}eSF
z|3#C?fIo{(we1v;N9|k-Tkn%^(@sV*;Cy{FAUjxecb&sI>tnbF#O6p_`O3DBe<fV_
za8gH4a<2`Sk$2c-%m&t$LK2u7(U6dWo*><W<n;tm`a#q7X5{u*`hcR-=;X9ANScPs
zwx_Puk(I)y`t@@~PUBLtUw3)G3!7Ov6*)16y1EX%qrkByTp$YDuiYMtOfC8sl{QdR
zd`9W2BFz160w+%G*`Q2^C+g!v@%L2X_b?VU(V6jWwATLgb;9ZE*wRIO7P?atKDe$R
z>oH{bq2CUJDcs7o8?lp<YT(2!F~_UnG|~~2FK9otr7;JlI4sD3{8idhSDeU0-uF{q
zSxwDFui{EaRaZ9(MDZ5?lqg7HPm!+!nu;36g{F?1O}u<tMf4IgvbD5t+b<I>>O2M3
zfTbe|x^34y4<L3&6bvU7D^q<Fn{$H`@ydX9pQd4_E9f?Z$fen2a#+udIipU;_3v5C
zL}qu2Zv$o7{W%*XYsZNQwLOMVUm$u(q?MApjI0Npl~U%s3dLcy8cpAVJ%PCqjnfS8
zIVcWT@g()=LPguKV$7W~rBv}GbCf)jKaP=|y!XBMMKXW9GQ9eL!fQU?de_{MK`LS=
zf4deK)_Tn7euC9hW-Ah~(Z7VGJC#+)VEb@<dmN+Rf_ZRoz=na@o<-^IPK}4Ro?TA$
z`K}3Dy!Xc(*vzIBC|moxO8+onyv#!!(=O*rjMeKes=4Ghcyqp&FNyH|LA=?tKOqv1
z%de7br|f;ts>-)soMJ<Vl@o!M4aK%R9{0424{)jHdsAz`irvk3>;vQ2a3&MWfB`0v
zWdFs5vBL^Gjl%g;&z)7ICqeN4Pr<LlhJjE>Bk}D!PKi&o{R&Mrx1Ef5tV8XO=ehOv
zzOmi9tjXv2m%m?r2@;Z7tTuzVM5XxNJ`l{8ThVVO$n^|TKD-D=JFW2UVIh0%g^`Gd
zpYuj3+Y-%5KG1L**xuPUf-=q4ZJq@L74Ul;;lnwW3^cKH7`lP<{n%2B&L=$IGnX2V
zAP?t<M{6}xu4eO;Pu^-=Q}4A9Qoi*{*6%XZ=R^6TmbspQr7!$F)A}{Jv}a<tA{`|Q
z)GzPn7ZYN*ME^4H^N6cI3cT3`SrGSZhz<(^i$qB29<cD3P`JlAO5@vTQd1YuP`7|b
z33GOK_GW$98?DujQx>!OB3ILTd)Npp;X^P<c_RC(zno0Y&W3|?G+yhD@}``H&!P#b
zB&X%TR*L(08wyvFw(ZK#FW*&JJX^h1-C9zD$*5l1DS;*Q%~4r>Z+BNwPjAt+!9e@(
zw9bMn8at;o?FRR&y>vDW=tU|-S-8q<=Lfoy#+R78CaM525iemZ9ZP=_htK^gtlE{>
zQTa4)lFVAcJyxz^S@gKVbTr$+TxD!1<-2SxcLrXt+iG_w*CmR0NR-#$R>iX}1xH`J
zCki$*AI12BYPkIxPt~6*eE5UlDQ+5{1KNojd>;RqIJC@mE==IC37vQG?AIwgw`}RN
z6x(YOH4a5iqxhfC^5=f6*oinFWQmM`+C-L>va7Fu3A1jlJn@sd>TjTlSB-_Fe|?L#
z?|ut`oFU&jVHV~YL46<Q-I)j5*3#C5DiBhH2~)LiiJw&GC<G7Y$)|q$TUMv4s!B&s
zzr(6nHCb8wmn=1*<0oApxP=jJu<T@m!1JcR9ByCkcX&8jb?s)ip*0A{bj-{Lqy;lb
z0&^7&c8^*WMo1Cpj#<Ml#1a^_>>6<sgpG-1I?+vv*zN2Xwh))>mCu&cv23h-NQEMH
z5A+WAp0@ILr;)hOWJkO($&)sT)RVAbFC6U*ABqP*EbG=AsWf5lr@^JvtazeyMCI1w
zSKb$W3Ju6oO43~C^b4avM(eLil@MlMr9m0FaCwi<EuQd6^4v+Uwp|ZsO<u26Bs+4=
zJ2loj9QAKj7dkum#LD=Ll*lc#Y~=N?xL5}wTlrkh3+s5^W4g2()R6`onh(7BnxtfZ
zL)q#?>e6&VfT;BO#oI40a7Z6c;ka5HaUoQU;#{McEJeb<d+pQ4>a9_To_vSr8A{87
zS;%kn#q+Z>7v|+5zXV!(Rc7)_)i=5jHLZji>Yke<$M%7rbO?vfw<3C0I|$9oyCKt3
z=+k^sIy@g|KaY-%7VFV7ID6^AGTho}w@DUlb8LTik#9x)ap*Vsw#%I0M(_QYC5%Zn
zjW|`pM%y~{2!f#l-ARwg08;v|`vIn!(^ahw)9IJLLU1n^uY$Il@QZ?;rz{F+AFTg0
z^D+jeoTKr)%cYO^-c4ouB;a;XLC_C{ld-=tNs#3Z8?^_Rrnr>nQF>mIx4{w-0keKk
zUs;(5Yn+*x1H?2FU;o#NZ|BzCLyP3gR*&2dVe8?jwu!-ivGg6<Us0A=cROlhtt}j}
zIOg5{S^$wFDCak}<VjP&n7Xb5u}jMdf!vs+wzj8qI<-=zPZAV~t7qMWj{>|pI-OH`
z%K`ZlmBpVRuK7#8mkX}B-@m_LjKIq0uv9D4TUfH9v1aaP82r=W@{HfW%weUyHH3-9
z%i%_ENdN3rjQWo^{a=gA%6fJuij?&9d={&xdMUosGcxWSUFl#o{`NE#S&DPGUHtU_
zq^c42D57HG1-Mu1m!hip1u1B%s#I@{3FjexZcNDi9p#+XX>h#_Zn_5%n|G;??Kg>|
z96~wy%ySEii~cASvyr5BH)I*bJ_LJ*H?7mxo3^i>iQ{#dm|=IXS83$w(#-^<YmAM!
z!N=TR(9b(;>1tGa;oCOtIMzS54?d}LHM!F{u@PCkT<X;2_4?_3F5CJ}N56(eucvI`
zH)?<){Br5|!JM78kb#=KxjCY*n<4Ad$*yv*E^Zj$bK5+`aadURv_A3{_5y6~^Gh1D
z3m%V{Rlk~3{TlS98fD2?R_6+&`oLtdR`mqa5|nZhOW)=aa6&2E)|E6C${==vcqx#~
z<&6vnDMuB3TWUIn;q>soDJsI4;l1)(<jH<WvD9pJ0kd|k0r0RaC@4r8SJqH25-ibc
zZm_$BbK#NSL8^R7%b=kQl}&t0T)ZMG(l{5>71Z-ZFkZlB#B{ao;a)>Y3yse|sshz^
z`qko-p|q&1B7qbi4vGu7?USRPR&9rvySQuh)zpwW(s&ftBrSEV_SI%PN<@gwk|9b?
zna|Wa9nZmdjam~NGkk>}@Wj&R{D;2B>f485%lTM>k<Vpfj351>L*J4fh=PN!6D{d}
z6|S9(R7XD+Y9%iB+V2-De8k(lwO=dzu_d%(q06}_JRVv9)9|n<-o6n7)Xs<qK0uVW
zeu2dz1z9tJ1srSPrA}0d?_w|t?|Eq+OVQ-Q2~m|5?Tt|tn5#Z7@haHq=-rBc5>QTI
zNv!oV3zqP~wTtfS>vJIfg85QZ8I(wcg@rB3@;AtqlM1ma7hxEZ<0f;$Mxu<bZWg|!
z<{4>DB8y=c!l_U*=UR^C$-^3HwGyo$5L!Vkrr=g_H9M=NqN<wR(9qBd@vs_EbI9gN
zi&PG^T)r?Y4{lG%)@gA_<kaA#C3jhL!YcGVrAuJZMbrGcU!2135eM4n5j-3_N~?X(
zQIgm~=My!^QgOY^!(upA)@qx}iT1&H;`+hD`H=hYLhZf0&D?`ox3_TFtLGNVAQ9ma
zD!AH)Ko6#?E)BX5RIn}RFeIR9LAH2IwnDm<f)B=2da5-n?W}v#8EmqU`Gkh%vr&Gx
zgR*l!s;nYL_)vz91Euv1e53AS{1InNmPEL+o}q%1+pn73!NT9;hr-R@_awIdZ5>(s
zg9=gn4^&7Xn(aL{nk3nFtl-~wGVHIML=R9z61dtr`+0NY0c!zBd2Ktx@fkt3jt1qY
z1MwNW3pNO}-Pf-E5P9uSR8@xInCoii3+jYpiutqh9E{zLl=3yJ9Uu_zyRJPe|B0ZO
zP~2WEeBam#Tt96V8t?%$*v?!xze<+h!`P;$tQ@$zyNixw@nNcQFXSo&G2eGEL+Cc=
zl9hlk;|#&OwtqUB9NwUuxp8i!viRquWjfZ~kH4~iCULs*85oorD)CYh(g-+^XSiVn
zjDoE@;-EZv!NNh>0FPFJD!*Fhk5!6a#f{{3ycFeqQ0K9z+>oKHqAK(G3)9{^FE<G#
zJeET~4#(_cE<XkW%$5T0I$Gy;m-9x}o@X%+7z&RoNKL=2S<(KAgIc1H0sL`rTN7nN
zdJcl8_JV<od(T0D;kKeGce3j9JiWT5OKS^3e^=edKWB}T8VRah2}H*x(;Q-Ut-Y1S
z;YxRhFUzZJ7`_Zt@9@4Ww~FVa0-2xK8IoUHJ4q}zH8GK0TYDkip8QQ0!UHMDuyT(3
zs;sn_)~0VbWP%m>5~Y?^ez^>Lpvx!O=P()K`YPZ4{kfwY@eSg~8-N+|ypxqx($Y$C
zD5m%cx{LA2$(-6+0#L;TB8|Wagz7~Zn!Tpw6go=rTbMP|RP$43i!1YH>c_3$s=8^a
zPeIBdE+#(jD$RRSzh;*JU^^OWuT$-%V8iV%`0gr2qCf6-E3BCG8;IidThVVXC|kAz
z7s5T)IUuHk=aVuW<_=O*JCLDtu0MD02^+@p45L2Jm2_-y<1pZ@q!NdK_Q8w=PcQgh
zy^~}zoA@#Cz|QAfS;uuS9vcx3;4X*07Mywb1Z`{1&=F^%;HQcGesc!_bEa(@Fcd~b
zD)_})Y($5Vto0Pu#W3^Cx|o_2bWEZ2Nv7J)?_@7Z$s=Af_xdTj|6O91xc{YE<k9og
zUv#<264(3j-K+w0U^b#f;a9kn2KxCP28bY+m|Z4*Ac+_@1Kgc`z+%f{W${<C1J7sf
z*;gmk8|m()JD8=3#-YB<m$&!CJsT}zX)21eKT1mv#fdrZXTD;LM$(05r2Td7N8BGO
z=_L{4a?2#0GA2d2u@Twjp2!p9H!teQTA3%_dmZwID@CVL8}WsuiXHCLx?JXYI3>JM
z1nxE|x!6)I!m_t54<3+$dKngZAD{sI^ZNl8LCCOHL(UBSYJPOO?*V>;q+cO|8N7TL
z-+O#Kbn*sL0Y6DbKP{?1eK(xX%)G7Ec3n$4Yt%1GAseUe4qN1P#;3*;q0&Z4?A|2v
zES{!p5>r{0R(3q{pWfcA!+EvwdOFX&f2D{3iDkJXPhAdp$L4@E(Y-E^-1NH?UUA};
zy>c+R$^02+i_J`>X?1HK%dJ}=iCjBIN;@*)v$K{O%}QUbUok9wK@XAy5ycx*!fSlG
zC;}SF$#)1QyL>{+xSzo;C2`LV4&uOVp#nu_&n}TJub6jnxRBLv@6u?`fCHIlP81nN
z$=sxnc{s=vGi!%)qC5hG%x+h-OZ@@gaTCbmI{&WZD#XwBx&FOI$C43!aq)isA!f;b
zC{eENJI=-2Cx}9xtq2f?bC{`=y*<cG^=(FqAQ+G`G!({=2+6ZlG!aPcW}y{y17I|)
zE=}K<;wE%CchdS{6Gp#)B5R1!C~JUX_m1s`f8=AYzs9b34zeh7Ag83{^Vjbae~@R&
zvhgTSxx9=SuKDS!%nywXwP<3mLT)Rj!E(8C76>=ThjPhle390%sLwt@eBIcXUC6Mz
zdzmky(tjxZyqdb^q&H^iv#Bg0)>Y^5!yxlj@32qU-xzP~a3+b^4U=OfvQNx;>U9TT
z9}R-NBmOegz)H5%NPTX2nTnh4jYv20jbO>sWZ#LOYhA)Pf08Kktw$8{OgW}Ad~{g$
zwtB1$&GUY=65!y-m;WRvc6jJhB;w$pc$0+M`3x1v%V;Dt2VXuByxWe`_+>=zqznCH
z+%rbG8Wt1GteM}@YjAZ#UME5H;@eeSUBX%z^Ki~xE@kyC=VwFw!Irh5#n+!$STlAp
zPIID=$;ig3IkBNF;*wmyqQ`2y4_6&hLg{2fl`TfJQ)VaeP<`rBPU<bS%H^KrCKJ@I
zwE9F}JDNk+T%a2q0E@dJ3Qp$c+7~7j)hbCDUb-O08%|-jSnk?7%_*nCG9Rv?iha{o
zEjP{nyB+wfwR)pP9i=(hy4I`}*`U(EzH&}pzRw=kN|}9=3BOU!1fH#bk*pT;@!S!Q
zVEsAKleRq=NL#)ZRX3XX0USM4CCo%EYHF`NUE+7)+h02pN6<`tAMy$fGP91)3soqt
zM_5OWwcH3Xjtyuyu&+N?sqe<J-YJt-h0hyb<X}d<Ixdnaa_D_21$+`<b3V8E0+EOJ
zH)FGW+do#`m`$kU+4>p|mOMf9f~E|qe97c3RvkKAu+NNBea<M(H2TropIJ;2_Qmy(
zXHwpM)SCwH-1HNHQm4j1gO%WE-CFOGP(-c0fSFt-a^^5P_%545sOxY0lY73SP-d#0
zYgj1x>6Zy`)|?Z0{AF0C$dhkEOG2VxuyV)GW=fo6VLo+sNdgO(kJn{sb2%pWZQ-9(
zDgHgJFt+#r?q*Qf%LQ8Pgc^*o@)~Yc;)4o~7(gpg2ra)Cy-~_P-lp=omSR5m5nj7f
zyUl|o8{vPTOn?(bBcR0OJ!)V!l3=QS3L?+%J6d@3cy;HqRG#=%zrh5D;(qXm4H)xg
z6{X$px9d~SFB($fnNX7)RJ8OYQ2p~Wq!207XvFF15b=p<JPS*rL+vu)nJ~KCsZ%=b
zVLsj<6jM>bTOanzEA*pUR#E1J_hH3=ww=kMW$%&9+Y-V|eT7*;t3S^|sG{1{avw6f
zl#`$v6>R8jWxN?FYrmH-@_JuXK7475+$p{alv~7_++VFqV!D!4CH_5jrM!1$@|IS)
z^&@d7Z6|q8uXMtNx)9-2=%bTc5IZ%5X0>vsuP<!nG9Bs729raQIa&M*8AiA;gG<{t
zKDDm_RVf$nE7X)|#0fu~#(3_6cXTNa_9M_Ft2%1$UZls@#qcR2Of!mN*7nUbi!=@Q
z7ApNuG4<aW@F}xsC=-oxQt&y~l1fn{<UpR}q`Q3~A-P>(xU*fp>RoU<b$YhwJ0`Qt
zf$?9eGi_tTsK+30C>2w}qYIC$yQF;Yi%vk8w^H!#qt5AJzXN!5o(6SUmKmWD26Na&
z-RsG8vF)L`dY$SQ3j4A!l8^t<D632=WvQIC`@RPHqyNiGxUVn7ozUvd%s2)Z{u(6I
zkq@gpl4L7FkgloMw}P-SV5m8mjiwny(S=XnUbLT4mYMSAW@95}V_4S2x3paJ6W$B8
zdS70qf0Tl1_E#28<NkBblFzIQ5v_+9S?1?Qqm<1XS4LlgC6Tvy`v!v$)~uhnn;S2W
zuX1C+5KLe!(@8h55C%Wx(1~Ls?)PI%j+dwY!7fqw_rVS()FW?@pycET!WX->>24R-
zi-*J!%gQ$J|J~}u)Ake-Ol%p$5z!YZKpd&#iH9m(<r7T;YM>H;<2Pc5sRnUrlis#z
z%gneJGI<-!)c?g4^=ndJUqzyn&v{Myj)+>nt+d2;KK>hYhDFWmA4;ueeDhDGDBSYa
zz$aK%XV+NXUs+62i}-I)=uAZe@>f5jR>iSWv>Pf;mx&Pt8MveZ3g0N23RKamnp}ya
z1tQme^nNNom~W6`W>t_*VvhjH7yDEGBmi4fOKlL?e4Zu&G3|zruLxL@=8lYgJyuxV
z4$c>|jJ~7C|4(%5HDtgeVv+wCn&W5uY7XbPU__NTg+WaDMWb0%QePzT9TIE$K^IgN
zYbCSwniV`#0Nrb`4D~uwlZo-FdNxe4mwq!tQNA|;RUTs&Y5}k9mmxkCJ7UF6ckl4l
zx5Q#itE;PXe*Hp?x!c4VI^`m?@_3#6KktdAT#$jhrWLfL|F{8fyAVSnE~^MvMe1`J
z150E!R-|@nboMJ;XR8T~97Hk5#i78LhiWYs_J%L(T+Mys2fkOMONKWurOJ!5k)EQA
zf`+>X>!91l7sFG}7p~+0kj-@E8h3Pf8ya$l9G~lIK>sH^2<DA*72NqH;1pC;SxJWe
zq*sIzkqtkwoW47ThV~e-$)o!r^;edgvj%c=F{wiCAXy9?!nZpy?OgJ&s?5G;xUIT|
zI&+C(BQhoJ^r$8hv{f=ap|RgLX*?AK5Zl1RKUan(Md$ytccqU|c5Qqx)}f?H44&*e
zSw>m2jgc7pGD5~ac3Fm!oiX;1rLkn+$r6P;p+{w3vsOyhjO>((_hz2=<NF`H^XZ;1
z_c{0PT<f{6^E<wZ==6cxTjgE`kX9KgIbXL+6<=JWu~pHs<;i@kDSW9n<)hX+e!XCU
zGPQ<f^3_&sy%%%VyzbAhcQMn+zGRySc;|mc|9H=aSUrQdxK~BHXI@&!TbA_KUI2kC
z)y2n>o;C@bsU1Nu%X+v*1my{(VPb1z)NFEp??~f@yf$AV`ymnw$49$%KNCl@CP2nV
z_!Af`O1&;$zF+TyyRon>&$4a0H4MYyQJP=rmABUi34!^|7K-U<x?>PZUZaZvz#U~p
z(O0CZ@nrqpc`VaKj4U{pC8%kF5FT5lk^^CS6iFDA^11$JTeB@(Yv-q|GT-+}BB?5n
zGV8h~{2%;1rS%|$o&_K_$S#v30YtqoxKxj~|6sjuFqmD~z$9f8muX4LKCo0YC5XG1
zfIMwmpdxF*V){BYzTcQ*Ttl+rmgr5{-!M6<xE_0g5Q`M*V(@`<HJ(O@uA8O=71s8p
zZ*8mH4E_vxA*GyB4(H5-NAKO={F~~<iOczd-90;{VDVP|4sn3uqG%M1BdLB>woTi9
z9hBIGAgg$<@Kn^P!nxj)e3yp3It*-DOn}ss^VN9mE=MojGK8}Mkvix@YFmeg7hV4{
z9t7%o$&2R8KHA%j{i};(&K3!Oz|bZ-C->NtKK?_F+3o^|P#!=sqpe#qcO+;PMaIv!
zZw7;!U+C6S4;eVcj!2L0eII-@QC(?wSac(#k+M^>axO-l>bkY`$9oTkj{ze1X08`Y
z#aB4@n!CT?Jnnr<neJFe<tp6+N3!=?r`>7ED#=1nd>LXp;8a=p?eUD(*1?_m3_R4G
z)?7;y;;b+OJKJ(i8c&>Ngi}{<^z`9D;a;r!Ooy<5!hUaWg$N_;2PtV6u$Q3EhMF9a
z#F#1Dnm2V^PaTfCW43-qp<iGlOe{n=y|nvh#^vwtvB5aLjovLgJ)@DKg(c=0rT;x<
z>85j(h0FH|J+Jk82Ag8m!^(38D)D6*q8@1?z^@01<W^&OBWXWAlirzH9Pn-!varA5
zU8efaLN^$6YUw*^Kw`#+`%D1l2JnaigD9&kJFEmD??_BvE<0!qptsme)J)yzOb})`
zJ&(_?rt3UX5x=MxV(cXCP}+oFKF8_i;rB_1wX1wul<XeTtt4sP`;0lLL%D2H*_81N
zqbzArtD~>fgI{o^c!$zmJ^49|y)l)4Y$(w_Igw$V+0LmZFct|9??4<1nP)hH-DVYw
z32#t0d({%NNjXGOyQRnnh$4ED!ek^`nZhiy;3h9rOOM74QPDWjew6O$GEri#7`#*~
zxDo<C3-#qZ$T$@BdfsxJ6n}ELJ5KgvwyI`?0&+&~ZH^-Ij@vs3!AAhL^(OTqpgE21
zPEQ@{Oa0kY)9#1r?3`Tsqq5evr1j!^qwC7YSBGdwS&TKV07<f`)Vm3pwQdl;LYw@Y
z=L37hROxiJiU`g;Nu1s=O&un(v2w_%5(|K`62fa!fmvrEOGKywd(D~^z`BF|tY-z@
zj&^n7LKk0^dGO8Sdb|ccuZx?q+^T*2N}b(QpPLgXpR@Y1)wS!<>yl!T^R@Yh(-B`i
zw^q-HVD{@iP1qh<!~0W~Griz>>-hOv#Tj^7_CVQ%U<g*bi2h}L@%8Z4orTJwdb9pA
z0kSmw_LXZYgEt#luTp>rln-yB>-iy>!kvxC8<)NXnb0!Y;Ao~BUc`;ps;n7&DUjDb
z8os$i>+f#VV2i$Xb;A204G3_Bqm%4f@2b^P5$&q<8jS@$CwWa$*n(!hcpnR>7SCmV
zV9&m^On;w-$M#c5pJkPmIrF;P+xZ5|xHAQ8)EuI9XV6pxb(Z!Ma#;2H8!fzVo%4zn
zV?h-(v|iNWd2I>%>930{!1u&7*;=~sEsl$$Qi01l!P4x=3t(XN2}PK`H17DK@Nn&z
z`Pw32Y$?aiP4lbMt-$2&{>YQkxHCBP&<OF){<iZVb?AM&hAZvoo@g3LbwCbj+>);c
zU{;Q0hjs<11AB@{>10Wr;kAg88^W{;W90*hWz^%*9UrbC&6}5#qFk7FTyfG<i&aWV
z<w`(;@!Bh6A4nChw1>7BW>3Z%AG~ialB!6+pPJ)x^8v$`0Y5m~ithrk!m~x=xhfBr
zNkWlSNoZ?O=9SoadWTPpSNM(8?x%!GdsceAj=E^GF&Q{h^6w-x2ll&gM{0A8;^=zh
z@1mz0u8~$*dEIsK{Ay|7T-UGA3hPnZj^bl~oL20gS<gJ9-%e9H|F8(Yl%#GGRnqkm
z^Yu0#H0&l2MR`)|NnvG9wA>P(GoI@gww68*H4dcpa;_%Jp;K0Fe+@gfo-0m}w3MdJ
zTpfqFINfl<_@RwaDV_H*Jx>BNHN~I=<K*JQu?b|=CnH;6uw2iSqF-n+%D*n{ItKyt
zC=|bjfY3Tx$ze6F=S#p$OhkYgzEEf6y{S1$Mu}GQYG?-*_-5GaY}IjEU8ql6DaTXN
zcblp?+wfnvb1Y4>&%{A^ZgBx#p-wyZ`Yw<m?UyTl_AAfS)jv+(fbjv6=;Pv@8RKLr
zM>2064Kv<PK2jWG!&ecQ!4NJLX_9gfO_jJh3y80^(InUfN)I6j>8)0Qji4m8BykOs
z4eq4;TuK2YOS#uprFtd;gh-W+r%Ehu)5Gfg{_^`3-BdfcoHdjCFZe>`QF3ch8?v4%
zK`VNfS%-Ft)iP*8QdP8v3p>PUbXfA7(gMsP>vTq-qwJU`3ONmqhmyF;8jH+XVHlRd
z)PNH&svzeK*NUd5*v^hicTu{q=f*5>p%KJJ1nWS#<9Hn{!^@^AV5p73p)^FWTzP7Z
zzMu{-OIb{<cm4n!40hu|E=uQYGLQ0nrO|$$FiL<<6pPVlvEKb0#E~3t_rf2zhPZ3f
z90dBPLXWN=nc>n7gr7rQDs-2D>Vy+HG3!LIO~ZNuzz}EzPQD&k_9{@K-E`6_V^&yF
zQB`(bEri1#6)o5gRMgTi!!=EqFaCjyfM=(p<c_CAr~{2ty-ux{dhhJ(WN5XkHS-HT
ziRHO0^u2$C|E0}Td`N)AP{=Xmhd_X>1Co<QUOuHIoHI%W?MHG{f}<jN<A5*b4F}4M
z7vhQwE4hABP#vTVgbUMcu`%!)h4UsDUK)LTTq4pDMKTRZ9j~!Smj$c)93_|C+-iTH
z_?B-JOtfl$xH45(iQ_hag3<Nmym=S-d^qCAQ}{*LYjhyf@9a0L494inZtTAw`>U(E
z-9X`a$Gr+~&ldSVWcwnVuw+{AT(HnRSo_h(odIeYE*!7sixTdqgjz<|-3$j2dK2kH
z6bf3R<PQ8Z_C&<iqDW<#iLQptBxB3kXWsKVhf{ybjhLk{6=__*by>4pEbJRU2Uc&j
zJ^IvX+xV2w8<sZGE<wj;F8$>gutpOv@3M@(a1DEG#y>lJ5K2RiJ^S=qGcCU|nwkLp
z9_RBSh7IjjSr#2HpdsN-vpgIrg`H0Cm@zJzfGKlPyNUBbrh?z~+IQRgelu}<2q-1m
zT3-9ta1C$Z8>&+yFkd^}6CoGe>#2mMQe(#4W(9*JQ8dw?#DP1Gg?Rpif!Lzd2A5U2
z)cJv<q7W!UwZZl5S`Q>StRiJLbrHKCCIfx~VK2DzpsfSZXisY`-cN}+nIGlS?Z?;C
zP8s_WP26C*q69vM$5Eskr`?mex_^XfKaFzG4tI_?u2$u?@MZ(I)8}Cloh*Gg7)-Um
zAR+m$9(+xkiIW$t)<<$DPn;!LjyvpB*C)u~&r+l{Wb#qzHH|w=fE5tlTF2R8%pwJK
znM)V3GmWzD{$`O%8B7!uk=cC&=G;ngDwL%w_S?-@mi^x!?dJldXFODc5+CI!*zj*3
z%9{HaFFG>8lxp96zf8%zRVmN$UEOZPCsG#lMqDj@XTY5ai~YV49McrN!f}u@@3?#B
zZ!aw2a|de_0_r&YB)2t8#xrl}*ZrM7Bts$LDdO$YcFVUWq_B;55cpWxu>?qUJRyq7
zlhC6Vls+TVMkHy*(+QYi6dhVUDb!Q&noOSAtiCpn=A~IspJVcb>Q`13R{mbwjp5tm
zlu_+u<v)YFk4xd=sfQ0bfT!5lBAPAk8kg?5is3%OKvMqnX%iet?D|M>0%YY;;{<-H
zR4o(1RyDENxKZg*ZK|r1bw|@#ywq)?#31ArjoWh5+L@9Oe03$ACLfhu+xZRiH^VjZ
z7*mn<7CI~KaByc0&MxIkKp&%m{t5m4X2*i%?*5j;DNi6X1wyA4iC-!(0K*`AcN=7w
zLjgI35DtuMm7hL$FMzFAQSKWv&2;MORufOM%Vags9>e)fb$s`u&zg*N8>MD?9wSHj
z6?!UzBr!&q{KUvL=T{TSf65zv{^Vo)cac#%t<V|~T~8=@e{oNUzERe*^n~Id(mrgy
zv;QI7kHNIgGIcgPbrU~l_<4-9W}shyZ1X8_eikBXW90%h@Tz^Cpk&~vS1a0~{vaj-
zMi$u*nd##NlD&Qk5ipU~#e*1{X}(HX-<OrLf1D-SjSpDBh2qT{z!|fk_v%@mub5w1
zX=$3!@{S^q+usUj8&2SN?p2Ev+lp3&XKVu3YaNZpJ+1Hg7fCC9u=^2EQ_A0r%(ech
zqgVIcp9kW6)Br4|cn$|_C-|VZrqa%$&kaaKy2B|@sWdh%AyPoLQSWf#D*l86=G*BE
zP{alOtSw6vF#t*V<wI?^L)X6=obYeMbK=QH@o?#ubCT?IzR~Buj6GBRiK1OA=A*zC
zNq2$t>h7FkF}vmYOsDsaVoS&q5eD{$0(K)#bFmA3J=9S&9=xOAb8;>F93@6j3X4zT
z9=1UOH_*+JqkYa~%R+?t0{co(8PY88YfkL2=#G4h9W7)U{-pn1)l8q)qkSOWZc0$r
zJh#KtlaT;9mC_#{M4d0)%l&m;?%5YZ2ew$)>`@;6`S05biYpAeyH>}1+jKdxXEVpT
z^=4pznM*Rj_=X5*L(&!FP9P5Ri{+-H05zFBcR}qY{tO0y>Xw~bZ`kM*`R9JPY^@5_
zqz<v&G+yHGp`*K9;_fO<u4J^!%5n#4*iq&PQOx*7l%u0Y3DD4zyRgTVrOUZv`r`Ok
zOb4YYKT*w3_xh}&19qKIRQR)7OtaO7WjT{!RBP(Csz2?P3eA7t)&lUK?ny{0k450x
zsCz;uo_M^q#XaeC^4t9qSk0ay+ay>Hn47jXJyR7wAL~yOK5@dL7o;|Ct-l+H`Ek9o
z3aGSrJNc1~6A)3F<HA7d!n9lbAZ$EbunRWc$oxIIGwo}*hz49hI1OxefhWNpSr+YN
zN!e~>e)z-<^xT?sm}*u~fbLBENy(t=Q(u7oK_C7NDDf(`P?8OfQzDPwXIVg3c^m#I
zFww^k>m4WTA94c<Z+OBkk)j>hFVPJ}L7`MtZxLU_3wt*%fBQ`x#EN4zXOhK|QCrpU
z<w2WVi`ca3mwfb5ykE2uC=5~8+d;3IWc{Y?^g!(=R0TLQ9ZHc$WU`RdixSPdVT*qG
zAfx`C^8BGc)$yOCm4PQf<@2o<Il4h090b;*<o^4o6|pEu-lpW`B`UMkkdq|f8S($;
fQ#JeFI2@mV0(2u%UbVApgMgoomOi3R0~7flBK9fv

diff --git a/ext/pybind11/docs/release.rst b/ext/pybind11/docs/release.rst
index b5de60f4e5..e761cdf7a6 100644
--- a/ext/pybind11/docs/release.rst
+++ b/ext/pybind11/docs/release.rst
@@ -22,6 +22,9 @@ the version just below.
 To release a new version of pybind11:
 ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
 
+If you don't have nox, you should either use ``pipx run nox`` instead, or use
+``pipx install nox`` or ``brew install nox`` (Unix).
+
 - Update the version number
     - Update ``PYBIND11_VERSION_MAJOR`` etc. in
       ``include/pybind11/detail/common.h``. PATCH should be a simple integer.
@@ -51,14 +54,12 @@ To release a new version of pybind11:
   notifications to users watching releases, and also uploads PyPI packages).
   (Note: if you do not use an existing tag, this creates a new lightweight tag
   for you, so you could skip the above step.)
-
     - GUI method: Under `releases <https://github.com/pybind/pybind11/releases>`_
       click "Draft a new release" on the far right, fill in the tag name
       (if you didn't tag above, it will be made here), fill in a release name
       like "Version X.Y.Z", and copy-and-paste the markdown-formatted (!) changelog
       into the description (usually ``cat docs/changelog.rst | pandoc -f rst -t gfm``).
       Check "pre-release" if this is a beta/RC.
-
     - CLI method: with ``gh`` installed, run ``gh release create vX.Y.Z -t "Version X.Y.Z"``
       If this is a pre-release, add ``-p``.
 
@@ -90,9 +91,7 @@ If you need to manually upload releases, you can download the releases from the
 
 .. code-block:: bash
 
-    python3 -m pip install build
-    python3 -m build
-    PYBIND11_SDIST_GLOBAL=1 python3 -m build
+    nox -s build
     twine upload dist/*
 
 This makes SDists and wheels, and the final line uploads them.
diff --git a/ext/pybind11/docs/requirements.txt b/ext/pybind11/docs/requirements.txt
index 8f293b5d34..d2a9ae1645 100644
--- a/ext/pybind11/docs/requirements.txt
+++ b/ext/pybind11/docs/requirements.txt
@@ -1,8 +1,6 @@
-breathe==4.26.1
-# docutils 0.17 breaks HTML tags & RTD theme
-# https://github.com/sphinx-doc/sphinx/issues/9001
-docutils==0.16
-sphinx==3.3.1
-sphinx_rtd_theme==0.5.0
-sphinxcontrib-moderncmakedomain==3.17
-sphinxcontrib-svg2pdfconverter==1.1.0
+breathe==4.34.0
+furo==2022.6.21
+sphinx==5.0.2
+sphinx-copybutton==0.5.0
+sphinxcontrib-moderncmakedomain==3.21.4
+sphinxcontrib-svg2pdfconverter==1.2.0
diff --git a/ext/pybind11/docs/upgrade.rst b/ext/pybind11/docs/upgrade.rst
index 69609ca284..6a9db2d08f 100644
--- a/ext/pybind11/docs/upgrade.rst
+++ b/ext/pybind11/docs/upgrade.rst
@@ -17,6 +17,10 @@ v2.9
   converted to using ``py::module_::import("types").attr("SimpleNamespace")``
   instead.
 
+* The use of ``_`` in custom type casters can now be replaced with the more
+  readable ``const_name`` instead. The old ``_`` shortcut has been retained
+  unless it is being used as a macro (like for gettext).
+
 
 .. _upgrade-guide-2.7:
 
@@ -520,7 +524,7 @@ include a declaration of the form:
 
     PYBIND11_DECLARE_HOLDER_TYPE(T, std::shared_ptr<T>)
 
-Continuing to do so won’t cause an error or even a deprecation warning,
+Continuing to do so won't cause an error or even a deprecation warning,
 but it's completely redundant.
 
 
diff --git a/ext/pybind11/include/pybind11/attr.h b/ext/pybind11/include/pybind11/attr.h
index 0dedbc08dd..b5e3b7b22c 100644
--- a/ext/pybind11/include/pybind11/attr.h
+++ b/ext/pybind11/include/pybind11/attr.h
@@ -10,6 +10,7 @@
 
 #pragma once
 
+#include "detail/common.h"
 #include "cast.h"
 
 #include <functional>
@@ -20,65 +21,72 @@ PYBIND11_NAMESPACE_BEGIN(PYBIND11_NAMESPACE)
 /// @{
 
 /// Annotation for methods
-struct is_method { handle class_;
+struct is_method {
+    handle class_;
     explicit is_method(const handle &c) : class_(c) {}
 };
 
 /// Annotation for operators
-struct is_operator { };
+struct is_operator {};
 
 /// Annotation for classes that cannot be subclassed
-struct is_final { };
+struct is_final {};
 
 /// Annotation for parent scope
-struct scope { handle value;
+struct scope {
+    handle value;
     explicit scope(const handle &s) : value(s) {}
 };
 
 /// Annotation for documentation
-struct doc { const char *value;
+struct doc {
+    const char *value;
     explicit doc(const char *value) : value(value) {}
 };
 
 /// Annotation for function names
-struct name { const char *value;
+struct name {
+    const char *value;
     explicit name(const char *value) : value(value) {}
 };
 
 /// Annotation indicating that a function is an overload associated with a given "sibling"
-struct sibling { handle value;
+struct sibling {
+    handle value;
     explicit sibling(const handle &value) : value(value.ptr()) {}
 };
 
 /// Annotation indicating that a class derives from another given type
-template <typename T> struct base {
+template <typename T>
+struct base {
 
-    PYBIND11_DEPRECATED("base<T>() was deprecated in favor of specifying 'T' as a template argument to class_")
-    base() { } // NOLINT(modernize-use-equals-default): breaks MSVC 2015 when adding an attribute
+    PYBIND11_DEPRECATED(
+        "base<T>() was deprecated in favor of specifying 'T' as a template argument to class_")
+    base() = default;
 };
 
 /// Keep patient alive while nurse lives
-template <size_t Nurse, size_t Patient> struct keep_alive { };
+template <size_t Nurse, size_t Patient>
+struct keep_alive {};
 
 /// Annotation indicating that a class is involved in a multiple inheritance relationship
-struct multiple_inheritance { };
+struct multiple_inheritance {};
 
 /// Annotation which enables dynamic attributes, i.e. adds `__dict__` to a class
-struct dynamic_attr { };
+struct dynamic_attr {};
 
 /// Annotation which enables the buffer protocol for a type
-struct buffer_protocol { };
+struct buffer_protocol {};
 
 /// Annotation which requests that a special metaclass is created for a type
 struct metaclass {
     handle value;
 
     PYBIND11_DEPRECATED("py::metaclass() is no longer required. It's turned on by default now.")
-    // NOLINTNEXTLINE(modernize-use-equals-default): breaks MSVC 2015 when adding an attribute
-    metaclass() {}
+    metaclass() = default;
 
     /// Override pybind11's default metaclass
-    explicit metaclass(handle value) : value(value) { }
+    explicit metaclass(handle value) : value(value) {}
 };
 
 /// Specifies a custom callback with signature `void (PyHeapTypeObject*)` that
@@ -99,15 +107,16 @@ struct custom_type_setup {
 };
 
 /// Annotation that marks a class as local to the module:
-struct module_local { const bool value;
+struct module_local {
+    const bool value;
     constexpr explicit module_local(bool v = true) : value(v) {}
 };
 
 /// Annotation to mark enums as an arithmetic type
-struct arithmetic { };
+struct arithmetic {};
 
 /// Mark a function for addition at the beginning of the existing overload chain instead of the end
-struct prepend { };
+struct prepend {};
 
 /** \rst
     A call policy which places one or more guard variables (``Ts...``) around the function call.
@@ -127,9 +136,13 @@ struct prepend { };
             return foo(args...); // forwarded arguments
         });
  \endrst */
-template <typename... Ts> struct call_guard;
+template <typename... Ts>
+struct call_guard;
 
-template <> struct call_guard<> { using type = detail::void_type; };
+template <>
+struct call_guard<> {
+    using type = detail::void_type;
+};
 
 template <typename T>
 struct call_guard<T> {
@@ -154,7 +167,8 @@ PYBIND11_NAMESPACE_BEGIN(detail)
 enum op_id : int;
 enum op_type : int;
 struct undefined_t;
-template <op_id id, op_type ot, typename L = undefined_t, typename R = undefined_t> struct op_;
+template <op_id id, op_type ot, typename L = undefined_t, typename R = undefined_t>
+struct op_;
 void keep_alive_impl(size_t Nurse, size_t Patient, function_call &call, handle ret);
 
 /// Internal data structure which holds metadata about a keyword argument
@@ -166,15 +180,16 @@ struct argument_record {
     bool none : 1;     ///< True if None is allowed when loading
 
     argument_record(const char *name, const char *descr, handle value, bool convert, bool none)
-        : name(name), descr(descr), value(value), convert(convert), none(none) { }
+        : name(name), descr(descr), value(value), convert(convert), none(none) {}
 };
 
-/// Internal data structure which holds metadata about a bound function (signature, overloads, etc.)
+/// Internal data structure which holds metadata about a bound function (signature, overloads,
+/// etc.)
 struct function_record {
     function_record()
         : is_constructor(false), is_new_style_constructor(false), is_stateless(false),
-          is_operator(false), is_method(false), has_args(false),
-          has_kwargs(false), has_kw_only_args(false), prepend(false) { }
+          is_operator(false), is_method(false), has_args(false), has_kwargs(false),
+          prepend(false) {}
 
     /// Function name
     char *name = nullptr; /* why no C++ strings? They generate heavier code.. */
@@ -189,13 +204,13 @@ struct function_record {
     std::vector<argument_record> args;
 
     /// Pointer to lambda function which converts arguments and performs the actual call
-    handle (*impl) (function_call &) = nullptr;
+    handle (*impl)(function_call &) = nullptr;
 
     /// Storage for the wrapped function pointer and captured data, if any
-    void *data[3] = { };
+    void *data[3] = {};
 
     /// Pointer to custom destructor for 'data' (if needed)
-    void (*free_data) (function_record *ptr) = nullptr;
+    void (*free_data)(function_record *ptr) = nullptr;
 
     /// Return value policy associated with this function
     return_value_policy policy = return_value_policy::automatic;
@@ -221,17 +236,15 @@ struct function_record {
     /// True if the function has a '**kwargs' argument
     bool has_kwargs : 1;
 
-    /// True once a 'py::kw_only' is encountered (any following args are keyword-only)
-    bool has_kw_only_args : 1;
-
     /// True if this function is to be inserted at the beginning of the overload resolution chain
     bool prepend : 1;
 
     /// Number of arguments (including py::args and/or py::kwargs, if present)
     std::uint16_t nargs;
 
-    /// Number of trailing arguments (counted in `nargs`) that are keyword-only
-    std::uint16_t nargs_kw_only = 0;
+    /// Number of leading positional arguments, which are terminated by a py::args or py::kwargs
+    /// argument or by a py::kw_only annotation.
+    std::uint16_t nargs_pos = 0;
 
     /// Number of leading arguments (counted in `nargs`) that are positional-only
     std::uint16_t nargs_pos_only = 0;
@@ -253,7 +266,7 @@ struct function_record {
 struct type_record {
     PYBIND11_NOINLINE type_record()
         : multiple_inheritance(false), dynamic_attr(false), buffer_protocol(false),
-          default_holder(true), module_local(false), is_final(false) { }
+          default_holder(true), module_local(false), is_final(false) {}
 
     /// Handle to the parent scope
     handle scope;
@@ -312,42 +325,45 @@ struct type_record {
     /// Is the class inheritable from python classes?
     bool is_final : 1;
 
-    PYBIND11_NOINLINE void add_base(const std::type_info &base, void *(*caster)(void *)) {
-        auto base_info = detail::get_type_info(base, false);
+    PYBIND11_NOINLINE void add_base(const std::type_info &base, void *(*caster)(void *) ) {
+        auto *base_info = detail::get_type_info(base, false);
         if (!base_info) {
             std::string tname(base.name());
             detail::clean_type_id(tname);
-            pybind11_fail("generic_type: type \"" + std::string(name) +
-                          "\" referenced unknown base type \"" + tname + "\"");
+            pybind11_fail("generic_type: type \"" + std::string(name)
+                          + "\" referenced unknown base type \"" + tname + "\"");
         }
 
         if (default_holder != base_info->default_holder) {
             std::string tname(base.name());
             detail::clean_type_id(tname);
-            pybind11_fail("generic_type: type \"" + std::string(name) + "\" " +
-                    (default_holder ? "does not have" : "has") +
-                    " a non-default holder type while its base \"" + tname + "\" " +
-                    (base_info->default_holder ? "does not" : "does"));
+            pybind11_fail("generic_type: type \"" + std::string(name) + "\" "
+                          + (default_holder ? "does not have" : "has")
+                          + " a non-default holder type while its base \"" + tname + "\" "
+                          + (base_info->default_holder ? "does not" : "does"));
         }
 
         bases.append((PyObject *) base_info->type);
 
-        if (base_info->type->tp_dictoffset != 0)
-            dynamic_attr = true;
+#if PY_VERSION_HEX < 0x030B0000
+        dynamic_attr |= base_info->type->tp_dictoffset != 0;
+#else
+        dynamic_attr |= (base_info->type->tp_flags & Py_TPFLAGS_MANAGED_DICT) != 0;
+#endif
 
-        if (caster)
+        if (caster) {
             base_info->implicit_casts.emplace_back(type, caster);
+        }
     }
 };
 
-inline function_call::function_call(const function_record &f, handle p) :
-        func(f), parent(p) {
+inline function_call::function_call(const function_record &f, handle p) : func(f), parent(p) {
     args.reserve(f.nargs);
     args_convert.reserve(f.nargs);
 }
 
 /// Tag for a new-style `__init__` defined in `detail/init.h`
-struct is_new_style_constructor { };
+struct is_new_style_constructor {};
 
 /**
  * Partial template specializations to process custom attributes provided to
@@ -355,129 +371,177 @@ struct is_new_style_constructor { };
  * fields in the type_record and function_record data structures or executed at
  * runtime to deal with custom call policies (e.g. keep_alive).
  */
-template <typename T, typename SFINAE = void> struct process_attribute;
+template <typename T, typename SFINAE = void>
+struct process_attribute;
 
-template <typename T> struct process_attribute_default {
+template <typename T>
+struct process_attribute_default {
     /// Default implementation: do nothing
-    static void init(const T &, function_record *) { }
-    static void init(const T &, type_record *) { }
-    static void precall(function_call &) { }
-    static void postcall(function_call &, handle) { }
+    static void init(const T &, function_record *) {}
+    static void init(const T &, type_record *) {}
+    static void precall(function_call &) {}
+    static void postcall(function_call &, handle) {}
 };
 
 /// Process an attribute specifying the function's name
-template <> struct process_attribute<name> : process_attribute_default<name> {
+template <>
+struct process_attribute<name> : process_attribute_default<name> {
     static void init(const name &n, function_record *r) { r->name = const_cast<char *>(n.value); }
 };
 
 /// Process an attribute specifying the function's docstring
-template <> struct process_attribute<doc> : process_attribute_default<doc> {
+template <>
+struct process_attribute<doc> : process_attribute_default<doc> {
     static void init(const doc &n, function_record *r) { r->doc = const_cast<char *>(n.value); }
 };
 
 /// Process an attribute specifying the function's docstring (provided as a C-style string)
-template <> struct process_attribute<const char *> : process_attribute_default<const char *> {
+template <>
+struct process_attribute<const char *> : process_attribute_default<const char *> {
     static void init(const char *d, function_record *r) { r->doc = const_cast<char *>(d); }
-    static void init(const char *d, type_record *r) { r->doc = const_cast<char *>(d); }
+    static void init(const char *d, type_record *r) { r->doc = d; }
 };
-template <> struct process_attribute<char *> : process_attribute<const char *> { };
+template <>
+struct process_attribute<char *> : process_attribute<const char *> {};
 
 /// Process an attribute indicating the function's return value policy
-template <> struct process_attribute<return_value_policy> : process_attribute_default<return_value_policy> {
+template <>
+struct process_attribute<return_value_policy> : process_attribute_default<return_value_policy> {
     static void init(const return_value_policy &p, function_record *r) { r->policy = p; }
 };
 
-/// Process an attribute which indicates that this is an overloaded function associated with a given sibling
-template <> struct process_attribute<sibling> : process_attribute_default<sibling> {
+/// Process an attribute which indicates that this is an overloaded function associated with a
+/// given sibling
+template <>
+struct process_attribute<sibling> : process_attribute_default<sibling> {
     static void init(const sibling &s, function_record *r) { r->sibling = s.value; }
 };
 
 /// Process an attribute which indicates that this function is a method
-template <> struct process_attribute<is_method> : process_attribute_default<is_method> {
-    static void init(const is_method &s, function_record *r) { r->is_method = true; r->scope = s.class_; }
+template <>
+struct process_attribute<is_method> : process_attribute_default<is_method> {
+    static void init(const is_method &s, function_record *r) {
+        r->is_method = true;
+        r->scope = s.class_;
+    }
 };
 
 /// Process an attribute which indicates the parent scope of a method
-template <> struct process_attribute<scope> : process_attribute_default<scope> {
+template <>
+struct process_attribute<scope> : process_attribute_default<scope> {
     static void init(const scope &s, function_record *r) { r->scope = s.value; }
 };
 
 /// Process an attribute which indicates that this function is an operator
-template <> struct process_attribute<is_operator> : process_attribute_default<is_operator> {
+template <>
+struct process_attribute<is_operator> : process_attribute_default<is_operator> {
     static void init(const is_operator &, function_record *r) { r->is_operator = true; }
 };
 
-template <> struct process_attribute<is_new_style_constructor> : process_attribute_default<is_new_style_constructor> {
-    static void init(const is_new_style_constructor &, function_record *r) { r->is_new_style_constructor = true; }
+template <>
+struct process_attribute<is_new_style_constructor>
+    : process_attribute_default<is_new_style_constructor> {
+    static void init(const is_new_style_constructor &, function_record *r) {
+        r->is_new_style_constructor = true;
+    }
 };
 
-inline void process_kw_only_arg(const arg &a, function_record *r) {
-    if (!a.name || a.name[0] == '\0')
-        pybind11_fail("arg(): cannot specify an unnamed argument after an kw_only() annotation");
-    ++r->nargs_kw_only;
+inline void check_kw_only_arg(const arg &a, function_record *r) {
+    if (r->args.size() > r->nargs_pos && (!a.name || a.name[0] == '\0')) {
+        pybind11_fail("arg(): cannot specify an unnamed argument after a kw_only() annotation or "
+                      "args() argument");
+    }
+}
+
+inline void append_self_arg_if_needed(function_record *r) {
+    if (r->is_method && r->args.empty()) {
+        r->args.emplace_back("self", nullptr, handle(), /*convert=*/true, /*none=*/false);
+    }
 }
 
 /// Process a keyword argument attribute (*without* a default value)
-template <> struct process_attribute<arg> : process_attribute_default<arg> {
+template <>
+struct process_attribute<arg> : process_attribute_default<arg> {
     static void init(const arg &a, function_record *r) {
-        if (r->is_method && r->args.empty())
-            r->args.emplace_back("self", nullptr, handle(), true /*convert*/, false /*none not allowed*/);
+        append_self_arg_if_needed(r);
         r->args.emplace_back(a.name, nullptr, handle(), !a.flag_noconvert, a.flag_none);
 
-        if (r->has_kw_only_args) process_kw_only_arg(a, r);
+        check_kw_only_arg(a, r);
     }
 };
 
 /// Process a keyword argument attribute (*with* a default value)
-template <> struct process_attribute<arg_v> : process_attribute_default<arg_v> {
+template <>
+struct process_attribute<arg_v> : process_attribute_default<arg_v> {
     static void init(const arg_v &a, function_record *r) {
-        if (r->is_method && r->args.empty())
-            r->args.emplace_back("self", nullptr /*descr*/, handle() /*parent*/, true /*convert*/, false /*none not allowed*/);
+        if (r->is_method && r->args.empty()) {
+            r->args.emplace_back(
+                "self", /*descr=*/nullptr, /*parent=*/handle(), /*convert=*/true, /*none=*/false);
+        }
 
         if (!a.value) {
-#if !defined(NDEBUG)
+#if defined(PYBIND11_DETAILED_ERROR_MESSAGES)
             std::string descr("'");
-            if (a.name) descr += std::string(a.name) + ": ";
+            if (a.name) {
+                descr += std::string(a.name) + ": ";
+            }
             descr += a.type + "'";
             if (r->is_method) {
-                if (r->name)
-                    descr += " in method '" + (std::string) str(r->scope) + "." + (std::string) r->name + "'";
-                else
+                if (r->name) {
+                    descr += " in method '" + (std::string) str(r->scope) + "."
+                             + (std::string) r->name + "'";
+                } else {
                     descr += " in method of '" + (std::string) str(r->scope) + "'";
+                }
             } else if (r->name) {
                 descr += " in function '" + (std::string) r->name + "'";
             }
-            pybind11_fail("arg(): could not convert default argument "
-                          + descr + " into a Python object (type not registered yet?)");
+            pybind11_fail("arg(): could not convert default argument " + descr
+                          + " into a Python object (type not registered yet?)");
 #else
             pybind11_fail("arg(): could not convert default argument "
                           "into a Python object (type not registered yet?). "
-                          "Compile in debug mode for more information.");
+                          "#define PYBIND11_DETAILED_ERROR_MESSAGES or compile in debug mode for "
+                          "more information.");
 #endif
         }
         r->args.emplace_back(a.name, a.descr, a.value.inc_ref(), !a.flag_noconvert, a.flag_none);
 
-        if (r->has_kw_only_args) process_kw_only_arg(a, r);
+        check_kw_only_arg(a, r);
     }
 };
 
 /// Process a keyword-only-arguments-follow pseudo argument
-template <> struct process_attribute<kw_only> : process_attribute_default<kw_only> {
+template <>
+struct process_attribute<kw_only> : process_attribute_default<kw_only> {
     static void init(const kw_only &, function_record *r) {
-        r->has_kw_only_args = true;
+        append_self_arg_if_needed(r);
+        if (r->has_args && r->nargs_pos != static_cast<std::uint16_t>(r->args.size())) {
+            pybind11_fail("Mismatched args() and kw_only(): they must occur at the same relative "
+                          "argument location (or omit kw_only() entirely)");
+        }
+        r->nargs_pos = static_cast<std::uint16_t>(r->args.size());
     }
 };
 
 /// Process a positional-only-argument maker
-template <> struct process_attribute<pos_only> : process_attribute_default<pos_only> {
+template <>
+struct process_attribute<pos_only> : process_attribute_default<pos_only> {
     static void init(const pos_only &, function_record *r) {
+        append_self_arg_if_needed(r);
         r->nargs_pos_only = static_cast<std::uint16_t>(r->args.size());
+        if (r->nargs_pos_only > r->nargs_pos) {
+            pybind11_fail("pos_only(): cannot follow a py::args() argument");
+        }
+        // It also can't follow a kw_only, but a static_assert in pybind11.h checks that
     }
 };
 
-/// Process a parent class attribute.  Single inheritance only (class_ itself already guarantees that)
+/// Process a parent class attribute.  Single inheritance only (class_ itself already guarantees
+/// that)
 template <typename T>
-struct process_attribute<T, enable_if_t<is_pyobject<T>::value>> : process_attribute_default<handle> {
+struct process_attribute<T, enable_if_t<is_pyobject<T>::value>>
+    : process_attribute_default<handle> {
     static void init(const handle &h, type_record *r) { r->bases.append(h); }
 };
 
@@ -490,7 +554,9 @@ struct process_attribute<base<T>> : process_attribute_default<base<T>> {
 /// Process a multiple inheritance attribute
 template <>
 struct process_attribute<multiple_inheritance> : process_attribute_default<multiple_inheritance> {
-    static void init(const multiple_inheritance &, type_record *r) { r->multiple_inheritance = true; }
+    static void init(const multiple_inheritance &, type_record *r) {
+        r->multiple_inheritance = true;
+    }
 };
 
 template <>
@@ -536,34 +602,41 @@ template <>
 struct process_attribute<arithmetic> : process_attribute_default<arithmetic> {};
 
 template <typename... Ts>
-struct process_attribute<call_guard<Ts...>> : process_attribute_default<call_guard<Ts...>> { };
+struct process_attribute<call_guard<Ts...>> : process_attribute_default<call_guard<Ts...>> {};
 
 /**
  * Process a keep_alive call policy -- invokes keep_alive_impl during the
  * pre-call handler if both Nurse, Patient != 0 and use the post-call handler
  * otherwise
  */
-template <size_t Nurse, size_t Patient> struct process_attribute<keep_alive<Nurse, Patient>> : public process_attribute_default<keep_alive<Nurse, Patient>> {
+template <size_t Nurse, size_t Patient>
+struct process_attribute<keep_alive<Nurse, Patient>>
+    : public process_attribute_default<keep_alive<Nurse, Patient>> {
     template <size_t N = Nurse, size_t P = Patient, enable_if_t<N != 0 && P != 0, int> = 0>
-    static void precall(function_call &call) { keep_alive_impl(Nurse, Patient, call, handle()); }
+    static void precall(function_call &call) {
+        keep_alive_impl(Nurse, Patient, call, handle());
+    }
     template <size_t N = Nurse, size_t P = Patient, enable_if_t<N != 0 && P != 0, int> = 0>
-    static void postcall(function_call &, handle) { }
+    static void postcall(function_call &, handle) {}
     template <size_t N = Nurse, size_t P = Patient, enable_if_t<N == 0 || P == 0, int> = 0>
-    static void precall(function_call &) { }
+    static void precall(function_call &) {}
     template <size_t N = Nurse, size_t P = Patient, enable_if_t<N == 0 || P == 0, int> = 0>
-    static void postcall(function_call &call, handle ret) { keep_alive_impl(Nurse, Patient, call, ret); }
+    static void postcall(function_call &call, handle ret) {
+        keep_alive_impl(Nurse, Patient, call, ret);
+    }
 };
 
 /// Recursively iterate over variadic template arguments
-template <typename... Args> struct process_attributes {
-    static void init(const Args&... args, function_record *r) {
+template <typename... Args>
+struct process_attributes {
+    static void init(const Args &...args, function_record *r) {
         PYBIND11_WORKAROUND_INCORRECT_MSVC_C4100(r);
         PYBIND11_WORKAROUND_INCORRECT_GCC_UNUSED_BUT_SET_PARAMETER(r);
         using expander = int[];
         (void) expander{
             0, ((void) process_attribute<typename std::decay<Args>::type>::init(args, r), 0)...};
     }
-    static void init(const Args&... args, type_record *r) {
+    static void init(const Args &...args, type_record *r) {
         PYBIND11_WORKAROUND_INCORRECT_MSVC_C4100(r);
         PYBIND11_WORKAROUND_INCORRECT_GCC_UNUSED_BUT_SET_PARAMETER(r);
         using expander = int[];
@@ -595,7 +668,7 @@ using extract_guard_t = typename exactly_one_t<is_call_guard, call_guard<>, Extr
 /// Check the number of named arguments at compile time
 template <typename... Extra,
           size_t named = constexpr_sum(std::is_base_of<arg, Extra>::value...),
-          size_t self  = constexpr_sum(std::is_same<is_method, Extra>::value...)>
+          size_t self = constexpr_sum(std::is_same<is_method, Extra>::value...)>
 constexpr bool expected_num_args(size_t nargs, bool has_args, bool has_kwargs) {
     PYBIND11_WORKAROUND_INCORRECT_MSVC_C4100(nargs, has_args, has_kwargs);
     return named == 0 || (self + named + size_t(has_args) + size_t(has_kwargs)) == nargs;
diff --git a/ext/pybind11/include/pybind11/buffer_info.h b/ext/pybind11/include/pybind11/buffer_info.h
index eba68d1aa1..06120d5563 100644
--- a/ext/pybind11/include/pybind11/buffer_info.h
+++ b/ext/pybind11/include/pybind11/buffer_info.h
@@ -19,9 +19,11 @@ PYBIND11_NAMESPACE_BEGIN(detail)
 inline std::vector<ssize_t> c_strides(const std::vector<ssize_t> &shape, ssize_t itemsize) {
     auto ndim = shape.size();
     std::vector<ssize_t> strides(ndim, itemsize);
-    if (ndim > 0)
-        for (size_t i = ndim - 1; i > 0; --i)
+    if (ndim > 0) {
+        for (size_t i = ndim - 1; i > 0; --i) {
             strides[i - 1] = strides[i] * shape[i];
+        }
+    }
     return strides;
 }
 
@@ -29,8 +31,9 @@ inline std::vector<ssize_t> c_strides(const std::vector<ssize_t> &shape, ssize_t
 inline std::vector<ssize_t> f_strides(const std::vector<ssize_t> &shape, ssize_t itemsize) {
     auto ndim = shape.size();
     std::vector<ssize_t> strides(ndim, itemsize);
-    for (size_t i = 1; i < ndim; ++i)
+    for (size_t i = 1; i < ndim; ++i) {
         strides[i] = strides[i - 1] * shape[i - 1];
+    }
     return strides;
 }
 
@@ -41,55 +44,85 @@ struct buffer_info {
     void *ptr = nullptr;          // Pointer to the underlying storage
     ssize_t itemsize = 0;         // Size of individual items in bytes
     ssize_t size = 0;             // Total number of entries
-    std::string format;           // For homogeneous buffers, this should be set to format_descriptor<T>::format()
+    std::string format;           // For homogeneous buffers, this should be set to
+                                  // format_descriptor<T>::format()
     ssize_t ndim = 0;             // Number of dimensions
     std::vector<ssize_t> shape;   // Shape of the tensor (1 entry per dimension)
-    std::vector<ssize_t> strides; // Number of bytes between adjacent entries (for each per dimension)
+    std::vector<ssize_t> strides; // Number of bytes between adjacent entries
+                                  // (for each per dimension)
     bool readonly = false;        // flag to indicate if the underlying storage may be written to
 
     buffer_info() = default;
 
-    buffer_info(void *ptr, ssize_t itemsize, const std::string &format, ssize_t ndim,
-                detail::any_container<ssize_t> shape_in, detail::any_container<ssize_t> strides_in, bool readonly=false)
-    : ptr(ptr), itemsize(itemsize), size(1), format(format), ndim(ndim),
-      shape(std::move(shape_in)), strides(std::move(strides_in)), readonly(readonly) {
-        if (ndim != (ssize_t) shape.size() || ndim != (ssize_t) strides.size())
+    buffer_info(void *ptr,
+                ssize_t itemsize,
+                const std::string &format,
+                ssize_t ndim,
+                detail::any_container<ssize_t> shape_in,
+                detail::any_container<ssize_t> strides_in,
+                bool readonly = false)
+        : ptr(ptr), itemsize(itemsize), size(1), format(format), ndim(ndim),
+          shape(std::move(shape_in)), strides(std::move(strides_in)), readonly(readonly) {
+        if (ndim != (ssize_t) shape.size() || ndim != (ssize_t) strides.size()) {
             pybind11_fail("buffer_info: ndim doesn't match shape and/or strides length");
-        for (size_t i = 0; i < (size_t) ndim; ++i)
+        }
+        for (size_t i = 0; i < (size_t) ndim; ++i) {
             size *= shape[i];
+        }
     }
 
     template <typename T>
-    buffer_info(T *ptr, detail::any_container<ssize_t> shape_in, detail::any_container<ssize_t> strides_in, bool readonly=false)
-    : buffer_info(private_ctr_tag(), ptr, sizeof(T), format_descriptor<T>::format(), static_cast<ssize_t>(shape_in->size()), std::move(shape_in), std::move(strides_in), readonly) { }
+    buffer_info(T *ptr,
+                detail::any_container<ssize_t> shape_in,
+                detail::any_container<ssize_t> strides_in,
+                bool readonly = false)
+        : buffer_info(private_ctr_tag(),
+                      ptr,
+                      sizeof(T),
+                      format_descriptor<T>::format(),
+                      static_cast<ssize_t>(shape_in->size()),
+                      std::move(shape_in),
+                      std::move(strides_in),
+                      readonly) {}
 
-    buffer_info(void *ptr, ssize_t itemsize, const std::string &format, ssize_t size, bool readonly=false)
-    : buffer_info(ptr, itemsize, format, 1, {size}, {itemsize}, readonly) { }
+    buffer_info(void *ptr,
+                ssize_t itemsize,
+                const std::string &format,
+                ssize_t size,
+                bool readonly = false)
+        : buffer_info(ptr, itemsize, format, 1, {size}, {itemsize}, readonly) {}
 
     template <typename T>
-    buffer_info(T *ptr, ssize_t size, bool readonly=false)
-    : buffer_info(ptr, sizeof(T), format_descriptor<T>::format(), size, readonly) { }
+    buffer_info(T *ptr, ssize_t size, bool readonly = false)
+        : buffer_info(ptr, sizeof(T), format_descriptor<T>::format(), size, readonly) {}
 
     template <typename T>
-    buffer_info(const T *ptr, ssize_t size, bool readonly=true)
-    : buffer_info(const_cast<T*>(ptr), sizeof(T), format_descriptor<T>::format(), size, readonly) { }
+    buffer_info(const T *ptr, ssize_t size, bool readonly = true)
+        : buffer_info(
+            const_cast<T *>(ptr), sizeof(T), format_descriptor<T>::format(), size, readonly) {}
 
     explicit buffer_info(Py_buffer *view, bool ownview = true)
-    : buffer_info(view->buf, view->itemsize, view->format, view->ndim,
+        : buffer_info(
+            view->buf,
+            view->itemsize,
+            view->format,
+            view->ndim,
             {view->shape, view->shape + view->ndim},
             /* Though buffer::request() requests PyBUF_STRIDES, ctypes objects
              * ignore this flag and return a view with NULL strides.
              * When strides are NULL, build them manually.  */
             view->strides
-            ? std::vector<ssize_t>(view->strides, view->strides + view->ndim)
-            : detail::c_strides({view->shape, view->shape + view->ndim}, view->itemsize),
+                ? std::vector<ssize_t>(view->strides, view->strides + view->ndim)
+                : detail::c_strides({view->shape, view->shape + view->ndim}, view->itemsize),
             (view->readonly != 0)) {
+        // NOLINTNEXTLINE(cppcoreguidelines-prefer-member-initializer)
         this->m_view = view;
+        // NOLINTNEXTLINE(cppcoreguidelines-prefer-member-initializer)
         this->ownview = ownview;
     }
 
     buffer_info(const buffer_info &) = delete;
-    buffer_info& operator=(const buffer_info &) = delete;
+    buffer_info &operator=(const buffer_info &) = delete;
 
     buffer_info(buffer_info &&other) noexcept { (*this) = std::move(other); }
 
@@ -108,17 +141,28 @@ struct buffer_info {
     }
 
     ~buffer_info() {
-        if (m_view && ownview) { PyBuffer_Release(m_view); delete m_view; }
+        if (m_view && ownview) {
+            PyBuffer_Release(m_view);
+            delete m_view;
+        }
     }
 
     Py_buffer *view() const { return m_view; }
     Py_buffer *&view() { return m_view; }
-private:
-    struct private_ctr_tag { };
 
-    buffer_info(private_ctr_tag, void *ptr, ssize_t itemsize, const std::string &format, ssize_t ndim,
-                detail::any_container<ssize_t> &&shape_in, detail::any_container<ssize_t> &&strides_in, bool readonly)
-    : buffer_info(ptr, itemsize, format, ndim, std::move(shape_in), std::move(strides_in), readonly) { }
+private:
+    struct private_ctr_tag {};
+
+    buffer_info(private_ctr_tag,
+                void *ptr,
+                ssize_t itemsize,
+                const std::string &format,
+                ssize_t ndim,
+                detail::any_container<ssize_t> &&shape_in,
+                detail::any_container<ssize_t> &&strides_in,
+                bool readonly)
+        : buffer_info(
+            ptr, itemsize, format, ndim, std::move(shape_in), std::move(strides_in), readonly) {}
 
     Py_buffer *m_view = nullptr;
     bool ownview = false;
@@ -126,17 +170,22 @@ private:
 
 PYBIND11_NAMESPACE_BEGIN(detail)
 
-template <typename T, typename SFINAE = void> struct compare_buffer_info {
-    static bool compare(const buffer_info& b) {
+template <typename T, typename SFINAE = void>
+struct compare_buffer_info {
+    static bool compare(const buffer_info &b) {
         return b.format == format_descriptor<T>::format() && b.itemsize == (ssize_t) sizeof(T);
     }
 };
 
-template <typename T> struct compare_buffer_info<T, detail::enable_if_t<std::is_integral<T>::value>> {
-    static bool compare(const buffer_info& b) {
-        return (size_t) b.itemsize == sizeof(T) && (b.format == format_descriptor<T>::value ||
-            ((sizeof(T) == sizeof(long)) && b.format == (std::is_unsigned<T>::value ? "L" : "l")) ||
-            ((sizeof(T) == sizeof(size_t)) && b.format == (std::is_unsigned<T>::value ? "N" : "n")));
+template <typename T>
+struct compare_buffer_info<T, detail::enable_if_t<std::is_integral<T>::value>> {
+    static bool compare(const buffer_info &b) {
+        return (size_t) b.itemsize == sizeof(T)
+               && (b.format == format_descriptor<T>::value
+                   || ((sizeof(T) == sizeof(long))
+                       && b.format == (std::is_unsigned<T>::value ? "L" : "l"))
+                   || ((sizeof(T) == sizeof(size_t))
+                       && b.format == (std::is_unsigned<T>::value ? "N" : "n")));
     }
 };
 
diff --git a/ext/pybind11/include/pybind11/cast.h b/ext/pybind11/include/pybind11/cast.h
index 20fbb32587..3a40460276 100644
--- a/ext/pybind11/include/pybind11/cast.h
+++ b/ext/pybind11/include/pybind11/cast.h
@@ -10,11 +10,12 @@
 
 #pragma once
 
-#include "pytypes.h"
 #include "detail/common.h"
 #include "detail/descr.h"
 #include "detail/type_caster_base.h"
 #include "detail/typeid.h"
+#include "pytypes.h"
+
 #include <array>
 #include <cstring>
 #include <functional>
@@ -27,61 +28,57 @@
 #include <utility>
 #include <vector>
 
-#if defined(PYBIND11_CPP17)
-#  if defined(__has_include)
-#    if __has_include(<string_view>)
-#      define PYBIND11_HAS_STRING_VIEW
-#    endif
-#  elif defined(_MSC_VER)
-#    define PYBIND11_HAS_STRING_VIEW
-#  endif
-#endif
-#ifdef PYBIND11_HAS_STRING_VIEW
-#include <string_view>
-#endif
-
-#if defined(__cpp_lib_char8_t) && __cpp_lib_char8_t >= 201811L
-#  define PYBIND11_HAS_U8STRING
-#endif
-
 PYBIND11_NAMESPACE_BEGIN(PYBIND11_NAMESPACE)
+
+PYBIND11_WARNING_DISABLE_MSVC(4127)
+
 PYBIND11_NAMESPACE_BEGIN(detail)
 
-template <typename type, typename SFINAE = void> class type_caster : public type_caster_base<type> { };
-template <typename type> using make_caster = type_caster<intrinsic_t<type>>;
+template <typename type, typename SFINAE = void>
+class type_caster : public type_caster_base<type> {};
+template <typename type>
+using make_caster = type_caster<intrinsic_t<type>>;
 
 // Shortcut for calling a caster's `cast_op_type` cast operator for casting a type_caster to a T
-template <typename T> typename make_caster<T>::template cast_op_type<T> cast_op(make_caster<T> &caster) {
+template <typename T>
+typename make_caster<T>::template cast_op_type<T> cast_op(make_caster<T> &caster) {
     return caster.operator typename make_caster<T>::template cast_op_type<T>();
 }
-template <typename T> typename make_caster<T>::template cast_op_type<typename std::add_rvalue_reference<T>::type>
+template <typename T>
+typename make_caster<T>::template cast_op_type<typename std::add_rvalue_reference<T>::type>
 cast_op(make_caster<T> &&caster) {
-    return std::move(caster).operator
-        typename make_caster<T>::template cast_op_type<typename std::add_rvalue_reference<T>::type>();
+    return std::move(caster).operator typename make_caster<T>::
+        template cast_op_type<typename std::add_rvalue_reference<T>::type>();
 }
 
-template <typename type> class type_caster<std::reference_wrapper<type>> {
+template <typename type>
+class type_caster<std::reference_wrapper<type>> {
 private:
     using caster_t = make_caster<type>;
     caster_t subcaster;
-    using reference_t = type&;
-    using subcaster_cast_op_type =
-        typename caster_t::template cast_op_type<reference_t>;
+    using reference_t = type &;
+    using subcaster_cast_op_type = typename caster_t::template cast_op_type<reference_t>;
+
+    static_assert(
+        std::is_same<typename std::remove_const<type>::type &, subcaster_cast_op_type>::value
+            || std::is_same<reference_t, subcaster_cast_op_type>::value,
+        "std::reference_wrapper<T> caster requires T to have a caster with an "
+        "`operator T &()` or `operator const T &()`");
 
-    static_assert(std::is_same<typename std::remove_const<type>::type &, subcaster_cast_op_type>::value ||
-                  std::is_same<reference_t, subcaster_cast_op_type>::value,
-                  "std::reference_wrapper<T> caster requires T to have a caster with an "
-                  "`operator T &()` or `operator const T &()`");
 public:
     bool load(handle src, bool convert) { return subcaster.load(src, convert); }
     static constexpr auto name = caster_t::name;
-    static handle cast(const std::reference_wrapper<type> &src, return_value_policy policy, handle parent) {
+    static handle
+    cast(const std::reference_wrapper<type> &src, return_value_policy policy, handle parent) {
         // It is definitely wrong to take ownership of this pointer, so mask that rvp
-        if (policy == return_value_policy::take_ownership || policy == return_value_policy::automatic)
+        if (policy == return_value_policy::take_ownership
+            || policy == return_value_policy::automatic) {
             policy = return_value_policy::automatic_reference;
+        }
         return caster_t::cast(&src.get(), policy, parent);
     }
-    template <typename T> using cast_op_type = std::reference_wrapper<type>;
+    template <typename T>
+    using cast_op_type = std::reference_wrapper<type>;
     explicit operator std::reference_wrapper<type>() { return cast_op<type &>(subcaster); }
 };
 
@@ -91,11 +88,16 @@ protected:
                                                                                                   \
 public:                                                                                           \
     static constexpr auto name = py_name;                                                         \
-    template <typename T_, enable_if_t<std::is_same<type, remove_cv_t<T_>>::value, int> = 0>      \
-    static handle cast(T_ *src, return_value_policy policy, handle parent) {                      \
+    template <typename T_,                                                                        \
+              ::pybind11::detail::enable_if_t<                                                    \
+                  std::is_same<type, ::pybind11::detail::remove_cv_t<T_>>::value,                 \
+                  int>                                                                            \
+              = 0>                                                                                \
+    static ::pybind11::handle cast(                                                               \
+        T_ *src, ::pybind11::return_value_policy policy, ::pybind11::handle parent) {             \
         if (!src)                                                                                 \
-            return none().release();                                                              \
-        if (policy == return_value_policy::take_ownership) {                                      \
+            return ::pybind11::none().release();                                                  \
+        if (policy == ::pybind11::return_value_policy::take_ownership) {                          \
             auto h = cast(std::move(*src), policy, parent);                                       \
             delete src;                                                                           \
             return h;                                                                             \
@@ -106,31 +108,33 @@ public:
     operator type &() { return value; }                /* NOLINT(bugprone-macro-parentheses) */   \
     operator type &&() && { return std::move(value); } /* NOLINT(bugprone-macro-parentheses) */   \
     template <typename T_>                                                                        \
-    using cast_op_type = pybind11::detail::movable_cast_op_type<T_>
+    using cast_op_type = ::pybind11::detail::movable_cast_op_type<T_>
 
-template <typename CharT> using is_std_char_type = any_of<
-    std::is_same<CharT, char>, /* std::string */
+template <typename CharT>
+using is_std_char_type = any_of<std::is_same<CharT, char>, /* std::string */
 #if defined(PYBIND11_HAS_U8STRING)
-    std::is_same<CharT, char8_t>, /* std::u8string */
+                                std::is_same<CharT, char8_t>, /* std::u8string */
 #endif
-    std::is_same<CharT, char16_t>, /* std::u16string */
-    std::is_same<CharT, char32_t>, /* std::u32string */
-    std::is_same<CharT, wchar_t> /* std::wstring */
->;
-
+                                std::is_same<CharT, char16_t>, /* std::u16string */
+                                std::is_same<CharT, char32_t>, /* std::u32string */
+                                std::is_same<CharT, wchar_t>   /* std::wstring */
+                                >;
 
 template <typename T>
 struct type_caster<T, enable_if_t<std::is_arithmetic<T>::value && !is_std_char_type<T>::value>> {
     using _py_type_0 = conditional_t<sizeof(T) <= sizeof(long), long, long long>;
-    using _py_type_1 = conditional_t<std::is_signed<T>::value, _py_type_0, typename std::make_unsigned<_py_type_0>::type>;
+    using _py_type_1 = conditional_t<std::is_signed<T>::value,
+                                     _py_type_0,
+                                     typename std::make_unsigned<_py_type_0>::type>;
     using py_type = conditional_t<std::is_floating_point<T>::value, double, _py_type_1>;
-public:
 
+public:
     bool load(handle src, bool convert) {
         py_type py_value;
 
-        if (!src)
+        if (!src) {
             return false;
+        }
 
 #if !defined(PYPY_VERSION)
         auto index_check = [](PyObject *o) { return PyIndex_Check(o); };
@@ -141,25 +145,26 @@ public:
 #endif
 
         if (std::is_floating_point<T>::value) {
-            if (convert || PyFloat_Check(src.ptr()))
+            if (convert || PyFloat_Check(src.ptr())) {
                 py_value = (py_type) PyFloat_AsDouble(src.ptr());
-            else
+            } else {
                 return false;
+            }
         } else if (PyFloat_Check(src.ptr())
                    || (!convert && !PYBIND11_LONG_CHECK(src.ptr()) && !index_check(src.ptr()))) {
             return false;
         } else {
             handle src_or_index = src;
-#if PY_VERSION_HEX < 0x03080000
+            // PyPy: 7.3.7's 3.8 does not implement PyLong_*'s __index__ calls.
+#if PY_VERSION_HEX < 0x03080000 || defined(PYPY_VERSION)
             object index;
-            if (!PYBIND11_LONG_CHECK(src.ptr())) {  // So: index_check(src.ptr())
+            if (!PYBIND11_LONG_CHECK(src.ptr())) { // So: index_check(src.ptr())
                 index = reinterpret_steal<object>(PyNumber_Index(src.ptr()));
                 if (!index) {
                     PyErr_Clear();
                     if (!convert)
                         return false;
-                }
-                else {
+                } else {
                     src_or_index = index;
                 }
             }
@@ -168,8 +173,8 @@ public:
                 py_value = as_unsigned<py_type>(src_or_index.ptr());
             } else { // signed integer:
                 py_value = sizeof(T) <= sizeof(long)
-                    ? (py_type) PyLong_AsLong(src_or_index.ptr())
-                    : (py_type) PYBIND11_LONG_AS_LONGLONG(src_or_index.ptr());
+                               ? (py_type) PyLong_AsLong(src_or_index.ptr())
+                               : (py_type) PYBIND11_LONG_AS_LONGLONG(src_or_index.ptr());
             }
         }
 
@@ -178,12 +183,14 @@ public:
 
         // Check to see if the conversion is valid (integers should match exactly)
         // Signed/unsigned checks happen elsewhere
-        if (py_err || (std::is_integral<T>::value && sizeof(py_type) != sizeof(T) && py_value != (py_type) (T) py_value)) {
+        if (py_err
+            || (std::is_integral<T>::value && sizeof(py_type) != sizeof(T)
+                && py_value != (py_type) (T) py_value)) {
             PyErr_Clear();
             if (py_err && convert && (PyNumber_Check(src.ptr()) != 0)) {
                 auto tmp = reinterpret_steal<object>(std::is_floating_point<T>::value
-                                                     ? PyNumber_Float(src.ptr())
-                                                     : PyNumber_Long(src.ptr()));
+                                                         ? PyNumber_Float(src.ptr())
+                                                         : PyNumber_Long(src.ptr()));
                 PyErr_Clear();
                 return load(tmp, false);
             }
@@ -194,55 +201,67 @@ public:
         return true;
     }
 
-    template<typename U = T>
+    template <typename U = T>
     static typename std::enable_if<std::is_floating_point<U>::value, handle>::type
     cast(U src, return_value_policy /* policy */, handle /* parent */) {
         return PyFloat_FromDouble((double) src);
     }
 
-    template<typename U = T>
-    static typename std::enable_if<!std::is_floating_point<U>::value && std::is_signed<U>::value && (sizeof(U) <= sizeof(long)), handle>::type
+    template <typename U = T>
+    static typename std::enable_if<!std::is_floating_point<U>::value && std::is_signed<U>::value
+                                       && (sizeof(U) <= sizeof(long)),
+                                   handle>::type
     cast(U src, return_value_policy /* policy */, handle /* parent */) {
         return PYBIND11_LONG_FROM_SIGNED((long) src);
     }
 
-    template<typename U = T>
-    static typename std::enable_if<!std::is_floating_point<U>::value && std::is_unsigned<U>::value && (sizeof(U) <= sizeof(unsigned long)), handle>::type
+    template <typename U = T>
+    static typename std::enable_if<!std::is_floating_point<U>::value && std::is_unsigned<U>::value
+                                       && (sizeof(U) <= sizeof(unsigned long)),
+                                   handle>::type
     cast(U src, return_value_policy /* policy */, handle /* parent */) {
         return PYBIND11_LONG_FROM_UNSIGNED((unsigned long) src);
     }
 
-    template<typename U = T>
-    static typename std::enable_if<!std::is_floating_point<U>::value && std::is_signed<U>::value && (sizeof(U) > sizeof(long)), handle>::type
+    template <typename U = T>
+    static typename std::enable_if<!std::is_floating_point<U>::value && std::is_signed<U>::value
+                                       && (sizeof(U) > sizeof(long)),
+                                   handle>::type
     cast(U src, return_value_policy /* policy */, handle /* parent */) {
         return PyLong_FromLongLong((long long) src);
     }
 
-    template<typename U = T>
-    static typename std::enable_if<!std::is_floating_point<U>::value && std::is_unsigned<U>::value && (sizeof(U) > sizeof(unsigned long)), handle>::type
+    template <typename U = T>
+    static typename std::enable_if<!std::is_floating_point<U>::value && std::is_unsigned<U>::value
+                                       && (sizeof(U) > sizeof(unsigned long)),
+                                   handle>::type
     cast(U src, return_value_policy /* policy */, handle /* parent */) {
         return PyLong_FromUnsignedLongLong((unsigned long long) src);
     }
 
-    PYBIND11_TYPE_CASTER(T, _<std::is_integral<T>::value>("int", "float"));
+    PYBIND11_TYPE_CASTER(T, const_name<std::is_integral<T>::value>("int", "float"));
 };
 
-template<typename T> struct void_caster {
+template <typename T>
+struct void_caster {
 public:
     bool load(handle src, bool) {
-        if (src && src.is_none())
+        if (src && src.is_none()) {
             return true;
+        }
         return false;
     }
     static handle cast(T, return_value_policy /* policy */, handle /* parent */) {
-        return none().inc_ref();
+        return none().release();
     }
-    PYBIND11_TYPE_CASTER(T, _("None"));
+    PYBIND11_TYPE_CASTER(T, const_name("None"));
 };
 
-template <> class type_caster<void_type> : public void_caster<void_type> {};
+template <>
+class type_caster<void_type> : public void_caster<void_type> {};
 
-template <> class type_caster<void> : public type_caster<void_type> {
+template <>
+class type_caster<void> : public type_caster<void_type> {
 public:
     using type_caster<void_type>::cast;
 
@@ -262,7 +281,7 @@ public:
         }
 
         /* Check if this is a C++ type */
-        auto &bases = all_type_info((PyTypeObject *) type::handle_of(h).ptr());
+        const auto &bases = all_type_info((PyTypeObject *) type::handle_of(h).ptr());
         if (bases.size() == 1) { // Only allowing loading from a single-value type
             value = values_and_holders(reinterpret_cast<instance *>(h.ptr())).begin()->value_ptr();
             return true;
@@ -273,24 +292,31 @@ public:
     }
 
     static handle cast(const void *ptr, return_value_policy /* policy */, handle /* parent */) {
-        if (ptr)
+        if (ptr) {
             return capsule(ptr).release();
-        return none().inc_ref();
+        }
+        return none().release();
     }
 
-    template <typename T> using cast_op_type = void*&;
+    template <typename T>
+    using cast_op_type = void *&;
     explicit operator void *&() { return value; }
-    static constexpr auto name = _("capsule");
+    static constexpr auto name = const_name("capsule");
+
 private:
     void *value = nullptr;
 };
 
-template <> class type_caster<std::nullptr_t> : public void_caster<std::nullptr_t> { };
+template <>
+class type_caster<std::nullptr_t> : public void_caster<std::nullptr_t> {};
 
-template <> class type_caster<bool> {
+template <>
+class type_caster<bool> {
 public:
     bool load(handle src, bool convert) {
-        if (!src) return false;
+        if (!src) {
+            return false;
+        }
         if (src.ptr() == Py_True) {
             value = true;
             return true;
@@ -304,22 +330,22 @@ public:
 
             Py_ssize_t res = -1;
             if (src.is_none()) {
-                res = 0;  // None is implicitly converted to False
+                res = 0; // None is implicitly converted to False
             }
-            #if defined(PYPY_VERSION)
-            // On PyPy, check that "__bool__" (or "__nonzero__" on Python 2.7) attr exists
+#if defined(PYPY_VERSION)
+            // On PyPy, check that "__bool__" attr exists
             else if (hasattr(src, PYBIND11_BOOL_ATTR)) {
                 res = PyObject_IsTrue(src.ptr());
             }
-            #else
+#else
             // Alternate approach for CPython: this does the same as the above, but optimized
             // using the CPython API so as to avoid an unneeded attribute lookup.
-            else if (auto tp_as_number = src.ptr()->ob_type->tp_as_number) {
+            else if (auto *tp_as_number = src.ptr()->ob_type->tp_as_number) {
                 if (PYBIND11_NB_BOOL(tp_as_number)) {
                     res = (*PYBIND11_NB_BOOL(tp_as_number))(src.ptr());
                 }
             }
-            #endif
+#endif
             if (res == 0 || res == 1) {
                 value = (res != 0);
                 return true;
@@ -331,56 +357,43 @@ public:
     static handle cast(bool src, return_value_policy /* policy */, handle /* parent */) {
         return handle(src ? Py_True : Py_False).inc_ref();
     }
-    PYBIND11_TYPE_CASTER(bool, _("bool"));
+    PYBIND11_TYPE_CASTER(bool, const_name("bool"));
 };
 
 // Helper class for UTF-{8,16,32} C++ stl strings:
-template <typename StringType, bool IsView = false> struct string_caster {
+template <typename StringType, bool IsView = false>
+struct string_caster {
     using CharT = typename StringType::value_type;
 
     // Simplify life by being able to assume standard char sizes (the standard only guarantees
     // minimums, but Python requires exact sizes)
-    static_assert(!std::is_same<CharT, char>::value || sizeof(CharT) == 1, "Unsupported char size != 1");
+    static_assert(!std::is_same<CharT, char>::value || sizeof(CharT) == 1,
+                  "Unsupported char size != 1");
 #if defined(PYBIND11_HAS_U8STRING)
-    static_assert(!std::is_same<CharT, char8_t>::value || sizeof(CharT) == 1, "Unsupported char8_t size != 1");
+    static_assert(!std::is_same<CharT, char8_t>::value || sizeof(CharT) == 1,
+                  "Unsupported char8_t size != 1");
 #endif
-    static_assert(!std::is_same<CharT, char16_t>::value || sizeof(CharT) == 2, "Unsupported char16_t size != 2");
-    static_assert(!std::is_same<CharT, char32_t>::value || sizeof(CharT) == 4, "Unsupported char32_t size != 4");
+    static_assert(!std::is_same<CharT, char16_t>::value || sizeof(CharT) == 2,
+                  "Unsupported char16_t size != 2");
+    static_assert(!std::is_same<CharT, char32_t>::value || sizeof(CharT) == 4,
+                  "Unsupported char32_t size != 4");
     // wchar_t can be either 16 bits (Windows) or 32 (everywhere else)
     static_assert(!std::is_same<CharT, wchar_t>::value || sizeof(CharT) == 2 || sizeof(CharT) == 4,
-            "Unsupported wchar_t size != 2/4");
+                  "Unsupported wchar_t size != 2/4");
     static constexpr size_t UTF_N = 8 * sizeof(CharT);
 
     bool load(handle src, bool) {
-#if PY_MAJOR_VERSION < 3
-        object temp;
-#endif
         handle load_src = src;
         if (!src) {
             return false;
         }
         if (!PyUnicode_Check(load_src.ptr())) {
-#if PY_MAJOR_VERSION >= 3
-            return load_bytes(load_src);
-#else
-            if (std::is_same<CharT, char>::value) {
-                return load_bytes(load_src);
-            }
-
-            // The below is a guaranteed failure in Python 3 when PyUnicode_Check returns false
-            if (!PYBIND11_BYTES_CHECK(load_src.ptr()))
-                return false;
-
-            temp = reinterpret_steal<object>(PyUnicode_FromObject(load_src.ptr()));
-            if (!temp) { PyErr_Clear(); return false; }
-            load_src = temp;
-#endif
+            return load_raw(load_src);
         }
 
-#if PY_VERSION_HEX >= 0x03030000
-        // On Python >= 3.3, for UTF-8 we avoid the need for a temporary `bytes`
-        // object by using `PyUnicode_AsUTF8AndSize`.
-        if (PYBIND11_SILENCE_MSVC_C4127(UTF_N == 8)) {
+        // For UTF-8 we avoid the need for a temporary `bytes` object by using
+        // `PyUnicode_AsUTF8AndSize`.
+        if (UTF_N == 8) {
             Py_ssize_t size = -1;
             const auto *buffer
                 = reinterpret_cast<const CharT *>(PyUnicode_AsUTF8AndSize(load_src.ptr(), &size));
@@ -391,98 +404,135 @@ template <typename StringType, bool IsView = false> struct string_caster {
             value = StringType(buffer, static_cast<size_t>(size));
             return true;
         }
-#endif
 
-        auto utfNbytes = reinterpret_steal<object>(PyUnicode_AsEncodedString(
-            load_src.ptr(), UTF_N == 8 ? "utf-8" : UTF_N == 16 ? "utf-16" : "utf-32", nullptr));
-        if (!utfNbytes) { PyErr_Clear(); return false; }
+        auto utfNbytes
+            = reinterpret_steal<object>(PyUnicode_AsEncodedString(load_src.ptr(),
+                                                                  UTF_N == 8    ? "utf-8"
+                                                                  : UTF_N == 16 ? "utf-16"
+                                                                                : "utf-32",
+                                                                  nullptr));
+        if (!utfNbytes) {
+            PyErr_Clear();
+            return false;
+        }
 
-        const auto *buffer = reinterpret_cast<const CharT *>(PYBIND11_BYTES_AS_STRING(utfNbytes.ptr()));
+        const auto *buffer
+            = reinterpret_cast<const CharT *>(PYBIND11_BYTES_AS_STRING(utfNbytes.ptr()));
         size_t length = (size_t) PYBIND11_BYTES_SIZE(utfNbytes.ptr()) / sizeof(CharT);
         // Skip BOM for UTF-16/32
-        if (PYBIND11_SILENCE_MSVC_C4127(UTF_N > 8)) {
+        if (UTF_N > 8) {
             buffer++;
             length--;
         }
         value = StringType(buffer, length);
 
         // If we're loading a string_view we need to keep the encoded Python object alive:
-        if (IsView)
+        if (IsView) {
             loader_life_support::add_patient(utfNbytes);
+        }
 
         return true;
     }
 
-    static handle cast(const StringType &src, return_value_policy /* policy */, handle /* parent */) {
+    static handle
+    cast(const StringType &src, return_value_policy /* policy */, handle /* parent */) {
         const char *buffer = reinterpret_cast<const char *>(src.data());
         auto nbytes = ssize_t(src.size() * sizeof(CharT));
         handle s = decode_utfN(buffer, nbytes);
-        if (!s) throw error_already_set();
+        if (!s) {
+            throw error_already_set();
+        }
         return s;
     }
 
-    PYBIND11_TYPE_CASTER(StringType, _(PYBIND11_STRING_NAME));
+    PYBIND11_TYPE_CASTER(StringType, const_name(PYBIND11_STRING_NAME));
 
 private:
     static handle decode_utfN(const char *buffer, ssize_t nbytes) {
 #if !defined(PYPY_VERSION)
-        return
-            UTF_N == 8  ? PyUnicode_DecodeUTF8(buffer, nbytes, nullptr) :
-            UTF_N == 16 ? PyUnicode_DecodeUTF16(buffer, nbytes, nullptr, nullptr) :
-                          PyUnicode_DecodeUTF32(buffer, nbytes, nullptr, nullptr);
+        return UTF_N == 8    ? PyUnicode_DecodeUTF8(buffer, nbytes, nullptr)
+               : UTF_N == 16 ? PyUnicode_DecodeUTF16(buffer, nbytes, nullptr, nullptr)
+                             : PyUnicode_DecodeUTF32(buffer, nbytes, nullptr, nullptr);
 #else
-        // PyPy segfaults when on PyUnicode_DecodeUTF16 (and possibly on PyUnicode_DecodeUTF32 as well),
-        // so bypass the whole thing by just passing the encoding as a string value, which works properly:
-        return PyUnicode_Decode(buffer, nbytes, UTF_N == 8 ? "utf-8" : UTF_N == 16 ? "utf-16" : "utf-32", nullptr);
+        // PyPy segfaults when on PyUnicode_DecodeUTF16 (and possibly on PyUnicode_DecodeUTF32 as
+        // well), so bypass the whole thing by just passing the encoding as a string value, which
+        // works properly:
+        return PyUnicode_Decode(buffer,
+                                nbytes,
+                                UTF_N == 8    ? "utf-8"
+                                : UTF_N == 16 ? "utf-16"
+                                              : "utf-32",
+                                nullptr);
 #endif
     }
 
-    // When loading into a std::string or char*, accept a bytes object as-is (i.e.
+    // When loading into a std::string or char*, accept a bytes/bytearray object as-is (i.e.
     // without any encoding/decoding attempt).  For other C++ char sizes this is a no-op.
     // which supports loading a unicode from a str, doesn't take this path.
     template <typename C = CharT>
-    bool load_bytes(enable_if_t<std::is_same<C, char>::value, handle> src) {
+    bool load_raw(enable_if_t<std::is_same<C, char>::value, handle> src) {
         if (PYBIND11_BYTES_CHECK(src.ptr())) {
-            // We were passed a Python 3 raw bytes; accept it into a std::string or char*
+            // We were passed raw bytes; accept it into a std::string or char*
             // without any encoding attempt.
             const char *bytes = PYBIND11_BYTES_AS_STRING(src.ptr());
-            if (bytes) {
-                value = StringType(bytes, (size_t) PYBIND11_BYTES_SIZE(src.ptr()));
-                return true;
+            if (!bytes) {
+                pybind11_fail("Unexpected PYBIND11_BYTES_AS_STRING() failure.");
             }
+            value = StringType(bytes, (size_t) PYBIND11_BYTES_SIZE(src.ptr()));
+            return true;
+        }
+        if (PyByteArray_Check(src.ptr())) {
+            // We were passed a bytearray; accept it into a std::string or char*
+            // without any encoding attempt.
+            const char *bytearray = PyByteArray_AsString(src.ptr());
+            if (!bytearray) {
+                pybind11_fail("Unexpected PyByteArray_AsString() failure.");
+            }
+            value = StringType(bytearray, (size_t) PyByteArray_Size(src.ptr()));
+            return true;
         }
 
         return false;
     }
 
     template <typename C = CharT>
-    bool load_bytes(enable_if_t<!std::is_same<C, char>::value, handle>) { return false; }
+    bool load_raw(enable_if_t<!std::is_same<C, char>::value, handle>) {
+        return false;
+    }
 };
 
 template <typename CharT, class Traits, class Allocator>
-struct type_caster<std::basic_string<CharT, Traits, Allocator>, enable_if_t<is_std_char_type<CharT>::value>>
+struct type_caster<std::basic_string<CharT, Traits, Allocator>,
+                   enable_if_t<is_std_char_type<CharT>::value>>
     : string_caster<std::basic_string<CharT, Traits, Allocator>> {};
 
 #ifdef PYBIND11_HAS_STRING_VIEW
 template <typename CharT, class Traits>
-struct type_caster<std::basic_string_view<CharT, Traits>, enable_if_t<is_std_char_type<CharT>::value>>
+struct type_caster<std::basic_string_view<CharT, Traits>,
+                   enable_if_t<is_std_char_type<CharT>::value>>
     : string_caster<std::basic_string_view<CharT, Traits>, true> {};
 #endif
 
 // Type caster for C-style strings.  We basically use a std::string type caster, but also add the
 // ability to use None as a nullptr char* (which the string caster doesn't allow).
-template <typename CharT> struct type_caster<CharT, enable_if_t<is_std_char_type<CharT>::value>> {
+template <typename CharT>
+struct type_caster<CharT, enable_if_t<is_std_char_type<CharT>::value>> {
     using StringType = std::basic_string<CharT>;
-    using StringCaster = type_caster<StringType>;
+    using StringCaster = make_caster<StringType>;
     StringCaster str_caster;
     bool none = false;
     CharT one_char = 0;
+
 public:
     bool load(handle src, bool convert) {
-        if (!src) return false;
+        if (!src) {
+            return false;
+        }
         if (src.is_none()) {
             // Defer accepting None to other overloads (if we aren't in convert mode):
-            if (!convert) return false;
+            if (!convert) {
+                return false;
+            }
             none = true;
             return true;
         }
@@ -490,14 +540,18 @@ public:
     }
 
     static handle cast(const CharT *src, return_value_policy policy, handle parent) {
-        if (src == nullptr) return pybind11::none().inc_ref();
+        if (src == nullptr) {
+            return pybind11::none().release();
+        }
         return StringCaster::cast(StringType(src), policy, parent);
     }
 
     static handle cast(CharT src, return_value_policy policy, handle parent) {
         if (std::is_same<char, CharT>::value) {
             handle s = PyUnicode_DecodeLatin1((const char *) &src, 1, nullptr);
-            if (!s) throw error_already_set();
+            if (!s) {
+                throw error_already_set();
+            }
             return s;
         }
         return StringCaster::cast(StringType(1, src), policy, parent);
@@ -507,20 +561,22 @@ public:
         return none ? nullptr : const_cast<CharT *>(static_cast<StringType &>(str_caster).c_str());
     }
     explicit operator CharT &() {
-        if (none)
+        if (none) {
             throw value_error("Cannot convert None to a character");
+        }
 
         auto &value = static_cast<StringType &>(str_caster);
         size_t str_len = value.size();
-        if (str_len == 0)
+        if (str_len == 0) {
             throw value_error("Cannot convert empty string to a character");
+        }
 
         // If we're in UTF-8 mode, we have two possible failures: one for a unicode character that
-        // is too high, and one for multiple unicode characters (caught later), so we need to figure
-        // out how long the first encoded character is in bytes to distinguish between these two
-        // errors.  We also allow want to allow unicode characters U+0080 through U+00FF, as those
-        // can fit into a single char value.
-        if (PYBIND11_SILENCE_MSVC_C4127(StringCaster::UTF_N == 8) && str_len > 1 && str_len <= 4) {
+        // is too high, and one for multiple unicode characters (caught later), so we need to
+        // figure out how long the first encoded character is in bytes to distinguish between these
+        // two errors.  We also allow want to allow unicode characters U+0080 through U+00FF, as
+        // those can fit into a single char value.
+        if (StringCaster::UTF_N == 8 && str_len > 1 && str_len <= 4) {
             auto v0 = static_cast<unsigned char>(value[0]);
             // low bits only: 0-127
             // 0b110xxxxx - start of 2-byte sequence
@@ -534,7 +590,8 @@ public:
             if (char0_bytes == str_len) {
                 // If we have a 128-255 value, we can decode it into a single char:
                 if (char0_bytes == 2 && (v0 & 0xFC) == 0xC0) { // 0x110000xx 0x10xxxxxx
-                    one_char = static_cast<CharT>(((v0 & 3) << 6) + (static_cast<unsigned char>(value[1]) & 0x3F));
+                    one_char = static_cast<CharT>(((v0 & 3) << 6)
+                                                  + (static_cast<unsigned char>(value[1]) & 0x3F));
                     return one_char;
                 }
                 // Otherwise we have a single character, but it's > U+00FF
@@ -545,36 +602,42 @@ public:
         // UTF-16 is much easier: we can only have a surrogate pair for values above U+FFFF, thus a
         // surrogate pair with total length 2 instantly indicates a range error (but not a "your
         // string was too long" error).
-        else if (PYBIND11_SILENCE_MSVC_C4127(StringCaster::UTF_N == 16) && str_len == 2) {
+        else if (StringCaster::UTF_N == 16 && str_len == 2) {
             one_char = static_cast<CharT>(value[0]);
-            if (one_char >= 0xD800 && one_char < 0xE000)
+            if (one_char >= 0xD800 && one_char < 0xE000) {
                 throw value_error("Character code point not in range(0x10000)");
+            }
         }
 
-        if (str_len != 1)
+        if (str_len != 1) {
             throw value_error("Expected a character, but multi-character string found");
+        }
 
         one_char = value[0];
         return one_char;
     }
 
-    static constexpr auto name = _(PYBIND11_STRING_NAME);
-    template <typename _T> using cast_op_type = pybind11::detail::cast_op_type<_T>;
+    static constexpr auto name = const_name(PYBIND11_STRING_NAME);
+    template <typename _T>
+    using cast_op_type = pybind11::detail::cast_op_type<_T>;
 };
 
 // Base implementation for std::tuple and std::pair
-template <template<typename...> class Tuple, typename... Ts> class tuple_caster {
+template <template <typename...> class Tuple, typename... Ts>
+class tuple_caster {
     using type = Tuple<Ts...>;
     static constexpr auto size = sizeof...(Ts);
     using indices = make_index_sequence<size>;
-public:
 
+public:
     bool load(handle src, bool convert) {
-        if (!isinstance<sequence>(src))
+        if (!isinstance<sequence>(src)) {
             return false;
+        }
         const auto seq = reinterpret_borrow<sequence>(src);
-        if (seq.size() != size)
+        if (seq.size() != size) {
             return false;
+        }
         return load_impl(seq, convert, indices{});
     }
 
@@ -586,7 +649,9 @@ public:
     // copied from the PYBIND11_TYPE_CASTER macro
     template <typename T>
     static handle cast(T *src, return_value_policy policy, handle parent) {
-        if (!src) return none().release();
+        if (!src) {
+            return none().release();
+        }
         if (policy == return_value_policy::take_ownership) {
             auto h = cast(std::move(*src), policy, parent);
             delete src;
@@ -595,60 +660,72 @@ public:
         return cast(*src, policy, parent);
     }
 
-    static constexpr auto name = _("Tuple[") + concat(make_caster<Ts>::name...) + _("]");
+    static constexpr auto name
+        = const_name("Tuple[") + concat(make_caster<Ts>::name...) + const_name("]");
 
-    template <typename T> using cast_op_type = type;
+    template <typename T>
+    using cast_op_type = type;
 
     explicit operator type() & { return implicit_cast(indices{}); }
     explicit operator type() && { return std::move(*this).implicit_cast(indices{}); }
 
 protected:
     template <size_t... Is>
-    type implicit_cast(index_sequence<Is...>) & { return type(cast_op<Ts>(std::get<Is>(subcasters))...); }
+    type implicit_cast(index_sequence<Is...>) & {
+        return type(cast_op<Ts>(std::get<Is>(subcasters))...);
+    }
     template <size_t... Is>
-    type implicit_cast(index_sequence<Is...>) && { return type(cast_op<Ts>(std::move(std::get<Is>(subcasters)))...); }
+    type implicit_cast(index_sequence<Is...>) && {
+        return type(cast_op<Ts>(std::move(std::get<Is>(subcasters)))...);
+    }
 
     static constexpr bool load_impl(const sequence &, bool, index_sequence<>) { return true; }
 
     template <size_t... Is>
     bool load_impl(const sequence &seq, bool convert, index_sequence<Is...>) {
 #ifdef __cpp_fold_expressions
-        if ((... || !std::get<Is>(subcasters).load(seq[Is], convert)))
+        if ((... || !std::get<Is>(subcasters).load(seq[Is], convert))) {
             return false;
+        }
 #else
-        for (bool r : {std::get<Is>(subcasters).load(seq[Is], convert)...})
-            if (!r)
+        for (bool r : {std::get<Is>(subcasters).load(seq[Is], convert)...}) {
+            if (!r) {
                 return false;
+            }
+        }
 #endif
         return true;
     }
 
     /* Implementation: Convert a C++ tuple into a Python tuple */
     template <typename T, size_t... Is>
-    static handle cast_impl(T &&src, return_value_policy policy, handle parent, index_sequence<Is...>) {
+    static handle
+    cast_impl(T &&src, return_value_policy policy, handle parent, index_sequence<Is...>) {
         PYBIND11_WORKAROUND_INCORRECT_MSVC_C4100(src, policy, parent);
         PYBIND11_WORKAROUND_INCORRECT_GCC_UNUSED_BUT_SET_PARAMETER(policy, parent);
-        std::array<object, size> entries{{
-            reinterpret_steal<object>(make_caster<Ts>::cast(std::get<Is>(std::forward<T>(src)), policy, parent))...
-        }};
-        for (const auto &entry: entries)
-            if (!entry)
+        std::array<object, size> entries{{reinterpret_steal<object>(
+            make_caster<Ts>::cast(std::get<Is>(std::forward<T>(src)), policy, parent))...}};
+        for (const auto &entry : entries) {
+            if (!entry) {
                 return handle();
+            }
+        }
         tuple result(size);
         int counter = 0;
-        for (auto & entry: entries)
+        for (auto &entry : entries) {
             PyTuple_SET_ITEM(result.ptr(), counter++, entry.release().ptr());
+        }
         return result.release();
     }
 
     Tuple<make_caster<Ts>...> subcasters;
 };
 
-template <typename T1, typename T2> class type_caster<std::pair<T1, T2>>
-    : public tuple_caster<std::pair, T1, T2> {};
+template <typename T1, typename T2>
+class type_caster<std::pair<T1, T2>> : public tuple_caster<std::pair, T1, T2> {};
 
-template <typename... Ts> class type_caster<std::tuple<Ts...>>
-    : public tuple_caster<std::tuple, Ts...> {};
+template <typename... Ts>
+class type_caster<std::tuple<Ts...>> : public tuple_caster<std::tuple, Ts...> {};
 
 /// Helper class which abstracts away certain actions. Users can provide specializations for
 /// custom holders, but it's only necessary if the type has a non-standard interface.
@@ -667,7 +744,7 @@ struct copyable_holder_caster : public type_caster_base<type> {
 public:
     using base = type_caster_base<type>;
     static_assert(std::is_base_of<base, type_caster<type>>::value,
-            "Holder classes are only supported for custom types");
+                  "Holder classes are only supported for custom types");
     using base::base;
     using base::cast;
     using base::typeinfo;
@@ -677,12 +754,12 @@ public:
         return base::template load_impl<copyable_holder_caster<type, holder_type>>(src, convert);
     }
 
-    explicit operator type*() { return this->value; }
+    explicit operator type *() { return this->value; }
     // static_cast works around compiler error with MSVC 17 and CUDA 10.2
     // see issue #2180
-    explicit operator type&() { return *(static_cast<type *>(this->value)); }
-    explicit operator holder_type*() { return std::addressof(holder); }
-    explicit operator holder_type&() { return holder; }
+    explicit operator type &() { return *(static_cast<type *>(this->value)); }
+    explicit operator holder_type *() { return std::addressof(holder); }
+    explicit operator holder_type &() { return holder; }
 
     static handle cast(const holder_type &src, return_value_policy, handle) {
         const auto *ptr = holder_helper<holder_type>::get(src);
@@ -692,8 +769,9 @@ public:
 protected:
     friend class type_caster_generic;
     void check_holder_compat() {
-        if (typeinfo->default_holder)
+        if (typeinfo->default_holder) {
             throw cast_error("Unable to load a custom holder type from a default-holder instance");
+        }
     }
 
     bool load_value(value_and_holder &&v_h) {
@@ -703,18 +781,23 @@ protected:
             return true;
         }
         throw cast_error("Unable to cast from non-held to held instance (T& to Holder<T>) "
-#if defined(NDEBUG)
-                         "(compile in debug mode for type information)");
+#if !defined(PYBIND11_DETAILED_ERROR_MESSAGES)
+                         "(#define PYBIND11_DETAILED_ERROR_MESSAGES or compile in debug mode for "
+                         "type information)");
 #else
                          "of type '"
                          + type_id<holder_type>() + "''");
 #endif
     }
 
-    template <typename T = holder_type, detail::enable_if_t<!std::is_constructible<T, const T &, type*>::value, int> = 0>
-    bool try_implicit_casts(handle, bool) { return false; }
+    template <typename T = holder_type,
+              detail::enable_if_t<!std::is_constructible<T, const T &, type *>::value, int> = 0>
+    bool try_implicit_casts(handle, bool) {
+        return false;
+    }
 
-    template <typename T = holder_type, detail::enable_if_t<std::is_constructible<T, const T &, type*>::value, int> = 0>
+    template <typename T = holder_type,
+              detail::enable_if_t<std::is_constructible<T, const T &, type *>::value, int> = 0>
     bool try_implicit_casts(handle src, bool convert) {
         for (auto &cast : typeinfo->implicit_casts) {
             copyable_holder_caster sub_caster(*cast.first);
@@ -729,13 +812,12 @@ protected:
 
     static bool try_direct_conversions(handle) { return false; }
 
-
     holder_type holder;
 };
 
 /// Specialize for the common std::shared_ptr, so users don't need to
 template <typename T>
-class type_caster<std::shared_ptr<T>> : public copyable_holder_caster<T, std::shared_ptr<T>> { };
+class type_caster<std::shared_ptr<T>> : public copyable_holder_caster<T, std::shared_ptr<T>> {};
 
 /// Type caster for holder types like std::unique_ptr.
 /// Please consider the SFINAE hook an implementation detail, as explained
@@ -743,7 +825,7 @@ class type_caster<std::shared_ptr<T>> : public copyable_holder_caster<T, std::sh
 template <typename type, typename holder_type, typename SFINAE = void>
 struct move_only_holder_caster {
     static_assert(std::is_base_of<type_caster_base<type>, type_caster<type>>::value,
-            "Holder classes are only supported for custom types");
+                  "Holder classes are only supported for custom types");
 
     static handle cast(holder_type &&src, return_value_policy, handle) {
         auto *ptr = holder_helper<holder_type>::get(src);
@@ -754,61 +836,101 @@ struct move_only_holder_caster {
 
 template <typename type, typename deleter>
 class type_caster<std::unique_ptr<type, deleter>>
-    : public move_only_holder_caster<type, std::unique_ptr<type, deleter>> { };
+    : public move_only_holder_caster<type, std::unique_ptr<type, deleter>> {};
 
 template <typename type, typename holder_type>
 using type_caster_holder = conditional_t<is_copy_constructible<holder_type>::value,
                                          copyable_holder_caster<type, holder_type>,
                                          move_only_holder_caster<type, holder_type>>;
 
-template <typename T, bool Value = false> struct always_construct_holder { static constexpr bool value = Value; };
+template <typename T, bool Value = false>
+struct always_construct_holder {
+    static constexpr bool value = Value;
+};
 
 /// Create a specialization for custom holder types (silently ignores std::shared_ptr)
-#define PYBIND11_DECLARE_HOLDER_TYPE(type, holder_type, ...) \
-    namespace pybind11 { namespace detail { \
-    template <typename type> \
-    struct always_construct_holder<holder_type> : always_construct_holder<void, ##__VA_ARGS__>  { }; \
-    template <typename type> \
-    class type_caster<holder_type, enable_if_t<!is_shared_ptr<holder_type>::value>> \
-        : public type_caster_holder<type, holder_type> { }; \
-    }}
+#define PYBIND11_DECLARE_HOLDER_TYPE(type, holder_type, ...)                                      \
+    PYBIND11_NAMESPACE_BEGIN(PYBIND11_NAMESPACE)                                                  \
+    namespace detail {                                                                            \
+    template <typename type>                                                                      \
+    struct always_construct_holder<holder_type> : always_construct_holder<void, ##__VA_ARGS__> {  \
+    };                                                                                            \
+    template <typename type>                                                                      \
+    class type_caster<holder_type, enable_if_t<!is_shared_ptr<holder_type>::value>>               \
+        : public type_caster_holder<type, holder_type> {};                                        \
+    }                                                                                             \
+    PYBIND11_NAMESPACE_END(PYBIND11_NAMESPACE)
 
 // PYBIND11_DECLARE_HOLDER_TYPE holder types:
-template <typename base, typename holder> struct is_holder_type :
-    std::is_base_of<detail::type_caster_holder<base, holder>, detail::type_caster<holder>> {};
+template <typename base, typename holder>
+struct is_holder_type
+    : std::is_base_of<detail::type_caster_holder<base, holder>, detail::type_caster<holder>> {};
 // Specialization for always-supported unique_ptr holders:
-template <typename base, typename deleter> struct is_holder_type<base, std::unique_ptr<base, deleter>> :
-    std::true_type {};
+template <typename base, typename deleter>
+struct is_holder_type<base, std::unique_ptr<base, deleter>> : std::true_type {};
 
-template <typename T> struct handle_type_name { static constexpr auto name = _<T>(); };
-template <> struct handle_type_name<bytes> { static constexpr auto name = _(PYBIND11_BYTES_NAME); };
-template <> struct handle_type_name<int_> { static constexpr auto name = _("int"); };
-template <> struct handle_type_name<iterable> { static constexpr auto name = _("Iterable"); };
-template <> struct handle_type_name<iterator> { static constexpr auto name = _("Iterator"); };
-template <> struct handle_type_name<none> { static constexpr auto name = _("None"); };
-template <> struct handle_type_name<args> { static constexpr auto name = _("*args"); };
-template <> struct handle_type_name<kwargs> { static constexpr auto name = _("**kwargs"); };
+template <typename T>
+struct handle_type_name {
+    static constexpr auto name = const_name<T>();
+};
+template <>
+struct handle_type_name<bool_> {
+    static constexpr auto name = const_name("bool");
+};
+template <>
+struct handle_type_name<bytes> {
+    static constexpr auto name = const_name(PYBIND11_BYTES_NAME);
+};
+template <>
+struct handle_type_name<int_> {
+    static constexpr auto name = const_name("int");
+};
+template <>
+struct handle_type_name<iterable> {
+    static constexpr auto name = const_name("Iterable");
+};
+template <>
+struct handle_type_name<iterator> {
+    static constexpr auto name = const_name("Iterator");
+};
+template <>
+struct handle_type_name<float_> {
+    static constexpr auto name = const_name("float");
+};
+template <>
+struct handle_type_name<none> {
+    static constexpr auto name = const_name("None");
+};
+template <>
+struct handle_type_name<args> {
+    static constexpr auto name = const_name("*args");
+};
+template <>
+struct handle_type_name<kwargs> {
+    static constexpr auto name = const_name("**kwargs");
+};
 
 template <typename type>
 struct pyobject_caster {
     template <typename T = type, enable_if_t<std::is_same<T, handle>::value, int> = 0>
-    bool load(handle src, bool /* convert */) { value = src; return static_cast<bool>(value); }
+    pyobject_caster() : value() {}
+
+    // `type` may not be default constructible (e.g. frozenset, anyset).  Initializing `value`
+    // to a nil handle is safe since it will only be accessed if `load` succeeds.
+    template <typename T = type, enable_if_t<std::is_base_of<object, T>::value, int> = 0>
+    pyobject_caster() : value(reinterpret_steal<type>(handle())) {}
+
+    template <typename T = type, enable_if_t<std::is_same<T, handle>::value, int> = 0>
+    bool load(handle src, bool /* convert */) {
+        value = src;
+        return static_cast<bool>(value);
+    }
 
     template <typename T = type, enable_if_t<std::is_base_of<object, T>::value, int> = 0>
     bool load(handle src, bool /* convert */) {
-#if PY_MAJOR_VERSION < 3 && !defined(PYBIND11_STR_LEGACY_PERMISSIVE)
-        // For Python 2, without this implicit conversion, Python code would
-        // need to be cluttered with six.ensure_text() or similar, only to be
-        // un-cluttered later after Python 2 support is dropped.
-        if (PYBIND11_SILENCE_MSVC_C4127(std::is_same<T, str>::value) && isinstance<bytes>(src)) {
-            PyObject *str_from_bytes = PyUnicode_FromEncodedObject(src.ptr(), "utf-8", nullptr);
-            if (!str_from_bytes) throw error_already_set();
-            value = reinterpret_steal<type>(str_from_bytes);
-            return true;
-        }
-#endif
-        if (!isinstance<type>(src))
+        if (!isinstance<type>(src)) {
             return false;
+        }
         value = reinterpret_borrow<type>(src);
         return true;
     }
@@ -820,7 +942,7 @@ struct pyobject_caster {
 };
 
 template <typename T>
-class type_caster<T, enable_if_t<is_pyobject<T>::value>> : public pyobject_caster<T> { };
+class type_caster<T, enable_if_t<is_pyobject<T>::value>> : public pyobject_caster<T> {};
 
 // Our conditions for enabling moving are quite restrictive:
 // At compile time:
@@ -831,65 +953,83 @@ class type_caster<T, enable_if_t<is_pyobject<T>::value>> : public pyobject_caste
 // - if the type is non-copy-constructible, the object must be the sole owner of the type (i.e. it
 //   must have ref_count() == 1)h
 // If any of the above are not satisfied, we fall back to copying.
-template <typename T> using move_is_plain_type = satisfies_none_of<T,
-    std::is_void, std::is_pointer, std::is_reference, std::is_const
->;
-template <typename T, typename SFINAE = void> struct move_always : std::false_type {};
-template <typename T> struct move_always<T, enable_if_t<all_of<
-    move_is_plain_type<T>,
-    negation<is_copy_constructible<T>>,
-    std::is_move_constructible<T>,
-    std::is_same<decltype(std::declval<make_caster<T>>().operator T&()), T&>
->::value>> : std::true_type {};
-template <typename T, typename SFINAE = void> struct move_if_unreferenced : std::false_type {};
-template <typename T> struct move_if_unreferenced<T, enable_if_t<all_of<
-    move_is_plain_type<T>,
-    negation<move_always<T>>,
-    std::is_move_constructible<T>,
-    std::is_same<decltype(std::declval<make_caster<T>>().operator T&()), T&>
->::value>> : std::true_type {};
-template <typename T> using move_never = none_of<move_always<T>, move_if_unreferenced<T>>;
+template <typename T>
+using move_is_plain_type
+    = satisfies_none_of<T, std::is_void, std::is_pointer, std::is_reference, std::is_const>;
+template <typename T, typename SFINAE = void>
+struct move_always : std::false_type {};
+template <typename T>
+struct move_always<
+    T,
+    enable_if_t<
+        all_of<move_is_plain_type<T>,
+               negation<is_copy_constructible<T>>,
+               std::is_move_constructible<T>,
+               std::is_same<decltype(std::declval<make_caster<T>>().operator T &()), T &>>::value>>
+    : std::true_type {};
+template <typename T, typename SFINAE = void>
+struct move_if_unreferenced : std::false_type {};
+template <typename T>
+struct move_if_unreferenced<
+    T,
+    enable_if_t<
+        all_of<move_is_plain_type<T>,
+               negation<move_always<T>>,
+               std::is_move_constructible<T>,
+               std::is_same<decltype(std::declval<make_caster<T>>().operator T &()), T &>>::value>>
+    : std::true_type {};
+template <typename T>
+using move_never = none_of<move_always<T>, move_if_unreferenced<T>>;
 
 // Detect whether returning a `type` from a cast on type's type_caster is going to result in a
 // reference or pointer to a local variable of the type_caster.  Basically, only
 // non-reference/pointer `type`s and reference/pointers from a type_caster_generic are safe;
 // everything else returns a reference/pointer to a local variable.
-template <typename type> using cast_is_temporary_value_reference = bool_constant<
-    (std::is_reference<type>::value || std::is_pointer<type>::value) &&
-    !std::is_base_of<type_caster_generic, make_caster<type>>::value &&
-    !std::is_same<intrinsic_t<type>, void>::value
->;
+template <typename type>
+using cast_is_temporary_value_reference
+    = bool_constant<(std::is_reference<type>::value || std::is_pointer<type>::value)
+                    && !std::is_base_of<type_caster_generic, make_caster<type>>::value
+                    && !std::is_same<intrinsic_t<type>, void>::value>;
 
 // When a value returned from a C++ function is being cast back to Python, we almost always want to
 // force `policy = move`, regardless of the return value policy the function/method was declared
 // with.
-template <typename Return, typename SFINAE = void> struct return_value_policy_override {
+template <typename Return, typename SFINAE = void>
+struct return_value_policy_override {
     static return_value_policy policy(return_value_policy p) { return p; }
 };
 
-template <typename Return> struct return_value_policy_override<Return,
-        detail::enable_if_t<std::is_base_of<type_caster_generic, make_caster<Return>>::value, void>> {
+template <typename Return>
+struct return_value_policy_override<
+    Return,
+    detail::enable_if_t<std::is_base_of<type_caster_generic, make_caster<Return>>::value, void>> {
     static return_value_policy policy(return_value_policy p) {
-        return !std::is_lvalue_reference<Return>::value &&
-               !std::is_pointer<Return>::value
-                   ? return_value_policy::move : p;
+        return !std::is_lvalue_reference<Return>::value && !std::is_pointer<Return>::value
+                   ? return_value_policy::move
+                   : p;
     }
 };
 
 // Basic python -> C++ casting; throws if casting fails
-template <typename T, typename SFINAE> type_caster<T, SFINAE> &load_type(type_caster<T, SFINAE> &conv, const handle &handle) {
+template <typename T, typename SFINAE>
+type_caster<T, SFINAE> &load_type(type_caster<T, SFINAE> &conv, const handle &handle) {
+    static_assert(!detail::is_pyobject<T>::value,
+                  "Internal error: type_caster should only be used for C++ types");
     if (!conv.load(handle, true)) {
-#if defined(NDEBUG)
-        throw cast_error("Unable to cast Python instance to C++ type (compile in debug mode for details)");
+#if !defined(PYBIND11_DETAILED_ERROR_MESSAGES)
+        throw cast_error("Unable to cast Python instance to C++ type (#define "
+                         "PYBIND11_DETAILED_ERROR_MESSAGES or compile in debug mode for details)");
 #else
-        throw cast_error("Unable to cast Python instance of type " +
-            (std::string) str(type::handle_of(handle)) + " to C++ type '" + type_id<T>() + "'");
+        throw cast_error("Unable to cast Python instance of type "
+                         + (std::string) str(type::handle_of(handle)) + " to C++ type '"
+                         + type_id<T>() + "'");
 #endif
     }
     return conv;
 }
 // Wrapper around the above that also constructs and returns a type_caster
-template <typename T> make_caster<T> load_type(const handle &handle) {
+template <typename T>
+make_caster<T> load_type(const handle &handle) {
     make_caster<T> conv;
     load_type(conv, handle);
     return conv;
@@ -902,44 +1042,60 @@ template <typename T, detail::enable_if_t<!detail::is_pyobject<T>::value, int> =
 T cast(const handle &handle) {
     using namespace detail;
     static_assert(!cast_is_temporary_value_reference<T>::value,
-            "Unable to cast type to reference: value is local to type caster");
+                  "Unable to cast type to reference: value is local to type caster");
     return cast_op<T>(load_type<T>(handle));
 }
 
 // pytype -> pytype (calls converting constructor)
 template <typename T, detail::enable_if_t<detail::is_pyobject<T>::value, int> = 0>
-T cast(const handle &handle) { return T(reinterpret_borrow<object>(handle)); }
+T cast(const handle &handle) {
+    return T(reinterpret_borrow<object>(handle));
+}
 
 // C++ type -> py::object
 template <typename T, detail::enable_if_t<!detail::is_pyobject<T>::value, int> = 0>
-object cast(T &&value, return_value_policy policy = return_value_policy::automatic_reference,
+object cast(T &&value,
+            return_value_policy policy = return_value_policy::automatic_reference,
             handle parent = handle()) {
     using no_ref_T = typename std::remove_reference<T>::type;
-    if (policy == return_value_policy::automatic)
-        policy = std::is_pointer<no_ref_T>::value ? return_value_policy::take_ownership :
-                 std::is_lvalue_reference<T>::value ? return_value_policy::copy : return_value_policy::move;
-    else if (policy == return_value_policy::automatic_reference)
-        policy = std::is_pointer<no_ref_T>::value ? return_value_policy::reference :
-                 std::is_lvalue_reference<T>::value ? return_value_policy::copy : return_value_policy::move;
-    return reinterpret_steal<object>(detail::make_caster<T>::cast(std::forward<T>(value), policy, parent));
+    if (policy == return_value_policy::automatic) {
+        policy = std::is_pointer<no_ref_T>::value     ? return_value_policy::take_ownership
+                 : std::is_lvalue_reference<T>::value ? return_value_policy::copy
+                                                      : return_value_policy::move;
+    } else if (policy == return_value_policy::automatic_reference) {
+        policy = std::is_pointer<no_ref_T>::value     ? return_value_policy::reference
+                 : std::is_lvalue_reference<T>::value ? return_value_policy::copy
+                                                      : return_value_policy::move;
+    }
+    return reinterpret_steal<object>(
+        detail::make_caster<T>::cast(std::forward<T>(value), policy, parent));
 }
 
-template <typename T> T handle::cast() const { return pybind11::cast<T>(*this); }
-template <> inline void handle::cast() const { return; }
+template <typename T>
+T handle::cast() const {
+    return pybind11::cast<T>(*this);
+}
+template <>
+inline void handle::cast() const {
+    return;
+}
 
 template <typename T>
 detail::enable_if_t<!detail::move_never<T>::value, T> move(object &&obj) {
-    if (obj.ref_count() > 1)
-#if defined(NDEBUG)
-        throw cast_error("Unable to cast Python instance to C++ rvalue: instance has multiple references"
-            " (compile in debug mode for details)");
+    if (obj.ref_count() > 1) {
+#if !defined(PYBIND11_DETAILED_ERROR_MESSAGES)
+        throw cast_error(
+            "Unable to cast Python instance to C++ rvalue: instance has multiple references"
+            " (#define PYBIND11_DETAILED_ERROR_MESSAGES or compile in debug mode for details)");
 #else
-        throw cast_error("Unable to move from Python " + (std::string) str(type::handle_of(obj)) +
-                " instance to C++ " + type_id<T>() + " instance: instance has multiple references");
+        throw cast_error("Unable to move from Python " + (std::string) str(type::handle_of(obj))
+                         + " instance to C++ " + type_id<T>()
+                         + " instance: instance has multiple references");
 #endif
+    }
 
     // Move into a temporary and return that, because the reference may be a local value of `conv`
-    T ret = std::move(detail::load_type<T>(obj).operator T&());
+    T ret = std::move(detail::load_type<T>(obj).operator T &());
     return ret;
 }
 
@@ -948,58 +1104,100 @@ detail::enable_if_t<!detail::move_never<T>::value, T> move(object &&obj) {
 //   object has multiple references, but trying to copy will fail to compile.
 // - If both movable and copyable, check ref count: if 1, move; otherwise copy
 // - Otherwise (not movable), copy.
-template <typename T> detail::enable_if_t<detail::move_always<T>::value, T> cast(object &&object) {
+template <typename T>
+detail::enable_if_t<!detail::is_pyobject<T>::value && detail::move_always<T>::value, T>
+cast(object &&object) {
     return move<T>(std::move(object));
 }
-template <typename T> detail::enable_if_t<detail::move_if_unreferenced<T>::value, T> cast(object &&object) {
-    if (object.ref_count() > 1)
+template <typename T>
+detail::enable_if_t<!detail::is_pyobject<T>::value && detail::move_if_unreferenced<T>::value, T>
+cast(object &&object) {
+    if (object.ref_count() > 1) {
         return cast<T>(object);
+    }
     return move<T>(std::move(object));
 }
-template <typename T> detail::enable_if_t<detail::move_never<T>::value, T> cast(object &&object) {
+template <typename T>
+detail::enable_if_t<!detail::is_pyobject<T>::value && detail::move_never<T>::value, T>
+cast(object &&object) {
     return cast<T>(object);
 }
 
-template <typename T> T object::cast() const & { return pybind11::cast<T>(*this); }
-template <typename T> T object::cast() && { return pybind11::cast<T>(std::move(*this)); }
-template <> inline void object::cast() const & { return; }
-template <> inline void object::cast() && { return; }
+// pytype rvalue -> pytype (calls converting constructor)
+template <typename T>
+detail::enable_if_t<detail::is_pyobject<T>::value, T> cast(object &&object) {
+    return T(std::move(object));
+}
+
+template <typename T>
+T object::cast() const & {
+    return pybind11::cast<T>(*this);
+}
+template <typename T>
+T object::cast() && {
+    return pybind11::cast<T>(std::move(*this));
+}
+template <>
+inline void object::cast() const & {
+    return;
+}
+template <>
+inline void object::cast() && {
+    return;
+}
 
 PYBIND11_NAMESPACE_BEGIN(detail)
 
 // Declared in pytypes.h:
 template <typename T, enable_if_t<!is_pyobject<T>::value, int>>
-object object_or_cast(T &&o) { return pybind11::cast(std::forward<T>(o)); }
+object object_or_cast(T &&o) {
+    return pybind11::cast(std::forward<T>(o));
+}
 
-struct override_unused {}; // Placeholder type for the unneeded (and dead code) static variable in the PYBIND11_OVERRIDE_OVERRIDE macro
-template <typename ret_type> using override_caster_t = conditional_t<
-    cast_is_temporary_value_reference<ret_type>::value, make_caster<ret_type>, override_unused>;
+// Placeholder type for the unneeded (and dead code) static variable in the
+// PYBIND11_OVERRIDE_OVERRIDE macro
+struct override_unused {};
+template <typename ret_type>
+using override_caster_t = conditional_t<cast_is_temporary_value_reference<ret_type>::value,
+                                        make_caster<ret_type>,
+                                        override_unused>;
 
 // Trampoline use: for reference/pointer types to value-converted values, we do a value cast, then
 // store the result in the given variable.  For other types, this is a no-op.
-template <typename T> enable_if_t<cast_is_temporary_value_reference<T>::value, T> cast_ref(object &&o, make_caster<T> &caster) {
+template <typename T>
+enable_if_t<cast_is_temporary_value_reference<T>::value, T> cast_ref(object &&o,
+                                                                     make_caster<T> &caster) {
     return cast_op<T>(load_type(caster, o));
 }
-template <typename T> enable_if_t<!cast_is_temporary_value_reference<T>::value, T> cast_ref(object &&, override_unused &) {
-    pybind11_fail("Internal error: cast_ref fallback invoked"); }
+template <typename T>
+enable_if_t<!cast_is_temporary_value_reference<T>::value, T> cast_ref(object &&,
+                                                                      override_unused &) {
+    pybind11_fail("Internal error: cast_ref fallback invoked");
+}
 
-// Trampoline use: Having a pybind11::cast with an invalid reference type is going to static_assert, even
-// though if it's in dead code, so we provide a "trampoline" to pybind11::cast that only does anything in
-// cases where pybind11::cast is valid.
-template <typename T> enable_if_t<!cast_is_temporary_value_reference<T>::value, T> cast_safe(object &&o) {
-    return pybind11::cast<T>(std::move(o)); }
-template <typename T> enable_if_t<cast_is_temporary_value_reference<T>::value, T> cast_safe(object &&) {
-    pybind11_fail("Internal error: cast_safe fallback invoked"); }
-template <> inline void cast_safe<void>(object &&) {}
+// Trampoline use: Having a pybind11::cast with an invalid reference type is going to
+// static_assert, even though if it's in dead code, so we provide a "trampoline" to pybind11::cast
+// that only does anything in cases where pybind11::cast is valid.
+template <typename T>
+enable_if_t<cast_is_temporary_value_reference<T>::value, T> cast_safe(object &&) {
+    pybind11_fail("Internal error: cast_safe fallback invoked");
+}
+template <typename T>
+enable_if_t<std::is_void<T>::value, void> cast_safe(object &&) {}
+template <typename T>
+enable_if_t<detail::none_of<cast_is_temporary_value_reference<T>, std::is_void<T>>::value, T>
+cast_safe(object &&o) {
+    return pybind11::cast<T>(std::move(o));
+}
 
 PYBIND11_NAMESPACE_END(detail)
 
 // The overloads could coexist, i.e. the #if is not strictly speaking needed,
 // but it is an easy minor optimization.
-#if defined(NDEBUG)
+#if !defined(PYBIND11_DETAILED_ERROR_MESSAGES)
 inline cast_error cast_error_unable_to_convert_call_arg() {
-    return cast_error(
-        "Unable to convert call argument to Python object (compile in debug mode for details)");
+    return cast_error("Unable to convert call argument to Python object (#define "
+                      "PYBIND11_DETAILED_ERROR_MESSAGES or compile in debug mode for details)");
 }
 #else
 inline cast_error cast_error_unable_to_convert_call_arg(const std::string &name,
@@ -1010,47 +1208,58 @@ inline cast_error cast_error_unable_to_convert_call_arg(const std::string &name,
 #endif
 
 template <return_value_policy policy = return_value_policy::automatic_reference>
-tuple make_tuple() { return tuple(0); }
+tuple make_tuple() {
+    return tuple(0);
+}
 
-template <return_value_policy policy = return_value_policy::automatic_reference,
-          typename... Args> tuple make_tuple(Args&&... args_) {
+template <return_value_policy policy = return_value_policy::automatic_reference, typename... Args>
+tuple make_tuple(Args &&...args_) {
     constexpr size_t size = sizeof...(Args);
-    std::array<object, size> args {
-        { reinterpret_steal<object>(detail::make_caster<Args>::cast(
-            std::forward<Args>(args_), policy, nullptr))... }
-    };
+    std::array<object, size> args{{reinterpret_steal<object>(
+        detail::make_caster<Args>::cast(std::forward<Args>(args_), policy, nullptr))...}};
     for (size_t i = 0; i < args.size(); i++) {
         if (!args[i]) {
-#if defined(NDEBUG)
+#if !defined(PYBIND11_DETAILED_ERROR_MESSAGES)
             throw cast_error_unable_to_convert_call_arg();
 #else
-            std::array<std::string, size> argtypes { {type_id<Args>()...} };
+            std::array<std::string, size> argtypes{{type_id<Args>()...}};
             throw cast_error_unable_to_convert_call_arg(std::to_string(i), argtypes[i]);
 #endif
         }
     }
     tuple result(size);
     int counter = 0;
-    for (auto &arg_value : args)
+    for (auto &arg_value : args) {
         PyTuple_SET_ITEM(result.ptr(), counter++, arg_value.release().ptr());
+    }
     return result;
 }
 
 /// \ingroup annotations
 /// Annotation for arguments
 struct arg {
-    /// Constructs an argument with the name of the argument; if null or omitted, this is a positional argument.
-    constexpr explicit arg(const char *name = nullptr) : name(name), flag_noconvert(false), flag_none(true) { }
+    /// Constructs an argument with the name of the argument; if null or omitted, this is a
+    /// positional argument.
+    constexpr explicit arg(const char *name = nullptr)
+        : name(name), flag_noconvert(false), flag_none(true) {}
     /// Assign a value to this argument
-    template <typename T> arg_v operator=(T &&value) const;
+    template <typename T>
+    arg_v operator=(T &&value) const;
     /// Indicate that the type should not be converted in the type caster
-    arg &noconvert(bool flag = true) { flag_noconvert = flag; return *this; }
+    arg &noconvert(bool flag = true) {
+        flag_noconvert = flag;
+        return *this;
+    }
     /// Indicates that the argument should/shouldn't allow None (e.g. for nullable pointer args)
-    arg &none(bool flag = true) { flag_none = flag; return *this; }
+    arg &none(bool flag = true) {
+        flag_none = flag;
+        return *this;
+    }
 
-    const char *name; ///< If non-null, this is a named kwargs argument
-    bool flag_noconvert : 1; ///< If set, do not allow conversion (requires a supporting type caster!)
-    bool flag_none : 1; ///< If set (the default), allow None to be passed to this argument
+    const char *name;        ///< If non-null, this is a named kwargs argument
+    bool flag_noconvert : 1; ///< If set, do not allow conversion (requires a supporting type
+                             ///< caster!)
+    bool flag_none : 1;      ///< If set (the default), allow None to be passed to this argument
 };
 
 /// \ingroup annotations
@@ -1059,13 +1268,12 @@ struct arg_v : arg {
 private:
     template <typename T>
     arg_v(arg &&base, T &&x, const char *descr = nullptr)
-        : arg(base),
-          value(reinterpret_steal<object>(
-              detail::make_caster<T>::cast(x, return_value_policy::automatic, {})
-          )),
+        : arg(base), value(reinterpret_steal<object>(detail::make_caster<T>::cast(
+                         std::forward<T>(x), return_value_policy::automatic, {}))),
           descr(descr)
-#if !defined(NDEBUG)
-        , type(type_id<T>())
+#if defined(PYBIND11_DETAILED_ERROR_MESSAGES)
+          ,
+          type(type_id<T>())
 #endif
     {
         // Workaround! See:
@@ -1080,37 +1288,43 @@ public:
     /// Direct construction with name, default, and description
     template <typename T>
     arg_v(const char *name, T &&x, const char *descr = nullptr)
-        : arg_v(arg(name), std::forward<T>(x), descr) { }
+        : arg_v(arg(name), std::forward<T>(x), descr) {}
 
     /// Called internally when invoking `py::arg("a") = value`
     template <typename T>
     arg_v(const arg &base, T &&x, const char *descr = nullptr)
-        : arg_v(arg(base), std::forward<T>(x), descr) { }
+        : arg_v(arg(base), std::forward<T>(x), descr) {}
 
     /// Same as `arg::noconvert()`, but returns *this as arg_v&, not arg&
-    arg_v &noconvert(bool flag = true) { arg::noconvert(flag); return *this; }
+    arg_v &noconvert(bool flag = true) {
+        arg::noconvert(flag);
+        return *this;
+    }
 
     /// Same as `arg::nonone()`, but returns *this as arg_v&, not arg&
-    arg_v &none(bool flag = true) { arg::none(flag); return *this; }
+    arg_v &none(bool flag = true) {
+        arg::none(flag);
+        return *this;
+    }
 
     /// The default value
     object value;
     /// The (optional) description of the default value
     const char *descr;
-#if !defined(NDEBUG)
+#if defined(PYBIND11_DETAILED_ERROR_MESSAGES)
     /// The C++ type name of the default value (only available when compiled in debug mode)
     std::string type;
 #endif
 };
 
 /// \ingroup annotations
-/// Annotation indicating that all following arguments are keyword-only; the is the equivalent of an
-/// unnamed '*' argument (in Python 3)
+/// Annotation indicating that all following arguments are keyword-only; the is the equivalent of
+/// an unnamed '*' argument
 struct kw_only {};
 
 /// \ingroup annotations
-/// Annotation indicating that all previous arguments are positional-only; the is the equivalent of an
-/// unnamed '/' argument (in Python 3.8)
+/// Annotation indicating that all previous arguments are positional-only; the is the equivalent of
+/// an unnamed '/' argument (in Python 3.8)
 struct pos_only {};
 
 template <typename T>
@@ -1119,7 +1333,8 @@ arg_v arg::operator=(T &&value) const {
 }
 
 /// Alias for backward compatibility -- to be removed in version 2.0
-template <typename /*unused*/> using arg_t = arg_v;
+template <typename /*unused*/>
+using arg_t = arg_v;
 
 inline namespace literals {
 /** \rst
@@ -1130,6 +1345,11 @@ constexpr arg operator"" _a(const char *name, size_t) { return arg(name); }
 
 PYBIND11_NAMESPACE_BEGIN(detail)
 
+template <typename T>
+using is_kw_only = std::is_same<intrinsic_t<T>, kw_only>;
+template <typename T>
+using is_pos_only = std::is_same<intrinsic_t<T>, pos_only>;
+
 // forward declaration (definition in attr.h)
 struct function_record;
 
@@ -1157,57 +1377,63 @@ struct function_call {
     handle init_self;
 };
 
-
 /// Helper class which loads arguments for C++ functions called from Python
 template <typename... Args>
 class argument_loader {
     using indices = make_index_sequence<sizeof...(Args)>;
 
-    template <typename Arg> using argument_is_args   = std::is_same<intrinsic_t<Arg>, args>;
-    template <typename Arg> using argument_is_kwargs = std::is_same<intrinsic_t<Arg>, kwargs>;
-    // Get args/kwargs argument positions relative to the end of the argument list:
-    static constexpr auto args_pos = constexpr_first<argument_is_args, Args...>() - (int) sizeof...(Args),
-                        kwargs_pos = constexpr_first<argument_is_kwargs, Args...>() - (int) sizeof...(Args);
+    template <typename Arg>
+    using argument_is_args = std::is_same<intrinsic_t<Arg>, args>;
+    template <typename Arg>
+    using argument_is_kwargs = std::is_same<intrinsic_t<Arg>, kwargs>;
+    // Get kwargs argument position, or -1 if not present:
+    static constexpr auto kwargs_pos = constexpr_last<argument_is_kwargs, Args...>();
 
-    static constexpr bool args_kwargs_are_last = kwargs_pos >= - 1 && args_pos >= kwargs_pos - 1;
-
-    static_assert(args_kwargs_are_last, "py::args/py::kwargs are only permitted as the last argument(s) of a function");
+    static_assert(kwargs_pos == -1 || kwargs_pos == (int) sizeof...(Args) - 1,
+                  "py::kwargs is only permitted as the last argument of a function");
 
 public:
-    static constexpr bool has_kwargs = kwargs_pos < 0;
-    static constexpr bool has_args = args_pos < 0;
+    static constexpr bool has_kwargs = kwargs_pos != -1;
+
+    // py::args argument position; -1 if not present.
+    static constexpr int args_pos = constexpr_last<argument_is_args, Args...>();
+
+    static_assert(args_pos == -1 || args_pos == constexpr_first<argument_is_args, Args...>(),
+                  "py::args cannot be specified more than once");
 
     static constexpr auto arg_names = concat(type_descr(make_caster<Args>::name)...);
 
-    bool load_args(function_call &call) {
-        return load_impl_sequence(call, indices{});
-    }
+    bool load_args(function_call &call) { return load_impl_sequence(call, indices{}); }
 
     template <typename Return, typename Guard, typename Func>
     // NOLINTNEXTLINE(readability-const-return-type)
     enable_if_t<!std::is_void<Return>::value, Return> call(Func &&f) && {
-        return std::move(*this).template call_impl<remove_cv_t<Return>>(std::forward<Func>(f), indices{}, Guard{});
+        return std::move(*this).template call_impl<remove_cv_t<Return>>(
+            std::forward<Func>(f), indices{}, Guard{});
     }
 
     template <typename Return, typename Guard, typename Func>
     enable_if_t<std::is_void<Return>::value, void_type> call(Func &&f) && {
-        std::move(*this).template call_impl<remove_cv_t<Return>>(std::forward<Func>(f), indices{}, Guard{});
+        std::move(*this).template call_impl<remove_cv_t<Return>>(
+            std::forward<Func>(f), indices{}, Guard{});
         return void_type();
     }
 
 private:
-
     static bool load_impl_sequence(function_call &, index_sequence<>) { return true; }
 
     template <size_t... Is>
     bool load_impl_sequence(function_call &call, index_sequence<Is...>) {
 #ifdef __cpp_fold_expressions
-        if ((... || !std::get<Is>(argcasters).load(call.args[Is], call.args_convert[Is])))
+        if ((... || !std::get<Is>(argcasters).load(call.args[Is], call.args_convert[Is]))) {
             return false;
+        }
 #else
-        for (bool r : {std::get<Is>(argcasters).load(call.args[Is], call.args_convert[Is])...})
-            if (!r)
+        for (bool r : {std::get<Is>(argcasters).load(call.args[Is], call.args_convert[Is])...}) {
+            if (!r) {
                 return false;
+            }
+        }
 #endif
         return true;
     }
@@ -1227,7 +1453,7 @@ class simple_collector {
 public:
     template <typename... Ts>
     explicit simple_collector(Ts &&...values)
-        : m_args(pybind11::make_tuple<policy>(std::forward<Ts>(values)...)) { }
+        : m_args(pybind11::make_tuple<policy>(std::forward<Ts>(values)...)) {}
 
     const tuple &args() const & { return m_args; }
     dict kwargs() const { return {}; }
@@ -1237,8 +1463,9 @@ public:
     /// Call a Python function and pass the collected arguments
     object call(PyObject *ptr) const {
         PyObject *result = PyObject_CallObject(ptr, m_args.ptr());
-        if (!result)
+        if (!result) {
             throw error_already_set();
+        }
         return reinterpret_steal<object>(result);
     }
 
@@ -1270,62 +1497,66 @@ public:
     /// Call a Python function and pass the collected arguments
     object call(PyObject *ptr) const {
         PyObject *result = PyObject_Call(ptr, m_args.ptr(), m_kwargs.ptr());
-        if (!result)
+        if (!result) {
             throw error_already_set();
+        }
         return reinterpret_steal<object>(result);
     }
 
 private:
     template <typename T>
     void process(list &args_list, T &&x) {
-        auto o = reinterpret_steal<object>(detail::make_caster<T>::cast(std::forward<T>(x), policy, {}));
+        auto o = reinterpret_steal<object>(
+            detail::make_caster<T>::cast(std::forward<T>(x), policy, {}));
         if (!o) {
-#if defined(NDEBUG)
+#if !defined(PYBIND11_DETAILED_ERROR_MESSAGES)
             throw cast_error_unable_to_convert_call_arg();
 #else
-            throw cast_error_unable_to_convert_call_arg(
-                std::to_string(args_list.size()), type_id<T>());
+            throw cast_error_unable_to_convert_call_arg(std::to_string(args_list.size()),
+                                                        type_id<T>());
 #endif
         }
-        args_list.append(o);
+        args_list.append(std::move(o));
     }
 
     void process(list &args_list, detail::args_proxy ap) {
-        for (auto a : ap)
+        for (auto a : ap) {
             args_list.append(a);
+        }
     }
 
-    void process(list &/*args_list*/, arg_v a) {
-        if (!a.name)
-#if defined(NDEBUG)
+    void process(list & /*args_list*/, arg_v a) {
+        if (!a.name) {
+#if !defined(PYBIND11_DETAILED_ERROR_MESSAGES)
             nameless_argument_error();
 #else
             nameless_argument_error(a.type);
 #endif
-
+        }
         if (m_kwargs.contains(a.name)) {
-#if defined(NDEBUG)
+#if !defined(PYBIND11_DETAILED_ERROR_MESSAGES)
             multiple_values_error();
 #else
             multiple_values_error(a.name);
 #endif
         }
         if (!a.value) {
-#if defined(NDEBUG)
+#if !defined(PYBIND11_DETAILED_ERROR_MESSAGES)
             throw cast_error_unable_to_convert_call_arg();
 #else
             throw cast_error_unable_to_convert_call_arg(a.name, a.type);
 #endif
         }
-        m_kwargs[a.name] = a.value;
+        m_kwargs[a.name] = std::move(a.value);
     }
 
-    void process(list &/*args_list*/, detail::kwargs_proxy kp) {
-        if (!kp)
+    void process(list & /*args_list*/, detail::kwargs_proxy kp) {
+        if (!kp) {
             return;
+        }
         for (auto k : reinterpret_borrow<dict>(kp)) {
             if (m_kwargs.contains(k.first)) {
-#if defined(NDEBUG)
+#if !defined(PYBIND11_DETAILED_ERROR_MESSAGES)
                 multiple_values_error();
 #else
                 multiple_values_error(str(k.first));
@@ -1336,17 +1567,20 @@ private:
     }
 
     [[noreturn]] static void nameless_argument_error() {
-        throw type_error("Got kwargs without a name; only named arguments "
-                         "may be passed via py::arg() to a python function call. "
-                         "(compile in debug mode for details)");
+        throw type_error(
+            "Got kwargs without a name; only named arguments "
+            "may be passed via py::arg() to a python function call. "
+            "(#define PYBIND11_DETAILED_ERROR_MESSAGES or compile in debug mode for details)");
     }
     [[noreturn]] static void nameless_argument_error(const std::string &type) {
-        throw type_error("Got kwargs without a name of type '" + type + "'; only named "
-                         "arguments may be passed via py::arg() to a python function call. ");
+        throw type_error("Got kwargs without a name of type '" + type
+                         + "'; only named "
+                           "arguments may be passed via py::arg() to a python function call. ");
     }
     [[noreturn]] static void multiple_values_error() {
-        throw type_error("Got multiple values for keyword argument "
-                         "(compile in debug mode for details)");
+        throw type_error(
+            "Got multiple values for keyword argument "
+            "(#define PYBIND11_DETAILED_ERROR_MESSAGES or compile in debug mode for details)");
     }
 
     [[noreturn]] static void multiple_values_error(const std::string &name) {
@@ -1363,36 +1597,37 @@ private:
 // fails to compile enable_if_t<!all_of<is_positional<Args>...>::value>
 // (tested with ICC 2021.1 Beta 20200827).
 template <typename... Args>
-constexpr bool args_are_all_positional()
-{
-  return all_of<is_positional<Args>...>::value;
+constexpr bool args_are_all_positional() {
+    return all_of<is_positional<Args>...>::value;
 }
 
 /// Collect only positional arguments for a Python function call
-template <return_value_policy policy, typename... Args,
+template <return_value_policy policy,
+          typename... Args,
           typename = enable_if_t<args_are_all_positional<Args...>()>>
 simple_collector<policy> collect_arguments(Args &&...args) {
     return simple_collector<policy>(std::forward<Args>(args)...);
 }
 
 /// Collect all arguments, including keywords and unpacking (only instantiated when needed)
-template <return_value_policy policy, typename... Args,
+template <return_value_policy policy,
+          typename... Args,
           typename = enable_if_t<!args_are_all_positional<Args...>()>>
 unpacking_collector<policy> collect_arguments(Args &&...args) {
     // Following argument order rules for generalized unpacking according to PEP 448
-    static_assert(
-        constexpr_last<is_positional, Args...>() < constexpr_first<is_keyword_or_ds, Args...>()
-        && constexpr_last<is_s_unpacking, Args...>() < constexpr_first<is_ds_unpacking, Args...>(),
-        "Invalid function call: positional args must precede keywords and ** unpacking; "
-        "* unpacking must precede ** unpacking"
-    );
+    static_assert(constexpr_last<is_positional, Args...>()
+                          < constexpr_first<is_keyword_or_ds, Args...>()
+                      && constexpr_last<is_s_unpacking, Args...>()
+                             < constexpr_first<is_ds_unpacking, Args...>(),
+                  "Invalid function call: positional args must precede keywords and ** unpacking; "
+                  "* unpacking must precede ** unpacking");
     return unpacking_collector<policy>(std::forward<Args>(args)...);
 }
 
 template <typename Derived>
 template <return_value_policy policy, typename... Args>
 object object_api<Derived>::operator()(Args &&...args) const {
-#if !defined(NDEBUG) && PY_VERSION_HEX >= 0x03060000
+#ifndef NDEBUG
     if (!PyGILState_Check()) {
         pybind11_fail("pybind11::object_api<>::operator() PyGILState_Check() failure.");
     }
@@ -1408,25 +1643,25 @@ object object_api<Derived>::call(Args &&...args) const {
 
 PYBIND11_NAMESPACE_END(detail)
 
-
-template<typename T>
+template <typename T>
 handle type::handle_of() {
-   static_assert(
-      std::is_base_of<detail::type_caster_generic, detail::make_caster<T>>::value,
-      "py::type::of<T> only supports the case where T is a registered C++ types."
-    );
+    static_assert(std::is_base_of<detail::type_caster_generic, detail::make_caster<T>>::value,
+                  "py::type::of<T> only supports the case where T is a registered C++ types.");
 
     return detail::get_type_handle(typeid(T), true);
 }
 
-
-#define PYBIND11_MAKE_OPAQUE(...) \
-    namespace pybind11 { namespace detail { \
-        template<> class type_caster<__VA_ARGS__> : public type_caster_base<__VA_ARGS__> { }; \
-    }}
+#define PYBIND11_MAKE_OPAQUE(...)                                                                 \
+    PYBIND11_NAMESPACE_BEGIN(PYBIND11_NAMESPACE)                                                  \
+    namespace detail {                                                                            \
+    template <>                                                                                   \
+    class type_caster<__VA_ARGS__> : public type_caster_base<__VA_ARGS__> {};                     \
+    }                                                                                             \
+    PYBIND11_NAMESPACE_END(PYBIND11_NAMESPACE)
 
 /// Lets you pass a type containing a `,` through a macro parameter without needing a separate
-/// typedef, e.g.: `PYBIND11_OVERRIDE(PYBIND11_TYPE(ReturnType<A, B>), PYBIND11_TYPE(Parent<C, D>), f, arg)`
+/// typedef, e.g.:
+/// `PYBIND11_OVERRIDE(PYBIND11_TYPE(ReturnType<A, B>), PYBIND11_TYPE(Parent<C, D>), f, arg)`
 #define PYBIND11_TYPE(...) __VA_ARGS__
 
 PYBIND11_NAMESPACE_END(PYBIND11_NAMESPACE)
diff --git a/ext/pybind11/include/pybind11/chrono.h b/ext/pybind11/include/pybind11/chrono.h
index 61bbcbc540..167ea0e3d1 100644
--- a/ext/pybind11/include/pybind11/chrono.h
+++ b/ext/pybind11/include/pybind11/chrono.h
@@ -15,63 +15,59 @@
 #include <chrono>
 #include <cmath>
 #include <ctime>
-#include <mutex>
-
-#include <time.h>
-
 #include <datetime.h>
-
-// Backport the PyDateTime_DELTA functions from Python3.3 if required
-#ifndef PyDateTime_DELTA_GET_DAYS
-#define PyDateTime_DELTA_GET_DAYS(o)         (((PyDateTime_Delta*)o)->days)
-#endif
-#ifndef PyDateTime_DELTA_GET_SECONDS
-#define PyDateTime_DELTA_GET_SECONDS(o)      (((PyDateTime_Delta*)o)->seconds)
-#endif
-#ifndef PyDateTime_DELTA_GET_MICROSECONDS
-#define PyDateTime_DELTA_GET_MICROSECONDS(o) (((PyDateTime_Delta*)o)->microseconds)
-#endif
+#include <mutex>
 
 PYBIND11_NAMESPACE_BEGIN(PYBIND11_NAMESPACE)
 PYBIND11_NAMESPACE_BEGIN(detail)
 
-template <typename type> class duration_caster {
+template <typename type>
+class duration_caster {
 public:
     using rep = typename type::rep;
     using period = typename type::period;
 
-    using days = std::chrono::duration<int_least32_t, std::ratio<86400>>; // signed 25 bits required by the standard.
+    // signed 25 bits required by the standard.
+    using days = std::chrono::duration<int_least32_t, std::ratio<86400>>;
 
     bool load(handle src, bool) {
         using namespace std::chrono;
 
         // Lazy initialise the PyDateTime import
-        if (!PyDateTimeAPI) { PyDateTime_IMPORT; }
+        if (!PyDateTimeAPI) {
+            PyDateTime_IMPORT;
+        }
 
-        if (!src) return false;
+        if (!src) {
+            return false;
+        }
         // If invoked with datetime.delta object
         if (PyDelta_Check(src.ptr())) {
             value = type(duration_cast<duration<rep, period>>(
-                  days(PyDateTime_DELTA_GET_DAYS(src.ptr()))
+                days(PyDateTime_DELTA_GET_DAYS(src.ptr()))
                 + seconds(PyDateTime_DELTA_GET_SECONDS(src.ptr()))
                 + microseconds(PyDateTime_DELTA_GET_MICROSECONDS(src.ptr()))));
             return true;
         }
         // If invoked with a float we assume it is seconds and convert
         if (PyFloat_Check(src.ptr())) {
-            value = type(duration_cast<duration<rep, period>>(duration<double>(PyFloat_AsDouble(src.ptr()))));
+            value = type(duration_cast<duration<rep, period>>(
+                duration<double>(PyFloat_AsDouble(src.ptr()))));
             return true;
         }
         return false;
     }
 
     // If this is a duration just return it back
-    static const std::chrono::duration<rep, period>& get_duration(const std::chrono::duration<rep, period> &src) {
+    static const std::chrono::duration<rep, period> &
+    get_duration(const std::chrono::duration<rep, period> &src) {
         return src;
     }
 
     // If this is a time_point get the time_since_epoch
-    template <typename Clock> static std::chrono::duration<rep, period> get_duration(const std::chrono::time_point<Clock, std::chrono::duration<rep, period>> &src) {
+    template <typename Clock>
+    static std::chrono::duration<rep, period>
+    get_duration(const std::chrono::time_point<Clock, std::chrono::duration<rep, period>> &src) {
         return src.time_since_epoch();
     }
 
@@ -83,9 +79,12 @@ public:
         auto d = get_duration(src);
 
         // Lazy initialise the PyDateTime import
-        if (!PyDateTimeAPI) { PyDateTime_IMPORT; }
+        if (!PyDateTimeAPI) {
+            PyDateTime_IMPORT;
+        }
 
-        // Declare these special duration types so the conversions happen with the correct primitive types (int)
+        // Declare these special duration types so the conversions happen with the correct
+        // primitive types (int)
         using dd_t = duration<int, std::ratio<86400>>;
         using ss_t = duration<int, std::ratio<1>>;
         using us_t = duration<int, std::micro>;
@@ -97,7 +96,7 @@ public:
         return PyDelta_FromDSU(dd.count(), ss.count(), us.count());
     }
 
-    PYBIND11_TYPE_CASTER(type, _("datetime.timedelta"));
+    PYBIND11_TYPE_CASTER(type, const_name("datetime.timedelta"));
 };
 
 inline std::tm *localtime_thread_safe(const std::time_t *time, std::tm *buf) {
@@ -108,7 +107,7 @@ inline std::tm *localtime_thread_safe(const std::time_t *time, std::tm *buf) {
 #else
     static std::mutex mtx;
     std::lock_guard<std::mutex> lock(mtx);
-    std::tm *tm_ptr = localtime(time);
+    std::tm *tm_ptr = std::localtime(time);
     if (tm_ptr != nullptr) {
         *buf = *tm_ptr;
     }
@@ -117,76 +116,89 @@ inline std::tm *localtime_thread_safe(const std::time_t *time, std::tm *buf) {
 }
 
 // This is for casting times on the system clock into datetime.datetime instances
-template <typename Duration> class type_caster<std::chrono::time_point<std::chrono::system_clock, Duration>> {
+template <typename Duration>
+class type_caster<std::chrono::time_point<std::chrono::system_clock, Duration>> {
 public:
     using type = std::chrono::time_point<std::chrono::system_clock, Duration>;
     bool load(handle src, bool) {
         using namespace std::chrono;
 
         // Lazy initialise the PyDateTime import
-        if (!PyDateTimeAPI) { PyDateTime_IMPORT; }
+        if (!PyDateTimeAPI) {
+            PyDateTime_IMPORT;
+        }
 
-        if (!src) return false;
+        if (!src) {
+            return false;
+        }
 
         std::tm cal;
         microseconds msecs;
 
         if (PyDateTime_Check(src.ptr())) {
-            cal.tm_sec   = PyDateTime_DATE_GET_SECOND(src.ptr());
-            cal.tm_min   = PyDateTime_DATE_GET_MINUTE(src.ptr());
-            cal.tm_hour  = PyDateTime_DATE_GET_HOUR(src.ptr());
-            cal.tm_mday  = PyDateTime_GET_DAY(src.ptr());
-            cal.tm_mon   = PyDateTime_GET_MONTH(src.ptr()) - 1;
-            cal.tm_year  = PyDateTime_GET_YEAR(src.ptr()) - 1900;
+            cal.tm_sec = PyDateTime_DATE_GET_SECOND(src.ptr());
+            cal.tm_min = PyDateTime_DATE_GET_MINUTE(src.ptr());
+            cal.tm_hour = PyDateTime_DATE_GET_HOUR(src.ptr());
+            cal.tm_mday = PyDateTime_GET_DAY(src.ptr());
+            cal.tm_mon = PyDateTime_GET_MONTH(src.ptr()) - 1;
+            cal.tm_year = PyDateTime_GET_YEAR(src.ptr()) - 1900;
             cal.tm_isdst = -1;
-            msecs        = microseconds(PyDateTime_DATE_GET_MICROSECOND(src.ptr()));
+            msecs = microseconds(PyDateTime_DATE_GET_MICROSECOND(src.ptr()));
         } else if (PyDate_Check(src.ptr())) {
-            cal.tm_sec   = 0;
-            cal.tm_min   = 0;
-            cal.tm_hour  = 0;
-            cal.tm_mday  = PyDateTime_GET_DAY(src.ptr());
-            cal.tm_mon   = PyDateTime_GET_MONTH(src.ptr()) - 1;
-            cal.tm_year  = PyDateTime_GET_YEAR(src.ptr()) - 1900;
+            cal.tm_sec = 0;
+            cal.tm_min = 0;
+            cal.tm_hour = 0;
+            cal.tm_mday = PyDateTime_GET_DAY(src.ptr());
+            cal.tm_mon = PyDateTime_GET_MONTH(src.ptr()) - 1;
+            cal.tm_year = PyDateTime_GET_YEAR(src.ptr()) - 1900;
             cal.tm_isdst = -1;
-            msecs        = microseconds(0);
+            msecs = microseconds(0);
         } else if (PyTime_Check(src.ptr())) {
-            cal.tm_sec   = PyDateTime_TIME_GET_SECOND(src.ptr());
-            cal.tm_min   = PyDateTime_TIME_GET_MINUTE(src.ptr());
-            cal.tm_hour  = PyDateTime_TIME_GET_HOUR(src.ptr());
-            cal.tm_mday  = 1;   // This date (day, month, year) = (1, 0, 70)
-            cal.tm_mon   = 0;   // represents 1-Jan-1970, which is the first
-            cal.tm_year  = 70;  // earliest available date for Python's datetime
+            cal.tm_sec = PyDateTime_TIME_GET_SECOND(src.ptr());
+            cal.tm_min = PyDateTime_TIME_GET_MINUTE(src.ptr());
+            cal.tm_hour = PyDateTime_TIME_GET_HOUR(src.ptr());
+            cal.tm_mday = 1;  // This date (day, month, year) = (1, 0, 70)
+            cal.tm_mon = 0;   // represents 1-Jan-1970, which is the first
+            cal.tm_year = 70; // earliest available date for Python's datetime
             cal.tm_isdst = -1;
-            msecs        = microseconds(PyDateTime_TIME_GET_MICROSECOND(src.ptr()));
+            msecs = microseconds(PyDateTime_TIME_GET_MICROSECOND(src.ptr()));
+        } else {
+            return false;
         }
-        else return false;
 
         value = time_point_cast<Duration>(system_clock::from_time_t(std::mktime(&cal)) + msecs);
         return true;
     }
 
-    static handle cast(const std::chrono::time_point<std::chrono::system_clock, Duration> &src, return_value_policy /* policy */, handle /* parent */) {
+    static handle cast(const std::chrono::time_point<std::chrono::system_clock, Duration> &src,
+                       return_value_policy /* policy */,
+                       handle /* parent */) {
         using namespace std::chrono;
 
         // Lazy initialise the PyDateTime import
-        if (!PyDateTimeAPI) { PyDateTime_IMPORT; }
+        if (!PyDateTimeAPI) {
+            PyDateTime_IMPORT;
+        }
 
-        // Get out microseconds, and make sure they are positive, to avoid bug in eastern hemisphere time zones
-        // (cfr. https://github.com/pybind/pybind11/issues/2417)
+        // Get out microseconds, and make sure they are positive, to avoid bug in eastern
+        // hemisphere time zones (cfr. https://github.com/pybind/pybind11/issues/2417)
         using us_t = duration<int, std::micro>;
         auto us = duration_cast<us_t>(src.time_since_epoch() % seconds(1));
-        if (us.count() < 0)
+        if (us.count() < 0) {
             us += seconds(1);
+        }
 
         // Subtract microseconds BEFORE `system_clock::to_time_t`, because:
-        // > If std::time_t has lower precision, it is implementation-defined whether the value is rounded or truncated.
-        // (https://en.cppreference.com/w/cpp/chrono/system_clock/to_time_t)
-        std::time_t tt = system_clock::to_time_t(time_point_cast<system_clock::duration>(src - us));
+        // > If std::time_t has lower precision, it is implementation-defined whether the value is
+        // rounded or truncated. (https://en.cppreference.com/w/cpp/chrono/system_clock/to_time_t)
+        std::time_t tt
+            = system_clock::to_time_t(time_point_cast<system_clock::duration>(src - us));
 
         std::tm localtime;
         std::tm *localtime_ptr = localtime_thread_safe(&tt, &localtime);
-        if (!localtime_ptr)
+        if (!localtime_ptr) {
             throw cast_error("Unable to represent system_clock in local time");
+        }
         return PyDateTime_FromDateAndTime(localtime.tm_year + 1900,
                                           localtime.tm_mon + 1,
                                           localtime.tm_mday,
@@ -195,19 +207,19 @@ public:
                                           localtime.tm_sec,
                                           us.count());
     }
-    PYBIND11_TYPE_CASTER(type, _("datetime.datetime"));
+    PYBIND11_TYPE_CASTER(type, const_name("datetime.datetime"));
 };
 
 // Other clocks that are not the system clock are not measured as datetime.datetime objects
 // since they are not measured on calendar time. So instead we just make them timedeltas
 // Or if they have passed us a time as a float we convert that
-template <typename Clock, typename Duration> class type_caster<std::chrono::time_point<Clock, Duration>>
-: public duration_caster<std::chrono::time_point<Clock, Duration>> {
-};
+template <typename Clock, typename Duration>
+class type_caster<std::chrono::time_point<Clock, Duration>>
+    : public duration_caster<std::chrono::time_point<Clock, Duration>> {};
 
-template <typename Rep, typename Period> class type_caster<std::chrono::duration<Rep, Period>>
-: public duration_caster<std::chrono::duration<Rep, Period>> {
-};
+template <typename Rep, typename Period>
+class type_caster<std::chrono::duration<Rep, Period>>
+    : public duration_caster<std::chrono::duration<Rep, Period>> {};
 
 PYBIND11_NAMESPACE_END(detail)
 PYBIND11_NAMESPACE_END(PYBIND11_NAMESPACE)
diff --git a/ext/pybind11/include/pybind11/complex.h b/ext/pybind11/include/pybind11/complex.h
index f8327eb373..8a831c12ce 100644
--- a/ext/pybind11/include/pybind11/complex.h
+++ b/ext/pybind11/include/pybind11/complex.h
@@ -10,42 +10,50 @@
 #pragma once
 
 #include "pybind11.h"
+
 #include <complex>
 
 /// glibc defines I as a macro which breaks things, e.g., boost template names
 #ifdef I
-#  undef I
+#    undef I
 #endif
 
 PYBIND11_NAMESPACE_BEGIN(PYBIND11_NAMESPACE)
 
-template <typename T> struct format_descriptor<std::complex<T>, detail::enable_if_t<std::is_floating_point<T>::value>> {
+template <typename T>
+struct format_descriptor<std::complex<T>, detail::enable_if_t<std::is_floating_point<T>::value>> {
     static constexpr const char c = format_descriptor<T>::c;
-    static constexpr const char value[3] = { 'Z', c, '\0' };
+    static constexpr const char value[3] = {'Z', c, '\0'};
     static std::string format() { return std::string(value); }
 };
 
 #ifndef PYBIND11_CPP17
 
-template <typename T> constexpr const char format_descriptor<
-    std::complex<T>, detail::enable_if_t<std::is_floating_point<T>::value>>::value[3];
+template <typename T>
+constexpr const char
+    format_descriptor<std::complex<T>,
+                      detail::enable_if_t<std::is_floating_point<T>::value>>::value[3];
 
 #endif
 
 PYBIND11_NAMESPACE_BEGIN(detail)
 
-template <typename T> struct is_fmt_numeric<std::complex<T>, detail::enable_if_t<std::is_floating_point<T>::value>> {
+template <typename T>
+struct is_fmt_numeric<std::complex<T>, detail::enable_if_t<std::is_floating_point<T>::value>> {
     static constexpr bool value = true;
     static constexpr int index = is_fmt_numeric<T>::index + 3;
 };
 
-template <typename T> class type_caster<std::complex<T>> {
+template <typename T>
+class type_caster<std::complex<T>> {
 public:
     bool load(handle src, bool convert) {
-        if (!src)
+        if (!src) {
             return false;
-        if (!convert && !PyComplex_Check(src.ptr()))
+        }
+        if (!convert && !PyComplex_Check(src.ptr())) {
             return false;
+        }
         Py_complex result = PyComplex_AsCComplex(src.ptr());
         if (result.real == -1.0 && PyErr_Occurred()) {
             PyErr_Clear();
@@ -55,11 +63,12 @@ public:
         return true;
     }
 
-    static handle cast(const std::complex<T> &src, return_value_policy /* policy */, handle /* parent */) {
+    static handle
+    cast(const std::complex<T> &src, return_value_policy /* policy */, handle /* parent */) {
         return PyComplex_FromDoubles((double) src.real(), (double) src.imag());
     }
 
-    PYBIND11_TYPE_CASTER(std::complex<T>, _("complex"));
+    PYBIND11_TYPE_CASTER(std::complex<T>, const_name("complex"));
 };
 PYBIND11_NAMESPACE_END(detail)
 PYBIND11_NAMESPACE_END(PYBIND11_NAMESPACE)
diff --git a/ext/pybind11/include/pybind11/detail/class.h b/ext/pybind11/include/pybind11/detail/class.h
index b9376b4c0b..528e716f78 100644
--- a/ext/pybind11/include/pybind11/detail/class.h
+++ b/ext/pybind11/include/pybind11/detail/class.h
@@ -15,13 +15,14 @@
 PYBIND11_NAMESPACE_BEGIN(PYBIND11_NAMESPACE)
 PYBIND11_NAMESPACE_BEGIN(detail)
 
-#if PY_VERSION_HEX >= 0x03030000 && !defined(PYPY_VERSION)
-#  define PYBIND11_BUILTIN_QUALNAME
-#  define PYBIND11_SET_OLDPY_QUALNAME(obj, nameobj)
+#if !defined(PYPY_VERSION)
+#    define PYBIND11_BUILTIN_QUALNAME
+#    define PYBIND11_SET_OLDPY_QUALNAME(obj, nameobj)
 #else
-// In pre-3.3 Python, we still set __qualname__ so that we can produce reliable function type
-// signatures; in 3.3+ this macro expands to nothing:
-#  define PYBIND11_SET_OLDPY_QUALNAME(obj, nameobj) setattr((PyObject *) obj, "__qualname__", nameobj)
+// In PyPy, we still set __qualname__ so that we can produce reliable function type
+// signatures; in CPython this macro expands to nothing:
+#    define PYBIND11_SET_OLDPY_QUALNAME(obj, nameobj)                                             \
+        setattr((PyObject *) obj, "__qualname__", nameobj)
 #endif
 
 inline std::string get_fully_qualified_tp_name(PyTypeObject *type) {
@@ -54,6 +55,9 @@ extern "C" inline int pybind11_static_set(PyObject *self, PyObject *obj, PyObjec
     return PyProperty_Type.tp_descr_set(self, cls, value);
 }
 
+// Forward declaration to use in `make_static_property_type()`
+inline void enable_dynamic_attributes(PyHeapTypeObject *heap_type);
+
 /** A `static_property` is the same as a `property` but the `__get__()` and `__set__()`
     methods are modified to always use the object type instead of a concrete instance.
     Return value: New reference. */
@@ -65,24 +69,33 @@ inline PyTypeObject *make_static_property_type() {
        issue no Python C API calls which could potentially invoke the
        garbage collector (the GC will call type_traverse(), which will in
        turn find the newly constructed type in an invalid state) */
-    auto heap_type = (PyHeapTypeObject *) PyType_Type.tp_alloc(&PyType_Type, 0);
-    if (!heap_type)
+    auto *heap_type = (PyHeapTypeObject *) PyType_Type.tp_alloc(&PyType_Type, 0);
+    if (!heap_type) {
         pybind11_fail("make_static_property_type(): error allocating type!");
+    }
 
     heap_type->ht_name = name_obj.inc_ref().ptr();
-#ifdef PYBIND11_BUILTIN_QUALNAME
+#    ifdef PYBIND11_BUILTIN_QUALNAME
     heap_type->ht_qualname = name_obj.inc_ref().ptr();
-#endif
+#    endif
 
-    auto type = &heap_type->ht_type;
+    auto *type = &heap_type->ht_type;
     type->tp_name = name;
     type->tp_base = type_incref(&PyProperty_Type);
     type->tp_flags = Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE | Py_TPFLAGS_HEAPTYPE;
     type->tp_descr_get = pybind11_static_get;
     type->tp_descr_set = pybind11_static_set;
 
-    if (PyType_Ready(type) < 0)
+    if (PyType_Ready(type) < 0) {
         pybind11_fail("make_static_property_type(): failure in PyType_Ready()!");
+    }
+
+#    if PY_VERSION_HEX >= 0x030C0000
+    // PRE 3.12 FEATURE FREEZE. PLEASE REVIEW AFTER FREEZE.
+    // Since Python-3.12 property-derived types are required to
+    // have dynamic attributes (to set `__doc__`)
+    enable_dynamic_attributes(heap_type);
+#    endif
 
     setattr((PyObject *) type, "__module__", str("pybind11_builtins"));
     PYBIND11_SET_OLDPY_QUALNAME(type, name_obj);
@@ -98,15 +111,17 @@ inline PyTypeObject *make_static_property_type() {
 inline PyTypeObject *make_static_property_type() {
     auto d = dict();
     PyObject *result = PyRun_String(R"(\
-        class pybind11_static_property(property):
-            def __get__(self, obj, cls):
-                return property.__get__(self, cls, cls)
+class pybind11_static_property(property):
+    def __get__(self, obj, cls):
+        return property.__get__(self, cls, cls)
 
-            def __set__(self, obj, value):
-                cls = obj if isinstance(obj, type) else type(obj)
-                property.__set__(self, cls, value)
-        )", Py_file_input, d.ptr(), d.ptr()
-    );
+    def __set__(self, obj, value):
+        cls = obj if isinstance(obj, type) else type(obj)
+        property.__set__(self, cls, value)
+)",
+                                    Py_file_input,
+                                    d.ptr(),
+                                    d.ptr());
     if (result == nullptr)
         throw error_already_set();
     Py_DECREF(result);
@@ -119,7 +134,7 @@ inline PyTypeObject *make_static_property_type() {
     By default, Python replaces the `static_property` itself, but for wrapped C++ types
     we need to call `static_property.__set__()` in order to propagate the new value to
     the underlying C++ data structure. */
-extern "C" inline int pybind11_meta_setattro(PyObject* obj, PyObject* name, PyObject* value) {
+extern "C" inline int pybind11_meta_setattro(PyObject *obj, PyObject *name, PyObject *value) {
     // Use `_PyType_Lookup()` instead of `PyObject_GetAttr()` in order to get the raw
     // descriptor (`property`) instead of calling `tp_descr_get` (`property.__get__()`).
     PyObject *descr = _PyType_Lookup((PyTypeObject *) obj, name);
@@ -128,7 +143,7 @@ extern "C" inline int pybind11_meta_setattro(PyObject* obj, PyObject* name, PyOb
     //   1. `Type.static_prop = value`             --> descr_set: `Type.static_prop.__set__(value)`
     //   2. `Type.static_prop = other_static_prop` --> setattro:  replace existing `static_prop`
     //   3. `Type.regular_attribute = value`       --> setattro:  regular attribute assignment
-    const auto static_prop = (PyObject *) get_internals().static_property_type;
+    auto *const static_prop = (PyObject *) get_internals().static_property_type;
     const auto call_descr_set = (descr != nullptr) && (value != nullptr)
                                 && (PyObject_IsInstance(descr, static_prop) != 0)
                                 && (PyObject_IsInstance(value, static_prop) == 0);
@@ -150,7 +165,6 @@ extern "C" inline int pybind11_meta_setattro(PyObject* obj, PyObject* name, PyOb
     }
 }
 
-#if PY_MAJOR_VERSION >= 3
 /**
  * Python 3's PyInstanceMethod_Type hides itself via its tp_descr_get, which prevents aliasing
  * methods via cls.attr("m2") = cls.attr("m1"): instead the tp_descr_get returns a plain function,
@@ -165,7 +179,6 @@ extern "C" inline PyObject *pybind11_meta_getattro(PyObject *obj, PyObject *name
     }
     return PyType_Type.tp_getattro(obj, name);
 }
-#endif
 
 /// metaclass `__call__` function that is used to create all pybind11 objects.
 extern "C" inline PyObject *pybind11_meta_call(PyObject *type, PyObject *args, PyObject *kwargs) {
@@ -177,12 +190,13 @@ extern "C" inline PyObject *pybind11_meta_call(PyObject *type, PyObject *args, P
     }
 
     // This must be a pybind11 instance
-    auto instance = reinterpret_cast<detail::instance *>(self);
+    auto *instance = reinterpret_cast<detail::instance *>(self);
 
     // Ensure that the base __init__ function(s) were called
     for (const auto &vh : values_and_holders(instance)) {
         if (!vh.holder_constructed()) {
-            PyErr_Format(PyExc_TypeError, "%.200s.__init__() must be called when overriding __init__",
+            PyErr_Format(PyExc_TypeError,
+                         "%.200s.__init__() must be called when overriding __init__",
                          get_fully_qualified_tp_name(vh.type->type).c_str());
             Py_DECREF(self);
             return nullptr;
@@ -201,27 +215,28 @@ extern "C" inline void pybind11_meta_dealloc(PyObject *obj) {
     // 1) be found in internals.registered_types_py
     // 2) have exactly one associated `detail::type_info`
     auto found_type = internals.registered_types_py.find(type);
-    if (found_type != internals.registered_types_py.end() &&
-        found_type->second.size() == 1 &&
-        found_type->second[0]->type == type) {
+    if (found_type != internals.registered_types_py.end() && found_type->second.size() == 1
+        && found_type->second[0]->type == type) {
 
         auto *tinfo = found_type->second[0];
         auto tindex = std::type_index(*tinfo->cpptype);
         internals.direct_conversions.erase(tindex);
 
-        if (tinfo->module_local)
+        if (tinfo->module_local) {
             get_local_internals().registered_types_cpp.erase(tindex);
-        else
+        } else {
             internals.registered_types_cpp.erase(tindex);
+        }
         internals.registered_types_py.erase(tinfo->type);
 
         // Actually just `std::erase_if`, but that's only available in C++20
         auto &cache = internals.inactive_override_cache;
-        for (auto it = cache.begin(), last = cache.end(); it != last; ) {
-            if (it->first == (PyObject *) tinfo->type)
+        for (auto it = cache.begin(), last = cache.end(); it != last;) {
+            if (it->first == (PyObject *) tinfo->type) {
                 it = cache.erase(it);
-            else
+            } else {
                 ++it;
+            }
         }
 
         delete tinfo;
@@ -233,7 +248,7 @@ extern "C" inline void pybind11_meta_dealloc(PyObject *obj) {
 /** This metaclass is assigned by default to all pybind11 types and is required in order
     for static properties to function correctly. Users may override this using `py::metaclass`.
     Return value: New reference. */
-inline PyTypeObject* make_default_metaclass() {
+inline PyTypeObject *make_default_metaclass() {
     constexpr auto *name = "pybind11_type";
     auto name_obj = reinterpret_steal<object>(PYBIND11_FROM_STRING(name));
 
@@ -241,16 +256,17 @@ inline PyTypeObject* make_default_metaclass() {
        issue no Python C API calls which could potentially invoke the
        garbage collector (the GC will call type_traverse(), which will in
        turn find the newly constructed type in an invalid state) */
-    auto heap_type = (PyHeapTypeObject *) PyType_Type.tp_alloc(&PyType_Type, 0);
-    if (!heap_type)
+    auto *heap_type = (PyHeapTypeObject *) PyType_Type.tp_alloc(&PyType_Type, 0);
+    if (!heap_type) {
         pybind11_fail("make_default_metaclass(): error allocating metaclass!");
+    }
 
     heap_type->ht_name = name_obj.inc_ref().ptr();
 #ifdef PYBIND11_BUILTIN_QUALNAME
     heap_type->ht_qualname = name_obj.inc_ref().ptr();
 #endif
 
-    auto type = &heap_type->ht_type;
+    auto *type = &heap_type->ht_type;
     type->tp_name = name;
     type->tp_base = type_incref(&PyType_Type);
     type->tp_flags = Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE | Py_TPFLAGS_HEAPTYPE;
@@ -258,14 +274,13 @@ inline PyTypeObject* make_default_metaclass() {
     type->tp_call = pybind11_meta_call;
 
     type->tp_setattro = pybind11_meta_setattro;
-#if PY_MAJOR_VERSION >= 3
     type->tp_getattro = pybind11_meta_getattro;
-#endif
 
     type->tp_dealloc = pybind11_meta_dealloc;
 
-    if (PyType_Ready(type) < 0)
+    if (PyType_Ready(type) < 0) {
         pybind11_fail("make_default_metaclass(): failure in PyType_Ready()!");
+    }
 
     setattr((PyObject *) type, "__module__", str("pybind11_builtins"));
     PYBIND11_SET_OLDPY_QUALNAME(type, name_obj);
@@ -275,16 +290,20 @@ inline PyTypeObject* make_default_metaclass() {
 
 /// For multiple inheritance types we need to recursively register/deregister base pointers for any
 /// base classes with pointers that are difference from the instance value pointer so that we can
-/// correctly recognize an offset base class pointer. This calls a function with any offset base ptrs.
-inline void traverse_offset_bases(void *valueptr, const detail::type_info *tinfo, instance *self,
-        bool (*f)(void * /*parentptr*/, instance * /*self*/)) {
+/// correctly recognize an offset base class pointer. This calls a function with any offset base
+/// ptrs.
+inline void traverse_offset_bases(void *valueptr,
+                                  const detail::type_info *tinfo,
+                                  instance *self,
+                                  bool (*f)(void * /*parentptr*/, instance * /*self*/)) {
     for (handle h : reinterpret_borrow<tuple>(tinfo->type->tp_bases)) {
-        if (auto parent_tinfo = get_type_info((PyTypeObject *) h.ptr())) {
+        if (auto *parent_tinfo = get_type_info((PyTypeObject *) h.ptr())) {
             for (auto &c : parent_tinfo->implicit_casts) {
                 if (c.first == tinfo->cpptype) {
                     auto *parentptr = c.second(valueptr);
-                    if (parentptr != valueptr)
+                    if (parentptr != valueptr) {
                         f(parentptr, self);
+                    }
                     traverse_offset_bases(parentptr, parent_tinfo, self, f);
                     break;
                 }
@@ -311,31 +330,33 @@ inline bool deregister_instance_impl(void *ptr, instance *self) {
 
 inline void register_instance(instance *self, void *valptr, const type_info *tinfo) {
     register_instance_impl(valptr, self);
-    if (!tinfo->simple_ancestors)
+    if (!tinfo->simple_ancestors) {
         traverse_offset_bases(valptr, tinfo, self, register_instance_impl);
+    }
 }
 
 inline bool deregister_instance(instance *self, void *valptr, const type_info *tinfo) {
     bool ret = deregister_instance_impl(valptr, self);
-    if (!tinfo->simple_ancestors)
+    if (!tinfo->simple_ancestors) {
         traverse_offset_bases(valptr, tinfo, self, deregister_instance_impl);
+    }
     return ret;
 }
 
-/// Instance creation function for all pybind11 types. It allocates the internal instance layout for
-/// holding C++ objects and holders.  Allocation is done lazily (the first time the instance is cast
-/// to a reference or pointer), and initialization is done by an `__init__` function.
+/// Instance creation function for all pybind11 types. It allocates the internal instance layout
+/// for holding C++ objects and holders.  Allocation is done lazily (the first time the instance is
+/// cast to a reference or pointer), and initialization is done by an `__init__` function.
 inline PyObject *make_new_instance(PyTypeObject *type) {
 #if defined(PYPY_VERSION)
-    // PyPy gets tp_basicsize wrong (issue 2482) under multiple inheritance when the first inherited
-    // object is a plain Python type (i.e. not derived from an extension type).  Fix it.
+    // PyPy gets tp_basicsize wrong (issue 2482) under multiple inheritance when the first
+    // inherited object is a plain Python type (i.e. not derived from an extension type).  Fix it.
     ssize_t instance_size = static_cast<ssize_t>(sizeof(instance));
     if (type->tp_basicsize < instance_size) {
         type->tp_basicsize = instance_size;
     }
 #endif
     PyObject *self = type->tp_alloc(type, 0);
-    auto inst = reinterpret_cast<instance *>(self);
+    auto *inst = reinterpret_cast<instance *>(self);
     // Allocate the value/holder internals:
     inst->allocate_layout();
 
@@ -360,14 +381,14 @@ extern "C" inline int pybind11_object_init(PyObject *self, PyObject *, PyObject
 
 inline void add_patient(PyObject *nurse, PyObject *patient) {
     auto &internals = get_internals();
-    auto instance = reinterpret_cast<detail::instance *>(nurse);
+    auto *instance = reinterpret_cast<detail::instance *>(nurse);
     instance->has_patients = true;
     Py_INCREF(patient);
     internals.patients[nurse].push_back(patient);
 }
 
 inline void clear_patients(PyObject *self) {
-    auto instance = reinterpret_cast<detail::instance *>(self);
+    auto *instance = reinterpret_cast<detail::instance *>(self);
     auto &internals = get_internals();
     auto pos = internals.patients.find(self);
     assert(pos != internals.patients.end());
@@ -377,14 +398,15 @@ inline void clear_patients(PyObject *self) {
     auto patients = std::move(pos->second);
     internals.patients.erase(pos);
     instance->has_patients = false;
-    for (PyObject *&patient : patients)
+    for (PyObject *&patient : patients) {
         Py_CLEAR(patient);
+    }
 }
 
 /// Clears all internal data from the instance and removes it from registered instances in
 /// preparation for deallocation.
 inline void clear_instance(PyObject *self) {
-    auto instance = reinterpret_cast<detail::instance *>(self);
+    auto *instance = reinterpret_cast<detail::instance *>(self);
 
     // Deallocate any values/holders, if present:
     for (auto &v_h : values_and_holders(instance)) {
@@ -392,25 +414,32 @@ inline void clear_instance(PyObject *self) {
 
             // We have to deregister before we call dealloc because, for virtual MI types, we still
             // need to be able to get the parent pointers.
-            if (v_h.instance_registered() && !deregister_instance(instance, v_h.value_ptr(), v_h.type))
-                pybind11_fail("pybind11_object_dealloc(): Tried to deallocate unregistered instance!");
+            if (v_h.instance_registered()
+                && !deregister_instance(instance, v_h.value_ptr(), v_h.type)) {
+                pybind11_fail(
+                    "pybind11_object_dealloc(): Tried to deallocate unregistered instance!");
+            }
 
-            if (instance->owned || v_h.holder_constructed())
+            if (instance->owned || v_h.holder_constructed()) {
                 v_h.type->dealloc(v_h);
+            }
         }
     }
     // Deallocate the value/holder layout internals:
     instance->deallocate_layout();
 
-    if (instance->weakrefs)
+    if (instance->weakrefs) {
         PyObject_ClearWeakRefs(self);
+    }
 
     PyObject **dict_ptr = _PyObject_GetDictPtr(self);
-    if (dict_ptr)
+    if (dict_ptr) {
         Py_CLEAR(*dict_ptr);
+    }
 
-    if (instance->has_patients)
+    if (instance->has_patients) {
         clear_patients(self);
+    }
 }
 
 /// Instance destructor function for all pybind11 types. It calls `type_info.dealloc`
@@ -418,7 +447,7 @@ inline void clear_instance(PyObject *self) {
 extern "C" inline void pybind11_object_dealloc(PyObject *self) {
     clear_instance(self);
 
-    auto type = Py_TYPE(self);
+    auto *type = Py_TYPE(self);
     type->tp_free(self);
 
 #if PY_VERSION_HEX < 0x03080000
@@ -436,6 +465,8 @@ extern "C" inline void pybind11_object_dealloc(PyObject *self) {
 #endif
 }
 
+std::string error_string();
+
 /** Create the type which can be used as a common base for all classes.  This is
     needed in order to satisfy Python's requirements for multiple inheritance.
     Return value: New reference. */
@@ -447,16 +478,17 @@ inline PyObject *make_object_base_type(PyTypeObject *metaclass) {
        issue no Python C API calls which could potentially invoke the
        garbage collector (the GC will call type_traverse(), which will in
        turn find the newly constructed type in an invalid state) */
-    auto heap_type = (PyHeapTypeObject *) metaclass->tp_alloc(metaclass, 0);
-    if (!heap_type)
+    auto *heap_type = (PyHeapTypeObject *) metaclass->tp_alloc(metaclass, 0);
+    if (!heap_type) {
         pybind11_fail("make_object_base_type(): error allocating type!");
+    }
 
     heap_type->ht_name = name_obj.inc_ref().ptr();
 #ifdef PYBIND11_BUILTIN_QUALNAME
     heap_type->ht_qualname = name_obj.inc_ref().ptr();
 #endif
 
-    auto type = &heap_type->ht_type;
+    auto *type = &heap_type->ht_type;
     type->tp_name = name;
     type->tp_base = type_incref(&PyBaseObject_Type);
     type->tp_basicsize = static_cast<ssize_t>(sizeof(instance));
@@ -469,8 +501,9 @@ inline PyObject *make_object_base_type(PyTypeObject *metaclass) {
     /* Support weak references (needed for the keep_alive feature) */
     type->tp_weaklistoffset = offsetof(instance, weakrefs);
 
-    if (PyType_Ready(type) < 0)
-        pybind11_fail("PyType_Ready failed in make_object_base_type():" + error_string());
+    if (PyType_Ready(type) < 0) {
+        pybind11_fail("PyType_Ready failed in make_object_base_type(): " + error_string());
+    }
 
     setattr((PyObject *) type, "__module__", str("pybind11_builtins"));
     PYBIND11_SET_OLDPY_QUALNAME(type, name_obj);
@@ -479,33 +512,14 @@ inline PyObject *make_object_base_type(PyTypeObject *metaclass) {
     return (PyObject *) heap_type;
 }
 
-/// dynamic_attr: Support for `d = instance.__dict__`.
-extern "C" inline PyObject *pybind11_get_dict(PyObject *self, void *) {
-    PyObject *&dict = *_PyObject_GetDictPtr(self);
-    if (!dict)
-        dict = PyDict_New();
-    Py_XINCREF(dict);
-    return dict;
-}
-
-/// dynamic_attr: Support for `instance.__dict__ = dict()`.
-extern "C" inline int pybind11_set_dict(PyObject *self, PyObject *new_dict, void *) {
-    if (!PyDict_Check(new_dict)) {
-        PyErr_Format(PyExc_TypeError, "__dict__ must be set to a dictionary, not a '%.200s'",
-                     get_fully_qualified_tp_name(Py_TYPE(new_dict)).c_str());
-        return -1;
-    }
-    PyObject *&dict = *_PyObject_GetDictPtr(self);
-    Py_INCREF(new_dict);
-    Py_CLEAR(dict);
-    dict = new_dict;
-    return 0;
-}
-
 /// dynamic_attr: Allow the garbage collector to traverse the internal instance `__dict__`.
 extern "C" inline int pybind11_traverse(PyObject *self, visitproc visit, void *arg) {
     PyObject *&dict = *_PyObject_GetDictPtr(self);
     Py_VISIT(dict);
+// https://docs.python.org/3/c-api/typeobj.html#c.PyTypeObject.tp_traverse
+#if PY_VERSION_HEX >= 0x03090000
+    Py_VISIT(Py_TYPE(self));
+#endif
     return 0;
 }
 
@@ -518,17 +532,28 @@ extern "C" inline int pybind11_clear(PyObject *self) {
 
 /// Give instances of this type a `__dict__` and opt into garbage collection.
 inline void enable_dynamic_attributes(PyHeapTypeObject *heap_type) {
-    auto type = &heap_type->ht_type;
+    auto *type = &heap_type->ht_type;
     type->tp_flags |= Py_TPFLAGS_HAVE_GC;
-    type->tp_dictoffset = type->tp_basicsize; // place dict at the end
-    type->tp_basicsize += (ssize_t)sizeof(PyObject *); // and allocate enough space for it
+#if PY_VERSION_HEX < 0x030B0000
+    type->tp_dictoffset = type->tp_basicsize;           // place dict at the end
+    type->tp_basicsize += (ssize_t) sizeof(PyObject *); // and allocate enough space for it
+#else
+    type->tp_flags |= Py_TPFLAGS_MANAGED_DICT;
+#endif
     type->tp_traverse = pybind11_traverse;
     type->tp_clear = pybind11_clear;
 
-    static PyGetSetDef getset[] = {
-        {const_cast<char*>("__dict__"), pybind11_get_dict, pybind11_set_dict, nullptr, nullptr},
-        {nullptr, nullptr, nullptr, nullptr, nullptr}
-    };
+    static PyGetSetDef getset[] = {{
+#if PY_VERSION_HEX < 0x03070000
+                                       const_cast<char *>("__dict__"),
+#else
+                                       "__dict__",
+#endif
+                                       PyObject_GenericGetDict,
+                                       PyObject_GenericSetDict,
+                                       nullptr,
+                                       nullptr},
+                                   {nullptr, nullptr, nullptr, nullptr, nullptr}};
     type->tp_getset = getset;
 }
 
@@ -538,12 +563,14 @@ extern "C" inline int pybind11_getbuffer(PyObject *obj, Py_buffer *view, int fla
     type_info *tinfo = nullptr;
     for (auto type : reinterpret_borrow<tuple>(Py_TYPE(obj)->tp_mro)) {
         tinfo = get_type_info((PyTypeObject *) type.ptr());
-        if (tinfo && tinfo->get_buffer)
+        if (tinfo && tinfo->get_buffer) {
             break;
+        }
     }
     if (view == nullptr || !tinfo || !tinfo->get_buffer) {
-        if (view)
+        if (view) {
             view->obj = nullptr;
+        }
         PyErr_SetString(PyExc_BufferError, "pybind11_getbuffer(): Internal error");
         return -1;
     }
@@ -561,15 +588,17 @@ extern "C" inline int pybind11_getbuffer(PyObject *obj, Py_buffer *view, int fla
     view->buf = info->ptr;
     view->itemsize = info->itemsize;
     view->len = view->itemsize;
-    for (auto s : info->shape)
+    for (auto s : info->shape) {
         view->len *= s;
+    }
     view->readonly = static_cast<int>(info->readonly);
-    if ((flags & PyBUF_FORMAT) == PyBUF_FORMAT)
+    if ((flags & PyBUF_FORMAT) == PyBUF_FORMAT) {
         view->format = const_cast<char *>(info->format.c_str());
+    }
     if ((flags & PyBUF_STRIDES) == PyBUF_STRIDES) {
         view->ndim = (int) info->ndim;
-        view->strides = &info->strides[0];
-        view->shape = &info->shape[0];
+        view->strides = info->strides.data();
+        view->shape = info->shape.data();
     }
     Py_INCREF(view->obj);
     return 0;
@@ -583,9 +612,6 @@ extern "C" inline void pybind11_releasebuffer(PyObject *, Py_buffer *view) {
 /// Give this type a buffer interface.
 inline void enable_buffer_protocol(PyHeapTypeObject *heap_type) {
     heap_type->ht_type.tp_as_buffer = &heap_type->as_buffer;
-#if PY_MAJOR_VERSION < 3
-    heap_type->ht_type.tp_flags |= Py_TPFLAGS_HAVE_NEWBUFFER;
-#endif
 
     heap_type->as_buffer.bf_getbuffer = pybind11_getbuffer;
     heap_type->as_buffer.bf_releasebuffer = pybind11_releasebuffer;
@@ -593,70 +619,68 @@ inline void enable_buffer_protocol(PyHeapTypeObject *heap_type) {
 
 /** Create a brand new Python type according to the `type_record` specification.
     Return value: New reference. */
-inline PyObject* make_new_python_type(const type_record &rec) {
+inline PyObject *make_new_python_type(const type_record &rec) {
     auto name = reinterpret_steal<object>(PYBIND11_FROM_STRING(rec.name));
 
     auto qualname = name;
     if (rec.scope && !PyModule_Check(rec.scope.ptr()) && hasattr(rec.scope, "__qualname__")) {
-#if PY_MAJOR_VERSION >= 3
         qualname = reinterpret_steal<object>(
             PyUnicode_FromFormat("%U.%U", rec.scope.attr("__qualname__").ptr(), name.ptr()));
-#else
-        qualname = str(rec.scope.attr("__qualname__").cast<std::string>() + "." + rec.name);
-#endif
     }
 
     object module_;
     if (rec.scope) {
-        if (hasattr(rec.scope, "__module__"))
+        if (hasattr(rec.scope, "__module__")) {
             module_ = rec.scope.attr("__module__");
-        else if (hasattr(rec.scope, "__name__"))
+        } else if (hasattr(rec.scope, "__name__")) {
             module_ = rec.scope.attr("__name__");
+        }
     }
 
-    auto full_name = c_str(
+    const auto *full_name = c_str(
 #if !defined(PYPY_VERSION)
         module_ ? str(module_).cast<std::string>() + "." + rec.name :
 #endif
-        rec.name);
+                rec.name);
 
     char *tp_doc = nullptr;
     if (rec.doc && options::show_user_defined_docstrings()) {
         /* Allocate memory for docstring (using PyObject_MALLOC, since
            Python will free this later on) */
-        size_t size = strlen(rec.doc) + 1;
+        size_t size = std::strlen(rec.doc) + 1;
         tp_doc = (char *) PyObject_MALLOC(size);
-        memcpy((void *) tp_doc, rec.doc, size);
+        std::memcpy((void *) tp_doc, rec.doc, size);
     }
 
     auto &internals = get_internals();
     auto bases = tuple(rec.bases);
-    auto base = (bases.empty()) ? internals.instance_base
-                                    : bases[0].ptr();
+    auto *base = (bases.empty()) ? internals.instance_base : bases[0].ptr();
 
     /* Danger zone: from now (and until PyType_Ready), make sure to
        issue no Python C API calls which could potentially invoke the
        garbage collector (the GC will call type_traverse(), which will in
        turn find the newly constructed type in an invalid state) */
-    auto metaclass = rec.metaclass.ptr() ? (PyTypeObject *) rec.metaclass.ptr()
-                                         : internals.default_metaclass;
+    auto *metaclass
+        = rec.metaclass.ptr() ? (PyTypeObject *) rec.metaclass.ptr() : internals.default_metaclass;
 
-    auto heap_type = (PyHeapTypeObject *) metaclass->tp_alloc(metaclass, 0);
-    if (!heap_type)
+    auto *heap_type = (PyHeapTypeObject *) metaclass->tp_alloc(metaclass, 0);
+    if (!heap_type) {
         pybind11_fail(std::string(rec.name) + ": Unable to create type object!");
+    }
 
     heap_type->ht_name = name.release().ptr();
 #ifdef PYBIND11_BUILTIN_QUALNAME
     heap_type->ht_qualname = qualname.inc_ref().ptr();
 #endif
 
-    auto type = &heap_type->ht_type;
+    auto *type = &heap_type->ht_type;
     type->tp_name = full_name;
     type->tp_doc = tp_doc;
-    type->tp_base = type_incref((PyTypeObject *)base);
+    type->tp_base = type_incref((PyTypeObject *) base);
     type->tp_basicsize = static_cast<ssize_t>(sizeof(instance));
-    if (!bases.empty())
+    if (!bases.empty()) {
         type->tp_bases = bases.release().ptr();
+    }
 
     /* Don't inherit base __init__ */
     type->tp_init = pybind11_object_init;
@@ -665,40 +689,42 @@ inline PyObject* make_new_python_type(const type_record &rec) {
     type->tp_as_number = &heap_type->as_number;
     type->tp_as_sequence = &heap_type->as_sequence;
     type->tp_as_mapping = &heap_type->as_mapping;
-#if PY_VERSION_HEX >= 0x03050000
     type->tp_as_async = &heap_type->as_async;
-#endif
 
     /* Flags */
     type->tp_flags |= Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HEAPTYPE;
-#if PY_MAJOR_VERSION < 3
-    type->tp_flags |= Py_TPFLAGS_CHECKTYPES;
-#endif
-    if (!rec.is_final)
+    if (!rec.is_final) {
         type->tp_flags |= Py_TPFLAGS_BASETYPE;
+    }
 
-    if (rec.dynamic_attr)
+    if (rec.dynamic_attr) {
         enable_dynamic_attributes(heap_type);
+    }
 
-    if (rec.buffer_protocol)
+    if (rec.buffer_protocol) {
         enable_buffer_protocol(heap_type);
+    }
 
-    if (rec.custom_type_setup_callback)
+    if (rec.custom_type_setup_callback) {
         rec.custom_type_setup_callback(heap_type);
+    }
 
-    if (PyType_Ready(type) < 0)
-        pybind11_fail(std::string(rec.name) + ": PyType_Ready failed (" + error_string() + ")!");
+    if (PyType_Ready(type) < 0) {
+        pybind11_fail(std::string(rec.name) + ": PyType_Ready failed: " + error_string());
+    }
 
     assert(!rec.dynamic_attr || PyType_HasFeature(type, Py_TPFLAGS_HAVE_GC));
 
     /* Register type with the parent scope */
-    if (rec.scope)
+    if (rec.scope) {
         setattr(rec.scope, rec.name, (PyObject *) type);
-    else
+    } else {
         Py_INCREF(type); // Keep it alive forever (reference leak)
+    }
 
-    if (module_) // Needed by pydoc
+    if (module_) { // Needed by pydoc
         setattr((PyObject *) type, "__module__", module_);
+    }
 
     PYBIND11_SET_OLDPY_QUALNAME(type, qualname);
 
diff --git a/ext/pybind11/include/pybind11/detail/common.h b/ext/pybind11/include/pybind11/detail/common.h
index 713de94b03..12bb173ad1 100644
--- a/ext/pybind11/include/pybind11/detail/common.h
+++ b/ext/pybind11/include/pybind11/detail/common.h
@@ -10,93 +10,158 @@
 #pragma once
 
 #define PYBIND11_VERSION_MAJOR 2
-#define PYBIND11_VERSION_MINOR 8
-#define PYBIND11_VERSION_PATCH 1
+#define PYBIND11_VERSION_MINOR 10
+#define PYBIND11_VERSION_PATCH 3
 
 // Similar to Python's convention: https://docs.python.org/3/c-api/apiabiversion.html
 // Additional convention: 0xD = dev
-#define PYBIND11_VERSION_HEX 0x02080100
+#define PYBIND11_VERSION_HEX 0x020A0300
 
-#define PYBIND11_NAMESPACE_BEGIN(name) namespace name {
-#define PYBIND11_NAMESPACE_END(name) }
+// Define some generic pybind11 helper macros for warning management.
+//
+// Note that compiler-specific push/pop pairs are baked into the
+// PYBIND11_NAMESPACE_BEGIN/PYBIND11_NAMESPACE_END pair of macros. Therefore manual
+// PYBIND11_WARNING_PUSH/PYBIND11_WARNING_POP are usually only needed in `#include` sections.
+//
+// If you find you need to suppress a warning, please try to make the suppression as local as
+// possible using these macros. Please also be sure to push/pop with the pybind11 macros. Please
+// only use compiler specifics if you need to check specific versions, e.g. Apple Clang vs. vanilla
+// Clang.
+#if defined(_MSC_VER)
+#    define PYBIND11_COMPILER_MSVC
+#    define PYBIND11_PRAGMA(...) __pragma(__VA_ARGS__)
+#    define PYBIND11_WARNING_PUSH PYBIND11_PRAGMA(warning(push))
+#    define PYBIND11_WARNING_POP PYBIND11_PRAGMA(warning(pop))
+#elif defined(__INTEL_COMPILER)
+#    define PYBIND11_COMPILER_INTEL
+#    define PYBIND11_PRAGMA(...) _Pragma(#__VA_ARGS__)
+#    define PYBIND11_WARNING_PUSH PYBIND11_PRAGMA(warning push)
+#    define PYBIND11_WARNING_POP PYBIND11_PRAGMA(warning pop)
+#elif defined(__clang__)
+#    define PYBIND11_COMPILER_CLANG
+#    define PYBIND11_PRAGMA(...) _Pragma(#__VA_ARGS__)
+#    define PYBIND11_WARNING_PUSH PYBIND11_PRAGMA(clang diagnostic push)
+#    define PYBIND11_WARNING_POP PYBIND11_PRAGMA(clang diagnostic push)
+#elif defined(__GNUC__)
+#    define PYBIND11_COMPILER_GCC
+#    define PYBIND11_PRAGMA(...) _Pragma(#__VA_ARGS__)
+#    define PYBIND11_WARNING_PUSH PYBIND11_PRAGMA(GCC diagnostic push)
+#    define PYBIND11_WARNING_POP PYBIND11_PRAGMA(GCC diagnostic pop)
+#endif
+
+#ifdef PYBIND11_COMPILER_MSVC
+#    define PYBIND11_WARNING_DISABLE_MSVC(name) PYBIND11_PRAGMA(warning(disable : name))
+#else
+#    define PYBIND11_WARNING_DISABLE_MSVC(name)
+#endif
+
+#ifdef PYBIND11_COMPILER_CLANG
+#    define PYBIND11_WARNING_DISABLE_CLANG(name) PYBIND11_PRAGMA(clang diagnostic ignored name)
+#else
+#    define PYBIND11_WARNING_DISABLE_CLANG(name)
+#endif
+
+#ifdef PYBIND11_COMPILER_GCC
+#    define PYBIND11_WARNING_DISABLE_GCC(name) PYBIND11_PRAGMA(GCC diagnostic ignored name)
+#else
+#    define PYBIND11_WARNING_DISABLE_GCC(name)
+#endif
+
+#ifdef PYBIND11_COMPILER_INTEL
+#    define PYBIND11_WARNING_DISABLE_INTEL(name) PYBIND11_PRAGMA(warning disable name)
+#else
+#    define PYBIND11_WARNING_DISABLE_INTEL(name)
+#endif
+
+#define PYBIND11_NAMESPACE_BEGIN(name)                                                            \
+    namespace name {                                                                              \
+    PYBIND11_WARNING_PUSH
+
+#define PYBIND11_NAMESPACE_END(name)                                                              \
+    PYBIND11_WARNING_POP                                                                          \
+    }
 
 // Robust support for some features and loading modules compiled against different pybind versions
-// requires forcing hidden visibility on pybind code, so we enforce this by setting the attribute on
-// the main `pybind11` namespace.
+// requires forcing hidden visibility on pybind code, so we enforce this by setting the attribute
+// on the main `pybind11` namespace.
 #if !defined(PYBIND11_NAMESPACE)
-#  ifdef __GNUG__
-#    define PYBIND11_NAMESPACE pybind11 __attribute__((visibility("hidden")))
-#  else
-#    define PYBIND11_NAMESPACE pybind11
-#  endif
+#    ifdef __GNUG__
+#        define PYBIND11_NAMESPACE pybind11 __attribute__((visibility("hidden")))
+#    else
+#        define PYBIND11_NAMESPACE pybind11
+#    endif
 #endif
 
 #if !(defined(_MSC_VER) && __cplusplus == 199711L)
-#  if __cplusplus >= 201402L
-#    define PYBIND11_CPP14
-#    if __cplusplus >= 201703L
-#      define PYBIND11_CPP17
+#    if __cplusplus >= 201402L
+#        define PYBIND11_CPP14
+#        if __cplusplus >= 201703L
+#            define PYBIND11_CPP17
+#            if __cplusplus >= 202002L
+#                define PYBIND11_CPP20
+// Please update tests/pybind11_tests.cpp `cpp_std()` when adding a macro here.
+#            endif
+#        endif
 #    endif
-#  endif
 #elif defined(_MSC_VER) && __cplusplus == 199711L
-// MSVC sets _MSVC_LANG rather than __cplusplus (supposedly until the standard is fully implemented)
-// Unless you use the /Zc:__cplusplus flag on Visual Studio 2017 15.7 Preview 3 or newer
-#  if _MSVC_LANG >= 201402L
-#    define PYBIND11_CPP14
-#    if _MSVC_LANG > 201402L && _MSC_VER >= 1910
-#      define PYBIND11_CPP17
+// MSVC sets _MSVC_LANG rather than __cplusplus (supposedly until the standard is fully
+// implemented). Unless you use the /Zc:__cplusplus flag on Visual Studio 2017 15.7 Preview 3
+// or newer.
+#    if _MSVC_LANG >= 201402L
+#        define PYBIND11_CPP14
+#        if _MSVC_LANG > 201402L
+#            define PYBIND11_CPP17
+#            if _MSVC_LANG >= 202002L
+#                define PYBIND11_CPP20
+#            endif
+#        endif
 #    endif
-#  endif
 #endif
 
 // Compiler version assertions
 #if defined(__INTEL_COMPILER)
-#  if __INTEL_COMPILER < 1800
-#    error pybind11 requires Intel C++ compiler v18 or newer
-#  elif __INTEL_COMPILER < 1900 && defined(PYBIND11_CPP14)
-#    error pybind11 supports only C++11 with Intel C++ compiler v18. Use v19 or newer for C++14.
-#  endif
+#    if __INTEL_COMPILER < 1800
+#        error pybind11 requires Intel C++ compiler v18 or newer
+#    elif __INTEL_COMPILER < 1900 && defined(PYBIND11_CPP14)
+#        error pybind11 supports only C++11 with Intel C++ compiler v18. Use v19 or newer for C++14.
+#    endif
 /* The following pragma cannot be pop'ed:
    https://community.intel.com/t5/Intel-C-Compiler/Inline-and-no-inline-warning/td-p/1216764 */
-#  pragma warning disable 2196 // warning #2196: routine is both "inline" and "noinline"
+#    pragma warning disable 2196 // warning #2196: routine is both "inline" and "noinline"
 #elif defined(__clang__) && !defined(__apple_build_version__)
-#  if __clang_major__ < 3 || (__clang_major__ == 3 && __clang_minor__ < 3)
-#    error pybind11 requires clang 3.3 or newer
-#  endif
+#    if __clang_major__ < 3 || (__clang_major__ == 3 && __clang_minor__ < 3)
+#        error pybind11 requires clang 3.3 or newer
+#    endif
 #elif defined(__clang__)
 // Apple changes clang version macros to its Xcode version; the first Xcode release based on
 // (upstream) clang 3.3 was Xcode 5:
-#  if __clang_major__ < 5
-#    error pybind11 requires Xcode/clang 5.0 or newer
-#  endif
+#    if __clang_major__ < 5
+#        error pybind11 requires Xcode/clang 5.0 or newer
+#    endif
 #elif defined(__GNUG__)
-#  if __GNUC__ < 4 || (__GNUC__ == 4 && __GNUC_MINOR__ < 8)
-#    error pybind11 requires gcc 4.8 or newer
-#  endif
+#    if __GNUC__ < 4 || (__GNUC__ == 4 && __GNUC_MINOR__ < 8)
+#        error pybind11 requires gcc 4.8 or newer
+#    endif
 #elif defined(_MSC_VER)
-// Pybind hits various compiler bugs in 2015u2 and earlier, and also makes use of some stl features
-// (e.g. std::negation) added in 2015u3:
-#  if _MSC_FULL_VER < 190024210
-#    error pybind11 requires MSVC 2015 update 3 or newer
-#  endif
+#    if _MSC_VER < 1910
+#        error pybind11 2.10+ requires MSVC 2017 or newer
+#    endif
 #endif
 
 #if !defined(PYBIND11_EXPORT)
-#  if defined(WIN32) || defined(_WIN32)
-#    define PYBIND11_EXPORT __declspec(dllexport)
-#  else
-#    define PYBIND11_EXPORT __attribute__ ((visibility("default")))
-#  endif
+#    if defined(WIN32) || defined(_WIN32)
+#        define PYBIND11_EXPORT __declspec(dllexport)
+#    else
+#        define PYBIND11_EXPORT __attribute__((visibility("default")))
+#    endif
 #endif
 
 #if !defined(PYBIND11_EXPORT_EXCEPTION)
-#  ifdef __MINGW32__
-// workaround for:
-// error: 'dllexport' implies default visibility, but xxx has already been declared with a different visibility
-#    define PYBIND11_EXPORT_EXCEPTION
-#  else
-#    define PYBIND11_EXPORT_EXCEPTION PYBIND11_EXPORT
-#  endif
+#    if defined(__apple_build_version__)
+#        define PYBIND11_EXPORT_EXCEPTION PYBIND11_EXPORT
+#    else
+#        define PYBIND11_EXPORT_EXCEPTION
+#    endif
 #endif
 
 // For CUDA, GCC7, GCC8:
@@ -105,85 +170,104 @@
 // However, the measured shared-library size saving when using noinline are only
 // 1.7% for CUDA, -0.2% for GCC7, and 0.0% for GCC8 (using -DCMAKE_BUILD_TYPE=MinSizeRel,
 // the default under pybind11/tests).
-#if !defined(PYBIND11_NOINLINE_FORCED) && \
-    (defined(__CUDACC__) || (defined(__GNUC__) && (__GNUC__ == 7 || __GNUC__ == 8)))
-#  define PYBIND11_NOINLINE_DISABLED
+#if !defined(PYBIND11_NOINLINE_FORCED)                                                            \
+    && (defined(__CUDACC__) || (defined(__GNUC__) && (__GNUC__ == 7 || __GNUC__ == 8)))
+#    define PYBIND11_NOINLINE_DISABLED
 #endif
 
 // The PYBIND11_NOINLINE macro is for function DEFINITIONS.
 // In contrast, FORWARD DECLARATIONS should never use this macro:
 // https://stackoverflow.com/questions/9317473/forward-declaration-of-inline-functions
 #if defined(PYBIND11_NOINLINE_DISABLED) // Option for maximum portability and experimentation.
-#  define PYBIND11_NOINLINE inline
+#    define PYBIND11_NOINLINE inline
 #elif defined(_MSC_VER)
-#  define PYBIND11_NOINLINE __declspec(noinline) inline
+#    define PYBIND11_NOINLINE __declspec(noinline) inline
 #else
-#  define PYBIND11_NOINLINE __attribute__ ((noinline)) inline
+#    define PYBIND11_NOINLINE __attribute__((noinline)) inline
 #endif
 
 #if defined(__MINGW32__)
 // For unknown reasons all PYBIND11_DEPRECATED member trigger a warning when declared
 // whether it is used or not
-#  define PYBIND11_DEPRECATED(reason)
+#    define PYBIND11_DEPRECATED(reason)
 #elif defined(PYBIND11_CPP14)
-#  define PYBIND11_DEPRECATED(reason) [[deprecated(reason)]]
+#    define PYBIND11_DEPRECATED(reason) [[deprecated(reason)]]
 #else
-#  define PYBIND11_DEPRECATED(reason) __attribute__((deprecated(reason)))
+#    define PYBIND11_DEPRECATED(reason) __attribute__((deprecated(reason)))
 #endif
 
 #if defined(PYBIND11_CPP17)
-#  define PYBIND11_MAYBE_UNUSED [[maybe_unused]]
+#    define PYBIND11_MAYBE_UNUSED [[maybe_unused]]
 #elif defined(_MSC_VER) && !defined(__clang__)
-#  define PYBIND11_MAYBE_UNUSED
+#    define PYBIND11_MAYBE_UNUSED
 #else
-#  define PYBIND11_MAYBE_UNUSED __attribute__ ((__unused__))
+#    define PYBIND11_MAYBE_UNUSED __attribute__((__unused__))
 #endif
 
 /* Don't let Python.h #define (v)snprintf as macro because they are implemented
    properly in Visual Studio since 2015. */
-#if defined(_MSC_VER) && _MSC_VER >= 1900
-#  define HAVE_SNPRINTF 1
+#if defined(_MSC_VER)
+#    define HAVE_SNPRINTF 1
 #endif
 
 /// Include Python header, disable linking to pythonX_d.lib on Windows in debug mode
 #if defined(_MSC_VER)
-#  if (PY_MAJOR_VERSION == 3 && PY_MINOR_VERSION < 4)
-#    define HAVE_ROUND 1
-#  endif
-#  pragma warning(push)
+PYBIND11_WARNING_PUSH
+PYBIND11_WARNING_DISABLE_MSVC(4505)
 // C4505: 'PySlice_GetIndicesEx': unreferenced local function has been removed (PyPy only)
-#  pragma warning(disable: 4505)
-#  if defined(_DEBUG) && !defined(Py_DEBUG)
-#    define PYBIND11_DEBUG_MARKER
-#    undef _DEBUG
-#  endif
+#    if defined(_DEBUG) && !defined(Py_DEBUG)
+// Workaround for a VS 2022 issue.
+// NOTE: This workaround knowingly violates the Python.h include order requirement:
+// https://docs.python.org/3/c-api/intro.html#include-files
+// See https://github.com/pybind/pybind11/pull/3497 for full context.
+#        include <yvals.h>
+#        if _MSVC_STL_VERSION >= 143
+#            include <crtdefs.h>
+#        endif
+#        define PYBIND11_DEBUG_MARKER
+#        undef _DEBUG
+#    endif
 #endif
 
 // https://en.cppreference.com/w/c/chrono/localtime
 #if defined(__STDC_LIB_EXT1__) && !defined(__STDC_WANT_LIB_EXT1__)
-#  define __STDC_WANT_LIB_EXT1__
+#    define __STDC_WANT_LIB_EXT1__
 #endif
 
 #ifdef __has_include
 // std::optional (but including it in c++14 mode isn't allowed)
-#  if defined(PYBIND11_CPP17) && __has_include(<optional>)
-#    define PYBIND11_HAS_OPTIONAL 1
-#  endif
+#    if defined(PYBIND11_CPP17) && __has_include(<optional>)
+#        define PYBIND11_HAS_OPTIONAL 1
+#    endif
 // std::experimental::optional (but not allowed in c++11 mode)
-#  if defined(PYBIND11_CPP14) && (__has_include(<experimental/optional>) && \
+#    if defined(PYBIND11_CPP14) && (__has_include(<experimental/optional>) && \
                                  !__has_include(<optional>))
-#    define PYBIND11_HAS_EXP_OPTIONAL 1
-#  endif
+#        define PYBIND11_HAS_EXP_OPTIONAL 1
+#    endif
 // std::variant
-#  if defined(PYBIND11_CPP17) && __has_include(<variant>)
-#    define PYBIND11_HAS_VARIANT 1
-#  endif
+#    if defined(PYBIND11_CPP17) && __has_include(<variant>)
+#        define PYBIND11_HAS_VARIANT 1
+#    endif
 #elif defined(_MSC_VER) && defined(PYBIND11_CPP17)
-#  define PYBIND11_HAS_OPTIONAL 1
-#  define PYBIND11_HAS_VARIANT 1
+#    define PYBIND11_HAS_OPTIONAL 1
+#    define PYBIND11_HAS_VARIANT 1
+#endif
+
+#if defined(PYBIND11_CPP17)
+#    if defined(__has_include)
+#        if __has_include(<string_view>)
+#            define PYBIND11_HAS_STRING_VIEW
+#        endif
+#    elif defined(_MSC_VER)
+#        define PYBIND11_HAS_STRING_VIEW
+#    endif
 #endif
 
 #include <Python.h>
+// Reminder: WITH_THREAD is always defined if PY_VERSION_HEX >= 0x03070000
+#if PY_VERSION_HEX < 0x03060000
+#    error "PYTHON < 3.6 IS UNSUPPORTED. pybind11 v2.9 was the last to support Python 2 and 3.5."
+#endif
 #include <frameobject.h>
 #include <pythread.h>
 
@@ -191,43 +275,62 @@
    tends to weak havok in C++ codebases that expect these to work
    like regular functions (potentially with several overloads) */
 #if defined(isalnum)
-#  undef isalnum
-#  undef isalpha
-#  undef islower
-#  undef isspace
-#  undef isupper
-#  undef tolower
-#  undef toupper
+#    undef isalnum
+#    undef isalpha
+#    undef islower
+#    undef isspace
+#    undef isupper
+#    undef tolower
+#    undef toupper
 #endif
 
 #if defined(copysign)
-#  undef copysign
+#    undef copysign
+#endif
+
+#if defined(PYPY_VERSION) && !defined(PYBIND11_SIMPLE_GIL_MANAGEMENT)
+#    define PYBIND11_SIMPLE_GIL_MANAGEMENT
 #endif
 
 #if defined(_MSC_VER)
-#  if defined(PYBIND11_DEBUG_MARKER)
-#    define _DEBUG
-#    undef PYBIND11_DEBUG_MARKER
-#  endif
-#  pragma warning(pop)
+#    if defined(PYBIND11_DEBUG_MARKER)
+#        define _DEBUG
+#        undef PYBIND11_DEBUG_MARKER
+#    endif
+PYBIND11_WARNING_POP
 #endif
 
 #include <cstddef>
 #include <cstring>
-#include <forward_list>
-#include <vector>
-#include <string>
-#include <stdexcept>
 #include <exception>
-#include <unordered_set>
-#include <unordered_map>
+#include <forward_list>
 #include <memory>
-#include <typeindex>
+#include <stdexcept>
+#include <string>
 #include <type_traits>
+#include <typeindex>
+#include <unordered_map>
+#include <unordered_set>
+#include <vector>
 #if defined(__has_include)
-#  if __has_include(<version>)
-#    include <version>
-#  endif
+#    if __has_include(<version>)
+#        include <version>
+#    endif
+#endif
+
+// Must be after including <version> or one of the other headers specified by the standard
+#if defined(__cpp_lib_char8_t) && __cpp_lib_char8_t >= 201811L
+#    define PYBIND11_HAS_U8STRING
+#endif
+
+// See description of PR #4246:
+#if !defined(NDEBUG) && !defined(PY_ASSERT_GIL_HELD_INCREF_DECREF)                                \
+    && !(defined(PYPY_VERSION)                                                                    \
+         && defined(_MSC_VER)) /* PyPy Windows: pytest hangs indefinitely at the end of the       \
+                                  process (see PR #4268) */                                       \
+    && !defined(PYBIND11_ASSERT_GIL_HELD_INCREF_DECREF)
+// The following define will be enabled by default in the 2.11 release
+// define PYBIND11_ASSERT_GIL_HELD_INCREF_DECREF
 #endif
 
 // #define PYBIND11_STR_LEGACY_PERMISSIVE
@@ -239,11 +342,11 @@
 // If UNDEFINED, pybind11::str can only hold PyUnicodeObject, and
 //               pybind11::isinstance<str>() is true only for pybind11::str.
 //               However, for Python 2 only (!), the pybind11::str caster
-//               implicitly decodes bytes to PyUnicodeObject. This is to ease
+//               implicitly decoded bytes to PyUnicodeObject. This was to ease
 //               the transition from the legacy behavior to the non-permissive
 //               behavior.
 
-#if PY_MAJOR_VERSION >= 3 /// Compatibility macros for various Python versions
+/// Compatibility macros for Python 2 / Python 3 versions TODO: remove
 #define PYBIND11_INSTANCE_METHOD_NEW(ptr, class_) PyInstanceMethod_New(ptr)
 #define PYBIND11_INSTANCE_METHOD_CHECK PyInstanceMethod_Check
 #define PYBIND11_INSTANCE_METHOD_GET_FUNCTION PyInstanceMethod_GET_FUNCTION
@@ -267,96 +370,42 @@
 #define PYBIND11_BUILTINS_MODULE "builtins"
 // Providing a separate declaration to make Clang's -Wmissing-prototypes happy.
 // See comment for PYBIND11_MODULE below for why this is marked "maybe unused".
-#define PYBIND11_PLUGIN_IMPL(name) \
-    extern "C" PYBIND11_MAYBE_UNUSED PYBIND11_EXPORT PyObject *PyInit_##name(); \
+#define PYBIND11_PLUGIN_IMPL(name)                                                                \
+    extern "C" PYBIND11_MAYBE_UNUSED PYBIND11_EXPORT PyObject *PyInit_##name();                   \
     extern "C" PYBIND11_EXPORT PyObject *PyInit_##name()
 
-#else
-#define PYBIND11_INSTANCE_METHOD_NEW(ptr, class_) PyMethod_New(ptr, nullptr, class_)
-#define PYBIND11_INSTANCE_METHOD_CHECK PyMethod_Check
-#define PYBIND11_INSTANCE_METHOD_GET_FUNCTION PyMethod_GET_FUNCTION
-#define PYBIND11_BYTES_CHECK PyString_Check
-#define PYBIND11_BYTES_FROM_STRING PyString_FromString
-#define PYBIND11_BYTES_FROM_STRING_AND_SIZE PyString_FromStringAndSize
-#define PYBIND11_BYTES_AS_STRING_AND_SIZE PyString_AsStringAndSize
-#define PYBIND11_BYTES_AS_STRING PyString_AsString
-#define PYBIND11_BYTES_SIZE PyString_Size
-#define PYBIND11_LONG_CHECK(o) (PyInt_Check(o) || PyLong_Check(o))
-#define PYBIND11_LONG_AS_LONGLONG(o) (PyInt_Check(o) ? (long long) PyLong_AsLong(o) : PyLong_AsLongLong(o))
-#define PYBIND11_LONG_FROM_SIGNED(o) PyInt_FromSsize_t((ssize_t) o) // Returns long if needed.
-#define PYBIND11_LONG_FROM_UNSIGNED(o) PyInt_FromSize_t((size_t) o) // Returns long if needed.
-#define PYBIND11_BYTES_NAME "str"
-#define PYBIND11_STRING_NAME "unicode"
-#define PYBIND11_SLICE_OBJECT PySliceObject
-#define PYBIND11_FROM_STRING PyString_FromString
-#define PYBIND11_STR_TYPE ::pybind11::bytes
-#define PYBIND11_BOOL_ATTR "__nonzero__"
-#define PYBIND11_NB_BOOL(ptr) ((ptr)->nb_nonzero)
-#define PYBIND11_BUILTINS_MODULE "__builtin__"
-// Providing a separate PyInit decl to make Clang's -Wmissing-prototypes happy.
-// See comment for PYBIND11_MODULE below for why this is marked "maybe unused".
-#define PYBIND11_PLUGIN_IMPL(name) \
-    static PyObject *pybind11_init_wrapper();                           \
-    extern "C" PYBIND11_MAYBE_UNUSED PYBIND11_EXPORT void init##name(); \
-    extern "C" PYBIND11_EXPORT void init##name() {                      \
-        (void)pybind11_init_wrapper();                                  \
-    }                                                                   \
-    PyObject *pybind11_init_wrapper()
-#endif
-
-#if PY_VERSION_HEX >= 0x03050000 && PY_VERSION_HEX < 0x03050200
-extern "C" {
-    struct _Py_atomic_address { void *value; };
-    PyAPI_DATA(_Py_atomic_address) _PyThreadState_Current;
-}
-#endif
-
 #define PYBIND11_TRY_NEXT_OVERLOAD ((PyObject *) 1) // special failure return code
 #define PYBIND11_STRINGIFY(x) #x
 #define PYBIND11_TOSTRING(x) PYBIND11_STRINGIFY(x)
 #define PYBIND11_CONCAT(first, second) first##second
-#define PYBIND11_ENSURE_INTERNALS_READY \
-    pybind11::detail::get_internals();
+#define PYBIND11_ENSURE_INTERNALS_READY pybind11::detail::get_internals();
 
-#define PYBIND11_CHECK_PYTHON_VERSION \
-    {                                                                          \
-        const char *compiled_ver = PYBIND11_TOSTRING(PY_MAJOR_VERSION)         \
-            "." PYBIND11_TOSTRING(PY_MINOR_VERSION);                           \
-        const char *runtime_ver = Py_GetVersion();                             \
-        size_t len = std::strlen(compiled_ver);                                \
-        if (std::strncmp(runtime_ver, compiled_ver, len) != 0                  \
-                || (runtime_ver[len] >= '0' && runtime_ver[len] <= '9')) {     \
-            PyErr_Format(PyExc_ImportError,                                    \
-                "Python version mismatch: module was compiled for Python %s, " \
-                "but the interpreter version is incompatible: %s.",            \
-                compiled_ver, runtime_ver);                                    \
-            return nullptr;                                                    \
-        }                                                                      \
+#define PYBIND11_CHECK_PYTHON_VERSION                                                             \
+    {                                                                                             \
+        const char *compiled_ver                                                                  \
+            = PYBIND11_TOSTRING(PY_MAJOR_VERSION) "." PYBIND11_TOSTRING(PY_MINOR_VERSION);        \
+        const char *runtime_ver = Py_GetVersion();                                                \
+        size_t len = std::strlen(compiled_ver);                                                   \
+        if (std::strncmp(runtime_ver, compiled_ver, len) != 0                                     \
+            || (runtime_ver[len] >= '0' && runtime_ver[len] <= '9')) {                            \
+            PyErr_Format(PyExc_ImportError,                                                       \
+                         "Python version mismatch: module was compiled for Python %s, "           \
+                         "but the interpreter version is incompatible: %s.",                      \
+                         compiled_ver,                                                            \
+                         runtime_ver);                                                            \
+            return nullptr;                                                                       \
+        }                                                                                         \
     }
 
-#if PY_VERSION_HEX >= 0x03030000
-
-#define PYBIND11_CATCH_INIT_EXCEPTIONS \
-        catch (pybind11::error_already_set &e) {                                 \
-            pybind11::raise_from(e, PyExc_ImportError, "initialization failed"); \
-            return nullptr;                                                      \
-        } catch (const std::exception &e) {                                      \
-            PyErr_SetString(PyExc_ImportError, e.what());                        \
-            return nullptr;                                                      \
-        }                                                                        \
-
-#else
-
-#define PYBIND11_CATCH_INIT_EXCEPTIONS \
-        catch (pybind11::error_already_set &e) {                               \
-            PyErr_SetString(PyExc_ImportError, e.what());                      \
-            return nullptr;                                                    \
-        } catch (const std::exception &e) {                                    \
-            PyErr_SetString(PyExc_ImportError, e.what());                      \
-            return nullptr;                                                    \
-        }                                                                      \
-
-#endif
+#define PYBIND11_CATCH_INIT_EXCEPTIONS                                                            \
+    catch (pybind11::error_already_set & e) {                                                     \
+        pybind11::raise_from(e, PyExc_ImportError, "initialization failed");                      \
+        return nullptr;                                                                           \
+    }                                                                                             \
+    catch (const std::exception &e) {                                                             \
+        PyErr_SetString(PyExc_ImportError, e.what());                                             \
+        return nullptr;                                                                           \
+    }
 
 /** \rst
     ***Deprecated in favor of PYBIND11_MODULE***
@@ -373,16 +422,17 @@ extern "C" {
             return m.ptr();
         }
 \endrst */
-#define PYBIND11_PLUGIN(name)                                                  \
-    PYBIND11_DEPRECATED("PYBIND11_PLUGIN is deprecated, use PYBIND11_MODULE")  \
-    static PyObject *pybind11_init();                                          \
-    PYBIND11_PLUGIN_IMPL(name) {                                               \
-        PYBIND11_CHECK_PYTHON_VERSION                                          \
-        PYBIND11_ENSURE_INTERNALS_READY                                        \
-        try {                                                                  \
-            return pybind11_init();                                            \
-        } PYBIND11_CATCH_INIT_EXCEPTIONS                                       \
-    }                                                                          \
+#define PYBIND11_PLUGIN(name)                                                                     \
+    PYBIND11_DEPRECATED("PYBIND11_PLUGIN is deprecated, use PYBIND11_MODULE")                     \
+    static PyObject *pybind11_init();                                                             \
+    PYBIND11_PLUGIN_IMPL(name) {                                                                  \
+        PYBIND11_CHECK_PYTHON_VERSION                                                             \
+        PYBIND11_ENSURE_INTERNALS_READY                                                           \
+        try {                                                                                     \
+            return pybind11_init();                                                               \
+        }                                                                                         \
+        PYBIND11_CATCH_INIT_EXCEPTIONS                                                            \
+    }                                                                                             \
     PyObject *pybind11_init()
 
 /** \rst
@@ -427,7 +477,7 @@ extern "C" {
 PYBIND11_NAMESPACE_BEGIN(PYBIND11_NAMESPACE)
 
 using ssize_t = Py_ssize_t;
-using size_t  = std::size_t;
+using size_t = std::size_t;
 
 template <typename IntType>
 inline ssize_t ssize_t_cast(const IntType &val) {
@@ -452,7 +502,7 @@ enum class return_value_policy : uint8_t {
 
     /** Reference an existing object (i.e. do not create a new copy) and take
         ownership. Python will call the destructor and delete operator when the
-        object’s reference count reaches zero. Undefined behavior ensues when
+        object's reference count reaches zero. Undefined behavior ensues when
         the C++ side does the same.. */
     take_ownership,
 
@@ -468,7 +518,7 @@ enum class return_value_policy : uint8_t {
     move,
 
     /** Reference an existing object, but do not take ownership. The C++ side
-        is responsible for managing the object’s lifetime and deallocating it
+        is responsible for managing the object's lifetime and deallocating it
         when it is no longer used. Warning: undefined behavior will ensue when
         the C++ side deletes an object that is still referenced and used by
         Python. */
@@ -477,7 +527,7 @@ enum class return_value_policy : uint8_t {
     /** This policy only applies to methods and properties. It references the
         object without taking ownership similar to the above
         return_value_policy::reference policy. In contrast to that policy, the
-        function or property’s implicit this argument (called the parent) is
+        function or property's implicit this argument (called the parent) is
         considered to be the the owner of the return value (the child).
         pybind11 then couples the lifetime of the parent to the child via a
         reference relationship that ensures that the parent cannot be garbage
@@ -489,10 +539,14 @@ enum class return_value_policy : uint8_t {
 
 PYBIND11_NAMESPACE_BEGIN(detail)
 
-inline static constexpr int log2(size_t n, int k = 0) { return (n <= 1) ? k : log2(n >> 1, k + 1); }
+inline static constexpr int log2(size_t n, int k = 0) {
+    return (n <= 1) ? k : log2(n >> 1, k + 1);
+}
 
 // Returns the size as a multiple of sizeof(void *), rounded up.
-inline static constexpr size_t size_in_ptrs(size_t s) { return 1 + ((s - 1) >> log2(sizeof(void *))); }
+inline static constexpr size_t size_in_ptrs(size_t s) {
+    return 1 + ((s - 1) >> log2(sizeof(void *)));
+}
 
 /**
  * The space to allocate for simple layout instance holders (see below) in multiple of the size of
@@ -502,7 +556,7 @@ inline static constexpr size_t size_in_ptrs(size_t s) { return 1 + ((s - 1) >> l
  */
 constexpr size_t instance_simple_holder_in_ptrs() {
     static_assert(sizeof(std::shared_ptr<int>) >= sizeof(std::unique_ptr<int>),
-            "pybind assumes std::shared_ptrs are at least as big as std::unique_ptrs");
+                  "pybind assumes std::shared_ptrs are at least as big as std::unique_ptrs");
     return size_in_ptrs(sizeof(std::shared_ptr<int>));
 }
 
@@ -530,21 +584,21 @@ struct instance {
     /**
      * An instance has two possible value/holder layouts.
      *
-     * Simple layout (when this flag is true), means the `simple_value_holder` is set with a pointer
-     * and the holder object governing that pointer, i.e. [val1*][holder].  This layout is applied
-     * whenever there is no python-side multiple inheritance of bound C++ types *and* the type's
-     * holder will fit in the default space (which is large enough to hold either a std::unique_ptr
-     * or std::shared_ptr).
+     * Simple layout (when this flag is true), means the `simple_value_holder` is set with a
+     * pointer and the holder object governing that pointer, i.e. [val1*][holder].  This layout is
+     * applied whenever there is no python-side multiple inheritance of bound C++ types *and* the
+     * type's holder will fit in the default space (which is large enough to hold either a
+     * std::unique_ptr or std::shared_ptr).
      *
-     * Non-simple layout applies when using custom holders that require more space than `shared_ptr`
-     * (which is typically the size of two pointers), or when multiple inheritance is used on the
-     * python side.  Non-simple layout allocates the required amount of memory to have multiple
-     * bound C++ classes as parents.  Under this layout, `nonsimple.values_and_holders` is set to a
-     * pointer to allocated space of the required space to hold a sequence of value pointers and
-     * holders followed `status`, a set of bit flags (1 byte each), i.e.
-     * [val1*][holder1][val2*][holder2]...[bb...]  where each [block] is rounded up to a multiple of
-     * `sizeof(void *)`.  `nonsimple.status` is, for convenience, a pointer to the
-     * beginning of the [bb...] block (but not independently allocated).
+     * Non-simple layout applies when using custom holders that require more space than
+     * `shared_ptr` (which is typically the size of two pointers), or when multiple inheritance is
+     * used on the python side.  Non-simple layout allocates the required amount of memory to have
+     * multiple bound C++ classes as parents.  Under this layout, `nonsimple.values_and_holders` is
+     * set to a pointer to allocated space of the required space to hold a sequence of value
+     * pointers and holders followed `status`, a set of bit flags (1 byte each), i.e.
+     * [val1*][holder1][val2*][holder2]...[bb...]  where each [block] is rounded up to a multiple
+     * of `sizeof(void *)`.  `nonsimple.status` is, for convenience, a pointer to the beginning of
+     * the [bb...] block (but not independently allocated).
      *
      * Status bits indicate whether the associated holder is constructed (&
      * status_holder_constructed) and whether the value pointer is registered (&
@@ -558,7 +612,8 @@ struct instance {
     /// If true, get_internals().patients has an entry for this object
     bool has_patients : 1;
 
-    /// Initializes all of the above type/values/holders data (but not the instance values themselves)
+    /// Initializes all of the above type/values/holders data (but not the instance values
+    /// themselves)
     void allocate_layout();
 
     /// Destroys/deallocates all of the above
@@ -567,26 +622,44 @@ struct instance {
     /// Returns the value_and_holder wrapper for the given type (or the first, if `find_type`
     /// omitted).  Returns a default-constructed (with `.inst = nullptr`) object on failure if
     /// `throw_if_missing` is false.
-    value_and_holder get_value_and_holder(const type_info *find_type = nullptr, bool throw_if_missing = true);
+    value_and_holder get_value_and_holder(const type_info *find_type = nullptr,
+                                          bool throw_if_missing = true);
 
     /// Bit values for the non-simple status flags
-    static constexpr uint8_t status_holder_constructed  = 1;
+    static constexpr uint8_t status_holder_constructed = 1;
     static constexpr uint8_t status_instance_registered = 2;
 };
 
-static_assert(std::is_standard_layout<instance>::value, "Internal error: `pybind11::detail::instance` is not standard layout!");
+static_assert(std::is_standard_layout<instance>::value,
+              "Internal error: `pybind11::detail::instance` is not standard layout!");
 
 /// from __cpp_future__ import (convenient aliases from C++14/17)
-#if defined(PYBIND11_CPP14) && (!defined(_MSC_VER) || _MSC_VER >= 1910)
-using std::enable_if_t;
+#if defined(PYBIND11_CPP14)
 using std::conditional_t;
+using std::enable_if_t;
 using std::remove_cv_t;
 using std::remove_reference_t;
 #else
-template <bool B, typename T = void> using enable_if_t = typename std::enable_if<B, T>::type;
-template <bool B, typename T, typename F> using conditional_t = typename std::conditional<B, T, F>::type;
-template <typename T> using remove_cv_t = typename std::remove_cv<T>::type;
-template <typename T> using remove_reference_t = typename std::remove_reference<T>::type;
+template <bool B, typename T = void>
+using enable_if_t = typename std::enable_if<B, T>::type;
+template <bool B, typename T, typename F>
+using conditional_t = typename std::conditional<B, T, F>::type;
+template <typename T>
+using remove_cv_t = typename std::remove_cv<T>::type;
+template <typename T>
+using remove_reference_t = typename std::remove_reference<T>::type;
+#endif
+
+#if defined(PYBIND11_CPP20)
+using std::remove_cvref;
+using std::remove_cvref_t;
+#else
+template <class T>
+struct remove_cvref {
+    using type = remove_cv_t<remove_reference_t<T>>;
+};
+template <class T>
+using remove_cvref_t = typename remove_cvref<T>::type;
 #endif
 
 /// Index sequences
@@ -594,114 +667,189 @@ template <typename T> using remove_reference_t = typename std::remove_reference<
 using std::index_sequence;
 using std::make_index_sequence;
 #else
-template<size_t ...> struct index_sequence  { };
-template<size_t N, size_t ...S> struct make_index_sequence_impl : make_index_sequence_impl <N - 1, N - 1, S...> { };
-template<size_t ...S> struct make_index_sequence_impl <0, S...> { using type = index_sequence<S...>; };
-template<size_t N> using make_index_sequence = typename make_index_sequence_impl<N>::type;
+template <size_t...>
+struct index_sequence {};
+template <size_t N, size_t... S>
+struct make_index_sequence_impl : make_index_sequence_impl<N - 1, N - 1, S...> {};
+template <size_t... S>
+struct make_index_sequence_impl<0, S...> {
+    using type = index_sequence<S...>;
+};
+template <size_t N>
+using make_index_sequence = typename make_index_sequence_impl<N>::type;
 #endif
 
 /// Make an index sequence of the indices of true arguments
-template <typename ISeq, size_t, bool...> struct select_indices_impl { using type = ISeq; };
-template <size_t... IPrev, size_t I, bool B, bool... Bs> struct select_indices_impl<index_sequence<IPrev...>, I, B, Bs...>
-    : select_indices_impl<conditional_t<B, index_sequence<IPrev..., I>, index_sequence<IPrev...>>, I + 1, Bs...> {};
-template <bool... Bs> using select_indices = typename select_indices_impl<index_sequence<>, 0, Bs...>::type;
+template <typename ISeq, size_t, bool...>
+struct select_indices_impl {
+    using type = ISeq;
+};
+template <size_t... IPrev, size_t I, bool B, bool... Bs>
+struct select_indices_impl<index_sequence<IPrev...>, I, B, Bs...>
+    : select_indices_impl<conditional_t<B, index_sequence<IPrev..., I>, index_sequence<IPrev...>>,
+                          I + 1,
+                          Bs...> {};
+template <bool... Bs>
+using select_indices = typename select_indices_impl<index_sequence<>, 0, Bs...>::type;
 
 /// Backports of std::bool_constant and std::negation to accommodate older compilers
-template <bool B> using bool_constant = std::integral_constant<bool, B>;
-template <typename T> struct negation : bool_constant<!T::value> { };
+template <bool B>
+using bool_constant = std::integral_constant<bool, B>;
+template <typename T>
+struct negation : bool_constant<!T::value> {};
 
 // PGI/Intel cannot detect operator delete with the "compatible" void_t impl, so
 // using the new one (C++14 defect, so generally works on newer compilers, even
 // if not in C++17 mode)
 #if defined(__PGIC__) || defined(__INTEL_COMPILER)
-template<typename... > using void_t = void;
+template <typename...>
+using void_t = void;
 #else
-template <typename...> struct void_t_impl { using type = void; };
-template <typename... Ts> using void_t = typename void_t_impl<Ts...>::type;
+template <typename...>
+struct void_t_impl {
+    using type = void;
+};
+template <typename... Ts>
+using void_t = typename void_t_impl<Ts...>::type;
 #endif
 
-
 /// Compile-time all/any/none of that check the boolean value of all template types
 #if defined(__cpp_fold_expressions) && !(defined(_MSC_VER) && (_MSC_VER < 1916))
-template <class... Ts> using all_of = bool_constant<(Ts::value && ...)>;
-template <class... Ts> using any_of = bool_constant<(Ts::value || ...)>;
+template <class... Ts>
+using all_of = bool_constant<(Ts::value && ...)>;
+template <class... Ts>
+using any_of = bool_constant<(Ts::value || ...)>;
 #elif !defined(_MSC_VER)
-template <bool...> struct bools {};
-template <class... Ts> using all_of = std::is_same<
-    bools<Ts::value..., true>,
-    bools<true, Ts::value...>>;
-template <class... Ts> using any_of = negation<all_of<negation<Ts>...>>;
+template <bool...>
+struct bools {};
+template <class... Ts>
+using all_of = std::is_same<bools<Ts::value..., true>, bools<true, Ts::value...>>;
+template <class... Ts>
+using any_of = negation<all_of<negation<Ts>...>>;
 #else
 // MSVC has trouble with the above, but supports std::conjunction, which we can use instead (albeit
 // at a slight loss of compilation efficiency).
-template <class... Ts> using all_of = std::conjunction<Ts...>;
-template <class... Ts> using any_of = std::disjunction<Ts...>;
+template <class... Ts>
+using all_of = std::conjunction<Ts...>;
+template <class... Ts>
+using any_of = std::disjunction<Ts...>;
 #endif
-template <class... Ts> using none_of = negation<any_of<Ts...>>;
+template <class... Ts>
+using none_of = negation<any_of<Ts...>>;
 
-template <class T, template<class> class... Predicates> using satisfies_all_of = all_of<Predicates<T>...>;
-template <class T, template<class> class... Predicates> using satisfies_any_of = any_of<Predicates<T>...>;
-template <class T, template<class> class... Predicates> using satisfies_none_of = none_of<Predicates<T>...>;
+template <class T, template <class> class... Predicates>
+using satisfies_all_of = all_of<Predicates<T>...>;
+template <class T, template <class> class... Predicates>
+using satisfies_any_of = any_of<Predicates<T>...>;
+template <class T, template <class> class... Predicates>
+using satisfies_none_of = none_of<Predicates<T>...>;
 
 /// Strip the class from a method type
-template <typename T> struct remove_class { };
-template <typename C, typename R, typename... A> struct remove_class<R (C::*)(A...)> { using type = R (A...); };
-template <typename C, typename R, typename... A> struct remove_class<R (C::*)(A...) const> { using type = R (A...); };
+template <typename T>
+struct remove_class {};
+template <typename C, typename R, typename... A>
+struct remove_class<R (C::*)(A...)> {
+    using type = R(A...);
+};
+template <typename C, typename R, typename... A>
+struct remove_class<R (C::*)(A...) const> {
+    using type = R(A...);
+};
 
 /// Helper template to strip away type modifiers
-template <typename T> struct intrinsic_type                       { using type = T; };
-template <typename T> struct intrinsic_type<const T>              { using type = typename intrinsic_type<T>::type; };
-template <typename T> struct intrinsic_type<T*>                   { using type = typename intrinsic_type<T>::type; };
-template <typename T> struct intrinsic_type<T&>                   { using type = typename intrinsic_type<T>::type; };
-template <typename T> struct intrinsic_type<T&&>                  { using type = typename intrinsic_type<T>::type; };
-template <typename T, size_t N> struct intrinsic_type<const T[N]> { using type = typename intrinsic_type<T>::type; };
-template <typename T, size_t N> struct intrinsic_type<T[N]>       { using type = typename intrinsic_type<T>::type; };
-template <typename T> using intrinsic_t = typename intrinsic_type<T>::type;
+template <typename T>
+struct intrinsic_type {
+    using type = T;
+};
+template <typename T>
+struct intrinsic_type<const T> {
+    using type = typename intrinsic_type<T>::type;
+};
+template <typename T>
+struct intrinsic_type<T *> {
+    using type = typename intrinsic_type<T>::type;
+};
+template <typename T>
+struct intrinsic_type<T &> {
+    using type = typename intrinsic_type<T>::type;
+};
+template <typename T>
+struct intrinsic_type<T &&> {
+    using type = typename intrinsic_type<T>::type;
+};
+template <typename T, size_t N>
+struct intrinsic_type<const T[N]> {
+    using type = typename intrinsic_type<T>::type;
+};
+template <typename T, size_t N>
+struct intrinsic_type<T[N]> {
+    using type = typename intrinsic_type<T>::type;
+};
+template <typename T>
+using intrinsic_t = typename intrinsic_type<T>::type;
 
 /// Helper type to replace 'void' in some expressions
-struct void_type { };
+struct void_type {};
 
 /// Helper template which holds a list of types
-template <typename...> struct type_list { };
+template <typename...>
+struct type_list {};
 
 /// Compile-time integer sum
 #ifdef __cpp_fold_expressions
-template <typename... Ts> constexpr size_t constexpr_sum(Ts... ns) { return (0 + ... + size_t{ns}); }
+template <typename... Ts>
+constexpr size_t constexpr_sum(Ts... ns) {
+    return (0 + ... + size_t{ns});
+}
 #else
 constexpr size_t constexpr_sum() { return 0; }
 template <typename T, typename... Ts>
-constexpr size_t constexpr_sum(T n, Ts... ns) { return size_t{n} + constexpr_sum(ns...); }
+constexpr size_t constexpr_sum(T n, Ts... ns) {
+    return size_t{n} + constexpr_sum(ns...);
+}
 #endif
 
 PYBIND11_NAMESPACE_BEGIN(constexpr_impl)
 /// Implementation details for constexpr functions
 constexpr int first(int i) { return i; }
 template <typename T, typename... Ts>
-constexpr int first(int i, T v, Ts... vs) { return v ? i : first(i + 1, vs...); }
+constexpr int first(int i, T v, Ts... vs) {
+    return v ? i : first(i + 1, vs...);
+}
 
 constexpr int last(int /*i*/, int result) { return result; }
 template <typename T, typename... Ts>
-constexpr int last(int i, int result, T v, Ts... vs) { return last(i + 1, v ? i : result, vs...); }
+constexpr int last(int i, int result, T v, Ts... vs) {
+    return last(i + 1, v ? i : result, vs...);
+}
 PYBIND11_NAMESPACE_END(constexpr_impl)
 
-/// Return the index of the first type in Ts which satisfies Predicate<T>.  Returns sizeof...(Ts) if
-/// none match.
-template <template<typename> class Predicate, typename... Ts>
-constexpr int constexpr_first() { return constexpr_impl::first(0, Predicate<Ts>::value...); }
+/// Return the index of the first type in Ts which satisfies Predicate<T>.
+/// Returns sizeof...(Ts) if none match.
+template <template <typename> class Predicate, typename... Ts>
+constexpr int constexpr_first() {
+    return constexpr_impl::first(0, Predicate<Ts>::value...);
+}
 
 /// Return the index of the last type in Ts which satisfies Predicate<T>, or -1 if none match.
-template <template<typename> class Predicate, typename... Ts>
-constexpr int constexpr_last() { return constexpr_impl::last(0, -1, Predicate<Ts>::value...); }
+template <template <typename> class Predicate, typename... Ts>
+constexpr int constexpr_last() {
+    return constexpr_impl::last(0, -1, Predicate<Ts>::value...);
+}
 
 /// Return the Nth element from the parameter pack
 template <size_t N, typename T, typename... Ts>
-struct pack_element { using type = typename pack_element<N - 1, Ts...>::type; };
+struct pack_element {
+    using type = typename pack_element<N - 1, Ts...>::type;
+};
 template <typename T, typename... Ts>
-struct pack_element<0, T, Ts...> { using type = T; };
+struct pack_element<0, T, Ts...> {
+    using type = T;
+};
 
 /// Return the one and only type which matches the predicate, or Default if none match.
 /// If more than one type matches the predicate, fail at compile-time.
-template <template<typename> class Predicate, typename Default, typename... Ts>
+template <template <typename> class Predicate, typename Default, typename... Ts>
 struct exactly_one {
     static constexpr auto found = constexpr_sum(Predicate<Ts>::value...);
     static_assert(found <= 1, "Found more than one type matching the predicate");
@@ -709,62 +857,83 @@ struct exactly_one {
     static constexpr auto index = found ? constexpr_first<Predicate, Ts...>() : 0;
     using type = conditional_t<found, typename pack_element<index, Ts...>::type, Default>;
 };
-template <template<typename> class P, typename Default>
-struct exactly_one<P, Default> { using type = Default; };
+template <template <typename> class P, typename Default>
+struct exactly_one<P, Default> {
+    using type = Default;
+};
 
-template <template<typename> class Predicate, typename Default, typename... Ts>
+template <template <typename> class Predicate, typename Default, typename... Ts>
 using exactly_one_t = typename exactly_one<Predicate, Default, Ts...>::type;
 
 /// Defer the evaluation of type T until types Us are instantiated
-template <typename T, typename... /*Us*/> struct deferred_type { using type = T; };
-template <typename T, typename... Us> using deferred_t = typename deferred_type<T, Us...>::type;
+template <typename T, typename... /*Us*/>
+struct deferred_type {
+    using type = T;
+};
+template <typename T, typename... Us>
+using deferred_t = typename deferred_type<T, Us...>::type;
 
 /// Like is_base_of, but requires a strict base (i.e. `is_strict_base_of<T, T>::value == false`,
 /// unlike `std::is_base_of`)
-template <typename Base, typename Derived> using is_strict_base_of = bool_constant<
-    std::is_base_of<Base, Derived>::value && !std::is_same<Base, Derived>::value>;
+template <typename Base, typename Derived>
+using is_strict_base_of
+    = bool_constant<std::is_base_of<Base, Derived>::value && !std::is_same<Base, Derived>::value>;
 
-/// Like is_base_of, but also requires that the base type is accessible (i.e. that a Derived pointer
-/// can be converted to a Base pointer)
-/// For unions, `is_base_of<T, T>::value` is False, so we need to check `is_same` as well.
-template <typename Base, typename Derived> using is_accessible_base_of = bool_constant<
-    (std::is_same<Base, Derived>::value || std::is_base_of<Base, Derived>::value) && std::is_convertible<Derived *, Base *>::value>;
+/// Like is_base_of, but also requires that the base type is accessible (i.e. that a Derived
+/// pointer can be converted to a Base pointer) For unions, `is_base_of<T, T>::value` is False, so
+/// we need to check `is_same` as well.
+template <typename Base, typename Derived>
+using is_accessible_base_of
+    = bool_constant<(std::is_same<Base, Derived>::value || std::is_base_of<Base, Derived>::value)
+                    && std::is_convertible<Derived *, Base *>::value>;
 
-template <template<typename...> class Base>
+template <template <typename...> class Base>
 struct is_template_base_of_impl {
-    template <typename... Us> static std::true_type check(Base<Us...> *);
+    template <typename... Us>
+    static std::true_type check(Base<Us...> *);
     static std::false_type check(...);
 };
 
 /// Check if a template is the base of a type. For example:
 /// `is_template_base_of<Base, T>` is true if `struct T : Base<U> {}` where U can be anything
-template <template<typename...> class Base, typename T>
+template <template <typename...> class Base, typename T>
+// Sadly, all MSVC versions incl. 2022 need the workaround, even in C++20 mode.
+// See also: https://github.com/pybind/pybind11/pull/3741
 #if !defined(_MSC_VER)
-using is_template_base_of = decltype(is_template_base_of_impl<Base>::check((intrinsic_t<T>*)nullptr));
-#else // MSVC2015 has trouble with decltype in template aliases
-struct is_template_base_of : decltype(is_template_base_of_impl<Base>::check((intrinsic_t<T>*)nullptr)) { };
+using is_template_base_of
+    = decltype(is_template_base_of_impl<Base>::check((intrinsic_t<T> *) nullptr));
+#else
+struct is_template_base_of
+    : decltype(is_template_base_of_impl<Base>::check((intrinsic_t<T> *) nullptr)) {
+};
 #endif
 
 /// Check if T is an instantiation of the template `Class`. For example:
 /// `is_instantiation<shared_ptr, T>` is true if `T == shared_ptr<U>` where U can be anything.
-template <template<typename...> class Class, typename T>
-struct is_instantiation : std::false_type { };
-template <template<typename...> class Class, typename... Us>
-struct is_instantiation<Class, Class<Us...>> : std::true_type { };
+template <template <typename...> class Class, typename T>
+struct is_instantiation : std::false_type {};
+template <template <typename...> class Class, typename... Us>
+struct is_instantiation<Class, Class<Us...>> : std::true_type {};
 
 /// Check if T is std::shared_ptr<U> where U can be anything
-template <typename T> using is_shared_ptr = is_instantiation<std::shared_ptr, T>;
+template <typename T>
+using is_shared_ptr = is_instantiation<std::shared_ptr, T>;
 
 /// Check if T looks like an input iterator
-template <typename T, typename = void> struct is_input_iterator : std::false_type {};
+template <typename T, typename = void>
+struct is_input_iterator : std::false_type {};
 template <typename T>
-struct is_input_iterator<T, void_t<decltype(*std::declval<T &>()), decltype(++std::declval<T &>())>>
+struct is_input_iterator<T,
+                         void_t<decltype(*std::declval<T &>()), decltype(++std::declval<T &>())>>
     : std::true_type {};
 
-template <typename T> using is_function_pointer = bool_constant<
-    std::is_pointer<T>::value && std::is_function<typename std::remove_pointer<T>::type>::value>;
+template <typename T>
+using is_function_pointer
+    = bool_constant<std::is_pointer<T>::value
+                    && std::is_function<typename std::remove_pointer<T>::type>::value>;
 
-template <typename F> struct strip_function_object {
+template <typename F>
+struct strip_function_object {
     // If you are encountering an
     // 'error: name followed by "::" must be a class or namespace name'
     // with the Intel compiler and a noexcept function here,
@@ -777,35 +946,32 @@ template <typename Function, typename F = remove_reference_t<Function>>
 using function_signature_t = conditional_t<
     std::is_function<F>::value,
     F,
-    typename conditional_t<
-        std::is_pointer<F>::value || std::is_member_pointer<F>::value,
-        std::remove_pointer<F>,
-        strip_function_object<F>
-    >::type
->;
+    typename conditional_t<std::is_pointer<F>::value || std::is_member_pointer<F>::value,
+                           std::remove_pointer<F>,
+                           strip_function_object<F>>::type>;
 
 /// Returns true if the type looks like a lambda: that is, isn't a function, pointer or member
 /// pointer.  Note that this can catch all sorts of other things, too; this is intended to be used
 /// in a place where passing a lambda makes sense.
-template <typename T> using is_lambda = satisfies_none_of<remove_reference_t<T>,
-        std::is_function, std::is_pointer, std::is_member_pointer>;
+template <typename T>
+using is_lambda = satisfies_none_of<remove_reference_t<T>,
+                                    std::is_function,
+                                    std::is_pointer,
+                                    std::is_member_pointer>;
 
 // [workaround(intel)] Internal error on fold expression
 /// Apply a function over each element of a parameter pack
 #if defined(__cpp_fold_expressions) && !defined(__INTEL_COMPILER)
 // Intel compiler produces an internal error on this fold expression (tested with ICC 19.0.2)
-#define PYBIND11_EXPAND_SIDE_EFFECTS(PATTERN) (((PATTERN), void()), ...)
+#    define PYBIND11_EXPAND_SIDE_EFFECTS(PATTERN) (((PATTERN), void()), ...)
 #else
 using expand_side_effects = bool[];
-#define PYBIND11_EXPAND_SIDE_EFFECTS(PATTERN) (void)pybind11::detail::expand_side_effects{ ((PATTERN), void(), false)..., false }
+#    define PYBIND11_EXPAND_SIDE_EFFECTS(PATTERN)                                                 \
+        (void) pybind11::detail::expand_side_effects { ((PATTERN), void(), false)..., false }
 #endif
 
 PYBIND11_NAMESPACE_END(detail)
 
-#if defined(_MSC_VER)
-#  pragma warning(push)
-#  pragma warning(disable: 4275) // warning C4275: An exported class was derived from a class that wasn't exported. Can be ignored when derived from a STL class.
-#endif
 /// C++ bindings of builtin Python exceptions
 class PYBIND11_EXPORT_EXCEPTION builtin_exception : public std::runtime_error {
 public:
@@ -813,15 +979,13 @@ public:
     /// Set the error using the Python C API
     virtual void set_error() const = 0;
 };
-#if defined(_MSC_VER)
-#  pragma warning(pop)
-#endif
 
-#define PYBIND11_RUNTIME_EXCEPTION(name, type) \
-    class PYBIND11_EXPORT_EXCEPTION name : public builtin_exception { public: \
-        using builtin_exception::builtin_exception; \
-        name() : name("") { } \
-        void set_error() const override { PyErr_SetString(type, what()); } \
+#define PYBIND11_RUNTIME_EXCEPTION(name, type)                                                    \
+    class PYBIND11_EXPORT_EXCEPTION name : public builtin_exception {                             \
+    public:                                                                                       \
+        using builtin_exception::builtin_exception;                                               \
+        name() : name("") {}                                                                      \
+        void set_error() const override { PyErr_SetString(type, what()); }                        \
     };
 
 PYBIND11_RUNTIME_EXCEPTION(stop_iteration, PyExc_StopIteration)
@@ -832,13 +996,22 @@ PYBIND11_RUNTIME_EXCEPTION(type_error, PyExc_TypeError)
 PYBIND11_RUNTIME_EXCEPTION(buffer_error, PyExc_BufferError)
 PYBIND11_RUNTIME_EXCEPTION(import_error, PyExc_ImportError)
 PYBIND11_RUNTIME_EXCEPTION(attribute_error, PyExc_AttributeError)
-PYBIND11_RUNTIME_EXCEPTION(cast_error, PyExc_RuntimeError) /// Thrown when pybind11::cast or handle::call fail due to a type casting error
+PYBIND11_RUNTIME_EXCEPTION(cast_error, PyExc_RuntimeError) /// Thrown when pybind11::cast or
+                                                           /// handle::call fail due to a type
+                                                           /// casting error
 PYBIND11_RUNTIME_EXCEPTION(reference_cast_error, PyExc_RuntimeError) /// Used internally
 
-[[noreturn]] PYBIND11_NOINLINE void pybind11_fail(const char *reason) { throw std::runtime_error(reason); }
-[[noreturn]] PYBIND11_NOINLINE void pybind11_fail(const std::string &reason) { throw std::runtime_error(reason); }
+[[noreturn]] PYBIND11_NOINLINE void pybind11_fail(const char *reason) {
+    assert(!PyErr_Occurred());
+    throw std::runtime_error(reason);
+}
+[[noreturn]] PYBIND11_NOINLINE void pybind11_fail(const std::string &reason) {
+    assert(!PyErr_Occurred());
+    throw std::runtime_error(reason);
+}
 
-template <typename T, typename SFINAE = void> struct format_descriptor { };
+template <typename T, typename SFINAE = void>
+struct format_descriptor {};
 
 PYBIND11_NAMESPACE_BEGIN(detail)
 // Returns the index of the given type in the type char array below, and in the list in numpy.h
@@ -846,25 +1019,38 @@ PYBIND11_NAMESPACE_BEGIN(detail)
 // complex float,double,long double.  Note that the long double types only participate when long
 // double is actually longer than double (it isn't under MSVC).
 // NB: not only the string below but also complex.h and numpy.h rely on this order.
-template <typename T, typename SFINAE = void> struct is_fmt_numeric { static constexpr bool value = false; };
-template <typename T> struct is_fmt_numeric<T, enable_if_t<std::is_arithmetic<T>::value>> {
+template <typename T, typename SFINAE = void>
+struct is_fmt_numeric {
+    static constexpr bool value = false;
+};
+template <typename T>
+struct is_fmt_numeric<T, enable_if_t<std::is_arithmetic<T>::value>> {
     static constexpr bool value = true;
-    static constexpr int index = std::is_same<T, bool>::value ? 0 : 1 + (
-        std::is_integral<T>::value ? detail::log2(sizeof(T))*2 + std::is_unsigned<T>::value : 8 + (
-        std::is_same<T, double>::value ? 1 : std::is_same<T, long double>::value ? 2 : 0));
+    static constexpr int index
+        = std::is_same<T, bool>::value
+              ? 0
+              : 1
+                    + (std::is_integral<T>::value
+                           ? detail::log2(sizeof(T)) * 2 + std::is_unsigned<T>::value
+                           : 8
+                                 + (std::is_same<T, double>::value        ? 1
+                                    : std::is_same<T, long double>::value ? 2
+                                                                          : 0));
 };
 PYBIND11_NAMESPACE_END(detail)
 
-template <typename T> struct format_descriptor<T, detail::enable_if_t<std::is_arithmetic<T>::value>> {
+template <typename T>
+struct format_descriptor<T, detail::enable_if_t<std::is_arithmetic<T>::value>> {
     static constexpr const char c = "?bBhHiIqQfdg"[detail::is_fmt_numeric<T>::index];
-    static constexpr const char value[2] = { c, '\0' };
+    static constexpr const char value[2] = {c, '\0'};
     static std::string format() { return std::string(1, c); }
 };
 
 #if !defined(PYBIND11_CPP17)
 
-template <typename T> constexpr const char format_descriptor<
-    T, detail::enable_if_t<std::is_arithmetic<T>::value>>::value[2];
+template <typename T>
+constexpr const char
+    format_descriptor<T, detail::enable_if_t<std::is_arithmetic<T>::value>>::value[2];
 
 #endif
 
@@ -872,41 +1058,47 @@ template <typename T> constexpr const char format_descriptor<
 struct error_scope {
     PyObject *type, *value, *trace;
     error_scope() { PyErr_Fetch(&type, &value, &trace); }
+    error_scope(const error_scope &) = delete;
+    error_scope &operator=(const error_scope &) = delete;
     ~error_scope() { PyErr_Restore(type, value, trace); }
 };
 
 /// Dummy destructor wrapper that can be used to expose classes with a private destructor
-struct nodelete { template <typename T> void operator()(T*) { } };
+struct nodelete {
+    template <typename T>
+    void operator()(T *) {}
+};
 
 PYBIND11_NAMESPACE_BEGIN(detail)
 template <typename... Args>
 struct overload_cast_impl {
-    // NOLINTNEXTLINE(modernize-use-equals-default):  MSVC 2015 needs this
-    constexpr overload_cast_impl() {}
-
     template <typename Return>
-    constexpr auto operator()(Return (*pf)(Args...)) const noexcept
-                              -> decltype(pf) { return pf; }
+    constexpr auto operator()(Return (*pf)(Args...)) const noexcept -> decltype(pf) {
+        return pf;
+    }
 
     template <typename Return, typename Class>
     constexpr auto operator()(Return (Class::*pmf)(Args...), std::false_type = {}) const noexcept
-                              -> decltype(pmf) { return pmf; }
+        -> decltype(pmf) {
+        return pmf;
+    }
 
     template <typename Return, typename Class>
     constexpr auto operator()(Return (Class::*pmf)(Args...) const, std::true_type) const noexcept
-                              -> decltype(pmf) { return pmf; }
+        -> decltype(pmf) {
+        return pmf;
+    }
 };
 PYBIND11_NAMESPACE_END(detail)
 
 // overload_cast requires variable templates: C++14
 #if defined(PYBIND11_CPP14)
-#define PYBIND11_OVERLOAD_CAST 1
+#    define PYBIND11_OVERLOAD_CAST 1
 /// Syntax sugar for resolving overloaded function pointers:
 ///  - regular: static_cast<Return (Class::*)(Arg0, Arg1, Arg2)>(&Class::func)
 ///  - sweet:   overload_cast<Arg0, Arg1, Arg2>(&Class::func)
 template <typename... Args>
-static constexpr detail::overload_cast_impl<Args...> overload_cast = {};
-// MSVC 2015 only accepts this particular initialization syntax for this variable template.
+static constexpr detail::overload_cast_impl<Args...> overload_cast{};
 #endif
 
 /// Const member function selector for overload_cast
@@ -915,7 +1107,8 @@ static constexpr detail::overload_cast_impl<Args...> overload_cast = {};
 static constexpr auto const_ = std::true_type{};
 
 #if !defined(PYBIND11_CPP14) // no overload_cast: providing something that static_assert-fails:
-template <typename... Args> struct overload_cast {
+template <typename... Args>
+struct overload_cast {
     static_assert(detail::deferred_t<std::false_type, Args...>::value,
                   "pybind11::overload_cast<...> requires compiling in C++14 mode");
 };
@@ -929,26 +1122,31 @@ PYBIND11_NAMESPACE_BEGIN(detail)
 template <typename T>
 class any_container {
     std::vector<T> v;
+
 public:
     any_container() = default;
 
     // Can construct from a pair of iterators
     template <typename It, typename = enable_if_t<is_input_iterator<It>::value>>
-    any_container(It first, It last) : v(first, last) { }
+    any_container(It first, It last) : v(first, last) {}
 
-    // Implicit conversion constructor from any arbitrary container type with values convertible to T
-    template <typename Container, typename = enable_if_t<std::is_convertible<decltype(*std::begin(std::declval<const Container &>())), T>::value>>
+    // Implicit conversion constructor from any arbitrary container type
+    // with values convertible to T
+    template <typename Container,
+              typename = enable_if_t<
+                  std::is_convertible<decltype(*std::begin(std::declval<const Container &>())),
+                                      T>::value>>
     // NOLINTNEXTLINE(google-explicit-constructor)
-    any_container(const Container &c) : any_container(std::begin(c), std::end(c)) { }
+    any_container(const Container &c) : any_container(std::begin(c), std::end(c)) {}
 
-    // initializer_list's aren't deducible, so don't get matched by the above template; we need this
-    // to explicitly allow implicit conversion from one:
+    // initializer_list's aren't deducible, so don't get matched by the above template;
+    // we need this to explicitly allow implicit conversion from one:
     template <typename TIn, typename = enable_if_t<std::is_convertible<TIn, T>::value>>
-    any_container(const std::initializer_list<TIn> &c) : any_container(c.begin(), c.end()) { }
+    any_container(const std::initializer_list<TIn> &c) : any_container(c.begin(), c.end()) {}
 
     // Avoid copying if given an rvalue vector of the correct type.
     // NOLINTNEXTLINE(google-explicit-constructor)
-    any_container(std::vector<T> &&v) : v(std::move(v)) { }
+    any_container(std::vector<T> &&v) : v(std::move(v)) {}
 
     // Moves the vector out of an rvalue any_container
     // NOLINTNEXTLINE(google-explicit-constructor)
@@ -964,10 +1162,11 @@ public:
 };
 
 // Forward-declaration; see detail/class.h
-std::string get_fully_qualified_tp_name(PyTypeObject*);
+std::string get_fully_qualified_tp_name(PyTypeObject *);
 
 template <typename T>
-inline static std::shared_ptr<T> try_get_shared_from_this(std::enable_shared_from_this<T> *holder_value_ptr) {
+inline static std::shared_ptr<T>
+try_get_shared_from_this(std::enable_shared_from_this<T> *holder_value_ptr) {
 // Pre C++17, this code path exploits undefined behavior, but is known to work on many platforms.
 // Use at your own risk!
 // See also https://en.cppreference.com/w/cpp/memory/enable_shared_from_this, and in particular
@@ -977,8 +1176,7 @@ inline static std::shared_ptr<T> try_get_shared_from_this(std::enable_shared_fro
 #else
     try {
         return holder_value_ptr->shared_from_this();
-    }
-    catch (const std::bad_weak_ptr &) {
+    } catch (const std::bad_weak_ptr &) {
         return nullptr;
     }
 #endif
@@ -986,10 +1184,12 @@ inline static std::shared_ptr<T> try_get_shared_from_this(std::enable_shared_fro
 
 // For silencing "unused" compiler warnings in special situations.
 template <typename... Args>
-#if defined(_MSC_VER) && _MSC_VER >= 1910 && _MSC_VER < 1920 // MSVC 2017
+#if defined(_MSC_VER) && _MSC_VER < 1920 // MSVC 2017
 constexpr
 #endif
-inline void silence_unused_warnings(Args &&...) {}
+    inline void
+    silence_unused_warnings(Args &&...) {
+}
 
 // MSVC warning C4100: Unreferenced formal parameter
 #if defined(_MSC_VER) && _MSC_VER <= 1916
@@ -1001,21 +1201,35 @@ inline void silence_unused_warnings(Args &&...) {}
 
 // GCC -Wunused-but-set-parameter  All GCC versions (as of July 2021).
 #if defined(__GNUG__) && !defined(__clang__) && !defined(__INTEL_COMPILER)
-#    define PYBIND11_WORKAROUND_INCORRECT_GCC_UNUSED_BUT_SET_PARAMETER(...)                   \
+#    define PYBIND11_WORKAROUND_INCORRECT_GCC_UNUSED_BUT_SET_PARAMETER(...)                       \
         detail::silence_unused_warnings(__VA_ARGS__)
 #else
 #    define PYBIND11_WORKAROUND_INCORRECT_GCC_UNUSED_BUT_SET_PARAMETER(...)
 #endif
 
-#if defined(_MSC_VER) // All versions (as of July 2021).
+#if defined(__clang__)                                                                            \
+    && (defined(__apple_build_version__) /* AppleClang 13.0.0.13000029 was the only data point    \
+                                            available. */                                         \
+        || (__clang_major__ >= 7                                                                  \
+            && __clang_major__ <= 12) /* Clang 3, 5, 13, 14, 15 do not generate the warning. */   \
+    )
+#    define PYBIND11_DETECTED_CLANG_WITH_MISLEADING_CALL_STD_MOVE_EXPLICITLY_WARNING
+// Example:
+// tests/test_kwargs_and_defaults.cpp:46:68: error: local variable 'args' will be copied despite
+// being returned by name [-Werror,-Wreturn-std-move]
+//     m.def("args_function", [](py::args args) -> py::tuple { return args; });
+//                                                                    ^~~~
+// test_kwargs_and_defaults.cpp:46:68: note: call 'std::move' explicitly to avoid copying
+//     m.def("args_function", [](py::args args) -> py::tuple { return args; });
+//                                                                    ^~~~
+//                                                                    std::move(args)
+#endif
 
-// warning C4127: Conditional expression is constant
-constexpr inline bool silence_msvc_c4127(bool cond) { return cond; }
-
-#    define PYBIND11_SILENCE_MSVC_C4127(...) ::pybind11::detail::silence_msvc_c4127(__VA_ARGS__)
-
-#else
-#    define PYBIND11_SILENCE_MSVC_C4127(...) __VA_ARGS__
+// Pybind offers detailed error messages by default for all builts that are debug (through the
+// negation of ndebug). This can also be manually enabled by users, for any builds, through
+// defining PYBIND11_DETAILED_ERROR_MESSAGES.
+#if !defined(PYBIND11_DETAILED_ERROR_MESSAGES) && !defined(NDEBUG)
+#    define PYBIND11_DETAILED_ERROR_MESSAGES
 #endif
 
 PYBIND11_NAMESPACE_END(detail)
diff --git a/ext/pybind11/include/pybind11/detail/descr.h b/ext/pybind11/include/pybind11/detail/descr.h
index c62e541bda..e7a5e2c145 100644
--- a/ext/pybind11/include/pybind11/detail/descr.h
+++ b/ext/pybind11/include/pybind11/detail/descr.h
@@ -15,9 +15,9 @@ PYBIND11_NAMESPACE_BEGIN(PYBIND11_NAMESPACE)
 PYBIND11_NAMESPACE_BEGIN(detail)
 
 #if !defined(_MSC_VER)
-#  define PYBIND11_DESCR_CONSTEXPR static constexpr
+#    define PYBIND11_DESCR_CONSTEXPR static constexpr
 #else
-#  define PYBIND11_DESCR_CONSTEXPR const
+#    define PYBIND11_DESCR_CONSTEXPR const
 #endif
 
 /* Concatenate type signatures at compile time */
@@ -27,14 +27,14 @@ struct descr {
 
     constexpr descr() = default;
     // NOLINTNEXTLINE(google-explicit-constructor)
-    constexpr descr(char const (&s)[N+1]) : descr(s, make_index_sequence<N>()) { }
+    constexpr descr(char const (&s)[N + 1]) : descr(s, make_index_sequence<N>()) {}
 
     template <size_t... Is>
-    constexpr descr(char const (&s)[N+1], index_sequence<Is...>) : text{s[Is]..., '\0'} { }
+    constexpr descr(char const (&s)[N + 1], index_sequence<Is...>) : text{s[Is]..., '\0'} {}
 
     template <typename... Chars>
     // NOLINTNEXTLINE(google-explicit-constructor)
-    constexpr descr(char c, Chars... cs) : text{c, static_cast<char>(cs)..., '\0'} { }
+    constexpr descr(char c, Chars... cs) : text{c, static_cast<char>(cs)..., '\0'} {}
 
     static constexpr std::array<const std::type_info *, sizeof...(Ts) + 1> types() {
         return {{&typeid(Ts)..., nullptr}};
@@ -42,62 +42,116 @@ struct descr {
 };
 
 template <size_t N1, size_t N2, typename... Ts1, typename... Ts2, size_t... Is1, size_t... Is2>
-constexpr descr<N1 + N2, Ts1..., Ts2...> plus_impl(const descr<N1, Ts1...> &a, const descr<N2, Ts2...> &b,
-                                                   index_sequence<Is1...>, index_sequence<Is2...>) {
+constexpr descr<N1 + N2, Ts1..., Ts2...> plus_impl(const descr<N1, Ts1...> &a,
+                                                   const descr<N2, Ts2...> &b,
+                                                   index_sequence<Is1...>,
+                                                   index_sequence<Is2...>) {
     PYBIND11_WORKAROUND_INCORRECT_MSVC_C4100(b);
     return {a.text[Is1]..., b.text[Is2]...};
 }
 
 template <size_t N1, size_t N2, typename... Ts1, typename... Ts2>
-constexpr descr<N1 + N2, Ts1..., Ts2...> operator+(const descr<N1, Ts1...> &a, const descr<N2, Ts2...> &b) {
+constexpr descr<N1 + N2, Ts1..., Ts2...> operator+(const descr<N1, Ts1...> &a,
+                                                   const descr<N2, Ts2...> &b) {
     return plus_impl(a, b, make_index_sequence<N1>(), make_index_sequence<N2>());
 }
 
 template <size_t N>
-constexpr descr<N - 1> _(char const(&text)[N]) { return descr<N - 1>(text); }
-constexpr descr<0> _(char const(&)[1]) { return {}; }
+constexpr descr<N - 1> const_name(char const (&text)[N]) {
+    return descr<N - 1>(text);
+}
+constexpr descr<0> const_name(char const (&)[1]) { return {}; }
 
-template <size_t Rem, size_t... Digits> struct int_to_str : int_to_str<Rem/10, Rem%10, Digits...> { };
-template <size_t...Digits> struct int_to_str<0, Digits...> {
+template <size_t Rem, size_t... Digits>
+struct int_to_str : int_to_str<Rem / 10, Rem % 10, Digits...> {};
+template <size_t... Digits>
+struct int_to_str<0, Digits...> {
+    // WARNING: This only works with C++17 or higher.
     static constexpr auto digits = descr<sizeof...(Digits)>(('0' + Digits)...);
 };
 
 // Ternary description (like std::conditional)
 template <bool B, size_t N1, size_t N2>
-constexpr enable_if_t<B, descr<N1 - 1>> _(char const(&text1)[N1], char const(&)[N2]) {
-    return _(text1);
+constexpr enable_if_t<B, descr<N1 - 1>> const_name(char const (&text1)[N1], char const (&)[N2]) {
+    return const_name(text1);
 }
 template <bool B, size_t N1, size_t N2>
-constexpr enable_if_t<!B, descr<N2 - 1>> _(char const(&)[N1], char const(&text2)[N2]) {
-    return _(text2);
+constexpr enable_if_t<!B, descr<N2 - 1>> const_name(char const (&)[N1], char const (&text2)[N2]) {
+    return const_name(text2);
 }
 
 template <bool B, typename T1, typename T2>
-constexpr enable_if_t<B, T1> _(const T1 &d, const T2 &) { return d; }
+constexpr enable_if_t<B, T1> const_name(const T1 &d, const T2 &) {
+    return d;
+}
 template <bool B, typename T1, typename T2>
-constexpr enable_if_t<!B, T2> _(const T1 &, const T2 &d) { return d; }
+constexpr enable_if_t<!B, T2> const_name(const T1 &, const T2 &d) {
+    return d;
+}
 
 template <size_t Size>
-auto constexpr _() -> remove_cv_t<decltype(int_to_str<Size / 10, Size % 10>::digits)> {
+auto constexpr const_name() -> remove_cv_t<decltype(int_to_str<Size / 10, Size % 10>::digits)> {
     return int_to_str<Size / 10, Size % 10>::digits;
 }
 
-template <typename Type> constexpr descr<1, Type> _() { return {'%'}; }
+template <typename Type>
+constexpr descr<1, Type> const_name() {
+    return {'%'};
+}
+
+// If "_" is defined as a macro, py::detail::_ cannot be provided.
+// It is therefore best to use py::detail::const_name universally.
+// This block is for backward compatibility only.
+// (The const_name code is repeated to avoid introducing a "_" #define ourselves.)
+#ifndef _
+#    define PYBIND11_DETAIL_UNDERSCORE_BACKWARD_COMPATIBILITY
+template <size_t N>
+constexpr descr<N - 1> _(char const (&text)[N]) {
+    return const_name<N>(text);
+}
+template <bool B, size_t N1, size_t N2>
+constexpr enable_if_t<B, descr<N1 - 1>> _(char const (&text1)[N1], char const (&text2)[N2]) {
+    return const_name<B, N1, N2>(text1, text2);
+}
+template <bool B, size_t N1, size_t N2>
+constexpr enable_if_t<!B, descr<N2 - 1>> _(char const (&text1)[N1], char const (&text2)[N2]) {
+    return const_name<B, N1, N2>(text1, text2);
+}
+template <bool B, typename T1, typename T2>
+constexpr enable_if_t<B, T1> _(const T1 &d1, const T2 &d2) {
+    return const_name<B, T1, T2>(d1, d2);
+}
+template <bool B, typename T1, typename T2>
+constexpr enable_if_t<!B, T2> _(const T1 &d1, const T2 &d2) {
+    return const_name<B, T1, T2>(d1, d2);
+}
+
+template <size_t Size>
+auto constexpr _() -> remove_cv_t<decltype(int_to_str<Size / 10, Size % 10>::digits)> {
+    return const_name<Size>();
+}
+template <typename Type>
+constexpr descr<1, Type> _() {
+    return const_name<Type>();
+}
+#endif // #ifndef _
 
 constexpr descr<0> concat() { return {}; }
 
 template <size_t N, typename... Ts>
-constexpr descr<N, Ts...> concat(const descr<N, Ts...> &descr) { return descr; }
+constexpr descr<N, Ts...> concat(const descr<N, Ts...> &descr) {
+    return descr;
+}
 
 template <size_t N, typename... Ts, typename... Args>
 constexpr auto concat(const descr<N, Ts...> &d, const Args &...args)
     -> decltype(std::declval<descr<N + 2, Ts...>>() + concat(args...)) {
-    return d + _(", ") + concat(args...);
+    return d + const_name(", ") + concat(args...);
 }
 
 template <size_t N, typename... Ts>
 constexpr descr<N + 2, Ts...> type_descr(const descr<N, Ts...> &descr) {
-    return _("{") + descr + _("}");
+    return const_name("{") + descr + const_name("}");
 }
 
 PYBIND11_NAMESPACE_END(detail)
diff --git a/ext/pybind11/include/pybind11/detail/init.h b/ext/pybind11/include/pybind11/detail/init.h
index cace352964..9f71278c26 100644
--- a/ext/pybind11/include/pybind11/detail/init.h
+++ b/ext/pybind11/include/pybind11/detail/init.h
@@ -12,6 +12,9 @@
 #include "class.h"
 
 PYBIND11_NAMESPACE_BEGIN(PYBIND11_NAMESPACE)
+
+PYBIND11_WARNING_DISABLE_MSVC(4127)
+
 PYBIND11_NAMESPACE_BEGIN(detail)
 
 template <>
@@ -22,9 +25,10 @@ public:
         return true;
     }
 
-    template <typename> using cast_op_type = value_and_holder &;
+    template <typename>
+    using cast_op_type = value_and_holder &;
     explicit operator value_and_holder &() { return *value; }
-    static constexpr auto name = _<value_and_holder>();
+    static constexpr auto name = const_name<value_and_holder>();
 
 private:
     value_and_holder *value = nullptr;
@@ -33,15 +37,21 @@ private:
 PYBIND11_NAMESPACE_BEGIN(initimpl)
 
 inline void no_nullptr(void *ptr) {
-    if (!ptr) throw type_error("pybind11::init(): factory function returned nullptr");
+    if (!ptr) {
+        throw type_error("pybind11::init(): factory function returned nullptr");
+    }
 }
 
 // Implementing functions for all forms of py::init<...> and py::init(...)
-template <typename Class> using Cpp = typename Class::type;
-template <typename Class> using Alias = typename Class::type_alias;
-template <typename Class> using Holder = typename Class::holder_type;
+template <typename Class>
+using Cpp = typename Class::type;
+template <typename Class>
+using Alias = typename Class::type_alias;
+template <typename Class>
+using Holder = typename Class::holder_type;
 
-template <typename Class> using is_alias_constructible = std::is_constructible<Alias<Class>, Cpp<Class> &&>;
+template <typename Class>
+using is_alias_constructible = std::is_constructible<Alias<Class>, Cpp<Class> &&>;
 
 // Takes a Cpp pointer and returns true if it actually is a polymorphic Alias instance.
 template <typename Class, enable_if_t<Class::has_alias, int> = 0>
@@ -50,17 +60,27 @@ bool is_alias(Cpp<Class> *ptr) {
 }
 // Failing fallback version of the above for a no-alias class (always returns false)
 template <typename /*Class*/>
-constexpr bool is_alias(void *) { return false; }
+constexpr bool is_alias(void *) {
+    return false;
+}
 
 // Constructs and returns a new object; if the given arguments don't map to a constructor, we fall
 // back to brace aggregate initiailization so that for aggregate initialization can be used with
 // py::init, e.g.  `py::init<int, int>` to initialize a `struct T { int a; int b; }`.  For
 // non-aggregate types, we need to use an ordinary T(...) constructor (invoking as `T{...}` usually
 // works, but will not do the expected thing when `T` has an `initializer_list<T>` constructor).
-template <typename Class, typename... Args, detail::enable_if_t<std::is_constructible<Class, Args...>::value, int> = 0>
-inline Class *construct_or_initialize(Args &&...args) { return new Class(std::forward<Args>(args)...); }
-template <typename Class, typename... Args, detail::enable_if_t<!std::is_constructible<Class, Args...>::value, int> = 0>
-inline Class *construct_or_initialize(Args &&...args) { return new Class{std::forward<Args>(args)...}; }
+template <typename Class,
+          typename... Args,
+          detail::enable_if_t<std::is_constructible<Class, Args...>::value, int> = 0>
+inline Class *construct_or_initialize(Args &&...args) {
+    return new Class(std::forward<Args>(args)...);
+}
+template <typename Class,
+          typename... Args,
+          detail::enable_if_t<!std::is_constructible<Class, Args...>::value, int> = 0>
+inline Class *construct_or_initialize(Args &&...args) {
+    return new Class{std::forward<Args>(args)...};
+}
 
 // Attempts to constructs an alias using a `Alias(Cpp &&)` constructor.  This allows types with
 // an alias to provide only a single Cpp factory function as long as the Alias can be
@@ -69,12 +89,14 @@ inline Class *construct_or_initialize(Args &&...args) { return new Class{std::fo
 // inherit all the base class constructors.
 template <typename Class>
 void construct_alias_from_cpp(std::true_type /*is_alias_constructible*/,
-                              value_and_holder &v_h, Cpp<Class> &&base) {
+                              value_and_holder &v_h,
+                              Cpp<Class> &&base) {
     v_h.value_ptr() = new Alias<Class>(std::move(base));
 }
 template <typename Class>
 [[noreturn]] void construct_alias_from_cpp(std::false_type /*!is_alias_constructible*/,
-                                           value_and_holder &, Cpp<Class> &&) {
+                                           value_and_holder &,
+                                           Cpp<Class> &&) {
     throw type_error("pybind11::init(): unable to convert returned instance to required "
                      "alias class: no `Alias<Class>(Class &&)` constructor available");
 }
@@ -84,8 +106,8 @@ template <typename Class>
 template <typename Class>
 void construct(...) {
     static_assert(!std::is_same<Class, Class>::value /* always false */,
-            "pybind11::init(): init function must return a compatible pointer, "
-            "holder, or value");
+                  "pybind11::init(): init function must return a compatible pointer, "
+                  "holder, or value");
 }
 
 // Pointer return v1: the factory function returns a class pointer for a registered class.
@@ -96,7 +118,7 @@ template <typename Class>
 void construct(value_and_holder &v_h, Cpp<Class> *ptr, bool need_alias) {
     PYBIND11_WORKAROUND_INCORRECT_MSVC_C4100(need_alias);
     no_nullptr(ptr);
-    if (PYBIND11_SILENCE_MSVC_C4127(Class::has_alias) && need_alias && !is_alias<Class>(ptr)) {
+    if (Class::has_alias && need_alias && !is_alias<Class>(ptr)) {
         // We're going to try to construct an alias by moving the cpp type.  Whether or not
         // that succeeds, we still need to destroy the original cpp pointer (either the
         // moved away leftover, if the alias construction works, or the value itself if we
@@ -106,7 +128,7 @@ void construct(value_and_holder &v_h, Cpp<Class> *ptr, bool need_alias) {
         // the holder and destruction happens when we leave the C++ scope, and the holder
         // class gets to handle the destruction however it likes.
         v_h.value_ptr() = ptr;
-        v_h.set_instance_registered(true); // To prevent init_instance from registering it
+        v_h.set_instance_registered(true);          // To prevent init_instance from registering it
         v_h.type->init_instance(v_h.inst, nullptr); // Set up the holder
         Holder<Class> temp_holder(std::move(v_h.holder<Holder<Class>>())); // Steal the holder
         v_h.type->dealloc(v_h); // Destroys the moved-out holder remains, resets value ptr to null
@@ -129,16 +151,18 @@ void construct(value_and_holder &v_h, Alias<Class> *alias_ptr, bool) {
 
 // Holder return: copy its pointer, and move or copy the returned holder into the new instance's
 // holder.  This also handles types like std::shared_ptr<T> and std::unique_ptr<T> where T is a
-// derived type (through those holder's implicit conversion from derived class holder constructors).
+// derived type (through those holder's implicit conversion from derived class holder
+// constructors).
 template <typename Class>
 void construct(value_and_holder &v_h, Holder<Class> holder, bool need_alias) {
     PYBIND11_WORKAROUND_INCORRECT_MSVC_C4100(need_alias);
     auto *ptr = holder_helper<Holder<Class>>::get(holder);
     no_nullptr(ptr);
     // If we need an alias, check that the held pointer is actually an alias instance
-    if (PYBIND11_SILENCE_MSVC_C4127(Class::has_alias) && need_alias && !is_alias<Class>(ptr))
+    if (Class::has_alias && need_alias && !is_alias<Class>(ptr)) {
         throw type_error("pybind11::init(): construction failed: returned holder-wrapped instance "
                          "is not an alias instance");
+    }
 
     v_h.value_ptr() = ptr;
     v_h.type->init_instance(v_h.inst, &holder);
@@ -152,11 +176,12 @@ template <typename Class>
 void construct(value_and_holder &v_h, Cpp<Class> &&result, bool need_alias) {
     PYBIND11_WORKAROUND_INCORRECT_MSVC_C4100(need_alias);
     static_assert(std::is_move_constructible<Cpp<Class>>::value,
-        "pybind11::init() return-by-value factory function requires a movable class");
-    if (PYBIND11_SILENCE_MSVC_C4127(Class::has_alias) && need_alias)
+                  "pybind11::init() return-by-value factory function requires a movable class");
+    if (Class::has_alias && need_alias) {
         construct_alias_from_cpp<Class>(is_alias_constructible<Class>{}, v_h, std::move(result));
-    else
+    } else {
         v_h.value_ptr() = new Cpp<Class>(std::move(result));
+    }
 }
 
 // return-by-value version 2: returning a value of the alias type itself.  We move-construct an
@@ -164,7 +189,8 @@ void construct(value_and_holder &v_h, Cpp<Class> &&result, bool need_alias) {
 // cases where Alias initialization is always desired.
 template <typename Class>
 void construct(value_and_holder &v_h, Alias<Class> &&result, bool) {
-    static_assert(std::is_move_constructible<Alias<Class>>::value,
+    static_assert(
+        std::is_move_constructible<Alias<Class>>::value,
         "pybind11::init() return-by-alias-value factory function requires a movable alias class");
     v_h.value_ptr() = new Alias<Class>(std::move(result));
 }
@@ -173,48 +199,79 @@ void construct(value_and_holder &v_h, Alias<Class> &&result, bool) {
 template <typename... Args>
 struct constructor {
     template <typename Class, typename... Extra, enable_if_t<!Class::has_alias, int> = 0>
-    static void execute(Class &cl, const Extra&... extra) {
-        cl.def("__init__", [](value_and_holder &v_h, Args... args) {
-            v_h.value_ptr() = construct_or_initialize<Cpp<Class>>(std::forward<Args>(args)...);
-        }, is_new_style_constructor(), extra...);
-    }
-
-    template <typename Class, typename... Extra,
-              enable_if_t<Class::has_alias &&
-                          std::is_constructible<Cpp<Class>, Args...>::value, int> = 0>
-    static void execute(Class &cl, const Extra&... extra) {
-        cl.def("__init__", [](value_and_holder &v_h, Args... args) {
-            if (Py_TYPE(v_h.inst) == v_h.type->type)
+    static void execute(Class &cl, const Extra &...extra) {
+        cl.def(
+            "__init__",
+            [](value_and_holder &v_h, Args... args) {
                 v_h.value_ptr() = construct_or_initialize<Cpp<Class>>(std::forward<Args>(args)...);
-            else
-                v_h.value_ptr() = construct_or_initialize<Alias<Class>>(std::forward<Args>(args)...);
-        }, is_new_style_constructor(), extra...);
+            },
+            is_new_style_constructor(),
+            extra...);
     }
 
-    template <typename Class, typename... Extra,
-              enable_if_t<Class::has_alias &&
-                          !std::is_constructible<Cpp<Class>, Args...>::value, int> = 0>
-    static void execute(Class &cl, const Extra&... extra) {
-        cl.def("__init__", [](value_and_holder &v_h, Args... args) {
-            v_h.value_ptr() = construct_or_initialize<Alias<Class>>(std::forward<Args>(args)...);
-        }, is_new_style_constructor(), extra...);
+    template <
+        typename Class,
+        typename... Extra,
+        enable_if_t<Class::has_alias && std::is_constructible<Cpp<Class>, Args...>::value, int>
+        = 0>
+    static void execute(Class &cl, const Extra &...extra) {
+        cl.def(
+            "__init__",
+            [](value_and_holder &v_h, Args... args) {
+                if (Py_TYPE(v_h.inst) == v_h.type->type) {
+                    v_h.value_ptr()
+                        = construct_or_initialize<Cpp<Class>>(std::forward<Args>(args)...);
+                } else {
+                    v_h.value_ptr()
+                        = construct_or_initialize<Alias<Class>>(std::forward<Args>(args)...);
+                }
+            },
+            is_new_style_constructor(),
+            extra...);
+    }
+
+    template <
+        typename Class,
+        typename... Extra,
+        enable_if_t<Class::has_alias && !std::is_constructible<Cpp<Class>, Args...>::value, int>
+        = 0>
+    static void execute(Class &cl, const Extra &...extra) {
+        cl.def(
+            "__init__",
+            [](value_and_holder &v_h, Args... args) {
+                v_h.value_ptr()
+                    = construct_or_initialize<Alias<Class>>(std::forward<Args>(args)...);
+            },
+            is_new_style_constructor(),
+            extra...);
     }
 };
 
 // Implementing class for py::init_alias<...>()
-template <typename... Args> struct alias_constructor {
-    template <typename Class, typename... Extra,
-              enable_if_t<Class::has_alias && std::is_constructible<Alias<Class>, Args...>::value, int> = 0>
-    static void execute(Class &cl, const Extra&... extra) {
-        cl.def("__init__", [](value_and_holder &v_h, Args... args) {
-            v_h.value_ptr() = construct_or_initialize<Alias<Class>>(std::forward<Args>(args)...);
-        }, is_new_style_constructor(), extra...);
+template <typename... Args>
+struct alias_constructor {
+    template <
+        typename Class,
+        typename... Extra,
+        enable_if_t<Class::has_alias && std::is_constructible<Alias<Class>, Args...>::value, int>
+        = 0>
+    static void execute(Class &cl, const Extra &...extra) {
+        cl.def(
+            "__init__",
+            [](value_and_holder &v_h, Args... args) {
+                v_h.value_ptr()
+                    = construct_or_initialize<Alias<Class>>(std::forward<Args>(args)...);
+            },
+            is_new_style_constructor(),
+            extra...);
     }
 };
 
 // Implementation class for py::init(Func) and py::init(Func, AliasFunc)
-template <typename CFunc, typename AFunc = void_type (*)(),
-          typename = function_signature_t<CFunc>, typename = function_signature_t<AFunc>>
+template <typename CFunc,
+          typename AFunc = void_type (*)(),
+          typename = function_signature_t<CFunc>,
+          typename = function_signature_t<AFunc>>
 struct factory;
 
 // Specialization for py::init(Func)
@@ -232,22 +289,32 @@ struct factory<Func, void_type (*)(), Return(Args...)> {
     // instance, or the alias needs to be constructible from a `Class &&` argument.
     template <typename Class, typename... Extra>
     void execute(Class &cl, const Extra &...extra) && {
-        #if defined(PYBIND11_CPP14)
-        cl.def("__init__", [func = std::move(class_factory)]
-        #else
+#if defined(PYBIND11_CPP14)
+        cl.def(
+            "__init__",
+            [func = std::move(class_factory)]
+#else
         auto &func = class_factory;
-        cl.def("__init__", [func]
-        #endif
-        (value_and_holder &v_h, Args... args) {
-            construct<Class>(v_h, func(std::forward<Args>(args)...),
-                             Py_TYPE(v_h.inst) != v_h.type->type);
-        }, is_new_style_constructor(), extra...);
+        cl.def(
+            "__init__",
+            [func]
+#endif
+            (value_and_holder &v_h, Args... args) {
+                construct<Class>(
+                    v_h, func(std::forward<Args>(args)...), Py_TYPE(v_h.inst) != v_h.type->type);
+            },
+            is_new_style_constructor(),
+            extra...);
     }
 };
 
 // Specialization for py::init(Func, AliasFunc)
-template <typename CFunc, typename AFunc,
-          typename CReturn, typename... CArgs, typename AReturn, typename... AArgs>
+template <typename CFunc,
+          typename AFunc,
+          typename CReturn,
+          typename... CArgs,
+          typename AReturn,
+          typename... AArgs>
 struct factory<CFunc, AFunc, CReturn(CArgs...), AReturn(AArgs...)> {
     static_assert(sizeof...(CArgs) == sizeof...(AArgs),
                   "pybind11::init(class_factory, alias_factory): class and alias factories "
@@ -260,29 +327,37 @@ struct factory<CFunc, AFunc, CReturn(CArgs...), AReturn(AArgs...)> {
     remove_reference_t<AFunc> alias_factory;
 
     factory(CFunc &&c, AFunc &&a)
-        : class_factory(std::forward<CFunc>(c)), alias_factory(std::forward<AFunc>(a)) { }
+        : class_factory(std::forward<CFunc>(c)), alias_factory(std::forward<AFunc>(a)) {}
 
     // The class factory is called when the `self` type passed to `__init__` is the direct
     // class (i.e. not inherited), the alias factory when `self` is a Python-side subtype.
     template <typename Class, typename... Extra>
-    void execute(Class &cl, const Extra&... extra) && {
-        static_assert(Class::has_alias, "The two-argument version of `py::init()` can "
-                                        "only be used if the class has an alias");
-        #if defined(PYBIND11_CPP14)
-        cl.def("__init__", [class_func = std::move(class_factory), alias_func = std::move(alias_factory)]
-        #else
+    void execute(Class &cl, const Extra &...extra) && {
+        static_assert(Class::has_alias,
+                      "The two-argument version of `py::init()` can "
+                      "only be used if the class has an alias");
+#if defined(PYBIND11_CPP14)
+        cl.def(
+            "__init__",
+            [class_func = std::move(class_factory), alias_func = std::move(alias_factory)]
+#else
         auto &class_func = class_factory;
         auto &alias_func = alias_factory;
-        cl.def("__init__", [class_func, alias_func]
-        #endif
-        (value_and_holder &v_h, CArgs... args) {
-            if (Py_TYPE(v_h.inst) == v_h.type->type)
-                // If the instance type equals the registered type we don't have inheritance, so
-                // don't need the alias and can construct using the class function:
-                construct<Class>(v_h, class_func(std::forward<CArgs>(args)...), false);
-            else
-                construct<Class>(v_h, alias_func(std::forward<CArgs>(args)...), true);
-        }, is_new_style_constructor(), extra...);
+        cl.def(
+            "__init__",
+            [class_func, alias_func]
+#endif
+            (value_and_holder &v_h, CArgs... args) {
+                if (Py_TYPE(v_h.inst) == v_h.type->type) {
+                    // If the instance type equals the registered type we don't have inheritance,
+                    // so don't need the alias and can construct using the class function:
+                    construct<Class>(v_h, class_func(std::forward<CArgs>(args)...), false);
+                } else {
+                    construct<Class>(v_h, alias_func(std::forward<CArgs>(args)...), true);
+                }
+            },
+            is_new_style_constructor(),
+            extra...);
     }
 };
 
@@ -293,7 +368,9 @@ void setstate(value_and_holder &v_h, T &&result, bool need_alias) {
 }
 
 /// Set both the C++ and Python states
-template <typename Class, typename T, typename O,
+template <typename Class,
+          typename T,
+          typename O,
           enable_if_t<std::is_convertible<O, handle>::value, int> = 0>
 void setstate(value_and_holder &v_h, std::pair<T, O> &&result, bool need_alias) {
     construct<Class>(v_h, std::move(result.first), need_alias);
@@ -307,12 +384,18 @@ void setstate(value_and_holder &v_h, std::pair<T, O> &&result, bool need_alias)
 }
 
 /// Implementation for py::pickle(GetState, SetState)
-template <typename Get, typename Set,
-          typename = function_signature_t<Get>, typename = function_signature_t<Set>>
+template <typename Get,
+          typename Set,
+          typename = function_signature_t<Get>,
+          typename = function_signature_t<Set>>
 struct pickle_factory;
 
-template <typename Get, typename Set,
-          typename RetState, typename Self, typename NewInstance, typename ArgState>
+template <typename Get,
+          typename Set,
+          typename RetState,
+          typename Self,
+          typename NewInstance,
+          typename ArgState>
 struct pickle_factory<Get, Set, RetState(Self), NewInstance(ArgState)> {
     static_assert(std::is_same<intrinsic_t<RetState>, intrinsic_t<ArgState>>::value,
                   "The type returned by `__getstate__` must be the same "
@@ -321,26 +404,31 @@ struct pickle_factory<Get, Set, RetState(Self), NewInstance(ArgState)> {
     remove_reference_t<Get> get;
     remove_reference_t<Set> set;
 
-    pickle_factory(Get get, Set set)
-        : get(std::forward<Get>(get)), set(std::forward<Set>(set)) { }
+    pickle_factory(Get get, Set set) : get(std::forward<Get>(get)), set(std::forward<Set>(set)) {}
 
     template <typename Class, typename... Extra>
     void execute(Class &cl, const Extra &...extra) && {
         cl.def("__getstate__", std::move(get));
 
 #if defined(PYBIND11_CPP14)
-        cl.def("__setstate__", [func = std::move(set)]
+        cl.def(
+            "__setstate__",
+            [func = std::move(set)]
 #else
         auto &func = set;
-        cl.def("__setstate__", [func]
+        cl.def(
+            "__setstate__",
+            [func]
 #endif
-        (value_and_holder &v_h, ArgState state) {
-            setstate<Class>(v_h, func(std::forward<ArgState>(state)),
-                            Py_TYPE(v_h.inst) != v_h.type->type);
-        }, is_new_style_constructor(), extra...);
+            (value_and_holder &v_h, ArgState state) {
+                setstate<Class>(
+                    v_h, func(std::forward<ArgState>(state)), Py_TYPE(v_h.inst) != v_h.type->type);
+            },
+            is_new_style_constructor(),
+            extra...);
     }
 };
 
 PYBIND11_NAMESPACE_END(initimpl)
 PYBIND11_NAMESPACE_END(detail)
-PYBIND11_NAMESPACE_END(pybind11)
+PYBIND11_NAMESPACE_END(PYBIND11_NAMESPACE)
diff --git a/ext/pybind11/include/pybind11/detail/internals.h b/ext/pybind11/include/pybind11/detail/internals.h
index 98d21eb983..ef1849fbea 100644
--- a/ext/pybind11/include/pybind11/detail/internals.h
+++ b/ext/pybind11/include/pybind11/detail/internals.h
@@ -9,8 +9,16 @@
 
 #pragma once
 
+#include "common.h"
+
+#if defined(WITH_THREAD) && defined(PYBIND11_SIMPLE_GIL_MANAGEMENT)
+#    include "../gil.h"
+#endif
+
 #include "../pytypes.h"
 
+#include <exception>
+
 /// Tracks the `internals` and `type_info` ABI version independent of the main library version.
 ///
 /// Some portions of the code use an ABI that is conditional depending on this
@@ -35,6 +43,8 @@ using ExceptionTranslator = void (*)(std::exception_ptr);
 
 PYBIND11_NAMESPACE_BEGIN(detail)
 
+constexpr const char *internals_function_record_capsule_name = "pybind11_function_record_capsule";
+
 // Forward declarations
 inline PyTypeObject *make_static_property_type();
 inline PyTypeObject *make_default_metaclass();
@@ -47,7 +57,7 @@ inline PyObject *make_object_base_type(PyTypeObject *metaclass);
 // `Py_LIMITED_API` anyway.
 #    if PYBIND11_INTERNALS_VERSION > 4
 #        define PYBIND11_TLS_KEY_REF Py_tss_t &
-#        ifdef __GNUC__
+#        if defined(__GNUC__) && !defined(__INTEL_COMPILER)
 // Clang on macOS warns due to `Py_tss_NEEDS_INIT` not specifying an initializer
 // for every field.
 #            define PYBIND11_TLS_KEY_INIT(var)                                                    \
@@ -80,7 +90,7 @@ inline PyObject *make_object_base_type(PyTypeObject *metaclass);
 #    define PYBIND11_TLS_KEY_INIT(var) PYBIND11_TLS_KEY_REF var = 0;
 #    define PYBIND11_TLS_KEY_CREATE(var) (((var) = PyThread_create_key()) != -1)
 #    define PYBIND11_TLS_GET_VALUE(key) PyThread_get_key_value((key))
-#    if PY_MAJOR_VERSION < 3 || defined(PYPY_VERSION)
+#    if defined(PYPY_VERSION)
 // On CPython < 3.4 and on PyPy, `PyThread_set_key_value` strangely does not set
 // the value if it has already been set.  Instead, it must first be deleted and
 // then set again.
@@ -117,8 +127,9 @@ struct type_hash {
     size_t operator()(const std::type_index &t) const {
         size_t hash = 5381;
         const char *ptr = t.name();
-        while (auto c = static_cast<unsigned char>(*ptr++))
+        while (auto c = static_cast<unsigned char>(*ptr++)) {
             hash = (hash * 33) ^ c;
+        }
         return hash;
     }
 };
@@ -134,9 +145,9 @@ template <typename value_type>
 using type_map = std::unordered_map<std::type_index, value_type, type_hash, type_equal_to>;
 
 struct override_hash {
-    inline size_t operator()(const std::pair<const PyObject *, const char *>& v) const {
+    inline size_t operator()(const std::pair<const PyObject *, const char *> &v) const {
         size_t value = std::hash<const void *>()(v.first);
-        value ^= std::hash<const void *>()(v.second) + 0x9e3779b9 + (value<<6) + (value>>2);
+        value ^= std::hash<const void *>()(v.second) + 0x9e3779b9 + (value << 6) + (value >> 2);
         return value;
     }
 };
@@ -145,27 +156,44 @@ struct override_hash {
 /// Whenever binary incompatible changes are made to this structure,
 /// `PYBIND11_INTERNALS_VERSION` must be incremented.
 struct internals {
-    type_map<type_info *> registered_types_cpp; // std::type_index -> pybind11's type information
-    std::unordered_map<PyTypeObject *, std::vector<type_info *>> registered_types_py; // PyTypeObject* -> base type_info(s)
-    std::unordered_multimap<const void *, instance*> registered_instances; // void * -> instance*
-    std::unordered_set<std::pair<const PyObject *, const char *>, override_hash> inactive_override_cache;
+    // std::type_index -> pybind11's type information
+    type_map<type_info *> registered_types_cpp;
+    // PyTypeObject* -> base type_info(s)
+    std::unordered_map<PyTypeObject *, std::vector<type_info *>> registered_types_py;
+    std::unordered_multimap<const void *, instance *> registered_instances; // void * -> instance*
+    std::unordered_set<std::pair<const PyObject *, const char *>, override_hash>
+        inactive_override_cache;
     type_map<std::vector<bool (*)(PyObject *, void *&)>> direct_conversions;
     std::unordered_map<const PyObject *, std::vector<PyObject *>> patients;
     std::forward_list<ExceptionTranslator> registered_exception_translators;
-    std::unordered_map<std::string, void *> shared_data; // Custom data to be shared across extensions
+    std::unordered_map<std::string, void *> shared_data; // Custom data to be shared across
+                                                         // extensions
 #if PYBIND11_INTERNALS_VERSION == 4
     std::vector<PyObject *> unused_loader_patient_stack_remove_at_v5;
 #endif
-    std::forward_list<std::string> static_strings; // Stores the std::strings backing detail::c_str()
+    std::forward_list<std::string> static_strings; // Stores the std::strings backing
+                                                   // detail::c_str()
     PyTypeObject *static_property_type;
     PyTypeObject *default_metaclass;
     PyObject *instance_base;
 #if defined(WITH_THREAD)
+    // Unused if PYBIND11_SIMPLE_GIL_MANAGEMENT is defined:
     PYBIND11_TLS_KEY_INIT(tstate)
 #    if PYBIND11_INTERNALS_VERSION > 4
     PYBIND11_TLS_KEY_INIT(loader_life_support_tls_key)
 #    endif // PYBIND11_INTERNALS_VERSION > 4
+    // Unused if PYBIND11_SIMPLE_GIL_MANAGEMENT is defined:
     PyInterpreterState *istate = nullptr;
+
+#    if PYBIND11_INTERNALS_VERSION > 4
+    // Note that we have to use a std::string to allocate memory to ensure a unique address
+    // We want unique addresses since we use pointer equality to compare function records
+    std::string function_record_capsule_name = internals_function_record_capsule_name;
+#    endif
+
+    internals() = default;
+    internals(const internals &other) = delete;
+    internals &operator=(const internals &other) = delete;
     ~internals() {
 #    if PYBIND11_INTERNALS_VERSION > 4
         PYBIND11_TLS_FREE(loader_life_support_tls_key);
@@ -192,14 +220,16 @@ struct type_info {
     void *(*operator_new)(size_t);
     void (*init_instance)(instance *, const void *);
     void (*dealloc)(value_and_holder &v_h);
-    std::vector<PyObject *(*)(PyObject *, PyTypeObject *)> implicit_conversions;
-    std::vector<std::pair<const std::type_info *, void *(*)(void *)>> implicit_casts;
+    std::vector<PyObject *(*) (PyObject *, PyTypeObject *)> implicit_conversions;
+    std::vector<std::pair<const std::type_info *, void *(*) (void *)>> implicit_casts;
     std::vector<bool (*)(PyObject *, void *&)> *direct_conversions;
     buffer_info *(*get_buffer)(PyObject *, void *) = nullptr;
     void *get_buffer_data = nullptr;
     void *(*module_local_load)(PyObject *, const type_info *) = nullptr;
     /* A simple type never occurs as a (direct or indirect) parent
-     * of a class that makes use of multiple inheritance */
+     * of a class that makes use of multiple inheritance.
+     * A type can be simple even if it has non-simple ancestors as long as it has no descendants.
+     */
     bool simple_type : 1;
     /* True if there is no multiple inheritance in this type's inheritance tree */
     bool simple_ancestors : 1;
@@ -211,67 +241,71 @@ struct type_info {
 
 /// On MSVC, debug and release builds are not ABI-compatible!
 #if defined(_MSC_VER) && defined(_DEBUG)
-#  define PYBIND11_BUILD_TYPE "_debug"
+#    define PYBIND11_BUILD_TYPE "_debug"
 #else
-#  define PYBIND11_BUILD_TYPE ""
+#    define PYBIND11_BUILD_TYPE ""
 #endif
 
 /// Let's assume that different compilers are ABI-incompatible.
 /// A user can manually set this string if they know their
 /// compiler is compatible.
 #ifndef PYBIND11_COMPILER_TYPE
-#  if defined(_MSC_VER)
-#    define PYBIND11_COMPILER_TYPE "_msvc"
-#  elif defined(__INTEL_COMPILER)
-#    define PYBIND11_COMPILER_TYPE "_icc"
-#  elif defined(__clang__)
-#    define PYBIND11_COMPILER_TYPE "_clang"
-#  elif defined(__PGI)
-#    define PYBIND11_COMPILER_TYPE "_pgi"
-#  elif defined(__MINGW32__)
-#    define PYBIND11_COMPILER_TYPE "_mingw"
-#  elif defined(__CYGWIN__)
-#    define PYBIND11_COMPILER_TYPE "_gcc_cygwin"
-#  elif defined(__GNUC__)
-#    define PYBIND11_COMPILER_TYPE "_gcc"
-#  else
-#    define PYBIND11_COMPILER_TYPE "_unknown"
-#  endif
+#    if defined(_MSC_VER)
+#        define PYBIND11_COMPILER_TYPE "_msvc"
+#    elif defined(__INTEL_COMPILER)
+#        define PYBIND11_COMPILER_TYPE "_icc"
+#    elif defined(__clang__)
+#        define PYBIND11_COMPILER_TYPE "_clang"
+#    elif defined(__PGI)
+#        define PYBIND11_COMPILER_TYPE "_pgi"
+#    elif defined(__MINGW32__)
+#        define PYBIND11_COMPILER_TYPE "_mingw"
+#    elif defined(__CYGWIN__)
+#        define PYBIND11_COMPILER_TYPE "_gcc_cygwin"
+#    elif defined(__GNUC__)
+#        define PYBIND11_COMPILER_TYPE "_gcc"
+#    else
+#        define PYBIND11_COMPILER_TYPE "_unknown"
+#    endif
 #endif
 
 /// Also standard libs
 #ifndef PYBIND11_STDLIB
-#  if defined(_LIBCPP_VERSION)
-#    define PYBIND11_STDLIB "_libcpp"
-#  elif defined(__GLIBCXX__) || defined(__GLIBCPP__)
-#    define PYBIND11_STDLIB "_libstdcpp"
-#  else
-#    define PYBIND11_STDLIB ""
-#  endif
+#    if defined(_LIBCPP_VERSION)
+#        define PYBIND11_STDLIB "_libcpp"
+#    elif defined(__GLIBCXX__) || defined(__GLIBCPP__)
+#        define PYBIND11_STDLIB "_libstdcpp"
+#    else
+#        define PYBIND11_STDLIB ""
+#    endif
 #endif
 
 /// On Linux/OSX, changes in __GXX_ABI_VERSION__ indicate ABI incompatibility.
 #ifndef PYBIND11_BUILD_ABI
-#  if defined(__GXX_ABI_VERSION)
-#    define PYBIND11_BUILD_ABI "_cxxabi" PYBIND11_TOSTRING(__GXX_ABI_VERSION)
-#  else
-#    define PYBIND11_BUILD_ABI ""
-#  endif
+#    if defined(__GXX_ABI_VERSION)
+#        define PYBIND11_BUILD_ABI "_cxxabi" PYBIND11_TOSTRING(__GXX_ABI_VERSION)
+#    else
+#        define PYBIND11_BUILD_ABI ""
+#    endif
 #endif
 
 #ifndef PYBIND11_INTERNALS_KIND
-#  if defined(WITH_THREAD)
-#    define PYBIND11_INTERNALS_KIND ""
-#  else
-#    define PYBIND11_INTERNALS_KIND "_without_thread"
-#  endif
+#    if defined(WITH_THREAD)
+#        define PYBIND11_INTERNALS_KIND ""
+#    else
+#        define PYBIND11_INTERNALS_KIND "_without_thread"
+#    endif
 #endif
 
-#define PYBIND11_INTERNALS_ID "__pybind11_internals_v" \
-    PYBIND11_TOSTRING(PYBIND11_INTERNALS_VERSION) PYBIND11_INTERNALS_KIND PYBIND11_COMPILER_TYPE PYBIND11_STDLIB PYBIND11_BUILD_ABI PYBIND11_BUILD_TYPE "__"
+#define PYBIND11_INTERNALS_ID                                                                     \
+    "__pybind11_internals_v" PYBIND11_TOSTRING(PYBIND11_INTERNALS_VERSION)                        \
+        PYBIND11_INTERNALS_KIND PYBIND11_COMPILER_TYPE PYBIND11_STDLIB PYBIND11_BUILD_ABI         \
+            PYBIND11_BUILD_TYPE "__"
 
-#define PYBIND11_MODULE_LOCAL_ID "__pybind11_module_local_v" \
-    PYBIND11_TOSTRING(PYBIND11_INTERNALS_VERSION) PYBIND11_INTERNALS_KIND PYBIND11_COMPILER_TYPE PYBIND11_STDLIB PYBIND11_BUILD_ABI PYBIND11_BUILD_TYPE "__"
+#define PYBIND11_MODULE_LOCAL_ID                                                                  \
+    "__pybind11_module_local_v" PYBIND11_TOSTRING(PYBIND11_INTERNALS_VERSION)                     \
+        PYBIND11_INTERNALS_KIND PYBIND11_COMPILER_TYPE PYBIND11_STDLIB PYBIND11_BUILD_ABI         \
+            PYBIND11_BUILD_TYPE "__"
 
 /// Each module locally stores a pointer to the `internals` data. The data
 /// itself is shared among modules with the same `PYBIND11_INTERNALS_ID`.
@@ -280,21 +314,93 @@ inline internals **&get_internals_pp() {
     return internals_pp;
 }
 
+// forward decl
+inline void translate_exception(std::exception_ptr);
+
+template <class T,
+          enable_if_t<std::is_same<std::nested_exception, remove_cvref_t<T>>::value, int> = 0>
+bool handle_nested_exception(const T &exc, const std::exception_ptr &p) {
+    std::exception_ptr nested = exc.nested_ptr();
+    if (nested != nullptr && nested != p) {
+        translate_exception(nested);
+        return true;
+    }
+    return false;
+}
+
+template <class T,
+          enable_if_t<!std::is_same<std::nested_exception, remove_cvref_t<T>>::value, int> = 0>
+bool handle_nested_exception(const T &exc, const std::exception_ptr &p) {
+    if (const auto *nep = dynamic_cast<const std::nested_exception *>(std::addressof(exc))) {
+        return handle_nested_exception(*nep, p);
+    }
+    return false;
+}
+
+inline bool raise_err(PyObject *exc_type, const char *msg) {
+    if (PyErr_Occurred()) {
+        raise_from(exc_type, msg);
+        return true;
+    }
+    PyErr_SetString(exc_type, msg);
+    return false;
+}
+
 inline void translate_exception(std::exception_ptr p) {
+    if (!p) {
+        return;
+    }
     try {
-        if (p) std::rethrow_exception(p);
-    } catch (error_already_set &e)           { e.restore();                                    return;
-    } catch (const builtin_exception &e)     { e.set_error();                                  return;
-    } catch (const std::bad_alloc &e)        { PyErr_SetString(PyExc_MemoryError,   e.what()); return;
-    } catch (const std::domain_error &e)     { PyErr_SetString(PyExc_ValueError,    e.what()); return;
-    } catch (const std::invalid_argument &e) { PyErr_SetString(PyExc_ValueError,    e.what()); return;
-    } catch (const std::length_error &e)     { PyErr_SetString(PyExc_ValueError,    e.what()); return;
-    } catch (const std::out_of_range &e)     { PyErr_SetString(PyExc_IndexError,    e.what()); return;
-    } catch (const std::range_error &e)      { PyErr_SetString(PyExc_ValueError,    e.what()); return;
-    } catch (const std::overflow_error &e)   { PyErr_SetString(PyExc_OverflowError, e.what()); return;
-    } catch (const std::exception &e)        { PyErr_SetString(PyExc_RuntimeError,  e.what()); return;
+        std::rethrow_exception(p);
+    } catch (error_already_set &e) {
+        handle_nested_exception(e, p);
+        e.restore();
+        return;
+    } catch (const builtin_exception &e) {
+        // Could not use template since it's an abstract class.
+        if (const auto *nep = dynamic_cast<const std::nested_exception *>(std::addressof(e))) {
+            handle_nested_exception(*nep, p);
+        }
+        e.set_error();
+        return;
+    } catch (const std::bad_alloc &e) {
+        handle_nested_exception(e, p);
+        raise_err(PyExc_MemoryError, e.what());
+        return;
+    } catch (const std::domain_error &e) {
+        handle_nested_exception(e, p);
+        raise_err(PyExc_ValueError, e.what());
+        return;
+    } catch (const std::invalid_argument &e) {
+        handle_nested_exception(e, p);
+        raise_err(PyExc_ValueError, e.what());
+        return;
+    } catch (const std::length_error &e) {
+        handle_nested_exception(e, p);
+        raise_err(PyExc_ValueError, e.what());
+        return;
+    } catch (const std::out_of_range &e) {
+        handle_nested_exception(e, p);
+        raise_err(PyExc_IndexError, e.what());
+        return;
+    } catch (const std::range_error &e) {
+        handle_nested_exception(e, p);
+        raise_err(PyExc_ValueError, e.what());
+        return;
+    } catch (const std::overflow_error &e) {
+        handle_nested_exception(e, p);
+        raise_err(PyExc_OverflowError, e.what());
+        return;
+    } catch (const std::exception &e) {
+        handle_nested_exception(e, p);
+        raise_err(PyExc_RuntimeError, e.what());
+        return;
+    } catch (const std::nested_exception &e) {
+        handle_nested_exception(e, p);
+        raise_err(PyExc_RuntimeError, "Caught an unknown nested exception!");
+        return;
     } catch (...) {
-        PyErr_SetString(PyExc_RuntimeError, "Caught an unknown exception!");
+        raise_err(PyExc_RuntimeError, "Caught an unknown exception!");
         return;
     }
 }
@@ -302,9 +408,15 @@ inline void translate_exception(std::exception_ptr p) {
 #if !defined(__GLIBCXX__)
 inline void translate_local_exception(std::exception_ptr p) {
     try {
-        if (p) std::rethrow_exception(p);
-    } catch (error_already_set &e)       { e.restore();   return;
-    } catch (const builtin_exception &e) { e.set_error(); return;
+        if (p) {
+            std::rethrow_exception(p);
+        }
+    } catch (error_already_set &e) {
+        e.restore();
+        return;
+    } catch (const builtin_exception &e) {
+        e.set_error();
+        return;
     }
 }
 #endif
@@ -312,16 +424,26 @@ inline void translate_local_exception(std::exception_ptr p) {
 /// Return a reference to the current `internals` data
 PYBIND11_NOINLINE internals &get_internals() {
     auto **&internals_pp = get_internals_pp();
-    if (internals_pp && *internals_pp)
+    if (internals_pp && *internals_pp) {
         return **internals_pp;
+    }
 
+#if defined(WITH_THREAD)
+#    if defined(PYBIND11_SIMPLE_GIL_MANAGEMENT)
+    gil_scoped_acquire gil;
+#    else
     // Ensure that the GIL is held since we will need to make Python calls.
     // Cannot use py::gil_scoped_acquire here since that constructor calls get_internals.
     struct gil_scoped_acquire_local {
-        gil_scoped_acquire_local() : state (PyGILState_Ensure()) {}
+        gil_scoped_acquire_local() : state(PyGILState_Ensure()) {}
+        gil_scoped_acquire_local(const gil_scoped_acquire_local &) = delete;
+        gil_scoped_acquire_local &operator=(const gil_scoped_acquire_local &) = delete;
         ~gil_scoped_acquire_local() { PyGILState_Release(state); }
         const PyGILState_STATE state;
     } gil;
+#    endif
+#endif
+    error_scope err_scope;
 
     PYBIND11_STR_TYPE id(PYBIND11_INTERNALS_ID);
     auto builtins = handle(PyEval_GetBuiltins());
@@ -339,14 +461,13 @@ PYBIND11_NOINLINE internals &get_internals() {
         (*internals_pp)->registered_exception_translators.push_front(&translate_local_exception);
 #endif
     } else {
-        if (!internals_pp) internals_pp = new internals*();
+        if (!internals_pp) {
+            internals_pp = new internals *();
+        }
         auto *&internals_ptr = *internals_pp;
         internals_ptr = new internals();
 #if defined(WITH_THREAD)
 
-#    if PY_VERSION_HEX < 0x03090000
-        PyEval_InitThreads();
-#    endif
         PyThreadState *tstate = PyThreadState_Get();
         if (!PYBIND11_TLS_KEY_CREATE(internals_ptr->tstate)) {
             pybind11_fail("get_internals: could not successfully initialize the tstate TSS key!");
@@ -416,11 +537,15 @@ struct local_internals {
 
 /// Works like `get_internals`, but for things which are locally registered.
 inline local_internals &get_local_internals() {
-  static local_internals locals;
-  return locals;
+    // Current static can be created in the interpreter finalization routine. If the later will be
+    // destroyed in another static variable destructor, creation of this static there will cause
+    // static deinitialization fiasco. In order to avoid it we avoid destruction of the
+    // local_internals static. One can read more about the problem and current solution here:
+    // https://google.github.io/styleguide/cppguide.html#Static_and_Global_Variables
+    static auto *locals = new local_internals();
+    return *locals;
 }
 
-
 /// Constructs a std::string with the given arguments, stores it in `internals`, and returns its
 /// `c_str()`.  Such strings objects have a long storage duration -- the internal strings are only
 /// cleared when the program exits or after interpreter shutdown (when embedding), and so are
@@ -432,6 +557,25 @@ const char *c_str(Args &&...args) {
     return strings.front().c_str();
 }
 
+inline const char *get_function_record_capsule_name() {
+#if PYBIND11_INTERNALS_VERSION > 4
+    return get_internals().function_record_capsule_name.c_str();
+#else
+    return nullptr;
+#endif
+}
+
+// Determine whether or not the following capsule contains a pybind11 function record.
+// Note that we use `internals` to make sure that only ABI compatible records are touched.
+//
+// This check is currently used in two places:
+// - An important optimization in functional.h to avoid overhead in C++ -> Python -> C++
+// - The sibling feature of cpp_function to allow overloads
+inline bool is_function_record_capsule(const capsule &cap) {
+    // Pointer equality as we rely on internals() to ensure unique pointers
+    return cap.name() == get_function_record_capsule_name();
+}
+
 PYBIND11_NAMESPACE_END(detail)
 
 /// Returns a named pointer that is shared among all extension modules (using the same
@@ -452,7 +596,7 @@ PYBIND11_NOINLINE void *set_shared_data(const std::string &name, void *data) {
 /// Returns a typed reference to a shared data entry (by using `get_shared_data()`) if
 /// such entry exists. Otherwise, a new object of default-constructible type `T` is
 /// added to the shared data under the given name and a reference to it is returned.
-template<typename T>
+template <typename T>
 T &get_or_create_shared_data(const std::string &name) {
     auto &internals = detail::get_internals();
     auto it = internals.shared_data.find(name);
diff --git a/ext/pybind11/include/pybind11/detail/type_caster_base.h b/ext/pybind11/include/pybind11/detail/type_caster_base.h
index 00ce1a7a1e..0b710d7e4c 100644
--- a/ext/pybind11/include/pybind11/detail/type_caster_base.h
+++ b/ext/pybind11/include/pybind11/detail/type_caster_base.h
@@ -14,6 +14,7 @@
 #include "descr.h"
 #include "internals.h"
 #include "typeid.h"
+
 #include <cstdint>
 #include <iterator>
 #include <new>
@@ -32,7 +33,7 @@ PYBIND11_NAMESPACE_BEGIN(detail)
 /// Adding a patient will keep it alive up until the enclosing function returns.
 class loader_life_support {
 private:
-    loader_life_support* parent = nullptr;
+    loader_life_support *parent = nullptr;
     std::unordered_set<PyObject *> keep_alive;
 
 #if defined(WITH_THREAD)
@@ -62,18 +63,17 @@ private:
 
 public:
     /// A new patient frame is created when a function is entered
-    loader_life_support() {
-        parent = get_stack_top();
-        set_stack_top(this);
-    }
+    loader_life_support() : parent{get_stack_top()} { set_stack_top(this); }
 
     /// ... and destroyed after it returns
     ~loader_life_support() {
-        if (get_stack_top() != this)
+        if (get_stack_top() != this) {
             pybind11_fail("loader_life_support: internal error");
+        }
         set_stack_top(parent);
-        for (auto* item : keep_alive)
+        for (auto *item : keep_alive) {
             Py_DECREF(item);
+        }
     }
 
     /// This can only be used inside a pybind11-bound function, either by `argument_loader`
@@ -90,47 +90,56 @@ public:
                              "of temporary values");
         }
 
-        if (frame->keep_alive.insert(h.ptr()).second)
+        if (frame->keep_alive.insert(h.ptr()).second) {
             Py_INCREF(h.ptr());
+        }
     }
 };
 
 // Gets the cache entry for the given type, creating it if necessary.  The return value is the pair
 // returned by emplace, i.e. an iterator for the entry and a bool set to `true` if the entry was
 // just created.
-inline std::pair<decltype(internals::registered_types_py)::iterator, bool> all_type_info_get_cache(PyTypeObject *type);
+inline std::pair<decltype(internals::registered_types_py)::iterator, bool>
+all_type_info_get_cache(PyTypeObject *type);
 
 // Populates a just-created cache entry.
 PYBIND11_NOINLINE void all_type_info_populate(PyTypeObject *t, std::vector<type_info *> &bases) {
     std::vector<PyTypeObject *> check;
-    for (handle parent : reinterpret_borrow<tuple>(t->tp_bases))
+    for (handle parent : reinterpret_borrow<tuple>(t->tp_bases)) {
         check.push_back((PyTypeObject *) parent.ptr());
+    }
 
     auto const &type_dict = get_internals().registered_types_py;
     for (size_t i = 0; i < check.size(); i++) {
-        auto type = check[i];
+        auto *type = check[i];
         // Ignore Python2 old-style class super type:
-        if (!PyType_Check((PyObject *) type)) continue;
+        if (!PyType_Check((PyObject *) type)) {
+            continue;
+        }
 
         // Check `type` in the current set of registered python types:
         auto it = type_dict.find(type);
         if (it != type_dict.end()) {
             // We found a cache entry for it, so it's either pybind-registered or has pre-computed
-            // pybind bases, but we have to make sure we haven't already seen the type(s) before: we
-            // want to follow Python/virtual C++ rules that there should only be one instance of a
-            // common base.
+            // pybind bases, but we have to make sure we haven't already seen the type(s) before:
+            // we want to follow Python/virtual C++ rules that there should only be one instance of
+            // a common base.
             for (auto *tinfo : it->second) {
                 // NB: Could use a second set here, rather than doing a linear search, but since
                 // having a large number of immediate pybind11-registered types seems fairly
                 // unlikely, that probably isn't worthwhile.
                 bool found = false;
                 for (auto *known : bases) {
-                    if (known == tinfo) { found = true; break; }
+                    if (known == tinfo) {
+                        found = true;
+                        break;
+                    }
+                }
+                if (!found) {
+                    bases.push_back(tinfo);
                 }
-                if (!found) bases.push_back(tinfo);
             }
-        }
-        else if (type->tp_bases) {
+        } else if (type->tp_bases) {
             // It's some python type, so keep follow its bases classes to look for one or more
             // registered types
             if (i + 1 == check.size()) {
@@ -140,8 +149,9 @@ PYBIND11_NOINLINE void all_type_info_populate(PyTypeObject *t, std::vector<type_
                 check.pop_back();
                 i--;
             }
-            for (handle parent : reinterpret_borrow<tuple>(type->tp_bases))
+            for (handle parent : reinterpret_borrow<tuple>(type->tp_bases)) {
                 check.push_back((PyTypeObject *) parent.ptr());
+            }
         }
     }
 }
@@ -158,9 +168,10 @@ PYBIND11_NOINLINE void all_type_info_populate(PyTypeObject *t, std::vector<type_
  */
 inline const std::vector<detail::type_info *> &all_type_info(PyTypeObject *type) {
     auto ins = all_type_info_get_cache(type);
-    if (ins.second)
+    if (ins.second) {
         // New cache entry: populate it
         all_type_info_populate(type, ins.first->second);
+    }
 
     return ins.first->second;
 }
@@ -170,43 +181,52 @@ inline const std::vector<detail::type_info *> &all_type_info(PyTypeObject *type)
  * ancestors are pybind11-registered.  Throws an exception if there are multiple bases--use
  * `all_type_info` instead if you want to support multiple bases.
  */
-PYBIND11_NOINLINE detail::type_info* get_type_info(PyTypeObject *type) {
-    auto &bases = all_type_info(type);
-    if (bases.empty())
+PYBIND11_NOINLINE detail::type_info *get_type_info(PyTypeObject *type) {
+    const auto &bases = all_type_info(type);
+    if (bases.empty()) {
         return nullptr;
-    if (bases.size() > 1)
-        pybind11_fail("pybind11::detail::get_type_info: type has multiple pybind11-registered bases");
+    }
+    if (bases.size() > 1) {
+        pybind11_fail(
+            "pybind11::detail::get_type_info: type has multiple pybind11-registered bases");
+    }
     return bases.front();
 }
 
 inline detail::type_info *get_local_type_info(const std::type_index &tp) {
     auto &locals = get_local_internals().registered_types_cpp;
     auto it = locals.find(tp);
-    if (it != locals.end())
+    if (it != locals.end()) {
         return it->second;
+    }
     return nullptr;
 }
 
 inline detail::type_info *get_global_type_info(const std::type_index &tp) {
     auto &types = get_internals().registered_types_cpp;
     auto it = types.find(tp);
-    if (it != types.end())
+    if (it != types.end()) {
         return it->second;
+    }
     return nullptr;
 }
 
-/// Return the type info for a given C++ type; on lookup failure can either throw or return nullptr.
+/// Return the type info for a given C++ type; on lookup failure can either throw or return
+/// nullptr.
 PYBIND11_NOINLINE detail::type_info *get_type_info(const std::type_index &tp,
-                                                          bool throw_if_missing = false) {
-    if (auto ltype = get_local_type_info(tp))
+                                                   bool throw_if_missing = false) {
+    if (auto *ltype = get_local_type_info(tp)) {
         return ltype;
-    if (auto gtype = get_global_type_info(tp))
+    }
+    if (auto *gtype = get_global_type_info(tp)) {
         return gtype;
+    }
 
     if (throw_if_missing) {
         std::string tname = tp.name();
         detail::clean_type_id(tname);
-        pybind11_fail("pybind11::detail::get_type_info: unable to find type info for \"" + tname + "\"");
+        pybind11_fail("pybind11::detail::get_type_info: unable to find type info for \""
+                      + std::move(tname) + '"');
     }
     return nullptr;
 }
@@ -218,12 +238,13 @@ PYBIND11_NOINLINE handle get_type_handle(const std::type_info &tp, bool throw_if
 
 // Searches the inheritance graph for a registered Python instance, using all_type_info().
 PYBIND11_NOINLINE handle find_registered_python_instance(void *src,
-                                                                const detail::type_info *tinfo) {
+                                                         const detail::type_info *tinfo) {
     auto it_instances = get_internals().registered_instances.equal_range(src);
     for (auto it_i = it_instances.first; it_i != it_instances.second; ++it_i) {
-        for (auto instance_type : detail::all_type_info(Py_TYPE(it_i->second))) {
-            if (instance_type && same_type(*instance_type->cpptype, *tinfo->cpptype))
+        for (auto *instance_type : detail::all_type_info(Py_TYPE(it_i->second))) {
+            if (instance_type && same_type(*instance_type->cpptype, *tinfo->cpptype)) {
                 return handle((PyObject *) it_i->second).inc_ref();
+            }
         }
     }
     return handle();
@@ -236,10 +257,10 @@ struct value_and_holder {
     void **vh = nullptr;
 
     // Main constructor for a found value/holder:
-    value_and_holder(instance *i, const detail::type_info *type, size_t vpos, size_t index) :
-        inst{i}, index{index}, type{type},
-        vh{inst->simple_layout ? inst->simple_value_holder : &inst->nonsimple.values_and_holders[vpos]}
-    {}
+    value_and_holder(instance *i, const detail::type_info *type, size_t vpos, size_t index)
+        : inst{i}, index{index}, type{type}, vh{inst->simple_layout
+                                                    ? inst->simple_value_holder
+                                                    : &inst->nonsimple.values_and_holders[vpos]} {}
 
     // Default constructor (used to signal a value-and-holder not found by get_value_and_holder())
     value_and_holder() = default;
@@ -247,13 +268,15 @@ struct value_and_holder {
     // Used for past-the-end iterator
     explicit value_and_holder(size_t index) : index{index} {}
 
-    template <typename V = void> V *&value_ptr() const {
+    template <typename V = void>
+    V *&value_ptr() const {
         return reinterpret_cast<V *&>(vh[0]);
     }
     // True if this `value_and_holder` has a non-null value pointer
     explicit operator bool() const { return value_ptr() != nullptr; }
 
-    template <typename H> H &holder() const {
+    template <typename H>
+    H &holder() const {
         return reinterpret_cast<H &>(vh[1]);
     }
     bool holder_constructed() const {
@@ -263,26 +286,28 @@ struct value_and_holder {
     }
     // NOLINTNEXTLINE(readability-make-member-function-const)
     void set_holder_constructed(bool v = true) {
-        if (inst->simple_layout)
+        if (inst->simple_layout) {
             inst->simple_holder_constructed = v;
-        else if (v)
+        } else if (v) {
             inst->nonsimple.status[index] |= instance::status_holder_constructed;
-        else
+        } else {
             inst->nonsimple.status[index] &= (std::uint8_t) ~instance::status_holder_constructed;
+        }
     }
     bool instance_registered() const {
         return inst->simple_layout
-            ? inst->simple_instance_registered
-            : ((inst->nonsimple.status[index] & instance::status_instance_registered) != 0);
+                   ? inst->simple_instance_registered
+                   : ((inst->nonsimple.status[index] & instance::status_instance_registered) != 0);
     }
     // NOLINTNEXTLINE(readability-make-member-function-const)
     void set_instance_registered(bool v = true) {
-        if (inst->simple_layout)
+        if (inst->simple_layout) {
             inst->simple_instance_registered = v;
-        else if (v)
+        } else if (v) {
             inst->nonsimple.status[index] |= instance::status_instance_registered;
-        else
+        } else {
             inst->nonsimple.status[index] &= (std::uint8_t) ~instance::status_instance_registered;
+        }
     }
 };
 
@@ -305,11 +330,10 @@ public:
         friend struct values_and_holders;
         iterator(instance *inst, const type_vec *tinfo)
             : inst{inst}, types{tinfo},
-            curr(inst /* instance */,
-                 types->empty() ? nullptr : (*types)[0] /* type info */,
-                 0, /* vpos: (non-simple types only): the first vptr comes first */
-                 0 /* index */)
-        {}
+              curr(inst /* instance */,
+                   types->empty() ? nullptr : (*types)[0] /* type info */,
+                   0, /* vpos: (non-simple types only): the first vptr comes first */
+                   0 /* index */) {}
         // Past-the-end iterator:
         explicit iterator(size_t end) : curr(end) {}
 
@@ -317,8 +341,9 @@ public:
         bool operator==(const iterator &other) const { return curr.index == other.curr.index; }
         bool operator!=(const iterator &other) const { return curr.index != other.curr.index; }
         iterator &operator++() {
-            if (!inst->simple_layout)
+            if (!inst->simple_layout) {
                 curr.vh += 1 + (*types)[curr.index]->holder_size_in_ptrs;
+            }
             ++curr.index;
             curr.type = curr.index < types->size() ? (*types)[curr.index] : nullptr;
             return *this;
@@ -332,7 +357,9 @@ public:
 
     iterator find(const type_info *find_type) {
         auto it = begin(), endit = end();
-        while (it != endit && it->type != find_type) ++it;
+        while (it != endit && it->type != find_type) {
+            ++it;
+        }
         return it;
     }
 
@@ -349,152 +376,108 @@ public:
  * The returned object should be short-lived: in particular, it must not outlive the called-upon
  * instance.
  */
-PYBIND11_NOINLINE value_and_holder instance::get_value_and_holder(const type_info *find_type /*= nullptr default in common.h*/, bool throw_if_missing /*= true in common.h*/) {
+PYBIND11_NOINLINE value_and_holder
+instance::get_value_and_holder(const type_info *find_type /*= nullptr default in common.h*/,
+                               bool throw_if_missing /*= true in common.h*/) {
     // Optimize common case:
-    if (!find_type || Py_TYPE(this) == find_type->type)
+    if (!find_type || Py_TYPE(this) == find_type->type) {
         return value_and_holder(this, find_type, 0, 0);
+    }
 
     detail::values_and_holders vhs(this);
     auto it = vhs.find(find_type);
-    if (it != vhs.end())
+    if (it != vhs.end()) {
         return *it;
+    }
 
-    if (!throw_if_missing)
+    if (!throw_if_missing) {
         return value_and_holder();
+    }
 
-#if defined(NDEBUG)
-    pybind11_fail("pybind11::detail::instance::get_value_and_holder: "
-            "type is not a pybind11 base of the given instance "
-            "(compile in debug mode for type details)");
+#if defined(PYBIND11_DETAILED_ERROR_MESSAGES)
+    pybind11_fail("pybind11::detail::instance::get_value_and_holder: `"
+                  + get_fully_qualified_tp_name(find_type->type)
+                  + "' is not a pybind11 base of the given `"
+                  + get_fully_qualified_tp_name(Py_TYPE(this)) + "' instance");
 #else
-    pybind11_fail("pybind11::detail::instance::get_value_and_holder: `" +
-            get_fully_qualified_tp_name(find_type->type) + "' is not a pybind11 base of the given `" +
-            get_fully_qualified_tp_name(Py_TYPE(this)) + "' instance");
+    pybind11_fail(
+        "pybind11::detail::instance::get_value_and_holder: "
+        "type is not a pybind11 base of the given instance "
+        "(#define PYBIND11_DETAILED_ERROR_MESSAGES or compile in debug mode for type details)");
 #endif
 }
 
 PYBIND11_NOINLINE void instance::allocate_layout() {
-    auto &tinfo = all_type_info(Py_TYPE(this));
+    const auto &tinfo = all_type_info(Py_TYPE(this));
 
     const size_t n_types = tinfo.size();
 
-    if (n_types == 0)
-        pybind11_fail("instance allocation failed: new instance has no pybind11-registered base types");
+    if (n_types == 0) {
+        pybind11_fail(
+            "instance allocation failed: new instance has no pybind11-registered base types");
+    }
 
-    simple_layout =
-        n_types == 1 && tinfo.front()->holder_size_in_ptrs <= instance_simple_holder_in_ptrs();
+    simple_layout
+        = n_types == 1 && tinfo.front()->holder_size_in_ptrs <= instance_simple_holder_in_ptrs();
 
     // Simple path: no python-side multiple inheritance, and a small-enough holder
     if (simple_layout) {
         simple_value_holder[0] = nullptr;
         simple_holder_constructed = false;
         simple_instance_registered = false;
-    }
-    else { // multiple base types or a too-large holder
+    } else { // multiple base types or a too-large holder
         // Allocate space to hold: [v1*][h1][v2*][h2]...[bb...] where [vN*] is a value pointer,
         // [hN] is the (uninitialized) holder instance for value N, and [bb...] is a set of bool
         // values that tracks whether each associated holder has been initialized.  Each [block] is
         // padded, if necessary, to an integer multiple of sizeof(void *).
         size_t space = 0;
-        for (auto t : tinfo) {
-            space += 1; // value pointer
+        for (auto *t : tinfo) {
+            space += 1;                      // value pointer
             space += t->holder_size_in_ptrs; // holder instance
         }
         size_t flags_at = space;
-        space += size_in_ptrs(n_types); // status bytes (holder_constructed and instance_registered)
+        space += size_in_ptrs(n_types); // status bytes (holder_constructed and
+                                        // instance_registered)
 
         // Allocate space for flags, values, and holders, and initialize it to 0 (flags and values,
-        // in particular, need to be 0).  Use Python's memory allocation functions: in Python 3.6
-        // they default to using pymalloc, which is designed to be efficient for small allocations
-        // like the one we're doing here; in earlier versions (and for larger allocations) they are
-        // just wrappers around malloc.
-#if PY_VERSION_HEX >= 0x03050000
+        // in particular, need to be 0).  Use Python's memory allocation
+        // functions: Python is using pymalloc, which is designed to be
+        // efficient for small allocations like the one we're doing here;
+        // for larger allocations they are just wrappers around malloc.
+        // TODO: is this still true for pure Python 3.6?
         nonsimple.values_and_holders = (void **) PyMem_Calloc(space, sizeof(void *));
-        if (!nonsimple.values_and_holders) throw std::bad_alloc();
-#else
-        nonsimple.values_and_holders = (void **) PyMem_New(void *, space);
-        if (!nonsimple.values_and_holders) throw std::bad_alloc();
-        std::memset(nonsimple.values_and_holders, 0, space * sizeof(void *));
-#endif
-        nonsimple.status = reinterpret_cast<std::uint8_t *>(&nonsimple.values_and_holders[flags_at]);
+        if (!nonsimple.values_and_holders) {
+            throw std::bad_alloc();
+        }
+        nonsimple.status
+            = reinterpret_cast<std::uint8_t *>(&nonsimple.values_and_holders[flags_at]);
     }
     owned = true;
 }
 
 // NOLINTNEXTLINE(readability-make-member-function-const)
 PYBIND11_NOINLINE void instance::deallocate_layout() {
-    if (!simple_layout)
+    if (!simple_layout) {
         PyMem_Free(nonsimple.values_and_holders);
+    }
 }
 
 PYBIND11_NOINLINE bool isinstance_generic(handle obj, const std::type_info &tp) {
     handle type = detail::get_type_handle(tp, false);
-    if (!type)
+    if (!type) {
         return false;
+    }
     return isinstance(obj, type);
 }
 
-PYBIND11_NOINLINE std::string error_string() {
-    if (!PyErr_Occurred()) {
-        PyErr_SetString(PyExc_RuntimeError, "Unknown internal error occurred");
-        return "Unknown internal error occurred";
-    }
-
-    error_scope scope; // Preserve error state
-
-    std::string errorString;
-    if (scope.type) {
-        errorString += handle(scope.type).attr("__name__").cast<std::string>();
-        errorString += ": ";
-    }
-    if (scope.value)
-        errorString += (std::string) str(scope.value);
-
-    PyErr_NormalizeException(&scope.type, &scope.value, &scope.trace);
-
-#if PY_MAJOR_VERSION >= 3
-    if (scope.trace != nullptr)
-        PyException_SetTraceback(scope.value, scope.trace);
-#endif
-
-#if !defined(PYPY_VERSION)
-    if (scope.trace) {
-        auto *trace = (PyTracebackObject *) scope.trace;
-
-        /* Get the deepest trace possible */
-        while (trace->tb_next)
-            trace = trace->tb_next;
-
-        PyFrameObject *frame = trace->tb_frame;
-        errorString += "\n\nAt:\n";
-        while (frame) {
-#if PY_VERSION_HEX >= 0x03090000
-            PyCodeObject *f_code = PyFrame_GetCode(frame);
-#else
-            PyCodeObject *f_code = frame->f_code;
-            Py_INCREF(f_code);
-#endif
-            int lineno = PyFrame_GetLineNumber(frame);
-            errorString +=
-                "  " + handle(f_code->co_filename).cast<std::string>() +
-                "(" + std::to_string(lineno) + "): " +
-                handle(f_code->co_name).cast<std::string>() + "\n";
-            frame = frame->f_back;
-            Py_DECREF(f_code);
-        }
-    }
-#endif
-
-    return errorString;
-}
-
-PYBIND11_NOINLINE handle get_object_handle(const void *ptr, const detail::type_info *type ) {
+PYBIND11_NOINLINE handle get_object_handle(const void *ptr, const detail::type_info *type) {
     auto &instances = get_internals().registered_instances;
     auto range = instances.equal_range(ptr);
     for (auto it = range.first; it != range.second; ++it) {
         for (const auto &vh : values_and_holders(it->second)) {
-            if (vh.type == type)
+            if (vh.type == type) {
                 return handle((PyObject *) it->second);
+            }
         }
     }
     return handle();
@@ -503,12 +486,6 @@ PYBIND11_NOINLINE handle get_object_handle(const void *ptr, const detail::type_i
 inline PyThreadState *get_thread_state_unchecked() {
 #if defined(PYPY_VERSION)
     return PyThreadState_GET();
-#elif PY_VERSION_HEX < 0x03000000
-    return _PyThreadState_Current;
-#elif PY_VERSION_HEX < 0x03050000
-    return (PyThreadState*) _Py_atomic_load_relaxed(&_PyThreadState_Current);
-#elif PY_VERSION_HEX < 0x03050200
-    return (PyThreadState*) _PyThreadState_Current.value;
 #else
     return _PyThreadState_UncheckedGet();
 #endif
@@ -526,27 +503,30 @@ public:
     explicit type_caster_generic(const type_info *typeinfo)
         : typeinfo(typeinfo), cpptype(typeinfo ? typeinfo->cpptype : nullptr) {}
 
-    bool load(handle src, bool convert) {
-        return load_impl<type_caster_generic>(src, convert);
-    }
+    bool load(handle src, bool convert) { return load_impl<type_caster_generic>(src, convert); }
 
-    PYBIND11_NOINLINE static handle cast(const void *_src, return_value_policy policy, handle parent,
+    PYBIND11_NOINLINE static handle cast(const void *_src,
+                                         return_value_policy policy,
+                                         handle parent,
                                          const detail::type_info *tinfo,
                                          void *(*copy_constructor)(const void *),
                                          void *(*move_constructor)(const void *),
                                          const void *existing_holder = nullptr) {
-        if (!tinfo) // no type info: error will be set already
+        if (!tinfo) { // no type info: error will be set already
             return handle();
+        }
 
         void *src = const_cast<void *>(_src);
-        if (src == nullptr)
+        if (src == nullptr) {
             return none().release();
+        }
 
-        if (handle registered_inst = find_registered_python_instance(src, tinfo))
+        if (handle registered_inst = find_registered_python_instance(src, tinfo)) {
             return registered_inst;
+        }
 
         auto inst = reinterpret_steal<object>(make_new_instance(tinfo->type));
-        auto wrapper = reinterpret_cast<instance *>(inst.ptr());
+        auto *wrapper = reinterpret_cast<instance *>(inst.ptr());
         wrapper->owned = false;
         void *&valueptr = values_and_holders(wrapper).begin()->value_ptr();
 
@@ -564,37 +544,39 @@ public:
                 break;
 
             case return_value_policy::copy:
-                if (copy_constructor)
+                if (copy_constructor) {
                     valueptr = copy_constructor(src);
-                else {
-#if defined(NDEBUG)
-                    throw cast_error("return_value_policy = copy, but type is "
-                                     "non-copyable! (compile in debug mode for details)");
-#else
+                } else {
+#if defined(PYBIND11_DETAILED_ERROR_MESSAGES)
                     std::string type_name(tinfo->cpptype->name());
                     detail::clean_type_id(type_name);
-                    throw cast_error("return_value_policy = copy, but type " +
-                                     type_name + " is non-copyable!");
+                    throw cast_error("return_value_policy = copy, but type " + type_name
+                                     + " is non-copyable!");
+#else
+                    throw cast_error("return_value_policy = copy, but type is "
+                                     "non-copyable! (#define PYBIND11_DETAILED_ERROR_MESSAGES or "
+                                     "compile in debug mode for details)");
 #endif
                 }
                 wrapper->owned = true;
                 break;
 
             case return_value_policy::move:
-                if (move_constructor)
+                if (move_constructor) {
                     valueptr = move_constructor(src);
-                else if (copy_constructor)
+                } else if (copy_constructor) {
                     valueptr = copy_constructor(src);
-                else {
-#if defined(NDEBUG)
-                    throw cast_error("return_value_policy = move, but type is neither "
-                                     "movable nor copyable! "
-                                     "(compile in debug mode for details)");
-#else
+                } else {
+#if defined(PYBIND11_DETAILED_ERROR_MESSAGES)
                     std::string type_name(tinfo->cpptype->name());
                     detail::clean_type_id(type_name);
-                    throw cast_error("return_value_policy = move, but type " +
-                                     type_name + " is neither movable nor copyable!");
+                    throw cast_error("return_value_policy = move, but type " + type_name
+                                     + " is neither movable nor copyable!");
+#else
+                    throw cast_error("return_value_policy = move, but type is neither "
+                                     "movable nor copyable! "
+                                     "(#define PYBIND11_DETAILED_ERROR_MESSAGES or compile in "
+                                     "debug mode for details)");
 #endif
                 }
                 wrapper->owned = true;
@@ -620,23 +602,25 @@ public:
         auto *&vptr = v_h.value_ptr();
         // Lazy allocation for unallocated values:
         if (vptr == nullptr) {
-            auto *type = v_h.type ? v_h.type : typeinfo;
+            const auto *type = v_h.type ? v_h.type : typeinfo;
             if (type->operator_new) {
                 vptr = type->operator_new(type->type_size);
             } else {
-                #if defined(__cpp_aligned_new) && (!defined(_MSC_VER) || _MSC_VER >= 1912)
-                    if (type->type_align > __STDCPP_DEFAULT_NEW_ALIGNMENT__)
-                        vptr = ::operator new(type->type_size,
-                                              std::align_val_t(type->type_align));
-                    else
-                #endif
+#if defined(__cpp_aligned_new) && (!defined(_MSC_VER) || _MSC_VER >= 1912)
+                if (type->type_align > __STDCPP_DEFAULT_NEW_ALIGNMENT__) {
+                    vptr = ::operator new(type->type_size, std::align_val_t(type->type_align));
+                } else {
+                    vptr = ::operator new(type->type_size);
+                }
+#else
                 vptr = ::operator new(type->type_size);
+#endif
             }
         }
         value = vptr;
     }
     bool try_implicit_casts(handle src, bool convert) {
-        for (auto &cast : typeinfo->implicit_casts) {
+        for (const auto &cast : typeinfo->implicit_casts) {
             type_caster_generic sub_caster(*cast.first);
             if (sub_caster.load(src, convert)) {
                 value = cast.second(sub_caster.value);
@@ -647,8 +631,9 @@ public:
     }
     bool try_direct_conversions(handle src) {
         for (auto &converter : *typeinfo->direct_conversions) {
-            if (converter(src.ptr(), value))
+            if (converter(src.ptr(), value)) {
                 return true;
+            }
         }
         return false;
     }
@@ -656,8 +641,9 @@ public:
 
     PYBIND11_NOINLINE static void *local_load(PyObject *src, const type_info *ti) {
         auto caster = type_caster_generic(ti);
-        if (caster.load(src, false))
+        if (caster.load(src, false)) {
             return caster.value;
+        }
         return nullptr;
     }
 
@@ -666,16 +652,19 @@ public:
     PYBIND11_NOINLINE bool try_load_foreign_module_local(handle src) {
         constexpr auto *local_key = PYBIND11_MODULE_LOCAL_ID;
         const auto pytype = type::handle_of(src);
-        if (!hasattr(pytype, local_key))
+        if (!hasattr(pytype, local_key)) {
             return false;
+        }
 
         type_info *foreign_typeinfo = reinterpret_borrow<capsule>(getattr(pytype, local_key));
-        // Only consider this foreign loader if actually foreign and is a loader of the correct cpp type
+        // Only consider this foreign loader if actually foreign and is a loader of the correct cpp
+        // type
         if (foreign_typeinfo->module_local_load == &local_load
-            || (cpptype && !same_type(*cpptype, *foreign_typeinfo->cpptype)))
+            || (cpptype && !same_type(*cpptype, *foreign_typeinfo->cpptype))) {
             return false;
+        }
 
-        if (auto result = foreign_typeinfo->module_local_load(src.ptr(), foreign_typeinfo)) {
+        if (auto *result = foreign_typeinfo->module_local_load(src.ptr(), foreign_typeinfo)) {
             value = result;
             return true;
         }
@@ -687,8 +676,12 @@ public:
     // logic (without having to resort to virtual inheritance).
     template <typename ThisT>
     PYBIND11_NOINLINE bool load_impl(handle src, bool convert) {
-        if (!src) return false;
-        if (!typeinfo) return try_load_foreign_module_local(src);
+        if (!src) {
+            return false;
+        }
+        if (!typeinfo) {
+            return try_load_foreign_module_local(src);
+        }
 
         auto &this_ = static_cast<ThisT &>(*this);
         this_.check_holder_compat();
@@ -703,7 +696,7 @@ public:
         }
         // Case 2: We have a derived class
         if (PyType_IsSubtype(srctype, typeinfo->type)) {
-            auto &bases = all_type_info(srctype);
+            const auto &bases = all_type_info(srctype);
             bool no_cpp_mi = typeinfo->simple_type;
 
             // Case 2a: the python type is a Python-inherited derived class that inherits from just
@@ -716,41 +709,45 @@ public:
                 this_.load_value(reinterpret_cast<instance *>(src.ptr())->get_value_and_holder());
                 return true;
             }
-            // Case 2b: the python type inherits from multiple C++ bases.  Check the bases to see if
-            // we can find an exact match (or, for a simple C++ type, an inherited match); if so, we
-            // can safely reinterpret_cast to the relevant pointer.
+            // Case 2b: the python type inherits from multiple C++ bases.  Check the bases to see
+            // if we can find an exact match (or, for a simple C++ type, an inherited match); if
+            // so, we can safely reinterpret_cast to the relevant pointer.
             if (bases.size() > 1) {
-                for (auto base : bases) {
-                    if (no_cpp_mi ? PyType_IsSubtype(base->type, typeinfo->type) : base->type == typeinfo->type) {
-                        this_.load_value(reinterpret_cast<instance *>(src.ptr())->get_value_and_holder(base));
+                for (auto *base : bases) {
+                    if (no_cpp_mi ? PyType_IsSubtype(base->type, typeinfo->type)
+                                  : base->type == typeinfo->type) {
+                        this_.load_value(
+                            reinterpret_cast<instance *>(src.ptr())->get_value_and_holder(base));
                         return true;
                     }
                 }
             }
 
-            // Case 2c: C++ multiple inheritance is involved and we couldn't find an exact type match
-            // in the registered bases, above, so try implicit casting (needed for proper C++ casting
-            // when MI is involved).
-            if (this_.try_implicit_casts(src, convert))
+            // Case 2c: C++ multiple inheritance is involved and we couldn't find an exact type
+            // match in the registered bases, above, so try implicit casting (needed for proper C++
+            // casting when MI is involved).
+            if (this_.try_implicit_casts(src, convert)) {
                 return true;
+            }
         }
 
         // Perform an implicit conversion
         if (convert) {
-            for (auto &converter : typeinfo->implicit_conversions) {
+            for (const auto &converter : typeinfo->implicit_conversions) {
                 auto temp = reinterpret_steal<object>(converter(src.ptr(), typeinfo->type));
                 if (load_impl<ThisT>(temp, false)) {
                     loader_life_support::add_patient(temp);
                     return true;
                 }
             }
-            if (this_.try_direct_conversions(src))
+            if (this_.try_direct_conversions(src)) {
                 return true;
+            }
         }
 
         // Failed to match local typeinfo. Try again with global.
         if (typeinfo->module_local) {
-            if (auto gtype = get_global_type_info(*typeinfo->cpptype)) {
+            if (auto *gtype = get_global_type_info(*typeinfo->cpptype)) {
                 typeinfo = gtype;
                 return load(src, false);
             }
@@ -758,28 +755,32 @@ public:
 
         // Global typeinfo has precedence over foreign module_local
         if (try_load_foreign_module_local(src)) {
-           return true;
+            return true;
         }
 
         // Custom converters didn't take None, now we convert None to nullptr.
         if (src.is_none()) {
-           // Defer accepting None to other overloads (if we aren't in convert mode):
-           if (!convert) return false;
-           value = nullptr;
-           return true;
+            // Defer accepting None to other overloads (if we aren't in convert mode):
+            if (!convert) {
+                return false;
+            }
+            value = nullptr;
+            return true;
         }
 
         return false;
     }
 
-
     // Called to do type lookup and wrap the pointer and type in a pair when a dynamic_cast
     // isn't needed or can't be used.  If the type is unknown, sets the error and returns a pair
     // with .second = nullptr.  (p.first = nullptr is not an error: it becomes None).
-    PYBIND11_NOINLINE static std::pair<const void *, const type_info *> src_and_type(
-            const void *src, const std::type_info &cast_type, const std::type_info *rtti_type = nullptr) {
-        if (auto *tpi = get_type_info(cast_type))
+    PYBIND11_NOINLINE static std::pair<const void *, const type_info *>
+    src_and_type(const void *src,
+                 const std::type_info &cast_type,
+                 const std::type_info *rtti_type = nullptr) {
+        if (auto *tpi = get_type_info(cast_type)) {
             return {src, const_cast<const type_info *>(tpi)};
+        }
 
         // Not found, set error:
         std::string tname = rtti_type ? rtti_type->name() : cast_type.name();
@@ -802,10 +803,9 @@ public:
  * `movable_cast_op_type` instead.
  */
 template <typename T>
-using cast_op_type =
-    conditional_t<std::is_pointer<remove_reference_t<T>>::value,
-        typename std::add_pointer<intrinsic_t<T>>::type,
-        typename std::add_lvalue_reference<intrinsic_t<T>>::type>;
+using cast_op_type = conditional_t<std::is_pointer<remove_reference_t<T>>::value,
+                                   typename std::add_pointer<intrinsic_t<T>>::type,
+                                   typename std::add_lvalue_reference<intrinsic_t<T>>::type>;
 
 /**
  * Determine suitable casting operator for a type caster with a movable value.  Such a type caster
@@ -815,40 +815,50 @@ using cast_op_type =
  * These operator are automatically provided when using the PYBIND11_TYPE_CASTER macro.
  */
 template <typename T>
-using movable_cast_op_type =
-    conditional_t<std::is_pointer<typename std::remove_reference<T>::type>::value,
-        typename std::add_pointer<intrinsic_t<T>>::type,
-    conditional_t<std::is_rvalue_reference<T>::value,
-        typename std::add_rvalue_reference<intrinsic_t<T>>::type,
-        typename std::add_lvalue_reference<intrinsic_t<T>>::type>>;
+using movable_cast_op_type
+    = conditional_t<std::is_pointer<typename std::remove_reference<T>::type>::value,
+                    typename std::add_pointer<intrinsic_t<T>>::type,
+                    conditional_t<std::is_rvalue_reference<T>::value,
+                                  typename std::add_rvalue_reference<intrinsic_t<T>>::type,
+                                  typename std::add_lvalue_reference<intrinsic_t<T>>::type>>;
 
 // std::is_copy_constructible isn't quite enough: it lets std::vector<T> (and similar) through when
 // T is non-copyable, but code containing such a copy constructor fails to actually compile.
-template <typename T, typename SFINAE = void> struct is_copy_constructible : std::is_copy_constructible<T> {};
+template <typename T, typename SFINAE = void>
+struct is_copy_constructible : std::is_copy_constructible<T> {};
 
 // Specialization for types that appear to be copy constructible but also look like stl containers
 // (we specifically check for: has `value_type` and `reference` with `reference = value_type&`): if
 // so, copy constructability depends on whether the value_type is copy constructible.
-template <typename Container> struct is_copy_constructible<Container, enable_if_t<all_of<
-        std::is_copy_constructible<Container>,
-        std::is_same<typename Container::value_type &, typename Container::reference>,
-        // Avoid infinite recursion
-        negation<std::is_same<Container, typename Container::value_type>>
-    >::value>> : is_copy_constructible<typename Container::value_type> {};
+template <typename Container>
+struct is_copy_constructible<
+    Container,
+    enable_if_t<
+        all_of<std::is_copy_constructible<Container>,
+               std::is_same<typename Container::value_type &, typename Container::reference>,
+               // Avoid infinite recursion
+               negation<std::is_same<Container, typename Container::value_type>>>::value>>
+    : is_copy_constructible<typename Container::value_type> {};
 
 // Likewise for std::pair
-// (after C++17 it is mandatory that the copy constructor not exist when the two types aren't themselves
-// copy constructible, but this can not be relied upon when T1 or T2 are themselves containers).
-template <typename T1, typename T2> struct is_copy_constructible<std::pair<T1, T2>>
+// (after C++17 it is mandatory that the copy constructor not exist when the two types aren't
+// themselves copy constructible, but this can not be relied upon when T1 or T2 are themselves
+// containers).
+template <typename T1, typename T2>
+struct is_copy_constructible<std::pair<T1, T2>>
     : all_of<is_copy_constructible<T1>, is_copy_constructible<T2>> {};
 
 // The same problems arise with std::is_copy_assignable, so we use the same workaround.
-template <typename T, typename SFINAE = void> struct is_copy_assignable : std::is_copy_assignable<T> {};
-template <typename Container> struct is_copy_assignable<Container, enable_if_t<all_of<
-        std::is_copy_assignable<Container>,
-        std::is_same<typename Container::value_type &, typename Container::reference>
-    >::value>> : is_copy_assignable<typename Container::value_type> {};
-template <typename T1, typename T2> struct is_copy_assignable<std::pair<T1, T2>>
+template <typename T, typename SFINAE = void>
+struct is_copy_assignable : std::is_copy_assignable<T> {};
+template <typename Container>
+struct is_copy_assignable<Container,
+                          enable_if_t<all_of<std::is_copy_assignable<Container>,
+                                             std::is_same<typename Container::value_type &,
+                                                          typename Container::reference>>::value>>
+    : is_copy_assignable<typename Container::value_type> {};
+template <typename T1, typename T2>
+struct is_copy_assignable<std::pair<T1, T2>>
     : all_of<is_copy_assignable<T1>, is_copy_assignable<T2>> {};
 
 PYBIND11_NAMESPACE_END(detail)
@@ -875,16 +885,14 @@ PYBIND11_NAMESPACE_END(detail)
 // std::enable_if. User provided specializations will always have higher priority than
 // the default implementation and specialization provided in polymorphic_type_hook_base.
 template <typename itype, typename SFINAE = void>
-struct polymorphic_type_hook_base
-{
-    static const void *get(const itype *src, const std::type_info*&) { return src; }
+struct polymorphic_type_hook_base {
+    static const void *get(const itype *src, const std::type_info *&) { return src; }
 };
 template <typename itype>
-struct polymorphic_type_hook_base<itype, detail::enable_if_t<std::is_polymorphic<itype>::value>>
-{
-    static const void *get(const itype *src, const std::type_info*& type) {
+struct polymorphic_type_hook_base<itype, detail::enable_if_t<std::is_polymorphic<itype>::value>> {
+    static const void *get(const itype *src, const std::type_info *&type) {
         type = src ? &typeid(*src) : nullptr;
-        return dynamic_cast<const void*>(src);
+        return dynamic_cast<const void *>(src);
     }
 };
 template <typename itype, typename SFINAE = void>
@@ -893,18 +901,21 @@ struct polymorphic_type_hook : public polymorphic_type_hook_base<itype> {};
 PYBIND11_NAMESPACE_BEGIN(detail)
 
 /// Generic type caster for objects stored on the heap
-template <typename type> class type_caster_base : public type_caster_generic {
+template <typename type>
+class type_caster_base : public type_caster_generic {
     using itype = intrinsic_t<type>;
 
 public:
-    static constexpr auto name = _<type>();
+    static constexpr auto name = const_name<type>();
 
-    type_caster_base() : type_caster_base(typeid(type)) { }
-    explicit type_caster_base(const std::type_info &info) : type_caster_generic(info) { }
+    type_caster_base() : type_caster_base(typeid(type)) {}
+    explicit type_caster_base(const std::type_info &info) : type_caster_generic(info) {}
 
     static handle cast(const itype &src, return_value_policy policy, handle parent) {
-        if (policy == return_value_policy::automatic || policy == return_value_policy::automatic_reference)
+        if (policy == return_value_policy::automatic
+            || policy == return_value_policy::automatic_reference) {
             policy = return_value_policy::copy;
+        }
         return cast(&src, policy, parent);
     }
 
@@ -916,7 +927,7 @@ public:
     // polymorphic type (using RTTI by default, but can be overridden by specializing
     // polymorphic_type_hook). If the instance isn't derived, returns the base version.
     static std::pair<const void *, const type_info *> src_and_type(const itype *src) {
-        auto &cast_type = typeid(itype);
+        const auto &cast_type = typeid(itype);
         const std::type_info *instance_type = nullptr;
         const void *vsrc = polymorphic_type_hook<itype>::get(src, instance_type);
         if (instance_type && !same_type(cast_type, *instance_type)) {
@@ -928,50 +939,64 @@ public:
             // except via a user-provided specialization of polymorphic_type_hook,
             // and the user has promised that no this-pointer adjustment is
             // required in that case, so it's OK to use static_cast.
-            if (const auto *tpi = get_type_info(*instance_type))
+            if (const auto *tpi = get_type_info(*instance_type)) {
                 return {vsrc, tpi};
+            }
         }
-        // Otherwise we have either a nullptr, an `itype` pointer, or an unknown derived pointer, so
-        // don't do a cast
+        // Otherwise we have either a nullptr, an `itype` pointer, or an unknown derived pointer,
+        // so don't do a cast
         return type_caster_generic::src_and_type(src, cast_type, instance_type);
     }
 
     static handle cast(const itype *src, return_value_policy policy, handle parent) {
         auto st = src_and_type(src);
-        return type_caster_generic::cast(
-            st.first, policy, parent, st.second,
-            make_copy_constructor(src), make_move_constructor(src));
+        return type_caster_generic::cast(st.first,
+                                         policy,
+                                         parent,
+                                         st.second,
+                                         make_copy_constructor(src),
+                                         make_move_constructor(src));
     }
 
     static handle cast_holder(const itype *src, const void *holder) {
         auto st = src_and_type(src);
-        return type_caster_generic::cast(
-            st.first, return_value_policy::take_ownership, {}, st.second,
-            nullptr, nullptr, holder);
+        return type_caster_generic::cast(st.first,
+                                         return_value_policy::take_ownership,
+                                         {},
+                                         st.second,
+                                         nullptr,
+                                         nullptr,
+                                         holder);
     }
 
-    template <typename T> using cast_op_type = detail::cast_op_type<T>;
+    template <typename T>
+    using cast_op_type = detail::cast_op_type<T>;
 
     // NOLINTNEXTLINE(google-explicit-constructor)
-    operator itype*() { return (type *) value; }
+    operator itype *() { return (type *) value; }
     // NOLINTNEXTLINE(google-explicit-constructor)
-    operator itype&() { if (!value) throw reference_cast_error(); return *((itype *) value); }
+    operator itype &() {
+        if (!value) {
+            throw reference_cast_error();
+        }
+        return *((itype *) value);
+    }
 
 protected:
-    using Constructor = void *(*)(const void *);
+    using Constructor = void *(*) (const void *);
 
     /* Only enabled when the types are {copy,move}-constructible *and* when the type
-       does not have a private operator new implementation. A comma operator is used in the decltype
-       argument to apply SFINAE to the public copy/move constructors.*/
+       does not have a private operator new implementation. A comma operator is used in the
+       decltype argument to apply SFINAE to the public copy/move constructors.*/
     template <typename T, typename = enable_if_t<is_copy_constructible<T>::value>>
-    static auto make_copy_constructor(const T *) -> decltype(new T(std::declval<const T>()), Constructor{}) {
-        return [](const void *arg) -> void * {
-            return new T(*reinterpret_cast<const T *>(arg));
-        };
+    static auto make_copy_constructor(const T *)
+        -> decltype(new T(std::declval<const T>()), Constructor{}) {
+        return [](const void *arg) -> void * { return new T(*reinterpret_cast<const T *>(arg)); };
     }
 
     template <typename T, typename = enable_if_t<std::is_move_constructible<T>::value>>
-    static auto make_move_constructor(const T *) -> decltype(new T(std::declval<T&&>()), Constructor{}) {
+    static auto make_move_constructor(const T *)
+        -> decltype(new T(std::declval<T &&>()), Constructor{}) {
         return [](const void *arg) -> void * {
             return new T(std::move(*const_cast<T *>(reinterpret_cast<const T *>(arg))));
         };
@@ -981,5 +1006,14 @@ protected:
     static Constructor make_move_constructor(...) { return nullptr; }
 };
 
+PYBIND11_NOINLINE std::string type_info_description(const std::type_info &ti) {
+    if (auto *type_data = get_type_info(ti)) {
+        handle th((PyObject *) type_data->type);
+        return th.attr("__module__").cast<std::string>() + '.'
+               + th.attr("__qualname__").cast<std::string>();
+    }
+    return clean_type_id(ti.name());
+}
+
 PYBIND11_NAMESPACE_END(detail)
 PYBIND11_NAMESPACE_END(PYBIND11_NAMESPACE)
diff --git a/ext/pybind11/include/pybind11/detail/typeid.h b/ext/pybind11/include/pybind11/detail/typeid.h
index 39ba8ce0f7..a67b52135b 100644
--- a/ext/pybind11/include/pybind11/detail/typeid.h
+++ b/ext/pybind11/include/pybind11/detail/typeid.h
@@ -13,18 +13,21 @@
 #include <cstdlib>
 
 #if defined(__GNUG__)
-#include <cxxabi.h>
+#    include <cxxabi.h>
 #endif
 
 #include "common.h"
 
 PYBIND11_NAMESPACE_BEGIN(PYBIND11_NAMESPACE)
 PYBIND11_NAMESPACE_BEGIN(detail)
+
 /// Erase all occurrences of a substring
 inline void erase_all(std::string &string, const std::string &search) {
     for (size_t pos = 0;;) {
         pos = string.find(search, pos);
-        if (pos == std::string::npos) break;
+        if (pos == std::string::npos) {
+            break;
+        }
         string.erase(pos, search.length());
     }
 }
@@ -32,10 +35,11 @@ inline void erase_all(std::string &string, const std::string &search) {
 PYBIND11_NOINLINE void clean_type_id(std::string &name) {
 #if defined(__GNUG__)
     int status = 0;
-    std::unique_ptr<char, void (*)(void *)> res {
-        abi::__cxa_demangle(name.c_str(), nullptr, nullptr, &status), std::free };
-    if (status == 0)
+    std::unique_ptr<char, void (*)(void *)> res{
+        abi::__cxa_demangle(name.c_str(), nullptr, nullptr, &status), std::free};
+    if (status == 0) {
         name = res.get();
+    }
 #else
     detail::erase_all(name, "class ");
     detail::erase_all(name, "struct ");
@@ -43,13 +47,19 @@ PYBIND11_NOINLINE void clean_type_id(std::string &name) {
 #endif
     detail::erase_all(name, "pybind11::");
 }
-PYBIND11_NAMESPACE_END(detail)
 
-/// Return a string representation of a C++ type
-template <typename T> static std::string type_id() {
-    std::string name(typeid(T).name());
+inline std::string clean_type_id(const char *typeid_name) {
+    std::string name(typeid_name);
     detail::clean_type_id(name);
     return name;
 }
 
+PYBIND11_NAMESPACE_END(detail)
+
+/// Return a string representation of a C++ type
+template <typename T>
+static std::string type_id() {
+    return detail::clean_type_id(typeid(T).name());
+}
+
 PYBIND11_NAMESPACE_END(PYBIND11_NAMESPACE)
diff --git a/ext/pybind11/include/pybind11/eigen.h b/ext/pybind11/include/pybind11/eigen.h
index 97b1d96b09..273b9c9308 100644
--- a/ext/pybind11/include/pybind11/eigen.h
+++ b/ext/pybind11/include/pybind11/eigen.h
@@ -9,598 +9,4 @@
 
 #pragma once
 
-/* HINT: To suppress warnings originating from the Eigen headers, use -isystem.
-   See also:
-       https://stackoverflow.com/questions/2579576/i-dir-vs-isystem-dir
-       https://stackoverflow.com/questions/1741816/isystem-for-ms-visual-studio-c-compiler
-*/
-
-#include "numpy.h"
-
-// The C4127 suppression was introduced for Eigen 3.4.0. In theory we could
-// make it version specific, or even remove it later, but considering that
-// 1. C4127 is generally far more distracting than useful for modern template code, and
-// 2. we definitely want to ignore any MSVC warnings originating from Eigen code,
-// it is probably best to keep this around indefinitely.
-#if defined(_MSC_VER)
-#  pragma warning(push)
-#  pragma warning(disable: 4127) // C4127: conditional expression is constant
-#endif
-
-#include <Eigen/Core>
-#include <Eigen/SparseCore>
-
-#if defined(_MSC_VER)
-#  pragma warning(pop)
-#endif
-
-// Eigen prior to 3.2.7 doesn't have proper move constructors--but worse, some classes get implicit
-// move constructors that break things.  We could detect this an explicitly copy, but an extra copy
-// of matrices seems highly undesirable.
-static_assert(EIGEN_VERSION_AT_LEAST(3,2,7), "Eigen support in pybind11 requires Eigen >= 3.2.7");
-
-PYBIND11_NAMESPACE_BEGIN(PYBIND11_NAMESPACE)
-
-// Provide a convenience alias for easier pass-by-ref usage with fully dynamic strides:
-using EigenDStride = Eigen::Stride<Eigen::Dynamic, Eigen::Dynamic>;
-template <typename MatrixType> using EigenDRef = Eigen::Ref<MatrixType, 0, EigenDStride>;
-template <typename MatrixType> using EigenDMap = Eigen::Map<MatrixType, 0, EigenDStride>;
-
-PYBIND11_NAMESPACE_BEGIN(detail)
-
-#if EIGEN_VERSION_AT_LEAST(3,3,0)
-using EigenIndex = Eigen::Index;
-#else
-using EigenIndex = EIGEN_DEFAULT_DENSE_INDEX_TYPE;
-#endif
-
-// Matches Eigen::Map, Eigen::Ref, blocks, etc:
-template <typename T> using is_eigen_dense_map = all_of<is_template_base_of<Eigen::DenseBase, T>, std::is_base_of<Eigen::MapBase<T, Eigen::ReadOnlyAccessors>, T>>;
-template <typename T> using is_eigen_mutable_map = std::is_base_of<Eigen::MapBase<T, Eigen::WriteAccessors>, T>;
-template <typename T> using is_eigen_dense_plain = all_of<negation<is_eigen_dense_map<T>>, is_template_base_of<Eigen::PlainObjectBase, T>>;
-template <typename T> using is_eigen_sparse = is_template_base_of<Eigen::SparseMatrixBase, T>;
-// Test for objects inheriting from EigenBase<Derived> that aren't captured by the above.  This
-// basically covers anything that can be assigned to a dense matrix but that don't have a typical
-// matrix data layout that can be copied from their .data().  For example, DiagonalMatrix and
-// SelfAdjointView fall into this category.
-template <typename T> using is_eigen_other = all_of<
-    is_template_base_of<Eigen::EigenBase, T>,
-    negation<any_of<is_eigen_dense_map<T>, is_eigen_dense_plain<T>, is_eigen_sparse<T>>>
->;
-
-// Captures numpy/eigen conformability status (returned by EigenProps::conformable()):
-template <bool EigenRowMajor> struct EigenConformable {
-    bool conformable = false;
-    EigenIndex rows = 0, cols = 0;
-    EigenDStride stride{0, 0};      // Only valid if negativestrides is false!
-    bool negativestrides = false;   // If true, do not use stride!
-
-    // NOLINTNEXTLINE(google-explicit-constructor)
-    EigenConformable(bool fits = false) : conformable{fits} {}
-    // Matrix type:
-    EigenConformable(EigenIndex r, EigenIndex c,
-            EigenIndex rstride, EigenIndex cstride) :
-        conformable{true}, rows{r}, cols{c} {
-        // TODO: when Eigen bug #747 is fixed, remove the tests for non-negativity. http://eigen.tuxfamily.org/bz/show_bug.cgi?id=747
-        if (rstride < 0 || cstride < 0) {
-            negativestrides = true;
-        } else {
-            stride = {EigenRowMajor ? rstride : cstride /* outer stride */,
-                      EigenRowMajor ? cstride : rstride /* inner stride */ };
-        }
-    }
-    // Vector type:
-    EigenConformable(EigenIndex r, EigenIndex c, EigenIndex stride)
-        : EigenConformable(r, c, r == 1 ? c*stride : stride, c == 1 ? r : r*stride) {}
-
-    template <typename props> bool stride_compatible() const {
-        // To have compatible strides, we need (on both dimensions) one of fully dynamic strides,
-        // matching strides, or a dimension size of 1 (in which case the stride value is irrelevant)
-        return
-            !negativestrides &&
-            (props::inner_stride == Eigen::Dynamic || props::inner_stride == stride.inner() ||
-                (EigenRowMajor ? cols : rows) == 1) &&
-            (props::outer_stride == Eigen::Dynamic || props::outer_stride == stride.outer() ||
-                (EigenRowMajor ? rows : cols) == 1);
-    }
-    // NOLINTNEXTLINE(google-explicit-constructor)
-    operator bool() const { return conformable; }
-};
-
-template <typename Type> struct eigen_extract_stride { using type = Type; };
-template <typename PlainObjectType, int MapOptions, typename StrideType>
-struct eigen_extract_stride<Eigen::Map<PlainObjectType, MapOptions, StrideType>> { using type = StrideType; };
-template <typename PlainObjectType, int Options, typename StrideType>
-struct eigen_extract_stride<Eigen::Ref<PlainObjectType, Options, StrideType>> { using type = StrideType; };
-
-// Helper struct for extracting information from an Eigen type
-template <typename Type_> struct EigenProps {
-    using Type = Type_;
-    using Scalar = typename Type::Scalar;
-    using StrideType = typename eigen_extract_stride<Type>::type;
-    static constexpr EigenIndex
-        rows = Type::RowsAtCompileTime,
-        cols = Type::ColsAtCompileTime,
-        size = Type::SizeAtCompileTime;
-    static constexpr bool
-        row_major = Type::IsRowMajor,
-        vector = Type::IsVectorAtCompileTime, // At least one dimension has fixed size 1
-        fixed_rows = rows != Eigen::Dynamic,
-        fixed_cols = cols != Eigen::Dynamic,
-        fixed = size != Eigen::Dynamic, // Fully-fixed size
-        dynamic = !fixed_rows && !fixed_cols; // Fully-dynamic size
-
-    template <EigenIndex i, EigenIndex ifzero> using if_zero = std::integral_constant<EigenIndex, i == 0 ? ifzero : i>;
-    static constexpr EigenIndex inner_stride = if_zero<StrideType::InnerStrideAtCompileTime, 1>::value,
-                                outer_stride = if_zero<StrideType::OuterStrideAtCompileTime,
-                                                       vector ? size : row_major ? cols : rows>::value;
-    static constexpr bool dynamic_stride = inner_stride == Eigen::Dynamic && outer_stride == Eigen::Dynamic;
-    static constexpr bool requires_row_major = !dynamic_stride && !vector && (row_major ? inner_stride : outer_stride) == 1;
-    static constexpr bool requires_col_major = !dynamic_stride && !vector && (row_major ? outer_stride : inner_stride) == 1;
-
-    // Takes an input array and determines whether we can make it fit into the Eigen type.  If
-    // the array is a vector, we attempt to fit it into either an Eigen 1xN or Nx1 vector
-    // (preferring the latter if it will fit in either, i.e. for a fully dynamic matrix type).
-    static EigenConformable<row_major> conformable(const array &a) {
-        const auto dims = a.ndim();
-        if (dims < 1 || dims > 2)
-            return false;
-
-        if (dims == 2) { // Matrix type: require exact match (or dynamic)
-
-            EigenIndex
-                np_rows = a.shape(0),
-                np_cols = a.shape(1),
-                np_rstride = a.strides(0) / static_cast<ssize_t>(sizeof(Scalar)),
-                np_cstride = a.strides(1) / static_cast<ssize_t>(sizeof(Scalar));
-            if ((PYBIND11_SILENCE_MSVC_C4127(fixed_rows) && np_rows != rows) ||
-                (PYBIND11_SILENCE_MSVC_C4127(fixed_cols) && np_cols != cols))
-                return false;
-
-            return {np_rows, np_cols, np_rstride, np_cstride};
-        }
-
-        // Otherwise we're storing an n-vector.  Only one of the strides will be used, but whichever
-        // is used, we want the (single) numpy stride value.
-        const EigenIndex n = a.shape(0),
-              stride = a.strides(0) / static_cast<ssize_t>(sizeof(Scalar));
-
-        if (vector) { // Eigen type is a compile-time vector
-            if (PYBIND11_SILENCE_MSVC_C4127(fixed) && size != n)
-                return false; // Vector size mismatch
-            return {rows == 1 ? 1 : n, cols == 1 ? 1 : n, stride};
-        }
-        if (fixed) {
-            // The type has a fixed size, but is not a vector: abort
-            return false;
-        }
-        if (fixed_cols) {
-            // Since this isn't a vector, cols must be != 1.  We allow this only if it exactly
-            // equals the number of elements (rows is Dynamic, and so 1 row is allowed).
-            if (cols != n) return false;
-            return {1, n, stride};
-        } // Otherwise it's either fully dynamic, or column dynamic; both become a column vector
-            if (PYBIND11_SILENCE_MSVC_C4127(fixed_rows) && rows != n) return false;
-            return {n, 1, stride};
-    }
-
-    static constexpr bool show_writeable = is_eigen_dense_map<Type>::value && is_eigen_mutable_map<Type>::value;
-    static constexpr bool show_order = is_eigen_dense_map<Type>::value;
-    static constexpr bool show_c_contiguous = show_order && requires_row_major;
-    static constexpr bool show_f_contiguous = !show_c_contiguous && show_order && requires_col_major;
-
-    static constexpr auto descriptor =
-        _("numpy.ndarray[") + npy_format_descriptor<Scalar>::name +
-        _("[")  + _<fixed_rows>(_<(size_t) rows>(), _("m")) +
-        _(", ") + _<fixed_cols>(_<(size_t) cols>(), _("n")) +
-        _("]") +
-        // For a reference type (e.g. Ref<MatrixXd>) we have other constraints that might need to be
-        // satisfied: writeable=True (for a mutable reference), and, depending on the map's stride
-        // options, possibly f_contiguous or c_contiguous.  We include them in the descriptor output
-        // to provide some hint as to why a TypeError is occurring (otherwise it can be confusing to
-        // see that a function accepts a 'numpy.ndarray[float64[3,2]]' and an error message that you
-        // *gave* a numpy.ndarray of the right type and dimensions.
-        _<show_writeable>(", flags.writeable", "") +
-        _<show_c_contiguous>(", flags.c_contiguous", "") +
-        _<show_f_contiguous>(", flags.f_contiguous", "") +
-        _("]");
-};
-
-// Casts an Eigen type to numpy array.  If given a base, the numpy array references the src data,
-// otherwise it'll make a copy.  writeable lets you turn off the writeable flag for the array.
-template <typename props> handle eigen_array_cast(typename props::Type const &src, handle base = handle(), bool writeable = true) {
-    constexpr ssize_t elem_size = sizeof(typename props::Scalar);
-    array a;
-    if (props::vector)
-        a = array({ src.size() }, { elem_size * src.innerStride() }, src.data(), base);
-    else
-        a = array({ src.rows(), src.cols() }, { elem_size * src.rowStride(), elem_size * src.colStride() },
-                  src.data(), base);
-
-    if (!writeable)
-        array_proxy(a.ptr())->flags &= ~detail::npy_api::NPY_ARRAY_WRITEABLE_;
-
-    return a.release();
-}
-
-// Takes an lvalue ref to some Eigen type and a (python) base object, creating a numpy array that
-// reference the Eigen object's data with `base` as the python-registered base class (if omitted,
-// the base will be set to None, and lifetime management is up to the caller).  The numpy array is
-// non-writeable if the given type is const.
-template <typename props, typename Type>
-handle eigen_ref_array(Type &src, handle parent = none()) {
-    // none here is to get past array's should-we-copy detection, which currently always
-    // copies when there is no base.  Setting the base to None should be harmless.
-    return eigen_array_cast<props>(src, parent, !std::is_const<Type>::value);
-}
-
-// Takes a pointer to some dense, plain Eigen type, builds a capsule around it, then returns a numpy
-// array that references the encapsulated data with a python-side reference to the capsule to tie
-// its destruction to that of any dependent python objects.  Const-ness is determined by whether or
-// not the Type of the pointer given is const.
-template <typename props, typename Type, typename = enable_if_t<is_eigen_dense_plain<Type>::value>>
-handle eigen_encapsulate(Type *src) {
-    capsule base(src, [](void *o) { delete static_cast<Type *>(o); });
-    return eigen_ref_array<props>(*src, base);
-}
-
-// Type caster for regular, dense matrix types (e.g. MatrixXd), but not maps/refs/etc. of dense
-// types.
-template<typename Type>
-struct type_caster<Type, enable_if_t<is_eigen_dense_plain<Type>::value>> {
-    using Scalar = typename Type::Scalar;
-    using props = EigenProps<Type>;
-
-    bool load(handle src, bool convert) {
-        // If we're in no-convert mode, only load if given an array of the correct type
-        if (!convert && !isinstance<array_t<Scalar>>(src))
-            return false;
-
-        // Coerce into an array, but don't do type conversion yet; the copy below handles it.
-        auto buf = array::ensure(src);
-
-        if (!buf)
-            return false;
-
-        auto dims = buf.ndim();
-        if (dims < 1 || dims > 2)
-            return false;
-
-        auto fits = props::conformable(buf);
-        if (!fits)
-            return false;
-
-        // Allocate the new type, then build a numpy reference into it
-        value = Type(fits.rows, fits.cols);
-        auto ref = reinterpret_steal<array>(eigen_ref_array<props>(value));
-        if (dims == 1) ref = ref.squeeze();
-        else if (ref.ndim() == 1) buf = buf.squeeze();
-
-        int result = detail::npy_api::get().PyArray_CopyInto_(ref.ptr(), buf.ptr());
-
-        if (result < 0) { // Copy failed!
-            PyErr_Clear();
-            return false;
-        }
-
-        return true;
-    }
-
-private:
-
-    // Cast implementation
-    template <typename CType>
-    static handle cast_impl(CType *src, return_value_policy policy, handle parent) {
-        switch (policy) {
-            case return_value_policy::take_ownership:
-            case return_value_policy::automatic:
-                return eigen_encapsulate<props>(src);
-            case return_value_policy::move:
-                return eigen_encapsulate<props>(new CType(std::move(*src)));
-            case return_value_policy::copy:
-                return eigen_array_cast<props>(*src);
-            case return_value_policy::reference:
-            case return_value_policy::automatic_reference:
-                return eigen_ref_array<props>(*src);
-            case return_value_policy::reference_internal:
-                return eigen_ref_array<props>(*src, parent);
-            default:
-                throw cast_error("unhandled return_value_policy: should not happen!");
-        };
-    }
-
-public:
-
-    // Normal returned non-reference, non-const value:
-    static handle cast(Type &&src, return_value_policy /* policy */, handle parent) {
-        return cast_impl(&src, return_value_policy::move, parent);
-    }
-    // If you return a non-reference const, we mark the numpy array readonly:
-    static handle cast(const Type &&src, return_value_policy /* policy */, handle parent) {
-        return cast_impl(&src, return_value_policy::move, parent);
-    }
-    // lvalue reference return; default (automatic) becomes copy
-    static handle cast(Type &src, return_value_policy policy, handle parent) {
-        if (policy == return_value_policy::automatic || policy == return_value_policy::automatic_reference)
-            policy = return_value_policy::copy;
-        return cast_impl(&src, policy, parent);
-    }
-    // const lvalue reference return; default (automatic) becomes copy
-    static handle cast(const Type &src, return_value_policy policy, handle parent) {
-        if (policy == return_value_policy::automatic || policy == return_value_policy::automatic_reference)
-            policy = return_value_policy::copy;
-        return cast(&src, policy, parent);
-    }
-    // non-const pointer return
-    static handle cast(Type *src, return_value_policy policy, handle parent) {
-        return cast_impl(src, policy, parent);
-    }
-    // const pointer return
-    static handle cast(const Type *src, return_value_policy policy, handle parent) {
-        return cast_impl(src, policy, parent);
-    }
-
-    static constexpr auto name = props::descriptor;
-
-    // NOLINTNEXTLINE(google-explicit-constructor)
-    operator Type*() { return &value; }
-    // NOLINTNEXTLINE(google-explicit-constructor)
-    operator Type&() { return value; }
-    // NOLINTNEXTLINE(google-explicit-constructor)
-    operator Type&&() && { return std::move(value); }
-    template <typename T> using cast_op_type = movable_cast_op_type<T>;
-
-private:
-    Type value;
-};
-
-// Base class for casting reference/map/block/etc. objects back to python.
-template <typename MapType> struct eigen_map_caster {
-private:
-    using props = EigenProps<MapType>;
-
-public:
-
-    // Directly referencing a ref/map's data is a bit dangerous (whatever the map/ref points to has
-    // to stay around), but we'll allow it under the assumption that you know what you're doing (and
-    // have an appropriate keep_alive in place).  We return a numpy array pointing directly at the
-    // ref's data (The numpy array ends up read-only if the ref was to a const matrix type.) Note
-    // that this means you need to ensure you don't destroy the object in some other way (e.g. with
-    // an appropriate keep_alive, or with a reference to a statically allocated matrix).
-    static handle cast(const MapType &src, return_value_policy policy, handle parent) {
-        switch (policy) {
-            case return_value_policy::copy:
-                return eigen_array_cast<props>(src);
-            case return_value_policy::reference_internal:
-                return eigen_array_cast<props>(src, parent, is_eigen_mutable_map<MapType>::value);
-            case return_value_policy::reference:
-            case return_value_policy::automatic:
-            case return_value_policy::automatic_reference:
-                return eigen_array_cast<props>(src, none(), is_eigen_mutable_map<MapType>::value);
-            default:
-                // move, take_ownership don't make any sense for a ref/map:
-                pybind11_fail("Invalid return_value_policy for Eigen Map/Ref/Block type");
-        }
-    }
-
-    static constexpr auto name = props::descriptor;
-
-    // Explicitly delete these: support python -> C++ conversion on these (i.e. these can be return
-    // types but not bound arguments).  We still provide them (with an explicitly delete) so that
-    // you end up here if you try anyway.
-    bool load(handle, bool) = delete;
-    operator MapType() = delete;
-    template <typename> using cast_op_type = MapType;
-};
-
-// We can return any map-like object (but can only load Refs, specialized next):
-template <typename Type> struct type_caster<Type, enable_if_t<is_eigen_dense_map<Type>::value>>
-    : eigen_map_caster<Type> {};
-
-// Loader for Ref<...> arguments.  See the documentation for info on how to make this work without
-// copying (it requires some extra effort in many cases).
-template <typename PlainObjectType, typename StrideType>
-struct type_caster<
-    Eigen::Ref<PlainObjectType, 0, StrideType>,
-    enable_if_t<is_eigen_dense_map<Eigen::Ref<PlainObjectType, 0, StrideType>>::value>
-> : public eigen_map_caster<Eigen::Ref<PlainObjectType, 0, StrideType>> {
-private:
-    using Type = Eigen::Ref<PlainObjectType, 0, StrideType>;
-    using props = EigenProps<Type>;
-    using Scalar = typename props::Scalar;
-    using MapType = Eigen::Map<PlainObjectType, 0, StrideType>;
-    using Array = array_t<Scalar, array::forcecast |
-                ((props::row_major ? props::inner_stride : props::outer_stride) == 1 ? array::c_style :
-                 (props::row_major ? props::outer_stride : props::inner_stride) == 1 ? array::f_style : 0)>;
-    static constexpr bool need_writeable = is_eigen_mutable_map<Type>::value;
-    // Delay construction (these have no default constructor)
-    std::unique_ptr<MapType> map;
-    std::unique_ptr<Type> ref;
-    // Our array.  When possible, this is just a numpy array pointing to the source data, but
-    // sometimes we can't avoid copying (e.g. input is not a numpy array at all, has an incompatible
-    // layout, or is an array of a type that needs to be converted).  Using a numpy temporary
-    // (rather than an Eigen temporary) saves an extra copy when we need both type conversion and
-    // storage order conversion.  (Note that we refuse to use this temporary copy when loading an
-    // argument for a Ref<M> with M non-const, i.e. a read-write reference).
-    Array copy_or_ref;
-public:
-    bool load(handle src, bool convert) {
-        // First check whether what we have is already an array of the right type.  If not, we can't
-        // avoid a copy (because the copy is also going to do type conversion).
-        bool need_copy = !isinstance<Array>(src);
-
-        EigenConformable<props::row_major> fits;
-        if (!need_copy) {
-            // We don't need a converting copy, but we also need to check whether the strides are
-            // compatible with the Ref's stride requirements
-            auto aref = reinterpret_borrow<Array>(src);
-
-            if (aref && (!need_writeable || aref.writeable())) {
-                fits = props::conformable(aref);
-                if (!fits) return false; // Incompatible dimensions
-                if (!fits.template stride_compatible<props>())
-                    need_copy = true;
-                else
-                    copy_or_ref = std::move(aref);
-            }
-            else {
-                need_copy = true;
-            }
-        }
-
-        if (need_copy) {
-            // We need to copy: If we need a mutable reference, or we're not supposed to convert
-            // (either because we're in the no-convert overload pass, or because we're explicitly
-            // instructed not to copy (via `py::arg().noconvert()`) we have to fail loading.
-            if (!convert || need_writeable) return false;
-
-            Array copy = Array::ensure(src);
-            if (!copy) return false;
-            fits = props::conformable(copy);
-            if (!fits || !fits.template stride_compatible<props>())
-                return false;
-            copy_or_ref = std::move(copy);
-            loader_life_support::add_patient(copy_or_ref);
-        }
-
-        ref.reset();
-        map.reset(new MapType(data(copy_or_ref), fits.rows, fits.cols, make_stride(fits.stride.outer(), fits.stride.inner())));
-        ref.reset(new Type(*map));
-
-        return true;
-    }
-
-    // NOLINTNEXTLINE(google-explicit-constructor)
-    operator Type*() { return ref.get(); }
-    // NOLINTNEXTLINE(google-explicit-constructor)
-    operator Type&() { return *ref; }
-    template <typename _T> using cast_op_type = pybind11::detail::cast_op_type<_T>;
-
-private:
-    template <typename T = Type, enable_if_t<is_eigen_mutable_map<T>::value, int> = 0>
-    Scalar *data(Array &a) { return a.mutable_data(); }
-
-    template <typename T = Type, enable_if_t<!is_eigen_mutable_map<T>::value, int> = 0>
-    const Scalar *data(Array &a) { return a.data(); }
-
-    // Attempt to figure out a constructor of `Stride` that will work.
-    // If both strides are fixed, use a default constructor:
-    template <typename S> using stride_ctor_default = bool_constant<
-        S::InnerStrideAtCompileTime != Eigen::Dynamic && S::OuterStrideAtCompileTime != Eigen::Dynamic &&
-        std::is_default_constructible<S>::value>;
-    // Otherwise, if there is a two-index constructor, assume it is (outer,inner) like
-    // Eigen::Stride, and use it:
-    template <typename S> using stride_ctor_dual = bool_constant<
-        !stride_ctor_default<S>::value && std::is_constructible<S, EigenIndex, EigenIndex>::value>;
-    // Otherwise, if there is a one-index constructor, and just one of the strides is dynamic, use
-    // it (passing whichever stride is dynamic).
-    template <typename S> using stride_ctor_outer = bool_constant<
-        !any_of<stride_ctor_default<S>, stride_ctor_dual<S>>::value &&
-        S::OuterStrideAtCompileTime == Eigen::Dynamic && S::InnerStrideAtCompileTime != Eigen::Dynamic &&
-        std::is_constructible<S, EigenIndex>::value>;
-    template <typename S> using stride_ctor_inner = bool_constant<
-        !any_of<stride_ctor_default<S>, stride_ctor_dual<S>>::value &&
-        S::InnerStrideAtCompileTime == Eigen::Dynamic && S::OuterStrideAtCompileTime != Eigen::Dynamic &&
-        std::is_constructible<S, EigenIndex>::value>;
-
-    template <typename S = StrideType, enable_if_t<stride_ctor_default<S>::value, int> = 0>
-    static S make_stride(EigenIndex, EigenIndex) { return S(); }
-    template <typename S = StrideType, enable_if_t<stride_ctor_dual<S>::value, int> = 0>
-    static S make_stride(EigenIndex outer, EigenIndex inner) { return S(outer, inner); }
-    template <typename S = StrideType, enable_if_t<stride_ctor_outer<S>::value, int> = 0>
-    static S make_stride(EigenIndex outer, EigenIndex) { return S(outer); }
-    template <typename S = StrideType, enable_if_t<stride_ctor_inner<S>::value, int> = 0>
-    static S make_stride(EigenIndex, EigenIndex inner) { return S(inner); }
-
-};
-
-// type_caster for special matrix types (e.g. DiagonalMatrix), which are EigenBase, but not
-// EigenDense (i.e. they don't have a data(), at least not with the usual matrix layout).
-// load() is not supported, but we can cast them into the python domain by first copying to a
-// regular Eigen::Matrix, then casting that.
-template <typename Type>
-struct type_caster<Type, enable_if_t<is_eigen_other<Type>::value>> {
-protected:
-    using Matrix = Eigen::Matrix<typename Type::Scalar, Type::RowsAtCompileTime, Type::ColsAtCompileTime>;
-    using props = EigenProps<Matrix>;
-public:
-    static handle cast(const Type &src, return_value_policy /* policy */, handle /* parent */) {
-        handle h = eigen_encapsulate<props>(new Matrix(src));
-        return h;
-    }
-    static handle cast(const Type *src, return_value_policy policy, handle parent) { return cast(*src, policy, parent); }
-
-    static constexpr auto name = props::descriptor;
-
-    // Explicitly delete these: support python -> C++ conversion on these (i.e. these can be return
-    // types but not bound arguments).  We still provide them (with an explicitly delete) so that
-    // you end up here if you try anyway.
-    bool load(handle, bool) = delete;
-    operator Type() = delete;
-    template <typename> using cast_op_type = Type;
-};
-
-template<typename Type>
-struct type_caster<Type, enable_if_t<is_eigen_sparse<Type>::value>> {
-    using Scalar = typename Type::Scalar;
-    using StorageIndex = remove_reference_t<decltype(*std::declval<Type>().outerIndexPtr())>;
-    using Index = typename Type::Index;
-    static constexpr bool rowMajor = Type::IsRowMajor;
-
-    bool load(handle src, bool) {
-        if (!src)
-            return false;
-
-        auto obj = reinterpret_borrow<object>(src);
-        object sparse_module = module_::import("scipy.sparse");
-        object matrix_type = sparse_module.attr(
-            rowMajor ? "csr_matrix" : "csc_matrix");
-
-        if (!type::handle_of(obj).is(matrix_type)) {
-            try {
-                obj = matrix_type(obj);
-            } catch (const error_already_set &) {
-                return false;
-            }
-        }
-
-        auto values = array_t<Scalar>((object) obj.attr("data"));
-        auto innerIndices = array_t<StorageIndex>((object) obj.attr("indices"));
-        auto outerIndices = array_t<StorageIndex>((object) obj.attr("indptr"));
-        auto shape = pybind11::tuple((pybind11::object) obj.attr("shape"));
-        auto nnz = obj.attr("nnz").cast<Index>();
-
-        if (!values || !innerIndices || !outerIndices)
-            return false;
-
-        value = Eigen::MappedSparseMatrix<Scalar,
-                                          Type::Flags & (Eigen::RowMajor | Eigen::ColMajor),
-                                          StorageIndex>(
-            shape[0].cast<Index>(), shape[1].cast<Index>(), nnz,
-            outerIndices.mutable_data(), innerIndices.mutable_data(), values.mutable_data());
-
-        return true;
-    }
-
-    static handle cast(const Type &src, return_value_policy /* policy */, handle /* parent */) {
-        const_cast<Type&>(src).makeCompressed();
-
-        object matrix_type = module_::import("scipy.sparse").attr(
-            rowMajor ? "csr_matrix" : "csc_matrix");
-
-        array data(src.nonZeros(), src.valuePtr());
-        array outerIndices((rowMajor ? src.rows() : src.cols()) + 1, src.outerIndexPtr());
-        array innerIndices(src.nonZeros(), src.innerIndexPtr());
-
-        return matrix_type(
-            std::make_tuple(data, innerIndices, outerIndices),
-            std::make_pair(src.rows(), src.cols())
-        ).release();
-    }
-
-    PYBIND11_TYPE_CASTER(Type, _<(Type::IsRowMajor) != 0>("scipy.sparse.csr_matrix[", "scipy.sparse.csc_matrix[")
-            + npy_format_descriptor<Scalar>::name + _("]"));
-};
-
-PYBIND11_NAMESPACE_END(detail)
-PYBIND11_NAMESPACE_END(PYBIND11_NAMESPACE)
+#include "eigen/matrix.h"
diff --git a/ext/pybind11/include/pybind11/eigen/matrix.h b/ext/pybind11/include/pybind11/eigen/matrix.h
new file mode 100644
index 0000000000..34fe329a82
--- /dev/null
+++ b/ext/pybind11/include/pybind11/eigen/matrix.h
@@ -0,0 +1,699 @@
+/*
+    pybind11/eigen/matrix.h: Transparent conversion for dense and sparse Eigen matrices
+
+    Copyright (c) 2016 Wenzel Jakob <wenzel.jakob@epfl.ch>
+
+    All rights reserved. Use of this source code is governed by a
+    BSD-style license that can be found in the LICENSE file.
+*/
+
+#pragma once
+
+#include "../numpy.h"
+
+/* HINT: To suppress warnings originating from the Eigen headers, use -isystem.
+   See also:
+       https://stackoverflow.com/questions/2579576/i-dir-vs-isystem-dir
+       https://stackoverflow.com/questions/1741816/isystem-for-ms-visual-studio-c-compiler
+*/
+PYBIND11_WARNING_PUSH
+PYBIND11_WARNING_DISABLE_MSVC(5054) // https://github.com/pybind/pybind11/pull/3741
+//       C5054: operator '&': deprecated between enumerations of different types
+PYBIND11_WARNING_DISABLE_GCC("-Wmaybe-uninitialized")
+
+#include <Eigen/Core>
+#include <Eigen/SparseCore>
+
+PYBIND11_WARNING_POP
+
+// Eigen prior to 3.2.7 doesn't have proper move constructors--but worse, some classes get implicit
+// move constructors that break things.  We could detect this an explicitly copy, but an extra copy
+// of matrices seems highly undesirable.
+static_assert(EIGEN_VERSION_AT_LEAST(3, 2, 7),
+              "Eigen matrix support in pybind11 requires Eigen >= 3.2.7");
+
+PYBIND11_NAMESPACE_BEGIN(PYBIND11_NAMESPACE)
+
+PYBIND11_WARNING_DISABLE_MSVC(4127)
+
+// Provide a convenience alias for easier pass-by-ref usage with fully dynamic strides:
+using EigenDStride = Eigen::Stride<Eigen::Dynamic, Eigen::Dynamic>;
+template <typename MatrixType>
+using EigenDRef = Eigen::Ref<MatrixType, 0, EigenDStride>;
+template <typename MatrixType>
+using EigenDMap = Eigen::Map<MatrixType, 0, EigenDStride>;
+
+PYBIND11_NAMESPACE_BEGIN(detail)
+
+#if EIGEN_VERSION_AT_LEAST(3, 3, 0)
+using EigenIndex = Eigen::Index;
+template <typename Scalar, int Flags, typename StorageIndex>
+using EigenMapSparseMatrix = Eigen::Map<Eigen::SparseMatrix<Scalar, Flags, StorageIndex>>;
+#else
+using EigenIndex = EIGEN_DEFAULT_DENSE_INDEX_TYPE;
+template <typename Scalar, int Flags, typename StorageIndex>
+using EigenMapSparseMatrix = Eigen::MappedSparseMatrix<Scalar, Flags, StorageIndex>;
+#endif
+
+// Matches Eigen::Map, Eigen::Ref, blocks, etc:
+template <typename T>
+using is_eigen_dense_map = all_of<is_template_base_of<Eigen::DenseBase, T>,
+                                  std::is_base_of<Eigen::MapBase<T, Eigen::ReadOnlyAccessors>, T>>;
+template <typename T>
+using is_eigen_mutable_map = std::is_base_of<Eigen::MapBase<T, Eigen::WriteAccessors>, T>;
+template <typename T>
+using is_eigen_dense_plain
+    = all_of<negation<is_eigen_dense_map<T>>, is_template_base_of<Eigen::PlainObjectBase, T>>;
+template <typename T>
+using is_eigen_sparse = is_template_base_of<Eigen::SparseMatrixBase, T>;
+// Test for objects inheriting from EigenBase<Derived> that aren't captured by the above.  This
+// basically covers anything that can be assigned to a dense matrix but that don't have a typical
+// matrix data layout that can be copied from their .data().  For example, DiagonalMatrix and
+// SelfAdjointView fall into this category.
+template <typename T>
+using is_eigen_other
+    = all_of<is_template_base_of<Eigen::EigenBase, T>,
+             negation<any_of<is_eigen_dense_map<T>, is_eigen_dense_plain<T>, is_eigen_sparse<T>>>>;
+
+// Captures numpy/eigen conformability status (returned by EigenProps::conformable()):
+template <bool EigenRowMajor>
+struct EigenConformable {
+    bool conformable = false;
+    EigenIndex rows = 0, cols = 0;
+    EigenDStride stride{0, 0};    // Only valid if negativestrides is false!
+    bool negativestrides = false; // If true, do not use stride!
+
+    // NOLINTNEXTLINE(google-explicit-constructor)
+    EigenConformable(bool fits = false) : conformable{fits} {}
+    // Matrix type:
+    EigenConformable(EigenIndex r, EigenIndex c, EigenIndex rstride, EigenIndex cstride)
+        : conformable{true}, rows{r}, cols{c},
+          // TODO: when Eigen bug #747 is fixed, remove the tests for non-negativity.
+          // http://eigen.tuxfamily.org/bz/show_bug.cgi?id=747
+          stride{EigenRowMajor ? (rstride > 0 ? rstride : 0)
+                               : (cstride > 0 ? cstride : 0) /* outer stride */,
+                 EigenRowMajor ? (cstride > 0 ? cstride : 0)
+                               : (rstride > 0 ? rstride : 0) /* inner stride */},
+          negativestrides{rstride < 0 || cstride < 0} {}
+    // Vector type:
+    EigenConformable(EigenIndex r, EigenIndex c, EigenIndex stride)
+        : EigenConformable(r, c, r == 1 ? c * stride : stride, c == 1 ? r : r * stride) {}
+
+    template <typename props>
+    bool stride_compatible() const {
+        // To have compatible strides, we need (on both dimensions) one of fully dynamic strides,
+        // matching strides, or a dimension size of 1 (in which case the stride value is
+        // irrelevant). Alternatively, if any dimension size is 0, the strides are not relevant
+        // (and numpy ≥ 1.23 sets the strides to 0 in that case, so we need to check explicitly).
+        if (negativestrides) {
+            return false;
+        }
+        if (rows == 0 || cols == 0) {
+            return true;
+        }
+        return (props::inner_stride == Eigen::Dynamic || props::inner_stride == stride.inner()
+                || (EigenRowMajor ? cols : rows) == 1)
+               && (props::outer_stride == Eigen::Dynamic || props::outer_stride == stride.outer()
+                   || (EigenRowMajor ? rows : cols) == 1);
+    }
+    // NOLINTNEXTLINE(google-explicit-constructor)
+    operator bool() const { return conformable; }
+};
+
+template <typename Type>
+struct eigen_extract_stride {
+    using type = Type;
+};
+template <typename PlainObjectType, int MapOptions, typename StrideType>
+struct eigen_extract_stride<Eigen::Map<PlainObjectType, MapOptions, StrideType>> {
+    using type = StrideType;
+};
+template <typename PlainObjectType, int Options, typename StrideType>
+struct eigen_extract_stride<Eigen::Ref<PlainObjectType, Options, StrideType>> {
+    using type = StrideType;
+};
+
+// Helper struct for extracting information from an Eigen type
+template <typename Type_>
+struct EigenProps {
+    using Type = Type_;
+    using Scalar = typename Type::Scalar;
+    using StrideType = typename eigen_extract_stride<Type>::type;
+    static constexpr EigenIndex rows = Type::RowsAtCompileTime, cols = Type::ColsAtCompileTime,
+                                size = Type::SizeAtCompileTime;
+    static constexpr bool row_major = Type::IsRowMajor,
+                          vector
+                          = Type::IsVectorAtCompileTime, // At least one dimension has fixed size 1
+        fixed_rows = rows != Eigen::Dynamic, fixed_cols = cols != Eigen::Dynamic,
+                          fixed = size != Eigen::Dynamic, // Fully-fixed size
+        dynamic = !fixed_rows && !fixed_cols;             // Fully-dynamic size
+
+    template <EigenIndex i, EigenIndex ifzero>
+    using if_zero = std::integral_constant<EigenIndex, i == 0 ? ifzero : i>;
+    static constexpr EigenIndex inner_stride
+        = if_zero<StrideType::InnerStrideAtCompileTime, 1>::value,
+        outer_stride = if_zero < StrideType::OuterStrideAtCompileTime,
+        vector      ? size
+        : row_major ? cols
+                    : rows > ::value;
+    static constexpr bool dynamic_stride
+        = inner_stride == Eigen::Dynamic && outer_stride == Eigen::Dynamic;
+    static constexpr bool requires_row_major
+        = !dynamic_stride && !vector && (row_major ? inner_stride : outer_stride) == 1;
+    static constexpr bool requires_col_major
+        = !dynamic_stride && !vector && (row_major ? outer_stride : inner_stride) == 1;
+
+    // Takes an input array and determines whether we can make it fit into the Eigen type.  If
+    // the array is a vector, we attempt to fit it into either an Eigen 1xN or Nx1 vector
+    // (preferring the latter if it will fit in either, i.e. for a fully dynamic matrix type).
+    static EigenConformable<row_major> conformable(const array &a) {
+        const auto dims = a.ndim();
+        if (dims < 1 || dims > 2) {
+            return false;
+        }
+
+        if (dims == 2) { // Matrix type: require exact match (or dynamic)
+
+            EigenIndex np_rows = a.shape(0), np_cols = a.shape(1),
+                       np_rstride = a.strides(0) / static_cast<ssize_t>(sizeof(Scalar)),
+                       np_cstride = a.strides(1) / static_cast<ssize_t>(sizeof(Scalar));
+            if ((fixed_rows && np_rows != rows) || (fixed_cols && np_cols != cols)) {
+                return false;
+            }
+
+            return {np_rows, np_cols, np_rstride, np_cstride};
+        }
+
+        // Otherwise we're storing an n-vector.  Only one of the strides will be used, but
+        // whichever is used, we want the (single) numpy stride value.
+        const EigenIndex n = a.shape(0),
+                         stride = a.strides(0) / static_cast<ssize_t>(sizeof(Scalar));
+
+        if (vector) { // Eigen type is a compile-time vector
+            if (fixed && size != n) {
+                return false; // Vector size mismatch
+            }
+            return {rows == 1 ? 1 : n, cols == 1 ? 1 : n, stride};
+        }
+        if (fixed) {
+            // The type has a fixed size, but is not a vector: abort
+            return false;
+        }
+        if (fixed_cols) {
+            // Since this isn't a vector, cols must be != 1.  We allow this only if it exactly
+            // equals the number of elements (rows is Dynamic, and so 1 row is allowed).
+            if (cols != n) {
+                return false;
+            }
+            return {1, n, stride};
+        } // Otherwise it's either fully dynamic, or column dynamic; both become a column vector
+        if (fixed_rows && rows != n) {
+            return false;
+        }
+        return {n, 1, stride};
+    }
+
+    static constexpr bool show_writeable
+        = is_eigen_dense_map<Type>::value && is_eigen_mutable_map<Type>::value;
+    static constexpr bool show_order = is_eigen_dense_map<Type>::value;
+    static constexpr bool show_c_contiguous = show_order && requires_row_major;
+    static constexpr bool show_f_contiguous
+        = !show_c_contiguous && show_order && requires_col_major;
+
+    static constexpr auto descriptor
+        = const_name("numpy.ndarray[") + npy_format_descriptor<Scalar>::name + const_name("[")
+          + const_name<fixed_rows>(const_name<(size_t) rows>(), const_name("m")) + const_name(", ")
+          + const_name<fixed_cols>(const_name<(size_t) cols>(), const_name("n")) + const_name("]")
+          +
+          // For a reference type (e.g. Ref<MatrixXd>) we have other constraints that might need to
+          // be satisfied: writeable=True (for a mutable reference), and, depending on the map's
+          // stride options, possibly f_contiguous or c_contiguous.  We include them in the
+          // descriptor output to provide some hint as to why a TypeError is occurring (otherwise
+          // it can be confusing to see that a function accepts a 'numpy.ndarray[float64[3,2]]' and
+          // an error message that you *gave* a numpy.ndarray of the right type and dimensions.
+          const_name<show_writeable>(", flags.writeable", "")
+          + const_name<show_c_contiguous>(", flags.c_contiguous", "")
+          + const_name<show_f_contiguous>(", flags.f_contiguous", "") + const_name("]");
+};
+
+// Casts an Eigen type to numpy array.  If given a base, the numpy array references the src data,
+// otherwise it'll make a copy.  writeable lets you turn off the writeable flag for the array.
+template <typename props>
+handle
+eigen_array_cast(typename props::Type const &src, handle base = handle(), bool writeable = true) {
+    constexpr ssize_t elem_size = sizeof(typename props::Scalar);
+    array a;
+    if (props::vector) {
+        a = array({src.size()}, {elem_size * src.innerStride()}, src.data(), base);
+    } else {
+        a = array({src.rows(), src.cols()},
+                  {elem_size * src.rowStride(), elem_size * src.colStride()},
+                  src.data(),
+                  base);
+    }
+
+    if (!writeable) {
+        array_proxy(a.ptr())->flags &= ~detail::npy_api::NPY_ARRAY_WRITEABLE_;
+    }
+
+    return a.release();
+}
+
+// Takes an lvalue ref to some Eigen type and a (python) base object, creating a numpy array that
+// reference the Eigen object's data with `base` as the python-registered base class (if omitted,
+// the base will be set to None, and lifetime management is up to the caller).  The numpy array is
+// non-writeable if the given type is const.
+template <typename props, typename Type>
+handle eigen_ref_array(Type &src, handle parent = none()) {
+    // none here is to get past array's should-we-copy detection, which currently always
+    // copies when there is no base.  Setting the base to None should be harmless.
+    return eigen_array_cast<props>(src, parent, !std::is_const<Type>::value);
+}
+
+// Takes a pointer to some dense, plain Eigen type, builds a capsule around it, then returns a
+// numpy array that references the encapsulated data with a python-side reference to the capsule to
+// tie its destruction to that of any dependent python objects.  Const-ness is determined by
+// whether or not the Type of the pointer given is const.
+template <typename props, typename Type, typename = enable_if_t<is_eigen_dense_plain<Type>::value>>
+handle eigen_encapsulate(Type *src) {
+    capsule base(src, [](void *o) { delete static_cast<Type *>(o); });
+    return eigen_ref_array<props>(*src, base);
+}
+
+// Type caster for regular, dense matrix types (e.g. MatrixXd), but not maps/refs/etc. of dense
+// types.
+template <typename Type>
+struct type_caster<Type, enable_if_t<is_eigen_dense_plain<Type>::value>> {
+    using Scalar = typename Type::Scalar;
+    using props = EigenProps<Type>;
+
+    bool load(handle src, bool convert) {
+        // If we're in no-convert mode, only load if given an array of the correct type
+        if (!convert && !isinstance<array_t<Scalar>>(src)) {
+            return false;
+        }
+
+        // Coerce into an array, but don't do type conversion yet; the copy below handles it.
+        auto buf = array::ensure(src);
+
+        if (!buf) {
+            return false;
+        }
+
+        auto dims = buf.ndim();
+        if (dims < 1 || dims > 2) {
+            return false;
+        }
+
+        auto fits = props::conformable(buf);
+        if (!fits) {
+            return false;
+        }
+
+        // Allocate the new type, then build a numpy reference into it
+        value = Type(fits.rows, fits.cols);
+        auto ref = reinterpret_steal<array>(eigen_ref_array<props>(value));
+        if (dims == 1) {
+            ref = ref.squeeze();
+        } else if (ref.ndim() == 1) {
+            buf = buf.squeeze();
+        }
+
+        int result = detail::npy_api::get().PyArray_CopyInto_(ref.ptr(), buf.ptr());
+
+        if (result < 0) { // Copy failed!
+            PyErr_Clear();
+            return false;
+        }
+
+        return true;
+    }
+
+private:
+    // Cast implementation
+    template <typename CType>
+    static handle cast_impl(CType *src, return_value_policy policy, handle parent) {
+        switch (policy) {
+            case return_value_policy::take_ownership:
+            case return_value_policy::automatic:
+                return eigen_encapsulate<props>(src);
+            case return_value_policy::move:
+                return eigen_encapsulate<props>(new CType(std::move(*src)));
+            case return_value_policy::copy:
+                return eigen_array_cast<props>(*src);
+            case return_value_policy::reference:
+            case return_value_policy::automatic_reference:
+                return eigen_ref_array<props>(*src);
+            case return_value_policy::reference_internal:
+                return eigen_ref_array<props>(*src, parent);
+            default:
+                throw cast_error("unhandled return_value_policy: should not happen!");
+        };
+    }
+
+public:
+    // Normal returned non-reference, non-const value:
+    static handle cast(Type &&src, return_value_policy /* policy */, handle parent) {
+        return cast_impl(&src, return_value_policy::move, parent);
+    }
+    // If you return a non-reference const, we mark the numpy array readonly:
+    static handle cast(const Type &&src, return_value_policy /* policy */, handle parent) {
+        return cast_impl(&src, return_value_policy::move, parent);
+    }
+    // lvalue reference return; default (automatic) becomes copy
+    static handle cast(Type &src, return_value_policy policy, handle parent) {
+        if (policy == return_value_policy::automatic
+            || policy == return_value_policy::automatic_reference) {
+            policy = return_value_policy::copy;
+        }
+        return cast_impl(&src, policy, parent);
+    }
+    // const lvalue reference return; default (automatic) becomes copy
+    static handle cast(const Type &src, return_value_policy policy, handle parent) {
+        if (policy == return_value_policy::automatic
+            || policy == return_value_policy::automatic_reference) {
+            policy = return_value_policy::copy;
+        }
+        return cast(&src, policy, parent);
+    }
+    // non-const pointer return
+    static handle cast(Type *src, return_value_policy policy, handle parent) {
+        return cast_impl(src, policy, parent);
+    }
+    // const pointer return
+    static handle cast(const Type *src, return_value_policy policy, handle parent) {
+        return cast_impl(src, policy, parent);
+    }
+
+    static constexpr auto name = props::descriptor;
+
+    // NOLINTNEXTLINE(google-explicit-constructor)
+    operator Type *() { return &value; }
+    // NOLINTNEXTLINE(google-explicit-constructor)
+    operator Type &() { return value; }
+    // NOLINTNEXTLINE(google-explicit-constructor)
+    operator Type &&() && { return std::move(value); }
+    template <typename T>
+    using cast_op_type = movable_cast_op_type<T>;
+
+private:
+    Type value;
+};
+
+// Base class for casting reference/map/block/etc. objects back to python.
+template <typename MapType>
+struct eigen_map_caster {
+private:
+    using props = EigenProps<MapType>;
+
+public:
+    // Directly referencing a ref/map's data is a bit dangerous (whatever the map/ref points to has
+    // to stay around), but we'll allow it under the assumption that you know what you're doing
+    // (and have an appropriate keep_alive in place).  We return a numpy array pointing directly at
+    // the ref's data (The numpy array ends up read-only if the ref was to a const matrix type.)
+    // Note that this means you need to ensure you don't destroy the object in some other way (e.g.
+    // with an appropriate keep_alive, or with a reference to a statically allocated matrix).
+    static handle cast(const MapType &src, return_value_policy policy, handle parent) {
+        switch (policy) {
+            case return_value_policy::copy:
+                return eigen_array_cast<props>(src);
+            case return_value_policy::reference_internal:
+                return eigen_array_cast<props>(src, parent, is_eigen_mutable_map<MapType>::value);
+            case return_value_policy::reference:
+            case return_value_policy::automatic:
+            case return_value_policy::automatic_reference:
+                return eigen_array_cast<props>(src, none(), is_eigen_mutable_map<MapType>::value);
+            default:
+                // move, take_ownership don't make any sense for a ref/map:
+                pybind11_fail("Invalid return_value_policy for Eigen Map/Ref/Block type");
+        }
+    }
+
+    static constexpr auto name = props::descriptor;
+
+    // Explicitly delete these: support python -> C++ conversion on these (i.e. these can be return
+    // types but not bound arguments).  We still provide them (with an explicitly delete) so that
+    // you end up here if you try anyway.
+    bool load(handle, bool) = delete;
+    operator MapType() = delete;
+    template <typename>
+    using cast_op_type = MapType;
+};
+
+// We can return any map-like object (but can only load Refs, specialized next):
+template <typename Type>
+struct type_caster<Type, enable_if_t<is_eigen_dense_map<Type>::value>> : eigen_map_caster<Type> {};
+
+// Loader for Ref<...> arguments.  See the documentation for info on how to make this work without
+// copying (it requires some extra effort in many cases).
+template <typename PlainObjectType, typename StrideType>
+struct type_caster<
+    Eigen::Ref<PlainObjectType, 0, StrideType>,
+    enable_if_t<is_eigen_dense_map<Eigen::Ref<PlainObjectType, 0, StrideType>>::value>>
+    : public eigen_map_caster<Eigen::Ref<PlainObjectType, 0, StrideType>> {
+private:
+    using Type = Eigen::Ref<PlainObjectType, 0, StrideType>;
+    using props = EigenProps<Type>;
+    using Scalar = typename props::Scalar;
+    using MapType = Eigen::Map<PlainObjectType, 0, StrideType>;
+    using Array
+        = array_t<Scalar,
+                  array::forcecast
+                      | ((props::row_major ? props::inner_stride : props::outer_stride) == 1
+                             ? array::c_style
+                         : (props::row_major ? props::outer_stride : props::inner_stride) == 1
+                             ? array::f_style
+                             : 0)>;
+    static constexpr bool need_writeable = is_eigen_mutable_map<Type>::value;
+    // Delay construction (these have no default constructor)
+    std::unique_ptr<MapType> map;
+    std::unique_ptr<Type> ref;
+    // Our array.  When possible, this is just a numpy array pointing to the source data, but
+    // sometimes we can't avoid copying (e.g. input is not a numpy array at all, has an
+    // incompatible layout, or is an array of a type that needs to be converted).  Using a numpy
+    // temporary (rather than an Eigen temporary) saves an extra copy when we need both type
+    // conversion and storage order conversion.  (Note that we refuse to use this temporary copy
+    // when loading an argument for a Ref<M> with M non-const, i.e. a read-write reference).
+    Array copy_or_ref;
+
+public:
+    bool load(handle src, bool convert) {
+        // First check whether what we have is already an array of the right type.  If not, we
+        // can't avoid a copy (because the copy is also going to do type conversion).
+        bool need_copy = !isinstance<Array>(src);
+
+        EigenConformable<props::row_major> fits;
+        if (!need_copy) {
+            // We don't need a converting copy, but we also need to check whether the strides are
+            // compatible with the Ref's stride requirements
+            auto aref = reinterpret_borrow<Array>(src);
+
+            if (aref && (!need_writeable || aref.writeable())) {
+                fits = props::conformable(aref);
+                if (!fits) {
+                    return false; // Incompatible dimensions
+                }
+                if (!fits.template stride_compatible<props>()) {
+                    need_copy = true;
+                } else {
+                    copy_or_ref = std::move(aref);
+                }
+            } else {
+                need_copy = true;
+            }
+        }
+
+        if (need_copy) {
+            // We need to copy: If we need a mutable reference, or we're not supposed to convert
+            // (either because we're in the no-convert overload pass, or because we're explicitly
+            // instructed not to copy (via `py::arg().noconvert()`) we have to fail loading.
+            if (!convert || need_writeable) {
+                return false;
+            }
+
+            Array copy = Array::ensure(src);
+            if (!copy) {
+                return false;
+            }
+            fits = props::conformable(copy);
+            if (!fits || !fits.template stride_compatible<props>()) {
+                return false;
+            }
+            copy_or_ref = std::move(copy);
+            loader_life_support::add_patient(copy_or_ref);
+        }
+
+        ref.reset();
+        map.reset(new MapType(data(copy_or_ref),
+                              fits.rows,
+                              fits.cols,
+                              make_stride(fits.stride.outer(), fits.stride.inner())));
+        ref.reset(new Type(*map));
+
+        return true;
+    }
+
+    // NOLINTNEXTLINE(google-explicit-constructor)
+    operator Type *() { return ref.get(); }
+    // NOLINTNEXTLINE(google-explicit-constructor)
+    operator Type &() { return *ref; }
+    template <typename _T>
+    using cast_op_type = pybind11::detail::cast_op_type<_T>;
+
+private:
+    template <typename T = Type, enable_if_t<is_eigen_mutable_map<T>::value, int> = 0>
+    Scalar *data(Array &a) {
+        return a.mutable_data();
+    }
+
+    template <typename T = Type, enable_if_t<!is_eigen_mutable_map<T>::value, int> = 0>
+    const Scalar *data(Array &a) {
+        return a.data();
+    }
+
+    // Attempt to figure out a constructor of `Stride` that will work.
+    // If both strides are fixed, use a default constructor:
+    template <typename S>
+    using stride_ctor_default = bool_constant<S::InnerStrideAtCompileTime != Eigen::Dynamic
+                                              && S::OuterStrideAtCompileTime != Eigen::Dynamic
+                                              && std::is_default_constructible<S>::value>;
+    // Otherwise, if there is a two-index constructor, assume it is (outer,inner) like
+    // Eigen::Stride, and use it:
+    template <typename S>
+    using stride_ctor_dual
+        = bool_constant<!stride_ctor_default<S>::value
+                        && std::is_constructible<S, EigenIndex, EigenIndex>::value>;
+    // Otherwise, if there is a one-index constructor, and just one of the strides is dynamic, use
+    // it (passing whichever stride is dynamic).
+    template <typename S>
+    using stride_ctor_outer
+        = bool_constant<!any_of<stride_ctor_default<S>, stride_ctor_dual<S>>::value
+                        && S::OuterStrideAtCompileTime == Eigen::Dynamic
+                        && S::InnerStrideAtCompileTime != Eigen::Dynamic
+                        && std::is_constructible<S, EigenIndex>::value>;
+    template <typename S>
+    using stride_ctor_inner
+        = bool_constant<!any_of<stride_ctor_default<S>, stride_ctor_dual<S>>::value
+                        && S::InnerStrideAtCompileTime == Eigen::Dynamic
+                        && S::OuterStrideAtCompileTime != Eigen::Dynamic
+                        && std::is_constructible<S, EigenIndex>::value>;
+
+    template <typename S = StrideType, enable_if_t<stride_ctor_default<S>::value, int> = 0>
+    static S make_stride(EigenIndex, EigenIndex) {
+        return S();
+    }
+    template <typename S = StrideType, enable_if_t<stride_ctor_dual<S>::value, int> = 0>
+    static S make_stride(EigenIndex outer, EigenIndex inner) {
+        return S(outer, inner);
+    }
+    template <typename S = StrideType, enable_if_t<stride_ctor_outer<S>::value, int> = 0>
+    static S make_stride(EigenIndex outer, EigenIndex) {
+        return S(outer);
+    }
+    template <typename S = StrideType, enable_if_t<stride_ctor_inner<S>::value, int> = 0>
+    static S make_stride(EigenIndex, EigenIndex inner) {
+        return S(inner);
+    }
+};
+
+// type_caster for special matrix types (e.g. DiagonalMatrix), which are EigenBase, but not
+// EigenDense (i.e. they don't have a data(), at least not with the usual matrix layout).
+// load() is not supported, but we can cast them into the python domain by first copying to a
+// regular Eigen::Matrix, then casting that.
+template <typename Type>
+struct type_caster<Type, enable_if_t<is_eigen_other<Type>::value>> {
+protected:
+    using Matrix
+        = Eigen::Matrix<typename Type::Scalar, Type::RowsAtCompileTime, Type::ColsAtCompileTime>;
+    using props = EigenProps<Matrix>;
+
+public:
+    static handle cast(const Type &src, return_value_policy /* policy */, handle /* parent */) {
+        handle h = eigen_encapsulate<props>(new Matrix(src));
+        return h;
+    }
+    static handle cast(const Type *src, return_value_policy policy, handle parent) {
+        return cast(*src, policy, parent);
+    }
+
+    static constexpr auto name = props::descriptor;
+
+    // Explicitly delete these: support python -> C++ conversion on these (i.e. these can be return
+    // types but not bound arguments).  We still provide them (with an explicitly delete) so that
+    // you end up here if you try anyway.
+    bool load(handle, bool) = delete;
+    operator Type() = delete;
+    template <typename>
+    using cast_op_type = Type;
+};
+
+template <typename Type>
+struct type_caster<Type, enable_if_t<is_eigen_sparse<Type>::value>> {
+    using Scalar = typename Type::Scalar;
+    using StorageIndex = remove_reference_t<decltype(*std::declval<Type>().outerIndexPtr())>;
+    using Index = typename Type::Index;
+    static constexpr bool rowMajor = Type::IsRowMajor;
+
+    bool load(handle src, bool) {
+        if (!src) {
+            return false;
+        }
+
+        auto obj = reinterpret_borrow<object>(src);
+        object sparse_module = module_::import("scipy.sparse");
+        object matrix_type = sparse_module.attr(rowMajor ? "csr_matrix" : "csc_matrix");
+
+        if (!type::handle_of(obj).is(matrix_type)) {
+            try {
+                obj = matrix_type(obj);
+            } catch (const error_already_set &) {
+                return false;
+            }
+        }
+
+        auto values = array_t<Scalar>((object) obj.attr("data"));
+        auto innerIndices = array_t<StorageIndex>((object) obj.attr("indices"));
+        auto outerIndices = array_t<StorageIndex>((object) obj.attr("indptr"));
+        auto shape = pybind11::tuple((pybind11::object) obj.attr("shape"));
+        auto nnz = obj.attr("nnz").cast<Index>();
+
+        if (!values || !innerIndices || !outerIndices) {
+            return false;
+        }
+
+        value = EigenMapSparseMatrix<Scalar,
+                                     Type::Flags &(Eigen::RowMajor | Eigen::ColMajor),
+                                     StorageIndex>(shape[0].cast<Index>(),
+                                                   shape[1].cast<Index>(),
+                                                   std::move(nnz),
+                                                   outerIndices.mutable_data(),
+                                                   innerIndices.mutable_data(),
+                                                   values.mutable_data());
+
+        return true;
+    }
+
+    static handle cast(const Type &src, return_value_policy /* policy */, handle /* parent */) {
+        const_cast<Type &>(src).makeCompressed();
+
+        object matrix_type
+            = module_::import("scipy.sparse").attr(rowMajor ? "csr_matrix" : "csc_matrix");
+
+        array data(src.nonZeros(), src.valuePtr());
+        array outerIndices((rowMajor ? src.rows() : src.cols()) + 1, src.outerIndexPtr());
+        array innerIndices(src.nonZeros(), src.innerIndexPtr());
+
+        return matrix_type(pybind11::make_tuple(
+                               std::move(data), std::move(innerIndices), std::move(outerIndices)),
+                           pybind11::make_tuple(src.rows(), src.cols()))
+            .release();
+    }
+
+    PYBIND11_TYPE_CASTER(Type,
+                         const_name<(Type::IsRowMajor) != 0>("scipy.sparse.csr_matrix[",
+                                                             "scipy.sparse.csc_matrix[")
+                             + npy_format_descriptor<Scalar>::name + const_name("]"));
+};
+
+PYBIND11_NAMESPACE_END(detail)
+PYBIND11_NAMESPACE_END(PYBIND11_NAMESPACE)
diff --git a/ext/pybind11/include/pybind11/eigen/tensor.h b/ext/pybind11/include/pybind11/eigen/tensor.h
new file mode 100644
index 0000000000..0877da8953
--- /dev/null
+++ b/ext/pybind11/include/pybind11/eigen/tensor.h
@@ -0,0 +1,509 @@
+/*
+    pybind11/eigen/tensor.h: Transparent conversion for Eigen tensors
+
+    All rights reserved. Use of this source code is governed by a
+    BSD-style license that can be found in the LICENSE file.
+*/
+
+#pragma once
+
+#include "../numpy.h"
+
+#if defined(__GNUC__) && !defined(__clang__) && !defined(__INTEL_COMPILER)
+static_assert(__GNUC__ > 5, "Eigen Tensor support in pybind11 requires GCC > 5.0");
+#endif
+
+// Disable warnings for Eigen
+PYBIND11_WARNING_PUSH
+PYBIND11_WARNING_DISABLE_MSVC(4554)
+PYBIND11_WARNING_DISABLE_MSVC(4127)
+PYBIND11_WARNING_DISABLE_GCC("-Wmaybe-uninitialized")
+
+#include <unsupported/Eigen/CXX11/Tensor>
+
+PYBIND11_WARNING_POP
+
+static_assert(EIGEN_VERSION_AT_LEAST(3, 3, 0),
+              "Eigen Tensor support in pybind11 requires Eigen >= 3.3.0");
+
+PYBIND11_NAMESPACE_BEGIN(PYBIND11_NAMESPACE)
+
+PYBIND11_WARNING_DISABLE_MSVC(4127)
+
+PYBIND11_NAMESPACE_BEGIN(detail)
+
+inline bool is_tensor_aligned(const void *data) {
+    return (reinterpret_cast<std::size_t>(data) % EIGEN_DEFAULT_ALIGN_BYTES) == 0;
+}
+
+template <typename T>
+constexpr int compute_array_flag_from_tensor() {
+    static_assert((static_cast<int>(T::Layout) == static_cast<int>(Eigen::RowMajor))
+                      || (static_cast<int>(T::Layout) == static_cast<int>(Eigen::ColMajor)),
+                  "Layout must be row or column major");
+    return (static_cast<int>(T::Layout) == static_cast<int>(Eigen::RowMajor)) ? array::c_style
+                                                                              : array::f_style;
+}
+
+template <typename T>
+struct eigen_tensor_helper {};
+
+template <typename Scalar_, int NumIndices_, int Options_, typename IndexType>
+struct eigen_tensor_helper<Eigen::Tensor<Scalar_, NumIndices_, Options_, IndexType>> {
+    using Type = Eigen::Tensor<Scalar_, NumIndices_, Options_, IndexType>;
+    using ValidType = void;
+
+    static Eigen::DSizes<typename Type::Index, Type::NumIndices> get_shape(const Type &f) {
+        return f.dimensions();
+    }
+
+    static constexpr bool
+    is_correct_shape(const Eigen::DSizes<typename Type::Index, Type::NumIndices> & /*shape*/) {
+        return true;
+    }
+
+    template <typename T>
+    struct helper {};
+
+    template <size_t... Is>
+    struct helper<index_sequence<Is...>> {
+        static constexpr auto value = concat(const_name(((void) Is, "?"))...);
+    };
+
+    static constexpr auto dimensions_descriptor
+        = helper<decltype(make_index_sequence<Type::NumIndices>())>::value;
+
+    template <typename... Args>
+    static Type *alloc(Args &&...args) {
+        return new Type(std::forward<Args>(args)...);
+    }
+
+    static void free(Type *tensor) { delete tensor; }
+};
+
+template <typename Scalar_, typename std::ptrdiff_t... Indices, int Options_, typename IndexType>
+struct eigen_tensor_helper<
+    Eigen::TensorFixedSize<Scalar_, Eigen::Sizes<Indices...>, Options_, IndexType>> {
+    using Type = Eigen::TensorFixedSize<Scalar_, Eigen::Sizes<Indices...>, Options_, IndexType>;
+    using ValidType = void;
+
+    static constexpr Eigen::DSizes<typename Type::Index, Type::NumIndices>
+    get_shape(const Type & /*f*/) {
+        return get_shape();
+    }
+
+    static constexpr Eigen::DSizes<typename Type::Index, Type::NumIndices> get_shape() {
+        return Eigen::DSizes<typename Type::Index, Type::NumIndices>(Indices...);
+    }
+
+    static bool
+    is_correct_shape(const Eigen::DSizes<typename Type::Index, Type::NumIndices> &shape) {
+        return get_shape() == shape;
+    }
+
+    static constexpr auto dimensions_descriptor = concat(const_name<Indices>()...);
+
+    template <typename... Args>
+    static Type *alloc(Args &&...args) {
+        Eigen::aligned_allocator<Type> allocator;
+        return ::new (allocator.allocate(1)) Type(std::forward<Args>(args)...);
+    }
+
+    static void free(Type *tensor) {
+        Eigen::aligned_allocator<Type> allocator;
+        tensor->~Type();
+        allocator.deallocate(tensor, 1);
+    }
+};
+
+template <typename Type, bool ShowDetails, bool NeedsWriteable = false>
+struct get_tensor_descriptor {
+    static constexpr auto details
+        = const_name<NeedsWriteable>(", flags.writeable", "")
+          + const_name<static_cast<int>(Type::Layout) == static_cast<int>(Eigen::RowMajor)>(
+              ", flags.c_contiguous", ", flags.f_contiguous");
+    static constexpr auto value
+        = const_name("numpy.ndarray[") + npy_format_descriptor<typename Type::Scalar>::name
+          + const_name("[") + eigen_tensor_helper<remove_cv_t<Type>>::dimensions_descriptor
+          + const_name("]") + const_name<ShowDetails>(details, const_name("")) + const_name("]");
+};
+
+// When EIGEN_AVOID_STL_ARRAY is defined, Eigen::DSizes<T, 0> does not have the begin() member
+// function. Falling back to a simple loop works around this issue.
+//
+// We need to disable the type-limits warning for the inner loop when size = 0.
+
+PYBIND11_WARNING_PUSH
+PYBIND11_WARNING_DISABLE_GCC("-Wtype-limits")
+
+template <typename T, int size>
+std::vector<T> convert_dsizes_to_vector(const Eigen::DSizes<T, size> &arr) {
+    std::vector<T> result(size);
+
+    for (size_t i = 0; i < size; i++) {
+        result[i] = arr[i];
+    }
+
+    return result;
+}
+
+template <typename T, int size>
+Eigen::DSizes<T, size> get_shape_for_array(const array &arr) {
+    Eigen::DSizes<T, size> result;
+    const T *shape = arr.shape();
+    for (size_t i = 0; i < size; i++) {
+        result[i] = shape[i];
+    }
+
+    return result;
+}
+
+PYBIND11_WARNING_POP
+
+template <typename Type>
+struct type_caster<Type, typename eigen_tensor_helper<Type>::ValidType> {
+    using Helper = eigen_tensor_helper<Type>;
+    static constexpr auto temp_name = get_tensor_descriptor<Type, false>::value;
+    PYBIND11_TYPE_CASTER(Type, temp_name);
+
+    bool load(handle src, bool convert) {
+        if (!convert) {
+            if (!isinstance<array>(src)) {
+                return false;
+            }
+            array temp = array::ensure(src);
+            if (!temp) {
+                return false;
+            }
+
+            if (!temp.dtype().is(dtype::of<typename Type::Scalar>())) {
+                return false;
+            }
+        }
+
+        array_t<typename Type::Scalar, compute_array_flag_from_tensor<Type>()> arr(
+            reinterpret_borrow<object>(src));
+
+        if (arr.ndim() != Type::NumIndices) {
+            return false;
+        }
+        auto shape = get_shape_for_array<typename Type::Index, Type::NumIndices>(arr);
+
+        if (!Helper::is_correct_shape(shape)) {
+            return false;
+        }
+
+#if EIGEN_VERSION_AT_LEAST(3, 4, 0)
+        auto data_pointer = arr.data();
+#else
+        // Handle Eigen bug
+        auto data_pointer = const_cast<typename Type::Scalar *>(arr.data());
+#endif
+
+        if (is_tensor_aligned(arr.data())) {
+            value = Eigen::TensorMap<const Type, Eigen::Aligned>(data_pointer, shape);
+        } else {
+            value = Eigen::TensorMap<const Type>(data_pointer, shape);
+        }
+
+        return true;
+    }
+
+    static handle cast(Type &&src, return_value_policy policy, handle parent) {
+        if (policy == return_value_policy::reference
+            || policy == return_value_policy::reference_internal) {
+            pybind11_fail("Cannot use a reference return value policy for an rvalue");
+        }
+        return cast_impl(&src, return_value_policy::move, parent);
+    }
+
+    static handle cast(const Type &&src, return_value_policy policy, handle parent) {
+        if (policy == return_value_policy::reference
+            || policy == return_value_policy::reference_internal) {
+            pybind11_fail("Cannot use a reference return value policy for an rvalue");
+        }
+        return cast_impl(&src, return_value_policy::move, parent);
+    }
+
+    static handle cast(Type &src, return_value_policy policy, handle parent) {
+        if (policy == return_value_policy::automatic
+            || policy == return_value_policy::automatic_reference) {
+            policy = return_value_policy::copy;
+        }
+        return cast_impl(&src, policy, parent);
+    }
+
+    static handle cast(const Type &src, return_value_policy policy, handle parent) {
+        if (policy == return_value_policy::automatic
+            || policy == return_value_policy::automatic_reference) {
+            policy = return_value_policy::copy;
+        }
+        return cast(&src, policy, parent);
+    }
+
+    static handle cast(Type *src, return_value_policy policy, handle parent) {
+        if (policy == return_value_policy::automatic) {
+            policy = return_value_policy::take_ownership;
+        } else if (policy == return_value_policy::automatic_reference) {
+            policy = return_value_policy::reference;
+        }
+        return cast_impl(src, policy, parent);
+    }
+
+    static handle cast(const Type *src, return_value_policy policy, handle parent) {
+        if (policy == return_value_policy::automatic) {
+            policy = return_value_policy::take_ownership;
+        } else if (policy == return_value_policy::automatic_reference) {
+            policy = return_value_policy::reference;
+        }
+        return cast_impl(src, policy, parent);
+    }
+
+    template <typename C>
+    static handle cast_impl(C *src, return_value_policy policy, handle parent) {
+        object parent_object;
+        bool writeable = false;
+        switch (policy) {
+            case return_value_policy::move:
+                if (std::is_const<C>::value) {
+                    pybind11_fail("Cannot move from a constant reference");
+                }
+
+                src = Helper::alloc(std::move(*src));
+
+                parent_object
+                    = capsule(src, [](void *ptr) { Helper::free(reinterpret_cast<Type *>(ptr)); });
+                writeable = true;
+                break;
+
+            case return_value_policy::take_ownership:
+                if (std::is_const<C>::value) {
+                    // This cast is ugly, and might be UB in some cases, but we don't have an
+                    // alternative here as we must free that memory
+                    Helper::free(const_cast<Type *>(src));
+                    pybind11_fail("Cannot take ownership of a const reference");
+                }
+
+                parent_object
+                    = capsule(src, [](void *ptr) { Helper::free(reinterpret_cast<Type *>(ptr)); });
+                writeable = true;
+                break;
+
+            case return_value_policy::copy:
+                writeable = true;
+                break;
+
+            case return_value_policy::reference:
+                parent_object = none();
+                writeable = !std::is_const<C>::value;
+                break;
+
+            case return_value_policy::reference_internal:
+                // Default should do the right thing
+                if (!parent) {
+                    pybind11_fail("Cannot use reference internal when there is no parent");
+                }
+                parent_object = reinterpret_borrow<object>(parent);
+                writeable = !std::is_const<C>::value;
+                break;
+
+            default:
+                pybind11_fail("pybind11 bug in eigen.h, please file a bug report");
+        }
+
+        auto result = array_t<typename Type::Scalar, compute_array_flag_from_tensor<Type>()>(
+            convert_dsizes_to_vector(Helper::get_shape(*src)), src->data(), parent_object);
+
+        if (!writeable) {
+            array_proxy(result.ptr())->flags &= ~detail::npy_api::NPY_ARRAY_WRITEABLE_;
+        }
+
+        return result.release();
+    }
+};
+
+template <typename StoragePointerType,
+          bool needs_writeable,
+          enable_if_t<!needs_writeable, bool> = true>
+StoragePointerType get_array_data_for_type(array &arr) {
+#if EIGEN_VERSION_AT_LEAST(3, 4, 0)
+    return reinterpret_cast<StoragePointerType>(arr.data());
+#else
+    // Handle Eigen bug
+    return reinterpret_cast<StoragePointerType>(const_cast<void *>(arr.data()));
+#endif
+}
+
+template <typename StoragePointerType,
+          bool needs_writeable,
+          enable_if_t<needs_writeable, bool> = true>
+StoragePointerType get_array_data_for_type(array &arr) {
+    return reinterpret_cast<StoragePointerType>(arr.mutable_data());
+}
+
+template <typename T, typename = void>
+struct get_storage_pointer_type;
+
+template <typename MapType>
+struct get_storage_pointer_type<MapType, void_t<typename MapType::StoragePointerType>> {
+    using SPT = typename MapType::StoragePointerType;
+};
+
+template <typename MapType>
+struct get_storage_pointer_type<MapType, void_t<typename MapType::PointerArgType>> {
+    using SPT = typename MapType::PointerArgType;
+};
+
+template <typename Type, int Options>
+struct type_caster<Eigen::TensorMap<Type, Options>,
+                   typename eigen_tensor_helper<remove_cv_t<Type>>::ValidType> {
+    using MapType = Eigen::TensorMap<Type, Options>;
+    using Helper = eigen_tensor_helper<remove_cv_t<Type>>;
+
+    bool load(handle src, bool /*convert*/) {
+        // Note that we have a lot more checks here as we want to make sure to avoid copies
+        if (!isinstance<array>(src)) {
+            return false;
+        }
+        auto arr = reinterpret_borrow<array>(src);
+        if ((arr.flags() & compute_array_flag_from_tensor<Type>()) == 0) {
+            return false;
+        }
+
+        if (!arr.dtype().is(dtype::of<typename Type::Scalar>())) {
+            return false;
+        }
+
+        if (arr.ndim() != Type::NumIndices) {
+            return false;
+        }
+
+        constexpr bool is_aligned = (Options & Eigen::Aligned) != 0;
+
+        if (is_aligned && !is_tensor_aligned(arr.data())) {
+            return false;
+        }
+
+        auto shape = get_shape_for_array<typename Type::Index, Type::NumIndices>(arr);
+
+        if (!Helper::is_correct_shape(shape)) {
+            return false;
+        }
+
+        if (needs_writeable && !arr.writeable()) {
+            return false;
+        }
+
+        auto result = get_array_data_for_type<typename get_storage_pointer_type<MapType>::SPT,
+                                              needs_writeable>(arr);
+
+        value.reset(new MapType(std::move(result), std::move(shape)));
+
+        return true;
+    }
+
+    static handle cast(MapType &&src, return_value_policy policy, handle parent) {
+        return cast_impl(&src, policy, parent);
+    }
+
+    static handle cast(const MapType &&src, return_value_policy policy, handle parent) {
+        return cast_impl(&src, policy, parent);
+    }
+
+    static handle cast(MapType &src, return_value_policy policy, handle parent) {
+        if (policy == return_value_policy::automatic
+            || policy == return_value_policy::automatic_reference) {
+            policy = return_value_policy::copy;
+        }
+        return cast_impl(&src, policy, parent);
+    }
+
+    static handle cast(const MapType &src, return_value_policy policy, handle parent) {
+        if (policy == return_value_policy::automatic
+            || policy == return_value_policy::automatic_reference) {
+            policy = return_value_policy::copy;
+        }
+        return cast(&src, policy, parent);
+    }
+
+    static handle cast(MapType *src, return_value_policy policy, handle parent) {
+        if (policy == return_value_policy::automatic) {
+            policy = return_value_policy::take_ownership;
+        } else if (policy == return_value_policy::automatic_reference) {
+            policy = return_value_policy::reference;
+        }
+        return cast_impl(src, policy, parent);
+    }
+
+    static handle cast(const MapType *src, return_value_policy policy, handle parent) {
+        if (policy == return_value_policy::automatic) {
+            policy = return_value_policy::take_ownership;
+        } else if (policy == return_value_policy::automatic_reference) {
+            policy = return_value_policy::reference;
+        }
+        return cast_impl(src, policy, parent);
+    }
+
+    template <typename C>
+    static handle cast_impl(C *src, return_value_policy policy, handle parent) {
+        object parent_object;
+        constexpr bool writeable = !std::is_const<C>::value;
+        switch (policy) {
+            case return_value_policy::reference:
+                parent_object = none();
+                break;
+
+            case return_value_policy::reference_internal:
+                // Default should do the right thing
+                if (!parent) {
+                    pybind11_fail("Cannot use reference internal when there is no parent");
+                }
+                parent_object = reinterpret_borrow<object>(parent);
+                break;
+
+            case return_value_policy::take_ownership:
+                delete src;
+                // fallthrough
+            default:
+                // move, take_ownership don't make any sense for a ref/map:
+                pybind11_fail("Invalid return_value_policy for Eigen Map type, must be either "
+                              "reference or reference_internal");
+        }
+
+        auto result = array_t<typename Type::Scalar, compute_array_flag_from_tensor<Type>()>(
+            convert_dsizes_to_vector(Helper::get_shape(*src)),
+            src->data(),
+            std::move(parent_object));
+
+        if (!writeable) {
+            array_proxy(result.ptr())->flags &= ~detail::npy_api::NPY_ARRAY_WRITEABLE_;
+        }
+
+        return result.release();
+    }
+
+#if EIGEN_VERSION_AT_LEAST(3, 4, 0)
+
+    static constexpr bool needs_writeable = !std::is_const<typename std::remove_pointer<
+        typename get_storage_pointer_type<MapType>::SPT>::type>::value;
+#else
+    // Handle Eigen bug
+    static constexpr bool needs_writeable = !std::is_const<Type>::value;
+#endif
+
+protected:
+    // TODO: Move to std::optional once std::optional has more support
+    std::unique_ptr<MapType> value;
+
+public:
+    static constexpr auto name = get_tensor_descriptor<Type, true, needs_writeable>::value;
+    explicit operator MapType *() { return value.get(); }
+    explicit operator MapType &() { return *value; }
+    explicit operator MapType &&() && { return std::move(*value); }
+
+    template <typename T_>
+    using cast_op_type = ::pybind11::detail::movable_cast_op_type<T_>;
+};
+
+PYBIND11_NAMESPACE_END(detail)
+PYBIND11_NAMESPACE_END(PYBIND11_NAMESPACE)
diff --git a/ext/pybind11/include/pybind11/embed.h b/ext/pybind11/include/pybind11/embed.h
index 9843f0f973..749c75beb6 100644
--- a/ext/pybind11/include/pybind11/embed.h
+++ b/ext/pybind11/include/pybind11/embed.h
@@ -16,22 +16,12 @@
 #include <vector>
 
 #if defined(PYPY_VERSION)
-#  error Embedding the interpreter is not supported with PyPy
+#    error Embedding the interpreter is not supported with PyPy
 #endif
 
-#if PY_MAJOR_VERSION >= 3
-#  define PYBIND11_EMBEDDED_MODULE_IMPL(name)            \
-      extern "C" PyObject *pybind11_init_impl_##name();  \
-      extern "C" PyObject *pybind11_init_impl_##name() { \
-          return pybind11_init_wrapper_##name();         \
-      }
-#else
-#  define PYBIND11_EMBEDDED_MODULE_IMPL(name)            \
-      extern "C" void pybind11_init_impl_##name();       \
-      extern "C" void pybind11_init_impl_##name() {      \
-          pybind11_init_wrapper_##name();                \
-      }
-#endif
+#define PYBIND11_EMBEDDED_MODULE_IMPL(name)                                                       \
+    extern "C" PyObject *pybind11_init_impl_##name();                                             \
+    extern "C" PyObject *pybind11_init_impl_##name() { return pybind11_init_wrapper_##name(); }
 
 /** \rst
     Add a new module to the table of builtins for the interpreter. Must be
@@ -71,69 +61,69 @@ PYBIND11_NAMESPACE_BEGIN(detail)
 
 /// Python 2.7/3.x compatible version of `PyImport_AppendInittab` and error checks.
 struct embedded_module {
-#if PY_MAJOR_VERSION >= 3
-    using init_t = PyObject *(*)();
-#else
-    using init_t = void (*)();
-#endif
+    using init_t = PyObject *(*) ();
     embedded_module(const char *name, init_t init) {
-        if (Py_IsInitialized() != 0)
+        if (Py_IsInitialized() != 0) {
             pybind11_fail("Can't add new modules after the interpreter has been initialized");
+        }
 
         auto result = PyImport_AppendInittab(name, init);
-        if (result == -1)
+        if (result == -1) {
             pybind11_fail("Insufficient memory to add a new module");
+        }
     }
 };
 
 struct wide_char_arg_deleter {
     void operator()(wchar_t *ptr) const {
-#if PY_VERSION_HEX >= 0x030500f0
         // API docs: https://docs.python.org/3/c-api/sys.html#c.Py_DecodeLocale
         PyMem_RawFree(ptr);
-#else
-        delete[] ptr;
-#endif
     }
 };
 
 inline wchar_t *widen_chars(const char *safe_arg) {
-#if PY_VERSION_HEX >= 0x030500f0
     wchar_t *widened_arg = Py_DecodeLocale(safe_arg, nullptr);
-#else
-    wchar_t *widened_arg = nullptr;
-#    if defined(HAVE_BROKEN_MBSTOWCS) && HAVE_BROKEN_MBSTOWCS
-    size_t count = strlen(safe_arg);
-#    else
-    size_t count = mbstowcs(nullptr, safe_arg, 0);
-#    endif
-    if (count != static_cast<size_t>(-1)) {
-        widened_arg = new wchar_t[count + 1];
-        mbstowcs(widened_arg, safe_arg, count + 1);
-    }
-#endif
     return widened_arg;
 }
 
-/// Python 2.x/3.x-compatible version of `PySys_SetArgv`
-inline void set_interpreter_argv(int argc, const char *const *argv, bool add_program_dir_to_path) {
+inline void precheck_interpreter() {
+    if (Py_IsInitialized() != 0) {
+        pybind11_fail("The interpreter is already running");
+    }
+}
+
+#if !defined(PYBIND11_PYCONFIG_SUPPORT_PY_VERSION_HEX)
+#    define PYBIND11_PYCONFIG_SUPPORT_PY_VERSION_HEX (0x03080000)
+#endif
+
+#if PY_VERSION_HEX < PYBIND11_PYCONFIG_SUPPORT_PY_VERSION_HEX
+inline void initialize_interpreter_pre_pyconfig(bool init_signal_handlers,
+                                                int argc,
+                                                const char *const *argv,
+                                                bool add_program_dir_to_path) {
+    detail::precheck_interpreter();
+    Py_InitializeEx(init_signal_handlers ? 1 : 0);
+#    if defined(WITH_THREAD) && PY_VERSION_HEX < 0x03070000
+    PyEval_InitThreads();
+#    endif
+
     // Before it was special-cased in python 3.8, passing an empty or null argv
     // caused a segfault, so we have to reimplement the special case ourselves.
     bool special_case = (argv == nullptr || argc <= 0);
 
     const char *const empty_argv[]{"\0"};
     const char *const *safe_argv = special_case ? empty_argv : argv;
-    if (special_case)
+    if (special_case) {
         argc = 1;
+    }
 
     auto argv_size = static_cast<size_t>(argc);
-#if PY_MAJOR_VERSION >= 3
     // SetArgv* on python 3 takes wchar_t, so we have to convert.
     std::unique_ptr<wchar_t *[]> widened_argv(new wchar_t *[argv_size]);
-    std::vector<std::unique_ptr<wchar_t[], wide_char_arg_deleter>> widened_argv_entries;
+    std::vector<std::unique_ptr<wchar_t[], detail::wide_char_arg_deleter>> widened_argv_entries;
     widened_argv_entries.reserve(argv_size);
     for (size_t ii = 0; ii < argv_size; ++ii) {
-        widened_argv_entries.emplace_back(widen_chars(safe_argv[ii]));
+        widened_argv_entries.emplace_back(detail::widen_chars(safe_argv[ii]));
         if (!widened_argv_entries.back()) {
             // A null here indicates a character-encoding failure or the python
             // interpreter out of memory. Give up.
@@ -142,21 +132,44 @@ inline void set_interpreter_argv(int argc, const char *const *argv, bool add_pro
         widened_argv[ii] = widened_argv_entries.back().get();
     }
 
-    auto pysys_argv = widened_argv.get();
-#else
-    // python 2.x
-    std::vector<std::string> strings{safe_argv, safe_argv + argv_size};
-    std::vector<char *> char_strings{argv_size};
-    for (std::size_t i = 0; i < argv_size; ++i)
-        char_strings[i] = &strings[i][0];
-    char **pysys_argv = char_strings.data();
-#endif
+    auto *pysys_argv = widened_argv.get();
 
     PySys_SetArgvEx(argc, pysys_argv, static_cast<int>(add_program_dir_to_path));
 }
+#endif
 
 PYBIND11_NAMESPACE_END(detail)
 
+#if PY_VERSION_HEX >= PYBIND11_PYCONFIG_SUPPORT_PY_VERSION_HEX
+inline void initialize_interpreter(PyConfig *config,
+                                   int argc = 0,
+                                   const char *const *argv = nullptr,
+                                   bool add_program_dir_to_path = true) {
+    detail::precheck_interpreter();
+    PyStatus status = PyConfig_SetBytesArgv(config, argc, const_cast<char *const *>(argv));
+    if (PyStatus_Exception(status) != 0) {
+        // A failure here indicates a character-encoding failure or the python
+        // interpreter out of memory. Give up.
+        PyConfig_Clear(config);
+        throw std::runtime_error(PyStatus_IsError(status) != 0 ? status.err_msg
+                                                               : "Failed to prepare CPython");
+    }
+    status = Py_InitializeFromConfig(config);
+    if (PyStatus_Exception(status) != 0) {
+        PyConfig_Clear(config);
+        throw std::runtime_error(PyStatus_IsError(status) != 0 ? status.err_msg
+                                                               : "Failed to init CPython");
+    }
+    if (add_program_dir_to_path) {
+        PyRun_SimpleString("import sys, os.path; "
+                           "sys.path.insert(0, "
+                           "os.path.abspath(os.path.dirname(sys.argv[0])) "
+                           "if sys.argv and os.path.exists(sys.argv[0]) else '')");
+    }
+    PyConfig_Clear(config);
+}
+#endif
+
 /** \rst
     Initialize the Python interpreter. No other pybind11 or CPython API functions can be
     called before this is done; with the exception of `PYBIND11_EMBEDDED_MODULE`. The
@@ -180,12 +193,17 @@ inline void initialize_interpreter(bool init_signal_handlers = true,
                                    int argc = 0,
                                    const char *const *argv = nullptr,
                                    bool add_program_dir_to_path = true) {
-    if (Py_IsInitialized() != 0)
-        pybind11_fail("The interpreter is already running");
-
-    Py_InitializeEx(init_signal_handlers ? 1 : 0);
-
-    detail::set_interpreter_argv(argc, argv, add_program_dir_to_path);
+#if PY_VERSION_HEX < PYBIND11_PYCONFIG_SUPPORT_PY_VERSION_HEX
+    detail::initialize_interpreter_pre_pyconfig(
+        init_signal_handlers, argc, argv, add_program_dir_to_path);
+#else
+    PyConfig config;
+    PyConfig_InitIsolatedConfig(&config);
+    config.isolated = 0;
+    config.use_environment = 1;
+    config.install_signal_handlers = init_signal_handlers ? 1 : 0;
+    initialize_interpreter(&config, argc, argv, add_program_dir_to_path);
+#endif
 }
 
 /** \rst
@@ -232,8 +250,13 @@ inline void finalize_interpreter() {
     // during destruction), so we get the pointer-pointer here and check it after Py_Finalize().
     detail::internals **internals_ptr_ptr = detail::get_internals_pp();
     // It could also be stashed in builtins, so look there too:
-    if (builtins.contains(id) && isinstance<capsule>(builtins[id]))
+    if (builtins.contains(id) && isinstance<capsule>(builtins[id])) {
         internals_ptr_ptr = capsule(builtins[id]);
+    }
+    // Local internals contains data managed by the current interpreter, so we must clear them to
+    // avoid undefined behaviors when initializing another interpreter
+    detail::get_local_internals().registered_types_cpp.clear();
+    detail::get_local_internals().registered_exception_translators.clear();
 
     Py_Finalize();
 
@@ -267,14 +290,24 @@ public:
         initialize_interpreter(init_signal_handlers, argc, argv, add_program_dir_to_path);
     }
 
+#if PY_VERSION_HEX >= PYBIND11_PYCONFIG_SUPPORT_PY_VERSION_HEX
+    explicit scoped_interpreter(PyConfig *config,
+                                int argc = 0,
+                                const char *const *argv = nullptr,
+                                bool add_program_dir_to_path = true) {
+        initialize_interpreter(config, argc, argv, add_program_dir_to_path);
+    }
+#endif
+
     scoped_interpreter(const scoped_interpreter &) = delete;
     scoped_interpreter(scoped_interpreter &&other) noexcept { other.is_valid = false; }
     scoped_interpreter &operator=(const scoped_interpreter &) = delete;
     scoped_interpreter &operator=(scoped_interpreter &&) = delete;
 
     ~scoped_interpreter() {
-        if (is_valid)
+        if (is_valid) {
             finalize_interpreter();
+        }
     }
 
 private:
diff --git a/ext/pybind11/include/pybind11/eval.h b/ext/pybind11/include/pybind11/eval.h
index 6cc672e2d5..bd5f981f53 100644
--- a/ext/pybind11/include/pybind11/eval.h
+++ b/ext/pybind11/include/pybind11/eval.h
@@ -11,24 +11,24 @@
 
 #pragma once
 
-#include <utility>
-
 #include "pybind11.h"
 
+#include <utility>
+
 PYBIND11_NAMESPACE_BEGIN(PYBIND11_NAMESPACE)
 PYBIND11_NAMESPACE_BEGIN(detail)
 
 inline void ensure_builtins_in_globals(object &global) {
-    #if PY_VERSION_HEX < 0x03080000
-        // Running exec and eval on Python 2 and 3 adds `builtins` module under
-        // `__builtins__` key to globals if not yet present.
-        // Python 3.8 made PyRun_String behave similarly. Let's also do that for
-        // older versions, for consistency.
-        if (!global.contains("__builtins__"))
-            global["__builtins__"] = module_::import(PYBIND11_BUILTINS_MODULE);
-    #else
-        (void) global;
-    #endif
+#if defined(PYPY_VERSION) || PY_VERSION_HEX < 0x03080000
+    // Running exec and eval adds `builtins` module under `__builtins__` key to
+    // globals if not yet present.  Python 3.8 made PyRun_String behave
+    // similarly. Let's also do that for older versions, for consistency. This
+    // was missing from PyPy3.8 7.3.7.
+    if (!global.contains("__builtins__"))
+        global["__builtins__"] = module_::import(PYBIND11_BUILTINS_MODULE);
+#else
+    (void) global;
+#endif
 }
 
 PYBIND11_NAMESPACE_END(detail)
@@ -46,8 +46,9 @@ enum eval_mode {
 
 template <eval_mode mode = eval_expr>
 object eval(const str &expr, object global = globals(), object local = object()) {
-    if (!local)
+    if (!local) {
         local = global;
+    }
 
     detail::ensure_builtins_in_globals(global);
 
@@ -57,24 +58,31 @@ object eval(const str &expr, object global = globals(), object local = object())
 
     int start = 0;
     switch (mode) {
-        case eval_expr:             start = Py_eval_input;   break;
-        case eval_single_statement: start = Py_single_input; break;
-        case eval_statements:       start = Py_file_input;   break;
-        default: pybind11_fail("invalid evaluation mode");
+        case eval_expr:
+            start = Py_eval_input;
+            break;
+        case eval_single_statement:
+            start = Py_single_input;
+            break;
+        case eval_statements:
+            start = Py_file_input;
+            break;
+        default:
+            pybind11_fail("invalid evaluation mode");
     }
 
     PyObject *result = PyRun_String(buffer.c_str(), start, global.ptr(), local.ptr());
-    if (!result)
+    if (!result) {
         throw error_already_set();
+    }
     return reinterpret_steal<object>(result);
 }
 
 template <eval_mode mode = eval_expr, size_t N>
 object eval(const char (&s)[N], object global = globals(), object local = object()) {
     /* Support raw string literals by removing common leading whitespace */
-    auto expr = (s[0] == '\n') ? str(module_::import("textwrap").attr("dedent")(s))
-                               : str(s);
-    return eval<mode>(expr, global, local);
+    auto expr = (s[0] == '\n') ? str(module_::import("textwrap").attr("dedent")(s)) : str(s);
+    return eval<mode>(expr, std::move(global), std::move(local));
 }
 
 inline void exec(const str &expr, object global = globals(), object local = object()) {
@@ -83,10 +91,10 @@ inline void exec(const str &expr, object global = globals(), object local = obje
 
 template <size_t N>
 void exec(const char (&s)[N], object global = globals(), object local = object()) {
-    eval<eval_statements>(s, global, local);
+    eval<eval_statements>(s, std::move(global), std::move(local));
 }
 
-#if defined(PYPY_VERSION) && PY_VERSION_HEX >= 0x03000000
+#if defined(PYPY_VERSION)
 template <eval_mode mode = eval_statements>
 object eval_file(str, object, object) {
     pybind11_fail("eval_file not supported in PyPy3. Use eval");
@@ -102,60 +110,45 @@ object eval_file(str) {
 #else
 template <eval_mode mode = eval_statements>
 object eval_file(str fname, object global = globals(), object local = object()) {
-    if (!local)
+    if (!local) {
         local = global;
+    }
 
     detail::ensure_builtins_in_globals(global);
 
     int start = 0;
     switch (mode) {
-        case eval_expr:             start = Py_eval_input;   break;
-        case eval_single_statement: start = Py_single_input; break;
-        case eval_statements:       start = Py_file_input;   break;
-        default: pybind11_fail("invalid evaluation mode");
+        case eval_expr:
+            start = Py_eval_input;
+            break;
+        case eval_single_statement:
+            start = Py_single_input;
+            break;
+        case eval_statements:
+            start = Py_file_input;
+            break;
+        default:
+            pybind11_fail("invalid evaluation mode");
     }
 
     int closeFile = 1;
     std::string fname_str = (std::string) fname;
-#if PY_VERSION_HEX >= 0x03040000
     FILE *f = _Py_fopen_obj(fname.ptr(), "r");
-#elif PY_VERSION_HEX >= 0x03000000
-    FILE *f = _Py_fopen(fname.ptr(), "r");
-#else
-    /* No unicode support in open() :( */
-    auto fobj = reinterpret_steal<object>(PyFile_FromString(
-        const_cast<char *>(fname_str.c_str()),
-        const_cast<char*>("r")));
-    FILE *f = nullptr;
-    if (fobj)
-        f = PyFile_AsFile(fobj.ptr());
-    closeFile = 0;
-#endif
     if (!f) {
         PyErr_Clear();
         pybind11_fail("File \"" + fname_str + "\" could not be opened!");
     }
 
-    // In Python2, this should be encoded by getfilesystemencoding.
-    // We don't boher setting it since Python2 is past EOL anyway.
-    // See PR#3233
-#if PY_VERSION_HEX >= 0x03000000
     if (!global.contains("__file__")) {
         global["__file__"] = std::move(fname);
     }
-#endif
 
-#if PY_VERSION_HEX < 0x03000000 && defined(PYPY_VERSION)
-    PyObject *result = PyRun_File(f, fname_str.c_str(), start, global.ptr(),
-                                  local.ptr());
-    (void) closeFile;
-#else
-    PyObject *result = PyRun_FileEx(f, fname_str.c_str(), start, global.ptr(),
-                                    local.ptr(), closeFile);
-#endif
+    PyObject *result
+        = PyRun_FileEx(f, fname_str.c_str(), start, global.ptr(), local.ptr(), closeFile);
 
-    if (!result)
+    if (!result) {
         throw error_already_set();
+    }
     return reinterpret_steal<object>(result);
 }
 #endif
diff --git a/ext/pybind11/include/pybind11/functional.h b/ext/pybind11/include/pybind11/functional.h
index ad5608c255..87ec4d10cb 100644
--- a/ext/pybind11/include/pybind11/functional.h
+++ b/ext/pybind11/include/pybind11/functional.h
@@ -10,6 +10,7 @@
 #pragma once
 
 #include "pybind11.h"
+
 #include <functional>
 
 PYBIND11_NAMESPACE_BEGIN(PYBIND11_NAMESPACE)
@@ -19,18 +20,21 @@ template <typename Return, typename... Args>
 struct type_caster<std::function<Return(Args...)>> {
     using type = std::function<Return(Args...)>;
     using retval_type = conditional_t<std::is_same<Return, void>::value, void_type, Return>;
-    using function_type = Return (*) (Args...);
+    using function_type = Return (*)(Args...);
 
 public:
     bool load(handle src, bool convert) {
         if (src.is_none()) {
             // Defer accepting None to other overloads (if we aren't in convert mode):
-            if (!convert) return false;
+            if (!convert) {
+                return false;
+            }
             return true;
         }
 
-        if (!isinstance<function>(src))
+        if (!isinstance<function>(src)) {
             return false;
+        }
 
         auto func = reinterpret_borrow<function>(src);
 
@@ -43,10 +47,17 @@ public:
            captured variables), in which case the roundtrip can be avoided.
          */
         if (auto cfunc = func.cpp_function()) {
-            auto cfunc_self = PyCFunction_GET_SELF(cfunc.ptr());
-            if (isinstance<capsule>(cfunc_self)) {
+            auto *cfunc_self = PyCFunction_GET_SELF(cfunc.ptr());
+            if (cfunc_self == nullptr) {
+                PyErr_Clear();
+            } else if (isinstance<capsule>(cfunc_self)) {
                 auto c = reinterpret_borrow<capsule>(cfunc_self);
-                auto rec = (function_record *) c;
+
+                function_record *rec = nullptr;
+                // Check that we can safely reinterpret the capsule into a function_record
+                if (detail::is_function_record_capsule(c)) {
+                    rec = c.get_pointer<function_record>();
+                }
 
                 while (rec != nullptr) {
                     if (rec->is_stateless
@@ -73,7 +84,9 @@ public:
             // This triggers a syntax error under very special conditions (very weird indeed).
             explicit
 #endif
-            func_handle(function &&f_) noexcept : f(std::move(f_)) {}
+                func_handle(function &&f_) noexcept
+                : f(std::move(f_)) {
+            }
             func_handle(const func_handle &f_) { operator=(f_); }
             func_handle &operator=(const func_handle &f_) {
                 gil_scoped_acquire acq;
@@ -92,9 +105,8 @@ public:
             explicit func_wrapper(func_handle &&hf) noexcept : hfunc(std::move(hf)) {}
             Return operator()(Args... args) const {
                 gil_scoped_acquire acq;
-                object retval(hfunc.f(std::forward<Args>(args)...));
-                /* Visual studio 2015 parser issue: need parentheses around this expression */
-                return (retval.template cast<Return>());
+                // casts the returned object as a rvalue to the return type
+                return hfunc.f(std::forward<Args>(args)...).template cast<Return>();
             }
         };
 
@@ -104,17 +116,21 @@ public:
 
     template <typename Func>
     static handle cast(Func &&f_, return_value_policy policy, handle /* parent */) {
-        if (!f_)
-            return none().inc_ref();
+        if (!f_) {
+            return none().release();
+        }
 
         auto result = f_.template target<function_type>();
-        if (result)
+        if (result) {
             return cpp_function(*result, policy).release();
+        }
         return cpp_function(std::forward<Func>(f_), policy).release();
     }
 
-    PYBIND11_TYPE_CASTER(type, _("Callable[[") + concat(make_caster<Args>::name...) + _("], ")
-                               + make_caster<retval_type>::name + _("]"));
+    PYBIND11_TYPE_CASTER(type,
+                         const_name("Callable[[") + concat(make_caster<Args>::name...)
+                             + const_name("], ") + make_caster<retval_type>::name
+                             + const_name("]"));
 };
 
 PYBIND11_NAMESPACE_END(detail)
diff --git a/ext/pybind11/include/pybind11/gil.h b/ext/pybind11/include/pybind11/gil.h
index b73aaa3f54..cb0028d505 100644
--- a/ext/pybind11/include/pybind11/gil.h
+++ b/ext/pybind11/include/pybind11/gil.h
@@ -10,11 +10,13 @@
 #pragma once
 
 #include "detail/common.h"
-#include "detail/internals.h"
+
+#if defined(WITH_THREAD) && !defined(PYBIND11_SIMPLE_GIL_MANAGEMENT)
+#    include "detail/internals.h"
+#endif
 
 PYBIND11_NAMESPACE_BEGIN(PYBIND11_NAMESPACE)
 
-
 PYBIND11_NAMESPACE_BEGIN(detail)
 
 // forward declarations
@@ -22,8 +24,9 @@ PyThreadState *get_thread_state_unchecked();
 
 PYBIND11_NAMESPACE_END(detail)
 
+#if defined(WITH_THREAD)
 
-#if defined(WITH_THREAD) && !defined(PYPY_VERSION)
+#    if !defined(PYBIND11_SIMPLE_GIL_MANAGEMENT)
 
 /* The functions below essentially reproduce the PyGILState_* API using a RAII
  * pattern, but there are a few important differences:
@@ -64,10 +67,11 @@ public:
 
         if (!tstate) {
             tstate = PyThreadState_New(internals.istate);
-            #if !defined(NDEBUG)
-                if (!tstate)
-                    pybind11_fail("scoped_acquire: could not create thread state!");
-            #endif
+#        if defined(PYBIND11_DETAILED_ERROR_MESSAGES)
+            if (!tstate) {
+                pybind11_fail("scoped_acquire: could not create thread state!");
+            }
+#        endif
             tstate->gilstate_counter = 0;
             PYBIND11_TLS_REPLACE_VALUE(internals.tstate, tstate);
         } else {
@@ -81,26 +85,31 @@ public:
         inc_ref();
     }
 
-    void inc_ref() {
-        ++tstate->gilstate_counter;
-    }
+    gil_scoped_acquire(const gil_scoped_acquire &) = delete;
+    gil_scoped_acquire &operator=(const gil_scoped_acquire &) = delete;
+
+    void inc_ref() { ++tstate->gilstate_counter; }
 
     PYBIND11_NOINLINE void dec_ref() {
         --tstate->gilstate_counter;
-        #if !defined(NDEBUG)
-            if (detail::get_thread_state_unchecked() != tstate)
-                pybind11_fail("scoped_acquire::dec_ref(): thread state must be current!");
-            if (tstate->gilstate_counter < 0)
-                pybind11_fail("scoped_acquire::dec_ref(): reference count underflow!");
-        #endif
+#        if defined(PYBIND11_DETAILED_ERROR_MESSAGES)
+        if (detail::get_thread_state_unchecked() != tstate) {
+            pybind11_fail("scoped_acquire::dec_ref(): thread state must be current!");
+        }
+        if (tstate->gilstate_counter < 0) {
+            pybind11_fail("scoped_acquire::dec_ref(): reference count underflow!");
+        }
+#        endif
         if (tstate->gilstate_counter == 0) {
-            #if !defined(NDEBUG)
-                if (!release)
-                    pybind11_fail("scoped_acquire::dec_ref(): internal error!");
-            #endif
+#        if defined(PYBIND11_DETAILED_ERROR_MESSAGES)
+            if (!release) {
+                pybind11_fail("scoped_acquire::dec_ref(): internal error!");
+            }
+#        endif
             PyThreadState_Clear(tstate);
-            if (active)
+            if (active) {
                 PyThreadState_DeleteCurrent();
+            }
             PYBIND11_TLS_DELETE_VALUE(detail::get_internals().tstate);
             release = false;
         }
@@ -111,15 +120,15 @@ public:
     /// could be shutting down when this is called, as thread deletion is not
     /// allowed during shutdown. Check _Py_IsFinalizing() on Python 3.7+, and
     /// protect subsequent code.
-    PYBIND11_NOINLINE void disarm() {
-        active = false;
-    }
+    PYBIND11_NOINLINE void disarm() { active = false; }
 
     PYBIND11_NOINLINE ~gil_scoped_acquire() {
         dec_ref();
-        if (release)
-           PyEval_SaveThread();
+        if (release) {
+            PyEval_SaveThread();
+        }
     }
+
 private:
     PyThreadState *tstate = nullptr;
     bool release = true;
@@ -133,61 +142,98 @@ public:
         // `internals.tstate` for subsequent `gil_scoped_acquire` calls. Otherwise, an
         // initialization race could occur as multiple threads try `gil_scoped_acquire`.
         auto &internals = detail::get_internals();
+        // NOLINTNEXTLINE(cppcoreguidelines-prefer-member-initializer)
         tstate = PyEval_SaveThread();
         if (disassoc) {
+            // Python >= 3.7 can remove this, it's an int before 3.7
+            // NOLINTNEXTLINE(readability-qualified-auto)
             auto key = internals.tstate;
             PYBIND11_TLS_DELETE_VALUE(key);
         }
     }
 
+    gil_scoped_release(const gil_scoped_acquire &) = delete;
+    gil_scoped_release &operator=(const gil_scoped_acquire &) = delete;
+
     /// This method will disable the PyThreadState_DeleteCurrent call and the
     /// GIL won't be acquired. This method should be used if the interpreter
     /// could be shutting down when this is called, as thread deletion is not
     /// allowed during shutdown. Check _Py_IsFinalizing() on Python 3.7+, and
     /// protect subsequent code.
-    PYBIND11_NOINLINE void disarm() {
-        active = false;
-    }
+    PYBIND11_NOINLINE void disarm() { active = false; }
 
     ~gil_scoped_release() {
-        if (!tstate)
+        if (!tstate) {
             return;
+        }
         // `PyEval_RestoreThread()` should not be called if runtime is finalizing
-        if (active)
+        if (active) {
             PyEval_RestoreThread(tstate);
+        }
         if (disassoc) {
+            // Python >= 3.7 can remove this, it's an int before 3.7
+            // NOLINTNEXTLINE(readability-qualified-auto)
             auto key = detail::get_internals().tstate;
             PYBIND11_TLS_REPLACE_VALUE(key, tstate);
         }
     }
+
 private:
     PyThreadState *tstate;
     bool disassoc;
     bool active = true;
 };
-#elif defined(PYPY_VERSION)
+
+#    else // PYBIND11_SIMPLE_GIL_MANAGEMENT
+
 class gil_scoped_acquire {
     PyGILState_STATE state;
+
 public:
-    gil_scoped_acquire() { state = PyGILState_Ensure(); }
+    gil_scoped_acquire() : state{PyGILState_Ensure()} {}
+    gil_scoped_acquire(const gil_scoped_acquire &) = delete;
+    gil_scoped_acquire &operator=(const gil_scoped_acquire &) = delete;
     ~gil_scoped_acquire() { PyGILState_Release(state); }
     void disarm() {}
 };
 
 class gil_scoped_release {
     PyThreadState *state;
+
 public:
-    gil_scoped_release() { state = PyEval_SaveThread(); }
+    gil_scoped_release() : state{PyEval_SaveThread()} {}
+    gil_scoped_release(const gil_scoped_release &) = delete;
+    gil_scoped_release &operator=(const gil_scoped_acquire &) = delete;
     ~gil_scoped_release() { PyEval_RestoreThread(state); }
     void disarm() {}
 };
-#else
+
+#    endif // PYBIND11_SIMPLE_GIL_MANAGEMENT
+
+#else // WITH_THREAD
+
 class gil_scoped_acquire {
+public:
+    gil_scoped_acquire() {
+        // Trick to suppress `unused variable` error messages (at call sites).
+        (void) (this != (this + 1));
+    }
+    gil_scoped_acquire(const gil_scoped_acquire &) = delete;
+    gil_scoped_acquire &operator=(const gil_scoped_acquire &) = delete;
     void disarm() {}
 };
+
 class gil_scoped_release {
+public:
+    gil_scoped_release() {
+        // Trick to suppress `unused variable` error messages (at call sites).
+        (void) (this != (this + 1));
+    }
+    gil_scoped_release(const gil_scoped_release &) = delete;
+    gil_scoped_release &operator=(const gil_scoped_acquire &) = delete;
     void disarm() {}
 };
-#endif
+
+#endif // WITH_THREAD
 
 PYBIND11_NAMESPACE_END(PYBIND11_NAMESPACE)
diff --git a/ext/pybind11/include/pybind11/iostream.h b/ext/pybind11/include/pybind11/iostream.h
index 95449a07ba..1878089e31 100644
--- a/ext/pybind11/include/pybind11/iostream.h
+++ b/ext/pybind11/include/pybind11/iostream.h
@@ -58,36 +58,31 @@ private:
     size_t utf8_remainder() const {
         const auto rbase = std::reverse_iterator<char *>(pbase());
         const auto rpptr = std::reverse_iterator<char *>(pptr());
-        auto is_ascii = [](char c) {
-            return (static_cast<unsigned char>(c) & 0x80) == 0x00;
-        };
-        auto is_leading = [](char c) {
-            return (static_cast<unsigned char>(c) & 0xC0) == 0xC0;
-        };
-        auto is_leading_2b = [](char c) {
-            return static_cast<unsigned char>(c) <= 0xDF;
-        };
-        auto is_leading_3b = [](char c) {
-            return static_cast<unsigned char>(c) <= 0xEF;
-        };
+        auto is_ascii = [](char c) { return (static_cast<unsigned char>(c) & 0x80) == 0x00; };
+        auto is_leading = [](char c) { return (static_cast<unsigned char>(c) & 0xC0) == 0xC0; };
+        auto is_leading_2b = [](char c) { return static_cast<unsigned char>(c) <= 0xDF; };
+        auto is_leading_3b = [](char c) { return static_cast<unsigned char>(c) <= 0xEF; };
         // If the last character is ASCII, there are no incomplete code points
-        if (is_ascii(*rpptr))
+        if (is_ascii(*rpptr)) {
             return 0;
+        }
         // Otherwise, work back from the end of the buffer and find the first
         // UTF-8 leading byte
-        const auto rpend   = rbase - rpptr >= 3 ? rpptr + 3 : rbase;
+        const auto rpend = rbase - rpptr >= 3 ? rpptr + 3 : rbase;
         const auto leading = std::find_if(rpptr, rpend, is_leading);
-        if (leading == rbase)
+        if (leading == rbase) {
             return 0;
-        const auto dist    = static_cast<size_t>(leading - rpptr);
-        size_t remainder   = 0;
+        }
+        const auto dist = static_cast<size_t>(leading - rpptr);
+        size_t remainder = 0;
 
-        if (dist == 0)
+        if (dist == 0) {
             remainder = 1; // 1-byte code point is impossible
-        else if (dist == 1)
+        } else if (dist == 1) {
             remainder = is_leading_2b(*leading) ? 0 : dist + 1;
-        else if (dist == 2)
+        } else if (dist == 2) {
             remainder = is_leading_3b(*leading) ? 0 : dist + 1;
+        }
         // else if (dist >= 3), at least 4 bytes before encountering an UTF-8
         // leading byte, either no remainder or invalid UTF-8.
         // Invalid UTF-8 will cause an exception later when converting
@@ -100,27 +95,26 @@ private:
         if (pbase() != pptr()) { // If buffer is not empty
             gil_scoped_acquire tmp;
             // This subtraction cannot be negative, so dropping the sign.
-            auto size        = static_cast<size_t>(pptr() - pbase());
+            auto size = static_cast<size_t>(pptr() - pbase());
             size_t remainder = utf8_remainder();
 
             if (size > remainder) {
                 str line(pbase(), size - remainder);
-                pywrite(line);
+                pywrite(std::move(line));
                 pyflush();
             }
 
             // Copy the remainder at the end of the buffer to the beginning:
-            if (remainder > 0)
+            if (remainder > 0) {
                 std::memmove(pbase(), pptr() - remainder, remainder);
+            }
             setp(pbase(), epptr());
             pbump(static_cast<int>(remainder));
         }
         return 0;
     }
 
-    int sync() override {
-        return _sync();
-    }
+    int sync() override { return _sync(); }
 
 public:
     explicit pythonbuf(const object &pyostream, size_t buffer_size = 1024)
@@ -129,17 +123,14 @@ public:
         setp(d_buffer.get(), d_buffer.get() + buf_size - 1);
     }
 
-    pythonbuf(pythonbuf&&) = default;
+    pythonbuf(pythonbuf &&) = default;
 
     /// Sync before destroy
-    ~pythonbuf() override {
-        _sync();
-    }
+    ~pythonbuf() override { _sync(); }
 };
 
 PYBIND11_NAMESPACE_END(detail)
 
-
 /** \rst
     This a move-only guard that redirects output.
 
@@ -160,7 +151,8 @@ PYBIND11_NAMESPACE_END(detail)
     .. code-block:: cpp
 
         {
-            py::scoped_ostream_redirect output{std::cerr, py::module::import("sys").attr("stderr")};
+            py::scoped_ostream_redirect output{
+                std::cerr, py::module::import("sys").attr("stderr")};
             std::cout << "Hello, World!";
         }
  \endrst */
@@ -178,9 +170,7 @@ public:
         old = costream.rdbuf(&buffer);
     }
 
-    ~scoped_ostream_redirect() {
-        costream.rdbuf(old);
-    }
+    ~scoped_ostream_redirect() { costream.rdbuf(old); }
 
     scoped_ostream_redirect(const scoped_ostream_redirect &) = delete;
     scoped_ostream_redirect(scoped_ostream_redirect &&other) = default;
@@ -188,7 +178,6 @@ public:
     scoped_ostream_redirect &operator=(scoped_ostream_redirect &&) = delete;
 };
 
-
 /** \rst
     Like `scoped_ostream_redirect`, but redirects cerr by default. This class
     is provided primary to make ``py::call_guard`` easier to make.
@@ -208,7 +197,6 @@ public:
         : scoped_ostream_redirect(costream, pyostream) {}
 };
 
-
 PYBIND11_NAMESPACE_BEGIN(detail)
 
 // Class to redirect output as a context manager. C++ backend.
@@ -223,10 +211,12 @@ public:
         : do_stdout_(do_stdout), do_stderr_(do_stderr) {}
 
     void enter() {
-        if (do_stdout_)
+        if (do_stdout_) {
             redirect_stdout.reset(new scoped_ostream_redirect());
-        if (do_stderr_)
+        }
+        if (do_stderr_) {
             redirect_stderr.reset(new scoped_estream_redirect());
+        }
     }
 
     void exit() {
diff --git a/ext/pybind11/include/pybind11/numpy.h b/ext/pybind11/include/pybind11/numpy.h
index b43a771684..8f072af267 100644
--- a/ext/pybind11/include/pybind11/numpy.h
+++ b/ext/pybind11/include/pybind11/numpy.h
@@ -11,19 +11,20 @@
 
 #include "pybind11.h"
 #include "complex.h"
-#include <numeric>
+
 #include <algorithm>
 #include <array>
 #include <cstdint>
 #include <cstdlib>
 #include <cstring>
+#include <functional>
+#include <numeric>
 #include <sstream>
 #include <string>
-#include <functional>
 #include <type_traits>
+#include <typeindex>
 #include <utility>
 #include <vector>
-#include <typeindex>
 
 /* This will be true on all flat address space platforms and allows us to reduce the
    whole npy_intp / ssize_t / Py_intptr_t business down to just ssize_t for all size
@@ -35,13 +36,19 @@ static_assert(std::is_signed<Py_intptr_t>::value, "Py_intptr_t must be signed");
 
 PYBIND11_NAMESPACE_BEGIN(PYBIND11_NAMESPACE)
 
+PYBIND11_WARNING_DISABLE_MSVC(4127)
+
 class array; // Forward declaration
 
 PYBIND11_NAMESPACE_BEGIN(detail)
 
-template <> struct handle_type_name<array> { static constexpr auto name = _("numpy.ndarray"); };
+template <>
+struct handle_type_name<array> {
+    static constexpr auto name = const_name("numpy.ndarray");
+};
 
-template <typename type, typename SFINAE = void> struct npy_format_descriptor;
+template <typename type, typename SFINAE = void>
+struct npy_format_descriptor;
 
 struct PyArrayDescr_Proxy {
     PyObject_HEAD
@@ -70,51 +77,59 @@ struct PyArray_Proxy {
 };
 
 struct PyVoidScalarObject_Proxy {
-    PyObject_VAR_HEAD
-    char *obval;
+    PyObject_VAR_HEAD char *obval;
     PyArrayDescr_Proxy *descr;
     int flags;
     PyObject *base;
 };
 
 struct numpy_type_info {
-    PyObject* dtype_ptr;
+    PyObject *dtype_ptr;
     std::string format_str;
 };
 
 struct numpy_internals {
     std::unordered_map<std::type_index, numpy_type_info> registered_dtypes;
 
-    numpy_type_info *get_type_info(const std::type_info& tinfo, bool throw_if_missing = true) {
+    numpy_type_info *get_type_info(const std::type_info &tinfo, bool throw_if_missing = true) {
         auto it = registered_dtypes.find(std::type_index(tinfo));
-        if (it != registered_dtypes.end())
+        if (it != registered_dtypes.end()) {
             return &(it->second);
-        if (throw_if_missing)
+        }
+        if (throw_if_missing) {
             pybind11_fail(std::string("NumPy type info missing for ") + tinfo.name());
+        }
         return nullptr;
     }
 
-    template<typename T> numpy_type_info *get_type_info(bool throw_if_missing = true) {
+    template <typename T>
+    numpy_type_info *get_type_info(bool throw_if_missing = true) {
         return get_type_info(typeid(typename std::remove_cv<T>::type), throw_if_missing);
     }
 };
 
-PYBIND11_NOINLINE void load_numpy_internals(numpy_internals* &ptr) {
+PYBIND11_NOINLINE void load_numpy_internals(numpy_internals *&ptr) {
     ptr = &get_or_create_shared_data<numpy_internals>("_numpy_internals");
 }
 
-inline numpy_internals& get_numpy_internals() {
-    static numpy_internals* ptr = nullptr;
-    if (!ptr)
+inline numpy_internals &get_numpy_internals() {
+    static numpy_internals *ptr = nullptr;
+    if (!ptr) {
         load_numpy_internals(ptr);
+    }
     return *ptr;
 }
 
-template <typename T> struct same_size {
-    template <typename U> using as = bool_constant<sizeof(T) == sizeof(U)>;
+template <typename T>
+struct same_size {
+    template <typename U>
+    using as = bool_constant<sizeof(T) == sizeof(U)>;
 };
 
-template <typename Concrete> constexpr int platform_lookup() { return -1; }
+template <typename Concrete>
+constexpr int platform_lookup() {
+    return -1;
+}
 
 // Lookup a type according to its size, and return a value corresponding to the NumPy typenum.
 template <typename Concrete, typename T, typename... Ts, typename... Ints>
@@ -132,15 +147,26 @@ struct npy_api {
         NPY_ARRAY_ALIGNED_ = 0x0100,
         NPY_ARRAY_WRITEABLE_ = 0x0400,
         NPY_BOOL_ = 0,
-        NPY_BYTE_, NPY_UBYTE_,
-        NPY_SHORT_, NPY_USHORT_,
-        NPY_INT_, NPY_UINT_,
-        NPY_LONG_, NPY_ULONG_,
-        NPY_LONGLONG_, NPY_ULONGLONG_,
-        NPY_FLOAT_, NPY_DOUBLE_, NPY_LONGDOUBLE_,
-        NPY_CFLOAT_, NPY_CDOUBLE_, NPY_CLONGDOUBLE_,
+        NPY_BYTE_,
+        NPY_UBYTE_,
+        NPY_SHORT_,
+        NPY_USHORT_,
+        NPY_INT_,
+        NPY_UINT_,
+        NPY_LONG_,
+        NPY_ULONG_,
+        NPY_LONGLONG_,
+        NPY_ULONGLONG_,
+        NPY_FLOAT_,
+        NPY_DOUBLE_,
+        NPY_LONGDOUBLE_,
+        NPY_CFLOAT_,
+        NPY_CDOUBLE_,
+        NPY_CLONGDOUBLE_,
         NPY_OBJECT_ = 17,
-        NPY_STRING_, NPY_UNICODE_, NPY_VOID_,
+        NPY_STRING_,
+        NPY_UNICODE_,
+        NPY_VOID_,
         // Platform-dependent normalization
         NPY_INT8_ = NPY_BYTE_,
         NPY_UINT8_ = NPY_UBYTE_,
@@ -149,13 +175,14 @@ struct npy_api {
         // `npy_common.h` defines the integer aliases. In order, it checks:
         // NPY_BITSOF_LONG, NPY_BITSOF_LONGLONG, NPY_BITSOF_INT, NPY_BITSOF_SHORT, NPY_BITSOF_CHAR
         // and assigns the alias to the first matching size, so we should check in this order.
-        NPY_INT32_ = platform_lookup<std::int32_t, long, int, short>(
-            NPY_LONG_, NPY_INT_, NPY_SHORT_),
+        NPY_INT32_
+        = platform_lookup<std::int32_t, long, int, short>(NPY_LONG_, NPY_INT_, NPY_SHORT_),
         NPY_UINT32_ = platform_lookup<std::uint32_t, unsigned long, unsigned int, unsigned short>(
             NPY_ULONG_, NPY_UINT_, NPY_USHORT_),
-        NPY_INT64_ = platform_lookup<std::int64_t, long, long long, int>(
-            NPY_LONG_, NPY_LONGLONG_, NPY_INT_),
-        NPY_UINT64_ = platform_lookup<std::uint64_t, unsigned long, unsigned long long, unsigned int>(
+        NPY_INT64_
+        = platform_lookup<std::int64_t, long, long long, int>(NPY_LONG_, NPY_LONGLONG_, NPY_INT_),
+        NPY_UINT64_
+        = platform_lookup<std::uint64_t, unsigned long, unsigned long long, unsigned int>(
             NPY_ULONG_, NPY_ULONGLONG_, NPY_UINT_),
     };
 
@@ -164,23 +191,28 @@ struct npy_api {
         int len;
     };
 
-    static npy_api& get() {
+    static npy_api &get() {
         static npy_api api = lookup();
         return api;
     }
 
     bool PyArray_Check_(PyObject *obj) const {
-        return (bool) PyObject_TypeCheck(obj, PyArray_Type_);
+        return PyObject_TypeCheck(obj, PyArray_Type_) != 0;
     }
     bool PyArrayDescr_Check_(PyObject *obj) const {
-        return (bool) PyObject_TypeCheck(obj, PyArrayDescr_Type_);
+        return PyObject_TypeCheck(obj, PyArrayDescr_Type_) != 0;
     }
 
     unsigned int (*PyArray_GetNDArrayCFeatureVersion_)();
     PyObject *(*PyArray_DescrFromType_)(int);
-    PyObject *(*PyArray_NewFromDescr_)
-        (PyTypeObject *, PyObject *, int, Py_intptr_t const *,
-         Py_intptr_t const *, void *, int, PyObject *);
+    PyObject *(*PyArray_NewFromDescr_)(PyTypeObject *,
+                                       PyObject *,
+                                       int,
+                                       Py_intptr_t const *,
+                                       Py_intptr_t const *,
+                                       void *,
+                                       int,
+                                       PyObject *);
     // Unused. Not removed because that affects ABI of the class.
     PyObject *(*PyArray_DescrNewFromType_)(int);
     int (*PyArray_CopyInto_)(PyObject *, PyObject *);
@@ -189,17 +221,23 @@ struct npy_api {
     PyTypeObject *PyVoidArrType_Type_;
     PyTypeObject *PyArrayDescr_Type_;
     PyObject *(*PyArray_DescrFromScalar_)(PyObject *);
-    PyObject *(*PyArray_FromAny_) (PyObject *, PyObject *, int, int, int, PyObject *);
-    int (*PyArray_DescrConverter_) (PyObject *, PyObject **);
-    bool (*PyArray_EquivTypes_) (PyObject *, PyObject *);
-    int (*PyArray_GetArrayParamsFromObject_)(PyObject *, PyObject *, unsigned char, PyObject **, int *,
-                                             Py_intptr_t *, PyObject **, PyObject *);
+    PyObject *(*PyArray_FromAny_)(PyObject *, PyObject *, int, int, int, PyObject *);
+    int (*PyArray_DescrConverter_)(PyObject *, PyObject **);
+    bool (*PyArray_EquivTypes_)(PyObject *, PyObject *);
+    int (*PyArray_GetArrayParamsFromObject_)(PyObject *,
+                                             PyObject *,
+                                             unsigned char,
+                                             PyObject **,
+                                             int *,
+                                             Py_intptr_t *,
+                                             PyObject **,
+                                             PyObject *);
     PyObject *(*PyArray_Squeeze_)(PyObject *);
     // Unused. Not removed because that affects ABI of the class.
     int (*PyArray_SetBaseObject_)(PyObject *, PyObject *);
-    PyObject* (*PyArray_Resize_)(PyObject*, PyArray_Dims*, int, int);
-    PyObject* (*PyArray_Newshape_)(PyObject*, PyArray_Dims*, int);
-    PyObject* (*PyArray_View_)(PyObject*, PyObject*, PyObject*);
+    PyObject *(*PyArray_Resize_)(PyObject *, PyArray_Dims *, int, int);
+    PyObject *(*PyArray_Newshape_)(PyObject *, PyArray_Dims *, int);
+    PyObject *(*PyArray_View_)(PyObject *, PyObject *, PyObject *);
 
 private:
     enum functions {
@@ -227,16 +265,13 @@ private:
     static npy_api lookup() {
         module_ m = module_::import("numpy.core.multiarray");
         auto c = m.attr("_ARRAY_API");
-#if PY_MAJOR_VERSION >= 3
-        void **api_ptr = (void **) PyCapsule_GetPointer(c.ptr(), NULL);
-#else
-        void **api_ptr = (void **) PyCObject_AsVoidPtr(c.ptr());
-#endif
+        void **api_ptr = (void **) PyCapsule_GetPointer(c.ptr(), nullptr);
         npy_api api;
 #define DECL_NPY_API(Func) api.Func##_ = (decltype(api.Func##_)) api_ptr[API_##Func];
         DECL_NPY_API(PyArray_GetNDArrayCFeatureVersion);
-        if (api.PyArray_GetNDArrayCFeatureVersion_() < 0x7)
+        if (api.PyArray_GetNDArrayCFeatureVersion_() < 0x7) {
             pybind11_fail("pybind11 numpy support requires numpy >= 1.7.0");
+        }
         DECL_NPY_API(PyArray_Type);
         DECL_NPY_API(PyVoidArrType_Type);
         DECL_NPY_API(PyArrayDescr_Type);
@@ -261,85 +296,103 @@ private:
     }
 };
 
-inline PyArray_Proxy* array_proxy(void* ptr) {
-    return reinterpret_cast<PyArray_Proxy*>(ptr);
+inline PyArray_Proxy *array_proxy(void *ptr) { return reinterpret_cast<PyArray_Proxy *>(ptr); }
+
+inline const PyArray_Proxy *array_proxy(const void *ptr) {
+    return reinterpret_cast<const PyArray_Proxy *>(ptr);
 }
 
-inline const PyArray_Proxy* array_proxy(const void* ptr) {
-    return reinterpret_cast<const PyArray_Proxy*>(ptr);
+inline PyArrayDescr_Proxy *array_descriptor_proxy(PyObject *ptr) {
+    return reinterpret_cast<PyArrayDescr_Proxy *>(ptr);
 }
 
-inline PyArrayDescr_Proxy* array_descriptor_proxy(PyObject* ptr) {
-   return reinterpret_cast<PyArrayDescr_Proxy*>(ptr);
+inline const PyArrayDescr_Proxy *array_descriptor_proxy(const PyObject *ptr) {
+    return reinterpret_cast<const PyArrayDescr_Proxy *>(ptr);
 }
 
-inline const PyArrayDescr_Proxy* array_descriptor_proxy(const PyObject* ptr) {
-   return reinterpret_cast<const PyArrayDescr_Proxy*>(ptr);
-}
-
-inline bool check_flags(const void* ptr, int flag) {
+inline bool check_flags(const void *ptr, int flag) {
     return (flag == (array_proxy(ptr)->flags & flag));
 }
 
-template <typename T> struct is_std_array : std::false_type { };
-template <typename T, size_t N> struct is_std_array<std::array<T, N>> : std::true_type { };
-template <typename T> struct is_complex : std::false_type { };
-template <typename T> struct is_complex<std::complex<T>> : std::true_type { };
+template <typename T>
+struct is_std_array : std::false_type {};
+template <typename T, size_t N>
+struct is_std_array<std::array<T, N>> : std::true_type {};
+template <typename T>
+struct is_complex : std::false_type {};
+template <typename T>
+struct is_complex<std::complex<T>> : std::true_type {};
 
-template <typename T> struct array_info_scalar {
+template <typename T>
+struct array_info_scalar {
     using type = T;
     static constexpr bool is_array = false;
     static constexpr bool is_empty = false;
-    static constexpr auto extents = _("");
-    static void append_extents(list& /* shape */) { }
+    static constexpr auto extents = const_name("");
+    static void append_extents(list & /* shape */) {}
 };
 // Computes underlying type and a comma-separated list of extents for array
 // types (any mix of std::array and built-in arrays). An array of char is
 // treated as scalar because it gets special handling.
-template <typename T> struct array_info : array_info_scalar<T> { };
-template <typename T, size_t N> struct array_info<std::array<T, N>> {
+template <typename T>
+struct array_info : array_info_scalar<T> {};
+template <typename T, size_t N>
+struct array_info<std::array<T, N>> {
     using type = typename array_info<T>::type;
     static constexpr bool is_array = true;
     static constexpr bool is_empty = (N == 0) || array_info<T>::is_empty;
     static constexpr size_t extent = N;
 
     // appends the extents to shape
-    static void append_extents(list& shape) {
+    static void append_extents(list &shape) {
         shape.append(N);
         array_info<T>::append_extents(shape);
     }
 
-    static constexpr auto extents = _<array_info<T>::is_array>(
-        concat(_<N>(), array_info<T>::extents), _<N>()
-    );
+    static constexpr auto extents = const_name<array_info<T>::is_array>(
+        concat(const_name<N>(), array_info<T>::extents), const_name<N>());
 };
 // For numpy we have special handling for arrays of characters, so we don't include
 // the size in the array extents.
-template <size_t N> struct array_info<char[N]> : array_info_scalar<char[N]> { };
-template <size_t N> struct array_info<std::array<char, N>> : array_info_scalar<std::array<char, N>> { };
-template <typename T, size_t N> struct array_info<T[N]> : array_info<std::array<T, N>> { };
-template <typename T> using remove_all_extents_t = typename array_info<T>::type;
+template <size_t N>
+struct array_info<char[N]> : array_info_scalar<char[N]> {};
+template <size_t N>
+struct array_info<std::array<char, N>> : array_info_scalar<std::array<char, N>> {};
+template <typename T, size_t N>
+struct array_info<T[N]> : array_info<std::array<T, N>> {};
+template <typename T>
+using remove_all_extents_t = typename array_info<T>::type;
 
-template <typename T> using is_pod_struct = all_of<
-    std::is_standard_layout<T>,     // since we're accessing directly in memory we need a standard layout type
-#if defined(__GLIBCXX__) && (__GLIBCXX__ < 20150422 || __GLIBCXX__ == 20150426 || __GLIBCXX__ == 20150623 || __GLIBCXX__ == 20150626 || __GLIBCXX__ == 20160803)
-    // libstdc++ < 5 (including versions 4.8.5, 4.9.3 and 4.9.4 which were released after 5)
-    // don't implement is_trivially_copyable, so approximate it
-    std::is_trivially_destructible<T>,
-    satisfies_any_of<T, std::has_trivial_copy_constructor, std::has_trivial_copy_assign>,
+template <typename T>
+using is_pod_struct
+    = all_of<std::is_standard_layout<T>, // since we're accessing directly in memory
+                                         // we need a standard layout type
+#if defined(__GLIBCXX__)                                                                          \
+    && (__GLIBCXX__ < 20150422 || __GLIBCXX__ == 20150426 || __GLIBCXX__ == 20150623              \
+        || __GLIBCXX__ == 20150626 || __GLIBCXX__ == 20160803)
+             // libstdc++ < 5 (including versions 4.8.5, 4.9.3 and 4.9.4 which were released after
+             // 5) don't implement is_trivially_copyable, so approximate it
+             std::is_trivially_destructible<T>,
+             satisfies_any_of<T, std::has_trivial_copy_constructor, std::has_trivial_copy_assign>,
 #else
-    std::is_trivially_copyable<T>,
+             std::is_trivially_copyable<T>,
 #endif
-    satisfies_none_of<T, std::is_reference, std::is_array, is_std_array, std::is_arithmetic, is_complex, std::is_enum>
->;
+             satisfies_none_of<T,
+                               std::is_reference,
+                               std::is_array,
+                               is_std_array,
+                               std::is_arithmetic,
+                               is_complex,
+                               std::is_enum>>;
 
 // Replacement for std::is_pod (deprecated in C++20)
-template <typename T> using is_pod = all_of<
-    std::is_standard_layout<T>,
-    std::is_trivial<T>
->;
+template <typename T>
+using is_pod = all_of<std::is_standard_layout<T>, std::is_trivial<T>>;
 
-template <ssize_t Dim = 0, typename Strides> ssize_t byte_offset_unsafe(const Strides &) { return 0; }
+template <ssize_t Dim = 0, typename Strides>
+ssize_t byte_offset_unsafe(const Strides &) {
+    return 0;
+}
 template <ssize_t Dim = 0, typename Strides, typename... Ix>
 ssize_t byte_offset_unsafe(const Strides &strides, ssize_t i, Ix... index) {
     return i * strides[Dim] + byte_offset_unsafe<Dim + 1>(strides, index...);
@@ -347,7 +400,7 @@ ssize_t byte_offset_unsafe(const Strides &strides, ssize_t i, Ix... index) {
 
 /**
  * Proxy class providing unsafe, unchecked const access to array data.  This is constructed through
- * the `unchecked<T, N>()` method of `array` or the `unchecked<N>()` method of `array_t<T>`.  `Dims`
+ * the `unchecked<T, N>()` method of `array` or the `unchecked<N>()` method of `array_t<T>`. `Dims`
  * will be -1 for dimensions determined at runtime.
  */
 template <typename T, ssize_t Dims>
@@ -357,15 +410,17 @@ protected:
     const unsigned char *data_;
     // Storing the shape & strides in local variables (i.e. these arrays) allows the compiler to
     // make large performance gains on big, nested loops, but requires compile-time dimensions
-    conditional_t<Dynamic, const ssize_t *, std::array<ssize_t, (size_t) Dims>>
-            shape_, strides_;
+    conditional_t<Dynamic, const ssize_t *, std::array<ssize_t, (size_t) Dims>> shape_, strides_;
     const ssize_t dims_;
 
     friend class pybind11::array;
     // Constructor for compile-time dimensions:
     template <bool Dyn = Dynamic>
-    unchecked_reference(const void *data, const ssize_t *shape, const ssize_t *strides, enable_if_t<!Dyn, ssize_t>)
-    : data_{reinterpret_cast<const unsigned char *>(data)}, dims_{Dims} {
+    unchecked_reference(const void *data,
+                        const ssize_t *shape,
+                        const ssize_t *strides,
+                        enable_if_t<!Dyn, ssize_t>)
+        : data_{reinterpret_cast<const unsigned char *>(data)}, dims_{Dims} {
         for (size_t i = 0; i < (size_t) dims_; i++) {
             shape_[i] = shape[i];
             strides_[i] = strides[i];
@@ -373,8 +428,12 @@ protected:
     }
     // Constructor for runtime dimensions:
     template <bool Dyn = Dynamic>
-    unchecked_reference(const void *data, const ssize_t *shape, const ssize_t *strides, enable_if_t<Dyn, ssize_t> dims)
-    : data_{reinterpret_cast<const unsigned char *>(data)}, shape_{shape}, strides_{strides}, dims_{dims} {}
+    unchecked_reference(const void *data,
+                        const ssize_t *shape,
+                        const ssize_t *strides,
+                        enable_if_t<Dyn, ssize_t> dims)
+        : data_{reinterpret_cast<const unsigned char *>(data)}, shape_{shape}, strides_{strides},
+          dims_{dims} {}
 
 public:
     /**
@@ -382,20 +441,27 @@ public:
      * number of dimensions, this requires the correct number of arguments; for run-time
      * dimensionality, this is not checked (and so is up to the caller to use safely).
      */
-    template <typename... Ix> const T &operator()(Ix... index) const {
+    template <typename... Ix>
+    const T &operator()(Ix... index) const {
         static_assert(ssize_t{sizeof...(Ix)} == Dims || Dynamic,
-                "Invalid number of indices for unchecked array reference");
-        return *reinterpret_cast<const T *>(data_ + byte_offset_unsafe(strides_, ssize_t(index)...));
+                      "Invalid number of indices for unchecked array reference");
+        return *reinterpret_cast<const T *>(data_
+                                            + byte_offset_unsafe(strides_, ssize_t(index)...));
     }
     /**
      * Unchecked const reference access to data; this operator only participates if the reference
      * is to a 1-dimensional array.  When present, this is exactly equivalent to `obj(index)`.
      */
     template <ssize_t D = Dims, typename = enable_if_t<D == 1 || Dynamic>>
-    const T &operator[](ssize_t index) const { return operator()(index); }
+    const T &operator[](ssize_t index) const {
+        return operator()(index);
+    }
 
     /// Pointer access to the data at the given indices.
-    template <typename... Ix> const T *data(Ix... ix) const { return &operator()(ssize_t(ix)...); }
+    template <typename... Ix>
+    const T *data(Ix... ix) const {
+        return &operator()(ssize_t(ix)...);
+    }
 
     /// Returns the item size, i.e. sizeof(T)
     constexpr static ssize_t itemsize() { return sizeof(T); }
@@ -406,21 +472,22 @@ public:
     /// Returns the number of dimensions of the array
     ssize_t ndim() const { return dims_; }
 
-    /// Returns the total number of elements in the referenced array, i.e. the product of the shapes
+    /// Returns the total number of elements in the referenced array, i.e. the product of the
+    /// shapes
     template <bool Dyn = Dynamic>
     enable_if_t<!Dyn, ssize_t> size() const {
-        return std::accumulate(shape_.begin(), shape_.end(), (ssize_t) 1, std::multiplies<ssize_t>());
+        return std::accumulate(
+            shape_.begin(), shape_.end(), (ssize_t) 1, std::multiplies<ssize_t>());
     }
     template <bool Dyn = Dynamic>
     enable_if_t<Dyn, ssize_t> size() const {
         return std::accumulate(shape_, shape_ + ndim(), (ssize_t) 1, std::multiplies<ssize_t>());
     }
 
-    /// Returns the total number of bytes used by the referenced data.  Note that the actual span in
-    /// memory may be larger if the referenced array has non-contiguous strides (e.g. for a slice).
-    ssize_t nbytes() const {
-        return size() * itemsize();
-    }
+    /// Returns the total number of bytes used by the referenced data.  Note that the actual span
+    /// in memory may be larger if the referenced array has non-contiguous strides (e.g. for a
+    /// slice).
+    ssize_t nbytes() const { return size() * itemsize(); }
 };
 
 template <typename T, ssize_t Dims>
@@ -429,15 +496,17 @@ class unchecked_mutable_reference : public unchecked_reference<T, Dims> {
     using ConstBase = unchecked_reference<T, Dims>;
     using ConstBase::ConstBase;
     using ConstBase::Dynamic;
+
 public:
     // Bring in const-qualified versions from base class
     using ConstBase::operator();
     using ConstBase::operator[];
 
     /// Mutable, unchecked access to data at the given indices.
-    template <typename... Ix> T& operator()(Ix... index) {
+    template <typename... Ix>
+    T &operator()(Ix... index) {
         static_assert(ssize_t{sizeof...(Ix)} == Dims || Dynamic,
-                "Invalid number of indices for unchecked array reference");
+                      "Invalid number of indices for unchecked array reference");
         return const_cast<T &>(ConstBase::operator()(index...));
     }
     /**
@@ -446,38 +515,45 @@ public:
      * exactly equivalent to `obj(index)`.
      */
     template <ssize_t D = Dims, typename = enable_if_t<D == 1 || Dynamic>>
-    T &operator[](ssize_t index) { return operator()(index); }
+    T &operator[](ssize_t index) {
+        return operator()(index);
+    }
 
     /// Mutable pointer access to the data at the given indices.
-    template <typename... Ix> T *mutable_data(Ix... ix) { return &operator()(ssize_t(ix)...); }
+    template <typename... Ix>
+    T *mutable_data(Ix... ix) {
+        return &operator()(ssize_t(ix)...);
+    }
 };
 
 template <typename T, ssize_t Dim>
 struct type_caster<unchecked_reference<T, Dim>> {
-    static_assert(Dim == 0 && Dim > 0 /* always fail */, "unchecked array proxy object is not castable");
+    static_assert(Dim == 0 && Dim > 0 /* always fail */,
+                  "unchecked array proxy object is not castable");
 };
 template <typename T, ssize_t Dim>
-struct type_caster<unchecked_mutable_reference<T, Dim>> : type_caster<unchecked_reference<T, Dim>> {};
+struct type_caster<unchecked_mutable_reference<T, Dim>>
+    : type_caster<unchecked_reference<T, Dim>> {};
 
 PYBIND11_NAMESPACE_END(detail)
 
 class dtype : public object {
 public:
-    PYBIND11_OBJECT_DEFAULT(dtype, object, detail::npy_api::get().PyArrayDescr_Check_);
+    PYBIND11_OBJECT_DEFAULT(dtype, object, detail::npy_api::get().PyArrayDescr_Check_)
 
     explicit dtype(const buffer_info &info) {
-        dtype descr(_dtype_from_pep3118()(PYBIND11_STR_TYPE(info.format)));
+        dtype descr(_dtype_from_pep3118()(pybind11::str(info.format)));
         // If info.itemsize == 0, use the value calculated from the format string
         m_ptr = descr.strip_padding(info.itemsize != 0 ? info.itemsize : descr.itemsize())
                     .release()
                     .ptr();
     }
 
-    explicit dtype(const std::string &format) {
-        m_ptr = from_args(pybind11::str(format)).release().ptr();
-    }
+    explicit dtype(const pybind11::str &format) : dtype(from_args(format)) {}
 
-    explicit dtype(const char *format) : dtype(std::string(format)) {}
+    explicit dtype(const std::string &format) : dtype(pybind11::str(format)) {}
+
+    explicit dtype(const char *format) : dtype(pybind11::str(format)) {}
 
     dtype(list names, list formats, list offsets, ssize_t itemsize) {
         dict args;
@@ -485,37 +561,40 @@ public:
         args["formats"] = std::move(formats);
         args["offsets"] = std::move(offsets);
         args["itemsize"] = pybind11::int_(itemsize);
-        m_ptr = from_args(std::move(args)).release().ptr();
+        m_ptr = from_args(args).release().ptr();
+    }
+
+    explicit dtype(int typenum)
+        : object(detail::npy_api::get().PyArray_DescrFromType_(typenum), stolen_t{}) {
+        if (m_ptr == nullptr) {
+            throw error_already_set();
+        }
     }
 
     /// This is essentially the same as calling numpy.dtype(args) in Python.
-    static dtype from_args(object args) {
+    static dtype from_args(const object &args) {
         PyObject *ptr = nullptr;
-        if ((detail::npy_api::get().PyArray_DescrConverter_(args.ptr(), &ptr) == 0) || !ptr)
+        if ((detail::npy_api::get().PyArray_DescrConverter_(args.ptr(), &ptr) == 0) || !ptr) {
             throw error_already_set();
+        }
         return reinterpret_steal<dtype>(ptr);
     }
 
     /// Return dtype associated with a C++ type.
-    template <typename T> static dtype of() {
+    template <typename T>
+    static dtype of() {
         return detail::npy_format_descriptor<typename std::remove_cv<T>::type>::dtype();
     }
 
     /// Size of the data type in bytes.
-    ssize_t itemsize() const {
-        return detail::array_descriptor_proxy(m_ptr)->elsize;
-    }
+    ssize_t itemsize() const { return detail::array_descriptor_proxy(m_ptr)->elsize; }
 
     /// Returns true for structured data types.
-    bool has_fields() const {
-        return detail::array_descriptor_proxy(m_ptr)->names != nullptr;
-    }
+    bool has_fields() const { return detail::array_descriptor_proxy(m_ptr)->names != nullptr; }
 
     /// Single-character code for dtype's kind.
     /// For example, floating point types are 'f' and integral types are 'i'.
-    char kind() const {
-        return detail::array_descriptor_proxy(m_ptr)->kind;
-    }
+    char kind() const { return detail::array_descriptor_proxy(m_ptr)->kind; }
 
     /// Single-character for dtype's type.
     /// For example, ``float`` is 'f', ``double`` 'd', ``int`` 'i', and ``long`` 'l'.
@@ -526,42 +605,75 @@ public:
         return detail::array_descriptor_proxy(m_ptr)->type;
     }
 
+    /// type number of dtype.
+    int num() const {
+        // Note: The signature, `dtype::num` follows the naming of NumPy's public
+        // Python API (i.e., ``dtype.num``), rather than its internal
+        // C API (``PyArray_Descr::type_num``).
+        return detail::array_descriptor_proxy(m_ptr)->type_num;
+    }
+
+    /// Single character for byteorder
+    char byteorder() const { return detail::array_descriptor_proxy(m_ptr)->byteorder; }
+
+    /// Alignment of the data type
+    int alignment() const { return detail::array_descriptor_proxy(m_ptr)->alignment; }
+
+    /// Flags for the array descriptor
+    char flags() const { return detail::array_descriptor_proxy(m_ptr)->flags; }
+
 private:
     static object _dtype_from_pep3118() {
         static PyObject *obj = module_::import("numpy.core._internal")
-            .attr("_dtype_from_pep3118").cast<object>().release().ptr();
+                                   .attr("_dtype_from_pep3118")
+                                   .cast<object>()
+                                   .release()
+                                   .ptr();
         return reinterpret_borrow<object>(obj);
     }
 
     dtype strip_padding(ssize_t itemsize) {
         // Recursively strip all void fields with empty names that are generated for
         // padding fields (as of NumPy v1.11).
-        if (!has_fields())
+        if (!has_fields()) {
             return *this;
-
-        struct field_descr { PYBIND11_STR_TYPE name; object format; pybind11::int_ offset; };
-        std::vector<field_descr> field_descriptors;
-
-        for (auto field : attr("fields").attr("items")()) {
-            auto spec = field.cast<tuple>();
-            auto name = spec[0].cast<pybind11::str>();
-            auto format = spec[1].cast<tuple>()[0].cast<dtype>();
-            auto offset = spec[1].cast<tuple>()[1].cast<pybind11::int_>();
-            if ((len(name) == 0u) && format.kind() == 'V')
-                continue;
-            field_descriptors.push_back({(PYBIND11_STR_TYPE) name, format.strip_padding(format.itemsize()), offset});
         }
 
-        std::sort(field_descriptors.begin(), field_descriptors.end(),
-                  [](const field_descr& a, const field_descr& b) {
+        struct field_descr {
+            pybind11::str name;
+            object format;
+            pybind11::int_ offset;
+            field_descr(pybind11::str &&name, object &&format, pybind11::int_ &&offset)
+                : name{std::move(name)}, format{std::move(format)}, offset{std::move(offset)} {};
+        };
+        auto field_dict = attr("fields").cast<dict>();
+        std::vector<field_descr> field_descriptors;
+        field_descriptors.reserve(field_dict.size());
+
+        for (auto field : field_dict.attr("items")()) {
+            auto spec = field.cast<tuple>();
+            auto name = spec[0].cast<pybind11::str>();
+            auto spec_fo = spec[1].cast<tuple>();
+            auto format = spec_fo[0].cast<dtype>();
+            auto offset = spec_fo[1].cast<pybind11::int_>();
+            if ((len(name) == 0u) && format.kind() == 'V') {
+                continue;
+            }
+            field_descriptors.emplace_back(
+                std::move(name), format.strip_padding(format.itemsize()), std::move(offset));
+        }
+
+        std::sort(field_descriptors.begin(),
+                  field_descriptors.end(),
+                  [](const field_descr &a, const field_descr &b) {
                       return a.offset.cast<int>() < b.offset.cast<int>();
                   });
 
         list names, formats, offsets;
-        for (auto& descr : field_descriptors) {
-            names.append(descr.name);
-            formats.append(descr.format);
-            offsets.append(descr.offset);
+        for (auto &descr : field_descriptors) {
+            names.append(std::move(descr.name));
+            formats.append(std::move(descr.format));
+            offsets.append(std::move(descr.offset));
         }
         return dtype(std::move(names), std::move(formats), std::move(offsets), itemsize);
     }
@@ -583,66 +695,85 @@ public:
     using StridesContainer = detail::any_container<ssize_t>;
 
     // Constructs an array taking shape/strides from arbitrary container types
-    array(const pybind11::dtype &dt, ShapeContainer shape, StridesContainer strides,
-          const void *ptr = nullptr, handle base = handle()) {
+    array(const pybind11::dtype &dt,
+          ShapeContainer shape,
+          StridesContainer strides,
+          const void *ptr = nullptr,
+          handle base = handle()) {
 
-        if (strides->empty())
+        if (strides->empty()) {
             *strides = detail::c_strides(*shape, dt.itemsize());
+        }
 
         auto ndim = shape->size();
-        if (ndim != strides->size())
+        if (ndim != strides->size()) {
             pybind11_fail("NumPy: shape ndim doesn't match strides ndim");
+        }
         auto descr = dt;
 
         int flags = 0;
         if (base && ptr) {
-            if (isinstance<array>(base))
+            if (isinstance<array>(base)) {
                 /* Copy flags from base (except ownership bit) */
-                flags = reinterpret_borrow<array>(base).flags() & ~detail::npy_api::NPY_ARRAY_OWNDATA_;
-            else
+                flags = reinterpret_borrow<array>(base).flags()
+                        & ~detail::npy_api::NPY_ARRAY_OWNDATA_;
+            } else {
                 /* Writable by default, easy to downgrade later on if needed */
                 flags = detail::npy_api::NPY_ARRAY_WRITEABLE_;
+            }
         }
 
         auto &api = detail::npy_api::get();
         auto tmp = reinterpret_steal<object>(api.PyArray_NewFromDescr_(
-            api.PyArray_Type_, descr.release().ptr(), (int) ndim,
+            api.PyArray_Type_,
+            descr.release().ptr(),
+            (int) ndim,
             // Use reinterpret_cast for PyPy on Windows (remove if fixed, checked on 7.3.1)
-            reinterpret_cast<Py_intptr_t*>(shape->data()),
-            reinterpret_cast<Py_intptr_t*>(strides->data()),
-            const_cast<void *>(ptr), flags, nullptr));
-        if (!tmp)
+            reinterpret_cast<Py_intptr_t *>(shape->data()),
+            reinterpret_cast<Py_intptr_t *>(strides->data()),
+            const_cast<void *>(ptr),
+            flags,
+            nullptr));
+        if (!tmp) {
             throw error_already_set();
+        }
         if (ptr) {
             if (base) {
                 api.PyArray_SetBaseObject_(tmp.ptr(), base.inc_ref().ptr());
             } else {
-                tmp = reinterpret_steal<object>(api.PyArray_NewCopy_(tmp.ptr(), -1 /* any order */));
+                tmp = reinterpret_steal<object>(
+                    api.PyArray_NewCopy_(tmp.ptr(), -1 /* any order */));
             }
         }
         m_ptr = tmp.release().ptr();
     }
 
-    array(const pybind11::dtype &dt, ShapeContainer shape, const void *ptr = nullptr, handle base = handle())
-        : array(dt, std::move(shape), {}, ptr, base) { }
+    array(const pybind11::dtype &dt,
+          ShapeContainer shape,
+          const void *ptr = nullptr,
+          handle base = handle())
+        : array(dt, std::move(shape), {}, ptr, base) {}
 
-    template <typename T, typename = detail::enable_if_t<std::is_integral<T>::value && !std::is_same<bool, T>::value>>
+    template <typename T,
+              typename
+              = detail::enable_if_t<std::is_integral<T>::value && !std::is_same<bool, T>::value>>
     array(const pybind11::dtype &dt, T count, const void *ptr = nullptr, handle base = handle())
-        : array(dt, {{count}}, ptr, base) { }
+        : array(dt, {{count}}, ptr, base) {}
 
     template <typename T>
     array(ShapeContainer shape, StridesContainer strides, const T *ptr, handle base = handle())
-        : array(pybind11::dtype::of<T>(), std::move(shape), std::move(strides), ptr, base) { }
+        : array(pybind11::dtype::of<T>(), std::move(shape), std::move(strides), ptr, base) {}
 
     template <typename T>
     array(ShapeContainer shape, const T *ptr, handle base = handle())
-        : array(std::move(shape), {}, ptr, base) { }
+        : array(std::move(shape), {}, ptr, base) {}
 
     template <typename T>
-    explicit array(ssize_t count, const T *ptr, handle base = handle()) : array({count}, {}, ptr, base) { }
+    explicit array(ssize_t count, const T *ptr, handle base = handle())
+        : array({count}, {}, ptr, base) {}
 
     explicit array(const buffer_info &info, handle base = handle())
-    : array(pybind11::dtype(info), info.shape, info.strides, info.ptr, base) { }
+        : array(pybind11::dtype(info), info.shape, info.strides, info.ptr, base) {}
 
     /// Array descriptor (dtype)
     pybind11::dtype dtype() const {
@@ -660,48 +791,38 @@ public:
     }
 
     /// Total number of bytes
-    ssize_t nbytes() const {
-        return size() * itemsize();
-    }
+    ssize_t nbytes() const { return size() * itemsize(); }
 
     /// Number of dimensions
-    ssize_t ndim() const {
-        return detail::array_proxy(m_ptr)->nd;
-    }
+    ssize_t ndim() const { return detail::array_proxy(m_ptr)->nd; }
 
     /// Base object
-    object base() const {
-        return reinterpret_borrow<object>(detail::array_proxy(m_ptr)->base);
-    }
+    object base() const { return reinterpret_borrow<object>(detail::array_proxy(m_ptr)->base); }
 
     /// Dimensions of the array
-    const ssize_t* shape() const {
-        return detail::array_proxy(m_ptr)->dimensions;
-    }
+    const ssize_t *shape() const { return detail::array_proxy(m_ptr)->dimensions; }
 
     /// Dimension along a given axis
     ssize_t shape(ssize_t dim) const {
-        if (dim >= ndim())
+        if (dim >= ndim()) {
             fail_dim_check(dim, "invalid axis");
+        }
         return shape()[dim];
     }
 
     /// Strides of the array
-    const ssize_t* strides() const {
-        return detail::array_proxy(m_ptr)->strides;
-    }
+    const ssize_t *strides() const { return detail::array_proxy(m_ptr)->strides; }
 
     /// Stride along a given axis
     ssize_t strides(ssize_t dim) const {
-        if (dim >= ndim())
+        if (dim >= ndim()) {
             fail_dim_check(dim, "invalid axis");
+        }
         return strides()[dim];
     }
 
     /// Return the NumPy array flags
-    int flags() const {
-        return detail::array_proxy(m_ptr)->flags;
-    }
+    int flags() const { return detail::array_proxy(m_ptr)->flags; }
 
     /// If set, the array is writeable (otherwise the buffer is read-only)
     bool writeable() const {
@@ -715,23 +836,27 @@ public:
 
     /// Pointer to the contained data. If index is not provided, points to the
     /// beginning of the buffer. May throw if the index would lead to out of bounds access.
-    template<typename... Ix> const void* data(Ix... index) const {
+    template <typename... Ix>
+    const void *data(Ix... index) const {
         return static_cast<const void *>(detail::array_proxy(m_ptr)->data + offset_at(index...));
     }
 
     /// Mutable pointer to the contained data. If index is not provided, points to the
     /// beginning of the buffer. May throw if the index would lead to out of bounds access.
     /// May throw if the array is not writeable.
-    template<typename... Ix> void* mutable_data(Ix... index) {
+    template <typename... Ix>
+    void *mutable_data(Ix... index) {
         check_writeable();
         return static_cast<void *>(detail::array_proxy(m_ptr)->data + offset_at(index...));
     }
 
     /// Byte offset from beginning of the array to a given index (full or partial).
     /// May throw if the index would lead to out of bounds access.
-    template<typename... Ix> ssize_t offset_at(Ix... index) const {
-        if ((ssize_t) sizeof...(index) > ndim())
+    template <typename... Ix>
+    ssize_t offset_at(Ix... index) const {
+        if ((ssize_t) sizeof...(index) > ndim()) {
             fail_dim_check(sizeof...(index), "too many indices for an array");
+        }
         return byte_offset(ssize_t(index)...);
     }
 
@@ -739,7 +864,8 @@ public:
 
     /// Item count from beginning of the array to a given index (full or partial).
     /// May throw if the index would lead to out of bounds access.
-    template<typename... Ix> ssize_t index_at(Ix... index) const {
+    template <typename... Ix>
+    ssize_t index_at(Ix... index) const {
         return offset_at(index...) / itemsize();
     }
 
@@ -749,30 +875,37 @@ public:
      * care: the array must not be destroyed or reshaped for the duration of the returned object,
      * and the caller must take care not to access invalid dimensions or dimension indices.
      */
-    template <typename T, ssize_t Dims = -1> detail::unchecked_mutable_reference<T, Dims> mutable_unchecked() & {
-        if (PYBIND11_SILENCE_MSVC_C4127(Dims >= 0) && ndim() != Dims)
-            throw std::domain_error("array has incorrect number of dimensions: " + std::to_string(ndim()) +
-                    "; expected " + std::to_string(Dims));
-        return detail::unchecked_mutable_reference<T, Dims>(mutable_data(), shape(), strides(), ndim());
+    template <typename T, ssize_t Dims = -1>
+    detail::unchecked_mutable_reference<T, Dims> mutable_unchecked() & {
+        if (Dims >= 0 && ndim() != Dims) {
+            throw std::domain_error("array has incorrect number of dimensions: "
+                                    + std::to_string(ndim()) + "; expected "
+                                    + std::to_string(Dims));
+        }
+        return detail::unchecked_mutable_reference<T, Dims>(
+            mutable_data(), shape(), strides(), ndim());
     }
 
     /**
      * Returns a proxy object that provides const access to the array's data without bounds or
      * dimensionality checking.  Unlike `mutable_unchecked()`, this does not require that the
-     * underlying array have the `writable` flag.  Use with care: the array must not be destroyed or
-     * reshaped for the duration of the returned object, and the caller must take care not to access
-     * invalid dimensions or dimension indices.
+     * underlying array have the `writable` flag.  Use with care: the array must not be destroyed
+     * or reshaped for the duration of the returned object, and the caller must take care not to
+     * access invalid dimensions or dimension indices.
      */
-    template <typename T, ssize_t Dims = -1> detail::unchecked_reference<T, Dims> unchecked() const & {
-        if (PYBIND11_SILENCE_MSVC_C4127(Dims >= 0) && ndim() != Dims)
-            throw std::domain_error("array has incorrect number of dimensions: " + std::to_string(ndim()) +
-                    "; expected " + std::to_string(Dims));
+    template <typename T, ssize_t Dims = -1>
+    detail::unchecked_reference<T, Dims> unchecked() const & {
+        if (Dims >= 0 && ndim() != Dims) {
+            throw std::domain_error("array has incorrect number of dimensions: "
+                                    + std::to_string(ndim()) + "; expected "
+                                    + std::to_string(Dims));
+        }
         return detail::unchecked_reference<T, Dims>(data(), shape(), strides(), ndim());
     }
 
     /// Return a new view with all of the dimensions of length 1 removed
     array squeeze() {
-        auto& api = detail::npy_api::get();
+        auto &api = detail::npy_api::get();
         return reinterpret_steal<array>(api.PyArray_Squeeze_(m_ptr));
     }
 
@@ -780,17 +913,19 @@ public:
     /// If refcheck is true and more that one reference exist to this array
     /// then resize will succeed only if it makes a reshape, i.e. original size doesn't change
     void resize(ShapeContainer new_shape, bool refcheck = true) {
-        detail::npy_api::PyArray_Dims d = {
-            // Use reinterpret_cast for PyPy on Windows (remove if fixed, checked on 7.3.1)
-            reinterpret_cast<Py_intptr_t*>(new_shape->data()),
-            int(new_shape->size())
-        };
+        detail::npy_api::PyArray_Dims d
+            = {// Use reinterpret_cast for PyPy on Windows (remove if fixed, checked on 7.3.1)
+               reinterpret_cast<Py_intptr_t *>(new_shape->data()),
+               int(new_shape->size())};
         // try to resize, set ordering param to -1 cause it's not used anyway
         auto new_array = reinterpret_steal<object>(
-            detail::npy_api::get().PyArray_Resize_(m_ptr, &d, int(refcheck), -1)
-        );
-        if (!new_array) throw error_already_set();
-        if (isinstance<array>(new_array)) { *this = std::move(new_array); }
+            detail::npy_api::get().PyArray_Resize_(m_ptr, &d, int(refcheck), -1));
+        if (!new_array) {
+            throw error_already_set();
+        }
+        if (isinstance<array>(new_array)) {
+            *this = std::move(new_array);
+        }
     }
 
     /// Optional `order` parameter omitted, to be added as needed.
@@ -824,40 +959,46 @@ public:
     /// In case of an error, nullptr is returned and the Python error is cleared.
     static array ensure(handle h, int ExtraFlags = 0) {
         auto result = reinterpret_steal<array>(raw_array(h.ptr(), ExtraFlags));
-        if (!result)
+        if (!result) {
             PyErr_Clear();
+        }
         return result;
     }
 
 protected:
-    template<typename, typename> friend struct detail::npy_format_descriptor;
+    template <typename, typename>
+    friend struct detail::npy_format_descriptor;
 
-    void fail_dim_check(ssize_t dim, const std::string& msg) const {
-        throw index_error(msg + ": " + std::to_string(dim) +
-                          " (ndim = " + std::to_string(ndim()) + ")");
+    void fail_dim_check(ssize_t dim, const std::string &msg) const {
+        throw index_error(msg + ": " + std::to_string(dim) + " (ndim = " + std::to_string(ndim())
+                          + ')');
     }
 
-    template<typename... Ix> ssize_t byte_offset(Ix... index) const {
+    template <typename... Ix>
+    ssize_t byte_offset(Ix... index) const {
         check_dimensions(index...);
         return detail::byte_offset_unsafe(strides(), ssize_t(index)...);
     }
 
     void check_writeable() const {
-        if (!writeable())
+        if (!writeable()) {
             throw std::domain_error("array is not writeable");
+        }
     }
 
-    template<typename... Ix> void check_dimensions(Ix... index) const {
+    template <typename... Ix>
+    void check_dimensions(Ix... index) const {
         check_dimensions_impl(ssize_t(0), shape(), ssize_t(index)...);
     }
 
-    void check_dimensions_impl(ssize_t, const ssize_t*) const { }
+    void check_dimensions_impl(ssize_t, const ssize_t *) const {}
 
-    template<typename... Ix> void check_dimensions_impl(ssize_t axis, const ssize_t* shape, ssize_t i, Ix... index) const {
+    template <typename... Ix>
+    void check_dimensions_impl(ssize_t axis, const ssize_t *shape, ssize_t i, Ix... index) const {
         if (i >= *shape) {
-            throw index_error(std::string("index ") + std::to_string(i) +
-                              " is out of bounds for axis " + std::to_string(axis) +
-                              " with size " + std::to_string(*shape));
+            throw index_error(std::string("index ") + std::to_string(i)
+                              + " is out of bounds for axis " + std::to_string(axis)
+                              + " with size " + std::to_string(*shape));
         }
         check_dimensions_impl(axis + 1, shape + 1, index...);
     }
@@ -873,36 +1014,51 @@ protected:
     }
 };
 
-template <typename T, int ExtraFlags = array::forcecast> class array_t : public array {
+template <typename T, int ExtraFlags = array::forcecast>
+class array_t : public array {
 private:
     struct private_ctor {};
     // Delegating constructor needed when both moving and accessing in the same constructor
-    array_t(private_ctor, ShapeContainer &&shape, StridesContainer &&strides, const T *ptr, handle base)
+    array_t(private_ctor,
+            ShapeContainer &&shape,
+            StridesContainer &&strides,
+            const T *ptr,
+            handle base)
         : array(std::move(shape), std::move(strides), ptr, base) {}
+
 public:
     static_assert(!detail::array_info<T>::is_array, "Array types cannot be used with array_t");
 
     using value_type = T;
 
     array_t() : array(0, static_cast<const T *>(nullptr)) {}
-    array_t(handle h, borrowed_t) : array(h, borrowed_t{}) { }
-    array_t(handle h, stolen_t) : array(h, stolen_t{}) { }
+    array_t(handle h, borrowed_t) : array(h, borrowed_t{}) {}
+    array_t(handle h, stolen_t) : array(h, stolen_t{}) {}
 
     PYBIND11_DEPRECATED("Use array_t<T>::ensure() instead")
     array_t(handle h, bool is_borrowed) : array(raw_array_t(h.ptr()), stolen_t{}) {
-        if (!m_ptr) PyErr_Clear();
-        if (!is_borrowed) Py_XDECREF(h.ptr());
+        if (!m_ptr) {
+            PyErr_Clear();
+        }
+        if (!is_borrowed) {
+            Py_XDECREF(h.ptr());
+        }
     }
 
     // NOLINTNEXTLINE(google-explicit-constructor)
     array_t(const object &o) : array(raw_array_t(o.ptr()), stolen_t{}) {
-        if (!m_ptr) throw error_already_set();
+        if (!m_ptr) {
+            throw error_already_set();
+        }
     }
 
-    explicit array_t(const buffer_info& info, handle base = handle()) : array(info, base) { }
+    explicit array_t(const buffer_info &info, handle base = handle()) : array(info, base) {}
 
-    array_t(ShapeContainer shape, StridesContainer strides, const T *ptr = nullptr, handle base = handle())
-        : array(std::move(shape), std::move(strides), ptr, base) { }
+    array_t(ShapeContainer shape,
+            StridesContainer strides,
+            const T *ptr = nullptr,
+            handle base = handle())
+        : array(std::move(shape), std::move(strides), ptr, base) {}
 
     explicit array_t(ShapeContainer shape, const T *ptr = nullptr, handle base = handle())
         : array_t(private_ctor{},
@@ -913,36 +1069,43 @@ public:
                   base) {}
 
     explicit array_t(ssize_t count, const T *ptr = nullptr, handle base = handle())
-        : array({count}, {}, ptr, base) { }
+        : array({count}, {}, ptr, base) {}
 
-    constexpr ssize_t itemsize() const {
-        return sizeof(T);
-    }
+    constexpr ssize_t itemsize() const { return sizeof(T); }
 
-    template<typename... Ix> ssize_t index_at(Ix... index) const {
+    template <typename... Ix>
+    ssize_t index_at(Ix... index) const {
         return offset_at(index...) / itemsize();
     }
 
-    template<typename... Ix> const T* data(Ix... index) const {
-        return static_cast<const T*>(array::data(index...));
+    template <typename... Ix>
+    const T *data(Ix... index) const {
+        return static_cast<const T *>(array::data(index...));
     }
 
-    template<typename... Ix> T* mutable_data(Ix... index) {
-        return static_cast<T*>(array::mutable_data(index...));
+    template <typename... Ix>
+    T *mutable_data(Ix... index) {
+        return static_cast<T *>(array::mutable_data(index...));
     }
 
     // Reference to element at a given index
-    template<typename... Ix> const T& at(Ix... index) const {
-        if ((ssize_t) sizeof...(index) != ndim())
+    template <typename... Ix>
+    const T &at(Ix... index) const {
+        if ((ssize_t) sizeof...(index) != ndim()) {
             fail_dim_check(sizeof...(index), "index dimension mismatch");
-        return *(static_cast<const T*>(array::data()) + byte_offset(ssize_t(index)...) / itemsize());
+        }
+        return *(static_cast<const T *>(array::data())
+                 + byte_offset(ssize_t(index)...) / itemsize());
     }
 
     // Mutable reference to element at a given index
-    template<typename... Ix> T& mutable_at(Ix... index) {
-        if ((ssize_t) sizeof...(index) != ndim())
+    template <typename... Ix>
+    T &mutable_at(Ix... index) {
+        if ((ssize_t) sizeof...(index) != ndim()) {
             fail_dim_check(sizeof...(index), "index dimension mismatch");
-        return *(static_cast<T*>(array::mutable_data()) + byte_offset(ssize_t(index)...) / itemsize());
+        }
+        return *(static_cast<T *>(array::mutable_data())
+                 + byte_offset(ssize_t(index)...) / itemsize());
     }
 
     /**
@@ -951,7 +1114,8 @@ public:
      * care: the array must not be destroyed or reshaped for the duration of the returned object,
      * and the caller must take care not to access invalid dimensions or dimension indices.
      */
-    template <ssize_t Dims = -1> detail::unchecked_mutable_reference<T, Dims> mutable_unchecked() & {
+    template <ssize_t Dims = -1>
+    detail::unchecked_mutable_reference<T, Dims> mutable_unchecked() & {
         return array::mutable_unchecked<T, Dims>();
     }
 
@@ -962,7 +1126,8 @@ public:
      * for the duration of the returned object, and the caller must take care not to access invalid
      * dimensions or dimension indices.
      */
-    template <ssize_t Dims = -1> detail::unchecked_reference<T, Dims> unchecked() const & {
+    template <ssize_t Dims = -1>
+    detail::unchecked_reference<T, Dims> unchecked() const & {
         return array::unchecked<T, Dims>();
     }
 
@@ -970,15 +1135,17 @@ public:
     /// it).  In case of an error, nullptr is returned and the Python error is cleared.
     static array_t ensure(handle h) {
         auto result = reinterpret_steal<array_t>(raw_array_t(h.ptr()));
-        if (!result)
+        if (!result) {
             PyErr_Clear();
+        }
         return result;
     }
 
     static bool check_(handle h) {
         const auto &api = detail::npy_api::get();
         return api.PyArray_Check_(h.ptr())
-               && api.PyArray_EquivTypes_(detail::array_proxy(h.ptr())->descr, dtype::of<T>().ptr())
+               && api.PyArray_EquivTypes_(detail::array_proxy(h.ptr())->descr,
+                                          dtype::of<T>().ptr())
                && detail::check_flags(h.ptr(), ExtraFlags & (array::c_style | array::f_style));
     }
 
@@ -989,9 +1156,13 @@ protected:
             PyErr_SetString(PyExc_ValueError, "cannot create a pybind11::array_t from a nullptr");
             return nullptr;
         }
-        return detail::npy_api::get().PyArray_FromAny_(
-            ptr, dtype::of<T>().release().ptr(), 0, 0,
-            detail::npy_api::NPY_ARRAY_ENSUREARRAY_ | ExtraFlags, nullptr);
+        return detail::npy_api::get().PyArray_FromAny_(ptr,
+                                                       dtype::of<T>().release().ptr(),
+                                                       0,
+                                                       0,
+                                                       detail::npy_api::NPY_ARRAY_ENSUREARRAY_
+                                                           | ExtraFlags,
+                                                       nullptr);
     }
 };
 
@@ -1002,11 +1173,13 @@ struct format_descriptor<T, detail::enable_if_t<detail::is_pod_struct<T>::value>
     }
 };
 
-template <size_t N> struct format_descriptor<char[N]> {
-    static std::string format() { return std::to_string(N) + "s"; }
+template <size_t N>
+struct format_descriptor<char[N]> {
+    static std::string format() { return std::to_string(N) + 's'; }
 };
-template <size_t N> struct format_descriptor<std::array<char, N>> {
-    static std::string format() { return std::to_string(N) + "s"; }
+template <size_t N>
+struct format_descriptor<std::array<char, N>> {
+    static std::string format() { return std::to_string(N) + 's'; }
 };
 
 template <typename T>
@@ -1021,7 +1194,7 @@ template <typename T>
 struct format_descriptor<T, detail::enable_if_t<detail::array_info<T>::is_array>> {
     static std::string format() {
         using namespace detail;
-        static constexpr auto extents = _("(") + array_info<T>::extents + _(")");
+        static constexpr auto extents = const_name("(") + array_info<T>::extents + const_name(")");
         return extents.text + format_descriptor<remove_all_extents_t<T>>::format();
     }
 };
@@ -1032,8 +1205,9 @@ struct pyobject_caster<array_t<T, ExtraFlags>> {
     using type = array_t<T, ExtraFlags>;
 
     bool load(handle src, bool convert) {
-        if (!convert && !type::check_(src))
+        if (!convert && !type::check_(src)) {
             return false;
+        }
         value = type::ensure(src);
         return static_cast<bool>(value);
     }
@@ -1046,7 +1220,7 @@ struct pyobject_caster<array_t<T, ExtraFlags>> {
 
 template <typename T>
 struct compare_buffer_info<T, detail::enable_if_t<detail::is_pod_struct<T>::value>> {
-    static bool compare(const buffer_info& b) {
+    static bool compare(const buffer_info &b) {
         return npy_api::get().PyArray_EquivTypes_(dtype::of<T>().ptr(), dtype(b).ptr());
     }
 };
@@ -1056,78 +1230,105 @@ struct npy_format_descriptor_name;
 
 template <typename T>
 struct npy_format_descriptor_name<T, enable_if_t<std::is_integral<T>::value>> {
-    static constexpr auto name = _<std::is_same<T, bool>::value>(
-        _("bool"), _<std::is_signed<T>::value>("numpy.int", "numpy.uint") + _<sizeof(T)*8>()
-    );
+    static constexpr auto name = const_name<std::is_same<T, bool>::value>(
+        const_name("bool"),
+        const_name<std::is_signed<T>::value>("numpy.int", "numpy.uint")
+            + const_name<sizeof(T) * 8>());
 };
 
 template <typename T>
 struct npy_format_descriptor_name<T, enable_if_t<std::is_floating_point<T>::value>> {
-    static constexpr auto name = _<std::is_same<T, float>::value
-                                   || std::is_same<T, const float>::value
-                                   || std::is_same<T, double>::value
-                                   || std::is_same<T, const double>::value>(
-        _("numpy.float") + _<sizeof(T)*8>(), _("numpy.longdouble")
-    );
+    static constexpr auto name = const_name < std::is_same<T, float>::value
+                                 || std::is_same<T, const float>::value
+                                 || std::is_same<T, double>::value
+                                 || std::is_same<T, const double>::value
+                                        > (const_name("numpy.float") + const_name<sizeof(T) * 8>(),
+                                           const_name("numpy.longdouble"));
 };
 
 template <typename T>
 struct npy_format_descriptor_name<T, enable_if_t<is_complex<T>::value>> {
-    static constexpr auto name = _<std::is_same<typename T::value_type, float>::value
-                                   || std::is_same<typename T::value_type, const float>::value
-                                   || std::is_same<typename T::value_type, double>::value
-                                   || std::is_same<typename T::value_type, const double>::value>(
-        _("numpy.complex") + _<sizeof(typename T::value_type)*16>(), _("numpy.longcomplex")
-    );
+    static constexpr auto name = const_name < std::is_same<typename T::value_type, float>::value
+                                 || std::is_same<typename T::value_type, const float>::value
+                                 || std::is_same<typename T::value_type, double>::value
+                                 || std::is_same<typename T::value_type, const double>::value
+                                        > (const_name("numpy.complex")
+                                               + const_name<sizeof(typename T::value_type) * 16>(),
+                                           const_name("numpy.longcomplex"));
 };
 
 template <typename T>
-struct npy_format_descriptor<T, enable_if_t<satisfies_any_of<T, std::is_arithmetic, is_complex>::value>>
+struct npy_format_descriptor<
+    T,
+    enable_if_t<satisfies_any_of<T, std::is_arithmetic, is_complex>::value>>
     : npy_format_descriptor_name<T> {
 private:
     // NB: the order here must match the one in common.h
-    constexpr static const int values[15] = {
-        npy_api::NPY_BOOL_,
-        npy_api::NPY_BYTE_,   npy_api::NPY_UBYTE_,   npy_api::NPY_INT16_,    npy_api::NPY_UINT16_,
-        npy_api::NPY_INT32_,  npy_api::NPY_UINT32_,  npy_api::NPY_INT64_,    npy_api::NPY_UINT64_,
-        npy_api::NPY_FLOAT_,  npy_api::NPY_DOUBLE_,  npy_api::NPY_LONGDOUBLE_,
-        npy_api::NPY_CFLOAT_, npy_api::NPY_CDOUBLE_, npy_api::NPY_CLONGDOUBLE_
-    };
+    constexpr static const int values[15] = {npy_api::NPY_BOOL_,
+                                             npy_api::NPY_BYTE_,
+                                             npy_api::NPY_UBYTE_,
+                                             npy_api::NPY_INT16_,
+                                             npy_api::NPY_UINT16_,
+                                             npy_api::NPY_INT32_,
+                                             npy_api::NPY_UINT32_,
+                                             npy_api::NPY_INT64_,
+                                             npy_api::NPY_UINT64_,
+                                             npy_api::NPY_FLOAT_,
+                                             npy_api::NPY_DOUBLE_,
+                                             npy_api::NPY_LONGDOUBLE_,
+                                             npy_api::NPY_CFLOAT_,
+                                             npy_api::NPY_CDOUBLE_,
+                                             npy_api::NPY_CLONGDOUBLE_};
 
 public:
     static constexpr int value = values[detail::is_fmt_numeric<T>::index];
 
     static pybind11::dtype dtype() {
-        if (auto ptr = npy_api::get().PyArray_DescrFromType_(value))
+        if (auto *ptr = npy_api::get().PyArray_DescrFromType_(value)) {
             return reinterpret_steal<pybind11::dtype>(ptr);
+        }
         pybind11_fail("Unsupported buffer format!");
     }
 };
 
-#define PYBIND11_DECL_CHAR_FMT \
-    static constexpr auto name = _("S") + _<N>(); \
-    static pybind11::dtype dtype() { return pybind11::dtype(std::string("S") + std::to_string(N)); }
-template <size_t N> struct npy_format_descriptor<char[N]> { PYBIND11_DECL_CHAR_FMT };
-template <size_t N> struct npy_format_descriptor<std::array<char, N>> { PYBIND11_DECL_CHAR_FMT };
+#define PYBIND11_DECL_CHAR_FMT                                                                    \
+    static constexpr auto name = const_name("S") + const_name<N>();                               \
+    static pybind11::dtype dtype() {                                                              \
+        return pybind11::dtype(std::string("S") + std::to_string(N));                             \
+    }
+template <size_t N>
+struct npy_format_descriptor<char[N]> {
+    PYBIND11_DECL_CHAR_FMT
+};
+template <size_t N>
+struct npy_format_descriptor<std::array<char, N>> {
+    PYBIND11_DECL_CHAR_FMT
+};
 #undef PYBIND11_DECL_CHAR_FMT
 
-template<typename T> struct npy_format_descriptor<T, enable_if_t<array_info<T>::is_array>> {
+template <typename T>
+struct npy_format_descriptor<T, enable_if_t<array_info<T>::is_array>> {
 private:
     using base_descr = npy_format_descriptor<typename array_info<T>::type>;
+
 public:
     static_assert(!array_info<T>::is_empty, "Zero-sized arrays are not supported");
 
-    static constexpr auto name = _("(") + array_info<T>::extents + _(")") + base_descr::name;
+    static constexpr auto name
+        = const_name("(") + array_info<T>::extents + const_name(")") + base_descr::name;
     static pybind11::dtype dtype() {
         list shape;
         array_info<T>::append_extents(shape);
-        return pybind11::dtype::from_args(pybind11::make_tuple(base_descr::dtype(), shape));
+        return pybind11::dtype::from_args(
+            pybind11::make_tuple(base_descr::dtype(), std::move(shape)));
     }
 };
 
-template<typename T> struct npy_format_descriptor<T, enable_if_t<std::is_enum<T>::value>> {
+template <typename T>
+struct npy_format_descriptor<T, enable_if_t<std::is_enum<T>::value>> {
 private:
     using base_descr = npy_format_descriptor<typename std::underlying_type<T>::type>;
+
 public:
     static constexpr auto name = base_descr::name;
     static pybind11::dtype dtype() { return base_descr::dtype(); }
@@ -1141,31 +1342,35 @@ struct field_descriptor {
     dtype descr;
 };
 
-PYBIND11_NOINLINE void register_structured_dtype(
-    any_container<field_descriptor> fields,
-    const std::type_info& tinfo, ssize_t itemsize,
-    bool (*direct_converter)(PyObject *, void *&)) {
+PYBIND11_NOINLINE void register_structured_dtype(any_container<field_descriptor> fields,
+                                                 const std::type_info &tinfo,
+                                                 ssize_t itemsize,
+                                                 bool (*direct_converter)(PyObject *, void *&)) {
 
-    auto& numpy_internals = get_numpy_internals();
-    if (numpy_internals.get_type_info(tinfo, false))
+    auto &numpy_internals = get_numpy_internals();
+    if (numpy_internals.get_type_info(tinfo, false)) {
         pybind11_fail("NumPy: dtype is already registered");
+    }
 
     // Use ordered fields because order matters as of NumPy 1.14:
     // https://docs.scipy.org/doc/numpy/release.html#multiple-field-indexing-assignment-of-structured-arrays
     std::vector<field_descriptor> ordered_fields(std::move(fields));
-    std::sort(ordered_fields.begin(), ordered_fields.end(),
+    std::sort(
+        ordered_fields.begin(),
+        ordered_fields.end(),
         [](const field_descriptor &a, const field_descriptor &b) { return a.offset < b.offset; });
 
     list names, formats, offsets;
-    for (auto& field : ordered_fields) {
-        if (!field.descr)
-            pybind11_fail(std::string("NumPy: unsupported field dtype: `") +
-                            field.name + "` @ " + tinfo.name());
-        names.append(PYBIND11_STR_TYPE(field.name));
+    for (auto &field : ordered_fields) {
+        if (!field.descr) {
+            pybind11_fail(std::string("NumPy: unsupported field dtype: `") + field.name + "` @ "
+                          + tinfo.name());
+        }
+        names.append(pybind11::str(field.name));
         formats.append(field.descr);
         offsets.append(pybind11::int_(field.offset));
     }
-    auto dtype_ptr
+    auto *dtype_ptr
         = pybind11::dtype(std::move(names), std::move(formats), std::move(offsets), itemsize)
               .release()
               .ptr();
@@ -1185,36 +1390,39 @@ PYBIND11_NOINLINE void register_structured_dtype(
     // overriding the endianness. Putting the ^ in front of individual fields
     // isn't guaranteed to work due to https://github.com/numpy/numpy/issues/9049
     oss << "^T{";
-    for (auto& field : ordered_fields) {
-        if (field.offset > offset)
+    for (auto &field : ordered_fields) {
+        if (field.offset > offset) {
             oss << (field.offset - offset) << 'x';
+        }
         oss << field.format << ':' << field.name << ':';
         offset = field.offset + field.size;
     }
-    if (itemsize > offset)
+    if (itemsize > offset) {
         oss << (itemsize - offset) << 'x';
+    }
     oss << '}';
     auto format_str = oss.str();
 
-    // Sanity check: verify that NumPy properly parses our buffer format string
-    auto& api = npy_api::get();
-    auto arr =  array(buffer_info(nullptr, itemsize, format_str, 1));
-    if (!api.PyArray_EquivTypes_(dtype_ptr, arr.dtype().ptr()))
+    // Smoke test: verify that NumPy properly parses our buffer format string
+    auto &api = npy_api::get();
+    auto arr = array(buffer_info(nullptr, itemsize, format_str, 1));
+    if (!api.PyArray_EquivTypes_(dtype_ptr, arr.dtype().ptr())) {
         pybind11_fail("NumPy: invalid buffer descriptor!");
+    }
 
     auto tindex = std::type_index(tinfo);
-    numpy_internals.registered_dtypes[tindex] = { dtype_ptr, format_str };
+    numpy_internals.registered_dtypes[tindex] = {dtype_ptr, std::move(format_str)};
     get_internals().direct_conversions[tindex].push_back(direct_converter);
 }
 
-template <typename T, typename SFINAE> struct npy_format_descriptor {
-    static_assert(is_pod_struct<T>::value, "Attempt to use a non-POD or unimplemented POD type as a numpy dtype");
+template <typename T, typename SFINAE>
+struct npy_format_descriptor {
+    static_assert(is_pod_struct<T>::value,
+                  "Attempt to use a non-POD or unimplemented POD type as a numpy dtype");
 
     static constexpr auto name = make_caster<T>::name;
 
-    static pybind11::dtype dtype() {
-        return reinterpret_borrow<pybind11::dtype>(dtype_ptr());
-    }
+    static pybind11::dtype dtype() { return reinterpret_borrow<pybind11::dtype>(dtype_ptr()); }
 
     static std::string format() {
         static auto format_str = get_numpy_internals().get_type_info<T>(true)->format_str;
@@ -1222,20 +1430,23 @@ template <typename T, typename SFINAE> struct npy_format_descriptor {
     }
 
     static void register_dtype(any_container<field_descriptor> fields) {
-        register_structured_dtype(std::move(fields), typeid(typename std::remove_cv<T>::type),
-                                  sizeof(T), &direct_converter);
+        register_structured_dtype(std::move(fields),
+                                  typeid(typename std::remove_cv<T>::type),
+                                  sizeof(T),
+                                  &direct_converter);
     }
 
 private:
-    static PyObject* dtype_ptr() {
-        static PyObject* ptr = get_numpy_internals().get_type_info<T>(true)->dtype_ptr;
+    static PyObject *dtype_ptr() {
+        static PyObject *ptr = get_numpy_internals().get_type_info<T>(true)->dtype_ptr;
         return ptr;
     }
 
-    static bool direct_converter(PyObject *obj, void*& value) {
-        auto& api = npy_api::get();
-        if (!PyObject_TypeCheck(obj, api.PyVoidArrType_Type_))
+    static bool direct_converter(PyObject *obj, void *&value) {
+        auto &api = npy_api::get();
+        if (!PyObject_TypeCheck(obj, api.PyVoidArrType_Type_)) {
             return false;
+        }
         if (auto descr = reinterpret_steal<object>(api.PyArray_DescrFromScalar_(obj))) {
             if (api.PyArray_EquivTypes_(dtype_ptr(), descr.ptr())) {
                 value = ((PyVoidScalarObject_Proxy *) obj)->obval;
@@ -1247,78 +1458,80 @@ private:
 };
 
 #ifdef __CLION_IDE__ // replace heavy macro with dummy code for the IDE (doesn't affect code)
-# define PYBIND11_NUMPY_DTYPE(Type, ...) ((void)0)
-# define PYBIND11_NUMPY_DTYPE_EX(Type, ...) ((void)0)
+#    define PYBIND11_NUMPY_DTYPE(Type, ...) ((void) 0)
+#    define PYBIND11_NUMPY_DTYPE_EX(Type, ...) ((void) 0)
 #else
 
-#define PYBIND11_FIELD_DESCRIPTOR_EX(T, Field, Name)                                          \
-    ::pybind11::detail::field_descriptor {                                                    \
-        Name, offsetof(T, Field), sizeof(decltype(std::declval<T>().Field)),                  \
-        ::pybind11::format_descriptor<decltype(std::declval<T>().Field)>::format(),           \
-        ::pybind11::detail::npy_format_descriptor<decltype(std::declval<T>().Field)>::dtype() \
-    }
+#    define PYBIND11_FIELD_DESCRIPTOR_EX(T, Field, Name)                                          \
+        ::pybind11::detail::field_descriptor {                                                    \
+            Name, offsetof(T, Field), sizeof(decltype(std::declval<T>().Field)),                  \
+                ::pybind11::format_descriptor<decltype(std::declval<T>().Field)>::format(),       \
+                ::pybind11::detail::npy_format_descriptor<                                        \
+                    decltype(std::declval<T>().Field)>::dtype()                                   \
+        }
 
 // Extract name, offset and format descriptor for a struct field
-#define PYBIND11_FIELD_DESCRIPTOR(T, Field) PYBIND11_FIELD_DESCRIPTOR_EX(T, Field, #Field)
+#    define PYBIND11_FIELD_DESCRIPTOR(T, Field) PYBIND11_FIELD_DESCRIPTOR_EX(T, Field, #Field)
 
 // The main idea of this macro is borrowed from https://github.com/swansontec/map-macro
 // (C) William Swanson, Paul Fultz
-#define PYBIND11_EVAL0(...) __VA_ARGS__
-#define PYBIND11_EVAL1(...) PYBIND11_EVAL0 (PYBIND11_EVAL0 (PYBIND11_EVAL0 (__VA_ARGS__)))
-#define PYBIND11_EVAL2(...) PYBIND11_EVAL1 (PYBIND11_EVAL1 (PYBIND11_EVAL1 (__VA_ARGS__)))
-#define PYBIND11_EVAL3(...) PYBIND11_EVAL2 (PYBIND11_EVAL2 (PYBIND11_EVAL2 (__VA_ARGS__)))
-#define PYBIND11_EVAL4(...) PYBIND11_EVAL3 (PYBIND11_EVAL3 (PYBIND11_EVAL3 (__VA_ARGS__)))
-#define PYBIND11_EVAL(...)  PYBIND11_EVAL4 (PYBIND11_EVAL4 (PYBIND11_EVAL4 (__VA_ARGS__)))
-#define PYBIND11_MAP_END(...)
-#define PYBIND11_MAP_OUT
-#define PYBIND11_MAP_COMMA ,
-#define PYBIND11_MAP_GET_END() 0, PYBIND11_MAP_END
-#define PYBIND11_MAP_NEXT0(test, next, ...) next PYBIND11_MAP_OUT
-#define PYBIND11_MAP_NEXT1(test, next) PYBIND11_MAP_NEXT0 (test, next, 0)
-#define PYBIND11_MAP_NEXT(test, next)  PYBIND11_MAP_NEXT1 (PYBIND11_MAP_GET_END test, next)
-#if defined(_MSC_VER) && !defined(__clang__) // MSVC is not as eager to expand macros, hence this workaround
-#define PYBIND11_MAP_LIST_NEXT1(test, next) \
-    PYBIND11_EVAL0 (PYBIND11_MAP_NEXT0 (test, PYBIND11_MAP_COMMA next, 0))
-#else
-#define PYBIND11_MAP_LIST_NEXT1(test, next) \
-    PYBIND11_MAP_NEXT0 (test, PYBIND11_MAP_COMMA next, 0)
-#endif
-#define PYBIND11_MAP_LIST_NEXT(test, next) \
-    PYBIND11_MAP_LIST_NEXT1 (PYBIND11_MAP_GET_END test, next)
-#define PYBIND11_MAP_LIST0(f, t, x, peek, ...) \
-    f(t, x) PYBIND11_MAP_LIST_NEXT (peek, PYBIND11_MAP_LIST1) (f, t, peek, __VA_ARGS__)
-#define PYBIND11_MAP_LIST1(f, t, x, peek, ...) \
-    f(t, x) PYBIND11_MAP_LIST_NEXT (peek, PYBIND11_MAP_LIST0) (f, t, peek, __VA_ARGS__)
+#    define PYBIND11_EVAL0(...) __VA_ARGS__
+#    define PYBIND11_EVAL1(...) PYBIND11_EVAL0(PYBIND11_EVAL0(PYBIND11_EVAL0(__VA_ARGS__)))
+#    define PYBIND11_EVAL2(...) PYBIND11_EVAL1(PYBIND11_EVAL1(PYBIND11_EVAL1(__VA_ARGS__)))
+#    define PYBIND11_EVAL3(...) PYBIND11_EVAL2(PYBIND11_EVAL2(PYBIND11_EVAL2(__VA_ARGS__)))
+#    define PYBIND11_EVAL4(...) PYBIND11_EVAL3(PYBIND11_EVAL3(PYBIND11_EVAL3(__VA_ARGS__)))
+#    define PYBIND11_EVAL(...) PYBIND11_EVAL4(PYBIND11_EVAL4(PYBIND11_EVAL4(__VA_ARGS__)))
+#    define PYBIND11_MAP_END(...)
+#    define PYBIND11_MAP_OUT
+#    define PYBIND11_MAP_COMMA ,
+#    define PYBIND11_MAP_GET_END() 0, PYBIND11_MAP_END
+#    define PYBIND11_MAP_NEXT0(test, next, ...) next PYBIND11_MAP_OUT
+#    define PYBIND11_MAP_NEXT1(test, next) PYBIND11_MAP_NEXT0(test, next, 0)
+#    define PYBIND11_MAP_NEXT(test, next) PYBIND11_MAP_NEXT1(PYBIND11_MAP_GET_END test, next)
+#    if defined(_MSC_VER)                                                                         \
+        && !defined(__clang__) // MSVC is not as eager to expand macros, hence this workaround
+#        define PYBIND11_MAP_LIST_NEXT1(test, next)                                               \
+            PYBIND11_EVAL0(PYBIND11_MAP_NEXT0(test, PYBIND11_MAP_COMMA next, 0))
+#    else
+#        define PYBIND11_MAP_LIST_NEXT1(test, next)                                               \
+            PYBIND11_MAP_NEXT0(test, PYBIND11_MAP_COMMA next, 0)
+#    endif
+#    define PYBIND11_MAP_LIST_NEXT(test, next)                                                    \
+        PYBIND11_MAP_LIST_NEXT1(PYBIND11_MAP_GET_END test, next)
+#    define PYBIND11_MAP_LIST0(f, t, x, peek, ...)                                                \
+        f(t, x) PYBIND11_MAP_LIST_NEXT(peek, PYBIND11_MAP_LIST1)(f, t, peek, __VA_ARGS__)
+#    define PYBIND11_MAP_LIST1(f, t, x, peek, ...)                                                \
+        f(t, x) PYBIND11_MAP_LIST_NEXT(peek, PYBIND11_MAP_LIST0)(f, t, peek, __VA_ARGS__)
 // PYBIND11_MAP_LIST(f, t, a1, a2, ...) expands to f(t, a1), f(t, a2), ...
-#define PYBIND11_MAP_LIST(f, t, ...) \
-    PYBIND11_EVAL (PYBIND11_MAP_LIST1 (f, t, __VA_ARGS__, (), 0))
+#    define PYBIND11_MAP_LIST(f, t, ...)                                                          \
+        PYBIND11_EVAL(PYBIND11_MAP_LIST1(f, t, __VA_ARGS__, (), 0))
 
-#define PYBIND11_NUMPY_DTYPE(Type, ...) \
-    ::pybind11::detail::npy_format_descriptor<Type>::register_dtype \
-        (::std::vector<::pybind11::detail::field_descriptor> \
-         {PYBIND11_MAP_LIST (PYBIND11_FIELD_DESCRIPTOR, Type, __VA_ARGS__)})
+#    define PYBIND11_NUMPY_DTYPE(Type, ...)                                                       \
+        ::pybind11::detail::npy_format_descriptor<Type>::register_dtype(                          \
+            ::std::vector<::pybind11::detail::field_descriptor>{                                  \
+                PYBIND11_MAP_LIST(PYBIND11_FIELD_DESCRIPTOR, Type, __VA_ARGS__)})
 
-#if defined(_MSC_VER) && !defined(__clang__)
-#define PYBIND11_MAP2_LIST_NEXT1(test, next) \
-    PYBIND11_EVAL0 (PYBIND11_MAP_NEXT0 (test, PYBIND11_MAP_COMMA next, 0))
-#else
-#define PYBIND11_MAP2_LIST_NEXT1(test, next) \
-    PYBIND11_MAP_NEXT0 (test, PYBIND11_MAP_COMMA next, 0)
-#endif
-#define PYBIND11_MAP2_LIST_NEXT(test, next) \
-    PYBIND11_MAP2_LIST_NEXT1 (PYBIND11_MAP_GET_END test, next)
-#define PYBIND11_MAP2_LIST0(f, t, x1, x2, peek, ...) \
-    f(t, x1, x2) PYBIND11_MAP2_LIST_NEXT (peek, PYBIND11_MAP2_LIST1) (f, t, peek, __VA_ARGS__)
-#define PYBIND11_MAP2_LIST1(f, t, x1, x2, peek, ...) \
-    f(t, x1, x2) PYBIND11_MAP2_LIST_NEXT (peek, PYBIND11_MAP2_LIST0) (f, t, peek, __VA_ARGS__)
+#    if defined(_MSC_VER) && !defined(__clang__)
+#        define PYBIND11_MAP2_LIST_NEXT1(test, next)                                              \
+            PYBIND11_EVAL0(PYBIND11_MAP_NEXT0(test, PYBIND11_MAP_COMMA next, 0))
+#    else
+#        define PYBIND11_MAP2_LIST_NEXT1(test, next)                                              \
+            PYBIND11_MAP_NEXT0(test, PYBIND11_MAP_COMMA next, 0)
+#    endif
+#    define PYBIND11_MAP2_LIST_NEXT(test, next)                                                   \
+        PYBIND11_MAP2_LIST_NEXT1(PYBIND11_MAP_GET_END test, next)
+#    define PYBIND11_MAP2_LIST0(f, t, x1, x2, peek, ...)                                          \
+        f(t, x1, x2) PYBIND11_MAP2_LIST_NEXT(peek, PYBIND11_MAP2_LIST1)(f, t, peek, __VA_ARGS__)
+#    define PYBIND11_MAP2_LIST1(f, t, x1, x2, peek, ...)                                          \
+        f(t, x1, x2) PYBIND11_MAP2_LIST_NEXT(peek, PYBIND11_MAP2_LIST0)(f, t, peek, __VA_ARGS__)
 // PYBIND11_MAP2_LIST(f, t, a1, a2, ...) expands to f(t, a1, a2), f(t, a3, a4), ...
-#define PYBIND11_MAP2_LIST(f, t, ...) \
-    PYBIND11_EVAL (PYBIND11_MAP2_LIST1 (f, t, __VA_ARGS__, (), 0))
+#    define PYBIND11_MAP2_LIST(f, t, ...)                                                         \
+        PYBIND11_EVAL(PYBIND11_MAP2_LIST1(f, t, __VA_ARGS__, (), 0))
 
-#define PYBIND11_NUMPY_DTYPE_EX(Type, ...) \
-    ::pybind11::detail::npy_format_descriptor<Type>::register_dtype \
-        (::std::vector<::pybind11::detail::field_descriptor> \
-         {PYBIND11_MAP2_LIST (PYBIND11_FIELD_DESCRIPTOR_EX, Type, __VA_ARGS__)})
+#    define PYBIND11_NUMPY_DTYPE_EX(Type, ...)                                                    \
+        ::pybind11::detail::npy_format_descriptor<Type>::register_dtype(                          \
+            ::std::vector<::pybind11::detail::field_descriptor>{                                  \
+                PYBIND11_MAP2_LIST(PYBIND11_FIELD_DESCRIPTOR_EX, Type, __VA_ARGS__)})
 
 #endif // __CLION_IDE__
 
@@ -1330,8 +1543,8 @@ public:
 
     common_iterator() : m_strides() {}
 
-    common_iterator(void* ptr, const container_type& strides, const container_type& shape)
-        : p_ptr(reinterpret_cast<char*>(ptr)), m_strides(strides.size()) {
+    common_iterator(void *ptr, const container_type &strides, const container_type &shape)
+        : p_ptr(reinterpret_cast<char *>(ptr)), m_strides(strides.size()) {
         m_strides.back() = static_cast<value_type>(strides.back());
         for (size_type i = m_strides.size() - 1; i != 0; --i) {
             size_type j = i - 1;
@@ -1340,38 +1553,35 @@ public:
         }
     }
 
-    void increment(size_type dim) {
-        p_ptr += m_strides[dim];
-    }
+    void increment(size_type dim) { p_ptr += m_strides[dim]; }
 
-    void* data() const {
-        return p_ptr;
-    }
+    void *data() const { return p_ptr; }
 
 private:
-    char *p_ptr{0};
+    char *p_ptr{nullptr};
     container_type m_strides;
 };
 
-template <size_t N> class multi_array_iterator {
+template <size_t N>
+class multi_array_iterator {
 public:
     using container_type = std::vector<ssize_t>;
 
-    multi_array_iterator(const std::array<buffer_info, N> &buffers,
-                         const container_type &shape)
-        : m_shape(shape.size()), m_index(shape.size(), 0),
-          m_common_iterator() {
+    multi_array_iterator(const std::array<buffer_info, N> &buffers, const container_type &shape)
+        : m_shape(shape.size()), m_index(shape.size(), 0), m_common_iterator() {
 
         // Manual copy to avoid conversion warning if using std::copy
-        for (size_t i = 0; i < shape.size(); ++i)
+        for (size_t i = 0; i < shape.size(); ++i) {
             m_shape[i] = shape[i];
+        }
 
         container_type strides(shape.size());
-        for (size_t i = 0; i < N; ++i)
+        for (size_t i = 0; i < N; ++i) {
             init_common_iterator(buffers[i], shape, m_common_iterator[i], strides);
+        }
     }
 
-    multi_array_iterator& operator++() {
+    multi_array_iterator &operator++() {
         for (size_t j = m_index.size(); j != 0; --j) {
             size_t i = j - 1;
             if (++m_index[i] != m_shape[i]) {
@@ -1383,12 +1593,12 @@ public:
         return *this;
     }
 
-    template <size_t K, class T = void> T* data() const {
-        return reinterpret_cast<T*>(m_common_iterator[K].data());
+    template <size_t K, class T = void>
+    T *data() const {
+        return reinterpret_cast<T *>(m_common_iterator[K].data());
     }
 
 private:
-
     using common_iter = common_iterator;
 
     void init_common_iterator(const buffer_info &buffer,
@@ -1401,10 +1611,11 @@ private:
         auto strides_iter = strides.rbegin();
 
         while (buffer_shape_iter != buffer.shape.rend()) {
-            if (*shape_iter == *buffer_shape_iter)
+            if (*shape_iter == *buffer_shape_iter) {
                 *strides_iter = *buffer_strides_iter;
-            else
+            } else {
                 *strides_iter = 0;
+            }
 
             ++buffer_shape_iter;
             ++buffer_strides_iter;
@@ -1417,8 +1628,9 @@ private:
     }
 
     void increment_common_iterator(size_t dim) {
-        for (auto &iter : m_common_iterator)
+        for (auto &iter : m_common_iterator) {
             iter.increment(dim);
+        }
     }
 
     container_type m_shape;
@@ -1428,60 +1640,71 @@ private:
 
 enum class broadcast_trivial { non_trivial, c_trivial, f_trivial };
 
-// Populates the shape and number of dimensions for the set of buffers.  Returns a broadcast_trivial
-// enum value indicating whether the broadcast is "trivial"--that is, has each buffer being either a
-// singleton or a full-size, C-contiguous (`c_trivial`) or Fortran-contiguous (`f_trivial`) storage
-// buffer; returns `non_trivial` otherwise.
+// Populates the shape and number of dimensions for the set of buffers.  Returns a
+// broadcast_trivial enum value indicating whether the broadcast is "trivial"--that is, has each
+// buffer being either a singleton or a full-size, C-contiguous (`c_trivial`) or Fortran-contiguous
+// (`f_trivial`) storage buffer; returns `non_trivial` otherwise.
 template <size_t N>
-broadcast_trivial broadcast(const std::array<buffer_info, N> &buffers, ssize_t &ndim, std::vector<ssize_t> &shape) {
-    ndim = std::accumulate(buffers.begin(), buffers.end(), ssize_t(0), [](ssize_t res, const buffer_info &buf) {
-        return std::max(res, buf.ndim);
-    });
+broadcast_trivial
+broadcast(const std::array<buffer_info, N> &buffers, ssize_t &ndim, std::vector<ssize_t> &shape) {
+    ndim = std::accumulate(
+        buffers.begin(), buffers.end(), ssize_t(0), [](ssize_t res, const buffer_info &buf) {
+            return std::max(res, buf.ndim);
+        });
 
     shape.clear();
     shape.resize((size_t) ndim, 1);
 
-    // Figure out the output size, and make sure all input arrays conform (i.e. are either size 1 or
-    // the full size).
+    // Figure out the output size, and make sure all input arrays conform (i.e. are either size 1
+    // or the full size).
     for (size_t i = 0; i < N; ++i) {
         auto res_iter = shape.rbegin();
         auto end = buffers[i].shape.rend();
-        for (auto shape_iter = buffers[i].shape.rbegin(); shape_iter != end; ++shape_iter, ++res_iter) {
+        for (auto shape_iter = buffers[i].shape.rbegin(); shape_iter != end;
+             ++shape_iter, ++res_iter) {
             const auto &dim_size_in = *shape_iter;
             auto &dim_size_out = *res_iter;
 
-            // Each input dimension can either be 1 or `n`, but `n` values must match across buffers
-            if (dim_size_out == 1)
+            // Each input dimension can either be 1 or `n`, but `n` values must match across
+            // buffers
+            if (dim_size_out == 1) {
                 dim_size_out = dim_size_in;
-            else if (dim_size_in != 1 && dim_size_in != dim_size_out)
+            } else if (dim_size_in != 1 && dim_size_in != dim_size_out) {
                 pybind11_fail("pybind11::vectorize: incompatible size/dimension of inputs!");
+            }
         }
     }
 
     bool trivial_broadcast_c = true;
     bool trivial_broadcast_f = true;
     for (size_t i = 0; i < N && (trivial_broadcast_c || trivial_broadcast_f); ++i) {
-        if (buffers[i].size == 1)
+        if (buffers[i].size == 1) {
             continue;
+        }
 
         // Require the same number of dimensions:
-        if (buffers[i].ndim != ndim)
+        if (buffers[i].ndim != ndim) {
             return broadcast_trivial::non_trivial;
+        }
 
         // Require all dimensions be full-size:
-        if (!std::equal(buffers[i].shape.cbegin(), buffers[i].shape.cend(), shape.cbegin()))
+        if (!std::equal(buffers[i].shape.cbegin(), buffers[i].shape.cend(), shape.cbegin())) {
             return broadcast_trivial::non_trivial;
+        }
 
         // Check for C contiguity (but only if previous inputs were also C contiguous)
         if (trivial_broadcast_c) {
             ssize_t expect_stride = buffers[i].itemsize;
             auto end = buffers[i].shape.crend();
-            for (auto shape_iter = buffers[i].shape.crbegin(), stride_iter = buffers[i].strides.crbegin();
-                    trivial_broadcast_c && shape_iter != end; ++shape_iter, ++stride_iter) {
-                if (expect_stride == *stride_iter)
+            for (auto shape_iter = buffers[i].shape.crbegin(),
+                      stride_iter = buffers[i].strides.crbegin();
+                 trivial_broadcast_c && shape_iter != end;
+                 ++shape_iter, ++stride_iter) {
+                if (expect_stride == *stride_iter) {
                     expect_stride *= *shape_iter;
-                else
+                } else {
                     trivial_broadcast_c = false;
+                }
             }
         }
 
@@ -1489,60 +1712,61 @@ broadcast_trivial broadcast(const std::array<buffer_info, N> &buffers, ssize_t &
         if (trivial_broadcast_f) {
             ssize_t expect_stride = buffers[i].itemsize;
             auto end = buffers[i].shape.cend();
-            for (auto shape_iter = buffers[i].shape.cbegin(), stride_iter = buffers[i].strides.cbegin();
-                    trivial_broadcast_f && shape_iter != end; ++shape_iter, ++stride_iter) {
-                if (expect_stride == *stride_iter)
+            for (auto shape_iter = buffers[i].shape.cbegin(),
+                      stride_iter = buffers[i].strides.cbegin();
+                 trivial_broadcast_f && shape_iter != end;
+                 ++shape_iter, ++stride_iter) {
+                if (expect_stride == *stride_iter) {
                     expect_stride *= *shape_iter;
-                else
+                } else {
                     trivial_broadcast_f = false;
+                }
             }
         }
     }
 
-    return
-        trivial_broadcast_c ? broadcast_trivial::c_trivial :
-        trivial_broadcast_f ? broadcast_trivial::f_trivial :
-        broadcast_trivial::non_trivial;
+    return trivial_broadcast_c   ? broadcast_trivial::c_trivial
+           : trivial_broadcast_f ? broadcast_trivial::f_trivial
+                                 : broadcast_trivial::non_trivial;
 }
 
 template <typename T>
 struct vectorize_arg {
-    static_assert(!std::is_rvalue_reference<T>::value, "Functions with rvalue reference arguments cannot be vectorized");
+    static_assert(!std::is_rvalue_reference<T>::value,
+                  "Functions with rvalue reference arguments cannot be vectorized");
     // The wrapped function gets called with this type:
     using call_type = remove_reference_t<T>;
     // Is this a vectorized argument?
-    static constexpr bool vectorize =
-        satisfies_any_of<call_type, std::is_arithmetic, is_complex, is_pod>::value &&
-        satisfies_none_of<call_type, std::is_pointer, std::is_array, is_std_array, std::is_enum>::value &&
-        (!std::is_reference<T>::value ||
-         (std::is_lvalue_reference<T>::value && std::is_const<call_type>::value));
+    static constexpr bool vectorize
+        = satisfies_any_of<call_type, std::is_arithmetic, is_complex, is_pod>::value
+          && satisfies_none_of<call_type,
+                               std::is_pointer,
+                               std::is_array,
+                               is_std_array,
+                               std::is_enum>::value
+          && (!std::is_reference<T>::value
+              || (std::is_lvalue_reference<T>::value && std::is_const<call_type>::value));
     // Accept this type: an array for vectorized types, otherwise the type as-is:
     using type = conditional_t<vectorize, array_t<remove_cv_t<call_type>, array::forcecast>, T>;
 };
 
-
 // py::vectorize when a return type is present
 template <typename Func, typename Return, typename... Args>
 struct vectorize_returned_array {
     using Type = array_t<Return>;
 
     static Type create(broadcast_trivial trivial, const std::vector<ssize_t> &shape) {
-        if (trivial == broadcast_trivial::f_trivial)
+        if (trivial == broadcast_trivial::f_trivial) {
             return array_t<Return, array::f_style>(shape);
+        }
         return array_t<Return>(shape);
     }
 
-    static Return *mutable_data(Type &array) {
-        return array.mutable_data();
-    }
+    static Return *mutable_data(Type &array) { return array.mutable_data(); }
 
-    static Return call(Func &f, Args &... args) {
-        return f(args...);
-    }
+    static Return call(Func &f, Args &...args) { return f(args...); }
 
-    static void call(Return *out, size_t i, Func &f, Args &... args) {
-        out[i] = f(args...);
-    }
+    static void call(Return *out, size_t i, Func &f, Args &...args) { out[i] = f(args...); }
 };
 
 // py::vectorize when a return type is not present
@@ -1550,25 +1774,18 @@ template <typename Func, typename... Args>
 struct vectorize_returned_array<Func, void, Args...> {
     using Type = none;
 
-    static Type create(broadcast_trivial, const std::vector<ssize_t> &) {
-        return none();
-    }
+    static Type create(broadcast_trivial, const std::vector<ssize_t> &) { return none(); }
 
-    static void *mutable_data(Type &) {
-        return nullptr;
-    }
+    static void *mutable_data(Type &) { return nullptr; }
 
-    static detail::void_type call(Func &f, Args &... args) {
+    static detail::void_type call(Func &f, Args &...args) {
         f(args...);
         return {};
     }
 
-    static void call(void *, size_t, Func &f, Args &... args) {
-        f(args...);
-    }
+    static void call(void *, size_t, Func &f, Args &...args) { f(args...); }
 };
 
-
 template <typename Func, typename Return, typename... Args>
 struct vectorize_helper {
 
@@ -1581,8 +1798,9 @@ private:
 
     static constexpr size_t N = sizeof...(Args);
     static constexpr size_t NVectorized = constexpr_sum(vectorize_arg<Args>::vectorize...);
-    static_assert(NVectorized >= 1,
-            "pybind11::vectorize(...) requires a function with at least one vectorizable argument");
+    static_assert(
+        NVectorized >= 1,
+        "pybind11::vectorize(...) requires a function with at least one vectorizable argument");
 
 public:
     template <typename T,
@@ -1601,10 +1819,11 @@ public:
 private:
     remove_reference_t<Func> f;
 
-    // Internal compiler error in MSVC 19.16.27025.1 (Visual Studio 2017 15.9.4), when compiling with "/permissive-" flag
-    // when arg_call_types is manually inlined.
+    // Internal compiler error in MSVC 19.16.27025.1 (Visual Studio 2017 15.9.4), when compiling
+    // with "/permissive-" flag when arg_call_types is manually inlined.
     using arg_call_types = std::tuple<typename vectorize_arg<Args>::call_type...>;
-    template <size_t Index> using param_n_t = typename std::tuple_element<Index, arg_call_types>::type;
+    template <size_t Index>
+    using param_n_t = typename std::tuple_element<Index, arg_call_types>::type;
 
     using returned_array = vectorize_returned_array<Func, Return, Args...>;
 
@@ -1615,17 +1834,20 @@ private:
     //     - BIndex is a incremental sequence (beginning at 0) of the same size as VIndex, so that
     //       we can store vectorized buffer_infos in an array (argument VIndex has its buffer at
     //       index BIndex in the array).
-    template <size_t... Index, size_t... VIndex, size_t... BIndex> object run(
-            typename vectorize_arg<Args>::type &...args,
-            index_sequence<Index...> i_seq, index_sequence<VIndex...> vi_seq, index_sequence<BIndex...> bi_seq) {
+    template <size_t... Index, size_t... VIndex, size_t... BIndex>
+    object run(typename vectorize_arg<Args>::type &...args,
+               index_sequence<Index...> i_seq,
+               index_sequence<VIndex...> vi_seq,
+               index_sequence<BIndex...> bi_seq) {
 
         // Pointers to values the function was called with; the vectorized ones set here will start
         // out as array_t<T> pointers, but they will be changed them to T pointers before we make
         // call the wrapped function.  Non-vectorized pointers are left as-is.
-        std::array<void *, N> params{{ &args... }};
+        std::array<void *, N> params{{&args...}};
 
         // The array of `buffer_info`s of vectorized arguments:
-        std::array<buffer_info, NVectorized> buffers{{ reinterpret_cast<array *>(params[VIndex])->request()... }};
+        std::array<buffer_info, NVectorized> buffers{
+            {reinterpret_cast<array *>(params[VIndex])->request()...}};
 
         /* Determine dimensions parameters of output array */
         ssize_t nd = 0;
@@ -1633,27 +1855,38 @@ private:
         auto trivial = broadcast(buffers, nd, shape);
         auto ndim = (size_t) nd;
 
-        size_t size = std::accumulate(shape.begin(), shape.end(), (size_t) 1, std::multiplies<size_t>());
+        size_t size
+            = std::accumulate(shape.begin(), shape.end(), (size_t) 1, std::multiplies<size_t>());
 
         // If all arguments are 0-dimension arrays (i.e. single values) return a plain value (i.e.
         // not wrapped in an array).
         if (size == 1 && ndim == 0) {
             PYBIND11_EXPAND_SIDE_EFFECTS(params[VIndex] = buffers[BIndex].ptr);
-            return cast(returned_array::call(f, *reinterpret_cast<param_n_t<Index> *>(params[Index])...));
+            return cast(
+                returned_array::call(f, *reinterpret_cast<param_n_t<Index> *>(params[Index])...));
         }
 
         auto result = returned_array::create(trivial, shape);
 
-        if (size == 0) return std::move(result);
+        PYBIND11_WARNING_PUSH
+#ifdef PYBIND11_DETECTED_CLANG_WITH_MISLEADING_CALL_STD_MOVE_EXPLICITLY_WARNING
+        PYBIND11_WARNING_DISABLE_CLANG("-Wreturn-std-move")
+#endif
+
+        if (size == 0) {
+            return result;
+        }
 
         /* Call the function */
-        auto mutable_data = returned_array::mutable_data(result);
-        if (trivial == broadcast_trivial::non_trivial)
+        auto *mutable_data = returned_array::mutable_data(result);
+        if (trivial == broadcast_trivial::non_trivial) {
             apply_broadcast(buffers, params, mutable_data, size, shape, i_seq, vi_seq, bi_seq);
-        else
+        } else {
             apply_trivial(buffers, params, mutable_data, size, i_seq, vi_seq, bi_seq);
+        }
 
-        return std::move(result);
+        return result;
+        PYBIND11_WARNING_POP
     }
 
     template <size_t... Index, size_t... VIndex, size_t... BIndex>
@@ -1661,21 +1894,24 @@ private:
                        std::array<void *, N> &params,
                        Return *out,
                        size_t size,
-                       index_sequence<Index...>, index_sequence<VIndex...>, index_sequence<BIndex...>) {
+                       index_sequence<Index...>,
+                       index_sequence<VIndex...>,
+                       index_sequence<BIndex...>) {
 
         // Initialize an array of mutable byte references and sizes with references set to the
         // appropriate pointer in `params`; as we iterate, we'll increment each pointer by its size
         // (except for singletons, which get an increment of 0).
-        std::array<std::pair<unsigned char *&, const size_t>, NVectorized> vecparams{{
-            std::pair<unsigned char *&, const size_t>(
-                    reinterpret_cast<unsigned char *&>(params[VIndex] = buffers[BIndex].ptr),
-                    buffers[BIndex].size == 1 ? 0 : sizeof(param_n_t<VIndex>)
-            )...
-        }};
+        std::array<std::pair<unsigned char *&, const size_t>, NVectorized> vecparams{
+            {std::pair<unsigned char *&, const size_t>(
+                reinterpret_cast<unsigned char *&>(params[VIndex] = buffers[BIndex].ptr),
+                buffers[BIndex].size == 1 ? 0 : sizeof(param_n_t<VIndex>))...}};
 
         for (size_t i = 0; i < size; ++i) {
-            returned_array::call(out, i, f, *reinterpret_cast<param_n_t<Index> *>(params[Index])...);
-            for (auto &x : vecparams) x.first += x.second;
+            returned_array::call(
+                out, i, f, *reinterpret_cast<param_n_t<Index> *>(params[Index])...);
+            for (auto &x : vecparams) {
+                x.first += x.second;
+            }
         }
     }
 
@@ -1685,55 +1921,70 @@ private:
                          Return *out,
                          size_t size,
                          const std::vector<ssize_t> &output_shape,
-                         index_sequence<Index...>, index_sequence<VIndex...>, index_sequence<BIndex...>) {
+                         index_sequence<Index...>,
+                         index_sequence<VIndex...>,
+                         index_sequence<BIndex...>) {
 
         multi_array_iterator<NVectorized> input_iter(buffers, output_shape);
 
         for (size_t i = 0; i < size; ++i, ++input_iter) {
-            PYBIND11_EXPAND_SIDE_EFFECTS((
-                params[VIndex] = input_iter.template data<BIndex>()
-            ));
-            returned_array::call(out, i, f, *reinterpret_cast<param_n_t<Index> *>(std::get<Index>(params))...);
+            PYBIND11_EXPAND_SIDE_EFFECTS((params[VIndex] = input_iter.template data<BIndex>()));
+            returned_array::call(
+                out, i, f, *reinterpret_cast<param_n_t<Index> *>(std::get<Index>(params))...);
         }
     }
 };
 
 template <typename Func, typename Return, typename... Args>
-vectorize_helper<Func, Return, Args...>
-vectorize_extractor(const Func &f, Return (*) (Args ...)) {
+vectorize_helper<Func, Return, Args...> vectorize_extractor(const Func &f, Return (*)(Args...)) {
     return detail::vectorize_helper<Func, Return, Args...>(f);
 }
 
-template <typename T, int Flags> struct handle_type_name<array_t<T, Flags>> {
-    static constexpr auto name = _("numpy.ndarray[") + npy_format_descriptor<T>::name + _("]");
+template <typename T, int Flags>
+struct handle_type_name<array_t<T, Flags>> {
+    static constexpr auto name
+        = const_name("numpy.ndarray[") + npy_format_descriptor<T>::name + const_name("]");
 };
 
 PYBIND11_NAMESPACE_END(detail)
 
 // Vanilla pointer vectorizer:
 template <typename Return, typename... Args>
-detail::vectorize_helper<Return (*)(Args...), Return, Args...>
-vectorize(Return (*f) (Args ...)) {
+detail::vectorize_helper<Return (*)(Args...), Return, Args...> vectorize(Return (*f)(Args...)) {
     return detail::vectorize_helper<Return (*)(Args...), Return, Args...>(f);
 }
 
 // lambda vectorizer:
 template <typename Func, detail::enable_if_t<detail::is_lambda<Func>::value, int> = 0>
-auto vectorize(Func &&f) -> decltype(
-        detail::vectorize_extractor(std::forward<Func>(f), (detail::function_signature_t<Func> *) nullptr)) {
-    return detail::vectorize_extractor(std::forward<Func>(f), (detail::function_signature_t<Func> *) nullptr);
+auto vectorize(Func &&f)
+    -> decltype(detail::vectorize_extractor(std::forward<Func>(f),
+                                            (detail::function_signature_t<Func> *) nullptr)) {
+    return detail::vectorize_extractor(std::forward<Func>(f),
+                                       (detail::function_signature_t<Func> *) nullptr);
 }
 
 // Vectorize a class method (non-const):
-template <typename Return, typename Class, typename... Args,
-          typename Helper = detail::vectorize_helper<decltype(std::mem_fn(std::declval<Return (Class::*)(Args...)>())), Return, Class *, Args...>>
+template <typename Return,
+          typename Class,
+          typename... Args,
+          typename Helper = detail::vectorize_helper<
+              decltype(std::mem_fn(std::declval<Return (Class::*)(Args...)>())),
+              Return,
+              Class *,
+              Args...>>
 Helper vectorize(Return (Class::*f)(Args...)) {
     return Helper(std::mem_fn(f));
 }
 
 // Vectorize a class method (const):
-template <typename Return, typename Class, typename... Args,
-          typename Helper = detail::vectorize_helper<decltype(std::mem_fn(std::declval<Return (Class::*)(Args...) const>())), Return, const Class *, Args...>>
+template <typename Return,
+          typename Class,
+          typename... Args,
+          typename Helper = detail::vectorize_helper<
+              decltype(std::mem_fn(std::declval<Return (Class::*)(Args...) const>())),
+              Return,
+              const Class *,
+              Args...>>
 Helper vectorize(Return (Class::*f)(Args...) const) {
     return Helper(std::mem_fn(f));
 }
diff --git a/ext/pybind11/include/pybind11/operators.h b/ext/pybind11/include/pybind11/operators.h
index 2a61531589..16a88ae171 100644
--- a/ext/pybind11/include/pybind11/operators.h
+++ b/ext/pybind11/include/pybind11/operators.h
@@ -16,12 +16,50 @@ PYBIND11_NAMESPACE_BEGIN(detail)
 
 /// Enumeration with all supported operator types
 enum op_id : int {
-    op_add, op_sub, op_mul, op_div, op_mod, op_divmod, op_pow, op_lshift,
-    op_rshift, op_and, op_xor, op_or, op_neg, op_pos, op_abs, op_invert,
-    op_int, op_long, op_float, op_str, op_cmp, op_gt, op_ge, op_lt, op_le,
-    op_eq, op_ne, op_iadd, op_isub, op_imul, op_idiv, op_imod, op_ilshift,
-    op_irshift, op_iand, op_ixor, op_ior, op_complex, op_bool, op_nonzero,
-    op_repr, op_truediv, op_itruediv, op_hash
+    op_add,
+    op_sub,
+    op_mul,
+    op_div,
+    op_mod,
+    op_divmod,
+    op_pow,
+    op_lshift,
+    op_rshift,
+    op_and,
+    op_xor,
+    op_or,
+    op_neg,
+    op_pos,
+    op_abs,
+    op_invert,
+    op_int,
+    op_long,
+    op_float,
+    op_str,
+    op_cmp,
+    op_gt,
+    op_ge,
+    op_lt,
+    op_le,
+    op_eq,
+    op_ne,
+    op_iadd,
+    op_isub,
+    op_imul,
+    op_idiv,
+    op_imod,
+    op_ilshift,
+    op_irshift,
+    op_iand,
+    op_ixor,
+    op_ior,
+    op_complex,
+    op_bool,
+    op_nonzero,
+    op_repr,
+    op_truediv,
+    op_itruediv,
+    op_hash
 };
 
 enum op_type : int {
@@ -30,126 +68,127 @@ enum op_type : int {
     op_u  /* unary operator */
 };
 
-struct self_t { };
+struct self_t {};
 static const self_t self = self_t();
 
 /// Type for an unused type slot
-struct undefined_t { };
+struct undefined_t {};
 
 /// Don't warn about an unused variable
 inline self_t __self() { return self; }
 
 /// base template of operator implementations
-template <op_id, op_type, typename B, typename L, typename R> struct op_impl { };
+template <op_id, op_type, typename B, typename L, typename R>
+struct op_impl {};
 
 /// Operator implementation generator
-template <op_id id, op_type ot, typename L, typename R> struct op_ {
-    template <typename Class, typename... Extra> void execute(Class &cl, const Extra&... extra) const {
+template <op_id id, op_type ot, typename L, typename R>
+struct op_ {
+    static constexpr bool op_enable_if_hook = true;
+    template <typename Class, typename... Extra>
+    void execute(Class &cl, const Extra &...extra) const {
         using Base = typename Class::type;
         using L_type = conditional_t<std::is_same<L, self_t>::value, Base, L>;
         using R_type = conditional_t<std::is_same<R, self_t>::value, Base, R>;
         using op = op_impl<id, ot, Base, L_type, R_type>;
         cl.def(op::name(), &op::execute, is_operator(), extra...);
-        #if PY_MAJOR_VERSION < 3
-        if (PYBIND11_SILENCE_MSVC_C4127(id == op_truediv) ||
-            PYBIND11_SILENCE_MSVC_C4127(id == op_itruediv))
-            cl.def(id == op_itruediv ? "__idiv__" : ot == op_l ? "__div__" : "__rdiv__",
-                    &op::execute, is_operator(), extra...);
-        #endif
     }
-    template <typename Class, typename... Extra> void execute_cast(Class &cl, const Extra&... extra) const {
+    template <typename Class, typename... Extra>
+    void execute_cast(Class &cl, const Extra &...extra) const {
         using Base = typename Class::type;
         using L_type = conditional_t<std::is_same<L, self_t>::value, Base, L>;
         using R_type = conditional_t<std::is_same<R, self_t>::value, Base, R>;
         using op = op_impl<id, ot, Base, L_type, R_type>;
         cl.def(op::name(), &op::execute_cast, is_operator(), extra...);
-        #if PY_MAJOR_VERSION < 3
-        if (id == op_truediv || id == op_itruediv)
-            cl.def(id == op_itruediv ? "__idiv__" : ot == op_l ? "__div__" : "__rdiv__",
-                    &op::execute, is_operator(), extra...);
-        #endif
     }
 };
 
-#define PYBIND11_BINARY_OPERATOR(id, rid, op, expr)                                    \
-template <typename B, typename L, typename R> struct op_impl<op_##id, op_l, B, L, R> { \
-    static char const* name() { return "__" #id "__"; }                                \
-    static auto execute(const L &l, const R &r) -> decltype(expr) { return (expr); }   \
-    static B execute_cast(const L &l, const R &r) { return B(expr); }                  \
-};                                                                                     \
-template <typename B, typename L, typename R> struct op_impl<op_##id, op_r, B, L, R> { \
-    static char const* name() { return "__" #rid "__"; }                               \
-    static auto execute(const R &r, const L &l) -> decltype(expr) { return (expr); }   \
-    static B execute_cast(const R &r, const L &l) { return B(expr); }                  \
-};                                                                                     \
-inline op_<op_##id, op_l, self_t, self_t> op(const self_t &, const self_t &) {         \
-    return op_<op_##id, op_l, self_t, self_t>();                                       \
-}                                                                                      \
-template <typename T> op_<op_##id, op_l, self_t, T> op(const self_t &, const T &) {    \
-    return op_<op_##id, op_l, self_t, T>();                                            \
-}                                                                                      \
-template <typename T> op_<op_##id, op_r, T, self_t> op(const T &, const self_t &) {    \
-    return op_<op_##id, op_r, T, self_t>();                                            \
-}
+#define PYBIND11_BINARY_OPERATOR(id, rid, op, expr)                                               \
+    template <typename B, typename L, typename R>                                                 \
+    struct op_impl<op_##id, op_l, B, L, R> {                                                      \
+        static char const *name() { return "__" #id "__"; }                                       \
+        static auto execute(const L &l, const R &r) -> decltype(expr) { return (expr); }          \
+        static B execute_cast(const L &l, const R &r) { return B(expr); }                         \
+    };                                                                                            \
+    template <typename B, typename L, typename R>                                                 \
+    struct op_impl<op_##id, op_r, B, L, R> {                                                      \
+        static char const *name() { return "__" #rid "__"; }                                      \
+        static auto execute(const R &r, const L &l) -> decltype(expr) { return (expr); }          \
+        static B execute_cast(const R &r, const L &l) { return B(expr); }                         \
+    };                                                                                            \
+    inline op_<op_##id, op_l, self_t, self_t> op(const self_t &, const self_t &) {                \
+        return op_<op_##id, op_l, self_t, self_t>();                                              \
+    }                                                                                             \
+    template <typename T>                                                                         \
+    op_<op_##id, op_l, self_t, T> op(const self_t &, const T &) {                                 \
+        return op_<op_##id, op_l, self_t, T>();                                                   \
+    }                                                                                             \
+    template <typename T>                                                                         \
+    op_<op_##id, op_r, T, self_t> op(const T &, const self_t &) {                                 \
+        return op_<op_##id, op_r, T, self_t>();                                                   \
+    }
 
-#define PYBIND11_INPLACE_OPERATOR(id, op, expr)                                        \
-template <typename B, typename L, typename R> struct op_impl<op_##id, op_l, B, L, R> { \
-    static char const* name() { return "__" #id "__"; }                                \
-    static auto execute(L &l, const R &r) -> decltype(expr) { return expr; }           \
-    static B execute_cast(L &l, const R &r) { return B(expr); }                        \
-};                                                                                     \
-template <typename T> op_<op_##id, op_l, self_t, T> op(const self_t &, const T &) {    \
-    return op_<op_##id, op_l, self_t, T>();                                            \
-}
+#define PYBIND11_INPLACE_OPERATOR(id, op, expr)                                                   \
+    template <typename B, typename L, typename R>                                                 \
+    struct op_impl<op_##id, op_l, B, L, R> {                                                      \
+        static char const *name() { return "__" #id "__"; }                                       \
+        static auto execute(L &l, const R &r) -> decltype(expr) { return expr; }                  \
+        static B execute_cast(L &l, const R &r) { return B(expr); }                               \
+    };                                                                                            \
+    template <typename T>                                                                         \
+    op_<op_##id, op_l, self_t, T> op(const self_t &, const T &) {                                 \
+        return op_<op_##id, op_l, self_t, T>();                                                   \
+    }
 
-#define PYBIND11_UNARY_OPERATOR(id, op, expr)                                          \
-template <typename B, typename L> struct op_impl<op_##id, op_u, B, L, undefined_t> {   \
-    static char const* name() { return "__" #id "__"; }                                \
-    static auto execute(const L &l) -> decltype(expr) { return expr; }                 \
-    static B execute_cast(const L &l) { return B(expr); }                              \
-};                                                                                     \
-inline op_<op_##id, op_u, self_t, undefined_t> op(const self_t &) {                    \
-    return op_<op_##id, op_u, self_t, undefined_t>();                                  \
-}
+#define PYBIND11_UNARY_OPERATOR(id, op, expr)                                                     \
+    template <typename B, typename L>                                                             \
+    struct op_impl<op_##id, op_u, B, L, undefined_t> {                                            \
+        static char const *name() { return "__" #id "__"; }                                       \
+        static auto execute(const L &l) -> decltype(expr) { return expr; }                        \
+        static B execute_cast(const L &l) { return B(expr); }                                     \
+    };                                                                                            \
+    inline op_<op_##id, op_u, self_t, undefined_t> op(const self_t &) {                           \
+        return op_<op_##id, op_u, self_t, undefined_t>();                                         \
+    }
 
-PYBIND11_BINARY_OPERATOR(sub,       rsub,         operator-,    l - r)
-PYBIND11_BINARY_OPERATOR(add,       radd,         operator+,    l + r)
-PYBIND11_BINARY_OPERATOR(mul,       rmul,         operator*,    l * r)
-PYBIND11_BINARY_OPERATOR(truediv,   rtruediv,     operator/,    l / r)
-PYBIND11_BINARY_OPERATOR(mod,       rmod,         operator%,    l % r)
-PYBIND11_BINARY_OPERATOR(lshift,    rlshift,      operator<<,   l << r)
-PYBIND11_BINARY_OPERATOR(rshift,    rrshift,      operator>>,   l >> r)
-PYBIND11_BINARY_OPERATOR(and,       rand,         operator&,    l & r)
-PYBIND11_BINARY_OPERATOR(xor,       rxor,         operator^,    l ^ r)
-PYBIND11_BINARY_OPERATOR(eq,        eq,           operator==,   l == r)
-PYBIND11_BINARY_OPERATOR(ne,        ne,           operator!=,   l != r)
-PYBIND11_BINARY_OPERATOR(or,        ror,          operator|,    l | r)
-PYBIND11_BINARY_OPERATOR(gt,        lt,           operator>,    l > r)
-PYBIND11_BINARY_OPERATOR(ge,        le,           operator>=,   l >= r)
-PYBIND11_BINARY_OPERATOR(lt,        gt,           operator<,    l < r)
-PYBIND11_BINARY_OPERATOR(le,        ge,           operator<=,   l <= r)
-//PYBIND11_BINARY_OPERATOR(pow,       rpow,         pow,          std::pow(l,  r))
-PYBIND11_INPLACE_OPERATOR(iadd,     operator+=,   l += r)
-PYBIND11_INPLACE_OPERATOR(isub,     operator-=,   l -= r)
-PYBIND11_INPLACE_OPERATOR(imul,     operator*=,   l *= r)
-PYBIND11_INPLACE_OPERATOR(itruediv, operator/=,   l /= r)
-PYBIND11_INPLACE_OPERATOR(imod,     operator%=,   l %= r)
-PYBIND11_INPLACE_OPERATOR(ilshift,  operator<<=,  l <<= r)
-PYBIND11_INPLACE_OPERATOR(irshift,  operator>>=,  l >>= r)
-PYBIND11_INPLACE_OPERATOR(iand,     operator&=,   l &= r)
-PYBIND11_INPLACE_OPERATOR(ixor,     operator^=,   l ^= r)
-PYBIND11_INPLACE_OPERATOR(ior,      operator|=,   l |= r)
-PYBIND11_UNARY_OPERATOR(neg,        operator-,    -l)
-PYBIND11_UNARY_OPERATOR(pos,        operator+,    +l)
+PYBIND11_BINARY_OPERATOR(sub, rsub, operator-, l - r)
+PYBIND11_BINARY_OPERATOR(add, radd, operator+, l + r)
+PYBIND11_BINARY_OPERATOR(mul, rmul, operator*, l *r)
+PYBIND11_BINARY_OPERATOR(truediv, rtruediv, operator/, l / r)
+PYBIND11_BINARY_OPERATOR(mod, rmod, operator%, l % r)
+PYBIND11_BINARY_OPERATOR(lshift, rlshift, operator<<, l << r)
+PYBIND11_BINARY_OPERATOR(rshift, rrshift, operator>>, l >> r)
+PYBIND11_BINARY_OPERATOR(and, rand, operator&, l &r)
+PYBIND11_BINARY_OPERATOR(xor, rxor, operator^, l ^ r)
+PYBIND11_BINARY_OPERATOR(eq, eq, operator==, l == r)
+PYBIND11_BINARY_OPERATOR(ne, ne, operator!=, l != r)
+PYBIND11_BINARY_OPERATOR(or, ror, operator|, l | r)
+PYBIND11_BINARY_OPERATOR(gt, lt, operator>, l > r)
+PYBIND11_BINARY_OPERATOR(ge, le, operator>=, l >= r)
+PYBIND11_BINARY_OPERATOR(lt, gt, operator<, l < r)
+PYBIND11_BINARY_OPERATOR(le, ge, operator<=, l <= r)
+// PYBIND11_BINARY_OPERATOR(pow,       rpow,         pow,          std::pow(l,  r))
+PYBIND11_INPLACE_OPERATOR(iadd, operator+=, l += r)
+PYBIND11_INPLACE_OPERATOR(isub, operator-=, l -= r)
+PYBIND11_INPLACE_OPERATOR(imul, operator*=, l *= r)
+PYBIND11_INPLACE_OPERATOR(itruediv, operator/=, l /= r)
+PYBIND11_INPLACE_OPERATOR(imod, operator%=, l %= r)
+PYBIND11_INPLACE_OPERATOR(ilshift, operator<<=, l <<= r)
+PYBIND11_INPLACE_OPERATOR(irshift, operator>>=, l >>= r)
+PYBIND11_INPLACE_OPERATOR(iand, operator&=, l &= r)
+PYBIND11_INPLACE_OPERATOR(ixor, operator^=, l ^= r)
+PYBIND11_INPLACE_OPERATOR(ior, operator|=, l |= r)
+PYBIND11_UNARY_OPERATOR(neg, operator-, -l)
+PYBIND11_UNARY_OPERATOR(pos, operator+, +l)
 // WARNING: This usage of `abs` should only be done for existing STL overloads.
 // Adding overloads directly in to the `std::` namespace is advised against:
 // https://en.cppreference.com/w/cpp/language/extending_std
-PYBIND11_UNARY_OPERATOR(abs,        abs,          std::abs(l))
-PYBIND11_UNARY_OPERATOR(hash,       hash,         std::hash<L>()(l))
-PYBIND11_UNARY_OPERATOR(invert,     operator~,    (~l))
-PYBIND11_UNARY_OPERATOR(bool,       operator!,    !!l)
-PYBIND11_UNARY_OPERATOR(int,        int_,         (int) l)
-PYBIND11_UNARY_OPERATOR(float,      float_,       (double) l)
+PYBIND11_UNARY_OPERATOR(abs, abs, std::abs(l))
+PYBIND11_UNARY_OPERATOR(hash, hash, std::hash<L>()(l))
+PYBIND11_UNARY_OPERATOR(invert, operator~, (~l))
+PYBIND11_UNARY_OPERATOR(bool, operator!, !!l)
+PYBIND11_UNARY_OPERATOR(int, int_, (int) l)
+PYBIND11_UNARY_OPERATOR(float, float_, (double) l)
 
 #undef PYBIND11_BINARY_OPERATOR
 #undef PYBIND11_INPLACE_OPERATOR
diff --git a/ext/pybind11/include/pybind11/options.h b/ext/pybind11/include/pybind11/options.h
index d74db1c68d..1b2122522d 100644
--- a/ext/pybind11/include/pybind11/options.h
+++ b/ext/pybind11/include/pybind11/options.h
@@ -15,43 +15,70 @@ PYBIND11_NAMESPACE_BEGIN(PYBIND11_NAMESPACE)
 
 class options {
 public:
-
     // Default RAII constructor, which leaves settings as they currently are.
     options() : previous_state(global_state()) {}
 
     // Class is non-copyable.
-    options(const options&) = delete;
-    options& operator=(const options&) = delete;
+    options(const options &) = delete;
+    options &operator=(const options &) = delete;
 
     // Destructor, which restores settings that were in effect before.
-    ~options() {
-        global_state() = previous_state;
-    }
+    ~options() { global_state() = previous_state; }
 
     // Setter methods (affect the global state):
 
-    options& disable_user_defined_docstrings() & { global_state().show_user_defined_docstrings = false; return *this; }
+    options &disable_user_defined_docstrings() & {
+        global_state().show_user_defined_docstrings = false;
+        return *this;
+    }
 
-    options& enable_user_defined_docstrings() & { global_state().show_user_defined_docstrings = true; return *this; }
+    options &enable_user_defined_docstrings() & {
+        global_state().show_user_defined_docstrings = true;
+        return *this;
+    }
 
-    options& disable_function_signatures() & { global_state().show_function_signatures = false; return *this; }
+    options &disable_function_signatures() & {
+        global_state().show_function_signatures = false;
+        return *this;
+    }
 
-    options& enable_function_signatures() & { global_state().show_function_signatures = true; return *this; }
+    options &enable_function_signatures() & {
+        global_state().show_function_signatures = true;
+        return *this;
+    }
+
+    options &disable_enum_members_docstring() & {
+        global_state().show_enum_members_docstring = false;
+        return *this;
+    }
+
+    options &enable_enum_members_docstring() & {
+        global_state().show_enum_members_docstring = true;
+        return *this;
+    }
 
     // Getter methods (return the global state):
 
-    static bool show_user_defined_docstrings() { return global_state().show_user_defined_docstrings; }
+    static bool show_user_defined_docstrings() {
+        return global_state().show_user_defined_docstrings;
+    }
 
     static bool show_function_signatures() { return global_state().show_function_signatures; }
 
+    static bool show_enum_members_docstring() {
+        return global_state().show_enum_members_docstring;
+    }
+
     // This type is not meant to be allocated on the heap.
-    void* operator new(size_t) = delete;
+    void *operator new(size_t) = delete;
 
 private:
-
     struct state {
-        bool show_user_defined_docstrings = true;  //< Include user-supplied texts in docstrings.
-        bool show_function_signatures = true;      //< Include auto-generated function signatures in docstrings.
+        bool show_user_defined_docstrings = true; //< Include user-supplied texts in docstrings.
+        bool show_function_signatures = true;     //< Include auto-generated function signatures
+                                                  //  in docstrings.
+        bool show_enum_members_docstring = true;  //< Include auto-generated member list in enum
+                                                  //  docstrings.
     };
 
     static state &global_state() {
diff --git a/ext/pybind11/include/pybind11/pybind11.h b/ext/pybind11/include/pybind11/pybind11.h
index bfc1c368c0..6205effd61 100644
--- a/ext/pybind11/include/pybind11/pybind11.h
+++ b/ext/pybind11/include/pybind11/pybind11.h
@@ -10,32 +10,33 @@
 
 #pragma once
 
+#include "detail/class.h"
+#include "detail/init.h"
 #include "attr.h"
 #include "gil.h"
 #include "options.h"
-#include "detail/class.h"
-#include "detail/init.h"
 
 #include <cstdlib>
+#include <cstring>
 #include <memory>
 #include <new>
-#include <vector>
 #include <string>
 #include <utility>
-
-#include <string.h>
+#include <vector>
 
 #if defined(__cpp_lib_launder) && !(defined(_MSC_VER) && (_MSC_VER < 1914))
-#  define PYBIND11_STD_LAUNDER std::launder
-#  define PYBIND11_HAS_STD_LAUNDER 1
+#    define PYBIND11_STD_LAUNDER std::launder
+#    define PYBIND11_HAS_STD_LAUNDER 1
 #else
-#  define PYBIND11_STD_LAUNDER
-#  define PYBIND11_HAS_STD_LAUNDER 0
+#    define PYBIND11_STD_LAUNDER
+#    define PYBIND11_HAS_STD_LAUNDER 0
 #endif
 #if defined(__GNUG__) && !defined(__clang__)
-#  include <cxxabi.h>
+#    include <cxxabi.h>
 #endif
 
+PYBIND11_NAMESPACE_BEGIN(PYBIND11_NAMESPACE)
+
 /* https://stackoverflow.com/questions/46798456/handling-gccs-noexcept-type-warning
    This warning is about ABI compatibility, not code health.
    It is only actually needed in a couple places, but apparently GCC 7 "generates this warning if
@@ -44,11 +45,10 @@
    No other GCC version generates this warning.
  */
 #if defined(__GNUC__) && __GNUC__ == 7
-#    pragma GCC diagnostic push
-#    pragma GCC diagnostic ignored "-Wnoexcept-type"
+PYBIND11_WARNING_DISABLE_GCC("-Wnoexcept-type")
 #endif
 
-PYBIND11_NAMESPACE_BEGIN(PYBIND11_NAMESPACE)
+PYBIND11_WARNING_DISABLE_MSVC(4127)
 
 PYBIND11_NAMESPACE_BEGIN(detail)
 
@@ -56,7 +56,7 @@ PYBIND11_NAMESPACE_BEGIN(detail)
 // Return true if one of the translators completed without raising an exception
 // itself. Return of false indicates that if there are other translators
 // available, they should be tried.
-inline bool apply_exception_translators(std::forward_list<ExceptionTranslator>& translators) {
+inline bool apply_exception_translators(std::forward_list<ExceptionTranslator> &translators) {
     auto last_exception = std::current_exception();
 
     for (auto &translator : translators) {
@@ -83,30 +83,33 @@ class cpp_function : public function {
 public:
     cpp_function() = default;
     // NOLINTNEXTLINE(google-explicit-constructor)
-    cpp_function(std::nullptr_t) { }
+    cpp_function(std::nullptr_t) {}
 
     /// Construct a cpp_function from a vanilla function pointer
     template <typename Return, typename... Args, typename... Extra>
     // NOLINTNEXTLINE(google-explicit-constructor)
-    cpp_function(Return (*f)(Args...), const Extra&... extra) {
+    cpp_function(Return (*f)(Args...), const Extra &...extra) {
         initialize(f, f, extra...);
     }
 
     /// Construct a cpp_function from a lambda function (possibly with internal state)
-    template <typename Func, typename... Extra,
+    template <typename Func,
+              typename... Extra,
               typename = detail::enable_if_t<detail::is_lambda<Func>::value>>
     // NOLINTNEXTLINE(google-explicit-constructor)
-    cpp_function(Func &&f, const Extra&... extra) {
-        initialize(std::forward<Func>(f),
-                   (detail::function_signature_t<Func> *) nullptr, extra...);
+    cpp_function(Func &&f, const Extra &...extra) {
+        initialize(
+            std::forward<Func>(f), (detail::function_signature_t<Func> *) nullptr, extra...);
     }
 
     /// Construct a cpp_function from a class method (non-const, no ref-qualifier)
     template <typename Return, typename Class, typename... Arg, typename... Extra>
     // NOLINTNEXTLINE(google-explicit-constructor)
-    cpp_function(Return (Class::*f)(Arg...), const Extra&... extra) {
-        initialize([f](Class *c, Arg... args) -> Return { return (c->*f)(std::forward<Arg>(args)...); },
-                   (Return (*) (Class *, Arg...)) nullptr, extra...);
+    cpp_function(Return (Class::*f)(Arg...), const Extra &...extra) {
+        initialize(
+            [f](Class *c, Arg... args) -> Return { return (c->*f)(std::forward<Arg>(args)...); },
+            (Return(*)(Class *, Arg...)) nullptr,
+            extra...);
     }
 
     /// Construct a cpp_function from a class method (non-const, lvalue ref-qualifier)
@@ -114,17 +117,21 @@ public:
     /// but with an added `&`.
     template <typename Return, typename Class, typename... Arg, typename... Extra>
     // NOLINTNEXTLINE(google-explicit-constructor)
-    cpp_function(Return (Class::*f)(Arg...)&, const Extra&... extra) {
-        initialize([f](Class *c, Arg... args) -> Return { return (c->*f)(args...); },
-                   (Return (*) (Class *, Arg...)) nullptr, extra...);
+    cpp_function(Return (Class::*f)(Arg...) &, const Extra &...extra) {
+        initialize(
+            [f](Class *c, Arg... args) -> Return { return (c->*f)(std::forward<Arg>(args)...); },
+            (Return(*)(Class *, Arg...)) nullptr,
+            extra...);
     }
 
     /// Construct a cpp_function from a class method (const, no ref-qualifier)
     template <typename Return, typename Class, typename... Arg, typename... Extra>
     // NOLINTNEXTLINE(google-explicit-constructor)
-    cpp_function(Return (Class::*f)(Arg...) const, const Extra&... extra) {
-        initialize([f](const Class *c, Arg... args) -> Return { return (c->*f)(std::forward<Arg>(args)...); },
-                   (Return (*)(const Class *, Arg ...)) nullptr, extra...);
+    cpp_function(Return (Class::*f)(Arg...) const, const Extra &...extra) {
+        initialize([f](const Class *c,
+                       Arg... args) -> Return { return (c->*f)(std::forward<Arg>(args)...); },
+                   (Return(*)(const Class *, Arg...)) nullptr,
+                   extra...);
     }
 
     /// Construct a cpp_function from a class method (const, lvalue ref-qualifier)
@@ -132,9 +139,11 @@ public:
     /// but with an added `&`.
     template <typename Return, typename Class, typename... Arg, typename... Extra>
     // NOLINTNEXTLINE(google-explicit-constructor)
-    cpp_function(Return (Class::*f)(Arg...) const&, const Extra&... extra) {
-        initialize([f](const Class *c, Arg... args) -> Return { return (c->*f)(args...); },
-                   (Return (*)(const Class *, Arg ...)) nullptr, extra...);
+    cpp_function(Return (Class::*f)(Arg...) const &, const Extra &...extra) {
+        initialize([f](const Class *c,
+                       Arg... args) -> Return { return (c->*f)(std::forward<Arg>(args)...); },
+                   (Return(*)(const Class *, Arg...)) nullptr,
+                   extra...);
     }
 
     /// Return the function name
@@ -144,9 +153,10 @@ protected:
     struct InitializingFunctionRecordDeleter {
         // `destruct(function_record, false)`: `initialize_generic` copies strings and
         // takes care of cleaning up in case of exceptions. So pass `false` to `free_strings`.
-        void operator()(detail::function_record * rec) { destruct(rec, false); }
+        void operator()(detail::function_record *rec) { destruct(rec, false); }
     };
-    using unique_function_record = std::unique_ptr<detail::function_record, InitializingFunctionRecordDeleter>;
+    using unique_function_record
+        = std::unique_ptr<detail::function_record, InitializingFunctionRecordDeleter>;
 
     /// Space optimization: don't inline this frequently instantiated fragment
     PYBIND11_NOINLINE unique_function_record make_function_record() {
@@ -155,82 +165,89 @@ protected:
 
     /// Special internal constructor for functors, lambda functions, etc.
     template <typename Func, typename Return, typename... Args, typename... Extra>
-    void initialize(Func &&f, Return (*)(Args...), const Extra&... extra) {
+    void initialize(Func &&f, Return (*)(Args...), const Extra &...extra) {
         using namespace detail;
-        struct capture { remove_reference_t<Func> f; };
+        struct capture {
+            remove_reference_t<Func> f;
+        };
 
-        /* Store the function including any extra state it might have (e.g. a lambda capture object) */
+        /* Store the function including any extra state it might have (e.g. a lambda capture
+         * object) */
         // The unique_ptr makes sure nothing is leaked in case of an exception.
         auto unique_rec = make_function_record();
-        auto rec = unique_rec.get();
+        auto *rec = unique_rec.get();
 
         /* Store the capture object directly in the function record if there is enough space */
-        if (PYBIND11_SILENCE_MSVC_C4127(sizeof(capture) <= sizeof(rec->data))) {
+        if (sizeof(capture) <= sizeof(rec->data)) {
             /* Without these pragmas, GCC warns that there might not be
                enough space to use the placement new operator. However, the
                'if' statement above ensures that this is the case. */
-#if defined(__GNUG__) && __GNUC__ >= 6 && !defined(__clang__) && !defined(__INTEL_COMPILER)
-#  pragma GCC diagnostic push
-#  pragma GCC diagnostic ignored "-Wplacement-new"
+            PYBIND11_WARNING_PUSH
+
+#if defined(__GNUG__) && __GNUC__ >= 6
+            PYBIND11_WARNING_DISABLE_GCC("-Wplacement-new")
 #endif
-            new ((capture *) &rec->data) capture { std::forward<Func>(f) };
-#if defined(__GNUG__) && __GNUC__ >= 6 && !defined(__clang__) && !defined(__INTEL_COMPILER)
-#  pragma GCC diagnostic pop
-#endif
-#if defined(__GNUG__) && !PYBIND11_HAS_STD_LAUNDER && !defined(__INTEL_COMPILER)
-#  pragma GCC diagnostic push
-#  pragma GCC diagnostic ignored "-Wstrict-aliasing"
+
+            new ((capture *) &rec->data) capture{std::forward<Func>(f)};
+
+#if !PYBIND11_HAS_STD_LAUNDER
+            PYBIND11_WARNING_DISABLE_GCC("-Wstrict-aliasing")
 #endif
+
             // UB without std::launder, but without breaking ABI and/or
             // a significant refactoring it's "impossible" to solve.
-            if (!std::is_trivially_destructible<capture>::value)
+            if (!std::is_trivially_destructible<capture>::value) {
                 rec->free_data = [](function_record *r) {
                     auto data = PYBIND11_STD_LAUNDER((capture *) &r->data);
                     (void) data;
                     data->~capture();
                 };
-#if defined(__GNUG__) && !PYBIND11_HAS_STD_LAUNDER && !defined(__INTEL_COMPILER)
-#  pragma GCC diagnostic pop
-#endif
+            }
+            PYBIND11_WARNING_POP
         } else {
-            rec->data[0] = new capture { std::forward<Func>(f) };
+            rec->data[0] = new capture{std::forward<Func>(f)};
             rec->free_data = [](function_record *r) { delete ((capture *) r->data[0]); };
         }
 
         /* Type casters for the function arguments and return value */
         using cast_in = argument_loader<Args...>;
-        using cast_out = make_caster<
-            conditional_t<std::is_void<Return>::value, void_type, Return>
-        >;
+        using cast_out
+            = make_caster<conditional_t<std::is_void<Return>::value, void_type, Return>>;
 
-        static_assert(expected_num_args<Extra...>(sizeof...(Args), cast_in::has_args, cast_in::has_kwargs),
-                      "The number of argument annotations does not match the number of function arguments");
+        static_assert(
+            expected_num_args<Extra...>(
+                sizeof...(Args), cast_in::args_pos >= 0, cast_in::has_kwargs),
+            "The number of argument annotations does not match the number of function arguments");
 
         /* Dispatch code which converts function arguments and performs the actual function call */
         rec->impl = [](function_call &call) -> handle {
             cast_in args_converter;
 
             /* Try to cast the function arguments into the C++ domain */
-            if (!args_converter.load_args(call))
+            if (!args_converter.load_args(call)) {
                 return PYBIND11_TRY_NEXT_OVERLOAD;
+            }
 
             /* Invoke call policy pre-call hook */
             process_attributes<Extra...>::precall(call);
 
             /* Get a pointer to the capture object */
-            auto data = (sizeof(capture) <= sizeof(call.func.data)
-                         ? &call.func.data : call.func.data[0]);
+            const auto *data = (sizeof(capture) <= sizeof(call.func.data) ? &call.func.data
+                                                                          : call.func.data[0]);
             auto *cap = const_cast<capture *>(reinterpret_cast<const capture *>(data));
 
             /* Override policy for rvalues -- usually to enforce rvp::move on an rvalue */
-            return_value_policy policy = return_value_policy_override<Return>::policy(call.func.policy);
+            return_value_policy policy
+                = return_value_policy_override<Return>::policy(call.func.policy);
 
             /* Function scope guard -- defaults to the compile-to-nothing `void_type` */
             using Guard = extract_guard_t<Extra...>;
 
             /* Perform the function call */
-            handle result = cast_out::cast(
-                std::move(args_converter).template call<Return, Guard>(cap->f), policy, call.parent);
+            handle result
+                = cast_out::cast(std::move(args_converter).template call<Return, Guard>(cap->f),
+                                 policy,
+                                 call.parent);
 
             /* Invoke call policy post-call hook */
             process_attributes<Extra...>::postcall(call, result);
@@ -238,119 +255,156 @@ protected:
             return result;
         };
 
+        rec->nargs_pos = cast_in::args_pos >= 0
+                             ? static_cast<std::uint16_t>(cast_in::args_pos)
+                             : sizeof...(Args) - cast_in::has_kwargs; // Will get reduced more if
+                                                                      // we have a kw_only
+        rec->has_args = cast_in::args_pos >= 0;
+        rec->has_kwargs = cast_in::has_kwargs;
+
         /* Process any user-provided function attributes */
         process_attributes<Extra...>::init(extra..., rec);
 
         {
             constexpr bool has_kw_only_args = any_of<std::is_same<kw_only, Extra>...>::value,
                            has_pos_only_args = any_of<std::is_same<pos_only, Extra>...>::value,
-                           has_args = any_of<std::is_same<args, Args>...>::value,
                            has_arg_annotations = any_of<is_keyword<Extra>...>::value;
-            static_assert(has_arg_annotations || !has_kw_only_args, "py::kw_only requires the use of argument annotations");
-            static_assert(has_arg_annotations || !has_pos_only_args, "py::pos_only requires the use of argument annotations (for docstrings and aligning the annotations to the argument)");
-            static_assert(!(has_args && has_kw_only_args), "py::kw_only cannot be combined with a py::args argument");
+            static_assert(has_arg_annotations || !has_kw_only_args,
+                          "py::kw_only requires the use of argument annotations");
+            static_assert(has_arg_annotations || !has_pos_only_args,
+                          "py::pos_only requires the use of argument annotations (for docstrings "
+                          "and aligning the annotations to the argument)");
+
+            static_assert(constexpr_sum(is_kw_only<Extra>::value...) <= 1,
+                          "py::kw_only may be specified only once");
+            static_assert(constexpr_sum(is_pos_only<Extra>::value...) <= 1,
+                          "py::pos_only may be specified only once");
+            constexpr auto kw_only_pos = constexpr_first<is_kw_only, Extra...>();
+            constexpr auto pos_only_pos = constexpr_first<is_pos_only, Extra...>();
+            static_assert(!(has_kw_only_args && has_pos_only_args) || pos_only_pos < kw_only_pos,
+                          "py::pos_only must come before py::kw_only");
         }
 
-        /* Generate a readable signature describing the function's arguments and return value types */
-        static constexpr auto signature = _("(") + cast_in::arg_names + _(") -> ") + cast_out::name;
+        /* Generate a readable signature describing the function's arguments and return
+           value types */
+        static constexpr auto signature
+            = const_name("(") + cast_in::arg_names + const_name(") -> ") + cast_out::name;
         PYBIND11_DESCR_CONSTEXPR auto types = decltype(signature)::types();
 
         /* Register the function with Python from generic (non-templated) code */
         // Pass on the ownership over the `unique_rec` to `initialize_generic`. `rec` stays valid.
         initialize_generic(std::move(unique_rec), signature.text, types.data(), sizeof...(Args));
 
-        if (cast_in::has_args) rec->has_args = true;
-        if (cast_in::has_kwargs) rec->has_kwargs = true;
-
         /* Stash some additional information used by an important optimization in 'functional.h' */
         using FunctionType = Return (*)(Args...);
-        constexpr bool is_function_ptr =
-            std::is_convertible<Func, FunctionType>::value &&
-            sizeof(capture) == sizeof(void *);
+        constexpr bool is_function_ptr
+            = std::is_convertible<Func, FunctionType>::value && sizeof(capture) == sizeof(void *);
         if (is_function_ptr) {
             rec->is_stateless = true;
-            rec->data[1] = const_cast<void *>(reinterpret_cast<const void *>(&typeid(FunctionType)));
+            rec->data[1]
+                = const_cast<void *>(reinterpret_cast<const void *>(&typeid(FunctionType)));
         }
     }
 
-    // Utility class that keeps track of all duplicated strings, and cleans them up in its destructor,
-    // unless they are released. Basically a RAII-solution to deal with exceptions along the way.
+    // Utility class that keeps track of all duplicated strings, and cleans them up in its
+    // destructor, unless they are released. Basically a RAII-solution to deal with exceptions
+    // along the way.
     class strdup_guard {
     public:
+        strdup_guard() = default;
+        strdup_guard(const strdup_guard &) = delete;
+        strdup_guard &operator=(const strdup_guard &) = delete;
+
         ~strdup_guard() {
-            for (auto s : strings)
+            for (auto *s : strings) {
                 std::free(s);
+            }
         }
         char *operator()(const char *s) {
-            auto t = PYBIND11_COMPAT_STRDUP(s);
+            auto *t = PYBIND11_COMPAT_STRDUP(s);
             strings.push_back(t);
             return t;
         }
-        void release() {
-            strings.clear();
-        }
+        void release() { strings.clear(); }
+
     private:
         std::vector<char *> strings;
     };
 
     /// Register a function call with Python (generic non-templated code goes here)
-    void initialize_generic(unique_function_record &&unique_rec, const char *text,
-                            const std::type_info *const *types, size_t args) {
+    void initialize_generic(unique_function_record &&unique_rec,
+                            const char *text,
+                            const std::type_info *const *types,
+                            size_t args) {
         // Do NOT receive `unique_rec` by value. If this function fails to move out the unique_ptr,
-        // we do not want this to destuct the pointer. `initialize` (the caller) still relies on the
-        // pointee being alive after this call. Only move out if a `capsule` is going to keep it alive.
-        auto rec = unique_rec.get();
+        // we do not want this to destruct the pointer. `initialize` (the caller) still relies on
+        // the pointee being alive after this call. Only move out if a `capsule` is going to keep
+        // it alive.
+        auto *rec = unique_rec.get();
 
         // Keep track of strdup'ed strings, and clean them up as long as the function's capsule
         // has not taken ownership yet (when `unique_rec.release()` is called).
-        // Note: This cannot easily be fixed by a `unique_ptr` with custom deleter, because the strings
-        // are only referenced before strdup'ing. So only *after* the following block could `destruct`
-        // safely be called, but even then, `repr` could still throw in the middle of copying all strings.
+        // Note: This cannot easily be fixed by a `unique_ptr` with custom deleter, because the
+        // strings are only referenced before strdup'ing. So only *after* the following block could
+        // `destruct` safely be called, but even then, `repr` could still throw in the middle of
+        // copying all strings.
         strdup_guard guarded_strdup;
 
         /* Create copies of all referenced C-style strings */
         rec->name = guarded_strdup(rec->name ? rec->name : "");
-        if (rec->doc) rec->doc = guarded_strdup(rec->doc);
-        for (auto &a: rec->args) {
-            if (a.name)
+        if (rec->doc) {
+            rec->doc = guarded_strdup(rec->doc);
+        }
+        for (auto &a : rec->args) {
+            if (a.name) {
                 a.name = guarded_strdup(a.name);
-            if (a.descr)
+            }
+            if (a.descr) {
                 a.descr = guarded_strdup(a.descr);
-            else if (a.value)
+            } else if (a.value) {
                 a.descr = guarded_strdup(repr(a.value).cast<std::string>().c_str());
+            }
         }
 
-        rec->is_constructor
-            = (strcmp(rec->name, "__init__") == 0) || (strcmp(rec->name, "__setstate__") == 0);
+        rec->is_constructor = (std::strcmp(rec->name, "__init__") == 0)
+                              || (std::strcmp(rec->name, "__setstate__") == 0);
 
-#if !defined(NDEBUG) && !defined(PYBIND11_DISABLE_NEW_STYLE_INIT_WARNING)
+#if defined(PYBIND11_DETAILED_ERROR_MESSAGES) && !defined(PYBIND11_DISABLE_NEW_STYLE_INIT_WARNING)
         if (rec->is_constructor && !rec->is_new_style_constructor) {
-            const auto class_name = detail::get_fully_qualified_tp_name((PyTypeObject *) rec->scope.ptr());
+            const auto class_name
+                = detail::get_fully_qualified_tp_name((PyTypeObject *) rec->scope.ptr());
             const auto func_name = std::string(rec->name);
-            PyErr_WarnEx(
-                PyExc_FutureWarning,
-                ("pybind11-bound class '" + class_name + "' is using an old-style "
-                 "placement-new '" + func_name + "' which has been deprecated. See "
-                 "the upgrade guide in pybind11's docs. This message is only visible "
-                 "when compiled in debug mode.").c_str(), 0
-            );
+            PyErr_WarnEx(PyExc_FutureWarning,
+                         ("pybind11-bound class '" + class_name
+                          + "' is using an old-style "
+                            "placement-new '"
+                          + func_name
+                          + "' which has been deprecated. See "
+                            "the upgrade guide in pybind11's docs. This message is only visible "
+                            "when compiled in debug mode.")
+                             .c_str(),
+                         0);
         }
 #endif
 
         /* Generate a proper function signature */
         std::string signature;
         size_t type_index = 0, arg_index = 0;
-        for (auto *pc = text; *pc != '\0'; ++pc) {
+        bool is_starred = false;
+        for (const auto *pc = text; *pc != '\0'; ++pc) {
             const auto c = *pc;
 
             if (c == '{') {
                 // Write arg name for everything except *args and **kwargs.
-                if (*(pc + 1) == '*')
+                is_starred = *(pc + 1) == '*';
+                if (is_starred) {
                     continue;
+                }
                 // Separator for keyword-only arguments, placed before the kw
-                // arguments start
-                if (rec->nargs_kw_only > 0 && arg_index + rec->nargs_kw_only == args)
+                // arguments start (unless we are already putting an *args)
+                if (!rec->has_args && arg_index == rec->nargs_pos) {
                     signature += "*, ";
+                }
                 if (arg_index < rec->args.size() && rec->args[arg_index].name) {
                     signature += rec->args[arg_index].name;
                 } else if (arg_index == 0 && rec->is_method) {
@@ -361,30 +415,32 @@ protected:
                 signature += ": ";
             } else if (c == '}') {
                 // Write default value if available.
-                if (arg_index < rec->args.size() && rec->args[arg_index].descr) {
+                if (!is_starred && arg_index < rec->args.size() && rec->args[arg_index].descr) {
                     signature += " = ";
                     signature += rec->args[arg_index].descr;
                 }
                 // Separator for positional-only arguments (placed after the
                 // argument, rather than before like *
-                if (rec->nargs_pos_only > 0 && (arg_index + 1) == rec->nargs_pos_only)
+                if (rec->nargs_pos_only > 0 && (arg_index + 1) == rec->nargs_pos_only) {
                     signature += ", /";
-                arg_index++;
+                }
+                if (!is_starred) {
+                    arg_index++;
+                }
             } else if (c == '%') {
                 const std::type_info *t = types[type_index++];
-                if (!t)
+                if (!t) {
                     pybind11_fail("Internal error while parsing type signature (1)");
-                if (auto tinfo = detail::get_type_info(*t)) {
+                }
+                if (auto *tinfo = detail::get_type_info(*t)) {
                     handle th((PyObject *) tinfo->type);
-                    signature +=
-                        th.attr("__module__").cast<std::string>() + "." +
-                        th.attr("__qualname__").cast<std::string>(); // Python 3.3+, but we backport it to earlier versions
+                    signature += th.attr("__module__").cast<std::string>() + "."
+                                 + th.attr("__qualname__").cast<std::string>();
                 } else if (rec->is_new_style_constructor && arg_index == 0) {
                     // A new-style `__init__` takes `self` as `value_and_holder`.
                     // Rewrite it to the proper class type.
-                    signature +=
-                        rec->scope.attr("__module__").cast<std::string>() + "." +
-                        rec->scope.attr("__qualname__").cast<std::string>();
+                    signature += rec->scope.attr("__module__").cast<std::string>() + "."
+                                 + rec->scope.attr("__qualname__").cast<std::string>();
                 } else {
                     std::string tname(t->name());
                     detail::clean_type_id(tname);
@@ -395,40 +451,44 @@ protected:
             }
         }
 
-        if (arg_index != args || types[type_index] != nullptr)
+        if (arg_index != args - rec->has_args - rec->has_kwargs || types[type_index] != nullptr) {
             pybind11_fail("Internal error while parsing type signature (2)");
-
-#if PY_MAJOR_VERSION < 3
-        if (strcmp(rec->name, "__next__") == 0) {
-            std::free(rec->name);
-            rec->name = guarded_strdup("next");
-        } else if (strcmp(rec->name, "__bool__") == 0) {
-            std::free(rec->name);
-            rec->name = guarded_strdup("__nonzero__");
         }
-#endif
+
         rec->signature = guarded_strdup(signature.c_str());
         rec->args.shrink_to_fit();
         rec->nargs = (std::uint16_t) args;
 
-        if (rec->sibling && PYBIND11_INSTANCE_METHOD_CHECK(rec->sibling.ptr()))
+        if (rec->sibling && PYBIND11_INSTANCE_METHOD_CHECK(rec->sibling.ptr())) {
             rec->sibling = PYBIND11_INSTANCE_METHOD_GET_FUNCTION(rec->sibling.ptr());
+        }
 
         detail::function_record *chain = nullptr, *chain_start = rec;
         if (rec->sibling) {
             if (PyCFunction_Check(rec->sibling.ptr())) {
                 auto *self = PyCFunction_GET_SELF(rec->sibling.ptr());
-                capsule rec_capsule = isinstance<capsule>(self) ? reinterpret_borrow<capsule>(self) : capsule(self);
-                chain = (detail::function_record *) rec_capsule;
-                /* Never append a method to an overload chain of a parent class;
-                   instead, hide the parent's overloads in this case */
-                if (!chain->scope.is(rec->scope))
+                if (!isinstance<capsule>(self)) {
                     chain = nullptr;
+                } else {
+                    auto rec_capsule = reinterpret_borrow<capsule>(self);
+                    if (detail::is_function_record_capsule(rec_capsule)) {
+                        chain = rec_capsule.get_pointer<detail::function_record>();
+                        /* Never append a method to an overload chain of a parent class;
+                           instead, hide the parent's overloads in this case */
+                        if (!chain->scope.is(rec->scope)) {
+                            chain = nullptr;
+                        }
+                    } else {
+                        chain = nullptr;
+                    }
+                }
+            }
+            // Don't trigger for things like the default __init__, which are wrapper_descriptors
+            // that we are intentionally replacing
+            else if (!rec->sibling.is_none() && rec->name[0] != '_') {
+                pybind11_fail("Cannot overload existing non-function object \""
+                              + std::string(rec->name) + "\" with a function of the same name");
             }
-            // Don't trigger for things like the default __init__, which are wrapper_descriptors that we are intentionally replacing
-            else if (!rec->sibling.is_none() && rec->name[0] != '_')
-                pybind11_fail("Cannot overload existing non-function object \"" + std::string(rec->name) +
-                        "\" with a function of the same name");
         }
 
         if (!chain) {
@@ -440,9 +500,9 @@ protected:
                 = reinterpret_cast<PyCFunction>(reinterpret_cast<void (*)()>(dispatcher));
             rec->def->ml_flags = METH_VARARGS | METH_KEYWORDS;
 
-            capsule rec_capsule(unique_rec.release(), [](void *ptr) {
-                destruct((detail::function_record *) ptr);
-            });
+            capsule rec_capsule(unique_rec.release(),
+                                [](void *ptr) { destruct((detail::function_record *) ptr); });
+            rec_capsule.set_name(detail::get_function_record_capsule_name());
             guarded_strdup.release();
 
             object scope_module;
@@ -455,21 +515,27 @@ protected:
             }
 
             m_ptr = PyCFunction_NewEx(rec->def, rec_capsule.ptr(), scope_module.ptr());
-            if (!m_ptr)
+            if (!m_ptr) {
                 pybind11_fail("cpp_function::cpp_function(): Could not allocate function object");
+            }
         } else {
             /* Append at the beginning or end of the overload chain */
             m_ptr = rec->sibling.ptr();
             inc_ref();
-            if (chain->is_method != rec->is_method)
-                pybind11_fail("overloading a method with both static and instance methods is not supported; "
-                    #if defined(NDEBUG)
-                        "compile in debug mode for more details"
-                    #else
-                        "error while attempting to bind " + std::string(rec->is_method ? "instance" : "static") + " method " +
-                        std::string(pybind11::str(rec->scope.attr("__name__"))) + "." + std::string(rec->name) + signature
-                    #endif
+            if (chain->is_method != rec->is_method) {
+                pybind11_fail(
+                    "overloading a method with both static and instance methods is not supported; "
+#if !defined(PYBIND11_DETAILED_ERROR_MESSAGES)
+                    "#define PYBIND11_DETAILED_ERROR_MESSAGES or compile in debug mode for more "
+                    "details"
+#else
+                    "error while attempting to bind "
+                    + std::string(rec->is_method ? "instance" : "static") + " method "
+                    + std::string(pybind11::str(rec->scope.attr("__name__"))) + "."
+                    + std::string(rec->name) + signature
+#endif
                 );
+            }
 
             if (rec->prepend) {
                 // Beginning of chain; we need to replace the capsule's current head-of-the-chain
@@ -477,14 +543,16 @@ protected:
                 // chain.
                 chain_start = rec;
                 rec->next = chain;
-                auto rec_capsule = reinterpret_borrow<capsule>(((PyCFunctionObject *) m_ptr)->m_self);
+                auto rec_capsule
+                    = reinterpret_borrow<capsule>(((PyCFunctionObject *) m_ptr)->m_self);
                 rec_capsule.set_pointer(unique_rec.release());
                 guarded_strdup.release();
             } else {
                 // Or end of chain (normal behavior)
                 chain_start = chain;
-                while (chain->next)
+                while (chain->next) {
                     chain = chain->next;
+                }
                 chain->next = unique_rec.release();
                 guarded_strdup.release();
             }
@@ -502,25 +570,35 @@ protected:
         }
         // Then specific overload signatures
         bool first_user_def = true;
-        for (auto it = chain_start; it != nullptr; it = it->next) {
+        for (auto *it = chain_start; it != nullptr; it = it->next) {
             if (options::show_function_signatures()) {
-                if (index > 0) signatures += "\n";
-                if (chain)
+                if (index > 0) {
+                    signatures += '\n';
+                }
+                if (chain) {
                     signatures += std::to_string(++index) + ". ";
+                }
                 signatures += rec->name;
                 signatures += it->signature;
-                signatures += "\n";
+                signatures += '\n';
             }
             if (it->doc && it->doc[0] != '\0' && options::show_user_defined_docstrings()) {
-                // If we're appending another docstring, and aren't printing function signatures, we
-                // need to append a newline first:
+                // If we're appending another docstring, and aren't printing function signatures,
+                // we need to append a newline first:
                 if (!options::show_function_signatures()) {
-                    if (first_user_def) first_user_def = false;
-                    else signatures += "\n";
+                    if (first_user_def) {
+                        first_user_def = false;
+                    } else {
+                        signatures += '\n';
+                    }
+                }
+                if (options::show_function_signatures()) {
+                    signatures += '\n';
                 }
-                if (options::show_function_signatures()) signatures += "\n";
                 signatures += it->doc;
-                if (options::show_function_signatures()) signatures += "\n";
+                if (options::show_function_signatures()) {
+                    signatures += '\n';
+                }
             }
         }
 
@@ -533,24 +611,27 @@ protected:
 
         if (rec->is_method) {
             m_ptr = PYBIND11_INSTANCE_METHOD_NEW(m_ptr, rec->scope.ptr());
-            if (!m_ptr)
-                pybind11_fail("cpp_function::cpp_function(): Could not allocate instance method object");
+            if (!m_ptr) {
+                pybind11_fail(
+                    "cpp_function::cpp_function(): Could not allocate instance method object");
+            }
             Py_DECREF(func);
         }
     }
 
     /// When a cpp_function is GCed, release any memory allocated by pybind11
     static void destruct(detail::function_record *rec, bool free_strings = true) {
-        // If on Python 3.9, check the interpreter "MICRO" (patch) version.
-        // If this is running on 3.9.0, we have to work around a bug.
-        #if !defined(PYPY_VERSION) && PY_MAJOR_VERSION == 3 && PY_MINOR_VERSION == 9
-            static bool is_zero = Py_GetVersion()[4] == '0';
-        #endif
+// If on Python 3.9, check the interpreter "MICRO" (patch) version.
+// If this is running on 3.9.0, we have to work around a bug.
+#if !defined(PYPY_VERSION) && PY_MAJOR_VERSION == 3 && PY_MINOR_VERSION == 9
+        static bool is_zero = Py_GetVersion()[4] == '0';
+#endif
 
         while (rec) {
             detail::function_record *next = rec->next;
-            if (rec->free_data)
+            if (rec->free_data) {
                 rec->free_data(rec);
+            }
             // During initialization, these strings might not have been copied yet,
             // so they cannot be freed. Once the function has been created, they can.
             // Check `make_function_record` for more details.
@@ -558,40 +639,45 @@ protected:
                 std::free((char *) rec->name);
                 std::free((char *) rec->doc);
                 std::free((char *) rec->signature);
-                for (auto &arg: rec->args) {
+                for (auto &arg : rec->args) {
                     std::free(const_cast<char *>(arg.name));
                     std::free(const_cast<char *>(arg.descr));
                 }
             }
-            for (auto &arg: rec->args)
+            for (auto &arg : rec->args) {
                 arg.value.dec_ref();
+            }
             if (rec->def) {
                 std::free(const_cast<char *>(rec->def->ml_doc));
-                // Python 3.9.0 decref's these in the wrong order; rec->def
-                // If loaded on 3.9.0, let these leak (use Python 3.9.1 at runtime to fix)
-                // See https://github.com/python/cpython/pull/22670
-                #if !defined(PYPY_VERSION) && PY_MAJOR_VERSION == 3 && PY_MINOR_VERSION == 9
-                    if (!is_zero)
-                        delete rec->def;
-                #else
+// Python 3.9.0 decref's these in the wrong order; rec->def
+// If loaded on 3.9.0, let these leak (use Python 3.9.1 at runtime to fix)
+// See https://github.com/python/cpython/pull/22670
+#if !defined(PYPY_VERSION) && PY_MAJOR_VERSION == 3 && PY_MINOR_VERSION == 9
+                if (!is_zero) {
                     delete rec->def;
-                #endif
+                }
+#else
+                delete rec->def;
+#endif
             }
             delete rec;
             rec = next;
         }
     }
 
-
     /// Main dispatch logic for calls to functions bound using pybind11
     static PyObject *dispatcher(PyObject *self, PyObject *args_in, PyObject *kwargs_in) {
         using namespace detail;
+        assert(isinstance<capsule>(self));
 
         /* Iterator over the list of potentially admissible overloads */
-        const function_record *overloads = (function_record *) PyCapsule_GetPointer(self, nullptr),
+        const function_record *overloads = reinterpret_cast<function_record *>(
+                                  PyCapsule_GetPointer(self, get_function_record_capsule_name())),
                               *it = overloads;
+        assert(overloads != nullptr);
 
-        /* Need to know how many arguments + keyword arguments there are to pick the right overload */
+        /* Need to know how many arguments + keyword arguments there are to pick the right
+           overload */
         const auto n_args_in = (size_t) PyTuple_GET_SIZE(args_in);
 
         handle parent = n_args_in > 0 ? PyTuple_GET_ITEM(args_in, 0) : nullptr,
@@ -599,19 +685,23 @@ protected:
 
         auto self_value_and_holder = value_and_holder();
         if (overloads->is_constructor) {
-            if (!parent || !PyObject_TypeCheck(parent.ptr(), (PyTypeObject *) overloads->scope.ptr())) {
-                PyErr_SetString(PyExc_TypeError, "__init__(self, ...) called with invalid or missing `self` argument");
+            if (!parent
+                || !PyObject_TypeCheck(parent.ptr(), (PyTypeObject *) overloads->scope.ptr())) {
+                PyErr_SetString(
+                    PyExc_TypeError,
+                    "__init__(self, ...) called with invalid or missing `self` argument");
                 return nullptr;
             }
 
-            const auto tinfo = get_type_info((PyTypeObject *) overloads->scope.ptr());
-            const auto pi = reinterpret_cast<instance *>(parent.ptr());
+            auto *const tinfo = get_type_info((PyTypeObject *) overloads->scope.ptr());
+            auto *const pi = reinterpret_cast<instance *>(parent.ptr());
             self_value_and_holder = pi->get_value_and_holder(tinfo, true);
 
             // If this value is already registered it must mean __init__ is invoked multiple times;
             // we really can't support that in C++, so just ignore the second __init__.
-            if (self_value_and_holder.instance_registered())
+            if (self_value_and_holder.instance_registered()) {
                 return none().release().ptr();
+            }
         }
 
         try {
@@ -630,44 +720,53 @@ protected:
                    1. Copy all positional arguments we were given, also checking to make sure that
                       named positional arguments weren't *also* specified via kwarg.
                    2. If we weren't given enough, try to make up the omitted ones by checking
-                      whether they were provided by a kwarg matching the `py::arg("name")` name.  If
-                      so, use it (and remove it from kwargs; if not, see if the function binding
+                      whether they were provided by a kwarg matching the `py::arg("name")` name. If
+                      so, use it (and remove it from kwargs); if not, see if the function binding
                       provided a default that we can use.
-                   3. Ensure that either all keyword arguments were "consumed", or that the function
-                      takes a kwargs argument to accept unconsumed kwargs.
+                   3. Ensure that either all keyword arguments were "consumed", or that the
+                   function takes a kwargs argument to accept unconsumed kwargs.
                    4. Any positional arguments still left get put into a tuple (for args), and any
                       leftover kwargs get put into a dict.
                    5. Pack everything into a vector; if we have py::args or py::kwargs, they are an
                       extra tuple or dict at the end of the positional arguments.
                    6. Call the function call dispatcher (function_record::impl)
 
-                   If one of these fail, move on to the next overload and keep trying until we get a
-                   result other than PYBIND11_TRY_NEXT_OVERLOAD.
+                   If one of these fail, move on to the next overload and keep trying until we get
+                   a result other than PYBIND11_TRY_NEXT_OVERLOAD.
                  */
 
                 const function_record &func = *it;
-                size_t num_args = func.nargs;    // Number of positional arguments that we need
-                if (func.has_args) --num_args;   // (but don't count py::args
-                if (func.has_kwargs) --num_args; //  or py::kwargs)
-                size_t pos_args = num_args - func.nargs_kw_only;
+                size_t num_args = func.nargs; // Number of positional arguments that we need
+                if (func.has_args) {
+                    --num_args; // (but don't count py::args
+                }
+                if (func.has_kwargs) {
+                    --num_args; //  or py::kwargs)
+                }
+                size_t pos_args = func.nargs_pos;
 
-                if (!func.has_args && n_args_in > pos_args)
+                if (!func.has_args && n_args_in > pos_args) {
                     continue; // Too many positional arguments for this overload
+                }
 
-                if (n_args_in < pos_args && func.args.size() < pos_args)
-                    continue; // Not enough positional arguments given, and not enough defaults to fill in the blanks
+                if (n_args_in < pos_args && func.args.size() < pos_args) {
+                    continue; // Not enough positional arguments given, and not enough defaults to
+                              // fill in the blanks
+                }
 
                 function_call call(func, parent);
 
-                size_t args_to_copy = (std::min)(pos_args, n_args_in); // Protect std::min with parentheses
+                // Protect std::min with parentheses
+                size_t args_to_copy = (std::min)(pos_args, n_args_in);
                 size_t args_copied = 0;
 
                 // 0. Inject new-style `self` argument
                 if (func.is_new_style_constructor) {
                     // The `value` may have been preallocated by an old-style `__init__`
                     // if it was a preceding candidate for overload resolution.
-                    if (self_value_and_holder)
+                    if (self_value_and_holder) {
                         self_value_and_holder.type->dealloc(self_value_and_holder);
+                    }
 
                     call.init_self = PyTuple_GET_ITEM(args_in, 0);
                     call.args.emplace_back(reinterpret_cast<PyObject *>(&self_value_and_holder));
@@ -678,8 +777,10 @@ protected:
                 // 1. Copy any position arguments given.
                 bool bad_arg = false;
                 for (; args_copied < args_to_copy; ++args_copied) {
-                    const argument_record *arg_rec = args_copied < func.args.size() ? &func.args[args_copied] : nullptr;
-                    if (kwargs_in && arg_rec && arg_rec->name && dict_getitemstring(kwargs_in, arg_rec->name)) {
+                    const argument_record *arg_rec
+                        = args_copied < func.args.size() ? &func.args[args_copied] : nullptr;
+                    if (kwargs_in && arg_rec && arg_rec->name
+                        && dict_getitemstring(kwargs_in, arg_rec->name)) {
                         bad_arg = true;
                         break;
                     }
@@ -692,8 +793,13 @@ protected:
                     call.args.push_back(arg);
                     call.args_convert.push_back(arg_rec ? arg_rec->convert : true);
                 }
-                if (bad_arg)
+                if (bad_arg) {
                     continue; // Maybe it was meant for another overload (issue #688)
+                }
+
+                // Keep track of how many position args we copied out in case we need to come back
+                // to copy the rest into a py::args argument.
+                size_t positional_args_copied = args_copied;
 
                 // We'll need to copy this if we steal some kwargs for defaults
                 dict kwargs = reinterpret_borrow<dict>(kwargs_in);
@@ -710,12 +816,14 @@ protected:
                         if (value) {
                             call.args.push_back(value);
                             call.args_convert.push_back(arg_rec.convert);
-                        } else
+                        } else {
                             break;
+                        }
                     }
 
-                    if (args_copied < func.nargs_pos_only)
+                    if (args_copied < func.nargs_pos_only) {
                         continue; // Not enough defaults to fill the positional arguments
+                    }
                 }
 
                 // 2. Check kwargs and, failing that, defaults that may help complete the list
@@ -726,8 +834,9 @@ protected:
                         const auto &arg_rec = func.args[args_copied];
 
                         handle value;
-                        if (kwargs_in && arg_rec.name)
+                        if (kwargs_in && arg_rec.name) {
                             value = dict_getitemstring(kwargs.ptr(), arg_rec.name);
+                        }
 
                         if (value) {
                             // Consume a kwargs value
@@ -747,20 +856,29 @@ protected:
                         }
 
                         if (value) {
+                            // If we're at the py::args index then first insert a stub for it to be
+                            // replaced later
+                            if (func.has_args && call.args.size() == func.nargs_pos) {
+                                call.args.push_back(none());
+                            }
+
                             call.args.push_back(value);
                             call.args_convert.push_back(arg_rec.convert);
-                        }
-                        else
+                        } else {
                             break;
+                        }
                     }
 
-                    if (args_copied < num_args)
-                        continue; // Not enough arguments, defaults, or kwargs to fill the positional arguments
+                    if (args_copied < num_args) {
+                        continue; // Not enough arguments, defaults, or kwargs to fill the
+                                  // positional arguments
+                    }
                 }
 
                 // 3. Check everything was consumed (unless we have a kwargs arg)
-                if (kwargs && !kwargs.empty() && !func.has_kwargs)
+                if (kwargs && !kwargs.empty() && !func.has_kwargs) {
                     continue; // Unconsumed kwargs, but no py::kwargs argument to accept them
+                }
 
                 // 4a. If we have a py::args argument, create a new tuple with leftovers
                 if (func.has_args) {
@@ -769,35 +887,42 @@ protected:
                         // We didn't copy out any position arguments from the args_in tuple, so we
                         // can reuse it directly without copying:
                         extra_args = reinterpret_borrow<tuple>(args_in);
-                    } else if (args_copied >= n_args_in) {
+                    } else if (positional_args_copied >= n_args_in) {
                         extra_args = tuple(0);
                     } else {
-                        size_t args_size = n_args_in - args_copied;
+                        size_t args_size = n_args_in - positional_args_copied;
                         extra_args = tuple(args_size);
                         for (size_t i = 0; i < args_size; ++i) {
-                            extra_args[i] = PyTuple_GET_ITEM(args_in, args_copied + i);
+                            extra_args[i] = PyTuple_GET_ITEM(args_in, positional_args_copied + i);
                         }
                     }
-                    call.args.push_back(extra_args);
+                    if (call.args.size() <= func.nargs_pos) {
+                        call.args.push_back(extra_args);
+                    } else {
+                        call.args[func.nargs_pos] = extra_args;
+                    }
                     call.args_convert.push_back(false);
                     call.args_ref = std::move(extra_args);
                 }
 
                 // 4b. If we have a py::kwargs, pass on any remaining kwargs
                 if (func.has_kwargs) {
-                    if (!kwargs.ptr())
+                    if (!kwargs.ptr()) {
                         kwargs = dict(); // If we didn't get one, send an empty one
+                    }
                     call.args.push_back(kwargs);
                     call.args_convert.push_back(false);
                     call.kwargs_ref = std::move(kwargs);
                 }
 
-                // 5. Put everything in a vector.  Not technically step 5, we've been building it
-                // in `call.args` all along.
-                #if !defined(NDEBUG)
-                if (call.args.size() != func.nargs || call.args_convert.size() != func.nargs)
-                    pybind11_fail("Internal error: function call dispatcher inserted wrong number of arguments!");
-                #endif
+// 5. Put everything in a vector.  Not technically step 5, we've been building it
+// in `call.args` all along.
+#if defined(PYBIND11_DETAILED_ERROR_MESSAGES)
+                if (call.args.size() != func.nargs || call.args_convert.size() != func.nargs) {
+                    pybind11_fail("Internal error: function call dispatcher inserted wrong number "
+                                  "of arguments!");
+                }
+#endif
 
                 std::vector<bool> second_pass_convert;
                 if (overloaded) {
@@ -816,8 +941,9 @@ protected:
                     result = PYBIND11_TRY_NEXT_OVERLOAD;
                 }
 
-                if (result.ptr() != PYBIND11_TRY_NEXT_OVERLOAD)
+                if (result.ptr() != PYBIND11_TRY_NEXT_OVERLOAD) {
                     break;
+                }
 
                 if (overloaded) {
                     // The (overloaded) call failed; if the call has at least one argument that
@@ -836,7 +962,8 @@ protected:
             }
 
             if (overloaded && !second_pass.empty() && result.ptr() == PYBIND11_TRY_NEXT_OVERLOAD) {
-                // The no-conversion pass finished without success, try again with conversion allowed
+                // The no-conversion pass finished without success, try again with conversion
+                // allowed
                 for (auto &call : second_pass) {
                     try {
                         loader_life_support guard{};
@@ -848,8 +975,9 @@ protected:
                     if (result.ptr() != PYBIND11_TRY_NEXT_OVERLOAD) {
                         // The error reporting logic below expects 'it' to be valid, as it would be
                         // if we'd encountered this failure in the first-pass loop.
-                        if (!result)
+                        if (!result) {
                             it = &call.func;
+                        }
                         break;
                     }
                 }
@@ -858,7 +986,7 @@ protected:
             e.restore();
             return nullptr;
 #ifdef __GLIBCXX__
-        } catch ( abi::__forced_unwind& ) {
+        } catch (abi::__forced_unwind &) {
             throw;
 #endif
         } catch (...) {
@@ -875,9 +1003,11 @@ protected:
                 - catch the exception and call PyErr_SetString or PyErr_SetObject
                   to set a standard (or custom) Python exception, or
                 - do nothing and let the exception fall through to the next translator, or
-                - delegate translation to the next translator by throwing a new type of exception. */
+                - delegate translation to the next translator by throwing a new type of exception.
+             */
 
-            auto &local_exception_translators = get_local_internals().registered_exception_translators;
+            auto &local_exception_translators
+                = get_local_internals().registered_exception_translators;
             if (detail::apply_exception_translators(local_exception_translators)) {
                 return nullptr;
             }
@@ -886,7 +1016,8 @@ protected:
                 return nullptr;
             }
 
-            PyErr_SetString(PyExc_SystemError, "Exception escaped from default exception translator!");
+            PyErr_SetString(PyExc_SystemError,
+                            "Exception escaped from default exception translator!");
             return nullptr;
         }
 
@@ -901,20 +1032,22 @@ protected:
         };
 
         if (result.ptr() == PYBIND11_TRY_NEXT_OVERLOAD) {
-            if (overloads->is_operator)
+            if (overloads->is_operator) {
                 return handle(Py_NotImplemented).inc_ref().ptr();
+            }
 
-            std::string msg = std::string(overloads->name) + "(): incompatible " +
-                std::string(overloads->is_constructor ? "constructor" : "function") +
-                " arguments. The following argument types are supported:\n";
+            std::string msg = std::string(overloads->name) + "(): incompatible "
+                              + std::string(overloads->is_constructor ? "constructor" : "function")
+                              + " arguments. The following argument types are supported:\n";
 
             int ctr = 0;
             for (const function_record *it2 = overloads; it2 != nullptr; it2 = it2->next) {
-                msg += "    "+ std::to_string(++ctr) + ". ";
+                msg += "    " + std::to_string(++ctr) + ". ";
 
                 bool wrote_sig = false;
                 if (overloads->is_constructor) {
-                    // For a constructor, rewrite `(self: Object, arg0, ...) -> NoneType` as `Object(arg0, ...)`
+                    // For a constructor, rewrite `(self: Object, arg0, ...) -> NoneType` as
+                    // `Object(arg0, ...)`
                     std::string sig = it2->signature;
                     size_t start = sig.find('(') + 7; // skip "(self: "
                     if (start < sig.size()) {
@@ -922,7 +1055,9 @@ protected:
                         size_t end = sig.find(", "), next = end + 2;
                         size_t ret = sig.rfind(" -> ");
                         // Or the ), if there is no comma:
-                        if (end >= sig.size()) next = end = sig.find(')');
+                        if (end >= sig.size()) {
+                            next = end = sig.find(')');
+                        }
                         if (start < end && next < sig.size()) {
                             msg.append(sig, start, end - start);
                             msg += '(';
@@ -931,35 +1066,45 @@ protected:
                         }
                     }
                 }
-                if (!wrote_sig) msg += it2->signature;
+                if (!wrote_sig) {
+                    msg += it2->signature;
+                }
 
-                msg += "\n";
+                msg += '\n';
             }
             msg += "\nInvoked with: ";
             auto args_ = reinterpret_borrow<tuple>(args_in);
             bool some_args = false;
             for (size_t ti = overloads->is_constructor ? 1 : 0; ti < args_.size(); ++ti) {
-                if (!some_args) some_args = true;
-                else msg += ", ";
+                if (!some_args) {
+                    some_args = true;
+                } else {
+                    msg += ", ";
+                }
                 try {
                     msg += pybind11::repr(args_[ti]);
-                } catch (const error_already_set&) {
+                } catch (const error_already_set &) {
                     msg += "<repr raised Error>";
                 }
             }
             if (kwargs_in) {
                 auto kwargs = reinterpret_borrow<dict>(kwargs_in);
                 if (!kwargs.empty()) {
-                    if (some_args) msg += "; ";
+                    if (some_args) {
+                        msg += "; ";
+                    }
                     msg += "kwargs: ";
                     bool first = true;
                     for (auto kwarg : kwargs) {
-                        if (first) first = false;
-                        else msg += ", ";
+                        if (first) {
+                            first = false;
+                        } else {
+                            msg += ", ";
+                        }
                         msg += pybind11::str("{}=").format(kwarg.first);
                         try {
                             msg += pybind11::repr(kwarg.second);
-                        } catch (const error_already_set&) {
+                        } catch (const error_already_set &) {
                             msg += "<repr raised Error>";
                         }
                     }
@@ -967,6 +1112,12 @@ protected:
             }
 
             append_note_if_missing_header_is_suspected(msg);
+            // Attach additional error info to the exception if supported
+            if (PyErr_Occurred()) {
+                // #HelpAppreciated: unit test coverage for this branch.
+                raise_from(PyExc_TypeError, msg.c_str());
+                return nullptr;
+            }
             PyErr_SetString(PyExc_TypeError, msg.c_str());
             return nullptr;
         }
@@ -975,6 +1126,11 @@ protected:
                               "Python type! The signature was\n\t";
             msg += it->signature;
             append_note_if_missing_header_is_suspected(msg);
+            // Attach additional error info to the exception if supported
+            if (PyErr_Occurred()) {
+                raise_from(PyExc_TypeError, msg.c_str());
+                return nullptr;
+            }
             PyErr_SetString(PyExc_TypeError, msg.c_str());
             return nullptr;
         }
@@ -986,7 +1142,6 @@ protected:
     }
 };
 
-
 /// Wrapper for Python extension modules
 class module_ : public object {
 public:
@@ -995,11 +1150,7 @@ public:
     /// Create a new top-level Python module with the given name and docstring
     PYBIND11_DEPRECATED("Use PYBIND11_MODULE or module_::create_extension_module instead")
     explicit module_(const char *name, const char *doc = nullptr) {
-#if PY_MAJOR_VERSION >= 3
         *this = create_extension_module(name, doc, new PyModuleDef());
-#else
-        *this = create_extension_module(name, doc, nullptr);
-#endif
     }
 
     /** \rst
@@ -1008,11 +1159,15 @@ public:
         details on the ``Extra&& ... extra`` argument, see section :ref:`extras`.
     \endrst */
     template <typename Func, typename... Extra>
-    module_ &def(const char *name_, Func &&f, const Extra& ... extra) {
-        cpp_function func(std::forward<Func>(f), name(name_), scope(*this),
-                          sibling(getattr(*this, name_, none())), extra...);
+    module_ &def(const char *name_, Func &&f, const Extra &...extra) {
+        cpp_function func(std::forward<Func>(f),
+                          name(name_),
+                          scope(*this),
+                          sibling(getattr(*this, name_, none())),
+                          extra...);
         // NB: allow overwriting here because cpp_function sets up a chain with the intention of
-        // overwriting (and has already checked internally that it isn't overwriting non-functions).
+        // overwriting (and has already checked internally that it isn't overwriting
+        // non-functions).
         add_object(name_, func, true /* overwrite */);
         return *this;
     }
@@ -1028,11 +1183,19 @@ public:
             py::module_ m3 = m2.def_submodule("subsub", "A submodule of 'example.sub'");
     \endrst */
     module_ def_submodule(const char *name, const char *doc = nullptr) {
-        std::string full_name = std::string(PyModule_GetName(m_ptr))
-            + std::string(".") + std::string(name);
-        auto result = reinterpret_borrow<module_>(PyImport_AddModule(full_name.c_str()));
-        if (doc && options::show_user_defined_docstrings())
+        const char *this_name = PyModule_GetName(m_ptr);
+        if (this_name == nullptr) {
+            throw error_already_set();
+        }
+        std::string full_name = std::string(this_name) + '.' + name;
+        handle submodule = PyImport_AddModule(full_name.c_str());
+        if (!submodule) {
+            throw error_already_set();
+        }
+        auto result = reinterpret_borrow<module_>(submodule);
+        if (doc && options::show_user_defined_docstrings()) {
             result.attr("__doc__") = pybind11::str(doc);
+        }
         attr(name) = result;
         return result;
     }
@@ -1040,16 +1203,18 @@ public:
     /// Import and return a module or throws `error_already_set`.
     static module_ import(const char *name) {
         PyObject *obj = PyImport_ImportModule(name);
-        if (!obj)
+        if (!obj) {
             throw error_already_set();
+        }
         return reinterpret_steal<module_>(obj);
     }
 
     /// Reload the module or throws `error_already_set`.
     void reload() {
         PyObject *obj = PyImport_ReloadModule(ptr());
-        if (!obj)
+        if (!obj) {
             throw error_already_set();
+        }
         *this = reinterpret_steal<module_>(obj);
     }
 
@@ -1057,56 +1222,49 @@ public:
         Adds an object to the module using the given name.  Throws if an object with the given name
         already exists.
 
-        ``overwrite`` should almost always be false: attempting to overwrite objects that pybind11 has
-        established will, in most cases, break things.
+        ``overwrite`` should almost always be false: attempting to overwrite objects that pybind11
+        has established will, in most cases, break things.
     \endrst */
     PYBIND11_NOINLINE void add_object(const char *name, handle obj, bool overwrite = false) {
-        if (!overwrite && hasattr(*this, name))
-            pybind11_fail("Error during initialization: multiple incompatible definitions with name \"" +
-                    std::string(name) + "\"");
+        if (!overwrite && hasattr(*this, name)) {
+            pybind11_fail(
+                "Error during initialization: multiple incompatible definitions with name \""
+                + std::string(name) + "\"");
+        }
 
         PyModule_AddObject(ptr(), name, obj.inc_ref().ptr() /* steals a reference */);
     }
 
-#if PY_MAJOR_VERSION >= 3
-    using module_def = PyModuleDef;
-#else
-    struct module_def {};
-#endif
+    using module_def = PyModuleDef; // TODO: Can this be removed (it was needed only for Python 2)?
 
     /** \rst
         Create a new top-level module that can be used as the main module of a C extension.
 
-        For Python 3, ``def`` should point to a statically allocated module_def.
-        For Python 2, ``def`` can be a nullptr and is completely ignored.
+        ``def`` should point to a statically allocated module_def.
     \endrst */
     static module_ create_extension_module(const char *name, const char *doc, module_def *def) {
-#if PY_MAJOR_VERSION >= 3
         // module_def is PyModuleDef
-        def = new (def) PyModuleDef {  // Placement new (not an allocation).
-            /* m_base */     PyModuleDef_HEAD_INIT,
-            /* m_name */     name,
-            /* m_doc */      options::show_user_defined_docstrings() ? doc : nullptr,
-            /* m_size */     -1,
-            /* m_methods */  nullptr,
-            /* m_slots */    nullptr,
-            /* m_traverse */ nullptr,
-            /* m_clear */    nullptr,
-            /* m_free */     nullptr
-        };
-        auto m = PyModule_Create(def);
-#else
-        // Ignore module_def *def; only necessary for Python 3
-        (void) def;
-        auto m = Py_InitModule3(name, nullptr, options::show_user_defined_docstrings() ? doc : nullptr);
-#endif
+        // Placement new (not an allocation).
+        def = new (def)
+            PyModuleDef{/* m_base */ PyModuleDef_HEAD_INIT,
+                        /* m_name */ name,
+                        /* m_doc */ options::show_user_defined_docstrings() ? doc : nullptr,
+                        /* m_size */ -1,
+                        /* m_methods */ nullptr,
+                        /* m_slots */ nullptr,
+                        /* m_traverse */ nullptr,
+                        /* m_clear */ nullptr,
+                        /* m_free */ nullptr};
+        auto *m = PyModule_Create(def);
         if (m == nullptr) {
-            if (PyErr_Occurred())
+            if (PyErr_Occurred()) {
                 throw error_already_set();
+            }
             pybind11_fail("Internal error in module_::create_extension_module()");
         }
-        // TODO: Should be reinterpret_steal for Python 3, but Python also steals it again when returned from PyInit_...
-        //       For Python 2, reinterpret_borrow is correct.
+        // TODO: Should be reinterpret_steal for Python 3, but Python also steals it again when
+        //       returned from PyInit_...
+        //       For Python 2, reinterpret_borrow was correct.
         return reinterpret_borrow<module_>(m);
     }
 };
@@ -1124,14 +1282,12 @@ inline dict globals() {
     return reinterpret_borrow<dict>(p ? p : module_::import("__main__").attr("__dict__").ptr());
 }
 
-#if PY_VERSION_HEX >= 0x03030000
-template <typename... Args,
-          typename = detail::enable_if_t<args_are_all_keyword_or_ds<Args...>()>>
-PYBIND11_DEPRECATED("make_simple_namespace should be replaced with py::module_::import(\"types\").attr(\"SimpleNamespace\") ")
-object make_simple_namespace(Args&&... args_) {
+template <typename... Args, typename = detail::enable_if_t<args_are_all_keyword_or_ds<Args...>()>>
+PYBIND11_DEPRECATED("make_simple_namespace should be replaced with "
+                    "py::module_::import(\"types\").attr(\"SimpleNamespace\") ")
+object make_simple_namespace(Args &&...args_) {
     return module_::import("types").attr("SimpleNamespace")(std::forward<Args>(args_)...);
 }
-#endif
 
 PYBIND11_NAMESPACE_BEGIN(detail)
 /// Generic support for creating new Python heap types
@@ -1140,14 +1296,17 @@ public:
     PYBIND11_OBJECT_DEFAULT(generic_type, object, PyType_Check)
 protected:
     void initialize(const type_record &rec) {
-        if (rec.scope && hasattr(rec.scope, "__dict__") && rec.scope.attr("__dict__").contains(rec.name))
-            pybind11_fail("generic_type: cannot initialize type \"" + std::string(rec.name) +
-                          "\": an object with that name is already defined");
+        if (rec.scope && hasattr(rec.scope, "__dict__")
+            && rec.scope.attr("__dict__").contains(rec.name)) {
+            pybind11_fail("generic_type: cannot initialize type \"" + std::string(rec.name)
+                          + "\": an object with that name is already defined");
+        }
 
         if ((rec.module_local ? get_local_type_info(*rec.type) : get_global_type_info(*rec.type))
-            != nullptr)
-            pybind11_fail("generic_type: type \"" + std::string(rec.name) +
-                          "\" is already registered!");
+            != nullptr) {
+            pybind11_fail("generic_type: type \"" + std::string(rec.name)
+                          + "\" is already registered!");
+        }
 
         m_ptr = make_new_python_type(rec);
 
@@ -1169,19 +1328,23 @@ protected:
         auto &internals = get_internals();
         auto tindex = std::type_index(*rec.type);
         tinfo->direct_conversions = &internals.direct_conversions[tindex];
-        if (rec.module_local)
+        if (rec.module_local) {
             get_local_internals().registered_types_cpp[tindex] = tinfo;
-        else
+        } else {
             internals.registered_types_cpp[tindex] = tinfo;
-        internals.registered_types_py[(PyTypeObject *) m_ptr] = { tinfo };
+        }
+        internals.registered_types_py[(PyTypeObject *) m_ptr] = {tinfo};
 
         if (rec.bases.size() > 1 || rec.multiple_inheritance) {
             mark_parents_nonsimple(tinfo->type);
             tinfo->simple_ancestors = false;
-        }
-        else if (rec.bases.size() == 1) {
-            auto parent_tinfo = get_type_info((PyTypeObject *) rec.bases[0].ptr());
-            tinfo->simple_ancestors = parent_tinfo->simple_ancestors;
+        } else if (rec.bases.size() == 1) {
+            auto *parent_tinfo = get_type_info((PyTypeObject *) rec.bases[0].ptr());
+            assert(parent_tinfo != nullptr);
+            bool parent_simple_ancestors = parent_tinfo->simple_ancestors;
+            tinfo->simple_ancestors = parent_simple_ancestors;
+            // The parent can no longer be a simple type if it has MI and has a child
+            parent_tinfo->simple_type = parent_tinfo->simple_type && parent_simple_ancestors;
         }
 
         if (rec.module_local) {
@@ -1195,25 +1358,25 @@ protected:
     void mark_parents_nonsimple(PyTypeObject *value) {
         auto t = reinterpret_borrow<tuple>(value->tp_bases);
         for (handle h : t) {
-            auto tinfo2 = get_type_info((PyTypeObject *) h.ptr());
-            if (tinfo2)
+            auto *tinfo2 = get_type_info((PyTypeObject *) h.ptr());
+            if (tinfo2) {
                 tinfo2->simple_type = false;
+            }
             mark_parents_nonsimple((PyTypeObject *) h.ptr());
         }
     }
 
-    void install_buffer_funcs(
-            buffer_info *(*get_buffer)(PyObject *, void *),
-            void *get_buffer_data) {
-        auto *type = (PyHeapTypeObject*) m_ptr;
-        auto tinfo = detail::get_type_info(&type->ht_type);
+    void install_buffer_funcs(buffer_info *(*get_buffer)(PyObject *, void *),
+                              void *get_buffer_data) {
+        auto *type = (PyHeapTypeObject *) m_ptr;
+        auto *tinfo = detail::get_type_info(&type->ht_type);
 
-        if (!type->ht_type.tp_as_buffer)
-            pybind11_fail(
-                "To be able to register buffer protocol support for the type '" +
-                get_fully_qualified_tp_name(tinfo->type) +
-                "' the associated class<>(..) invocation must "
-                "include the pybind11::buffer_protocol() annotation!");
+        if (!type->ht_type.tp_as_buffer) {
+            pybind11_fail("To be able to register buffer protocol support for the type '"
+                          + get_fully_qualified_tp_name(tinfo->type)
+                          + "' the associated class<>(..) invocation must "
+                            "include the pybind11::buffer_protocol() annotation!");
+        }
 
         tinfo->get_buffer = get_buffer;
         tinfo->get_buffer_data = get_buffer_data;
@@ -1221,61 +1384,79 @@ protected:
 
     // rec_func must be set for either fget or fset.
     void def_property_static_impl(const char *name,
-                                  handle fget, handle fset,
+                                  handle fget,
+                                  handle fset,
                                   detail::function_record *rec_func) {
         const auto is_static = (rec_func != nullptr) && !(rec_func->is_method && rec_func->scope);
         const auto has_doc = (rec_func != nullptr) && (rec_func->doc != nullptr)
                              && pybind11::options::show_user_defined_docstrings();
-        auto property = handle((PyObject *) (is_static ? get_internals().static_property_type
-                                                       : &PyProperty_Type));
+        auto property = handle(
+            (PyObject *) (is_static ? get_internals().static_property_type : &PyProperty_Type));
         attr(name) = property(fget.ptr() ? fget : none(),
                               fset.ptr() ? fset : none(),
-                              /*deleter*/none(),
+                              /*deleter*/ none(),
                               pybind11::str(has_doc ? rec_func->doc : ""));
     }
 };
 
 /// Set the pointer to operator new if it exists. The cast is needed because it can be overloaded.
-template <typename T, typename = void_t<decltype(static_cast<void *(*)(size_t)>(T::operator new))>>
-void set_operator_new(type_record *r) { r->operator_new = &T::operator new; }
-
-template <typename> void set_operator_new(...) { }
-
-template <typename T, typename SFINAE = void> struct has_operator_delete : std::false_type { };
-template <typename T> struct has_operator_delete<T, void_t<decltype(static_cast<void (*)(void *)>(T::operator delete))>>
-    : std::true_type { };
-template <typename T, typename SFINAE = void> struct has_operator_delete_size : std::false_type { };
-template <typename T> struct has_operator_delete_size<T, void_t<decltype(static_cast<void (*)(void *, size_t)>(T::operator delete))>>
-    : std::true_type { };
-/// Call class-specific delete if it exists or global otherwise. Can also be an overload set.
-template <typename T, enable_if_t<has_operator_delete<T>::value, int> = 0>
-void call_operator_delete(T *p, size_t, size_t) { T::operator delete(p); }
-template <typename T, enable_if_t<!has_operator_delete<T>::value && has_operator_delete_size<T>::value, int> = 0>
-void call_operator_delete(T *p, size_t s, size_t) { T::operator delete(p, s); }
-
-inline void call_operator_delete(void *p, size_t s, size_t a) {
-    (void)s; (void)a;
-    #if defined(__cpp_aligned_new) && (!defined(_MSC_VER) || _MSC_VER >= 1912)
-        if (a > __STDCPP_DEFAULT_NEW_ALIGNMENT__) {
-            #ifdef __cpp_sized_deallocation
-                ::operator delete(p, s, std::align_val_t(a));
-            #else
-                ::operator delete(p, std::align_val_t(a));
-            #endif
-            return;
-        }
-    #endif
-    #ifdef __cpp_sized_deallocation
-        ::operator delete(p, s);
-    #else
-        ::operator delete(p);
-    #endif
+template <typename T,
+          typename = void_t<decltype(static_cast<void *(*) (size_t)>(T::operator new))>>
+void set_operator_new(type_record *r) {
+    r->operator_new = &T::operator new;
 }
 
-inline void add_class_method(object& cls, const char *name_, const cpp_function &cf) {
+template <typename>
+void set_operator_new(...) {}
+
+template <typename T, typename SFINAE = void>
+struct has_operator_delete : std::false_type {};
+template <typename T>
+struct has_operator_delete<T, void_t<decltype(static_cast<void (*)(void *)>(T::operator delete))>>
+    : std::true_type {};
+template <typename T, typename SFINAE = void>
+struct has_operator_delete_size : std::false_type {};
+template <typename T>
+struct has_operator_delete_size<
+    T,
+    void_t<decltype(static_cast<void (*)(void *, size_t)>(T::operator delete))>> : std::true_type {
+};
+/// Call class-specific delete if it exists or global otherwise. Can also be an overload set.
+template <typename T, enable_if_t<has_operator_delete<T>::value, int> = 0>
+void call_operator_delete(T *p, size_t, size_t) {
+    T::operator delete(p);
+}
+template <typename T,
+          enable_if_t<!has_operator_delete<T>::value && has_operator_delete_size<T>::value, int>
+          = 0>
+void call_operator_delete(T *p, size_t s, size_t) {
+    T::operator delete(p, s);
+}
+
+inline void call_operator_delete(void *p, size_t s, size_t a) {
+    (void) s;
+    (void) a;
+#if defined(__cpp_aligned_new) && (!defined(_MSC_VER) || _MSC_VER >= 1912)
+    if (a > __STDCPP_DEFAULT_NEW_ALIGNMENT__) {
+#    ifdef __cpp_sized_deallocation
+        ::operator delete(p, s, std::align_val_t(a));
+#    else
+        ::operator delete(p, std::align_val_t(a));
+#    endif
+        return;
+    }
+#endif
+#ifdef __cpp_sized_deallocation
+    ::operator delete(p, s);
+#else
+    ::operator delete(p);
+#endif
+}
+
+inline void add_class_method(object &cls, const char *name_, const cpp_function &cf) {
     cls.attr(cf.name()) = cf;
-    if (strcmp(name_, "__eq__") == 0 && !cls.attr("__dict__").contains("__hash__")) {
-      cls.attr("__hash__") = none();
+    if (std::strcmp(name_, "__eq__") == 0 && !cls.attr("__dict__").contains("__hash__")) {
+        cls.attr("__hash__") = none();
     }
 }
 
@@ -1284,30 +1465,37 @@ PYBIND11_NAMESPACE_END(detail)
 /// Given a pointer to a member function, cast it to its `Derived` version.
 /// Forward everything else unchanged.
 template <typename /*Derived*/, typename F>
-auto method_adaptor(F &&f) -> decltype(std::forward<F>(f)) { return std::forward<F>(f); }
+auto method_adaptor(F &&f) -> decltype(std::forward<F>(f)) {
+    return std::forward<F>(f);
+}
 
 template <typename Derived, typename Return, typename Class, typename... Args>
 auto method_adaptor(Return (Class::*pmf)(Args...)) -> Return (Derived::*)(Args...) {
-    static_assert(detail::is_accessible_base_of<Class, Derived>::value,
+    static_assert(
+        detail::is_accessible_base_of<Class, Derived>::value,
         "Cannot bind an inaccessible base class method; use a lambda definition instead");
     return pmf;
 }
 
 template <typename Derived, typename Return, typename Class, typename... Args>
 auto method_adaptor(Return (Class::*pmf)(Args...) const) -> Return (Derived::*)(Args...) const {
-    static_assert(detail::is_accessible_base_of<Class, Derived>::value,
+    static_assert(
+        detail::is_accessible_base_of<Class, Derived>::value,
         "Cannot bind an inaccessible base class method; use a lambda definition instead");
     return pmf;
 }
 
 template <typename type_, typename... options>
 class class_ : public detail::generic_type {
-    template <typename T> using is_holder = detail::is_holder_type<type_, T>;
-    template <typename T> using is_subtype = detail::is_strict_base_of<type_, T>;
-    template <typename T> using is_base = detail::is_strict_base_of<T, type_>;
+    template <typename T>
+    using is_holder = detail::is_holder_type<type_, T>;
+    template <typename T>
+    using is_subtype = detail::is_strict_base_of<type_, T>;
+    template <typename T>
+    using is_base = detail::is_strict_base_of<T, type_>;
     // struct instead of using here to help MSVC:
-    template <typename T> struct is_valid_class_option :
-        detail::any_of<is_holder<T>, is_subtype<T>, is_base<T>> {};
+    template <typename T>
+    struct is_valid_class_option : detail::any_of<is_holder<T>, is_subtype<T>, is_base<T>> {};
 
 public:
     using type = type_;
@@ -1316,23 +1504,24 @@ public:
     using holder_type = detail::exactly_one_t<is_holder, std::unique_ptr<type>, options...>;
 
     static_assert(detail::all_of<is_valid_class_option<options>...>::value,
-            "Unknown/invalid class_ template parameters provided");
+                  "Unknown/invalid class_ template parameters provided");
 
     static_assert(!has_alias || std::is_polymorphic<type>::value,
-            "Cannot use an alias class with a non-polymorphic type");
+                  "Cannot use an alias class with a non-polymorphic type");
 
     PYBIND11_OBJECT(class_, generic_type, PyType_Check)
 
     template <typename... Extra>
-    class_(handle scope, const char *name, const Extra &... extra) {
+    class_(handle scope, const char *name, const Extra &...extra) {
         using namespace detail;
 
         // MI can only be specified via class_ template options, not constructor parameters
         static_assert(
             none_of<is_pyobject<Extra>...>::value || // no base class arguments, or:
-            (   constexpr_sum(is_pyobject<Extra>::value...) == 1 && // Exactly one base
-                constexpr_sum(is_base<options>::value...)   == 0 && // no template option bases
-                none_of<std::is_same<multiple_inheritance, Extra>...>::value), // no multiple_inheritance attr
+                (constexpr_sum(is_pyobject<Extra>::value...) == 1 && // Exactly one base
+                 constexpr_sum(is_base<options>::value...) == 0 &&   // no template option bases
+                 // no multiple_inheritance attr
+                 none_of<std::is_same<multiple_inheritance, Extra>...>::value),
             "Error: multiple inheritance bases must be specified via class_ template options");
 
         type_record record;
@@ -1340,7 +1529,7 @@ public:
         record.name = name;
         record.type = &typeid(type);
         record.type_size = sizeof(conditional_t<has_alias, type_alias, type>);
-        record.type_align = alignof(conditional_t<has_alias, type_alias, type>&);
+        record.type_align = alignof(conditional_t<has_alias, type_alias, type> &);
         record.holder_size = sizeof(holder_type);
         record.init_instance = init_instance;
         record.dealloc = dealloc;
@@ -1357,8 +1546,10 @@ public:
         generic_type::initialize(record);
 
         if (has_alias) {
-            auto &instances = record.module_local ? get_local_internals().registered_types_cpp : get_internals().registered_types_cpp;
-            instances[std::type_index(typeid(type_alias))] = instances[std::type_index(typeid(type))];
+            auto &instances = record.module_local ? get_local_internals().registered_types_cpp
+                                                  : get_internals().registered_types_cpp;
+            instances[std::type_index(typeid(type_alias))]
+                = instances[std::type_index(typeid(type))];
         }
     }
 
@@ -1370,54 +1561,61 @@ public:
     }
 
     template <typename Base, detail::enable_if_t<!is_base<Base>::value, int> = 0>
-    static void add_base(detail::type_record &) { }
+    static void add_base(detail::type_record &) {}
 
     template <typename Func, typename... Extra>
-    class_ &def(const char *name_, Func&& f, const Extra&... extra) {
-        cpp_function cf(method_adaptor<type>(std::forward<Func>(f)), name(name_), is_method(*this),
-                        sibling(getattr(*this, name_, none())), extra...);
+    class_ &def(const char *name_, Func &&f, const Extra &...extra) {
+        cpp_function cf(method_adaptor<type>(std::forward<Func>(f)),
+                        name(name_),
+                        is_method(*this),
+                        sibling(getattr(*this, name_, none())),
+                        extra...);
         add_class_method(*this, name_, cf);
         return *this;
     }
 
-    template <typename Func, typename... Extra> class_ &
-    def_static(const char *name_, Func &&f, const Extra&... extra) {
+    template <typename Func, typename... Extra>
+    class_ &def_static(const char *name_, Func &&f, const Extra &...extra) {
         static_assert(!std::is_member_function_pointer<Func>::value,
-                "def_static(...) called with a non-static member function pointer");
-        cpp_function cf(std::forward<Func>(f), name(name_), scope(*this),
-                        sibling(getattr(*this, name_, none())), extra...);
-        attr(cf.name()) = staticmethod(cf);
+                      "def_static(...) called with a non-static member function pointer");
+        cpp_function cf(std::forward<Func>(f),
+                        name(name_),
+                        scope(*this),
+                        sibling(getattr(*this, name_, none())),
+                        extra...);
+        auto cf_name = cf.name();
+        attr(std::move(cf_name)) = staticmethod(std::move(cf));
         return *this;
     }
 
-    template <detail::op_id id, detail::op_type ot, typename L, typename R, typename... Extra>
-    class_ &def(const detail::op_<id, ot, L, R> &op, const Extra&... extra) {
+    template <typename T, typename... Extra, detail::enable_if_t<T::op_enable_if_hook, int> = 0>
+    class_ &def(const T &op, const Extra &...extra) {
         op.execute(*this, extra...);
         return *this;
     }
 
-    template <detail::op_id id, detail::op_type ot, typename L, typename R, typename... Extra>
-    class_ & def_cast(const detail::op_<id, ot, L, R> &op, const Extra&... extra) {
+    template <typename T, typename... Extra, detail::enable_if_t<T::op_enable_if_hook, int> = 0>
+    class_ &def_cast(const T &op, const Extra &...extra) {
         op.execute_cast(*this, extra...);
         return *this;
     }
 
     template <typename... Args, typename... Extra>
-    class_ &def(const detail::initimpl::constructor<Args...> &init, const Extra&... extra) {
+    class_ &def(const detail::initimpl::constructor<Args...> &init, const Extra &...extra) {
         PYBIND11_WORKAROUND_INCORRECT_MSVC_C4100(init);
         init.execute(*this, extra...);
         return *this;
     }
 
     template <typename... Args, typename... Extra>
-    class_ &def(const detail::initimpl::alias_constructor<Args...> &init, const Extra&... extra) {
+    class_ &def(const detail::initimpl::alias_constructor<Args...> &init, const Extra &...extra) {
         PYBIND11_WORKAROUND_INCORRECT_MSVC_C4100(init);
         init.execute(*this, extra...);
         return *this;
     }
 
     template <typename... Args, typename... Extra>
-    class_ &def(detail::initimpl::factory<Args...> &&init, const Extra&... extra) {
+    class_ &def(detail::initimpl::factory<Args...> &&init, const Extra &...extra) {
         std::move(init).execute(*this, extra...);
         return *this;
     }
@@ -1429,51 +1627,59 @@ public:
     }
 
     template <typename Func>
-    class_& def_buffer(Func &&func) {
-        struct capture { Func func; };
-        auto *ptr = new capture { std::forward<Func>(func) };
-        install_buffer_funcs([](PyObject *obj, void *ptr) -> buffer_info* {
-            detail::make_caster<type> caster;
-            if (!caster.load(obj, false))
-                return nullptr;
-            return new buffer_info(((capture *) ptr)->func(caster));
-        }, ptr);
+    class_ &def_buffer(Func &&func) {
+        struct capture {
+            Func func;
+        };
+        auto *ptr = new capture{std::forward<Func>(func)};
+        install_buffer_funcs(
+            [](PyObject *obj, void *ptr) -> buffer_info * {
+                detail::make_caster<type> caster;
+                if (!caster.load(obj, false)) {
+                    return nullptr;
+                }
+                return new buffer_info(((capture *) ptr)->func(std::move(caster)));
+            },
+            ptr);
         weakref(m_ptr, cpp_function([ptr](handle wr) {
-            delete ptr;
-            wr.dec_ref();
-        })).release();
+                    delete ptr;
+                    wr.dec_ref();
+                }))
+            .release();
         return *this;
     }
 
     template <typename Return, typename Class, typename... Args>
     class_ &def_buffer(Return (Class::*func)(Args...)) {
-        return def_buffer([func] (type &obj) { return (obj.*func)(); });
+        return def_buffer([func](type &obj) { return (obj.*func)(); });
     }
 
     template <typename Return, typename Class, typename... Args>
     class_ &def_buffer(Return (Class::*func)(Args...) const) {
-        return def_buffer([func] (const type &obj) { return (obj.*func)(); });
+        return def_buffer([func](const type &obj) { return (obj.*func)(); });
     }
 
     template <typename C, typename D, typename... Extra>
-    class_ &def_readwrite(const char *name, D C::*pm, const Extra&... extra) {
-        static_assert(std::is_same<C, type>::value || std::is_base_of<C, type>::value, "def_readwrite() requires a class member (or base class member)");
-        cpp_function fget([pm](const type &c) -> const D &{ return c.*pm; }, is_method(*this)),
-                     fset([pm](type &c, const D &value) { c.*pm = value; }, is_method(*this));
+    class_ &def_readwrite(const char *name, D C::*pm, const Extra &...extra) {
+        static_assert(std::is_same<C, type>::value || std::is_base_of<C, type>::value,
+                      "def_readwrite() requires a class member (or base class member)");
+        cpp_function fget([pm](const type &c) -> const D & { return c.*pm; }, is_method(*this)),
+            fset([pm](type &c, const D &value) { c.*pm = value; }, is_method(*this));
         def_property(name, fget, fset, return_value_policy::reference_internal, extra...);
         return *this;
     }
 
     template <typename C, typename D, typename... Extra>
-    class_ &def_readonly(const char *name, const D C::*pm, const Extra& ...extra) {
-        static_assert(std::is_same<C, type>::value || std::is_base_of<C, type>::value, "def_readonly() requires a class member (or base class member)");
-        cpp_function fget([pm](const type &c) -> const D &{ return c.*pm; }, is_method(*this));
+    class_ &def_readonly(const char *name, const D C::*pm, const Extra &...extra) {
+        static_assert(std::is_same<C, type>::value || std::is_base_of<C, type>::value,
+                      "def_readonly() requires a class member (or base class member)");
+        cpp_function fget([pm](const type &c) -> const D & { return c.*pm; }, is_method(*this));
         def_property_readonly(name, fget, return_value_policy::reference_internal, extra...);
         return *this;
     }
 
     template <typename D, typename... Extra>
-    class_ &def_readwrite_static(const char *name, D *pm, const Extra& ...extra) {
+    class_ &def_readwrite_static(const char *name, D *pm, const Extra &...extra) {
         cpp_function fget([pm](const object &) -> const D & { return *pm; }, scope(*this)),
             fset([pm](const object &, const D &value) { *pm = value; }, scope(*this));
         def_property_static(name, fget, fset, return_value_policy::reference, extra...);
@@ -1481,7 +1687,7 @@ public:
     }
 
     template <typename D, typename... Extra>
-    class_ &def_readonly_static(const char *name, const D *pm, const Extra& ...extra) {
+    class_ &def_readonly_static(const char *name, const D *pm, const Extra &...extra) {
         cpp_function fget([pm](const object &) -> const D & { return *pm; }, scope(*this));
         def_property_readonly_static(name, fget, return_value_policy::reference, extra...);
         return *this;
@@ -1489,66 +1695,91 @@ public:
 
     /// Uses return_value_policy::reference_internal by default
     template <typename Getter, typename... Extra>
-    class_ &def_property_readonly(const char *name, const Getter &fget, const Extra& ...extra) {
-        return def_property_readonly(name, cpp_function(method_adaptor<type>(fget)),
-                                     return_value_policy::reference_internal, extra...);
+    class_ &def_property_readonly(const char *name, const Getter &fget, const Extra &...extra) {
+        return def_property_readonly(name,
+                                     cpp_function(method_adaptor<type>(fget)),
+                                     return_value_policy::reference_internal,
+                                     extra...);
     }
 
     /// Uses cpp_function's return_value_policy by default
     template <typename... Extra>
-    class_ &def_property_readonly(const char *name, const cpp_function &fget, const Extra& ...extra) {
+    class_ &
+    def_property_readonly(const char *name, const cpp_function &fget, const Extra &...extra) {
         return def_property(name, fget, nullptr, extra...);
     }
 
     /// Uses return_value_policy::reference by default
     template <typename Getter, typename... Extra>
-    class_ &def_property_readonly_static(const char *name, const Getter &fget, const Extra& ...extra) {
-        return def_property_readonly_static(name, cpp_function(fget), return_value_policy::reference, extra...);
+    class_ &
+    def_property_readonly_static(const char *name, const Getter &fget, const Extra &...extra) {
+        return def_property_readonly_static(
+            name, cpp_function(fget), return_value_policy::reference, extra...);
     }
 
     /// Uses cpp_function's return_value_policy by default
     template <typename... Extra>
-    class_ &def_property_readonly_static(const char *name, const cpp_function &fget, const Extra& ...extra) {
+    class_ &def_property_readonly_static(const char *name,
+                                         const cpp_function &fget,
+                                         const Extra &...extra) {
         return def_property_static(name, fget, nullptr, extra...);
     }
 
     /// Uses return_value_policy::reference_internal by default
     template <typename Getter, typename Setter, typename... Extra>
-    class_ &def_property(const char *name, const Getter &fget, const Setter &fset, const Extra& ...extra) {
+    class_ &
+    def_property(const char *name, const Getter &fget, const Setter &fset, const Extra &...extra) {
         return def_property(name, fget, cpp_function(method_adaptor<type>(fset)), extra...);
     }
     template <typename Getter, typename... Extra>
-    class_ &def_property(const char *name, const Getter &fget, const cpp_function &fset, const Extra& ...extra) {
-        return def_property(name, cpp_function(method_adaptor<type>(fget)), fset,
-                            return_value_policy::reference_internal, extra...);
+    class_ &def_property(const char *name,
+                         const Getter &fget,
+                         const cpp_function &fset,
+                         const Extra &...extra) {
+        return def_property(name,
+                            cpp_function(method_adaptor<type>(fget)),
+                            fset,
+                            return_value_policy::reference_internal,
+                            extra...);
     }
 
     /// Uses cpp_function's return_value_policy by default
     template <typename... Extra>
-    class_ &def_property(const char *name, const cpp_function &fget, const cpp_function &fset, const Extra& ...extra) {
+    class_ &def_property(const char *name,
+                         const cpp_function &fget,
+                         const cpp_function &fset,
+                         const Extra &...extra) {
         return def_property_static(name, fget, fset, is_method(*this), extra...);
     }
 
     /// Uses return_value_policy::reference by default
     template <typename Getter, typename... Extra>
-    class_ &def_property_static(const char *name, const Getter &fget, const cpp_function &fset, const Extra& ...extra) {
-        return def_property_static(name, cpp_function(fget), fset, return_value_policy::reference, extra...);
+    class_ &def_property_static(const char *name,
+                                const Getter &fget,
+                                const cpp_function &fset,
+                                const Extra &...extra) {
+        return def_property_static(
+            name, cpp_function(fget), fset, return_value_policy::reference, extra...);
     }
 
     /// Uses cpp_function's return_value_policy by default
     template <typename... Extra>
-    class_ &def_property_static(const char *name, const cpp_function &fget, const cpp_function &fset, const Extra& ...extra) {
-        static_assert( 0 == detail::constexpr_sum(std::is_base_of<arg, Extra>::value...),
+    class_ &def_property_static(const char *name,
+                                const cpp_function &fget,
+                                const cpp_function &fset,
+                                const Extra &...extra) {
+        static_assert(0 == detail::constexpr_sum(std::is_base_of<arg, Extra>::value...),
                       "Argument annotations are not allowed for properties");
         auto rec_fget = get_function_record(fget), rec_fset = get_function_record(fset);
         auto *rec_active = rec_fget;
         if (rec_fget) {
-           char *doc_prev = rec_fget->doc; /* 'extra' field may include a property-specific documentation string */
-           detail::process_attributes<Extra...>::init(extra..., rec_fget);
-           if (rec_fget->doc && rec_fget->doc != doc_prev) {
-              std::free(doc_prev);
-              rec_fget->doc = PYBIND11_COMPAT_STRDUP(rec_fget->doc);
-           }
+            char *doc_prev = rec_fget->doc; /* 'extra' field may include a property-specific
+                                               documentation string */
+            detail::process_attributes<Extra...>::init(extra..., rec_fget);
+            if (rec_fget->doc && rec_fget->doc != doc_prev) {
+                std::free(doc_prev);
+                rec_fget->doc = PYBIND11_COMPAT_STRDUP(rec_fget->doc);
+            }
         }
         if (rec_fset) {
             char *doc_prev = rec_fset->doc;
@@ -1557,7 +1788,9 @@ public:
                 std::free(doc_prev);
                 rec_fset->doc = PYBIND11_COMPAT_STRDUP(rec_fset->doc);
             }
-            if (! rec_active) rec_active = rec_fset;
+            if (!rec_active) {
+                rec_active = rec_fset;
+            }
         }
         def_property_static_impl(name, fget, fset, rec_active);
         return *this;
@@ -1566,11 +1799,13 @@ public:
 private:
     /// Initialize holder object, variant 1: object derives from enable_shared_from_this
     template <typename T>
-    static void init_holder(detail::instance *inst, detail::value_and_holder &v_h,
-            const holder_type * /* unused */, const std::enable_shared_from_this<T> * /* dummy */) {
+    static void init_holder(detail::instance *inst,
+                            detail::value_and_holder &v_h,
+                            const holder_type * /* unused */,
+                            const std::enable_shared_from_this<T> * /* dummy */) {
 
         auto sh = std::dynamic_pointer_cast<typename holder_type::element_type>(
-                detail::try_get_shared_from_this(v_h.value_ptr<type>()));
+            detail::try_get_shared_from_this(v_h.value_ptr<type>()));
         if (sh) {
             new (std::addressof(v_h.holder<holder_type>())) holder_type(std::move(sh));
             v_h.set_holder_constructed();
@@ -1583,30 +1818,37 @@ private:
     }
 
     static void init_holder_from_existing(const detail::value_and_holder &v_h,
-            const holder_type *holder_ptr, std::true_type /*is_copy_constructible*/) {
-        new (std::addressof(v_h.holder<holder_type>())) holder_type(*reinterpret_cast<const holder_type *>(holder_ptr));
+                                          const holder_type *holder_ptr,
+                                          std::true_type /*is_copy_constructible*/) {
+        new (std::addressof(v_h.holder<holder_type>()))
+            holder_type(*reinterpret_cast<const holder_type *>(holder_ptr));
     }
 
     static void init_holder_from_existing(const detail::value_and_holder &v_h,
-            const holder_type *holder_ptr, std::false_type /*is_copy_constructible*/) {
-        new (std::addressof(v_h.holder<holder_type>())) holder_type(std::move(*const_cast<holder_type *>(holder_ptr)));
+                                          const holder_type *holder_ptr,
+                                          std::false_type /*is_copy_constructible*/) {
+        new (std::addressof(v_h.holder<holder_type>()))
+            holder_type(std::move(*const_cast<holder_type *>(holder_ptr)));
     }
 
-    /// Initialize holder object, variant 2: try to construct from existing holder object, if possible
-    static void init_holder(detail::instance *inst, detail::value_and_holder &v_h,
-            const holder_type *holder_ptr, const void * /* dummy -- not enable_shared_from_this<T>) */) {
+    /// Initialize holder object, variant 2: try to construct from existing holder object, if
+    /// possible
+    static void init_holder(detail::instance *inst,
+                            detail::value_and_holder &v_h,
+                            const holder_type *holder_ptr,
+                            const void * /* dummy -- not enable_shared_from_this<T>) */) {
         if (holder_ptr) {
             init_holder_from_existing(v_h, holder_ptr, std::is_copy_constructible<holder_type>());
             v_h.set_holder_constructed();
-        } else if (inst->owned || detail::always_construct_holder<holder_type>::value) {
+        } else if (detail::always_construct_holder<holder_type>::value || inst->owned) {
             new (std::addressof(v_h.holder<holder_type>())) holder_type(v_h.value_ptr<type>());
             v_h.set_holder_constructed();
         }
     }
 
     /// Performs instance initialization including constructing a holder and registering the known
-    /// instance.  Should be called as soon as the `type` value_ptr is set for an instance.  Takes an
-    /// optional pointer to an existing holder to use; if not specified and the instance is
+    /// instance.  Should be called as soon as the `type` value_ptr is set for an instance.  Takes
+    /// an optional pointer to an existing holder to use; if not specified and the instance is
     /// `.owned`, a new holder will be constructed to manage the value pointer.
     static void init_instance(detail::instance *inst, const void *holder_ptr) {
         auto v_h = inst->get_value_and_holder(detail::get_type_info(typeid(type)));
@@ -1629,35 +1871,55 @@ private:
         if (v_h.holder_constructed()) {
             v_h.holder<holder_type>().~holder_type();
             v_h.set_holder_constructed(false);
-        }
-        else {
-            detail::call_operator_delete(v_h.value_ptr<type>(),
-                v_h.type->type_size,
-                v_h.type->type_align
-            );
+        } else {
+            detail::call_operator_delete(
+                v_h.value_ptr<type>(), v_h.type->type_size, v_h.type->type_align);
         }
         v_h.value_ptr() = nullptr;
     }
 
     static detail::function_record *get_function_record(handle h) {
         h = detail::get_function(h);
-        return h ? (detail::function_record *) reinterpret_borrow<capsule>(PyCFunction_GET_SELF(h.ptr()))
-                 : nullptr;
+        if (!h) {
+            return nullptr;
+        }
+
+        handle func_self = PyCFunction_GET_SELF(h.ptr());
+        if (!func_self) {
+            throw error_already_set();
+        }
+        if (!isinstance<capsule>(func_self)) {
+            return nullptr;
+        }
+        auto cap = reinterpret_borrow<capsule>(func_self);
+        if (!detail::is_function_record_capsule(cap)) {
+            return nullptr;
+        }
+        return cap.get_pointer<detail::function_record>();
     }
 };
 
 /// Binds an existing constructor taking arguments Args...
-template <typename... Args> detail::initimpl::constructor<Args...> init() { return {}; }
+template <typename... Args>
+detail::initimpl::constructor<Args...> init() {
+    return {};
+}
 /// Like `init<Args...>()`, but the instance is always constructed through the alias class (even
 /// when not inheriting on the Python side).
-template <typename... Args> detail::initimpl::alias_constructor<Args...> init_alias() { return {}; }
+template <typename... Args>
+detail::initimpl::alias_constructor<Args...> init_alias() {
+    return {};
+}
 
 /// Binds a factory function as a constructor
 template <typename Func, typename Ret = detail::initimpl::factory<Func>>
-Ret init(Func &&f) { return {std::forward<Func>(f)}; }
+Ret init(Func &&f) {
+    return {std::forward<Func>(f)};
+}
 
-/// Dual-argument factory function: the first function is called when no alias is needed, the second
-/// when an alias is needed (i.e. due to python-side inheritance).  Arguments must be identical.
+/// Dual-argument factory function: the first function is called when no alias is needed, the
+/// second when an alias is needed (i.e. due to python-side inheritance).  Arguments must be
+/// identical.
 template <typename CFunc, typename AFunc, typename Ret = detail::initimpl::factory<CFunc, AFunc>>
 Ret init(CFunc &&c, AFunc &&a) {
     return {std::forward<CFunc>(c), std::forward<AFunc>(a)};
@@ -1675,14 +1937,15 @@ PYBIND11_NAMESPACE_BEGIN(detail)
 inline str enum_name(handle arg) {
     dict entries = arg.get_type().attr("__entries");
     for (auto kv : entries) {
-        if (handle(kv.second[int_(0)]).equal(arg))
+        if (handle(kv.second[int_(0)]).equal(arg)) {
             return pybind11::str(kv.first);
+        }
     }
     return "???";
 }
 
 struct enum_base {
-    enum_base(const handle &base, const handle &parent) : m_base(base), m_parent(parent) { }
+    enum_base(const handle &base, const handle &parent) : m_base(base), m_parent(parent) {}
 
     PYBIND11_NOINLINE void init(bool is_arithmetic, bool is_convertible) {
         m_base.attr("__entries") = dict();
@@ -1693,7 +1956,8 @@ struct enum_base {
             [](const object &arg) -> str {
                 handle type = type::handle_of(arg);
                 object type_name = type.attr("__name__");
-                return pybind11::str("<{}.{}: {}>").format(type_name, enum_name(arg), int_(arg));
+                return pybind11::str("<{}.{}: {}>")
+                    .format(std::move(type_name), enum_name(arg), int_(arg));
             },
             name("__repr__"),
             is_method(m_base));
@@ -1703,36 +1967,54 @@ struct enum_base {
         m_base.attr("__str__") = cpp_function(
             [](handle arg) -> str {
                 object type_name = type::handle_of(arg).attr("__name__");
-                return pybind11::str("{}.{}").format(type_name, enum_name(arg));
-            }, name("name"), is_method(m_base)
-        );
+                return pybind11::str("{}.{}").format(std::move(type_name), enum_name(arg));
+            },
+            name("name"),
+            is_method(m_base));
 
-        m_base.attr("__doc__") = static_property(cpp_function(
-            [](handle arg) -> std::string {
-                std::string docstring;
-                dict entries = arg.attr("__entries");
-                if (((PyTypeObject *) arg.ptr())->tp_doc)
-                    docstring += std::string(((PyTypeObject *) arg.ptr())->tp_doc) + "\n\n";
-                docstring += "Members:";
-                for (auto kv : entries) {
-                    auto key = std::string(pybind11::str(kv.first));
-                    auto comment = kv.second[int_(1)];
-                    docstring += "\n\n  " + key;
-                    if (!comment.is_none())
-                        docstring += " : " + (std::string) pybind11::str(comment);
-                }
-                return docstring;
-            }, name("__doc__")
-        ), none(), none(), "");
+        if (options::show_enum_members_docstring()) {
+            m_base.attr("__doc__") = static_property(
+                cpp_function(
+                    [](handle arg) -> std::string {
+                        std::string docstring;
+                        dict entries = arg.attr("__entries");
+                        if (((PyTypeObject *) arg.ptr())->tp_doc) {
+                            docstring += std::string(
+                                reinterpret_cast<PyTypeObject *>(arg.ptr())->tp_doc);
+                            docstring += "\n\n";
+                        }
+                        docstring += "Members:";
+                        for (auto kv : entries) {
+                            auto key = std::string(pybind11::str(kv.first));
+                            auto comment = kv.second[int_(1)];
+                            docstring += "\n\n  ";
+                            docstring += key;
+                            if (!comment.is_none()) {
+                                docstring += " : ";
+                                docstring += pybind11::str(comment).cast<std::string>();
+                            }
+                        }
+                        return docstring;
+                    },
+                    name("__doc__")),
+                none(),
+                none(),
+                "");
+        }
 
         m_base.attr("__members__") = static_property(cpp_function(
-            [](handle arg) -> dict {
-                dict entries = arg.attr("__entries"), m;
-                for (auto kv : entries)
-                    m[kv.first] = kv.second[int_(0)];
-                return m;
-            }, name("__members__")), none(), none(), ""
-        );
+                                                         [](handle arg) -> dict {
+                                                             dict entries = arg.attr("__entries"),
+                                                                  m;
+                                                             for (auto kv : entries) {
+                                                                 m[kv.first] = kv.second[int_(0)];
+                                                             }
+                                                             return m;
+                                                         },
+                                                         name("__members__")),
+                                                     none(),
+                                                     none(),
+                                                     "");
 
 #define PYBIND11_ENUM_OP_STRICT(op, expr, strict_behavior)                                        \
     m_base.attr(op) = cpp_function(                                                               \
@@ -1766,42 +2048,42 @@ struct enum_base {
         arg("other"))
 
         if (is_convertible) {
-            PYBIND11_ENUM_OP_CONV_LHS("__eq__", !b.is_none() &&  a.equal(b));
-            PYBIND11_ENUM_OP_CONV_LHS("__ne__",  b.is_none() || !a.equal(b));
+            PYBIND11_ENUM_OP_CONV_LHS("__eq__", !b.is_none() && a.equal(b));
+            PYBIND11_ENUM_OP_CONV_LHS("__ne__", b.is_none() || !a.equal(b));
 
             if (is_arithmetic) {
-                PYBIND11_ENUM_OP_CONV("__lt__",   a <  b);
-                PYBIND11_ENUM_OP_CONV("__gt__",   a >  b);
-                PYBIND11_ENUM_OP_CONV("__le__",   a <= b);
-                PYBIND11_ENUM_OP_CONV("__ge__",   a >= b);
-                PYBIND11_ENUM_OP_CONV("__and__",  a &  b);
-                PYBIND11_ENUM_OP_CONV("__rand__", a &  b);
-                PYBIND11_ENUM_OP_CONV("__or__",   a |  b);
-                PYBIND11_ENUM_OP_CONV("__ror__",  a |  b);
-                PYBIND11_ENUM_OP_CONV("__xor__",  a ^  b);
-                PYBIND11_ENUM_OP_CONV("__rxor__", a ^  b);
+                PYBIND11_ENUM_OP_CONV("__lt__", a < b);
+                PYBIND11_ENUM_OP_CONV("__gt__", a > b);
+                PYBIND11_ENUM_OP_CONV("__le__", a <= b);
+                PYBIND11_ENUM_OP_CONV("__ge__", a >= b);
+                PYBIND11_ENUM_OP_CONV("__and__", a & b);
+                PYBIND11_ENUM_OP_CONV("__rand__", a & b);
+                PYBIND11_ENUM_OP_CONV("__or__", a | b);
+                PYBIND11_ENUM_OP_CONV("__ror__", a | b);
+                PYBIND11_ENUM_OP_CONV("__xor__", a ^ b);
+                PYBIND11_ENUM_OP_CONV("__rxor__", a ^ b);
                 m_base.attr("__invert__")
                     = cpp_function([](const object &arg) { return ~(int_(arg)); },
                                    name("__invert__"),
                                    is_method(m_base));
             }
         } else {
-            PYBIND11_ENUM_OP_STRICT("__eq__",  int_(a).equal(int_(b)), return false);
+            PYBIND11_ENUM_OP_STRICT("__eq__", int_(a).equal(int_(b)), return false);
             PYBIND11_ENUM_OP_STRICT("__ne__", !int_(a).equal(int_(b)), return true);
 
             if (is_arithmetic) {
-                #define PYBIND11_THROW throw type_error("Expected an enumeration of matching type!");
-                PYBIND11_ENUM_OP_STRICT("__lt__", int_(a) <  int_(b), PYBIND11_THROW);
-                PYBIND11_ENUM_OP_STRICT("__gt__", int_(a) >  int_(b), PYBIND11_THROW);
+#define PYBIND11_THROW throw type_error("Expected an enumeration of matching type!");
+                PYBIND11_ENUM_OP_STRICT("__lt__", int_(a) < int_(b), PYBIND11_THROW);
+                PYBIND11_ENUM_OP_STRICT("__gt__", int_(a) > int_(b), PYBIND11_THROW);
                 PYBIND11_ENUM_OP_STRICT("__le__", int_(a) <= int_(b), PYBIND11_THROW);
                 PYBIND11_ENUM_OP_STRICT("__ge__", int_(a) >= int_(b), PYBIND11_THROW);
-                #undef PYBIND11_THROW
+#undef PYBIND11_THROW
             }
         }
 
-        #undef PYBIND11_ENUM_OP_CONV_LHS
-        #undef PYBIND11_ENUM_OP_CONV
-        #undef PYBIND11_ENUM_OP_STRICT
+#undef PYBIND11_ENUM_OP_CONV_LHS
+#undef PYBIND11_ENUM_OP_CONV
+#undef PYBIND11_ENUM_OP_STRICT
 
         m_base.attr("__getstate__") = cpp_function(
             [](const object &arg) { return int_(arg); }, name("__getstate__"), is_method(m_base));
@@ -1810,60 +2092,90 @@ struct enum_base {
             [](const object &arg) { return int_(arg); }, name("__hash__"), is_method(m_base));
     }
 
-    PYBIND11_NOINLINE void value(char const* name_, object value, const char *doc = nullptr) {
+    PYBIND11_NOINLINE void value(char const *name_, object value, const char *doc = nullptr) {
         dict entries = m_base.attr("__entries");
         str name(name_);
         if (entries.contains(name)) {
             std::string type_name = (std::string) str(m_base.attr("__name__"));
-            throw value_error(type_name + ": element \"" + std::string(name_) + "\" already exists!");
+            throw value_error(std::move(type_name) + ": element \"" + std::string(name_)
+                              + "\" already exists!");
         }
 
-        entries[name] = std::make_pair(value, doc);
-        m_base.attr(name) = value;
+        entries[name] = pybind11::make_tuple(value, doc);
+        m_base.attr(std::move(name)) = std::move(value);
     }
 
     PYBIND11_NOINLINE void export_values() {
         dict entries = m_base.attr("__entries");
-        for (auto kv : entries)
+        for (auto kv : entries) {
             m_parent.attr(kv.first) = kv.second[int_(0)];
+        }
     }
 
     handle m_base;
     handle m_parent;
 };
 
-template <bool is_signed, size_t length> struct equivalent_integer {};
-template <> struct equivalent_integer<true,  1> { using type = int8_t;   };
-template <> struct equivalent_integer<false, 1> { using type = uint8_t;  };
-template <> struct equivalent_integer<true,  2> { using type = int16_t;  };
-template <> struct equivalent_integer<false, 2> { using type = uint16_t; };
-template <> struct equivalent_integer<true,  4> { using type = int32_t;  };
-template <> struct equivalent_integer<false, 4> { using type = uint32_t; };
-template <> struct equivalent_integer<true,  8> { using type = int64_t;  };
-template <> struct equivalent_integer<false, 8> { using type = uint64_t; };
+template <bool is_signed, size_t length>
+struct equivalent_integer {};
+template <>
+struct equivalent_integer<true, 1> {
+    using type = int8_t;
+};
+template <>
+struct equivalent_integer<false, 1> {
+    using type = uint8_t;
+};
+template <>
+struct equivalent_integer<true, 2> {
+    using type = int16_t;
+};
+template <>
+struct equivalent_integer<false, 2> {
+    using type = uint16_t;
+};
+template <>
+struct equivalent_integer<true, 4> {
+    using type = int32_t;
+};
+template <>
+struct equivalent_integer<false, 4> {
+    using type = uint32_t;
+};
+template <>
+struct equivalent_integer<true, 8> {
+    using type = int64_t;
+};
+template <>
+struct equivalent_integer<false, 8> {
+    using type = uint64_t;
+};
 
 template <typename IntLike>
-using equivalent_integer_t = typename equivalent_integer<std::is_signed<IntLike>::value, sizeof(IntLike)>::type;
+using equivalent_integer_t =
+    typename equivalent_integer<std::is_signed<IntLike>::value, sizeof(IntLike)>::type;
 
 PYBIND11_NAMESPACE_END(detail)
 
 /// Binds C++ enumerations and enumeration classes to Python
-template <typename Type> class enum_ : public class_<Type> {
+template <typename Type>
+class enum_ : public class_<Type> {
 public:
     using Base = class_<Type>;
-    using Base::def;
     using Base::attr;
+    using Base::def;
     using Base::def_property_readonly;
     using Base::def_property_readonly_static;
     using Underlying = typename std::underlying_type<Type>::type;
     // Scalar is the integer representation of underlying type
-    using Scalar = detail::conditional_t<detail::any_of<
-        detail::is_std_char_type<Underlying>, std::is_same<Underlying, bool>
-    >::value, detail::equivalent_integer_t<Underlying>, Underlying>;
+    using Scalar = detail::conditional_t<detail::any_of<detail::is_std_char_type<Underlying>,
+                                                        std::is_same<Underlying, bool>>::value,
+                                         detail::equivalent_integer_t<Underlying>,
+                                         Underlying>;
 
     template <typename... Extra>
-    enum_(const handle &scope, const char *name, const Extra&... extra)
-      : class_<Type>(scope, name, extra...), m_base(*this, scope) {
+    enum_(const handle &scope, const char *name, const Extra &...extra)
+        : class_<Type>(scope, name, extra...), m_base(*this, scope) {
         constexpr bool is_arithmetic = detail::any_of<std::is_same<arithmetic, Extra>...>::value;
         constexpr bool is_convertible = std::is_convertible<Type, Underlying>::value;
         m_base.init(is_arithmetic, is_convertible);
@@ -1871,29 +2183,26 @@ public:
         def(init([](Scalar i) { return static_cast<Type>(i); }), arg("value"));
         def_property_readonly("value", [](Type value) { return (Scalar) value; });
         def("__int__", [](Type value) { return (Scalar) value; });
-        #if PY_MAJOR_VERSION < 3
-            def("__long__", [](Type value) { return (Scalar) value; });
-        #endif
-        #if PY_MAJOR_VERSION > 3 || (PY_MAJOR_VERSION == 3 && PY_MINOR_VERSION >= 8)
-            def("__index__", [](Type value) { return (Scalar) value; });
-        #endif
-
+        def("__index__", [](Type value) { return (Scalar) value; });
         attr("__setstate__") = cpp_function(
             [](detail::value_and_holder &v_h, Scalar arg) {
-                detail::initimpl::setstate<Base>(v_h, static_cast<Type>(arg),
-                        Py_TYPE(v_h.inst) != v_h.type->type); },
+                detail::initimpl::setstate<Base>(
+                    v_h, static_cast<Type>(arg), Py_TYPE(v_h.inst) != v_h.type->type);
+            },
             detail::is_new_style_constructor(),
-            pybind11::name("__setstate__"), is_method(*this), arg("state"));
+            pybind11::name("__setstate__"),
+            is_method(*this),
+            arg("state"));
     }
 
     /// Export enumeration entries into the parent scope
-    enum_& export_values() {
+    enum_ &export_values() {
         m_base.export_values();
         return *this;
     }
 
     /// Add an enumeration entry
-    enum_& value(char const* name, Type value, const char *doc = nullptr) {
+    enum_ &value(char const *name, Type value, const char *doc = nullptr) {
         m_base.value(name, pybind11::cast(value, return_value_policy::copy), doc);
         return *this;
     }
@@ -1904,26 +2213,28 @@ private:
 
 PYBIND11_NAMESPACE_BEGIN(detail)
 
-
 PYBIND11_NOINLINE void keep_alive_impl(handle nurse, handle patient) {
-    if (!nurse || !patient)
+    if (!nurse || !patient) {
         pybind11_fail("Could not activate keep_alive!");
+    }
 
-    if (patient.is_none() || nurse.is_none())
+    if (patient.is_none() || nurse.is_none()) {
         return; /* Nothing to keep alive or nothing to be kept alive by */
+    }
 
     auto tinfo = all_type_info(Py_TYPE(nurse.ptr()));
     if (!tinfo.empty()) {
         /* It's a pybind-registered type, so we can store the patient in the
          * internal list. */
         add_patient(nurse.ptr(), patient.ptr());
-    }
-    else {
+    } else {
         /* Fall back to clever approach based on weak references taken from
          * Boost.Python. This is not used for pybind-registered types because
          * the objects can be destroyed out-of-order in a GC pass. */
-        cpp_function disable_lifesupport(
-            [patient](handle weakref) { patient.dec_ref(); weakref.dec_ref(); });
+        cpp_function disable_lifesupport([patient](handle weakref) {
+            patient.dec_ref();
+            weakref.dec_ref();
+        });
 
         weakref wr(nurse, disable_lifesupport);
 
@@ -1932,34 +2243,52 @@ PYBIND11_NOINLINE void keep_alive_impl(handle nurse, handle patient) {
     }
 }
 
-PYBIND11_NOINLINE void keep_alive_impl(size_t Nurse, size_t Patient, function_call &call, handle ret) {
+PYBIND11_NOINLINE void
+keep_alive_impl(size_t Nurse, size_t Patient, function_call &call, handle ret) {
     auto get_arg = [&](size_t n) {
-        if (n == 0)
+        if (n == 0) {
             return ret;
-        if (n == 1 && call.init_self)
+        }
+        if (n == 1 && call.init_self) {
             return call.init_self;
-        if (n <= call.args.size())
+        }
+        if (n <= call.args.size()) {
             return call.args[n - 1];
+        }
         return handle();
     };
 
     keep_alive_impl(get_arg(Nurse), get_arg(Patient));
 }
 
-inline std::pair<decltype(internals::registered_types_py)::iterator, bool> all_type_info_get_cache(PyTypeObject *type) {
-    auto res = get_internals().registered_types_py
+inline std::pair<decltype(internals::registered_types_py)::iterator, bool>
+all_type_info_get_cache(PyTypeObject *type) {
+    auto res = get_internals()
+                   .registered_types_py
 #ifdef __cpp_lib_unordered_map_try_emplace
-        .try_emplace(type);
+                   .try_emplace(type);
 #else
-        .emplace(type, std::vector<detail::type_info *>());
+                   .emplace(type, std::vector<detail::type_info *>());
 #endif
     if (res.second) {
         // New cache entry created; set up a weak reference to automatically remove it if the type
         // gets destroyed:
         weakref((PyObject *) type, cpp_function([type](handle wr) {
-            get_internals().registered_types_py.erase(type);
-            wr.dec_ref();
-        })).release();
+                    get_internals().registered_types_py.erase(type);
+
+                    // TODO consolidate the erasure code in pybind11_meta_dealloc() in class.h
+                    auto &cache = get_internals().inactive_override_cache;
+                    for (auto it = cache.begin(), last = cache.end(); it != last;) {
+                        if (it->first == reinterpret_cast<PyObject *>(type)) {
+                            it = cache.erase(it);
+                        } else {
+                            ++it;
+                        }
+                    }
+
+                    wr.dec_ref();
+                }))
+            .release();
     }
 
     return res;
@@ -1968,7 +2297,12 @@ inline std::pair<decltype(internals::registered_types_py)::iterator, bool> all_t
 /* There are a large number of apparently unused template arguments because
  * each combination requires a separate py::class_ registration.
  */
-template <typename Access, return_value_policy Policy, typename Iterator, typename Sentinel, typename ValueType, typename... Extra>
+template <typename Access,
+          return_value_policy Policy,
+          typename Iterator,
+          typename Sentinel,
+          typename ValueType,
+          typename... Extra>
 struct iterator_state {
     Iterator it;
     Sentinel end;
@@ -1983,12 +2317,10 @@ template <typename Iterator, typename SFINAE = decltype(*std::declval<Iterator &
 struct iterator_access {
     using result_type = decltype(*std::declval<Iterator &>());
     // NOLINTNEXTLINE(readability-const-return-type) // PR #3263
-    result_type operator()(Iterator &it) const {
-        return *it;
-    }
+    result_type operator()(Iterator &it) const { return *it; }
 };
 
-template <typename Iterator, typename SFINAE = decltype((*std::declval<Iterator &>()).first) >
+template <typename Iterator, typename SFINAE = decltype((*std::declval<Iterator &>()).first)>
 class iterator_key_access {
 private:
     using pair_type = decltype(*std::declval<Iterator &>());
@@ -2003,14 +2335,11 @@ public:
      * (it's not used in the first branch because going via decltype and back
      * through declval does not perfectly preserve references).
      */
-    using result_type = conditional_t<
-        std::is_reference<decltype(*std::declval<Iterator &>())>::value,
-        decltype(((*std::declval<Iterator &>()).first)),
-        decltype(std::declval<pair_type>().first)
-    >;
-    result_type operator()(Iterator &it) const {
-        return (*it).first;
-    }
+    using result_type
+        = conditional_t<std::is_reference<decltype(*std::declval<Iterator &>())>::value,
+                        decltype(((*std::declval<Iterator &>()).first)),
+                        decltype(std::declval<pair_type>().first)>;
+    result_type operator()(Iterator &it) const { return (*it).first; }
 };
 
 template <typename Iterator, typename SFINAE = decltype((*std::declval<Iterator &>()).second)>
@@ -2019,14 +2348,11 @@ private:
     using pair_type = decltype(*std::declval<Iterator &>());
 
 public:
-    using result_type = conditional_t<
-        std::is_reference<decltype(*std::declval<Iterator &>())>::value,
-        decltype(((*std::declval<Iterator &>()).second)),
-        decltype(std::declval<pair_type>().second)
-    >;
-    result_type operator()(Iterator &it) const {
-        return (*it).second;
-    }
+    using result_type
+        = conditional_t<std::is_reference<decltype(*std::declval<Iterator &>())>::value,
+                        decltype(((*std::declval<Iterator &>()).second)),
+                        decltype(std::declval<pair_type>().second)>;
+    result_type operator()(Iterator &it) const { return (*it).second; }
 };
 
 template <typename Access,
@@ -2035,25 +2361,30 @@ template <typename Access,
           typename Sentinel,
           typename ValueType,
           typename... Extra>
-iterator make_iterator_impl(Iterator first, Sentinel last, Extra &&... extra) {
+iterator make_iterator_impl(Iterator first, Sentinel last, Extra &&...extra) {
     using state = detail::iterator_state<Access, Policy, Iterator, Sentinel, ValueType, Extra...>;
     // TODO: state captures only the types of Extra, not the values
 
     if (!detail::get_type_info(typeid(state), false)) {
         class_<state>(handle(), "iterator", pybind11::module_local())
-            .def("__iter__", [](state &s) -> state& { return s; })
-            .def("__next__", [](state &s) -> ValueType {
-                if (!s.first_or_done)
-                    ++s.it;
-                else
-                    s.first_or_done = false;
-                if (s.it == s.end) {
-                    s.first_or_done = true;
-                    throw stop_iteration();
-                }
-                return Access()(s.it);
-            // NOLINTNEXTLINE(readability-const-return-type) // PR #3263
-            }, std::forward<Extra>(extra)..., Policy);
+            .def("__iter__", [](state &s) -> state & { return s; })
+            .def(
+                "__next__",
+                [](state &s) -> ValueType {
+                    if (!s.first_or_done) {
+                        ++s.it;
+                    } else {
+                        s.first_or_done = false;
+                    }
+                    if (s.it == s.end) {
+                        s.first_or_done = true;
+                        throw stop_iteration();
+                    }
+                    return Access()(s.it);
+                    // NOLINTNEXTLINE(readability-const-return-type) // PR #3263
+                },
+                std::forward<Extra>(extra)...,
+                Policy);
     }
 
     return cast(state{first, last, true});
@@ -2067,14 +2398,13 @@ template <return_value_policy Policy = return_value_policy::reference_internal,
           typename Sentinel,
           typename ValueType = typename detail::iterator_access<Iterator>::result_type,
           typename... Extra>
-iterator make_iterator(Iterator first, Sentinel last, Extra &&... extra) {
-    return detail::make_iterator_impl<
-        detail::iterator_access<Iterator>,
-        Policy,
-        Iterator,
-        Sentinel,
-        ValueType,
-        Extra...>(first, last, std::forward<Extra>(extra)...);
+iterator make_iterator(Iterator first, Sentinel last, Extra &&...extra) {
+    return detail::make_iterator_impl<detail::iterator_access<Iterator>,
+                                      Policy,
+                                      Iterator,
+                                      Sentinel,
+                                      ValueType,
+                                      Extra...>(first, last, std::forward<Extra>(extra)...);
 }
 
 /// Makes a python iterator over the keys (`.first`) of a iterator over pairs from a
@@ -2085,13 +2415,12 @@ template <return_value_policy Policy = return_value_policy::reference_internal,
           typename KeyType = typename detail::iterator_key_access<Iterator>::result_type,
           typename... Extra>
 iterator make_key_iterator(Iterator first, Sentinel last, Extra &&...extra) {
-    return detail::make_iterator_impl<
-        detail::iterator_key_access<Iterator>,
-        Policy,
-        Iterator,
-        Sentinel,
-        KeyType,
-        Extra...>(first, last, std::forward<Extra>(extra)...);
+    return detail::make_iterator_impl<detail::iterator_key_access<Iterator>,
+                                      Policy,
+                                      Iterator,
+                                      Sentinel,
+                                      KeyType,
+                                      Extra...>(first, last, std::forward<Extra>(extra)...);
 }
 
 /// Makes a python iterator over the values (`.second`) of a iterator over pairs from a
@@ -2102,36 +2431,46 @@ template <return_value_policy Policy = return_value_policy::reference_internal,
           typename ValueType = typename detail::iterator_value_access<Iterator>::result_type,
           typename... Extra>
 iterator make_value_iterator(Iterator first, Sentinel last, Extra &&...extra) {
-    return detail::make_iterator_impl<
-        detail::iterator_value_access<Iterator>,
-        Policy, Iterator,
-        Sentinel,
-        ValueType,
-        Extra...>(first, last, std::forward<Extra>(extra)...);
+    return detail::make_iterator_impl<detail::iterator_value_access<Iterator>,
+                                      Policy,
+                                      Iterator,
+                                      Sentinel,
+                                      ValueType,
+                                      Extra...>(first, last, std::forward<Extra>(extra)...);
 }
 
 /// Makes an iterator over values of an stl container or other container supporting
 /// `std::begin()`/`std::end()`
 template <return_value_policy Policy = return_value_policy::reference_internal,
-          typename Type, typename... Extra> iterator make_iterator(Type &value, Extra&&... extra) {
-    return make_iterator<Policy>(std::begin(value), std::end(value), extra...);
+          typename Type,
+          typename... Extra>
+iterator make_iterator(Type &value, Extra &&...extra) {
+    return make_iterator<Policy>(
+        std::begin(value), std::end(value), std::forward<Extra>(extra)...);
 }
 
 /// Makes an iterator over the keys (`.first`) of a stl map-like container supporting
 /// `std::begin()`/`std::end()`
 template <return_value_policy Policy = return_value_policy::reference_internal,
-          typename Type, typename... Extra> iterator make_key_iterator(Type &value, Extra&&... extra) {
-    return make_key_iterator<Policy>(std::begin(value), std::end(value), extra...);
+          typename Type,
+          typename... Extra>
+iterator make_key_iterator(Type &value, Extra &&...extra) {
+    return make_key_iterator<Policy>(
+        std::begin(value), std::end(value), std::forward<Extra>(extra)...);
 }
 
 /// Makes an iterator over the values (`.second`) of a stl map-like container supporting
 /// `std::begin()`/`std::end()`
 template <return_value_policy Policy = return_value_policy::reference_internal,
-          typename Type, typename... Extra> iterator make_value_iterator(Type &value, Extra&&... extra) {
-    return make_value_iterator<Policy>(std::begin(value), std::end(value), extra...);
+          typename Type,
+          typename... Extra>
+iterator make_value_iterator(Type &value, Extra &&...extra) {
+    return make_value_iterator<Policy>(
+        std::begin(value), std::end(value), std::forward<Extra>(extra)...);
 }
 
-template <typename InputType, typename OutputType> void implicitly_convertible() {
+template <typename InputType, typename OutputType>
+void implicitly_convertible() {
     struct set_flag {
         bool &flag;
         explicit set_flag(bool &flag_) : flag(flag_) { flag_ = true; }
@@ -2139,38 +2478,40 @@ template <typename InputType, typename OutputType> void implicitly_convertible()
     };
     auto implicit_caster = [](PyObject *obj, PyTypeObject *type) -> PyObject * {
         static bool currently_used = false;
-        if (currently_used) // implicit conversions are non-reentrant
+        if (currently_used) { // implicit conversions are non-reentrant
             return nullptr;
+        }
         set_flag flag_helper(currently_used);
-        if (!detail::make_caster<InputType>().load(obj, false))
+        if (!detail::make_caster<InputType>().load(obj, false)) {
             return nullptr;
+        }
         tuple args(1);
         args[0] = obj;
         PyObject *result = PyObject_Call((PyObject *) type, args.ptr(), nullptr);
-        if (result == nullptr)
+        if (result == nullptr) {
             PyErr_Clear();
+        }
         return result;
     };
 
-    if (auto tinfo = detail::get_type_info(typeid(OutputType)))
-        tinfo->implicit_conversions.push_back(implicit_caster);
-    else
+    if (auto *tinfo = detail::get_type_info(typeid(OutputType))) {
+        tinfo->implicit_conversions.emplace_back(std::move(implicit_caster));
+    } else {
         pybind11_fail("implicitly_convertible: Unable to find type " + type_id<OutputType>());
+    }
 }
 
-
 inline void register_exception_translator(ExceptionTranslator &&translator) {
     detail::get_internals().registered_exception_translators.push_front(
         std::forward<ExceptionTranslator>(translator));
 }
 
-
 /**
-  * Add a new module-local exception translator. Locally registered functions
-  * will be tried before any globally registered exception translators, which
-  * will only be invoked if the module-local handlers do not deal with
-  * the exception.
-  */
+ * Add a new module-local exception translator. Locally registered functions
+ * will be tried before any globally registered exception translators, which
+ * will only be invoked if the module-local handlers do not deal with
+ * the exception.
+ */
 inline void register_local_exception_translator(ExceptionTranslator &&translator) {
     detail::get_local_internals().registered_exception_translators.push_front(
         std::forward<ExceptionTranslator>(translator));
@@ -2188,19 +2529,19 @@ class exception : public object {
 public:
     exception() = default;
     exception(handle scope, const char *name, handle base = PyExc_Exception) {
-        std::string full_name = scope.attr("__name__").cast<std::string>() +
-                                std::string(".") + name;
-        m_ptr = PyErr_NewException(const_cast<char *>(full_name.c_str()), base.ptr(), NULL);
-        if (hasattr(scope, "__dict__") && scope.attr("__dict__").contains(name))
+        std::string full_name
+            = scope.attr("__name__").cast<std::string>() + std::string(".") + name;
+        m_ptr = PyErr_NewException(const_cast<char *>(full_name.c_str()), base.ptr(), nullptr);
+        if (hasattr(scope, "__dict__") && scope.attr("__dict__").contains(name)) {
             pybind11_fail("Error during initialization: multiple incompatible "
-                          "definitions with name \"" + std::string(name) + "\"");
+                          "definitions with name \""
+                          + std::string(name) + "\"");
+        }
         scope.attr(name) = *this;
     }
 
     // Sets the current python exception to this exception object with the given message
-    void operator()(const char *message) {
-        PyErr_SetString(m_ptr, message);
-    }
+    void operator()(const char *message) { PyErr_SetString(m_ptr, message); }
 };
 
 PYBIND11_NAMESPACE_BEGIN(detail)
@@ -2208,22 +2549,27 @@ PYBIND11_NAMESPACE_BEGIN(detail)
 // register_exception approach below.  (It would be simpler to have the static local variable
 // directly in register_exception, but that makes clang <3.5 segfault - issue #1349).
 template <typename CppException>
-exception<CppException> &get_exception_object() { static exception<CppException> ex; return ex; }
+exception<CppException> &get_exception_object() {
+    static exception<CppException> ex;
+    return ex;
+}
 
 // Helper function for register_exception and register_local_exception
 template <typename CppException>
-exception<CppException> &register_exception_impl(handle scope,
-                                                const char *name,
-                                                handle base,
-                                                bool isLocal) {
+exception<CppException> &
+register_exception_impl(handle scope, const char *name, handle base, bool isLocal) {
     auto &ex = detail::get_exception_object<CppException>();
-    if (!ex) ex = exception<CppException>(scope, name, base);
+    if (!ex) {
+        ex = exception<CppException>(scope, name, base);
+    }
 
-    auto register_func = isLocal ? &register_local_exception_translator
-                                 : &register_exception_translator;
+    auto register_func
+        = isLocal ? &register_local_exception_translator : &register_exception_translator;
 
     register_func([](std::exception_ptr p) {
-        if (!p) return;
+        if (!p) {
+            return;
+        }
         try {
             std::rethrow_exception(p);
         } catch (const CppException &e) {
@@ -2242,9 +2588,8 @@ PYBIND11_NAMESPACE_END(detail)
  * exception object and translator directly.
  */
 template <typename CppException>
-exception<CppException> &register_exception(handle scope,
-                                            const char *name,
-                                            handle base = PyExc_Exception) {
+exception<CppException> &
+register_exception(handle scope, const char *name, handle base = PyExc_Exception) {
     return detail::register_exception_impl<CppException>(scope, name, base, false /* isLocal */);
 }
 
@@ -2257,9 +2602,8 @@ exception<CppException> &register_exception(handle scope,
  * exception object and translator directly.
  */
 template <typename CppException>
-exception<CppException> &register_local_exception(handle scope,
-                                                  const char *name,
-                                                  handle base = PyExc_Exception) {
+exception<CppException> &
+register_local_exception(handle scope, const char *name, handle base = PyExc_Exception) {
     return detail::register_exception_impl<CppException>(scope, name, base, true /* isLocal */);
 }
 
@@ -2269,8 +2613,8 @@ PYBIND11_NOINLINE void print(const tuple &args, const dict &kwargs) {
     for (size_t i = 0; i < args.size(); ++i) {
         strings[i] = str(args[i]);
     }
-    auto sep = kwargs.contains("sep") ? kwargs["sep"] : cast(" ");
-    auto line = sep.attr("join")(strings);
+    auto sep = kwargs.contains("sep") ? kwargs["sep"] : str(" ");
+    auto line = sep.attr("join")(std::move(strings));
 
     object file;
     if (kwargs.contains("file")) {
@@ -2288,11 +2632,12 @@ PYBIND11_NOINLINE void print(const tuple &args, const dict &kwargs) {
     }
 
     auto write = file.attr("write");
-    write(line);
-    write(kwargs.contains("end") ? kwargs["end"] : cast("\n"));
+    write(std::move(line));
+    write(kwargs.contains("end") ? kwargs["end"] : str("\n"));
 
-    if (kwargs.contains("flush") && kwargs["flush"].cast<bool>())
+    if (kwargs.contains("flush") && kwargs["flush"].cast<bool>()) {
         file.attr("flush")();
+    }
 }
 PYBIND11_NAMESPACE_END(detail)
 
@@ -2302,51 +2647,58 @@ void print(Args &&...args) {
     detail::print(c.args(), c.kwargs());
 }
 
-error_already_set::~error_already_set() {
-    if (m_type) {
-        gil_scoped_acquire gil;
-        error_scope scope;
-        m_type.release().dec_ref();
-        m_value.release().dec_ref();
-        m_trace.release().dec_ref();
-    }
+inline void
+error_already_set::m_fetched_error_deleter(detail::error_fetch_and_normalize *raw_ptr) {
+    gil_scoped_acquire gil;
+    error_scope scope;
+    delete raw_ptr;
+}
+
+inline const char *error_already_set::what() const noexcept {
+    gil_scoped_acquire gil;
+    error_scope scope;
+    return m_fetched_error->error_string().c_str();
 }
 
 PYBIND11_NAMESPACE_BEGIN(detail)
-inline function get_type_override(const void *this_ptr, const type_info *this_type, const char *name)  {
+
+inline function
+get_type_override(const void *this_ptr, const type_info *this_type, const char *name) {
     handle self = get_object_handle(this_ptr, this_type);
-    if (!self)
+    if (!self) {
         return function();
+    }
     handle type = type::handle_of(self);
     auto key = std::make_pair(type.ptr(), name);
 
     /* Cache functions that aren't overridden in Python to avoid
        many costly Python dictionary lookups below */
     auto &cache = get_internals().inactive_override_cache;
-    if (cache.find(key) != cache.end())
+    if (cache.find(key) != cache.end()) {
         return function();
+    }
 
     function override = getattr(self, name, function());
     if (override.is_cpp_function()) {
-        cache.insert(key);
+        cache.insert(std::move(key));
         return function();
     }
 
     /* Don't call dispatch code if invoked from overridden function.
        Unfortunately this doesn't work on PyPy. */
 #if !defined(PYPY_VERSION)
-
-#if PY_VERSION_HEX >= 0x03090000
+#    if PY_VERSION_HEX >= 0x03090000
     PyFrameObject *frame = PyThreadState_GetFrame(PyThreadState_Get());
     if (frame != nullptr) {
         PyCodeObject *f_code = PyFrame_GetCode(frame);
         // f_code is guaranteed to not be NULL
         if ((std::string) str(f_code->co_name) == name && f_code->co_argcount > 0) {
-            PyObject* locals = PyEval_GetLocals();
+            PyObject *locals = PyEval_GetLocals();
             if (locals != nullptr) {
-                PyObject *self_caller = dict_getitem(
-                    locals, PyTuple_GET_ITEM(f_code->co_varnames, 0)
-                );
+                PyObject *co_varnames = PyObject_GetAttrString((PyObject *) f_code, "co_varnames");
+                PyObject *self_arg = PyTuple_GET_ITEM(co_varnames, 0);
+                Py_DECREF(co_varnames);
+                PyObject *self_caller = dict_getitem(locals, self_arg);
                 if (self_caller == self.ptr()) {
                     Py_DECREF(f_code);
                     Py_DECREF(frame);
@@ -2357,39 +2709,44 @@ inline function get_type_override(const void *this_ptr, const type_info *this_ty
         Py_DECREF(f_code);
         Py_DECREF(frame);
     }
-#else
+#    else
     PyFrameObject *frame = PyThreadState_Get()->frame;
     if (frame != nullptr && (std::string) str(frame->f_code->co_name) == name
         && frame->f_code->co_argcount > 0) {
         PyFrame_FastToLocals(frame);
-        PyObject *self_caller = dict_getitem(
-            frame->f_locals, PyTuple_GET_ITEM(frame->f_code->co_varnames, 0));
-        if (self_caller == self.ptr())
+        PyObject *self_caller
+            = dict_getitem(frame->f_locals, PyTuple_GET_ITEM(frame->f_code->co_varnames, 0));
+        if (self_caller == self.ptr()) {
             return function();
+        }
     }
-#endif
+#    endif
 
 #else
     /* PyPy currently doesn't provide a detailed cpyext emulation of
        frame objects, so we have to emulate this using Python. This
        is going to be slow..*/
-    dict d; d["self"] = self; d["name"] = pybind11::str(name);
-    PyObject *result = PyRun_String(
-        "import inspect\n"
-        "frame = inspect.currentframe()\n"
-        "if frame is not None:\n"
-        "    frame = frame.f_back\n"
-        "    if frame is not None and str(frame.f_code.co_name) == name and "
-        "frame.f_code.co_argcount > 0:\n"
-        "        self_caller = frame.f_locals[frame.f_code.co_varnames[0]]\n"
-        "        if self_caller == self:\n"
-        "            self = None\n",
-        Py_file_input, d.ptr(), d.ptr());
+    dict d;
+    d["self"] = self;
+    d["name"] = pybind11::str(name);
+    PyObject *result
+        = PyRun_String("import inspect\n"
+                       "frame = inspect.currentframe()\n"
+                       "if frame is not None:\n"
+                       "    frame = frame.f_back\n"
+                       "    if frame is not None and str(frame.f_code.co_name) == name and "
+                       "frame.f_code.co_argcount > 0:\n"
+                       "        self_caller = frame.f_locals[frame.f_code.co_varnames[0]]\n"
+                       "        if self_caller == self:\n"
+                       "            self = None\n",
+                       Py_file_input,
+                       d.ptr(),
+                       d.ptr());
     if (result == nullptr)
         throw error_already_set();
+    Py_DECREF(result);
     if (d["self"].is_none())
         return function();
-    Py_DECREF(result);
 #endif
 
     return override;
@@ -2397,15 +2754,17 @@ inline function get_type_override(const void *this_ptr, const type_info *this_ty
 PYBIND11_NAMESPACE_END(detail)
 
 /** \rst
-  Try to retrieve a python method by the provided name from the instance pointed to by the this_ptr.
+  Try to retrieve a python method by the provided name from the instance pointed to by the
+  this_ptr.
 
-  :this_ptr: The pointer to the object the overridden method should be retrieved for. This should be
-             the first non-trampoline class encountered in the inheritance chain.
+  :this_ptr: The pointer to the object the overridden method should be retrieved for. This should
+             be the first non-trampoline class encountered in the inheritance chain.
   :name: The name of the overridden Python method to retrieve.
   :return: The Python method by this name from the object or an empty function wrapper.
  \endrst */
-template <class T> function get_override(const T *this_ptr, const char *name) {
-    auto tinfo = detail::get_type_info(typeid(T));
+template <class T>
+function get_override(const T *this_ptr, const char *name) {
+    auto *tinfo = detail::get_type_info(typeid(T));
     return tinfo ? detail::get_type_override(this_ptr, tinfo, name) : function();
 }
 
@@ -2425,9 +2784,10 @@ template <class T> function get_override(const T *this_ptr, const char *name) {
     } while (false)
 
 /** \rst
-    Macro to populate the virtual method in the trampoline class. This macro tries to look up a method named 'fn'
-    from the Python side, deals with the :ref:`gil` and necessary argument conversions to call this method and return
-    the appropriate type. See :ref:`overriding_virtuals` for more information. This macro should be used when the method
+    Macro to populate the virtual method in the trampoline class. This macro tries to look up a
+    method named 'fn' from the Python side, deals with the :ref:`gil` and necessary argument
+    conversions to call this method and return the appropriate type.
+    See :ref:`overriding_virtuals` for more information. This macro should be used when the method
     name in C is not the same as the method name in Python. For example with `__str__`.
 
     .. code-block:: cpp
@@ -2441,26 +2801,28 @@ template <class T> function get_override(const T *this_ptr, const char *name) {
         );
       }
 \endrst */
-#define PYBIND11_OVERRIDE_NAME(ret_type, cname, name, fn, ...) \
-    do { \
+#define PYBIND11_OVERRIDE_NAME(ret_type, cname, name, fn, ...)                                    \
+    do {                                                                                          \
         PYBIND11_OVERRIDE_IMPL(PYBIND11_TYPE(ret_type), PYBIND11_TYPE(cname), name, __VA_ARGS__); \
-        return cname::fn(__VA_ARGS__); \
+        return cname::fn(__VA_ARGS__);                                                            \
     } while (false)
 
 /** \rst
-    Macro for pure virtual functions, this function is identical to :c:macro:`PYBIND11_OVERRIDE_NAME`, except that it
-    throws if no override can be found.
+    Macro for pure virtual functions, this function is identical to
+    :c:macro:`PYBIND11_OVERRIDE_NAME`, except that it throws if no override can be found.
 \endrst */
-#define PYBIND11_OVERRIDE_PURE_NAME(ret_type, cname, name, fn, ...) \
-    do { \
+#define PYBIND11_OVERRIDE_PURE_NAME(ret_type, cname, name, fn, ...)                               \
+    do {                                                                                          \
         PYBIND11_OVERRIDE_IMPL(PYBIND11_TYPE(ret_type), PYBIND11_TYPE(cname), name, __VA_ARGS__); \
-        pybind11::pybind11_fail("Tried to call pure virtual function \"" PYBIND11_STRINGIFY(cname) "::" name "\""); \
+        pybind11::pybind11_fail(                                                                  \
+            "Tried to call pure virtual function \"" PYBIND11_STRINGIFY(cname) "::" name "\"");   \
     } while (false)
 
 /** \rst
-    Macro to populate the virtual method in the trampoline class. This macro tries to look up the method
-    from the Python side, deals with the :ref:`gil` and necessary argument conversions to call this method and return
-    the appropriate type. This macro should be used if the method name in C and in Python are identical.
+    Macro to populate the virtual method in the trampoline class. This macro tries to look up the
+    method from the Python side, deals with the :ref:`gil` and necessary argument conversions to
+    call this method and return the appropriate type. This macro should be used if the method name
+    in C and in Python are identical.
     See :ref:`overriding_virtuals` for more information.
 
     .. code-block:: cpp
@@ -2481,21 +2843,22 @@ template <class T> function get_override(const T *this_ptr, const char *name) {
           }
       };
 \endrst */
-#define PYBIND11_OVERRIDE(ret_type, cname, fn, ...) \
+#define PYBIND11_OVERRIDE(ret_type, cname, fn, ...)                                               \
     PYBIND11_OVERRIDE_NAME(PYBIND11_TYPE(ret_type), PYBIND11_TYPE(cname), #fn, fn, __VA_ARGS__)
 
 /** \rst
-    Macro for pure virtual functions, this function is identical to :c:macro:`PYBIND11_OVERRIDE`, except that it throws
-    if no override can be found.
+    Macro for pure virtual functions, this function is identical to :c:macro:`PYBIND11_OVERRIDE`,
+    except that it throws if no override can be found.
 \endrst */
-#define PYBIND11_OVERRIDE_PURE(ret_type, cname, fn, ...) \
-    PYBIND11_OVERRIDE_PURE_NAME(PYBIND11_TYPE(ret_type), PYBIND11_TYPE(cname), #fn, fn, __VA_ARGS__)
-
+#define PYBIND11_OVERRIDE_PURE(ret_type, cname, fn, ...)                                          \
+    PYBIND11_OVERRIDE_PURE_NAME(                                                                  \
+        PYBIND11_TYPE(ret_type), PYBIND11_TYPE(cname), #fn, fn, __VA_ARGS__)
 
 // Deprecated versions
 
 PYBIND11_DEPRECATED("get_type_overload has been deprecated")
-inline function get_type_overload(const void *this_ptr, const detail::type_info *this_type, const char *name) {
+inline function
+get_type_overload(const void *this_ptr, const detail::type_info *this_type, const char *name) {
     return detail::get_type_override(this_ptr, this_type, name);
 }
 
@@ -2504,19 +2867,16 @@ inline function get_overload(const T *this_ptr, const char *name) {
     return get_override(this_ptr, name);
 }
 
-#define PYBIND11_OVERLOAD_INT(ret_type, cname, name, ...) \
+#define PYBIND11_OVERLOAD_INT(ret_type, cname, name, ...)                                         \
     PYBIND11_OVERRIDE_IMPL(PYBIND11_TYPE(ret_type), PYBIND11_TYPE(cname), name, __VA_ARGS__)
-#define PYBIND11_OVERLOAD_NAME(ret_type, cname, name, fn, ...) \
+#define PYBIND11_OVERLOAD_NAME(ret_type, cname, name, fn, ...)                                    \
     PYBIND11_OVERRIDE_NAME(PYBIND11_TYPE(ret_type), PYBIND11_TYPE(cname), name, fn, __VA_ARGS__)
-#define PYBIND11_OVERLOAD_PURE_NAME(ret_type, cname, name, fn, ...) \
-    PYBIND11_OVERRIDE_PURE_NAME(PYBIND11_TYPE(ret_type), PYBIND11_TYPE(cname), name, fn, __VA_ARGS__);
-#define PYBIND11_OVERLOAD(ret_type, cname, fn, ...) \
+#define PYBIND11_OVERLOAD_PURE_NAME(ret_type, cname, name, fn, ...)                               \
+    PYBIND11_OVERRIDE_PURE_NAME(                                                                  \
+        PYBIND11_TYPE(ret_type), PYBIND11_TYPE(cname), name, fn, __VA_ARGS__);
+#define PYBIND11_OVERLOAD(ret_type, cname, fn, ...)                                               \
     PYBIND11_OVERRIDE(PYBIND11_TYPE(ret_type), PYBIND11_TYPE(cname), fn, __VA_ARGS__)
-#define PYBIND11_OVERLOAD_PURE(ret_type, cname, fn, ...) \
+#define PYBIND11_OVERLOAD_PURE(ret_type, cname, fn, ...)                                          \
     PYBIND11_OVERRIDE_PURE(PYBIND11_TYPE(ret_type), PYBIND11_TYPE(cname), fn, __VA_ARGS__);
 
 PYBIND11_NAMESPACE_END(PYBIND11_NAMESPACE)
-
-#if defined(__GNUC__) && __GNUC__ == 7
-#    pragma GCC diagnostic pop // -Wnoexcept-type
-#endif
diff --git a/ext/pybind11/include/pybind11/pytypes.h b/ext/pybind11/include/pybind11/pytypes.h
index f54d5fad61..f11ed5da78 100644
--- a/ext/pybind11/include/pybind11/pytypes.h
+++ b/ext/pybind11/include/pybind11/pytypes.h
@@ -11,34 +11,53 @@
 
 #include "detail/common.h"
 #include "buffer_info.h"
-#include <utility>
+
+#include <assert.h>
+#include <cstddef>
+#include <exception>
+#include <frameobject.h>
+#include <iterator>
+#include <memory>
+#include <string>
 #include <type_traits>
+#include <typeinfo>
+#include <utility>
 
 #if defined(PYBIND11_HAS_OPTIONAL)
-#  include <optional>
+#    include <optional>
+#endif
+
+#ifdef PYBIND11_HAS_STRING_VIEW
+#    include <string_view>
 #endif
 
 PYBIND11_NAMESPACE_BEGIN(PYBIND11_NAMESPACE)
 
+PYBIND11_WARNING_DISABLE_MSVC(4127)
+
 /* A few forward declarations */
-class handle; class object;
-class str; class iterator;
+class handle;
+class object;
+class str;
+class iterator;
 class type;
-struct arg; struct arg_v;
+struct arg;
+struct arg_v;
 
 PYBIND11_NAMESPACE_BEGIN(detail)
 class args_proxy;
 bool isinstance_generic(handle obj, const std::type_info &tp);
 
 // Accessor forward declarations
-template <typename Policy> class accessor;
+template <typename Policy>
+class accessor;
 namespace accessor_policies {
-    struct obj_attr;
-    struct str_attr;
-    struct generic_item;
-    struct sequence_item;
-    struct list_item;
-    struct tuple_item;
+struct obj_attr;
+struct str_attr;
+struct generic_item;
+struct sequence_item;
+struct list_item;
+struct tuple_item;
 } // namespace accessor_policies
 using obj_attr_accessor = accessor<accessor_policies::obj_attr>;
 using str_attr_accessor = accessor<accessor_policies::str_attr>;
@@ -48,8 +67,9 @@ using list_accessor = accessor<accessor_policies::list_item>;
 using tuple_accessor = accessor<accessor_policies::tuple_item>;
 
 /// Tag and check to identify a class which implements the Python object API
-class pyobject_tag { };
-template <typename T> using is_pyobject = std::is_base_of<pyobject_tag, remove_reference_t<T>>;
+class pyobject_tag {};
+template <typename T>
+using is_pyobject = std::is_base_of<pyobject_tag, remove_reference_t<T>>;
 
 /** \rst
     A mixin class which adds common functions to `handle`, `object` and various accessors.
@@ -75,7 +95,9 @@ public:
         or `object` subclass causes a call to ``__setitem__``.
     \endrst */
     item_accessor operator[](handle key) const;
-    /// See above (the only difference is that they key is provided as a string literal)
+    /// See above (the only difference is that the key's reference is stolen)
+    item_accessor operator[](object &&key) const;
+    /// See above (the only difference is that the key is provided as a string literal)
     item_accessor operator[](const char *key) const;
 
     /** \rst
@@ -85,7 +107,9 @@ public:
         or `object` subclass causes a call to ``setattr``.
     \endrst */
     obj_attr_accessor attr(handle key) const;
-    /// See above (the only difference is that they key is provided as a string literal)
+    /// See above (the only difference is that the key's reference is stolen)
+    obj_attr_accessor attr(object &&key) const;
+    /// See above (the only difference is that the key is provided as a string literal)
     str_attr_accessor attr(const char *key) const;
 
     /** \rst
@@ -97,7 +121,8 @@ public:
     args_proxy operator*() const;
 
     /// Check if the given item is contained within this object, i.e. ``item in obj``.
-    template <typename T> bool contains(T &&item) const;
+    template <typename T>
+    bool contains(T &&item) const;
 
     /** \rst
         Assuming the Python object is a function or implements the ``__call__``
@@ -109,44 +134,46 @@ public:
         function will throw a `cast_error` exception. When the Python function
         call fails, a `error_already_set` exception is thrown.
     \endrst */
-    template <return_value_policy policy = return_value_policy::automatic_reference, typename... Args>
+    template <return_value_policy policy = return_value_policy::automatic_reference,
+              typename... Args>
     object operator()(Args &&...args) const;
-    template <return_value_policy policy = return_value_policy::automatic_reference, typename... Args>
+    template <return_value_policy policy = return_value_policy::automatic_reference,
+              typename... Args>
     PYBIND11_DEPRECATED("call(...) was deprecated in favor of operator()(...)")
-        object call(Args&&... args) const;
+    object call(Args &&...args) const;
 
     /// Equivalent to ``obj is other`` in Python.
-    bool is(object_api const& other) const { return derived().ptr() == other.derived().ptr(); }
+    bool is(object_api const &other) const { return derived().ptr() == other.derived().ptr(); }
     /// Equivalent to ``obj is None`` in Python.
     bool is_none() const { return derived().ptr() == Py_None; }
     /// Equivalent to obj == other in Python
-    bool equal(object_api const &other) const      { return rich_compare(other, Py_EQ); }
-    bool not_equal(object_api const &other) const  { return rich_compare(other, Py_NE); }
-    bool operator<(object_api const &other) const  { return rich_compare(other, Py_LT); }
+    bool equal(object_api const &other) const { return rich_compare(other, Py_EQ); }
+    bool not_equal(object_api const &other) const { return rich_compare(other, Py_NE); }
+    bool operator<(object_api const &other) const { return rich_compare(other, Py_LT); }
     bool operator<=(object_api const &other) const { return rich_compare(other, Py_LE); }
-    bool operator>(object_api const &other) const  { return rich_compare(other, Py_GT); }
+    bool operator>(object_api const &other) const { return rich_compare(other, Py_GT); }
     bool operator>=(object_api const &other) const { return rich_compare(other, Py_GE); }
 
     object operator-() const;
     object operator~() const;
     object operator+(object_api const &other) const;
-    object operator+=(object_api const &other) const;
+    object operator+=(object_api const &other);
     object operator-(object_api const &other) const;
-    object operator-=(object_api const &other) const;
+    object operator-=(object_api const &other);
     object operator*(object_api const &other) const;
-    object operator*=(object_api const &other) const;
+    object operator*=(object_api const &other);
     object operator/(object_api const &other) const;
-    object operator/=(object_api const &other) const;
+    object operator/=(object_api const &other);
     object operator|(object_api const &other) const;
-    object operator|=(object_api const &other) const;
+    object operator|=(object_api const &other);
     object operator&(object_api const &other) const;
-    object operator&=(object_api const &other) const;
+    object operator&=(object_api const &other);
     object operator^(object_api const &other) const;
-    object operator^=(object_api const &other) const;
+    object operator^=(object_api const &other);
     object operator<<(object_api const &other) const;
-    object operator<<=(object_api const &other) const;
+    object operator<<=(object_api const &other);
     object operator>>(object_api const &other) const;
-    object operator>>=(object_api const &other) const;
+    object operator>>=(object_api const &other);
 
     PYBIND11_DEPRECATED("Use py::str(obj) instead")
     pybind11::str str() const;
@@ -157,15 +184,25 @@ public:
     /// Return the object's current reference count
     int ref_count() const { return static_cast<int>(Py_REFCNT(derived().ptr())); }
 
-    // TODO PYBIND11_DEPRECATED("Call py::type::handle_of(h) or py::type::of(h) instead of h.get_type()")
+    // TODO PYBIND11_DEPRECATED(
+    //     "Call py::type::handle_of(h) or py::type::of(h) instead of h.get_type()")
     handle get_type() const;
 
 private:
     bool rich_compare(object_api const &other, int value) const;
 };
 
+template <typename T>
+using is_pyobj_ptr_or_nullptr_t = detail::any_of<std::is_same<T, PyObject *>,
+                                                 std::is_same<T, PyObject *const>,
+                                                 std::is_same<T, std::nullptr_t>>;
+
 PYBIND11_NAMESPACE_END(detail)
 
+#if !defined(PYBIND11_HANDLE_REF_DEBUG) && !defined(NDEBUG)
+#    define PYBIND11_HANDLE_REF_DEBUG
+#endif
+
 /** \rst
     Holds a reference to a Python object (no reference counting)
 
@@ -181,9 +218,24 @@ class handle : public detail::object_api<handle> {
 public:
     /// The default constructor creates a handle with a ``nullptr``-valued pointer
     handle() = default;
-    /// Creates a ``handle`` from the given raw Python object pointer
+
+    /// Enable implicit conversion from ``PyObject *`` and ``nullptr``.
+    /// Not using ``handle(PyObject *ptr)`` to avoid implicit conversion from ``0``.
+    template <typename T,
+              detail::enable_if_t<detail::is_pyobj_ptr_or_nullptr_t<T>::value, int> = 0>
     // NOLINTNEXTLINE(google-explicit-constructor)
-    handle(PyObject *ptr) : m_ptr(ptr) { } // Allow implicit conversion from PyObject*
+    handle(T ptr) : m_ptr(ptr) {}
+
+    /// Enable implicit conversion through ``T::operator PyObject *()``.
+    template <
+        typename T,
+        detail::enable_if_t<detail::all_of<detail::none_of<std::is_base_of<handle, T>,
+                                                           detail::is_pyobj_ptr_or_nullptr_t<T>>,
+                                           std::is_convertible<T, PyObject *>>::value,
+                            int>
+        = 0>
+    // NOLINTNEXTLINE(google-explicit-constructor)
+    handle(T &obj) : m_ptr(obj) {}
 
     /// Return the underlying ``PyObject *`` pointer
     PyObject *ptr() const { return m_ptr; }
@@ -194,20 +246,40 @@ public:
         preferable to use the `object` class which derives from `handle` and calls
         this function automatically. Returns a reference to itself.
     \endrst */
-    const handle& inc_ref() const & { Py_XINCREF(m_ptr); return *this; }
+    const handle &inc_ref() const & {
+#ifdef PYBIND11_HANDLE_REF_DEBUG
+        inc_ref_counter(1);
+#endif
+#ifdef PYBIND11_ASSERT_GIL_HELD_INCREF_DECREF
+        if (m_ptr != nullptr && !PyGILState_Check()) {
+            throw_gilstate_error("pybind11::handle::inc_ref()");
+        }
+#endif
+        Py_XINCREF(m_ptr);
+        return *this;
+    }
 
     /** \rst
         Manually decrease the reference count of the Python object. Usually, it is
         preferable to use the `object` class which derives from `handle` and calls
         this function automatically. Returns a reference to itself.
     \endrst */
-    const handle& dec_ref() const & { Py_XDECREF(m_ptr); return *this; }
+    const handle &dec_ref() const & {
+#ifdef PYBIND11_ASSERT_GIL_HELD_INCREF_DECREF
+        if (m_ptr != nullptr && !PyGILState_Check()) {
+            throw_gilstate_error("pybind11::handle::dec_ref()");
+        }
+#endif
+        Py_XDECREF(m_ptr);
+        return *this;
+    }
 
     /** \rst
         Attempt to cast the Python object into the given C++ type. A `cast_error`
         will be throw upon failure.
     \endrst */
-    template <typename T> T cast() const;
+    template <typename T>
+    T cast() const;
     /// Return ``true`` when the `handle` wraps a valid Python object
     explicit operator bool() const { return m_ptr != nullptr; }
     /** \rst
@@ -220,8 +292,41 @@ public:
     bool operator!=(const handle &h) const { return m_ptr != h.m_ptr; }
     PYBIND11_DEPRECATED("Use handle::operator bool() instead")
     bool check() const { return m_ptr != nullptr; }
+
 protected:
     PyObject *m_ptr = nullptr;
+
+private:
+#ifdef PYBIND11_ASSERT_GIL_HELD_INCREF_DECREF
+    void throw_gilstate_error(const std::string &function_name) const {
+        fprintf(
+            stderr,
+            "%s is being called while the GIL is either not held or invalid. Please see "
+            "https://pybind11.readthedocs.io/en/stable/advanced/"
+            "misc.html#common-sources-of-global-interpreter-lock-errors for debugging advice.\n",
+            function_name.c_str());
+        fflush(stderr);
+        if (Py_TYPE(m_ptr)->tp_name != nullptr) {
+            fprintf(stderr,
+                    "The failing %s call was triggered on a %s object.\n",
+                    function_name.c_str(),
+                    Py_TYPE(m_ptr)->tp_name);
+            fflush(stderr);
+        }
+        throw std::runtime_error(function_name + " PyGILState_Check() failure.");
+    }
+#endif
+
+#ifdef PYBIND11_HANDLE_REF_DEBUG
+    static std::size_t inc_ref_counter(std::size_t add) {
+        thread_local std::size_t counter = 0;
+        counter += add;
+        return counter;
+    }
+
+public:
+    static std::size_t inc_ref_counter() { return inc_ref_counter(0); }
+#endif
 };
 
 /** \rst
@@ -238,11 +343,15 @@ class object : public handle {
 public:
     object() = default;
     PYBIND11_DEPRECATED("Use reinterpret_borrow<object>() or reinterpret_steal<object>()")
-    object(handle h, bool is_borrowed) : handle(h) { if (is_borrowed) inc_ref(); }
+    object(handle h, bool is_borrowed) : handle(h) {
+        if (is_borrowed) {
+            inc_ref();
+        }
+    }
     /// Copy constructor; always increases the reference count
     object(const object &o) : handle(o) { inc_ref(); }
     /// Move constructor; steals the object from ``other`` and preserves its reference count
-    object(object &&other) noexcept { m_ptr = other.m_ptr; other.m_ptr = nullptr; }
+    object(object &&other) noexcept : handle(other) { other.m_ptr = nullptr; }
     /// Destructor; automatically calls `handle::dec_ref()`
     ~object() { dec_ref(); }
 
@@ -252,22 +361,25 @@ public:
         Python object.
     \endrst */
     handle release() {
-      PyObject *tmp = m_ptr;
-      m_ptr = nullptr;
-      return handle(tmp);
+        PyObject *tmp = m_ptr;
+        m_ptr = nullptr;
+        return handle(tmp);
     }
 
-    object& operator=(const object &other) {
-        other.inc_ref();
-        // Use temporary variable to ensure `*this` remains valid while
-        // `Py_XDECREF` executes, in case `*this` is accessible from Python.
-        handle temp(m_ptr);
-        m_ptr = other.m_ptr;
-        temp.dec_ref();
+    object &operator=(const object &other) {
+        // Skip inc_ref and dec_ref if both objects are the same
+        if (!this->is(other)) {
+            other.inc_ref();
+            // Use temporary variable to ensure `*this` remains valid while
+            // `Py_XDECREF` executes, in case `*this` is accessible from Python.
+            handle temp(m_ptr);
+            m_ptr = other.m_ptr;
+            temp.dec_ref();
+        }
         return *this;
     }
 
-    object& operator=(object &&other) noexcept {
+    object &operator=(object &&other) noexcept {
         if (this != &other) {
             handle temp(m_ptr);
             m_ptr = other.m_ptr;
@@ -277,25 +389,43 @@ public:
         return *this;
     }
 
+#define PYBIND11_INPLACE_OP(iop)                                                                  \
+    object iop(object_api const &other) { return operator=(handle::iop(other)); }
+
+    PYBIND11_INPLACE_OP(operator+=)
+    PYBIND11_INPLACE_OP(operator-=)
+    PYBIND11_INPLACE_OP(operator*=)
+    PYBIND11_INPLACE_OP(operator/=)
+    PYBIND11_INPLACE_OP(operator|=)
+    PYBIND11_INPLACE_OP(operator&=)
+    PYBIND11_INPLACE_OP(operator^=)
+    PYBIND11_INPLACE_OP(operator<<=)
+    PYBIND11_INPLACE_OP(operator>>=)
+#undef PYBIND11_INPLACE_OP
+
     // Calling cast() on an object lvalue just copies (via handle::cast)
-    template <typename T> T cast() const &;
+    template <typename T>
+    T cast() const &;
     // Calling on an object rvalue does a move, if needed and/or possible
-    template <typename T> T cast() &&;
+    template <typename T>
+    T cast() &&;
 
 protected:
     // Tags for choosing constructors from raw PyObject *
-    struct borrowed_t { };
-    struct stolen_t { };
+    struct borrowed_t {};
+    struct stolen_t {};
 
-#ifndef DOXYGEN_SHOULD_SKIP_THIS  // Issue in breathe 4.26.1
-    template <typename T> friend T reinterpret_borrow(handle);
-    template <typename T> friend T reinterpret_steal(handle);
-#endif
+    /// @cond BROKEN
+    template <typename T>
+    friend T reinterpret_borrow(handle);
+    template <typename T>
+    friend T reinterpret_steal(handle);
+    /// @endcond
 
 public:
     // Only accessible from derived classes and the reinterpret_* functions
     object(handle h, borrowed_t) : handle(h) { inc_ref(); }
-    object(handle h, stolen_t) : handle(h) { }
+    object(handle h, stolen_t) : handle(h) {}
 };
 
 /** \rst
@@ -311,7 +441,10 @@ public:
         // or
         py::tuple t = reinterpret_borrow<py::tuple>(p); // <-- `p` must be already be a `tuple`
 \endrst */
-template <typename T> T reinterpret_borrow(handle h) { return {h, object::borrowed_t{}}; }
+template <typename T>
+T reinterpret_borrow(handle h) {
+    return {h, object::borrowed_t{}};
+}
 
 /** \rst
     Like `reinterpret_borrow`, but steals the reference.
@@ -321,49 +454,243 @@ template <typename T> T reinterpret_borrow(handle h) { return {h, object::borrow
         PyObject *p = PyObject_Str(obj);
         py::str s = reinterpret_steal<py::str>(p); // <-- `p` must be already be a `str`
 \endrst */
-template <typename T> T reinterpret_steal(handle h) { return {h, object::stolen_t{}}; }
+template <typename T>
+T reinterpret_steal(handle h) {
+    return {h, object::stolen_t{}};
+}
 
 PYBIND11_NAMESPACE_BEGIN(detail)
+
+// Equivalent to obj.__class__.__name__ (or obj.__name__ if obj is a class).
+inline const char *obj_class_name(PyObject *obj) {
+    if (PyType_Check(obj)) {
+        return reinterpret_cast<PyTypeObject *>(obj)->tp_name;
+    }
+    return Py_TYPE(obj)->tp_name;
+}
+
 std::string error_string();
+
+struct error_fetch_and_normalize {
+    // Immediate normalization is long-established behavior (starting with
+    // https://github.com/pybind/pybind11/commit/135ba8deafb8bf64a15b24d1513899eb600e2011
+    // from Sep 2016) and safest. Normalization could be deferred, but this could mask
+    // errors elsewhere, the performance gain is very minor in typical situations
+    // (usually the dominant bottleneck is EH unwinding), and the implementation here
+    // would be more complex.
+    explicit error_fetch_and_normalize(const char *called) {
+        PyErr_Fetch(&m_type.ptr(), &m_value.ptr(), &m_trace.ptr());
+        if (!m_type) {
+            pybind11_fail("Internal error: " + std::string(called)
+                          + " called while "
+                            "Python error indicator not set.");
+        }
+        const char *exc_type_name_orig = detail::obj_class_name(m_type.ptr());
+        if (exc_type_name_orig == nullptr) {
+            pybind11_fail("Internal error: " + std::string(called)
+                          + " failed to obtain the name "
+                            "of the original active exception type.");
+        }
+        m_lazy_error_string = exc_type_name_orig;
+        // PyErr_NormalizeException() may change the exception type if there are cascading
+        // failures. This can potentially be extremely confusing.
+        PyErr_NormalizeException(&m_type.ptr(), &m_value.ptr(), &m_trace.ptr());
+        if (m_type.ptr() == nullptr) {
+            pybind11_fail("Internal error: " + std::string(called)
+                          + " failed to normalize the "
+                            "active exception.");
+        }
+        const char *exc_type_name_norm = detail::obj_class_name(m_type.ptr());
+        if (exc_type_name_norm == nullptr) {
+            pybind11_fail("Internal error: " + std::string(called)
+                          + " failed to obtain the name "
+                            "of the normalized active exception type.");
+        }
+#if defined(PYPY_VERSION_NUM) && PYPY_VERSION_NUM < 0x07030a00
+        // This behavior runs the risk of masking errors in the error handling, but avoids a
+        // conflict with PyPy, which relies on the normalization here to change OSError to
+        // FileNotFoundError (https://github.com/pybind/pybind11/issues/4075).
+        m_lazy_error_string = exc_type_name_norm;
+#else
+        if (exc_type_name_norm != m_lazy_error_string) {
+            std::string msg = std::string(called)
+                              + ": MISMATCH of original and normalized "
+                                "active exception types: ";
+            msg += "ORIGINAL ";
+            msg += m_lazy_error_string;
+            msg += " REPLACED BY ";
+            msg += exc_type_name_norm;
+            msg += ": " + format_value_and_trace();
+            pybind11_fail(msg);
+        }
+#endif
+    }
+
+    error_fetch_and_normalize(const error_fetch_and_normalize &) = delete;
+    error_fetch_and_normalize(error_fetch_and_normalize &&) = delete;
+
+    std::string format_value_and_trace() const {
+        std::string result;
+        std::string message_error_string;
+        if (m_value) {
+            auto value_str = reinterpret_steal<object>(PyObject_Str(m_value.ptr()));
+            constexpr const char *message_unavailable_exc
+                = "<MESSAGE UNAVAILABLE DUE TO ANOTHER EXCEPTION>";
+            if (!value_str) {
+                message_error_string = detail::error_string();
+                result = message_unavailable_exc;
+            } else {
+                // Not using `value_str.cast<std::string>()`, to not potentially throw a secondary
+                // error_already_set that will then result in process termination (#4288).
+                auto value_bytes = reinterpret_steal<object>(
+                    PyUnicode_AsEncodedString(value_str.ptr(), "utf-8", "backslashreplace"));
+                if (!value_bytes) {
+                    message_error_string = detail::error_string();
+                    result = message_unavailable_exc;
+                } else {
+                    char *buffer = nullptr;
+                    Py_ssize_t length = 0;
+                    if (PyBytes_AsStringAndSize(value_bytes.ptr(), &buffer, &length) == -1) {
+                        message_error_string = detail::error_string();
+                        result = message_unavailable_exc;
+                    } else {
+                        result = std::string(buffer, static_cast<std::size_t>(length));
+                    }
+                }
+            }
+        } else {
+            result = "<MESSAGE UNAVAILABLE>";
+        }
+        if (result.empty()) {
+            result = "<EMPTY MESSAGE>";
+        }
+
+        bool have_trace = false;
+        if (m_trace) {
+#if !defined(PYPY_VERSION)
+            auto *tb = reinterpret_cast<PyTracebackObject *>(m_trace.ptr());
+
+            // Get the deepest trace possible.
+            while (tb->tb_next) {
+                tb = tb->tb_next;
+            }
+
+            PyFrameObject *frame = tb->tb_frame;
+            Py_XINCREF(frame);
+            result += "\n\nAt:\n";
+            while (frame) {
+#    if PY_VERSION_HEX >= 0x030900B1
+                PyCodeObject *f_code = PyFrame_GetCode(frame);
+#    else
+                PyCodeObject *f_code = frame->f_code;
+                Py_INCREF(f_code);
+#    endif
+                int lineno = PyFrame_GetLineNumber(frame);
+                result += "  ";
+                result += handle(f_code->co_filename).cast<std::string>();
+                result += '(';
+                result += std::to_string(lineno);
+                result += "): ";
+                result += handle(f_code->co_name).cast<std::string>();
+                result += '\n';
+                Py_DECREF(f_code);
+#    if PY_VERSION_HEX >= 0x030900B1
+                auto *b_frame = PyFrame_GetBack(frame);
+#    else
+                auto *b_frame = frame->f_back;
+                Py_XINCREF(b_frame);
+#    endif
+                Py_DECREF(frame);
+                frame = b_frame;
+            }
+
+            have_trace = true;
+#endif //! defined(PYPY_VERSION)
+        }
+
+        if (!message_error_string.empty()) {
+            if (!have_trace) {
+                result += '\n';
+            }
+            result += "\nMESSAGE UNAVAILABLE DUE TO EXCEPTION: " + message_error_string;
+        }
+
+        return result;
+    }
+
+    std::string const &error_string() const {
+        if (!m_lazy_error_string_completed) {
+            m_lazy_error_string += ": " + format_value_and_trace();
+            m_lazy_error_string_completed = true;
+        }
+        return m_lazy_error_string;
+    }
+
+    void restore() {
+        if (m_restore_called) {
+            pybind11_fail("Internal error: pybind11::detail::error_fetch_and_normalize::restore() "
+                          "called a second time. ORIGINAL ERROR: "
+                          + error_string());
+        }
+        PyErr_Restore(m_type.inc_ref().ptr(), m_value.inc_ref().ptr(), m_trace.inc_ref().ptr());
+        m_restore_called = true;
+    }
+
+    bool matches(handle exc) const {
+        return (PyErr_GivenExceptionMatches(m_type.ptr(), exc.ptr()) != 0);
+    }
+
+    // Not protecting these for simplicity.
+    object m_type, m_value, m_trace;
+
+private:
+    // Only protecting invariants.
+    mutable std::string m_lazy_error_string;
+    mutable bool m_lazy_error_string_completed = false;
+    mutable bool m_restore_called = false;
+};
+
+inline std::string error_string() {
+    return error_fetch_and_normalize("pybind11::detail::error_string").error_string();
+}
+
 PYBIND11_NAMESPACE_END(detail)
 
-#if defined(_MSC_VER)
-#  pragma warning(push)
-#  pragma warning(disable: 4275 4251) // warning C4275: An exported class was derived from a class that wasn't exported. Can be ignored when derived from a STL class.
-#endif
 /// Fetch and hold an error which was already set in Python.  An instance of this is typically
 /// thrown to propagate python-side errors back through C++ which can either be caught manually or
 /// else falls back to the function dispatcher (which then raises the captured error back to
 /// python).
-class PYBIND11_EXPORT_EXCEPTION error_already_set : public std::runtime_error {
+class PYBIND11_EXPORT_EXCEPTION error_already_set : public std::exception {
 public:
-    /// Constructs a new exception from the current Python error indicator, if any.  The current
-    /// Python error indicator will be cleared.
-    error_already_set() : std::runtime_error(detail::error_string()) {
-        PyErr_Fetch(&m_type.ptr(), &m_value.ptr(), &m_trace.ptr());
-    }
+    /// Fetches the current Python exception (using PyErr_Fetch()), which will clear the
+    /// current Python error indicator.
+    error_already_set()
+        : m_fetched_error{new detail::error_fetch_and_normalize("pybind11::error_already_set"),
+                          m_fetched_error_deleter} {}
 
-    error_already_set(const error_already_set &) = default;
-    error_already_set(error_already_set &&) = default;
+    /// The what() result is built lazily on demand.
+    /// WARNING: This member function needs to acquire the Python GIL. This can lead to
+    ///          crashes (undefined behavior) if the Python interpreter is finalizing.
+    const char *what() const noexcept override;
 
-    inline ~error_already_set() override;
+    /// Restores the currently-held Python error (which will clear the Python error indicator first
+    /// if already set).
+    /// NOTE: This member function will always restore the normalized exception, which may or may
+    ///       not be the original Python exception.
+    /// WARNING: The GIL must be held when this member function is called!
+    void restore() { m_fetched_error->restore(); }
 
-    /// Give the currently-held error back to Python, if any.  If there is currently a Python error
-    /// already set it is cleared first.  After this call, the current object no longer stores the
-    /// error variables (but the `.what()` string is still available).
-    void restore() { PyErr_Restore(m_type.release().ptr(), m_value.release().ptr(), m_trace.release().ptr()); }
-
-    /// If it is impossible to raise the currently-held error, such as in a destructor, we can write
-    /// it out using Python's unraisable hook (`sys.unraisablehook`). The error context should be
-    /// some object whose `repr()` helps identify the location of the error. Python already knows the
-    /// type and value of the error, so there is no need to repeat that. After this call, the current
-    /// object no longer stores the error variables, and neither does Python.
+    /// If it is impossible to raise the currently-held error, such as in a destructor, we can
+    /// write it out using Python's unraisable hook (`sys.unraisablehook`). The error context
+    /// should be some object whose `repr()` helps identify the location of the error. Python
+    /// already knows the type and value of the error, so there is no need to repeat that.
     void discard_as_unraisable(object err_context) {
         restore();
         PyErr_WriteUnraisable(err_context.ptr());
     }
-    /// An alternate version of `discard_as_unraisable()`, where a string provides information on the
-    /// location of the error. For example, `__func__` could be helpful.
+    /// An alternate version of `discard_as_unraisable()`, where a string provides information on
+    /// the location of the error. For example, `__func__` could be helpful.
+    /// WARNING: The GIL must be held when this member function is called!
     void discard_as_unraisable(const char *err_context) {
         discard_as_unraisable(reinterpret_steal<object>(PYBIND11_FROM_STRING(err_context)));
     }
@@ -375,22 +702,19 @@ public:
     /// Check if the currently trapped error type matches the given Python exception class (or a
     /// subclass thereof).  May also be passed a tuple to search for any exception class matches in
     /// the given tuple.
-    bool matches(handle exc) const {
-        return (PyErr_GivenExceptionMatches(m_type.ptr(), exc.ptr()) != 0);
-    }
+    bool matches(handle exc) const { return m_fetched_error->matches(exc); }
 
-    const object& type() const { return m_type; }
-    const object& value() const { return m_value; }
-    const object& trace() const { return m_trace; }
+    const object &type() const { return m_fetched_error->m_type; }
+    const object &value() const { return m_fetched_error->m_value; }
+    const object &trace() const { return m_fetched_error->m_trace; }
 
 private:
-    object m_type, m_value, m_trace;
-};
-#if defined(_MSC_VER)
-#  pragma warning(pop)
-#endif
+    std::shared_ptr<detail::error_fetch_and_normalize> m_fetched_error;
 
-#if PY_VERSION_HEX >= 0x03030000
+    /// WARNING: This custom deleter needs to acquire the Python GIL. This can lead to
+    ///          crashes (undefined behavior) if the Python interpreter is finalizing.
+    static void m_fetched_error_deleter(detail::error_fetch_and_normalize *raw_ptr);
+};
 
 /// Replaces the current Python error indicator with the chosen error, performing a
 /// 'raise from' to indicate that the chosen error was caused by the original error.
@@ -422,16 +746,13 @@ inline void raise_from(PyObject *type, const char *message) {
 
 /// Sets the current Python error indicator with the chosen error, performing a 'raise from'
 /// from the error contained in error_already_set to indicate that the chosen error was
-/// caused by the original error. After this function is called error_already_set will
-/// no longer contain an error.
-inline void raise_from(error_already_set& err, PyObject *type, const char *message) {
+/// caused by the original error.
+inline void raise_from(error_already_set &err, PyObject *type, const char *message) {
     err.restore();
     raise_from(type, message);
 }
 
-#endif
-
-/** \defgroup python_builtins _
+/** \defgroup python_builtins const_name
     Unless stated otherwise, the following C++ functions behave the same
     as their Python counterparts.
  */
@@ -442,20 +763,29 @@ inline void raise_from(error_already_set& err, PyObject *type, const char *messa
     `object` or a class which was exposed to Python as ``py::class_<T>``.
 \endrst */
 template <typename T, detail::enable_if_t<std::is_base_of<object, T>::value, int> = 0>
-bool isinstance(handle obj) { return T::check_(obj); }
+bool isinstance(handle obj) {
+    return T::check_(obj);
+}
 
 template <typename T, detail::enable_if_t<!std::is_base_of<object, T>::value, int> = 0>
-bool isinstance(handle obj) { return detail::isinstance_generic(obj, typeid(T)); }
+bool isinstance(handle obj) {
+    return detail::isinstance_generic(obj, typeid(T));
+}
 
-template <> inline bool isinstance<handle>(handle) = delete;
-template <> inline bool isinstance<object>(handle obj) { return obj.ptr() != nullptr; }
+template <>
+inline bool isinstance<handle>(handle) = delete;
+template <>
+inline bool isinstance<object>(handle obj) {
+    return obj.ptr() != nullptr;
+}
 
 /// \ingroup python_builtins
 /// Return true if ``obj`` is an instance of the ``type``.
 inline bool isinstance(handle obj, handle type) {
     const auto result = PyObject_IsInstance(obj.ptr(), type.ptr());
-    if (result == -1)
+    if (result == -1) {
         throw error_already_set();
+    }
     return result != 0;
 }
 
@@ -470,22 +800,30 @@ inline bool hasattr(handle obj, const char *name) {
 }
 
 inline void delattr(handle obj, handle name) {
-    if (PyObject_DelAttr(obj.ptr(), name.ptr()) != 0) { throw error_already_set(); }
+    if (PyObject_DelAttr(obj.ptr(), name.ptr()) != 0) {
+        throw error_already_set();
+    }
 }
 
 inline void delattr(handle obj, const char *name) {
-    if (PyObject_DelAttrString(obj.ptr(), name) != 0) { throw error_already_set(); }
+    if (PyObject_DelAttrString(obj.ptr(), name) != 0) {
+        throw error_already_set();
+    }
 }
 
 inline object getattr(handle obj, handle name) {
     PyObject *result = PyObject_GetAttr(obj.ptr(), name.ptr());
-    if (!result) { throw error_already_set(); }
+    if (!result) {
+        throw error_already_set();
+    }
     return reinterpret_steal<object>(result);
 }
 
 inline object getattr(handle obj, const char *name) {
     PyObject *result = PyObject_GetAttrString(obj.ptr(), name);
-    if (!result) { throw error_already_set(); }
+    if (!result) {
+        throw error_already_set();
+    }
     return reinterpret_steal<object>(result);
 }
 
@@ -506,16 +844,22 @@ inline object getattr(handle obj, const char *name, handle default_) {
 }
 
 inline void setattr(handle obj, handle name, handle value) {
-    if (PyObject_SetAttr(obj.ptr(), name.ptr(), value.ptr()) != 0) { throw error_already_set(); }
+    if (PyObject_SetAttr(obj.ptr(), name.ptr(), value.ptr()) != 0) {
+        throw error_already_set();
+    }
 }
 
 inline void setattr(handle obj, const char *name, handle value) {
-    if (PyObject_SetAttrString(obj.ptr(), name, value.ptr()) != 0) { throw error_already_set(); }
+    if (PyObject_SetAttrString(obj.ptr(), name, value.ptr()) != 0) {
+        throw error_already_set();
+    }
 }
 
 inline ssize_t hash(handle obj) {
     auto h = PyObject_Hash(obj.ptr());
-    if (h == -1) { throw error_already_set(); }
+    if (h == -1) {
+        throw error_already_set();
+    }
     return h;
 }
 
@@ -524,13 +868,11 @@ inline ssize_t hash(handle obj) {
 PYBIND11_NAMESPACE_BEGIN(detail)
 inline handle get_function(handle value) {
     if (value) {
-#if PY_MAJOR_VERSION >= 3
-        if (PyInstanceMethod_Check(value.ptr()))
+        if (PyInstanceMethod_Check(value.ptr())) {
             value = PyInstanceMethod_GET_FUNCTION(value.ptr());
-        else
-#endif
-        if (PyMethod_Check(value.ptr()))
+        } else if (PyMethod_Check(value.ptr())) {
             value = PyMethod_GET_FUNCTION(value.ptr());
+        }
     }
     return value;
 }
@@ -539,95 +881,99 @@ inline handle get_function(handle value) {
 // aren't swallowed (see #2862)
 
 // copied from cpython _PyDict_GetItemStringWithError
-inline PyObject * dict_getitemstring(PyObject *v, const char *key)
-{
-#if PY_MAJOR_VERSION >= 3
+inline PyObject *dict_getitemstring(PyObject *v, const char *key) {
     PyObject *kv = nullptr, *rv = nullptr;
     kv = PyUnicode_FromString(key);
-    if (kv == NULL) {
+    if (kv == nullptr) {
         throw error_already_set();
     }
 
     rv = PyDict_GetItemWithError(v, kv);
     Py_DECREF(kv);
-    if (rv == NULL && PyErr_Occurred()) {
+    if (rv == nullptr && PyErr_Occurred()) {
         throw error_already_set();
     }
     return rv;
-#else
-    return PyDict_GetItemString(v, key);
-#endif
 }
 
-inline PyObject * dict_getitem(PyObject *v, PyObject *key)
-{
-#if PY_MAJOR_VERSION >= 3
+inline PyObject *dict_getitem(PyObject *v, PyObject *key) {
     PyObject *rv = PyDict_GetItemWithError(v, key);
-    if (rv == NULL && PyErr_Occurred()) {
+    if (rv == nullptr && PyErr_Occurred()) {
         throw error_already_set();
     }
     return rv;
-#else
-    return PyDict_GetItem(v, key);
-#endif
 }
 
-// Helper aliases/functions to support implicit casting of values given to python accessors/methods.
-// When given a pyobject, this simply returns the pyobject as-is; for other C++ type, the value goes
-// through pybind11::cast(obj) to convert it to an `object`.
+// Helper aliases/functions to support implicit casting of values given to python
+// accessors/methods. When given a pyobject, this simply returns the pyobject as-is; for other C++
+// type, the value goes through pybind11::cast(obj) to convert it to an `object`.
 template <typename T, enable_if_t<is_pyobject<T>::value, int> = 0>
-auto object_or_cast(T &&o) -> decltype(std::forward<T>(o)) { return std::forward<T>(o); }
+auto object_or_cast(T &&o) -> decltype(std::forward<T>(o)) {
+    return std::forward<T>(o);
+}
 // The following casting version is implemented in cast.h:
 template <typename T, enable_if_t<!is_pyobject<T>::value, int> = 0>
 object object_or_cast(T &&o);
 // Match a PyObject*, which we want to convert directly to handle via its converting constructor
 inline handle object_or_cast(PyObject *ptr) { return ptr; }
 
-#if defined(_MSC_VER) && _MSC_VER < 1920
-#  pragma warning(push)
-#  pragma warning(disable: 4522) // warning C4522: multiple assignment operators specified
-#endif
+PYBIND11_WARNING_PUSH
+PYBIND11_WARNING_DISABLE_MSVC(4522) // warning C4522: multiple assignment operators specified
 template <typename Policy>
 class accessor : public object_api<accessor<Policy>> {
     using key_type = typename Policy::key_type;
 
 public:
-    accessor(handle obj, key_type key) : obj(obj), key(std::move(key)) { }
+    accessor(handle obj, key_type key) : obj(obj), key(std::move(key)) {}
     accessor(const accessor &) = default;
     accessor(accessor &&) noexcept = default;
 
-    // accessor overload required to override default assignment operator (templates are not allowed
-    // to replace default compiler-generated assignments).
+    // accessor overload required to override default assignment operator (templates are not
+    // allowed to replace default compiler-generated assignments).
     void operator=(const accessor &a) && { std::move(*this).operator=(handle(a)); }
     void operator=(const accessor &a) & { operator=(handle(a)); }
 
-    template <typename T> void operator=(T &&value) && {
+    template <typename T>
+    void operator=(T &&value) && {
         Policy::set(obj, key, object_or_cast(std::forward<T>(value)));
     }
-    template <typename T> void operator=(T &&value) & {
-        get_cache() = reinterpret_borrow<object>(object_or_cast(std::forward<T>(value)));
+    template <typename T>
+    void operator=(T &&value) & {
+        get_cache() = ensure_object(object_or_cast(std::forward<T>(value)));
     }
 
     template <typename T = Policy>
-    PYBIND11_DEPRECATED("Use of obj.attr(...) as bool is deprecated in favor of pybind11::hasattr(obj, ...)")
-    explicit operator enable_if_t<std::is_same<T, accessor_policies::str_attr>::value ||
-            std::is_same<T, accessor_policies::obj_attr>::value, bool>() const {
+    PYBIND11_DEPRECATED(
+        "Use of obj.attr(...) as bool is deprecated in favor of pybind11::hasattr(obj, ...)")
+    explicit
+    operator enable_if_t<std::is_same<T, accessor_policies::str_attr>::value
+                             || std::is_same<T, accessor_policies::obj_attr>::value,
+                         bool>() const {
         return hasattr(obj, key);
     }
     template <typename T = Policy>
     PYBIND11_DEPRECATED("Use of obj[key] as bool is deprecated in favor of obj.contains(key)")
-    explicit operator enable_if_t<std::is_same<T, accessor_policies::generic_item>::value, bool>() const {
+    explicit
+    operator enable_if_t<std::is_same<T, accessor_policies::generic_item>::value, bool>() const {
         return obj.contains(key);
     }
 
     // NOLINTNEXTLINE(google-explicit-constructor)
     operator object() const { return get_cache(); }
     PyObject *ptr() const { return get_cache().ptr(); }
-    template <typename T> T cast() const { return get_cache().template cast<T>(); }
+    template <typename T>
+    T cast() const {
+        return get_cache().template cast<T>();
+    }
 
 private:
+    static object ensure_object(object &&o) { return std::move(o); }
+    static object ensure_object(handle h) { return reinterpret_borrow<object>(h); }
+
     object &get_cache() const {
-        if (!cache) { cache = Policy::get(obj, key); }
+        if (!cache) {
+            cache = Policy::get(obj, key);
+        }
         return cache;
     }
 
@@ -636,9 +982,7 @@ private:
     key_type key;
     mutable object cache;
 };
-#if defined(_MSC_VER) && _MSC_VER < 1920
-#  pragma warning(pop)
-#endif
+PYBIND11_WARNING_POP
 
 PYBIND11_NAMESPACE_BEGIN(accessor_policies)
 struct obj_attr {
@@ -658,12 +1002,16 @@ struct generic_item {
 
     static object get(handle obj, handle key) {
         PyObject *result = PyObject_GetItem(obj.ptr(), key.ptr());
-        if (!result) { throw error_already_set(); }
+        if (!result) {
+            throw error_already_set();
+        }
         return reinterpret_steal<object>(result);
     }
 
     static void set(handle obj, handle key, handle val) {
-        if (PyObject_SetItem(obj.ptr(), key.ptr(), val.ptr()) != 0) { throw error_already_set(); }
+        if (PyObject_SetItem(obj.ptr(), key.ptr(), val.ptr()) != 0) {
+            throw error_already_set();
+        }
     }
 };
 
@@ -673,7 +1021,9 @@ struct sequence_item {
     template <typename IdxType, detail::enable_if_t<std::is_integral<IdxType>::value, int> = 0>
     static object get(handle obj, const IdxType &index) {
         PyObject *result = PySequence_GetItem(obj.ptr(), ssize_t_cast(index));
-        if (!result) { throw error_already_set(); }
+        if (!result) {
+            throw error_already_set();
+        }
         return reinterpret_steal<object>(result);
     }
 
@@ -692,7 +1042,9 @@ struct list_item {
     template <typename IdxType, detail::enable_if_t<std::is_integral<IdxType>::value, int> = 0>
     static object get(handle obj, const IdxType &index) {
         PyObject *result = PyList_GetItem(obj.ptr(), ssize_t_cast(index));
-        if (!result) { throw error_already_set(); }
+        if (!result) {
+            throw error_already_set();
+        }
         return reinterpret_borrow<object>(result);
     }
 
@@ -711,7 +1063,9 @@ struct tuple_item {
     template <typename IdxType, detail::enable_if_t<std::is_integral<IdxType>::value, int> = 0>
     static object get(handle obj, const IdxType &index) {
         PyObject *result = PyTuple_GetItem(obj.ptr(), ssize_t_cast(index));
-        if (!result) { throw error_already_set(); }
+        if (!result) {
+            throw error_already_set();
+        }
         return reinterpret_borrow<object>(result);
     }
 
@@ -738,7 +1092,7 @@ public:
     using pointer = typename Policy::pointer;
 
     generic_iterator() = default;
-    generic_iterator(handle seq, ssize_t index) : Policy(seq, index) { }
+    generic_iterator(handle seq, ssize_t index) : Policy(seq, index) {}
 
     // NOLINTNEXTLINE(readability-const-return-type) // PR #3263
     reference operator*() const { return Policy::dereference(); }
@@ -746,22 +1100,48 @@ public:
     reference operator[](difference_type n) const { return *(*this + n); }
     pointer operator->() const { return **this; }
 
-    It &operator++() { Policy::increment(); return *this; }
-    It operator++(int) { auto copy = *this; Policy::increment(); return copy; }
-    It &operator--() { Policy::decrement(); return *this; }
-    It operator--(int) { auto copy = *this; Policy::decrement(); return copy; }
-    It &operator+=(difference_type n) { Policy::advance(n); return *this; }
-    It &operator-=(difference_type n) { Policy::advance(-n); return *this; }
+    It &operator++() {
+        Policy::increment();
+        return *this;
+    }
+    It operator++(int) {
+        auto copy = *this;
+        Policy::increment();
+        return copy;
+    }
+    It &operator--() {
+        Policy::decrement();
+        return *this;
+    }
+    It operator--(int) {
+        auto copy = *this;
+        Policy::decrement();
+        return copy;
+    }
+    It &operator+=(difference_type n) {
+        Policy::advance(n);
+        return *this;
+    }
+    It &operator-=(difference_type n) {
+        Policy::advance(-n);
+        return *this;
+    }
 
-    friend It operator+(const It &a, difference_type n) { auto copy = a; return copy += n; }
+    friend It operator+(const It &a, difference_type n) {
+        auto copy = a;
+        return copy += n;
+    }
     friend It operator+(difference_type n, const It &b) { return b + n; }
-    friend It operator-(const It &a, difference_type n) { auto copy = a; return copy -= n; }
+    friend It operator-(const It &a, difference_type n) {
+        auto copy = a;
+        return copy -= n;
+    }
     friend difference_type operator-(const It &a, const It &b) { return a.distance_to(b); }
 
     friend bool operator==(const It &a, const It &b) { return a.equal(b); }
     friend bool operator!=(const It &a, const It &b) { return !(a == b); }
-    friend bool operator< (const It &a, const It &b) { return b - a > 0; }
-    friend bool operator> (const It &a, const It &b) { return b < a; }
+    friend bool operator<(const It &a, const It &b) { return b - a > 0; }
+    friend bool operator>(const It &a, const It &b) { return b < a; }
     friend bool operator>=(const It &a, const It &b) { return !(a < b); }
     friend bool operator<=(const It &a, const It &b) { return !(a > b); }
 };
@@ -773,7 +1153,7 @@ struct arrow_proxy {
     T value;
 
     // NOLINTNEXTLINE(google-explicit-constructor)
-    arrow_proxy(T &&value) noexcept : value(std::move(value)) { }
+    arrow_proxy(T &&value) noexcept : value(std::move(value)) {}
     T *operator->() const { return &value; }
 };
 
@@ -785,7 +1165,7 @@ protected:
     using reference = const handle; // PR #3263
     using pointer = arrow_proxy<const handle>;
 
-    sequence_fast_readonly(handle obj, ssize_t n) : ptr(PySequence_Fast_ITEMS(obj.ptr()) + n) { }
+    sequence_fast_readonly(handle obj, ssize_t n) : ptr(PySequence_Fast_ITEMS(obj.ptr()) + n) {}
 
     // NOLINTNEXTLINE(readability-const-return-type) // PR #3263
     reference dereference() const { return *ptr; }
@@ -807,7 +1187,7 @@ protected:
     using reference = sequence_accessor;
     using pointer = arrow_proxy<const sequence_accessor>;
 
-    sequence_slow_readwrite(handle obj, ssize_t index) : obj(obj), index(index) { }
+    sequence_slow_readwrite(handle obj, ssize_t index) : obj(obj), index(index) {}
 
     reference dereference() const { return {obj, static_cast<size_t>(index)}; }
     void increment() { ++index; }
@@ -873,33 +1253,38 @@ inline bool PyNone_Check(PyObject *o) { return o == Py_None; }
 inline bool PyEllipsis_Check(PyObject *o) { return o == Py_Ellipsis; }
 
 #ifdef PYBIND11_STR_LEGACY_PERMISSIVE
-inline bool PyUnicode_Check_Permissive(PyObject *o) { return PyUnicode_Check(o) || PYBIND11_BYTES_CHECK(o); }
-#define PYBIND11_STR_CHECK_FUN detail::PyUnicode_Check_Permissive
+inline bool PyUnicode_Check_Permissive(PyObject *o) {
+    return PyUnicode_Check(o) || PYBIND11_BYTES_CHECK(o);
+}
+#    define PYBIND11_STR_CHECK_FUN detail::PyUnicode_Check_Permissive
 #else
-#define PYBIND11_STR_CHECK_FUN PyUnicode_Check
+#    define PYBIND11_STR_CHECK_FUN PyUnicode_Check
 #endif
 
 inline bool PyStaticMethod_Check(PyObject *o) { return o->ob_type == &PyStaticMethod_Type; }
 
 class kwargs_proxy : public handle {
 public:
-    explicit kwargs_proxy(handle h) : handle(h) { }
+    explicit kwargs_proxy(handle h) : handle(h) {}
 };
 
 class args_proxy : public handle {
 public:
-    explicit args_proxy(handle h) : handle(h) { }
+    explicit args_proxy(handle h) : handle(h) {}
     kwargs_proxy operator*() const { return kwargs_proxy(*this); }
 };
 
 /// Python argument categories (using PEP 448 terms)
-template <typename T> using is_keyword = std::is_base_of<arg, T>;
-template <typename T> using is_s_unpacking = std::is_same<args_proxy, T>; // * unpacking
-template <typename T> using is_ds_unpacking = std::is_same<kwargs_proxy, T>; // ** unpacking
-template <typename T> using is_positional = satisfies_none_of<T,
-    is_keyword, is_s_unpacking, is_ds_unpacking
->;
-template <typename T> using is_keyword_or_ds = satisfies_any_of<T, is_keyword, is_ds_unpacking>;
+template <typename T>
+using is_keyword = std::is_base_of<arg, T>;
+template <typename T>
+using is_s_unpacking = std::is_same<args_proxy, T>; // * unpacking
+template <typename T>
+using is_ds_unpacking = std::is_same<kwargs_proxy, T>; // ** unpacking
+template <typename T>
+using is_positional = satisfies_none_of<T, is_keyword, is_s_unpacking, is_ds_unpacking>;
+template <typename T>
+using is_keyword_or_ds = satisfies_any_of<T, is_keyword, is_ds_unpacking>;
 
 // Call argument collector forward declarations
 template <return_value_policy policy = return_value_policy::automatic_reference>
@@ -913,53 +1298,60 @@ PYBIND11_NAMESPACE_END(detail)
 //       inheriting ctors: `using Parent::Parent`. It's not an option right now because
 //       the `using` statement triggers the parent deprecation warning even if the ctor
 //       isn't even used.
-#define PYBIND11_OBJECT_COMMON(Name, Parent, CheckFun) \
-    public: \
-        PYBIND11_DEPRECATED("Use reinterpret_borrow<"#Name">() or reinterpret_steal<"#Name">()") \
-        Name(handle h, bool is_borrowed) : Parent(is_borrowed ? Parent(h, borrowed_t{}) : Parent(h, stolen_t{})) { } \
-        Name(handle h, borrowed_t) : Parent(h, borrowed_t{}) { } \
-        Name(handle h, stolen_t) : Parent(h, stolen_t{}) { } \
-        PYBIND11_DEPRECATED("Use py::isinstance<py::python_type>(obj) instead") \
-        bool check() const { return m_ptr != nullptr && (CheckFun(m_ptr) != 0); } \
-        static bool check_(handle h) { return h.ptr() != nullptr && CheckFun(h.ptr()); } \
-        template <typename Policy_> \
-        /* NOLINTNEXTLINE(google-explicit-constructor) */ \
-        Name(const ::pybind11::detail::accessor<Policy_> &a) : Name(object(a)) { }
+#define PYBIND11_OBJECT_COMMON(Name, Parent, CheckFun)                                            \
+public:                                                                                           \
+    PYBIND11_DEPRECATED("Use reinterpret_borrow<" #Name ">() or reinterpret_steal<" #Name ">()")  \
+    Name(handle h, bool is_borrowed)                                                              \
+        : Parent(is_borrowed ? Parent(h, borrowed_t{}) : Parent(h, stolen_t{})) {}                \
+    Name(handle h, borrowed_t) : Parent(h, borrowed_t{}) {}                                       \
+    Name(handle h, stolen_t) : Parent(h, stolen_t{}) {}                                           \
+    PYBIND11_DEPRECATED("Use py::isinstance<py::python_type>(obj) instead")                       \
+    bool check() const { return m_ptr != nullptr && (CheckFun(m_ptr) != 0); }                     \
+    static bool check_(handle h) { return h.ptr() != nullptr && CheckFun(h.ptr()); }              \
+    template <typename Policy_> /* NOLINTNEXTLINE(google-explicit-constructor) */                 \
+    Name(const ::pybind11::detail::accessor<Policy_> &a) : Name(object(a)) {}
 
-#define PYBIND11_OBJECT_CVT(Name, Parent, CheckFun, ConvertFun) \
-    PYBIND11_OBJECT_COMMON(Name, Parent, CheckFun) \
-    /* This is deliberately not 'explicit' to allow implicit conversion from object: */ \
-    /* NOLINTNEXTLINE(google-explicit-constructor) */ \
-    Name(const object &o) \
-    : Parent(check_(o) ? o.inc_ref().ptr() : ConvertFun(o.ptr()), stolen_t{}) \
-    { if (!m_ptr) throw error_already_set(); } \
-    /* NOLINTNEXTLINE(google-explicit-constructor) */ \
-    Name(object &&o) \
-    : Parent(check_(o) ? o.release().ptr() : ConvertFun(o.ptr()), stolen_t{}) \
-    { if (!m_ptr) throw error_already_set(); }
+#define PYBIND11_OBJECT_CVT(Name, Parent, CheckFun, ConvertFun)                                   \
+    PYBIND11_OBJECT_COMMON(Name, Parent, CheckFun)                                                \
+    /* This is deliberately not 'explicit' to allow implicit conversion from object: */           \
+    /* NOLINTNEXTLINE(google-explicit-constructor) */                                             \
+    Name(const object &o)                                                                         \
+        : Parent(check_(o) ? o.inc_ref().ptr() : ConvertFun(o.ptr()), stolen_t{}) {               \
+        if (!m_ptr)                                                                               \
+            throw ::pybind11::error_already_set();                                                \
+    }                                                                                             \
+    /* NOLINTNEXTLINE(google-explicit-constructor) */                                             \
+    Name(object &&o) : Parent(check_(o) ? o.release().ptr() : ConvertFun(o.ptr()), stolen_t{}) {  \
+        if (!m_ptr)                                                                               \
+            throw ::pybind11::error_already_set();                                                \
+    }
 
-#define PYBIND11_OBJECT_CVT_DEFAULT(Name, Parent, CheckFun, ConvertFun) \
-    PYBIND11_OBJECT_CVT(Name, Parent, CheckFun, ConvertFun) \
-    Name() : Parent() { }
+#define PYBIND11_OBJECT_CVT_DEFAULT(Name, Parent, CheckFun, ConvertFun)                           \
+    PYBIND11_OBJECT_CVT(Name, Parent, CheckFun, ConvertFun)                                       \
+    Name() = default;
 
-#define PYBIND11_OBJECT_CHECK_FAILED(Name, o_ptr) \
-    ::pybind11::type_error("Object of type '" + \
-                           ::pybind11::detail::get_fully_qualified_tp_name(Py_TYPE(o_ptr)) + \
-                           "' is not an instance of '" #Name "'")
+#define PYBIND11_OBJECT_CHECK_FAILED(Name, o_ptr)                                                 \
+    ::pybind11::type_error("Object of type '"                                                     \
+                           + ::pybind11::detail::get_fully_qualified_tp_name(Py_TYPE(o_ptr))      \
+                           + "' is not an instance of '" #Name "'")
 
-#define PYBIND11_OBJECT(Name, Parent, CheckFun) \
-    PYBIND11_OBJECT_COMMON(Name, Parent, CheckFun) \
-    /* This is deliberately not 'explicit' to allow implicit conversion from object: */ \
-    /* NOLINTNEXTLINE(google-explicit-constructor) */ \
-    Name(const object &o) : Parent(o) \
-    { if (m_ptr && !check_(m_ptr)) throw PYBIND11_OBJECT_CHECK_FAILED(Name, m_ptr); } \
-    /* NOLINTNEXTLINE(google-explicit-constructor) */ \
-    Name(object &&o) : Parent(std::move(o)) \
-    { if (m_ptr && !check_(m_ptr)) throw PYBIND11_OBJECT_CHECK_FAILED(Name, m_ptr); }
+#define PYBIND11_OBJECT(Name, Parent, CheckFun)                                                   \
+    PYBIND11_OBJECT_COMMON(Name, Parent, CheckFun)                                                \
+    /* This is deliberately not 'explicit' to allow implicit conversion from object: */           \
+    /* NOLINTNEXTLINE(google-explicit-constructor) */                                             \
+    Name(const object &o) : Parent(o) {                                                           \
+        if (m_ptr && !check_(m_ptr))                                                              \
+            throw PYBIND11_OBJECT_CHECK_FAILED(Name, m_ptr);                                      \
+    }                                                                                             \
+    /* NOLINTNEXTLINE(google-explicit-constructor) */                                             \
+    Name(object &&o) : Parent(std::move(o)) {                                                     \
+        if (m_ptr && !check_(m_ptr))                                                              \
+            throw PYBIND11_OBJECT_CHECK_FAILED(Name, m_ptr);                                      \
+    }
 
-#define PYBIND11_OBJECT_DEFAULT(Name, Parent, CheckFun) \
-    PYBIND11_OBJECT(Name, Parent, CheckFun) \
-    Name() : Parent() { }
+#define PYBIND11_OBJECT_DEFAULT(Name, Parent, CheckFun)                                           \
+    PYBIND11_OBJECT(Name, Parent, CheckFun)                                                       \
+    Name() = default;
 
 /// \addtogroup pytypes
 /// @{
@@ -982,7 +1374,7 @@ public:
 
     PYBIND11_OBJECT_DEFAULT(iterator, object, PyIter_Check)
 
-    iterator& operator++() {
+    iterator &operator++() {
         advance();
         return *this;
     }
@@ -996,13 +1388,16 @@ public:
     // NOLINTNEXTLINE(readability-const-return-type) // PR #3263
     reference operator*() const {
         if (m_ptr && !value.ptr()) {
-            auto& self = const_cast<iterator &>(*this);
+            auto &self = const_cast<iterator &>(*this);
             self.advance();
         }
         return value;
     }
 
-    pointer operator->() const { operator*(); return &value; }
+    pointer operator->() const {
+        operator*();
+        return &value;
+    }
 
     /** \rst
          The value which marks the end of the iteration. ``it == iterator::sentinel()``
@@ -1025,21 +1420,21 @@ public:
 private:
     void advance() {
         value = reinterpret_steal<object>(PyIter_Next(m_ptr));
-        if (PyErr_Occurred()) { throw error_already_set(); }
+        if (value.ptr() == nullptr && PyErr_Occurred()) {
+            throw error_already_set();
+        }
     }
 
 private:
     object value = {};
 };
 
-
-
 class type : public object {
 public:
     PYBIND11_OBJECT(type, object, PyType_Check)
 
     /// Return a type handle from a handle or an object
-    static handle handle_of(handle h) { return handle((PyObject*) Py_TYPE(h.ptr())); }
+    static handle handle_of(handle h) { return handle((PyObject *) Py_TYPE(h.ptr())); }
 
     /// Return a type object from a handle or an object
     static type of(handle h) { return type(type::handle_of(h), borrowed_t{}); }
@@ -1048,14 +1443,16 @@ public:
     /// Convert C++ type to handle if previously registered. Does not convert
     /// standard types, like int, float. etc. yet.
     /// See https://github.com/pybind/pybind11/issues/2486
-    template<typename T>
+    template <typename T>
     static handle handle_of();
 
     /// Convert C++ type to type if previously registered. Does not convert
     /// standard types, like int, float. etc. yet.
     /// See https://github.com/pybind/pybind11/issues/2486
-    template<typename T>
-    static type of() {return type(type::handle_of<T>(), borrowed_t{}); }
+    template <typename T>
+    static type of() {
+        return type(type::handle_of<T>(), borrowed_t{});
+    }
 };
 
 class iterable : public object {
@@ -1072,18 +1469,42 @@ public:
     template <typename SzType, detail::enable_if_t<std::is_integral<SzType>::value, int> = 0>
     str(const char *c, const SzType &n)
         : object(PyUnicode_FromStringAndSize(c, ssize_t_cast(n)), stolen_t{}) {
-        if (!m_ptr) pybind11_fail("Could not allocate string object!");
+        if (!m_ptr) {
+            if (PyErr_Occurred()) {
+                throw error_already_set();
+            }
+            pybind11_fail("Could not allocate string object!");
+        }
     }
 
-    // 'explicit' is explicitly omitted from the following constructors to allow implicit conversion to py::str from C++ string-like objects
+    // 'explicit' is explicitly omitted from the following constructors to allow implicit
+    // conversion to py::str from C++ string-like objects
     // NOLINTNEXTLINE(google-explicit-constructor)
-    str(const char *c = "")
-        : object(PyUnicode_FromString(c), stolen_t{}) {
-        if (!m_ptr) pybind11_fail("Could not allocate string object!");
+    str(const char *c = "") : object(PyUnicode_FromString(c), stolen_t{}) {
+        if (!m_ptr) {
+            if (PyErr_Occurred()) {
+                throw error_already_set();
+            }
+            pybind11_fail("Could not allocate string object!");
+        }
     }
 
     // NOLINTNEXTLINE(google-explicit-constructor)
-    str(const std::string &s) : str(s.data(), s.size()) { }
+    str(const std::string &s) : str(s.data(), s.size()) {}
+
+#ifdef PYBIND11_HAS_STRING_VIEW
+    // enable_if is needed to avoid "ambiguous conversion" errors (see PR #3521).
+    template <typename T, detail::enable_if_t<std::is_same<T, std::string_view>::value, int> = 0>
+    // NOLINTNEXTLINE(google-explicit-constructor)
+    str(T s) : str(s.data(), s.size()) {}
+
+#    ifdef PYBIND11_HAS_U8STRING
+    // reinterpret_cast here is safe (C++20 guarantees char8_t has the same size/alignment as char)
+    // NOLINTNEXTLINE(google-explicit-constructor)
+    str(std::u8string_view s) : str(reinterpret_cast<const char *>(s.data()), s.size()) {}
+#    endif
+
+#endif
 
     explicit str(const bytes &b);
 
@@ -1091,20 +1512,26 @@ public:
         Return a string representation of the object. This is analogous to
         the ``str()`` function in Python.
     \endrst */
-    explicit str(handle h) : object(raw_str(h.ptr()), stolen_t{}) { if (!m_ptr) throw error_already_set(); }
+    explicit str(handle h) : object(raw_str(h.ptr()), stolen_t{}) {
+        if (!m_ptr) {
+            throw error_already_set();
+        }
+    }
 
     // NOLINTNEXTLINE(google-explicit-constructor)
     operator std::string() const {
         object temp = *this;
         if (PyUnicode_Check(m_ptr)) {
             temp = reinterpret_steal<object>(PyUnicode_AsUTF8String(m_ptr));
-            if (!temp)
+            if (!temp) {
                 throw error_already_set();
+            }
         }
         char *buffer = nullptr;
         ssize_t length = 0;
-        if (PYBIND11_BYTES_AS_STRING_AND_SIZE(temp.ptr(), &buffer, &length))
-            pybind11_fail("Unable to extract string contents! (invalid type)");
+        if (PyBytes_AsStringAndSize(temp.ptr(), &buffer, &length) != 0) {
+            throw error_already_set();
+        }
         return std::string(buffer, (size_t) length);
     }
 
@@ -1117,11 +1544,6 @@ private:
     /// Return string representation -- always returns a new reference, even if already a str
     static PyObject *raw_str(PyObject *op) {
         PyObject *str_value = PyObject_Str(op);
-#if PY_MAJOR_VERSION < 3
-        if (!str_value) throw error_already_set();
-        PyObject *unicode = PyUnicode_FromEncodedObject(str_value, "utf-8", nullptr);
-        Py_XDECREF(str_value); str_value = unicode;
-#endif
         return str_value;
     }
 };
@@ -1142,30 +1564,50 @@ public:
 
     // Allow implicit conversion:
     // NOLINTNEXTLINE(google-explicit-constructor)
-    bytes(const char *c = "")
-        : object(PYBIND11_BYTES_FROM_STRING(c), stolen_t{}) {
-        if (!m_ptr) pybind11_fail("Could not allocate bytes object!");
+    bytes(const char *c = "") : object(PYBIND11_BYTES_FROM_STRING(c), stolen_t{}) {
+        if (!m_ptr) {
+            pybind11_fail("Could not allocate bytes object!");
+        }
     }
 
     template <typename SzType, detail::enable_if_t<std::is_integral<SzType>::value, int> = 0>
     bytes(const char *c, const SzType &n)
         : object(PYBIND11_BYTES_FROM_STRING_AND_SIZE(c, ssize_t_cast(n)), stolen_t{}) {
-        if (!m_ptr) pybind11_fail("Could not allocate bytes object!");
+        if (!m_ptr) {
+            pybind11_fail("Could not allocate bytes object!");
+        }
     }
 
     // Allow implicit conversion:
     // NOLINTNEXTLINE(google-explicit-constructor)
-    bytes(const std::string &s) : bytes(s.data(), s.size()) { }
+    bytes(const std::string &s) : bytes(s.data(), s.size()) {}
 
     explicit bytes(const pybind11::str &s);
 
     // NOLINTNEXTLINE(google-explicit-constructor)
-    operator std::string() const {
+    operator std::string() const { return string_op<std::string>(); }
+
+#ifdef PYBIND11_HAS_STRING_VIEW
+    // enable_if is needed to avoid "ambiguous conversion" errors (see PR #3521).
+    template <typename T, detail::enable_if_t<std::is_same<T, std::string_view>::value, int> = 0>
+    // NOLINTNEXTLINE(google-explicit-constructor)
+    bytes(T s) : bytes(s.data(), s.size()) {}
+
+    // Obtain a string view that views the current `bytes` buffer value.  Note that this is only
+    // valid so long as the `bytes` instance remains alive and so generally should not outlive the
+    // lifetime of the `bytes` instance.
+    // NOLINTNEXTLINE(google-explicit-constructor)
+    operator std::string_view() const { return string_op<std::string_view>(); }
+#endif
+private:
+    template <typename T>
+    T string_op() const {
         char *buffer = nullptr;
         ssize_t length = 0;
-        if (PYBIND11_BYTES_AS_STRING_AND_SIZE(m_ptr, &buffer, &length))
-            pybind11_fail("Unable to extract bytes contents!");
-        return std::string(buffer, (size_t) length);
+        if (PyBytes_AsStringAndSize(m_ptr, &buffer, &length) != 0) {
+            throw error_already_set();
+        }
+        return {buffer, static_cast<size_t>(length)};
     }
 };
 // Note: breathe >= 4.17.0 will fail to build docs if the below two constructors
@@ -1176,27 +1618,35 @@ inline bytes::bytes(const pybind11::str &s) {
     object temp = s;
     if (PyUnicode_Check(s.ptr())) {
         temp = reinterpret_steal<object>(PyUnicode_AsUTF8String(s.ptr()));
-        if (!temp)
-            pybind11_fail("Unable to extract string contents! (encoding issue)");
+        if (!temp) {
+            throw error_already_set();
+        }
     }
     char *buffer = nullptr;
     ssize_t length = 0;
-    if (PYBIND11_BYTES_AS_STRING_AND_SIZE(temp.ptr(), &buffer, &length))
-        pybind11_fail("Unable to extract string contents! (invalid type)");
+    if (PyBytes_AsStringAndSize(temp.ptr(), &buffer, &length) != 0) {
+        throw error_already_set();
+    }
     auto obj = reinterpret_steal<object>(PYBIND11_BYTES_FROM_STRING_AND_SIZE(buffer, length));
-    if (!obj)
+    if (!obj) {
         pybind11_fail("Could not allocate bytes object!");
+    }
     m_ptr = obj.release().ptr();
 }
 
-inline str::str(const bytes& b) {
+inline str::str(const bytes &b) {
     char *buffer = nullptr;
     ssize_t length = 0;
-    if (PYBIND11_BYTES_AS_STRING_AND_SIZE(b.ptr(), &buffer, &length))
-        pybind11_fail("Unable to extract bytes contents!");
+    if (PyBytes_AsStringAndSize(b.ptr(), &buffer, &length) != 0) {
+        throw error_already_set();
+    }
     auto obj = reinterpret_steal<object>(PyUnicode_FromStringAndSize(buffer, length));
-    if (!obj)
+    if (!obj) {
+        if (PyErr_Occurred()) {
+            throw error_already_set();
+        }
         pybind11_fail("Could not allocate string object!");
+    }
     m_ptr = obj.release().ptr();
 }
 
@@ -1209,13 +1659,14 @@ public:
     template <typename SzType, detail::enable_if_t<std::is_integral<SzType>::value, int> = 0>
     bytearray(const char *c, const SzType &n)
         : object(PyByteArray_FromStringAndSize(c, ssize_t_cast(n)), stolen_t{}) {
-        if (!m_ptr) pybind11_fail("Could not allocate bytearray object!");
+        if (!m_ptr) {
+            pybind11_fail("Could not allocate bytearray object!");
+        }
     }
 
-    bytearray()
-        : bytearray("", 0) {}
+    bytearray() : bytearray("", 0) {}
 
-    explicit bytearray(const std::string &s) : bytearray(s.data(), s.size()) { }
+    explicit bytearray(const std::string &s) : bytearray(s.data(), s.size()) {}
 
     size_t size() const { return static_cast<size_t>(PyByteArray_Size(m_ptr)); }
 
@@ -1234,22 +1685,22 @@ public:
 class none : public object {
 public:
     PYBIND11_OBJECT(none, object, detail::PyNone_Check)
-    none() : object(Py_None, borrowed_t{}) { }
+    none() : object(Py_None, borrowed_t{}) {}
 };
 
 class ellipsis : public object {
 public:
     PYBIND11_OBJECT(ellipsis, object, detail::PyEllipsis_Check)
-    ellipsis() : object(Py_Ellipsis, borrowed_t{}) { }
+    ellipsis() : object(Py_Ellipsis, borrowed_t{}) {}
 };
 
 class bool_ : public object {
 public:
     PYBIND11_OBJECT_CVT(bool_, object, PyBool_Check, raw_bool)
-    bool_() : object(Py_False, borrowed_t{}) { }
+    bool_() : object(Py_False, borrowed_t{}) {}
     // Allow implicit conversion from and to `bool`:
     // NOLINTNEXTLINE(google-explicit-constructor)
-    bool_(bool value) : object(value ? Py_True : Py_False, borrowed_t{}) { }
+    bool_(bool value) : object(value ? Py_True : Py_False, borrowed_t{}) {}
     // NOLINTNEXTLINE(google-explicit-constructor)
     operator bool() const { return (m_ptr != nullptr) && PyLong_AsLong(m_ptr) != 0; }
 
@@ -1257,7 +1708,9 @@ private:
     /// Return the truth value of an object -- always returns a new reference
     static PyObject *raw_bool(PyObject *op) {
         const auto value = PyObject_IsTrue(op);
-        if (value == -1) return nullptr;
+        if (value == -1) {
+            return nullptr;
+        }
         return handle(value != 0 ? Py_True : Py_False).inc_ref().ptr();
     }
 };
@@ -1269,11 +1722,7 @@ PYBIND11_NAMESPACE_BEGIN(detail)
 // unsigned type: (A)-1 != (B)-1 when A and B are unsigned types of different sizes).
 template <typename Unsigned>
 Unsigned as_unsigned(PyObject *o) {
-    if (PYBIND11_SILENCE_MSVC_C4127(sizeof(Unsigned) <= sizeof(unsigned long))
-#if PY_VERSION_HEX < 0x03000000
-        || PyInt_Check(o)
-#endif
-    ) {
+    if (sizeof(Unsigned) <= sizeof(unsigned long)) {
         unsigned long v = PyLong_AsUnsignedLong(o);
         return v == (unsigned long) -1 && PyErr_Occurred() ? (Unsigned) -1 : (Unsigned) v;
     }
@@ -1285,35 +1734,35 @@ PYBIND11_NAMESPACE_END(detail)
 class int_ : public object {
 public:
     PYBIND11_OBJECT_CVT(int_, object, PYBIND11_LONG_CHECK, PyNumber_Long)
-    int_() : object(PyLong_FromLong(0), stolen_t{}) { }
+    int_() : object(PyLong_FromLong(0), stolen_t{}) {}
     // Allow implicit conversion from C++ integral types:
-    template <typename T,
-              detail::enable_if_t<std::is_integral<T>::value, int> = 0>
+    template <typename T, detail::enable_if_t<std::is_integral<T>::value, int> = 0>
     // NOLINTNEXTLINE(google-explicit-constructor)
     int_(T value) {
-        if (PYBIND11_SILENCE_MSVC_C4127(sizeof(T) <= sizeof(long))) {
-            if (std::is_signed<T>::value)
+        if (sizeof(T) <= sizeof(long)) {
+            if (std::is_signed<T>::value) {
                 m_ptr = PyLong_FromLong((long) value);
-            else
+            } else {
                 m_ptr = PyLong_FromUnsignedLong((unsigned long) value);
+            }
         } else {
-            if (std::is_signed<T>::value)
+            if (std::is_signed<T>::value) {
                 m_ptr = PyLong_FromLongLong((long long) value);
-            else
+            } else {
                 m_ptr = PyLong_FromUnsignedLongLong((unsigned long long) value);
+            }
+        }
+        if (!m_ptr) {
+            pybind11_fail("Could not allocate int object!");
         }
-        if (!m_ptr) pybind11_fail("Could not allocate int object!");
     }
 
-    template <typename T,
-              detail::enable_if_t<std::is_integral<T>::value, int> = 0>
+    template <typename T, detail::enable_if_t<std::is_integral<T>::value, int> = 0>
     // NOLINTNEXTLINE(google-explicit-constructor)
     operator T() const {
-        return std::is_unsigned<T>::value
-            ? detail::as_unsigned<T>(m_ptr)
-            : sizeof(T) <= sizeof(long)
-              ? (T) PyLong_AsLong(m_ptr)
-              : (T) PYBIND11_LONG_AS_LONGLONG(m_ptr);
+        return std::is_unsigned<T>::value  ? detail::as_unsigned<T>(m_ptr)
+               : sizeof(T) <= sizeof(long) ? (T) PyLong_AsLong(m_ptr)
+                                           : (T) PYBIND11_LONG_AS_LONGLONG(m_ptr);
     }
 };
 
@@ -1323,11 +1772,15 @@ public:
     // Allow implicit conversion from float/double:
     // NOLINTNEXTLINE(google-explicit-constructor)
     float_(float value) : object(PyFloat_FromDouble((double) value), stolen_t{}) {
-        if (!m_ptr) pybind11_fail("Could not allocate float object!");
+        if (!m_ptr) {
+            pybind11_fail("Could not allocate float object!");
+        }
     }
     // NOLINTNEXTLINE(google-explicit-constructor)
     float_(double value = .0) : object(PyFloat_FromDouble((double) value), stolen_t{}) {
-        if (!m_ptr) pybind11_fail("Could not allocate float object!");
+        if (!m_ptr) {
+            pybind11_fail("Could not allocate float object!");
+        }
     }
     // NOLINTNEXTLINE(google-explicit-constructor)
     operator float() const { return (float) PyFloat_AsDouble(m_ptr); }
@@ -1340,22 +1793,26 @@ public:
     PYBIND11_OBJECT_CVT_DEFAULT(weakref, object, PyWeakref_Check, raw_weakref)
     explicit weakref(handle obj, handle callback = {})
         : object(PyWeakref_NewRef(obj.ptr(), callback.ptr()), stolen_t{}) {
-        if (!m_ptr) pybind11_fail("Could not allocate weak reference!");
+        if (!m_ptr) {
+            if (PyErr_Occurred()) {
+                throw error_already_set();
+            }
+            pybind11_fail("Could not allocate weak reference!");
+        }
     }
 
 private:
-    static PyObject *raw_weakref(PyObject *o) {
-        return PyWeakref_NewRef(o, nullptr);
-    }
+    static PyObject *raw_weakref(PyObject *o) { return PyWeakref_NewRef(o, nullptr); }
 };
 
 class slice : public object {
 public:
     PYBIND11_OBJECT_DEFAULT(slice, object, PySlice_Check)
-    slice(handle start, handle stop, handle step) {
-        m_ptr = PySlice_New(start.ptr(), stop.ptr(), step.ptr());
-        if (!m_ptr)
+    slice(handle start, handle stop, handle step)
+        : object(PySlice_New(start.ptr(), stop.ptr(), step.ptr()), stolen_t{}) {
+        if (!m_ptr) {
             pybind11_fail("Could not allocate slice object!");
+        }
     }
 
 #ifdef PYBIND11_HAS_OPTIONAL
@@ -1366,19 +1823,21 @@ public:
         : slice(int_(start_), int_(stop_), int_(step_)) {}
 #endif
 
-    bool compute(size_t length, size_t *start, size_t *stop, size_t *step,
-                 size_t *slicelength) const {
+    bool
+    compute(size_t length, size_t *start, size_t *stop, size_t *step, size_t *slicelength) const {
         return PySlice_GetIndicesEx((PYBIND11_SLICE_OBJECT *) m_ptr,
-                                    (ssize_t) length, (ssize_t *) start,
-                                    (ssize_t *) stop, (ssize_t *) step,
-                                    (ssize_t *) slicelength) == 0;
+                                    (ssize_t) length,
+                                    (ssize_t *) start,
+                                    (ssize_t *) stop,
+                                    (ssize_t *) step,
+                                    (ssize_t *) slicelength)
+               == 0;
     }
-    bool compute(ssize_t length, ssize_t *start, ssize_t *stop, ssize_t *step,
-      ssize_t *slicelength) const {
-      return PySlice_GetIndicesEx((PYBIND11_SLICE_OBJECT *) m_ptr,
-          length, start,
-          stop, step,
-          slicelength) == 0;
+    bool compute(
+        ssize_t length, ssize_t *start, ssize_t *stop, ssize_t *step, ssize_t *slicelength) const {
+        return PySlice_GetIndicesEx(
+                   (PYBIND11_SLICE_OBJECT *) m_ptr, length, start, stop, step, slicelength)
+               == 0;
     }
 
 private:
@@ -1392,58 +1851,77 @@ class capsule : public object {
 public:
     PYBIND11_OBJECT_DEFAULT(capsule, object, PyCapsule_CheckExact)
     PYBIND11_DEPRECATED("Use reinterpret_borrow<capsule>() or reinterpret_steal<capsule>()")
-    capsule(PyObject *ptr, bool is_borrowed) : object(is_borrowed ? object(ptr, borrowed_t{}) : object(ptr, stolen_t{})) { }
+    capsule(PyObject *ptr, bool is_borrowed)
+        : object(is_borrowed ? object(ptr, borrowed_t{}) : object(ptr, stolen_t{})) {}
 
-    explicit capsule(const void *value, const char *name = nullptr, void (*destructor)(PyObject *) = nullptr)
+    explicit capsule(const void *value,
+                     const char *name = nullptr,
+                     PyCapsule_Destructor destructor = nullptr)
         : object(PyCapsule_New(const_cast<void *>(value), name, destructor), stolen_t{}) {
-        if (!m_ptr)
-            pybind11_fail("Could not allocate capsule object!");
+        if (!m_ptr) {
+            throw error_already_set();
+        }
     }
 
-    PYBIND11_DEPRECATED("Please pass a destructor that takes a void pointer as input")
-    capsule(const void *value, void (*destruct)(PyObject *))
-        : object(PyCapsule_New(const_cast<void*>(value), nullptr, destruct), stolen_t{}) {
-        if (!m_ptr)
-            pybind11_fail("Could not allocate capsule object!");
+    PYBIND11_DEPRECATED("Please use the ctor with value, name, destructor args")
+    capsule(const void *value, PyCapsule_Destructor destructor)
+        : object(PyCapsule_New(const_cast<void *>(value), nullptr, destructor), stolen_t{}) {
+        if (!m_ptr) {
+            throw error_already_set();
+        }
     }
 
     capsule(const void *value, void (*destructor)(void *)) {
         m_ptr = PyCapsule_New(const_cast<void *>(value), nullptr, [](PyObject *o) {
+            // guard if destructor called while err indicator is set
+            error_scope error_guard;
             auto destructor = reinterpret_cast<void (*)(void *)>(PyCapsule_GetContext(o));
-            void *ptr = PyCapsule_GetPointer(o, nullptr);
-            destructor(ptr);
+            if (destructor == nullptr && PyErr_Occurred()) {
+                throw error_already_set();
+            }
+            const char *name = get_name_in_error_scope(o);
+            void *ptr = PyCapsule_GetPointer(o, name);
+            if (ptr == nullptr) {
+                throw error_already_set();
+            }
+
+            if (destructor != nullptr) {
+                destructor(ptr);
+            }
         });
 
-        if (!m_ptr)
-            pybind11_fail("Could not allocate capsule object!");
-
-        if (PyCapsule_SetContext(m_ptr, (void *) destructor) != 0)
-            pybind11_fail("Could not set capsule context!");
+        if (!m_ptr || PyCapsule_SetContext(m_ptr, reinterpret_cast<void *>(destructor)) != 0) {
+            throw error_already_set();
+        }
     }
 
     explicit capsule(void (*destructor)()) {
         m_ptr = PyCapsule_New(reinterpret_cast<void *>(destructor), nullptr, [](PyObject *o) {
-            auto destructor = reinterpret_cast<void (*)()>(PyCapsule_GetPointer(o, nullptr));
+            const char *name = get_name_in_error_scope(o);
+            auto destructor = reinterpret_cast<void (*)()>(PyCapsule_GetPointer(o, name));
+            if (destructor == nullptr) {
+                throw error_already_set();
+            }
             destructor();
         });
 
-        if (!m_ptr)
-            pybind11_fail("Could not allocate capsule object!");
+        if (!m_ptr) {
+            throw error_already_set();
+        }
     }
 
-    // NOLINTNEXTLINE(google-explicit-constructor)
-    template <typename T> operator T *() const {
+    template <typename T>
+    operator T *() const { // NOLINT(google-explicit-constructor)
         return get_pointer<T>();
     }
 
     /// Get the pointer the capsule holds.
-    template<typename T = void>
-    T* get_pointer() const {
-        auto name = this->name();
+    template <typename T = void>
+    T *get_pointer() const {
+        const auto *name = this->name();
         T *result = static_cast<T *>(PyCapsule_GetPointer(m_ptr, name));
         if (!result) {
-            PyErr_Clear();
-            pybind11_fail("Unable to extract capsule contents!");
+            throw error_already_set();
         }
         return result;
     }
@@ -1451,12 +1929,37 @@ public:
     /// Replaces a capsule's pointer *without* calling the destructor on the existing one.
     void set_pointer(const void *value) {
         if (PyCapsule_SetPointer(m_ptr, const_cast<void *>(value)) != 0) {
-            PyErr_Clear();
-            pybind11_fail("Could not set capsule pointer");
+            throw error_already_set();
         }
     }
 
-    const char *name() const { return PyCapsule_GetName(m_ptr); }
+    const char *name() const {
+        const char *name = PyCapsule_GetName(m_ptr);
+        if ((name == nullptr) && PyErr_Occurred()) {
+            throw error_already_set();
+        }
+        return name;
+    }
+
+    /// Replaces a capsule's name *without* calling the destructor on the existing one.
+    void set_name(const char *new_name) {
+        if (PyCapsule_SetName(m_ptr, new_name) != 0) {
+            throw error_already_set();
+        }
+    }
+
+private:
+    static const char *get_name_in_error_scope(PyObject *o) {
+        error_scope error_guard;
+
+        const char *name = PyCapsule_GetName(o);
+        if ((name == nullptr) && PyErr_Occurred()) {
+            // write out and consume error raised by call to PyCapsule_GetName
+            PyErr_WriteUnraisable(o);
+        }
+
+        return name;
+    }
 };
 
 class tuple : public object {
@@ -1466,12 +1969,17 @@ public:
               detail::enable_if_t<std::is_integral<SzType>::value, int> = 0>
     // Some compilers generate link errors when using `const SzType &` here:
     explicit tuple(SzType size = 0) : object(PyTuple_New(ssize_t_cast(size)), stolen_t{}) {
-        if (!m_ptr) pybind11_fail("Could not allocate tuple object!");
+        if (!m_ptr) {
+            pybind11_fail("Could not allocate tuple object!");
+        }
     }
     size_t size() const { return (size_t) PyTuple_Size(m_ptr); }
     bool empty() const { return size() == 0; }
     detail::tuple_accessor operator[](size_t index) const { return {*this, index}; }
-    detail::item_accessor operator[](handle h) const { return object::operator[](h); }
+    template <typename T, detail::enable_if_t<detail::is_pyobject<T>::value, int> = 0>
+    detail::item_accessor operator[](T &&o) const {
+        return object::operator[](std::forward<T>(o));
+    }
     detail::tuple_iterator begin() const { return {*this, 0}; }
     detail::tuple_iterator end() const { return {*this, PyTuple_GET_SIZE(m_ptr)}; }
 };
@@ -1480,37 +1988,45 @@ public:
 // fails to compile enable_if_t<all_of<is_keyword_or_ds<Args>...>::value> part below
 // (tested with ICC 2021.1 Beta 20200827).
 template <typename... Args>
-constexpr bool args_are_all_keyword_or_ds()
-{
-  return detail::all_of<detail::is_keyword_or_ds<Args>...>::value;
+constexpr bool args_are_all_keyword_or_ds() {
+    return detail::all_of<detail::is_keyword_or_ds<Args>...>::value;
 }
 
 class dict : public object {
 public:
     PYBIND11_OBJECT_CVT(dict, object, PyDict_Check, raw_dict)
     dict() : object(PyDict_New(), stolen_t{}) {
-        if (!m_ptr) pybind11_fail("Could not allocate dict object!");
+        if (!m_ptr) {
+            pybind11_fail("Could not allocate dict object!");
+        }
     }
     template <typename... Args,
               typename = detail::enable_if_t<args_are_all_keyword_or_ds<Args...>()>,
-              // MSVC workaround: it can't compile an out-of-line definition, so defer the collector
+              // MSVC workaround: it can't compile an out-of-line definition, so defer the
+              // collector
               typename collector = detail::deferred_t<detail::unpacking_collector<>, Args...>>
-    explicit dict(Args &&...args) : dict(collector(std::forward<Args>(args)...).kwargs()) { }
+    explicit dict(Args &&...args) : dict(collector(std::forward<Args>(args)...).kwargs()) {}
 
     size_t size() const { return (size_t) PyDict_Size(m_ptr); }
     bool empty() const { return size() == 0; }
     detail::dict_iterator begin() const { return {*this, 0}; }
     detail::dict_iterator end() const { return {}; }
     void clear() /* py-non-const */ { PyDict_Clear(ptr()); }
-    template <typename T> bool contains(T &&key) const {
-        return PyDict_Contains(m_ptr, detail::object_or_cast(std::forward<T>(key)).ptr()) == 1;
+    template <typename T>
+    bool contains(T &&key) const {
+        auto result = PyDict_Contains(m_ptr, detail::object_or_cast(std::forward<T>(key)).ptr());
+        if (result == -1) {
+            throw error_already_set();
+        }
+        return result == 1;
     }
 
 private:
     /// Call the `dict` Python type -- always returns a new reference
     static PyObject *raw_dict(PyObject *op) {
-        if (PyDict_Check(op))
+        if (PyDict_Check(op)) {
             return handle(op).inc_ref().ptr();
+        }
         return PyObject_CallFunctionObjArgs((PyObject *) &PyDict_Type, op, nullptr);
     }
 };
@@ -1520,13 +2036,17 @@ public:
     PYBIND11_OBJECT_DEFAULT(sequence, object, PySequence_Check)
     size_t size() const {
         ssize_t result = PySequence_Size(m_ptr);
-        if (result == -1)
+        if (result == -1) {
             throw error_already_set();
+        }
         return (size_t) result;
     }
     bool empty() const { return size() == 0; }
     detail::sequence_accessor operator[](size_t index) const { return {*this, index}; }
-    detail::item_accessor operator[](handle h) const { return object::operator[](h); }
+    template <typename T, detail::enable_if_t<detail::is_pyobject<T>::value, int> = 0>
+    detail::item_accessor operator[](T &&o) const {
+        return object::operator[](std::forward<T>(o));
+    }
     detail::sequence_iterator begin() const { return {*this, 0}; }
     detail::sequence_iterator end() const { return {*this, PySequence_Size(m_ptr)}; }
 };
@@ -1538,44 +2058,78 @@ public:
               detail::enable_if_t<std::is_integral<SzType>::value, int> = 0>
     // Some compilers generate link errors when using `const SzType &` here:
     explicit list(SzType size = 0) : object(PyList_New(ssize_t_cast(size)), stolen_t{}) {
-        if (!m_ptr) pybind11_fail("Could not allocate list object!");
+        if (!m_ptr) {
+            pybind11_fail("Could not allocate list object!");
+        }
     }
     size_t size() const { return (size_t) PyList_Size(m_ptr); }
     bool empty() const { return size() == 0; }
     detail::list_accessor operator[](size_t index) const { return {*this, index}; }
-    detail::item_accessor operator[](handle h) const { return object::operator[](h); }
+    template <typename T, detail::enable_if_t<detail::is_pyobject<T>::value, int> = 0>
+    detail::item_accessor operator[](T &&o) const {
+        return object::operator[](std::forward<T>(o));
+    }
     detail::list_iterator begin() const { return {*this, 0}; }
     detail::list_iterator end() const { return {*this, PyList_GET_SIZE(m_ptr)}; }
-    template <typename T> void append(T &&val) /* py-non-const */ {
-        PyList_Append(m_ptr, detail::object_or_cast(std::forward<T>(val)).ptr());
+    template <typename T>
+    void append(T &&val) /* py-non-const */ {
+        if (PyList_Append(m_ptr, detail::object_or_cast(std::forward<T>(val)).ptr()) != 0) {
+            throw error_already_set();
+        }
     }
     template <typename IdxType,
               typename ValType,
               detail::enable_if_t<std::is_integral<IdxType>::value, int> = 0>
     void insert(const IdxType &index, ValType &&val) /* py-non-const */ {
-        PyList_Insert(
-            m_ptr, ssize_t_cast(index), detail::object_or_cast(std::forward<ValType>(val)).ptr());
+        if (PyList_Insert(m_ptr,
+                          ssize_t_cast(index),
+                          detail::object_or_cast(std::forward<ValType>(val)).ptr())
+            != 0) {
+            throw error_already_set();
+        }
     }
 };
 
-class args : public tuple { PYBIND11_OBJECT_DEFAULT(args, tuple, PyTuple_Check) };
-class kwargs : public dict { PYBIND11_OBJECT_DEFAULT(kwargs, dict, PyDict_Check)  };
+class args : public tuple {
+    PYBIND11_OBJECT_DEFAULT(args, tuple, PyTuple_Check)
+};
+class kwargs : public dict {
+    PYBIND11_OBJECT_DEFAULT(kwargs, dict, PyDict_Check)
+};
 
-class set : public object {
+class anyset : public object {
 public:
-    PYBIND11_OBJECT_CVT(set, object, PySet_Check, PySet_New)
-    set() : object(PySet_New(nullptr), stolen_t{}) {
-        if (!m_ptr) pybind11_fail("Could not allocate set object!");
-    }
-    size_t size() const { return (size_t) PySet_Size(m_ptr); }
+    PYBIND11_OBJECT(anyset, object, PyAnySet_Check)
+    size_t size() const { return static_cast<size_t>(PySet_Size(m_ptr)); }
     bool empty() const { return size() == 0; }
-    template <typename T> bool add(T &&val) /* py-non-const */ {
+    template <typename T>
+    bool contains(T &&val) const {
+        auto result = PySet_Contains(m_ptr, detail::object_or_cast(std::forward<T>(val)).ptr());
+        if (result == -1) {
+            throw error_already_set();
+        }
+        return result == 1;
+    }
+};
+
+class set : public anyset {
+public:
+    PYBIND11_OBJECT_CVT(set, anyset, PySet_Check, PySet_New)
+    set() : anyset(PySet_New(nullptr), stolen_t{}) {
+        if (!m_ptr) {
+            pybind11_fail("Could not allocate set object!");
+        }
+    }
+    template <typename T>
+    bool add(T &&val) /* py-non-const */ {
         return PySet_Add(m_ptr, detail::object_or_cast(std::forward<T>(val)).ptr()) == 0;
     }
     void clear() /* py-non-const */ { PySet_Clear(m_ptr); }
-    template <typename T> bool contains(T &&val) const {
-        return PySet_Contains(m_ptr, detail::object_or_cast(std::forward<T>(val)).ptr()) == 1;
-    }
+};
+
+class frozenset : public anyset {
+public:
+    PYBIND11_OBJECT_CVT(frozenset, anyset, PyFrozenSet_Check, PyFrozenSet_New)
 };
 
 class function : public object {
@@ -1583,8 +2137,9 @@ public:
     PYBIND11_OBJECT_DEFAULT(function, object, PyCallable_Check)
     handle cpp_function() const {
         handle fun = detail::get_function(m_ptr);
-        if (fun && PyCFunction_Check(fun.ptr()))
+        if (fun && PyCFunction_Check(fun.ptr())) {
             return fun;
+        }
         return handle();
     }
     bool is_cpp_function() const { return (bool) cpp_function(); }
@@ -1601,7 +2156,9 @@ public:
 
     buffer_info request(bool writable = false) const {
         int flags = PyBUF_STRIDES | PyBUF_FORMAT;
-        if (writable) flags |= PyBUF_WRITABLE;
+        if (writable) {
+            flags |= PyBUF_WRITABLE;
+        }
         auto *view = new Py_buffer();
         if (PyObject_GetBuffer(m_ptr, view, flags) != 0) {
             delete view;
@@ -1624,15 +2181,16 @@ public:
         For creating a ``memoryview`` from objects that support buffer protocol,
         use ``memoryview(const object& obj)`` instead of this constructor.
      \endrst */
-    explicit memoryview(const buffer_info& info) {
-        if (!info.view())
+    explicit memoryview(const buffer_info &info) {
+        if (!info.view()) {
             pybind11_fail("Prohibited to create memoryview without Py_buffer");
+        }
         // Note: PyMemoryView_FromBuffer never increments obj reference.
-        m_ptr = (info.view()->obj) ?
-            PyMemoryView_FromObject(info.view()->obj) :
-            PyMemoryView_FromBuffer(info.view());
-        if (!m_ptr)
+        m_ptr = (info.view()->obj) ? PyMemoryView_FromObject(info.view()->obj)
+                                   : PyMemoryView_FromBuffer(info.view());
+        if (!m_ptr) {
             pybind11_fail("Unable to create memoryview from buffer descriptor");
+        }
     }
 
     /** \rst
@@ -1645,7 +2203,8 @@ public:
 
         See also: Python C API documentation for `PyMemoryView_FromBuffer`_.
 
-        .. _PyMemoryView_FromBuffer: https://docs.python.org/c-api/memoryview.html#c.PyMemoryView_FromBuffer
+        .. _PyMemoryView_FromBuffer:
+           https://docs.python.org/c-api/memoryview.html#c.PyMemoryView_FromBuffer
 
         :param ptr: Pointer to the buffer.
         :param itemsize: Byte size of an element.
@@ -1658,37 +2217,43 @@ public:
         :param readonly: Flag to indicate if the underlying storage may be
             written to.
      \endrst */
-    static memoryview from_buffer(
-        void *ptr, ssize_t itemsize, const char *format,
-        detail::any_container<ssize_t> shape,
-        detail::any_container<ssize_t> strides, bool readonly = false);
+    static memoryview from_buffer(void *ptr,
+                                  ssize_t itemsize,
+                                  const char *format,
+                                  detail::any_container<ssize_t> shape,
+                                  detail::any_container<ssize_t> strides,
+                                  bool readonly = false);
 
-    static memoryview from_buffer(
-        const void *ptr, ssize_t itemsize, const char *format,
-        detail::any_container<ssize_t> shape,
-        detail::any_container<ssize_t> strides) {
+    static memoryview from_buffer(const void *ptr,
+                                  ssize_t itemsize,
+                                  const char *format,
+                                  detail::any_container<ssize_t> shape,
+                                  detail::any_container<ssize_t> strides) {
         return memoryview::from_buffer(
             const_cast<void *>(ptr), itemsize, format, std::move(shape), std::move(strides), true);
     }
 
-    template<typename T>
-    static memoryview from_buffer(
-        T *ptr, detail::any_container<ssize_t> shape,
-        detail::any_container<ssize_t> strides, bool readonly = false) {
-        return memoryview::from_buffer(
-            reinterpret_cast<void*>(ptr), sizeof(T),
-            format_descriptor<T>::value, shape, strides, readonly);
+    template <typename T>
+    static memoryview from_buffer(T *ptr,
+                                  detail::any_container<ssize_t> shape,
+                                  detail::any_container<ssize_t> strides,
+                                  bool readonly = false) {
+        return memoryview::from_buffer(reinterpret_cast<void *>(ptr),
+                                       sizeof(T),
+                                       format_descriptor<T>::value,
+                                       std::move(shape),
+                                       std::move(strides),
+                                       readonly);
     }
 
-    template<typename T>
-    static memoryview from_buffer(
-        const T *ptr, detail::any_container<ssize_t> shape,
-        detail::any_container<ssize_t> strides) {
+    template <typename T>
+    static memoryview from_buffer(const T *ptr,
+                                  detail::any_container<ssize_t> shape,
+                                  detail::any_container<ssize_t> strides) {
         return memoryview::from_buffer(
-            const_cast<T*>(ptr), shape, strides, true);
+            const_cast<T *>(ptr), std::move(shape), std::move(strides), true);
     }
 
-#if PY_MAJOR_VERSION >= 3
     /** \rst
         Creates ``memoryview`` from static memory.
 
@@ -1696,56 +2261,65 @@ public:
         managed by Python. The caller is responsible for managing the lifetime
         of ``mem``, which MUST outlive the memoryview constructed here.
 
-        This method is not available in Python 2.
-
         See also: Python C API documentation for `PyMemoryView_FromBuffer`_.
 
-        .. _PyMemoryView_FromMemory: https://docs.python.org/c-api/memoryview.html#c.PyMemoryView_FromMemory
+        .. _PyMemoryView_FromMemory:
+           https://docs.python.org/c-api/memoryview.html#c.PyMemoryView_FromMemory
      \endrst */
     static memoryview from_memory(void *mem, ssize_t size, bool readonly = false) {
-        PyObject* ptr = PyMemoryView_FromMemory(
-            reinterpret_cast<char*>(mem), size,
-            (readonly) ? PyBUF_READ : PyBUF_WRITE);
-        if (!ptr)
+        PyObject *ptr = PyMemoryView_FromMemory(
+            reinterpret_cast<char *>(mem), size, (readonly) ? PyBUF_READ : PyBUF_WRITE);
+        if (!ptr) {
             pybind11_fail("Could not allocate memoryview object!");
+        }
         return memoryview(object(ptr, stolen_t{}));
     }
 
     static memoryview from_memory(const void *mem, ssize_t size) {
-        return memoryview::from_memory(const_cast<void*>(mem), size, true);
+        return memoryview::from_memory(const_cast<void *>(mem), size, true);
+    }
+
+#ifdef PYBIND11_HAS_STRING_VIEW
+    static memoryview from_memory(std::string_view mem) {
+        return from_memory(const_cast<char *>(mem.data()), static_cast<ssize_t>(mem.size()), true);
     }
 #endif
 };
 
-#ifndef DOXYGEN_SHOULD_SKIP_THIS
-inline memoryview memoryview::from_buffer(
-    void *ptr, ssize_t itemsize, const char* format,
-    detail::any_container<ssize_t> shape,
-    detail::any_container<ssize_t> strides, bool readonly) {
+/// @cond DUPLICATE
+inline memoryview memoryview::from_buffer(void *ptr,
+                                          ssize_t itemsize,
+                                          const char *format,
+                                          detail::any_container<ssize_t> shape,
+                                          detail::any_container<ssize_t> strides,
+                                          bool readonly) {
     size_t ndim = shape->size();
-    if (ndim != strides->size())
+    if (ndim != strides->size()) {
         pybind11_fail("memoryview: shape length doesn't match strides length");
+    }
     ssize_t size = ndim != 0u ? 1 : 0;
-    for (size_t i = 0; i < ndim; ++i)
+    for (size_t i = 0; i < ndim; ++i) {
         size *= (*shape)[i];
+    }
     Py_buffer view;
     view.buf = ptr;
     view.obj = nullptr;
     view.len = size * itemsize;
     view.readonly = static_cast<int>(readonly);
     view.itemsize = itemsize;
-    view.format = const_cast<char*>(format);
+    view.format = const_cast<char *>(format);
     view.ndim = static_cast<int>(ndim);
     view.shape = shape->data();
     view.strides = strides->data();
     view.suboffsets = nullptr;
     view.internal = nullptr;
-    PyObject* obj = PyMemoryView_FromBuffer(&view);
-    if (!obj)
+    PyObject *obj = PyMemoryView_FromBuffer(&view);
+    if (!obj) {
         throw error_already_set();
+    }
     return memoryview(object(obj, stolen_t{}));
 }
-#endif  // DOXYGEN_SHOULD_SKIP_THIS
+/// @endcond
 /// @} pytypes
 
 /// \addtogroup python_builtins
@@ -1754,19 +2328,16 @@ inline memoryview memoryview::from_buffer(
 /// Get the length of a Python object.
 inline size_t len(handle h) {
     ssize_t result = PyObject_Length(h.ptr());
-    if (result < 0)
+    if (result < 0) {
         throw error_already_set();
+    }
     return (size_t) result;
 }
 
 /// Get the length hint of a Python object.
 /// Returns 0 when this cannot be determined.
 inline size_t len_hint(handle h) {
-#if PY_VERSION_HEX >= 0x03040000
     ssize_t result = PyObject_LengthHint(h.ptr(), 0);
-#else
-    ssize_t result = PyObject_Length(h.ptr());
-#endif
     if (result < 0) {
         // Sometimes a length can't be determined at all (eg generators)
         // In which case simply return 0
@@ -1778,102 +2349,139 @@ inline size_t len_hint(handle h) {
 
 inline str repr(handle h) {
     PyObject *str_value = PyObject_Repr(h.ptr());
-    if (!str_value) throw error_already_set();
-#if PY_MAJOR_VERSION < 3
-    PyObject *unicode = PyUnicode_FromEncodedObject(str_value, "utf-8", nullptr);
-    Py_XDECREF(str_value); str_value = unicode;
-    if (!str_value) throw error_already_set();
-#endif
+    if (!str_value) {
+        throw error_already_set();
+    }
     return reinterpret_steal<str>(str_value);
 }
 
 inline iterator iter(handle obj) {
     PyObject *result = PyObject_GetIter(obj.ptr());
-    if (!result) { throw error_already_set(); }
+    if (!result) {
+        throw error_already_set();
+    }
     return reinterpret_steal<iterator>(result);
 }
 /// @} python_builtins
 
 PYBIND11_NAMESPACE_BEGIN(detail)
-template <typename D> iterator object_api<D>::begin() const { return iter(derived()); }
-template <typename D> iterator object_api<D>::end() const { return iterator::sentinel(); }
-template <typename D> item_accessor object_api<D>::operator[](handle key) const {
+template <typename D>
+iterator object_api<D>::begin() const {
+    return iter(derived());
+}
+template <typename D>
+iterator object_api<D>::end() const {
+    return iterator::sentinel();
+}
+template <typename D>
+item_accessor object_api<D>::operator[](handle key) const {
     return {derived(), reinterpret_borrow<object>(key)};
 }
-template <typename D> item_accessor object_api<D>::operator[](const char *key) const {
+template <typename D>
+item_accessor object_api<D>::operator[](object &&key) const {
+    return {derived(), std::move(key)};
+}
+template <typename D>
+item_accessor object_api<D>::operator[](const char *key) const {
     return {derived(), pybind11::str(key)};
 }
-template <typename D> obj_attr_accessor object_api<D>::attr(handle key) const {
+template <typename D>
+obj_attr_accessor object_api<D>::attr(handle key) const {
     return {derived(), reinterpret_borrow<object>(key)};
 }
-template <typename D> str_attr_accessor object_api<D>::attr(const char *key) const {
+template <typename D>
+obj_attr_accessor object_api<D>::attr(object &&key) const {
+    return {derived(), std::move(key)};
+}
+template <typename D>
+str_attr_accessor object_api<D>::attr(const char *key) const {
     return {derived(), key};
 }
-template <typename D> args_proxy object_api<D>::operator*() const {
+template <typename D>
+args_proxy object_api<D>::operator*() const {
     return args_proxy(derived().ptr());
 }
-template <typename D> template <typename T> bool object_api<D>::contains(T &&item) const {
+template <typename D>
+template <typename T>
+bool object_api<D>::contains(T &&item) const {
     return attr("__contains__")(std::forward<T>(item)).template cast<bool>();
 }
 
 template <typename D>
-pybind11::str object_api<D>::str() const { return pybind11::str(derived()); }
+pybind11::str object_api<D>::str() const {
+    return pybind11::str(derived());
+}
 
 template <typename D>
-str_attr_accessor object_api<D>::doc() const { return attr("__doc__"); }
+str_attr_accessor object_api<D>::doc() const {
+    return attr("__doc__");
+}
 
 template <typename D>
-handle object_api<D>::get_type() const { return type::handle_of(derived()); }
+handle object_api<D>::get_type() const {
+    return type::handle_of(derived());
+}
 
 template <typename D>
 bool object_api<D>::rich_compare(object_api const &other, int value) const {
     int rv = PyObject_RichCompareBool(derived().ptr(), other.derived().ptr(), value);
-    if (rv == -1)
+    if (rv == -1) {
         throw error_already_set();
+    }
     return rv == 1;
 }
 
-#define PYBIND11_MATH_OPERATOR_UNARY(op, fn)                                   \
-    template <typename D> object object_api<D>::op() const {                   \
-        object result = reinterpret_steal<object>(fn(derived().ptr()));        \
-        if (!result.ptr())                                                     \
-            throw error_already_set();                                         \
-        return result;                                                         \
+#define PYBIND11_MATH_OPERATOR_UNARY(op, fn)                                                      \
+    template <typename D>                                                                         \
+    object object_api<D>::op() const {                                                            \
+        object result = reinterpret_steal<object>(fn(derived().ptr()));                           \
+        if (!result.ptr())                                                                        \
+            throw error_already_set();                                                            \
+        return result;                                                                            \
     }
 
-#define PYBIND11_MATH_OPERATOR_BINARY(op, fn)                                  \
-    template <typename D>                                                      \
-    object object_api<D>::op(object_api const &other) const {                  \
-        object result = reinterpret_steal<object>(                             \
-            fn(derived().ptr(), other.derived().ptr()));                       \
-        if (!result.ptr())                                                     \
-            throw error_already_set();                                         \
-        return result;                                                         \
+#define PYBIND11_MATH_OPERATOR_BINARY(op, fn)                                                     \
+    template <typename D>                                                                         \
+    object object_api<D>::op(object_api const &other) const {                                     \
+        object result = reinterpret_steal<object>(fn(derived().ptr(), other.derived().ptr()));    \
+        if (!result.ptr())                                                                        \
+            throw error_already_set();                                                            \
+        return result;                                                                            \
     }
 
-PYBIND11_MATH_OPERATOR_UNARY (operator~,   PyNumber_Invert)
-PYBIND11_MATH_OPERATOR_UNARY (operator-,   PyNumber_Negative)
-PYBIND11_MATH_OPERATOR_BINARY(operator+,   PyNumber_Add)
-PYBIND11_MATH_OPERATOR_BINARY(operator+=,  PyNumber_InPlaceAdd)
-PYBIND11_MATH_OPERATOR_BINARY(operator-,   PyNumber_Subtract)
-PYBIND11_MATH_OPERATOR_BINARY(operator-=,  PyNumber_InPlaceSubtract)
-PYBIND11_MATH_OPERATOR_BINARY(operator*,   PyNumber_Multiply)
-PYBIND11_MATH_OPERATOR_BINARY(operator*=,  PyNumber_InPlaceMultiply)
-PYBIND11_MATH_OPERATOR_BINARY(operator/,   PyNumber_TrueDivide)
-PYBIND11_MATH_OPERATOR_BINARY(operator/=,  PyNumber_InPlaceTrueDivide)
-PYBIND11_MATH_OPERATOR_BINARY(operator|,   PyNumber_Or)
-PYBIND11_MATH_OPERATOR_BINARY(operator|=,  PyNumber_InPlaceOr)
-PYBIND11_MATH_OPERATOR_BINARY(operator&,   PyNumber_And)
-PYBIND11_MATH_OPERATOR_BINARY(operator&=,  PyNumber_InPlaceAnd)
-PYBIND11_MATH_OPERATOR_BINARY(operator^,   PyNumber_Xor)
-PYBIND11_MATH_OPERATOR_BINARY(operator^=,  PyNumber_InPlaceXor)
-PYBIND11_MATH_OPERATOR_BINARY(operator<<,  PyNumber_Lshift)
-PYBIND11_MATH_OPERATOR_BINARY(operator<<=, PyNumber_InPlaceLshift)
-PYBIND11_MATH_OPERATOR_BINARY(operator>>,  PyNumber_Rshift)
-PYBIND11_MATH_OPERATOR_BINARY(operator>>=, PyNumber_InPlaceRshift)
+#define PYBIND11_MATH_OPERATOR_BINARY_INPLACE(iop, fn)                                            \
+    template <typename D>                                                                         \
+    object object_api<D>::iop(object_api const &other) {                                          \
+        object result = reinterpret_steal<object>(fn(derived().ptr(), other.derived().ptr()));    \
+        if (!result.ptr())                                                                        \
+            throw error_already_set();                                                            \
+        return result;                                                                            \
+    }
+
+PYBIND11_MATH_OPERATOR_UNARY(operator~, PyNumber_Invert)
+PYBIND11_MATH_OPERATOR_UNARY(operator-, PyNumber_Negative)
+PYBIND11_MATH_OPERATOR_BINARY(operator+, PyNumber_Add)
+PYBIND11_MATH_OPERATOR_BINARY_INPLACE(operator+=, PyNumber_InPlaceAdd)
+PYBIND11_MATH_OPERATOR_BINARY(operator-, PyNumber_Subtract)
+PYBIND11_MATH_OPERATOR_BINARY_INPLACE(operator-=, PyNumber_InPlaceSubtract)
+PYBIND11_MATH_OPERATOR_BINARY(operator*, PyNumber_Multiply)
+PYBIND11_MATH_OPERATOR_BINARY_INPLACE(operator*=, PyNumber_InPlaceMultiply)
+PYBIND11_MATH_OPERATOR_BINARY(operator/, PyNumber_TrueDivide)
+PYBIND11_MATH_OPERATOR_BINARY_INPLACE(operator/=, PyNumber_InPlaceTrueDivide)
+PYBIND11_MATH_OPERATOR_BINARY(operator|, PyNumber_Or)
+PYBIND11_MATH_OPERATOR_BINARY_INPLACE(operator|=, PyNumber_InPlaceOr)
+PYBIND11_MATH_OPERATOR_BINARY(operator&, PyNumber_And)
+PYBIND11_MATH_OPERATOR_BINARY_INPLACE(operator&=, PyNumber_InPlaceAnd)
+PYBIND11_MATH_OPERATOR_BINARY(operator^, PyNumber_Xor)
+PYBIND11_MATH_OPERATOR_BINARY_INPLACE(operator^=, PyNumber_InPlaceXor)
+PYBIND11_MATH_OPERATOR_BINARY(operator<<, PyNumber_Lshift)
+PYBIND11_MATH_OPERATOR_BINARY_INPLACE(operator<<=, PyNumber_InPlaceLshift)
+PYBIND11_MATH_OPERATOR_BINARY(operator>>, PyNumber_Rshift)
+PYBIND11_MATH_OPERATOR_BINARY_INPLACE(operator>>=, PyNumber_InPlaceRshift)
 
 #undef PYBIND11_MATH_OPERATOR_UNARY
 #undef PYBIND11_MATH_OPERATOR_BINARY
+#undef PYBIND11_MATH_OPERATOR_BINARY_INPLACE
 
 PYBIND11_NAMESPACE_END(detail)
 PYBIND11_NAMESPACE_END(PYBIND11_NAMESPACE)
diff --git a/ext/pybind11/include/pybind11/stl.h b/ext/pybind11/include/pybind11/stl.h
index 3608d2989c..2d144b598b 100644
--- a/ext/pybind11/include/pybind11/stl.h
+++ b/ext/pybind11/include/pybind11/stl.h
@@ -9,26 +9,27 @@
 
 #pragma once
 
-#include "detail/common.h"
 #include "pybind11.h"
-#include <set>
-#include <unordered_set>
-#include <map>
-#include <unordered_map>
-#include <iostream>
-#include <list>
+#include "detail/common.h"
+
 #include <deque>
+#include <list>
+#include <map>
+#include <ostream>
+#include <set>
+#include <unordered_map>
+#include <unordered_set>
 #include <valarray>
 
 // See `detail/common.h` for implementation of these guards.
 #if defined(PYBIND11_HAS_OPTIONAL)
-#  include <optional>
+#    include <optional>
 #elif defined(PYBIND11_HAS_EXP_OPTIONAL)
-#  include <experimental/optional>
+#    include <experimental/optional>
 #endif
 
 #if defined(PYBIND11_HAS_VARIANT)
-#  include <variant>
+#    include <variant>
 #endif
 
 PYBIND11_NAMESPACE_BEGIN(PYBIND11_NAMESPACE)
@@ -37,29 +38,47 @@ PYBIND11_NAMESPACE_BEGIN(detail)
 /// Extracts an const lvalue reference or rvalue reference for U based on the type of T (e.g. for
 /// forwarding a container element).  Typically used indirect via forwarded_type(), below.
 template <typename T, typename U>
-using forwarded_type = conditional_t<
-    std::is_lvalue_reference<T>::value, remove_reference_t<U> &, remove_reference_t<U> &&>;
+using forwarded_type = conditional_t<std::is_lvalue_reference<T>::value,
+                                     remove_reference_t<U> &,
+                                     remove_reference_t<U> &&>;
 
 /// Forwards a value U as rvalue or lvalue according to whether T is rvalue or lvalue; typically
 /// used for forwarding a container's elements.
 template <typename T, typename U>
-forwarded_type<T, U> forward_like(U &&u) {
+constexpr forwarded_type<T, U> forward_like(U &&u) {
     return std::forward<detail::forwarded_type<T, U>>(std::forward<U>(u));
 }
 
-template <typename Type, typename Key> struct set_caster {
+// Checks if a container has a STL style reserve method.
+// This will only return true for a `reserve()` with a `void` return.
+template <typename C>
+using has_reserve_method = std::is_same<decltype(std::declval<C>().reserve(0)), void>;
+
+template <typename Type, typename Key>
+struct set_caster {
     using type = Type;
     using key_conv = make_caster<Key>;
 
+private:
+    template <typename T = Type, enable_if_t<has_reserve_method<T>::value, int> = 0>
+    void reserve_maybe(const anyset &s, Type *) {
+        value.reserve(s.size());
+    }
+    void reserve_maybe(const anyset &, void *) {}
+
+public:
     bool load(handle src, bool convert) {
-        if (!isinstance<pybind11::set>(src))
+        if (!isinstance<anyset>(src)) {
             return false;
-        auto s = reinterpret_borrow<pybind11::set>(src);
+        }
+        auto s = reinterpret_borrow<anyset>(src);
         value.clear();
+        reserve_maybe(s, &value);
         for (auto entry : s) {
             key_conv conv;
-            if (!conv.load(entry, convert))
+            if (!conv.load(entry, convert)) {
                 return false;
+            }
             value.insert(cast_op<Key &&>(std::move(conv)));
         }
         return true;
@@ -67,35 +86,49 @@ template <typename Type, typename Key> struct set_caster {
 
     template <typename T>
     static handle cast(T &&src, return_value_policy policy, handle parent) {
-        if (!std::is_lvalue_reference<T>::value)
+        if (!std::is_lvalue_reference<T>::value) {
             policy = return_value_policy_override<Key>::policy(policy);
+        }
         pybind11::set s;
         for (auto &&value : src) {
-            auto value_ = reinterpret_steal<object>(key_conv::cast(forward_like<T>(value), policy, parent));
-            if (!value_ || !s.add(value_))
+            auto value_ = reinterpret_steal<object>(
+                key_conv::cast(detail::forward_like<T>(value), policy, parent));
+            if (!value_ || !s.add(std::move(value_))) {
                 return handle();
+            }
         }
         return s.release();
     }
 
-    PYBIND11_TYPE_CASTER(type, _("Set[") + key_conv::name + _("]"));
+    PYBIND11_TYPE_CASTER(type, const_name("Set[") + key_conv::name + const_name("]"));
 };
 
-template <typename Type, typename Key, typename Value> struct map_caster {
-    using key_conv   = make_caster<Key>;
+template <typename Type, typename Key, typename Value>
+struct map_caster {
+    using key_conv = make_caster<Key>;
     using value_conv = make_caster<Value>;
 
+private:
+    template <typename T = Type, enable_if_t<has_reserve_method<T>::value, int> = 0>
+    void reserve_maybe(const dict &d, Type *) {
+        value.reserve(d.size());
+    }
+    void reserve_maybe(const dict &, void *) {}
+
+public:
     bool load(handle src, bool convert) {
-        if (!isinstance<dict>(src))
+        if (!isinstance<dict>(src)) {
             return false;
+        }
         auto d = reinterpret_borrow<dict>(src);
         value.clear();
+        reserve_maybe(d, &value);
         for (auto it : d) {
             key_conv kconv;
             value_conv vconv;
-            if (!kconv.load(it.first.ptr(), convert) ||
-                !vconv.load(it.second.ptr(), convert))
+            if (!kconv.load(it.first.ptr(), convert) || !vconv.load(it.second.ptr(), convert)) {
                 return false;
+            }
             value.emplace(cast_op<Key &&>(std::move(kconv)), cast_op<Value &&>(std::move(vconv)));
         }
         return true;
@@ -111,40 +144,46 @@ template <typename Type, typename Key, typename Value> struct map_caster {
             policy_value = return_value_policy_override<Value>::policy(policy_value);
         }
         for (auto &&kv : src) {
-            auto key = reinterpret_steal<object>(key_conv::cast(forward_like<T>(kv.first), policy_key, parent));
-            auto value = reinterpret_steal<object>(value_conv::cast(forward_like<T>(kv.second), policy_value, parent));
-            if (!key || !value)
+            auto key = reinterpret_steal<object>(
+                key_conv::cast(detail::forward_like<T>(kv.first), policy_key, parent));
+            auto value = reinterpret_steal<object>(
+                value_conv::cast(detail::forward_like<T>(kv.second), policy_value, parent));
+            if (!key || !value) {
                 return handle();
-            d[key] = value;
+            }
+            d[std::move(key)] = std::move(value);
         }
         return d.release();
     }
 
-    PYBIND11_TYPE_CASTER(Type, _("Dict[") + key_conv::name + _(", ") + value_conv::name + _("]"));
+    PYBIND11_TYPE_CASTER(Type,
+                         const_name("Dict[") + key_conv::name + const_name(", ") + value_conv::name
+                             + const_name("]"));
 };
 
-template <typename Type, typename Value> struct list_caster {
+template <typename Type, typename Value>
+struct list_caster {
     using value_conv = make_caster<Value>;
 
     bool load(handle src, bool convert) {
-        if (!isinstance<sequence>(src) || isinstance<bytes>(src) || isinstance<str>(src))
+        if (!isinstance<sequence>(src) || isinstance<bytes>(src) || isinstance<str>(src)) {
             return false;
+        }
         auto s = reinterpret_borrow<sequence>(src);
         value.clear();
         reserve_maybe(s, &value);
         for (auto it : s) {
             value_conv conv;
-            if (!conv.load(it, convert))
+            if (!conv.load(it, convert)) {
                 return false;
+            }
             value.push_back(cast_op<Value &&>(std::move(conv)));
         }
         return true;
     }
 
 private:
-    template <
-        typename T                                                                          = Type,
-        enable_if_t<std::is_same<decltype(std::declval<T>().reserve(0)), void>::value, int> = 0>
+    template <typename T = Type, enable_if_t<has_reserve_method<T>::value, int> = 0>
     void reserve_maybe(const sequence &s, Type *) {
         value.reserve(s.size());
     }
@@ -153,39 +192,44 @@ private:
 public:
     template <typename T>
     static handle cast(T &&src, return_value_policy policy, handle parent) {
-        if (!std::is_lvalue_reference<T>::value)
+        if (!std::is_lvalue_reference<T>::value) {
             policy = return_value_policy_override<Value>::policy(policy);
+        }
         list l(src.size());
         ssize_t index = 0;
         for (auto &&value : src) {
-            auto value_ = reinterpret_steal<object>(value_conv::cast(forward_like<T>(value), policy, parent));
-            if (!value_)
+            auto value_ = reinterpret_steal<object>(
+                value_conv::cast(detail::forward_like<T>(value), policy, parent));
+            if (!value_) {
                 return handle();
+            }
             PyList_SET_ITEM(l.ptr(), index++, value_.release().ptr()); // steals a reference
         }
         return l.release();
     }
 
-    PYBIND11_TYPE_CASTER(Type, _("List[") + value_conv::name + _("]"));
+    PYBIND11_TYPE_CASTER(Type, const_name("List[") + value_conv::name + const_name("]"));
 };
 
-template <typename Type, typename Alloc> struct type_caster<std::vector<Type, Alloc>>
- : list_caster<std::vector<Type, Alloc>, Type> { };
+template <typename Type, typename Alloc>
+struct type_caster<std::vector<Type, Alloc>> : list_caster<std::vector<Type, Alloc>, Type> {};
 
-template <typename Type, typename Alloc> struct type_caster<std::deque<Type, Alloc>>
- : list_caster<std::deque<Type, Alloc>, Type> { };
+template <typename Type, typename Alloc>
+struct type_caster<std::deque<Type, Alloc>> : list_caster<std::deque<Type, Alloc>, Type> {};
 
-template <typename Type, typename Alloc> struct type_caster<std::list<Type, Alloc>>
- : list_caster<std::list<Type, Alloc>, Type> { };
+template <typename Type, typename Alloc>
+struct type_caster<std::list<Type, Alloc>> : list_caster<std::list<Type, Alloc>, Type> {};
 
-template <typename ArrayType, typename Value, bool Resizable, size_t Size = 0> struct array_caster {
+template <typename ArrayType, typename Value, bool Resizable, size_t Size = 0>
+struct array_caster {
     using value_conv = make_caster<Value>;
 
 private:
     template <bool R = Resizable>
     bool require_size(enable_if_t<R, size_t> size) {
-        if (value.size() != size)
+        if (value.size() != size) {
             value.resize(size);
+        }
         return true;
     }
     template <bool R = Resizable>
@@ -195,16 +239,19 @@ private:
 
 public:
     bool load(handle src, bool convert) {
-        if (!isinstance<sequence>(src))
+        if (!isinstance<sequence>(src)) {
             return false;
+        }
         auto l = reinterpret_borrow<sequence>(src);
-        if (!require_size(l.size()))
+        if (!require_size(l.size())) {
             return false;
+        }
         size_t ctr = 0;
         for (auto it : l) {
             value_conv conv;
-            if (!conv.load(it, convert))
+            if (!conv.load(it, convert)) {
                 return false;
+            }
             value[ctr++] = cast_op<Value &&>(std::move(conv));
         }
         return true;
@@ -215,43 +262,57 @@ public:
         list l(src.size());
         ssize_t index = 0;
         for (auto &&value : src) {
-            auto value_ = reinterpret_steal<object>(value_conv::cast(forward_like<T>(value), policy, parent));
-            if (!value_)
+            auto value_ = reinterpret_steal<object>(
+                value_conv::cast(detail::forward_like<T>(value), policy, parent));
+            if (!value_) {
                 return handle();
+            }
             PyList_SET_ITEM(l.ptr(), index++, value_.release().ptr()); // steals a reference
         }
         return l.release();
     }
 
-    PYBIND11_TYPE_CASTER(ArrayType, _("List[") + value_conv::name + _<Resizable>(_(""), _("[") + _<Size>() + _("]")) + _("]"));
+    PYBIND11_TYPE_CASTER(ArrayType,
+                         const_name("List[") + value_conv::name
+                             + const_name<Resizable>(const_name(""),
+                                                     const_name("[") + const_name<Size>()
+                                                         + const_name("]"))
+                             + const_name("]"));
 };
 
-template <typename Type, size_t Size> struct type_caster<std::array<Type, Size>>
- : array_caster<std::array<Type, Size>, Type, false, Size> { };
+template <typename Type, size_t Size>
+struct type_caster<std::array<Type, Size>>
+    : array_caster<std::array<Type, Size>, Type, false, Size> {};
 
-template <typename Type> struct type_caster<std::valarray<Type>>
- : array_caster<std::valarray<Type>, Type, true> { };
+template <typename Type>
+struct type_caster<std::valarray<Type>> : array_caster<std::valarray<Type>, Type, true> {};
 
-template <typename Key, typename Compare, typename Alloc> struct type_caster<std::set<Key, Compare, Alloc>>
-  : set_caster<std::set<Key, Compare, Alloc>, Key> { };
+template <typename Key, typename Compare, typename Alloc>
+struct type_caster<std::set<Key, Compare, Alloc>>
+    : set_caster<std::set<Key, Compare, Alloc>, Key> {};
 
-template <typename Key, typename Hash, typename Equal, typename Alloc> struct type_caster<std::unordered_set<Key, Hash, Equal, Alloc>>
-  : set_caster<std::unordered_set<Key, Hash, Equal, Alloc>, Key> { };
+template <typename Key, typename Hash, typename Equal, typename Alloc>
+struct type_caster<std::unordered_set<Key, Hash, Equal, Alloc>>
+    : set_caster<std::unordered_set<Key, Hash, Equal, Alloc>, Key> {};
 
-template <typename Key, typename Value, typename Compare, typename Alloc> struct type_caster<std::map<Key, Value, Compare, Alloc>>
-  : map_caster<std::map<Key, Value, Compare, Alloc>, Key, Value> { };
+template <typename Key, typename Value, typename Compare, typename Alloc>
+struct type_caster<std::map<Key, Value, Compare, Alloc>>
+    : map_caster<std::map<Key, Value, Compare, Alloc>, Key, Value> {};
 
-template <typename Key, typename Value, typename Hash, typename Equal, typename Alloc> struct type_caster<std::unordered_map<Key, Value, Hash, Equal, Alloc>>
-  : map_caster<std::unordered_map<Key, Value, Hash, Equal, Alloc>, Key, Value> { };
+template <typename Key, typename Value, typename Hash, typename Equal, typename Alloc>
+struct type_caster<std::unordered_map<Key, Value, Hash, Equal, Alloc>>
+    : map_caster<std::unordered_map<Key, Value, Hash, Equal, Alloc>, Key, Value> {};
 
 // This type caster is intended to be used for std::optional and std::experimental::optional
-template<typename Type, typename Value = typename Type::value_type> struct optional_caster {
+template <typename Type, typename Value = typename Type::value_type>
+struct optional_caster {
     using value_conv = make_caster<Value>;
 
     template <typename T>
     static handle cast(T &&src, return_value_policy policy, handle parent) {
-        if (!src)
-            return none().inc_ref();
+        if (!src) {
+            return none().release();
+        }
         if (!std::is_lvalue_reference<T>::value) {
             policy = return_value_policy_override<Value>::policy(policy);
         }
@@ -263,32 +324,35 @@ template<typename Type, typename Value = typename Type::value_type> struct optio
             return false;
         }
         if (src.is_none()) {
-            return true;  // default-constructed value is already empty
+            return true; // default-constructed value is already empty
         }
         value_conv inner_caster;
-        if (!inner_caster.load(src, convert))
+        if (!inner_caster.load(src, convert)) {
             return false;
+        }
 
         value.emplace(cast_op<Value &&>(std::move(inner_caster)));
         return true;
     }
 
-    PYBIND11_TYPE_CASTER(Type, _("Optional[") + value_conv::name + _("]"));
+    PYBIND11_TYPE_CASTER(Type, const_name("Optional[") + value_conv::name + const_name("]"));
 };
 
 #if defined(PYBIND11_HAS_OPTIONAL)
-template<typename T> struct type_caster<std::optional<T>>
-    : public optional_caster<std::optional<T>> {};
+template <typename T>
+struct type_caster<std::optional<T>> : public optional_caster<std::optional<T>> {};
 
-template<> struct type_caster<std::nullopt_t>
-    : public void_caster<std::nullopt_t> {};
+template <>
+struct type_caster<std::nullopt_t> : public void_caster<std::nullopt_t> {};
 #endif
 
 #if defined(PYBIND11_HAS_EXP_OPTIONAL)
-template<typename T> struct type_caster<std::experimental::optional<T>>
+template <typename T>
+struct type_caster<std::experimental::optional<T>>
     : public optional_caster<std::experimental::optional<T>> {};
 
-template<> struct type_caster<std::experimental::nullopt_t>
+template <>
+struct type_caster<std::experimental::nullopt_t>
     : public void_caster<std::experimental::nullopt_t> {};
 #endif
 
@@ -309,7 +373,7 @@ struct variant_caster_visitor {
 /// `namespace::variant` types which provide a `namespace::visit()` function are handled here
 /// automatically using argument-dependent lookup. Users can provide specializations for other
 /// variant-like classes, e.g. `boost::variant` and `boost::apply_visitor`.
-template <template<typename...> class Variant>
+template <template <typename...> class Variant>
 struct visit_helper {
     template <typename... Args>
     static auto call(Args &&...args) -> decltype(visit(std::forward<Args>(args)...)) {
@@ -318,9 +382,10 @@ struct visit_helper {
 };
 
 /// Generic variant caster
-template <typename Variant> struct variant_caster;
+template <typename Variant>
+struct variant_caster;
 
-template <template<typename...> class V, typename... Ts>
+template <template <typename...> class V, typename... Ts>
 struct variant_caster<V<Ts...>> {
     static_assert(sizeof...(Ts) > 0, "Variant must consist of at least one alternative.");
 
@@ -328,7 +393,7 @@ struct variant_caster<V<Ts...>> {
     bool load_alternative(handle src, bool convert, type_list<U, Us...>) {
         auto caster = make_caster<U>();
         if (caster.load(src, convert)) {
-            value = cast_op<U>(caster);
+            value = cast_op<U>(std::move(caster));
             return true;
         }
         return load_alternative(src, convert, type_list<Us...>{});
@@ -341,8 +406,9 @@ struct variant_caster<V<Ts...>> {
         // E.g. `py::int_(1).cast<variant<double, int>>()` needs to fill the `int`
         // slot of the variant. Without two-pass loading `double` would be filled
         // because it appears first and a conversion is possible.
-        if (convert && load_alternative(src, false, type_list<Ts...>{}))
+        if (convert && load_alternative(src, false, type_list<Ts...>{})) {
             return true;
+        }
         return load_alternative(src, convert, type_list<Ts...>{});
     }
 
@@ -353,12 +419,17 @@ struct variant_caster<V<Ts...>> {
     }
 
     using Type = V<Ts...>;
-    PYBIND11_TYPE_CASTER(Type, _("Union[") + detail::concat(make_caster<Ts>::name...) + _("]"));
+    PYBIND11_TYPE_CASTER(Type,
+                         const_name("Union[") + detail::concat(make_caster<Ts>::name...)
+                             + const_name("]"));
 };
 
 #if defined(PYBIND11_HAS_VARIANT)
 template <typename... Ts>
-struct type_caster<std::variant<Ts...>> : variant_caster<std::variant<Ts...>> { };
+struct type_caster<std::variant<Ts...>> : variant_caster<std::variant<Ts...>> {};
+
+template <>
+struct type_caster<std::monostate> : public void_caster<std::monostate> {};
 #endif
 
 PYBIND11_NAMESPACE_END(detail)
diff --git a/ext/pybind11/include/pybind11/stl/filesystem.h b/ext/pybind11/include/pybind11/stl/filesystem.h
index 431b94b4f7..e26f421776 100644
--- a/ext/pybind11/include/pybind11/stl/filesystem.h
+++ b/ext/pybind11/include/pybind11/stl/filesystem.h
@@ -4,54 +4,60 @@
 
 #pragma once
 
-#include "../cast.h"
 #include "../pybind11.h"
-#include "../pytypes.h"
-
 #include "../detail/common.h"
 #include "../detail/descr.h"
+#include "../cast.h"
+#include "../pytypes.h"
 
 #include <string>
 
 #ifdef __has_include
-#  if defined(PYBIND11_CPP17) && __has_include(<filesystem>) && \
-      PY_VERSION_HEX >= 0x03060000
-#    include <filesystem>
-#    define PYBIND11_HAS_FILESYSTEM 1
-#  endif
+#    if defined(PYBIND11_CPP17)
+#        if __has_include(<filesystem>) && \
+          PY_VERSION_HEX >= 0x03060000
+#            include <filesystem>
+#            define PYBIND11_HAS_FILESYSTEM 1
+#        elif __has_include(<experimental/filesystem>)
+#            include <experimental/filesystem>
+#            define PYBIND11_HAS_EXPERIMENTAL_FILESYSTEM 1
+#        endif
+#    endif
 #endif
 
-#if !defined(PYBIND11_HAS_FILESYSTEM) && !defined(PYBIND11_HAS_FILESYSTEM_IS_OPTIONAL)
+#if !defined(PYBIND11_HAS_FILESYSTEM) && !defined(PYBIND11_HAS_EXPERIMENTAL_FILESYSTEM)           \
+    && !defined(PYBIND11_HAS_FILESYSTEM_IS_OPTIONAL)
 #    error                                                                                        \
-        "#include <filesystem> is not available. (Use -DPYBIND11_HAS_FILESYSTEM_IS_OPTIONAL to ignore.)"
+        "Neither #include <filesystem> nor #include <experimental/filesystem is available. (Use -DPYBIND11_HAS_FILESYSTEM_IS_OPTIONAL to ignore.)"
 #endif
 
 PYBIND11_NAMESPACE_BEGIN(PYBIND11_NAMESPACE)
 PYBIND11_NAMESPACE_BEGIN(detail)
 
-#if defined(PYBIND11_HAS_FILESYSTEM)
-template<typename T> struct path_caster {
+#if defined(PYBIND11_HAS_FILESYSTEM) || defined(PYBIND11_HAS_EXPERIMENTAL_FILESYSTEM)
+template <typename T>
+struct path_caster {
 
 private:
-    static PyObject* unicode_from_fs_native(const std::string& w) {
-#if !defined(PYPY_VERSION)
+    static PyObject *unicode_from_fs_native(const std::string &w) {
+#    if !defined(PYPY_VERSION)
         return PyUnicode_DecodeFSDefaultAndSize(w.c_str(), ssize_t(w.size()));
-#else
+#    else
         // PyPy mistakenly declares the first parameter as non-const.
-        return PyUnicode_DecodeFSDefaultAndSize(
-            const_cast<char*>(w.c_str()), ssize_t(w.size()));
-#endif
+        return PyUnicode_DecodeFSDefaultAndSize(const_cast<char *>(w.c_str()), ssize_t(w.size()));
+#    endif
     }
 
-    static PyObject* unicode_from_fs_native(const std::wstring& w) {
+    static PyObject *unicode_from_fs_native(const std::wstring &w) {
         return PyUnicode_FromWideChar(w.c_str(), ssize_t(w.size()));
     }
 
 public:
-    static handle cast(const T& path, return_value_policy, handle) {
+    static handle cast(const T &path, return_value_policy, handle) {
         if (auto py_str = unicode_from_fs_native(path.native())) {
-            return module_::import("pathlib").attr("Path")(reinterpret_steal<object>(py_str))
-                   .release();
+            return module_::import("pathlib")
+                .attr("Path")(reinterpret_steal<object>(py_str))
+                .release();
         }
         return nullptr;
     }
@@ -60,15 +66,15 @@ public:
         // PyUnicode_FSConverter and PyUnicode_FSDecoder normally take care of
         // calling PyOS_FSPath themselves, but that's broken on PyPy (PyPy
         // issue #3168) so we do it ourselves instead.
-        PyObject* buf = PyOS_FSPath(handle.ptr());
+        PyObject *buf = PyOS_FSPath(handle.ptr());
         if (!buf) {
             PyErr_Clear();
             return false;
         }
-        PyObject* native = nullptr;
+        PyObject *native = nullptr;
         if constexpr (std::is_same_v<typename T::value_type, char>) {
             if (PyUnicode_FSConverter(buf, &native) != 0) {
-                if (auto c_str = PyBytes_AsString(native)) {
+                if (auto *c_str = PyBytes_AsString(native)) {
                     // AsString returns a pointer to the internal buffer, which
                     // must not be free'd.
                     value = c_str;
@@ -76,9 +82,9 @@ public:
             }
         } else if constexpr (std::is_same_v<typename T::value_type, wchar_t>) {
             if (PyUnicode_FSDecoder(buf, &native) != 0) {
-                if (auto c_str = PyUnicode_AsWideCharString(native, nullptr)) {
+                if (auto *c_str = PyUnicode_AsWideCharString(native, nullptr)) {
                     // AsWideCharString returns a new string that must be free'd.
-                    value = c_str;  // Copies the string.
+                    value = c_str; // Copies the string.
                     PyMem_Free(c_str);
                 }
             }
@@ -92,12 +98,19 @@ public:
         return true;
     }
 
-    PYBIND11_TYPE_CASTER(T, _("os.PathLike"));
+    PYBIND11_TYPE_CASTER(T, const_name("os.PathLike"));
 };
 
-template<> struct type_caster<std::filesystem::path>
-    : public path_caster<std::filesystem::path> {};
-#endif // PYBIND11_HAS_FILESYSTEM
+#endif // PYBIND11_HAS_FILESYSTEM || defined(PYBIND11_HAS_EXPERIMENTAL_FILESYSTEM)
+
+#if defined(PYBIND11_HAS_FILESYSTEM)
+template <>
+struct type_caster<std::filesystem::path> : public path_caster<std::filesystem::path> {};
+#elif defined(PYBIND11_HAS_EXPERIMENTAL_FILESYSTEM)
+template <>
+struct type_caster<std::experimental::filesystem::path>
+    : public path_caster<std::experimental::filesystem::path> {};
+#endif
 
 PYBIND11_NAMESPACE_END(detail)
 PYBIND11_NAMESPACE_END(PYBIND11_NAMESPACE)
diff --git a/ext/pybind11/include/pybind11/stl_bind.h b/ext/pybind11/include/pybind11/stl_bind.h
index 050be83cc5..0c634597ec 100644
--- a/ext/pybind11/include/pybind11/stl_bind.h
+++ b/ext/pybind11/include/pybind11/stl_bind.h
@@ -10,146 +10,160 @@
 #pragma once
 
 #include "detail/common.h"
+#include "detail/type_caster_base.h"
+#include "cast.h"
 #include "operators.h"
 
 #include <algorithm>
 #include <sstream>
+#include <type_traits>
 
 PYBIND11_NAMESPACE_BEGIN(PYBIND11_NAMESPACE)
 PYBIND11_NAMESPACE_BEGIN(detail)
 
 /* SFINAE helper class used by 'is_comparable */
-template <typename T>  struct container_traits {
-    template <typename T2> static std::true_type test_comparable(decltype(std::declval<const T2 &>() == std::declval<const T2 &>())*);
-    template <typename T2> static std::false_type test_comparable(...);
-    template <typename T2> static std::true_type test_value(typename T2::value_type *);
-    template <typename T2> static std::false_type test_value(...);
-    template <typename T2> static std::true_type test_pair(typename T2::first_type *, typename T2::second_type *);
-    template <typename T2> static std::false_type test_pair(...);
+template <typename T>
+struct container_traits {
+    template <typename T2>
+    static std::true_type
+    test_comparable(decltype(std::declval<const T2 &>() == std::declval<const T2 &>()) *);
+    template <typename T2>
+    static std::false_type test_comparable(...);
+    template <typename T2>
+    static std::true_type test_value(typename T2::value_type *);
+    template <typename T2>
+    static std::false_type test_value(...);
+    template <typename T2>
+    static std::true_type test_pair(typename T2::first_type *, typename T2::second_type *);
+    template <typename T2>
+    static std::false_type test_pair(...);
 
-    static constexpr const bool is_comparable = std::is_same<std::true_type, decltype(test_comparable<T>(nullptr))>::value;
-    static constexpr const bool is_pair = std::is_same<std::true_type, decltype(test_pair<T>(nullptr, nullptr))>::value;
-    static constexpr const bool is_vector = std::is_same<std::true_type, decltype(test_value<T>(nullptr))>::value;
+    static constexpr const bool is_comparable
+        = std::is_same<std::true_type, decltype(test_comparable<T>(nullptr))>::value;
+    static constexpr const bool is_pair
+        = std::is_same<std::true_type, decltype(test_pair<T>(nullptr, nullptr))>::value;
+    static constexpr const bool is_vector
+        = std::is_same<std::true_type, decltype(test_value<T>(nullptr))>::value;
     static constexpr const bool is_element = !is_pair && !is_vector;
 };
 
 /* Default: is_comparable -> std::false_type */
 template <typename T, typename SFINAE = void>
-struct is_comparable : std::false_type { };
+struct is_comparable : std::false_type {};
 
 /* For non-map data structures, check whether operator== can be instantiated */
 template <typename T>
 struct is_comparable<
-    T, enable_if_t<container_traits<T>::is_element &&
-                   container_traits<T>::is_comparable>>
-    : std::true_type { };
+    T,
+    enable_if_t<container_traits<T>::is_element && container_traits<T>::is_comparable>>
+    : std::true_type {};
 
-/* For a vector/map data structure, recursively check the value type (which is std::pair for maps) */
+/* For a vector/map data structure, recursively check the value type
+   (which is std::pair for maps) */
 template <typename T>
 struct is_comparable<T, enable_if_t<container_traits<T>::is_vector>> {
-    static constexpr const bool value =
-        is_comparable<typename T::value_type>::value;
+    static constexpr const bool value = is_comparable<typename T::value_type>::value;
 };
 
 /* For pairs, recursively check the two data types */
 template <typename T>
 struct is_comparable<T, enable_if_t<container_traits<T>::is_pair>> {
-    static constexpr const bool value =
-        is_comparable<typename T::first_type>::value &&
-        is_comparable<typename T::second_type>::value;
+    static constexpr const bool value = is_comparable<typename T::first_type>::value
+                                        && is_comparable<typename T::second_type>::value;
 };
 
 /* Fallback functions */
-template <typename, typename, typename... Args> void vector_if_copy_constructible(const Args &...) { }
-template <typename, typename, typename... Args> void vector_if_equal_operator(const Args &...) { }
-template <typename, typename, typename... Args> void vector_if_insertion_operator(const Args &...) { }
-template <typename, typename, typename... Args> void vector_modifiers(const Args &...) { }
+template <typename, typename, typename... Args>
+void vector_if_copy_constructible(const Args &...) {}
+template <typename, typename, typename... Args>
+void vector_if_equal_operator(const Args &...) {}
+template <typename, typename, typename... Args>
+void vector_if_insertion_operator(const Args &...) {}
+template <typename, typename, typename... Args>
+void vector_modifiers(const Args &...) {}
 
-template<typename Vector, typename Class_>
+template <typename Vector, typename Class_>
 void vector_if_copy_constructible(enable_if_t<is_copy_constructible<Vector>::value, Class_> &cl) {
     cl.def(init<const Vector &>(), "Copy constructor");
 }
 
-template<typename Vector, typename Class_>
+template <typename Vector, typename Class_>
 void vector_if_equal_operator(enable_if_t<is_comparable<Vector>::value, Class_> &cl) {
     using T = typename Vector::value_type;
 
     cl.def(self == self);
     cl.def(self != self);
 
-    cl.def("count",
-        [](const Vector &v, const T &x) {
-            return std::count(v.begin(), v.end(), x);
-        },
+    cl.def(
+        "count",
+        [](const Vector &v, const T &x) { return std::count(v.begin(), v.end(), x); },
         arg("x"),
-        "Return the number of times ``x`` appears in the list"
-    );
+        "Return the number of times ``x`` appears in the list");
 
-    cl.def("remove", [](Vector &v, const T &x) {
+    cl.def(
+        "remove",
+        [](Vector &v, const T &x) {
             auto p = std::find(v.begin(), v.end(), x);
-            if (p != v.end())
+            if (p != v.end()) {
                 v.erase(p);
-            else
+            } else {
                 throw value_error();
+            }
         },
         arg("x"),
         "Remove the first item from the list whose value is x. "
-        "It is an error if there is no such item."
-    );
+        "It is an error if there is no such item.");
 
-    cl.def("__contains__",
-        [](const Vector &v, const T &x) {
-            return std::find(v.begin(), v.end(), x) != v.end();
-        },
+    cl.def(
+        "__contains__",
+        [](const Vector &v, const T &x) { return std::find(v.begin(), v.end(), x) != v.end(); },
         arg("x"),
-        "Return true the container contains ``x``"
-    );
+        "Return true the container contains ``x``");
 }
 
 // Vector modifiers -- requires a copyable vector_type:
-// (Technically, some of these (pop and __delitem__) don't actually require copyability, but it seems
-// silly to allow deletion but not insertion, so include them here too.)
+// (Technically, some of these (pop and __delitem__) don't actually require copyability, but it
+// seems silly to allow deletion but not insertion, so include them here too.)
 template <typename Vector, typename Class_>
-void vector_modifiers(enable_if_t<is_copy_constructible<typename Vector::value_type>::value, Class_> &cl) {
+void vector_modifiers(
+    enable_if_t<is_copy_constructible<typename Vector::value_type>::value, Class_> &cl) {
     using T = typename Vector::value_type;
     using SizeType = typename Vector::size_type;
     using DiffType = typename Vector::difference_type;
 
     auto wrap_i = [](DiffType i, SizeType n) {
-        if (i < 0)
+        if (i < 0) {
             i += n;
-        if (i < 0 || (SizeType)i >= n)
+        }
+        if (i < 0 || (SizeType) i >= n) {
             throw index_error();
+        }
         return i;
     };
 
-    cl.def("append",
-           [](Vector &v, const T &value) { v.push_back(value); },
-           arg("x"),
-           "Add an item to the end of the list");
+    cl.def(
+        "append",
+        [](Vector &v, const T &value) { v.push_back(value); },
+        arg("x"),
+        "Add an item to the end of the list");
 
     cl.def(init([](const iterable &it) {
         auto v = std::unique_ptr<Vector>(new Vector());
         v->reserve(len_hint(it));
-        for (handle h : it)
+        for (handle h : it) {
             v->push_back(h.cast<T>());
+        }
         return v.release();
     }));
 
-    cl.def("clear",
-        [](Vector &v) {
-            v.clear();
-        },
-        "Clear the contents"
-    );
+    cl.def(
+        "clear", [](Vector &v) { v.clear(); }, "Clear the contents");
 
-    cl.def("extend",
-       [](Vector &v, const Vector &src) {
-           v.insert(v.end(), src.begin(), src.end());
-       },
-       arg("L"),
-       "Extend the list by appending all the items in the given list"
-    );
+    cl.def(
+        "extend",
+        [](Vector &v, const Vector &src) { v.insert(v.end(), src.begin(), src.end()); },
+        arg("L"),
+        "Extend the list by appending all the items in the given list");
 
     cl.def(
         "extend",
@@ -174,31 +188,36 @@ void vector_modifiers(enable_if_t<is_copy_constructible<typename Vector::value_t
         arg("L"),
         "Extend the list by appending all the items in the given list");
 
-    cl.def("insert",
+    cl.def(
+        "insert",
         [](Vector &v, DiffType i, const T &x) {
             // Can't use wrap_i; i == v.size() is OK
-            if (i < 0)
+            if (i < 0) {
                 i += v.size();
-            if (i < 0 || (SizeType)i > v.size())
+            }
+            if (i < 0 || (SizeType) i > v.size()) {
                 throw index_error();
+            }
             v.insert(v.begin() + i, x);
         },
-        arg("i") , arg("x"),
-        "Insert an item at a given position."
-    );
+        arg("i"),
+        arg("x"),
+        "Insert an item at a given position.");
 
-    cl.def("pop",
+    cl.def(
+        "pop",
         [](Vector &v) {
-            if (v.empty())
+            if (v.empty()) {
                 throw index_error();
+            }
             T t = std::move(v.back());
             v.pop_back();
             return t;
         },
-        "Remove and return the last item"
-    );
+        "Remove and return the last item");
 
-    cl.def("pop",
+    cl.def(
+        "pop",
         [wrap_i](Vector &v, DiffType i) {
             i = wrap_i(i, v.size());
             T t = std::move(v[(SizeType) i]);
@@ -206,29 +225,27 @@ void vector_modifiers(enable_if_t<is_copy_constructible<typename Vector::value_t
             return t;
         },
         arg("i"),
-        "Remove and return the item at index ``i``"
-    );
+        "Remove and return the item at index ``i``");
 
-    cl.def("__setitem__",
-        [wrap_i](Vector &v, DiffType i, const T &t) {
-            i = wrap_i(i, v.size());
-            v[(SizeType)i] = t;
-        }
-    );
+    cl.def("__setitem__", [wrap_i](Vector &v, DiffType i, const T &t) {
+        i = wrap_i(i, v.size());
+        v[(SizeType) i] = t;
+    });
 
     /// Slicing protocol
     cl.def(
         "__getitem__",
-        [](const Vector &v, slice slice) -> Vector * {
+        [](const Vector &v, const slice &slice) -> Vector * {
             size_t start = 0, stop = 0, step = 0, slicelength = 0;
 
-            if (!slice.compute(v.size(), &start, &stop, &step, &slicelength))
+            if (!slice.compute(v.size(), &start, &stop, &step, &slicelength)) {
                 throw error_already_set();
+            }
 
             auto *seq = new Vector();
             seq->reserve((size_t) slicelength);
 
-            for (size_t i=0; i<slicelength; ++i) {
+            for (size_t i = 0; i < slicelength; ++i) {
                 seq->push_back(v[start]);
                 start += step;
             }
@@ -239,36 +256,40 @@ void vector_modifiers(enable_if_t<is_copy_constructible<typename Vector::value_t
 
     cl.def(
         "__setitem__",
-        [](Vector &v, slice slice, const Vector &value) {
+        [](Vector &v, const slice &slice, const Vector &value) {
             size_t start = 0, stop = 0, step = 0, slicelength = 0;
-            if (!slice.compute(v.size(), &start, &stop, &step, &slicelength))
+            if (!slice.compute(v.size(), &start, &stop, &step, &slicelength)) {
                 throw error_already_set();
+            }
 
-            if (slicelength != value.size())
-                throw std::runtime_error("Left and right hand size of slice assignment have different sizes!");
+            if (slicelength != value.size()) {
+                throw std::runtime_error(
+                    "Left and right hand size of slice assignment have different sizes!");
+            }
 
-            for (size_t i=0; i<slicelength; ++i) {
+            for (size_t i = 0; i < slicelength; ++i) {
                 v[start] = value[i];
                 start += step;
             }
         },
         "Assign list elements using a slice object");
 
-    cl.def("__delitem__",
+    cl.def(
+        "__delitem__",
         [wrap_i](Vector &v, DiffType i) {
             i = wrap_i(i, v.size());
             v.erase(v.begin() + i);
         },
-        "Delete the list elements at index ``i``"
-    );
+        "Delete the list elements at index ``i``");
 
     cl.def(
         "__delitem__",
-        [](Vector &v, slice slice) {
+        [](Vector &v, const slice &slice) {
             size_t start = 0, stop = 0, step = 0, slicelength = 0;
 
-            if (!slice.compute(v.size(), &start, &stop, &step, &slicelength))
+            if (!slice.compute(v.size(), &start, &stop, &step, &slicelength)) {
                 throw error_already_set();
+            }
 
             if (step == 1 && false) {
                 v.erase(v.begin() + (DiffType) start, v.begin() + DiffType(start + slicelength));
@@ -284,8 +305,10 @@ void vector_modifiers(enable_if_t<is_copy_constructible<typename Vector::value_t
 
 // If the type has an operator[] that doesn't return a reference (most notably std::vector<bool>),
 // we have to access by copying; otherwise we return by reference.
-template <typename Vector> using vector_needs_copy = negation<
-    std::is_same<decltype(std::declval<Vector>()[typename Vector::size_type()]), typename Vector::value_type &>>;
+template <typename Vector>
+using vector_needs_copy
+    = negation<std::is_same<decltype(std::declval<Vector>()[typename Vector::size_type()]),
+                            typename Vector::value_type &>>;
 
 // The usual case: access and iterate by reference
 template <typename Vector, typename Class_>
@@ -293,31 +316,34 @@ void vector_accessor(enable_if_t<!vector_needs_copy<Vector>::value, Class_> &cl)
     using T = typename Vector::value_type;
     using SizeType = typename Vector::size_type;
     using DiffType = typename Vector::difference_type;
-    using ItType   = typename Vector::iterator;
+    using ItType = typename Vector::iterator;
 
     auto wrap_i = [](DiffType i, SizeType n) {
-        if (i < 0)
+        if (i < 0) {
             i += n;
-        if (i < 0 || (SizeType)i >= n)
+        }
+        if (i < 0 || (SizeType) i >= n) {
             throw index_error();
+        }
         return i;
     };
 
-    cl.def("__getitem__",
+    cl.def(
+        "__getitem__",
         [wrap_i](Vector &v, DiffType i) -> T & {
             i = wrap_i(i, v.size());
-            return v[(SizeType)i];
+            return v[(SizeType) i];
         },
         return_value_policy::reference_internal // ref + keepalive
     );
 
-    cl.def("__iter__",
-           [](Vector &v) {
-               return make_iterator<
-                   return_value_policy::reference_internal, ItType, ItType, T&>(
-                   v.begin(), v.end());
-           },
-           keep_alive<0, 1>() /* Essential: keep list alive while iterator exists */
+    cl.def(
+        "__iter__",
+        [](Vector &v) {
+            return make_iterator<return_value_policy::reference_internal, ItType, ItType, T &>(
+                v.begin(), v.end());
+        },
+        keep_alive<0, 1>() /* Essential: keep list alive while iterator exists */
     );
 }
 
@@ -327,53 +353,60 @@ void vector_accessor(enable_if_t<vector_needs_copy<Vector>::value, Class_> &cl)
     using T = typename Vector::value_type;
     using SizeType = typename Vector::size_type;
     using DiffType = typename Vector::difference_type;
-    using ItType   = typename Vector::iterator;
-    cl.def("__getitem__",
-        [](const Vector &v, DiffType i) -> T {
-            if (i < 0 && (i += v.size()) < 0)
-                throw index_error();
-            if ((SizeType)i >= v.size())
-                throw index_error();
-            return v[(SizeType)i];
+    using ItType = typename Vector::iterator;
+    cl.def("__getitem__", [](const Vector &v, DiffType i) -> T {
+        if (i < 0 && (i += v.size()) < 0) {
+            throw index_error();
         }
-    );
+        if ((SizeType) i >= v.size()) {
+            throw index_error();
+        }
+        return v[(SizeType) i];
+    });
 
-    cl.def("__iter__",
-           [](Vector &v) {
-               return make_iterator<
-                   return_value_policy::copy, ItType, ItType, T>(
-                   v.begin(), v.end());
-           },
-           keep_alive<0, 1>() /* Essential: keep list alive while iterator exists */
+    cl.def(
+        "__iter__",
+        [](Vector &v) {
+            return make_iterator<return_value_policy::copy, ItType, ItType, T>(v.begin(), v.end());
+        },
+        keep_alive<0, 1>() /* Essential: keep list alive while iterator exists */
     );
 }
 
-template <typename Vector, typename Class_> auto vector_if_insertion_operator(Class_ &cl, std::string const &name)
-    -> decltype(std::declval<std::ostream&>() << std::declval<typename Vector::value_type>(), void()) {
+template <typename Vector, typename Class_>
+auto vector_if_insertion_operator(Class_ &cl, std::string const &name)
+    -> decltype(std::declval<std::ostream &>() << std::declval<typename Vector::value_type>(),
+                void()) {
     using size_type = typename Vector::size_type;
 
-    cl.def("__repr__",
-           [name](Vector &v) {
+    cl.def(
+        "__repr__",
+        [name](Vector &v) {
             std::ostringstream s;
             s << name << '[';
-            for (size_type i=0; i < v.size(); ++i) {
+            for (size_type i = 0; i < v.size(); ++i) {
                 s << v[i];
-                if (i != v.size() - 1)
+                if (i != v.size() - 1) {
                     s << ", ";
+                }
             }
             s << ']';
             return s.str();
         },
-        "Return the canonical string representation of this list."
-    );
+        "Return the canonical string representation of this list.");
 }
 
 // Provide the buffer interface for vectors if we have data() and we have a format for it
-// GCC seems to have "void std::vector<bool>::data()" - doing SFINAE on the existence of data() is insufficient, we need to check it returns an appropriate pointer
+// GCC seems to have "void std::vector<bool>::data()" - doing SFINAE on the existence of data()
+// is insufficient, we need to check it returns an appropriate pointer
 template <typename Vector, typename = void>
 struct vector_has_data_and_format : std::false_type {};
 template <typename Vector>
-struct vector_has_data_and_format<Vector, enable_if_t<std::is_same<decltype(format_descriptor<typename Vector::value_type>::format(), std::declval<Vector>().data()), typename Vector::value_type*>::value>> : std::true_type {};
+struct vector_has_data_and_format<
+    Vector,
+    enable_if_t<std::is_same<decltype(format_descriptor<typename Vector::value_type>::format(),
+                                      std::declval<Vector>().data()),
+                             typename Vector::value_type *>::value>> : std::true_type {};
 
 // [workaround(intel)] Separate function required here
 // Workaround as the Intel compiler does not compile the enable_if_t part below
@@ -388,26 +421,37 @@ constexpr bool args_any_are_buffer() {
 
 // Add the buffer interface to a vector
 template <typename Vector, typename Class_, typename... Args>
-void vector_buffer_impl(Class_& cl, std::true_type) {
+void vector_buffer_impl(Class_ &cl, std::true_type) {
     using T = typename Vector::value_type;
 
-    static_assert(vector_has_data_and_format<Vector>::value, "There is not an appropriate format descriptor for this vector");
+    static_assert(vector_has_data_and_format<Vector>::value,
+                  "There is not an appropriate format descriptor for this vector");
 
-    // numpy.h declares this for arbitrary types, but it may raise an exception and crash hard at runtime if PYBIND11_NUMPY_DTYPE hasn't been called, so check here
+    // numpy.h declares this for arbitrary types, but it may raise an exception and crash hard
+    // at runtime if PYBIND11_NUMPY_DTYPE hasn't been called, so check here
     format_descriptor<T>::format();
 
-    cl.def_buffer([](Vector& v) -> buffer_info {
-        return buffer_info(v.data(), static_cast<ssize_t>(sizeof(T)), format_descriptor<T>::format(), 1, {v.size()}, {sizeof(T)});
+    cl.def_buffer([](Vector &v) -> buffer_info {
+        return buffer_info(v.data(),
+                           static_cast<ssize_t>(sizeof(T)),
+                           format_descriptor<T>::format(),
+                           1,
+                           {v.size()},
+                           {sizeof(T)});
     });
 
     cl.def(init([](const buffer &buf) {
         auto info = buf.request();
-        if (info.ndim != 1 || info.strides[0] % static_cast<ssize_t>(sizeof(T)))
+        if (info.ndim != 1 || info.strides[0] % static_cast<ssize_t>(sizeof(T))) {
             throw type_error("Only valid 1D buffers can be copied to a vector");
-        if (!detail::compare_buffer_info<T>::compare(info) || (ssize_t) sizeof(T) != info.itemsize)
-            throw type_error("Format mismatch (Python: " + info.format + " C++: " + format_descriptor<T>::format() + ")");
+        }
+        if (!detail::compare_buffer_info<T>::compare(info)
+            || (ssize_t) sizeof(T) != info.itemsize) {
+            throw type_error("Format mismatch (Python: " + info.format
+                             + " C++: " + format_descriptor<T>::format() + ")");
+        }
 
-        T *p = static_cast<T*>(info.ptr);
+        T *p = static_cast<T *>(info.ptr);
         ssize_t step = info.strides[0] / static_cast<ssize_t>(sizeof(T));
         T *end = p + info.shape[0] * step;
         if (step == 1) {
@@ -415,21 +459,22 @@ void vector_buffer_impl(Class_& cl, std::true_type) {
         }
         Vector vec;
         vec.reserve((size_t) info.shape[0]);
-        for (; p != end; p += step)
+        for (; p != end; p += step) {
             vec.push_back(*p);
+        }
         return vec;
-
     }));
 
     return;
 }
 
 template <typename Vector, typename Class_, typename... Args>
-void vector_buffer_impl(Class_&, std::false_type) {}
+void vector_buffer_impl(Class_ &, std::false_type) {}
 
 template <typename Vector, typename Class_, typename... Args>
-void vector_buffer(Class_& cl) {
-    vector_buffer_impl<Vector, Class_, Args...>(cl, detail::any_of<std::is_same<Args, buffer_protocol>...>{});
+void vector_buffer(Class_ &cl) {
+    vector_buffer_impl<Vector, Class_, Args...>(
+        cl, detail::any_of<std::is_same<Args, buffer_protocol>...>{});
 }
 
 PYBIND11_NAMESPACE_END(detail)
@@ -438,13 +483,13 @@ PYBIND11_NAMESPACE_END(detail)
 // std::vector
 //
 template <typename Vector, typename holder_type = std::unique_ptr<Vector>, typename... Args>
-class_<Vector, holder_type> bind_vector(handle scope, std::string const &name, Args&&... args) {
+class_<Vector, holder_type> bind_vector(handle scope, std::string const &name, Args &&...args) {
     using Class_ = class_<Vector, holder_type>;
 
     // If the value_type is unregistered (e.g. a converting type) or is itself registered
     // module-local then make the vector binding module-local as well:
     using vtype = typename Vector::value_type;
-    auto vtype_info = detail::get_type_info(typeid(vtype));
+    auto *vtype_info = detail::get_type_info(typeid(vtype));
     bool local = !vtype_info || vtype_info->module_local;
 
     Class_ cl(scope, name.c_str(), pybind11::module_local(local), std::forward<Args>(args)...);
@@ -469,18 +514,13 @@ class_<Vector, holder_type> bind_vector(handle scope, std::string const &name, A
     // Accessor and iterator; return by value if copyable, otherwise we return by ref + keep-alive
     detail::vector_accessor<Vector, Class_>(cl);
 
-    cl.def("__bool__",
-        [](const Vector &v) -> bool {
-            return !v.empty();
-        },
-        "Check whether the list is nonempty"
-    );
+    cl.def(
+        "__bool__",
+        [](const Vector &v) -> bool { return !v.empty(); },
+        "Check whether the list is nonempty");
 
     cl.def("__len__", &Vector::size);
 
-
-
-
 #if 0
     // C++ style functions deprecated, leaving it here as an example
     cl.def(init<size_type>());
@@ -524,8 +564,6 @@ class_<Vector, holder_type> bind_vector(handle scope, std::string const &name, A
     return cl;
 }
 
-
-
 //
 // std::map, std::unordered_map
 //
@@ -533,101 +571,140 @@ class_<Vector, holder_type> bind_vector(handle scope, std::string const &name, A
 PYBIND11_NAMESPACE_BEGIN(detail)
 
 /* Fallback functions */
-template <typename, typename, typename... Args> void map_if_insertion_operator(const Args &...) { }
-template <typename, typename, typename... Args> void map_assignment(const Args &...) { }
+template <typename, typename, typename... Args>
+void map_if_insertion_operator(const Args &...) {}
+template <typename, typename, typename... Args>
+void map_assignment(const Args &...) {}
 
 // Map assignment when copy-assignable: just copy the value
 template <typename Map, typename Class_>
-void map_assignment(enable_if_t<is_copy_assignable<typename Map::mapped_type>::value, Class_> &cl) {
+void map_assignment(
+    enable_if_t<is_copy_assignable<typename Map::mapped_type>::value, Class_> &cl) {
     using KeyType = typename Map::key_type;
     using MappedType = typename Map::mapped_type;
 
-    cl.def("__setitem__",
-           [](Map &m, const KeyType &k, const MappedType &v) {
-               auto it = m.find(k);
-               if (it != m.end()) it->second = v;
-               else m.emplace(k, v);
-           }
-    );
+    cl.def("__setitem__", [](Map &m, const KeyType &k, const MappedType &v) {
+        auto it = m.find(k);
+        if (it != m.end()) {
+            it->second = v;
+        } else {
+            m.emplace(k, v);
+        }
+    });
 }
 
-// Not copy-assignable, but still copy-constructible: we can update the value by erasing and reinserting
-template<typename Map, typename Class_>
-void map_assignment(enable_if_t<
-        !is_copy_assignable<typename Map::mapped_type>::value &&
-        is_copy_constructible<typename Map::mapped_type>::value,
-        Class_> &cl) {
+// Not copy-assignable, but still copy-constructible: we can update the value by erasing and
+// reinserting
+template <typename Map, typename Class_>
+void map_assignment(enable_if_t<!is_copy_assignable<typename Map::mapped_type>::value
+                                    && is_copy_constructible<typename Map::mapped_type>::value,
+                                Class_> &cl) {
     using KeyType = typename Map::key_type;
     using MappedType = typename Map::mapped_type;
 
-    cl.def("__setitem__",
-           [](Map &m, const KeyType &k, const MappedType &v) {
-               // We can't use m[k] = v; because value type might not be default constructable
-               auto r = m.emplace(k, v);
-               if (!r.second) {
-                   // value type is not copy assignable so the only way to insert it is to erase it first...
-                   m.erase(r.first);
-                   m.emplace(k, v);
-               }
-           }
-    );
+    cl.def("__setitem__", [](Map &m, const KeyType &k, const MappedType &v) {
+        // We can't use m[k] = v; because value type might not be default constructable
+        auto r = m.emplace(k, v);
+        if (!r.second) {
+            // value type is not copy assignable so the only way to insert it is to erase it
+            // first...
+            m.erase(r.first);
+            m.emplace(k, v);
+        }
+    });
 }
 
+template <typename Map, typename Class_>
+auto map_if_insertion_operator(Class_ &cl, std::string const &name)
+    -> decltype(std::declval<std::ostream &>() << std::declval<typename Map::key_type>()
+                                               << std::declval<typename Map::mapped_type>(),
+                void()) {
 
-template <typename Map, typename Class_> auto map_if_insertion_operator(Class_ &cl, std::string const &name)
--> decltype(std::declval<std::ostream&>() << std::declval<typename Map::key_type>() << std::declval<typename Map::mapped_type>(), void()) {
-
-    cl.def("__repr__",
-           [name](Map &m) {
+    cl.def(
+        "__repr__",
+        [name](Map &m) {
             std::ostringstream s;
             s << name << '{';
             bool f = false;
             for (auto const &kv : m) {
-                if (f)
+                if (f) {
                     s << ", ";
+                }
                 s << kv.first << ": " << kv.second;
                 f = true;
             }
             s << '}';
             return s.str();
         },
-        "Return the canonical string representation of this map."
-    );
+        "Return the canonical string representation of this map.");
 }
 
-template<typename Map>
-struct keys_view
-{
+template <typename KeyType>
+struct keys_view {
+    virtual size_t len() = 0;
+    virtual iterator iter() = 0;
+    virtual bool contains(const KeyType &k) = 0;
+    virtual bool contains(const object &k) = 0;
+    virtual ~keys_view() = default;
+};
+
+template <typename MappedType>
+struct values_view {
+    virtual size_t len() = 0;
+    virtual iterator iter() = 0;
+    virtual ~values_view() = default;
+};
+
+template <typename KeyType, typename MappedType>
+struct items_view {
+    virtual size_t len() = 0;
+    virtual iterator iter() = 0;
+    virtual ~items_view() = default;
+};
+
+template <typename Map, typename KeysView>
+struct KeysViewImpl : public KeysView {
+    explicit KeysViewImpl(Map &map) : map(map) {}
+    size_t len() override { return map.size(); }
+    iterator iter() override { return make_key_iterator(map.begin(), map.end()); }
+    bool contains(const typename Map::key_type &k) override { return map.find(k) != map.end(); }
+    bool contains(const object &) override { return false; }
     Map &map;
 };
 
-template<typename Map>
-struct values_view
-{
+template <typename Map, typename ValuesView>
+struct ValuesViewImpl : public ValuesView {
+    explicit ValuesViewImpl(Map &map) : map(map) {}
+    size_t len() override { return map.size(); }
+    iterator iter() override { return make_value_iterator(map.begin(), map.end()); }
     Map &map;
 };
 
-template<typename Map>
-struct items_view
-{
+template <typename Map, typename ItemsView>
+struct ItemsViewImpl : public ItemsView {
+    explicit ItemsViewImpl(Map &map) : map(map) {}
+    size_t len() override { return map.size(); }
+    iterator iter() override { return make_iterator(map.begin(), map.end()); }
     Map &map;
 };
 
 PYBIND11_NAMESPACE_END(detail)
 
 template <typename Map, typename holder_type = std::unique_ptr<Map>, typename... Args>
-class_<Map, holder_type> bind_map(handle scope, const std::string &name, Args&&... args) {
+class_<Map, holder_type> bind_map(handle scope, const std::string &name, Args &&...args) {
     using KeyType = typename Map::key_type;
     using MappedType = typename Map::mapped_type;
-    using KeysView = detail::keys_view<Map>;
-    using ValuesView = detail::values_view<Map>;
-    using ItemsView = detail::items_view<Map>;
+    using StrippedKeyType = detail::remove_cvref_t<KeyType>;
+    using StrippedMappedType = detail::remove_cvref_t<MappedType>;
+    using KeysView = detail::keys_view<StrippedKeyType>;
+    using ValuesView = detail::values_view<StrippedMappedType>;
+    using ItemsView = detail::items_view<StrippedKeyType, StrippedMappedType>;
     using Class_ = class_<Map, holder_type>;
 
     // If either type is a non-module-local bound type then make the map binding non-local as well;
     // otherwise (e.g. both types are either module-local or converting) the map will be
     // module-local.
-    auto tinfo = detail::get_type_info(typeid(MappedType));
+    auto *tinfo = detail::get_type_info(typeid(MappedType));
     bool local = !tinfo || tinfo->module_local;
     if (local) {
         tinfo = detail::get_type_info(typeid(KeyType));
@@ -635,112 +712,133 @@ class_<Map, holder_type> bind_map(handle scope, const std::string &name, Args&&.
     }
 
     Class_ cl(scope, name.c_str(), pybind11::module_local(local), std::forward<Args>(args)...);
-    class_<KeysView> keys_view(
-        scope, ("KeysView[" + name + "]").c_str(), pybind11::module_local(local));
-    class_<ValuesView> values_view(
-        scope, ("ValuesView[" + name + "]").c_str(), pybind11::module_local(local));
-    class_<ItemsView> items_view(
-        scope, ("ItemsView[" + name + "]").c_str(), pybind11::module_local(local));
+    static constexpr auto key_type_descr = detail::make_caster<KeyType>::name;
+    static constexpr auto mapped_type_descr = detail::make_caster<MappedType>::name;
+    std::string key_type_name(key_type_descr.text), mapped_type_name(mapped_type_descr.text);
+
+    // If key type isn't properly wrapped, fall back to C++ names
+    if (key_type_name == "%") {
+        key_type_name = detail::type_info_description(typeid(KeyType));
+    }
+    // Similarly for value type:
+    if (mapped_type_name == "%") {
+        mapped_type_name = detail::type_info_description(typeid(MappedType));
+    }
+
+    // Wrap KeysView[KeyType] if it wasn't already wrapped
+    if (!detail::get_type_info(typeid(KeysView))) {
+        class_<KeysView> keys_view(
+            scope, ("KeysView[" + key_type_name + "]").c_str(), pybind11::module_local(local));
+        keys_view.def("__len__", &KeysView::len);
+        keys_view.def("__iter__",
+                      &KeysView::iter,
+                      keep_alive<0, 1>() /* Essential: keep view alive while iterator exists */
+        );
+        keys_view.def("__contains__",
+                      static_cast<bool (KeysView::*)(const KeyType &)>(&KeysView::contains));
+        // Fallback for when the object is not of the key type
+        keys_view.def("__contains__",
+                      static_cast<bool (KeysView::*)(const object &)>(&KeysView::contains));
+    }
+    // Similarly for ValuesView:
+    if (!detail::get_type_info(typeid(ValuesView))) {
+        class_<ValuesView> values_view(scope,
+                                       ("ValuesView[" + mapped_type_name + "]").c_str(),
+                                       pybind11::module_local(local));
+        values_view.def("__len__", &ValuesView::len);
+        values_view.def("__iter__",
+                        &ValuesView::iter,
+                        keep_alive<0, 1>() /* Essential: keep view alive while iterator exists */
+        );
+    }
+    // Similarly for ItemsView:
+    if (!detail::get_type_info(typeid(ItemsView))) {
+        class_<ItemsView> items_view(
+            scope,
+            ("ItemsView[" + key_type_name + ", ").append(mapped_type_name + "]").c_str(),
+            pybind11::module_local(local));
+        items_view.def("__len__", &ItemsView::len);
+        items_view.def("__iter__",
+                       &ItemsView::iter,
+                       keep_alive<0, 1>() /* Essential: keep view alive while iterator exists */
+        );
+    }
 
     cl.def(init<>());
 
     // Register stream insertion operator (if possible)
     detail::map_if_insertion_operator<Map, Class_>(cl, name);
 
-    cl.def("__bool__",
+    cl.def(
+        "__bool__",
         [](const Map &m) -> bool { return !m.empty(); },
-        "Check whether the map is nonempty"
+        "Check whether the map is nonempty");
+
+    cl.def(
+        "__iter__",
+        [](Map &m) { return make_key_iterator(m.begin(), m.end()); },
+        keep_alive<0, 1>() /* Essential: keep map alive while iterator exists */
     );
 
-    cl.def("__iter__",
-           [](Map &m) { return make_key_iterator(m.begin(), m.end()); },
-           keep_alive<0, 1>() /* Essential: keep map alive while iterator exists */
+    cl.def(
+        "keys",
+        [](Map &m) {
+            return std::unique_ptr<KeysView>(new detail::KeysViewImpl<Map, KeysView>(m));
+        },
+        keep_alive<0, 1>() /* Essential: keep map alive while view exists */
     );
 
-    cl.def("keys",
-           [](Map &m) { return KeysView{m}; },
-           keep_alive<0, 1>() /* Essential: keep map alive while view exists */
+    cl.def(
+        "values",
+        [](Map &m) {
+            return std::unique_ptr<ValuesView>(new detail::ValuesViewImpl<Map, ValuesView>(m));
+        },
+        keep_alive<0, 1>() /* Essential: keep map alive while view exists */
     );
 
-    cl.def("values",
-           [](Map &m) { return ValuesView{m}; },
-           keep_alive<0, 1>() /* Essential: keep map alive while view exists */
+    cl.def(
+        "items",
+        [](Map &m) {
+            return std::unique_ptr<ItemsView>(new detail::ItemsViewImpl<Map, ItemsView>(m));
+        },
+        keep_alive<0, 1>() /* Essential: keep map alive while view exists */
     );
 
-    cl.def("items",
-           [](Map &m) { return ItemsView{m}; },
-           keep_alive<0, 1>() /* Essential: keep map alive while view exists */
-    );
-
-    cl.def("__getitem__",
+    cl.def(
+        "__getitem__",
         [](Map &m, const KeyType &k) -> MappedType & {
             auto it = m.find(k);
-            if (it == m.end())
-              throw key_error();
-           return it->second;
+            if (it == m.end()) {
+                throw key_error();
+            }
+            return it->second;
         },
         return_value_policy::reference_internal // ref + keepalive
     );
 
-    cl.def("__contains__",
-        [](Map &m, const KeyType &k) -> bool {
-            auto it = m.find(k);
-            if (it == m.end())
-              return false;
-           return true;
+    cl.def("__contains__", [](Map &m, const KeyType &k) -> bool {
+        auto it = m.find(k);
+        if (it == m.end()) {
+            return false;
         }
-    );
+        return true;
+    });
     // Fallback for when the object is not of the key type
     cl.def("__contains__", [](Map &, const object &) -> bool { return false; });
 
     // Assignment provided only if the type is copyable
     detail::map_assignment<Map, Class_>(cl);
 
-    cl.def("__delitem__",
-           [](Map &m, const KeyType &k) {
-               auto it = m.find(k);
-               if (it == m.end())
-                   throw key_error();
-               m.erase(it);
-           }
-    );
+    cl.def("__delitem__", [](Map &m, const KeyType &k) {
+        auto it = m.find(k);
+        if (it == m.end()) {
+            throw key_error();
+        }
+        m.erase(it);
+    });
 
     cl.def("__len__", &Map::size);
 
-    keys_view.def("__len__", [](KeysView &view) { return view.map.size(); });
-    keys_view.def("__iter__",
-        [](KeysView &view) {
-            return make_key_iterator(view.map.begin(), view.map.end());
-        },
-        keep_alive<0, 1>() /* Essential: keep view alive while iterator exists */
-    );
-    keys_view.def("__contains__",
-        [](KeysView &view, const KeyType &k) -> bool {
-            auto it = view.map.find(k);
-            if (it == view.map.end())
-                return false;
-            return true;
-        }
-    );
-    // Fallback for when the object is not of the key type
-    keys_view.def("__contains__", [](KeysView &, const object &) -> bool { return false; });
-
-    values_view.def("__len__", [](ValuesView &view) { return view.map.size(); });
-    values_view.def("__iter__",
-        [](ValuesView &view) {
-            return make_value_iterator(view.map.begin(), view.map.end());
-        },
-        keep_alive<0, 1>() /* Essential: keep view alive while iterator exists */
-    );
-
-    items_view.def("__len__", [](ItemsView &view) { return view.map.size(); });
-    items_view.def("__iter__",
-        [](ItemsView &view) {
-            return make_iterator(view.map.begin(), view.map.end());
-        },
-        keep_alive<0, 1>() /* Essential: keep view alive while iterator exists */
-    );
-
     return cl;
 }
 
diff --git a/ext/pybind11/noxfile.py b/ext/pybind11/noxfile.py
index 757a53843a..021ced2453 100644
--- a/ext/pybind11/noxfile.py
+++ b/ext/pybind11/noxfile.py
@@ -1,8 +1,24 @@
+import os
+
 import nox
 
+nox.needs_version = ">=2022.1.7"
 nox.options.sessions = ["lint", "tests", "tests_packaging"]
 
-PYTHON_VERISONS = ["2.7", "3.5", "3.6", "3.7", "3.8", "3.9", "3.10", "3.11"]
+PYTHON_VERSIONS = [
+    "3.6",
+    "3.7",
+    "3.8",
+    "3.9",
+    "3.10",
+    "3.11",
+    "pypy3.7",
+    "pypy3.8",
+    "pypy3.9",
+]
+
+if os.environ.get("CI", None):
+    nox.options.error_on_missing_interpreters = True
 
 
 @nox.session(reuse_venv=True)
@@ -11,10 +27,10 @@ def lint(session: nox.Session) -> None:
     Lint the codebase (except for clang-format/tidy).
     """
     session.install("pre-commit")
-    session.run("pre-commit", "run", "-a")
+    session.run("pre-commit", "run", "-a", *session.posargs)
 
 
-@nox.session(python=PYTHON_VERISONS)
+@nox.session(python=PYTHON_VERSIONS)
 def tests(session: nox.Session) -> None:
     """
     Run the tests (requires a compiler).
@@ -24,14 +40,12 @@ def tests(session: nox.Session) -> None:
     session.install("-r", "tests/requirements.txt")
     session.run(
         "cmake",
-        "-S",
-        ".",
-        "-B",
-        tmpdir,
+        "-S.",
+        f"-B{tmpdir}",
         "-DPYBIND11_WERROR=ON",
         "-DDOWNLOAD_CATCH=ON",
         "-DDOWNLOAD_EIGEN=ON",
-        *session.posargs
+        *session.posargs,
     )
     session.run("cmake", "--build", tmpdir)
     session.run("cmake", "--build", tmpdir, "--config=Release", "--target", "check")
@@ -44,7 +58,7 @@ def tests_packaging(session: nox.Session) -> None:
     """
 
     session.install("-r", "tests/requirements.txt", "--prefer-binary")
-    session.run("pytest", "tests/extra_python_package")
+    session.run("pytest", "tests/extra_python_package", *session.posargs)
 
 
 @nox.session(reuse_venv=True)
@@ -85,5 +99,9 @@ def build(session: nox.Session) -> None:
     """
 
     session.install("build")
-    session.run("python", "-m", "build")
-    session.run("python", "-m", "build", env={"PYBIND11_GLOBAL_SDIST": "1"})
+    session.log("Building normal files")
+    session.run("python", "-m", "build", *session.posargs)
+    session.log("Building pybind11-global files (PYBIND11_GLOBAL_SDIST=1)")
+    session.run(
+        "python", "-m", "build", *session.posargs, env={"PYBIND11_GLOBAL_SDIST": "1"}
+    )
diff --git a/ext/pybind11/pybind11/__init__.py b/ext/pybind11/pybind11/__init__.py
index 64e999ba06..4fbb17079f 100644
--- a/ext/pybind11/pybind11/__init__.py
+++ b/ext/pybind11/pybind11/__init__.py
@@ -1,11 +1,17 @@
-# -*- coding: utf-8 -*-
+import sys
+
+if sys.version_info < (3, 6):
+    msg = "pybind11 does not support Python < 3.6. 2.9 was the last release supporting Python 2.7 and 3.5."
+    raise ImportError(msg)
+
 
 from ._version import __version__, version_info
-from .commands import get_cmake_dir, get_include
+from .commands import get_cmake_dir, get_include, get_pkgconfig_dir
 
 __all__ = (
     "version_info",
     "__version__",
     "get_include",
     "get_cmake_dir",
+    "get_pkgconfig_dir",
 )
diff --git a/ext/pybind11/pybind11/__main__.py b/ext/pybind11/pybind11/__main__.py
index 3235747bed..8c89533843 100644
--- a/ext/pybind11/pybind11/__main__.py
+++ b/ext/pybind11/pybind11/__main__.py
@@ -1,15 +1,13 @@
-# -*- coding: utf-8 -*-
-from __future__ import print_function
+# pylint: disable=missing-function-docstring
 
 import argparse
 import sys
 import sysconfig
 
-from .commands import get_cmake_dir, get_include
+from .commands import get_cmake_dir, get_include, get_pkgconfig_dir
 
 
-def print_includes():
-    # type: () -> None
+def print_includes() -> None:
     dirs = [
         sysconfig.get_path("include"),
         sysconfig.get_path("platinclude"),
@@ -25,8 +23,7 @@ def print_includes():
     print(" ".join("-I" + d for d in unique_dirs))
 
 
-def main():
-    # type: () -> None
+def main() -> None:
 
     parser = argparse.ArgumentParser()
     parser.add_argument(
@@ -39,6 +36,11 @@ def main():
         action="store_true",
         help="Print the CMake module directory, ideal for setting -Dpybind11_ROOT in CMake.",
     )
+    parser.add_argument(
+        "--pkgconfigdir",
+        action="store_true",
+        help="Print the pkgconfig directory, ideal for setting $PKG_CONFIG_PATH.",
+    )
     args = parser.parse_args()
     if not sys.argv[1:]:
         parser.print_help()
@@ -46,6 +48,8 @@ def main():
         print_includes()
     if args.cmakedir:
         print(get_cmake_dir())
+    if args.pkgconfigdir:
+        print(get_pkgconfig_dir())
 
 
 if __name__ == "__main__":
diff --git a/ext/pybind11/pybind11/_version.py b/ext/pybind11/pybind11/_version.py
index 7cc1002869..63078bbe67 100644
--- a/ext/pybind11/pybind11/_version.py
+++ b/ext/pybind11/pybind11/_version.py
@@ -1,12 +1,12 @@
-# -*- coding: utf-8 -*-
+from typing import Union
 
 
-def _to_int(s):
+def _to_int(s: str) -> Union[int, str]:
     try:
         return int(s)
     except ValueError:
         return s
 
 
-__version__ = "2.8.1"
+__version__ = "2.10.3"
 version_info = tuple(_to_int(s) for s in __version__.split("."))
diff --git a/ext/pybind11/pybind11/_version.pyi b/ext/pybind11/pybind11/_version.pyi
deleted file mode 100644
index d45e5dc907..0000000000
--- a/ext/pybind11/pybind11/_version.pyi
+++ /dev/null
@@ -1,6 +0,0 @@
-from typing import Tuple, Union
-
-def _to_int(s: str) -> Union[int, str]: ...
-
-__version__: str
-version_info: Tuple[Union[int, str], ...]
diff --git a/ext/pybind11/pybind11/commands.py b/ext/pybind11/pybind11/commands.py
index 11f81d2d6d..152fa20ce7 100644
--- a/ext/pybind11/pybind11/commands.py
+++ b/ext/pybind11/pybind11/commands.py
@@ -1,21 +1,37 @@
-# -*- coding: utf-8 -*-
 import os
 
 DIR = os.path.abspath(os.path.dirname(__file__))
 
 
-def get_include(user=False):
-    # type: (bool) -> str
+def get_include(user: bool = False) -> str:  # pylint: disable=unused-argument
+    """
+    Return the path to the pybind11 include directory. The historical "user"
+    argument is unused, and may be removed.
+    """
     installed_path = os.path.join(DIR, "include")
     source_path = os.path.join(os.path.dirname(DIR), "include")
     return installed_path if os.path.exists(installed_path) else source_path
 
 
-def get_cmake_dir():
-    # type: () -> str
+def get_cmake_dir() -> str:
+    """
+    Return the path to the pybind11 CMake module directory.
+    """
     cmake_installed_path = os.path.join(DIR, "share", "cmake", "pybind11")
     if os.path.exists(cmake_installed_path):
         return cmake_installed_path
-    else:
-        msg = "pybind11 not installed, installation required to access the CMake files"
-        raise ImportError(msg)
+
+    msg = "pybind11 not installed, installation required to access the CMake files"
+    raise ImportError(msg)
+
+
+def get_pkgconfig_dir() -> str:
+    """
+    Return the path to the pybind11 pkgconfig directory.
+    """
+    pkgconfig_installed_path = os.path.join(DIR, "share", "pkgconfig")
+    if os.path.exists(pkgconfig_installed_path):
+        return pkgconfig_installed_path
+
+    msg = "pybind11 not installed, installation required to access the pkgconfig files"
+    raise ImportError(msg)
diff --git a/ext/pybind11/pybind11/setup_helpers.py b/ext/pybind11/pybind11/setup_helpers.py
index 4ff1a0cb37..1fd04b9154 100644
--- a/ext/pybind11/pybind11/setup_helpers.py
+++ b/ext/pybind11/pybind11/setup_helpers.py
@@ -1,5 +1,3 @@
-# -*- coding: utf-8 -*-
-
 """
 This module provides helpers for C++11+ projects using pybind11.
 
@@ -42,12 +40,27 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 import contextlib
 import os
 import platform
+import shlex
 import shutil
 import sys
 import sysconfig
 import tempfile
 import threading
 import warnings
+from functools import lru_cache
+from pathlib import Path
+from typing import (
+    Any,
+    Callable,
+    Dict,
+    Iterable,
+    Iterator,
+    List,
+    Optional,
+    Tuple,
+    TypeVar,
+    Union,
+)
 
 try:
     from setuptools import Extension as _Extension
@@ -60,7 +73,6 @@ import distutils.ccompiler
 import distutils.errors
 
 WIN = sys.platform.startswith("win32") and "mingw" not in sysconfig.get_platform()
-PY2 = sys.version_info[0] < 3
 MACOS = sys.platform.startswith("darwin")
 STD_TMPL = "/std:c++{}" if WIN else "-std=c++{}"
 
@@ -72,7 +84,7 @@ STD_TMPL = "/std:c++{}" if WIN else "-std=c++{}"
 # directory into your path if it sits beside your setup.py.
 
 
-class Pybind11Extension(_Extension):
+class Pybind11Extension(_Extension):  # type: ignore[misc]
     """
     Build a C++11+ Extension module with pybind11. This automatically adds the
     recommended flags when you init the extension and assumes C++ sources - you
@@ -94,21 +106,18 @@ class Pybind11Extension(_Extension):
 
     If you want to add pybind11 headers manually, for example for an exact
     git checkout, then set ``include_pybind11=False``.
-
-    Warning: do not use property-based access to the instance on Python 2 -
-    this is an ugly old-style class due to Distutils.
     """
 
     # flags are prepended, so that they can be further overridden, e.g. by
     # ``extra_compile_args=["-g"]``.
 
-    def _add_cflags(self, flags):
+    def _add_cflags(self, flags: List[str]) -> None:
         self.extra_compile_args[:0] = flags
 
-    def _add_ldflags(self, flags):
+    def _add_ldflags(self, flags: List[str]) -> None:
         self.extra_link_args[:0] = flags
 
-    def __init__(self, *args, **kwargs):
+    def __init__(self, *args: Any, **kwargs: Any) -> None:
 
         self._cxx_level = 0
         cxx_std = kwargs.pop("cxx_std", 0)
@@ -118,9 +127,7 @@ class Pybind11Extension(_Extension):
 
         include_pybind11 = kwargs.pop("include_pybind11", True)
 
-        # Can't use super here because distutils has old-style classes in
-        # Python 2!
-        _Extension.__init__(self, *args, **kwargs)
+        super().__init__(*args, **kwargs)
 
         # Include the installed package pybind11 headers
         if include_pybind11:
@@ -132,18 +139,22 @@ class Pybind11Extension(_Extension):
 
                 if pyinc not in self.include_dirs:
                     self.include_dirs.append(pyinc)
-            except ImportError:
+            except ModuleNotFoundError:
                 pass
 
-        # Have to use the accessor manually to support Python 2 distutils
-        Pybind11Extension.cxx_std.__set__(self, cxx_std)
+        self.cxx_std = cxx_std
 
         cflags = []
         ldflags = []
         if WIN:
             cflags += ["/EHsc", "/bigobj"]
         else:
-            cflags += ["-fvisibility=hidden", "-g0"]
+            cflags += ["-fvisibility=hidden"]
+            env_cflags = os.environ.get("CFLAGS", "")
+            env_cppflags = os.environ.get("CPPFLAGS", "")
+            c_cpp_flags = shlex.split(env_cflags) + shlex.split(env_cppflags)
+            if not any(opt.startswith("-g") for opt in c_cpp_flags):
+                cflags += ["-g0"]
             if MACOS:
                 cflags += ["-stdlib=libc++"]
                 ldflags += ["-stdlib=libc++"]
@@ -151,18 +162,18 @@ class Pybind11Extension(_Extension):
         self._add_ldflags(ldflags)
 
     @property
-    def cxx_std(self):
+    def cxx_std(self) -> int:
         """
-        The CXX standard level. If set, will add the required flags. If left
-        at 0, it will trigger an automatic search when pybind11's build_ext
-        is used. If None, will have no effect.  Besides just the flags, this
-        may add a register warning/error fix for Python 2 or macos-min 10.9
-        or 10.14.
+        The CXX standard level. If set, will add the required flags. If left at
+        0, it will trigger an automatic search when pybind11's build_ext is
+        used. If None, will have no effect.  Besides just the flags, this may
+        add a macos-min 10.9 or 10.14 flag if MACOSX_DEPLOYMENT_TARGET is
+        unset.
         """
         return self._cxx_level
 
     @cxx_std.setter
-    def cxx_std(self, level):
+    def cxx_std(self, level: int) -> None:
 
         if self._cxx_level:
             warnings.warn("You cannot safely change the cxx_level after setting it!")
@@ -189,31 +200,20 @@ class Pybind11Extension(_Extension):
             current_macos = tuple(int(x) for x in platform.mac_ver()[0].split(".")[:2])
             desired_macos = (10, 9) if level < 17 else (10, 14)
             macos_string = ".".join(str(x) for x in min(current_macos, desired_macos))
-            macosx_min = "-mmacosx-version-min=" + macos_string
+            macosx_min = f"-mmacosx-version-min={macos_string}"
             cflags += [macosx_min]
             ldflags += [macosx_min]
 
-        if PY2:
-            if WIN:
-                # Will be ignored on MSVC 2015, where C++17 is not supported so
-                # this flag is not valid.
-                cflags += ["/wd5033"]
-            elif level >= 17:
-                cflags += ["-Wno-register"]
-            elif level >= 14:
-                cflags += ["-Wno-deprecated-register"]
-
         self._add_cflags(cflags)
         self._add_ldflags(ldflags)
 
 
 # Just in case someone clever tries to multithread
 tmp_chdir_lock = threading.Lock()
-cpp_cache_lock = threading.Lock()
 
 
 @contextlib.contextmanager
-def tmp_chdir():
+def tmp_chdir() -> Iterator[str]:
     "Prepare and enter a temporary directory, cleanup when done"
 
     # Threadsafe
@@ -229,7 +229,7 @@ def tmp_chdir():
 
 
 # cf http://bugs.python.org/issue26689
-def has_flag(compiler, flag):
+def has_flag(compiler: Any, flag: str) -> bool:
     """
     Return the flag if a flag name is supported on the
     specified compiler, otherwise None (can be used as a boolean).
@@ -237,13 +237,12 @@ def has_flag(compiler, flag):
     """
 
     with tmp_chdir():
-        fname = "flagcheck.cpp"
-        with open(fname, "w") as f:
-            # Don't trigger -Wunused-parameter.
-            f.write("int main (int, char **) { return 0; }")
+        fname = Path("flagcheck.cpp")
+        # Don't trigger -Wunused-parameter.
+        fname.write_text("int main (int, char **) { return 0; }", encoding="utf-8")
 
         try:
-            compiler.compile([fname], extra_postargs=[flag])
+            compiler.compile([str(fname)], extra_postargs=[flag])
         except distutils.errors.CompileError:
             return False
         return True
@@ -253,7 +252,8 @@ def has_flag(compiler, flag):
 cpp_flag_cache = None
 
 
-def auto_cpp_level(compiler):
+@lru_cache()
+def auto_cpp_level(compiler: Any) -> Union[str, int]:
     """
     Return the max supported C++ std level (17, 14, or 11). Returns latest on Windows.
     """
@@ -261,48 +261,38 @@ def auto_cpp_level(compiler):
     if WIN:
         return "latest"
 
-    global cpp_flag_cache
-
-    # If this has been previously calculated with the same args, return that
-    with cpp_cache_lock:
-        if cpp_flag_cache:
-            return cpp_flag_cache
-
     levels = [17, 14, 11]
 
     for level in levels:
         if has_flag(compiler, STD_TMPL.format(level)):
-            with cpp_cache_lock:
-                cpp_flag_cache = level
             return level
 
     msg = "Unsupported compiler -- at least C++11 support is needed!"
     raise RuntimeError(msg)
 
 
-class build_ext(_build_ext):  # noqa: N801
+class build_ext(_build_ext):  # type: ignore[misc] # noqa: N801
     """
     Customized build_ext that allows an auto-search for the highest supported
     C++ level for Pybind11Extension. This is only needed for the auto-search
     for now, and is completely optional otherwise.
     """
 
-    def build_extensions(self):
+    def build_extensions(self) -> None:
         """
         Build extensions, injecting C++ std for Pybind11Extension if needed.
         """
 
         for ext in self.extensions:
             if hasattr(ext, "_cxx_level") and ext._cxx_level == 0:
-                # Python 2 syntax - old-style distutils class
-                ext.__class__.cxx_std.__set__(ext, auto_cpp_level(self.compiler))
+                ext.cxx_std = auto_cpp_level(self.compiler)
 
-        # Python 2 doesn't allow super here, since distutils uses old-style
-        # classes!
-        _build_ext.build_extensions(self)
+        super().build_extensions()
 
 
-def intree_extensions(paths, package_dir=None):
+def intree_extensions(
+    paths: Iterable[str], package_dir: Optional[Dict[str, str]] = None
+) -> List[Pybind11Extension]:
     """
     Generate Pybind11Extensions from source files directly located in a Python
     source tree.
@@ -312,33 +302,37 @@ def intree_extensions(paths, package_dir=None):
     not contain an ``__init__.py`` file.
     """
     exts = []
-    for path in paths:
-        if package_dir is None:
+
+    if package_dir is None:
+        for path in paths:
             parent, _ = os.path.split(path)
             while os.path.exists(os.path.join(parent, "__init__.py")):
                 parent, _ = os.path.split(parent)
             relname, _ = os.path.splitext(os.path.relpath(path, parent))
             qualified_name = relname.replace(os.path.sep, ".")
             exts.append(Pybind11Extension(qualified_name, [path]))
+        return exts
+
+    for path in paths:
+        for prefix, parent in package_dir.items():
+            if path.startswith(parent):
+                relname, _ = os.path.splitext(os.path.relpath(path, parent))
+                qualified_name = relname.replace(os.path.sep, ".")
+                if prefix:
+                    qualified_name = prefix + "." + qualified_name
+                exts.append(Pybind11Extension(qualified_name, [path]))
+                break
         else:
-            found = False
-            for prefix, parent in package_dir.items():
-                if path.startswith(parent):
-                    found = True
-                    relname, _ = os.path.splitext(os.path.relpath(path, parent))
-                    qualified_name = relname.replace(os.path.sep, ".")
-                    if prefix:
-                        qualified_name = prefix + "." + qualified_name
-                    exts.append(Pybind11Extension(qualified_name, [path]))
-            if not found:
-                raise ValueError(
-                    "path {} is not a child of any of the directories listed "
-                    "in 'package_dir' ({})".format(path, package_dir)
-                )
+            msg = (
+                f"path {path} is not a child of any of the directories listed "
+                f"in 'package_dir' ({package_dir})"
+            )
+            raise ValueError(msg)
+
     return exts
 
 
-def naive_recompile(obj, src):
+def naive_recompile(obj: str, src: str) -> bool:
     """
     This will recompile only if the source file changes. It does not check
     header files, so a more advanced function or Ccache is better if you have
@@ -347,7 +341,7 @@ def naive_recompile(obj, src):
     return os.stat(obj).st_mtime < os.stat(src).st_mtime
 
 
-def no_recompile(obg, src):
+def no_recompile(obg: str, src: str) -> bool:  # pylint: disable=unused-argument
     """
     This is the safest but slowest choice (and is the default) - will always
     recompile sources.
@@ -355,15 +349,33 @@ def no_recompile(obg, src):
     return True
 
 
+S = TypeVar("S", bound="ParallelCompile")
+
+CCompilerMethod = Callable[
+    [
+        distutils.ccompiler.CCompiler,
+        List[str],
+        Optional[str],
+        Optional[Union[Tuple[str], Tuple[str, Optional[str]]]],
+        Optional[List[str]],
+        bool,
+        Optional[List[str]],
+        Optional[List[str]],
+        Optional[List[str]],
+    ],
+    List[str],
+]
+
+
 # Optional parallel compile utility
 # inspired by: http://stackoverflow.com/questions/11013851/speeding-up-build-process-with-distutils
 # and: https://github.com/tbenthompson/cppimport/blob/stable/cppimport/build_module.py
 # and NumPy's parallel distutils module:
 #              https://github.com/numpy/numpy/blob/master/numpy/distutils/ccompiler.py
-class ParallelCompile(object):
+class ParallelCompile:
     """
     Make a parallel compile function. Inspired by
-    numpy.distutils.ccompiler.CCompiler_compile and cppimport.
+    numpy.distutils.ccompiler.CCompiler.compile and cppimport.
 
     This takes several arguments that allow you to customize the compile
     function created:
@@ -398,35 +410,41 @@ class ParallelCompile(object):
 
     __slots__ = ("envvar", "default", "max", "_old", "needs_recompile")
 
-    def __init__(self, envvar=None, default=0, max=0, needs_recompile=no_recompile):
+    def __init__(
+        self,
+        envvar: Optional[str] = None,
+        default: int = 0,
+        max: int = 0,  # pylint: disable=redefined-builtin
+        needs_recompile: Callable[[str, str], bool] = no_recompile,
+    ) -> None:
         self.envvar = envvar
         self.default = default
         self.max = max
         self.needs_recompile = needs_recompile
-        self._old = []
+        self._old: List[CCompilerMethod] = []
 
-    def function(self):
+    def function(self) -> CCompilerMethod:
         """
         Builds a function object usable as distutils.ccompiler.CCompiler.compile.
         """
 
         def compile_function(
-            compiler,
-            sources,
-            output_dir=None,
-            macros=None,
-            include_dirs=None,
-            debug=0,
-            extra_preargs=None,
-            extra_postargs=None,
-            depends=None,
-        ):
+            compiler: distutils.ccompiler.CCompiler,
+            sources: List[str],
+            output_dir: Optional[str] = None,
+            macros: Optional[Union[Tuple[str], Tuple[str, Optional[str]]]] = None,
+            include_dirs: Optional[List[str]] = None,
+            debug: bool = False,
+            extra_preargs: Optional[List[str]] = None,
+            extra_postargs: Optional[List[str]] = None,
+            depends: Optional[List[str]] = None,
+        ) -> Any:
 
             # These lines are directly from distutils.ccompiler.CCompiler
-            macros, objects, extra_postargs, pp_opts, build = compiler._setup_compile(
+            macros, objects, extra_postargs, pp_opts, build = compiler._setup_compile(  # type: ignore[attr-defined]
                 output_dir, macros, include_dirs, sources, depends, extra_postargs
             )
-            cc_args = compiler._get_cc_args(pp_opts, debug, extra_preargs)
+            cc_args = compiler._get_cc_args(pp_opts, debug, extra_preargs)  # type: ignore[attr-defined]
 
             # The number of threads; start with default.
             threads = self.default
@@ -435,14 +453,14 @@ class ParallelCompile(object):
             if self.envvar is not None:
                 threads = int(os.environ.get(self.envvar, self.default))
 
-            def _single_compile(obj):
+            def _single_compile(obj: Any) -> None:
                 try:
                     src, ext = build[obj]
                 except KeyError:
                     return
 
                 if not os.path.exists(obj) or self.needs_recompile(obj, src):
-                    compiler._compile(obj, src, ext, cc_args, extra_postargs, pp_opts)
+                    compiler._compile(obj, src, ext, cc_args, extra_postargs, pp_opts)  # type: ignore[attr-defined]
 
             try:
                 # Importing .synchronize checks for platforms that have some multiprocessing
@@ -460,8 +478,9 @@ class ParallelCompile(object):
                     threads = 1
 
             if threads > 1:
-                for _ in ThreadPool(threads).imap_unordered(_single_compile, objects):
-                    pass
+                with ThreadPool(threads) as pool:
+                    for _ in pool.imap_unordered(_single_compile, objects):
+                        pass
             else:
                 for ob in objects:
                     _single_compile(ob)
@@ -470,13 +489,16 @@ class ParallelCompile(object):
 
         return compile_function
 
-    def install(self):
-        distutils.ccompiler.CCompiler.compile = self.function()
+    def install(self: S) -> S:
+        """
+        Installs the compile function into distutils.ccompiler.CCompiler.compile.
+        """
+        distutils.ccompiler.CCompiler.compile = self.function()  # type: ignore[assignment]
         return self
 
-    def __enter__(self):
+    def __enter__(self: S) -> S:
         self._old.append(distutils.ccompiler.CCompiler.compile)
         return self.install()
 
-    def __exit__(self, *args):
-        distutils.ccompiler.CCompiler.compile = self._old.pop()
+    def __exit__(self, *args: Any) -> None:
+        distutils.ccompiler.CCompiler.compile = self._old.pop()  # type: ignore[assignment]
diff --git a/ext/pybind11/pybind11/setup_helpers.pyi b/ext/pybind11/pybind11/setup_helpers.pyi
deleted file mode 100644
index 074744eb82..0000000000
--- a/ext/pybind11/pybind11/setup_helpers.pyi
+++ /dev/null
@@ -1,63 +0,0 @@
-# IMPORTANT: Should stay in sync with setup_helpers.py (mostly checked by CI /
-# pre-commit).
-
-import contextlib
-import distutils.ccompiler
-from distutils.command.build_ext import build_ext as _build_ext  # type: ignore
-from distutils.extension import Extension as _Extension
-from types import TracebackType
-from typing import Any, Callable, Dict, Iterator, List, Optional, Type, TypeVar, Union
-
-WIN: bool
-PY2: bool
-MACOS: bool
-STD_TMPL: str
-
-class Pybind11Extension(_Extension):
-    def _add_cflags(self, *flags: str) -> None: ...
-    def _add_lflags(self, *flags: str) -> None: ...
-    def __init__(
-        self, *args: Any, cxx_std: int = 0, language: str = "c++", **kwargs: Any
-    ) -> None: ...
-    @property
-    def cxx_std(self) -> int: ...
-    @cxx_std.setter
-    def cxx_std(self, level: int) -> None: ...
-
-@contextlib.contextmanager
-def tmp_chdir() -> Iterator[str]: ...
-def has_flag(compiler: distutils.ccompiler.CCompiler, flag: str) -> bool: ...
-def auto_cpp_level(compiler: distutils.ccompiler.CCompiler) -> Union[int, str]: ...
-
-class build_ext(_build_ext):  # type: ignore
-    def build_extensions(self) -> None: ...
-
-def intree_extensions(
-    paths: Iterator[str], package_dir: Optional[Dict[str, str]] = None
-) -> List[Pybind11Extension]: ...
-def no_recompile(obj: str, src: str) -> bool: ...
-def naive_recompile(obj: str, src: str) -> bool: ...
-
-T = TypeVar("T", bound="ParallelCompile")
-
-class ParallelCompile:
-    envvar: Optional[str]
-    default: int
-    max: int
-    needs_recompile: Callable[[str, str], bool]
-    def __init__(
-        self,
-        envvar: Optional[str] = None,
-        default: int = 0,
-        max: int = 0,
-        needs_recompile: Callable[[str, str], bool] = no_recompile,
-    ) -> None: ...
-    def function(self) -> Any: ...
-    def install(self: T) -> T: ...
-    def __enter__(self: T) -> T: ...
-    def __exit__(
-        self,
-        exc_type: Optional[Type[BaseException]],
-        exc_value: Optional[BaseException],
-        traceback: Optional[TracebackType],
-    ) -> None: ...
diff --git a/ext/pybind11/pyproject.toml b/ext/pybind11/pyproject.toml
index 7d7a1c8213..3ba1b4b22f 100644
--- a/ext/pybind11/pyproject.toml
+++ b/ext/pybind11/pyproject.toml
@@ -1,5 +1,5 @@
 [build-system]
-requires = ["setuptools>=42", "wheel", "cmake>=3.18", "ninja"]
+requires = ["setuptools>=42", "cmake>=3.18", "ninja"]
 build-backend = "setuptools.build_meta"
 
 [tool.check-manifest]
@@ -22,20 +22,40 @@ known_first_party = "env,pybind11_cross_module_tests,pybind11_tests,"
 profile = "black"
 
 [tool.mypy]
-files = "pybind11"
-python_version = "2.7"
-warn_unused_configs = true
+files = ["pybind11"]
+python_version = "3.6"
+strict = true
+show_error_codes = true
+enable_error_code = ["ignore-without-code", "redundant-expr", "truthy-bool"]
+warn_unreachable = true
 
-disallow_any_generics = true
-disallow_subclassing_any = true
-disallow_untyped_calls = true
-disallow_untyped_defs = true
-disallow_incomplete_defs = true
-check_untyped_defs = true
-disallow_untyped_decorators = true
-no_implicit_optional = true
-warn_redundant_casts = true
-warn_unused_ignores = true
-warn_return_any = true
-no_implicit_reexport = true
-strict_equality = true
+[[tool.mypy.overrides]]
+module = ["ghapi.*", "setuptools.*"]
+ignore_missing_imports = true
+
+
+[tool.pytest.ini_options]
+minversion = "6.0"
+addopts = ["-ra", "--showlocals", "--strict-markers", "--strict-config"]
+xfail_strict = true
+filterwarnings = ["error"]
+log_cli_level = "info"
+testpaths = [
+    "tests",
+]
+timeout=300
+
+
+[tool.pylint]
+master.py-version = "3.6"
+reports.output-format = "colorized"
+messages_control.disable = [
+  "design",
+  "fixme",
+  "imports",
+  "line-too-long",
+  "imports",
+  "invalid-name",
+  "protected-access",
+  "missing-module-docstring",
+]
diff --git a/ext/pybind11/setup.cfg b/ext/pybind11/setup.cfg
index 95963d2f87..8b3361981a 100644
--- a/ext/pybind11/setup.cfg
+++ b/ext/pybind11/setup.cfg
@@ -13,14 +13,13 @@ classifiers =
     Topic :: Software Development :: Libraries :: Python Modules
     Topic :: Utilities
     Programming Language :: C++
-    Programming Language :: Python :: 2.7
-    Programming Language :: Python :: 3
-    Programming Language :: Python :: 3.5
+    Programming Language :: Python :: 3 :: Only
     Programming Language :: Python :: 3.6
     Programming Language :: Python :: 3.7
     Programming Language :: Python :: 3.8
     Programming Language :: Python :: 3.9
     Programming Language :: Python :: 3.10
+    Programming Language :: Python :: 3.11
     License :: OSI Approved :: BSD License
     Programming Language :: Python :: Implementation :: PyPy
     Programming Language :: Python :: Implementation :: CPython
@@ -31,26 +30,21 @@ keywords =
     C++11
     Python bindings
 
-[options]
-python_requires = >=2.7, !=3.0, !=3.1, !=3.2, !=3.3, !=3.4
-zip_safe = False
+project_urls =
+    Documentation = https://pybind11.readthedocs.io/
+    Bug Tracker = https://github.com/pybind/pybind11/issues
+    Discussions = https://github.com/pybind/pybind11/discussions
+    Changelog = https://pybind11.readthedocs.io/en/latest/changelog.html
+    Chat = https://gitter.im/pybind/Lobby
 
-[bdist_wheel]
-universal=1
+[options]
+python_requires = >=3.6
+zip_safe = False
 
 
 [flake8]
-max-line-length = 99
+max-line-length = 120
 show_source = True
 exclude = .git, __pycache__, build, dist, docs, tools, venv
-ignore =
-    # required for pretty matrix formatting: multiple spaces after `,` and `[`
-    E201, E241, W504,
-    # camelcase 'cPickle' imported as lowercase 'pickle'
-    N813
-    # Black conflict
-    W503, E203
-
-
-[tool:pytest]
-timeout = 300
+extend-ignore = E203, E722
+extend-select = B902, B904
diff --git a/ext/pybind11/setup.py b/ext/pybind11/setup.py
index a2326287d8..68573519c1 100644
--- a/ext/pybind11/setup.py
+++ b/ext/pybind11/setup.py
@@ -1,53 +1,50 @@
-#!/usr/bin/env python
-# -*- coding: utf-8 -*-
+#!/usr/bin/env python3
 
 # Setup script for PyPI; use CMakeFile.txt to build extension modules
 
 import contextlib
-import io
 import os
 import re
 import shutil
 import string
 import subprocess
 import sys
-import tempfile
+from pathlib import Path
+from tempfile import TemporaryDirectory
+from typing import Dict, Iterator, List, Union
 
 import setuptools.command.sdist
 
-DIR = os.path.abspath(os.path.dirname(__file__))
+DIR = Path(__file__).parent.absolute()
 VERSION_REGEX = re.compile(
     r"^\s*#\s*define\s+PYBIND11_VERSION_([A-Z]+)\s+(.*)$", re.MULTILINE
 )
+VERSION_FILE = Path("pybind11/_version.py")
+COMMON_FILE = Path("include/pybind11/detail/common.h")
 
 
-def build_expected_version_hex(matches):
+def build_expected_version_hex(matches: Dict[str, str]) -> str:
     patch_level_serial = matches["PATCH"]
     serial = None
-    try:
-        major = int(matches["MAJOR"])
-        minor = int(matches["MINOR"])
-        flds = patch_level_serial.split(".")
-        if flds:
-            patch = int(flds[0])
-            level = None
-            if len(flds) == 1:
-                level = "0"
-                serial = 0
-            elif len(flds) == 2:
-                level_serial = flds[1]
-                for level in ("a", "b", "c", "dev"):
-                    if level_serial.startswith(level):
-                        serial = int(level_serial[len(level) :])
-                        break
-    except ValueError:
-        pass
+    major = int(matches["MAJOR"])
+    minor = int(matches["MINOR"])
+    flds = patch_level_serial.split(".")
+    if flds:
+        patch = int(flds[0])
+        if len(flds) == 1:
+            level = "0"
+            serial = 0
+        elif len(flds) == 2:
+            level_serial = flds[1]
+            for level in ("a", "b", "c", "dev"):
+                if level_serial.startswith(level):
+                    serial = int(level_serial[len(level) :])
+                    break
     if serial is None:
-        msg = 'Invalid PYBIND11_VERSION_PATCH: "{}"'.format(patch_level_serial)
+        msg = f'Invalid PYBIND11_VERSION_PATCH: "{patch_level_serial}"'
         raise RuntimeError(msg)
-    return "0x{:02x}{:02x}{:02x}{}{:x}".format(
-        major, minor, patch, level[:1].upper(), serial
-    )
+    version_hex_str = f"{major:02x}{minor:02x}{patch:02x}{level[:1]}{serial:x}"
+    return f"0x{version_hex_str.upper()}"
 
 
 # PYBIND11_GLOBAL_SDIST will build a different sdist, with the python-headers
@@ -55,82 +52,67 @@ def build_expected_version_hex(matches):
 
 global_sdist = os.environ.get("PYBIND11_GLOBAL_SDIST", False)
 
-setup_py = "tools/setup_global.py.in" if global_sdist else "tools/setup_main.py.in"
+setup_py = Path(
+    "tools/setup_global.py.in" if global_sdist else "tools/setup_main.py.in"
+)
 extra_cmd = 'cmdclass["sdist"] = SDist\n'
 
 to_src = (
-    ("pyproject.toml", "tools/pyproject.toml"),
-    ("setup.py", setup_py),
+    (Path("pyproject.toml"), Path("tools/pyproject.toml")),
+    (Path("setup.py"), setup_py),
 )
 
+
 # Read the listed version
-with open("pybind11/_version.py") as f:
-    code = compile(f.read(), "pybind11/_version.py", "exec")
-loc = {}
+loc: Dict[str, str] = {}
+code = compile(VERSION_FILE.read_text(encoding="utf-8"), "pybind11/_version.py", "exec")
 exec(code, loc)
 version = loc["__version__"]
 
 # Verify that the version matches the one in C++
-with io.open("include/pybind11/detail/common.h", encoding="utf8") as f:
-    matches = dict(VERSION_REGEX.findall(f.read()))
+matches = dict(VERSION_REGEX.findall(COMMON_FILE.read_text(encoding="utf8")))
 cpp_version = "{MAJOR}.{MINOR}.{PATCH}".format(**matches)
 if version != cpp_version:
-    msg = "Python version {} does not match C++ version {}!".format(
-        version, cpp_version
-    )
+    msg = f"Python version {version} does not match C++ version {cpp_version}!"
     raise RuntimeError(msg)
 
 version_hex = matches.get("HEX", "MISSING")
-expected_version_hex = build_expected_version_hex(matches)
-if version_hex != expected_version_hex:
-    msg = "PYBIND11_VERSION_HEX {} does not match expected value {}!".format(
-        version_hex,
-        expected_version_hex,
-    )
+exp_version_hex = build_expected_version_hex(matches)
+if version_hex != exp_version_hex:
+    msg = f"PYBIND11_VERSION_HEX {version_hex} does not match expected value {exp_version_hex}!"
     raise RuntimeError(msg)
 
 
-def get_and_replace(filename, binary=False, **opts):
-    with open(filename, "rb" if binary else "r") as f:
-        contents = f.read()
-    # Replacement has to be done on text in Python 3 (both work in Python 2)
+# TODO: use literals & overload (typing extensions or Python 3.8)
+def get_and_replace(
+    filename: Path, binary: bool = False, **opts: str
+) -> Union[bytes, str]:
     if binary:
+        contents = filename.read_bytes()
         return string.Template(contents.decode()).substitute(opts).encode()
-    else:
-        return string.Template(contents).substitute(opts)
+
+    return string.Template(filename.read_text()).substitute(opts)
 
 
 # Use our input files instead when making the SDist (and anything that depends
 # on it, like a wheel)
-class SDist(setuptools.command.sdist.sdist):
-    def make_release_tree(self, base_dir, files):
-        setuptools.command.sdist.sdist.make_release_tree(self, base_dir, files)
+class SDist(setuptools.command.sdist.sdist):  # type: ignore[misc]
+    def make_release_tree(self, base_dir: str, files: List[str]) -> None:
+        super().make_release_tree(base_dir, files)
 
         for to, src in to_src:
             txt = get_and_replace(src, binary=True, version=version, extra_cmd="")
 
-            dest = os.path.join(base_dir, to)
+            dest = Path(base_dir) / to
 
             # This is normally linked, so unlink before writing!
-            os.unlink(dest)
-            with open(dest, "wb") as f:
-                f.write(txt)
-
-
-# Backport from Python 3
-@contextlib.contextmanager
-def TemporaryDirectory():  # noqa: N802
-    "Prepare a temporary directory, cleanup when done"
-    try:
-        tmpdir = tempfile.mkdtemp()
-        yield tmpdir
-    finally:
-        shutil.rmtree(tmpdir)
+            dest.unlink()
+            dest.write_bytes(txt)  # type: ignore[arg-type]
 
 
 # Remove the CMake install directory when done
 @contextlib.contextmanager
-def remove_output(*sources):
+def remove_output(*sources: str) -> Iterator[None]:
     try:
         yield
     finally:
@@ -145,10 +127,23 @@ with remove_output("pybind11/include", "pybind11/share"):
             "-DCMAKE_INSTALL_PREFIX=pybind11",
             "-DBUILD_TESTING=OFF",
             "-DPYBIND11_NOPYTHON=ON",
+            "-Dprefix_for_pc_file=${pcfiledir}/../../",
         ]
-        cmake_opts = dict(cwd=DIR, stdout=sys.stdout, stderr=sys.stderr)
-        subprocess.check_call(cmd, **cmake_opts)
-        subprocess.check_call(["cmake", "--install", tmpdir], **cmake_opts)
+        if "CMAKE_ARGS" in os.environ:
+            fcommand = [
+                c
+                for c in os.environ["CMAKE_ARGS"].split()
+                if "DCMAKE_INSTALL_PREFIX" not in c
+            ]
+            cmd += fcommand
+        subprocess.run(cmd, check=True, cwd=DIR, stdout=sys.stdout, stderr=sys.stderr)
+        subprocess.run(
+            ["cmake", "--install", tmpdir],
+            check=True,
+            cwd=DIR,
+            stdout=sys.stdout,
+            stderr=sys.stderr,
+        )
 
     txt = get_and_replace(setup_py, version=version, extra_cmd=extra_cmd)
     code = compile(txt, setup_py, "exec")
diff --git a/ext/pybind11/tests/CMakeLists.txt b/ext/pybind11/tests/CMakeLists.txt
index 6dc67a11a3..9beb268ed7 100644
--- a/ext/pybind11/tests/CMakeLists.txt
+++ b/ext/pybind11/tests/CMakeLists.txt
@@ -19,7 +19,7 @@ endif()
 # Only needed for CMake < 3.5 support
 include(CMakeParseArguments)
 
-# Filter out items; print an optional message if any items filtered
+# Filter out items; print an optional message if any items filtered. This ignores extensions.
 #
 # Usage:
 #   pybind11_filter_tests(LISTNAME file1.cpp file2.cpp ... MESSAGE "")
@@ -27,10 +27,17 @@ include(CMakeParseArguments)
 macro(pybind11_filter_tests LISTNAME)
   cmake_parse_arguments(ARG "" "MESSAGE" "" ${ARGN})
   set(PYBIND11_FILTER_TESTS_FOUND OFF)
+  # Make a list of the test without any extensions, for easier filtering.
+  set(_TMP_ACTUAL_LIST "${${LISTNAME}};") # enforce ';' at the end to allow matching last item.
+  string(REGEX REPLACE "\\.[^.;]*;" ";" LIST_WITHOUT_EXTENSIONS "${_TMP_ACTUAL_LIST}")
   foreach(filename IN LISTS ARG_UNPARSED_ARGUMENTS)
-    list(FIND ${LISTNAME} ${filename} _FILE_FOUND)
+    string(REGEX REPLACE "\\.[^.]*$" "" filename_no_ext ${filename})
+    # Search in the list without extensions.
+    list(FIND LIST_WITHOUT_EXTENSIONS ${filename_no_ext} _FILE_FOUND)
     if(_FILE_FOUND GREATER -1)
-      list(REMOVE_AT ${LISTNAME} ${_FILE_FOUND})
+      list(REMOVE_AT ${LISTNAME} ${_FILE_FOUND}) # And remove from the list with extensions.
+      list(REMOVE_AT LIST_WITHOUT_EXTENSIONS ${_FILE_FOUND}
+      )# And our search list, to ensure it is in sync.
       set(PYBIND11_FILTER_TESTS_FOUND ON)
     endif()
   endforeach()
@@ -47,6 +54,18 @@ macro(possibly_uninitialized)
   endforeach()
 endmacro()
 
+# Function to add additional targets if any of the provided tests are found.
+# Needles; Specifies the test names to look for.
+# Additions; Specifies the additional test targets to add when any of the needles are found.
+macro(tests_extra_targets needles additions)
+  # Add the index for this relation to the index extra targets map.
+  list(LENGTH PYBIND11_TEST_EXTRA_TARGETS PYBIND11_TEST_EXTRA_TARGETS_LEN)
+  list(APPEND PYBIND11_TEST_EXTRA_TARGETS ${PYBIND11_TEST_EXTRA_TARGETS_LEN})
+  # Add the test names to look for, and the associated test target additions.
+  set(PYBIND11_TEST_EXTRA_TARGETS_NEEDLES_${PYBIND11_TEST_EXTRA_TARGETS_LEN} ${needles})
+  set(PYBIND11_TEST_EXTRA_TARGETS_ADDITION_${PYBIND11_TEST_EXTRA_TARGETS_LEN} ${additions})
+endmacro()
+
 # New Python support
 if(DEFINED Python_EXECUTABLE)
   set(PYTHON_EXECUTABLE "${Python_EXECUTABLE}")
@@ -92,54 +111,68 @@ if(PYBIND11_CUDA_TESTS)
   set(CMAKE_CUDA_STANDARD_REQUIRED ON)
 endif()
 
-# Full set of test files (you can override these; see below)
+# Full set of test files (you can override these; see below, overrides ignore extension)
+# Any test that has no extension is both .py and .cpp, so 'foo' will add 'foo.cpp' and 'foo.py'.
+# Any test that has an extension is exclusively that and handled as such.
 set(PYBIND11_TEST_FILES
-    test_async.cpp
-    test_buffers.cpp
-    test_builtin_casters.cpp
-    test_call_policies.cpp
-    test_callbacks.cpp
-    test_chrono.cpp
-    test_class.cpp
-    test_constants_and_functions.cpp
-    test_copy_move.cpp
-    test_custom_type_casters.cpp
-    test_custom_type_setup.cpp
-    test_docstring_options.cpp
-    test_eigen.cpp
-    test_enum.cpp
-    test_eval.cpp
-    test_exceptions.cpp
-    test_factory_constructors.cpp
-    test_gil_scoped.cpp
-    test_iostream.cpp
-    test_kwargs_and_defaults.cpp
-    test_local_bindings.cpp
-    test_methods_and_attributes.cpp
-    test_modules.cpp
-    test_multiple_inheritance.cpp
-    test_numpy_array.cpp
-    test_numpy_dtypes.cpp
-    test_numpy_vectorize.cpp
-    test_opaque_types.cpp
-    test_operator_overloading.cpp
-    test_pickling.cpp
-    test_pytypes.cpp
-    test_sequences_and_iterators.cpp
-    test_smart_ptr.cpp
-    test_stl.cpp
-    test_stl_binders.cpp
-    test_tagbased_polymorphic.cpp
-    test_thread.cpp
-    test_union.cpp
-    test_virtual_functions.cpp)
+    test_async
+    test_buffers
+    test_builtin_casters
+    test_call_policies
+    test_callbacks
+    test_chrono
+    test_class
+    test_const_name
+    test_constants_and_functions
+    test_copy_move
+    test_custom_type_casters
+    test_custom_type_setup
+    test_docstring_options
+    test_eigen_matrix
+    test_eigen_tensor
+    test_enum
+    test_eval
+    test_exceptions
+    test_factory_constructors
+    test_gil_scoped
+    test_iostream
+    test_kwargs_and_defaults
+    test_local_bindings
+    test_methods_and_attributes
+    test_modules
+    test_multiple_inheritance
+    test_numpy_array
+    test_numpy_dtypes
+    test_numpy_vectorize
+    test_opaque_types
+    test_operator_overloading
+    test_pickling
+    test_pytypes
+    test_sequences_and_iterators
+    test_smart_ptr
+    test_stl
+    test_stl_binders
+    test_tagbased_polymorphic
+    test_thread
+    test_union
+    test_virtual_functions)
 
 # Invoking cmake with something like:
 #     cmake -DPYBIND11_TEST_OVERRIDE="test_callbacks.cpp;test_pickling.cpp" ..
 # lets you override the tests that get compiled and run.  You can restore to all tests with:
 #     cmake -DPYBIND11_TEST_OVERRIDE= ..
 if(PYBIND11_TEST_OVERRIDE)
-  set(PYBIND11_TEST_FILES ${PYBIND11_TEST_OVERRIDE})
+  # Instead of doing a direct override here, we iterate over the overrides without extension and
+  # match them against entries from the PYBIND11_TEST_FILES, anything that not matches goes into the filter list.
+  string(REGEX REPLACE "\\.[^.;]*;" ";" TEST_OVERRIDE_NO_EXT "${PYBIND11_TEST_OVERRIDE};")
+  string(REGEX REPLACE "\\.[^.;]*;" ";" TEST_FILES_NO_EXT "${PYBIND11_TEST_FILES};")
+  # This allows the override to be done with extensions, preserving backwards compatibility.
+  foreach(test_name ${TEST_FILES_NO_EXT})
+    if(NOT ${test_name} IN_LIST TEST_OVERRIDE_NO_EXT
+    )# If not in the whitelist, add to be filtered out.
+      list(APPEND PYBIND11_TEST_FILTER ${test_name})
+    endif()
+  endforeach()
 endif()
 
 # You can also filter tests:
@@ -147,11 +180,6 @@ if(PYBIND11_TEST_FILTER)
   pybind11_filter_tests(PYBIND11_TEST_FILES ${PYBIND11_TEST_FILTER})
 endif()
 
-if(PYTHON_VERSION VERSION_LESS 3.5)
-  pybind11_filter_tests(PYBIND11_TEST_FILES test_async.cpp MESSAGE
-                        "Skipping test_async on Python 2")
-endif()
-
 # Skip tests for CUDA check:
 # /pybind11/tests/test_constants_and_functions.cpp(125):
 #   error: incompatible exception specifications
@@ -161,15 +189,35 @@ if(PYBIND11_CUDA_TESTS)
     "Skipping test_constants_and_functions due to incompatible exception specifications")
 endif()
 
-string(REPLACE ".cpp" ".py" PYBIND11_PYTEST_FILES "${PYBIND11_TEST_FILES}")
+# Now that the test filtering is complete, we need to split the list into the test for PYTEST
+# and the list for the cpp targets.
+set(PYBIND11_CPPTEST_FILES "")
+set(PYBIND11_PYTEST_FILES "")
+
+foreach(test_name ${PYBIND11_TEST_FILES})
+  if(test_name MATCHES "\\.py$") # Ends in .py, purely python test.
+    list(APPEND PYBIND11_PYTEST_FILES ${test_name})
+  elseif(test_name MATCHES "\\.cpp$") # Ends in .cpp, purely cpp test.
+    list(APPEND PYBIND11_CPPTEST_FILES ${test_name})
+  elseif(NOT test_name MATCHES "\\.") # No extension specified, assume both, add extension.
+    list(APPEND PYBIND11_PYTEST_FILES ${test_name}.py)
+    list(APPEND PYBIND11_CPPTEST_FILES ${test_name}.cpp)
+  else()
+    message(WARNING "Unhanded test extension in test: ${test_name}")
+  endif()
+endforeach()
+set(PYBIND11_TEST_FILES ${PYBIND11_CPPTEST_FILES})
+list(SORT PYBIND11_PYTEST_FILES)
 
 # Contains the set of test files that require pybind11_cross_module_tests to be
 # built; if none of these are built (i.e. because TEST_OVERRIDE is used and
 # doesn't include them) the second module doesn't get built.
-set(PYBIND11_CROSS_MODULE_TESTS test_exceptions.py test_local_bindings.py test_stl.py
-                                test_stl_binders.py)
+tests_extra_targets("test_exceptions.py;test_local_bindings.py;test_stl.py;test_stl_binders.py"
+                    "pybind11_cross_module_tests")
 
-set(PYBIND11_CROSS_MODULE_GIL_TESTS test_gil_scoped.py)
+# And add additional targets for other tests.
+tests_extra_targets("test_exceptions.py" "cross_module_interleaved_error_already_set")
+tests_extra_targets("test_gil_scoped.py" "cross_module_gil_utils")
 
 set(PYBIND11_EIGEN_REPO
     "https://gitlab.com/libeigen/eigen.git"
@@ -186,7 +234,10 @@ list(GET PYBIND11_EIGEN_VERSION_AND_HASH 1 PYBIND11_EIGEN_VERSION_HASH)
 # Check if Eigen is available; if not, remove from PYBIND11_TEST_FILES (but
 # keep it in PYBIND11_PYTEST_FILES, so that we get the "eigen is not installed"
 # skip message).
-list(FIND PYBIND11_TEST_FILES test_eigen.cpp PYBIND11_TEST_FILES_EIGEN_I)
+list(FIND PYBIND11_TEST_FILES test_eigen_matrix.cpp PYBIND11_TEST_FILES_EIGEN_I)
+if(PYBIND11_TEST_FILES_EIGEN_I EQUAL -1)
+  list(FIND PYBIND11_TEST_FILES test_eigen_tensor.cpp PYBIND11_TEST_FILES_EIGEN_I)
+endif()
 if(PYBIND11_TEST_FILES_EIGEN_I GREATER -1)
   # Try loading via newer Eigen's Eigen3Config first (bypassing tools/FindEigen3.cmake).
   # Eigen 3.3.1+ exports a cmake 3.0+ target for handling dependency requirements, but also
@@ -241,13 +292,34 @@ if(PYBIND11_TEST_FILES_EIGEN_I GREATER -1)
       set(EIGEN3_VERSION ${EIGEN3_VERSION_STRING})
     endif()
     message(STATUS "Building tests with Eigen v${EIGEN3_VERSION}")
+
+    if(NOT (CMAKE_CXX_COMPILER_ID STREQUAL "GNU" AND CMAKE_CXX_COMPILER_VERSION VERSION_LESS 5.0))
+      tests_extra_targets("test_eigen_tensor.py" "eigen_tensor_avoid_stl_array")
+    endif()
+
   else()
-    list(REMOVE_AT PYBIND11_TEST_FILES ${PYBIND11_TEST_FILES_EIGEN_I})
+    list(FIND PYBIND11_TEST_FILES test_eigen_matrix.cpp PYBIND11_TEST_FILES_EIGEN_I)
+    if(PYBIND11_TEST_FILES_EIGEN_I GREATER -1)
+      list(REMOVE_AT PYBIND11_TEST_FILES ${PYBIND11_TEST_FILES_EIGEN_I})
+    endif()
+
+    list(FIND PYBIND11_TEST_FILES test_eigen_tensor.cpp PYBIND11_TEST_FILES_EIGEN_I)
+    if(PYBIND11_TEST_FILES_EIGEN_I GREATER -1)
+      list(REMOVE_AT PYBIND11_TEST_FILES ${PYBIND11_TEST_FILES_EIGEN_I})
+    endif()
     message(
       STATUS "Building tests WITHOUT Eigen, use -DDOWNLOAD_EIGEN=ON on CMake 3.11+ to download")
   endif()
 endif()
 
+# Some code doesn't support gcc 4
+if(CMAKE_CXX_COMPILER_ID STREQUAL "GNU" AND CMAKE_CXX_COMPILER_VERSION VERSION_LESS 5.0)
+  list(FIND PYBIND11_TEST_FILES test_eigen_tensor.cpp PYBIND11_TEST_FILES_EIGEN_I)
+  if(PYBIND11_TEST_FILES_EIGEN_I GREATER -1)
+    list(REMOVE_AT PYBIND11_TEST_FILES ${PYBIND11_TEST_FILES_EIGEN_I})
+  endif()
+endif()
+
 # Optional dependency for some tests (boost::variant is only supported with version >= 1.56)
 find_package(Boost 1.56)
 
@@ -305,7 +377,7 @@ endif()
 # Compile with compiler warnings turned on
 function(pybind11_enable_warnings target_name)
   if(MSVC)
-    target_compile_options(${target_name} PRIVATE /W4)
+    target_compile_options(${target_name} PRIVATE /W4 /wd4189)
   elseif(CMAKE_CXX_COMPILER_ID MATCHES "(GNU|Intel|Clang)" AND NOT PYBIND11_CUDA_TESTS)
     target_compile_options(
       ${target_name}
@@ -326,6 +398,9 @@ function(pybind11_enable_warnings target_name)
     elseif(CMAKE_CXX_COMPILER_ID MATCHES "(GNU|Clang|IntelLLVM)")
       target_compile_options(${target_name} PRIVATE -Werror)
     elseif(CMAKE_CXX_COMPILER_ID STREQUAL "Intel")
+      if(CMAKE_CXX_STANDARD EQUAL 17) # See PR #3570
+        target_compile_options(${target_name} PRIVATE -Wno-conversion)
+      endif()
       target_compile_options(
         ${target_name}
         PRIVATE
@@ -334,36 +409,21 @@ function(pybind11_enable_warnings target_name)
           -diag-disable 11074,11076)
     endif()
   endif()
-
-  # Needs to be re-added since the ordering requires these to be after the ones above
-  if(CMAKE_CXX_STANDARD
-     AND CMAKE_CXX_COMPILER_ID MATCHES "Clang"
-     AND PYTHON_VERSION VERSION_LESS 3.0)
-    if(CMAKE_CXX_STANDARD LESS 17)
-      target_compile_options(${target_name} PUBLIC -Wno-deprecated-register)
-    else()
-      target_compile_options(${target_name} PUBLIC -Wno-register)
-    endif()
-  endif()
 endfunction()
 
 set(test_targets pybind11_tests)
 
-# Build pybind11_cross_module_tests if any test_whatever.py are being built that require it
-foreach(t ${PYBIND11_CROSS_MODULE_TESTS})
-  list(FIND PYBIND11_PYTEST_FILES ${t} i)
-  if(i GREATER -1)
-    list(APPEND test_targets pybind11_cross_module_tests)
-    break()
-  endif()
-endforeach()
-
-foreach(t ${PYBIND11_CROSS_MODULE_GIL_TESTS})
-  list(FIND PYBIND11_PYTEST_FILES ${t} i)
-  if(i GREATER -1)
-    list(APPEND test_targets cross_module_gil_utils)
-    break()
-  endif()
+# Check if any tests need extra targets by iterating through the mappings registered.
+foreach(i ${PYBIND11_TEST_EXTRA_TARGETS})
+  foreach(needle ${PYBIND11_TEST_EXTRA_TARGETS_NEEDLES_${i}})
+    if(needle IN_LIST PYBIND11_PYTEST_FILES)
+      # Add all the additional targets to the test list. List join in newer cmake.
+      foreach(extra_target ${PYBIND11_TEST_EXTRA_TARGETS_ADDITION_${i}})
+        list(APPEND test_targets ${extra_target})
+      endforeach()
+      break() # Breaks out of the needle search, continues with the next mapping.
+    endif()
+  endforeach()
 endforeach()
 
 # Support CUDA testing by forcing the target file to compile with NVCC
@@ -429,6 +489,14 @@ foreach(target ${test_targets})
   endif()
 endforeach()
 
+# Provide nice organisation in IDEs
+if(NOT CMAKE_VERSION VERSION_LESS 3.8)
+  source_group(
+    TREE "${CMAKE_CURRENT_SOURCE_DIR}/../include"
+    PREFIX "Header Files"
+    FILES ${PYBIND11_HEADERS})
+endif()
+
 # Make sure pytest is found or produce a warning
 pybind11_find_import(pytest VERSION 3.1)
 
diff --git a/ext/pybind11/tests/conftest.py b/ext/pybind11/tests/conftest.py
index 362eb80691..402fd4b25b 100644
--- a/ext/pybind11/tests/conftest.py
+++ b/ext/pybind11/tests/conftest.py
@@ -1,31 +1,43 @@
-# -*- coding: utf-8 -*-
 """pytest configuration
 
 Extends output capture as needed by pybind11: ignore constructors, optional unordered lines.
-Adds docstring and exceptions message sanitizers: ignore Python 2 vs 3 differences.
+Adds docstring and exceptions message sanitizers.
 """
 
 import contextlib
 import difflib
 import gc
+import multiprocessing
+import os
 import re
 import textwrap
 
 import pytest
 
-import env
-
 # Early diagnostic for failed imports
-import pybind11_tests  # noqa: F401
+import pybind11_tests
+
+
+@pytest.fixture(scope="session", autouse=True)
+def always_forkserver_on_unix():
+    if os.name == "nt":
+        return
+
+    # Full background: https://github.com/pybind/pybind11/issues/4105#issuecomment-1301004592
+    # In a nutshell: fork() after starting threads == flakiness in the form of deadlocks.
+    # It is actually a well-known pitfall, unfortunately without guard rails.
+    # "forkserver" is more performant than "spawn" (~9s vs ~13s for tests/test_gil_scoped.py,
+    # visit the issuecomment link above for details).
+    # Windows does not have fork() and the associated pitfall, therefore it is best left
+    # running with defaults.
+    multiprocessing.set_start_method("forkserver")
+
 
-_unicode_marker = re.compile(r"u(\'[^\']*\')")
 _long_marker = re.compile(r"([0-9])L")
 _hexadecimal = re.compile(r"0x[0-9a-fA-F]+")
 
 # Avoid collecting Python3 only files
 collect_ignore = []
-if env.PY2:
-    collect_ignore.append("test_async.py")
 
 
 def _strip_and_dedent(s):
@@ -45,7 +57,7 @@ def _make_explanation(a, b):
     ]
 
 
-class Output(object):
+class Output:
     """Basic output post-processing and comparison"""
 
     def __init__(self, string):
@@ -83,7 +95,7 @@ class Unordered(Output):
             return False
 
 
-class Capture(object):
+class Capture:
     def __init__(self, capfd):
         self.capfd = capfd
         self.out = ""
@@ -126,7 +138,7 @@ def capture(capsys):
     return Capture(capsys)
 
 
-class SanitizedString(object):
+class SanitizedString:
     def __init__(self, sanitizer):
         self.sanitizer = sanitizer
         self.string = ""
@@ -149,9 +161,7 @@ class SanitizedString(object):
 def _sanitize_general(s):
     s = s.strip()
     s = s.replace("pybind11_tests.", "m.")
-    s = s.replace("unicode", "str")
     s = _long_marker.sub(r"\1", s)
-    s = _unicode_marker.sub(r"\1", s)
     return s
 
 
@@ -206,3 +216,17 @@ def gc_collect():
 def pytest_configure():
     pytest.suppress = suppress
     pytest.gc_collect = gc_collect
+
+
+def pytest_report_header(config):
+    del config  # Unused.
+    assert (
+        pybind11_tests.compiler_info is not None
+    ), "Please update pybind11_tests.cpp if this assert fails."
+    return (
+        "C++ Info:"
+        f" {pybind11_tests.compiler_info}"
+        f" {pybind11_tests.cpp_std}"
+        f" {pybind11_tests.PYBIND11_INTERNALS_ID}"
+        f" PYBIND11_SIMPLE_GIL_MANAGEMENT={pybind11_tests.PYBIND11_SIMPLE_GIL_MANAGEMENT}"
+    )
diff --git a/ext/pybind11/tests/constructor_stats.h b/ext/pybind11/tests/constructor_stats.h
index 805968a09b..937f6c233b 100644
--- a/ext/pybind11/tests/constructor_stats.h
+++ b/ext/pybind11/tests/constructor_stats.h
@@ -56,7 +56,8 @@ from the ConstructorStats instance `.values()` method.
 In some cases, when you need to track instances of a C++ class not registered with pybind11, you
 need to add a function returning the ConstructorStats for the C++ class; this can be done with:
 
-    m.def("get_special_cstats", &ConstructorStats::get<SpecialClass>, py::return_value_policy::reference)
+    m.def("get_special_cstats", &ConstructorStats::get<SpecialClass>,
+py::return_value_policy::reference)
 
 Finally, you can suppress the output messages, but keep the constructor tracking (for
 inspection/testing in python) by using the functions with `print_` replaced with `track_` (e.g.
@@ -65,15 +66,18 @@ inspection/testing in python) by using the functions with `print_` replaced with
 */
 
 #include "pybind11_tests.h"
-#include <unordered_map>
+
 #include <list>
-#include <typeindex>
 #include <sstream>
+#include <typeindex>
+#include <unordered_map>
 
 class ConstructorStats {
 protected:
-    std::unordered_map<void*, int> _instances; // Need a map rather than set because members can shared address with parents
-    std::list<std::string> _values; // Used to track values (e.g. of value constructors)
+    std::unordered_map<void *, int> _instances; // Need a map rather than set because members can
+                                                // shared address with parents
+    std::list<std::string> _values;             // Used to track values
+                                                // (e.g. of value constructors)
 public:
     int default_constructions = 0;
     int copy_constructions = 0;
@@ -96,26 +100,26 @@ public:
         default_constructions++;
     }
 
-    void created(void *inst) {
-        ++_instances[inst];
-    }
+    void created(void *inst) { ++_instances[inst]; }
 
     void destroyed(void *inst) {
-        if (--_instances[inst] < 0)
+        if (--_instances[inst] < 0) {
             throw std::runtime_error("cstats.destroyed() called with unknown "
                                      "instance; potential double-destruction "
                                      "or a missing cstats.created()");
+        }
     }
 
     static void gc() {
         // Force garbage collection to ensure any pending destructors are invoked:
 #if defined(PYPY_VERSION)
         PyObject *globals = PyEval_GetGlobals();
-        PyObject *result = PyRun_String(
-            "import gc\n"
-            "for i in range(2):"
-            "    gc.collect()\n",
-            Py_file_input, globals, globals);
+        PyObject *result = PyRun_String("import gc\n"
+                                        "for i in range(2):\n"
+                                        "    gc.collect()\n",
+                                        Py_file_input,
+                                        globals,
+                                        globals);
         if (result == nullptr)
             throw py::error_already_set();
         Py_DECREF(result);
@@ -127,15 +131,18 @@ public:
     int alive() {
         gc();
         int total = 0;
-        for (const auto &p : _instances)
-            if (p.second > 0)
+        for (const auto &p : _instances) {
+            if (p.second > 0) {
                 total += p.second;
+            }
+        }
         return total;
     }
 
     void value() {} // Recursion terminator
     // Takes one or more values, converts them to strings, then stores them.
-    template <typename T, typename... Tmore> void value(const T &v, Tmore &&...args) {
+    template <typename T, typename... Tmore>
+    void value(const T &v, Tmore &&...args) {
         std::ostringstream oss;
         oss << v;
         _values.push_back(oss.str());
@@ -145,19 +152,22 @@ public:
     // Move out stored values
     py::list values() {
         py::list l;
-        for (const auto &v : _values) l.append(py::cast(v));
+        for (const auto &v : _values) {
+            l.append(py::cast(v));
+        }
         _values.clear();
         return l;
     }
 
     // Gets constructor stats from a C++ type index
-    static ConstructorStats& get(std::type_index type) {
+    static ConstructorStats &get(std::type_index type) {
         static std::unordered_map<std::type_index, ConstructorStats> all_cstats;
         return all_cstats[type];
     }
 
     // Gets constructor stats from a C++ type
-    template <typename T> static ConstructorStats& get() {
+    template <typename T>
+    static ConstructorStats &get() {
 #if defined(PYPY_VERSION)
         gc();
 #endif
@@ -165,11 +175,12 @@ public:
     }
 
     // Gets constructor stats from a Python class
-    static ConstructorStats& get(py::object class_) {
+    static ConstructorStats &get(py::object class_) {
         auto &internals = py::detail::get_internals();
         const std::type_index *t1 = nullptr, *t2 = nullptr;
         try {
-            auto *type_info = internals.registered_types_py.at((PyTypeObject *) class_.ptr()).at(0);
+            auto *type_info
+                = internals.registered_types_py.at((PyTypeObject *) class_.ptr()).at(0);
             for (auto &p : internals.registered_types_cpp) {
                 if (p.second == type_info) {
                     if (t1) {
@@ -179,17 +190,23 @@ public:
                     t1 = &p.first;
                 }
             }
+        } catch (const std::out_of_range &) {
+        }
+        if (!t1) {
+            throw std::runtime_error("Unknown class passed to ConstructorStats::get()");
         }
-        catch (const std::out_of_range&) {}
-        if (!t1) throw std::runtime_error("Unknown class passed to ConstructorStats::get()");
         auto &cs1 = get(*t1);
-        // If we have both a t1 and t2 match, one is probably the trampoline class; return whichever
-        // has more constructions (typically one or the other will be 0)
+        // If we have both a t1 and t2 match, one is probably the trampoline class; return
+        // whichever has more constructions (typically one or the other will be 0)
         if (t2) {
             auto &cs2 = get(*t2);
-            int cs1_total = cs1.default_constructions + cs1.copy_constructions + cs1.move_constructions + (int) cs1._values.size();
-            int cs2_total = cs2.default_constructions + cs2.copy_constructions + cs2.move_constructions + (int) cs2._values.size();
-            if (cs2_total > cs1_total) return cs2;
+            int cs1_total = cs1.default_constructions + cs1.copy_constructions
+                            + cs1.move_constructions + (int) cs1._values.size();
+            int cs2_total = cs2.default_constructions + cs2.copy_constructions
+                            + cs2.move_constructions + (int) cs2._values.size();
+            if (cs2_total > cs1_total) {
+                return cs2;
+            }
         }
         return cs1;
     }
@@ -198,78 +215,108 @@ public:
 // To track construction/destruction, you need to call these methods from the various
 // constructors/operators.  The ones that take extra values record the given values in the
 // constructor stats values for later inspection.
-template <class T> void track_copy_created(T *inst) { ConstructorStats::get<T>().copy_created(inst); }
-template <class T> void track_move_created(T *inst) { ConstructorStats::get<T>().move_created(inst); }
-template <class T, typename... Values> void track_copy_assigned(T *, Values &&...values) {
+template <class T>
+void track_copy_created(T *inst) {
+    ConstructorStats::get<T>().copy_created(inst);
+}
+template <class T>
+void track_move_created(T *inst) {
+    ConstructorStats::get<T>().move_created(inst);
+}
+template <class T, typename... Values>
+void track_copy_assigned(T *, Values &&...values) {
     auto &cst = ConstructorStats::get<T>();
     cst.copy_assignments++;
     cst.value(std::forward<Values>(values)...);
 }
-template <class T, typename... Values> void track_move_assigned(T *, Values &&...values) {
+template <class T, typename... Values>
+void track_move_assigned(T *, Values &&...values) {
     auto &cst = ConstructorStats::get<T>();
     cst.move_assignments++;
     cst.value(std::forward<Values>(values)...);
 }
-template <class T, typename... Values> void track_default_created(T *inst, Values &&...values) {
+template <class T, typename... Values>
+void track_default_created(T *inst, Values &&...values) {
     auto &cst = ConstructorStats::get<T>();
     cst.default_created(inst);
     cst.value(std::forward<Values>(values)...);
 }
-template <class T, typename... Values> void track_created(T *inst, Values &&...values) {
+template <class T, typename... Values>
+void track_created(T *inst, Values &&...values) {
     auto &cst = ConstructorStats::get<T>();
     cst.created(inst);
     cst.value(std::forward<Values>(values)...);
 }
-template <class T, typename... Values> void track_destroyed(T *inst) {
+template <class T, typename... Values>
+void track_destroyed(T *inst) {
     ConstructorStats::get<T>().destroyed(inst);
 }
-template <class T, typename... Values> void track_values(T *, Values &&...values) {
+template <class T, typename... Values>
+void track_values(T *, Values &&...values) {
     ConstructorStats::get<T>().value(std::forward<Values>(values)...);
 }
 
 /// Don't cast pointers to Python, print them as strings
 inline const char *format_ptrs(const char *p) { return p; }
 template <typename T>
-py::str format_ptrs(T *p) { return "{:#x}"_s.format(reinterpret_cast<std::uintptr_t>(p)); }
+py::str format_ptrs(T *p) {
+    return "{:#x}"_s.format(reinterpret_cast<std::uintptr_t>(p));
+}
 template <typename T>
-auto format_ptrs(T &&x) -> decltype(std::forward<T>(x)) { return std::forward<T>(x); }
+auto format_ptrs(T &&x) -> decltype(std::forward<T>(x)) {
+    return std::forward<T>(x);
+}
 
 template <class T, typename... Output>
 void print_constr_details(T *inst, const std::string &action, Output &&...output) {
-    py::print("###", py::type_id<T>(), "@", format_ptrs(inst), action,
+    py::print("###",
+              py::type_id<T>(),
+              "@",
+              format_ptrs(inst),
+              action,
               format_ptrs(std::forward<Output>(output))...);
 }
 
 // Verbose versions of the above:
-template <class T, typename... Values> void print_copy_created(T *inst, Values &&...values) { // NB: this prints, but doesn't store, given values
+template <class T, typename... Values>
+void print_copy_created(T *inst,
+                        Values &&...values) { // NB: this prints, but doesn't store, given values
     print_constr_details(inst, "created via copy constructor", values...);
     track_copy_created(inst);
 }
-template <class T, typename... Values> void print_move_created(T *inst, Values &&...values) { // NB: this prints, but doesn't store, given values
+template <class T, typename... Values>
+void print_move_created(T *inst,
+                        Values &&...values) { // NB: this prints, but doesn't store, given values
     print_constr_details(inst, "created via move constructor", values...);
     track_move_created(inst);
 }
-template <class T, typename... Values> void print_copy_assigned(T *inst, Values &&...values) {
+template <class T, typename... Values>
+void print_copy_assigned(T *inst, Values &&...values) {
     print_constr_details(inst, "assigned via copy assignment", values...);
     track_copy_assigned(inst, values...);
 }
-template <class T, typename... Values> void print_move_assigned(T *inst, Values &&...values) {
+template <class T, typename... Values>
+void print_move_assigned(T *inst, Values &&...values) {
     print_constr_details(inst, "assigned via move assignment", values...);
     track_move_assigned(inst, values...);
 }
-template <class T, typename... Values> void print_default_created(T *inst, Values &&...values) {
+template <class T, typename... Values>
+void print_default_created(T *inst, Values &&...values) {
     print_constr_details(inst, "created via default constructor", values...);
     track_default_created(inst, values...);
 }
-template <class T, typename... Values> void print_created(T *inst, Values &&...values) {
+template <class T, typename... Values>
+void print_created(T *inst, Values &&...values) {
     print_constr_details(inst, "created", values...);
     track_created(inst, values...);
 }
-template <class T, typename... Values> void print_destroyed(T *inst, Values &&...values) { // Prints but doesn't store given values
+template <class T, typename... Values>
+void print_destroyed(T *inst, Values &&...values) { // Prints but doesn't store given values
     print_constr_details(inst, "destroyed", values...);
     track_destroyed(inst);
 }
-template <class T, typename... Values> void print_values(T *inst, Values &&...values) {
+template <class T, typename... Values>
+void print_values(T *inst, Values &&...values) {
     print_constr_details(inst, ":", values...);
     track_values(inst, values...);
 }
diff --git a/ext/pybind11/tests/cross_module_gil_utils.cpp b/ext/pybind11/tests/cross_module_gil_utils.cpp
index 07db9f6e48..7c20849dd9 100644
--- a/ext/pybind11/tests/cross_module_gil_utils.cpp
+++ b/ext/pybind11/tests/cross_module_gil_utils.cpp
@@ -6,8 +6,15 @@
     All rights reserved. Use of this source code is governed by a
     BSD-style license that can be found in the LICENSE file.
 */
+#if defined(PYBIND11_INTERNALS_VERSION)
+#    undef PYBIND11_INTERNALS_VERSION
+#endif
+#define PYBIND11_INTERNALS_VERSION 21814642 // Ensure this module has its own `internals` instance.
 #include <pybind11/pybind11.h>
+
 #include <cstdint>
+#include <string>
+#include <thread>
 
 // This file mimics a DSO that makes pybind11 calls but does not define a
 // PYBIND11_MODULE. The purpose is to test that such a DSO can create a
@@ -20,54 +27,82 @@
 namespace {
 
 namespace py = pybind11;
+
 void gil_acquire() { py::gil_scoped_acquire gil; }
 
+std::string gil_multi_acquire_release(unsigned bits) {
+    if ((bits & 0x1u) != 0u) {
+        py::gil_scoped_acquire gil;
+    }
+    if ((bits & 0x2u) != 0u) {
+        py::gil_scoped_release gil;
+    }
+    if ((bits & 0x4u) != 0u) {
+        py::gil_scoped_acquire gil;
+    }
+    if ((bits & 0x8u) != 0u) {
+        py::gil_scoped_release gil;
+    }
+    return PYBIND11_INTERNALS_ID;
+}
+
+struct CustomAutoGIL {
+    CustomAutoGIL() : gstate(PyGILState_Ensure()) {}
+    ~CustomAutoGIL() { PyGILState_Release(gstate); }
+
+    PyGILState_STATE gstate;
+};
+struct CustomAutoNoGIL {
+    CustomAutoNoGIL() : save(PyEval_SaveThread()) {}
+    ~CustomAutoNoGIL() { PyEval_RestoreThread(save); }
+
+    PyThreadState *save;
+};
+
+template <typename Acquire, typename Release>
+void gil_acquire_inner() {
+    Acquire acquire_outer;
+    Acquire acquire_inner;
+    Release release;
+}
+
+template <typename Acquire, typename Release>
+void gil_acquire_nested() {
+    Acquire acquire_outer;
+    Acquire acquire_inner;
+    Release release;
+    auto thread = std::thread(&gil_acquire_inner<Acquire, Release>);
+    thread.join();
+}
+
 constexpr char kModuleName[] = "cross_module_gil_utils";
 
-#if PY_MAJOR_VERSION >= 3
 struct PyModuleDef moduledef = {
-    PyModuleDef_HEAD_INIT,
-    kModuleName,
-    NULL,
-    0,
-    NULL,
-    NULL,
-    NULL,
-    NULL,
-    NULL
-};
-#else
-PyMethodDef module_methods[] = {
-    {NULL, NULL, 0, NULL}
-};
-#endif
+    PyModuleDef_HEAD_INIT, kModuleName, nullptr, 0, nullptr, nullptr, nullptr, nullptr, nullptr};
 
-}  // namespace
+} // namespace
 
-extern "C" PYBIND11_EXPORT
-#if PY_MAJOR_VERSION >= 3
-PyObject* PyInit_cross_module_gil_utils()
-#else
-void initcross_module_gil_utils()
-#endif
-{
+#define ADD_FUNCTION(Name, ...)                                                                   \
+    PyModule_AddObject(m, Name, PyLong_FromVoidPtr(reinterpret_cast<void *>(&__VA_ARGS__)));
 
-    PyObject* m =
-#if PY_MAJOR_VERSION >= 3
-        PyModule_Create(&moduledef);
-#else
-        Py_InitModule(kModuleName, module_methods);
-#endif
+extern "C" PYBIND11_EXPORT PyObject *PyInit_cross_module_gil_utils() {
 
-    if (m != NULL) {
-        static_assert(
-            sizeof(&gil_acquire) == sizeof(void*),
-            "Function pointer must have the same size as void*");
-        PyModule_AddObject(m, "gil_acquire_funcaddr",
-                           PyLong_FromVoidPtr(reinterpret_cast<void*>(&gil_acquire)));
+    PyObject *m = PyModule_Create(&moduledef);
+
+    if (m != nullptr) {
+        static_assert(sizeof(&gil_acquire) == sizeof(void *),
+                      "Function pointer must have the same size as void*");
+        ADD_FUNCTION("gil_acquire_funcaddr", gil_acquire)
+        ADD_FUNCTION("gil_multi_acquire_release_funcaddr", gil_multi_acquire_release)
+        ADD_FUNCTION("gil_acquire_inner_custom_funcaddr",
+                     gil_acquire_inner<CustomAutoGIL, CustomAutoNoGIL>)
+        ADD_FUNCTION("gil_acquire_nested_custom_funcaddr",
+                     gil_acquire_nested<CustomAutoGIL, CustomAutoNoGIL>)
+        ADD_FUNCTION("gil_acquire_inner_pybind11_funcaddr",
+                     gil_acquire_inner<py::gil_scoped_acquire, py::gil_scoped_release>)
+        ADD_FUNCTION("gil_acquire_nested_pybind11_funcaddr",
+                     gil_acquire_nested<py::gil_scoped_acquire, py::gil_scoped_release>)
     }
 
-#if PY_MAJOR_VERSION >= 3
     return m;
-#endif
 }
diff --git a/ext/pybind11/tests/cross_module_interleaved_error_already_set.cpp b/ext/pybind11/tests/cross_module_interleaved_error_already_set.cpp
new file mode 100644
index 0000000000..fdd9939e45
--- /dev/null
+++ b/ext/pybind11/tests/cross_module_interleaved_error_already_set.cpp
@@ -0,0 +1,51 @@
+/*
+    Copyright (c) 2022 Google LLC
+
+    All rights reserved. Use of this source code is governed by a
+    BSD-style license that can be found in the LICENSE file.
+*/
+
+#include <pybind11/pybind11.h>
+
+// This file mimics a DSO that makes pybind11 calls but does not define a PYBIND11_MODULE,
+// so that the first call of cross_module_error_already_set() triggers the first call of
+// pybind11::detail::get_internals().
+
+namespace {
+
+namespace py = pybind11;
+
+void interleaved_error_already_set() {
+    PyErr_SetString(PyExc_RuntimeError, "1st error.");
+    try {
+        throw py::error_already_set();
+    } catch (const py::error_already_set &) {
+        // The 2nd error could be conditional in a real application.
+        PyErr_SetString(PyExc_RuntimeError, "2nd error.");
+    } // Here the 1st error is destroyed before the 2nd error is fetched.
+    // The error_already_set dtor triggers a pybind11::detail::get_internals()
+    // call via pybind11::gil_scoped_acquire.
+    if (PyErr_Occurred()) {
+        throw py::error_already_set();
+    }
+}
+
+constexpr char kModuleName[] = "cross_module_interleaved_error_already_set";
+
+struct PyModuleDef moduledef = {
+    PyModuleDef_HEAD_INIT, kModuleName, nullptr, 0, nullptr, nullptr, nullptr, nullptr, nullptr};
+
+} // namespace
+
+extern "C" PYBIND11_EXPORT PyObject *PyInit_cross_module_interleaved_error_already_set() {
+    PyObject *m = PyModule_Create(&moduledef);
+    if (m != nullptr) {
+        static_assert(sizeof(&interleaved_error_already_set) == sizeof(void *),
+                      "Function pointer must have the same size as void *");
+        PyModule_AddObject(
+            m,
+            "funcaddr",
+            PyLong_FromVoidPtr(reinterpret_cast<void *>(&interleaved_error_already_set)));
+    }
+    return m;
+}
diff --git a/ext/pybind11/tests/eigen_tensor_avoid_stl_array.cpp b/ext/pybind11/tests/eigen_tensor_avoid_stl_array.cpp
new file mode 100644
index 0000000000..eacc9e9bd6
--- /dev/null
+++ b/ext/pybind11/tests/eigen_tensor_avoid_stl_array.cpp
@@ -0,0 +1,14 @@
+/*
+    tests/eigen_tensor.cpp -- automatic conversion of Eigen Tensor
+
+    All rights reserved. Use of this source code is governed by a
+    BSD-style license that can be found in the LICENSE file.
+*/
+
+#ifndef EIGEN_AVOID_STL_ARRAY
+#    define EIGEN_AVOID_STL_ARRAY
+#endif
+
+#include "test_eigen_tensor.inl"
+
+PYBIND11_MODULE(eigen_tensor_avoid_stl_array, m) { eigen_tensor_test::test_module(m); }
diff --git a/ext/pybind11/tests/env.py b/ext/pybind11/tests/env.py
index 6172b451b3..0345df65dc 100644
--- a/ext/pybind11/tests/env.py
+++ b/ext/pybind11/tests/env.py
@@ -1,4 +1,3 @@
-# -*- coding: utf-8 -*-
 import platform
 import sys
 
@@ -11,10 +10,6 @@ WIN = sys.platform.startswith("win32") or sys.platform.startswith("cygwin")
 CPYTHON = platform.python_implementation() == "CPython"
 PYPY = platform.python_implementation() == "PyPy"
 
-PY2 = sys.version_info.major == 2
-
-PY = sys.version_info
-
 
 def deprecated_call():
     """
diff --git a/ext/pybind11/tests/extra_python_package/test_files.py b/ext/pybind11/tests/extra_python_package/test_files.py
index 337a72dfea..9a9bb1556a 100644
--- a/ext/pybind11/tests/extra_python_package/test_files.py
+++ b/ext/pybind11/tests/extra_python_package/test_files.py
@@ -1,4 +1,3 @@
-# -*- coding: utf-8 -*-
 import contextlib
 import os
 import string
@@ -13,6 +12,16 @@ import zipfile
 DIR = os.path.abspath(os.path.dirname(__file__))
 MAIN_DIR = os.path.dirname(os.path.dirname(DIR))
 
+PKGCONFIG = """\
+prefix=${{pcfiledir}}/../../
+includedir=${{prefix}}/include
+
+Name: pybind11
+Description: Seamless operability between C++11 and Python
+Version: {VERSION}
+Cflags: -I${{includedir}}
+"""
+
 
 main_headers = {
     "include/pybind11/attr.h",
@@ -46,6 +55,11 @@ detail_headers = {
     "include/pybind11/detail/typeid.h",
 }
 
+eigen_headers = {
+    "include/pybind11/eigen/matrix.h",
+    "include/pybind11/eigen/tensor.h",
+}
+
 stl_headers = {
     "include/pybind11/stl/filesystem.h",
 }
@@ -60,19 +74,21 @@ cmake_files = {
     "share/cmake/pybind11/pybind11Tools.cmake",
 }
 
+pkgconfig_files = {
+    "share/pkgconfig/pybind11.pc",
+}
+
 py_files = {
     "__init__.py",
     "__main__.py",
     "_version.py",
-    "_version.pyi",
     "commands.py",
     "py.typed",
     "setup_helpers.py",
-    "setup_helpers.pyi",
 }
 
-headers = main_headers | detail_headers | stl_headers
-src_files = headers | cmake_files
+headers = main_headers | detail_headers | eigen_headers | stl_headers
+src_files = headers | cmake_files | pkgconfig_files
 all_files = src_files | py_files
 
 
@@ -81,10 +97,12 @@ sdist_files = {
     "pybind11/include",
     "pybind11/include/pybind11",
     "pybind11/include/pybind11/detail",
+    "pybind11/include/pybind11/eigen",
     "pybind11/include/pybind11/stl",
     "pybind11/share",
     "pybind11/share/cmake",
     "pybind11/share/cmake/pybind11",
+    "pybind11/share/pkgconfig",
     "pyproject.toml",
     "setup.cfg",
     "setup.py",
@@ -104,62 +122,57 @@ local_sdist_files = {
 }
 
 
+def read_tz_file(tar: tarfile.TarFile, name: str) -> bytes:
+    start = tar.getnames()[0] + "/"
+    inner_file = tar.extractfile(tar.getmember(f"{start}{name}"))
+    assert inner_file
+    with contextlib.closing(inner_file) as f:
+        return f.read()
+
+
+def normalize_line_endings(value: bytes) -> bytes:
+    return value.replace(os.linesep.encode("utf-8"), b"\n")
+
+
 def test_build_sdist(monkeypatch, tmpdir):
 
     monkeypatch.chdir(MAIN_DIR)
 
-    out = subprocess.check_output(
-        [
-            sys.executable,
-            "setup.py",
-            "sdist",
-            "--formats=tar",
-            "--dist-dir",
-            str(tmpdir),
-        ]
+    subprocess.run(
+        [sys.executable, "-m", "build", "--sdist", f"--outdir={tmpdir}"], check=True
     )
-    if hasattr(out, "decode"):
-        out = out.decode()
 
-    (sdist,) = tmpdir.visit("*.tar")
+    (sdist,) = tmpdir.visit("*.tar.gz")
 
-    with tarfile.open(str(sdist)) as tar:
+    with tarfile.open(str(sdist), "r:gz") as tar:
         start = tar.getnames()[0] + "/"
         version = start[9:-1]
         simpler = {n.split("/", 1)[-1] for n in tar.getnames()[1:]}
 
-        with contextlib.closing(
-            tar.extractfile(tar.getmember(start + "setup.py"))
-        ) as f:
-            setup_py = f.read()
+        setup_py = read_tz_file(tar, "setup.py")
+        pyproject_toml = read_tz_file(tar, "pyproject.toml")
+        pkgconfig = read_tz_file(tar, "pybind11/share/pkgconfig/pybind11.pc")
+        cmake_cfg = read_tz_file(
+            tar, "pybind11/share/cmake/pybind11/pybind11Config.cmake"
+        )
 
-        with contextlib.closing(
-            tar.extractfile(tar.getmember(start + "pyproject.toml"))
-        ) as f:
-            pyproject_toml = f.read()
+    assert (
+        'set(pybind11_INCLUDE_DIR "${PACKAGE_PREFIX_DIR}/include")'
+        in cmake_cfg.decode("utf-8")
+    )
 
-        with contextlib.closing(
-            tar.extractfile(
-                tar.getmember(
-                    start + "pybind11/share/cmake/pybind11/pybind11Config.cmake"
-                )
-            )
-        ) as f:
-            contents = f.read().decode("utf8")
-        assert 'set(pybind11_INCLUDE_DIR "${PACKAGE_PREFIX_DIR}/include")' in contents
-
-    files = {"pybind11/{}".format(n) for n in all_files}
+    files = {f"pybind11/{n}" for n in all_files}
     files |= sdist_files
-    files |= {"pybind11{}".format(n) for n in local_sdist_files}
+    files |= {f"pybind11{n}" for n in local_sdist_files}
     files.add("pybind11.egg-info/entry_points.txt")
     files.add("pybind11.egg-info/requires.txt")
     assert simpler == files
 
     with open(os.path.join(MAIN_DIR, "tools", "setup_main.py.in"), "rb") as f:
         contents = (
-            string.Template(f.read().decode())
+            string.Template(f.read().decode("utf-8"))
             .substitute(version=version, extra_cmd="")
-            .encode()
+            .encode("utf-8")
         )
     assert setup_py == contents
 
@@ -167,52 +180,48 @@ def test_build_sdist(monkeypatch, tmpdir):
         contents = f.read()
     assert pyproject_toml == contents
 
+    simple_version = ".".join(version.split(".")[:3])
+    pkgconfig_expected = PKGCONFIG.format(VERSION=simple_version).encode("utf-8")
+    assert normalize_line_endings(pkgconfig) == pkgconfig_expected
+
 
 def test_build_global_dist(monkeypatch, tmpdir):
 
     monkeypatch.chdir(MAIN_DIR)
     monkeypatch.setenv("PYBIND11_GLOBAL_SDIST", "1")
-
-    out = subprocess.check_output(
-        [
-            sys.executable,
-            "setup.py",
-            "sdist",
-            "--formats=tar",
-            "--dist-dir",
-            str(tmpdir),
-        ]
+    subprocess.run(
+        [sys.executable, "-m", "build", "--sdist", "--outdir", str(tmpdir)], check=True
     )
-    if hasattr(out, "decode"):
-        out = out.decode()
 
-    (sdist,) = tmpdir.visit("*.tar")
+    (sdist,) = tmpdir.visit("*.tar.gz")
 
-    with tarfile.open(str(sdist)) as tar:
+    with tarfile.open(str(sdist), "r:gz") as tar:
         start = tar.getnames()[0] + "/"
         version = start[16:-1]
         simpler = {n.split("/", 1)[-1] for n in tar.getnames()[1:]}
 
-        with contextlib.closing(
-            tar.extractfile(tar.getmember(start + "setup.py"))
-        ) as f:
-            setup_py = f.read()
+        setup_py = read_tz_file(tar, "setup.py")
+        pyproject_toml = read_tz_file(tar, "pyproject.toml")
+        pkgconfig = read_tz_file(tar, "pybind11/share/pkgconfig/pybind11.pc")
+        cmake_cfg = read_tz_file(
+            tar, "pybind11/share/cmake/pybind11/pybind11Config.cmake"
+        )
 
-        with contextlib.closing(
-            tar.extractfile(tar.getmember(start + "pyproject.toml"))
-        ) as f:
-            pyproject_toml = f.read()
+    assert (
+        'set(pybind11_INCLUDE_DIR "${PACKAGE_PREFIX_DIR}/include")'
+        in cmake_cfg.decode("utf-8")
+    )
 
-    files = {"pybind11/{}".format(n) for n in all_files}
+    files = {f"pybind11/{n}" for n in all_files}
     files |= sdist_files
-    files |= {"pybind11_global{}".format(n) for n in local_sdist_files}
+    files |= {f"pybind11_global{n}" for n in local_sdist_files}
     assert simpler == files
 
     with open(os.path.join(MAIN_DIR, "tools", "setup_global.py.in"), "rb") as f:
         contents = (
             string.Template(f.read().decode())
             .substitute(version=version, extra_cmd="")
-            .encode()
+            .encode("utf-8")
         )
         assert setup_py == contents
 
@@ -220,17 +229,21 @@ def test_build_global_dist(monkeypatch, tmpdir):
         contents = f.read()
         assert pyproject_toml == contents
 
+    simple_version = ".".join(version.split(".")[:3])
+    pkgconfig_expected = PKGCONFIG.format(VERSION=simple_version).encode("utf-8")
+    assert normalize_line_endings(pkgconfig) == pkgconfig_expected
+
 
 def tests_build_wheel(monkeypatch, tmpdir):
     monkeypatch.chdir(MAIN_DIR)
 
-    subprocess.check_output(
-        [sys.executable, "-m", "pip", "wheel", ".", "-w", str(tmpdir)]
+    subprocess.run(
+        [sys.executable, "-m", "pip", "wheel", ".", "-w", str(tmpdir)], check=True
     )
 
     (wheel,) = tmpdir.visit("*.whl")
 
-    files = {"pybind11/{}".format(n) for n in all_files}
+    files = {f"pybind11/{n}" for n in all_files}
     files |= {
         "dist-info/LICENSE",
         "dist-info/METADATA",
@@ -244,9 +257,7 @@ def tests_build_wheel(monkeypatch, tmpdir):
         names = z.namelist()
 
     trimmed = {n for n in names if "dist-info" not in n}
-    trimmed |= {
-        "dist-info/{}".format(n.split("/", 1)[-1]) for n in names if "dist-info" in n
-    }
+    trimmed |= {f"dist-info/{n.split('/', 1)[-1]}" for n in names if "dist-info" in n}
     assert files == trimmed
 
 
@@ -254,14 +265,14 @@ def tests_build_global_wheel(monkeypatch, tmpdir):
     monkeypatch.chdir(MAIN_DIR)
     monkeypatch.setenv("PYBIND11_GLOBAL_SDIST", "1")
 
-    subprocess.check_output(
-        [sys.executable, "-m", "pip", "wheel", ".", "-w", str(tmpdir)]
+    subprocess.run(
+        [sys.executable, "-m", "pip", "wheel", ".", "-w", str(tmpdir)], check=True
     )
 
     (wheel,) = tmpdir.visit("*.whl")
 
-    files = {"data/data/{}".format(n) for n in src_files}
-    files |= {"data/headers/{}".format(n[8:]) for n in headers}
+    files = {f"data/data/{n}" for n in src_files}
+    files |= {f"data/headers/{n[8:]}" for n in headers}
     files |= {
         "dist-info/LICENSE",
         "dist-info/METADATA",
diff --git a/ext/pybind11/tests/extra_setuptools/test_setuphelper.py b/ext/pybind11/tests/extra_setuptools/test_setuphelper.py
index c24f50af8a..d5d3093bf0 100644
--- a/ext/pybind11/tests/extra_setuptools/test_setuphelper.py
+++ b/ext/pybind11/tests/extra_setuptools/test_setuphelper.py
@@ -1,4 +1,3 @@
-# -*- coding: utf-8 -*-
 import os
 import subprocess
 import sys
@@ -8,6 +7,7 @@ import pytest
 
 DIR = os.path.abspath(os.path.dirname(__file__))
 MAIN_DIR = os.path.dirname(os.path.dirname(DIR))
+WIN = sys.platform.startswith("win32") or sys.platform.startswith("cygwin")
 
 
 @pytest.mark.parametrize("parallel", [False, True])
@@ -18,7 +18,7 @@ def test_simple_setup_py(monkeypatch, tmpdir, parallel, std):
 
     (tmpdir / "setup.py").write_text(
         dedent(
-            u"""\
+            f"""\
             import sys
             sys.path.append({MAIN_DIR!r})
 
@@ -51,13 +51,13 @@ def test_simple_setup_py(monkeypatch, tmpdir, parallel, std):
                 ext_modules=ext_modules,
             )
             """
-        ).format(MAIN_DIR=MAIN_DIR, std=std, parallel=parallel),
+        ),
         encoding="ascii",
     )
 
     (tmpdir / "main.cpp").write_text(
         dedent(
-            u"""\
+            """\
             #include <pybind11/pybind11.h>
 
             int f(int x) {
@@ -71,13 +71,20 @@ def test_simple_setup_py(monkeypatch, tmpdir, parallel, std):
         encoding="ascii",
     )
 
-    subprocess.check_call(
+    out = subprocess.check_output(
         [sys.executable, "setup.py", "build_ext", "--inplace"],
-        stdout=sys.stdout,
-        stderr=sys.stderr,
     )
+    if not WIN:
+        assert b"-g0" in out
+    out = subprocess.check_output(
+        [sys.executable, "setup.py", "build_ext", "--inplace", "--force"],
+        env=dict(os.environ, CFLAGS="-g"),
+    )
+    if not WIN:
+        assert b"-g0" not in out
 
     # Debug helper printout, normally hidden
+    print(out)
     for item in tmpdir.listdir():
         print(item.basename)
 
@@ -88,7 +95,7 @@ def test_simple_setup_py(monkeypatch, tmpdir, parallel, std):
 
     (tmpdir / "test.py").write_text(
         dedent(
-            u"""\
+            """\
             import simple_setup
             assert simple_setup.f(3) == 9
             """
@@ -113,10 +120,11 @@ def test_intree_extensions(monkeypatch, tmpdir):
     subdir.ensure_dir()
     src = subdir / "ext.cpp"
     src.ensure()
-    (ext,) = intree_extensions([src.relto(tmpdir)])
+    relpath = src.relto(tmpdir)
+    (ext,) = intree_extensions([relpath])
     assert ext.name == "ext"
     subdir.ensure("__init__.py")
-    (ext,) = intree_extensions([src.relto(tmpdir)])
+    (ext,) = intree_extensions([relpath])
     assert ext.name == "dir.ext"
 
 
diff --git a/ext/pybind11/tests/local_bindings.h b/ext/pybind11/tests/local_bindings.h
index 4c936c19a5..01d2785353 100644
--- a/ext/pybind11/tests/local_bindings.h
+++ b/ext/pybind11/tests/local_bindings.h
@@ -1,12 +1,13 @@
 #pragma once
-#include <utility>
-
 #include "pybind11_tests.h"
 
+#include <utility>
+
 /// Simple class used to test py::local:
-template <int> class LocalBase {
+template <int>
+class LocalBase {
 public:
-    explicit LocalBase(int i) : i(i) { }
+    explicit LocalBase(int i) : i(i) {}
     int i = -1;
 };
 
@@ -35,12 +36,12 @@ using NonLocalVec2 = std::vector<NonLocal2>;
 using NonLocalMap = std::unordered_map<std::string, NonLocalType>;
 using NonLocalMap2 = std::unordered_map<std::string, uint8_t>;
 
-
 // Exception that will be caught via the module local translator.
 class LocalException : public std::exception {
 public:
-    explicit LocalException(const char * m) : message{m} {}
-    const char * what() const noexcept override {return message.c_str();}
+    explicit LocalException(const char *m) : message{m} {}
+    const char *what() const noexcept override { return message.c_str(); }
+
 private:
     std::string message = "";
 };
@@ -48,8 +49,9 @@ private:
 // Exception that will be registered with register_local_exception_translator
 class LocalSimpleException : public std::exception {
 public:
-    explicit LocalSimpleException(const char * m) : message{m} {}
-    const char * what() const noexcept override {return message.c_str();}
+    explicit LocalSimpleException(const char *m) : message{m} {}
+    const char *what() const noexcept override { return message.c_str(); }
+
 private:
     std::string message = "";
 };
@@ -58,17 +60,16 @@ PYBIND11_MAKE_OPAQUE(LocalVec);
 PYBIND11_MAKE_OPAQUE(LocalVec2);
 PYBIND11_MAKE_OPAQUE(LocalMap);
 PYBIND11_MAKE_OPAQUE(NonLocalVec);
-//PYBIND11_MAKE_OPAQUE(NonLocalVec2); // same type as LocalVec2
+// PYBIND11_MAKE_OPAQUE(NonLocalVec2); // same type as LocalVec2
 PYBIND11_MAKE_OPAQUE(NonLocalMap);
 PYBIND11_MAKE_OPAQUE(NonLocalMap2);
 
-
 // Simple bindings (used with the above):
 template <typename T, int Adjust = 0, typename... Args>
-py::class_<T> bind_local(Args && ...args) {
-    return py::class_<T>(std::forward<Args>(args)...)
-        .def(py::init<int>())
-        .def("get", [](T &i) { return i.i + Adjust; });
+py::class_<T> bind_local(Args &&...args) {
+    return py::class_<T>(std::forward<Args>(args)...).def(py::init<int>()).def("get", [](T &i) {
+        return i.i + Adjust;
+    });
 };
 
 // Simulate a foreign library base class (to match the example in the docs):
@@ -81,5 +82,11 @@ public:
 };
 } // namespace pets
 
-struct MixGL { int i; explicit MixGL(int i) : i{i} {} };
-struct MixGL2 { int i; explicit MixGL2(int i) : i{i} {} };
+struct MixGL {
+    int i;
+    explicit MixGL(int i) : i{i} {}
+};
+struct MixGL2 {
+    int i;
+    explicit MixGL2(int i) : i{i} {}
+};
diff --git a/ext/pybind11/tests/object.h b/ext/pybind11/tests/object.h
index be21bf6316..564dd4a7a5 100644
--- a/ext/pybind11/tests/object.h
+++ b/ext/pybind11/tests/object.h
@@ -1,8 +1,9 @@
 #if !defined(__OBJECT_H)
-#define __OBJECT_H
+#    define __OBJECT_H
 
-#include <atomic>
-#include "constructor_stats.h"
+#    include "constructor_stats.h"
+
+#    include <atomic>
 
 /// Reference counted object base class
 class Object {
@@ -27,20 +28,23 @@ public:
      */
     void decRef(bool dealloc = true) const {
         --m_refCount;
-        if (m_refCount == 0 && dealloc)
+        if (m_refCount == 0 && dealloc) {
             delete this;
-        else if (m_refCount < 0)
+        } else if (m_refCount < 0) {
             throw std::runtime_error("Internal error: reference count < 0!");
+        }
     }
 
     virtual std::string toString() const = 0;
+
 protected:
     /** \brief Virtual protected deconstructor.
      * (Will only be called by \ref ref)
      */
     virtual ~Object() { print_destroyed(this); }
+
 private:
-    mutable std::atomic<int> m_refCount { 0 };
+    mutable std::atomic<int> m_refCount{0};
 };
 
 // Tag class used to track constructions of ref objects.  When we track constructors, below, we
@@ -59,84 +63,105 @@ class ref_tag {};
  *
  * \ingroup libcore
  */
-template <typename T> class ref {
+template <typename T>
+class ref {
 public:
     /// Create a nullptr reference
-    ref() : m_ptr(nullptr) { print_default_created(this); track_default_created((ref_tag*) this); }
+    ref() : m_ptr(nullptr) {
+        print_default_created(this);
+        track_default_created((ref_tag *) this);
+    }
 
     /// Construct a reference from a pointer
     explicit ref(T *ptr) : m_ptr(ptr) {
-        if (m_ptr) ((Object *) m_ptr)->incRef();
-
-        print_created(this, "from pointer", m_ptr); track_created((ref_tag*) this, "from pointer");
+        if (m_ptr) {
+            ((Object *) m_ptr)->incRef();
+        }
 
+        print_created(this, "from pointer", m_ptr);
+        track_created((ref_tag *) this, "from pointer");
     }
 
     /// Copy constructor
     ref(const ref &r) : m_ptr(r.m_ptr) {
-        if (m_ptr)
+        if (m_ptr) {
             ((Object *) m_ptr)->incRef();
+        }
 
-        print_copy_created(this, "with pointer", m_ptr); track_copy_created((ref_tag*) this);
+        print_copy_created(this, "with pointer", m_ptr);
+        track_copy_created((ref_tag *) this);
     }
 
     /// Move constructor
     ref(ref &&r) noexcept : m_ptr(r.m_ptr) {
         r.m_ptr = nullptr;
 
-        print_move_created(this, "with pointer", m_ptr); track_move_created((ref_tag*) this);
+        print_move_created(this, "with pointer", m_ptr);
+        track_move_created((ref_tag *) this);
     }
 
     /// Destroy this reference
     ~ref() {
-        if (m_ptr)
+        if (m_ptr) {
             ((Object *) m_ptr)->decRef();
+        }
 
-        print_destroyed(this); track_destroyed((ref_tag*) this);
+        print_destroyed(this);
+        track_destroyed((ref_tag *) this);
     }
 
     /// Move another reference into the current one
     ref &operator=(ref &&r) noexcept {
-        print_move_assigned(this, "pointer", r.m_ptr); track_move_assigned((ref_tag*) this);
+        print_move_assigned(this, "pointer", r.m_ptr);
+        track_move_assigned((ref_tag *) this);
 
-        if (*this == r)
+        if (*this == r) {
             return *this;
-        if (m_ptr)
+        }
+        if (m_ptr) {
             ((Object *) m_ptr)->decRef();
+        }
         m_ptr = r.m_ptr;
         r.m_ptr = nullptr;
         return *this;
     }
 
     /// Overwrite this reference with another reference
-    ref& operator=(const ref& r) {
+    ref &operator=(const ref &r) {
         if (this == &r) {
             return *this;
         }
         print_copy_assigned(this, "pointer", r.m_ptr);
         track_copy_assigned((ref_tag *) this);
 
-        if (m_ptr == r.m_ptr)
+        if (m_ptr == r.m_ptr) {
             return *this;
-        if (m_ptr)
+        }
+        if (m_ptr) {
             ((Object *) m_ptr)->decRef();
+        }
         m_ptr = r.m_ptr;
-        if (m_ptr)
+        if (m_ptr) {
             ((Object *) m_ptr)->incRef();
+        }
         return *this;
     }
 
     /// Overwrite this reference with a pointer to another object
-    ref& operator=(T *ptr) {
-        print_values(this, "assigned pointer"); track_values((ref_tag*) this, "assigned pointer");
+    ref &operator=(T *ptr) {
+        print_values(this, "assigned pointer");
+        track_values((ref_tag *) this, "assigned pointer");
 
-        if (m_ptr == ptr)
+        if (m_ptr == ptr) {
             return *this;
-        if (m_ptr)
+        }
+        if (m_ptr) {
             ((Object *) m_ptr)->decRef();
+        }
         m_ptr = ptr;
-        if (m_ptr)
+        if (m_ptr) {
             ((Object *) m_ptr)->incRef();
+        }
         return *this;
     }
 
@@ -147,31 +172,32 @@ public:
     bool operator!=(const ref &r) const { return m_ptr != r.m_ptr; }
 
     /// Compare this reference with a pointer
-    bool operator==(const T* ptr) const { return m_ptr == ptr; }
+    bool operator==(const T *ptr) const { return m_ptr == ptr; }
 
     /// Compare this reference with a pointer
-    bool operator!=(const T* ptr) const { return m_ptr != ptr; }
+    bool operator!=(const T *ptr) const { return m_ptr != ptr; }
 
     /// Access the object referenced by this reference
-    T* operator->() { return m_ptr; }
+    T *operator->() { return m_ptr; }
 
     /// Access the object referenced by this reference
-    const T* operator->() const { return m_ptr; }
+    const T *operator->() const { return m_ptr; }
 
     /// Return a C++ reference to the referenced object
-    T& operator*() { return *m_ptr; }
+    T &operator*() { return *m_ptr; }
 
     /// Return a const C++ reference to the referenced object
-    const T& operator*() const { return *m_ptr; }
+    const T &operator*() const { return *m_ptr; }
 
     /// Return a pointer to the referenced object
-    explicit operator T* () { return m_ptr; }
+    explicit operator T *() { return m_ptr; }
 
     /// Return a const pointer to the referenced object
-    T* get_ptr() { return m_ptr; }
+    T *get_ptr() { return m_ptr; }
 
     /// Return a pointer to the referenced object
-    const T* get_ptr() const { return m_ptr; }
+    const T *get_ptr() const { return m_ptr; }
+
 private:
     T *m_ptr;
 };
diff --git a/ext/pybind11/tests/pybind11_cross_module_tests.cpp b/ext/pybind11/tests/pybind11_cross_module_tests.cpp
index 5838cb2746..9379f3f259 100644
--- a/ext/pybind11/tests/pybind11_cross_module_tests.cpp
+++ b/ext/pybind11/tests/pybind11_cross_module_tests.cpp
@@ -7,12 +7,12 @@
     BSD-style license that can be found in the LICENSE file.
 */
 
-#include "pybind11_tests.h"
-#include "local_bindings.h"
-#include "test_exceptions.h"
-
 #include <pybind11/stl_bind.h>
 
+#include "local_bindings.h"
+#include "pybind11_tests.h"
+#include "test_exceptions.h"
+
 #include <numeric>
 #include <utility>
 
@@ -30,37 +30,45 @@ PYBIND11_MODULE(pybind11_cross_module_tests, m) {
 
     // test_exceptions.py
     py::register_local_exception<LocalSimpleException>(m, "LocalSimpleException");
-    m.def("raise_runtime_error", []() { PyErr_SetString(PyExc_RuntimeError, "My runtime error"); throw py::error_already_set(); });
-    m.def("raise_value_error", []() { PyErr_SetString(PyExc_ValueError, "My value error"); throw py::error_already_set(); });
+    m.def("raise_runtime_error", []() {
+        PyErr_SetString(PyExc_RuntimeError, "My runtime error");
+        throw py::error_already_set();
+    });
+    m.def("raise_value_error", []() {
+        PyErr_SetString(PyExc_ValueError, "My value error");
+        throw py::error_already_set();
+    });
     m.def("throw_pybind_value_error", []() { throw py::value_error("pybind11 value error"); });
     m.def("throw_pybind_type_error", []() { throw py::type_error("pybind11 type error"); });
     m.def("throw_stop_iteration", []() { throw py::stop_iteration(); });
     m.def("throw_local_error", []() { throw LocalException("just local"); });
     m.def("throw_local_simple_error", []() { throw LocalSimpleException("external mod"); });
     py::register_exception_translator([](std::exception_ptr p) {
-      try {
-          if (p) std::rethrow_exception(p);
-      } catch (const shared_exception &e) {
-          PyErr_SetString(PyExc_KeyError, e.what());
-      }
+        try {
+            if (p) {
+                std::rethrow_exception(p);
+            }
+        } catch (const shared_exception &e) {
+            PyErr_SetString(PyExc_KeyError, e.what());
+        }
     });
 
     // translate the local exception into a key error but only in this module
     py::register_local_exception_translator([](std::exception_ptr p) {
-      try {
-          if (p) {
-            std::rethrow_exception(p);
-          }
-      } catch (const LocalException &e) {
-        PyErr_SetString(PyExc_KeyError, e.what());
-      }
+        try {
+            if (p) {
+                std::rethrow_exception(p);
+            }
+        } catch (const LocalException &e) {
+            PyErr_SetString(PyExc_KeyError, e.what());
+        }
     });
 
     // test_local_bindings.py
     // Local to both:
-    bind_local<LocalType, 1>(m, "LocalType", py::module_local())
-        .def("get2", [](LocalType &t) { return t.i + 2; })
-        ;
+    bind_local<LocalType, 1>(m, "LocalType", py::module_local()).def("get2", [](LocalType &t) {
+        return t.i + 2;
+    });
 
     // Can only be called with our python type:
     m.def("local_value", [](LocalType &l) { return l.i; });
@@ -68,9 +76,7 @@ PYBIND11_MODULE(pybind11_cross_module_tests, m) {
     // test_nonlocal_failure
     // This registration will fail (global registration when LocalFail is already registered
     // globally in the main test module):
-    m.def("register_nonlocal", [m]() {
-        bind_local<NonLocalType, 0>(m, "NonLocalType");
-    });
+    m.def("register_nonlocal", [m]() { bind_local<NonLocalType, 0>(m, "NonLocalType"); });
 
     // test_stl_bind_local
     // stl_bind.h binders defaults to py::module_local if the types are local or converting:
@@ -80,27 +86,21 @@ PYBIND11_MODULE(pybind11_cross_module_tests, m) {
     // test_stl_bind_global
     // and global if the type (or one of the types, for the map) is global (so these will fail,
     // assuming pybind11_tests is already loaded):
-    m.def("register_nonlocal_vec", [m]() {
-        py::bind_vector<NonLocalVec>(m, "NonLocalVec");
-    });
-    m.def("register_nonlocal_map", [m]() {
-        py::bind_map<NonLocalMap>(m, "NonLocalMap");
-    });
+    m.def("register_nonlocal_vec", [m]() { py::bind_vector<NonLocalVec>(m, "NonLocalVec"); });
+    m.def("register_nonlocal_map", [m]() { py::bind_map<NonLocalMap>(m, "NonLocalMap"); });
     // The default can, however, be overridden to global using `py::module_local()` or
     // `py::module_local(false)`.
     // Explicitly made local:
     py::bind_vector<NonLocalVec2>(m, "NonLocalVec2", py::module_local());
     // Explicitly made global (and so will fail to bind):
-    m.def("register_nonlocal_map2", [m]() {
-        py::bind_map<NonLocalMap2>(m, "NonLocalMap2", py::module_local(false));
-    });
+    m.def("register_nonlocal_map2",
+          [m]() { py::bind_map<NonLocalMap2>(m, "NonLocalMap2", py::module_local(false)); });
 
     // test_mixed_local_global
     // We try this both with the global type registered first and vice versa (the order shouldn't
     // matter).
-    m.def("register_mixed_global_local", [m]() {
-        bind_local<MixedGlobalLocal, 200>(m, "MixedGlobalLocal", py::module_local());
-    });
+    m.def("register_mixed_global_local",
+          [m]() { bind_local<MixedGlobalLocal, 200>(m, "MixedGlobalLocal", py::module_local()); });
     m.def("register_mixed_local_global", [m]() {
         bind_local<MixedLocalGlobal, 2000>(m, "MixedLocalGlobal", py::module_local(false));
     });
@@ -108,14 +108,14 @@ PYBIND11_MODULE(pybind11_cross_module_tests, m) {
     m.def("get_mixed_lg", [](int i) { return MixedLocalGlobal(i); });
 
     // test_internal_locals_differ
-    m.def("local_cpp_types_addr", []() { return (uintptr_t) &py::detail::get_local_internals().registered_types_cpp; });
+    m.def("local_cpp_types_addr",
+          []() { return (uintptr_t) &py::detail::get_local_internals().registered_types_cpp; });
 
     // test_stl_caster_vs_stl_bind
     py::bind_vector<std::vector<int>>(m, "VectorInt");
 
-    m.def("load_vector_via_binding", [](std::vector<int> &v) {
-        return std::accumulate(v.begin(), v.end(), 0);
-    });
+    m.def("load_vector_via_binding",
+          [](std::vector<int> &v) { return std::accumulate(v.begin(), v.end(), 0); });
 
     // test_cross_module_calls
     m.def("return_self", [](LocalVec *v) { return v; });
@@ -125,11 +125,9 @@ PYBIND11_MODULE(pybind11_cross_module_tests, m) {
     public:
         explicit Dog(std::string name) : Pet(std::move(name)) {}
     };
-    py::class_<pets::Pet>(m, "Pet", py::module_local())
-        .def("name", &pets::Pet::name);
+    py::class_<pets::Pet>(m, "Pet", py::module_local()).def("name", &pets::Pet::name);
     // Binding for local extending class:
-    py::class_<Dog, pets::Pet>(m, "Dog")
-        .def(py::init<std::string>());
+    py::class_<Dog, pets::Pet>(m, "Dog").def(py::init<std::string>());
     m.def("pet_name", [](pets::Pet &p) { return p.name(); });
 
     py::class_<MixGL>(m, "MixGL", py::module_local()).def(py::init<int>());
diff --git a/ext/pybind11/tests/pybind11_tests.cpp b/ext/pybind11/tests/pybind11_tests.cpp
index 439cd40129..6240346487 100644
--- a/ext/pybind11/tests/pybind11_tests.cpp
+++ b/ext/pybind11/tests/pybind11_tests.cpp
@@ -8,6 +8,7 @@
 */
 
 #include "pybind11_tests.h"
+
 #include "constructor_stats.h"
 
 #include <functional>
@@ -31,9 +32,7 @@ std::list<std::function<void(py::module_ &)>> &initializers() {
     return inits;
 }
 
-test_initializer::test_initializer(Initializer init) {
-    initializers().emplace_back(init);
-}
+test_initializer::test_initializer(Initializer init) { initializers().emplace_back(init); }
 
 test_initializer::test_initializer(const char *submodule_name, Initializer init) {
     initializers().emplace_back([=](py::module_ &parent) {
@@ -51,26 +50,58 @@ void bind_ConstructorStats(py::module_ &m) {
         .def_readwrite("move_assignments", &ConstructorStats::move_assignments)
         .def_readwrite("copy_constructions", &ConstructorStats::copy_constructions)
         .def_readwrite("move_constructions", &ConstructorStats::move_constructions)
-        .def_static("get", (ConstructorStats &(*)(py::object)) &ConstructorStats::get, py::return_value_policy::reference_internal)
+        .def_static("get",
+                    (ConstructorStats & (*) (py::object)) & ConstructorStats::get,
+                    py::return_value_policy::reference_internal)
 
-        // Not exactly ConstructorStats, but related: expose the internal pybind number of registered instances
-        // to allow instance cleanup checks (invokes a GC first)
+        // Not exactly ConstructorStats, but related: expose the internal pybind number of
+        // registered instances to allow instance cleanup checks (invokes a GC first)
         .def_static("detail_reg_inst", []() {
             ConstructorStats::gc();
             return py::detail::get_internals().registered_instances.size();
-        })
-        ;
+        });
+}
+
+const char *cpp_std() {
+    return
+#if defined(PYBIND11_CPP20)
+        "C++20";
+#elif defined(PYBIND11_CPP17)
+        "C++17";
+#elif defined(PYBIND11_CPP14)
+        "C++14";
+#else
+        "C++11";
+#endif
 }
 
 PYBIND11_MODULE(pybind11_tests, m) {
     m.doc() = "pybind11 test module";
 
+    // Intentionally kept minimal to not create a maintenance chore
+    // ("just enough" to be conclusive).
+#if defined(_MSC_FULL_VER)
+    m.attr("compiler_info") = "MSVC " PYBIND11_TOSTRING(_MSC_FULL_VER);
+#elif defined(__VERSION__)
+    m.attr("compiler_info") = __VERSION__;
+#else
+    m.attr("compiler_info") = py::none();
+#endif
+    m.attr("cpp_std") = cpp_std();
+    m.attr("PYBIND11_INTERNALS_ID") = PYBIND11_INTERNALS_ID;
+    m.attr("PYBIND11_SIMPLE_GIL_MANAGEMENT") =
+#if defined(PYBIND11_SIMPLE_GIL_MANAGEMENT)
+        true;
+#else
+        false;
+#endif
+
     bind_ConstructorStats(m);
 
-#if !defined(NDEBUG)
-    m.attr("debug_enabled") = true;
+#if defined(PYBIND11_DETAILED_ERROR_MESSAGES)
+    m.attr("detailed_error_messages_enabled") = true;
 #else
-    m.attr("debug_enabled") = false;
+    m.attr("detailed_error_messages_enabled") = false;
 #endif
 
     py::class_<UserType>(m, "UserType", "A `py::class_` type for testing")
@@ -79,13 +110,14 @@ PYBIND11_MODULE(pybind11_tests, m) {
         .def("get_value", &UserType::value, "Get value using a method")
         .def("set_value", &UserType::set, "Set value using a method")
         .def_property("value", &UserType::value, &UserType::set, "Get/set value using a property")
-        .def("__repr__", [](const UserType& u) { return "UserType({})"_s.format(u.value()); });
+        .def("__repr__", [](const UserType &u) { return "UserType({})"_s.format(u.value()); });
 
     py::class_<IncType, UserType>(m, "IncType")
         .def(py::init<>())
         .def(py::init<int>())
-        .def("__repr__", [](const IncType& u) { return "IncType({})"_s.format(u.value()); });
+        .def("__repr__", [](const IncType &u) { return "IncType({})"_s.format(u.value()); });
 
-    for (const auto &initializer : initializers())
+    for (const auto &initializer : initializers()) {
         initializer(m);
+    }
 }
diff --git a/ext/pybind11/tests/pybind11_tests.h b/ext/pybind11/tests/pybind11_tests.h
index 800ddda48b..a7c00c2f9b 100644
--- a/ext/pybind11/tests/pybind11_tests.h
+++ b/ext/pybind11/tests/pybind11_tests.h
@@ -1,13 +1,7 @@
 #pragma once
 
-#include <pybind11/pybind11.h>
 #include <pybind11/eval.h>
-
-#if defined(_MSC_VER) && _MSC_VER < 1910
-// We get some really long type names here which causes MSVC 2015 to emit warnings
-#    pragma warning(                                                                              \
-        disable : 4503) // warning C4503: decorated name length exceeded, name was truncated
-#endif
+#include <pybind11/pybind11.h>
 
 namespace py = pybind11;
 using namespace pybind11::literals;
@@ -26,13 +20,13 @@ public:
     void test_submodule_##name(py::module_ &(variable))
 
 /// Dummy type which is not exported anywhere -- something to trigger a conversion error
-struct UnregisteredType { };
+struct UnregisteredType {};
 
 /// A user-defined type which is exported and can be used by any test
 class UserType {
 public:
     UserType() = default;
-    explicit UserType(int i) : i(i) { }
+    explicit UserType(int i) : i(i) {}
 
     int value() const { return i; }
     void set(int set) { i = set; }
@@ -46,7 +40,7 @@ class IncType : public UserType {
 public:
     using UserType::UserType;
     IncType() = default;
-    IncType(const IncType &other) : IncType(other.value() + 1) { }
+    IncType(const IncType &other) : IncType(other.value() + 1) {}
     IncType(IncType &&) = delete;
     IncType &operator=(const IncType &) = delete;
     IncType &operator=(IncType &&) = delete;
@@ -58,16 +52,21 @@ union IntFloat {
     float f;
 };
 
-/// Custom cast-only type that casts to a string "rvalue" or "lvalue" depending on the cast context.
-/// Used to test recursive casters (e.g. std::tuple, stl containers).
+/// Custom cast-only type that casts to a string "rvalue" or "lvalue" depending on the cast
+/// context. Used to test recursive casters (e.g. std::tuple, stl containers).
 struct RValueCaster {};
 PYBIND11_NAMESPACE_BEGIN(pybind11)
 PYBIND11_NAMESPACE_BEGIN(detail)
-template<> class type_caster<RValueCaster> {
+template <>
+class type_caster<RValueCaster> {
 public:
-    PYBIND11_TYPE_CASTER(RValueCaster, _("RValueCaster"));
-    static handle cast(RValueCaster &&, return_value_policy, handle) { return py::str("rvalue").release(); }
-    static handle cast(const RValueCaster &, return_value_policy, handle) { return py::str("lvalue").release(); }
+    PYBIND11_TYPE_CASTER(RValueCaster, const_name("RValueCaster"));
+    static handle cast(RValueCaster &&, return_value_policy, handle) {
+        return py::str("rvalue").release();
+    }
+    static handle cast(const RValueCaster &, return_value_policy, handle) {
+        return py::str("lvalue").release();
+    }
 };
 PYBIND11_NAMESPACE_END(detail)
 PYBIND11_NAMESPACE_END(pybind11)
@@ -81,5 +80,6 @@ void ignoreOldStyleInitWarnings(F &&body) {
     with warnings.catch_warnings():
         warnings.filterwarnings("ignore", message=message, category=FutureWarning)
         body()
-    )", py::dict(py::arg("body") = py::cpp_function(body)));
+    )",
+             py::dict(py::arg("body") = py::cpp_function(body)));
 }
diff --git a/ext/pybind11/tests/pytest.ini b/ext/pybind11/tests/pytest.ini
index a3871d6c3a..792ba361f7 100644
--- a/ext/pybind11/tests/pytest.ini
+++ b/ext/pybind11/tests/pytest.ini
@@ -1,12 +1,15 @@
 [pytest]
-minversion = 3.1
+minversion = 3.10
 norecursedirs = test_* extra_*
 xfail_strict = True
 addopts =
-    # show summary of skipped tests
-    -rs
+    # show summary of tests
+    -ra
     # capture only Python print and C++ py::print, but not C output (low-level Python errors)
     --capture=sys
+    # Show local info when a failure occurs
+    --showlocals
+log_cli_level = info
 filterwarnings =
     # make warnings into errors but ignore certain third-party extension issues
     error
diff --git a/ext/pybind11/tests/requirements.txt b/ext/pybind11/tests/requirements.txt
index 8d2742a71c..04aafa8cf9 100644
--- a/ext/pybind11/tests/requirements.txt
+++ b/ext/pybind11/tests/requirements.txt
@@ -1,12 +1,9 @@
-numpy==1.16.6; python_version<"3.6" and sys_platform!="win32" and platform_python_implementation!="PyPy"
-numpy==1.19.0; platform_python_implementation=="PyPy" and sys_platform=="linux" and python_version=="3.6"
-numpy==1.20.0; platform_python_implementation=="PyPy" and sys_platform=="linux" and python_version=="3.7"
+build==0.8.0
+numpy==1.21.5; platform_python_implementation=="PyPy" and sys_platform=="linux" and python_version=="3.7"
 numpy==1.19.3; platform_python_implementation!="PyPy" and python_version=="3.6"
-numpy==1.21.3; platform_python_implementation!="PyPy" and python_version>="3.7"
-py @ git+https://github.com/pytest-dev/py; python_version>="3.11"
-pytest==4.6.9; python_version<"3.5"
-pytest==6.1.2; python_version=="3.5"
-pytest==6.2.4; python_version>="3.6"
+numpy==1.21.5; platform_python_implementation!="PyPy" and python_version>="3.7" and python_version<"3.10"
+numpy==1.22.2; platform_python_implementation!="PyPy" and python_version>="3.10" and python_version<"3.11"
+pytest==7.0.0
 pytest-timeout
-scipy==1.2.3; platform_python_implementation!="PyPy" and python_version<"3.6"
-scipy==1.5.4; platform_python_implementation!="PyPy" and python_version>="3.6" and python_version<"3.10"
+scipy==1.5.4; platform_python_implementation!="PyPy" and python_version<"3.10"
+scipy==1.8.0; platform_python_implementation!="PyPy" and python_version=="3.10"
diff --git a/ext/pybind11/tests/test_async.cpp b/ext/pybind11/tests/test_async.cpp
index e6e01d72c9..a5d7224657 100644
--- a/ext/pybind11/tests/test_async.cpp
+++ b/ext/pybind11/tests/test_async.cpp
@@ -11,12 +11,11 @@
 
 TEST_SUBMODULE(async_module, m) {
     struct DoesNotSupportAsync {};
-    py::class_<DoesNotSupportAsync>(m, "DoesNotSupportAsync")
-        .def(py::init<>());
+    py::class_<DoesNotSupportAsync>(m, "DoesNotSupportAsync").def(py::init<>());
     struct SupportsAsync {};
     py::class_<SupportsAsync>(m, "SupportsAsync")
         .def(py::init<>())
-        .def("__await__", [](const SupportsAsync& self) -> py::object {
+        .def("__await__", [](const SupportsAsync &self) -> py::object {
             static_cast<void>(self);
             py::object loop = py::module_::import("asyncio.events").attr("get_event_loop")();
             py::object f = loop.attr("create_future")();
diff --git a/ext/pybind11/tests/test_async.py b/ext/pybind11/tests/test_async.py
index df4489c499..b9ff9514d2 100644
--- a/ext/pybind11/tests/test_async.py
+++ b/ext/pybind11/tests/test_async.py
@@ -1,4 +1,3 @@
-# -*- coding: utf-8 -*-
 import pytest
 
 asyncio = pytest.importorskip("asyncio")
diff --git a/ext/pybind11/tests/test_buffers.cpp b/ext/pybind11/tests/test_buffers.cpp
index 3a8e3e7b75..6b6e8cba7f 100644
--- a/ext/pybind11/tests/test_buffers.cpp
+++ b/ext/pybind11/tests/test_buffers.cpp
@@ -7,22 +7,26 @@
     BSD-style license that can be found in the LICENSE file.
 */
 
-#include "pybind11_tests.h"
-#include "constructor_stats.h"
 #include <pybind11/stl.h>
 
+#include "constructor_stats.h"
+#include "pybind11_tests.h"
+
 TEST_SUBMODULE(buffers, m) {
     // test_from_python / test_to_python:
     class Matrix {
     public:
         Matrix(py::ssize_t rows, py::ssize_t cols) : m_rows(rows), m_cols(cols) {
             print_created(this, std::to_string(m_rows) + "x" + std::to_string(m_cols) + " matrix");
-            m_data = new float[(size_t) (rows*cols)];
+            // NOLINTNEXTLINE(cppcoreguidelines-prefer-member-initializer)
+            m_data = new float[(size_t) (rows * cols)];
             memset(m_data, 0, sizeof(float) * (size_t) (rows * cols));
         }
 
         Matrix(const Matrix &s) : m_rows(s.m_rows), m_cols(s.m_cols) {
-            print_copy_created(this, std::to_string(m_rows) + "x" + std::to_string(m_cols) + " matrix");
+            print_copy_created(this,
+                               std::to_string(m_rows) + "x" + std::to_string(m_cols) + " matrix");
+            // NOLINTNEXTLINE(cppcoreguidelines-prefer-member-initializer)
             m_data = new float[(size_t) (m_rows * m_cols)];
             memcpy(m_data, s.m_data, sizeof(float) * (size_t) (m_rows * m_cols));
         }
@@ -35,7 +39,8 @@ TEST_SUBMODULE(buffers, m) {
         }
 
         ~Matrix() {
-            print_destroyed(this, std::to_string(m_rows) + "x" + std::to_string(m_cols) + " matrix");
+            print_destroyed(this,
+                            std::to_string(m_rows) + "x" + std::to_string(m_cols) + " matrix");
             delete[] m_data;
         }
 
@@ -54,27 +59,33 @@ TEST_SUBMODULE(buffers, m) {
         }
 
         Matrix &operator=(Matrix &&s) noexcept {
-            print_move_assigned(this, std::to_string(m_rows) + "x" + std::to_string(m_cols) + " matrix");
+            print_move_assigned(this,
+                                std::to_string(m_rows) + "x" + std::to_string(m_cols) + " matrix");
             if (&s != this) {
                 delete[] m_data;
-                m_rows = s.m_rows; m_cols = s.m_cols; m_data = s.m_data;
-                s.m_rows = 0; s.m_cols = 0; s.m_data = nullptr;
+                m_rows = s.m_rows;
+                m_cols = s.m_cols;
+                m_data = s.m_data;
+                s.m_rows = 0;
+                s.m_cols = 0;
+                s.m_data = nullptr;
             }
             return *this;
         }
 
         float operator()(py::ssize_t i, py::ssize_t j) const {
-            return m_data[(size_t) (i*m_cols + j)];
+            return m_data[(size_t) (i * m_cols + j)];
         }
 
         float &operator()(py::ssize_t i, py::ssize_t j) {
-            return m_data[(size_t) (i*m_cols + j)];
+            return m_data[(size_t) (i * m_cols + j)];
         }
 
         float *data() { return m_data; }
 
         py::ssize_t rows() const { return m_rows; }
         py::ssize_t cols() const { return m_cols; }
+
     private:
         py::ssize_t m_rows;
         py::ssize_t m_cols;
@@ -85,10 +96,11 @@ TEST_SUBMODULE(buffers, m) {
         /// Construct from a buffer
         .def(py::init([](const py::buffer &b) {
             py::buffer_info info = b.request();
-            if (info.format != py::format_descriptor<float>::format() || info.ndim != 2)
+            if (info.format != py::format_descriptor<float>::format() || info.ndim != 2) {
                 throw std::runtime_error("Incompatible buffer format!");
+            }
 
-            auto v = new Matrix(info.shape[0], info.shape[1]);
+            auto *v = new Matrix(info.shape[0], info.shape[1]);
             memcpy(v->data(), info.ptr, sizeof(float) * (size_t) (v->rows() * v->cols()));
             return v;
         }))
@@ -99,24 +111,25 @@ TEST_SUBMODULE(buffers, m) {
         /// Bare bones interface
         .def("__getitem__",
              [](const Matrix &m, std::pair<py::ssize_t, py::ssize_t> i) {
-                 if (i.first >= m.rows() || i.second >= m.cols())
+                 if (i.first >= m.rows() || i.second >= m.cols()) {
                      throw py::index_error();
+                 }
                  return m(i.first, i.second);
              })
         .def("__setitem__",
              [](Matrix &m, std::pair<py::ssize_t, py::ssize_t> i, float v) {
-                 if (i.first >= m.rows() || i.second >= m.cols())
+                 if (i.first >= m.rows() || i.second >= m.cols()) {
                      throw py::index_error();
+                 }
                  m(i.first, i.second) = v;
              })
         /// Provide buffer access
         .def_buffer([](Matrix &m) -> py::buffer_info {
             return py::buffer_info(
-                m.data(),                               /* Pointer to buffer */
-                { m.rows(), m.cols() },                 /* Buffer dimensions */
-                { sizeof(float) * size_t(m.cols()),     /* Strides (in bytes) for each index */
-                  sizeof(float) }
-            );
+                m.data(),                          /* Pointer to buffer */
+                {m.rows(), m.cols()},              /* Buffer dimensions */
+                {sizeof(float) * size_t(m.cols()), /* Strides (in bytes) for each index */
+                 sizeof(float)});
         });
 
     // test_inherited_protocol
@@ -125,9 +138,7 @@ TEST_SUBMODULE(buffers, m) {
         explicit SquareMatrix(py::ssize_t n) : Matrix(n, n) {}
     };
     // Derived classes inherit the buffer protocol and the buffer access function
-    py::class_<SquareMatrix, Matrix>(m, "SquareMatrix")
-        .def(py::init<py::ssize_t>());
-
+    py::class_<SquareMatrix, Matrix>(m, "SquareMatrix").def(py::init<py::ssize_t>());
 
     // test_pointer_to_member_fn
     // Tests that passing a pointer to member to the base class works in
@@ -136,8 +147,8 @@ TEST_SUBMODULE(buffers, m) {
         int32_t value = 0;
 
         py::buffer_info get_buffer_info() {
-            return py::buffer_info(&value, sizeof(value),
-                                   py::format_descriptor<int32_t>::format(), 1);
+            return py::buffer_info(
+                &value, sizeof(value), py::format_descriptor<int32_t>::format(), 1);
         }
     };
     py::class_<Buffer>(m, "Buffer", py::buffer_protocol())
@@ -145,7 +156,6 @@ TEST_SUBMODULE(buffers, m) {
         .def_readwrite("value", &Buffer::value)
         .def_buffer(&Buffer::get_buffer_info);
 
-
     class ConstBuffer {
         std::unique_ptr<int32_t> value;
 
@@ -154,8 +164,8 @@ TEST_SUBMODULE(buffers, m) {
         void set_value(int32_t v) { *value = v; }
 
         py::buffer_info get_buffer_info() const {
-            return py::buffer_info(value.get(), sizeof(*value),
-                                   py::format_descriptor<int32_t>::format(), 1);
+            return py::buffer_info(
+                value.get(), sizeof(*value), py::format_descriptor<int32_t>::format(), 1);
         }
 
         ConstBuffer() : value(new int32_t{0}) {}
@@ -165,7 +175,7 @@ TEST_SUBMODULE(buffers, m) {
         .def_property("value", &ConstBuffer::get_value, &ConstBuffer::set_value)
         .def_buffer(&ConstBuffer::get_buffer_info);
 
-    struct DerivedBuffer : public Buffer { };
+    struct DerivedBuffer : public Buffer {};
     py::class_<DerivedBuffer>(m, "DerivedBuffer", py::buffer_protocol())
         .def(py::init<>())
         .def_readwrite("value", (int32_t DerivedBuffer::*) &DerivedBuffer::value)
@@ -175,9 +185,7 @@ TEST_SUBMODULE(buffers, m) {
         const uint8_t value = 0;
         explicit BufferReadOnly(uint8_t value) : value(value) {}
 
-        py::buffer_info get_buffer_info() {
-            return py::buffer_info(&value, 1);
-        }
+        py::buffer_info get_buffer_info() { return py::buffer_info(&value, 1); }
     };
     py::class_<BufferReadOnly>(m, "BufferReadOnly", py::buffer_protocol())
         .def(py::init<uint8_t>())
@@ -187,9 +195,7 @@ TEST_SUBMODULE(buffers, m) {
         uint8_t value = 0;
         bool readonly = false;
 
-        py::buffer_info get_buffer_info() {
-            return py::buffer_info(&value, 1, readonly);
-        }
+        py::buffer_info get_buffer_info() { return py::buffer_info(&value, 1, readonly); }
     };
     py::class_<BufferReadOnlySelect>(m, "BufferReadOnlySelect", py::buffer_protocol())
         .def(py::init<>())
@@ -208,9 +214,11 @@ TEST_SUBMODULE(buffers, m) {
         .def_readonly("strides", &py::buffer_info::strides)
         .def_readonly("readonly", &py::buffer_info::readonly)
         .def("__repr__", [](py::handle self) {
-             return py::str("itemsize={0.itemsize!r}, size={0.size!r}, format={0.format!r}, ndim={0.ndim!r}, shape={0.shape!r}, strides={0.strides!r}, readonly={0.readonly!r}").format(self);
-        })
-        ;
+            return py::str("itemsize={0.itemsize!r}, size={0.size!r}, format={0.format!r}, "
+                           "ndim={0.ndim!r}, shape={0.shape!r}, strides={0.strides!r}, "
+                           "readonly={0.readonly!r}")
+                .format(self);
+        });
 
     m.def("get_buffer_info", [](const py::buffer &buffer) { return buffer.request(); });
 }
diff --git a/ext/pybind11/tests/test_buffers.py b/ext/pybind11/tests/test_buffers.py
index adf7cadff2..8354b68cda 100644
--- a/ext/pybind11/tests/test_buffers.py
+++ b/ext/pybind11/tests/test_buffers.py
@@ -1,4 +1,3 @@
-# -*- coding: utf-8 -*-
 import ctypes
 import io
 import struct
@@ -36,6 +35,10 @@ def test_from_python():
 
 
 # https://foss.heptapod.net/pypy/pypy/-/issues/2444
+# TODO: fix on recent PyPy
+@pytest.mark.xfail(
+    env.PYPY, reason="PyPy 7.3.7 doesn't clear this anymore", strict=False
+)
 def test_to_python():
     mat = m.Matrix(5, 4)
     assert memoryview(mat).shape == (5, 4)
@@ -89,16 +92,16 @@ def test_pointer_to_member_fn():
 def test_readonly_buffer():
     buf = m.BufferReadOnly(0x64)
     view = memoryview(buf)
-    assert view[0] == b"d" if env.PY2 else 0x64
+    assert view[0] == 0x64
     assert view.readonly
     with pytest.raises(TypeError):
-        view[0] = b"\0" if env.PY2 else 0
+        view[0] = 0
 
 
 def test_selective_readonly_buffer():
     buf = m.BufferReadOnlySelect()
 
-    memoryview(buf)[0] = b"d" if env.PY2 else 0x64
+    memoryview(buf)[0] = 0x64
     assert buf.value == 0x64
 
     io.BytesIO(b"A").readinto(buf)
@@ -106,7 +109,7 @@ def test_selective_readonly_buffer():
 
     buf.readonly = True
     with pytest.raises(TypeError):
-        memoryview(buf)[0] = b"\0" if env.PY2 else 0
+        memoryview(buf)[0] = 0
     with pytest.raises(TypeError):
         io.BytesIO(b"1").readinto(buf)
 
@@ -141,9 +144,6 @@ def test_ctypes_array_2d():
         assert not info.readonly
 
 
-@pytest.mark.skipif(
-    "env.PYPY and env.PY2", reason="PyPy2 bytes buffer not reported as readonly"
-)
 def test_ctypes_from_buffer():
     test_pystr = b"0123456789"
     for pyarray in (test_pystr, bytearray(test_pystr)):
diff --git a/ext/pybind11/tests/test_builtin_casters.cpp b/ext/pybind11/tests/test_builtin_casters.cpp
index 71c778e8e2..0623b85dc9 100644
--- a/ext/pybind11/tests/test_builtin_casters.cpp
+++ b/ext/pybind11/tests/test_builtin_casters.cpp
@@ -7,95 +7,120 @@
     BSD-style license that can be found in the LICENSE file.
 */
 
-#include "pybind11_tests.h"
 #include <pybind11/complex.h>
 
+#include "pybind11_tests.h"
+
 struct ConstRefCasted {
-  int tag;
+    int tag;
 };
 
 PYBIND11_NAMESPACE_BEGIN(pybind11)
 PYBIND11_NAMESPACE_BEGIN(detail)
 template <>
 class type_caster<ConstRefCasted> {
- public:
-  static constexpr auto name = _<ConstRefCasted>();
+public:
+    static constexpr auto name = const_name<ConstRefCasted>();
 
-  // Input is unimportant, a new value will always be constructed based on the
-  // cast operator.
-  bool load(handle, bool) { return true; }
+    // Input is unimportant, a new value will always be constructed based on the
+    // cast operator.
+    bool load(handle, bool) { return true; }
 
-  explicit operator ConstRefCasted &&() {
-      value = {1};
-      // NOLINTNEXTLINE(performance-move-const-arg)
-      return std::move(value);
-  }
-  explicit operator ConstRefCasted &() {
-      value = {2};
-      return value;
-  }
-  explicit operator ConstRefCasted *() {
-      value = {3};
-      return &value;
-  }
+    explicit operator ConstRefCasted &&() {
+        value = {1};
+        // NOLINTNEXTLINE(performance-move-const-arg)
+        return std::move(value);
+    }
+    explicit operator ConstRefCasted &() {
+        value = {2};
+        return value;
+    }
+    explicit operator ConstRefCasted *() {
+        value = {3};
+        return &value;
+    }
 
-  explicit operator const ConstRefCasted &() {
-      value = {4};
-      return value;
-  }
-  explicit operator const ConstRefCasted *() {
-      value = {5};
-      return &value;
-  }
+    explicit operator const ConstRefCasted &() {
+        value = {4};
+        return value;
+    }
+    explicit operator const ConstRefCasted *() {
+        value = {5};
+        return &value;
+    }
 
-  // custom cast_op to explicitly propagate types to the conversion operators.
-  template <typename T_>
-  using cast_op_type =
-      /// const
-      conditional_t<
-          std::is_same<remove_reference_t<T_>, const ConstRefCasted*>::value, const ConstRefCasted*,
-      conditional_t<
-          std::is_same<T_, const ConstRefCasted&>::value, const ConstRefCasted&,
-      /// non-const
-      conditional_t<
-          std::is_same<remove_reference_t<T_>, ConstRefCasted*>::value, ConstRefCasted*,
-      conditional_t<
-          std::is_same<T_, ConstRefCasted&>::value, ConstRefCasted&,
-          /* else */ConstRefCasted&&>>>>;
+    // custom cast_op to explicitly propagate types to the conversion operators.
+    template <typename T_>
+    using cast_op_type =
+        /// const
+        conditional_t<
+            std::is_same<remove_reference_t<T_>, const ConstRefCasted *>::value,
+            const ConstRefCasted *,
+            conditional_t<
+                std::is_same<T_, const ConstRefCasted &>::value,
+                const ConstRefCasted &,
+                /// non-const
+                conditional_t<std::is_same<remove_reference_t<T_>, ConstRefCasted *>::value,
+                              ConstRefCasted *,
+                              conditional_t<std::is_same<T_, ConstRefCasted &>::value,
+                                            ConstRefCasted &,
+                                            /* else */ ConstRefCasted &&>>>>;
 
- private:
-  ConstRefCasted value = {0};
+private:
+    ConstRefCasted value = {0};
 };
 PYBIND11_NAMESPACE_END(detail)
 PYBIND11_NAMESPACE_END(pybind11)
 
 TEST_SUBMODULE(builtin_casters, m) {
+    PYBIND11_WARNING_PUSH
+    PYBIND11_WARNING_DISABLE_MSVC(4127)
+
     // test_simple_string
     m.def("string_roundtrip", [](const char *s) { return s; });
 
     // test_unicode_conversion
-    // Some test characters in utf16 and utf32 encodings.  The last one (the 𝐀) contains a null byte
-    char32_t a32 = 0x61 /*a*/, z32 = 0x7a /*z*/, ib32 = 0x203d /*‽*/, cake32 = 0x1f382 /*🎂*/,              mathbfA32 = 0x1d400 /*𝐀*/;
-    char16_t b16 = 0x62 /*b*/, z16 = 0x7a,       ib16 = 0x203d,       cake16_1 = 0xd83c, cake16_2 = 0xdf82, mathbfA16_1 = 0xd835, mathbfA16_2 = 0xdc00;
+    // Some test characters in utf16 and utf32 encodings.  The last one (the 𝐀) contains a null
+    // byte
+    char32_t a32 = 0x61 /*a*/, z32 = 0x7a /*z*/, ib32 = 0x203d /*‽*/, cake32 = 0x1f382 /*🎂*/,
+             mathbfA32 = 0x1d400 /*𝐀*/;
+    char16_t b16 = 0x62 /*b*/, z16 = 0x7a, ib16 = 0x203d, cake16_1 = 0xd83c, cake16_2 = 0xdf82,
+             mathbfA16_1 = 0xd835, mathbfA16_2 = 0xdc00;
     std::wstring wstr;
-    wstr.push_back(0x61); // a
+    wstr.push_back(0x61);   // a
     wstr.push_back(0x2e18); // ⸘
-    if (PYBIND11_SILENCE_MSVC_C4127(sizeof(wchar_t) == 2)) { wstr.push_back(mathbfA16_1); wstr.push_back(mathbfA16_2); } // 𝐀, utf16
-    else { wstr.push_back((wchar_t) mathbfA32); } // 𝐀, utf32
+    if (sizeof(wchar_t) == 2) {
+        wstr.push_back(mathbfA16_1);
+        wstr.push_back(mathbfA16_2);
+    } // 𝐀, utf16
+    else {
+        wstr.push_back((wchar_t) mathbfA32);
+    }                     // 𝐀, utf32
     wstr.push_back(0x7a); // z
 
-    m.def("good_utf8_string", []() { return std::string((const char*)u8"Say utf8\u203d \U0001f382 \U0001d400"); }); // Say utf8‽ 🎂 𝐀
-    m.def("good_utf16_string", [=]() { return std::u16string({ b16, ib16, cake16_1, cake16_2, mathbfA16_1, mathbfA16_2, z16 }); }); // b‽🎂𝐀z
-    m.def("good_utf32_string", [=]() { return std::u32string({ a32, mathbfA32, cake32, ib32, z32 }); }); // a𝐀🎂‽z
+    m.def("good_utf8_string", []() {
+        return std::string((const char *) u8"Say utf8\u203d \U0001f382 \U0001d400");
+    }); // Say utf8‽ 🎂 𝐀
+    m.def("good_utf16_string", [=]() {
+        return std::u16string({b16, ib16, cake16_1, cake16_2, mathbfA16_1, mathbfA16_2, z16});
+    }); // b‽🎂𝐀z
+    m.def("good_utf32_string", [=]() {
+        return std::u32string({a32, mathbfA32, cake32, ib32, z32});
+    });                                                 // a𝐀🎂‽z
     m.def("good_wchar_string", [=]() { return wstr; }); // a‽𝐀z
-    m.def("bad_utf8_string", []()  { return std::string("abc\xd0" "def"); });
-    m.def("bad_utf16_string", [=]() { return std::u16string({ b16, char16_t(0xd800), z16 }); });
-#if PY_MAJOR_VERSION >= 3
-    // Under Python 2.7, invalid unicode UTF-32 characters don't appear to trigger UnicodeDecodeError
-    m.def("bad_utf32_string", [=]() { return std::u32string({ a32, char32_t(0xd800), z32 }); });
-    if (PYBIND11_SILENCE_MSVC_C4127(sizeof(wchar_t) == 2))
-        m.def("bad_wchar_string", [=]() { return std::wstring({ wchar_t(0x61), wchar_t(0xd800) }); });
-#endif
+    m.def("bad_utf8_string", []() {
+        return std::string("abc\xd0"
+                           "def");
+    });
+    m.def("bad_utf16_string", [=]() { return std::u16string({b16, char16_t(0xd800), z16}); });
+    // Under Python 2.7, invalid unicode UTF-32 characters didn't appear to trigger
+    // UnicodeDecodeError
+    m.def("bad_utf32_string", [=]() { return std::u32string({a32, char32_t(0xd800), z32}); });
+    if (sizeof(wchar_t) == 2) {
+        m.def("bad_wchar_string", [=]() {
+            return std::wstring({wchar_t(0x61), wchar_t(0xd800)});
+        });
+    }
     m.def("u8_Z", []() -> char { return 'Z'; });
     m.def("u8_eacute", []() -> char { return '\xe9'; });
     m.def("u16_ibang", [=]() -> char16_t { return ib16; });
@@ -117,8 +142,13 @@ TEST_SUBMODULE(builtin_casters, m) {
 
 #ifdef PYBIND11_HAS_U8STRING
     m.attr("has_u8string") = true;
-    m.def("good_utf8_u8string", []() { return std::u8string(u8"Say utf8\u203d \U0001f382 \U0001d400"); }); // Say utf8‽ 🎂 𝐀
-    m.def("bad_utf8_u8string", []()  { return std::u8string((const char8_t*)"abc\xd0" "def"); });
+    m.def("good_utf8_u8string", []() {
+        return std::u8string(u8"Say utf8\u203d \U0001f382 \U0001d400");
+    }); // Say utf8‽ 🎂 𝐀
+    m.def("bad_utf8_u8string", []() {
+        return std::u8string((const char8_t *) "abc\xd0"
+                                               "def");
+    });
 
     m.def("u8_char8_Z", []() -> char8_t { return u8'Z'; });
 
@@ -130,21 +160,72 @@ TEST_SUBMODULE(builtin_casters, m) {
     // test_string_view
 #ifdef PYBIND11_HAS_STRING_VIEW
     m.attr("has_string_view") = true;
-    m.def("string_view_print",   [](std::string_view s)    { py::print(s, s.size()); });
+    m.def("string_view_print", [](std::string_view s) { py::print(s, s.size()); });
     m.def("string_view16_print", [](std::u16string_view s) { py::print(s, s.size()); });
     m.def("string_view32_print", [](std::u32string_view s) { py::print(s, s.size()); });
-    m.def("string_view_chars",   [](std::string_view s)    { py::list l; for (auto c : s) l.append((std::uint8_t) c); return l; });
-    m.def("string_view16_chars", [](std::u16string_view s) { py::list l; for (auto c : s) l.append((int) c); return l; });
-    m.def("string_view32_chars", [](std::u32string_view s) { py::list l; for (auto c : s) l.append((int) c); return l; });
-    m.def("string_view_return",   []() { return std::string_view((const char*)u8"utf8 secret \U0001f382"); });
-    m.def("string_view16_return", []() { return std::u16string_view(u"utf16 secret \U0001f382"); });
-    m.def("string_view32_return", []() { return std::u32string_view(U"utf32 secret \U0001f382"); });
+    m.def("string_view_chars", [](std::string_view s) {
+        py::list l;
+        for (auto c : s) {
+            l.append((std::uint8_t) c);
+        }
+        return l;
+    });
+    m.def("string_view16_chars", [](std::u16string_view s) {
+        py::list l;
+        for (auto c : s) {
+            l.append((int) c);
+        }
+        return l;
+    });
+    m.def("string_view32_chars", [](std::u32string_view s) {
+        py::list l;
+        for (auto c : s) {
+            l.append((int) c);
+        }
+        return l;
+    });
+    m.def("string_view_return",
+          []() { return std::string_view((const char *) u8"utf8 secret \U0001f382"); });
+    m.def("string_view16_return",
+          []() { return std::u16string_view(u"utf16 secret \U0001f382"); });
+    m.def("string_view32_return",
+          []() { return std::u32string_view(U"utf32 secret \U0001f382"); });
 
-#   ifdef PYBIND11_HAS_U8STRING
-    m.def("string_view8_print",  [](std::u8string_view s) { py::print(s, s.size()); });
-    m.def("string_view8_chars",  [](std::u8string_view s) { py::list l; for (auto c : s) l.append((std::uint8_t) c); return l; });
+    // The inner lambdas here are to also test implicit conversion
+    using namespace std::literals;
+    m.def("string_view_bytes",
+          []() { return [](py::bytes b) { return b; }("abc \x80\x80 def"sv); });
+    m.def("string_view_str",
+          []() { return [](py::str s) { return s; }("abc \342\200\275 def"sv); });
+    m.def("string_view_from_bytes",
+          [](const py::bytes &b) { return [](std::string_view s) { return s; }(b); });
+    m.def("string_view_memoryview", []() {
+        static constexpr auto val = "Have some \360\237\216\202"sv;
+        return py::memoryview::from_memory(val);
+    });
+
+#    ifdef PYBIND11_HAS_U8STRING
+    m.def("string_view8_print", [](std::u8string_view s) { py::print(s, s.size()); });
+    m.def("string_view8_chars", [](std::u8string_view s) {
+        py::list l;
+        for (auto c : s)
+            l.append((std::uint8_t) c);
+        return l;
+    });
     m.def("string_view8_return", []() { return std::u8string_view(u8"utf8 secret \U0001f382"); });
-#   endif
+    m.def("string_view8_str", []() { return py::str{std::u8string_view{u8"abc ‽ def"}}; });
+#    endif
+
+    struct TypeWithBothOperatorStringAndStringView {
+        // NOLINTNEXTLINE(google-explicit-constructor)
+        operator std::string() const { return "success"; }
+        // NOLINTNEXTLINE(google-explicit-constructor)
+        operator std::string_view() const { return "failure"; }
+    };
+    m.def("bytes_from_type_with_both_operator_string_and_string_view",
+          []() { return py::bytes(TypeWithBothOperatorStringAndStringView()); });
+    m.def("str_from_type_with_both_operator_string_and_string_view",
+          []() { return py::str(TypeWithBothOperatorStringAndStringView()); });
 #endif
 
     // test_integer_casting
@@ -155,7 +236,8 @@ TEST_SUBMODULE(builtin_casters, m) {
 
     // test_int_convert
     m.def("int_passthrough", [](int arg) { return arg; });
-    m.def("int_passthrough_noconvert", [](int arg) { return arg; }, py::arg{}.noconvert());
+    m.def(
+        "int_passthrough_noconvert", [](int arg) { return arg; }, py::arg{}.noconvert());
 
     // test_tuple
     m.def(
@@ -164,31 +246,45 @@ TEST_SUBMODULE(builtin_casters, m) {
             return std::make_pair(input.second, input.first);
         },
         "Return a pair in reversed order");
-    m.def("tuple_passthrough", [](std::tuple<bool, std::string, int> input) {
-        return std::make_tuple(std::get<2>(input), std::get<1>(input), std::get<0>(input));
-    }, "Return a triple in reversed order");
+    m.def(
+        "tuple_passthrough",
+        [](std::tuple<bool, std::string, int> input) {
+            return std::make_tuple(std::get<2>(input), std::get<1>(input), std::get<0>(input));
+        },
+        "Return a triple in reversed order");
     m.def("empty_tuple", []() { return std::tuple<>(); });
     static std::pair<RValueCaster, RValueCaster> lvpair;
     static std::tuple<RValueCaster, RValueCaster, RValueCaster> lvtuple;
-    static std::pair<RValueCaster, std::tuple<RValueCaster, std::pair<RValueCaster, RValueCaster>>> lvnested;
+    static std::pair<RValueCaster, std::tuple<RValueCaster, std::pair<RValueCaster, RValueCaster>>>
+        lvnested;
     m.def("rvalue_pair", []() { return std::make_pair(RValueCaster{}, RValueCaster{}); });
     m.def("lvalue_pair", []() -> const decltype(lvpair) & { return lvpair; });
-    m.def("rvalue_tuple", []() { return std::make_tuple(RValueCaster{}, RValueCaster{}, RValueCaster{}); });
+    m.def("rvalue_tuple",
+          []() { return std::make_tuple(RValueCaster{}, RValueCaster{}, RValueCaster{}); });
     m.def("lvalue_tuple", []() -> const decltype(lvtuple) & { return lvtuple; });
     m.def("rvalue_nested", []() {
-        return std::make_pair(RValueCaster{}, std::make_tuple(RValueCaster{}, std::make_pair(RValueCaster{}, RValueCaster{}))); });
+        return std::make_pair(
+            RValueCaster{},
+            std::make_tuple(RValueCaster{}, std::make_pair(RValueCaster{}, RValueCaster{})));
+    });
     m.def("lvalue_nested", []() -> const decltype(lvnested) & { return lvnested; });
 
-    static std::pair<int, std::string> int_string_pair{2, "items"};
-    m.def("int_string_pair", []() { return &int_string_pair; });
+    m.def(
+        "int_string_pair",
+        []() {
+            // Using no-destructor idiom to side-step warnings from overzealous compilers.
+            static auto *int_string_pair = new std::pair<int, std::string>{2, "items"};
+            return int_string_pair;
+        },
+        py::return_value_policy::reference);
 
     // test_builtins_cast_return_none
     m.def("return_none_string", []() -> std::string * { return nullptr; });
-    m.def("return_none_char",   []() -> const char *  { return nullptr; });
-    m.def("return_none_bool",   []() -> bool *        { return nullptr; });
-    m.def("return_none_int",    []() -> int *         { return nullptr; });
-    m.def("return_none_float",  []() -> float *       { return nullptr; });
-    m.def("return_none_pair",   []() -> std::pair<int,int> * { return nullptr; });
+    m.def("return_none_char", []() -> const char * { return nullptr; });
+    m.def("return_none_bool", []() -> bool * { return nullptr; });
+    m.def("return_none_int", []() -> int * { return nullptr; });
+    m.def("return_none_float", []() -> float * { return nullptr; });
+    m.def("return_none_pair", []() -> std::pair<int, int> * { return nullptr; });
 
     // test_none_deferred
     m.def("defer_none_cstring", [](char *) { return false; });
@@ -206,7 +302,8 @@ TEST_SUBMODULE(builtin_casters, m) {
 
     // test_bool_caster
     m.def("bool_passthrough", [](bool arg) { return arg; });
-    m.def("bool_passthrough_noconvert", [](bool arg) { return arg; }, py::arg{}.noconvert());
+    m.def(
+        "bool_passthrough_noconvert", [](bool arg) { return arg; }, py::arg{}.noconvert());
 
     // TODO: This should be disabled and fixed in future Intel compilers
 #if !defined(__INTEL_COMPILER)
@@ -214,13 +311,15 @@ TEST_SUBMODULE(builtin_casters, m) {
     // When compiled with the Intel compiler, this results in segmentation faults when importing
     // the module. Tested with icc (ICC) 2021.1 Beta 20200827, this should be tested again when
     // a newer version of icc is available.
-    m.def("bool_passthrough_noconvert2", [](bool arg) { return arg; }, py::arg().noconvert());
+    m.def(
+        "bool_passthrough_noconvert2", [](bool arg) { return arg; }, py::arg().noconvert());
 #endif
 
     // test_reference_wrapper
     m.def("refwrap_builtin", [](std::reference_wrapper<int> p) { return 10 * p.get(); });
     m.def("refwrap_usertype", [](std::reference_wrapper<UserType> p) { return p.get().value(); });
-    m.def("refwrap_usertype_const", [](std::reference_wrapper<const UserType> p) { return p.get().value(); });
+    m.def("refwrap_usertype_const",
+          [](std::reference_wrapper<const UserType> p) { return p.get().value(); });
 
     m.def("refwrap_lvalue", []() -> std::reference_wrapper<UserType> {
         static UserType x(1);
@@ -233,17 +332,20 @@ TEST_SUBMODULE(builtin_casters, m) {
 
     // Not currently supported (std::pair caster has return-by-value cast operator);
     // triggers static_assert failure.
-    //m.def("refwrap_pair", [](std::reference_wrapper<std::pair<int, int>>) { });
+    // m.def("refwrap_pair", [](std::reference_wrapper<std::pair<int, int>>) { });
 
-    m.def("refwrap_list", [](bool copy) {
-        static IncType x1(1), x2(2);
-        py::list l;
-        for (auto &f : {std::ref(x1), std::ref(x2)}) {
-            l.append(py::cast(f, copy ? py::return_value_policy::copy
-                                      : py::return_value_policy::reference));
-        }
-        return l;
-    }, "copy"_a);
+    m.def(
+        "refwrap_list",
+        [](bool copy) {
+            static IncType x1(1), x2(2);
+            py::list l;
+            for (const auto &f : {std::ref(x1), std::ref(x2)}) {
+                l.append(py::cast(
+                    f, copy ? py::return_value_policy::copy : py::return_value_policy::reference));
+            }
+            return l;
+        },
+        "copy"_a);
 
     m.def("refwrap_iiw", [](const IncType &w) { return w.value(); });
     m.def("refwrap_call_iiw", [](IncType &w, const py::function &f) {
@@ -260,12 +362,13 @@ TEST_SUBMODULE(builtin_casters, m) {
 
     // test_complex
     m.def("complex_cast", [](float x) { return "{}"_s.format(x); });
-    m.def("complex_cast", [](std::complex<float> x) { return "({}, {})"_s.format(x.real(), x.imag()); });
+    m.def("complex_cast",
+          [](std::complex<float> x) { return "({}, {})"_s.format(x.real(), x.imag()); });
 
     // test int vs. long (Python 2)
-    m.def("int_cast", []() {return (int) 42;});
-    m.def("long_cast", []() {return (long) 42;});
-    m.def("longlong_cast", []() {return  ULLONG_MAX;});
+    m.def("int_cast", []() { return (int) 42; });
+    m.def("long_cast", []() { return (long) 42; });
+    m.def("longlong_cast", []() { return ULLONG_MAX; });
 
     /// test void* cast operator
     m.def("test_void_caster", []() -> bool {
@@ -276,11 +379,14 @@ TEST_SUBMODULE(builtin_casters, m) {
 
     // Tests const/non-const propagation in cast_op.
     m.def("takes", [](ConstRefCasted x) { return x.tag; });
-    m.def("takes_move", [](ConstRefCasted&& x) { return x.tag; });
-    m.def("takes_ptr", [](ConstRefCasted* x) { return x->tag; });
-    m.def("takes_ref", [](ConstRefCasted& x) { return x.tag; });
+    m.def("takes_move", [](ConstRefCasted &&x) { return x.tag; });
+    m.def("takes_ptr", [](ConstRefCasted *x) { return x->tag; });
+    m.def("takes_ref", [](ConstRefCasted &x) { return x.tag; });
     m.def("takes_ref_wrap", [](std::reference_wrapper<ConstRefCasted> x) { return x.get().tag; });
-    m.def("takes_const_ptr", [](const ConstRefCasted* x) { return x->tag; });
-    m.def("takes_const_ref", [](const ConstRefCasted& x) { return x.tag; });
-    m.def("takes_const_ref_wrap", [](std::reference_wrapper<const ConstRefCasted> x) { return x.get().tag; });
+    m.def("takes_const_ptr", [](const ConstRefCasted *x) { return x->tag; });
+    m.def("takes_const_ref", [](const ConstRefCasted &x) { return x.tag; });
+    m.def("takes_const_ref_wrap",
+          [](std::reference_wrapper<const ConstRefCasted> x) { return x.get().tag; });
+
+    PYBIND11_WARNING_POP
 }
diff --git a/ext/pybind11/tests/test_builtin_casters.py b/ext/pybind11/tests/test_builtin_casters.py
index 2a061c193e..d38ae68028 100644
--- a/ext/pybind11/tests/test_builtin_casters.py
+++ b/ext/pybind11/tests/test_builtin_casters.py
@@ -1,4 +1,5 @@
-# -*- coding: utf-8 -*-
+import sys
+
 import pytest
 
 import env
@@ -12,12 +13,12 @@ def test_simple_string():
 
 def test_unicode_conversion():
     """Tests unicode conversion and error reporting."""
-    assert m.good_utf8_string() == u"Say utf8‽ 🎂 𝐀"
-    assert m.good_utf16_string() == u"b‽🎂𝐀z"
-    assert m.good_utf32_string() == u"a𝐀🎂‽z"
-    assert m.good_wchar_string() == u"a⸘𝐀z"
+    assert m.good_utf8_string() == "Say utf8‽ 🎂 𝐀"
+    assert m.good_utf16_string() == "b‽🎂𝐀z"
+    assert m.good_utf32_string() == "a𝐀🎂‽z"
+    assert m.good_wchar_string() == "a⸘𝐀z"
     if hasattr(m, "has_u8string"):
-        assert m.good_utf8_u8string() == u"Say utf8‽ 🎂 𝐀"
+        assert m.good_utf8_u8string() == "Say utf8‽ 🎂 𝐀"
 
     with pytest.raises(UnicodeDecodeError):
         m.bad_utf8_string()
@@ -25,7 +26,7 @@ def test_unicode_conversion():
     with pytest.raises(UnicodeDecodeError):
         m.bad_utf16_string()
 
-    # These are provided only if they actually fail (they don't when 32-bit and under Python 2.7)
+    # These are provided only if they actually fail (they don't when 32-bit)
     if hasattr(m, "bad_utf32_string"):
         with pytest.raises(UnicodeDecodeError):
             m.bad_utf32_string()
@@ -37,10 +38,10 @@ def test_unicode_conversion():
             m.bad_utf8_u8string()
 
     assert m.u8_Z() == "Z"
-    assert m.u8_eacute() == u"é"
-    assert m.u16_ibang() == u"‽"
-    assert m.u32_mathbfA() == u"𝐀"
-    assert m.wchar_heart() == u"♥"
+    assert m.u8_eacute() == "é"
+    assert m.u16_ibang() == "‽"
+    assert m.u32_mathbfA() == "𝐀"
+    assert m.wchar_heart() == "♥"
     if hasattr(m, "has_u8string"):
         assert m.u8_char8_Z() == "Z"
 
@@ -49,72 +50,72 @@ def test_single_char_arguments():
     """Tests failures for passing invalid inputs to char-accepting functions"""
 
     def toobig_message(r):
-        return "Character code point not in range({:#x})".format(r)
+        return f"Character code point not in range({r:#x})"
 
     toolong_message = "Expected a character, but multi-character string found"
 
-    assert m.ord_char(u"a") == 0x61  # simple ASCII
-    assert m.ord_char_lv(u"b") == 0x62
+    assert m.ord_char("a") == 0x61  # simple ASCII
+    assert m.ord_char_lv("b") == 0x62
     assert (
-        m.ord_char(u"é") == 0xE9
+        m.ord_char("é") == 0xE9
     )  # requires 2 bytes in utf-8, but can be stuffed in a char
     with pytest.raises(ValueError) as excinfo:
-        assert m.ord_char(u"Ā") == 0x100  # requires 2 bytes, doesn't fit in a char
+        assert m.ord_char("Ā") == 0x100  # requires 2 bytes, doesn't fit in a char
     assert str(excinfo.value) == toobig_message(0x100)
     with pytest.raises(ValueError) as excinfo:
-        assert m.ord_char(u"ab")
+        assert m.ord_char("ab")
     assert str(excinfo.value) == toolong_message
 
-    assert m.ord_char16(u"a") == 0x61
-    assert m.ord_char16(u"é") == 0xE9
-    assert m.ord_char16_lv(u"ê") == 0xEA
-    assert m.ord_char16(u"Ā") == 0x100
-    assert m.ord_char16(u"‽") == 0x203D
-    assert m.ord_char16(u"♥") == 0x2665
-    assert m.ord_char16_lv(u"♡") == 0x2661
+    assert m.ord_char16("a") == 0x61
+    assert m.ord_char16("é") == 0xE9
+    assert m.ord_char16_lv("ê") == 0xEA
+    assert m.ord_char16("Ā") == 0x100
+    assert m.ord_char16("‽") == 0x203D
+    assert m.ord_char16("♥") == 0x2665
+    assert m.ord_char16_lv("♡") == 0x2661
     with pytest.raises(ValueError) as excinfo:
-        assert m.ord_char16(u"🎂") == 0x1F382  # requires surrogate pair
+        assert m.ord_char16("🎂") == 0x1F382  # requires surrogate pair
     assert str(excinfo.value) == toobig_message(0x10000)
     with pytest.raises(ValueError) as excinfo:
-        assert m.ord_char16(u"aa")
+        assert m.ord_char16("aa")
     assert str(excinfo.value) == toolong_message
 
-    assert m.ord_char32(u"a") == 0x61
-    assert m.ord_char32(u"é") == 0xE9
-    assert m.ord_char32(u"Ā") == 0x100
-    assert m.ord_char32(u"‽") == 0x203D
-    assert m.ord_char32(u"♥") == 0x2665
-    assert m.ord_char32(u"🎂") == 0x1F382
+    assert m.ord_char32("a") == 0x61
+    assert m.ord_char32("é") == 0xE9
+    assert m.ord_char32("Ā") == 0x100
+    assert m.ord_char32("‽") == 0x203D
+    assert m.ord_char32("♥") == 0x2665
+    assert m.ord_char32("🎂") == 0x1F382
     with pytest.raises(ValueError) as excinfo:
-        assert m.ord_char32(u"aa")
+        assert m.ord_char32("aa")
     assert str(excinfo.value) == toolong_message
 
-    assert m.ord_wchar(u"a") == 0x61
-    assert m.ord_wchar(u"é") == 0xE9
-    assert m.ord_wchar(u"Ā") == 0x100
-    assert m.ord_wchar(u"‽") == 0x203D
-    assert m.ord_wchar(u"♥") == 0x2665
+    assert m.ord_wchar("a") == 0x61
+    assert m.ord_wchar("é") == 0xE9
+    assert m.ord_wchar("Ā") == 0x100
+    assert m.ord_wchar("‽") == 0x203D
+    assert m.ord_wchar("♥") == 0x2665
     if m.wchar_size == 2:
         with pytest.raises(ValueError) as excinfo:
-            assert m.ord_wchar(u"🎂") == 0x1F382  # requires surrogate pair
+            assert m.ord_wchar("🎂") == 0x1F382  # requires surrogate pair
         assert str(excinfo.value) == toobig_message(0x10000)
     else:
-        assert m.ord_wchar(u"🎂") == 0x1F382
+        assert m.ord_wchar("🎂") == 0x1F382
     with pytest.raises(ValueError) as excinfo:
-        assert m.ord_wchar(u"aa")
+        assert m.ord_wchar("aa")
     assert str(excinfo.value) == toolong_message
 
     if hasattr(m, "has_u8string"):
-        assert m.ord_char8(u"a") == 0x61  # simple ASCII
-        assert m.ord_char8_lv(u"b") == 0x62
+        assert m.ord_char8("a") == 0x61  # simple ASCII
+        assert m.ord_char8_lv("b") == 0x62
         assert (
-            m.ord_char8(u"é") == 0xE9
+            m.ord_char8("é") == 0xE9
         )  # requires 2 bytes in utf-8, but can be stuffed in a char
         with pytest.raises(ValueError) as excinfo:
-            assert m.ord_char8(u"Ā") == 0x100  # requires 2 bytes, doesn't fit in a char
+            assert m.ord_char8("Ā") == 0x100  # requires 2 bytes, doesn't fit in a char
         assert str(excinfo.value) == toobig_message(0x100)
         with pytest.raises(ValueError) as excinfo:
-            assert m.ord_char8(u"ab")
+            assert m.ord_char8("ab")
         assert str(excinfo.value) == toolong_message
 
 
@@ -123,18 +124,22 @@ def test_bytes_to_string():
     one-way: the only way to return bytes to Python is via the pybind11::bytes class."""
     # Issue #816
 
-    def to_bytes(s):
-        b = s if env.PY2 else s.encode("utf8")
-        assert isinstance(b, bytes)
-        return b
-
-    assert m.strlen(to_bytes("hi")) == 2
-    assert m.string_length(to_bytes("world")) == 5
-    assert m.string_length(to_bytes("a\x00b")) == 3
-    assert m.strlen(to_bytes("a\x00b")) == 1  # C-string limitation
+    assert m.strlen(b"hi") == 2
+    assert m.string_length(b"world") == 5
+    assert m.string_length("a\x00b".encode()) == 3
+    assert m.strlen("a\x00b".encode()) == 1  # C-string limitation
 
     # passing in a utf8 encoded string should work
-    assert m.string_length(u"💩".encode("utf8")) == 4
+    assert m.string_length("💩".encode()) == 4
+
+
+def test_bytearray_to_string():
+    """Tests the ability to pass bytearray to C++ string-accepting functions"""
+    assert m.string_length(bytearray(b"Hi")) == 2
+    assert m.strlen(bytearray(b"bytearray")) == 9
+    assert m.string_length(bytearray()) == 0
+    assert m.string_length(bytearray("🦜", "utf-8", "strict")) == 4
+    assert m.string_length(bytearray(b"\x80")) == 1
 
 
 @pytest.mark.skipif(not hasattr(m, "has_string_view"), reason="no <string_view>")
@@ -142,26 +147,26 @@ def test_string_view(capture):
     """Tests support for C++17 string_view arguments and return values"""
     assert m.string_view_chars("Hi") == [72, 105]
     assert m.string_view_chars("Hi 🎂") == [72, 105, 32, 0xF0, 0x9F, 0x8E, 0x82]
-    assert m.string_view16_chars(u"Hi 🎂") == [72, 105, 32, 0xD83C, 0xDF82]
-    assert m.string_view32_chars(u"Hi 🎂") == [72, 105, 32, 127874]
+    assert m.string_view16_chars("Hi 🎂") == [72, 105, 32, 0xD83C, 0xDF82]
+    assert m.string_view32_chars("Hi 🎂") == [72, 105, 32, 127874]
     if hasattr(m, "has_u8string"):
         assert m.string_view8_chars("Hi") == [72, 105]
-        assert m.string_view8_chars(u"Hi 🎂") == [72, 105, 32, 0xF0, 0x9F, 0x8E, 0x82]
+        assert m.string_view8_chars("Hi 🎂") == [72, 105, 32, 0xF0, 0x9F, 0x8E, 0x82]
 
-    assert m.string_view_return() == u"utf8 secret 🎂"
-    assert m.string_view16_return() == u"utf16 secret 🎂"
-    assert m.string_view32_return() == u"utf32 secret 🎂"
+    assert m.string_view_return() == "utf8 secret 🎂"
+    assert m.string_view16_return() == "utf16 secret 🎂"
+    assert m.string_view32_return() == "utf32 secret 🎂"
     if hasattr(m, "has_u8string"):
-        assert m.string_view8_return() == u"utf8 secret 🎂"
+        assert m.string_view8_return() == "utf8 secret 🎂"
 
     with capture:
         m.string_view_print("Hi")
         m.string_view_print("utf8 🎂")
-        m.string_view16_print(u"utf16 🎂")
-        m.string_view32_print(u"utf32 🎂")
+        m.string_view16_print("utf16 🎂")
+        m.string_view32_print("utf32 🎂")
     assert (
         capture
-        == u"""
+        == """
         Hi 2
         utf8 🎂 9
         utf16 🎂 8
@@ -171,10 +176,10 @@ def test_string_view(capture):
     if hasattr(m, "has_u8string"):
         with capture:
             m.string_view8_print("Hi")
-            m.string_view8_print(u"utf8 🎂")
+            m.string_view8_print("utf8 🎂")
         assert (
             capture
-            == u"""
+            == """
             Hi 2
             utf8 🎂 9
         """
@@ -183,11 +188,11 @@ def test_string_view(capture):
     with capture:
         m.string_view_print("Hi, ascii")
         m.string_view_print("Hi, utf8 🎂")
-        m.string_view16_print(u"Hi, utf16 🎂")
-        m.string_view32_print(u"Hi, utf32 🎂")
+        m.string_view16_print("Hi, utf16 🎂")
+        m.string_view32_print("Hi, utf32 🎂")
     assert (
         capture
-        == u"""
+        == """
         Hi, ascii 9
         Hi, utf8 🎂 13
         Hi, utf16 🎂 12
@@ -197,15 +202,25 @@ def test_string_view(capture):
     if hasattr(m, "has_u8string"):
         with capture:
             m.string_view8_print("Hi, ascii")
-            m.string_view8_print(u"Hi, utf8 🎂")
+            m.string_view8_print("Hi, utf8 🎂")
         assert (
             capture
-            == u"""
+            == """
             Hi, ascii 9
             Hi, utf8 🎂 13
         """
         )
 
+    assert m.string_view_bytes() == b"abc \x80\x80 def"
+    assert m.string_view_str() == "abc ‽ def"
+    assert m.string_view_from_bytes("abc ‽ def".encode()) == "abc ‽ def"
+    if hasattr(m, "has_u8string"):
+        assert m.string_view8_str() == "abc ‽ def"
+    assert m.string_view_memoryview() == "Have some 🎂".encode()
+
+    assert m.bytes_from_type_with_both_operator_string_and_string_view() == b"success"
+    assert m.str_from_type_with_both_operator_string_and_string_view() == "success"
+
 
 def test_integer_casting():
     """Issue #929 - out-of-range integer values shouldn't be accepted"""
@@ -213,20 +228,8 @@ def test_integer_casting():
     assert m.i64_str(-1) == "-1"
     assert m.i32_str(2000000000) == "2000000000"
     assert m.u32_str(2000000000) == "2000000000"
-    if env.PY2:
-        assert m.i32_str(long(-1)) == "-1"  # noqa: F821 undefined name 'long'
-        assert m.i64_str(long(-1)) == "-1"  # noqa: F821 undefined name 'long'
-        assert (
-            m.i64_str(long(-999999999999))  # noqa: F821 undefined name 'long'
-            == "-999999999999"
-        )
-        assert (
-            m.u64_str(long(999999999999))  # noqa: F821 undefined name 'long'
-            == "999999999999"
-        )
-    else:
-        assert m.i64_str(-999999999999) == "-999999999999"
-        assert m.u64_str(999999999999) == "999999999999"
+    assert m.i64_str(-999999999999) == "-999999999999"
+    assert m.u64_str(999999999999) == "999999999999"
 
     with pytest.raises(TypeError) as excinfo:
         m.u32_str(-1)
@@ -241,46 +244,38 @@ def test_integer_casting():
         m.i32_str(3000000000)
     assert "incompatible function arguments" in str(excinfo.value)
 
-    if env.PY2:
-        with pytest.raises(TypeError) as excinfo:
-            m.u32_str(long(-1))  # noqa: F821 undefined name 'long'
-        assert "incompatible function arguments" in str(excinfo.value)
-        with pytest.raises(TypeError) as excinfo:
-            m.u64_str(long(-1))  # noqa: F821 undefined name 'long'
-        assert "incompatible function arguments" in str(excinfo.value)
-
 
 def test_int_convert():
-    class Int(object):
+    class Int:
         def __int__(self):
             return 42
 
-    class NotInt(object):
+    class NotInt:
         pass
 
-    class Float(object):
+    class Float:
         def __float__(self):
             return 41.99999
 
-    class Index(object):
+    class Index:
         def __index__(self):
             return 42
 
-    class IntAndIndex(object):
+    class IntAndIndex:
         def __int__(self):
             return 42
 
         def __index__(self):
             return 0
 
-    class RaisingTypeErrorOnIndex(object):
+    class RaisingTypeErrorOnIndex:
         def __index__(self):
             raise TypeError
 
         def __int__(self):
             return 42
 
-    class RaisingValueErrorOnIndex(object):
+    class RaisingValueErrorOnIndex:
         def __index__(self):
             raise ValueError
 
@@ -299,7 +294,8 @@ def test_int_convert():
     assert noconvert(7) == 7
     cant_convert(3.14159)
     # TODO: Avoid DeprecationWarning in `PyLong_AsLong` (and similar)
-    if (3, 8) <= env.PY < (3, 10):
+    # TODO: PyPy 3.8 does not behave like CPython 3.8 here yet (7.3.7)
+    if (3, 8) <= sys.version_info < (3, 10) and env.CPYTHON:
         with env.deprecated_call():
             assert convert(Int()) == 42
     else:
@@ -334,7 +330,9 @@ def test_numpy_int_convert():
 
     # The implicit conversion from np.float32 is undesirable but currently accepted.
     # TODO: Avoid DeprecationWarning in `PyLong_AsLong` (and similar)
-    if (3, 8) <= env.PY < (3, 10):
+    # TODO: PyPy 3.8 does not behave like CPython 3.8 here yet (7.3.7)
+    # https://github.com/pybind/pybind11/issues/3408
+    if (3, 8) <= sys.version_info < (3, 10) and env.CPYTHON:
         with env.deprecated_call():
             assert convert(np.float32(3.14159)) == 3
     else:
@@ -461,7 +459,7 @@ def test_bool_caster():
     require_implicit(None)
     assert convert(None) is False
 
-    class A(object):
+    class A:
         def __init__(self, x):
             self.x = x
 
@@ -471,7 +469,7 @@ def test_bool_caster():
         def __bool__(self):
             return self.x
 
-    class B(object):
+    class B:
         pass
 
     # Arbitrary objects are not accepted
@@ -501,17 +499,9 @@ def test_numpy_bool():
 
 
 def test_int_long():
-    """In Python 2, a C++ int should return a Python int rather than long
-    if possible: longs are not always accepted where ints are used (such
-    as the argument to sys.exit()). A C++ long long is always a Python
-    long."""
-
-    import sys
-
-    must_be_long = type(getattr(sys, "maxint", 1) + 1)
     assert isinstance(m.int_cast(), int)
     assert isinstance(m.long_cast(), int)
-    assert isinstance(m.longlong_cast(), must_be_long)
+    assert isinstance(m.longlong_cast(), int)
 
 
 def test_void_caster_2():
diff --git a/ext/pybind11/tests/test_call_policies.cpp b/ext/pybind11/tests/test_call_policies.cpp
index 7cb98d0d86..d177008cfe 100644
--- a/ext/pybind11/tests/test_call_policies.cpp
+++ b/ext/pybind11/tests/test_call_policies.cpp
@@ -40,18 +40,17 @@ TEST_SUBMODULE(call_policies, m) {
         Child(Child &&) = default;
         ~Child() { py::print("Releasing child."); }
     };
-    py::class_<Child>(m, "Child")
-        .def(py::init<>());
+    py::class_<Child>(m, "Child").def(py::init<>());
 
     class Parent {
     public:
         Parent() { py::print("Allocating parent."); }
-        Parent(const Parent& parent) = default;
+        Parent(const Parent &parent) = default;
         ~Parent() { py::print("Releasing parent."); }
-        void addChild(Child *) { }
+        void addChild(Child *) {}
         Child *returnChild() { return new Child(); }
         Child *returnNullChild() { return nullptr; }
-        static Child *staticFunction(Parent*) { return new Child(); }
+        static Child *staticFunction(Parent *) { return new Child(); }
     };
     py::class_<Parent>(m, "Parent")
         .def(py::init<>())
@@ -62,11 +61,12 @@ TEST_SUBMODULE(call_policies, m) {
         .def("returnChildKeepAlive", &Parent::returnChild, py::keep_alive<1, 0>())
         .def("returnNullChildKeepAliveChild", &Parent::returnNullChild, py::keep_alive<1, 0>())
         .def("returnNullChildKeepAliveParent", &Parent::returnNullChild, py::keep_alive<0, 1>())
-        .def_static(
-            "staticFunction", &Parent::staticFunction, py::keep_alive<1, 0>());
+        .def_static("staticFunction", &Parent::staticFunction, py::keep_alive<1, 0>());
 
-    m.def("free_function", [](Parent*, Child*) {}, py::keep_alive<1, 2>());
-    m.def("invalid_arg_index", []{}, py::keep_alive<0, 1>());
+    m.def(
+        "free_function", [](Parent *, Child *) {}, py::keep_alive<1, 2>());
+    m.def(
+        "invalid_arg_index", [] {}, py::keep_alive<0, 1>());
 
 #if !defined(PYPY_VERSION)
     // test_alive_gc
@@ -74,29 +74,37 @@ TEST_SUBMODULE(call_policies, m) {
     public:
         using Parent::Parent;
     };
-    py::class_<ParentGC, Parent>(m, "ParentGC", py::dynamic_attr())
-        .def(py::init<>());
+    py::class_<ParentGC, Parent>(m, "ParentGC", py::dynamic_attr()).def(py::init<>());
 #endif
 
     // test_call_guard
     m.def("unguarded_call", &CustomGuard::report_status);
     m.def("guarded_call", &CustomGuard::report_status, py::call_guard<CustomGuard>());
 
-    m.def("multiple_guards_correct_order", []() {
-        return CustomGuard::report_status() + std::string(" & ") + DependentGuard::report_status();
-    }, py::call_guard<CustomGuard, DependentGuard>());
+    m.def(
+        "multiple_guards_correct_order",
+        []() {
+            return CustomGuard::report_status() + std::string(" & ")
+                   + DependentGuard::report_status();
+        },
+        py::call_guard<CustomGuard, DependentGuard>());
 
-    m.def("multiple_guards_wrong_order", []() {
-        return DependentGuard::report_status() + std::string(" & ") + CustomGuard::report_status();
-    }, py::call_guard<DependentGuard, CustomGuard>());
+    m.def(
+        "multiple_guards_wrong_order",
+        []() {
+            return DependentGuard::report_status() + std::string(" & ")
+                   + CustomGuard::report_status();
+        },
+        py::call_guard<DependentGuard, CustomGuard>());
 
 #if defined(WITH_THREAD) && !defined(PYPY_VERSION)
     // `py::call_guard<py::gil_scoped_release>()` should work in PyPy as well,
     // but it's unclear how to test it without `PyGILState_GetThisThreadState`.
     auto report_gil_status = []() {
         auto is_gil_held = false;
-        if (auto tstate = py::detail::get_thread_state_unchecked())
+        if (auto *tstate = py::detail::get_thread_state_unchecked()) {
             is_gil_held = (tstate == PyGILState_GetThisThreadState());
+        }
 
         return is_gil_held ? "GIL held" : "GIL released";
     };
diff --git a/ext/pybind11/tests/test_call_policies.py b/ext/pybind11/tests/test_call_policies.py
index 3599cf81af..6160564123 100644
--- a/ext/pybind11/tests/test_call_policies.py
+++ b/ext/pybind11/tests/test_call_policies.py
@@ -1,4 +1,3 @@
-# -*- coding: utf-8 -*-
 import pytest
 
 import env  # noqa: F401
diff --git a/ext/pybind11/tests/test_callbacks.cpp b/ext/pybind11/tests/test_callbacks.cpp
index 58688b6e8b..2fd05dec72 100644
--- a/ext/pybind11/tests/test_callbacks.cpp
+++ b/ext/pybind11/tests/test_callbacks.cpp
@@ -7,11 +7,12 @@
     BSD-style license that can be found in the LICENSE file.
 */
 
-#include "pybind11_tests.h"
-#include "constructor_stats.h"
 #include <pybind11/functional.h>
-#include <thread>
 
+#include "constructor_stats.h"
+#include "pybind11_tests.h"
+
+#include <thread>
 
 int dummy_function(int i) { return i + 1; }
 
@@ -20,11 +21,12 @@ TEST_SUBMODULE(callbacks, m) {
     m.def("test_callback1", [](const py::object &func) { return func(); });
     m.def("test_callback2", [](const py::object &func) { return func("Hello", 'x', true, 5); });
     m.def("test_callback3", [](const std::function<int(int)> &func) {
-        return "func(43) = " + std::to_string(func(43)); });
-    m.def("test_callback4", []() -> std::function<int(int)> { return [](int i) { return i+1; }; });
-    m.def("test_callback5", []() {
-        return py::cpp_function([](int i) { return i+1; }, py::arg("number"));
+        return "func(43) = " + std::to_string(func(43));
     });
+    m.def("test_callback4",
+          []() -> std::function<int(int)> { return [](int i) { return i + 1; }; });
+    m.def("test_callback5",
+          []() { return py::cpp_function([](int i) { return i + 1; }, py::arg("number")); });
 
     // test_keyword_args_and_generalized_unpacking
     m.def("test_tuple_unpacking", [](const py::function &f) {
@@ -34,9 +36,9 @@ TEST_SUBMODULE(callbacks, m) {
     });
 
     m.def("test_dict_unpacking", [](const py::function &f) {
-        auto d1 = py::dict("key"_a="value", "a"_a=1);
+        auto d1 = py::dict("key"_a = "value", "a"_a = 1);
         auto d2 = py::dict();
-        auto d3 = py::dict("b"_a=2);
+        auto d3 = py::dict("b"_a = 2);
         return f("positional", 1, **d1, **d2, **d3);
     });
 
@@ -44,32 +46,40 @@ TEST_SUBMODULE(callbacks, m) {
 
     m.def("test_unpacking_and_keywords1", [](const py::function &f) {
         auto args = py::make_tuple(2);
-        auto kwargs = py::dict("d"_a=4);
-        return f(1, *args, "c"_a=3, **kwargs);
+        auto kwargs = py::dict("d"_a = 4);
+        return f(1, *args, "c"_a = 3, **kwargs);
     });
 
     m.def("test_unpacking_and_keywords2", [](const py::function &f) {
-        auto kwargs1 = py::dict("a"_a=1);
-        auto kwargs2 = py::dict("c"_a=3, "d"_a=4);
-        return f("positional", *py::make_tuple(1), 2, *py::make_tuple(3, 4), 5,
-                 "key"_a="value", **kwargs1, "b"_a=2, **kwargs2, "e"_a=5);
+        auto kwargs1 = py::dict("a"_a = 1);
+        auto kwargs2 = py::dict("c"_a = 3, "d"_a = 4);
+        return f("positional",
+                 *py::make_tuple(1),
+                 2,
+                 *py::make_tuple(3, 4),
+                 5,
+                 "key"_a = "value",
+                 **kwargs1,
+                 "b"_a = 2,
+                 **kwargs2,
+                 "e"_a = 5);
     });
 
     m.def("test_unpacking_error1", [](const py::function &f) {
-        auto kwargs = py::dict("x"_a=3);
-        return f("x"_a=1, "y"_a=2, **kwargs); // duplicate ** after keyword
+        auto kwargs = py::dict("x"_a = 3);
+        return f("x"_a = 1, "y"_a = 2, **kwargs); // duplicate ** after keyword
     });
 
     m.def("test_unpacking_error2", [](const py::function &f) {
-        auto kwargs = py::dict("x"_a=3);
-        return f(**kwargs, "x"_a=1); // duplicate keyword after **
+        auto kwargs = py::dict("x"_a = 3);
+        return f(**kwargs, "x"_a = 1); // duplicate keyword after **
     });
 
     m.def("test_arg_conversion_error1",
           [](const py::function &f) { f(234, UnregisteredType(), "kw"_a = 567); });
 
     m.def("test_arg_conversion_error2", [](const py::function &f) {
-        f(234, "expected_name"_a=UnregisteredType(), "kw"_a=567);
+        f(234, "expected_name"_a = UnregisteredType(), "kw"_a = 567);
     });
 
     // test_lambda_closure_cleanup
@@ -136,14 +146,19 @@ TEST_SUBMODULE(callbacks, m) {
     m.def("dummy_function_overloaded", [](int i, int j) { return i + j; });
     m.def("dummy_function_overloaded", &dummy_function);
     m.def("dummy_function2", [](int i, int j) { return i + j; });
-    m.def("roundtrip", [](std::function<int(int)> f, bool expect_none = false) {
-        if (expect_none && f)
-            throw std::runtime_error("Expected None to be converted to empty std::function");
-        return f;
-    }, py::arg("f"), py::arg("expect_none")=false);
+    m.def(
+        "roundtrip",
+        [](std::function<int(int)> f, bool expect_none = false) {
+            if (expect_none && f) {
+                throw std::runtime_error("Expected None to be converted to empty std::function");
+            }
+            return f;
+        },
+        py::arg("f"),
+        py::arg("expect_none") = false);
     m.def("test_dummy_function", [](const std::function<int(int)> &f) -> std::string {
         using fn_type = int (*)(int);
-        auto result = f.target<fn_type>();
+        const auto *result = f.target<fn_type>();
         if (!result) {
             auto r = f(1);
             return "can't convert to function pointer: eval(1) = " + std::to_string(r);
@@ -153,7 +168,6 @@ TEST_SUBMODULE(callbacks, m) {
             return "matches dummy_function: eval(1) = " + std::to_string(r);
         }
         return "argument does NOT match dummy_function. This should never happen!";
-
     });
 
     class AbstractBase {
@@ -185,7 +199,7 @@ TEST_SUBMODULE(callbacks, m) {
     // test_movable_object
     m.def("callback_with_movable", [](const std::function<void(MovableObject &)> &f) {
         auto x = MovableObject();
-        f(x); // lvalue reference shouldn't move out object
+        f(x);           // lvalue reference shouldn't move out object
         return x.valid; // must still return `true`
     });
 
@@ -197,9 +211,10 @@ TEST_SUBMODULE(callbacks, m) {
 
     // This checks that builtin functions can be passed as callbacks
     // rather than throwing RuntimeError due to trying to extract as capsule
-    m.def("test_sum_builtin", [](const std::function<double(py::iterable)> &sum_builtin, const py::iterable &i) {
-      return sum_builtin(i);
-    });
+    m.def("test_sum_builtin",
+          [](const std::function<double(py::iterable)> &sum_builtin, const py::iterable &i) {
+              return sum_builtin(i);
+          });
 
     // test async Python callbacks
     using callback_f = std::function<void(int)>;
@@ -215,8 +230,9 @@ TEST_SUBMODULE(callbacks, m) {
         };
 
         // spawn worker threads
-        for (auto i : work)
+        for (auto i : work) {
             start_f(py::cast<int>(i));
+        }
     });
 
     m.def("callback_num_times", [](const py::function &f, std::size_t num) {
@@ -224,4 +240,41 @@ TEST_SUBMODULE(callbacks, m) {
             f();
         }
     });
+
+    auto *custom_def = []() {
+        static PyMethodDef def;
+        def.ml_name = "example_name";
+        def.ml_doc = "Example doc";
+        def.ml_meth = [](PyObject *, PyObject *args) -> PyObject * {
+            if (PyTuple_Size(args) != 1) {
+                throw std::runtime_error("Invalid number of arguments for example_name");
+            }
+            PyObject *first = PyTuple_GetItem(args, 0);
+            if (!PyLong_Check(first)) {
+                throw std::runtime_error("Invalid argument to example_name");
+            }
+            auto result = py::cast(PyLong_AsLong(first) * 9);
+            return result.release().ptr();
+        };
+        def.ml_flags = METH_VARARGS;
+        return &def;
+    }();
+
+    // rec_capsule with name that has the same value (but not pointer) as our internal one
+    // This capsule should be detected by our code as foreign and not inspected as the pointers
+    // shouldn't match
+    constexpr const char *rec_capsule_name
+        = pybind11::detail::internals_function_record_capsule_name;
+    py::capsule rec_capsule(std::malloc(1), [](void *data) { std::free(data); });
+    rec_capsule.set_name(rec_capsule_name);
+    m.add_object("custom_function", PyCFunction_New(custom_def, rec_capsule.ptr()));
+
+    // This test requires a new ABI version to pass
+#if PYBIND11_INTERNALS_VERSION > 4
+    // rec_capsule with nullptr name
+    py::capsule rec_capsule2(std::malloc(1), [](void *data) { std::free(data); });
+    m.add_object("custom_function2", PyCFunction_New(custom_def, rec_capsule2.ptr()));
+#else
+    m.add_object("custom_function2", py::none());
+#endif
 }
diff --git a/ext/pybind11/tests/test_callbacks.py b/ext/pybind11/tests/test_callbacks.py
index f41ad86e7f..57b6599880 100644
--- a/ext/pybind11/tests/test_callbacks.py
+++ b/ext/pybind11/tests/test_callbacks.py
@@ -1,4 +1,3 @@
-# -*- coding: utf-8 -*-
 import time
 from threading import Thread
 
@@ -18,7 +17,7 @@ def test_callbacks():
         return "func2", a, b, c, d
 
     def func3(a):
-        return "func3({})".format(a)
+        return f"func3({a})"
 
     assert m.test_callback1(func1) == "func1"
     assert m.test_callback2(func2) == ("func2", "Hello", "x", True, 5)
@@ -189,14 +188,21 @@ def test_callback_num_times():
         if not rep:
             print()
         print(
-            "callback_num_times: {:d} million / {:.3f} seconds = {:.3f} million / second".format(
-                num_millions, td, rate
-            )
+            f"callback_num_times: {num_millions:d} million / {td:.3f} seconds = {rate:.3f} million / second"
         )
     if len(rates) > 1:
         print("Min    Mean   Max")
-        print(
-            "{:6.3f} {:6.3f} {:6.3f}".format(
-                min(rates), sum(rates) / len(rates), max(rates)
-            )
-        )
+        print(f"{min(rates):6.3f} {sum(rates) / len(rates):6.3f} {max(rates):6.3f}")
+
+
+def test_custom_func():
+    assert m.custom_function(4) == 36
+    assert m.roundtrip(m.custom_function)(4) == 36
+
+
+@pytest.mark.skipif(
+    m.custom_function2 is None, reason="Current PYBIND11_INTERNALS_VERSION too low"
+)
+def test_custom_func2():
+    assert m.custom_function2(3) == 27
+    assert m.roundtrip(m.custom_function2)(3) == 27
diff --git a/ext/pybind11/tests/test_chrono.cpp b/ext/pybind11/tests/test_chrono.cpp
index 6537050803..8be0ffd183 100644
--- a/ext/pybind11/tests/test_chrono.cpp
+++ b/ext/pybind11/tests/test_chrono.cpp
@@ -8,21 +8,20 @@
     BSD-style license that can be found in the LICENSE file.
 */
 
-#include "pybind11_tests.h"
 #include <pybind11/chrono.h>
+
+#include "pybind11_tests.h"
+
 #include <chrono>
 
 struct different_resolutions {
-    using time_point_h = std::chrono::time_point<
-        std::chrono::system_clock, std::chrono::hours>;
-    using time_point_m = std::chrono::time_point<
-        std::chrono::system_clock, std::chrono::minutes>;
-    using time_point_s = std::chrono::time_point<
-        std::chrono::system_clock, std::chrono::seconds>;
-    using time_point_ms = std::chrono::time_point<
-        std::chrono::system_clock, std::chrono::milliseconds>;
-    using time_point_us = std::chrono::time_point<
-        std::chrono::system_clock, std::chrono::microseconds>;
+    using time_point_h = std::chrono::time_point<std::chrono::system_clock, std::chrono::hours>;
+    using time_point_m = std::chrono::time_point<std::chrono::system_clock, std::chrono::minutes>;
+    using time_point_s = std::chrono::time_point<std::chrono::system_clock, std::chrono::seconds>;
+    using time_point_ms
+        = std::chrono::time_point<std::chrono::system_clock, std::chrono::milliseconds>;
+    using time_point_us
+        = std::chrono::time_point<std::chrono::system_clock, std::chrono::microseconds>;
     time_point_h timestamp_h;
     time_point_m timestamp_m;
     time_point_s timestamp_s;
@@ -65,12 +64,11 @@ TEST_SUBMODULE(chrono, m) {
     // Roundtrip a duration in microseconds from a float argument
     m.def("test_chrono7", [](std::chrono::microseconds t) { return t; });
     // Float durations (issue #719)
-    m.def("test_chrono_float_diff", [](std::chrono::duration<float> a, std::chrono::duration<float> b) {
-        return a - b; });
+    m.def("test_chrono_float_diff",
+          [](std::chrono::duration<float> a, std::chrono::duration<float> b) { return a - b; });
 
-    m.def("test_nano_timepoint", [](timestamp start, timespan delta) -> timestamp {
-        return start + delta;
-    });
+    m.def("test_nano_timepoint",
+          [](timestamp start, timespan delta) -> timestamp { return start + delta; });
 
     // Test different resolutions
     py::class_<different_resolutions>(m, "different_resolutions")
@@ -79,6 +77,5 @@ TEST_SUBMODULE(chrono, m) {
         .def_readwrite("timestamp_m", &different_resolutions::timestamp_m)
         .def_readwrite("timestamp_s", &different_resolutions::timestamp_s)
         .def_readwrite("timestamp_ms", &different_resolutions::timestamp_ms)
-        .def_readwrite("timestamp_us", &different_resolutions::timestamp_us)
-        ;
+        .def_readwrite("timestamp_us", &different_resolutions::timestamp_us);
 }
diff --git a/ext/pybind11/tests/test_chrono.py b/ext/pybind11/tests/test_chrono.py
index fdd73d6908..7f47b37a25 100644
--- a/ext/pybind11/tests/test_chrono.py
+++ b/ext/pybind11/tests/test_chrono.py
@@ -1,4 +1,3 @@
-# -*- coding: utf-8 -*-
 import datetime
 
 import pytest
@@ -101,7 +100,7 @@ SKIP_TZ_ENV_ON_WIN = pytest.mark.skipif(
 )
 def test_chrono_system_clock_roundtrip_time(time1, tz, monkeypatch):
     if tz is not None:
-        monkeypatch.setenv("TZ", "/usr/share/zoneinfo/{}".format(tz))
+        monkeypatch.setenv("TZ", f"/usr/share/zoneinfo/{tz}")
 
     # Roundtrip the time
     datetime2 = m.test_chrono2(time1)
diff --git a/ext/pybind11/tests/test_class.cpp b/ext/pybind11/tests/test_class.cpp
index 52a41a3bc0..ca925917e6 100644
--- a/ext/pybind11/tests/test_class.cpp
+++ b/ext/pybind11/tests/test_class.cpp
@@ -11,19 +11,19 @@
 // Intel compiler requires a separate header file to support aligned new operators
 // and does not set the __cpp_aligned_new feature macro.
 // This header needs to be included before pybind11.
-#include <aligned_new>
+#    include <aligned_new>
 #endif
 
-#include "pybind11_tests.h"
+#include <pybind11/stl.h>
+
 #include "constructor_stats.h"
 #include "local_bindings.h"
-#include <pybind11/stl.h>
+#include "pybind11_tests.h"
 
 #include <utility>
 
-#if defined(_MSC_VER)
-#  pragma warning(disable: 4324) // warning C4324: structure was padded due to alignment specifier
-#endif
+PYBIND11_WARNING_DISABLE_MSVC(4324)
+//     warning C4324: structure was padded due to alignment specifier
 
 // test_brace_initialization
 struct NoBraceInitialization {
@@ -34,7 +34,29 @@ struct NoBraceInitialization {
     std::vector<int> vec;
 };
 
+namespace test_class {
+namespace pr4220_tripped_over_this { // PR #4227
+
+template <int>
+struct SoEmpty {};
+
+template <typename T>
+std::string get_msg(const T &) {
+    return "This is really only meant to exercise successful compilation.";
+}
+
+using Empty0 = SoEmpty<0x0>;
+
+void bind_empty0(py::module_ &m) {
+    py::class_<Empty0>(m, "Empty0").def(py::init<>()).def("get_msg", get_msg<Empty0>);
+}
+
+} // namespace pr4220_tripped_over_this
+} // namespace test_class
+
 TEST_SUBMODULE(class_, m) {
+    m.def("obj_class_name", [](py::handle obj) { return py::detail::obj_class_name(obj.ptr()); });
+
     // test_instance
     struct NoConstructor {
         NoConstructor() = default;
@@ -74,6 +96,7 @@ TEST_SUBMODULE(class_, m) {
             : m_name(name), m_species(species) {}
         std::string name() const { return m_name; }
         std::string species() const { return m_species; }
+
     private:
         std::string m_name;
         std::string m_species;
@@ -100,27 +123,24 @@ TEST_SUBMODULE(class_, m) {
     };
 
     py::class_<Pet> pet_class(m, "Pet");
-    pet_class
-        .def(py::init<std::string, std::string>())
+    pet_class.def(py::init<std::string, std::string>())
         .def("name", &Pet::name)
         .def("species", &Pet::species);
 
     /* One way of declaring a subclass relationship: reference parent's class_ object */
-    py::class_<Dog>(m, "Dog", pet_class)
-        .def(py::init<std::string>());
+    py::class_<Dog>(m, "Dog", pet_class).def(py::init<std::string>());
 
     /* Another way of declaring a subclass relationship: reference parent's C++ type */
-    py::class_<Rabbit, Pet>(m, "Rabbit")
-        .def(py::init<std::string>());
+    py::class_<Rabbit, Pet>(m, "Rabbit").def(py::init<std::string>());
 
     /* And another: list parent in class template arguments */
-    py::class_<Hamster, Pet>(m, "Hamster")
-        .def(py::init<std::string>());
+    py::class_<Hamster, Pet>(m, "Hamster").def(py::init<std::string>());
 
     /* Constructors are not inherited by default */
     py::class_<Chimera, Pet>(m, "Chimera");
 
-    m.def("pet_name_species", [](const Pet &pet) { return pet.name() + " is a " + pet.species(); });
+    m.def("pet_name_species",
+          [](const Pet &pet) { return pet.name() + " is a " + pet.species(); });
     m.def("dog_bark", [](const Dog &dog) { return dog.bark(); });
 
     // test_automatic_upcasting
@@ -130,33 +150,35 @@ TEST_SUBMODULE(class_, m) {
         BaseClass(BaseClass &&) = default;
         virtual ~BaseClass() = default;
     };
-    struct DerivedClass1 : BaseClass { };
-    struct DerivedClass2 : BaseClass { };
+    struct DerivedClass1 : BaseClass {};
+    struct DerivedClass2 : BaseClass {};
 
     py::class_<BaseClass>(m, "BaseClass").def(py::init<>());
     py::class_<DerivedClass1>(m, "DerivedClass1").def(py::init<>());
     py::class_<DerivedClass2>(m, "DerivedClass2").def(py::init<>());
 
-    m.def("return_class_1", []() -> BaseClass* { return new DerivedClass1(); });
-    m.def("return_class_2", []() -> BaseClass* { return new DerivedClass2(); });
-    m.def("return_class_n", [](int n) -> BaseClass* {
-        if (n == 1) return new DerivedClass1();
-        if (n == 2) return new DerivedClass2();
+    m.def("return_class_1", []() -> BaseClass * { return new DerivedClass1(); });
+    m.def("return_class_2", []() -> BaseClass * { return new DerivedClass2(); });
+    m.def("return_class_n", [](int n) -> BaseClass * {
+        if (n == 1) {
+            return new DerivedClass1();
+        }
+        if (n == 2) {
+            return new DerivedClass2();
+        }
         return new BaseClass();
     });
-    m.def("return_none", []() -> BaseClass* { return nullptr; });
+    m.def("return_none", []() -> BaseClass * { return nullptr; });
 
     // test_isinstance
     m.def("check_instances", [](const py::list &l) {
-        return py::make_tuple(
-            py::isinstance<py::tuple>(l[0]),
-            py::isinstance<py::dict>(l[1]),
-            py::isinstance<Pet>(l[2]),
-            py::isinstance<Pet>(l[3]),
-            py::isinstance<Dog>(l[4]),
-            py::isinstance<Rabbit>(l[5]),
-            py::isinstance<UnregisteredType>(l[6])
-        );
+        return py::make_tuple(py::isinstance<py::tuple>(l[0]),
+                              py::isinstance<py::dict>(l[1]),
+                              py::isinstance<Pet>(l[2]),
+                              py::isinstance<Pet>(l[3]),
+                              py::isinstance<Dog>(l[4]),
+                              py::isinstance<Rabbit>(l[5]),
+                              py::isinstance<UnregisteredType>(l[6]));
     });
 
     struct Invalid {};
@@ -167,25 +189,24 @@ TEST_SUBMODULE(class_, m) {
         // See https://github.com/pybind/pybind11/issues/2486
         // if (category == 2)
         //     return py::type::of<int>();
-        if (category == 1)
+        if (category == 1) {
             return py::type::of<DerivedClass1>();
+        }
         return py::type::of<Invalid>();
     });
 
     m.def("get_type_of", [](py::object ob) { return py::type::of(std::move(ob)); });
 
-    m.def("get_type_classic", [](py::handle h) {
-        return h.get_type();
-    });
+    m.def("get_type_classic", [](py::handle h) { return h.get_type(); });
 
     m.def("as_type", [](const py::object &ob) { return py::type(ob); });
 
     // test_mismatched_holder
-    struct MismatchBase1 { };
-    struct MismatchDerived1 : MismatchBase1 { };
+    struct MismatchBase1 {};
+    struct MismatchDerived1 : MismatchBase1 {};
 
-    struct MismatchBase2 { };
-    struct MismatchDerived2 : MismatchBase2 { };
+    struct MismatchBase2 {};
+    struct MismatchDerived2 : MismatchBase2 {};
 
     m.def("mismatched_holder_1", []() {
         auto mod = py::module_::import("__main__");
@@ -195,16 +216,14 @@ TEST_SUBMODULE(class_, m) {
     m.def("mismatched_holder_2", []() {
         auto mod = py::module_::import("__main__");
         py::class_<MismatchBase2>(mod, "MismatchBase2");
-        py::class_<MismatchDerived2, std::shared_ptr<MismatchDerived2>,
-                   MismatchBase2>(mod, "MismatchDerived2");
+        py::class_<MismatchDerived2, std::shared_ptr<MismatchDerived2>, MismatchBase2>(
+            mod, "MismatchDerived2");
     });
 
     // test_override_static
     // #511: problem with inheritance + overwritten def_static
     struct MyBase {
-        static std::unique_ptr<MyBase> make() {
-            return std::unique_ptr<MyBase>(new MyBase());
-        }
+        static std::unique_ptr<MyBase> make() { return std::unique_ptr<MyBase>(new MyBase()); }
     };
 
     struct MyDerived : MyBase {
@@ -213,8 +232,7 @@ TEST_SUBMODULE(class_, m) {
         }
     };
 
-    py::class_<MyBase>(m, "MyBase")
-        .def_static("make", &MyBase::make);
+    py::class_<MyBase>(m, "MyBase").def_static("make", &MyBase::make);
 
     py::class_<MyDerived, MyBase>(m, "MyDerived")
         .def_static("make", &MyDerived::make)
@@ -227,8 +245,7 @@ TEST_SUBMODULE(class_, m) {
         explicit ConvertibleFromUserType(UserType u) : i(u.value()) {}
     };
 
-    py::class_<ConvertibleFromUserType>(m, "AcceptsUserType")
-        .def(py::init<UserType>());
+    py::class_<ConvertibleFromUserType>(m, "AcceptsUserType").def(py::init<UserType>());
     py::implicitly_convertible<UserType, ConvertibleFromUserType>();
 
     m.def("implicitly_convert_argument", [](const ConvertibleFromUserType &r) { return r.i; });
@@ -250,32 +267,61 @@ TEST_SUBMODULE(class_, m) {
             return py::str().release().ptr();
         };
 
-        auto def = new PyMethodDef{"f", f, METH_VARARGS, nullptr};
-        py::capsule def_capsule(def, [](void *ptr) { delete reinterpret_cast<PyMethodDef *>(ptr); });
-        return py::reinterpret_steal<py::object>(PyCFunction_NewEx(def, def_capsule.ptr(), m.ptr()));
+        auto *def = new PyMethodDef{"f", f, METH_VARARGS, nullptr};
+        py::capsule def_capsule(def,
+                                [](void *ptr) { delete reinterpret_cast<PyMethodDef *>(ptr); });
+        return py::reinterpret_steal<py::object>(
+            PyCFunction_NewEx(def, def_capsule.ptr(), m.ptr()));
     }());
 
     // test_operator_new_delete
     struct HasOpNewDel {
         std::uint64_t i;
-        static void *operator new(size_t s) { py::print("A new", s); return ::operator new(s); }
-        static void *operator new(size_t s, void *ptr) { py::print("A placement-new", s); return ptr; }
-        static void operator delete(void *p) { py::print("A delete"); return ::operator delete(p); }
+        static void *operator new(size_t s) {
+            py::print("A new", s);
+            return ::operator new(s);
+        }
+        static void *operator new(size_t s, void *ptr) {
+            py::print("A placement-new", s);
+            return ptr;
+        }
+        static void operator delete(void *p) {
+            py::print("A delete");
+            return ::operator delete(p);
+        }
     };
     struct HasOpNewDelSize {
         std::uint32_t i;
-        static void *operator new(size_t s) { py::print("B new", s); return ::operator new(s); }
-        static void *operator new(size_t s, void *ptr) { py::print("B placement-new", s); return ptr; }
-        static void operator delete(void *p, size_t s) { py::print("B delete", s); return ::operator delete(p); }
+        static void *operator new(size_t s) {
+            py::print("B new", s);
+            return ::operator new(s);
+        }
+        static void *operator new(size_t s, void *ptr) {
+            py::print("B placement-new", s);
+            return ptr;
+        }
+        static void operator delete(void *p, size_t s) {
+            py::print("B delete", s);
+            return ::operator delete(p);
+        }
     };
     struct AliasedHasOpNewDelSize {
         std::uint64_t i;
-        static void *operator new(size_t s) { py::print("C new", s); return ::operator new(s); }
-        static void *operator new(size_t s, void *ptr) { py::print("C placement-new", s); return ptr; }
-        static void operator delete(void *p, size_t s) { py::print("C delete", s); return ::operator delete(p); }
+        static void *operator new(size_t s) {
+            py::print("C new", s);
+            return ::operator new(s);
+        }
+        static void *operator new(size_t s, void *ptr) {
+            py::print("C placement-new", s);
+            return ptr;
+        }
+        static void operator delete(void *p, size_t s) {
+            py::print("C delete", s);
+            return ::operator delete(p);
+        }
         virtual ~AliasedHasOpNewDelSize() = default;
         AliasedHasOpNewDelSize() = default;
-        AliasedHasOpNewDelSize(const AliasedHasOpNewDelSize&) = delete;
+        AliasedHasOpNewDelSize(const AliasedHasOpNewDelSize &) = delete;
     };
     struct PyAliasedHasOpNewDelSize : AliasedHasOpNewDelSize {
         PyAliasedHasOpNewDelSize() = default;
@@ -284,15 +330,28 @@ TEST_SUBMODULE(class_, m) {
     };
     struct HasOpNewDelBoth {
         std::uint32_t i[8];
-        static void *operator new(size_t s) { py::print("D new", s); return ::operator new(s); }
-        static void *operator new(size_t s, void *ptr) { py::print("D placement-new", s); return ptr; }
-        static void operator delete(void *p) { py::print("D delete"); return ::operator delete(p); }
-        static void operator delete(void *p, size_t s) { py::print("D wrong delete", s); return ::operator delete(p); }
+        static void *operator new(size_t s) {
+            py::print("D new", s);
+            return ::operator new(s);
+        }
+        static void *operator new(size_t s, void *ptr) {
+            py::print("D placement-new", s);
+            return ptr;
+        }
+        static void operator delete(void *p) {
+            py::print("D delete");
+            return ::operator delete(p);
+        }
+        static void operator delete(void *p, size_t s) {
+            py::print("D wrong delete", s);
+            return ::operator delete(p);
+        }
     };
     py::class_<HasOpNewDel>(m, "HasOpNewDel").def(py::init<>());
     py::class_<HasOpNewDelSize>(m, "HasOpNewDelSize").def(py::init<>());
     py::class_<HasOpNewDelBoth>(m, "HasOpNewDelBoth").def(py::init<>());
-    py::class_<AliasedHasOpNewDelSize, PyAliasedHasOpNewDelSize> aliased(m, "AliasedHasOpNewDelSize");
+    py::class_<AliasedHasOpNewDelSize, PyAliasedHasOpNewDelSize> aliased(m,
+                                                                         "AliasedHasOpNewDelSize");
     aliased.def(py::init<>());
     aliased.attr("size_noalias") = py::int_(sizeof(AliasedHasOpNewDelSize));
     aliased.attr("size_alias") = py::int_(sizeof(PyAliasedHasOpNewDelSize));
@@ -315,13 +374,7 @@ TEST_SUBMODULE(class_, m) {
         using ProtectedA::foo;
     };
 
-    py::class_<ProtectedA>(m, "ProtectedA")
-        .def(py::init<>())
-#if !defined(_MSC_VER) || _MSC_VER >= 1910
-        .def("foo", &PublicistA::foo);
-#else
-        .def("foo", static_cast<int (ProtectedA::*)() const>(&PublicistA::foo));
-#endif
+    py::class_<ProtectedA>(m, "ProtectedA").def(py::init<>()).def("foo", &PublicistA::foo);
 
     class ProtectedB {
     public:
@@ -331,6 +384,8 @@ TEST_SUBMODULE(class_, m) {
 
     protected:
         virtual int foo() const { return value; }
+        virtual void *void_foo() { return static_cast<void *>(&value); }
+        virtual void *get_self() { return static_cast<void *>(this); }
 
     private:
         int value = 42;
@@ -339,6 +394,8 @@ TEST_SUBMODULE(class_, m) {
     class TrampolineB : public ProtectedB {
     public:
         int foo() const override { PYBIND11_OVERRIDE(int, ProtectedB, foo, ); }
+        void *void_foo() override { PYBIND11_OVERRIDE(void *, ProtectedB, void_foo, ); }
+        void *get_self() override { PYBIND11_OVERRIDE(void *, ProtectedB, get_self, ); }
     };
 
     class PublicistB : public ProtectedB {
@@ -346,17 +403,25 @@ TEST_SUBMODULE(class_, m) {
         // [workaround(intel)] = default does not work here
         // Removing or defaulting this destructor results in linking errors with the Intel compiler
         // (in Debug builds only, tested with icpc (ICC) 2021.1 Beta 20200827)
-        ~PublicistB() override {};  // NOLINT(modernize-use-equals-default)
+        ~PublicistB() override{}; // NOLINT(modernize-use-equals-default)
         using ProtectedB::foo;
+        using ProtectedB::get_self;
+        using ProtectedB::void_foo;
     };
 
+    m.def("read_foo", [](const void *original) {
+        const int *ptr = reinterpret_cast<const int *>(original);
+        return *ptr;
+    });
+
+    m.def("pointers_equal",
+          [](const void *original, const void *comparison) { return original == comparison; });
+
     py::class_<ProtectedB, TrampolineB>(m, "ProtectedB")
         .def(py::init<>())
-#if !defined(_MSC_VER) || _MSC_VER >= 1910
-        .def("foo", &PublicistB::foo);
-#else
-        .def("foo", static_cast<int (ProtectedB::*)() const>(&PublicistB::foo));
-#endif
+        .def("foo", &PublicistB::foo)
+        .def("void_foo", &PublicistB::void_foo)
+        .def("get_self", &PublicistB::get_self);
 
     // test_brace_initialization
     struct BraceInitialization {
@@ -396,8 +461,8 @@ TEST_SUBMODULE(class_, m) {
     py::class_<Nested>(base, "Nested")
         .def(py::init<>())
         .def("fn", [](Nested &, int, NestBase &, Nested &) {})
-        .def("fa", [](Nested &, int, NestBase &, Nested &) {},
-                "a"_a, "b"_a, "c"_a);
+        .def(
+            "fa", [](Nested &, int, NestBase &, Nested &) {}, "a"_a, "b"_a, "c"_a);
     base.def("g", [](NestBase &, Nested &) {});
     base.def("h", []() { return NestBase(); });
 
@@ -407,21 +472,21 @@ TEST_SUBMODULE(class_, m) {
     // generate a useful error message
 
     struct NotRegistered {};
-    struct StringWrapper { std::string str; };
+    struct StringWrapper {
+        std::string str;
+    };
     m.def("test_error_after_conversions", [](int) {});
     m.def("test_error_after_conversions",
           [](const StringWrapper &) -> NotRegistered { return {}; });
     py::class_<StringWrapper>(m, "StringWrapper").def(py::init<std::string>());
     py::implicitly_convertible<std::string, StringWrapper>();
 
-    #if defined(PYBIND11_CPP17)
-        struct alignas(1024) Aligned {
-            std::uintptr_t ptr() const { return (uintptr_t) this; }
-        };
-        py::class_<Aligned>(m, "Aligned")
-            .def(py::init<>())
-            .def("ptr", &Aligned::ptr);
-    #endif
+#if defined(PYBIND11_CPP17)
+    struct alignas(1024) Aligned {
+        std::uintptr_t ptr() const { return (uintptr_t) this; }
+    };
+    py::class_<Aligned>(m, "Aligned").def(py::init<>()).def("ptr", &Aligned::ptr);
+#endif
 
     // test_final
     struct IsFinal final {};
@@ -434,9 +499,7 @@ TEST_SUBMODULE(class_, m) {
     // test_exception_rvalue_abort
     struct PyPrintDestructor {
         PyPrintDestructor() = default;
-        ~PyPrintDestructor() {
-            py::print("Print from destructor");
-        }
+        ~PyPrintDestructor() { py::print("Print from destructor"); }
         void throw_something() { throw std::runtime_error("error"); }
     };
     py::class_<PyPrintDestructor>(m, "PyPrintDestructor")
@@ -450,8 +513,7 @@ TEST_SUBMODULE(class_, m) {
         .def(py::init([]() { return &samePointer; }));
 
     struct Empty {};
-    py::class_<Empty>(m, "Empty")
-        .def(py::init<>());
+    py::class_<Empty>(m, "Empty").def(py::init<>());
 
     // test_base_and_derived_nested_scope
     struct BaseWithNested {
@@ -491,14 +553,19 @@ TEST_SUBMODULE(class_, m) {
         py::class_<OtherDuplicateNested>(gt, "OtherDuplicateNested");
         py::class_<OtherDuplicateNested>(gt, "YetAnotherDuplicateNested");
     });
+
+    test_class::pr4220_tripped_over_this::bind_empty0(m);
 }
 
-template <int N> class BreaksBase { public:
+template <int N>
+class BreaksBase {
+public:
     virtual ~BreaksBase() = default;
     BreaksBase() = default;
-    BreaksBase(const BreaksBase&) = delete;
+    BreaksBase(const BreaksBase &) = delete;
 };
-template <int N> class BreaksTramp : public BreaksBase<N> {};
+template <int N>
+class BreaksTramp : public BreaksBase<N> {};
 // These should all compile just fine:
 using DoesntBreak1 = py::class_<BreaksBase<1>, std::unique_ptr<BreaksBase<1>>, BreaksTramp<1>>;
 using DoesntBreak2 = py::class_<BreaksBase<2>, BreaksTramp<2>, std::unique_ptr<BreaksBase<2>>>;
@@ -508,43 +575,83 @@ using DoesntBreak5 = py::class_<BreaksBase<5>>;
 using DoesntBreak6 = py::class_<BreaksBase<6>, std::shared_ptr<BreaksBase<6>>, BreaksTramp<6>>;
 using DoesntBreak7 = py::class_<BreaksBase<7>, BreaksTramp<7>, std::shared_ptr<BreaksBase<7>>>;
 using DoesntBreak8 = py::class_<BreaksBase<8>, std::shared_ptr<BreaksBase<8>>>;
-#define CHECK_BASE(N) static_assert(std::is_same<typename DoesntBreak##N::type, BreaksBase<(N)>>::value, \
-        "DoesntBreak" #N " has wrong type!")
-CHECK_BASE(1); CHECK_BASE(2); CHECK_BASE(3); CHECK_BASE(4); CHECK_BASE(5); CHECK_BASE(6); CHECK_BASE(7); CHECK_BASE(8);
-#define CHECK_ALIAS(N) static_assert(DoesntBreak##N::has_alias && std::is_same<typename DoesntBreak##N::type_alias, BreaksTramp<(N)>>::value, \
+#define CHECK_BASE(N)                                                                             \
+    static_assert(std::is_same<typename DoesntBreak##N::type, BreaksBase<(N)>>::value,            \
+                  "DoesntBreak" #N " has wrong type!")
+CHECK_BASE(1);
+CHECK_BASE(2);
+CHECK_BASE(3);
+CHECK_BASE(4);
+CHECK_BASE(5);
+CHECK_BASE(6);
+CHECK_BASE(7);
+CHECK_BASE(8);
+#define CHECK_ALIAS(N)                                                                            \
+    static_assert(                                                                                \
+        DoesntBreak##N::has_alias                                                                 \
+            && std::is_same<typename DoesntBreak##N::type_alias, BreaksTramp<(N)>>::value,        \
         "DoesntBreak" #N " has wrong type_alias!")
-#define CHECK_NOALIAS(N) static_assert(!DoesntBreak##N::has_alias && std::is_void<typename DoesntBreak##N::type_alias>::value, \
-        "DoesntBreak" #N " has type alias, but shouldn't!")
-CHECK_ALIAS(1); CHECK_ALIAS(2); CHECK_NOALIAS(3); CHECK_ALIAS(4); CHECK_NOALIAS(5); CHECK_ALIAS(6); CHECK_ALIAS(7); CHECK_NOALIAS(8);
-#define CHECK_HOLDER(N, TYPE) static_assert(std::is_same<typename DoesntBreak##N::holder_type, std::TYPE##_ptr<BreaksBase<(N)>>>::value, \
-        "DoesntBreak" #N " has wrong holder_type!")
-CHECK_HOLDER(1, unique); CHECK_HOLDER(2, unique); CHECK_HOLDER(3, unique); CHECK_HOLDER(4, unique); CHECK_HOLDER(5, unique);
-CHECK_HOLDER(6, shared); CHECK_HOLDER(7, shared); CHECK_HOLDER(8, shared);
+#define CHECK_NOALIAS(N)                                                                          \
+    static_assert(!DoesntBreak##N::has_alias                                                      \
+                      && std::is_void<typename DoesntBreak##N::type_alias>::value,                \
+                  "DoesntBreak" #N " has type alias, but shouldn't!")
+CHECK_ALIAS(1);
+CHECK_ALIAS(2);
+CHECK_NOALIAS(3);
+CHECK_ALIAS(4);
+CHECK_NOALIAS(5);
+CHECK_ALIAS(6);
+CHECK_ALIAS(7);
+CHECK_NOALIAS(8);
+#define CHECK_HOLDER(N, TYPE)                                                                     \
+    static_assert(std::is_same<typename DoesntBreak##N::holder_type,                              \
+                               std::TYPE##_ptr<BreaksBase<(N)>>>::value,                          \
+                  "DoesntBreak" #N " has wrong holder_type!")
+CHECK_HOLDER(1, unique);
+CHECK_HOLDER(2, unique);
+CHECK_HOLDER(3, unique);
+CHECK_HOLDER(4, unique);
+CHECK_HOLDER(5, unique);
+CHECK_HOLDER(6, shared);
+CHECK_HOLDER(7, shared);
+CHECK_HOLDER(8, shared);
 
 // There's no nice way to test that these fail because they fail to compile; leave them here,
 // though, so that they can be manually tested by uncommenting them (and seeing that compilation
 // failures occurs).
 
 // We have to actually look into the type: the typedef alone isn't enough to instantiate the type:
-#define CHECK_BROKEN(N) static_assert(std::is_same<typename Breaks##N::type, BreaksBase<-(N)>>::value, \
-        "Breaks1 has wrong type!");
+#define CHECK_BROKEN(N)                                                                           \
+    static_assert(std::is_same<typename Breaks##N::type, BreaksBase<-(N)>>::value,                \
+                  "Breaks1 has wrong type!");
 
-//// Two holder classes:
-//typedef py::class_<BreaksBase<-1>, std::unique_ptr<BreaksBase<-1>>, std::unique_ptr<BreaksBase<-1>>> Breaks1;
-//CHECK_BROKEN(1);
-//// Two aliases:
-//typedef py::class_<BreaksBase<-2>, BreaksTramp<-2>, BreaksTramp<-2>> Breaks2;
-//CHECK_BROKEN(2);
-//// Holder + 2 aliases
-//typedef py::class_<BreaksBase<-3>, std::unique_ptr<BreaksBase<-3>>, BreaksTramp<-3>, BreaksTramp<-3>> Breaks3;
-//CHECK_BROKEN(3);
-//// Alias + 2 holders
-//typedef py::class_<BreaksBase<-4>, std::unique_ptr<BreaksBase<-4>>, BreaksTramp<-4>, std::shared_ptr<BreaksBase<-4>>> Breaks4;
-//CHECK_BROKEN(4);
-//// Invalid option (not a subclass or holder)
-//typedef py::class_<BreaksBase<-5>, BreaksTramp<-4>> Breaks5;
-//CHECK_BROKEN(5);
-//// Invalid option: multiple inheritance not supported:
-//template <> struct BreaksBase<-8> : BreaksBase<-6>, BreaksBase<-7> {};
-//typedef py::class_<BreaksBase<-8>, BreaksBase<-6>, BreaksBase<-7>> Breaks8;
-//CHECK_BROKEN(8);
+#ifdef PYBIND11_NEVER_DEFINED_EVER
+// Two holder classes:
+typedef py::
+    class_<BreaksBase<-1>, std::unique_ptr<BreaksBase<-1>>, std::unique_ptr<BreaksBase<-1>>>
+        Breaks1;
+CHECK_BROKEN(1);
+// Two aliases:
+typedef py::class_<BreaksBase<-2>, BreaksTramp<-2>, BreaksTramp<-2>> Breaks2;
+CHECK_BROKEN(2);
+// Holder + 2 aliases
+typedef py::
+    class_<BreaksBase<-3>, std::unique_ptr<BreaksBase<-3>>, BreaksTramp<-3>, BreaksTramp<-3>>
+        Breaks3;
+CHECK_BROKEN(3);
+// Alias + 2 holders
+typedef py::class_<BreaksBase<-4>,
+                   std::unique_ptr<BreaksBase<-4>>,
+                   BreaksTramp<-4>,
+                   std::shared_ptr<BreaksBase<-4>>>
+    Breaks4;
+CHECK_BROKEN(4);
+// Invalid option (not a subclass or holder)
+typedef py::class_<BreaksBase<-5>, BreaksTramp<-4>> Breaks5;
+CHECK_BROKEN(5);
+// Invalid option: multiple inheritance not supported:
+template <>
+struct BreaksBase<-8> : BreaksBase<-6>, BreaksBase<-7> {};
+typedef py::class_<BreaksBase<-8>, BreaksBase<-6>, BreaksBase<-7>> Breaks8;
+CHECK_BROKEN(8);
+#endif
diff --git a/ext/pybind11/tests/test_class.py b/ext/pybind11/tests/test_class.py
index caafe2068d..9c964e001b 100644
--- a/ext/pybind11/tests/test_class.py
+++ b/ext/pybind11/tests/test_class.py
@@ -1,13 +1,20 @@
-# -*- coding: utf-8 -*-
 import pytest
 
-import env  # noqa: F401
+import env
 from pybind11_tests import ConstructorStats, UserType
 from pybind11_tests import class_ as m
 
 
+def test_obj_class_name():
+    if env.PYPY:
+        expected_name = "UserType"
+    else:
+        expected_name = "pybind11_tests.UserType"
+    assert m.obj_class_name(UserType(1)) == expected_name
+    assert m.obj_class_name(UserType) == expected_name
+
+
 def test_repr():
-    # In Python 3.3+, repr() accesses __qualname__
     assert "pybind11_type" in repr(type(UserType))
     assert "UserType" in repr(UserType)
 
@@ -103,8 +110,8 @@ def test_docstrings(doc):
 
 
 def test_qualname(doc):
-    """Tests that a properly qualified name is set in __qualname__ (even in pre-3.3, where we
-    backport the attribute) and that generated docstrings properly use it and the module name"""
+    """Tests that a properly qualified name is set in __qualname__ and that
+    generated docstrings properly use it and the module name"""
     assert m.NestBase.__qualname__ == "NestBase"
     assert m.NestBase.Nested.__qualname__ == "NestBase.Nested"
 
@@ -130,13 +137,13 @@ def test_qualname(doc):
         doc(m.NestBase.Nested.fn)
         == """
         fn(self: m.class_.NestBase.Nested, arg0: int, arg1: m.class_.NestBase, arg2: m.class_.NestBase.Nested) -> None
-    """  # noqa: E501 line too long
+    """
     )
     assert (
         doc(m.NestBase.Nested.fa)
         == """
         fa(self: m.class_.NestBase.Nested, a: int, b: m.class_.NestBase, c: m.class_.NestBase.Nested) -> None
-    """  # noqa: E501 line too long
+    """
     )
     assert m.NestBase.__module__ == "pybind11_tests.class_"
     assert m.NestBase.Nested.__module__ == "pybind11_tests.class_"
@@ -315,6 +322,8 @@ def test_bind_protected_functions():
 
     b = m.ProtectedB()
     assert b.foo() == 42
+    assert m.read_foo(b.void_foo()) == 42
+    assert m.pointers_equal(b.get_self(), b)
 
     class C(m.ProtectedB):
         def __init__(self):
@@ -471,3 +480,10 @@ def test_register_duplicate_class():
         m.register_duplicate_nested_class_type(ClassScope)
     expected = 'generic_type: type "YetAnotherDuplicateNested" is already registered!'
     assert str(exc_info.value) == expected
+
+
+def test_pr4220_tripped_over_this():
+    assert (
+        m.Empty0().get_msg()
+        == "This is really only meant to exercise successful compilation."
+    )
diff --git a/ext/pybind11/tests/test_cmake_build/embed.cpp b/ext/pybind11/tests/test_cmake_build/embed.cpp
index a3abc8a84d..30bc4f1e14 100644
--- a/ext/pybind11/tests/test_cmake_build/embed.cpp
+++ b/ext/pybind11/tests/test_cmake_build/embed.cpp
@@ -6,15 +6,17 @@ PYBIND11_EMBEDDED_MODULE(test_cmake_build, m) {
 }
 
 int main(int argc, char *argv[]) {
-    if (argc != 2)
+    if (argc != 2) {
         throw std::runtime_error("Expected test.py file as the first argument");
-    auto test_py_file = argv[1];
+    }
+    auto *test_py_file = argv[1];
 
     py::scoped_interpreter guard{};
 
     auto m = py::module_::import("test_cmake_build");
-    if (m.attr("add")(1, 2).cast<int>() != 3)
+    if (m.attr("add")(1, 2).cast<int>() != 3) {
         throw std::runtime_error("embed.cpp failed");
+    }
 
     py::module_::import("sys").attr("argv") = py::make_tuple("test.py", "embed.cpp");
     py::eval_file(test_py_file, py::globals());
diff --git a/ext/pybind11/tests/test_cmake_build/test.py b/ext/pybind11/tests/test_cmake_build/test.py
index 972a27bea4..807fd43b4a 100644
--- a/ext/pybind11/tests/test_cmake_build/test.py
+++ b/ext/pybind11/tests/test_cmake_build/test.py
@@ -1,10 +1,8 @@
-# -*- coding: utf-8 -*-
 import sys
 
 import test_cmake_build
 
-if str is not bytes:  # If not Python2
-    assert isinstance(__file__, str)  # Test this is properly set
+assert isinstance(__file__, str)  # Test this is properly set
 
 assert test_cmake_build.add(1, 2) == 3
-print("{} imports, runs, and adds: 1 + 2 = 3".format(sys.argv[1]))
+print(f"{sys.argv[1]} imports, runs, and adds: 1 + 2 = 3")
diff --git a/ext/pybind11/tests/test_const_name.cpp b/ext/pybind11/tests/test_const_name.cpp
new file mode 100644
index 0000000000..2ad01e6827
--- /dev/null
+++ b/ext/pybind11/tests/test_const_name.cpp
@@ -0,0 +1,55 @@
+// Copyright (c) 2021 The Pybind Development Team.
+// All rights reserved. Use of this source code is governed by a
+// BSD-style license that can be found in the LICENSE file.
+
+#include "pybind11_tests.h"
+
+// IUT = Implementation Under Test
+#define CONST_NAME_TESTS(TEST_FUNC, IUT)                                                          \
+    std::string TEST_FUNC(int selector) {                                                         \
+        switch (selector) {                                                                       \
+            case 0:                                                                               \
+                return IUT("").text;                                                              \
+            case 1:                                                                               \
+                return IUT("A").text;                                                             \
+            case 2:                                                                               \
+                return IUT("Bd").text;                                                            \
+            case 3:                                                                               \
+                return IUT("Cef").text;                                                           \
+            case 4:                                                                               \
+                return IUT<int>().text; /*NOLINT(bugprone-macro-parentheses)*/                    \
+            case 5:                                                                               \
+                return IUT<std::string>().text; /*NOLINT(bugprone-macro-parentheses)*/            \
+            case 6:                                                                               \
+                return IUT<true>("T1", "T2").text; /*NOLINT(bugprone-macro-parentheses)*/         \
+            case 7:                                                                               \
+                return IUT<false>("U1", "U2").text; /*NOLINT(bugprone-macro-parentheses)*/        \
+            case 8:                                                                               \
+                /*NOLINTNEXTLINE(bugprone-macro-parentheses)*/                                    \
+                return IUT<true>(IUT("D1"), IUT("D2")).text;                                      \
+            case 9:                                                                               \
+                /*NOLINTNEXTLINE(bugprone-macro-parentheses)*/                                    \
+                return IUT<false>(IUT("E1"), IUT("E2")).text;                                     \
+            case 10:                                                                              \
+                return IUT("KeepAtEnd").text;                                                     \
+            default:                                                                              \
+                break;                                                                            \
+        }                                                                                         \
+        throw std::runtime_error("Invalid selector value.");                                      \
+    }
+
+CONST_NAME_TESTS(const_name_tests, py::detail::const_name)
+
+#ifdef PYBIND11_DETAIL_UNDERSCORE_BACKWARD_COMPATIBILITY
+CONST_NAME_TESTS(underscore_tests, py::detail::_)
+#endif
+
+TEST_SUBMODULE(const_name, m) {
+    m.def("const_name_tests", const_name_tests);
+
+#if defined(PYBIND11_DETAIL_UNDERSCORE_BACKWARD_COMPATIBILITY)
+    m.def("underscore_tests", underscore_tests);
+#else
+    m.attr("underscore_tests") = "PYBIND11_DETAIL_UNDERSCORE_BACKWARD_COMPATIBILITY not defined.";
+#endif
+}
diff --git a/ext/pybind11/tests/test_const_name.py b/ext/pybind11/tests/test_const_name.py
new file mode 100644
index 0000000000..10b0caee2e
--- /dev/null
+++ b/ext/pybind11/tests/test_const_name.py
@@ -0,0 +1,29 @@
+import pytest
+
+from pybind11_tests import const_name as m
+
+
+@pytest.mark.parametrize("func", (m.const_name_tests, m.underscore_tests))
+@pytest.mark.parametrize(
+    "selector, expected",
+    enumerate(
+        (
+            "",
+            "A",
+            "Bd",
+            "Cef",
+            "%",
+            "%",
+            "T1",
+            "U2",
+            "D1",
+            "E2",
+            "KeepAtEnd",
+        )
+    ),
+)
+def test_const_name(func, selector, expected):
+    if isinstance(func, str):
+        pytest.skip(func)
+    text = func(selector)
+    assert text == expected
diff --git a/ext/pybind11/tests/test_constants_and_functions.cpp b/ext/pybind11/tests/test_constants_and_functions.cpp
index c0554503fa..922375c5ea 100644
--- a/ext/pybind11/tests/test_constants_and_functions.cpp
+++ b/ext/pybind11/tests/test_constants_and_functions.cpp
@@ -12,20 +12,14 @@
 
 enum MyEnum { EFirstEntry = 1, ESecondEntry };
 
-std::string test_function1() {
-    return "test_function()";
-}
+std::string test_function1() { return "test_function()"; }
 
-std::string test_function2(MyEnum k) {
-    return "test_function(enum=" + std::to_string(k) + ")";
-}
+std::string test_function2(MyEnum k) { return "test_function(enum=" + std::to_string(k) + ")"; }
 
-std::string test_function3(int i) {
-    return "test_function(" + std::to_string(i) + ")";
-}
+std::string test_function3(int i) { return "test_function(" + std::to_string(i) + ")"; }
 
-py::str test_function4()           { return "test_function()"; }
-py::str test_function4(char *)     { return "test_function(char *)"; }
+py::str test_function4() { return "test_function()"; }
+py::str test_function4(char *) { return "test_function(char *)"; }
 py::str test_function4(int, float) { return "test_function(int, float)"; }
 py::str test_function4(float, int) { return "test_function(float, int)"; }
 
@@ -37,57 +31,51 @@ py::bytes return_bytes() {
 std::string print_bytes(const py::bytes &bytes) {
     std::string ret = "bytes[";
     const auto value = static_cast<std::string>(bytes);
-    for (size_t i = 0; i < value.length(); ++i) {
-        ret += std::to_string(static_cast<int>(value[i])) + " ";
+    for (char c : value) {
+        ret += std::to_string(static_cast<int>(c)) + ' ';
     }
     ret.back() = ']';
     return ret;
 }
 
-// Test that we properly handle C++17 exception specifiers (which are part of the function signature
-// in C++17).  These should all still work before C++17, but don't affect the function signature.
+// Test that we properly handle C++17 exception specifiers (which are part of the function
+// signature in C++17).  These should all still work before C++17, but don't affect the function
+// signature.
 namespace test_exc_sp {
 // [workaround(intel)] Unable to use noexcept instead of noexcept(true)
 // Make the f1 test basically the same as the f2 test in C++17 mode for the Intel compiler as
 // it fails to compile with a plain noexcept (tested with icc (ICC) 2021.1 Beta 20200827).
 #if defined(__INTEL_COMPILER) && defined(PYBIND11_CPP17)
-int f1(int x) noexcept(true) { return x+1; }
+int f1(int x) noexcept(true) { return x + 1; }
 #else
-int f1(int x) noexcept { return x+1; }
-#endif
-int f2(int x) noexcept(true) { return x+2; }
-int f3(int x) noexcept(false) { return x+3; }
-#if defined(__GNUG__) && !defined(__INTEL_COMPILER)
-#  pragma GCC diagnostic push
-#  pragma GCC diagnostic ignored "-Wdeprecated"
+int f1(int x) noexcept { return x + 1; }
 #endif
+int f2(int x) noexcept(true) { return x + 2; }
+int f3(int x) noexcept(false) { return x + 3; }
+PYBIND11_WARNING_PUSH
+PYBIND11_WARNING_DISABLE_GCC("-Wdeprecated")
+PYBIND11_WARNING_DISABLE_CLANG("-Wdeprecated")
 // NOLINTNEXTLINE(modernize-use-noexcept)
-int f4(int x) throw() { return x+4; } // Deprecated equivalent to noexcept(true)
-#if defined(__GNUG__) && !defined(__INTEL_COMPILER)
-#  pragma GCC diagnostic pop
-#endif
+int f4(int x) throw() { return x + 4; } // Deprecated equivalent to noexcept(true)
+PYBIND11_WARNING_POP
 struct C {
-    int m1(int x) noexcept { return x-1; }
-    int m2(int x) const noexcept { return x-2; }
-    int m3(int x) noexcept(true) { return x-3; }
-    int m4(int x) const noexcept(true) { return x-4; }
-    int m5(int x) noexcept(false) { return x-5; }
-    int m6(int x) const noexcept(false) { return x-6; }
-#if defined(__GNUG__) && !defined(__INTEL_COMPILER)
-#  pragma GCC diagnostic push
-#  pragma GCC diagnostic ignored "-Wdeprecated"
-#endif
+    int m1(int x) noexcept { return x - 1; }
+    int m2(int x) const noexcept { return x - 2; }
+    int m3(int x) noexcept(true) { return x - 3; }
+    int m4(int x) const noexcept(true) { return x - 4; }
+    int m5(int x) noexcept(false) { return x - 5; }
+    int m6(int x) const noexcept(false) { return x - 6; }
+    PYBIND11_WARNING_PUSH
+    PYBIND11_WARNING_DISABLE_GCC("-Wdeprecated")
+    PYBIND11_WARNING_DISABLE_CLANG("-Wdeprecated")
     // NOLINTNEXTLINE(modernize-use-noexcept)
     int m7(int x) throw() { return x - 7; }
     // NOLINTNEXTLINE(modernize-use-noexcept)
     int m8(int x) const throw() { return x - 8; }
-#if defined(__GNUG__) && !defined(__INTEL_COMPILER)
-#  pragma GCC diagnostic pop
-#endif
+    PYBIND11_WARNING_POP
 };
 } // namespace test_exc_sp
 
-
 TEST_SUBMODULE(constants_and_functions, m) {
     // test_constants
     m.attr("some_constant") = py::int_(14);
@@ -129,29 +117,27 @@ TEST_SUBMODULE(constants_and_functions, m) {
         .def("m5", &C::m5)
         .def("m6", &C::m6)
         .def("m7", &C::m7)
-        .def("m8", &C::m8)
-        ;
+        .def("m8", &C::m8);
     m.def("f1", f1);
     m.def("f2", f2);
-#if defined(__INTEL_COMPILER)
-#    pragma warning push
-#    pragma warning disable 878 // incompatible exception specifications
-#endif
+
+    PYBIND11_WARNING_PUSH
+    PYBIND11_WARNING_DISABLE_INTEL(878) // incompatible exception specifications
     m.def("f3", f3);
-#if defined(__INTEL_COMPILER)
-#    pragma warning pop
-#endif
+    PYBIND11_WARNING_POP
+
     m.def("f4", f4);
 
     // test_function_record_leaks
-    struct LargeCapture {
+    m.def("register_large_capture_with_invalid_arguments", [](py::module_ m) {
         // This should always be enough to trigger the alternative branch
         // where `sizeof(capture) > sizeof(rec->data)`
-        uint64_t zeros[10] = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9};
-    };
-    m.def("register_large_capture_with_invalid_arguments", [](py::module_ m) {
-        LargeCapture capture;  // VS 2015's MSVC is acting up if we create the array here
-        m.def("should_raise", [capture](int) { return capture.zeros[9] + 33; }, py::kw_only(), py::arg());
+        uint64_t capture[10] = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9};
+#if defined(__GNUC__) && __GNUC__ == 4 // CentOS7
+        py::detail::silence_unused_warnings(capture);
+#endif
+        m.def(
+            "should_raise", [capture](int) { return capture[9] + 33; }, py::kw_only(), py::arg());
     });
     m.def("register_with_raising_repr", [](py::module_ m, const py::object &default_value) {
         m.def(
diff --git a/ext/pybind11/tests/test_constants_and_functions.py b/ext/pybind11/tests/test_constants_and_functions.py
index ff13bd0f26..5da0b84b8e 100644
--- a/ext/pybind11/tests/test_constants_and_functions.py
+++ b/ext/pybind11/tests/test_constants_and_functions.py
@@ -1,4 +1,3 @@
-# -*- coding: utf-8 -*-
 import pytest
 
 m = pytest.importorskip("pybind11_tests.constants_and_functions")
diff --git a/ext/pybind11/tests/test_copy_move.cpp b/ext/pybind11/tests/test_copy_move.cpp
index 5fb0dd810c..28c2445644 100644
--- a/ext/pybind11/tests/test_copy_move.cpp
+++ b/ext/pybind11/tests/test_copy_move.cpp
@@ -8,30 +8,33 @@
     BSD-style license that can be found in the LICENSE file.
 */
 
-#include "pybind11_tests.h"
-#include "constructor_stats.h"
 #include <pybind11/stl.h>
 
+#include "constructor_stats.h"
+#include "pybind11_tests.h"
+
 template <typename derived>
 struct empty {
-    static const derived& get_one() { return instance_; }
+    static const derived &get_one() { return instance_; }
     static derived instance_;
 };
 
 struct lacking_copy_ctor : public empty<lacking_copy_ctor> {
     lacking_copy_ctor() = default;
-    lacking_copy_ctor(const lacking_copy_ctor& other) = delete;
+    lacking_copy_ctor(const lacking_copy_ctor &other) = delete;
 };
 
-template <> lacking_copy_ctor empty<lacking_copy_ctor>::instance_ = {};
+template <>
+lacking_copy_ctor empty<lacking_copy_ctor>::instance_ = {};
 
 struct lacking_move_ctor : public empty<lacking_move_ctor> {
     lacking_move_ctor() = default;
-    lacking_move_ctor(const lacking_move_ctor& other) = delete;
-    lacking_move_ctor(lacking_move_ctor&& other) = delete;
+    lacking_move_ctor(const lacking_move_ctor &other) = delete;
+    lacking_move_ctor(lacking_move_ctor &&other) = delete;
 };
 
-template <> lacking_move_ctor empty<lacking_move_ctor>::instance_ = {};
+template <>
+lacking_move_ctor empty<lacking_move_ctor>::instance_ = {};
 
 /* Custom type caster move/copy test classes */
 class MoveOnlyInt {
@@ -66,8 +69,16 @@ public:
         std::swap(value, m.value);
         return *this;
     }
-    MoveOrCopyInt(const MoveOrCopyInt &c) { print_copy_created(this, c.value); value = c.value; }
-    MoveOrCopyInt &operator=(const MoveOrCopyInt &c) { print_copy_assigned(this, c.value); value = c.value; return *this; }
+    MoveOrCopyInt(const MoveOrCopyInt &c) {
+        print_copy_created(this, c.value);
+        // NOLINTNEXTLINE(cppcoreguidelines-prefer-member-initializer)
+        value = c.value;
+    }
+    MoveOrCopyInt &operator=(const MoveOrCopyInt &c) {
+        print_copy_assigned(this, c.value);
+        value = c.value;
+        return *this;
+    }
     ~MoveOrCopyInt() { print_destroyed(this); }
 
     int value;
@@ -76,40 +87,70 @@ class CopyOnlyInt {
 public:
     CopyOnlyInt() { print_default_created(this); }
     explicit CopyOnlyInt(int v) : value{v} { print_created(this, value); }
-    CopyOnlyInt(const CopyOnlyInt &c) { print_copy_created(this, c.value); value = c.value; }
-    CopyOnlyInt &operator=(const CopyOnlyInt &c) { print_copy_assigned(this, c.value); value = c.value; return *this; }
+    CopyOnlyInt(const CopyOnlyInt &c) {
+        print_copy_created(this, c.value);
+        // NOLINTNEXTLINE(cppcoreguidelines-prefer-member-initializer)
+        value = c.value;
+    }
+    CopyOnlyInt &operator=(const CopyOnlyInt &c) {
+        print_copy_assigned(this, c.value);
+        value = c.value;
+        return *this;
+    }
     ~CopyOnlyInt() { print_destroyed(this); }
 
     int value;
 };
 PYBIND11_NAMESPACE_BEGIN(pybind11)
 PYBIND11_NAMESPACE_BEGIN(detail)
-template <> struct type_caster<MoveOnlyInt> {
-    PYBIND11_TYPE_CASTER(MoveOnlyInt, _("MoveOnlyInt"));
-    bool load(handle src, bool) { value = MoveOnlyInt(src.cast<int>()); return true; }
-    static handle cast(const MoveOnlyInt &m, return_value_policy r, handle p) { return pybind11::cast(m.value, r, p); }
+template <>
+struct type_caster<MoveOnlyInt> {
+    PYBIND11_TYPE_CASTER(MoveOnlyInt, const_name("MoveOnlyInt"));
+    bool load(handle src, bool) {
+        value = MoveOnlyInt(src.cast<int>());
+        return true;
+    }
+    static handle cast(const MoveOnlyInt &m, return_value_policy r, handle p) {
+        return pybind11::cast(m.value, r, p);
+    }
 };
 
-template <> struct type_caster<MoveOrCopyInt> {
-    PYBIND11_TYPE_CASTER(MoveOrCopyInt, _("MoveOrCopyInt"));
-    bool load(handle src, bool) { value = MoveOrCopyInt(src.cast<int>()); return true; }
-    static handle cast(const MoveOrCopyInt &m, return_value_policy r, handle p) { return pybind11::cast(m.value, r, p); }
+template <>
+struct type_caster<MoveOrCopyInt> {
+    PYBIND11_TYPE_CASTER(MoveOrCopyInt, const_name("MoveOrCopyInt"));
+    bool load(handle src, bool) {
+        value = MoveOrCopyInt(src.cast<int>());
+        return true;
+    }
+    static handle cast(const MoveOrCopyInt &m, return_value_policy r, handle p) {
+        return pybind11::cast(m.value, r, p);
+    }
 };
 
-template <> struct type_caster<CopyOnlyInt> {
+template <>
+struct type_caster<CopyOnlyInt> {
 protected:
     CopyOnlyInt value;
+
 public:
-    static constexpr auto name = _("CopyOnlyInt");
-    bool load(handle src, bool) { value = CopyOnlyInt(src.cast<int>()); return true; }
-    static handle cast(const CopyOnlyInt &m, return_value_policy r, handle p) { return pybind11::cast(m.value, r, p); }
+    static constexpr auto name = const_name("CopyOnlyInt");
+    bool load(handle src, bool) {
+        value = CopyOnlyInt(src.cast<int>());
+        return true;
+    }
+    static handle cast(const CopyOnlyInt &m, return_value_policy r, handle p) {
+        return pybind11::cast(m.value, r, p);
+    }
     static handle cast(const CopyOnlyInt *src, return_value_policy policy, handle parent) {
-        if (!src) return none().release();
+        if (!src) {
+            return none().release();
+        }
         return cast(*src, policy, parent);
     }
     explicit operator CopyOnlyInt *() { return &value; }
     explicit operator CopyOnlyInt &() { return value; }
-    template <typename T> using cast_op_type = pybind11::detail::cast_op_type<T>;
+    template <typename T>
+    using cast_op_type = pybind11::detail::cast_op_type<T>;
 };
 PYBIND11_NAMESPACE_END(detail)
 PYBIND11_NAMESPACE_END(pybind11)
@@ -117,23 +158,21 @@ PYBIND11_NAMESPACE_END(pybind11)
 TEST_SUBMODULE(copy_move_policies, m) {
     // test_lacking_copy_ctor
     py::class_<lacking_copy_ctor>(m, "lacking_copy_ctor")
-        .def_static("get_one", &lacking_copy_ctor::get_one,
-                    py::return_value_policy::copy);
+        .def_static("get_one", &lacking_copy_ctor::get_one, py::return_value_policy::copy);
     // test_lacking_move_ctor
     py::class_<lacking_move_ctor>(m, "lacking_move_ctor")
-        .def_static("get_one", &lacking_move_ctor::get_one,
-                    py::return_value_policy::move);
+        .def_static("get_one", &lacking_move_ctor::get_one, py::return_value_policy::move);
 
     // test_move_and_copy_casts
     // NOLINTNEXTLINE(performance-unnecessary-value-param)
     m.def("move_and_copy_casts", [](const py::object &o) {
         int r = 0;
         r += py::cast<MoveOrCopyInt>(o).value; /* moves */
-        r += py::cast<MoveOnlyInt>(o).value; /* moves */
-        r += py::cast<CopyOnlyInt>(o).value; /* copies */
-        auto m1(py::cast<MoveOrCopyInt>(o)); /* moves */
-        auto m2(py::cast<MoveOnlyInt>(o)); /* moves */
-        auto m3(py::cast<CopyOnlyInt>(o)); /* copies */
+        r += py::cast<MoveOnlyInt>(o).value;   /* moves */
+        r += py::cast<CopyOnlyInt>(o).value;   /* copies */
+        auto m1(py::cast<MoveOrCopyInt>(o));   /* moves */
+        auto m2(py::cast<MoveOnlyInt>(o));     /* moves */
+        auto m3(py::cast<CopyOnlyInt>(o));     /* copies */
         r += m1.value + m2.value + m3.value;
 
         return r;
@@ -147,28 +186,34 @@ TEST_SUBMODULE(copy_move_policies, m) {
     // Changing this breaks the existing test: needs careful review.
     // NOLINTNEXTLINE(performance-unnecessary-value-param)
     m.def("copy_only", [](CopyOnlyInt m) { return m.value; });
-    m.def("move_pair", [](std::pair<MoveOnlyInt, MoveOrCopyInt> p) {
-        return p.first.value + p.second.value;
-    });
+    m.def("move_pair",
+          [](std::pair<MoveOnlyInt, MoveOrCopyInt> p) { return p.first.value + p.second.value; });
     m.def("move_tuple", [](std::tuple<MoveOnlyInt, MoveOrCopyInt, MoveOnlyInt> t) {
         return std::get<0>(t).value + std::get<1>(t).value + std::get<2>(t).value;
     });
     m.def("copy_tuple", [](std::tuple<CopyOnlyInt, CopyOnlyInt> t) {
         return std::get<0>(t).value + std::get<1>(t).value;
     });
-    m.def("move_copy_nested", [](std::pair<MoveOnlyInt, std::pair<std::tuple<MoveOrCopyInt, CopyOnlyInt, std::tuple<MoveOnlyInt>>, MoveOrCopyInt>> x) {
-        return x.first.value + std::get<0>(x.second.first).value + std::get<1>(x.second.first).value +
-            std::get<0>(std::get<2>(x.second.first)).value + x.second.second.value;
-    });
+    m.def("move_copy_nested",
+          [](std::pair<MoveOnlyInt,
+                       std::pair<std::tuple<MoveOrCopyInt, CopyOnlyInt, std::tuple<MoveOnlyInt>>,
+                                 MoveOrCopyInt>> x) {
+              return x.first.value + std::get<0>(x.second.first).value
+                     + std::get<1>(x.second.first).value
+                     + std::get<0>(std::get<2>(x.second.first)).value + x.second.second.value;
+          });
     m.def("move_and_copy_cstats", []() {
         ConstructorStats::gc();
         // Reset counts to 0 so that previous tests don't affect later ones:
         auto &mc = ConstructorStats::get<MoveOrCopyInt>();
-        mc.move_assignments = mc.move_constructions = mc.copy_assignments = mc.copy_constructions = 0;
+        mc.move_assignments = mc.move_constructions = mc.copy_assignments = mc.copy_constructions
+            = 0;
         auto &mo = ConstructorStats::get<MoveOnlyInt>();
-        mo.move_assignments = mo.move_constructions = mo.copy_assignments = mo.copy_constructions = 0;
+        mo.move_assignments = mo.move_constructions = mo.copy_assignments = mo.copy_constructions
+            = 0;
         auto &co = ConstructorStats::get<CopyOnlyInt>();
-        co.move_assignments = co.move_constructions = co.copy_assignments = co.copy_constructions = 0;
+        co.move_assignments = co.move_constructions = co.copy_assignments = co.copy_constructions
+            = 0;
         py::dict d;
         d["MoveOrCopyInt"] = py::cast(mc, py::return_value_policy::reference);
         d["MoveOnlyInt"] = py::cast(mo, py::return_value_policy::reference);
@@ -178,18 +223,13 @@ TEST_SUBMODULE(copy_move_policies, m) {
 #ifdef PYBIND11_HAS_OPTIONAL
     // test_move_and_copy_load_optional
     m.attr("has_optional") = true;
-    m.def("move_optional", [](std::optional<MoveOnlyInt> o) {
-        return o->value;
-    });
-    m.def("move_or_copy_optional", [](std::optional<MoveOrCopyInt> o) {
-        return o->value;
-    });
-    m.def("copy_optional", [](std::optional<CopyOnlyInt> o) {
-        return o->value;
-    });
-    m.def("move_optional_tuple", [](std::optional<std::tuple<MoveOrCopyInt, MoveOnlyInt, CopyOnlyInt>> x) {
-        return std::get<0>(*x).value + std::get<1>(*x).value + std::get<2>(*x).value;
-    });
+    m.def("move_optional", [](std::optional<MoveOnlyInt> o) { return o->value; });
+    m.def("move_or_copy_optional", [](std::optional<MoveOrCopyInt> o) { return o->value; });
+    m.def("copy_optional", [](std::optional<CopyOnlyInt> o) { return o->value; });
+    m.def("move_optional_tuple",
+          [](std::optional<std::tuple<MoveOrCopyInt, MoveOnlyInt, CopyOnlyInt>> x) {
+              return std::get<0>(*x).value + std::get<1>(*x).value + std::get<2>(*x).value;
+          });
 #else
     m.attr("has_optional") = false;
 #endif
@@ -200,20 +240,25 @@ TEST_SUBMODULE(copy_move_policies, m) {
     // added later.
     struct PrivateOpNew {
         int value = 1;
+
     private:
         void *operator new(size_t bytes) {
             void *ptr = std::malloc(bytes);
-            if (ptr)
+            if (ptr) {
                 return ptr;
+            }
             throw std::bad_alloc{};
         }
     };
     py::class_<PrivateOpNew>(m, "PrivateOpNew").def_readonly("value", &PrivateOpNew::value);
     m.def("private_op_new_value", []() { return PrivateOpNew(); });
-    m.def("private_op_new_reference", []() -> const PrivateOpNew & {
-        static PrivateOpNew x{};
-        return x;
-    }, py::return_value_policy::reference);
+    m.def(
+        "private_op_new_reference",
+        []() -> const PrivateOpNew & {
+            static PrivateOpNew x{};
+            return x;
+        },
+        py::return_value_policy::reference);
 
     // test_move_fallback
     // #389: rvp::move should fall-through to copy on non-movable objects
@@ -223,16 +268,28 @@ TEST_SUBMODULE(copy_move_policies, m) {
         MoveIssue1(const MoveIssue1 &c) = default;
         MoveIssue1(MoveIssue1 &&) = delete;
     };
-    py::class_<MoveIssue1>(m, "MoveIssue1").def(py::init<int>()).def_readwrite("value", &MoveIssue1::v);
+    py::class_<MoveIssue1>(m, "MoveIssue1")
+        .def(py::init<int>())
+        .def_readwrite("value", &MoveIssue1::v);
 
     struct MoveIssue2 {
         int v;
         explicit MoveIssue2(int v) : v{v} {}
         MoveIssue2(MoveIssue2 &&) = default;
     };
-    py::class_<MoveIssue2>(m, "MoveIssue2").def(py::init<int>()).def_readwrite("value", &MoveIssue2::v);
+    py::class_<MoveIssue2>(m, "MoveIssue2")
+        .def(py::init<int>())
+        .def_readwrite("value", &MoveIssue2::v);
 
-    // #2742: Don't expect ownership of raw pointer to `new`ed object to be transferred with `py::return_value_policy::move`
-    m.def("get_moveissue1", [](int i) { return std::unique_ptr<MoveIssue1>(new MoveIssue1(i)); }, py::return_value_policy::move);
-    m.def("get_moveissue2", [](int i) { return MoveIssue2(i); }, py::return_value_policy::move);
+    // #2742: Don't expect ownership of raw pointer to `new`ed object to be transferred with
+    // `py::return_value_policy::move`
+    m.def(
+        "get_moveissue1",
+        [](int i) { return std::unique_ptr<MoveIssue1>(new MoveIssue1(i)); },
+        py::return_value_policy::move);
+    m.def(
+        "get_moveissue2", [](int i) { return MoveIssue2(i); }, py::return_value_policy::move);
+
+    // Make sure that cast from pytype rvalue to other pytype works
+    m.def("get_pytype_rvalue_castissue", [](double i) { return py::float_(i).cast<py::int_>(); });
 }
diff --git a/ext/pybind11/tests/test_copy_move.py b/ext/pybind11/tests/test_copy_move.py
index eb1efddd50..9fef089339 100644
--- a/ext/pybind11/tests/test_copy_move.py
+++ b/ext/pybind11/tests/test_copy_move.py
@@ -1,4 +1,3 @@
-# -*- coding: utf-8 -*-
 import pytest
 
 from pybind11_tests import copy_move_policies as m
@@ -124,3 +123,10 @@ def test_move_fallback():
     assert m1.value == 1
     m2 = m.get_moveissue2(2)
     assert m2.value == 2
+
+
+def test_pytype_rvalue_cast():
+    """Make sure that cast from pytype rvalue to other pytype works"""
+
+    value = m.get_pytype_rvalue_castissue(1.0)
+    assert value == 1
diff --git a/ext/pybind11/tests/test_custom_type_casters.cpp b/ext/pybind11/tests/test_custom_type_casters.cpp
index 076777d6f1..b4af02a452 100644
--- a/ext/pybind11/tests/test_custom_type_casters.cpp
+++ b/ext/pybind11/tests/test_custom_type_casters.cpp
@@ -7,23 +7,37 @@
     BSD-style license that can be found in the LICENSE file.
 */
 
-#include "pybind11_tests.h"
 #include "constructor_stats.h"
-
+#include "pybind11_tests.h"
 
 // py::arg/py::arg_v testing: these arguments just record their argument when invoked
-class ArgInspector1 { public: std::string arg = "(default arg inspector 1)"; };
-class ArgInspector2 { public: std::string arg = "(default arg inspector 2)"; };
-class ArgAlwaysConverts { };
-namespace pybind11 { namespace detail {
-template <> struct type_caster<ArgInspector1> {
+class ArgInspector1 {
 public:
+    std::string arg = "(default arg inspector 1)";
+};
+class ArgInspector2 {
+public:
+    std::string arg = "(default arg inspector 2)";
+};
+class ArgAlwaysConverts {};
+
+namespace PYBIND11_NAMESPACE {
+namespace detail {
+template <>
+struct type_caster<ArgInspector1> {
+public:
+    // Classic
+#ifdef PYBIND11_DETAIL_UNDERSCORE_BACKWARD_COMPATIBILITY
     PYBIND11_TYPE_CASTER(ArgInspector1, _("ArgInspector1"));
+#else
+    PYBIND11_TYPE_CASTER(ArgInspector1, const_name("ArgInspector1"));
+#endif
 
     bool load(handle src, bool convert) {
-        value.arg = "loading ArgInspector1 argument " +
-            std::string(convert ? "WITH" : "WITHOUT") + " conversion allowed.  "
-            "Argument value = " + (std::string) str(src);
+        value.arg = "loading ArgInspector1 argument " + std::string(convert ? "WITH" : "WITHOUT")
+                    + " conversion allowed.  "
+                      "Argument value = "
+                    + (std::string) str(src);
         return true;
     }
 
@@ -31,14 +45,16 @@ public:
         return str(src.arg).release();
     }
 };
-template <> struct type_caster<ArgInspector2> {
+template <>
+struct type_caster<ArgInspector2> {
 public:
-    PYBIND11_TYPE_CASTER(ArgInspector2, _("ArgInspector2"));
+    PYBIND11_TYPE_CASTER(ArgInspector2, const_name("ArgInspector2"));
 
     bool load(handle src, bool convert) {
-        value.arg = "loading ArgInspector2 argument " +
-            std::string(convert ? "WITH" : "WITHOUT") + " conversion allowed.  "
-            "Argument value = " + (std::string) str(src);
+        value.arg = "loading ArgInspector2 argument " + std::string(convert ? "WITH" : "WITHOUT")
+                    + " conversion allowed.  "
+                      "Argument value = "
+                    + (std::string) str(src);
         return true;
     }
 
@@ -46,20 +62,19 @@ public:
         return str(src.arg).release();
     }
 };
-template <> struct type_caster<ArgAlwaysConverts> {
+template <>
+struct type_caster<ArgAlwaysConverts> {
 public:
-    PYBIND11_TYPE_CASTER(ArgAlwaysConverts, _("ArgAlwaysConverts"));
+    PYBIND11_TYPE_CASTER(ArgAlwaysConverts, const_name("ArgAlwaysConverts"));
 
-    bool load(handle, bool convert) {
-        return convert;
-    }
+    bool load(handle, bool convert) { return convert; }
 
     static handle cast(const ArgAlwaysConverts &, return_value_policy, handle) {
         return py::none().release();
     }
 };
 } // namespace detail
-} // namespace pybind11
+} // namespace PYBIND11_NAMESPACE
 
 // test_custom_caster_destruction
 class DestructionTester {
@@ -68,15 +83,20 @@ public:
     ~DestructionTester() { print_destroyed(this); }
     DestructionTester(const DestructionTester &) { print_copy_created(this); }
     DestructionTester(DestructionTester &&) noexcept { print_move_created(this); }
-    DestructionTester &operator=(const DestructionTester &) { print_copy_assigned(this); return *this; }
+    DestructionTester &operator=(const DestructionTester &) {
+        print_copy_assigned(this);
+        return *this;
+    }
     DestructionTester &operator=(DestructionTester &&) noexcept {
         print_move_assigned(this);
         return *this;
     }
 };
-namespace pybind11 { namespace detail {
-template <> struct type_caster<DestructionTester> {
-    PYBIND11_TYPE_CASTER(DestructionTester, _("DestructionTester"));
+namespace PYBIND11_NAMESPACE {
+namespace detail {
+template <>
+struct type_caster<DestructionTester> {
+    PYBIND11_TYPE_CASTER(DestructionTester, const_name("DestructionTester"));
     bool load(handle, bool) { return true; }
 
     static handle cast(const DestructionTester &, return_value_policy, handle) {
@@ -84,7 +104,35 @@ template <> struct type_caster<DestructionTester> {
     }
 };
 } // namespace detail
-} // namespace pybind11
+} // namespace PYBIND11_NAMESPACE
+
+// Define type caster outside of `pybind11::detail` and then alias it.
+namespace other_lib {
+struct MyType {};
+// Corrupt `py` shorthand alias for surrounding context.
+namespace py {}
+// Corrupt unqualified relative `pybind11` namespace.
+namespace PYBIND11_NAMESPACE {}
+// Correct alias.
+namespace py_ = ::pybind11;
+// Define caster. This is effectively no-op, we only ensure it compiles and we
+// don't have any symbol collision when using macro mixin.
+struct my_caster {
+    PYBIND11_TYPE_CASTER(MyType, py_::detail::const_name("MyType"));
+    bool load(py_::handle, bool) { return true; }
+
+    static py_::handle cast(const MyType &, py_::return_value_policy, py_::handle) {
+        return py_::bool_(true).release();
+    }
+};
+} // namespace other_lib
+// Effectively "alias" it into correct namespace (via inheritance).
+namespace PYBIND11_NAMESPACE {
+namespace detail {
+template <>
+struct type_caster<other_lib::MyType> : public other_lib::my_caster {};
+} // namespace detail
+} // namespace PYBIND11_NAMESPACE
 
 TEST_SUBMODULE(custom_type_casters, m) {
     // test_custom_type_casters
@@ -110,9 +158,14 @@ TEST_SUBMODULE(custom_type_casters, m) {
     py::class_<ArgInspector>(m, "ArgInspector")
         .def(py::init<>())
         .def("f", &ArgInspector::f, py::arg(), py::arg() = ArgAlwaysConverts())
-        .def("g", &ArgInspector::g, "a"_a.noconvert(), "b"_a, "c"_a.noconvert()=13, "d"_a=ArgInspector2(), py::arg() = ArgAlwaysConverts())
-        .def_static("h", &ArgInspector::h, py::arg{}.noconvert(), py::arg() = ArgAlwaysConverts())
-        ;
+        .def("g",
+             &ArgInspector::g,
+             "a"_a.noconvert(),
+             "b"_a,
+             "c"_a.noconvert() = 13,
+             "d"_a = ArgInspector2(),
+             py::arg() = ArgAlwaysConverts())
+        .def_static("h", &ArgInspector::h, py::arg{}.noconvert(), py::arg() = ArgAlwaysConverts());
     m.def(
         "arg_inspect_func",
         [](const ArgInspector2 &a, const ArgInspector1 &b, ArgAlwaysConverts) {
@@ -122,20 +175,35 @@ TEST_SUBMODULE(custom_type_casters, m) {
         py::arg_v(nullptr, ArgInspector1()).noconvert(true),
         py::arg() = ArgAlwaysConverts());
 
-    m.def("floats_preferred", [](double f) { return 0.5 * f; }, "f"_a);
-    m.def("floats_only", [](double f) { return 0.5 * f; }, "f"_a.noconvert());
-    m.def("ints_preferred", [](int i) { return i / 2; }, "i"_a);
-    m.def("ints_only", [](int i) { return i / 2; }, "i"_a.noconvert());
+    m.def(
+        "floats_preferred", [](double f) { return 0.5 * f; }, "f"_a);
+    m.def(
+        "floats_only", [](double f) { return 0.5 * f; }, "f"_a.noconvert());
+    m.def(
+        "ints_preferred", [](int i) { return i / 2; }, "i"_a);
+    m.def(
+        "ints_only", [](int i) { return i / 2; }, "i"_a.noconvert());
 
     // test_custom_caster_destruction
     // Test that `take_ownership` works on types with a custom type caster when given a pointer
 
     // default policy: don't take ownership:
-    m.def("custom_caster_no_destroy", []() { static auto *dt = new DestructionTester(); return dt; });
+    m.def("custom_caster_no_destroy", []() {
+        static auto *dt = new DestructionTester();
+        return dt;
+    });
 
-    m.def("custom_caster_destroy", []() { return new DestructionTester(); },
-            py::return_value_policy::take_ownership); // Takes ownership: destroy when finished
-    m.def("custom_caster_destroy_const", []() -> const DestructionTester * { return new DestructionTester(); },
-            py::return_value_policy::take_ownership); // Likewise (const doesn't inhibit destruction)
-    m.def("destruction_tester_cstats", &ConstructorStats::get<DestructionTester>, py::return_value_policy::reference);
+    m.def(
+        "custom_caster_destroy",
+        []() { return new DestructionTester(); },
+        py::return_value_policy::take_ownership); // Takes ownership: destroy when finished
+    m.def(
+        "custom_caster_destroy_const",
+        []() -> const DestructionTester * { return new DestructionTester(); },
+        py::return_value_policy::take_ownership); // Likewise (const doesn't inhibit destruction)
+    m.def("destruction_tester_cstats",
+          &ConstructorStats::get<DestructionTester>,
+          py::return_value_policy::reference);
+
+    m.def("other_lib_type", [](other_lib::MyType x) { return x; });
 }
diff --git a/ext/pybind11/tests/test_custom_type_casters.py b/ext/pybind11/tests/test_custom_type_casters.py
index a10646ff46..adfa6cf86e 100644
--- a/ext/pybind11/tests/test_custom_type_casters.py
+++ b/ext/pybind11/tests/test_custom_type_casters.py
@@ -1,4 +1,3 @@
-# -*- coding: utf-8 -*-
 import pytest
 
 from pybind11_tests import custom_type_casters as m
@@ -19,7 +18,7 @@ def test_noconvert_args(msg):
         loading ArgInspector1 argument WITH conversion allowed.  Argument value = this is b
         13
         loading ArgInspector2 argument WITH conversion allowed.  Argument value = (default arg inspector 2)
-    """  # noqa: E501 line too long
+    """
     )
     assert (
         msg(a.g("this is a", "this is b", 42))
@@ -28,7 +27,7 @@ def test_noconvert_args(msg):
         loading ArgInspector1 argument WITH conversion allowed.  Argument value = this is b
         42
         loading ArgInspector2 argument WITH conversion allowed.  Argument value = (default arg inspector 2)
-    """  # noqa: E501 line too long
+    """
     )
     assert (
         msg(a.g("this is a", "this is b", 42, "this is d"))
@@ -76,7 +75,7 @@ def test_noconvert_args(msg):
             1. (i: int) -> int
 
         Invoked with: 4.0
-    """  # noqa: E501 line too long
+    """
     )
 
     assert m.ints_only(4) == 2
@@ -115,3 +114,7 @@ def test_custom_caster_destruction():
 
     # Make sure we still only have the original object (from ..._no_destroy()) alive:
     assert cstats.alive() == 1
+
+
+def test_custom_caster_other_lib():
+    assert m.other_lib_type(True)
diff --git a/ext/pybind11/tests/test_custom_type_setup.py b/ext/pybind11/tests/test_custom_type_setup.py
index ef96f08141..19b44c9de2 100644
--- a/ext/pybind11/tests/test_custom_type_setup.py
+++ b/ext/pybind11/tests/test_custom_type_setup.py
@@ -1,5 +1,3 @@
-# -*- coding: utf-8 -*-
-
 import gc
 import weakref
 
diff --git a/ext/pybind11/tests/test_docstring_options.cpp b/ext/pybind11/tests/test_docstring_options.cpp
index 8a97af55fc..dda1cf6e41 100644
--- a/ext/pybind11/tests/test_docstring_options.cpp
+++ b/ext/pybind11/tests/test_docstring_options.cpp
@@ -15,35 +15,52 @@ TEST_SUBMODULE(docstring_options, m) {
         py::options options;
         options.disable_function_signatures();
 
-        m.def("test_function1", [](int, int) {}, py::arg("a"), py::arg("b"));
-        m.def("test_function2", [](int, int) {}, py::arg("a"), py::arg("b"), "A custom docstring");
+        m.def(
+            "test_function1", [](int, int) {}, py::arg("a"), py::arg("b"));
+        m.def(
+            "test_function2", [](int, int) {}, py::arg("a"), py::arg("b"), "A custom docstring");
 
-        m.def("test_overloaded1", [](int) {}, py::arg("i"), "Overload docstring");
-        m.def("test_overloaded1", [](double) {}, py::arg("d"));
+        m.def(
+            "test_overloaded1", [](int) {}, py::arg("i"), "Overload docstring");
+        m.def(
+            "test_overloaded1", [](double) {}, py::arg("d"));
 
-        m.def("test_overloaded2", [](int) {}, py::arg("i"), "overload docstring 1");
-        m.def("test_overloaded2", [](double) {}, py::arg("d"), "overload docstring 2");
+        m.def(
+            "test_overloaded2", [](int) {}, py::arg("i"), "overload docstring 1");
+        m.def(
+            "test_overloaded2", [](double) {}, py::arg("d"), "overload docstring 2");
 
-        m.def("test_overloaded3", [](int) {}, py::arg("i"));
-        m.def("test_overloaded3", [](double) {}, py::arg("d"), "Overload docstr");
+        m.def(
+            "test_overloaded3", [](int) {}, py::arg("i"));
+        m.def(
+            "test_overloaded3", [](double) {}, py::arg("d"), "Overload docstr");
 
         options.enable_function_signatures();
 
-        m.def("test_function3", [](int, int) {}, py::arg("a"), py::arg("b"));
-        m.def("test_function4", [](int, int) {}, py::arg("a"), py::arg("b"), "A custom docstring");
+        m.def(
+            "test_function3", [](int, int) {}, py::arg("a"), py::arg("b"));
+        m.def(
+            "test_function4", [](int, int) {}, py::arg("a"), py::arg("b"), "A custom docstring");
 
         options.disable_function_signatures().disable_user_defined_docstrings();
 
-        m.def("test_function5", [](int, int) {}, py::arg("a"), py::arg("b"), "A custom docstring");
+        m.def(
+            "test_function5", [](int, int) {}, py::arg("a"), py::arg("b"), "A custom docstring");
 
         {
             py::options nested_options;
             nested_options.enable_user_defined_docstrings();
-            m.def("test_function6", [](int, int) {}, py::arg("a"), py::arg("b"), "A custom docstring");
+            m.def(
+                "test_function6",
+                [](int, int) {},
+                py::arg("a"),
+                py::arg("b"),
+                "A custom docstring");
         }
     }
 
-    m.def("test_function7", [](int, int) {}, py::arg("a"), py::arg("b"), "A custom docstring");
+    m.def(
+        "test_function7", [](int, int) {}, py::arg("a"), py::arg("b"), "A custom docstring");
 
     {
         py::options options;
@@ -63,7 +80,62 @@ TEST_SUBMODULE(docstring_options, m) {
             int getValue() const { return value; }
         };
         py::class_<DocstringTestFoo>(m, "DocstringTestFoo", "This is a class docstring")
-            .def_property("value_prop", &DocstringTestFoo::getValue, &DocstringTestFoo::setValue, "This is a property docstring")
-        ;
+            .def_property("value_prop",
+                          &DocstringTestFoo::getValue,
+                          &DocstringTestFoo::setValue,
+                          "This is a property docstring");
+    }
+
+    {
+        enum class DocstringTestEnum1 { Member1, Member2 };
+
+        py::enum_<DocstringTestEnum1>(m, "DocstringTestEnum1", "Enum docstring")
+            .value("Member1", DocstringTestEnum1::Member1)
+            .value("Member2", DocstringTestEnum1::Member2);
+    }
+
+    {
+        py::options options;
+        options.enable_enum_members_docstring();
+
+        enum class DocstringTestEnum2 { Member1, Member2 };
+
+        py::enum_<DocstringTestEnum2>(m, "DocstringTestEnum2", "Enum docstring")
+            .value("Member1", DocstringTestEnum2::Member1)
+            .value("Member2", DocstringTestEnum2::Member2);
+    }
+
+    {
+        py::options options;
+        options.disable_enum_members_docstring();
+
+        enum class DocstringTestEnum3 { Member1, Member2 };
+
+        py::enum_<DocstringTestEnum3>(m, "DocstringTestEnum3", "Enum docstring")
+            .value("Member1", DocstringTestEnum3::Member1)
+            .value("Member2", DocstringTestEnum3::Member2);
+    }
+
+    {
+        py::options options;
+        options.disable_user_defined_docstrings();
+
+        enum class DocstringTestEnum4 { Member1, Member2 };
+
+        py::enum_<DocstringTestEnum4>(m, "DocstringTestEnum4", "Enum docstring")
+            .value("Member1", DocstringTestEnum4::Member1)
+            .value("Member2", DocstringTestEnum4::Member2);
+    }
+
+    {
+        py::options options;
+        options.disable_user_defined_docstrings();
+        options.disable_enum_members_docstring();
+
+        enum class DocstringTestEnum5 { Member1, Member2 };
+
+        py::enum_<DocstringTestEnum5>(m, "DocstringTestEnum5", "Enum docstring")
+            .value("Member1", DocstringTestEnum5::Member1)
+            .value("Member2", DocstringTestEnum5::Member2);
     }
 }
diff --git a/ext/pybind11/tests/test_docstring_options.py b/ext/pybind11/tests/test_docstring_options.py
index 8ee6613884..e6f5a9d987 100644
--- a/ext/pybind11/tests/test_docstring_options.py
+++ b/ext/pybind11/tests/test_docstring_options.py
@@ -1,4 +1,3 @@
-# -*- coding: utf-8 -*-
 from pybind11_tests import docstring_options as m
 
 
@@ -40,3 +39,26 @@ def test_docstring_options():
     # Suppression of user-defined docstrings for non-function objects
     assert not m.DocstringTestFoo.__doc__
     assert not m.DocstringTestFoo.value_prop.__doc__
+
+    # Check existig behaviour of enum docstings
+    assert (
+        m.DocstringTestEnum1.__doc__
+        == "Enum docstring\n\nMembers:\n\n  Member1\n\n  Member2"
+    )
+
+    # options.enable_enum_members_docstring()
+    assert (
+        m.DocstringTestEnum2.__doc__
+        == "Enum docstring\n\nMembers:\n\n  Member1\n\n  Member2"
+    )
+
+    # options.disable_enum_members_docstring()
+    assert m.DocstringTestEnum3.__doc__ == "Enum docstring"
+
+    # options.disable_user_defined_docstrings()
+    assert m.DocstringTestEnum4.__doc__ == "Members:\n\n  Member1\n\n  Member2"
+
+    # options.disable_user_defined_docstrings()
+    # options.disable_enum_members_docstring()
+    # When all options are disabled, no docstring (instead of an empty one) should be generated
+    assert m.DocstringTestEnum5.__doc__ is None
diff --git a/ext/pybind11/tests/test_eigen.cpp b/ext/pybind11/tests/test_eigen_matrix.cpp
similarity index 66%
rename from ext/pybind11/tests/test_eigen.cpp
rename to ext/pybind11/tests/test_eigen_matrix.cpp
index d22a94a1a1..554cc4d7f8 100644
--- a/ext/pybind11/tests/test_eigen.cpp
+++ b/ext/pybind11/tests/test_eigen_matrix.cpp
@@ -7,29 +7,27 @@
     BSD-style license that can be found in the LICENSE file.
 */
 
-#include "pybind11_tests.h"
-#include "constructor_stats.h"
-#include <pybind11/eigen.h>
+#include <pybind11/eigen/matrix.h>
 #include <pybind11/stl.h>
 
-#if defined(_MSC_VER)
-#if _MSC_VER < 1910  // VS 2015's MSVC
-#  pragma warning(disable: 4127) // C4127: conditional expression is constant
-#endif
-#  pragma warning(disable: 4996) // C4996: std::unary_negation is deprecated
-#endif
+#include "constructor_stats.h"
+#include "pybind11_tests.h"
+
+PYBIND11_WARNING_DISABLE_MSVC(4996)
 
 #include <Eigen/Cholesky>
 
 using MatrixXdR = Eigen::Matrix<double, Eigen::Dynamic, Eigen::Dynamic, Eigen::RowMajor>;
 
-
-
 // Sets/resets a testing reference matrix to have values of 10*r + c, where r and c are the
 // (1-based) row/column number.
-template <typename M> void reset_ref(M &x) {
-    for (int i = 0; i < x.rows(); i++) for (int j = 0; j < x.cols(); j++)
-        x(i, j) = 11 + 10*i + j;
+template <typename M>
+void reset_ref(M &x) {
+    for (int i = 0; i < x.rows(); i++) {
+        for (int j = 0; j < x.cols(); j++) {
+            x(i, j) = 11 + 10 * i + j;
+        }
+    }
 }
 
 // Returns a static, column-major matrix
@@ -61,11 +59,14 @@ double get_elem(const Eigen::Ref<const Eigen::MatrixXd> &m) { return m(2, 1); };
 
 // Returns a matrix with 10*r + 100*c added to each matrix element (to help test that the matrix
 // reference is referencing rows/columns correctly).
-template <typename MatrixArgType> Eigen::MatrixXd adjust_matrix(MatrixArgType m) {
+template <typename MatrixArgType>
+Eigen::MatrixXd adjust_matrix(MatrixArgType m) {
     Eigen::MatrixXd ret(m);
-    for (int c = 0; c < m.cols(); c++)
-        for (int r = 0; r < m.rows(); r++)
-            ret(r, c) += 10*r + 100*c;  // NOLINT(clang-analyzer-core.uninitialized.Assign)
+    for (int c = 0; c < m.cols(); c++) {
+        for (int r = 0; r < m.rows(); r++) {
+            ret(r, c) += 10 * r + 100 * c; // NOLINT(clang-analyzer-core.uninitialized.Assign)
+        }
+    }
     return ret;
 }
 
@@ -78,7 +79,7 @@ struct CustomOperatorNew {
     EIGEN_MAKE_ALIGNED_OPERATOR_NEW;
 };
 
-TEST_SUBMODULE(eigen, m) {
+TEST_SUBMODULE(eigen_matrix, m) {
     using FixedMatrixR = Eigen::Matrix<float, 5, 6, Eigen::RowMajor>;
     using FixedMatrixC = Eigen::Matrix<float, 5, 6>;
     using DenseMatrixR = Eigen::Matrix<float, Eigen::Dynamic, Eigen::Dynamic, Eigen::RowMajor>;
@@ -92,8 +93,10 @@ TEST_SUBMODULE(eigen, m) {
 
     // various tests
     m.def("double_col", [](const Eigen::VectorXf &x) -> Eigen::VectorXf { return 2.0f * x; });
-    m.def("double_row", [](const Eigen::RowVectorXf &x) -> Eigen::RowVectorXf { return 2.0f * x; });
-    m.def("double_complex", [](const Eigen::VectorXcf &x) -> Eigen::VectorXcf { return 2.0f * x; });
+    m.def("double_row",
+          [](const Eigen::RowVectorXf &x) -> Eigen::RowVectorXf { return 2.0f * x; });
+    m.def("double_complex",
+          [](const Eigen::VectorXcf &x) -> Eigen::VectorXcf { return 2.0f * x; });
     m.def("double_threec", [](py::EigenDRef<Eigen::Vector3f> x) { x *= 2; });
     m.def("double_threer", [](py::EigenDRef<Eigen::RowVector3f> x) { x *= 2; });
     m.def("double_mat_cm", [](const Eigen::MatrixXf &x) -> Eigen::MatrixXf { return 2.0f * x; });
@@ -103,19 +106,22 @@ TEST_SUBMODULE(eigen, m) {
     // Different ways of passing via Eigen::Ref; the first and second are the Eigen-recommended
     m.def("cholesky1",
           [](const Eigen::Ref<MatrixXdR> &x) -> Eigen::MatrixXd { return x.llt().matrixL(); });
-    m.def("cholesky2", [](const Eigen::Ref<const MatrixXdR> &x) -> Eigen::MatrixXd { return x.llt().matrixL(); });
-    m.def("cholesky3", [](const Eigen::Ref<MatrixXdR> &x) -> Eigen::MatrixXd { return x.llt().matrixL(); });
+    m.def("cholesky2", [](const Eigen::Ref<const MatrixXdR> &x) -> Eigen::MatrixXd {
+        return x.llt().matrixL();
+    });
+    m.def("cholesky3",
+          [](const Eigen::Ref<MatrixXdR> &x) -> Eigen::MatrixXd { return x.llt().matrixL(); });
     m.def("cholesky4", [](const Eigen::Ref<const MatrixXdR> &x) -> Eigen::MatrixXd {
         return x.llt().matrixL();
     });
 
     // test_eigen_ref_mutators
-    // Mutators: these add some value to the given element using Eigen, but Eigen should be mapping into
-    // the numpy array data and so the result should show up there.  There are three versions: one that
-    // works on a contiguous-row matrix (numpy's default), one for a contiguous-column matrix, and one
-    // for any matrix.
-    auto add_rm = [](Eigen::Ref<MatrixXdR> x, int r, int c, double v) { x(r,c) += v; };
-    auto add_cm = [](Eigen::Ref<Eigen::MatrixXd> x, int r, int c, double v) { x(r,c) += v; };
+    // Mutators: these add some value to the given element using Eigen, but Eigen should be mapping
+    // into the numpy array data and so the result should show up there.  There are three versions:
+    // one that works on a contiguous-row matrix (numpy's default), one for a contiguous-column
+    // matrix, and one for any matrix.
+    auto add_rm = [](Eigen::Ref<MatrixXdR> x, int r, int c, double v) { x(r, c) += v; };
+    auto add_cm = [](Eigen::Ref<Eigen::MatrixXd> x, int r, int c, double v) { x(r, c) += v; };
 
     // Mutators (Eigen maps into numpy variables):
     m.def("add_rm", add_rm); // Only takes row-contiguous
@@ -126,7 +132,8 @@ TEST_SUBMODULE(eigen, m) {
     m.def("add2", add_cm);
     m.def("add2", add_rm);
     // This one accepts a matrix of any stride:
-    m.def("add_any", [](py::EigenDRef<Eigen::MatrixXd> x, int r, int c, double v) { x(r,c) += v; });
+    m.def("add_any",
+          [](py::EigenDRef<Eigen::MatrixXd> x, int r, int c, double v) { x(r, c) += v; });
 
     // Return mutable references (numpy maps into eigen variables)
     m.def("get_cm_ref", []() { return Eigen::Ref<Eigen::MatrixXd>(get_cm()); });
@@ -138,45 +145,96 @@ TEST_SUBMODULE(eigen, m) {
     m.def("reset_refs", reset_refs); // Restores get_{cm,rm}_ref to original values
 
     // Increments and returns ref to (same) matrix
-    m.def("incr_matrix", [](Eigen::Ref<Eigen::MatrixXd> m, double v) {
-        m += Eigen::MatrixXd::Constant(m.rows(), m.cols(), v);
-        return m;
-    }, py::return_value_policy::reference);
+    m.def(
+        "incr_matrix",
+        [](Eigen::Ref<Eigen::MatrixXd> m, double v) {
+            m += Eigen::MatrixXd::Constant(m.rows(), m.cols(), v);
+            return m;
+        },
+        py::return_value_policy::reference);
 
     // Same, but accepts a matrix of any strides
-    m.def("incr_matrix_any", [](py::EigenDRef<Eigen::MatrixXd> m, double v) {
-        m += Eigen::MatrixXd::Constant(m.rows(), m.cols(), v);
-        return m;
-    }, py::return_value_policy::reference);
+    m.def(
+        "incr_matrix_any",
+        [](py::EigenDRef<Eigen::MatrixXd> m, double v) {
+            m += Eigen::MatrixXd::Constant(m.rows(), m.cols(), v);
+            return m;
+        },
+        py::return_value_policy::reference);
 
     // Returns an eigen slice of even rows
-    m.def("even_rows", [](py::EigenDRef<Eigen::MatrixXd> m) {
-        return py::EigenDMap<Eigen::MatrixXd>(
-                m.data(), (m.rows() + 1) / 2, m.cols(),
+    m.def(
+        "even_rows",
+        [](py::EigenDRef<Eigen::MatrixXd> m) {
+            return py::EigenDMap<Eigen::MatrixXd>(
+                m.data(),
+                (m.rows() + 1) / 2,
+                m.cols(),
                 py::EigenDStride(m.outerStride(), 2 * m.innerStride()));
-    }, py::return_value_policy::reference);
+        },
+        py::return_value_policy::reference);
 
     // Returns an eigen slice of even columns
-    m.def("even_cols", [](py::EigenDRef<Eigen::MatrixXd> m) {
-        return py::EigenDMap<Eigen::MatrixXd>(
-                m.data(), m.rows(), (m.cols() + 1) / 2,
+    m.def(
+        "even_cols",
+        [](py::EigenDRef<Eigen::MatrixXd> m) {
+            return py::EigenDMap<Eigen::MatrixXd>(
+                m.data(),
+                m.rows(),
+                (m.cols() + 1) / 2,
                 py::EigenDStride(2 * m.outerStride(), m.innerStride()));
-    }, py::return_value_policy::reference);
+        },
+        py::return_value_policy::reference);
 
     // Returns diagonals: a vector-like object with an inner stride != 1
     m.def("diagonal", [](const Eigen::Ref<const Eigen::MatrixXd> &x) { return x.diagonal(); });
-    m.def("diagonal_1", [](const Eigen::Ref<const Eigen::MatrixXd> &x) { return x.diagonal<1>(); });
-    m.def("diagonal_n", [](const Eigen::Ref<const Eigen::MatrixXd> &x, int index) { return x.diagonal(index); });
+    m.def("diagonal_1",
+          [](const Eigen::Ref<const Eigen::MatrixXd> &x) { return x.diagonal<1>(); });
+    m.def("diagonal_n",
+          [](const Eigen::Ref<const Eigen::MatrixXd> &x, int index) { return x.diagonal(index); });
 
     // Return a block of a matrix (gives non-standard strides)
-    m.def("block", [](const Eigen::Ref<const Eigen::MatrixXd> &x, int start_row, int start_col, int block_rows, int block_cols) {
-        return x.block(start_row, start_col, block_rows, block_cols);
-    });
+    m.def("block",
+          [m](const py::object &x_obj,
+              int start_row,
+              int start_col,
+              int block_rows,
+              int block_cols) {
+              return m.attr("_block")(x_obj, x_obj, start_row, start_col, block_rows, block_cols);
+          });
+
+    m.def(
+        "_block",
+        [](const py::object &x_obj,
+           const Eigen::Ref<const Eigen::MatrixXd> &x,
+           int start_row,
+           int start_col,
+           int block_rows,
+           int block_cols) {
+            // See PR #4217 for background. This test is a bit over the top, but might be useful
+            // as a concrete example to point to when explaining the dangling reference trap.
+            auto i0 = py::make_tuple(0, 0);
+            auto x0_orig = x_obj[*i0].cast<double>();
+            if (x(0, 0) != x0_orig) {
+                throw std::runtime_error(
+                    "Something in the type_caster for Eigen::Ref is terribly wrong.");
+            }
+            double x0_mod = x0_orig + 1;
+            x_obj[*i0] = x0_mod;
+            auto copy_detected = (x(0, 0) != x0_mod);
+            x_obj[*i0] = x0_orig;
+            if (copy_detected) {
+                throw std::runtime_error("type_caster for Eigen::Ref made a copy.");
+            }
+            return x.block(start_row, start_col, block_rows, block_cols);
+        },
+        py::keep_alive<0, 1>());
 
     // test_eigen_return_references, test_eigen_keepalive
     // return value referencing/copying tests:
     class ReturnTester {
         Eigen::MatrixXd mat = create();
+
     public:
         ReturnTester() { print_created(this); }
         ~ReturnTester() { print_destroyed(this); }
@@ -189,12 +247,24 @@ TEST_SUBMODULE(eigen, m) {
         const Eigen::MatrixXd *viewPtr() { return &mat; }
         Eigen::Ref<Eigen::MatrixXd> ref() { return mat; }
         Eigen::Ref<const Eigen::MatrixXd> refConst() { return mat; }
-        Eigen::Block<Eigen::MatrixXd> block(int r, int c, int nrow, int ncol) { return mat.block(r, c, nrow, ncol); }
-        Eigen::Block<const Eigen::MatrixXd> blockConst(int r, int c, int nrow, int ncol) const { return mat.block(r, c, nrow, ncol); }
-        py::EigenDMap<Eigen::Matrix2d> corners() { return py::EigenDMap<Eigen::Matrix2d>(mat.data(),
-                    py::EigenDStride(mat.outerStride() * (mat.outerSize()-1), mat.innerStride() * (mat.innerSize()-1))); }
-        py::EigenDMap<const Eigen::Matrix2d> cornersConst() const { return py::EigenDMap<const Eigen::Matrix2d>(mat.data(),
-                    py::EigenDStride(mat.outerStride() * (mat.outerSize()-1), mat.innerStride() * (mat.innerSize()-1))); }
+        Eigen::Block<Eigen::MatrixXd> block(int r, int c, int nrow, int ncol) {
+            return mat.block(r, c, nrow, ncol);
+        }
+        Eigen::Block<const Eigen::MatrixXd> blockConst(int r, int c, int nrow, int ncol) const {
+            return mat.block(r, c, nrow, ncol);
+        }
+        py::EigenDMap<Eigen::Matrix2d> corners() {
+            return py::EigenDMap<Eigen::Matrix2d>(
+                mat.data(),
+                py::EigenDStride(mat.outerStride() * (mat.outerSize() - 1),
+                                 mat.innerStride() * (mat.innerSize() - 1)));
+        }
+        py::EigenDMap<const Eigen::Matrix2d> cornersConst() const {
+            return py::EigenDMap<const Eigen::Matrix2d>(
+                mat.data(),
+                py::EigenDStride(mat.outerStride() * (mat.outerSize() - 1),
+                                 mat.innerStride() * (mat.innerSize() - 1)));
+        }
     };
     using rvp = py::return_value_policy;
     py::class_<ReturnTester>(m, "ReturnTester")
@@ -205,9 +275,9 @@ TEST_SUBMODULE(eigen, m) {
         .def("get_ptr", &ReturnTester::getPtr, rvp::reference_internal)
         .def("view", &ReturnTester::view, rvp::reference_internal)
         .def("view_ptr", &ReturnTester::view, rvp::reference_internal)
-        .def("copy_get", &ReturnTester::get)   // Default rvp: copy
-        .def("copy_view", &ReturnTester::view) //         "
-        .def("ref", &ReturnTester::ref) // Default for Ref is to reference
+        .def("copy_get", &ReturnTester::get)       // Default rvp: copy
+        .def("copy_view", &ReturnTester::view)     //         "
+        .def("ref", &ReturnTester::ref)            // Default for Ref is to reference
         .def("ref_const", &ReturnTester::refConst) // Likewise, but const
         .def("ref_safe", &ReturnTester::ref, rvp::reference_internal)
         .def("ref_const_safe", &ReturnTester::refConst, rvp::reference_internal)
@@ -218,33 +288,29 @@ TEST_SUBMODULE(eigen, m) {
         .def("block_const", &ReturnTester::blockConst, rvp::reference_internal)
         .def("copy_block", &ReturnTester::block, rvp::copy)
         .def("corners", &ReturnTester::corners, rvp::reference_internal)
-        .def("corners_const", &ReturnTester::cornersConst, rvp::reference_internal)
-        ;
+        .def("corners_const", &ReturnTester::cornersConst, rvp::reference_internal);
 
     // test_special_matrix_objects
     // Returns a DiagonalMatrix with diagonal (1,2,3,...)
     m.def("incr_diag", [](int k) {
         Eigen::DiagonalMatrix<int, Eigen::Dynamic> m(k);
-        for (int i = 0; i < k; i++) m.diagonal()[i] = i+1;
+        for (int i = 0; i < k; i++) {
+            m.diagonal()[i] = i + 1;
+        }
         return m;
     });
 
     // Returns a SelfAdjointView referencing the lower triangle of m
-    m.def("symmetric_lower", [](const Eigen::MatrixXi &m) {
-            return m.selfadjointView<Eigen::Lower>();
-    });
+    m.def("symmetric_lower",
+          [](const Eigen::MatrixXi &m) { return m.selfadjointView<Eigen::Lower>(); });
     // Returns a SelfAdjointView referencing the lower triangle of m
-    m.def("symmetric_upper", [](const Eigen::MatrixXi &m) {
-            return m.selfadjointView<Eigen::Upper>();
-    });
+    m.def("symmetric_upper",
+          [](const Eigen::MatrixXi &m) { return m.selfadjointView<Eigen::Upper>(); });
 
     // Test matrix for various functions below.
     Eigen::MatrixXf mat(5, 6);
-    mat << 0,  3,  0,  0,  0, 11,
-           22, 0,  0,  0, 17, 11,
-           7,  5,  0,  1,  0, 11,
-           0,  0,  0,  0,  0, 11,
-           0,  0, 14,  0,  8, 11;
+    mat << 0, 3, 0, 0, 0, 11, 22, 0, 0, 0, 17, 11, 7, 5, 0, 1, 0, 11, 0, 0, 0, 0, 0, 11, 0, 0, 14,
+        0, 8, 11;
 
     // test_fixed, and various other tests
     m.def("fixed_r", [mat]() -> FixedMatrixR { return FixedMatrixR(mat); });
@@ -269,7 +335,8 @@ TEST_SUBMODULE(eigen, m) {
         // NOLINTNEXTLINE(clang-analyzer-core.uninitialized.UndefReturn)
         return Eigen::SparseView<Eigen::MatrixXf>(mat);
     });
-    m.def("sparse_c", [mat]() -> SparseMatrixC { return Eigen::SparseView<Eigen::MatrixXf>(mat); });
+    m.def("sparse_c",
+          [mat]() -> SparseMatrixC { return Eigen::SparseView<Eigen::MatrixXf>(mat); });
     m.def("sparse_copy_r", [](const SparseMatrixR &m) -> SparseMatrixR { return m; });
     m.def("sparse_copy_c", [](const SparseMatrixC &m) -> SparseMatrixC { return m; });
     // test_partially_fixed
@@ -283,7 +350,8 @@ TEST_SUBMODULE(eigen, m) {
     m.def("cpp_copy", [](py::handle m) { return m.cast<Eigen::MatrixXd>()(1, 0); });
     m.def("cpp_ref_c", [](py::handle m) { return m.cast<Eigen::Ref<Eigen::MatrixXd>>()(1, 0); });
     m.def("cpp_ref_r", [](py::handle m) { return m.cast<Eigen::Ref<MatrixXdR>>()(1, 0); });
-    m.def("cpp_ref_any", [](py::handle m) { return m.cast<py::EigenDRef<Eigen::MatrixXd>>()(1, 0); });
+    m.def("cpp_ref_any",
+          [](py::handle m) { return m.cast<py::EigenDRef<Eigen::MatrixXd>>()(1, 0); });
 
     // [workaround(intel)] ICC 20/21 breaks with py::arg().stuff, using py::arg{}.stuff works.
 
@@ -297,15 +365,26 @@ TEST_SUBMODULE(eigen, m) {
         [](const Eigen::Ref<const Eigen::MatrixXd> &m) -> double { return get_elem(m); },
         py::arg{}.noconvert());
     // Also test a row-major-only no-copy const ref:
-    m.def("get_elem_rm_nocopy", [](Eigen::Ref<const Eigen::Matrix<long, -1, -1, Eigen::RowMajor>> &m) -> long { return m(2, 1); },
-            py::arg{}.noconvert());
+    m.def(
+        "get_elem_rm_nocopy",
+        [](Eigen::Ref<const Eigen::Matrix<long, -1, -1, Eigen::RowMajor>> &m) -> long {
+            return m(2, 1);
+        },
+        py::arg{}.noconvert());
 
-    // test_issue738
-    // Issue #738: 1xN or Nx1 2D matrices were neither accepted nor properly copied with an
+    // test_issue738, test_zero_length
+    // Issue #738: 1×N or N×1 2D matrices were neither accepted nor properly copied with an
     // incompatible stride value on the length-1 dimension--but that should be allowed (without
     // requiring a copy!) because the stride value can be safely ignored on a size-1 dimension.
-    m.def("iss738_f1", &adjust_matrix<const Eigen::Ref<const Eigen::MatrixXd> &>, py::arg{}.noconvert());
-    m.def("iss738_f2", &adjust_matrix<const Eigen::Ref<const Eigen::Matrix<double, -1, -1, Eigen::RowMajor>> &>, py::arg{}.noconvert());
+    // Similarly, 0×N or N×0 matrices were not accepted--again, these should be allowed since
+    // they contain no data. This particularly affects numpy ≥ 1.23, which sets the strides to
+    // 0 if any dimension size is 0.
+    m.def("iss738_f1",
+          &adjust_matrix<const Eigen::Ref<const Eigen::MatrixXd> &>,
+          py::arg{}.noconvert());
+    m.def("iss738_f2",
+          &adjust_matrix<const Eigen::Ref<const Eigen::Matrix<double, -1, -1, Eigen::RowMajor>> &>,
+          py::arg{}.noconvert());
 
     // test_issue1105
     // Issue #1105: when converting from a numpy two-dimensional (Nx1) or (1xN) value into a dense
@@ -320,8 +399,9 @@ TEST_SUBMODULE(eigen, m) {
         "matrix_multiply",
         [](const py::EigenDRef<const Eigen::MatrixXd> &A,
            const py::EigenDRef<const Eigen::MatrixXd> &B) -> Eigen::MatrixXd {
-            if (A.cols() != B.rows())
+            if (A.cols() != B.rows()) {
                 throw std::domain_error("Nonconformable matrices!");
+            }
             return A * B;
         },
         py::arg("A"),
diff --git a/ext/pybind11/tests/test_eigen.py b/ext/pybind11/tests/test_eigen_matrix.py
similarity index 90%
rename from ext/pybind11/tests/test_eigen.py
rename to ext/pybind11/tests/test_eigen_matrix.py
index e53826cbbb..4407fa6aee 100644
--- a/ext/pybind11/tests/test_eigen.py
+++ b/ext/pybind11/tests/test_eigen_matrix.py
@@ -1,10 +1,9 @@
-# -*- coding: utf-8 -*-
 import pytest
 
 from pybind11_tests import ConstructorStats
 
 np = pytest.importorskip("numpy")
-m = pytest.importorskip("pybind11_tests.eigen")
+m = pytest.importorskip("pybind11_tests.eigen_matrix")
 
 
 ref = np.array(
@@ -201,7 +200,7 @@ def test_negative_stride_from_python(msg):
         double_threer(): incompatible function arguments. The following argument types are supported:
             1. (arg0: numpy.ndarray[numpy.float32[1, 3], flags.writeable]) -> None
 
-        Invoked with: """  # noqa: E501 line too long
+        Invoked with: """
         + repr(np.array([5.0, 4.0, 3.0], dtype="float32"))
     )
 
@@ -213,22 +212,29 @@ def test_negative_stride_from_python(msg):
         double_threec(): incompatible function arguments. The following argument types are supported:
             1. (arg0: numpy.ndarray[numpy.float32[3, 1], flags.writeable]) -> None
 
-        Invoked with: """  # noqa: E501 line too long
+        Invoked with: """
         + repr(np.array([7.0, 4.0, 1.0], dtype="float32"))
     )
 
 
+def test_block_runtime_error_type_caster_eigen_ref_made_a_copy():
+    with pytest.raises(RuntimeError) as excinfo:
+        m.block(ref, 0, 0, 0, 0)
+    assert str(excinfo.value) == "type_caster for Eigen::Ref made a copy."
+
+
 def test_nonunit_stride_to_python():
     assert np.all(m.diagonal(ref) == ref.diagonal())
     assert np.all(m.diagonal_1(ref) == ref.diagonal(1))
     for i in range(-5, 7):
-        assert np.all(
-            m.diagonal_n(ref, i) == ref.diagonal(i)
-        ), "m.diagonal_n({})".format(i)
+        assert np.all(m.diagonal_n(ref, i) == ref.diagonal(i)), f"m.diagonal_n({i})"
 
-    assert np.all(m.block(ref, 2, 1, 3, 3) == ref[2:5, 1:4])
-    assert np.all(m.block(ref, 1, 4, 4, 2) == ref[1:, 4:])
-    assert np.all(m.block(ref, 1, 4, 3, 2) == ref[1:4, 4:])
+    # Must be order="F", otherwise the type_caster will make a copy and
+    # m.block() will return a dangling reference (heap-use-after-free).
+    rof = np.asarray(ref, order="F")
+    assert np.all(m.block(rof, 2, 1, 3, 3) == rof[2:5, 1:4])
+    assert np.all(m.block(rof, 1, 4, 4, 2) == rof[1:, 4:])
+    assert np.all(m.block(rof, 1, 4, 3, 2) == rof[1:4, 4:])
 
 
 def test_eigen_ref_to_python():
@@ -237,7 +243,7 @@ def test_eigen_ref_to_python():
         mymat = chol(np.array([[1.0, 2, 4], [2, 13, 23], [4, 23, 77]]))
         assert np.all(
             mymat == np.array([[1, 0, 0], [2, 3, 0], [4, 5, 6]])
-        ), "cholesky{}".format(i)
+        ), f"cholesky{i}"
 
 
 def assign_both(a1, a2, r, c, v):
@@ -254,14 +260,14 @@ def array_copy_but_one(a, r, c, v):
 def test_eigen_return_references():
     """Tests various ways of returning references and non-referencing copies"""
 
-    master = np.ones((10, 10))
+    primary = np.ones((10, 10))
     a = m.ReturnTester()
     a_get1 = a.get()
     assert not a_get1.flags.owndata and a_get1.flags.writeable
-    assign_both(a_get1, master, 3, 3, 5)
+    assign_both(a_get1, primary, 3, 3, 5)
     a_get2 = a.get_ptr()
     assert not a_get2.flags.owndata and a_get2.flags.writeable
-    assign_both(a_get1, master, 2, 3, 6)
+    assign_both(a_get1, primary, 2, 3, 6)
 
     a_view1 = a.view()
     assert not a_view1.flags.owndata and not a_view1.flags.writeable
@@ -274,25 +280,25 @@ def test_eigen_return_references():
 
     a_copy1 = a.copy_get()
     assert a_copy1.flags.owndata and a_copy1.flags.writeable
-    np.testing.assert_array_equal(a_copy1, master)
+    np.testing.assert_array_equal(a_copy1, primary)
     a_copy1[7, 7] = -44  # Shouldn't affect anything else
-    c1want = array_copy_but_one(master, 7, 7, -44)
+    c1want = array_copy_but_one(primary, 7, 7, -44)
     a_copy2 = a.copy_view()
     assert a_copy2.flags.owndata and a_copy2.flags.writeable
-    np.testing.assert_array_equal(a_copy2, master)
+    np.testing.assert_array_equal(a_copy2, primary)
     a_copy2[4, 4] = -22  # Shouldn't affect anything else
-    c2want = array_copy_but_one(master, 4, 4, -22)
+    c2want = array_copy_but_one(primary, 4, 4, -22)
 
     a_ref1 = a.ref()
     assert not a_ref1.flags.owndata and a_ref1.flags.writeable
-    assign_both(a_ref1, master, 1, 1, 15)
+    assign_both(a_ref1, primary, 1, 1, 15)
     a_ref2 = a.ref_const()
     assert not a_ref2.flags.owndata and not a_ref2.flags.writeable
     with pytest.raises(ValueError):
         a_ref2[5, 5] = 33
     a_ref3 = a.ref_safe()
     assert not a_ref3.flags.owndata and a_ref3.flags.writeable
-    assign_both(a_ref3, master, 0, 7, 99)
+    assign_both(a_ref3, primary, 0, 7, 99)
     a_ref4 = a.ref_const_safe()
     assert not a_ref4.flags.owndata and not a_ref4.flags.writeable
     with pytest.raises(ValueError):
@@ -300,23 +306,23 @@ def test_eigen_return_references():
 
     a_copy3 = a.copy_ref()
     assert a_copy3.flags.owndata and a_copy3.flags.writeable
-    np.testing.assert_array_equal(a_copy3, master)
+    np.testing.assert_array_equal(a_copy3, primary)
     a_copy3[8, 1] = 11
-    c3want = array_copy_but_one(master, 8, 1, 11)
+    c3want = array_copy_but_one(primary, 8, 1, 11)
     a_copy4 = a.copy_ref_const()
     assert a_copy4.flags.owndata and a_copy4.flags.writeable
-    np.testing.assert_array_equal(a_copy4, master)
+    np.testing.assert_array_equal(a_copy4, primary)
     a_copy4[8, 4] = 88
-    c4want = array_copy_but_one(master, 8, 4, 88)
+    c4want = array_copy_but_one(primary, 8, 4, 88)
 
     a_block1 = a.block(3, 3, 2, 2)
     assert not a_block1.flags.owndata and a_block1.flags.writeable
     a_block1[0, 0] = 55
-    master[3, 3] = 55
+    primary[3, 3] = 55
     a_block2 = a.block_safe(2, 2, 3, 2)
     assert not a_block2.flags.owndata and a_block2.flags.writeable
     a_block2[2, 1] = -123
-    master[4, 3] = -123
+    primary[4, 3] = -123
     a_block3 = a.block_const(6, 7, 4, 3)
     assert not a_block3.flags.owndata and not a_block3.flags.writeable
     with pytest.raises(ValueError):
@@ -324,18 +330,18 @@ def test_eigen_return_references():
 
     a_copy5 = a.copy_block(2, 2, 2, 3)
     assert a_copy5.flags.owndata and a_copy5.flags.writeable
-    np.testing.assert_array_equal(a_copy5, master[2:4, 2:5])
+    np.testing.assert_array_equal(a_copy5, primary[2:4, 2:5])
     a_copy5[1, 1] = 777
-    c5want = array_copy_but_one(master[2:4, 2:5], 1, 1, 777)
+    c5want = array_copy_but_one(primary[2:4, 2:5], 1, 1, 777)
 
     a_corn1 = a.corners()
     assert not a_corn1.flags.owndata and a_corn1.flags.writeable
     a_corn1 *= 50
     a_corn1[1, 1] = 999
-    master[0, 0] = 50
-    master[0, 9] = 50
-    master[9, 0] = 50
-    master[9, 9] = 999
+    primary[0, 0] = 50
+    primary[0, 9] = 50
+    primary[9, 0] = 50
+    primary[9, 9] = 999
     a_corn2 = a.corners_const()
     assert not a_corn2.flags.owndata and not a_corn2.flags.writeable
     with pytest.raises(ValueError):
@@ -343,22 +349,22 @@ def test_eigen_return_references():
 
     # All of the changes made all the way along should be visible everywhere
     # now (except for the copies, of course)
-    np.testing.assert_array_equal(a_get1, master)
-    np.testing.assert_array_equal(a_get2, master)
-    np.testing.assert_array_equal(a_view1, master)
-    np.testing.assert_array_equal(a_view2, master)
-    np.testing.assert_array_equal(a_ref1, master)
-    np.testing.assert_array_equal(a_ref2, master)
-    np.testing.assert_array_equal(a_ref3, master)
-    np.testing.assert_array_equal(a_ref4, master)
-    np.testing.assert_array_equal(a_block1, master[3:5, 3:5])
-    np.testing.assert_array_equal(a_block2, master[2:5, 2:4])
-    np.testing.assert_array_equal(a_block3, master[6:10, 7:10])
+    np.testing.assert_array_equal(a_get1, primary)
+    np.testing.assert_array_equal(a_get2, primary)
+    np.testing.assert_array_equal(a_view1, primary)
+    np.testing.assert_array_equal(a_view2, primary)
+    np.testing.assert_array_equal(a_ref1, primary)
+    np.testing.assert_array_equal(a_ref2, primary)
+    np.testing.assert_array_equal(a_ref3, primary)
+    np.testing.assert_array_equal(a_ref4, primary)
+    np.testing.assert_array_equal(a_block1, primary[3:5, 3:5])
+    np.testing.assert_array_equal(a_block2, primary[2:5, 2:4])
+    np.testing.assert_array_equal(a_block3, primary[6:10, 7:10])
     np.testing.assert_array_equal(
-        a_corn1, master[0 :: master.shape[0] - 1, 0 :: master.shape[1] - 1]
+        a_corn1, primary[0 :: primary.shape[0] - 1, 0 :: primary.shape[1] - 1]
     )
     np.testing.assert_array_equal(
-        a_corn2, master[0 :: master.shape[0] - 1, 0 :: master.shape[1] - 1]
+        a_corn2, primary[0 :: primary.shape[0] - 1, 0 :: primary.shape[1] - 1]
     )
 
     np.testing.assert_array_equal(a_copy1, c1want)
@@ -724,13 +730,13 @@ def test_sparse_signature(doc):
         doc(m.sparse_copy_r)
         == """
         sparse_copy_r(arg0: scipy.sparse.csr_matrix[numpy.float32]) -> scipy.sparse.csr_matrix[numpy.float32]
-    """  # noqa: E501 line too long
+    """
     )
     assert (
         doc(m.sparse_copy_c)
         == """
         sparse_copy_c(arg0: scipy.sparse.csc_matrix[numpy.float32]) -> scipy.sparse.csc_matrix[numpy.float32]
-    """  # noqa: E501 line too long
+    """
     )
 
 
@@ -747,6 +753,13 @@ def test_issue738():
     )
 
 
+@pytest.mark.parametrize("func", [m.iss738_f1, m.iss738_f2])
+@pytest.mark.parametrize("sizes", [(0, 2), (2, 0)])
+def test_zero_length(func, sizes):
+    """Ignore strides on a length-0 dimension (even if they would be incompatible length > 1)"""
+    assert np.all(func(np.zeros(sizes)) == np.zeros(sizes))
+
+
 def test_issue1105():
     """Issue 1105: 1xN or Nx1 input arrays weren't accepted for eigen
     compile-time row vectors or column vector"""
diff --git a/ext/pybind11/tests/test_eigen_tensor.cpp b/ext/pybind11/tests/test_eigen_tensor.cpp
new file mode 100644
index 0000000000..503c69c7d3
--- /dev/null
+++ b/ext/pybind11/tests/test_eigen_tensor.cpp
@@ -0,0 +1,18 @@
+/*
+    tests/eigen_tensor.cpp -- automatic conversion of Eigen Tensor
+
+    All rights reserved. Use of this source code is governed by a
+    BSD-style license that can be found in the LICENSE file.
+*/
+
+#define PYBIND11_TEST_EIGEN_TENSOR_NAMESPACE eigen_tensor
+
+#ifdef EIGEN_AVOID_STL_ARRAY
+#    undef EIGEN_AVOID_STL_ARRAY
+#endif
+
+#include "test_eigen_tensor.inl"
+
+#include "pybind11_tests.h"
+
+test_initializer egien_tensor("eigen_tensor", eigen_tensor_test::test_module);
diff --git a/ext/pybind11/tests/test_eigen_tensor.inl b/ext/pybind11/tests/test_eigen_tensor.inl
new file mode 100644
index 0000000000..d864ce7379
--- /dev/null
+++ b/ext/pybind11/tests/test_eigen_tensor.inl
@@ -0,0 +1,333 @@
+/*
+    tests/eigen_tensor.cpp -- automatic conversion of Eigen Tensor
+
+    All rights reserved. Use of this source code is governed by a
+    BSD-style license that can be found in the LICENSE file.
+*/
+
+#include <pybind11/eigen/tensor.h>
+
+PYBIND11_NAMESPACE_BEGIN(eigen_tensor_test)
+
+namespace py = pybind11;
+
+PYBIND11_WARNING_DISABLE_MSVC(4127)
+
+template <typename M>
+void reset_tensor(M &x) {
+    for (int i = 0; i < x.dimension(0); i++) {
+        for (int j = 0; j < x.dimension(1); j++) {
+            for (int k = 0; k < x.dimension(2); k++) {
+                x(i, j, k) = i * (5 * 2) + j * 2 + k;
+            }
+        }
+    }
+}
+
+template <typename M>
+bool check_tensor(M &x) {
+    for (int i = 0; i < x.dimension(0); i++) {
+        for (int j = 0; j < x.dimension(1); j++) {
+            for (int k = 0; k < x.dimension(2); k++) {
+                if (x(i, j, k) != (i * (5 * 2) + j * 2 + k)) {
+                    return false;
+                }
+            }
+        }
+    }
+    return true;
+}
+
+template <int Options>
+Eigen::Tensor<double, 3, Options> &get_tensor() {
+    static Eigen::Tensor<double, 3, Options> *x;
+
+    if (!x) {
+        x = new Eigen::Tensor<double, 3, Options>(3, 5, 2);
+        reset_tensor(*x);
+    }
+
+    return *x;
+}
+
+template <int Options>
+Eigen::TensorMap<Eigen::Tensor<double, 3, Options>> &get_tensor_map() {
+    static Eigen::TensorMap<Eigen::Tensor<double, 3, Options>> *x;
+
+    if (!x) {
+        x = new Eigen::TensorMap<Eigen::Tensor<double, 3, Options>>(get_tensor<Options>());
+    }
+
+    return *x;
+}
+
+template <int Options>
+Eigen::TensorFixedSize<double, Eigen::Sizes<3, 5, 2>, Options> &get_fixed_tensor() {
+    static Eigen::TensorFixedSize<double, Eigen::Sizes<3, 5, 2>, Options> *x;
+
+    if (!x) {
+        Eigen::aligned_allocator<Eigen::TensorFixedSize<double, Eigen::Sizes<3, 5, 2>, Options>>
+            allocator;
+        x = new (allocator.allocate(1))
+            Eigen::TensorFixedSize<double, Eigen::Sizes<3, 5, 2>, Options>();
+        reset_tensor(*x);
+    }
+
+    return *x;
+}
+
+template <int Options>
+const Eigen::Tensor<double, 3, Options> &get_const_tensor() {
+    return get_tensor<Options>();
+}
+
+template <int Options>
+struct CustomExample {
+    CustomExample() : member(get_tensor<Options>()), view_member(member) {}
+
+    Eigen::Tensor<double, 3, Options> member;
+    Eigen::TensorMap<Eigen::Tensor<double, 3, Options>> view_member;
+};
+
+template <int Options>
+void init_tensor_module(pybind11::module &m) {
+    const char *needed_options = "";
+    if (Options == Eigen::ColMajor) {
+        needed_options = "F";
+    } else {
+        needed_options = "C";
+    }
+    m.attr("needed_options") = needed_options;
+
+    m.def("setup", []() {
+        reset_tensor(get_tensor<Options>());
+        reset_tensor(get_fixed_tensor<Options>());
+    });
+
+    m.def("is_ok", []() {
+        return check_tensor(get_tensor<Options>()) && check_tensor(get_fixed_tensor<Options>());
+    });
+
+    py::class_<CustomExample<Options>>(m, "CustomExample", py::module_local())
+        .def(py::init<>())
+        .def_readonly(
+            "member", &CustomExample<Options>::member, py::return_value_policy::reference_internal)
+        .def_readonly("member_view",
+                      &CustomExample<Options>::view_member,
+                      py::return_value_policy::reference_internal);
+
+    m.def(
+        "copy_fixed_tensor",
+        []() { return &get_fixed_tensor<Options>(); },
+        py::return_value_policy::copy);
+
+    m.def(
+        "copy_tensor", []() { return &get_tensor<Options>(); }, py::return_value_policy::copy);
+
+    m.def(
+        "copy_const_tensor",
+        []() { return &get_const_tensor<Options>(); },
+        py::return_value_policy::copy);
+
+    m.def(
+        "move_fixed_tensor_copy",
+        []() -> Eigen::TensorFixedSize<double, Eigen::Sizes<3, 5, 2>, Options> {
+            return get_fixed_tensor<Options>();
+        },
+        py::return_value_policy::move);
+
+    m.def(
+        "move_tensor_copy",
+        []() -> Eigen::Tensor<double, 3, Options> { return get_tensor<Options>(); },
+        py::return_value_policy::move);
+
+    m.def(
+        "move_const_tensor",
+        []() -> const Eigen::Tensor<double, 3, Options> & { return get_const_tensor<Options>(); },
+        py::return_value_policy::move);
+
+    m.def(
+        "take_fixed_tensor",
+
+        []() {
+            Eigen::aligned_allocator<
+                Eigen::TensorFixedSize<double, Eigen::Sizes<3, 5, 2>, Options>>
+                allocator;
+            return new (allocator.allocate(1))
+                Eigen::TensorFixedSize<double, Eigen::Sizes<3, 5, 2>, Options>(
+                    get_fixed_tensor<Options>());
+        },
+        py::return_value_policy::take_ownership);
+
+    m.def(
+        "take_tensor",
+        []() { return new Eigen::Tensor<double, 3, Options>(get_tensor<Options>()); },
+        py::return_value_policy::take_ownership);
+
+    m.def(
+        "take_const_tensor",
+        []() -> const Eigen::Tensor<double, 3, Options> * {
+            return new Eigen::Tensor<double, 3, Options>(get_tensor<Options>());
+        },
+        py::return_value_policy::take_ownership);
+
+    m.def(
+        "take_view_tensor",
+        []() -> const Eigen::TensorMap<Eigen::Tensor<double, 3, Options>> * {
+            return new Eigen::TensorMap<Eigen::Tensor<double, 3, Options>>(get_tensor<Options>());
+        },
+        py::return_value_policy::take_ownership);
+
+    m.def(
+        "reference_tensor",
+        []() { return &get_tensor<Options>(); },
+        py::return_value_policy::reference);
+
+    m.def(
+        "reference_tensor_v2",
+        []() -> Eigen::Tensor<double, 3, Options> & { return get_tensor<Options>(); },
+        py::return_value_policy::reference);
+
+    m.def(
+        "reference_tensor_internal",
+        []() { return &get_tensor<Options>(); },
+        py::return_value_policy::reference_internal);
+
+    m.def(
+        "reference_fixed_tensor",
+        []() { return &get_tensor<Options>(); },
+        py::return_value_policy::reference);
+
+    m.def(
+        "reference_const_tensor",
+        []() { return &get_const_tensor<Options>(); },
+        py::return_value_policy::reference);
+
+    m.def(
+        "reference_const_tensor_v2",
+        []() -> const Eigen::Tensor<double, 3, Options> & { return get_const_tensor<Options>(); },
+        py::return_value_policy::reference);
+
+    m.def(
+        "reference_view_of_tensor",
+        []() -> Eigen::TensorMap<Eigen::Tensor<double, 3, Options>> {
+            return get_tensor_map<Options>();
+        },
+        py::return_value_policy::reference);
+
+    m.def(
+        "reference_view_of_tensor_v2",
+        // NOLINTNEXTLINE(readability-const-return-type)
+        []() -> const Eigen::TensorMap<Eigen::Tensor<double, 3, Options>> {
+            return get_tensor_map<Options>(); // NOLINT(readability-const-return-type)
+        },                                    // NOLINT(readability-const-return-type)
+        py::return_value_policy::reference);
+
+    m.def(
+        "reference_view_of_tensor_v3",
+        []() -> Eigen::TensorMap<Eigen::Tensor<double, 3, Options>> * {
+            return &get_tensor_map<Options>();
+        },
+        py::return_value_policy::reference);
+
+    m.def(
+        "reference_view_of_tensor_v4",
+        []() -> const Eigen::TensorMap<Eigen::Tensor<double, 3, Options>> * {
+            return &get_tensor_map<Options>();
+        },
+        py::return_value_policy::reference);
+
+    m.def(
+        "reference_view_of_tensor_v5",
+        []() -> Eigen::TensorMap<Eigen::Tensor<double, 3, Options>> & {
+            return get_tensor_map<Options>();
+        },
+        py::return_value_policy::reference);
+
+    m.def(
+        "reference_view_of_tensor_v6",
+        []() -> const Eigen::TensorMap<Eigen::Tensor<double, 3, Options>> & {
+            return get_tensor_map<Options>();
+        },
+        py::return_value_policy::reference);
+
+    m.def(
+        "reference_view_of_fixed_tensor",
+        []() {
+            return Eigen::TensorMap<
+                Eigen::TensorFixedSize<double, Eigen::Sizes<3, 5, 2>, Options>>(
+                get_fixed_tensor<Options>());
+        },
+        py::return_value_policy::reference);
+
+    m.def("round_trip_tensor",
+          [](const Eigen::Tensor<double, 3, Options> &tensor) { return tensor; });
+
+    m.def(
+        "round_trip_tensor_noconvert",
+        [](const Eigen::Tensor<double, 3, Options> &tensor) { return tensor; },
+        py::arg("tensor").noconvert());
+
+    m.def("round_trip_tensor2",
+          [](const Eigen::Tensor<int32_t, 3, Options> &tensor) { return tensor; });
+
+    m.def("round_trip_fixed_tensor",
+          [](const Eigen::TensorFixedSize<double, Eigen::Sizes<3, 5, 2>, Options> &tensor) {
+              return tensor;
+          });
+
+    m.def(
+        "round_trip_view_tensor",
+        [](Eigen::TensorMap<Eigen::Tensor<double, 3, Options>> view) { return view; },
+        py::return_value_policy::reference);
+
+    m.def(
+        "round_trip_view_tensor_ref",
+        [](Eigen::TensorMap<Eigen::Tensor<double, 3, Options>> &view) { return view; },
+        py::return_value_policy::reference);
+
+    m.def(
+        "round_trip_view_tensor_ptr",
+        [](Eigen::TensorMap<Eigen::Tensor<double, 3, Options>> *view) { return view; },
+        py::return_value_policy::reference);
+
+    m.def(
+        "round_trip_aligned_view_tensor",
+        [](Eigen::TensorMap<Eigen::Tensor<double, 3, Options>, Eigen::Aligned> view) {
+            return view;
+        },
+        py::return_value_policy::reference);
+
+    m.def(
+        "round_trip_const_view_tensor",
+        [](Eigen::TensorMap<const Eigen::Tensor<double, 3, Options>> view) {
+            return Eigen::Tensor<double, 3, Options>(view);
+        },
+        py::return_value_policy::move);
+
+    m.def(
+        "round_trip_rank_0",
+        [](const Eigen::Tensor<double, 0, Options> &tensor) { return tensor; },
+        py::return_value_policy::move);
+
+    m.def(
+        "round_trip_rank_0_noconvert",
+        [](const Eigen::Tensor<double, 0, Options> &tensor) { return tensor; },
+        py::arg("tensor").noconvert(),
+        py::return_value_policy::move);
+
+    m.def(
+        "round_trip_rank_0_view",
+        [](Eigen::TensorMap<Eigen::Tensor<double, 0, Options>> &tensor) { return tensor; },
+        py::return_value_policy::reference);
+}
+
+void test_module(py::module_ &m) {
+    auto f_style = m.def_submodule("f_style");
+    auto c_style = m.def_submodule("c_style");
+
+    init_tensor_module<Eigen::ColMajor>(f_style);
+    init_tensor_module<Eigen::RowMajor>(c_style);
+}
+
+PYBIND11_NAMESPACE_END(eigen_tensor_test)
diff --git a/ext/pybind11/tests/test_eigen_tensor.py b/ext/pybind11/tests/test_eigen_tensor.py
new file mode 100644
index 0000000000..dc8aa4643e
--- /dev/null
+++ b/ext/pybind11/tests/test_eigen_tensor.py
@@ -0,0 +1,296 @@
+import sys
+
+import pytest
+
+np = pytest.importorskip("numpy")
+eigen_tensor = pytest.importorskip("pybind11_tests.eigen_tensor")
+submodules = [eigen_tensor.c_style, eigen_tensor.f_style]
+try:
+    import eigen_tensor_avoid_stl_array as avoid
+
+    submodules += [avoid.c_style, avoid.f_style]
+except ImportError as e:
+    # Ensure config, build, toolchain, etc. issues are not masked here:
+    raise RuntimeError(
+        "import eigen_tensor_avoid_stl_array FAILED, while "
+        "import pybind11_tests.eigen_tensor succeeded. "
+        "Please ensure that "
+        "test_eigen_tensor.cpp & "
+        "eigen_tensor_avoid_stl_array.cpp "
+        "are built together (or both are not built if Eigen is not available)."
+    ) from e
+
+tensor_ref = np.empty((3, 5, 2), dtype=np.int64)
+
+for i in range(tensor_ref.shape[0]):
+    for j in range(tensor_ref.shape[1]):
+        for k in range(tensor_ref.shape[2]):
+            tensor_ref[i, j, k] = i * (5 * 2) + j * 2 + k
+
+indices = (2, 3, 1)
+
+
+@pytest.fixture(autouse=True)
+def cleanup():
+    for module in submodules:
+        module.setup()
+
+    yield
+
+    for module in submodules:
+        assert module.is_ok()
+
+
+def test_import_avoid_stl_array():
+    pytest.importorskip("eigen_tensor_avoid_stl_array")
+    assert len(submodules) == 4
+
+
+def assert_equal_tensor_ref(mat, writeable=True, modified=None):
+    assert mat.flags.writeable == writeable
+
+    copy = np.array(tensor_ref)
+    if modified is not None:
+        copy[indices] = modified
+
+    np.testing.assert_array_equal(mat, copy)
+
+
+@pytest.mark.parametrize("m", submodules)
+@pytest.mark.parametrize("member_name", ["member", "member_view"])
+def test_reference_internal(m, member_name):
+
+    if not hasattr(sys, "getrefcount"):
+        pytest.skip("No reference counting")
+    foo = m.CustomExample()
+    counts = sys.getrefcount(foo)
+    mem = getattr(foo, member_name)
+    assert_equal_tensor_ref(mem, writeable=False)
+    new_counts = sys.getrefcount(foo)
+    assert new_counts == counts + 1
+    assert_equal_tensor_ref(mem, writeable=False)
+    del mem
+    assert sys.getrefcount(foo) == counts
+
+
+assert_equal_funcs = [
+    "copy_tensor",
+    "copy_fixed_tensor",
+    "copy_const_tensor",
+    "move_tensor_copy",
+    "move_fixed_tensor_copy",
+    "take_tensor",
+    "take_fixed_tensor",
+    "reference_tensor",
+    "reference_tensor_v2",
+    "reference_fixed_tensor",
+    "reference_view_of_tensor",
+    "reference_view_of_tensor_v3",
+    "reference_view_of_tensor_v5",
+    "reference_view_of_fixed_tensor",
+]
+
+assert_equal_const_funcs = [
+    "reference_view_of_tensor_v2",
+    "reference_view_of_tensor_v4",
+    "reference_view_of_tensor_v6",
+    "reference_const_tensor",
+    "reference_const_tensor_v2",
+]
+
+
+@pytest.mark.parametrize("m", submodules)
+@pytest.mark.parametrize("func_name", assert_equal_funcs + assert_equal_const_funcs)
+def test_convert_tensor_to_py(m, func_name):
+    writeable = func_name in assert_equal_funcs
+    assert_equal_tensor_ref(getattr(m, func_name)(), writeable=writeable)
+
+
+@pytest.mark.parametrize("m", submodules)
+def test_bad_cpp_to_python_casts(m):
+
+    with pytest.raises(
+        RuntimeError, match="Cannot use reference internal when there is no parent"
+    ):
+        m.reference_tensor_internal()
+
+    with pytest.raises(RuntimeError, match="Cannot move from a constant reference"):
+        m.move_const_tensor()
+
+    with pytest.raises(
+        RuntimeError, match="Cannot take ownership of a const reference"
+    ):
+        m.take_const_tensor()
+
+    with pytest.raises(
+        RuntimeError,
+        match="Invalid return_value_policy for Eigen Map type, must be either reference or reference_internal",
+    ):
+        m.take_view_tensor()
+
+
+@pytest.mark.parametrize("m", submodules)
+def test_bad_python_to_cpp_casts(m):
+
+    with pytest.raises(
+        TypeError, match=r"^round_trip_tensor\(\): incompatible function arguments"
+    ):
+        m.round_trip_tensor(np.zeros((2, 3)))
+
+    with pytest.raises(TypeError, match=r"^Cannot cast array data from dtype"):
+        m.round_trip_tensor(np.zeros(dtype=np.str_, shape=(2, 3, 1)))
+
+    with pytest.raises(
+        TypeError,
+        match=r"^round_trip_tensor_noconvert\(\): incompatible function arguments",
+    ):
+        m.round_trip_tensor_noconvert(tensor_ref)
+
+    assert_equal_tensor_ref(
+        m.round_trip_tensor_noconvert(tensor_ref.astype(np.float64))
+    )
+
+    if m.needed_options == "F":
+        bad_options = "C"
+    else:
+        bad_options = "F"
+    # Shape, dtype and the order need to be correct for a TensorMap cast
+    with pytest.raises(
+        TypeError, match=r"^round_trip_view_tensor\(\): incompatible function arguments"
+    ):
+        m.round_trip_view_tensor(
+            np.zeros((3, 5, 2), dtype=np.float64, order=bad_options)
+        )
+
+    with pytest.raises(
+        TypeError, match=r"^round_trip_view_tensor\(\): incompatible function arguments"
+    ):
+        m.round_trip_view_tensor(
+            np.zeros((3, 5, 2), dtype=np.float32, order=m.needed_options)
+        )
+
+    with pytest.raises(
+        TypeError, match=r"^round_trip_view_tensor\(\): incompatible function arguments"
+    ):
+        m.round_trip_view_tensor(
+            np.zeros((3, 5), dtype=np.float64, order=m.needed_options)
+        )
+
+    with pytest.raises(
+        TypeError, match=r"^round_trip_view_tensor\(\): incompatible function arguments"
+    ):
+        temp = np.zeros((3, 5, 2), dtype=np.float64, order=m.needed_options)
+        m.round_trip_view_tensor(
+            temp[:, ::-1, :],
+        )
+
+    with pytest.raises(
+        TypeError, match=r"^round_trip_view_tensor\(\): incompatible function arguments"
+    ):
+        temp = np.zeros((3, 5, 2), dtype=np.float64, order=m.needed_options)
+        temp.setflags(write=False)
+        m.round_trip_view_tensor(temp)
+
+
+@pytest.mark.parametrize("m", submodules)
+def test_references_actually_refer(m):
+
+    a = m.reference_tensor()
+    temp = a[indices]
+    a[indices] = 100
+    assert_equal_tensor_ref(m.copy_const_tensor(), modified=100)
+    a[indices] = temp
+    assert_equal_tensor_ref(m.copy_const_tensor())
+
+    a = m.reference_view_of_tensor()
+    a[indices] = 100
+    assert_equal_tensor_ref(m.copy_const_tensor(), modified=100)
+    a[indices] = temp
+    assert_equal_tensor_ref(m.copy_const_tensor())
+
+
+@pytest.mark.parametrize("m", submodules)
+def test_round_trip(m):
+
+    assert_equal_tensor_ref(m.round_trip_tensor(tensor_ref))
+
+    with pytest.raises(TypeError, match="^Cannot cast array data from"):
+        assert_equal_tensor_ref(m.round_trip_tensor2(tensor_ref))
+
+    assert_equal_tensor_ref(m.round_trip_tensor2(np.array(tensor_ref, dtype=np.int32)))
+    assert_equal_tensor_ref(m.round_trip_fixed_tensor(tensor_ref))
+    assert_equal_tensor_ref(m.round_trip_aligned_view_tensor(m.reference_tensor()))
+
+    copy = np.array(tensor_ref, dtype=np.float64, order=m.needed_options)
+    assert_equal_tensor_ref(m.round_trip_view_tensor(copy))
+    assert_equal_tensor_ref(m.round_trip_view_tensor_ref(copy))
+    assert_equal_tensor_ref(m.round_trip_view_tensor_ptr(copy))
+    copy.setflags(write=False)
+    assert_equal_tensor_ref(m.round_trip_const_view_tensor(copy))
+
+    np.testing.assert_array_equal(
+        tensor_ref[:, ::-1, :], m.round_trip_tensor(tensor_ref[:, ::-1, :])
+    )
+
+    assert m.round_trip_rank_0(np.float64(3.5)) == 3.5
+    assert m.round_trip_rank_0(3.5) == 3.5
+
+    with pytest.raises(
+        TypeError,
+        match=r"^round_trip_rank_0_noconvert\(\): incompatible function arguments",
+    ):
+        m.round_trip_rank_0_noconvert(np.float64(3.5))
+
+    with pytest.raises(
+        TypeError,
+        match=r"^round_trip_rank_0_noconvert\(\): incompatible function arguments",
+    ):
+        m.round_trip_rank_0_noconvert(3.5)
+
+    with pytest.raises(
+        TypeError, match=r"^round_trip_rank_0_view\(\): incompatible function arguments"
+    ):
+        m.round_trip_rank_0_view(np.float64(3.5))
+
+    with pytest.raises(
+        TypeError, match=r"^round_trip_rank_0_view\(\): incompatible function arguments"
+    ):
+        m.round_trip_rank_0_view(3.5)
+
+
+@pytest.mark.parametrize("m", submodules)
+def test_round_trip_references_actually_refer(m):
+
+    # Need to create a copy that matches the type on the C side
+    copy = np.array(tensor_ref, dtype=np.float64, order=m.needed_options)
+    a = m.round_trip_view_tensor(copy)
+    temp = a[indices]
+    a[indices] = 100
+    assert_equal_tensor_ref(copy, modified=100)
+    a[indices] = temp
+    assert_equal_tensor_ref(copy)
+
+
+@pytest.mark.parametrize("m", submodules)
+def test_doc_string(m, doc):
+    assert (
+        doc(m.copy_tensor) == "copy_tensor() -> numpy.ndarray[numpy.float64[?, ?, ?]]"
+    )
+    assert (
+        doc(m.copy_fixed_tensor)
+        == "copy_fixed_tensor() -> numpy.ndarray[numpy.float64[3, 5, 2]]"
+    )
+    assert (
+        doc(m.reference_const_tensor)
+        == "reference_const_tensor() -> numpy.ndarray[numpy.float64[?, ?, ?]]"
+    )
+
+    order_flag = f"flags.{m.needed_options.lower()}_contiguous"
+    assert doc(m.round_trip_view_tensor) == (
+        f"round_trip_view_tensor(arg0: numpy.ndarray[numpy.float64[?, ?, ?], flags.writeable, {order_flag}])"
+        + f" -> numpy.ndarray[numpy.float64[?, ?, ?], flags.writeable, {order_flag}]"
+    )
+    assert doc(m.round_trip_const_view_tensor) == (
+        f"round_trip_const_view_tensor(arg0: numpy.ndarray[numpy.float64[?, ?, ?], {order_flag}])"
+        + " -> numpy.ndarray[numpy.float64[?, ?, ?]]"
+    )
diff --git a/ext/pybind11/tests/test_embed/CMakeLists.txt b/ext/pybind11/tests/test_embed/CMakeLists.txt
index 3b89d6e584..09a3693999 100644
--- a/ext/pybind11/tests/test_embed/CMakeLists.txt
+++ b/ext/pybind11/tests/test_embed/CMakeLists.txt
@@ -7,7 +7,7 @@ if("${PYTHON_MODULE_EXTENSION}" MATCHES "pypy" OR "${Python_INTERPRETER_ID}" STR
   return()
 endif()
 
-find_package(Catch 2.13.2)
+find_package(Catch 2.13.9)
 
 if(CATCH_FOUND)
   message(STATUS "Building interpreter tests using Catch v${CATCH_VERSION}")
@@ -25,7 +25,7 @@ pybind11_enable_warnings(test_embed)
 target_link_libraries(test_embed PRIVATE pybind11::embed Catch2::Catch2 Threads::Threads)
 
 if(NOT CMAKE_CURRENT_SOURCE_DIR STREQUAL CMAKE_CURRENT_BINARY_DIR)
-  file(COPY test_interpreter.py DESTINATION "${CMAKE_CURRENT_BINARY_DIR}")
+  file(COPY test_interpreter.py test_trampoline.py DESTINATION "${CMAKE_CURRENT_BINARY_DIR}")
 endif()
 
 add_custom_target(
diff --git a/ext/pybind11/tests/test_embed/catch.cpp b/ext/pybind11/tests/test_embed/catch.cpp
index dd137385cb..558a7a35e5 100644
--- a/ext/pybind11/tests/test_embed/catch.cpp
+++ b/ext/pybind11/tests/test_embed/catch.cpp
@@ -3,10 +3,13 @@
 
 #include <pybind11/embed.h>
 
-#ifdef _MSC_VER
-// Silence MSVC C++17 deprecation warning from Catch regarding std::uncaught_exceptions (up to catch
-// 2.0.1; this should be fixed in the next catch release after 2.0.1).
-#  pragma warning(disable: 4996)
+// Silence MSVC C++17 deprecation warning from Catch regarding std::uncaught_exceptions (up to
+// catch 2.0.1; this should be fixed in the next catch release after 2.0.1).
+PYBIND11_WARNING_DISABLE_MSVC(4996)
+
+// Catch uses _ internally, which breaks gettext style defines
+#ifdef _
+#    undef _
 #endif
 
 #define CATCH_CONFIG_RUNNER
@@ -15,7 +18,25 @@
 namespace py = pybind11;
 
 int main(int argc, char *argv[]) {
+    // Setup for TEST_CASE in test_interpreter.cpp, tagging on a large random number:
+    std::string updated_pythonpath("pybind11_test_embed_PYTHONPATH_2099743835476552");
+    const char *preexisting_pythonpath = getenv("PYTHONPATH");
+    if (preexisting_pythonpath != nullptr) {
+#if defined(_WIN32)
+        updated_pythonpath += ';';
+#else
+        updated_pythonpath += ':';
+#endif
+        updated_pythonpath += preexisting_pythonpath;
+    }
+#if defined(_WIN32)
+    _putenv_s("PYTHONPATH", updated_pythonpath.c_str());
+#else
+    setenv("PYTHONPATH", updated_pythonpath.c_str(), /*replace=*/1);
+#endif
+
     py::scoped_interpreter guard{};
+
     auto result = Catch::Session().run(argc, argv);
 
     return result < 0xff ? result : 0xff;
diff --git a/ext/pybind11/tests/test_embed/external_module.cpp b/ext/pybind11/tests/test_embed/external_module.cpp
index 4909522993..5c482fe061 100644
--- a/ext/pybind11/tests/test_embed/external_module.cpp
+++ b/ext/pybind11/tests/test_embed/external_module.cpp
@@ -13,11 +13,8 @@ PYBIND11_MODULE(external_module, m) {
         int v;
     };
 
-    py::class_<A>(m, "A")
-        .def(py::init<int>())
-        .def_readwrite("value", &A::v);
+    py::class_<A>(m, "A").def(py::init<int>()).def_readwrite("value", &A::v);
 
-    m.def("internals_at", []() {
-        return reinterpret_cast<uintptr_t>(&py::detail::get_internals());
-    });
+    m.def("internals_at",
+          []() { return reinterpret_cast<uintptr_t>(&py::detail::get_internals()); });
 }
diff --git a/ext/pybind11/tests/test_embed/test_interpreter.cpp b/ext/pybind11/tests/test_embed/test_interpreter.cpp
index 20bcade0ac..10b20f3715 100644
--- a/ext/pybind11/tests/test_embed/test_interpreter.cpp
+++ b/ext/pybind11/tests/test_embed/test_interpreter.cpp
@@ -1,13 +1,10 @@
 #include <pybind11/embed.h>
 
-#ifdef _MSC_VER
-// Silence MSVC C++17 deprecation warning from Catch regarding std::uncaught_exceptions (up to catch
-// 2.0.1; this should be fixed in the next catch release after 2.0.1).
-#  pragma warning(disable: 4996)
-#endif
+// Silence MSVC C++17 deprecation warning from Catch regarding std::uncaught_exceptions (up to
+// catch 2.0.1; this should be fixed in the next catch release after 2.0.1).
+PYBIND11_WARNING_DISABLE_MSVC(4996)
 
 #include <catch.hpp>
-
 #include <cstdlib>
 #include <fstream>
 #include <functional>
@@ -17,6 +14,11 @@
 namespace py = pybind11;
 using namespace py::literals;
 
+size_t get_sys_path_size() {
+    auto sys_path = py::module::import("sys").attr("path");
+    return py::len(sys_path);
+}
+
 class Widget {
 public:
     explicit Widget(std::string message) : message(std::move(message)) {}
@@ -37,6 +39,22 @@ class PyWidget final : public Widget {
     std::string argv0() const override { PYBIND11_OVERRIDE_PURE(std::string, Widget, argv0); }
 };
 
+class test_override_cache_helper {
+
+public:
+    virtual int func() { return 0; }
+
+    test_override_cache_helper() = default;
+    virtual ~test_override_cache_helper() = default;
+    // Non-copyable
+    test_override_cache_helper &operator=(test_override_cache_helper const &Right) = delete;
+    test_override_cache_helper(test_override_cache_helper const &Copy) = delete;
+};
+
+class test_override_cache_helper_trampoline : public test_override_cache_helper {
+    int func() override { PYBIND11_OVERRIDE(int, test_override_cache_helper, func); }
+};
+
 PYBIND11_EMBEDDED_MODULE(widget_module, m) {
     py::class_<Widget, PyWidget>(m, "Widget")
         .def(py::init<std::string>())
@@ -45,24 +63,39 @@ PYBIND11_EMBEDDED_MODULE(widget_module, m) {
     m.def("add", [](int i, int j) { return i + j; });
 }
 
-PYBIND11_EMBEDDED_MODULE(throw_exception, ) {
-    throw std::runtime_error("C++ Error");
+PYBIND11_EMBEDDED_MODULE(trampoline_module, m) {
+    py::class_<test_override_cache_helper,
+               test_override_cache_helper_trampoline,
+               std::shared_ptr<test_override_cache_helper>>(m, "test_override_cache_helper")
+        .def(py::init_alias<>())
+        .def("func", &test_override_cache_helper::func);
 }
 
+PYBIND11_EMBEDDED_MODULE(throw_exception, ) { throw std::runtime_error("C++ Error"); }
+
 PYBIND11_EMBEDDED_MODULE(throw_error_already_set, ) {
     auto d = py::dict();
     d["missing"].cast<py::object>();
 }
 
+TEST_CASE("PYTHONPATH is used to update sys.path") {
+    // The setup for this TEST_CASE is in catch.cpp!
+    auto sys_path = py::str(py::module_::import("sys").attr("path")).cast<std::string>();
+    REQUIRE_THAT(sys_path,
+                 Catch::Matchers::Contains("pybind11_test_embed_PYTHONPATH_2099743835476552"));
+}
+
 TEST_CASE("Pass classes and data between modules defined in C++ and Python") {
     auto module_ = py::module_::import("test_interpreter");
     REQUIRE(py::hasattr(module_, "DerivedWidget"));
 
-    auto locals = py::dict("hello"_a="Hello, World!", "x"_a=5, **module_.attr("__dict__"));
+    auto locals = py::dict("hello"_a = "Hello, World!", "x"_a = 5, **module_.attr("__dict__"));
     py::exec(R"(
         widget = DerivedWidget("{} - {}".format(hello, x))
         message = widget.the_message
-    )", py::globals(), locals);
+    )",
+             py::globals(),
+             locals);
     REQUIRE(locals["message"].cast<std::string>() == "Hello, World! - 5");
 
     auto py_widget = module_.attr("DerivedWidget")("The question");
@@ -73,28 +106,51 @@ TEST_CASE("Pass classes and data between modules defined in C++ and Python") {
     REQUIRE(cpp_widget.the_answer() == 42);
 }
 
+TEST_CASE("Override cache") {
+    auto module_ = py::module_::import("test_trampoline");
+    REQUIRE(py::hasattr(module_, "func"));
+    REQUIRE(py::hasattr(module_, "func2"));
+
+    auto locals = py::dict(**module_.attr("__dict__"));
+
+    int i = 0;
+    for (; i < 1500; ++i) {
+        std::shared_ptr<test_override_cache_helper> p_obj;
+        std::shared_ptr<test_override_cache_helper> p_obj2;
+
+        py::object loc_inst = locals["func"]();
+        p_obj = py::cast<std::shared_ptr<test_override_cache_helper>>(loc_inst);
+
+        int ret = p_obj->func();
+
+        REQUIRE(ret == 42);
+
+        loc_inst = locals["func2"]();
+
+        p_obj2 = py::cast<std::shared_ptr<test_override_cache_helper>>(loc_inst);
+
+        p_obj2->func();
+    }
+}
+
 TEST_CASE("Import error handling") {
     REQUIRE_NOTHROW(py::module_::import("widget_module"));
-    REQUIRE_THROWS_WITH(py::module_::import("throw_exception"),
-                        "ImportError: C++ Error");
-#if PY_VERSION_HEX >= 0x03030000
+    REQUIRE_THROWS_WITH(py::module_::import("throw_exception"), "ImportError: C++ Error");
     REQUIRE_THROWS_WITH(py::module_::import("throw_error_already_set"),
                         Catch::Contains("ImportError: initialization failed"));
 
-    auto locals = py::dict("is_keyerror"_a=false, "message"_a="not set");
+    auto locals = py::dict("is_keyerror"_a = false, "message"_a = "not set");
     py::exec(R"(
         try:
             import throw_error_already_set
         except ImportError as e:
             is_keyerror = type(e.__cause__) == KeyError
             message = str(e.__cause__)
-    )", py::globals(), locals);
+    )",
+             py::globals(),
+             locals);
     REQUIRE(locals["is_keyerror"].cast<bool>() == true);
     REQUIRE(locals["message"].cast<std::string>() == "'missing'");
-#else
-    REQUIRE_THROWS_WITH(py::module_::import("throw_error_already_set"),
-                        Catch::Contains("ImportError: KeyError"));
-#endif
 }
 
 TEST_CASE("There can be only one interpreter") {
@@ -115,6 +171,70 @@ TEST_CASE("There can be only one interpreter") {
     py::initialize_interpreter();
 }
 
+#if PY_VERSION_HEX >= PYBIND11_PYCONFIG_SUPPORT_PY_VERSION_HEX
+TEST_CASE("Custom PyConfig") {
+    py::finalize_interpreter();
+    PyConfig config;
+    PyConfig_InitPythonConfig(&config);
+    REQUIRE_NOTHROW(py::scoped_interpreter{&config});
+    {
+        py::scoped_interpreter p{&config};
+        REQUIRE(py::module_::import("widget_module").attr("add")(1, 41).cast<int>() == 42);
+    }
+    py::initialize_interpreter();
+}
+
+TEST_CASE("Custom PyConfig with argv") {
+    py::finalize_interpreter();
+    {
+        PyConfig config;
+        PyConfig_InitIsolatedConfig(&config);
+        char *argv[] = {strdup("a.out")};
+        py::scoped_interpreter argv_scope{&config, 1, argv};
+        std::free(argv[0]);
+        auto module = py::module::import("test_interpreter");
+        auto py_widget = module.attr("DerivedWidget")("The question");
+        const auto &cpp_widget = py_widget.cast<const Widget &>();
+        REQUIRE(cpp_widget.argv0() == "a.out");
+    }
+    py::initialize_interpreter();
+}
+#endif
+
+TEST_CASE("Add program dir to path pre-PyConfig") {
+    py::finalize_interpreter();
+    size_t path_size_add_program_dir_to_path_false = 0;
+    {
+        py::scoped_interpreter scoped_interp{true, 0, nullptr, false};
+        path_size_add_program_dir_to_path_false = get_sys_path_size();
+    }
+    {
+        py::scoped_interpreter scoped_interp{};
+        REQUIRE(get_sys_path_size() == path_size_add_program_dir_to_path_false + 1);
+    }
+    py::initialize_interpreter();
+}
+
+#if PY_VERSION_HEX >= PYBIND11_PYCONFIG_SUPPORT_PY_VERSION_HEX
+TEST_CASE("Add program dir to path using PyConfig") {
+    py::finalize_interpreter();
+    size_t path_size_add_program_dir_to_path_false = 0;
+    {
+        PyConfig config;
+        PyConfig_InitPythonConfig(&config);
+        py::scoped_interpreter scoped_interp{&config, 0, nullptr, false};
+        path_size_add_program_dir_to_path_false = get_sys_path_size();
+    }
+    {
+        PyConfig config;
+        PyConfig_InitPythonConfig(&config);
+        py::scoped_interpreter scoped_interp{&config};
+        REQUIRE(get_sys_path_size() == path_size_add_program_dir_to_path_false + 1);
+    }
+    py::initialize_interpreter();
+}
+#endif
+
 bool has_pybind11_internals_builtin() {
     auto builtins = py::handle(PyEval_GetBuiltins());
     return builtins.contains(PYBIND11_INTERNALS_ID);
@@ -130,11 +250,12 @@ TEST_CASE("Restart the interpreter") {
     REQUIRE(py::module_::import("widget_module").attr("add")(1, 2).cast<int>() == 3);
     REQUIRE(has_pybind11_internals_builtin());
     REQUIRE(has_pybind11_internals_static());
-    REQUIRE(py::module_::import("external_module").attr("A")(123).attr("value").cast<int>() == 123);
+    REQUIRE(py::module_::import("external_module").attr("A")(123).attr("value").cast<int>()
+            == 123);
 
     // local and foreign module internals should point to the same internals:
-    REQUIRE(reinterpret_cast<uintptr_t>(*py::detail::get_internals_pp()) ==
-            py::module_::import("external_module").attr("internals_at")().cast<uintptr_t>());
+    REQUIRE(reinterpret_cast<uintptr_t>(*py::detail::get_internals_pp())
+            == py::module_::import("external_module").attr("internals_at")().cast<uintptr_t>());
 
     // Restart the interpreter.
     py::finalize_interpreter();
@@ -149,16 +270,19 @@ TEST_CASE("Restart the interpreter") {
     pybind11::detail::get_internals();
     REQUIRE(has_pybind11_internals_builtin());
     REQUIRE(has_pybind11_internals_static());
-    REQUIRE(reinterpret_cast<uintptr_t>(*py::detail::get_internals_pp()) ==
-            py::module_::import("external_module").attr("internals_at")().cast<uintptr_t>());
+    REQUIRE(reinterpret_cast<uintptr_t>(*py::detail::get_internals_pp())
+            == py::module_::import("external_module").attr("internals_at")().cast<uintptr_t>());
 
     // Make sure that an interpreter with no get_internals() created until finalize still gets the
     // internals destroyed
     py::finalize_interpreter();
     py::initialize_interpreter();
     bool ran = false;
-    py::module_::import("__main__").attr("internals_destroy_test") =
-        py::capsule(&ran, [](void *ran) { py::detail::get_internals(); *static_cast<bool *>(ran) = true; });
+    py::module_::import("__main__").attr("internals_destroy_test")
+        = py::capsule(&ran, [](void *ran) {
+              py::detail::get_internals();
+              *static_cast<bool *>(ran) = true;
+          });
     REQUIRE_FALSE(has_pybind11_internals_builtin());
     REQUIRE_FALSE(has_pybind11_internals_static());
     REQUIRE_FALSE(ran);
@@ -191,8 +315,8 @@ TEST_CASE("Subinterpreter") {
     REQUIRE(has_pybind11_internals_static());
 
     /// Create and switch to a subinterpreter.
-    auto main_tstate = PyThreadState_Get();
-    auto sub_tstate = Py_NewInterpreter();
+    auto *main_tstate = PyThreadState_Get();
+    auto *sub_tstate = Py_NewInterpreter();
 
     // Subinterpreters get their own copy of builtins. detail::get_internals() still
     // works by returning from the static variable, i.e. all interpreters share a single
@@ -232,11 +356,10 @@ TEST_CASE("Threads") {
     REQUIRE_FALSE(has_pybind11_internals_static());
 
     constexpr auto num_threads = 10;
-    auto locals = py::dict("count"_a=0);
+    auto locals = py::dict("count"_a = 0);
 
     {
         py::gil_scoped_release gil_release{};
-        REQUIRE(has_pybind11_internals_static());
 
         auto threads = std::vector<std::thread>();
         for (auto i = 0; i < num_threads; ++i) {
@@ -258,7 +381,11 @@ TEST_CASE("Threads") {
 struct scope_exit {
     std::function<void()> f_;
     explicit scope_exit(std::function<void()> f) noexcept : f_(std::move(f)) {}
-    ~scope_exit() { if (f_) f_(); }
+    ~scope_exit() {
+        if (f_) {
+            f_();
+        }
+    }
 };
 
 TEST_CASE("Reload module from file") {
@@ -269,9 +396,8 @@ TEST_CASE("Reload module from file") {
     bool dont_write_bytecode = sys.attr("dont_write_bytecode").cast<bool>();
     sys.attr("dont_write_bytecode") = true;
     // Reset the value at scope exit
-    scope_exit reset_dont_write_bytecode([&]() {
-        sys.attr("dont_write_bytecode") = dont_write_bytecode;
-    });
+    scope_exit reset_dont_write_bytecode(
+        [&]() { sys.attr("dont_write_bytecode") = dont_write_bytecode; });
 
     std::string module_name = "test_module_reload";
     std::string module_file = module_name + ".py";
@@ -282,9 +408,7 @@ TEST_CASE("Reload module from file") {
     test_module << "    return 1\n";
     test_module.close();
     // Delete the file at scope exit
-    scope_exit delete_module_file([&]() {
-        std::remove(module_file.c_str());
-    });
+    scope_exit delete_module_file([&]() { std::remove(module_file.c_str()); });
 
     // Import the module from file
     auto module_ = py::module_::import(module_name.c_str());
@@ -324,3 +448,21 @@ TEST_CASE("sys.argv gets initialized properly") {
     }
     py::initialize_interpreter();
 }
+
+TEST_CASE("make_iterator can be called before then after finalizing an interpreter") {
+    // Reproduction of issue #2101 (https://github.com/pybind/pybind11/issues/2101)
+    py::finalize_interpreter();
+
+    std::vector<int> container;
+    {
+        pybind11::scoped_interpreter g;
+        auto iter = pybind11::make_iterator(container.begin(), container.end());
+    }
+
+    REQUIRE_NOTHROW([&]() {
+        pybind11::scoped_interpreter g;
+        auto iter = pybind11::make_iterator(container.begin(), container.end());
+    }());
+
+    py::initialize_interpreter();
+}
diff --git a/ext/pybind11/tests/test_embed/test_interpreter.py b/ext/pybind11/tests/test_embed/test_interpreter.py
index 5ab55a4b37..f279449722 100644
--- a/ext/pybind11/tests/test_embed/test_interpreter.py
+++ b/ext/pybind11/tests/test_embed/test_interpreter.py
@@ -1,4 +1,3 @@
-# -*- coding: utf-8 -*-
 import sys
 
 from widget_module import Widget
@@ -6,7 +5,7 @@ from widget_module import Widget
 
 class DerivedWidget(Widget):
     def __init__(self, message):
-        super(DerivedWidget, self).__init__(message)
+        super().__init__(message)
 
     def the_answer(self):
         return 42
diff --git a/ext/pybind11/tests/test_embed/test_trampoline.py b/ext/pybind11/tests/test_embed/test_trampoline.py
new file mode 100644
index 0000000000..8e14e8ef0b
--- /dev/null
+++ b/ext/pybind11/tests/test_embed/test_trampoline.py
@@ -0,0 +1,16 @@
+import trampoline_module
+
+
+def func():
+    class Test(trampoline_module.test_override_cache_helper):
+        def func(self):
+            return 42
+
+    return Test()
+
+
+def func2():
+    class Test(trampoline_module.test_override_cache_helper):
+        pass
+
+    return Test()
diff --git a/ext/pybind11/tests/test_enum.cpp b/ext/pybind11/tests/test_enum.cpp
index 40c48d412a..2597b275ef 100644
--- a/ext/pybind11/tests/test_enum.cpp
+++ b/ext/pybind11/tests/test_enum.cpp
@@ -11,11 +11,7 @@
 
 TEST_SUBMODULE(enums, m) {
     // test_unscoped_enum
-    enum UnscopedEnum {
-        EOne = 1,
-        ETwo,
-        EThree
-    };
+    enum UnscopedEnum { EOne = 1, ETwo, EThree };
     py::enum_<UnscopedEnum>(m, "UnscopedEnum", py::arithmetic(), "An unscoped enumeration")
         .value("EOne", EOne, "Docstring for EOne")
         .value("ETwo", ETwo, "Docstring for ETwo")
@@ -23,10 +19,7 @@ TEST_SUBMODULE(enums, m) {
         .export_values();
 
     // test_scoped_enum
-    enum class ScopedEnum {
-        Two = 2,
-        Three
-    };
+    enum class ScopedEnum { Two = 2, Three };
     py::enum_<ScopedEnum>(m, "ScopedEnum", py::arithmetic())
         .value("Two", ScopedEnum::Two)
         .value("Three", ScopedEnum::Three);
@@ -36,11 +29,7 @@ TEST_SUBMODULE(enums, m) {
     });
 
     // test_binary_operators
-    enum Flags {
-        Read = 4,
-        Write = 2,
-        Execute = 1
-    };
+    enum Flags { Read = 4, Write = 2, Execute = 1 };
     py::enum_<Flags>(m, "Flags", py::arithmetic())
         .value("Read", Flags::Read)
         .value("Write", Flags::Write)
@@ -50,14 +39,9 @@ TEST_SUBMODULE(enums, m) {
     // test_implicit_conversion
     class ClassWithUnscopedEnum {
     public:
-        enum EMode {
-            EFirstMode = 1,
-            ESecondMode
-        };
+        enum EMode { EFirstMode = 1, ESecondMode };
 
-        static EMode test_function(EMode mode) {
-            return mode;
-        }
+        static EMode test_function(EMode mode) { return mode; }
     };
     py::class_<ClassWithUnscopedEnum> exenum_class(m, "ClassWithUnscopedEnum");
     exenum_class.def_static("test_function", &ClassWithUnscopedEnum::test_function);
@@ -67,19 +51,17 @@ TEST_SUBMODULE(enums, m) {
         .export_values();
 
     // test_enum_to_int
-    m.def("test_enum_to_int", [](int) { });
-    m.def("test_enum_to_uint", [](uint32_t) { });
-    m.def("test_enum_to_long_long", [](long long) { });
+    m.def("test_enum_to_int", [](int) {});
+    m.def("test_enum_to_uint", [](uint32_t) {});
+    m.def("test_enum_to_long_long", [](long long) {});
 
     // test_duplicate_enum_name
-    enum SimpleEnum
-    {
-        ONE, TWO, THREE
-    };
+    enum SimpleEnum { ONE, TWO, THREE };
 
     m.def("register_bad_enum", [m]() {
         py::enum_<SimpleEnum>(m, "SimpleEnum")
-            .value("ONE", SimpleEnum::ONE)          //NOTE: all value function calls are called with the same first parameter value
+            .value("ONE", SimpleEnum::ONE) // NOTE: all value function calls are called with the
+                                           // same first parameter value
             .value("ONE", SimpleEnum::TWO)
             .value("ONE", SimpleEnum::THREE)
             .export_values();
@@ -90,33 +72,36 @@ TEST_SUBMODULE(enums, m) {
     enum class ScopedShortEnum : short {};
     enum class ScopedLongEnum : long {};
     enum UnscopedUInt64Enum : std::uint64_t {};
-    static_assert(py::detail::all_of<
-        std::is_same<py::enum_<UnscopedUCharEnum>::Scalar, unsigned char>,
-        std::is_same<py::enum_<ScopedShortEnum>::Scalar, short>,
-        std::is_same<py::enum_<ScopedLongEnum>::Scalar, long>,
-        std::is_same<py::enum_<UnscopedUInt64Enum>::Scalar, std::uint64_t>
-    >::value, "Error during the deduction of enum's scalar type with normal integer underlying");
+    static_assert(
+        py::detail::all_of<
+            std::is_same<py::enum_<UnscopedUCharEnum>::Scalar, unsigned char>,
+            std::is_same<py::enum_<ScopedShortEnum>::Scalar, short>,
+            std::is_same<py::enum_<ScopedLongEnum>::Scalar, long>,
+            std::is_same<py::enum_<UnscopedUInt64Enum>::Scalar, std::uint64_t>>::value,
+        "Error during the deduction of enum's scalar type with normal integer underlying");
 
     // test_enum_scalar_with_char_underlying
-    enum class ScopedCharEnum   : char     { Zero, Positive };
-    enum class ScopedWCharEnum  : wchar_t  { Zero, Positive };
+    enum class ScopedCharEnum : char { Zero, Positive };
+    enum class ScopedWCharEnum : wchar_t { Zero, Positive };
     enum class ScopedChar32Enum : char32_t { Zero, Positive };
     enum class ScopedChar16Enum : char16_t { Zero, Positive };
 
     // test the scalar of char type enums according to chapter 'Character types'
     // from https://en.cppreference.com/w/cpp/language/types
-    static_assert(py::detail::any_of<
-        std::is_same<py::enum_<ScopedCharEnum>::Scalar, signed char>, // e.g. gcc on x86
-        std::is_same<py::enum_<ScopedCharEnum>::Scalar, unsigned char>  // e.g. arm linux
-    >::value, "char should be cast to either signed char or unsigned char");
     static_assert(
-        sizeof(py::enum_<ScopedWCharEnum>::Scalar) == 2 ||
-        sizeof(py::enum_<ScopedWCharEnum>::Scalar) == 4
-    , "wchar_t should be either 16 bits (Windows) or 32 (everywhere else)");
-    static_assert(py::detail::all_of<
-        std::is_same<py::enum_<ScopedChar32Enum>::Scalar, std::uint_least32_t>,
-        std::is_same<py::enum_<ScopedChar16Enum>::Scalar, std::uint_least16_t>
-    >::value, "char32_t, char16_t (and char8_t)'s size, signedness, and alignment is determined");
+        py::detail::any_of<
+            std::is_same<py::enum_<ScopedCharEnum>::Scalar, signed char>,  // e.g. gcc on x86
+            std::is_same<py::enum_<ScopedCharEnum>::Scalar, unsigned char> // e.g. arm linux
+            >::value,
+        "char should be cast to either signed char or unsigned char");
+    static_assert(sizeof(py::enum_<ScopedWCharEnum>::Scalar) == 2
+                      || sizeof(py::enum_<ScopedWCharEnum>::Scalar) == 4,
+                  "wchar_t should be either 16 bits (Windows) or 32 (everywhere else)");
+    static_assert(
+        py::detail::all_of<
+            std::is_same<py::enum_<ScopedChar32Enum>::Scalar, std::uint_least32_t>,
+            std::is_same<py::enum_<ScopedChar16Enum>::Scalar, std::uint_least16_t>>::value,
+        "char32_t, char16_t (and char8_t)'s size, signedness, and alignment is determined");
 #if defined(PYBIND11_HAS_U8STRING)
     enum class ScopedChar8Enum : char8_t { Zero, Positive };
     static_assert(std::is_same<py::enum_<ScopedChar8Enum>::Scalar, unsigned char>::value);
diff --git a/ext/pybind11/tests/test_enum.py b/ext/pybind11/tests/test_enum.py
index 14c754e726..f14a72398f 100644
--- a/ext/pybind11/tests/test_enum.py
+++ b/ext/pybind11/tests/test_enum.py
@@ -1,7 +1,5 @@
-# -*- coding: utf-8 -*-
 import pytest
 
-import env
 from pybind11_tests import enums as m
 
 
@@ -241,10 +239,7 @@ def test_char_underlying_enum():  # Issue #1331/PR #1334:
     assert type(m.ScopedCharEnum.Positive.__int__()) is int
     assert int(m.ScopedChar16Enum.Zero) == 0
     assert hash(m.ScopedChar32Enum.Positive) == 1
-    if env.PY2:
-        assert m.ScopedCharEnum.Positive.__getstate__() == 1  # long
-    else:
-        assert type(m.ScopedCharEnum.Positive.__getstate__()) is int
+    assert type(m.ScopedCharEnum.Positive.__getstate__()) is int
     assert m.ScopedWCharEnum(1) == m.ScopedWCharEnum.Positive
     with pytest.raises(TypeError):
         # Even if the underlying type is char, only an int can be used to construct the enum:
@@ -255,10 +250,7 @@ def test_bool_underlying_enum():
     assert type(m.ScopedBoolEnum.TRUE.__int__()) is int
     assert int(m.ScopedBoolEnum.FALSE) == 0
     assert hash(m.ScopedBoolEnum.TRUE) == 1
-    if env.PY2:
-        assert m.ScopedBoolEnum.TRUE.__getstate__() == 1  # long
-    else:
-        assert type(m.ScopedBoolEnum.TRUE.__getstate__()) is int
+    assert type(m.ScopedBoolEnum.TRUE.__getstate__()) is int
     assert m.ScopedBoolEnum(1) == m.ScopedBoolEnum.TRUE
     # Enum could construct with a bool
     # (bool is a strict subclass of int, and False will be converted to 0)
diff --git a/ext/pybind11/tests/test_eval.cpp b/ext/pybind11/tests/test_eval.cpp
index 29366f6798..cd2903f0ab 100644
--- a/ext/pybind11/tests/test_eval.cpp
+++ b/ext/pybind11/tests/test_eval.cpp
@@ -7,10 +7,10 @@
     BSD-style license that can be found in the LICENSE file.
 */
 
-
 #include <pybind11/eval.h>
 
 #include "pybind11_tests.h"
+
 #include <utility>
 
 TEST_SUBMODULE(eval_, m) {
@@ -20,16 +20,13 @@ TEST_SUBMODULE(eval_, m) {
 
     m.def("test_eval_statements", [global]() {
         auto local = py::dict();
-        local["call_test"] = py::cpp_function([&]() -> int {
-            return 42;
-        });
+        local["call_test"] = py::cpp_function([&]() -> int { return 42; });
 
         // Regular string literal
-        py::exec(
-            "message = 'Hello World!'\n"
-            "x = call_test()",
-            global, local
-        );
+        py::exec("message = 'Hello World!'\n"
+                 "x = call_test()",
+                 global,
+                 local);
 
         // Multi-line raw string literal
         py::exec(R"(
@@ -37,8 +34,9 @@ TEST_SUBMODULE(eval_, m) {
                 print(message)
             else:
                 raise RuntimeError
-            )", global, local
-        );
+            )",
+                 global,
+                 local);
         auto x = local["x"].cast<int>();
 
         return x == 42;
@@ -53,9 +51,7 @@ TEST_SUBMODULE(eval_, m) {
 
     m.def("test_eval_single_statement", []() {
         auto local = py::dict();
-        local["call_test"] = py::cpp_function([&]() -> int {
-            return 42;
-        });
+        local["call_test"] = py::cpp_function([&]() -> int { return 42; });
 
         auto result = py::eval<py::eval_single_statement>("x = call_test()", py::dict(), local);
         auto x = local["x"].cast<int>();
@@ -93,8 +89,9 @@ TEST_SUBMODULE(eval_, m) {
 
     // test_eval_empty_globals
     m.def("eval_empty_globals", [](py::object global) {
-        if (global.is_none())
+        if (global.is_none()) {
             global = py::dict();
+        }
         auto int_class = py::eval("isinstance(42, int)", global);
         return global;
     });
@@ -113,7 +110,9 @@ TEST_SUBMODULE(eval_, m) {
 
             def func_local():
                 return local_value
-            )", global, local);
+            )",
+                 global,
+                 local);
         return std::make_pair(global, local);
     });
 }
diff --git a/ext/pybind11/tests/test_eval.py b/ext/pybind11/tests/test_eval.py
index 1bbd991bc0..51b6b796b4 100644
--- a/ext/pybind11/tests/test_eval.py
+++ b/ext/pybind11/tests/test_eval.py
@@ -1,4 +1,3 @@
-# -*- coding: utf-8 -*-
 import os
 
 import pytest
@@ -18,7 +17,7 @@ def test_evals(capture):
     assert m.test_eval_failure()
 
 
-@pytest.mark.xfail("env.PYPY and not env.PY2", raises=RuntimeError)
+@pytest.mark.xfail("env.PYPY", raises=RuntimeError)
 def test_eval_file():
     filename = os.path.join(os.path.dirname(__file__), "test_eval_call.py")
     assert m.test_eval_file(filename)
diff --git a/ext/pybind11/tests/test_eval_call.py b/ext/pybind11/tests/test_eval_call.py
index 373b67bac8..fd1da2a5cc 100644
--- a/ext/pybind11/tests/test_eval_call.py
+++ b/ext/pybind11/tests/test_eval_call.py
@@ -1,4 +1,3 @@
-# -*- coding: utf-8 -*-
 # This file is called from 'test_eval.py'
 
 if "call_test2" in locals():
diff --git a/ext/pybind11/tests/test_exceptions.cpp b/ext/pybind11/tests/test_exceptions.cpp
index 25adb32ed1..f57e095068 100644
--- a/ext/pybind11/tests/test_exceptions.cpp
+++ b/ext/pybind11/tests/test_exceptions.cpp
@@ -9,15 +9,18 @@
 #include "test_exceptions.h"
 
 #include "local_bindings.h"
-
 #include "pybind11_tests.h"
+
+#include <exception>
+#include <stdexcept>
 #include <utility>
 
 // A type that should be raised as an exception in Python
 class MyException : public std::exception {
 public:
-    explicit MyException(const char * m) : message{m} {}
-    const char * what() const noexcept override {return message.c_str();}
+    explicit MyException(const char *m) : message{m} {}
+    const char *what() const noexcept override { return message.c_str(); }
+
 private:
     std::string message = "";
 };
@@ -25,8 +28,9 @@ private:
 // A type that should be translated to a standard Python exception
 class MyException2 : public std::exception {
 public:
-    explicit MyException2(const char * m) : message{m} {}
-    const char * what() const noexcept override {return message.c_str();}
+    explicit MyException2(const char *m) : message{m} {}
+    const char *what() const noexcept override { return message.c_str(); }
+
 private:
     std::string message = "";
 };
@@ -34,13 +38,13 @@ private:
 // A type that is not derived from std::exception (and is thus unknown)
 class MyException3 {
 public:
-    explicit MyException3(const char * m) : message{m} {}
-    virtual const char * what() const noexcept {return message.c_str();}
+    explicit MyException3(const char *m) : message{m} {}
+    virtual const char *what() const noexcept { return message.c_str(); }
     // Rule of 5 BEGIN: to preempt compiler warnings.
-    MyException3(const MyException3&) = default;
-    MyException3(MyException3&&) = default;
-    MyException3& operator=(const MyException3&) = default;
-    MyException3& operator=(MyException3&&) = default;
+    MyException3(const MyException3 &) = default;
+    MyException3(MyException3 &&) = default;
+    MyException3 &operator=(const MyException3 &) = default;
+    MyException3 &operator=(MyException3 &&) = default;
     virtual ~MyException3() = default;
     // Rule of 5 END.
 private:
@@ -51,13 +55,13 @@ private:
 // and delegated to its exception translator
 class MyException4 : public std::exception {
 public:
-    explicit MyException4(const char * m) : message{m} {}
-    const char * what() const noexcept override {return message.c_str();}
+    explicit MyException4(const char *m) : message{m} {}
+    const char *what() const noexcept override { return message.c_str(); }
+
 private:
     std::string message = "";
 };
 
-
 // Like the above, but declared via the helper function
 class MyException5 : public std::logic_error {
 public:
@@ -69,17 +73,16 @@ class MyException5_1 : public MyException5 {
     using MyException5::MyException5;
 };
 
-
 // Exception that will be caught via the module local translator.
 class MyException6 : public std::exception {
 public:
-    explicit MyException6(const char * m) : message{m} {}
-    const char * what() const noexcept override {return message.c_str();}
+    explicit MyException6(const char *m) : message{m} {}
+    const char *what() const noexcept override { return message.c_str(); }
+
 private:
     std::string message = "";
 };
 
-
 struct PythonCallInDestructor {
     explicit PythonCallInDestructor(const py::dict &d) : d(d) {}
     ~PythonCallInDestructor() { d["good"] = true; }
@@ -87,8 +90,6 @@ struct PythonCallInDestructor {
     py::dict d;
 };
 
-
-
 struct PythonAlreadySetInDestructor {
     explicit PythonAlreadySetInDestructor(const py::str &s) : s(s) {}
     ~PythonAlreadySetInDestructor() {
@@ -96,8 +97,7 @@ struct PythonAlreadySetInDestructor {
         try {
             // Assign to a py::object to force read access of nonexistent dict entry
             py::object o = foo["bar"];
-        }
-        catch (py::error_already_set& ex) {
+        } catch (py::error_already_set &ex) {
             ex.discard_as_unraisable(s);
         }
     }
@@ -105,17 +105,17 @@ struct PythonAlreadySetInDestructor {
     py::str s;
 };
 
-
 TEST_SUBMODULE(exceptions, m) {
-    m.def("throw_std_exception", []() {
-        throw std::runtime_error("This exception was intentionally thrown.");
-    });
+    m.def("throw_std_exception",
+          []() { throw std::runtime_error("This exception was intentionally thrown."); });
 
     // make a new custom exception and use it as a translation target
     static py::exception<MyException> ex(m, "MyException");
     py::register_exception_translator([](std::exception_ptr p) {
         try {
-            if (p) std::rethrow_exception(p);
+            if (p) {
+                std::rethrow_exception(p);
+            }
         } catch (const MyException &e) {
             // Set MyException as the active python error
             ex(e.what());
@@ -127,7 +127,9 @@ TEST_SUBMODULE(exceptions, m) {
     // never by visible from Python
     py::register_exception_translator([](std::exception_ptr p) {
         try {
-            if (p) std::rethrow_exception(p);
+            if (p) {
+                std::rethrow_exception(p);
+            }
         } catch (const MyException2 &e) {
             // Translate this exception to a standard RuntimeError
             PyErr_SetString(PyExc_RuntimeError, e.what());
@@ -139,7 +141,9 @@ TEST_SUBMODULE(exceptions, m) {
     // translator for MyException by throwing a new exception
     py::register_exception_translator([](std::exception_ptr p) {
         try {
-            if (p) std::rethrow_exception(p);
+            if (p) {
+                std::rethrow_exception(p);
+            }
         } catch (const MyException4 &e) {
             throw MyException(e.what());
         }
@@ -150,26 +154,30 @@ TEST_SUBMODULE(exceptions, m) {
     // A slightly more complicated one that declares MyException5_1 as a subclass of MyException5
     py::register_exception<MyException5_1>(m, "MyException5_1", ex5.ptr());
 
-    //py::register_local_exception<LocalSimpleException>(m, "LocalSimpleException")
+    // py::register_local_exception<LocalSimpleException>(m, "LocalSimpleException")
 
     py::register_local_exception_translator([](std::exception_ptr p) {
-      try {
-          if (p) {
-            std::rethrow_exception(p);
-          }
-      } catch (const MyException6 &e) {
-        PyErr_SetString(PyExc_RuntimeError, e.what());
-      }
+        try {
+            if (p) {
+                std::rethrow_exception(p);
+            }
+        } catch (const MyException6 &e) {
+            PyErr_SetString(PyExc_RuntimeError, e.what());
+        }
     });
 
     m.def("throws1", []() { throw MyException("this error should go to a custom type"); });
-    m.def("throws2", []() { throw MyException2("this error should go to a standard Python exception"); });
+    m.def("throws2",
+          []() { throw MyException2("this error should go to a standard Python exception"); });
     m.def("throws3", []() { throw MyException3("this error cannot be translated"); });
     m.def("throws4", []() { throw MyException4("this error is rethrown"); });
-    m.def("throws5", []() { throw MyException5("this is a helper-defined translated exception"); });
+    m.def("throws5",
+          []() { throw MyException5("this is a helper-defined translated exception"); });
     m.def("throws5_1", []() { throw MyException5_1("MyException5 subclass"); });
     m.def("throws6", []() { throw MyException6("MyException6 only handled in this module"); });
-    m.def("throws_logic_error", []() { throw std::logic_error("this error should fall through to the standard handler"); });
+    m.def("throws_logic_error", []() {
+        throw std::logic_error("this error should fall through to the standard handler");
+    });
     m.def("throws_overflow_error", []() { throw std::overflow_error(""); });
     m.def("throws_local_error", []() { throw LocalException("never caught"); });
     m.def("throws_local_simple_error", []() { throw LocalSimpleException("this mod"); });
@@ -178,9 +186,10 @@ TEST_SUBMODULE(exceptions, m) {
         try {
             // Assign to a py::object to force read access of nonexistent dict entry
             py::object o = foo["bar"];
-        }
-        catch (py::error_already_set& ex) {
-            if (!ex.matches(PyExc_KeyError)) throw;
+        } catch (py::error_already_set &ex) {
+            if (!ex.matches(PyExc_KeyError)) {
+                throw;
+            }
             return true;
         }
         return false;
@@ -190,9 +199,10 @@ TEST_SUBMODULE(exceptions, m) {
         try {
             // Assign to a py::object to force read access of nonexistent dict entry
             py::object o = foo["bar"];
-        }
-        catch (py::error_already_set &ex) {
-            if (!ex.matches(PyExc_Exception)) throw;
+        } catch (py::error_already_set &ex) {
+            if (!ex.matches(PyExc_Exception)) {
+                throw;
+            }
             return true;
         }
         return false;
@@ -201,30 +211,35 @@ TEST_SUBMODULE(exceptions, m) {
         try {
             // On Python >= 3.6, this raises a ModuleNotFoundError, a subclass of ImportError
             py::module_::import("nonexistent");
-        }
-        catch (py::error_already_set &ex) {
-            if (!ex.matches(PyExc_ImportError)) throw;
+        } catch (py::error_already_set &ex) {
+            if (!ex.matches(PyExc_ImportError)) {
+                throw;
+            }
             return true;
         }
         return false;
     });
 
     m.def("throw_already_set", [](bool err) {
-        if (err)
+        if (err) {
             PyErr_SetString(PyExc_ValueError, "foo");
+        }
         try {
             throw py::error_already_set();
-        } catch (const std::runtime_error& e) {
-            if ((err && e.what() != std::string("ValueError: foo")) ||
-                (!err && e.what() != std::string("Unknown internal error occurred")))
-            {
+        } catch (const std::runtime_error &e) {
+            if ((err && e.what() != std::string("ValueError: foo"))
+                || (!err
+                    && e.what()
+                           != std::string("Internal error: pybind11::error_already_set called "
+                                          "while Python error indicator not set."))) {
                 PyErr_Clear();
                 throw std::runtime_error("error message mismatch");
             }
         }
         PyErr_Clear();
-        if (err)
+        if (err) {
             PyErr_SetString(PyExc_ValueError, "foo");
+        }
         throw py::error_already_set();
     });
 
@@ -234,7 +249,7 @@ TEST_SUBMODULE(exceptions, m) {
             PythonCallInDestructor set_dict_in_destructor(d);
             PyErr_SetString(PyExc_ValueError, "foo");
             throw py::error_already_set();
-        } catch (const py::error_already_set&) {
+        } catch (const py::error_already_set &) {
             retval = true;
         }
         return retval;
@@ -251,20 +266,21 @@ TEST_SUBMODULE(exceptions, m) {
               try {
                   f(*args);
               } catch (py::error_already_set &ex) {
-                  if (ex.matches(exc_type))
+                  if (ex.matches(exc_type)) {
                       py::print(ex.what());
-                  else
-                      throw;
+                  } else {
+                      // Simply `throw;` also works and is better, but using `throw ex;`
+                      // here to cover that situation (as observed in the wild).
+                      throw ex; // Invokes the copy ctor.
+                  }
               }
           });
 
     // Test repr that cannot be displayed
-    m.def("simple_bool_passthrough", [](bool x) {return x;});
+    m.def("simple_bool_passthrough", [](bool x) { return x; });
 
     m.def("throw_should_be_translated_to_key_error", []() { throw shared_exception(); });
 
-#if PY_VERSION_HEX >= 0x03030000
-
     m.def("raise_from", []() {
         PyErr_SetString(PyExc_ValueError, "inner");
         py::raise_from(PyExc_ValueError, "outer");
@@ -275,11 +291,52 @@ TEST_SUBMODULE(exceptions, m) {
         try {
             PyErr_SetString(PyExc_ValueError, "inner");
             throw py::error_already_set();
-        } catch (py::error_already_set& e) {
+        } catch (py::error_already_set &e) {
             py::raise_from(e, PyExc_ValueError, "outer");
             throw py::error_already_set();
         }
     });
 
-#endif
+    m.def("throw_nested_exception", []() {
+        try {
+            throw std::runtime_error("Inner Exception");
+        } catch (const std::runtime_error &) {
+            std::throw_with_nested(std::runtime_error("Outer Exception"));
+        }
+    });
+
+    m.def("error_already_set_what", [](const py::object &exc_type, const py::object &exc_value) {
+        PyErr_SetObject(exc_type.ptr(), exc_value.ptr());
+        std::string what = py::error_already_set().what();
+        bool py_err_set_after_what = (PyErr_Occurred() != nullptr);
+        PyErr_Clear();
+        return py::make_tuple(std::move(what), py_err_set_after_what);
+    });
+
+    m.def("test_cross_module_interleaved_error_already_set", []() {
+        auto cm = py::module_::import("cross_module_interleaved_error_already_set");
+        auto interleaved_error_already_set
+            = reinterpret_cast<void (*)()>(PyLong_AsVoidPtr(cm.attr("funcaddr").ptr()));
+        interleaved_error_already_set();
+    });
+
+    m.def("test_error_already_set_double_restore", [](bool dry_run) {
+        PyErr_SetString(PyExc_ValueError, "Random error.");
+        py::error_already_set e;
+        e.restore();
+        PyErr_Clear();
+        if (!dry_run) {
+            e.restore();
+        }
+    });
+
+    // https://github.com/pybind/pybind11/issues/4075
+    m.def("test_pypy_oserror_normalization", []() {
+        try {
+            py::module_::import("io").attr("open")("this_filename_must_not_exist", "r");
+        } catch (const py::error_already_set &e) {
+            return py::str(e.what()); // str must be built before e goes out of scope.
+        }
+        return py::str("UNEXPECTED");
+    });
 }
diff --git a/ext/pybind11/tests/test_exceptions.h b/ext/pybind11/tests/test_exceptions.h
index 9d428312eb..03684b89fa 100644
--- a/ext/pybind11/tests/test_exceptions.h
+++ b/ext/pybind11/tests/test_exceptions.h
@@ -1,5 +1,6 @@
 #pragma once
 #include "pybind11_tests.h"
+
 #include <stdexcept>
 
 // shared exceptions for cross_module_tests
diff --git a/ext/pybind11/tests/test_exceptions.py b/ext/pybind11/tests/test_exceptions.py
index 3821eadaa4..0d2c808143 100644
--- a/ext/pybind11/tests/test_exceptions.py
+++ b/ext/pybind11/tests/test_exceptions.py
@@ -1,10 +1,10 @@
-# -*- coding: utf-8 -*-
 import sys
 
 import pytest
 
-import env  # noqa: F401
+import env
 import pybind11_cross_module_tests as cm
+import pybind11_tests  # noqa: F401
 from pybind11_tests import exceptions as m
 
 
@@ -17,14 +17,16 @@ def test_std_exception(msg):
 def test_error_already_set(msg):
     with pytest.raises(RuntimeError) as excinfo:
         m.throw_already_set(False)
-    assert msg(excinfo.value) == "Unknown internal error occurred"
+    assert (
+        msg(excinfo.value)
+        == "Internal error: pybind11::error_already_set called while Python error indicator not set."
+    )
 
     with pytest.raises(ValueError) as excinfo:
         m.throw_already_set(True)
     assert msg(excinfo.value) == "foo"
 
 
-@pytest.mark.skipif("env.PY2")
 def test_raise_from(msg):
     with pytest.raises(ValueError) as excinfo:
         m.raise_from()
@@ -32,7 +34,6 @@ def test_raise_from(msg):
     assert msg(excinfo.value.__cause__) == "inner"
 
 
-@pytest.mark.skipif("env.PY2")
 def test_raise_from_already_set(msg):
     with pytest.raises(ValueError) as excinfo:
         m.raise_from_already_set()
@@ -72,9 +73,9 @@ def test_cross_module_exceptions(msg):
 
 # TODO: FIXME
 @pytest.mark.xfail(
-    "env.PYPY and env.MACOS",
+    "env.MACOS and (env.PYPY or pybind11_tests.compiler_info.startswith('Homebrew Clang'))",
     raises=RuntimeError,
-    reason="Expected failure with PyPy and libc++ (Issue #2847 & PR #2999)",
+    reason="See Issue #2847, PR #2999, PR #4324",
 )
 def test_cross_module_exception_translator():
     with pytest.raises(KeyError):
@@ -91,16 +92,18 @@ def test_python_call_in_catch():
 def ignore_pytest_unraisable_warning(f):
     unraisable = "PytestUnraisableExceptionWarning"
     if hasattr(pytest, unraisable):  # Python >= 3.8 and pytest >= 6
-        dec = pytest.mark.filterwarnings("ignore::pytest.{}".format(unraisable))
+        dec = pytest.mark.filterwarnings(f"ignore::pytest.{unraisable}")
         return dec(f)
     else:
         return f
 
 
+# TODO: find out why this fails on PyPy, https://foss.heptapod.net/pypy/pypy/-/issues/3583
+@pytest.mark.xfail(env.PYPY, reason="Failure on PyPy 3.8 (7.3.7)", strict=False)
 @ignore_pytest_unraisable_warning
 def test_python_alreadyset_in_destructor(monkeypatch, capsys):
     hooked = False
-    triggered = [False]  # mutable, so Python 2.7 closure can modify it
+    triggered = False
 
     if hasattr(sys, "unraisablehook"):  # Python 3.8+
         hooked = True
@@ -110,7 +113,8 @@ def test_python_alreadyset_in_destructor(monkeypatch, capsys):
         def hook(unraisable_hook_args):
             exc_type, exc_value, exc_tb, err_msg, obj = unraisable_hook_args
             if obj == "already_set demo":
-                triggered[0] = True
+                nonlocal triggered
+                triggered = True
             default_hook(unraisable_hook_args)
             return
 
@@ -119,11 +123,11 @@ def test_python_alreadyset_in_destructor(monkeypatch, capsys):
 
     assert m.python_alreadyset_in_destructor("already_set demo") is True
     if hooked:
-        assert triggered[0] is True
+        assert triggered is True
 
     _, captured_stderr = capsys.readouterr()
-    # Error message is different in Python 2 and 3, check for words that appear in both
-    assert "ignored" in captured_stderr and "already_set demo" in captured_stderr
+    assert captured_stderr.startswith("Exception ignored in: 'already_set demo'")
+    assert captured_stderr.rstrip().endswith("KeyError: 'bar'")
 
 
 def test_exception_matches():
@@ -182,8 +186,8 @@ def test_custom(msg):
     with pytest.raises(m.MyException5) as excinfo:
         try:
             m.throws5()
-        except m.MyException5_1:
-            raise RuntimeError("Exception error: caught child from parent")
+        except m.MyException5_1 as err:
+            raise RuntimeError("Exception error: caught child from parent") from err
     assert msg(excinfo.value) == "this is a helper-defined translated exception"
 
 
@@ -237,9 +241,16 @@ def test_nested_throws(capture):
     assert str(excinfo.value) == "this is a helper-defined translated exception"
 
 
+def test_throw_nested_exception():
+    with pytest.raises(RuntimeError) as excinfo:
+        m.throw_nested_exception()
+    assert str(excinfo.value) == "Outer Exception"
+    assert str(excinfo.value.__cause__) == "Inner Exception"
+
+
 # This can often happen if you wrap a pybind11 class in a Python wrapper
 def test_invalid_repr():
-    class MyRepr(object):
+    class MyRepr:
         def __repr__(self):
             raise AttributeError("Example error")
 
@@ -263,3 +274,110 @@ def test_local_translator(msg):
         m.throws_local_simple_error()
     assert not isinstance(excinfo.value, cm.LocalSimpleException)
     assert msg(excinfo.value) == "this mod"
+
+
+def test_error_already_set_message_with_unicode_surrogate():  # Issue #4288
+    assert m.error_already_set_what(RuntimeError, "\ud927") == (
+        "RuntimeError: \\ud927",
+        False,
+    )
+
+
+def test_error_already_set_message_with_malformed_utf8():
+    assert m.error_already_set_what(RuntimeError, b"\x80") == (
+        "RuntimeError: b'\\x80'",
+        False,
+    )
+
+
+class FlakyException(Exception):
+    def __init__(self, failure_point):
+        if failure_point == "failure_point_init":
+            raise ValueError("triggered_failure_point_init")
+        self.failure_point = failure_point
+
+    def __str__(self):
+        if self.failure_point == "failure_point_str":
+            raise ValueError("triggered_failure_point_str")
+        return "FlakyException.__str__"
+
+
+@pytest.mark.parametrize(
+    "exc_type, exc_value, expected_what",
+    (
+        (ValueError, "plain_str", "ValueError: plain_str"),
+        (ValueError, ("tuple_elem",), "ValueError: tuple_elem"),
+        (FlakyException, ("happy",), "FlakyException: FlakyException.__str__"),
+    ),
+)
+def test_error_already_set_what_with_happy_exceptions(
+    exc_type, exc_value, expected_what
+):
+    what, py_err_set_after_what = m.error_already_set_what(exc_type, exc_value)
+    assert not py_err_set_after_what
+    assert what == expected_what
+
+
+@pytest.mark.skipif("env.PYPY", reason="PyErr_NormalizeException Segmentation fault")
+def test_flaky_exception_failure_point_init():
+    with pytest.raises(RuntimeError) as excinfo:
+        m.error_already_set_what(FlakyException, ("failure_point_init",))
+    lines = str(excinfo.value).splitlines()
+    # PyErr_NormalizeException replaces the original FlakyException with ValueError:
+    assert lines[:3] == [
+        "pybind11::error_already_set: MISMATCH of original and normalized active exception types:"
+        " ORIGINAL FlakyException REPLACED BY ValueError: triggered_failure_point_init",
+        "",
+        "At:",
+    ]
+    # Checking the first two lines of the traceback as formatted in error_string():
+    assert "test_exceptions.py(" in lines[3]
+    assert lines[3].endswith("): __init__")
+    assert lines[4].endswith("): test_flaky_exception_failure_point_init")
+
+
+def test_flaky_exception_failure_point_str():
+    what, py_err_set_after_what = m.error_already_set_what(
+        FlakyException, ("failure_point_str",)
+    )
+    assert not py_err_set_after_what
+    lines = what.splitlines()
+    if env.PYPY and len(lines) == 3:
+        n = 3  # Traceback is missing.
+    else:
+        n = 5
+    assert (
+        lines[:n]
+        == [
+            "FlakyException: <MESSAGE UNAVAILABLE DUE TO ANOTHER EXCEPTION>",
+            "",
+            "MESSAGE UNAVAILABLE DUE TO EXCEPTION: ValueError: triggered_failure_point_str",
+            "",
+            "At:",
+        ][:n]
+    )
+
+
+def test_cross_module_interleaved_error_already_set():
+    with pytest.raises(RuntimeError) as excinfo:
+        m.test_cross_module_interleaved_error_already_set()
+    assert str(excinfo.value) in (
+        "2nd error.",  # Almost all platforms.
+        "RuntimeError: 2nd error.",  # Some PyPy builds (seen under macOS).
+    )
+
+
+def test_error_already_set_double_restore():
+    m.test_error_already_set_double_restore(True)  # dry_run
+    with pytest.raises(RuntimeError) as excinfo:
+        m.test_error_already_set_double_restore(False)
+    assert str(excinfo.value) == (
+        "Internal error: pybind11::detail::error_fetch_and_normalize::restore()"
+        " called a second time. ORIGINAL ERROR: ValueError: Random error."
+    )
+
+
+def test_pypy_oserror_normalization():
+    # https://github.com/pybind/pybind11/issues/4075
+    what = m.test_pypy_oserror_normalization()
+    assert "this_filename_must_not_exist" in what
diff --git a/ext/pybind11/tests/test_factory_constructors.cpp b/ext/pybind11/tests/test_factory_constructors.cpp
index 660e2896af..a387cd2e76 100644
--- a/ext/pybind11/tests/test_factory_constructors.cpp
+++ b/ext/pybind11/tests/test_factory_constructors.cpp
@@ -10,6 +10,7 @@
 
 #include "constructor_stats.h"
 #include "pybind11_tests.h"
+
 #include <cmath>
 #include <new>
 #include <utility>
@@ -38,8 +39,7 @@ class TestFactory2 {
     explicit TestFactory2(std::string v) : value(std::move(v)) { print_created(this, value); }
 
 public:
-    TestFactory2(TestFactory2 &&m) noexcept {
-        value = std::move(m.value);
+    TestFactory2(TestFactory2 &&m) noexcept : value{std::move(m.value)} {
         print_move_created(this);
     }
     TestFactory2 &operator=(TestFactory2 &&m) noexcept {
@@ -59,8 +59,7 @@ protected:
 
 public:
     explicit TestFactory3(std::string v) : value(std::move(v)) { print_created(this, value); }
-    TestFactory3(TestFactory3 &&m) noexcept {
-        value = std::move(m.value);
+    TestFactory3(TestFactory3 &&m) noexcept : value{std::move(m.value)} {
         print_move_created(this);
     }
     TestFactory3 &operator=(TestFactory3 &&m) noexcept {
@@ -89,14 +88,23 @@ class TestFactory6 {
 protected:
     int value;
     bool alias = false;
+
 public:
     explicit TestFactory6(int i) : value{i} { print_created(this, i); }
     TestFactory6(TestFactory6 &&f) noexcept {
         print_move_created(this);
+        // NOLINTNEXTLINE(cppcoreguidelines-prefer-member-initializer)
         value = f.value;
+        // NOLINTNEXTLINE(cppcoreguidelines-prefer-member-initializer)
+        alias = f.alias;
+    }
+    TestFactory6(const TestFactory6 &f) {
+        print_copy_created(this);
+        // NOLINTNEXTLINE(cppcoreguidelines-prefer-member-initializer)
+        value = f.value;
+        // NOLINTNEXTLINE(cppcoreguidelines-prefer-member-initializer)
         alias = f.alias;
     }
-    TestFactory6(const TestFactory6 &f) { print_copy_created(this); value = f.value; alias = f.alias; }
     virtual ~TestFactory6() { print_destroyed(this); }
     virtual int get() { return value; }
     bool has_alias() const { return alias; }
@@ -127,14 +135,23 @@ class TestFactory7 {
 protected:
     int value;
     bool alias = false;
+
 public:
     explicit TestFactory7(int i) : value{i} { print_created(this, i); }
     TestFactory7(TestFactory7 &&f) noexcept {
         print_move_created(this);
+        // NOLINTNEXTLINE(cppcoreguidelines-prefer-member-initializer)
         value = f.value;
+        // NOLINTNEXTLINE(cppcoreguidelines-prefer-member-initializer)
+        alias = f.alias;
+    }
+    TestFactory7(const TestFactory7 &f) {
+        print_copy_created(this);
+        // NOLINTNEXTLINE(cppcoreguidelines-prefer-member-initializer)
+        value = f.value;
+        // NOLINTNEXTLINE(cppcoreguidelines-prefer-member-initializer)
         alias = f.alias;
     }
-    TestFactory7(const TestFactory7 &f) { print_copy_created(this); value = f.value; alias = f.alias; }
     virtual ~TestFactory7() { print_destroyed(this); }
     virtual int get() { return value; }
     bool has_alias() const { return alias; }
@@ -142,6 +159,7 @@ public:
 class PyTF7 : public TestFactory7 {
 public:
     explicit PyTF7(int i) : TestFactory7(i) {
+        // NOLINTNEXTLINE(cppcoreguidelines-prefer-member-initializer)
         alias = true;
         print_created(this, i);
     }
@@ -151,14 +169,15 @@ public:
     int get() override { PYBIND11_OVERRIDE(int, TestFactory7, get, /*no args*/); }
 };
 
-
 class TestFactoryHelper {
 public:
     // Non-movable, non-copyable type:
     // Return via pointer:
     static TestFactory1 *construct1() { return new TestFactory1(); }
     // Holder:
-    static std::unique_ptr<TestFactory1> construct1(int a) { return std::unique_ptr<TestFactory1>(new TestFactory1(a)); }
+    static std::unique_ptr<TestFactory1> construct1(int a) {
+        return std::unique_ptr<TestFactory1>(new TestFactory1(a));
+    }
     // pointer again
     static TestFactory1 *construct1_string(std::string a) {
         return new TestFactory1(std::move(a));
@@ -168,7 +187,9 @@ public:
     // pointer:
     static TestFactory2 *construct2() { return new TestFactory2(); }
     // holder:
-    static std::unique_ptr<TestFactory2> construct2(int a) { return std::unique_ptr<TestFactory2>(new TestFactory2(a)); }
+    static std::unique_ptr<TestFactory2> construct2(int a) {
+        return std::unique_ptr<TestFactory2>(new TestFactory2(a));
+    }
     // by value moving:
     static TestFactory2 construct2(std::string a) { return TestFactory2(std::move(a)); }
 
@@ -176,16 +197,18 @@ public:
     // pointer:
     static TestFactory3 *construct3() { return new TestFactory3(); }
     // holder:
-    static std::shared_ptr<TestFactory3> construct3(int a) { return std::shared_ptr<TestFactory3>(new TestFactory3(a)); }
+    static std::shared_ptr<TestFactory3> construct3(int a) {
+        return std::shared_ptr<TestFactory3>(new TestFactory3(a));
+    }
 };
 
 TEST_SUBMODULE(factory_constructors, m) {
 
     // Define various trivial types to allow simpler overload resolution:
     py::module_ m_tag = m.def_submodule("tag");
-#define MAKE_TAG_TYPE(Name) \
-    struct Name##_tag {}; \
-    py::class_<Name##_tag>(m_tag, #Name "_tag").def(py::init<>()); \
+#define MAKE_TAG_TYPE(Name)                                                                       \
+    struct Name##_tag {};                                                                         \
+    py::class_<Name##_tag>(m_tag, #Name "_tag").def(py::init<>());                                \
     m_tag.attr(#Name) = py::cast(Name##_tag{})
     MAKE_TAG_TYPE(pointer);
     MAKE_TAG_TYPE(unique_ptr);
@@ -208,9 +231,9 @@ TEST_SUBMODULE(factory_constructors, m) {
         .def(py::init([](unique_ptr_tag, int v) { return TestFactoryHelper::construct1(v); }))
         .def(py::init(&TestFactoryHelper::construct1_string)) // raw function pointer
         .def(py::init([](pointer_tag) { return TestFactoryHelper::construct1(); }))
-        .def(py::init([](py::handle, int v, py::handle) { return TestFactoryHelper::construct1(v); }))
-        .def_readwrite("value", &TestFactory1::value)
-        ;
+        .def(py::init(
+            [](py::handle, int v, py::handle) { return TestFactoryHelper::construct1(v); }))
+        .def_readwrite("value", &TestFactory1::value);
     py::class_<TestFactory2>(m, "TestFactory2")
         .def(py::init([](pointer_tag, int v) { return TestFactoryHelper::construct2(v); }))
         .def(py::init([](unique_ptr_tag, std::string v) {
@@ -221,7 +244,10 @@ TEST_SUBMODULE(factory_constructors, m) {
 
     // Stateful & reused:
     int c = 1;
-    auto c4a = [c](pointer_tag, TF4_tag, int a) { (void) c; return new TestFactory4(a);};
+    auto c4a = [c](pointer_tag, TF4_tag, int a) {
+        (void) c;
+        return new TestFactory4(a);
+    };
 
     // test_init_factory_basic, test_init_factory_casting
     py::class_<TestFactory3, std::shared_ptr<TestFactory3>> pyTestFactory3(m, "TestFactory3");
@@ -238,16 +264,17 @@ TEST_SUBMODULE(factory_constructors, m) {
         .def(py::init(c4a)) // derived ptr
         .def(py::init([](pointer_tag, TF5_tag, int a) { return new TestFactory5(a); }))
         // derived shared ptr:
-        .def(py::init([](shared_ptr_tag, TF4_tag, int a) { return std::make_shared<TestFactory4>(a); }))
-        .def(py::init([](shared_ptr_tag, TF5_tag, int a) { return std::make_shared<TestFactory5>(a); }))
+        .def(py::init(
+            [](shared_ptr_tag, TF4_tag, int a) { return std::make_shared<TestFactory4>(a); }))
+        .def(py::init(
+            [](shared_ptr_tag, TF5_tag, int a) { return std::make_shared<TestFactory5>(a); }))
 
         // Returns nullptr:
         .def(py::init([](null_ptr_tag) { return (TestFactory3 *) nullptr; }))
         .def(py::init([](null_unique_ptr_tag) { return std::unique_ptr<TestFactory3>(); }))
         .def(py::init([](null_shared_ptr_tag) { return std::shared_ptr<TestFactory3>(); }))
 
-        .def_readwrite("value", &TestFactory3::value)
-        ;
+        .def_readwrite("value", &TestFactory3::value);
 
     // test_init_factory_casting
     py::class_<TestFactory4, TestFactory3, std::shared_ptr<TestFactory4>>(m, "TestFactory4")
@@ -331,9 +358,7 @@ TEST_SUBMODULE(factory_constructors, m) {
     py::class_<NoPlacementNew>(m, "NoPlacementNew")
         .def(py::init<int>())
         .def(py::init([]() { return new NoPlacementNew(100); }))
-        .def_readwrite("i", &NoPlacementNew::i)
-        ;
-
+        .def_readwrite("i", &NoPlacementNew::i);
 
     // test_reallocations
     // Class that has verbose operator_new/operator_delete calls
@@ -343,23 +368,29 @@ TEST_SUBMODULE(factory_constructors, m) {
         explicit NoisyAlloc(double d) { py::print(py::str("NoisyAlloc(double {})").format(d)); }
         ~NoisyAlloc() { py::print("~NoisyAlloc()"); }
 
-        static void *operator new(size_t s) { py::print("noisy new"); return ::operator new(s); }
-        static void *operator new(size_t, void *p) { py::print("noisy placement new"); return p; }
-        static void operator delete(void *p, size_t) { py::print("noisy delete"); ::operator delete(p); }
+        static void *operator new(size_t s) {
+            py::print("noisy new");
+            return ::operator new(s);
+        }
+        static void *operator new(size_t, void *p) {
+            py::print("noisy placement new");
+            return p;
+        }
+        static void operator delete(void *p, size_t) {
+            py::print("noisy delete");
+            ::operator delete(p);
+        }
         static void operator delete(void *, void *) { py::print("noisy placement delete"); }
-#if defined(_MSC_VER) && _MSC_VER < 1910
-        // MSVC 2015 bug: the above "noisy delete" isn't invoked (fixed in MSVC 2017)
-        static void operator delete(void *p) { py::print("noisy delete"); ::operator delete(p); }
-#endif
     };
 
-
     py::class_<NoisyAlloc> pyNoisyAlloc(m, "NoisyAlloc");
-        // Since these overloads have the same number of arguments, the dispatcher will try each of
-        // them until the arguments convert.  Thus we can get a pre-allocation here when passing a
-        // single non-integer:
+    // Since these overloads have the same number of arguments, the dispatcher will try each of
+    // them until the arguments convert.  Thus we can get a pre-allocation here when passing a
+    // single non-integer:
     ignoreOldStyleInitWarnings([&pyNoisyAlloc]() {
-        pyNoisyAlloc.def("__init__", [](NoisyAlloc *a, int i) { new (a) NoisyAlloc(i); }); // Regular constructor, runs first, requires preallocation
+        pyNoisyAlloc.def("__init__", [](NoisyAlloc *a, int i) {
+            new (a) NoisyAlloc(i);
+        }); // Regular constructor, runs first, requires preallocation
     });
 
     pyNoisyAlloc.def(py::init([](double d) { return new NoisyAlloc(d); }));
@@ -370,7 +401,8 @@ TEST_SUBMODULE(factory_constructors, m) {
     pyNoisyAlloc.def(py::init([](double d, int) { return NoisyAlloc(d); }));
     // Old-style placement new init; requires preallocation
     ignoreOldStyleInitWarnings([&pyNoisyAlloc]() {
-        pyNoisyAlloc.def("__init__", [](NoisyAlloc &a, double d, double) { new (&a) NoisyAlloc(d); });
+        pyNoisyAlloc.def("__init__",
+                         [](NoisyAlloc &a, double d, double) { new (&a) NoisyAlloc(d); });
     });
     // Requires deallocation of previous overload preallocated value:
     pyNoisyAlloc.def(py::init([](int i, double) { return new NoisyAlloc(i); }));
@@ -380,7 +412,8 @@ TEST_SUBMODULE(factory_constructors, m) {
             "__init__", [](NoisyAlloc &a, int i, const std::string &) { new (&a) NoisyAlloc(i); });
     });
 
-    // static_assert testing (the following def's should all fail with appropriate compilation errors):
+    // static_assert testing (the following def's should all fail with appropriate compilation
+    // errors):
 #if 0
     struct BadF1Base {};
     struct BadF1 : BadF1Base {};
diff --git a/ext/pybind11/tests/test_factory_constructors.py b/ext/pybind11/tests/test_factory_constructors.py
index 8bc0269852..120a587c45 100644
--- a/ext/pybind11/tests/test_factory_constructors.py
+++ b/ext/pybind11/tests/test_factory_constructors.py
@@ -1,9 +1,7 @@
-# -*- coding: utf-8 -*-
 import re
 
 import pytest
 
-import env  # noqa: F401
 from pybind11_tests import ConstructorStats
 from pybind11_tests import factory_constructors as m
 from pybind11_tests.factory_constructors import tag
@@ -82,7 +80,7 @@ def test_init_factory_signature(msg):
             4. m.factory_constructors.TestFactory1(arg0: handle, arg1: int, arg2: handle)
 
         Invoked with: 'invalid', 'constructor', 'arguments'
-    """  # noqa: E501 line too long
+    """
     )
 
     assert (
@@ -465,12 +463,10 @@ def test_reallocation_g(capture, msg):
     )
 
 
-@pytest.mark.skipif("env.PY2")
 def test_invalid_self():
-    """Tests invocation of the pybind-registered base class with an invalid `self` argument.  You
-    can only actually do this on Python 3: Python 2 raises an exception itself if you try."""
+    """Tests invocation of the pybind-registered base class with an invalid `self` argument."""
 
-    class NotPybindDerived(object):
+    class NotPybindDerived:
         pass
 
     # Attempts to initialize with an invalid type passed as `self`:
diff --git a/ext/pybind11/tests/test_gil_scoped.cpp b/ext/pybind11/tests/test_gil_scoped.cpp
index b261085c88..f136086e84 100644
--- a/ext/pybind11/tests/test_gil_scoped.cpp
+++ b/ext/pybind11/tests/test_gil_scoped.cpp
@@ -7,43 +7,138 @@
     BSD-style license that can be found in the LICENSE file.
 */
 
-#include "pybind11_tests.h"
 #include <pybind11/functional.h>
 
+#include "pybind11_tests.h"
 
-class VirtClass  {
+#include <string>
+#include <thread>
+
+#define CROSS_MODULE(Function)                                                                    \
+    auto cm = py::module_::import("cross_module_gil_utils");                                      \
+    auto target = reinterpret_cast<void (*)()>(PyLong_AsVoidPtr(cm.attr(Function).ptr()));
+
+class VirtClass {
 public:
     virtual ~VirtClass() = default;
     VirtClass() = default;
-    VirtClass(const VirtClass&) = delete;
+    VirtClass(const VirtClass &) = delete;
     virtual void virtual_func() {}
     virtual void pure_virtual_func() = 0;
 };
 
 class PyVirtClass : public VirtClass {
-    void virtual_func() override {
-        PYBIND11_OVERRIDE(void, VirtClass, virtual_func,);
-    }
+    void virtual_func() override { PYBIND11_OVERRIDE(void, VirtClass, virtual_func, ); }
     void pure_virtual_func() override {
-        PYBIND11_OVERRIDE_PURE(void, VirtClass, pure_virtual_func,);
+        PYBIND11_OVERRIDE_PURE(void, VirtClass, pure_virtual_func, );
     }
 };
 
 TEST_SUBMODULE(gil_scoped, m) {
-  py::class_<VirtClass, PyVirtClass>(m, "VirtClass")
-      .def(py::init<>())
-      .def("virtual_func", &VirtClass::virtual_func)
-      .def("pure_virtual_func", &VirtClass::pure_virtual_func);
+    m.attr("defined_THREAD_SANITIZER") =
+#if defined(THREAD_SANITIZER)
+        true;
+#else
+        false;
+#endif
 
-  m.def("test_callback_py_obj", [](py::object &func) { func(); });
-  m.def("test_callback_std_func", [](const std::function<void()> &func) { func(); });
-  m.def("test_callback_virtual_func", [](VirtClass &virt) { virt.virtual_func(); });
-  m.def("test_callback_pure_virtual_func", [](VirtClass &virt) { virt.pure_virtual_func(); });
-  m.def("test_cross_module_gil", []() {
-      auto cm = py::module_::import("cross_module_gil_utils");
-      auto gil_acquire
-          = reinterpret_cast<void (*)()>(PyLong_AsVoidPtr(cm.attr("gil_acquire_funcaddr").ptr()));
-      py::gil_scoped_release gil_release;
-      gil_acquire();
-  });
+    m.def("intentional_deadlock",
+          []() { std::thread([]() { py::gil_scoped_acquire gil_acquired; }).join(); });
+
+    py::class_<VirtClass, PyVirtClass>(m, "VirtClass")
+        .def(py::init<>())
+        .def("virtual_func", &VirtClass::virtual_func)
+        .def("pure_virtual_func", &VirtClass::pure_virtual_func);
+
+    m.def("test_callback_py_obj", [](py::object &func) { func(); });
+    m.def("test_callback_std_func", [](const std::function<void()> &func) { func(); });
+    m.def("test_callback_virtual_func", [](VirtClass &virt) { virt.virtual_func(); });
+    m.def("test_callback_pure_virtual_func", [](VirtClass &virt) { virt.pure_virtual_func(); });
+    m.def("test_cross_module_gil_released", []() {
+        CROSS_MODULE("gil_acquire_funcaddr")
+        py::gil_scoped_release gil_release;
+        target();
+    });
+    m.def("test_cross_module_gil_acquired", []() {
+        CROSS_MODULE("gil_acquire_funcaddr")
+        py::gil_scoped_acquire gil_acquire;
+        target();
+    });
+    m.def("test_cross_module_gil_inner_custom_released", []() {
+        CROSS_MODULE("gil_acquire_inner_custom_funcaddr")
+        py::gil_scoped_release gil_release;
+        target();
+    });
+    m.def("test_cross_module_gil_inner_custom_acquired", []() {
+        CROSS_MODULE("gil_acquire_inner_custom_funcaddr")
+        py::gil_scoped_acquire gil_acquire;
+        target();
+    });
+    m.def("test_cross_module_gil_inner_pybind11_released", []() {
+        CROSS_MODULE("gil_acquire_inner_pybind11_funcaddr")
+        py::gil_scoped_release gil_release;
+        target();
+    });
+    m.def("test_cross_module_gil_inner_pybind11_acquired", []() {
+        CROSS_MODULE("gil_acquire_inner_pybind11_funcaddr")
+        py::gil_scoped_acquire gil_acquire;
+        target();
+    });
+    m.def("test_cross_module_gil_nested_custom_released", []() {
+        CROSS_MODULE("gil_acquire_nested_custom_funcaddr")
+        py::gil_scoped_release gil_release;
+        target();
+    });
+    m.def("test_cross_module_gil_nested_custom_acquired", []() {
+        CROSS_MODULE("gil_acquire_nested_custom_funcaddr")
+        py::gil_scoped_acquire gil_acquire;
+        target();
+    });
+    m.def("test_cross_module_gil_nested_pybind11_released", []() {
+        CROSS_MODULE("gil_acquire_nested_pybind11_funcaddr")
+        py::gil_scoped_release gil_release;
+        target();
+    });
+    m.def("test_cross_module_gil_nested_pybind11_acquired", []() {
+        CROSS_MODULE("gil_acquire_nested_pybind11_funcaddr")
+        py::gil_scoped_acquire gil_acquire;
+        target();
+    });
+    m.def("test_release_acquire", [](const py::object &obj) {
+        py::gil_scoped_release gil_released;
+        py::gil_scoped_acquire gil_acquired;
+        return py::str(obj);
+    });
+    m.def("test_nested_acquire", [](const py::object &obj) {
+        py::gil_scoped_release gil_released;
+        py::gil_scoped_acquire gil_acquired_outer;
+        py::gil_scoped_acquire gil_acquired_inner;
+        return py::str(obj);
+    });
+    m.def("test_multi_acquire_release_cross_module", [](unsigned bits) {
+        py::set internals_ids;
+        internals_ids.add(PYBIND11_INTERNALS_ID);
+        {
+            py::gil_scoped_release gil_released;
+            auto thread_f = [bits, &internals_ids]() {
+                py::gil_scoped_acquire gil_acquired;
+                auto cm = py::module_::import("cross_module_gil_utils");
+                auto target = reinterpret_cast<std::string (*)(unsigned)>(
+                    PyLong_AsVoidPtr(cm.attr("gil_multi_acquire_release_funcaddr").ptr()));
+                std::string cm_internals_id = target(bits >> 3);
+                internals_ids.add(cm_internals_id);
+            };
+            if ((bits & 0x1u) != 0u) {
+                thread_f();
+            }
+            if ((bits & 0x2u) != 0u) {
+                std::thread non_python_thread(thread_f);
+                non_python_thread.join();
+            }
+            if ((bits & 0x4u) != 0u) {
+                thread_f();
+            }
+        }
+        return internals_ids;
+    });
 }
diff --git a/ext/pybind11/tests/test_gil_scoped.py b/ext/pybind11/tests/test_gil_scoped.py
index 0a1d62747d..6af6a472d5 100644
--- a/ext/pybind11/tests/test_gil_scoped.py
+++ b/ext/pybind11/tests/test_gil_scoped.py
@@ -1,46 +1,199 @@
-# -*- coding: utf-8 -*-
 import multiprocessing
+import sys
 import threading
+import time
 
+import pytest
+
+import env
 from pybind11_tests import gil_scoped as m
 
 
+class ExtendedVirtClass(m.VirtClass):
+    def virtual_func(self):
+        pass
+
+    def pure_virtual_func(self):
+        pass
+
+
+def test_callback_py_obj():
+    m.test_callback_py_obj(lambda: None)
+
+
+def test_callback_std_func():
+    m.test_callback_std_func(lambda: None)
+
+
+def test_callback_virtual_func():
+    extended = ExtendedVirtClass()
+    m.test_callback_virtual_func(extended)
+
+
+def test_callback_pure_virtual_func():
+    extended = ExtendedVirtClass()
+    m.test_callback_pure_virtual_func(extended)
+
+
+def test_cross_module_gil_released():
+    """Makes sure that the GIL can be acquired by another module from a GIL-released state."""
+    m.test_cross_module_gil_released()  # Should not raise a SIGSEGV
+
+
+def test_cross_module_gil_acquired():
+    """Makes sure that the GIL can be acquired by another module from a GIL-acquired state."""
+    m.test_cross_module_gil_acquired()  # Should not raise a SIGSEGV
+
+
+def test_cross_module_gil_inner_custom_released():
+    """Makes sure that the GIL can be acquired/released by another module
+    from a GIL-released state using custom locking logic."""
+    m.test_cross_module_gil_inner_custom_released()
+
+
+def test_cross_module_gil_inner_custom_acquired():
+    """Makes sure that the GIL can be acquired/acquired by another module
+    from a GIL-acquired state using custom locking logic."""
+    m.test_cross_module_gil_inner_custom_acquired()
+
+
+def test_cross_module_gil_inner_pybind11_released():
+    """Makes sure that the GIL can be acquired/released by another module
+    from a GIL-released state using pybind11 locking logic."""
+    m.test_cross_module_gil_inner_pybind11_released()
+
+
+def test_cross_module_gil_inner_pybind11_acquired():
+    """Makes sure that the GIL can be acquired/acquired by another module
+    from a GIL-acquired state using pybind11 locking logic."""
+    m.test_cross_module_gil_inner_pybind11_acquired()
+
+
+def test_cross_module_gil_nested_custom_released():
+    """Makes sure that the GIL can be nested acquired/released by another module
+    from a GIL-released state using custom locking logic."""
+    m.test_cross_module_gil_nested_custom_released()
+
+
+def test_cross_module_gil_nested_custom_acquired():
+    """Makes sure that the GIL can be nested acquired/acquired by another module
+    from a GIL-acquired state using custom locking logic."""
+    m.test_cross_module_gil_nested_custom_acquired()
+
+
+def test_cross_module_gil_nested_pybind11_released():
+    """Makes sure that the GIL can be nested acquired/released by another module
+    from a GIL-released state using pybind11 locking logic."""
+    m.test_cross_module_gil_nested_pybind11_released()
+
+
+def test_cross_module_gil_nested_pybind11_acquired():
+    """Makes sure that the GIL can be nested acquired/acquired by another module
+    from a GIL-acquired state using pybind11 locking logic."""
+    m.test_cross_module_gil_nested_pybind11_acquired()
+
+
+def test_release_acquire():
+    assert m.test_release_acquire(0xAB) == "171"
+
+
+def test_nested_acquire():
+    assert m.test_nested_acquire(0xAB) == "171"
+
+
+def test_multi_acquire_release_cross_module():
+    for bits in range(16 * 8):
+        internals_ids = m.test_multi_acquire_release_cross_module(bits)
+        assert len(internals_ids) == 2 if bits % 8 else 1
+
+
+# Intentionally putting human review in the loop here, to guard against accidents.
+VARS_BEFORE_ALL_BASIC_TESTS = dict(vars())  # Make a copy of the dict (critical).
+ALL_BASIC_TESTS = (
+    test_callback_py_obj,
+    test_callback_std_func,
+    test_callback_virtual_func,
+    test_callback_pure_virtual_func,
+    test_cross_module_gil_released,
+    test_cross_module_gil_acquired,
+    test_cross_module_gil_inner_custom_released,
+    test_cross_module_gil_inner_custom_acquired,
+    test_cross_module_gil_inner_pybind11_released,
+    test_cross_module_gil_inner_pybind11_acquired,
+    test_cross_module_gil_nested_custom_released,
+    test_cross_module_gil_nested_custom_acquired,
+    test_cross_module_gil_nested_pybind11_released,
+    test_cross_module_gil_nested_pybind11_acquired,
+    test_release_acquire,
+    test_nested_acquire,
+    test_multi_acquire_release_cross_module,
+)
+
+
+def test_all_basic_tests_completeness():
+    num_found = 0
+    for key, value in VARS_BEFORE_ALL_BASIC_TESTS.items():
+        if not key.startswith("test_"):
+            continue
+        assert value in ALL_BASIC_TESTS
+        num_found += 1
+    assert len(ALL_BASIC_TESTS) == num_found
+
+
+def _intentional_deadlock():
+    m.intentional_deadlock()
+
+
+ALL_BASIC_TESTS_PLUS_INTENTIONAL_DEADLOCK = ALL_BASIC_TESTS + (_intentional_deadlock,)
+
+
 def _run_in_process(target, *args, **kwargs):
-    """Runs target in process and returns its exitcode after 10s (None if still alive)."""
+    if len(args) == 0:
+        test_fn = target
+    else:
+        test_fn = args[0]
+    # Do not need to wait much, 10s should be more than enough.
+    timeout = 0.1 if test_fn is _intentional_deadlock else 10
     process = multiprocessing.Process(target=target, args=args, kwargs=kwargs)
     process.daemon = True
     try:
+        t_start = time.time()
         process.start()
-        # Do not need to wait much, 10s should be more than enough.
-        process.join(timeout=10)
+        if timeout >= 100:  # For debugging.
+            print(
+                "\nprocess.pid STARTED", process.pid, (sys.argv, target, args, kwargs)
+            )
+            print(f"COPY-PASTE-THIS: gdb {sys.argv[0]} -p {process.pid}", flush=True)
+        process.join(timeout=timeout)
+        if timeout >= 100:
+            print("\nprocess.pid JOINED", process.pid, flush=True)
+        t_delta = time.time() - t_start
+        if process.exitcode == 66 and m.defined_THREAD_SANITIZER:  # Issue #2754
+            # WOULD-BE-NICE-TO-HAVE: Check that the message below is actually in the output.
+            # Maybe this could work:
+            # https://gist.github.com/alexeygrigorev/01ce847f2e721b513b42ea4a6c96905e
+            pytest.skip(
+                "ThreadSanitizer: starting new threads after multi-threaded fork is not supported."
+            )
+        elif test_fn is _intentional_deadlock:
+            assert process.exitcode is None
+            return 0
+        elif process.exitcode is None:
+            assert t_delta > 0.9 * timeout
+            msg = "DEADLOCK, most likely, exactly what this test is meant to detect."
+            if env.PYPY and env.WIN:
+                pytest.skip(msg)
+            raise RuntimeError(msg)
         return process.exitcode
     finally:
         if process.is_alive():
             process.terminate()
 
 
-def _python_to_cpp_to_python():
-    """Calls different C++ functions that come back to Python."""
-
-    class ExtendedVirtClass(m.VirtClass):
-        def virtual_func(self):
-            pass
-
-        def pure_virtual_func(self):
-            pass
-
-    extended = ExtendedVirtClass()
-    m.test_callback_py_obj(lambda: None)
-    m.test_callback_std_func(lambda: None)
-    m.test_callback_virtual_func(extended)
-    m.test_callback_pure_virtual_func(extended)
-
-
-def _python_to_cpp_to_python_from_threads(num_threads, parallel=False):
-    """Calls different C++ functions that come back to Python, from Python threads."""
+def _run_in_threads(test_fn, num_threads, parallel):
     threads = []
     for _ in range(num_threads):
-        thread = threading.Thread(target=_python_to_cpp_to_python)
+        thread = threading.Thread(target=test_fn)
         thread.daemon = True
         thread.start()
         if parallel:
@@ -52,43 +205,40 @@ def _python_to_cpp_to_python_from_threads(num_threads, parallel=False):
 
 
 # TODO: FIXME, sometimes returns -11 (segfault) instead of 0 on macOS Python 3.9
-def test_python_to_cpp_to_python_from_thread():
+@pytest.mark.parametrize("test_fn", ALL_BASIC_TESTS_PLUS_INTENTIONAL_DEADLOCK)
+def test_run_in_process_one_thread(test_fn):
     """Makes sure there is no GIL deadlock when running in a thread.
 
     It runs in a separate process to be able to stop and assert if it deadlocks.
     """
-    assert _run_in_process(_python_to_cpp_to_python_from_threads, 1) == 0
+    assert _run_in_process(_run_in_threads, test_fn, num_threads=1, parallel=False) == 0
 
 
 # TODO: FIXME on macOS Python 3.9
-def test_python_to_cpp_to_python_from_thread_multiple_parallel():
+@pytest.mark.parametrize("test_fn", ALL_BASIC_TESTS_PLUS_INTENTIONAL_DEADLOCK)
+def test_run_in_process_multiple_threads_parallel(test_fn):
     """Makes sure there is no GIL deadlock when running in a thread multiple times in parallel.
 
     It runs in a separate process to be able to stop and assert if it deadlocks.
     """
-    assert _run_in_process(_python_to_cpp_to_python_from_threads, 8, parallel=True) == 0
+    assert _run_in_process(_run_in_threads, test_fn, num_threads=8, parallel=True) == 0
 
 
 # TODO: FIXME on macOS Python 3.9
-def test_python_to_cpp_to_python_from_thread_multiple_sequential():
+@pytest.mark.parametrize("test_fn", ALL_BASIC_TESTS_PLUS_INTENTIONAL_DEADLOCK)
+def test_run_in_process_multiple_threads_sequential(test_fn):
     """Makes sure there is no GIL deadlock when running in a thread multiple times sequentially.
 
     It runs in a separate process to be able to stop and assert if it deadlocks.
     """
-    assert (
-        _run_in_process(_python_to_cpp_to_python_from_threads, 8, parallel=False) == 0
-    )
+    assert _run_in_process(_run_in_threads, test_fn, num_threads=8, parallel=False) == 0
 
 
 # TODO: FIXME on macOS Python 3.9
-def test_python_to_cpp_to_python_from_process():
+@pytest.mark.parametrize("test_fn", ALL_BASIC_TESTS_PLUS_INTENTIONAL_DEADLOCK)
+def test_run_in_process_direct(test_fn):
     """Makes sure there is no GIL deadlock when using processes.
 
     This test is for completion, but it was never an issue.
     """
-    assert _run_in_process(_python_to_cpp_to_python) == 0
-
-
-def test_cross_module_gil():
-    """Makes sure that the GIL can be acquired by another module from a GIL-released state."""
-    m.test_cross_module_gil()  # Should not raise a SIGSEGV
+    assert _run_in_process(test_fn) == 0
diff --git a/ext/pybind11/tests/test_iostream.cpp b/ext/pybind11/tests/test_iostream.cpp
index c620b59493..421eaa2dd8 100644
--- a/ext/pybind11/tests/test_iostream.cpp
+++ b/ext/pybind11/tests/test_iostream.cpp
@@ -7,12 +7,10 @@
     BSD-style license that can be found in the LICENSE file.
 */
 
-#if defined(_MSC_VER) && _MSC_VER < 1910  // VS 2015's MSVC
-#  pragma warning(disable: 4702) // unreachable code in system header (xatomic.h(382))
-#endif
-
 #include <pybind11/iostream.h>
+
 #include "pybind11_tests.h"
+
 #include <atomic>
 #include <iostream>
 #include <mutex>
@@ -22,8 +20,9 @@
 void noisy_function(const std::string &msg, bool flush) {
 
     std::cout << msg;
-    if (flush)
+    if (flush) {
         std::cout << std::flush;
+    }
 }
 
 void noisy_funct_dual(const std::string &msg, const std::string &emsg) {
@@ -50,13 +49,12 @@ struct TestThread {
                     std::cout << "x" << std::flush;
                 }
                 std::this_thread::sleep_for(std::chrono::microseconds(50));
-            } };
+            }
+        };
         t_ = new std::thread(std::move(thread_f));
     }
 
-    ~TestThread() {
-        delete t_;
-    }
+    ~TestThread() { delete t_; }
 
     void stop() { stop_ = true; }
 
@@ -74,7 +72,6 @@ struct TestThread {
     std::atomic<bool> stop_;
 };
 
-
 TEST_SUBMODULE(iostream, m) {
 
     add_ostream_redirect(m);
@@ -91,9 +88,11 @@ TEST_SUBMODULE(iostream, m) {
         std::cout << msg << std::flush;
     });
 
-    m.def("guard_output", &noisy_function,
-            py::call_guard<py::scoped_ostream_redirect>(),
-            py::arg("msg"), py::arg("flush")=true);
+    m.def("guard_output",
+          &noisy_function,
+          py::call_guard<py::scoped_ostream_redirect>(),
+          py::arg("msg"),
+          py::arg("flush") = true);
 
     m.def("captured_err", [](const std::string &msg) {
         py::scoped_ostream_redirect redir(std::cerr, py::module_::import("sys").attr("stderr"));
@@ -102,9 +101,11 @@ TEST_SUBMODULE(iostream, m) {
 
     m.def("noisy_function", &noisy_function, py::arg("msg"), py::arg("flush") = true);
 
-    m.def("dual_guard", &noisy_funct_dual,
-            py::call_guard<py::scoped_ostream_redirect, py::scoped_estream_redirect>(),
-            py::arg("msg"), py::arg("emsg"));
+    m.def("dual_guard",
+          &noisy_funct_dual,
+          py::call_guard<py::scoped_ostream_redirect, py::scoped_estream_redirect>(),
+          py::arg("msg"),
+          py::arg("emsg"));
 
     m.def("raw_output", [](const std::string &msg) { std::cout << msg << std::flush; });
 
diff --git a/ext/pybind11/tests/test_iostream.py b/ext/pybind11/tests/test_iostream.py
index 7f18ca65c6..5bbdf6955d 100644
--- a/ext/pybind11/tests/test_iostream.py
+++ b/ext/pybind11/tests/test_iostream.py
@@ -1,44 +1,8 @@
-# -*- coding: utf-8 -*-
-import sys
-from contextlib import contextmanager
+from contextlib import redirect_stderr, redirect_stdout
+from io import StringIO
 
 from pybind11_tests import iostream as m
 
-try:
-    # Python 3
-    from io import StringIO
-except ImportError:
-    # Python 2
-    try:
-        from cStringIO import StringIO
-    except ImportError:
-        from StringIO import StringIO
-
-try:
-    # Python 3.4
-    from contextlib import redirect_stdout
-except ImportError:
-
-    @contextmanager
-    def redirect_stdout(target):
-        original = sys.stdout
-        sys.stdout = target
-        yield
-        sys.stdout = original
-
-
-try:
-    # Python 3.5
-    from contextlib import redirect_stderr
-except ImportError:
-
-    @contextmanager
-    def redirect_stderr(target):
-        original = sys.stderr
-        sys.stderr = target
-        yield
-        sys.stderr = original
-
 
 def test_captured(capsys):
     msg = "I've been redirected to Python, I hope!"
diff --git a/ext/pybind11/tests/test_kwargs_and_defaults.cpp b/ext/pybind11/tests/test_kwargs_and_defaults.cpp
index 63332d32ea..77e72c0c70 100644
--- a/ext/pybind11/tests/test_kwargs_and_defaults.cpp
+++ b/ext/pybind11/tests/test_kwargs_and_defaults.cpp
@@ -7,37 +7,50 @@
     BSD-style license that can be found in the LICENSE file.
 */
 
-#include "pybind11_tests.h"
-#include "constructor_stats.h"
 #include <pybind11/stl.h>
 
+#include "constructor_stats.h"
+#include "pybind11_tests.h"
+
 #include <utility>
 
 TEST_SUBMODULE(kwargs_and_defaults, m) {
-    auto kw_func = [](int x, int y) { return "x=" + std::to_string(x) + ", y=" + std::to_string(y); };
+    auto kw_func
+        = [](int x, int y) { return "x=" + std::to_string(x) + ", y=" + std::to_string(y); };
 
     // test_named_arguments
     m.def("kw_func0", kw_func);
     m.def("kw_func1", kw_func, py::arg("x"), py::arg("y"));
     m.def("kw_func2", kw_func, py::arg("x") = 100, py::arg("y") = 200);
-    m.def("kw_func3", [](const char *) { }, py::arg("data") = std::string("Hello world!"));
+    m.def(
+        "kw_func3", [](const char *) {}, py::arg("data") = std::string("Hello world!"));
 
     /* A fancier default argument */
     std::vector<int> list{{13, 17}};
-    m.def("kw_func4", [](const std::vector<int> &entries) {
-        std::string ret = "{";
-        for (int i : entries)
-            ret += std::to_string(i) + " ";
-        ret.back() = '}';
-        return ret;
-    }, py::arg("myList") = list);
+    m.def(
+        "kw_func4",
+        [](const std::vector<int> &entries) {
+            std::string ret = "{";
+            for (int i : entries) {
+                ret += std::to_string(i) + " ";
+            }
+            ret.back() = '}';
+            return ret;
+        },
+        py::arg("myList") = list);
 
-    m.def("kw_func_udl", kw_func, "x"_a, "y"_a=300);
-    m.def("kw_func_udl_z", kw_func, "x"_a, "y"_a=0);
+    m.def("kw_func_udl", kw_func, "x"_a, "y"_a = 300);
+    m.def("kw_func_udl_z", kw_func, "x"_a, "y"_a = 0);
 
     // test_args_and_kwargs
     m.def("args_function", [](py::args args) -> py::tuple {
-        return std::move(args);
+        PYBIND11_WARNING_PUSH
+
+#ifdef PYBIND11_DETECTED_CLANG_WITH_MISLEADING_CALL_STD_MOVE_EXPLICITLY_WARNING
+        PYBIND11_WARNING_DISABLE_CLANG("-Wreturn-std-move")
+#endif
+        return args;
+        PYBIND11_WARNING_POP
     });
     m.def("args_kwargs_function", [](const py::args &args, const py::kwargs &kwargs) {
         return py::make_tuple(args, kwargs);
@@ -53,18 +66,60 @@ TEST_SUBMODULE(kwargs_and_defaults, m) {
     };
     m.def("mixed_plus_args_kwargs", mixed_plus_both);
 
-    m.def("mixed_plus_args_kwargs_defaults", mixed_plus_both,
-            py::arg("i") = 1, py::arg("j") = 3.14159);
+    m.def("mixed_plus_args_kwargs_defaults",
+          mixed_plus_both,
+          py::arg("i") = 1,
+          py::arg("j") = 3.14159);
 
-    // test_args_refcount
-    // PyPy needs a garbage collection to get the reference count values to match CPython's behaviour
-    #ifdef PYPY_VERSION
-    #define GC_IF_NEEDED ConstructorStats::gc()
-    #else
-    #define GC_IF_NEEDED
-    #endif
-    m.def("arg_refcount_h", [](py::handle h) { GC_IF_NEEDED; return h.ref_count(); });
-    m.def("arg_refcount_h", [](py::handle h, py::handle, py::handle) { GC_IF_NEEDED; return h.ref_count(); });
+    m.def(
+        "args_kwonly",
+        [](int i, double j, const py::args &args, int z) { return py::make_tuple(i, j, args, z); },
+        "i"_a,
+        "j"_a,
+        "z"_a);
+    m.def(
+        "args_kwonly_kwargs",
+        [](int i, double j, const py::args &args, int z, const py::kwargs &kwargs) {
+            return py::make_tuple(i, j, args, z, kwargs);
+        },
+        "i"_a,
+        "j"_a,
+        py::kw_only{},
+        "z"_a);
+    m.def(
+        "args_kwonly_kwargs_defaults",
+        [](int i, double j, const py::args &args, int z, const py::kwargs &kwargs) {
+            return py::make_tuple(i, j, args, z, kwargs);
+        },
+        "i"_a = 1,
+        "j"_a = 3.14159,
+        "z"_a = 42);
+    m.def(
+        "args_kwonly_full_monty",
+        [](int h, int i, double j, const py::args &args, int z, const py::kwargs &kwargs) {
+            return py::make_tuple(h, i, j, args, z, kwargs);
+        },
+        py::arg() = 1,
+        py::arg() = 2,
+        py::pos_only{},
+        "j"_a = 3.14159,
+        "z"_a = 42);
+
+// test_args_refcount
+// PyPy needs a garbage collection to get the reference count values to match CPython's behaviour
+#ifdef PYPY_VERSION
+#    define GC_IF_NEEDED ConstructorStats::gc()
+#else
+#    define GC_IF_NEEDED
+#endif
+    m.def("arg_refcount_h", [](py::handle h) {
+        GC_IF_NEEDED;
+        return h.ref_count();
+    });
+    m.def("arg_refcount_h", [](py::handle h, py::handle, py::handle) {
+        GC_IF_NEEDED;
+        return h.ref_count();
+    });
     m.def("arg_refcount_o", [](const py::object &o) {
         GC_IF_NEEDED;
         return o.ref_count();
@@ -72,40 +127,61 @@ TEST_SUBMODULE(kwargs_and_defaults, m) {
     m.def("args_refcount", [](py::args a) {
         GC_IF_NEEDED;
         py::tuple t(a.size());
-        for (size_t i = 0; i < a.size(); i++)
+        for (size_t i = 0; i < a.size(); i++) {
             // Use raw Python API here to avoid an extra, intermediate incref on the tuple item:
             t[i] = (int) Py_REFCNT(PyTuple_GET_ITEM(a.ptr(), static_cast<py::ssize_t>(i)));
+        }
         return t;
     });
     m.def("mixed_args_refcount", [](const py::object &o, py::args a) {
         GC_IF_NEEDED;
         py::tuple t(a.size() + 1);
         t[0] = o.ref_count();
-        for (size_t i = 0; i < a.size(); i++)
+        for (size_t i = 0; i < a.size(); i++) {
             // Use raw Python API here to avoid an extra, intermediate incref on the tuple item:
             t[i + 1] = (int) Py_REFCNT(PyTuple_GET_ITEM(a.ptr(), static_cast<py::ssize_t>(i)));
+        }
         return t;
     });
 
     // pybind11 won't allow these to be bound: args and kwargs, if present, must be at the end.
     // Uncomment these to test that the static_assert is indeed working:
-//    m.def("bad_args1", [](py::args, int) {});
-//    m.def("bad_args2", [](py::kwargs, int) {});
-//    m.def("bad_args3", [](py::kwargs, py::args) {});
-//    m.def("bad_args4", [](py::args, int, py::kwargs) {});
-//    m.def("bad_args5", [](py::args, py::kwargs, int) {});
-//    m.def("bad_args6", [](py::args, py::args) {});
-//    m.def("bad_args7", [](py::kwargs, py::kwargs) {});
+    //    m.def("bad_args1", [](py::args, int) {});
+    //    m.def("bad_args2", [](py::kwargs, int) {});
+    //    m.def("bad_args3", [](py::kwargs, py::args) {});
+    //    m.def("bad_args4", [](py::args, int, py::kwargs) {});
+    //    m.def("bad_args5", [](py::args, py::kwargs, int) {});
+    //    m.def("bad_args6", [](py::args, py::args) {});
+    //    m.def("bad_args7", [](py::kwargs, py::kwargs) {});
 
     // test_keyword_only_args
-    m.def("kw_only_all", [](int i, int j) { return py::make_tuple(i, j); },
-            py::kw_only(), py::arg("i"), py::arg("j"));
-    m.def("kw_only_some", [](int i, int j, int k) { return py::make_tuple(i, j, k); },
-            py::arg(), py::kw_only(), py::arg("j"), py::arg("k"));
-    m.def("kw_only_with_defaults", [](int i, int j, int k, int z) { return py::make_tuple(i, j, k, z); },
-            py::arg() = 3, "j"_a = 4, py::kw_only(), "k"_a = 5, "z"_a);
-    m.def("kw_only_mixed", [](int i, int j) { return py::make_tuple(i, j); },
-            "i"_a, py::kw_only(), "j"_a);
+    m.def(
+        "kw_only_all",
+        [](int i, int j) { return py::make_tuple(i, j); },
+        py::kw_only(),
+        py::arg("i"),
+        py::arg("j"));
+    m.def(
+        "kw_only_some",
+        [](int i, int j, int k) { return py::make_tuple(i, j, k); },
+        py::arg(),
+        py::kw_only(),
+        py::arg("j"),
+        py::arg("k"));
+    m.def(
+        "kw_only_with_defaults",
+        [](int i, int j, int k, int z) { return py::make_tuple(i, j, k, z); },
+        py::arg() = 3,
+        "j"_a = 4,
+        py::kw_only(),
+        "k"_a = 5,
+        "z"_a);
+    m.def(
+        "kw_only_mixed",
+        [](int i, int j) { return py::make_tuple(i, j); },
+        "i"_a,
+        py::kw_only(),
+        "j"_a);
     m.def(
         "kw_only_plus_more",
         [](int i, int j, int k, const py::kwargs &kwargs) {
@@ -117,29 +193,57 @@ TEST_SUBMODULE(kwargs_and_defaults, m) {
         py::arg("k") /* kw-only */);
 
     m.def("register_invalid_kw_only", [](py::module_ m) {
-        m.def("bad_kw_only", [](int i, int j) { return py::make_tuple(i, j); },
-                py::kw_only(), py::arg() /* invalid unnamed argument */, "j"_a);
+        m.def(
+            "bad_kw_only",
+            [](int i, int j) { return py::make_tuple(i, j); },
+            py::kw_only(),
+            py::arg() /* invalid unnamed argument */,
+            "j"_a);
     });
 
     // test_positional_only_args
-    m.def("pos_only_all", [](int i, int j) { return py::make_tuple(i, j); },
-            py::arg("i"), py::arg("j"), py::pos_only());
-    m.def("pos_only_mix", [](int i, int j) { return py::make_tuple(i, j); },
-            py::arg("i"), py::pos_only(), py::arg("j"));
-    m.def("pos_kw_only_mix", [](int i, int j, int k) { return py::make_tuple(i, j, k); },
-            py::arg("i"), py::pos_only(), py::arg("j"), py::kw_only(), py::arg("k"));
-    m.def("pos_only_def_mix", [](int i, int j, int k) { return py::make_tuple(i, j, k); },
-            py::arg("i"), py::arg("j") = 2, py::pos_only(), py::arg("k") = 3);
-
+    m.def(
+        "pos_only_all",
+        [](int i, int j) { return py::make_tuple(i, j); },
+        py::arg("i"),
+        py::arg("j"),
+        py::pos_only());
+    m.def(
+        "pos_only_mix",
+        [](int i, int j) { return py::make_tuple(i, j); },
+        py::arg("i"),
+        py::pos_only(),
+        py::arg("j"));
+    m.def(
+        "pos_kw_only_mix",
+        [](int i, int j, int k) { return py::make_tuple(i, j, k); },
+        py::arg("i"),
+        py::pos_only(),
+        py::arg("j"),
+        py::kw_only(),
+        py::arg("k"));
+    m.def(
+        "pos_only_def_mix",
+        [](int i, int j, int k) { return py::make_tuple(i, j, k); },
+        py::arg("i"),
+        py::arg("j") = 2,
+        py::pos_only(),
+        py::arg("k") = 3);
 
     // These should fail to compile:
+#ifdef PYBIND11_NEVER_DEFINED_EVER
     // argument annotations are required when using kw_only
-//    m.def("bad_kw_only1", [](int) {}, py::kw_only());
+    m.def(
+        "bad_kw_only1", [](int) {}, py::kw_only());
     // can't specify both `py::kw_only` and a `py::args` argument
-//    m.def("bad_kw_only2", [](int i, py::args) {}, py::kw_only(), "i"_a);
+    m.def(
+        "bad_kw_only2", [](int i, py::args) {}, py::kw_only(), "i"_a);
+#endif
 
     // test_function_signatures (along with most of the above)
-    struct KWClass { void foo(int, float) {} };
+    struct KWClass {
+        void foo(int, float) {}
+    };
     py::class_<KWClass>(m, "KWClass")
         .def("foo0", &KWClass::foo)
         .def("foo1", &KWClass::foo, "x"_a, "y"_a);
@@ -150,4 +254,28 @@ TEST_SUBMODULE(kwargs_and_defaults, m) {
         "class_default_argument",
         [](py::object a) { return py::repr(std::move(a)); },
         "a"_a = py::module_::import("decimal").attr("Decimal"));
+
+    // Initial implementation of kw_only was broken when used on a method/constructor before any
+    // other arguments
+    // https://github.com/pybind/pybind11/pull/3402#issuecomment-963341987
+
+    struct first_arg_kw_only {};
+    py::class_<first_arg_kw_only>(m, "first_arg_kw_only")
+        .def(py::init([](int) { return first_arg_kw_only(); }),
+             py::kw_only(), // This being before any args was broken
+             py::arg("i") = 0)
+        .def(
+            "method",
+            [](first_arg_kw_only &, int, int) {},
+            py::kw_only(), // and likewise here
+            py::arg("i") = 1,
+            py::arg("j") = 2)
+        // Closely related: pos_only marker didn't show up properly when it was before any other
+        // arguments (although that is fairly useless in practice).
+        .def(
+            "pos_only",
+            [](first_arg_kw_only &, int, int) {},
+            py::pos_only{},
+            py::arg("i"),
+            py::arg("j"));
 }
diff --git a/ext/pybind11/tests/test_kwargs_and_defaults.py b/ext/pybind11/tests/test_kwargs_and_defaults.py
index ddc387eeb9..ab7017886e 100644
--- a/ext/pybind11/tests/test_kwargs_and_defaults.py
+++ b/ext/pybind11/tests/test_kwargs_and_defaults.py
@@ -1,7 +1,5 @@
-# -*- coding: utf-8 -*-
 import pytest
 
-import env  # noqa: F401
 from pybind11_tests import kwargs_and_defaults as m
 
 
@@ -82,7 +80,7 @@ def test_mixed_args_and_kwargs(msg):
             1. (arg0: int, arg1: float, *args) -> tuple
 
         Invoked with: 1
-    """  # noqa: E501 line too long
+    """
     )
     with pytest.raises(TypeError) as excinfo:
         assert mpa()
@@ -93,7 +91,7 @@ def test_mixed_args_and_kwargs(msg):
             1. (arg0: int, arg1: float, *args) -> tuple
 
         Invoked with:
-    """  # noqa: E501 line too long
+    """
     )
 
     assert mpk(-2, 3.5, pi=3.14159, e=2.71828) == (
@@ -127,7 +125,7 @@ def test_mixed_args_and_kwargs(msg):
             1. (i: int = 1, j: float = 3.14159, *args, **kwargs) -> tuple
 
         Invoked with: 1; kwargs: i=1
-    """  # noqa: E501 line too long
+    """
     )
     with pytest.raises(TypeError) as excinfo:
         assert mpakd(1, 2, j=1)
@@ -138,9 +136,56 @@ def test_mixed_args_and_kwargs(msg):
             1. (i: int = 1, j: float = 3.14159, *args, **kwargs) -> tuple
 
         Invoked with: 1, 2; kwargs: j=1
-    """  # noqa: E501 line too long
+    """
     )
 
+    # Arguments after a py::args are automatically keyword-only (pybind 2.9+)
+    assert m.args_kwonly(2, 2.5, z=22) == (2, 2.5, (), 22)
+    assert m.args_kwonly(2, 2.5, "a", "b", "c", z=22) == (2, 2.5, ("a", "b", "c"), 22)
+    assert m.args_kwonly(z=22, i=4, j=16) == (4, 16, (), 22)
+
+    with pytest.raises(TypeError) as excinfo:
+        assert m.args_kwonly(2, 2.5, 22)  # missing z= keyword
+    assert (
+        msg(excinfo.value)
+        == """
+        args_kwonly(): incompatible function arguments. The following argument types are supported:
+            1. (i: int, j: float, *args, z: int) -> tuple
+
+        Invoked with: 2, 2.5, 22
+    """
+    )
+
+    assert m.args_kwonly_kwargs(i=1, k=4, j=10, z=-1, y=9) == (
+        1,
+        10,
+        (),
+        -1,
+        {"k": 4, "y": 9},
+    )
+    assert m.args_kwonly_kwargs(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, z=11, y=12) == (
+        1,
+        2,
+        (3, 4, 5, 6, 7, 8, 9, 10),
+        11,
+        {"y": 12},
+    )
+    assert (
+        m.args_kwonly_kwargs.__doc__
+        == "args_kwonly_kwargs(i: int, j: float, *args, z: int, **kwargs) -> tuple\n"
+    )
+
+    assert (
+        m.args_kwonly_kwargs_defaults.__doc__
+        == "args_kwonly_kwargs_defaults(i: int = 1, j: float = 3.14159, *args, z: int = 42, **kwargs) -> tuple\n"  # noqa: E501 line too long
+    )
+    assert m.args_kwonly_kwargs_defaults() == (1, 3.14159, (), 42, {})
+    assert m.args_kwonly_kwargs_defaults(2) == (2, 3.14159, (), 42, {})
+    assert m.args_kwonly_kwargs_defaults(z=-99) == (1, 3.14159, (), -99, {})
+    assert m.args_kwonly_kwargs_defaults(5, 6, 7, 8) == (5, 6, (7, 8), 42, {})
+    assert m.args_kwonly_kwargs_defaults(5, 6, 7, m=8) == (5, 6, (7,), 42, {"m": 8})
+    assert m.args_kwonly_kwargs_defaults(5, 6, 7, m=8, z=9) == (5, 6, (7,), 9, {"m": 8})
+
 
 def test_keyword_only_args(msg):
     assert m.kw_only_all(i=1, j=2) == (1, 2)
@@ -178,10 +223,23 @@ def test_keyword_only_args(msg):
     assert (
         msg(excinfo.value)
         == """
-        arg(): cannot specify an unnamed argument after an kw_only() annotation
+        arg(): cannot specify an unnamed argument after a kw_only() annotation or args() argument
     """
     )
 
+    # https://github.com/pybind/pybind11/pull/3402#issuecomment-963341987
+    x = m.first_arg_kw_only(i=1)
+    x.method()
+    x.method(i=1, j=2)
+    assert (
+        m.first_arg_kw_only.__init__.__doc__
+        == "__init__(self: pybind11_tests.kwargs_and_defaults.first_arg_kw_only, *, i: int = 0) -> None\n"  # noqa: E501 line too long
+    )
+    assert (
+        m.first_arg_kw_only.method.__doc__
+        == "method(self: pybind11_tests.kwargs_and_defaults.first_arg_kw_only, *, i: int = 1, j: int = 2) -> None\n"  # noqa: E501 line too long
+    )
+
 
 def test_positional_only_args(msg):
     assert m.pos_only_all(1, 2) == (1, 2)
@@ -222,6 +280,55 @@ def test_positional_only_args(msg):
         m.pos_only_def_mix(1, j=4)
     assert "incompatible function arguments" in str(excinfo.value)
 
+    # Mix it with args and kwargs:
+    assert (
+        m.args_kwonly_full_monty.__doc__
+        == "args_kwonly_full_monty(arg0: int = 1, arg1: int = 2, /, j: float = 3.14159, *args, z: int = 42, **kwargs) -> tuple\n"  # noqa: E501 line too long
+    )
+    assert m.args_kwonly_full_monty() == (1, 2, 3.14159, (), 42, {})
+    assert m.args_kwonly_full_monty(8) == (8, 2, 3.14159, (), 42, {})
+    assert m.args_kwonly_full_monty(8, 9) == (8, 9, 3.14159, (), 42, {})
+    assert m.args_kwonly_full_monty(8, 9, 10) == (8, 9, 10.0, (), 42, {})
+    assert m.args_kwonly_full_monty(3, 4, 5, 6, 7, m=8, z=9) == (
+        3,
+        4,
+        5.0,
+        (
+            6,
+            7,
+        ),
+        9,
+        {"m": 8},
+    )
+    assert m.args_kwonly_full_monty(3, 4, 5, 6, 7, m=8, z=9) == (
+        3,
+        4,
+        5.0,
+        (
+            6,
+            7,
+        ),
+        9,
+        {"m": 8},
+    )
+    assert m.args_kwonly_full_monty(5, j=7, m=8, z=9) == (5, 2, 7.0, (), 9, {"m": 8})
+    assert m.args_kwonly_full_monty(i=5, j=7, m=8, z=9) == (
+        1,
+        2,
+        7.0,
+        (),
+        9,
+        {"i": 5, "m": 8},
+    )
+
+    # pos_only at the beginning of the argument list was "broken" in how it was displayed (though
+    # this is fairly useless in practice).  Related to:
+    # https://github.com/pybind/pybind11/pull/3402#issuecomment-963341987
+    assert (
+        m.first_arg_kw_only.pos_only.__doc__
+        == "pos_only(self: pybind11_tests.kwargs_and_defaults.first_arg_kw_only, /, i: int, j: int) -> None\n"  # noqa: E501 line too long
+    )
+
 
 def test_signatures():
     assert "kw_only_all(*, i: int, j: int) -> tuple\n" == m.kw_only_all.__doc__
@@ -234,7 +341,6 @@ def test_signatures():
     )
 
 
-@pytest.mark.xfail("env.PYPY and env.PY2", reason="PyPy2 doesn't double count")
 def test_args_refcount():
     """Issue/PR #1216 - py::args elements get double-inc_ref()ed when combined with regular
     arguments"""
diff --git a/ext/pybind11/tests/test_local_bindings.cpp b/ext/pybind11/tests/test_local_bindings.cpp
index a5808e2f2a..1373677447 100644
--- a/ext/pybind11/tests/test_local_bindings.cpp
+++ b/ext/pybind11/tests/test_local_bindings.cpp
@@ -8,12 +8,12 @@
     BSD-style license that can be found in the LICENSE file.
 */
 
-#include "pybind11_tests.h"
-#include "local_bindings.h"
-
 #include <pybind11/stl.h>
 #include <pybind11/stl_bind.h>
 
+#include "local_bindings.h"
+#include "pybind11_tests.h"
+
 #include <numeric>
 #include <utility>
 
@@ -24,9 +24,9 @@ TEST_SUBMODULE(local_bindings, m) {
 
     // test_local_bindings
     // Register a class with py::module_local:
-    bind_local<LocalType, -1>(m, "LocalType", py::module_local())
-        .def("get3", [](LocalType &t) { return t.i + 3; })
-        ;
+    bind_local<LocalType, -1>(m, "LocalType", py::module_local()).def("get3", [](LocalType &t) {
+        return t.i + 3;
+    });
 
     m.def("local_value", [](LocalType &l) { return l.i; });
 
@@ -35,20 +35,21 @@ TEST_SUBMODULE(local_bindings, m) {
     // one, in pybind11_cross_module_tests.cpp, is designed to fail):
     bind_local<NonLocalType, 0>(m, "NonLocalType")
         .def(py::init<int>())
-        .def("get", [](LocalType &i) { return i.i; })
-        ;
+        .def("get", [](LocalType &i) { return i.i; });
 
     // test_duplicate_local
-    // py::module_local declarations should be visible across compilation units that get linked together;
-    // this tries to register a duplicate local.  It depends on a definition in test_class.cpp and
-    // should raise a runtime error from the duplicate definition attempt.  If test_class isn't
-    // available it *also* throws a runtime error (with "test_class not enabled" as value).
+    // py::module_local declarations should be visible across compilation units that get linked
+    // together; this tries to register a duplicate local.  It depends on a definition in
+    // test_class.cpp and should raise a runtime error from the duplicate definition attempt.  If
+    // test_class isn't available it *also* throws a runtime error (with "test_class not enabled"
+    // as value).
     m.def("register_local_external", [m]() {
         auto main = py::module_::import("pybind11_tests");
         if (py::hasattr(main, "class_")) {
             bind_local<LocalExternal, 7>(m, "LocalExternal", py::module_local());
+        } else {
+            throw std::runtime_error("test_class not enabled");
         }
-        else throw std::runtime_error("test_class not enabled");
     });
 
     // test_stl_bind_local
@@ -78,12 +79,12 @@ TEST_SUBMODULE(local_bindings, m) {
     m.def("get_mixed_lg", [](int i) { return MixedLocalGlobal(i); });
 
     // test_internal_locals_differ
-    m.def("local_cpp_types_addr", []() { return (uintptr_t) &py::detail::get_local_internals().registered_types_cpp; });
+    m.def("local_cpp_types_addr",
+          []() { return (uintptr_t) &py::detail::get_local_internals().registered_types_cpp; });
 
     // test_stl_caster_vs_stl_bind
-    m.def("load_vector_via_caster", [](std::vector<int> v) {
-        return std::accumulate(v.begin(), v.end(), 0);
-    });
+    m.def("load_vector_via_caster",
+          [](std::vector<int> v) { return std::accumulate(v.begin(), v.end(), 0); });
 
     // test_cross_module_calls
     m.def("return_self", [](LocalVec *v) { return v; });
@@ -93,11 +94,9 @@ TEST_SUBMODULE(local_bindings, m) {
     public:
         explicit Cat(std::string name) : Pet(std::move(name)) {}
     };
-    py::class_<pets::Pet>(m, "Pet", py::module_local())
-        .def("get_name", &pets::Pet::name);
+    py::class_<pets::Pet>(m, "Pet", py::module_local()).def("get_name", &pets::Pet::name);
     // Binding for local extending class:
-    py::class_<Cat, pets::Pet>(m, "Cat")
-        .def(py::init<std::string>());
+    py::class_<Cat, pets::Pet>(m, "Cat").def(py::init<std::string>());
     m.def("pet_name", [](pets::Pet &p) { return p.name(); });
 
     py::class_<MixGL>(m, "MixGL").def(py::init<int>());
diff --git a/ext/pybind11/tests/test_local_bindings.py b/ext/pybind11/tests/test_local_bindings.py
index 52b1b63358..654d96d490 100644
--- a/ext/pybind11/tests/test_local_bindings.py
+++ b/ext/pybind11/tests/test_local_bindings.py
@@ -1,4 +1,3 @@
-# -*- coding: utf-8 -*-
 import pytest
 
 import env  # noqa: F401
@@ -200,7 +199,7 @@ def test_stl_caster_vs_stl_bind(msg):
         1. (arg0: pybind11_cross_module_tests.VectorInt) -> int
 
     Invoked with: [1, 2, 3]
-    """  # noqa: E501 line too long
+    """
     )
 
 
diff --git a/ext/pybind11/tests/test_methods_and_attributes.cpp b/ext/pybind11/tests/test_methods_and_attributes.cpp
index 2d303a44e3..815dd5e98a 100644
--- a/ext/pybind11/tests/test_methods_and_attributes.cpp
+++ b/ext/pybind11/tests/test_methods_and_attributes.cpp
@@ -8,8 +8,8 @@
     BSD-style license that can be found in the LICENSE file.
 */
 
-#include "pybind11_tests.h"
 #include "constructor_stats.h"
+#include "pybind11_tests.h"
 
 #if !defined(PYBIND11_OVERLOAD_CAST)
 template <typename... Args>
@@ -27,7 +27,10 @@ public:
 
     std::string toString() const { return "ExampleMandA[value=" + std::to_string(value) + "]"; }
 
-    void operator=(const ExampleMandA &e) { print_copy_assigned(this); value = e.value; }
+    void operator=(const ExampleMandA &e) {
+        print_copy_assigned(this);
+        value = e.value;
+    }
     void operator=(ExampleMandA &&e) noexcept {
         print_move_assigned(this);
         value = e.value;
@@ -40,37 +43,37 @@ public:
     void add4(ExampleMandA *other) { value += other->value; }       // passing by pointer
     void add5(const ExampleMandA *other) { value += other->value; } // passing by const pointer
 
-    void add6(int other) { value += other; }                        // passing by value
-    void add7(int &other) { value += other; }                       // passing by reference
-    void add8(const int &other) { value += other; }                 // passing by const reference
+    void add6(int other) { value += other; }        // passing by value
+    void add7(int &other) { value += other; }       // passing by reference
+    void add8(const int &other) { value += other; } // passing by const reference
     // NOLINTNEXTLINE(readability-non-const-parameter) Deliberately non-const for testing
-    void add9(int *other) { value += *other; }                      // passing by pointer
-    void add10(const int *other) { value += *other; }               // passing by const pointer
+    void add9(int *other) { value += *other; }        // passing by pointer
+    void add10(const int *other) { value += *other; } // passing by const pointer
 
-    void consume_str(std::string&&) {}
+    void consume_str(std::string &&) {}
 
-    ExampleMandA self1() { return *this; }                          // return by value
-    ExampleMandA &self2() { return *this; }                         // return by reference
-    const ExampleMandA &self3() const { return *this; }             // return by const reference
-    ExampleMandA *self4() { return this; }                          // return by pointer
-    const ExampleMandA *self5() const { return this; }              // return by const pointer
+    ExampleMandA self1() { return *this; }              // return by value
+    ExampleMandA &self2() { return *this; }             // return by reference
+    const ExampleMandA &self3() const { return *this; } // return by const reference
+    ExampleMandA *self4() { return this; }              // return by pointer
+    const ExampleMandA *self5() const { return this; }  // return by const pointer
 
-    int internal1() const { return value; }                         // return by value
-    int &internal2() { return value; }                              // return by reference
-    const int &internal3() const { return value; }                  // return by const reference
-    int *internal4() { return &value; }                             // return by pointer
-    const int *internal5() { return &value; }                       // return by const pointer
+    int internal1() const { return value; }        // return by value
+    int &internal2() { return value; }             // return by reference
+    const int &internal3() const { return value; } // return by const reference
+    int *internal4() { return &value; }            // return by pointer
+    const int *internal5() { return &value; }      // return by const pointer
 
-    py::str overloaded()             { return "()"; }
-    py::str overloaded(int)          { return "(int)"; }
-    py::str overloaded(int, float)   { return "(int, float)"; }
-    py::str overloaded(float, int)   { return "(float, int)"; }
-    py::str overloaded(int, int)     { return "(int, int)"; }
+    py::str overloaded() { return "()"; }
+    py::str overloaded(int) { return "(int)"; }
+    py::str overloaded(int, float) { return "(int, float)"; }
+    py::str overloaded(float, int) { return "(float, int)"; }
+    py::str overloaded(int, int) { return "(int, int)"; }
     py::str overloaded(float, float) { return "(float, float)"; }
-    py::str overloaded(int)          const { return "(int) const"; }
-    py::str overloaded(int, float)   const { return "(int, float) const"; }
-    py::str overloaded(float, int)   const { return "(float, int) const"; }
-    py::str overloaded(int, int)     const { return "(int, int) const"; }
+    py::str overloaded(int) const { return "(int) const"; }
+    py::str overloaded(int, float) const { return "(int, float) const"; }
+    py::str overloaded(float, int) const { return "(float, int) const"; }
+    py::str overloaded(int, int) const { return "(int, int) const"; }
     py::str overloaded(float, float) const { return "(float, float) const"; }
 
     static py::str overloaded(float) { return "static float"; }
@@ -112,7 +115,10 @@ UserType TestPropRVP::sv1(1);
 UserType TestPropRVP::sv2(1);
 
 // Test None-allowed py::arg argument policy
-class NoneTester { public: int answer = 42; };
+class NoneTester {
+public:
+    int answer = 42;
+};
 int none1(const NoneTester &obj) { return obj.answer; }
 int none2(NoneTester *obj) { return obj ? obj->answer : -1; }
 int none3(std::shared_ptr<NoneTester> &obj) { return obj ? obj->answer : -1; }
@@ -134,11 +140,15 @@ struct StrIssue {
     explicit StrIssue(int i) : val{i} {}
 };
 
-// Issues #854, #910: incompatible function args when member function/pointer is in unregistered base class
+// Issues #854, #910: incompatible function args when member function/pointer is in unregistered
+// base class
 class UnregisteredBase {
 public:
     void do_nothing() const {}
-    void increase_value() { rw_value++; ro_value += 0.25; }
+    void increase_value() {
+        rw_value++;
+        ro_value += 0.25;
+    }
     void set_int(int v) { rw_value = v; }
     int get_int() const { return rw_value; }
     double get_double() const { return ro_value; }
@@ -159,13 +169,21 @@ struct RefQualified {
     int constRefQualified(int other) const & { return value + other; }
 };
 
+// Test rvalue ref param
+struct RValueRefParam {
+    std::size_t func1(std::string &&s) { return s.size(); }
+    std::size_t func2(std::string &&s) const { return s.size(); }
+    std::size_t func3(std::string &&s) & { return s.size(); }
+    std::size_t func4(std::string &&s) const & { return s.size(); }
+};
+
 TEST_SUBMODULE(methods_and_attributes, m) {
     // test_methods_and_attributes
     py::class_<ExampleMandA> emna(m, "ExampleMandA");
     emna.def(py::init<>())
         .def(py::init<int>())
-        .def(py::init<std::string&&>())
-        .def(py::init<const ExampleMandA&>())
+        .def(py::init<std::string &&>())
+        .def(py::init<const ExampleMandA &>())
         .def("add1", &ExampleMandA::add1)
         .def("add2", &ExampleMandA::add2)
         .def("add3", &ExampleMandA::add3)
@@ -190,16 +208,20 @@ TEST_SUBMODULE(methods_and_attributes, m) {
 #if defined(PYBIND11_OVERLOAD_CAST)
         .def("overloaded", py::overload_cast<>(&ExampleMandA::overloaded))
         .def("overloaded", py::overload_cast<int>(&ExampleMandA::overloaded))
-        .def("overloaded", py::overload_cast<int,   float>(&ExampleMandA::overloaded))
-        .def("overloaded", py::overload_cast<float,   int>(&ExampleMandA::overloaded))
-        .def("overloaded", py::overload_cast<int,     int>(&ExampleMandA::overloaded))
+        .def("overloaded", py::overload_cast<int, float>(&ExampleMandA::overloaded))
+        .def("overloaded", py::overload_cast<float, int>(&ExampleMandA::overloaded))
+        .def("overloaded", py::overload_cast<int, int>(&ExampleMandA::overloaded))
         .def("overloaded", py::overload_cast<float, float>(&ExampleMandA::overloaded))
         .def("overloaded_float", py::overload_cast<float, float>(&ExampleMandA::overloaded))
-        .def("overloaded_const", py::overload_cast<int         >(&ExampleMandA::overloaded, py::const_))
-        .def("overloaded_const", py::overload_cast<int,   float>(&ExampleMandA::overloaded, py::const_))
-        .def("overloaded_const", py::overload_cast<float,   int>(&ExampleMandA::overloaded, py::const_))
-        .def("overloaded_const", py::overload_cast<int,     int>(&ExampleMandA::overloaded, py::const_))
-        .def("overloaded_const", py::overload_cast<float, float>(&ExampleMandA::overloaded, py::const_))
+        .def("overloaded_const", py::overload_cast<int>(&ExampleMandA::overloaded, py::const_))
+        .def("overloaded_const",
+             py::overload_cast<int, float>(&ExampleMandA::overloaded, py::const_))
+        .def("overloaded_const",
+             py::overload_cast<float, int>(&ExampleMandA::overloaded, py::const_))
+        .def("overloaded_const",
+             py::overload_cast<int, int>(&ExampleMandA::overloaded, py::const_))
+        .def("overloaded_const",
+             py::overload_cast<float, float>(&ExampleMandA::overloaded, py::const_))
 #else
         // Use both the traditional static_cast method and the C++11 compatible overload_cast_
         .def("overloaded", overload_cast_<>()(&ExampleMandA::overloaded))
@@ -217,16 +239,29 @@ TEST_SUBMODULE(methods_and_attributes, m) {
 #endif
         // test_no_mixed_overloads
         // Raise error if trying to mix static/non-static overloads on the same name:
-        .def_static("add_mixed_overloads1", []() {
-            auto emna = py::reinterpret_borrow<py::class_<ExampleMandA>>(py::module_::import("pybind11_tests.methods_and_attributes").attr("ExampleMandA"));
-            emna.def       ("overload_mixed1", static_cast<py::str (ExampleMandA::*)(int, int)>(&ExampleMandA::overloaded))
-                .def_static("overload_mixed1", static_cast<py::str (              *)(float   )>(&ExampleMandA::overloaded));
-        })
-        .def_static("add_mixed_overloads2", []() {
-            auto emna = py::reinterpret_borrow<py::class_<ExampleMandA>>(py::module_::import("pybind11_tests.methods_and_attributes").attr("ExampleMandA"));
-            emna.def_static("overload_mixed2", static_cast<py::str (              *)(float   )>(&ExampleMandA::overloaded))
-                .def       ("overload_mixed2", static_cast<py::str (ExampleMandA::*)(int, int)>(&ExampleMandA::overloaded));
-        })
+        .def_static("add_mixed_overloads1",
+                    []() {
+                        auto emna = py::reinterpret_borrow<py::class_<ExampleMandA>>(
+                            py::module_::import("pybind11_tests.methods_and_attributes")
+                                .attr("ExampleMandA"));
+                        emna.def("overload_mixed1",
+                                 static_cast<py::str (ExampleMandA::*)(int, int)>(
+                                     &ExampleMandA::overloaded))
+                            .def_static(
+                                "overload_mixed1",
+                                static_cast<py::str (*)(float)>(&ExampleMandA::overloaded));
+                    })
+        .def_static("add_mixed_overloads2",
+                    []() {
+                        auto emna = py::reinterpret_borrow<py::class_<ExampleMandA>>(
+                            py::module_::import("pybind11_tests.methods_and_attributes")
+                                .attr("ExampleMandA"));
+                        emna.def_static("overload_mixed2",
+                                        static_cast<py::str (*)(float)>(&ExampleMandA::overloaded))
+                            .def("overload_mixed2",
+                                 static_cast<py::str (ExampleMandA::*)(int, int)>(
+                                     &ExampleMandA::overloaded));
+                    })
         .def("__str__", &ExampleMandA::toString)
         .def_readwrite("value", &ExampleMandA::value);
 
@@ -299,7 +334,7 @@ TEST_SUBMODULE(methods_and_attributes, m) {
                                       [](const py::object &) { return UserType(1); });
 
     // test_metaclass_override
-    struct MetaclassOverride { };
+    struct MetaclassOverride {};
     py::class_<MetaclassOverride>(m, "MetaclassOverride", py::metaclass((PyObject *) &PyType_Type))
         .def_property_readonly_static("readonly", [](const py::object &) { return 1; });
 
@@ -307,45 +342,51 @@ TEST_SUBMODULE(methods_and_attributes, m) {
     m.def("overload_order", [](const std::string &) { return 1; });
     m.def("overload_order", [](const std::string &) { return 2; });
     m.def("overload_order", [](int) { return 3; });
-    m.def("overload_order", [](int) { return 4; }, py::prepend{});
+    m.def(
+        "overload_order", [](int) { return 4; }, py::prepend{});
 
 #if !defined(PYPY_VERSION)
     // test_dynamic_attributes
     class DynamicClass {
     public:
         DynamicClass() { print_default_created(this); }
-        DynamicClass(const DynamicClass&) = delete;
+        DynamicClass(const DynamicClass &) = delete;
         ~DynamicClass() { print_destroyed(this); }
     };
-    py::class_<DynamicClass>(m, "DynamicClass", py::dynamic_attr())
-        .def(py::init());
+    py::class_<DynamicClass>(m, "DynamicClass", py::dynamic_attr()).def(py::init());
 
-    class CppDerivedDynamicClass : public DynamicClass { };
-    py::class_<CppDerivedDynamicClass, DynamicClass>(m, "CppDerivedDynamicClass")
-        .def(py::init());
+    class CppDerivedDynamicClass : public DynamicClass {};
+    py::class_<CppDerivedDynamicClass, DynamicClass>(m, "CppDerivedDynamicClass").def(py::init());
 #endif
 
     // test_bad_arg_default
     // Issue/PR #648: bad arg default debugging output
-#if !defined(NDEBUG)
-    m.attr("debug_enabled") = true;
+#if defined(PYBIND11_DETAILED_ERROR_MESSAGES)
+    m.attr("detailed_error_messages_enabled") = true;
 #else
-    m.attr("debug_enabled") = false;
+    m.attr("detailed_error_messages_enabled") = false;
 #endif
-    m.def("bad_arg_def_named", []{
+    m.def("bad_arg_def_named", [] {
         auto m = py::module_::import("pybind11_tests");
-        m.def("should_fail", [](int, UnregisteredType) {}, py::arg(), py::arg("a") = UnregisteredType());
+        m.def(
+            "should_fail",
+            [](int, UnregisteredType) {},
+            py::arg(),
+            py::arg("a") = UnregisteredType());
     });
-    m.def("bad_arg_def_unnamed", []{
+    m.def("bad_arg_def_unnamed", [] {
         auto m = py::module_::import("pybind11_tests");
-        m.def("should_fail", [](int, UnregisteredType) {}, py::arg(), py::arg() = UnregisteredType());
+        m.def(
+            "should_fail",
+            [](int, UnregisteredType) {},
+            py::arg(),
+            py::arg() = UnregisteredType());
     });
 
     // [workaround(intel)] ICC 20/21 breaks with py::arg().stuff, using py::arg{}.stuff works.
 
     // test_accepts_none
-    py::class_<NoneTester, std::shared_ptr<NoneTester>>(m, "NoneTester")
-        .def(py::init<>());
+    py::class_<NoneTester, std::shared_ptr<NoneTester>>(m, "NoneTester").def(py::init<>());
     m.def("no_none1", &none1, py::arg{}.none(false));
     m.def("no_none2", &none2, py::arg{}.none(false));
     m.def("no_none3", &none3, py::arg{}.none(false));
@@ -363,21 +404,19 @@ TEST_SUBMODULE(methods_and_attributes, m) {
     // test_casts_none
     // Issue #2778: implicit casting from None to object (not pointer)
     py::class_<NoneCastTester>(m, "NoneCastTester")
-          .def(py::init<>())
-          .def(py::init<int>())
-          .def(py::init([](py::none const&) { return NoneCastTester{}; }));
+        .def(py::init<>())
+        .def(py::init<int>())
+        .def(py::init([](py::none const &) { return NoneCastTester{}; }));
     py::implicitly_convertible<py::none, NoneCastTester>();
-    m.def("ok_obj_or_none", [](NoneCastTester const& foo) { return foo.answer; });
-
+    m.def("ok_obj_or_none", [](NoneCastTester const &foo) { return foo.answer; });
 
     // test_str_issue
     // Issue #283: __str__ called on uninitialized instance when constructor arguments invalid
     py::class_<StrIssue>(m, "StrIssue")
         .def(py::init<int>())
         .def(py::init<>())
-        .def("__str__", [](const StrIssue &si) {
-            return "StrIssue[" + std::to_string(si.val) + "]"; }
-        );
+        .def("__str__",
+             [](const StrIssue &si) { return "StrIssue[" + std::to_string(si.val) + "]"; });
 
     // test_unregistered_base_implementations
     //
@@ -400,7 +439,8 @@ TEST_SUBMODULE(methods_and_attributes, m) {
         // This one is in the registered class:
         .def("sum", &RegisteredDerived::sum);
 
-    using Adapted = decltype(py::method_adaptor<RegisteredDerived>(&RegisteredDerived::do_nothing));
+    using Adapted
+        = decltype(py::method_adaptor<RegisteredDerived>(&RegisteredDerived::do_nothing));
     static_assert(std::is_same<Adapted, void (RegisteredDerived::*)() const>::value, "");
 
     // test_methods_and_attributes
@@ -409,4 +449,11 @@ TEST_SUBMODULE(methods_and_attributes, m) {
         .def_readonly("value", &RefQualified::value)
         .def("refQualified", &RefQualified::refQualified)
         .def("constRefQualified", &RefQualified::constRefQualified);
+
+    py::class_<RValueRefParam>(m, "RValueRefParam")
+        .def(py::init<>())
+        .def("func1", &RValueRefParam::func1)
+        .def("func2", &RValueRefParam::func2)
+        .def("func3", &RValueRefParam::func3)
+        .def("func4", &RValueRefParam::func4);
 }
diff --git a/ext/pybind11/tests/test_methods_and_attributes.py b/ext/pybind11/tests/test_methods_and_attributes.py
index 866b3cea18..0a2ae1239a 100644
--- a/ext/pybind11/tests/test_methods_and_attributes.py
+++ b/ext/pybind11/tests/test_methods_and_attributes.py
@@ -1,10 +1,21 @@
-# -*- coding: utf-8 -*-
+import sys
+
 import pytest
 
 import env  # noqa: F401
 from pybind11_tests import ConstructorStats
 from pybind11_tests import methods_and_attributes as m
 
+NO_GETTER_MSG = (
+    "unreadable attribute" if sys.version_info < (3, 11) else "object has no getter"
+)
+NO_SETTER_MSG = (
+    "can't set attribute" if sys.version_info < (3, 11) else "object has no setter"
+)
+NO_DELETER_MSG = (
+    "can't delete attribute" if sys.version_info < (3, 11) else "object has no deleter"
+)
+
 
 def test_methods_and_attributes():
     instance1 = m.ExampleMandA()
@@ -103,32 +114,32 @@ def test_properties():
 
     with pytest.raises(AttributeError) as excinfo:
         dummy = instance.def_property_writeonly  # unused var
-    assert "unreadable attribute" in str(excinfo.value)
+    assert NO_GETTER_MSG in str(excinfo.value)
 
     instance.def_property_writeonly = 4
     assert instance.def_property_readonly == 4
 
     with pytest.raises(AttributeError) as excinfo:
         dummy = instance.def_property_impossible  # noqa: F841 unused var
-    assert "unreadable attribute" in str(excinfo.value)
+    assert NO_GETTER_MSG in str(excinfo.value)
 
     with pytest.raises(AttributeError) as excinfo:
         instance.def_property_impossible = 5
-    assert "can't set attribute" in str(excinfo.value)
+    assert NO_SETTER_MSG in str(excinfo.value)
 
 
 def test_static_properties():
     assert m.TestProperties.def_readonly_static == 1
     with pytest.raises(AttributeError) as excinfo:
         m.TestProperties.def_readonly_static = 2
-    assert "can't set attribute" in str(excinfo.value)
+    assert NO_SETTER_MSG in str(excinfo.value)
 
     m.TestProperties.def_readwrite_static = 2
     assert m.TestProperties.def_readwrite_static == 2
 
     with pytest.raises(AttributeError) as excinfo:
         dummy = m.TestProperties.def_writeonly_static  # unused var
-    assert "unreadable attribute" in str(excinfo.value)
+    assert NO_GETTER_MSG in str(excinfo.value)
 
     m.TestProperties.def_writeonly_static = 3
     assert m.TestProperties.def_readonly_static == 3
@@ -136,14 +147,14 @@ def test_static_properties():
     assert m.TestProperties.def_property_readonly_static == 3
     with pytest.raises(AttributeError) as excinfo:
         m.TestProperties.def_property_readonly_static = 99
-    assert "can't set attribute" in str(excinfo.value)
+    assert NO_SETTER_MSG in str(excinfo.value)
 
     m.TestProperties.def_property_static = 4
     assert m.TestProperties.def_property_static == 4
 
     with pytest.raises(AttributeError) as excinfo:
         dummy = m.TestProperties.def_property_writeonly_static
-    assert "unreadable attribute" in str(excinfo.value)
+    assert NO_GETTER_MSG in str(excinfo.value)
 
     m.TestProperties.def_property_writeonly_static = 5
     assert m.TestProperties.def_property_static == 5
@@ -161,7 +172,7 @@ def test_static_properties():
 
     with pytest.raises(AttributeError) as excinfo:
         dummy = instance.def_property_writeonly_static  # noqa: F841 unused var
-    assert "unreadable attribute" in str(excinfo.value)
+    assert NO_GETTER_MSG in str(excinfo.value)
 
     instance.def_property_writeonly_static = 4
     assert instance.def_property_static == 4
@@ -181,7 +192,7 @@ def test_static_properties():
     properties_override = m.TestPropertiesOverride()
     with pytest.raises(AttributeError) as excinfo:
         del properties_override.def_readonly
-    assert "can't delete attribute" in str(excinfo.value)
+    assert NO_DELETER_MSG in str(excinfo.value)
 
 
 def test_static_cls():
@@ -217,15 +228,15 @@ def test_metaclass_override():
 
 
 def test_no_mixed_overloads():
-    from pybind11_tests import debug_enabled
+    from pybind11_tests import detailed_error_messages_enabled
 
     with pytest.raises(RuntimeError) as excinfo:
         m.ExampleMandA.add_mixed_overloads1()
     assert str(
         excinfo.value
     ) == "overloading a method with both static and instance methods is not supported; " + (
-        "compile in debug mode for more details"
-        if not debug_enabled
+        "#define PYBIND11_DETAILED_ERROR_MESSAGES or compile in debug mode for more details"
+        if not detailed_error_messages_enabled
         else "error while attempting to bind static method ExampleMandA.overload_mixed1"
         "(arg0: float) -> str"
     )
@@ -235,8 +246,8 @@ def test_no_mixed_overloads():
     assert str(
         excinfo.value
     ) == "overloading a method with both static and instance methods is not supported; " + (
-        "compile in debug mode for more details"
-        if not debug_enabled
+        "#define PYBIND11_DETAILED_ERROR_MESSAGES or compile in debug mode for more details"
+        if not detailed_error_messages_enabled
         else "error while attempting to bind instance method ExampleMandA.overload_mixed2"
         "(self: pybind11_tests.methods_and_attributes.ExampleMandA, arg0: int, arg1: int)"
         " -> str"
@@ -345,16 +356,16 @@ def test_cyclic_gc():
 
 
 def test_bad_arg_default(msg):
-    from pybind11_tests import debug_enabled
+    from pybind11_tests import detailed_error_messages_enabled
 
     with pytest.raises(RuntimeError) as excinfo:
         m.bad_arg_def_named()
     assert msg(excinfo.value) == (
         "arg(): could not convert default argument 'a: UnregisteredType' in function "
         "'should_fail' into a Python object (type not registered yet?)"
-        if debug_enabled
+        if detailed_error_messages_enabled
         else "arg(): could not convert default argument into a Python object (type not registered "
-        "yet?). Compile in debug mode for more information."
+        "yet?). #define PYBIND11_DETAILED_ERROR_MESSAGES or compile in debug mode for more information."
     )
 
     with pytest.raises(RuntimeError) as excinfo:
@@ -362,9 +373,9 @@ def test_bad_arg_default(msg):
     assert msg(excinfo.value) == (
         "arg(): could not convert default argument 'UnregisteredType' in function "
         "'should_fail' into a Python object (type not registered yet?)"
-        if debug_enabled
+        if detailed_error_messages_enabled
         else "arg(): could not convert default argument into a Python object (type not registered "
-        "yet?). Compile in debug mode for more information."
+        "yet?). #define PYBIND11_DETAILED_ERROR_MESSAGES or compile in debug mode for more information."
     )
 
 
@@ -494,24 +505,23 @@ def test_overload_ordering():
     assert m.overload_order("string") == 1
     assert m.overload_order(0) == 4
 
-    # Different for Python 2 vs. 3
-    uni_name = type(u"").__name__
-
     assert "1. overload_order(arg0: int) -> int" in m.overload_order.__doc__
-    assert (
-        "2. overload_order(arg0: {}) -> int".format(uni_name)
-        in m.overload_order.__doc__
-    )
-    assert (
-        "3. overload_order(arg0: {}) -> int".format(uni_name)
-        in m.overload_order.__doc__
-    )
+    assert "2. overload_order(arg0: str) -> int" in m.overload_order.__doc__
+    assert "3. overload_order(arg0: str) -> int" in m.overload_order.__doc__
     assert "4. overload_order(arg0: int) -> int" in m.overload_order.__doc__
 
     with pytest.raises(TypeError) as err:
         m.overload_order(1.1)
 
     assert "1. (arg0: int) -> int" in str(err.value)
-    assert "2. (arg0: {}) -> int".format(uni_name) in str(err.value)
-    assert "3. (arg0: {}) -> int".format(uni_name) in str(err.value)
+    assert "2. (arg0: str) -> int" in str(err.value)
+    assert "3. (arg0: str) -> int" in str(err.value)
     assert "4. (arg0: int) -> int" in str(err.value)
+
+
+def test_rvalue_ref_param():
+    r = m.RValueRefParam()
+    assert r.func1("123") == 3
+    assert r.func2("1234") == 4
+    assert r.func3("12345") == 5
+    assert r.func4("123456") == 6
diff --git a/ext/pybind11/tests/test_modules.cpp b/ext/pybind11/tests/test_modules.cpp
index ce61c1a25c..18a7ec74cc 100644
--- a/ext/pybind11/tests/test_modules.cpp
+++ b/ext/pybind11/tests/test_modules.cpp
@@ -8,8 +8,8 @@
     BSD-style license that can be found in the LICENSE file.
 */
 
-#include "pybind11_tests.h"
 #include "constructor_stats.h"
+#include "pybind11_tests.h"
 
 TEST_SUBMODULE(modules, m) {
     // test_nested_modules
@@ -22,23 +22,30 @@ TEST_SUBMODULE(modules, m) {
     public:
         explicit A(int v) : v(v) { print_created(this, v); }
         ~A() { print_destroyed(this); }
-        A(const A&) { print_copy_created(this); }
-        A& operator=(const A &copy) { print_copy_assigned(this); v = copy.v; return *this; }
+        A(const A &) { print_copy_created(this); }
+        A &operator=(const A &copy) {
+            print_copy_assigned(this);
+            v = copy.v;
+            return *this;
+        }
         std::string toString() const { return "A[" + std::to_string(v) + "]"; }
 
     private:
         int v;
     };
-    py::class_<A>(m_sub, "A")
-        .def(py::init<int>())
-        .def("__repr__", &A::toString);
+    py::class_<A>(m_sub, "A").def(py::init<int>()).def("__repr__", &A::toString);
 
     class B {
     public:
         B() { print_default_created(this); }
         ~B() { print_destroyed(this); }
-        B(const B&) { print_copy_created(this); }
-        B& operator=(const B &copy) { print_copy_assigned(this); a1 = copy.a1; a2 = copy.a2; return *this; }
+        B(const B &) { print_copy_created(this); }
+        B &operator=(const B &copy) {
+            print_copy_assigned(this);
+            a1 = copy.a1;
+            a2 = copy.a2;
+            return *this;
+        }
         A &get_a1() { return a1; }
         A &get_a2() { return a2; }
 
@@ -47,9 +54,16 @@ TEST_SUBMODULE(modules, m) {
     };
     py::class_<B>(m_sub, "B")
         .def(py::init<>())
-        .def("get_a1", &B::get_a1, "Return the internal A 1", py::return_value_policy::reference_internal)
-        .def("get_a2", &B::get_a2, "Return the internal A 2", py::return_value_policy::reference_internal)
-        .def_readwrite("a1", &B::a1)  // def_readonly uses an internal reference return policy by default
+        .def("get_a1",
+             &B::get_a1,
+             "Return the internal A 1",
+             py::return_value_policy::reference_internal)
+        .def("get_a2",
+             &B::get_a2,
+             "Return the internal A 2",
+             py::return_value_policy::reference_internal)
+        .def_readwrite("a1", &B::a1) // def_readonly uses an internal
+                                     // reference return policy by default
         .def_readwrite("a2", &B::a2);
 
     // This is intentionally "py::module" to verify it still can be used in place of "py::module_"
@@ -58,13 +72,14 @@ TEST_SUBMODULE(modules, m) {
     // test_duplicate_registration
     // Registering two things with the same name
     m.def("duplicate_registration", []() {
-        class Dupe1 { };
-        class Dupe2 { };
-        class Dupe3 { };
-        class DupeException { };
+        class Dupe1 {};
+        class Dupe2 {};
+        class Dupe3 {};
+        class DupeException {};
 
         // Go ahead and leak, until we have a non-leaking py::module_ constructor
-        auto dm = py::module_::create_extension_module("dummy", nullptr, new py::module_::module_def);
+        auto dm
+            = py::module_::create_extension_module("dummy", nullptr, new py::module_::module_def);
         auto failures = py::list();
 
         py::class_<Dupe1>(dm, "Dupe1");
@@ -75,28 +90,36 @@ TEST_SUBMODULE(modules, m) {
         try {
             py::class_<Dupe1>(dm, "Dupe1");
             failures.append("Dupe1 class");
-        } catch (std::runtime_error &) {}
+        } catch (std::runtime_error &) {
+        }
         try {
             dm.def("Dupe1", []() { return Dupe1(); });
             failures.append("Dupe1 function");
-        } catch (std::runtime_error &) {}
+        } catch (std::runtime_error &) {
+        }
         try {
             py::class_<Dupe3>(dm, "dupe1_factory");
             failures.append("dupe1_factory");
-        } catch (std::runtime_error &) {}
+        } catch (std::runtime_error &) {
+        }
         try {
             py::exception<Dupe3>(dm, "Dupe2");
             failures.append("Dupe2");
-        } catch (std::runtime_error &) {}
+        } catch (std::runtime_error &) {
+        }
         try {
             dm.def("DupeException", []() { return 30; });
             failures.append("DupeException1");
-        } catch (std::runtime_error &) {}
+        } catch (std::runtime_error &) {
+        }
         try {
             py::class_<DupeException>(dm, "DupeException");
             failures.append("DupeException2");
-        } catch (std::runtime_error &) {}
+        } catch (std::runtime_error &) {
+        }
 
         return failures;
     });
+
+    m.def("def_submodule", [](py::module_ m, const char *name) { return m.def_submodule(name); });
 }
diff --git a/ext/pybind11/tests/test_modules.py b/ext/pybind11/tests/test_modules.py
index 49e1ea5e30..e11d68e78e 100644
--- a/ext/pybind11/tests/test_modules.py
+++ b/ext/pybind11/tests/test_modules.py
@@ -1,4 +1,6 @@
-# -*- coding: utf-8 -*-
+import pytest
+
+import env
 from pybind11_tests import ConstructorStats
 from pybind11_tests import modules as m
 from pybind11_tests.modules import subsubmodule as ms
@@ -90,3 +92,30 @@ def test_builtin_key_type():
         keys = __builtins__.__dict__.keys()
 
     assert {type(k) for k in keys} == {str}
+
+
+@pytest.mark.xfail("env.PYPY", reason="PyModule_GetName()")
+def test_def_submodule_failures():
+    sm = m.def_submodule(m, b"ScratchSubModuleName")  # Using bytes to show it works.
+    assert sm.__name__ == m.__name__ + "." + "ScratchSubModuleName"
+    malformed_utf8 = b"\x80"
+    if env.PYPY:
+        # It is not worth the effort finding a trigger for a failure when running with PyPy.
+        pytest.skip("Sufficiently exercised on platforms other than PyPy.")
+    else:
+        # Meant to trigger PyModule_GetName() failure:
+        sm_name_orig = sm.__name__
+        sm.__name__ = malformed_utf8
+        try:
+            with pytest.raises(Exception):
+                # Seen with Python 3.9: SystemError: nameless module
+                # But we do not want to exercise the internals of PyModule_GetName(), which could
+                # change in future versions of Python, but a bad __name__ is very likely to cause
+                # some kind of failure indefinitely.
+                m.def_submodule(sm, b"SubSubModuleName")
+        finally:
+            # Clean up to ensure nothing gets upset by a module with an invalid __name__.
+            sm.__name__ = sm_name_orig  # Purely precautionary.
+    # Meant to trigger PyImport_AddModule() failure:
+    with pytest.raises(UnicodeDecodeError):
+        m.def_submodule(sm, malformed_utf8)
diff --git a/ext/pybind11/tests/test_multiple_inheritance.cpp b/ext/pybind11/tests/test_multiple_inheritance.cpp
index 6963197a5c..5916ae9010 100644
--- a/ext/pybind11/tests/test_multiple_inheritance.cpp
+++ b/ext/pybind11/tests/test_multiple_inheritance.cpp
@@ -8,14 +8,15 @@
     BSD-style license that can be found in the LICENSE file.
 */
 
-#include "pybind11_tests.h"
 #include "constructor_stats.h"
+#include "pybind11_tests.h"
 
 namespace {
 
 // Many bases for testing that multiple inheritance from many classes (i.e. requiring extra
 // space for holder constructed flags) works.
-template <int N> struct BaseN {
+template <int N>
+struct BaseN {
     explicit BaseN(int i) : i(i) {}
     int i;
 };
@@ -57,13 +58,23 @@ struct Base2a {
     int i;
 };
 struct Base12a : Base1a, Base2a {
-    Base12a(int i, int j) : Base1a(i), Base2a(j) { }
+    Base12a(int i, int j) : Base1a(i), Base2a(j) {}
 };
 
 // test_mi_unaligned_base
 // test_mi_base_return
-struct I801B1 { int a = 1; I801B1() = default; I801B1(const I801B1 &) = default; virtual ~I801B1() = default; };
-struct I801B2 { int b = 2; I801B2() = default; I801B2(const I801B2 &) = default; virtual ~I801B2() = default; };
+struct I801B1 {
+    int a = 1;
+    I801B1() = default;
+    I801B1(const I801B1 &) = default;
+    virtual ~I801B1() = default;
+};
+struct I801B2 {
+    int b = 2;
+    I801B2() = default;
+    I801B2(const I801B2 &) = default;
+    virtual ~I801B2() = default;
+};
 struct I801C : I801B1, I801B2 {};
 struct I801D : I801C {}; // Indirect MI
 
@@ -82,8 +93,7 @@ TEST_SUBMODULE(multiple_inheritance, m) {
         int i;
     };
     py::class_<Base1> b1(m, "Base1");
-    b1.def(py::init<int>())
-      .def("foo", &Base1::foo);
+    b1.def(py::init<int>()).def("foo", &Base1::foo);
 
     struct Base2 {
         explicit Base2(int i) : i(i) {}
@@ -91,42 +101,49 @@ TEST_SUBMODULE(multiple_inheritance, m) {
         int i;
     };
     py::class_<Base2> b2(m, "Base2");
-    b2.def(py::init<int>())
-      .def("bar", &Base2::bar);
-
+    b2.def(py::init<int>()).def("bar", &Base2::bar);
 
     // test_multiple_inheritance_cpp
     struct Base12 : Base1, Base2 {
-        Base12(int i, int j) : Base1(i), Base2(j) { }
+        Base12(int i, int j) : Base1(i), Base2(j) {}
     };
     struct MIType : Base12 {
-        MIType(int i, int j) : Base12(i, j) { }
+        MIType(int i, int j) : Base12(i, j) {}
     };
     py::class_<Base12, Base1, Base2>(m, "Base12");
-    py::class_<MIType, Base12>(m, "MIType")
-        .def(py::init<int, int>());
-
+    py::class_<MIType, Base12>(m, "MIType").def(py::init<int, int>());
 
     // test_multiple_inheritance_python_many_bases
 #define PYBIND11_BASEN(N)                                                                         \
     py::class_<BaseN<(N)>>(m, "BaseN" #N).def(py::init<int>()).def("f" #N, [](BaseN<N> &b) {      \
         return b.i + (N);                                                                         \
     })
-    PYBIND11_BASEN( 1); PYBIND11_BASEN( 2); PYBIND11_BASEN( 3); PYBIND11_BASEN( 4);
-    PYBIND11_BASEN( 5); PYBIND11_BASEN( 6); PYBIND11_BASEN( 7); PYBIND11_BASEN( 8);
-    PYBIND11_BASEN( 9); PYBIND11_BASEN(10); PYBIND11_BASEN(11); PYBIND11_BASEN(12);
-    PYBIND11_BASEN(13); PYBIND11_BASEN(14); PYBIND11_BASEN(15); PYBIND11_BASEN(16);
+    PYBIND11_BASEN(1);
+    PYBIND11_BASEN(2);
+    PYBIND11_BASEN(3);
+    PYBIND11_BASEN(4);
+    PYBIND11_BASEN(5);
+    PYBIND11_BASEN(6);
+    PYBIND11_BASEN(7);
+    PYBIND11_BASEN(8);
+    PYBIND11_BASEN(9);
+    PYBIND11_BASEN(10);
+    PYBIND11_BASEN(11);
+    PYBIND11_BASEN(12);
+    PYBIND11_BASEN(13);
+    PYBIND11_BASEN(14);
+    PYBIND11_BASEN(15);
+    PYBIND11_BASEN(16);
     PYBIND11_BASEN(17);
 
     // Uncommenting this should result in a compile time failure (MI can only be specified via
-    // template parameters because pybind has to know the types involved; see discussion in #742 for
-    // details).
-//    struct Base12v2 : Base1, Base2 {
-//        Base12v2(int i, int j) : Base1(i), Base2(j) { }
-//    };
-//    py::class_<Base12v2>(m, "Base12v2", b1, b2)
-//        .def(py::init<int, int>());
-
+    // template parameters because pybind has to know the types involved; see discussion in #742
+    // for details).
+    //    struct Base12v2 : Base1, Base2 {
+    //        Base12v2(int i, int j) : Base1(i), Base2(j) { }
+    //    };
+    //    py::class_<Base12v2>(m, "Base12v2", b1, b2)
+    //        .def(py::init<int, int>());
 
     // test_multiple_inheritance_virtbase
     // Test the case where not all base classes are specified, and where pybind11 requires the
@@ -139,8 +156,8 @@ TEST_SUBMODULE(multiple_inheritance, m) {
         .def(py::init<int>())
         .def("bar", &Base2a::bar);
 
-    py::class_<Base12a, /* Base1 missing */ Base2a,
-               std::shared_ptr<Base12a>>(m, "Base12a", py::multiple_inheritance())
+    py::class_<Base12a, /* Base1 missing */ Base2a, std::shared_ptr<Base12a>>(
+        m, "Base12a", py::multiple_inheritance())
         .def(py::init<int, int>());
 
     m.def("bar_base2a", [](Base2a *b) { return b->bar(); });
@@ -150,11 +167,18 @@ TEST_SUBMODULE(multiple_inheritance, m) {
     // test_mi_base_return
     // Issue #801: invalid casting to derived type with MI bases
     // Unregistered classes:
-    struct I801B3 { int c = 3; virtual ~I801B3() = default; };
+    struct I801B3 {
+        int c = 3;
+        virtual ~I801B3() = default;
+    };
     struct I801E : I801B3, I801D {};
 
-    py::class_<I801B1, std::shared_ptr<I801B1>>(m, "I801B1").def(py::init<>()).def_readonly("a", &I801B1::a);
-    py::class_<I801B2, std::shared_ptr<I801B2>>(m, "I801B2").def(py::init<>()).def_readonly("b", &I801B2::b);
+    py::class_<I801B1, std::shared_ptr<I801B1>>(m, "I801B1")
+        .def(py::init<>())
+        .def_readonly("a", &I801B1::a);
+    py::class_<I801B2, std::shared_ptr<I801B2>>(m, "I801B2")
+        .def(py::init<>())
+        .def_readonly("b", &I801B2::b);
     py::class_<I801C, I801B1, I801B2, std::shared_ptr<I801C>>(m, "I801C").def(py::init<>());
     py::class_<I801D, I801C, std::shared_ptr<I801D>>(m, "I801D").def(py::init<>());
 
@@ -179,11 +203,8 @@ TEST_SUBMODULE(multiple_inheritance, m) {
     m.def("i801e_c", []() -> I801C * { return new I801E(); });
     m.def("i801e_b2", []() -> I801B2 * { return new I801E(); });
 
-
     // test_mi_static_properties
-    py::class_<Vanilla>(m, "Vanilla")
-        .def(py::init<>())
-        .def("vanilla", &Vanilla::vanilla);
+    py::class_<Vanilla>(m, "Vanilla").def(py::init<>()).def("vanilla", &Vanilla::vanilla);
 
     py::class_<WithStatic1>(m, "WithStatic1")
         .def(py::init<>())
@@ -195,22 +216,19 @@ TEST_SUBMODULE(multiple_inheritance, m) {
         .def_static("static_func2", &WithStatic2::static_func2)
         .def_readwrite_static("static_value2", &WithStatic2::static_value2);
 
-    py::class_<VanillaStaticMix1, Vanilla, WithStatic1, WithStatic2>(
-        m, "VanillaStaticMix1")
+    py::class_<VanillaStaticMix1, Vanilla, WithStatic1, WithStatic2>(m, "VanillaStaticMix1")
         .def(py::init<>())
         .def_static("static_func", &VanillaStaticMix1::static_func)
         .def_readwrite_static("static_value", &VanillaStaticMix1::static_value);
 
-    py::class_<VanillaStaticMix2, WithStatic1, Vanilla, WithStatic2>(
-        m, "VanillaStaticMix2")
+    py::class_<VanillaStaticMix2, WithStatic1, Vanilla, WithStatic2>(m, "VanillaStaticMix2")
         .def(py::init<>())
         .def_static("static_func", &VanillaStaticMix2::static_func)
         .def_readwrite_static("static_value", &VanillaStaticMix2::static_value);
 
-
-    struct WithDict { };
-    struct VanillaDictMix1 : Vanilla, WithDict { };
-    struct VanillaDictMix2 : WithDict, Vanilla { };
+    struct WithDict {};
+    struct VanillaDictMix1 : Vanilla, WithDict {};
+    struct VanillaDictMix2 : WithDict, Vanilla {};
     py::class_<WithDict>(m, "WithDict", py::dynamic_attr()).def(py::init<>());
     py::class_<VanillaDictMix1, Vanilla, WithDict>(m, "VanillaDictMix1").def(py::init<>());
     py::class_<VanillaDictMix2, WithDict, Vanilla>(m, "VanillaDictMix2").def(py::init<>());
@@ -218,16 +236,106 @@ TEST_SUBMODULE(multiple_inheritance, m) {
     // test_diamond_inheritance
     // Issue #959: segfault when constructing diamond inheritance instance
     // All of these have int members so that there will be various unequal pointers involved.
-    struct B { int b; B() = default; B(const B&) = default; virtual ~B() = default; };
-    struct C0 : public virtual B { int c0; };
-    struct C1 : public virtual B { int c1; };
-    struct D : public C0, public C1 { int d; };
-    py::class_<B>(m, "B")
-        .def("b", [](B *self) { return self; });
-    py::class_<C0, B>(m, "C0")
-        .def("c0", [](C0 *self) { return self; });
-    py::class_<C1, B>(m, "C1")
-        .def("c1", [](C1 *self) { return self; });
-    py::class_<D, C0, C1>(m, "D")
-        .def(py::init<>());
+    struct B {
+        int b;
+        B() = default;
+        B(const B &) = default;
+        virtual ~B() = default;
+    };
+    struct C0 : public virtual B {
+        int c0;
+    };
+    struct C1 : public virtual B {
+        int c1;
+    };
+    struct D : public C0, public C1 {
+        int d;
+    };
+    py::class_<B>(m, "B").def("b", [](B *self) { return self; });
+    py::class_<C0, B>(m, "C0").def("c0", [](C0 *self) { return self; });
+    py::class_<C1, B>(m, "C1").def("c1", [](C1 *self) { return self; });
+    py::class_<D, C0, C1>(m, "D").def(py::init<>());
+
+    // test_pr3635_diamond_*
+    // - functions are get_{base}_{var}, return {var}
+    struct MVB {
+        MVB() = default;
+        MVB(const MVB &) = default;
+        virtual ~MVB() = default;
+
+        int b = 1;
+        int get_b_b() const { return b; }
+    };
+    struct MVC : virtual MVB {
+        int c = 2;
+        int get_c_b() const { return b; }
+        int get_c_c() const { return c; }
+    };
+    struct MVD0 : virtual MVC {
+        int d0 = 3;
+        int get_d0_b() const { return b; }
+        int get_d0_c() const { return c; }
+        int get_d0_d0() const { return d0; }
+    };
+    struct MVD1 : virtual MVC {
+        int d1 = 4;
+        int get_d1_b() const { return b; }
+        int get_d1_c() const { return c; }
+        int get_d1_d1() const { return d1; }
+    };
+    struct MVE : virtual MVD0, virtual MVD1 {
+        int e = 5;
+        int get_e_b() const { return b; }
+        int get_e_c() const { return c; }
+        int get_e_d0() const { return d0; }
+        int get_e_d1() const { return d1; }
+        int get_e_e() const { return e; }
+    };
+    struct MVF : virtual MVE {
+        int f = 6;
+        int get_f_b() const { return b; }
+        int get_f_c() const { return c; }
+        int get_f_d0() const { return d0; }
+        int get_f_d1() const { return d1; }
+        int get_f_e() const { return e; }
+        int get_f_f() const { return f; }
+    };
+    py::class_<MVB>(m, "MVB")
+        .def(py::init<>())
+        .def("get_b_b", &MVB::get_b_b)
+        .def_readwrite("b", &MVB::b);
+    py::class_<MVC, MVB>(m, "MVC")
+        .def(py::init<>())
+        .def("get_c_b", &MVC::get_c_b)
+        .def("get_c_c", &MVC::get_c_c)
+        .def_readwrite("c", &MVC::c);
+    py::class_<MVD0, MVC>(m, "MVD0")
+        .def(py::init<>())
+        .def("get_d0_b", &MVD0::get_d0_b)
+        .def("get_d0_c", &MVD0::get_d0_c)
+        .def("get_d0_d0", &MVD0::get_d0_d0)
+        .def_readwrite("d0", &MVD0::d0);
+    py::class_<MVD1, MVC>(m, "MVD1")
+        .def(py::init<>())
+        .def("get_d1_b", &MVD1::get_d1_b)
+        .def("get_d1_c", &MVD1::get_d1_c)
+        .def("get_d1_d1", &MVD1::get_d1_d1)
+        .def_readwrite("d1", &MVD1::d1);
+    py::class_<MVE, MVD0, MVD1>(m, "MVE")
+        .def(py::init<>())
+        .def("get_e_b", &MVE::get_e_b)
+        .def("get_e_c", &MVE::get_e_c)
+        .def("get_e_d0", &MVE::get_e_d0)
+        .def("get_e_d1", &MVE::get_e_d1)
+        .def("get_e_e", &MVE::get_e_e)
+        .def_readwrite("e", &MVE::e);
+    py::class_<MVF, MVE>(m, "MVF")
+        .def(py::init<>())
+        .def("get_f_b", &MVF::get_f_b)
+        .def("get_f_c", &MVF::get_f_c)
+        .def("get_f_d0", &MVF::get_f_d0)
+        .def("get_f_d1", &MVF::get_f_d1)
+        .def("get_f_e", &MVF::get_f_e)
+        .def("get_f_f", &MVF::get_f_f)
+        .def_readwrite("f", &MVF::f);
 }
diff --git a/ext/pybind11/tests/test_multiple_inheritance.py b/ext/pybind11/tests/test_multiple_inheritance.py
index a02c313001..3a1d88d711 100644
--- a/ext/pybind11/tests/test_multiple_inheritance.py
+++ b/ext/pybind11/tests/test_multiple_inheritance.py
@@ -1,4 +1,3 @@
-# -*- coding: utf-8 -*-
 import pytest
 
 import env  # noqa: F401
@@ -13,8 +12,7 @@ def test_multiple_inheritance_cpp():
     assert mt.bar() == 4
 
 
-@pytest.mark.skipif("env.PYPY and env.PY2")
-@pytest.mark.xfail("env.PYPY and not env.PY2")
+@pytest.mark.xfail("env.PYPY")
 def test_multiple_inheritance_mix1():
     class Base1:
         def __init__(self, i):
@@ -53,15 +51,14 @@ def test_multiple_inheritance_mix2():
     assert mt.bar() == 4
 
 
-@pytest.mark.skipif("env.PYPY and env.PY2")
-@pytest.mark.xfail("env.PYPY and not env.PY2")
+@pytest.mark.xfail("env.PYPY")
 def test_multiple_inheritance_python():
     class MI1(m.Base1, m.Base2):
         def __init__(self, i, j):
             m.Base1.__init__(self, i)
             m.Base2.__init__(self, j)
 
-    class B1(object):
+    class B1:
         def v(self):
             return 1
 
@@ -96,7 +93,7 @@ def test_multiple_inheritance_python():
         def v(self):
             return 2
 
-    class B3(object):
+    class B3:
         def v(self):
             return 3
 
@@ -358,3 +355,139 @@ def test_diamond_inheritance():
     assert d is d.c0().b()
     assert d is d.c1().b()
     assert d is d.c0().c1().b().c0().b()
+
+
+def test_pr3635_diamond_b():
+    o = m.MVB()
+    assert o.b == 1
+
+    assert o.get_b_b() == 1
+
+
+def test_pr3635_diamond_c():
+    o = m.MVC()
+    assert o.b == 1
+    assert o.c == 2
+
+    assert o.get_b_b() == 1
+    assert o.get_c_b() == 1
+
+    assert o.get_c_c() == 2
+
+
+def test_pr3635_diamond_d0():
+    o = m.MVD0()
+    assert o.b == 1
+    assert o.c == 2
+    assert o.d0 == 3
+
+    assert o.get_b_b() == 1
+    assert o.get_c_b() == 1
+    assert o.get_d0_b() == 1
+
+    assert o.get_c_c() == 2
+    assert o.get_d0_c() == 2
+
+    assert o.get_d0_d0() == 3
+
+
+def test_pr3635_diamond_d1():
+    o = m.MVD1()
+    assert o.b == 1
+    assert o.c == 2
+    assert o.d1 == 4
+
+    assert o.get_b_b() == 1
+    assert o.get_c_b() == 1
+    assert o.get_d1_b() == 1
+
+    assert o.get_c_c() == 2
+    assert o.get_d1_c() == 2
+
+    assert o.get_d1_d1() == 4
+
+
+def test_pr3635_diamond_e():
+    o = m.MVE()
+    assert o.b == 1
+    assert o.c == 2
+    assert o.d0 == 3
+    assert o.d1 == 4
+    assert o.e == 5
+
+    assert o.get_b_b() == 1
+    assert o.get_c_b() == 1
+    assert o.get_d0_b() == 1
+    assert o.get_d1_b() == 1
+    assert o.get_e_b() == 1
+
+    assert o.get_c_c() == 2
+    assert o.get_d0_c() == 2
+    assert o.get_d1_c() == 2
+    assert o.get_e_c() == 2
+
+    assert o.get_d0_d0() == 3
+    assert o.get_e_d0() == 3
+
+    assert o.get_d1_d1() == 4
+    assert o.get_e_d1() == 4
+
+    assert o.get_e_e() == 5
+
+
+def test_pr3635_diamond_f():
+    o = m.MVF()
+    assert o.b == 1
+    assert o.c == 2
+    assert o.d0 == 3
+    assert o.d1 == 4
+    assert o.e == 5
+    assert o.f == 6
+
+    assert o.get_b_b() == 1
+    assert o.get_c_b() == 1
+    assert o.get_d0_b() == 1
+    assert o.get_d1_b() == 1
+    assert o.get_e_b() == 1
+    assert o.get_f_b() == 1
+
+    assert o.get_c_c() == 2
+    assert o.get_d0_c() == 2
+    assert o.get_d1_c() == 2
+    assert o.get_e_c() == 2
+    assert o.get_f_c() == 2
+
+    assert o.get_d0_d0() == 3
+    assert o.get_e_d0() == 3
+    assert o.get_f_d0() == 3
+
+    assert o.get_d1_d1() == 4
+    assert o.get_e_d1() == 4
+    assert o.get_f_d1() == 4
+
+    assert o.get_e_e() == 5
+    assert o.get_f_e() == 5
+
+    assert o.get_f_f() == 6
+
+
+def test_python_inherit_from_mi():
+    """Tests extending a Python class from a single inheritor of a MI class"""
+
+    class PyMVF(m.MVF):
+        g = 7
+
+        def get_g_g(self):
+            return self.g
+
+    o = PyMVF()
+
+    assert o.b == 1
+    assert o.c == 2
+    assert o.d0 == 3
+    assert o.d1 == 4
+    assert o.e == 5
+    assert o.f == 6
+    assert o.g == 7
+
+    assert o.get_g_g() == 7
diff --git a/ext/pybind11/tests/test_numpy_array.cpp b/ext/pybind11/tests/test_numpy_array.cpp
index 30a71acc9b..b118e2c6cc 100644
--- a/ext/pybind11/tests/test_numpy_array.cpp
+++ b/ext/pybind11/tests/test_numpy_array.cpp
@@ -7,11 +7,11 @@
     BSD-style license that can be found in the LICENSE file.
 */
 
-#include "pybind11_tests.h"
-
 #include <pybind11/numpy.h>
 #include <pybind11/stl.h>
 
+#include "pybind11_tests.h"
+
 #include <cstdint>
 #include <utility>
 
@@ -22,7 +22,7 @@ struct DtypeCheck {
 };
 
 template <typename T>
-DtypeCheck get_dtype_check(const char* name) {
+DtypeCheck get_dtype_check(const char *name) {
     py::module_ np = py::module_::import("numpy");
     DtypeCheck check{};
     check.numpy = np.attr("dtype")(np.attr(name));
@@ -31,17 +31,15 @@ DtypeCheck get_dtype_check(const char* name) {
 }
 
 std::vector<DtypeCheck> get_concrete_dtype_checks() {
-    return {
-        // Normalization
-        get_dtype_check<std::int8_t>("int8"),
-        get_dtype_check<std::uint8_t>("uint8"),
-        get_dtype_check<std::int16_t>("int16"),
-        get_dtype_check<std::uint16_t>("uint16"),
-        get_dtype_check<std::int32_t>("int32"),
-        get_dtype_check<std::uint32_t>("uint32"),
-        get_dtype_check<std::int64_t>("int64"),
-        get_dtype_check<std::uint64_t>("uint64")
-    };
+    return {// Normalization
+            get_dtype_check<std::int8_t>("int8"),
+            get_dtype_check<std::uint8_t>("uint8"),
+            get_dtype_check<std::int16_t>("int16"),
+            get_dtype_check<std::uint16_t>("uint16"),
+            get_dtype_check<std::int32_t>("int32"),
+            get_dtype_check<std::uint32_t>("uint32"),
+            get_dtype_check<std::int64_t>("int64"),
+            get_dtype_check<std::uint64_t>("uint64")};
 }
 
 struct DtypeSizeCheck {
@@ -80,43 +78,71 @@ using arr = py::array;
 using arr_t = py::array_t<uint16_t, 0>;
 static_assert(std::is_same<arr_t::value_type, uint16_t>::value, "");
 
-template<typename... Ix> arr data(const arr& a, Ix... index) {
+template <typename... Ix>
+arr data(const arr &a, Ix... index) {
     return arr(a.nbytes() - a.offset_at(index...), (const uint8_t *) a.data(index...));
 }
 
-template<typename... Ix> arr data_t(const arr_t& a, Ix... index) {
+template <typename... Ix>
+arr data_t(const arr_t &a, Ix... index) {
     return arr(a.size() - a.index_at(index...), a.data(index...));
 }
 
-template<typename... Ix> arr& mutate_data(arr& a, Ix... index) {
-    auto ptr = (uint8_t *) a.mutable_data(index...);
-    for (py::ssize_t i = 0; i < a.nbytes() - a.offset_at(index...); i++)
+template <typename... Ix>
+arr &mutate_data(arr &a, Ix... index) {
+    auto *ptr = (uint8_t *) a.mutable_data(index...);
+    for (py::ssize_t i = 0; i < a.nbytes() - a.offset_at(index...); i++) {
         ptr[i] = (uint8_t) (ptr[i] * 2);
+    }
     return a;
 }
 
-template<typename... Ix> arr_t& mutate_data_t(arr_t& a, Ix... index) {
+template <typename... Ix>
+arr_t &mutate_data_t(arr_t &a, Ix... index) {
     auto ptr = a.mutable_data(index...);
-    for (py::ssize_t i = 0; i < a.size() - a.index_at(index...); i++)
+    for (py::ssize_t i = 0; i < a.size() - a.index_at(index...); i++) {
         ptr[i]++;
+    }
     return a;
 }
 
-template<typename... Ix> py::ssize_t index_at(const arr& a, Ix... idx) { return a.index_at(idx...); }
-template<typename... Ix> py::ssize_t index_at_t(const arr_t& a, Ix... idx) { return a.index_at(idx...); }
-template<typename... Ix> py::ssize_t offset_at(const arr& a, Ix... idx) { return a.offset_at(idx...); }
-template<typename... Ix> py::ssize_t offset_at_t(const arr_t& a, Ix... idx) { return a.offset_at(idx...); }
-template<typename... Ix> py::ssize_t at_t(const arr_t& a, Ix... idx) { return a.at(idx...); }
-template<typename... Ix> arr_t& mutate_at_t(arr_t& a, Ix... idx) { a.mutable_at(idx...)++; return a; }
+template <typename... Ix>
+py::ssize_t index_at(const arr &a, Ix... idx) {
+    return a.index_at(idx...);
+}
+template <typename... Ix>
+py::ssize_t index_at_t(const arr_t &a, Ix... idx) {
+    return a.index_at(idx...);
+}
+template <typename... Ix>
+py::ssize_t offset_at(const arr &a, Ix... idx) {
+    return a.offset_at(idx...);
+}
+template <typename... Ix>
+py::ssize_t offset_at_t(const arr_t &a, Ix... idx) {
+    return a.offset_at(idx...);
+}
+template <typename... Ix>
+py::ssize_t at_t(const arr_t &a, Ix... idx) {
+    return a.at(idx...);
+}
+template <typename... Ix>
+arr_t &mutate_at_t(arr_t &a, Ix... idx) {
+    a.mutable_at(idx...)++;
+    return a;
+}
 
-#define def_index_fn(name, type) \
-    sm.def(#name, [](type a) { return name(a); }); \
-    sm.def(#name, [](type a, int i) { return name(a, i); }); \
-    sm.def(#name, [](type a, int i, int j) { return name(a, i, j); }); \
+#define def_index_fn(name, type)                                                                  \
+    sm.def(#name, [](type a) { return name(a); });                                                \
+    sm.def(#name, [](type a, int i) { return name(a, i); });                                      \
+    sm.def(#name, [](type a, int i, int j) { return name(a, i, j); });                            \
     sm.def(#name, [](type a, int i, int j, int k) { return name(a, i, j, k); });
 
-template <typename T, typename T2> py::handle auxiliaries(T &&r, T2 &&r2) {
-    if (r.ndim() != 2) throw std::domain_error("error: ndim != 2");
+template <typename T, typename T2>
+py::handle auxiliaries(T &&r, T2 &&r2) {
+    if (r.ndim() != 2) {
+        throw std::domain_error("error: ndim != 2");
+    }
     py::list l;
     l.append(*r.data(0, 0));
     l.append(*r2.mutable_data(0, 0));
@@ -134,16 +160,18 @@ template <typename T, typename T2> py::handle auxiliaries(T &&r, T2 &&r2) {
 static int data_i = 42;
 
 TEST_SUBMODULE(numpy_array, sm) {
-    try { py::module_::import("numpy"); }
-    catch (...) { return; }
+    try {
+        py::module_::import("numpy");
+    } catch (const py::error_already_set &) {
+        return;
+    }
 
     // test_dtypes
     py::class_<DtypeCheck>(sm, "DtypeCheck")
         .def_readonly("numpy", &DtypeCheck::numpy)
         .def_readonly("pybind11", &DtypeCheck::pybind11)
-        .def("__repr__", [](const DtypeCheck& self) {
-            return py::str("<DtypeCheck numpy={} pybind11={}>").format(
-                self.numpy, self.pybind11);
+        .def("__repr__", [](const DtypeCheck &self) {
+            return py::str("<DtypeCheck numpy={} pybind11={}>").format(self.numpy, self.pybind11);
         });
     sm.def("get_concrete_dtype_checks", &get_concrete_dtype_checks);
 
@@ -151,41 +179,41 @@ TEST_SUBMODULE(numpy_array, sm) {
         .def_readonly("name", &DtypeSizeCheck::name)
         .def_readonly("size_cpp", &DtypeSizeCheck::size_cpp)
         .def_readonly("size_numpy", &DtypeSizeCheck::size_numpy)
-        .def("__repr__", [](const DtypeSizeCheck& self) {
-            return py::str("<DtypeSizeCheck name='{}' size_cpp={} size_numpy={} dtype={}>").format(
-                self.name, self.size_cpp, self.size_numpy, self.dtype);
+        .def("__repr__", [](const DtypeSizeCheck &self) {
+            return py::str("<DtypeSizeCheck name='{}' size_cpp={} size_numpy={} dtype={}>")
+                .format(self.name, self.size_cpp, self.size_numpy, self.dtype);
         });
     sm.def("get_platform_dtype_size_checks", &get_platform_dtype_size_checks);
 
     // test_array_attributes
-    sm.def("ndim", [](const arr& a) { return a.ndim(); });
-    sm.def("shape", [](const arr& a) { return arr(a.ndim(), a.shape()); });
-    sm.def("shape", [](const arr& a, py::ssize_t dim) { return a.shape(dim); });
-    sm.def("strides", [](const arr& a) { return arr(a.ndim(), a.strides()); });
-    sm.def("strides", [](const arr& a, py::ssize_t dim) { return a.strides(dim); });
-    sm.def("writeable", [](const arr& a) { return a.writeable(); });
-    sm.def("size", [](const arr& a) { return a.size(); });
-    sm.def("itemsize", [](const arr& a) { return a.itemsize(); });
-    sm.def("nbytes", [](const arr& a) { return a.nbytes(); });
-    sm.def("owndata", [](const arr& a) { return a.owndata(); });
+    sm.def("ndim", [](const arr &a) { return a.ndim(); });
+    sm.def("shape", [](const arr &a) { return arr(a.ndim(), a.shape()); });
+    sm.def("shape", [](const arr &a, py::ssize_t dim) { return a.shape(dim); });
+    sm.def("strides", [](const arr &a) { return arr(a.ndim(), a.strides()); });
+    sm.def("strides", [](const arr &a, py::ssize_t dim) { return a.strides(dim); });
+    sm.def("writeable", [](const arr &a) { return a.writeable(); });
+    sm.def("size", [](const arr &a) { return a.size(); });
+    sm.def("itemsize", [](const arr &a) { return a.itemsize(); });
+    sm.def("nbytes", [](const arr &a) { return a.nbytes(); });
+    sm.def("owndata", [](const arr &a) { return a.owndata(); });
 
     // test_index_offset
-    def_index_fn(index_at, const arr&);
-    def_index_fn(index_at_t, const arr_t&);
-    def_index_fn(offset_at, const arr&);
-    def_index_fn(offset_at_t, const arr_t&);
+    def_index_fn(index_at, const arr &);
+    def_index_fn(index_at_t, const arr_t &);
+    def_index_fn(offset_at, const arr &);
+    def_index_fn(offset_at_t, const arr_t &);
     // test_data
-    def_index_fn(data, const arr&);
-    def_index_fn(data_t, const arr_t&);
+    def_index_fn(data, const arr &);
+    def_index_fn(data_t, const arr_t &);
     // test_mutate_data, test_mutate_readonly
-    def_index_fn(mutate_data, arr&);
-    def_index_fn(mutate_data_t, arr_t&);
-    def_index_fn(at_t, const arr_t&);
-    def_index_fn(mutate_at_t, arr_t&);
+    def_index_fn(mutate_data, arr &);
+    def_index_fn(mutate_data_t, arr_t &);
+    def_index_fn(at_t, const arr_t &);
+    def_index_fn(mutate_at_t, arr_t &);
 
     // test_make_c_f_array
-    sm.def("make_f_array", [] { return py::array_t<float>({ 2, 2 }, { 4, 8 }); });
-    sm.def("make_c_array", [] { return py::array_t<float>({ 2, 2 }, { 8, 4 }); });
+    sm.def("make_f_array", [] { return py::array_t<float>({2, 2}, {4, 8}); });
+    sm.def("make_c_array", [] { return py::array_t<float>({2, 2}, {8, 4}); });
 
     // test_empty_shaped_array
     sm.def("make_empty_shaped_array", [] { return py::array(py::dtype("f"), {}, {}); });
@@ -194,18 +222,16 @@ TEST_SUBMODULE(numpy_array, sm) {
 
     // test_wrap
     sm.def("wrap", [](const py::array &a) {
-        return py::array(
-            a.dtype(),
-            {a.shape(), a.shape() + a.ndim()},
-            {a.strides(), a.strides() + a.ndim()},
-            a.data(),
-            a
-        );
+        return py::array(a.dtype(),
+                         {a.shape(), a.shape() + a.ndim()},
+                         {a.strides(), a.strides() + a.ndim()},
+                         a.data(),
+                         a);
     });
 
     // test_numpy_view
     struct ArrayClass {
-        int data[2] = { 1, 2 };
+        int data[2] = {1, 2};
         ArrayClass() { py::print("ArrayClass()"); }
         ~ArrayClass() { py::print("~ArrayClass()"); }
     };
@@ -213,13 +239,12 @@ TEST_SUBMODULE(numpy_array, sm) {
         .def(py::init<>())
         .def("numpy_view", [](py::object &obj) {
             py::print("ArrayClass::numpy_view()");
-            auto &a = obj.cast<ArrayClass&>();
+            auto &a = obj.cast<ArrayClass &>();
             return py::array_t<int>({2}, {4}, a.data, obj);
-        }
-    );
+        });
 
     // test_cast_numpy_int64_to_uint64
-    sm.def("function_taking_uint64", [](uint64_t) { });
+    sm.def("function_taking_uint64", [](uint64_t) {});
 
     // test_isinstance
     sm.def("isinstance_untyped", [](py::object yes, py::object no) {
@@ -232,18 +257,14 @@ TEST_SUBMODULE(numpy_array, sm) {
 
     // test_constructors
     sm.def("default_constructors", []() {
-        return py::dict(
-            "array"_a=py::array(),
-            "array_t<int32>"_a=py::array_t<std::int32_t>(),
-            "array_t<double>"_a=py::array_t<double>()
-        );
+        return py::dict("array"_a = py::array(),
+                        "array_t<int32>"_a = py::array_t<std::int32_t>(),
+                        "array_t<double>"_a = py::array_t<double>());
     });
     sm.def("converting_constructors", [](const py::object &o) {
-        return py::dict(
-            "array"_a=py::array(o),
-            "array_t<int32>"_a=py::array_t<std::int32_t>(o),
-            "array_t<double>"_a=py::array_t<double>(o)
-        );
+        return py::dict("array"_a = py::array(o),
+                        "array_t<int32>"_a = py::array_t<std::int32_t>(o),
+                        "array_t<double>"_a = py::array_t<double>(o));
     });
 
     // test_overload_resolution
@@ -290,36 +311,49 @@ TEST_SUBMODULE(numpy_array, sm) {
     sm.def("issue685", [](const py::object &) { return "other"; });
 
     // test_array_unchecked_fixed_dims
-    sm.def("proxy_add2", [](py::array_t<double> a, double v) {
-        auto r = a.mutable_unchecked<2>();
-        for (py::ssize_t i = 0; i < r.shape(0); i++)
-            for (py::ssize_t j = 0; j < r.shape(1); j++)
-                r(i, j) += v;
-    }, py::arg{}.noconvert(), py::arg());
+    sm.def(
+        "proxy_add2",
+        [](py::array_t<double> a, double v) {
+            auto r = a.mutable_unchecked<2>();
+            for (py::ssize_t i = 0; i < r.shape(0); i++) {
+                for (py::ssize_t j = 0; j < r.shape(1); j++) {
+                    r(i, j) += v;
+                }
+            }
+        },
+        py::arg{}.noconvert(),
+        py::arg());
 
     sm.def("proxy_init3", [](double start) {
-        py::array_t<double, py::array::c_style> a({ 3, 3, 3 });
+        py::array_t<double, py::array::c_style> a({3, 3, 3});
         auto r = a.mutable_unchecked<3>();
-        for (py::ssize_t i = 0; i < r.shape(0); i++)
-        for (py::ssize_t j = 0; j < r.shape(1); j++)
-        for (py::ssize_t k = 0; k < r.shape(2); k++)
-            r(i, j, k) = start++;
+        for (py::ssize_t i = 0; i < r.shape(0); i++) {
+            for (py::ssize_t j = 0; j < r.shape(1); j++) {
+                for (py::ssize_t k = 0; k < r.shape(2); k++) {
+                    r(i, j, k) = start++;
+                }
+            }
+        }
         return a;
     });
     sm.def("proxy_init3F", [](double start) {
-        py::array_t<double, py::array::f_style> a({ 3, 3, 3 });
+        py::array_t<double, py::array::f_style> a({3, 3, 3});
         auto r = a.mutable_unchecked<3>();
-        for (py::ssize_t k = 0; k < r.shape(2); k++)
-        for (py::ssize_t j = 0; j < r.shape(1); j++)
-        for (py::ssize_t i = 0; i < r.shape(0); i++)
-            r(i, j, k) = start++;
+        for (py::ssize_t k = 0; k < r.shape(2); k++) {
+            for (py::ssize_t j = 0; j < r.shape(1); j++) {
+                for (py::ssize_t i = 0; i < r.shape(0); i++) {
+                    r(i, j, k) = start++;
+                }
+            }
+        }
         return a;
     });
     sm.def("proxy_squared_L2_norm", [](const py::array_t<double> &a) {
         auto r = a.unchecked<1>();
         double sumsq = 0;
-        for (py::ssize_t i = 0; i < r.shape(0); i++)
+        for (py::ssize_t i = 0; i < r.shape(0); i++) {
             sumsq += r[i] * r(i); // Either notation works for a 1D array
+        }
         return sumsq;
     });
 
@@ -343,51 +377,69 @@ TEST_SUBMODULE(numpy_array, sm) {
 
     // test_array_unchecked_dyn_dims
     // Same as the above, but without a compile-time dimensions specification:
-    sm.def("proxy_add2_dyn", [](py::array_t<double> a, double v) {
-        auto r = a.mutable_unchecked();
-        if (r.ndim() != 2) throw std::domain_error("error: ndim != 2");
-        for (py::ssize_t i = 0; i < r.shape(0); i++)
-            for (py::ssize_t j = 0; j < r.shape(1); j++)
-                r(i, j) += v;
-    }, py::arg{}.noconvert(), py::arg());
+    sm.def(
+        "proxy_add2_dyn",
+        [](py::array_t<double> a, double v) {
+            auto r = a.mutable_unchecked();
+            if (r.ndim() != 2) {
+                throw std::domain_error("error: ndim != 2");
+            }
+            for (py::ssize_t i = 0; i < r.shape(0); i++) {
+                for (py::ssize_t j = 0; j < r.shape(1); j++) {
+                    r(i, j) += v;
+                }
+            }
+        },
+        py::arg{}.noconvert(),
+        py::arg());
     sm.def("proxy_init3_dyn", [](double start) {
-        py::array_t<double, py::array::c_style> a({ 3, 3, 3 });
+        py::array_t<double, py::array::c_style> a({3, 3, 3});
         auto r = a.mutable_unchecked();
-        if (r.ndim() != 3) throw std::domain_error("error: ndim != 3");
-        for (py::ssize_t i = 0; i < r.shape(0); i++)
-        for (py::ssize_t j = 0; j < r.shape(1); j++)
-        for (py::ssize_t k = 0; k < r.shape(2); k++)
-            r(i, j, k) = start++;
+        if (r.ndim() != 3) {
+            throw std::domain_error("error: ndim != 3");
+        }
+        for (py::ssize_t i = 0; i < r.shape(0); i++) {
+            for (py::ssize_t j = 0; j < r.shape(1); j++) {
+                for (py::ssize_t k = 0; k < r.shape(2); k++) {
+                    r(i, j, k) = start++;
+                }
+            }
+        }
         return a;
     });
     sm.def("proxy_auxiliaries2_dyn", [](py::array_t<double> a) {
         return auxiliaries(a.unchecked(), a.mutable_unchecked());
     });
 
-    sm.def("array_auxiliaries2", [](py::array_t<double> a) {
-        return auxiliaries(a, a);
-    });
+    sm.def("array_auxiliaries2", [](py::array_t<double> a) { return auxiliaries(a, a); });
 
     // test_array_failures
-    // Issue #785: Uninformative "Unknown internal error" exception when constructing array from empty object:
+    // Issue #785: Uninformative "Unknown internal error" exception when constructing array from
+    // empty object:
     sm.def("array_fail_test", []() { return py::array(py::object()); });
     sm.def("array_t_fail_test", []() { return py::array_t<double>(py::object()); });
     // Make sure the error from numpy is being passed through:
-    sm.def("array_fail_test_negative_size", []() { int c = 0; return py::array(-1, &c); });
+    sm.def("array_fail_test_negative_size", []() {
+        int c = 0;
+        return py::array(-1, &c);
+    });
 
     // test_initializer_list
     // Issue (unnumbered; reported in #788): regression: initializer lists can be ambiguous
-    sm.def("array_initializer_list1", []() { return py::array_t<float>(1); }); // { 1 } also works, but clang warns about it
-    sm.def("array_initializer_list2", []() { return py::array_t<float>({ 1, 2 }); });
-    sm.def("array_initializer_list3", []() { return py::array_t<float>({ 1, 2, 3 }); });
-    sm.def("array_initializer_list4", []() { return py::array_t<float>({ 1, 2, 3, 4 }); });
+    sm.def("array_initializer_list1", []() { return py::array_t<float>(1); });
+    // { 1 } also works for the above, but clang warns about it
+    sm.def("array_initializer_list2", []() { return py::array_t<float>({1, 2}); });
+    sm.def("array_initializer_list3", []() { return py::array_t<float>({1, 2, 3}); });
+    sm.def("array_initializer_list4", []() { return py::array_t<float>({1, 2, 3, 4}); });
 
     // test_array_resize
     // reshape array to 2D without changing size
     sm.def("array_reshape2", [](py::array_t<double> a) {
-        const auto dim_sz = (py::ssize_t)std::sqrt(a.size());
-        if (dim_sz * dim_sz != a.size())
-            throw std::domain_error("array_reshape2: input array total size is not a squared integer");
+        const auto dim_sz = (py::ssize_t) std::sqrt(a.size());
+        if (dim_sz * dim_sz != a.size()) {
+            throw std::domain_error(
+                "array_reshape2: input array total size is not a squared integer");
+        }
         a.resize({dim_sz, dim_sz});
     });
 
@@ -469,4 +521,6 @@ TEST_SUBMODULE(numpy_array, sm) {
     sm.def("test_fmt_desc_double", [](const py::array_t<double> &) {});
     sm.def("test_fmt_desc_const_float", [](const py::array_t<const float> &) {});
     sm.def("test_fmt_desc_const_double", [](const py::array_t<const double> &) {});
+
+    sm.def("round_trip_float", [](double d) { return d; });
 }
diff --git a/ext/pybind11/tests/test_numpy_array.py b/ext/pybind11/tests/test_numpy_array.py
index e4138f0239..cdec9ad60b 100644
--- a/ext/pybind11/tests/test_numpy_array.py
+++ b/ext/pybind11/tests/test_numpy_array.py
@@ -1,4 +1,3 @@
-# -*- coding: utf-8 -*-
 import pytest
 
 import env  # noqa: F401
@@ -19,9 +18,7 @@ def test_dtypes():
         assert check.numpy == check.pybind11, check
         if check.numpy.num != check.pybind11.num:
             print(
-                "NOTE: typenum mismatch for {}: {} != {}".format(
-                    check, check.numpy.num, check.pybind11.num
-                )
+                f"NOTE: typenum mismatch for {check}: {check.numpy.num} != {check.pybind11.num}"
             )
 
 
@@ -117,9 +114,7 @@ def test_at_fail(arr, dim):
     for func in m.at_t, m.mutate_at_t:
         with pytest.raises(IndexError) as excinfo:
             func(arr, *([0] * dim))
-        assert str(excinfo.value) == "index dimension mismatch: {} (ndim = 2)".format(
-            dim
-        )
+        assert str(excinfo.value) == f"index dimension mismatch: {dim} (ndim = 2)"
 
 
 def test_at(arr):
@@ -193,8 +188,6 @@ def test_make_empty_shaped_array():
 
 def test_wrap():
     def assert_references(a, b, base=None):
-        from distutils.version import LooseVersion
-
         if base is None:
             base = a
         assert a is not b
@@ -205,7 +198,8 @@ def test_wrap():
         assert a.flags.f_contiguous == b.flags.f_contiguous
         assert a.flags.writeable == b.flags.writeable
         assert a.flags.aligned == b.flags.aligned
-        if LooseVersion(np.__version__) >= LooseVersion("1.14.0"):
+        # 1.13 supported Python 3.6
+        if tuple(int(x) for x in np.__version__.split(".")[:2]) >= (1, 14):
             assert a.flags.writebackifcopy == b.flags.writebackifcopy
         else:
             assert a.flags.updateifcopy == b.flags.updateifcopy
@@ -591,3 +585,9 @@ def test_dtype_refcount_leak():
     m.ndim(a)
     after = getrefcount(dtype)
     assert after == before
+
+
+def test_round_trip_float():
+    arr = np.zeros((), np.float64)
+    arr[()] = 37.2
+    assert m.round_trip_float(arr) == 37.2
diff --git a/ext/pybind11/tests/test_numpy_dtypes.cpp b/ext/pybind11/tests/test_numpy_dtypes.cpp
index bf4f4cee74..6654f9ed8f 100644
--- a/ext/pybind11/tests/test_numpy_dtypes.cpp
+++ b/ext/pybind11/tests/test_numpy_dtypes.cpp
@@ -7,13 +7,14 @@
   BSD-style license that can be found in the LICENSE file.
 */
 
-#include "pybind11_tests.h"
 #include <pybind11/numpy.h>
 
+#include "pybind11_tests.h"
+
 #ifdef __GNUC__
-#define PYBIND11_PACKED(cls) cls __attribute__((__packed__))
+#    define PYBIND11_PACKED(cls) cls __attribute__((__packed__))
 #else
-#define PYBIND11_PACKED(cls) __pragma(pack(push, 1)) cls __pragma(pack(pop))
+#    define PYBIND11_PACKED(cls) __pragma(pack(push, 1)) cls __pragma(pack(pop))
 #endif
 
 namespace py = pybind11;
@@ -25,7 +26,7 @@ struct SimpleStruct {
     long double ldbl_;
 };
 
-std::ostream& operator<<(std::ostream& os, const SimpleStruct& v) {
+std::ostream &operator<<(std::ostream &os, const SimpleStruct &v) {
     return os << "s:" << v.bool_ << "," << v.uint_ << "," << v.float_ << "," << v.ldbl_;
 }
 
@@ -43,7 +44,7 @@ PYBIND11_PACKED(struct PackedStruct {
     long double ldbl_;
 });
 
-std::ostream& operator<<(std::ostream& os, const PackedStruct& v) {
+std::ostream &operator<<(std::ostream &os, const PackedStruct &v) {
     return os << "p:" << v.bool_ << "," << v.uint_ << "," << v.float_ << "," << v.ldbl_;
 }
 
@@ -52,7 +53,7 @@ PYBIND11_PACKED(struct NestedStruct {
     PackedStruct b;
 });
 
-std::ostream& operator<<(std::ostream& os, const NestedStruct& v) {
+std::ostream &operator<<(std::ostream &os, const NestedStruct &v) {
     return os << "n:a=" << v.a << ";b=" << v.b;
 }
 
@@ -70,7 +71,7 @@ struct PartialNestedStruct {
     uint64_t dummy2;
 };
 
-struct UnboundStruct { };
+struct UnboundStruct {};
 
 struct StringStruct {
     char a[3];
@@ -82,7 +83,7 @@ struct ComplexStruct {
     std::complex<double> cdbl;
 };
 
-std::ostream& operator<<(std::ostream& os, const ComplexStruct& v) {
+std::ostream &operator<<(std::ostream &os, const ComplexStruct &v) {
     return os << "c:" << v.cflt << "," << v.cdbl;
 }
 
@@ -106,46 +107,50 @@ PYBIND11_PACKED(struct EnumStruct {
     E2 e2;
 });
 
-std::ostream& operator<<(std::ostream& os, const StringStruct& v) {
+std::ostream &operator<<(std::ostream &os, const StringStruct &v) {
     os << "a='";
-    for (size_t i = 0; i < 3 && (v.a[i] != 0); i++)
+    for (size_t i = 0; i < 3 && (v.a[i] != 0); i++) {
         os << v.a[i];
+    }
     os << "',b='";
-    for (size_t i = 0; i < 3 && (v.b[i] != 0); i++)
+    for (size_t i = 0; i < 3 && (v.b[i] != 0); i++) {
         os << v.b[i];
+    }
     return os << "'";
 }
 
-std::ostream& operator<<(std::ostream& os, const ArrayStruct& v) {
+std::ostream &operator<<(std::ostream &os, const ArrayStruct &v) {
     os << "a={";
     for (int i = 0; i < 3; i++) {
-        if (i > 0)
+        if (i > 0) {
             os << ',';
+        }
         os << '{';
-        for (int j = 0; j < 3; j++)
+        for (int j = 0; j < 3; j++) {
             os << v.a[i][j] << ',';
+        }
         os << v.a[i][3] << '}';
     }
     os << "},b={" << v.b[0] << ',' << v.b[1];
     os << "},c={" << int(v.c[0]) << ',' << int(v.c[1]) << ',' << int(v.c[2]);
     os << "},d={";
     for (int i = 0; i < 4; i++) {
-        if (i > 0)
+        if (i > 0) {
             os << ',';
+        }
         os << '{' << v.d[i][0] << ',' << v.d[i][1] << '}';
     }
     return os << '}';
 }
 
-std::ostream& operator<<(std::ostream& os, const EnumStruct& v) {
+std::ostream &operator<<(std::ostream &os, const EnumStruct &v) {
     return os << "e1=" << (v.e1 == E1::A ? "A" : "B") << ",e2=" << (v.e2 == E2::X ? "X" : "Y");
 }
 
 template <typename T>
 py::array mkarray_via_buffer(size_t n) {
-    return py::array(py::buffer_info(nullptr, sizeof(T),
-                                     py::format_descriptor<T>::format(),
-                                     1, { n }, { sizeof(T) }));
+    return py::array(py::buffer_info(
+        nullptr, sizeof(T), py::format_descriptor<T>::format(), 1, {n}, {sizeof(T)}));
 }
 
 #define SET_TEST_VALS(s, i)                                                                       \
@@ -160,7 +165,7 @@ template <typename S>
 py::array_t<S, 0> create_recarray(size_t n) {
     auto arr = mkarray_via_buffer<S>(n);
     auto req = arr.request();
-    auto ptr = static_cast<S*>(req.ptr);
+    auto *ptr = static_cast<S *>(req.ptr);
     for (size_t i = 0; i < n; i++) {
         SET_TEST_VALS(ptr[i], i);
     }
@@ -170,7 +175,7 @@ py::array_t<S, 0> create_recarray(size_t n) {
 template <typename S>
 py::list print_recarray(py::array_t<S, 0> arr) {
     const auto req = arr.request();
-    const auto ptr = static_cast<S*>(req.ptr);
+    auto *const ptr = static_cast<S *>(req.ptr);
     auto l = py::list();
     for (py::ssize_t i = 0; i < req.size; i++) {
         std::stringstream ss;
@@ -183,12 +188,12 @@ py::list print_recarray(py::array_t<S, 0> arr) {
 py::array_t<int32_t, 0> test_array_ctors(int i) {
     using arr_t = py::array_t<int32_t, 0>;
 
-    std::vector<int32_t> data { 1, 2, 3, 4, 5, 6 };
-    std::vector<py::ssize_t> shape { 3, 2 };
-    std::vector<py::ssize_t> strides { 8, 4 };
+    std::vector<int32_t> data{1, 2, 3, 4, 5, 6};
+    std::vector<py::ssize_t> shape{3, 2};
+    std::vector<py::ssize_t> strides{8, 4};
 
-    auto ptr = data.data();
-    auto vptr = (void *) ptr;
+    auto *ptr = data.data();
+    auto *vptr = (void *) ptr;
     auto dtype = py::dtype("int32");
 
     py::buffer_info buf_ndim1(vptr, 4, "i", 6);
@@ -198,41 +203,69 @@ py::array_t<int32_t, 0> test_array_ctors(int i) {
 
     auto fill = [](py::array arr) {
         auto req = arr.request();
-        for (int i = 0; i < 6; i++) ((int32_t *) req.ptr)[i] = i + 1;
+        for (int i = 0; i < 6; i++) {
+            ((int32_t *) req.ptr)[i] = i + 1;
+        }
         return arr;
     };
 
     switch (i) {
-    // shape: (3, 2)
-    case 10: return arr_t(shape, strides, ptr);
-    case 11: return py::array(shape, strides, ptr);
-    case 12: return py::array(dtype, shape, strides, vptr);
-    case 13: return arr_t(shape, ptr);
-    case 14: return py::array(shape, ptr);
-    case 15: return py::array(dtype, shape, vptr);
-    case 16: return arr_t(buf_ndim2);
-    case 17: return py::array(buf_ndim2);
-    // shape: (3, 2) - post-fill
-    case 20: return fill(arr_t(shape, strides));
-    case 21: return py::array(shape, strides, ptr); // can't have nullptr due to templated ctor
-    case 22: return fill(py::array(dtype, shape, strides));
-    case 23: return fill(arr_t(shape));
-    case 24: return py::array(shape, ptr); // can't have nullptr due to templated ctor
-    case 25: return fill(py::array(dtype, shape));
-    case 26: return fill(arr_t(buf_ndim2_null));
-    case 27: return fill(py::array(buf_ndim2_null));
-    // shape: (6, )
-    case 30: return arr_t(6, ptr);
-    case 31: return py::array(6, ptr);
-    case 32: return py::array(dtype, 6, vptr);
-    case 33: return arr_t(buf_ndim1);
-    case 34: return py::array(buf_ndim1);
-    // shape: (6, )
-    case 40: return fill(arr_t(6));
-    case 41: return py::array(6, ptr);  // can't have nullptr due to templated ctor
-    case 42: return fill(py::array(dtype, 6));
-    case 43: return fill(arr_t(buf_ndim1_null));
-    case 44: return fill(py::array(buf_ndim1_null));
+        // shape: (3, 2)
+        case 10:
+            return arr_t(shape, strides, ptr);
+        case 11:
+            return py::array(shape, strides, ptr);
+        case 12:
+            return py::array(dtype, shape, strides, vptr);
+        case 13:
+            return arr_t(shape, ptr);
+        case 14:
+            return py::array(shape, ptr);
+        case 15:
+            return py::array(dtype, shape, vptr);
+        case 16:
+            return arr_t(buf_ndim2);
+        case 17:
+            return py::array(buf_ndim2);
+        // shape: (3, 2) - post-fill
+        case 20:
+            return fill(arr_t(shape, strides));
+        case 21:
+            return py::array(shape, strides, ptr); // can't have nullptr due to templated ctor
+        case 22:
+            return fill(py::array(dtype, shape, strides));
+        case 23:
+            return fill(arr_t(shape));
+        case 24:
+            return py::array(shape, ptr); // can't have nullptr due to templated ctor
+        case 25:
+            return fill(py::array(dtype, shape));
+        case 26:
+            return fill(arr_t(buf_ndim2_null));
+        case 27:
+            return fill(py::array(buf_ndim2_null));
+        // shape: (6, )
+        case 30:
+            return arr_t(6, ptr);
+        case 31:
+            return py::array(6, ptr);
+        case 32:
+            return py::array(dtype, 6, vptr);
+        case 33:
+            return arr_t(buf_ndim1);
+        case 34:
+            return py::array(buf_ndim1);
+        // shape: (6, )
+        case 40:
+            return fill(arr_t(6));
+        case 41:
+            return py::array(6, ptr); // can't have nullptr due to templated ctor
+        case 42:
+            return fill(py::array(dtype, 6));
+        case 43:
+            return fill(arr_t(buf_ndim1_null));
+        case 44:
+            return fill(py::array(buf_ndim1_null));
     }
     return arr_t();
 }
@@ -244,14 +277,21 @@ py::list test_dtype_ctors() {
     list.append(py::dtype::from_args(py::str("bool")));
     py::list names, offsets, formats;
     py::dict dict;
-    names.append(py::str("a")); names.append(py::str("b")); dict["names"] = names;
-    offsets.append(py::int_(1)); offsets.append(py::int_(10)); dict["offsets"] = offsets;
-    formats.append(py::dtype("int32")); formats.append(py::dtype("float64")); dict["formats"] = formats;
+    names.append(py::str("a"));
+    names.append(py::str("b"));
+    dict["names"] = names;
+    offsets.append(py::int_(1));
+    offsets.append(py::int_(10));
+    dict["offsets"] = offsets;
+    formats.append(py::dtype("int32"));
+    formats.append(py::dtype("float64"));
+    dict["formats"] = formats;
     dict["itemsize"] = py::int_(20);
     list.append(py::dtype::from_args(dict));
     list.append(py::dtype(names, formats, offsets, 20));
-    list.append(py::dtype(py::buffer_info((void *) 0, sizeof(unsigned int), "I", 1)));
-    list.append(py::dtype(py::buffer_info((void *) 0, 0, "T{i:a:f:b:}", 1)));
+    list.append(py::dtype(py::buffer_info((void *) nullptr, sizeof(unsigned int), "I", 1)));
+    list.append(py::dtype(py::buffer_info((void *) nullptr, 0, "T{i:a:f:b:}", 1)));
+    list.append(py::dtype(py::detail::npy_api::NPY_DOUBLE_));
     return list;
 }
 
@@ -259,8 +299,11 @@ struct A {};
 struct B {};
 
 TEST_SUBMODULE(numpy_dtypes, m) {
-    try { py::module_::import("numpy"); }
-    catch (...) { return; }
+    try {
+        py::module_::import("numpy");
+    } catch (const py::error_already_set &) {
+        return;
+    }
 
     // typeinfo may be registered before the dtype descriptor for scalar casts to work...
     py::class_<SimpleStruct>(m, "SimpleStruct")
@@ -278,11 +321,10 @@ TEST_SUBMODULE(numpy_dtypes, m) {
             if (py::len(tup) != 4) {
                 throw py::cast_error("Invalid size");
             }
-            return SimpleStruct{
-                tup[0].cast<bool>(),
-                tup[1].cast<uint32_t>(),
-                tup[2].cast<float>(),
-                tup[3].cast<long double>()};
+            return SimpleStruct{tup[0].cast<bool>(),
+                                tup[1].cast<uint32_t>(),
+                                tup[2].cast<float>(),
+                                tup[3].cast<long double>()};
         });
 
     PYBIND11_NUMPY_DTYPE(SimpleStruct, bool_, uint_, float_, ldbl_);
@@ -301,19 +343,21 @@ TEST_SUBMODULE(numpy_dtypes, m) {
 
     PYBIND11_NUMPY_DTYPE_EX(StructWithUglyNames, __x__, "x", __y__, "y");
 
-    // If uncommented, this should produce a static_assert failure telling the user that the struct
+#ifdef PYBIND11_NEVER_DEFINED_EVER
+    // If enabled, this should produce a static_assert failure telling the user that the struct
     // is not a POD type
-//    struct NotPOD { std::string v; NotPOD() : v("hi") {}; };
-//    PYBIND11_NUMPY_DTYPE(NotPOD, v);
+    struct NotPOD {
+        std::string v;
+        NotPOD() : v("hi"){};
+    };
+    PYBIND11_NUMPY_DTYPE(NotPOD, v);
+#endif
 
     // Check that dtypes can be registered programmatically, both from
     // initializer lists of field descriptors and from other containers.
-    py::detail::npy_format_descriptor<A>::register_dtype(
-        {}
-    );
+    py::detail::npy_format_descriptor<A>::register_dtype({});
     py::detail::npy_format_descriptor<B>::register_dtype(
-        std::vector<py::detail::field_descriptor>{}
-    );
+        std::vector<py::detail::field_descriptor>{});
 
     // test_recarray, test_scalar_conversion
     m.def("create_rec_simple", &create_recarray<SimpleStruct>);
@@ -321,7 +365,7 @@ TEST_SUBMODULE(numpy_dtypes, m) {
     m.def("create_rec_nested", [](size_t n) { // test_signature
         py::array_t<NestedStruct, 0> arr = mkarray_via_buffer<NestedStruct>(n);
         auto req = arr.request();
-        auto ptr = static_cast<NestedStruct*>(req.ptr);
+        auto *ptr = static_cast<NestedStruct *>(req.ptr);
         for (size_t i = 0; i < n; i++) {
             SET_TEST_VALS(ptr[i].a, i);
             SET_TEST_VALS(ptr[i].b, i + 1);
@@ -332,7 +376,7 @@ TEST_SUBMODULE(numpy_dtypes, m) {
     m.def("create_rec_partial_nested", [](size_t n) {
         py::array_t<PartialNestedStruct, 0> arr = mkarray_via_buffer<PartialNestedStruct>(n);
         auto req = arr.request();
-        auto ptr = static_cast<PartialNestedStruct*>(req.ptr);
+        auto *ptr = static_cast<PartialNestedStruct *>(req.ptr);
         for (size_t i = 0; i < n; i++) {
             SET_TEST_VALS(ptr[i].a, i);
         }
@@ -346,17 +390,15 @@ TEST_SUBMODULE(numpy_dtypes, m) {
     m.def("get_format_unbound", []() { return py::format_descriptor<UnboundStruct>::format(); });
     m.def("print_format_descriptors", []() {
         py::list l;
-        for (const auto &fmt : {
-            py::format_descriptor<SimpleStruct>::format(),
-            py::format_descriptor<PackedStruct>::format(),
-            py::format_descriptor<NestedStruct>::format(),
-            py::format_descriptor<PartialStruct>::format(),
-            py::format_descriptor<PartialNestedStruct>::format(),
-            py::format_descriptor<StringStruct>::format(),
-            py::format_descriptor<ArrayStruct>::format(),
-            py::format_descriptor<EnumStruct>::format(),
-            py::format_descriptor<ComplexStruct>::format()
-        }) {
+        for (const auto &fmt : {py::format_descriptor<SimpleStruct>::format(),
+                                py::format_descriptor<PackedStruct>::format(),
+                                py::format_descriptor<NestedStruct>::format(),
+                                py::format_descriptor<PartialStruct>::format(),
+                                py::format_descriptor<PartialNestedStruct>::format(),
+                                py::format_descriptor<StringStruct>::format(),
+                                py::format_descriptor<ArrayStruct>::format(),
+                                py::format_descriptor<EnumStruct>::format(),
+                                py::format_descriptor<ComplexStruct>::format()}) {
             l.append(py::cast(fmt));
         }
         return l;
@@ -364,50 +406,79 @@ TEST_SUBMODULE(numpy_dtypes, m) {
 
     // test_dtype
     std::vector<const char *> dtype_names{
-        "byte", "short", "intc", "int_", "longlong",
-        "ubyte", "ushort", "uintc", "uint", "ulonglong",
-        "half", "single", "double", "longdouble",
-        "csingle", "cdouble", "clongdouble",
-        "bool_", "datetime64", "timedelta64", "object_"
-    };
+        "byte",    "short",   "intc",        "int_",  "longlong",   "ubyte",       "ushort",
+        "uintc",   "uint",    "ulonglong",   "half",  "single",     "double",      "longdouble",
+        "csingle", "cdouble", "clongdouble", "bool_", "datetime64", "timedelta64", "object_"};
 
     m.def("print_dtypes", []() {
         py::list l;
-        for (const py::handle &d : {
-            py::dtype::of<SimpleStruct>(),
-            py::dtype::of<PackedStruct>(),
-            py::dtype::of<NestedStruct>(),
-            py::dtype::of<PartialStruct>(),
-            py::dtype::of<PartialNestedStruct>(),
-            py::dtype::of<StringStruct>(),
-            py::dtype::of<ArrayStruct>(),
-            py::dtype::of<EnumStruct>(),
-            py::dtype::of<StructWithUglyNames>(),
-            py::dtype::of<ComplexStruct>()
-        })
+        for (const py::handle &d : {py::dtype::of<SimpleStruct>(),
+                                    py::dtype::of<PackedStruct>(),
+                                    py::dtype::of<NestedStruct>(),
+                                    py::dtype::of<PartialStruct>(),
+                                    py::dtype::of<PartialNestedStruct>(),
+                                    py::dtype::of<StringStruct>(),
+                                    py::dtype::of<ArrayStruct>(),
+                                    py::dtype::of<EnumStruct>(),
+                                    py::dtype::of<StructWithUglyNames>(),
+                                    py::dtype::of<ComplexStruct>()}) {
             l.append(py::str(d));
+        }
         return l;
     });
     m.def("test_dtype_ctors", &test_dtype_ctors);
     m.def("test_dtype_kind", [dtype_names]() {
         py::list list;
-        for (auto& dt_name : dtype_names)
+        for (const auto &dt_name : dtype_names) {
             list.append(py::dtype(dt_name).kind());
+        }
         return list;
     });
     m.def("test_dtype_char_", [dtype_names]() {
         py::list list;
-        for (auto& dt_name : dtype_names)
+        for (const auto &dt_name : dtype_names) {
             list.append(py::dtype(dt_name).char_());
+        }
+        return list;
+    });
+    m.def("test_dtype_num", [dtype_names]() {
+        py::list list;
+        for (const auto &dt_name : dtype_names) {
+            list.append(py::dtype(dt_name).num());
+        }
+        return list;
+    });
+    m.def("test_dtype_byteorder", [dtype_names]() {
+        py::list list;
+        for (const auto &dt_name : dtype_names) {
+            list.append(py::dtype(dt_name).byteorder());
+        }
+        return list;
+    });
+    m.def("test_dtype_alignment", [dtype_names]() {
+        py::list list;
+        for (const auto &dt_name : dtype_names) {
+            list.append(py::dtype(dt_name).alignment());
+        }
+        return list;
+    });
+    m.def("test_dtype_flags", [dtype_names]() {
+        py::list list;
+        for (const auto &dt_name : dtype_names) {
+            list.append(py::dtype(dt_name).flags());
+        }
         return list;
     });
     m.def("test_dtype_methods", []() {
         py::list list;
         auto dt1 = py::dtype::of<int32_t>();
         auto dt2 = py::dtype::of<SimpleStruct>();
-        list.append(dt1); list.append(dt2);
-        list.append(py::bool_(dt1.has_fields())); list.append(py::bool_(dt2.has_fields()));
-        list.append(py::int_(dt1.itemsize())); list.append(py::int_(dt2.itemsize()));
+        list.append(dt1);
+        list.append(dt2);
+        list.append(py::bool_(dt1.has_fields()));
+        list.append(py::bool_(dt2.has_fields()));
+        list.append(py::int_(dt1.itemsize()));
+        list.append(py::int_(dt2.itemsize()));
         return list;
     });
     struct TrailingPaddingStruct {
@@ -422,17 +493,24 @@ TEST_SUBMODULE(numpy_dtypes, m) {
         py::array_t<StringStruct, 0> arr = mkarray_via_buffer<StringStruct>(non_empty ? 4 : 0);
         if (non_empty) {
             auto req = arr.request();
-            auto ptr = static_cast<StringStruct*>(req.ptr);
-            for (py::ssize_t i = 0; i < req.size * req.itemsize; i++)
-                static_cast<char*>(req.ptr)[i] = 0;
-            ptr[1].a[0] = 'a'; ptr[1].b[0] = 'a';
-            ptr[2].a[0] = 'a'; ptr[2].b[0] = 'a';
-            ptr[3].a[0] = 'a'; ptr[3].b[0] = 'a';
+            auto *ptr = static_cast<StringStruct *>(req.ptr);
+            for (py::ssize_t i = 0; i < req.size * req.itemsize; i++) {
+                static_cast<char *>(req.ptr)[i] = 0;
+            }
+            ptr[1].a[0] = 'a';
+            ptr[1].b[0] = 'a';
+            ptr[2].a[0] = 'a';
+            ptr[2].b[0] = 'a';
+            ptr[3].a[0] = 'a';
+            ptr[3].b[0] = 'a';
 
-            ptr[2].a[1] = 'b'; ptr[2].b[1] = 'b';
-            ptr[3].a[1] = 'b'; ptr[3].b[1] = 'b';
+            ptr[2].a[1] = 'b';
+            ptr[2].b[1] = 'b';
+            ptr[3].a[1] = 'b';
+            ptr[3].b[1] = 'b';
 
-            ptr[3].a[2] = 'c'; ptr[3].b[2] = 'c';
+            ptr[3].a[2] = 'c';
+            ptr[3].b[2] = 'c';
         }
         return arr;
     });
@@ -441,18 +519,24 @@ TEST_SUBMODULE(numpy_dtypes, m) {
     // test_array_array
     m.def("create_array_array", [](size_t n) {
         py::array_t<ArrayStruct, 0> arr = mkarray_via_buffer<ArrayStruct>(n);
-        auto ptr = (ArrayStruct *) arr.mutable_data();
+        auto *ptr = (ArrayStruct *) arr.mutable_data();
         for (size_t i = 0; i < n; i++) {
-            for (size_t j = 0; j < 3; j++)
-                for (size_t k = 0; k < 4; k++)
+            for (size_t j = 0; j < 3; j++) {
+                for (size_t k = 0; k < 4; k++) {
                     ptr[i].a[j][k] = char('A' + (i * 100 + j * 10 + k) % 26);
-            for (size_t j = 0; j < 2; j++)
+                }
+            }
+            for (size_t j = 0; j < 2; j++) {
                 ptr[i].b[j] = int32_t(i * 1000 + j);
-            for (size_t j = 0; j < 3; j++)
+            }
+            for (size_t j = 0; j < 3; j++) {
                 ptr[i].c[j] = uint8_t(i * 10 + j);
-            for (size_t j = 0; j < 4; j++)
-                for (size_t k = 0; k < 2; k++)
+            }
+            for (size_t j = 0; j < 4; j++) {
+                for (size_t k = 0; k < 2; k++) {
                     ptr[i].d[j][k] = float(i) * 100.0f + float(j) * 10.0f + float(k);
+                }
+            }
         }
         return arr;
     });
@@ -461,7 +545,7 @@ TEST_SUBMODULE(numpy_dtypes, m) {
     // test_enum_array
     m.def("create_enum_array", [](size_t n) {
         py::array_t<EnumStruct, 0> arr = mkarray_via_buffer<EnumStruct>(n);
-        auto ptr = (EnumStruct *) arr.mutable_data();
+        auto *ptr = (EnumStruct *) arr.mutable_data();
         for (size_t i = 0; i < n; i++) {
             ptr[i].e1 = static_cast<E1>(-1 + ((int) i % 2) * 2);
             ptr[i].e2 = static_cast<E2>(1 + (i % 2));
@@ -473,7 +557,7 @@ TEST_SUBMODULE(numpy_dtypes, m) {
     // test_complex_array
     m.def("create_complex_array", [](size_t n) {
         py::array_t<ComplexStruct, 0> arr = mkarray_via_buffer<ComplexStruct>(n);
-        auto ptr = (ComplexStruct *) arr.mutable_data();
+        auto *ptr = (ComplexStruct *) arr.mutable_data();
         for (size_t i = 0; i < n; i++) {
             ptr[i].cflt.real(float(i));
             ptr[i].cflt.imag(float(i) + 0.25f);
@@ -496,14 +580,19 @@ TEST_SUBMODULE(numpy_dtypes, m) {
     PYBIND11_NUMPY_DTYPE(CompareStruct, x, y, z);
     m.def("compare_buffer_info", []() {
         py::list list;
-        list.append(py::bool_(py::detail::compare_buffer_info<float>::compare(py::buffer_info(nullptr, sizeof(float), "f", 1))));
-        list.append(py::bool_(py::detail::compare_buffer_info<unsigned>::compare(py::buffer_info(nullptr, sizeof(int), "I", 1))));
-        list.append(py::bool_(py::detail::compare_buffer_info<long>::compare(py::buffer_info(nullptr, sizeof(long), "l", 1))));
-        list.append(py::bool_(py::detail::compare_buffer_info<long>::compare(py::buffer_info(nullptr, sizeof(long), sizeof(long) == sizeof(int) ? "i" : "q", 1))));
-        list.append(py::bool_(py::detail::compare_buffer_info<CompareStruct>::compare(py::buffer_info(nullptr, sizeof(CompareStruct), "T{?:x:3xI:y:f:z:}", 1))));
+        list.append(py::bool_(py::detail::compare_buffer_info<float>::compare(
+            py::buffer_info(nullptr, sizeof(float), "f", 1))));
+        list.append(py::bool_(py::detail::compare_buffer_info<unsigned>::compare(
+            py::buffer_info(nullptr, sizeof(int), "I", 1))));
+        list.append(py::bool_(py::detail::compare_buffer_info<long>::compare(
+            py::buffer_info(nullptr, sizeof(long), "l", 1))));
+        list.append(py::bool_(py::detail::compare_buffer_info<long>::compare(
+            py::buffer_info(nullptr, sizeof(long), sizeof(long) == sizeof(int) ? "i" : "q", 1))));
+        list.append(py::bool_(py::detail::compare_buffer_info<CompareStruct>::compare(
+            py::buffer_info(nullptr, sizeof(CompareStruct), "T{?:x:3xI:y:f:z:}", 1))));
         return list;
     });
-    m.def("buffer_to_dtype", [](py::buffer& buf) { return py::dtype(buf.request()); });
+    m.def("buffer_to_dtype", [](py::buffer &buf) { return py::dtype(buf.request()); });
 
     // test_scalar_conversion
     auto f_simple = [](SimpleStruct s) { return s.uint_ * 10; };
@@ -517,8 +606,9 @@ TEST_SUBMODULE(numpy_dtypes, m) {
     m.def("f_simple_pass_thru_vectorized", py::vectorize(f_simple_pass_thru));
 
     // test_register_dtype
-    m.def("register_dtype", []() { PYBIND11_NUMPY_DTYPE(SimpleStruct, bool_, uint_, float_, ldbl_); });
+    m.def("register_dtype",
+          []() { PYBIND11_NUMPY_DTYPE(SimpleStruct, bool_, uint_, float_, ldbl_); });
 
     // test_str_leak
-    m.def("dtype_wrapper", [](py::object d) { return py::dtype::from_args(std::move(d)); });
+    m.def("dtype_wrapper", [](const py::object &d) { return py::dtype::from_args(d); });
 }
diff --git a/ext/pybind11/tests/test_numpy_dtypes.py b/ext/pybind11/tests/test_numpy_dtypes.py
index 06e578329e..fcfd587b18 100644
--- a/ext/pybind11/tests/test_numpy_dtypes.py
+++ b/ext/pybind11/tests/test_numpy_dtypes.py
@@ -1,4 +1,3 @@
-# -*- coding: utf-8 -*-
 import re
 
 import pytest
@@ -15,7 +14,7 @@ def simple_dtype():
     return np.dtype(
         {
             "names": ["bool_", "uint_", "float_", "ldbl_"],
-            "formats": ["?", "u4", "f4", "f{}".format(ld.itemsize)],
+            "formats": ["?", "u4", "f4", f"f{ld.itemsize}"],
             "offsets": [0, 4, 8, (16 if ld.alignment > 4 else 12)],
         }
     )
@@ -32,8 +31,8 @@ def dt_fmt():
     e = "<" if byteorder == "little" else ">"
     return (
         "{{'names':['bool_','uint_','float_','ldbl_'],"
-        " 'formats':['?','" + e + "u4','" + e + "f4','" + e + "f{}'],"
-        " 'offsets':[0,4,8,{}], 'itemsize':{}}}"
+        "'formats':['?','" + e + "u4','" + e + "f4','" + e + "f{}'],"
+        "'offsets':[0,4,8,{}],'itemsize':{}}}"
     )
 
 
@@ -46,7 +45,7 @@ def simple_dtype_fmt():
 def packed_dtype_fmt():
     from sys import byteorder
 
-    return "[('bool_', '?'), ('uint_', '{e}u4'), ('float_', '{e}f4'), ('ldbl_', '{e}f{}')]".format(
+    return "[('bool_','?'),('uint_','{e}u4'),('float_','{e}f4'),('ldbl_','{e}f{}')]".format(
         np.dtype("longdouble").itemsize, e="<" if byteorder == "little" else ">"
     )
 
@@ -77,7 +76,7 @@ def partial_nested_fmt():
     partial_size = partial_ld_off + ld.itemsize
     partial_end_padding = partial_size % np.dtype("uint64").alignment
     partial_nested_size = partial_nested_off * 2 + partial_size + partial_end_padding
-    return "{{'names':['a'], 'formats':[{}], 'offsets':[{}], 'itemsize':{}}}".format(
+    return "{{'names':['a'],'formats':[{}],'offsets':[{}],'itemsize':{}}}".format(
         partial_dtype_fmt(), partial_nested_off, partial_nested_size
     )
 
@@ -123,25 +122,25 @@ def test_dtype(simple_dtype):
 
     e = "<" if byteorder == "little" else ">"
 
-    assert m.print_dtypes() == [
+    assert [x.replace(" ", "") for x in m.print_dtypes()] == [
         simple_dtype_fmt(),
         packed_dtype_fmt(),
-        "[('a', {}), ('b', {})]".format(simple_dtype_fmt(), packed_dtype_fmt()),
+        f"[('a',{simple_dtype_fmt()}),('b',{packed_dtype_fmt()})]",
         partial_dtype_fmt(),
         partial_nested_fmt(),
-        "[('a', 'S3'), ('b', 'S3')]",
+        "[('a','S3'),('b','S3')]",
         (
-            "{{'names':['a','b','c','d'], "
-            + "'formats':[('S4', (3,)),('"
+            "{{'names':['a','b','c','d'],"
+            + "'formats':[('S4',(3,)),('"
             + e
-            + "i4', (2,)),('u1', (3,)),('"
+            + "i4',(2,)),('u1',(3,)),('"
             + e
-            + "f4', (4, 2))], "
-            + "'offsets':[0,12,20,24], 'itemsize':56}}"
+            + "f4',(4,2))],"
+            + "'offsets':[0,12,20,24],'itemsize':56}}"
         ).format(e=e),
-        "[('e1', '" + e + "i8'), ('e2', 'u1')]",
-        "[('x', 'i1'), ('y', '" + e + "u8')]",
-        "[('cflt', '" + e + "c8'), ('cdbl', '" + e + "c16')]",
+        "[('e1','" + e + "i8'),('e2','u1')]",
+        "[('x','i1'),('y','" + e + "u8')]",
+        "[('cflt','" + e + "c8'),('cdbl','" + e + "c16')]",
     ]
 
     d1 = np.dtype(
@@ -161,6 +160,7 @@ def test_dtype(simple_dtype):
         d1,
         np.dtype("uint32"),
         d2,
+        np.dtype("d"),
     ]
 
     assert m.test_dtype_methods() == [
@@ -176,8 +176,13 @@ def test_dtype(simple_dtype):
         np.zeros(1, m.trailing_padding_dtype())
     )
 
+    expected_chars = "bhilqBHILQefdgFDG?MmO"
     assert m.test_dtype_kind() == list("iiiiiuuuuuffffcccbMmO")
-    assert m.test_dtype_char_() == list("bhilqBHILQefdgFDG?MmO")
+    assert m.test_dtype_char_() == list(expected_chars)
+    assert m.test_dtype_num() == [np.dtype(ch).num for ch in expected_chars]
+    assert m.test_dtype_byteorder() == [np.dtype(ch).byteorder for ch in expected_chars]
+    assert m.test_dtype_alignment() == [np.dtype(ch).alignment for ch in expected_chars]
+    assert m.test_dtype_flags() == [chr(np.dtype(ch).flags) for ch in expected_chars]
 
 
 def test_recarray(simple_dtype, packed_dtype):
@@ -238,7 +243,7 @@ def test_recarray(simple_dtype, packed_dtype):
     ]
 
     arr = m.create_rec_partial(3)
-    assert str(arr.dtype) == partial_dtype_fmt()
+    assert str(arr.dtype).replace(" ", "") == partial_dtype_fmt()
     partial_dtype = arr.dtype
     assert "" not in arr.dtype.fields
     assert partial_dtype.itemsize > simple_dtype.itemsize
@@ -246,7 +251,7 @@ def test_recarray(simple_dtype, packed_dtype):
     assert_equal(arr, elements, packed_dtype)
 
     arr = m.create_rec_partial_nested(3)
-    assert str(arr.dtype) == partial_nested_fmt()
+    assert str(arr.dtype).replace(" ", "") == partial_nested_fmt()
     assert "" not in arr.dtype.fields
     assert "" not in arr.dtype.fields["a"][0].fields
     assert arr.dtype.itemsize > partial_dtype.itemsize
@@ -285,12 +290,12 @@ def test_array_array():
     e = "<" if byteorder == "little" else ">"
 
     arr = m.create_array_array(3)
-    assert str(arr.dtype) == (
-        "{{'names':['a','b','c','d'], "
-        + "'formats':[('S4', (3,)),('"
+    assert str(arr.dtype).replace(" ", "") == (
+        "{{'names':['a','b','c','d'],"
+        + "'formats':[('S4',(3,)),('"
         + e
-        + "i4', (2,)),('u1', (3,)),('{e}f4', (4, 2))], "
-        + "'offsets':[0,12,20,24], 'itemsize':56}}"
+        + "i4',(2,)),('u1',(3,)),('{e}f4',(4,2))],"
+        + "'offsets':[0,12,20,24],'itemsize':56}}"
     ).format(e=e)
     assert m.print_array_array(arr) == [
         "a={{A,B,C,D},{K,L,M,N},{U,V,W,X}},b={0,1},"
diff --git a/ext/pybind11/tests/test_numpy_vectorize.cpp b/ext/pybind11/tests/test_numpy_vectorize.cpp
index eb5281fb1d..dcc4c6ac25 100644
--- a/ext/pybind11/tests/test_numpy_vectorize.cpp
+++ b/ext/pybind11/tests/test_numpy_vectorize.cpp
@@ -8,38 +8,43 @@
     BSD-style license that can be found in the LICENSE file.
 */
 
-#include "pybind11_tests.h"
 #include <pybind11/numpy.h>
 
+#include "pybind11_tests.h"
+
 #include <utility>
 
 double my_func(int x, float y, double z) {
     py::print("my_func(x:int={}, y:float={:.0f}, z:float={:.0f})"_s.format(x, y, z));
-    return (float) x*y*z;
+    return (float) x * y * z;
 }
 
 TEST_SUBMODULE(numpy_vectorize, m) {
-    try { py::module_::import("numpy"); }
-    catch (...) { return; }
+    try {
+        py::module_::import("numpy");
+    } catch (const py::error_already_set &) {
+        return;
+    }
 
     // test_vectorize, test_docs, test_array_collapse
     // Vectorize all arguments of a function (though non-vector arguments are also allowed)
     m.def("vectorized_func", py::vectorize(my_func));
 
-    // Vectorize a lambda function with a capture object (e.g. to exclude some arguments from the vectorization)
+    // Vectorize a lambda function with a capture object (e.g. to exclude some arguments from the
+    // vectorization)
     m.def("vectorized_func2", [](py::array_t<int> x, py::array_t<float> y, float z) {
         return py::vectorize([z](int x, float y) { return my_func(x, y, z); })(std::move(x),
                                                                                std::move(y));
     });
 
     // Vectorize a complex-valued function
-    m.def("vectorized_func3", py::vectorize(
-        [](std::complex<double> c) { return c * std::complex<double>(2.f); }
-    ));
+    m.def("vectorized_func3",
+          py::vectorize([](std::complex<double> c) { return c * std::complex<double>(2.f); }));
 
     // test_type_selection
     // NumPy function which only accepts specific data types
-    // A lot of these no lints could be replaced with const refs, and probably should at some point.
+    // A lot of these no lints could be replaced with const refs, and probably should at some
+    // point.
     m.def("selective_func",
           [](const py::array_t<int, py::array::c_style> &) { return "Int branch taken."; });
     m.def("selective_func",
@@ -49,8 +54,8 @@ TEST_SUBMODULE(numpy_vectorize, m) {
     });
 
     // test_passthrough_arguments
-    // Passthrough test: references and non-pod types should be automatically passed through (in the
-    // function definition below, only `b`, `d`, and `g` are vectorized):
+    // Passthrough test: references and non-pod types should be automatically passed through (in
+    // the function definition below, only `b`, `d`, and `g` are vectorized):
     struct NonPODClass {
         explicit NonPODClass(int v) : value{v} {}
         int value;
@@ -76,8 +81,7 @@ TEST_SUBMODULE(numpy_vectorize, m) {
         int value = 0;
     };
     py::class_<VectorizeTestClass> vtc(m, "VectorizeTestClass");
-    vtc .def(py::init<int>())
-        .def_readwrite("value", &VectorizeTestClass::value);
+    vtc.def(py::init<int>()).def_readwrite("value", &VectorizeTestClass::value);
 
     // Automatic vectorizing of methods
     vtc.def("method", py::vectorize(&VectorizeTestClass::method));
@@ -99,5 +103,5 @@ TEST_SUBMODULE(numpy_vectorize, m) {
               return py::detail::broadcast(buffers, ndim, shape);
           });
 
-    m.def("add_to", py::vectorize([](NonPODClass& x, int a) { x.value += a; }));
+    m.def("add_to", py::vectorize([](NonPODClass &x, int a) { x.value += a; }));
 }
diff --git a/ext/pybind11/tests/test_numpy_vectorize.py b/ext/pybind11/tests/test_numpy_vectorize.py
index de5c9a607d..7e8c015c46 100644
--- a/ext/pybind11/tests/test_numpy_vectorize.py
+++ b/ext/pybind11/tests/test_numpy_vectorize.py
@@ -1,4 +1,3 @@
-# -*- coding: utf-8 -*-
 import pytest
 
 from pybind11_tests import numpy_vectorize as m
diff --git a/ext/pybind11/tests/test_opaque_types.cpp b/ext/pybind11/tests/test_opaque_types.cpp
index 804de6d4ff..0386dba03d 100644
--- a/ext/pybind11/tests/test_opaque_types.cpp
+++ b/ext/pybind11/tests/test_opaque_types.cpp
@@ -7,8 +7,10 @@
     BSD-style license that can be found in the LICENSE file.
 */
 
-#include "pybind11_tests.h"
 #include <pybind11/stl.h>
+
+#include "pybind11_tests.h"
+
 #include <vector>
 
 // IMPORTANT: Disable internal pybind11 translation mechanisms for STL data structures
@@ -26,12 +28,13 @@ TEST_SUBMODULE(opaque_types, m) {
         .def(py::init<>())
         .def("pop_back", &StringList::pop_back)
         /* There are multiple versions of push_back(), etc. Select the right ones. */
-        .def("push_back", (void (StringList::*)(const std::string &)) &StringList::push_back)
-        .def("back", (std::string &(StringList::*)()) &StringList::back)
+        .def("push_back", (void(StringList::*)(const std::string &)) & StringList::push_back)
+        .def("back", (std::string & (StringList::*) ()) & StringList::back)
         .def("__len__", [](const StringList &v) { return v.size(); })
-        .def("__iter__", [](StringList &v) {
-           return py::make_iterator(v.begin(), v.end());
-        }, py::keep_alive<0, 1>());
+        .def(
+            "__iter__",
+            [](StringList &v) { return py::make_iterator(v.begin(), v.end()); },
+            py::keep_alive<0, 1>());
 
     class ClassWithSTLVecProperty {
     public:
@@ -45,8 +48,9 @@ TEST_SUBMODULE(opaque_types, m) {
         std::string ret = "Opaque list: [";
         bool first = true;
         for (const auto &entry : l) {
-            if (!first)
+            if (!first) {
                 ret += ", ";
+            }
             ret += entry;
             first = false;
         }
diff --git a/ext/pybind11/tests/test_opaque_types.py b/ext/pybind11/tests/test_opaque_types.py
index 5495cb6b4a..5d4f2a1bf4 100644
--- a/ext/pybind11/tests/test_opaque_types.py
+++ b/ext/pybind11/tests/test_opaque_types.py
@@ -1,4 +1,3 @@
-# -*- coding: utf-8 -*-
 import pytest
 
 from pybind11_tests import ConstructorStats, UserType
@@ -13,7 +12,7 @@ def test_string_list():
     assert lst.back() == "Element 2"
 
     for i, k in enumerate(lst, start=1):
-        assert k == "Element {}".format(i)
+        assert k == f"Element {i}"
     lst.pop_back()
     assert m.print_opaque_list(lst) == "Opaque list: [Element 1]"
 
@@ -40,7 +39,7 @@ def test_pointers(msg):
             1. (arg0: capsule) -> int
 
         Invoked with: [1, 2, 3]
-    """  # noqa: E501 line too long
+    """
     )
 
     assert m.return_null_str() is None
diff --git a/ext/pybind11/tests/test_operator_overloading.cpp b/ext/pybind11/tests/test_operator_overloading.cpp
index 0b6c496cf2..112a363b4b 100644
--- a/ext/pybind11/tests/test_operator_overloading.cpp
+++ b/ext/pybind11/tests/test_operator_overloading.cpp
@@ -7,9 +7,12 @@
     BSD-style license that can be found in the LICENSE file.
 */
 
-#include "pybind11_tests.h"
-#include "constructor_stats.h"
 #include <pybind11/operators.h>
+#include <pybind11/stl.h>
+
+#include "constructor_stats.h"
+#include "pybind11_tests.h"
+
 #include <functional>
 
 class Vector2 {
@@ -20,17 +23,24 @@ public:
         print_move_created(this);
         v.x = v.y = 0;
     }
-    Vector2 &operator=(const Vector2 &v) { x = v.x; y = v.y; print_copy_assigned(this); return *this; }
+    Vector2 &operator=(const Vector2 &v) {
+        x = v.x;
+        y = v.y;
+        print_copy_assigned(this);
+        return *this;
+    }
     Vector2 &operator=(Vector2 &&v) noexcept {
-        x   = v.x;
-        y   = v.y;
+        x = v.x;
+        y = v.y;
         v.x = v.y = 0;
         print_move_assigned(this);
         return *this;
     }
     ~Vector2() { print_destroyed(this); }
 
-    std::string toString() const { return "[" + std::to_string(x) + ", " + std::to_string(y) + "]"; }
+    std::string toString() const {
+        return "[" + std::to_string(x) + ", " + std::to_string(y) + "]";
+    }
 
     Vector2 operator-() const { return Vector2(-x, -y); }
     Vector2 operator+(const Vector2 &v) const { return Vector2(x + v.x, y + v.y); }
@@ -41,71 +51,100 @@ public:
     Vector2 operator/(float value) const { return Vector2(x / value, y / value); }
     Vector2 operator*(const Vector2 &v) const { return Vector2(x * v.x, y * v.y); }
     Vector2 operator/(const Vector2 &v) const { return Vector2(x / v.x, y / v.y); }
-    Vector2& operator+=(const Vector2 &v) { x += v.x; y += v.y; return *this; }
-    Vector2& operator-=(const Vector2 &v) { x -= v.x; y -= v.y; return *this; }
-    Vector2& operator*=(float v) { x *= v; y *= v; return *this; }
-    Vector2& operator/=(float v) { x /= v; y /= v; return *this; }
-    Vector2& operator*=(const Vector2 &v) { x *= v.x; y *= v.y; return *this; }
-    Vector2& operator/=(const Vector2 &v) { x /= v.x; y /= v.y; return *this; }
+    Vector2 &operator+=(const Vector2 &v) {
+        x += v.x;
+        y += v.y;
+        return *this;
+    }
+    Vector2 &operator-=(const Vector2 &v) {
+        x -= v.x;
+        y -= v.y;
+        return *this;
+    }
+    Vector2 &operator*=(float v) {
+        x *= v;
+        y *= v;
+        return *this;
+    }
+    Vector2 &operator/=(float v) {
+        x /= v;
+        y /= v;
+        return *this;
+    }
+    Vector2 &operator*=(const Vector2 &v) {
+        x *= v.x;
+        y *= v.y;
+        return *this;
+    }
+    Vector2 &operator/=(const Vector2 &v) {
+        x /= v.x;
+        y /= v.y;
+        return *this;
+    }
 
     friend Vector2 operator+(float f, const Vector2 &v) { return Vector2(f + v.x, f + v.y); }
     friend Vector2 operator-(float f, const Vector2 &v) { return Vector2(f - v.x, f - v.y); }
     friend Vector2 operator*(float f, const Vector2 &v) { return Vector2(f * v.x, f * v.y); }
     friend Vector2 operator/(float f, const Vector2 &v) { return Vector2(f / v.x, f / v.y); }
 
-    bool operator==(const Vector2 &v) const {
-        return x == v.x && y == v.y;
-    }
-    bool operator!=(const Vector2 &v) const {
-        return x != v.x || y != v.y;
-    }
+    bool operator==(const Vector2 &v) const { return x == v.x && y == v.y; }
+    bool operator!=(const Vector2 &v) const { return x != v.x || y != v.y; }
+
 private:
     float x, y;
 };
 
-class C1 { };
-class C2 { };
+class C1 {};
+class C2 {};
 
 int operator+(const C1 &, const C1 &) { return 11; }
 int operator+(const C2 &, const C2 &) { return 22; }
 int operator+(const C2 &, const C1 &) { return 21; }
 int operator+(const C1 &, const C2 &) { return 12; }
 
+struct HashMe {
+    std::string member;
+};
+
+bool operator==(const HashMe &lhs, const HashMe &rhs) { return lhs.member == rhs.member; }
+
 // Note: Specializing explicit within `namespace std { ... }` is done due to a
 // bug in GCC<7. If you are supporting compilers later than this, consider
 // specializing `using template<> struct std::hash<...>` in the global
 // namespace instead, per this recommendation:
 // https://en.cppreference.com/w/cpp/language/extending_std#Adding_template_specializations
 namespace std {
-    template<>
-    struct hash<Vector2> {
-        // Not a good hash function, but easy to test
-        size_t operator()(const Vector2 &) { return 4; }
-    };
+template <>
+struct hash<Vector2> {
+    // Not a good hash function, but easy to test
+    size_t operator()(const Vector2 &) { return 4; }
+};
+
+// HashMe has a hash function in C++ but no `__hash__` for Python.
+template <>
+struct hash<HashMe> {
+    std::size_t operator()(const HashMe &selector) const {
+        return std::hash<std::string>()(selector.member);
+    }
+};
 } // namespace std
 
 // Not a good abs function, but easy to test.
-std::string abs(const Vector2&) {
-    return "abs(Vector2)";
-}
+std::string abs(const Vector2 &) { return "abs(Vector2)"; }
 
-// MSVC & Intel warns about unknown pragmas, and warnings are errors.
-#if !defined(_MSC_VER) && !defined(__INTEL_COMPILER)
-  #pragma GCC diagnostic push
-  // clang 7.0.0 and Apple LLVM 10.0.1 introduce `-Wself-assign-overloaded` to
-  // `-Wall`, which is used here for overloading (e.g. `py::self += py::self `).
-  // Here, we suppress the warning using `#pragma diagnostic`.
-  // Taken from: https://github.com/RobotLocomotion/drake/commit/aaf84b46
-  // TODO(eric): This could be resolved using a function / functor (e.g. `py::self()`).
-  #if defined(__APPLE__) && defined(__clang__)
-    #if (__clang_major__ >= 10)
-      #pragma GCC diagnostic ignored "-Wself-assign-overloaded"
-    #endif
-  #elif defined(__clang__)
-    #if (__clang_major__ >= 7)
-      #pragma GCC diagnostic ignored "-Wself-assign-overloaded"
-    #endif
-  #endif
+// clang 7.0.0 and Apple LLVM 10.0.1 introduce `-Wself-assign-overloaded` to
+// `-Wall`, which is used here for overloading (e.g. `py::self += py::self `).
+// Here, we suppress the warning
+// Taken from: https://github.com/RobotLocomotion/drake/commit/aaf84b46
+// TODO(eric): This could be resolved using a function / functor (e.g. `py::self()`).
+#if defined(__APPLE__) && defined(__clang__)
+#    if (__clang_major__ >= 10)
+PYBIND11_WARNING_DISABLE_CLANG("-Wself-assign-overloaded")
+#    endif
+#elif defined(__clang__)
+#    if (__clang_major__ >= 7)
+PYBIND11_WARNING_DISABLE_CLANG("-Wself-assign-overloaded")
+#    endif
 #endif
 
 TEST_SUBMODULE(operators, m) {
@@ -139,46 +178,52 @@ TEST_SUBMODULE(operators, m) {
         .def(py::hash(py::self))
         // N.B. See warning about usage of `py::detail::abs(py::self)` in
         // `operators.h`.
-        .def("__abs__", [](const Vector2& v) { return abs(v); })
-        ;
+        .def("__abs__", [](const Vector2 &v) { return abs(v); });
 
     m.attr("Vector") = m.attr("Vector2");
 
     // test_operators_notimplemented
     // #393: need to return NotSupported to ensure correct arithmetic operator behavior
-    py::class_<C1>(m, "C1")
-        .def(py::init<>())
-        .def(py::self + py::self);
+    py::class_<C1>(m, "C1").def(py::init<>()).def(py::self + py::self);
 
     py::class_<C2>(m, "C2")
         .def(py::init<>())
         .def(py::self + py::self)
-        .def("__add__", [](const C2& c2, const C1& c1) { return c2 + c1; })
-        .def("__radd__", [](const C2& c2, const C1& c1) { return c1 + c2; });
+        .def("__add__", [](const C2 &c2, const C1 &c1) { return c2 + c1; })
+        .def("__radd__", [](const C2 &c2, const C1 &c1) { return c1 + c2; });
 
     // test_nested
     // #328: first member in a class can't be used in operators
-    struct NestABase { int value = -2; };
+    struct NestABase {
+        int value = -2;
+    };
     py::class_<NestABase>(m, "NestABase")
         .def(py::init<>())
         .def_readwrite("value", &NestABase::value);
 
     struct NestA : NestABase {
         int value = 3;
-        NestA& operator+=(int i) { value += i; return *this; }
+        NestA &operator+=(int i) {
+            value += i;
+            return *this;
+        }
     };
     py::class_<NestA>(m, "NestA")
         .def(py::init<>())
         .def(py::self += int())
-        .def("as_base", [](NestA &a) -> NestABase& {
-            return (NestABase&) a;
-        }, py::return_value_policy::reference_internal);
+        .def(
+            "as_base",
+            [](NestA &a) -> NestABase & { return (NestABase &) a; },
+            py::return_value_policy::reference_internal);
     m.def("get_NestA", [](const NestA &a) { return a.value; });
 
     struct NestB {
         NestA a;
         int value = 4;
-        NestB& operator-=(int i) { value -= i; return *this; }
+        NestB &operator-=(int i) {
+            value -= i;
+            return *this;
+        }
     };
     py::class_<NestB>(m, "NestB")
         .def(py::init<>())
@@ -189,7 +234,10 @@ TEST_SUBMODULE(operators, m) {
     struct NestC {
         NestB b;
         int value = 5;
-        NestC& operator*=(int i) { value *= i; return *this; }
+        NestC &operator*=(int i) {
+            value *= i;
+            return *this;
+        }
     };
     py::class_<NestC>(m, "NestC")
         .def(py::init<>())
@@ -197,16 +245,15 @@ TEST_SUBMODULE(operators, m) {
         .def_readwrite("b", &NestC::b);
     m.def("get_NestC", [](const NestC &c) { return c.value; });
 
-
     // test_overriding_eq_reset_hash
     // #2191 Overriding __eq__ should set __hash__ to None
     struct Comparable {
         int value;
-        bool operator==(const Comparable& rhs) const {return value == rhs.value;}
+        bool operator==(const Comparable &rhs) const { return value == rhs.value; }
     };
 
     struct Hashable : Comparable {
-        explicit Hashable(int value): Comparable{value}{};
+        explicit Hashable(int value) : Comparable{value} {};
         size_t hash() const { return static_cast<size_t>(value); }
     };
 
@@ -214,9 +261,7 @@ TEST_SUBMODULE(operators, m) {
         using Hashable::Hashable;
     };
 
-    py::class_<Comparable>(m, "Comparable")
-        .def(py::init<int>())
-        .def(py::self == py::self);
+    py::class_<Comparable>(m, "Comparable").def(py::init<int>()).def(py::self == py::self);
 
     py::class_<Hashable>(m, "Hashable")
         .def(py::init<int>())
@@ -228,8 +273,9 @@ TEST_SUBMODULE(operators, m) {
         .def("__hash__", &Hashable::hash)
         .def(py::init<int>())
         .def(py::self == py::self);
-}
 
-#if !defined(_MSC_VER) && !defined(__INTEL_COMPILER)
-  #pragma GCC diagnostic pop
-#endif
+    // define __eq__ but not __hash__
+    py::class_<HashMe>(m, "HashMe").def(py::self == py::self);
+
+    m.def("get_unhashable_HashMe_set", []() { return std::unordered_set<HashMe>{{"one"}}; });
+}
diff --git a/ext/pybind11/tests/test_operator_overloading.py b/ext/pybind11/tests/test_operator_overloading.py
index b7137d1592..b228da3cc3 100644
--- a/ext/pybind11/tests/test_operator_overloading.py
+++ b/ext/pybind11/tests/test_operator_overloading.py
@@ -1,4 +1,3 @@
-# -*- coding: utf-8 -*-
 import pytest
 
 from pybind11_tests import ConstructorStats
@@ -135,8 +134,9 @@ def test_overriding_eq_reset_hash():
     assert m.Comparable(15) is not m.Comparable(15)
     assert m.Comparable(15) == m.Comparable(15)
 
-    with pytest.raises(TypeError):
-        hash(m.Comparable(15))  # TypeError: unhashable type: 'm.Comparable'
+    with pytest.raises(TypeError) as excinfo:
+        hash(m.Comparable(15))
+    assert str(excinfo.value).startswith("unhashable type:")
 
     for hashable in (m.Hashable, m.Hashable2):
         assert hashable(15) is not hashable(15)
@@ -144,3 +144,9 @@ def test_overriding_eq_reset_hash():
 
         assert hash(hashable(15)) == 15
         assert hash(hashable(15)) == hash(hashable(15))
+
+
+def test_return_set_of_unhashable():
+    with pytest.raises(TypeError) as excinfo:
+        m.get_unhashable_HashMe_set()
+    assert str(excinfo.value.__cause__).startswith("unhashable type:")
diff --git a/ext/pybind11/tests/test_pickling.cpp b/ext/pybind11/tests/test_pickling.cpp
index b77636dd1a..e154bc483c 100644
--- a/ext/pybind11/tests/test_pickling.cpp
+++ b/ext/pybind11/tests/test_pickling.cpp
@@ -1,4 +1,3 @@
-// clang-format off
 /*
     tests/test_pickling.cpp -- pickle support
 
@@ -11,8 +10,6 @@
 
 #include "pybind11_tests.h"
 
-// clang-format on
-
 #include <memory>
 #include <stdexcept>
 #include <utility>
@@ -20,11 +17,11 @@
 namespace exercise_trampoline {
 
 struct SimpleBase {
-    int num               = 0;
+    int num = 0;
     virtual ~SimpleBase() = default;
 
     // For compatibility with old clang versions:
-    SimpleBase()                   = default;
+    SimpleBase() = default;
     SimpleBase(const SimpleBase &) = default;
 };
 
@@ -39,16 +36,18 @@ void wrap(py::module m) {
         .def(py::pickle(
             [](const py::object &self) {
                 py::dict d;
-                if (py::hasattr(self, "__dict__"))
+                if (py::hasattr(self, "__dict__")) {
                     d = self.attr("__dict__");
+                }
                 return py::make_tuple(self.attr("num"), d);
             },
             [](const py::tuple &t) {
-                if (t.size() != 2)
+                if (t.size() != 2) {
                     throw std::runtime_error("Invalid state!");
+                }
                 auto cpp_state = std::unique_ptr<SimpleBase>(new SimpleBaseTrampoline);
                 cpp_state->num = t[0].cast<int>();
-                auto py_state  = t[1].cast<py::dict>();
+                auto py_state = t[1].cast<py::dict>();
                 return std::make_pair(std::move(cpp_state), py_state);
             }));
 
@@ -61,19 +60,20 @@ void wrap(py::module m) {
 
 } // namespace exercise_trampoline
 
-// clang-format off
-
 TEST_SUBMODULE(pickling, m) {
+    m.def("simple_callable", []() { return 20220426; });
+
     // test_roundtrip
     class Pickleable {
     public:
-        explicit Pickleable(const std::string &value) : m_value(value) { }
+        explicit Pickleable(const std::string &value) : m_value(value) {}
         const std::string &value() const { return m_value; }
 
         void setExtra1(int extra1) { m_extra1 = extra1; }
         void setExtra2(int extra2) { m_extra2 = extra2; }
         int extra1() const { return m_extra1; }
         int extra2() const { return m_extra2; }
+
     private:
         std::string m_value;
         int m_extra1 = 0;
@@ -86,8 +86,7 @@ TEST_SUBMODULE(pickling, m) {
     };
 
     py::class_<Pickleable> pyPickleable(m, "Pickleable");
-    pyPickleable
-        .def(py::init<std::string>())
+    pyPickleable.def(py::init<std::string>())
         .def("value", &Pickleable::value)
         .def("extra1", &Pickleable::extra1)
         .def("extra2", &Pickleable::extra2)
@@ -101,8 +100,9 @@ TEST_SUBMODULE(pickling, m) {
         });
     ignoreOldStyleInitWarnings([&pyPickleable]() {
         pyPickleable.def("__setstate__", [](Pickleable &p, const py::tuple &t) {
-            if (t.size() != 3)
+            if (t.size() != 3) {
                 throw std::runtime_error("Invalid state!");
+            }
             /* Invoke the constructor (need to use in-place version) */
             new (&p) Pickleable(t[0].cast<std::string>());
 
@@ -119,8 +119,9 @@ TEST_SUBMODULE(pickling, m) {
                 return py::make_tuple(p.value(), p.extra1(), p.extra2());
             },
             [](const py::tuple &t) {
-                if (t.size() != 3)
+                if (t.size() != 3) {
                     throw std::runtime_error("Invalid state!");
+                }
                 auto p = PickleableNew(t[0].cast<std::string>());
 
                 p.setExtra1(t[1].cast<int>());
@@ -132,7 +133,7 @@ TEST_SUBMODULE(pickling, m) {
     // test_roundtrip_with_dict
     class PickleableWithDict {
     public:
-        explicit PickleableWithDict(const std::string &value) : value(value) { }
+        explicit PickleableWithDict(const std::string &value) : value(value) {}
 
         std::string value;
         int extra;
@@ -143,7 +144,8 @@ TEST_SUBMODULE(pickling, m) {
         using PickleableWithDict::PickleableWithDict;
     };
 
-    py::class_<PickleableWithDict> pyPickleableWithDict(m, "PickleableWithDict", py::dynamic_attr());
+    py::class_<PickleableWithDict> pyPickleableWithDict(
+        m, "PickleableWithDict", py::dynamic_attr());
     pyPickleableWithDict.def(py::init<std::string>())
         .def_readwrite("value", &PickleableWithDict::value)
         .def_readwrite("extra", &PickleableWithDict::extra)
@@ -153,8 +155,9 @@ TEST_SUBMODULE(pickling, m) {
         });
     ignoreOldStyleInitWarnings([&pyPickleableWithDict]() {
         pyPickleableWithDict.def("__setstate__", [](const py::object &self, const py::tuple &t) {
-            if (t.size() != 3)
+            if (t.size() != 3) {
                 throw std::runtime_error("Invalid state!");
+            }
             /* Cast and construct */
             auto &p = self.cast<PickleableWithDict &>();
             new (&p) PickleableWithDict(t[0].cast<std::string>());
@@ -171,11 +174,13 @@ TEST_SUBMODULE(pickling, m) {
         .def(py::init<std::string>())
         .def(py::pickle(
             [](const py::object &self) {
-                return py::make_tuple(self.attr("value"), self.attr("extra"), self.attr("__dict__"));
+                return py::make_tuple(
+                    self.attr("value"), self.attr("extra"), self.attr("__dict__"));
             },
             [](const py::tuple &t) {
-                if (t.size() != 3)
+                if (t.size() != 3) {
                     throw std::runtime_error("Invalid state!");
+                }
 
                 auto cpp_state = PickleableWithDictNew(t[0].cast<std::string>());
                 cpp_state.extra = t[1].cast<int>();
diff --git a/ext/pybind11/tests/test_pickling.py b/ext/pybind11/tests/test_pickling.py
index 9f68f37dcf..12361a661e 100644
--- a/ext/pybind11/tests/test_pickling.py
+++ b/ext/pybind11/tests/test_pickling.py
@@ -1,13 +1,24 @@
-# -*- coding: utf-8 -*-
+import pickle
+import re
+
 import pytest
 
 import env
 from pybind11_tests import pickling as m
 
-try:
-    import cPickle as pickle  # Use cPickle on Python 2.7
-except ImportError:
-    import pickle
+
+def test_pickle_simple_callable():
+    assert m.simple_callable() == 20220426
+    if env.PYPY:
+        serialized = pickle.dumps(m.simple_callable)
+        deserialized = pickle.loads(serialized)
+        assert deserialized() == 20220426
+    else:
+        # To document broken behavior: currently it fails universally with
+        # all C Python versions.
+        with pytest.raises(TypeError) as excinfo:
+            pickle.dumps(m.simple_callable)
+        assert re.search("can.*t pickle .*PyCapsule.* object", str(excinfo.value))
 
 
 @pytest.mark.parametrize("cls_name", ["Pickleable", "PickleableNew"])
diff --git a/ext/pybind11/tests/test_pytypes.cpp b/ext/pybind11/tests/test_pytypes.cpp
index 9a1e918818..1028bb58e8 100644
--- a/ext/pybind11/tests/test_pytypes.cpp
+++ b/ext/pybind11/tests/test_pytypes.cpp
@@ -7,18 +7,117 @@
     BSD-style license that can be found in the LICENSE file.
 */
 
-#include <utility>
-
 #include "pybind11_tests.h"
 
+#include <utility>
+
+namespace external {
+namespace detail {
+bool check(PyObject *o) { return PyFloat_Check(o) != 0; }
+
+PyObject *conv(PyObject *o) {
+    PyObject *ret = nullptr;
+    if (PyLong_Check(o)) {
+        double v = PyLong_AsDouble(o);
+        if (!(v == -1.0 && PyErr_Occurred())) {
+            ret = PyFloat_FromDouble(v);
+        }
+    } else {
+        PyErr_SetString(PyExc_TypeError, "Unexpected type");
+    }
+    return ret;
+}
+
+PyObject *default_constructed() { return PyFloat_FromDouble(0.0); }
+} // namespace detail
+class float_ : public py::object {
+    PYBIND11_OBJECT_CVT(float_, py::object, external::detail::check, external::detail::conv)
+
+    float_() : py::object(external::detail::default_constructed(), stolen_t{}) {}
+
+    double get_value() const { return PyFloat_AsDouble(this->ptr()); }
+};
+} // namespace external
+
+namespace implicit_conversion_from_0_to_handle {
+// Uncomment to trigger compiler error. Note: Before PR #4008 this used to compile successfully.
+// void expected_to_trigger_compiler_error() { py::handle(0); }
+} // namespace implicit_conversion_from_0_to_handle
+
+// Used to validate systematically that PR #4008 does/did NOT change the behavior.
+void pure_compile_tests_for_handle_from_PyObject_pointers() {
+    {
+        PyObject *ptr = Py_None;
+        py::handle{ptr};
+    }
+    {
+        PyObject *const ptr = Py_None;
+        py::handle{ptr};
+    }
+    // Uncomment to trigger compiler errors.
+    // PyObject const *               ptr = Py_None; py::handle{ptr};
+    // PyObject const *const          ptr = Py_None; py::handle{ptr};
+    // PyObject volatile *            ptr = Py_None; py::handle{ptr};
+    // PyObject volatile *const       ptr = Py_None; py::handle{ptr};
+    // PyObject const volatile *      ptr = Py_None; py::handle{ptr};
+    // PyObject const volatile *const ptr = Py_None; py::handle{ptr};
+}
+
+namespace handle_from_move_only_type_with_operator_PyObject {
+
+// Reduced from
+// https://github.com/pytorch/pytorch/blob/279634f384662b7c3a9f8bf7ccc3a6afd2f05657/torch/csrc/utils/object_ptr.h
+struct operator_ncnst {
+    operator_ncnst() = default;
+    operator_ncnst(operator_ncnst &&) = default;
+    operator PyObject *() /* */ { return Py_None; } // NOLINT(google-explicit-constructor)
+};
+
+struct operator_const {
+    operator_const() = default;
+    operator_const(operator_const &&) = default;
+    operator PyObject *() const { return Py_None; } // NOLINT(google-explicit-constructor)
+};
+
+bool from_ncnst() {
+    operator_ncnst obj;
+    auto h = py::handle(obj);  // Critical part of test: does this compile?
+    return h.ptr() == Py_None; // Just something.
+}
+
+bool from_const() {
+    operator_const obj;
+    auto h = py::handle(obj);  // Critical part of test: does this compile?
+    return h.ptr() == Py_None; // Just something.
+}
+
+void m_defs(py::module_ &m) {
+    m.def("handle_from_move_only_type_with_operator_PyObject_ncnst", from_ncnst);
+    m.def("handle_from_move_only_type_with_operator_PyObject_const", from_const);
+}
+
+} // namespace handle_from_move_only_type_with_operator_PyObject
 
 TEST_SUBMODULE(pytypes, m) {
+    m.def("obj_class_name", [](py::handle obj) { return py::detail::obj_class_name(obj.ptr()); });
+
+    handle_from_move_only_type_with_operator_PyObject::m_defs(m);
+
+    // test_bool
+    m.def("get_bool", [] { return py::bool_(false); });
     // test_int
-    m.def("get_int", []{return py::int_(0);});
+    m.def("get_int", [] { return py::int_(0); });
     // test_iterator
-    m.def("get_iterator", []{return py::iterator();});
+    m.def("get_iterator", [] { return py::iterator(); });
     // test_iterable
-    m.def("get_iterable", []{return py::iterable();});
+    m.def("get_iterable", [] { return py::iterable(); });
+    m.def("get_frozenset_from_iterable",
+          [](const py::iterable &iter) { return py::frozenset(iter); });
+    m.def("get_list_from_iterable", [](const py::iterable &iter) { return py::list(iter); });
+    m.def("get_set_from_iterable", [](const py::iterable &iter) { return py::set(iter); });
+    m.def("get_tuple_from_iterable", [](const py::iterable &iter) { return py::tuple(iter); });
+    // test_float
+    m.def("get_float", [] { return py::float_(0.0f); });
     // test_list
     m.def("list_no_args", []() { return py::list{}; });
     m.def("list_ssize_t", []() { return py::list{(py::ssize_t) 0}; });
@@ -36,14 +135,15 @@ TEST_SUBMODULE(pytypes, m) {
     });
     m.def("print_list", [](const py::list &list) {
         int index = 0;
-        for (auto item : list)
+        for (auto item : list) {
             py::print("list item {}: {}"_s.format(index++, item));
+        }
     });
     // test_none
-    m.def("get_none", []{return py::none();});
+    m.def("get_none", [] { return py::none(); });
     m.def("print_none", [](const py::none &none) { py::print("none: {}"_s.format(none)); });
 
-    // test_set
+    // test_set, test_frozenset
     m.def("get_set", []() {
         py::set set;
         set.add(py::str("key1"));
@@ -51,27 +151,41 @@ TEST_SUBMODULE(pytypes, m) {
         set.add(std::string("key3"));
         return set;
     });
-    m.def("print_set", [](const py::set &set) {
-        for (auto item : set)
-            py::print("key:", item);
+    m.def("get_frozenset", []() {
+        py::set set;
+        set.add(py::str("key1"));
+        set.add("key2");
+        set.add(std::string("key3"));
+        return py::frozenset(set);
     });
-    m.def("set_contains",
-          [](const py::set &set, const py::object &key) { return set.contains(key); });
-    m.def("set_contains", [](const py::set &set, const char *key) { return set.contains(key); });
+    m.def("print_anyset", [](const py::anyset &set) {
+        for (auto item : set) {
+            py::print("key:", item);
+        }
+    });
+    m.def("anyset_size", [](const py::anyset &set) { return set.size(); });
+    m.def("anyset_empty", [](const py::anyset &set) { return set.empty(); });
+    m.def("anyset_contains",
+          [](const py::anyset &set, const py::object &key) { return set.contains(key); });
+    m.def("anyset_contains",
+          [](const py::anyset &set, const char *key) { return set.contains(key); });
+    m.def("set_add", [](py::set &set, const py::object &key) { set.add(key); });
+    m.def("set_clear", [](py::set &set) { set.clear(); });
 
     // test_dict
-    m.def("get_dict", []() { return py::dict("key"_a="value"); });
+    m.def("get_dict", []() { return py::dict("key"_a = "value"); });
     m.def("print_dict", [](const py::dict &dict) {
-        for (auto item : dict)
+        for (auto item : dict) {
             py::print("key: {}, value={}"_s.format(item.first, item.second));
+        }
     });
     m.def("dict_keyword_constructor", []() {
-        auto d1 = py::dict("x"_a=1, "y"_a=2);
-        auto d2 = py::dict("z"_a=3, **d1);
+        auto d1 = py::dict("x"_a = 1, "y"_a = 2);
+        auto d2 = py::dict("z"_a = 3, **d1);
         return d2;
     });
     m.def("dict_contains",
-          [](const py::dict &dict, py::object val) { return dict.contains(val); });
+          [](const py::dict &dict, const py::object &val) { return dict.contains(val); });
     m.def("dict_contains",
           [](const py::dict &dict, const char *val) { return dict.contains(val); });
 
@@ -81,31 +195,34 @@ TEST_SUBMODULE(pytypes, m) {
     m.def("tuple_size_t", []() { return py::tuple{(py::size_t) 0}; });
     m.def("get_tuple", []() { return py::make_tuple(42, py::none(), "spam"); });
 
-#if PY_VERSION_HEX >= 0x03030000
     // test_simple_namespace
     m.def("get_simple_namespace", []() {
-        auto ns = py::module_::import("types").attr("SimpleNamespace")("attr"_a=42, "x"_a="foo", "wrong"_a=1);
+        auto ns = py::module_::import("types").attr("SimpleNamespace")(
+            "attr"_a = 42, "x"_a = "foo", "wrong"_a = 1);
         py::delattr(ns, "wrong");
         py::setattr(ns, "right", py::int_(2));
         return ns;
     });
-#endif
 
     // test_str
     m.def("str_from_char_ssize_t", []() { return py::str{"red", (py::ssize_t) 3}; });
     m.def("str_from_char_size_t", []() { return py::str{"blue", (py::size_t) 4}; });
     m.def("str_from_string", []() { return py::str(std::string("baz")); });
+    m.def("str_from_std_string_input", [](const std::string &stri) { return py::str(stri); });
+    m.def("str_from_cstr_input", [](const char *c_str) { return py::str(c_str); });
     m.def("str_from_bytes", []() { return py::str(py::bytes("boo", 3)); });
-    m.def("str_from_object", [](const py::object& obj) { return py::str(obj); });
-    m.def("repr_from_object", [](const py::object& obj) { return py::repr(obj); });
+    m.def("str_from_bytes_input",
+          [](const py::bytes &encoded_str) { return py::str(encoded_str); });
+
+    m.def("str_from_object", [](const py::object &obj) { return py::str(obj); });
+    m.def("repr_from_object", [](const py::object &obj) { return py::repr(obj); });
     m.def("str_from_handle", [](py::handle h) { return py::str(h); });
-    m.def("str_from_string_from_str", [](const py::str& obj) {
-        return py::str(static_cast<std::string>(obj));
-    });
+    m.def("str_from_string_from_str",
+          [](const py::str &obj) { return py::str(static_cast<std::string>(obj)); });
 
     m.def("str_format", []() {
         auto s1 = "{} + {} = {}"_s.format(1, 2, 3);
-        auto s2 = "{a} + {b} = {c}"_s.format("a"_a=1, "b"_a=2, "c"_a=3);
+        auto s2 = "{a} + {b} = {c}"_s.format("a"_a = 1, "b"_a = 2, "c"_a = 3);
         return py::make_tuple(s1, s2);
     });
 
@@ -124,9 +241,16 @@ TEST_SUBMODULE(pytypes, m) {
     // test_capsule
     m.def("return_capsule_with_destructor", []() {
         py::print("creating capsule");
-        return py::capsule([]() {
-            py::print("destructing capsule");
-        });
+        return py::capsule([]() { py::print("destructing capsule"); });
+    });
+
+    m.def("return_renamed_capsule_with_destructor", []() {
+        py::print("creating capsule");
+        auto cap = py::capsule([]() { py::print("destructing capsule"); });
+        static const char *capsule_name = "test_name1";
+        py::print("renaming capsule");
+        cap.set_name(capsule_name);
+        return cap;
     });
 
     m.def("return_capsule_with_destructor_2", []() {
@@ -136,31 +260,48 @@ TEST_SUBMODULE(pytypes, m) {
         });
     });
 
+    m.def("return_renamed_capsule_with_destructor_2", []() {
+        py::print("creating capsule");
+        auto cap = py::capsule((void *) 1234, [](void *ptr) {
+            py::print("destructing capsule: {}"_s.format((size_t) ptr));
+        });
+        static const char *capsule_name = "test_name2";
+        py::print("renaming capsule");
+        cap.set_name(capsule_name);
+        return cap;
+    });
+
     m.def("return_capsule_with_name_and_destructor", []() {
         auto capsule = py::capsule((void *) 12345, "pointer type description", [](PyObject *ptr) {
             if (ptr) {
-                auto name = PyCapsule_GetName(ptr);
+                const auto *name = PyCapsule_GetName(ptr);
                 py::print("destructing capsule ({}, '{}')"_s.format(
-                    (size_t) PyCapsule_GetPointer(ptr, name), name
-                ));
+                    (size_t) PyCapsule_GetPointer(ptr, name), name));
             }
         });
 
         capsule.set_pointer((void *) 1234);
 
         // Using get_pointer<T>()
-        void* contents1 = static_cast<void*>(capsule);
-        void* contents2 = capsule.get_pointer();
-        void* contents3 = capsule.get_pointer<void>();
+        void *contents1 = static_cast<void *>(capsule);
+        void *contents2 = capsule.get_pointer();
+        void *contents3 = capsule.get_pointer<void>();
 
         auto result1 = reinterpret_cast<size_t>(contents1);
         auto result2 = reinterpret_cast<size_t>(contents2);
         auto result3 = reinterpret_cast<size_t>(contents3);
 
-        py::print("created capsule ({}, '{}')"_s.format(result1 & result2 & result3, capsule.name()));
+        py::print(
+            "created capsule ({}, '{}')"_s.format(result1 & result2 & result3, capsule.name()));
         return capsule;
     });
 
+    m.def("return_capsule_with_explicit_nullptr_dtor", []() {
+        py::print("creating capsule with explicit nullptr dtor");
+        return py::capsule(reinterpret_cast<void *>(1234),
+                           static_cast<void (*)(void *)>(nullptr)); // PR #4221
+    });
+
     // test_accessors
     m.def("accessor_api", [](const py::object &o) {
         auto d = py::dict();
@@ -235,53 +376,98 @@ TEST_SUBMODULE(pytypes, m) {
         return d;
     });
 
+    m.def("accessor_moves", []() { // See PR #3970
+        py::list return_list;
+#ifdef PYBIND11_HANDLE_REF_DEBUG
+        py::int_ py_int_0(0);
+        py::int_ py_int_42(42);
+        py::str py_str_count("count");
+
+        auto tup = py::make_tuple(0);
+
+        py::sequence seq(tup);
+
+        py::list lst;
+        lst.append(0);
+
+#    define PYBIND11_LOCAL_DEF(...)                                                               \
+        {                                                                                         \
+            std::size_t inc_refs = py::handle::inc_ref_counter();                                 \
+            __VA_ARGS__;                                                                          \
+            inc_refs = py::handle::inc_ref_counter() - inc_refs;                                  \
+            return_list.append(inc_refs);                                                         \
+        }
+
+        PYBIND11_LOCAL_DEF(tup[py_int_0])    // l-value (to have a control)
+        PYBIND11_LOCAL_DEF(tup[py::int_(0)]) // r-value
+
+        PYBIND11_LOCAL_DEF(tup.attr(py_str_count))     // l-value
+        PYBIND11_LOCAL_DEF(tup.attr(py::str("count"))) // r-value
+
+        PYBIND11_LOCAL_DEF(seq[py_int_0])    // l-value
+        PYBIND11_LOCAL_DEF(seq[py::int_(0)]) // r-value
+
+        PYBIND11_LOCAL_DEF(seq.attr(py_str_count))     // l-value
+        PYBIND11_LOCAL_DEF(seq.attr(py::str("count"))) // r-value
+
+        PYBIND11_LOCAL_DEF(lst[py_int_0])    // l-value
+        PYBIND11_LOCAL_DEF(lst[py::int_(0)]) // r-value
+
+        PYBIND11_LOCAL_DEF(lst.attr(py_str_count))     // l-value
+        PYBIND11_LOCAL_DEF(lst.attr(py::str("count"))) // r-value
+
+        auto lst_acc = lst[py::int_(0)];
+        lst_acc = py::int_(42);                    // Detaches lst_acc from lst.
+        PYBIND11_LOCAL_DEF(lst_acc = py_int_42)    // l-value
+        PYBIND11_LOCAL_DEF(lst_acc = py::int_(42)) // r-value
+#    undef PYBIND11_LOCAL_DEF
+#endif
+        return return_list;
+    });
+
     // test_constructors
     m.def("default_constructors", []() {
-        return py::dict(
-            "bytes"_a=py::bytes(),
-            "bytearray"_a=py::bytearray(),
-            "str"_a=py::str(),
-            "bool"_a=py::bool_(),
-            "int"_a=py::int_(),
-            "float"_a=py::float_(),
-            "tuple"_a=py::tuple(),
-            "list"_a=py::list(),
-            "dict"_a=py::dict(),
-            "set"_a=py::set()
-        );
+        return py::dict("bytes"_a = py::bytes(),
+                        "bytearray"_a = py::bytearray(),
+                        "str"_a = py::str(),
+                        "bool"_a = py::bool_(),
+                        "int"_a = py::int_(),
+                        "float"_a = py::float_(),
+                        "tuple"_a = py::tuple(),
+                        "list"_a = py::list(),
+                        "dict"_a = py::dict(),
+                        "set"_a = py::set());
     });
 
     m.def("converting_constructors", [](const py::dict &d) {
-        return py::dict(
-            "bytes"_a=py::bytes(d["bytes"]),
-            "bytearray"_a=py::bytearray(d["bytearray"]),
-            "str"_a=py::str(d["str"]),
-            "bool"_a=py::bool_(d["bool"]),
-            "int"_a=py::int_(d["int"]),
-            "float"_a=py::float_(d["float"]),
-            "tuple"_a=py::tuple(d["tuple"]),
-            "list"_a=py::list(d["list"]),
-            "dict"_a=py::dict(d["dict"]),
-            "set"_a=py::set(d["set"]),
-            "memoryview"_a=py::memoryview(d["memoryview"])
-        );
+        return py::dict("bytes"_a = py::bytes(d["bytes"]),
+                        "bytearray"_a = py::bytearray(d["bytearray"]),
+                        "str"_a = py::str(d["str"]),
+                        "bool"_a = py::bool_(d["bool"]),
+                        "int"_a = py::int_(d["int"]),
+                        "float"_a = py::float_(d["float"]),
+                        "tuple"_a = py::tuple(d["tuple"]),
+                        "list"_a = py::list(d["list"]),
+                        "dict"_a = py::dict(d["dict"]),
+                        "set"_a = py::set(d["set"]),
+                        "frozenset"_a = py::frozenset(d["frozenset"]),
+                        "memoryview"_a = py::memoryview(d["memoryview"]));
     });
 
     m.def("cast_functions", [](const py::dict &d) {
         // When converting between Python types, obj.cast<T>() should be the same as T(obj)
-        return py::dict(
-            "bytes"_a=d["bytes"].cast<py::bytes>(),
-            "bytearray"_a=d["bytearray"].cast<py::bytearray>(),
-            "str"_a=d["str"].cast<py::str>(),
-            "bool"_a=d["bool"].cast<py::bool_>(),
-            "int"_a=d["int"].cast<py::int_>(),
-            "float"_a=d["float"].cast<py::float_>(),
-            "tuple"_a=d["tuple"].cast<py::tuple>(),
-            "list"_a=d["list"].cast<py::list>(),
-            "dict"_a=d["dict"].cast<py::dict>(),
-            "set"_a=d["set"].cast<py::set>(),
-            "memoryview"_a=d["memoryview"].cast<py::memoryview>()
-        );
+        return py::dict("bytes"_a = d["bytes"].cast<py::bytes>(),
+                        "bytearray"_a = d["bytearray"].cast<py::bytearray>(),
+                        "str"_a = d["str"].cast<py::str>(),
+                        "bool"_a = d["bool"].cast<py::bool_>(),
+                        "int"_a = d["int"].cast<py::int_>(),
+                        "float"_a = d["float"].cast<py::float_>(),
+                        "tuple"_a = d["tuple"].cast<py::tuple>(),
+                        "list"_a = d["list"].cast<py::list>(),
+                        "dict"_a = d["dict"].cast<py::dict>(),
+                        "set"_a = d["set"].cast<py::set>(),
+                        "frozenset"_a = d["frozenset"].cast<py::frozenset>(),
+                        "memoryview"_a = d["memoryview"].cast<py::memoryview>());
     });
 
     m.def("convert_to_pybind11_str", [](const py::object &o) { return py::str(o); });
@@ -334,10 +520,7 @@ TEST_SUBMODULE(pytypes, m) {
         l.append(py::cast(12));
         l.append(py::int_(15));
 
-        return py::dict(
-            "d"_a=d,
-            "l"_a=l
-        );
+        return py::dict("d"_a = d, "l"_a = l);
     });
 
     // test_print
@@ -345,22 +528,26 @@ TEST_SUBMODULE(pytypes, m) {
         py::print("Hello, World!");
         py::print(1, 2.0, "three", true, std::string("-- multiple args"));
         auto args = py::make_tuple("and", "a", "custom", "separator");
-        py::print("*args", *args, "sep"_a="-");
-        py::print("no new line here", "end"_a=" -- ");
+        py::print("*args", *args, "sep"_a = "-");
+        py::print("no new line here", "end"_a = " -- ");
         py::print("next print");
 
         auto py_stderr = py::module_::import("sys").attr("stderr");
-        py::print("this goes to stderr", "file"_a=py_stderr);
+        py::print("this goes to stderr", "file"_a = py_stderr);
 
-        py::print("flush", "flush"_a=true);
+        py::print("flush", "flush"_a = true);
 
-        py::print("{a} + {b} = {c}"_s.format("a"_a="py::print", "b"_a="str.format", "c"_a="this"));
+        py::print(
+            "{a} + {b} = {c}"_s.format("a"_a = "py::print", "b"_a = "str.format", "c"_a = "this"));
     });
 
     m.def("print_failure", []() { py::print(42, UnregisteredType()); });
 
     m.def("hash_function", [](py::object obj) { return py::hash(std::move(obj)); });
 
+    m.def("obj_contains",
+          [](py::object &obj, const py::object &key) { return obj.contains(key); });
+
     m.def("test_number_protocol", [](const py::object &a, const py::object &b) {
         py::list l;
         l.append(a.equal(b));
@@ -399,43 +586,38 @@ TEST_SUBMODULE(pytypes, m) {
           [](const py::buffer &b) { return py::memoryview(b.request()); });
 
     m.def("test_memoryview_from_buffer", [](bool is_unsigned) {
-        static const int16_t si16[] = { 3, 1, 4, 1, 5 };
-        static const uint16_t ui16[] = { 2, 7, 1, 8 };
-        if (is_unsigned)
-            return py::memoryview::from_buffer(
-                ui16, { 4 }, { sizeof(uint16_t) });
+        static const int16_t si16[] = {3, 1, 4, 1, 5};
+        static const uint16_t ui16[] = {2, 7, 1, 8};
+        if (is_unsigned) {
+            return py::memoryview::from_buffer(ui16, {4}, {sizeof(uint16_t)});
+        }
         return py::memoryview::from_buffer(si16, {5}, {sizeof(int16_t)});
     });
 
     m.def("test_memoryview_from_buffer_nativeformat", []() {
-        static const char* format = "@i";
-        static const int32_t arr[] = { 4, 7, 5 };
-        return py::memoryview::from_buffer(
-            arr, sizeof(int32_t), format, { 3 }, { sizeof(int32_t) });
+        static const char *format = "@i";
+        static const int32_t arr[] = {4, 7, 5};
+        return py::memoryview::from_buffer(arr, sizeof(int32_t), format, {3}, {sizeof(int32_t)});
     });
 
     m.def("test_memoryview_from_buffer_empty_shape", []() {
-        static const char* buf = "";
-        return py::memoryview::from_buffer(buf, 1, "B", { }, { });
+        static const char *buf = "";
+        return py::memoryview::from_buffer(buf, 1, "B", {}, {});
     });
 
     m.def("test_memoryview_from_buffer_invalid_strides", []() {
-        static const char* buf = "\x02\x03\x04";
-        return py::memoryview::from_buffer(buf, 1, "B", { 3 }, { });
+        static const char *buf = "\x02\x03\x04";
+        return py::memoryview::from_buffer(buf, 1, "B", {3}, {});
     });
 
     m.def("test_memoryview_from_buffer_nullptr", []() {
-        return py::memoryview::from_buffer(
-            static_cast<void*>(nullptr), 1, "B", { }, { });
+        return py::memoryview::from_buffer(static_cast<void *>(nullptr), 1, "B", {}, {});
     });
 
-#if PY_MAJOR_VERSION >= 3
     m.def("test_memoryview_from_memory", []() {
-        const char* buf = "\xff\xe1\xab\x37";
-        return py::memoryview::from_memory(
-            buf, static_cast<py::ssize_t>(strlen(buf)));
+        const char *buf = "\xff\xe1\xab\x37";
+        return py::memoryview::from_memory(buf, static_cast<py::ssize_t>(strlen(buf)));
     });
-#endif
 
     // test_builtin_functions
     m.def("get_len", [](py::handle h) { return py::len(h); });
@@ -454,8 +636,7 @@ TEST_SUBMODULE(pytypes, m) {
     m.def("pass_to_std_string", [](const std::string &s) { return s.size(); });
 
     // test_weakref
-    m.def("weakref_from_handle",
-          [](py::handle h) { return py::weakref(h); });
+    m.def("weakref_from_handle", [](py::handle h) { return py::weakref(h); });
     m.def("weakref_from_handle_and_function",
           [](py::handle h, py::function f) { return py::weakref(h, std::move(f)); });
     m.def("weakref_from_object", [](const py::object &o) { return py::weakref(o); });
@@ -557,4 +738,77 @@ TEST_SUBMODULE(pytypes, m) {
         py::detail::accessor_policies::tuple_item::set(o, (py::size_t) 0, s0);
         return o;
     });
+
+    m.def("square_float_", [](const external::float_ &x) -> double {
+        double v = x.get_value();
+        return v * v;
+    });
+
+    m.def("tuple_rvalue_getter", [](const py::tuple &tup) {
+        // tests accessing tuple object with rvalue int
+        for (size_t i = 0; i < tup.size(); i++) {
+            auto o = py::handle(tup[py::int_(i)]);
+            if (!o) {
+                throw py::value_error("tuple is malformed");
+            }
+        }
+        return tup;
+    });
+    m.def("list_rvalue_getter", [](const py::list &l) {
+        // tests accessing list with rvalue int
+        for (size_t i = 0; i < l.size(); i++) {
+            auto o = py::handle(l[py::int_(i)]);
+            if (!o) {
+                throw py::value_error("list is malformed");
+            }
+        }
+        return l;
+    });
+    m.def("populate_dict_rvalue", [](int population) {
+        auto d = py::dict();
+        for (int i = 0; i < population; i++) {
+            d[py::int_(i)] = py::int_(i);
+        }
+        return d;
+    });
+    m.def("populate_obj_str_attrs", [](py::object &o, int population) {
+        for (int i = 0; i < population; i++) {
+            o.attr(py::str(py::int_(i))) = py::str(py::int_(i));
+        }
+        return o;
+    });
+
+    // testing immutable object augmented assignment: #issue 3812
+    m.def("inplace_append", [](py::object &a, const py::object &b) {
+        a += b;
+        return a;
+    });
+    m.def("inplace_subtract", [](py::object &a, const py::object &b) {
+        a -= b;
+        return a;
+    });
+    m.def("inplace_multiply", [](py::object &a, const py::object &b) {
+        a *= b;
+        return a;
+    });
+    m.def("inplace_divide", [](py::object &a, const py::object &b) {
+        a /= b;
+        return a;
+    });
+    m.def("inplace_or", [](py::object &a, const py::object &b) {
+        a |= b;
+        return a;
+    });
+    m.def("inplace_and", [](py::object &a, const py::object &b) {
+        a &= b;
+        return a;
+    });
+    m.def("inplace_lshift", [](py::object &a, const py::object &b) {
+        a <<= b;
+        return a;
+    });
+    m.def("inplace_rshift", [](py::object &a, const py::object &b) {
+        a >>= b;
+        return a;
+    });
 }
diff --git a/ext/pybind11/tests/test_pytypes.py b/ext/pybind11/tests/test_pytypes.py
index 5215b79bc1..8f9f2987e9 100644
--- a/ext/pybind11/tests/test_pytypes.py
+++ b/ext/pybind11/tests/test_pytypes.py
@@ -1,15 +1,29 @@
-# -*- coding: utf-8 -*-
-from __future__ import division
-
+import contextlib
 import sys
+import types
 
 import pytest
 
 import env
-from pybind11_tests import debug_enabled
+from pybind11_tests import detailed_error_messages_enabled
 from pybind11_tests import pytypes as m
 
 
+def test_obj_class_name():
+    assert m.obj_class_name(None) == "NoneType"
+    assert m.obj_class_name(list) == "list"
+    assert m.obj_class_name([]) == "list"
+
+
+def test_handle_from_move_only_type_with_operator_PyObject():  # noqa: N802
+    assert m.handle_from_move_only_type_with_operator_PyObject_ncnst()
+    assert m.handle_from_move_only_type_with_operator_PyObject_const()
+
+
+def test_bool(doc):
+    assert doc(m.get_bool) == "get_bool() -> bool"
+
+
 def test_int(doc):
     assert doc(m.get_int) == "get_int() -> int"
 
@@ -18,10 +32,30 @@ def test_iterator(doc):
     assert doc(m.get_iterator) == "get_iterator() -> Iterator"
 
 
+@pytest.mark.parametrize(
+    "pytype, from_iter_func",
+    [
+        (frozenset, m.get_frozenset_from_iterable),
+        (list, m.get_list_from_iterable),
+        (set, m.get_set_from_iterable),
+        (tuple, m.get_tuple_from_iterable),
+    ],
+)
+def test_from_iterable(pytype, from_iter_func):
+    my_iter = iter(range(10))
+    s = from_iter_func(my_iter)
+    assert type(s) == pytype
+    assert s == pytype(range(10))
+
+
 def test_iterable(doc):
     assert doc(m.get_iterable) == "get_iterable() -> Iterable"
 
 
+def test_float(doc):
+    assert doc(m.get_float) == "get_float() -> float"
+
+
 def test_list(capture, doc):
     assert m.list_no_args() == []
     assert m.list_ssize_t() == []
@@ -60,11 +94,12 @@ def test_none(capture, doc):
 
 def test_set(capture, doc):
     s = m.get_set()
+    assert isinstance(s, set)
     assert s == {"key1", "key2", "key3"}
 
+    s.add("key4")
     with capture:
-        s.add("key4")
-        m.print_set(s)
+        m.print_anyset(s)
     assert (
         capture.unordered
         == """
@@ -75,12 +110,43 @@ def test_set(capture, doc):
     """
     )
 
-    assert not m.set_contains(set(), 42)
-    assert m.set_contains({42}, 42)
-    assert m.set_contains({"foo"}, "foo")
+    m.set_add(s, "key5")
+    assert m.anyset_size(s) == 5
 
-    assert doc(m.get_list) == "get_list() -> list"
-    assert doc(m.print_list) == "print_list(arg0: list) -> None"
+    m.set_clear(s)
+    assert m.anyset_empty(s)
+
+    assert not m.anyset_contains(set(), 42)
+    assert m.anyset_contains({42}, 42)
+    assert m.anyset_contains({"foo"}, "foo")
+
+    assert doc(m.get_set) == "get_set() -> set"
+    assert doc(m.print_anyset) == "print_anyset(arg0: anyset) -> None"
+
+
+def test_frozenset(capture, doc):
+    s = m.get_frozenset()
+    assert isinstance(s, frozenset)
+    assert s == frozenset({"key1", "key2", "key3"})
+
+    with capture:
+        m.print_anyset(s)
+    assert (
+        capture.unordered
+        == """
+        key: key1
+        key: key2
+        key: key3
+    """
+    )
+    assert m.anyset_size(s) == 3
+    assert not m.anyset_empty(s)
+
+    assert not m.anyset_contains(frozenset(), 42)
+    assert m.anyset_contains(frozenset({42}), 42)
+    assert m.anyset_contains(frozenset({"foo"}), "foo")
+
+    assert doc(m.get_frozenset) == "get_frozenset() -> frozenset"
 
 
 def test_dict(capture, doc):
@@ -108,6 +174,31 @@ def test_dict(capture, doc):
     assert m.dict_keyword_constructor() == {"x": 1, "y": 2, "z": 3}
 
 
+class CustomContains:
+    d = {"key": None}
+
+    def __contains__(self, m):
+        return m in self.d
+
+
+@pytest.mark.parametrize(
+    "arg,func",
+    [
+        (set(), m.anyset_contains),
+        (dict(), m.dict_contains),
+        (CustomContains(), m.obj_contains),
+    ],
+)
+@pytest.mark.xfail("env.PYPY and sys.pypy_version_info < (7, 3, 10)", strict=False)
+def test_unhashable_exceptions(arg, func):
+    class Unhashable:
+        __hash__ = None
+
+    with pytest.raises(TypeError) as exc_info:
+        func(arg, Unhashable())
+    assert "unhashable type:" in str(exc_info.value)
+
+
 def test_tuple():
     assert m.tuple_no_args() == ()
     assert m.tuple_ssize_t() == ()
@@ -115,7 +206,6 @@ def test_tuple():
     assert m.get_tuple() == (42, None, "spam")
 
 
-@pytest.mark.skipif("env.PY2")
 def test_simple_namespace():
     ns = m.get_simple_namespace()
     assert ns.attr == 42
@@ -132,7 +222,7 @@ def test_str(doc):
 
     assert doc(m.str_from_bytes) == "str_from_bytes() -> str"
 
-    class A(object):
+    class A:
         def __str__(self):
             return "this is a str"
 
@@ -150,24 +240,28 @@ def test_str(doc):
     malformed_utf8 = b"\x80"
     if hasattr(m, "PYBIND11_STR_LEGACY_PERMISSIVE"):
         assert m.str_from_object(malformed_utf8) is malformed_utf8
-    elif env.PY2:
-        with pytest.raises(UnicodeDecodeError):
-            m.str_from_object(malformed_utf8)
     else:
         assert m.str_from_object(malformed_utf8) == "b'\\x80'"
-    if env.PY2:
-        with pytest.raises(UnicodeDecodeError):
-            m.str_from_handle(malformed_utf8)
-    else:
-        assert m.str_from_handle(malformed_utf8) == "b'\\x80'"
+    assert m.str_from_handle(malformed_utf8) == "b'\\x80'"
 
     assert m.str_from_string_from_str("this is a str") == "this is a str"
-    ucs_surrogates_str = u"\udcc3"
-    if env.PY2:
-        assert u"\udcc3" == m.str_from_string_from_str(ucs_surrogates_str)
-    else:
-        with pytest.raises(UnicodeEncodeError):
-            m.str_from_string_from_str(ucs_surrogates_str)
+    ucs_surrogates_str = "\udcc3"
+    with pytest.raises(UnicodeEncodeError):
+        m.str_from_string_from_str(ucs_surrogates_str)
+
+
+@pytest.mark.parametrize(
+    "func",
+    [
+        m.str_from_bytes_input,
+        m.str_from_cstr_input,
+        m.str_from_std_string_input,
+    ],
+)
+def test_surrogate_pairs_unicode_error(func):
+    input_str = "\ud83d\ude4f".encode("utf-8", "surrogatepass")
+    with pytest.raises(UnicodeDecodeError):
+        func(input_str)
 
 
 def test_bytes(doc):
@@ -176,9 +270,7 @@ def test_bytes(doc):
     assert m.bytes_from_string().decode() == "foo"
     assert m.bytes_from_str().decode() == "bar"
 
-    assert doc(m.bytes_from_str) == "bytes_from_str() -> {}".format(
-        "str" if env.PY2 else "bytes"
-    )
+    assert doc(m.bytes_from_str) == "bytes_from_str() -> bytes"
 
 
 def test_bytearray(doc):
@@ -202,6 +294,19 @@ def test_capsule(capture):
     """
     )
 
+    with capture:
+        a = m.return_renamed_capsule_with_destructor()
+        del a
+        pytest.gc_collect()
+    assert (
+        capture.unordered
+        == """
+        creating capsule
+        renaming capsule
+        destructing capsule
+    """
+    )
+
     with capture:
         a = m.return_capsule_with_destructor_2()
         del a
@@ -214,6 +319,19 @@ def test_capsule(capture):
     """
     )
 
+    with capture:
+        a = m.return_renamed_capsule_with_destructor_2()
+        del a
+        pytest.gc_collect()
+    assert (
+        capture.unordered
+        == """
+        creating capsule
+        renaming capsule
+        destructing capsule: 1234
+    """
+    )
+
     with capture:
         a = m.return_capsule_with_name_and_destructor()
         del a
@@ -226,6 +344,17 @@ def test_capsule(capture):
     """
     )
 
+    with capture:
+        a = m.return_capsule_with_explicit_nullptr_dtor()
+        del a
+        pytest.gc_collect()
+    assert (
+        capture.unordered
+        == """
+        creating capsule with explicit nullptr dtor
+    """
+    )
+
 
 def test_accessors():
     class SubTestObject:
@@ -266,15 +395,18 @@ def test_accessors():
     assert d["var"] == 99
 
 
+def test_accessor_moves():
+    inc_refs = m.accessor_moves()
+    if inc_refs:
+        assert inc_refs == [1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0]
+    else:
+        pytest.skip("Not defined: PYBIND11_HANDLE_REF_DEBUG")
+
+
 def test_constructors():
     """C++ default and converting constructors are equivalent to type calls in Python"""
     types = [bytes, bytearray, str, bool, int, float, tuple, list, dict, set]
     expected = {t.__name__: t() for t in types}
-    if env.PY2:
-        # Note that bytes.__name__ == 'str' in Python 2.
-        # pybind11::str is unicode even under Python 2.
-        expected["bytes"] = bytes()
-        expected["str"] = unicode()  # noqa: F821
     assert m.default_constructors() == expected
 
     data = {
@@ -288,15 +420,11 @@ def test_constructors():
         list: range(3),
         dict: [("two", 2), ("one", 1), ("three", 3)],
         set: [4, 4, 5, 6, 6, 6],
+        frozenset: [4, 4, 5, 6, 6, 6],
         memoryview: b"abc",
     }
     inputs = {k.__name__: v for k, v in data.items()}
     expected = {k.__name__: k(v) for k, v in data.items()}
-    if env.PY2:  # Similar to the above. See comments above.
-        inputs["bytes"] = b"41"
-        inputs["str"] = 42
-        expected["bytes"] = b"41"
-        expected["str"] = u"42"
 
     assert m.converting_constructors(inputs) == expected
     assert m.cast_functions(inputs) == expected
@@ -323,8 +451,8 @@ def test_non_converting_constructors():
         for move in [True, False]:
             with pytest.raises(TypeError) as excinfo:
                 m.nonconverting_constructor(t, v, move)
-            expected_error = "Object of type '{}' is not an instance of '{}'".format(
-                type(v).__name__, t
+            expected_error = (
+                f"Object of type '{type(v).__name__}' is not an instance of '{t}'"
             )
             assert str(excinfo.value) == expected_error
 
@@ -332,46 +460,39 @@ def test_non_converting_constructors():
 def test_pybind11_str_raw_str():
     # specifically to exercise pybind11::str::raw_str
     cvt = m.convert_to_pybind11_str
-    assert cvt(u"Str") == u"Str"
-    assert cvt(b"Bytes") == u"Bytes" if env.PY2 else "b'Bytes'"
-    assert cvt(None) == u"None"
-    assert cvt(False) == u"False"
-    assert cvt(True) == u"True"
-    assert cvt(42) == u"42"
-    assert cvt(2 ** 65) == u"36893488147419103232"
-    assert cvt(-1.50) == u"-1.5"
-    assert cvt(()) == u"()"
-    assert cvt((18,)) == u"(18,)"
-    assert cvt([]) == u"[]"
-    assert cvt([28]) == u"[28]"
-    assert cvt({}) == u"{}"
-    assert cvt({3: 4}) == u"{3: 4}"
-    assert cvt(set()) == u"set([])" if env.PY2 else "set()"
-    assert cvt({3, 3}) == u"set([3])" if env.PY2 else "{3}"
+    assert cvt("Str") == "Str"
+    assert cvt(b"Bytes") == "b'Bytes'"
+    assert cvt(None) == "None"
+    assert cvt(False) == "False"
+    assert cvt(True) == "True"
+    assert cvt(42) == "42"
+    assert cvt(2**65) == "36893488147419103232"
+    assert cvt(-1.50) == "-1.5"
+    assert cvt(()) == "()"
+    assert cvt((18,)) == "(18,)"
+    assert cvt([]) == "[]"
+    assert cvt([28]) == "[28]"
+    assert cvt({}) == "{}"
+    assert cvt({3: 4}) == "{3: 4}"
+    assert cvt(set()) == "set()"
+    assert cvt({3, 3}) == "{3}"
 
-    valid_orig = u"Ǳ"
+    valid_orig = "Ǳ"
     valid_utf8 = valid_orig.encode("utf-8")
     valid_cvt = cvt(valid_utf8)
     if hasattr(m, "PYBIND11_STR_LEGACY_PERMISSIVE"):
         assert valid_cvt is valid_utf8
     else:
-        assert type(valid_cvt) is unicode if env.PY2 else str  # noqa: F821
-        if env.PY2:
-            assert valid_cvt == valid_orig
-        else:
-            assert valid_cvt == "b'\\xc7\\xb1'"
+        assert type(valid_cvt) is str
+        assert valid_cvt == "b'\\xc7\\xb1'"
 
     malformed_utf8 = b"\x80"
     if hasattr(m, "PYBIND11_STR_LEGACY_PERMISSIVE"):
         assert cvt(malformed_utf8) is malformed_utf8
     else:
-        if env.PY2:
-            with pytest.raises(UnicodeDecodeError):
-                cvt(malformed_utf8)
-        else:
-            malformed_cvt = cvt(malformed_utf8)
-            assert type(malformed_cvt) is str
-            assert malformed_cvt == "b'\\x80'"
+        malformed_cvt = cvt(malformed_utf8)
+        assert type(malformed_cvt) is str
+        assert malformed_cvt == "b'\\x80'"
 
 
 def test_implicit_casting():
@@ -414,20 +535,20 @@ def test_print(capture):
         m.print_failure()
     assert str(excinfo.value) == "Unable to convert call argument " + (
         "'1' of type 'UnregisteredType' to Python object"
-        if debug_enabled
-        else "to Python object (compile in debug mode for details)"
+        if detailed_error_messages_enabled
+        else "to Python object (#define PYBIND11_DETAILED_ERROR_MESSAGES or compile in debug mode for details)"
     )
 
 
 def test_hash():
-    class Hashable(object):
+    class Hashable:
         def __init__(self, value):
             self.value = value
 
         def __hash__(self):
             return self.value
 
-    class Unhashable(object):
+    class Unhashable:
         __hash__ = None
 
     assert m.hash_function(Hashable(42)) == 42
@@ -485,12 +606,7 @@ def test_memoryview(method, args, fmt, expected_view):
     view = method(*args)
     assert isinstance(view, memoryview)
     assert view.format == fmt
-    if isinstance(expected_view, bytes) or not env.PY2:
-        view_as_list = list(view)
-    else:
-        # Using max to pick non-zero byte (big-endian vs little-endian).
-        view_as_list = [max(ord(c) for c in s) for s in view]
-    assert view_as_list == list(expected_view)
+    assert list(view) == list(expected_view)
 
 
 @pytest.mark.xfail("env.PYPY", reason="getrefcount is not available")
@@ -514,12 +630,7 @@ def test_memoryview_from_buffer_empty_shape():
     view = m.test_memoryview_from_buffer_empty_shape()
     assert isinstance(view, memoryview)
     assert view.format == "B"
-    if env.PY2:
-        # Python 2 behavior is weird, but Python 3 (the future) is fine.
-        # PyPy3 has <memoryview, while CPython 2 has <memory
-        assert bytes(view).startswith(b"<memory")
-    else:
-        assert bytes(view) == b""
+    assert bytes(view) == b""
 
 
 def test_test_memoryview_from_buffer_invalid_strides():
@@ -528,14 +639,10 @@ def test_test_memoryview_from_buffer_invalid_strides():
 
 
 def test_test_memoryview_from_buffer_nullptr():
-    if env.PY2:
+    with pytest.raises(ValueError):
         m.test_memoryview_from_buffer_nullptr()
-    else:
-        with pytest.raises(ValueError):
-            m.test_memoryview_from_buffer_nullptr()
 
 
-@pytest.mark.skipif("env.PY2")
 def test_memoryview_from_memory():
     view = m.test_memoryview_from_memory()
     assert isinstance(view, memoryview)
@@ -555,9 +662,9 @@ def test_builtin_functions():
 
 def test_isinstance_string_types():
     assert m.isinstance_pybind11_bytes(b"")
-    assert not m.isinstance_pybind11_bytes(u"")
+    assert not m.isinstance_pybind11_bytes("")
 
-    assert m.isinstance_pybind11_str(u"")
+    assert m.isinstance_pybind11_str("")
     if hasattr(m, "PYBIND11_STR_LEGACY_PERMISSIVE"):
         assert m.isinstance_pybind11_str(b"")
     else:
@@ -567,24 +674,21 @@ def test_isinstance_string_types():
 def test_pass_bytes_or_unicode_to_string_types():
     assert m.pass_to_pybind11_bytes(b"Bytes") == 5
     with pytest.raises(TypeError):
-        m.pass_to_pybind11_bytes(u"Str")
+        m.pass_to_pybind11_bytes("Str")
 
-    if hasattr(m, "PYBIND11_STR_LEGACY_PERMISSIVE") or env.PY2:
+    if hasattr(m, "PYBIND11_STR_LEGACY_PERMISSIVE"):
         assert m.pass_to_pybind11_str(b"Bytes") == 5
     else:
         with pytest.raises(TypeError):
             m.pass_to_pybind11_str(b"Bytes")
-    assert m.pass_to_pybind11_str(u"Str") == 3
+    assert m.pass_to_pybind11_str("Str") == 3
 
     assert m.pass_to_std_string(b"Bytes") == 5
-    assert m.pass_to_std_string(u"Str") == 3
+    assert m.pass_to_std_string("Str") == 3
 
     malformed_utf8 = b"\x80"
     if hasattr(m, "PYBIND11_STR_LEGACY_PERMISSIVE"):
         assert m.pass_to_pybind11_str(malformed_utf8) == 1
-    elif env.PY2:
-        with pytest.raises(UnicodeDecodeError):
-            m.pass_to_pybind11_str(malformed_utf8)
     else:
         with pytest.raises(TypeError):
             m.pass_to_pybind11_str(malformed_utf8)
@@ -601,12 +705,14 @@ def test_weakref(create_weakref, create_weakref_with_callback):
     from weakref import getweakrefcount
 
     # Apparently, you cannot weakly reference an object()
-    class WeaklyReferenced(object):
+    class WeaklyReferenced:
         pass
 
+    callback_called = False
+
     def callback(wr):
-        # No `nonlocal` in Python 2
-        callback.called = True
+        nonlocal callback_called
+        callback_called = True
 
     obj = WeaklyReferenced()
     assert getweakrefcount(obj) == 0
@@ -615,13 +721,37 @@ def test_weakref(create_weakref, create_weakref_with_callback):
 
     obj = WeaklyReferenced()
     assert getweakrefcount(obj) == 0
-    callback.called = False
     wr = create_weakref_with_callback(obj, callback)  # noqa: F841
     assert getweakrefcount(obj) == 1
-    assert not callback.called
+    assert not callback_called
     del obj
     pytest.gc_collect()
-    assert callback.called
+    assert callback_called
+
+
+@pytest.mark.parametrize(
+    "create_weakref, has_callback",
+    [
+        (m.weakref_from_handle, False),
+        (m.weakref_from_object, False),
+        (m.weakref_from_handle_and_function, True),
+        (m.weakref_from_object_and_function, True),
+    ],
+)
+def test_weakref_err(create_weakref, has_callback):
+    class C:
+        __slots__ = []
+
+    def callback(_):
+        pass
+
+    ob = C()
+    # Should raise TypeError on CPython
+    with pytest.raises(TypeError) if not env.PYPY else contextlib.nullcontext():
+        if has_callback:
+            _ = create_weakref(ob, callback)
+        else:
+            _ = create_weakref(ob)
 
 
 def test_cpp_iterators():
@@ -649,3 +779,107 @@ def test_implementation_details():
     assert m.tuple_item_set_ssize_t() == ("emely", "edmond")
     assert m.tuple_item_get_size_t(tup) == 93
     assert m.tuple_item_set_size_t() == ("candy", "cat")
+
+
+def test_external_float_():
+    r1 = m.square_float_(2.0)
+    assert r1 == 4.0
+
+
+def test_tuple_rvalue_getter():
+    pop = 1000
+    tup = tuple(range(pop))
+    m.tuple_rvalue_getter(tup)
+
+
+def test_list_rvalue_getter():
+    pop = 1000
+    my_list = list(range(pop))
+    m.list_rvalue_getter(my_list)
+
+
+def test_populate_dict_rvalue():
+    pop = 1000
+    my_dict = {i: i for i in range(pop)}
+    assert m.populate_dict_rvalue(pop) == my_dict
+
+
+def test_populate_obj_str_attrs():
+    pop = 1000
+    o = types.SimpleNamespace(**{str(i): i for i in range(pop)})
+    new_o = m.populate_obj_str_attrs(o, pop)
+    new_attrs = {k: v for k, v in new_o.__dict__.items() if not k.startswith("_")}
+    assert all(isinstance(v, str) for v in new_attrs.values())
+    assert len(new_attrs) == pop
+
+
+@pytest.mark.parametrize(
+    "a,b", [("foo", "bar"), (1, 2), (1.0, 2.0), (list(range(3)), list(range(3, 6)))]
+)
+def test_inplace_append(a, b):
+    expected = a + b
+    assert m.inplace_append(a, b) == expected
+
+
+@pytest.mark.parametrize("a,b", [(3, 2), (3.0, 2.0), (set(range(3)), set(range(2)))])
+def test_inplace_subtract(a, b):
+    expected = a - b
+    assert m.inplace_subtract(a, b) == expected
+
+
+@pytest.mark.parametrize("a,b", [(3, 2), (3.0, 2.0), ([1], 3)])
+def test_inplace_multiply(a, b):
+    expected = a * b
+    assert m.inplace_multiply(a, b) == expected
+
+
+@pytest.mark.parametrize("a,b", [(6, 3), (6.0, 3.0)])
+def test_inplace_divide(a, b):
+    expected = a / b
+    assert m.inplace_divide(a, b) == expected
+
+
+@pytest.mark.parametrize(
+    "a,b",
+    [
+        (False, True),
+        (
+            set(),
+            {
+                1,
+            },
+        ),
+    ],
+)
+def test_inplace_or(a, b):
+    expected = a | b
+    assert m.inplace_or(a, b) == expected
+
+
+@pytest.mark.parametrize(
+    "a,b",
+    [
+        (True, False),
+        (
+            {1, 2, 3},
+            {
+                1,
+            },
+        ),
+    ],
+)
+def test_inplace_and(a, b):
+    expected = a & b
+    assert m.inplace_and(a, b) == expected
+
+
+@pytest.mark.parametrize("a,b", [(8, 1), (-3, 2)])
+def test_inplace_lshift(a, b):
+    expected = a << b
+    assert m.inplace_lshift(a, b) == expected
+
+
+@pytest.mark.parametrize("a,b", [(8, 1), (-2, 2)])
+def test_inplace_rshift(a, b):
+    expected = a >> b
+    assert m.inplace_rshift(a, b) == expected
diff --git a/ext/pybind11/tests/test_sequences_and_iterators.cpp b/ext/pybind11/tests/test_sequences_and_iterators.cpp
index a378128ae2..1de65edbf2 100644
--- a/ext/pybind11/tests/test_sequences_and_iterators.cpp
+++ b/ext/pybind11/tests/test_sequences_and_iterators.cpp
@@ -8,44 +8,52 @@
     BSD-style license that can be found in the LICENSE file.
 */
 
-#include "pybind11_tests.h"
-#include "constructor_stats.h"
 #include <pybind11/operators.h>
 #include <pybind11/stl.h>
 
+#include "constructor_stats.h"
+#include "pybind11_tests.h"
+
 #include <algorithm>
 #include <utility>
 #include <vector>
 
 #ifdef PYBIND11_HAS_OPTIONAL
-#include <optional>
-#endif  // PYBIND11_HAS_OPTIONAL
+#    include <optional>
+#endif // PYBIND11_HAS_OPTIONAL
 
-
-template<typename T>
+template <typename T>
 class NonZeroIterator {
-    const T* ptr_;
+    const T *ptr_;
+
 public:
     explicit NonZeroIterator(const T *ptr) : ptr_(ptr) {}
-    const T& operator*() const { return *ptr_; }
-    NonZeroIterator& operator++() { ++ptr_; return *this; }
+    const T &operator*() const { return *ptr_; }
+    NonZeroIterator &operator++() {
+        ++ptr_;
+        return *this;
+    }
 };
 
 class NonZeroSentinel {};
 
-template<typename A, typename B>
-bool operator==(const NonZeroIterator<std::pair<A, B>>& it, const NonZeroSentinel&) {
+template <typename A, typename B>
+bool operator==(const NonZeroIterator<std::pair<A, B>> &it, const NonZeroSentinel &) {
     return !(*it).first || !(*it).second;
 }
 
 /* Iterator where dereferencing returns prvalues instead of references. */
-template<typename T>
+template <typename T>
 class NonRefIterator {
-    const T* ptr_;
+    const T *ptr_;
+
 public:
     explicit NonRefIterator(const T *ptr) : ptr_(ptr) {}
     T operator*() const { return T(*ptr_); }
-    NonRefIterator& operator++() { ++ptr_; return *this; }
+    NonRefIterator &operator++() {
+        ++ptr_;
+        return *this;
+    }
     bool operator==(const NonRefIterator &other) const { return ptr_ == other.ptr_; }
 };
 
@@ -54,17 +62,18 @@ public:
     explicit NonCopyableInt(int value) : value_(value) {}
     NonCopyableInt(const NonCopyableInt &) = delete;
     NonCopyableInt(NonCopyableInt &&other) noexcept : value_(other.value_) {
-        other.value_ = -1;  // detect when an unwanted move occurs
+        other.value_ = -1; // detect when an unwanted move occurs
     }
     NonCopyableInt &operator=(const NonCopyableInt &) = delete;
     NonCopyableInt &operator=(NonCopyableInt &&other) noexcept {
         value_ = other.value_;
-        other.value_ = -1;  // detect when an unwanted move occurs
+        other.value_ = -1; // detect when an unwanted move occurs
         return *this;
     }
     int get() const { return value_; }
     void set(int value) { value_ = value; }
     ~NonCopyableInt() = default;
+
 private:
     int value_;
 };
@@ -74,13 +83,16 @@ PYBIND11_MAKE_OPAQUE(std::vector<NonCopyableIntPair>);
 
 template <typename PythonType>
 py::list test_random_access_iterator(PythonType x) {
-    if (x.size() < 5)
+    if (x.size() < 5) {
         throw py::value_error("Please provide at least 5 elements for testing.");
+    }
 
     auto checks = py::list();
     auto assert_equal = [&checks](py::handle a, py::handle b) {
         auto result = PyObject_RichCompareBool(a.ptr(), b.ptr(), Py_EQ);
-        if (result == -1) { throw py::error_already_set(); }
+        if (result == -1) {
+            throw py::error_already_set();
+        }
         checks.append(result != 0);
     };
 
@@ -115,7 +127,7 @@ py::list test_random_access_iterator(PythonType x) {
 
 TEST_SUBMODULE(sequences_and_iterators, m) {
     // test_sliceable
-    class Sliceable{
+    class Sliceable {
     public:
         explicit Sliceable(int n) : size(n) {}
         int start, stop, step;
@@ -125,21 +137,24 @@ TEST_SUBMODULE(sequences_and_iterators, m) {
         .def(py::init<int>())
         .def("__getitem__", [](const Sliceable &s, const py::slice &slice) {
             py::ssize_t start = 0, stop = 0, step = 0, slicelength = 0;
-            if (!slice.compute(s.size, &start, &stop, &step, &slicelength))
+            if (!slice.compute(s.size, &start, &stop, &step, &slicelength)) {
                 throw py::error_already_set();
+            }
             int istart = static_cast<int>(start);
-            int istop  = static_cast<int>(stop);
-            int istep  = static_cast<int>(step);
+            int istop = static_cast<int>(stop);
+            int istep = static_cast<int>(step);
             return std::make_tuple(istart, istop, istep);
         });
 
     m.def("make_forward_slice_size_t", []() { return py::slice(0, -1, 1); });
-    m.def("make_reversed_slice_object", []() { return py::slice(py::none(), py::none(), py::int_(-1)); });
+    m.def("make_reversed_slice_object",
+          []() { return py::slice(py::none(), py::none(), py::int_(-1)); });
 #ifdef PYBIND11_HAS_OPTIONAL
     m.attr("has_optional") = true;
-    m.def("make_reversed_slice_size_t_optional_verbose", []() { return py::slice(std::nullopt, std::nullopt, -1); });
-    // Warning: The following spelling may still compile if optional<> is not present and give wrong answers.
-    // Please use with caution.
+    m.def("make_reversed_slice_size_t_optional_verbose",
+          []() { return py::slice(std::nullopt, std::nullopt, -1); });
+    // Warning: The following spelling may still compile if optional<> is not present and give
+    // wrong answers. Please use with caution.
     m.def("make_reversed_slice_size_t_optional", []() { return py::slice({}, {}, -1); });
 #else
     m.attr("has_optional") = false;
@@ -150,18 +165,21 @@ TEST_SUBMODULE(sequences_and_iterators, m) {
     public:
         explicit Sequence(size_t size) : m_size(size) {
             print_created(this, "of size", m_size);
+            // NOLINTNEXTLINE(cppcoreguidelines-prefer-member-initializer)
             m_data = new float[size];
             memset(m_data, 0, sizeof(float) * size);
         }
         explicit Sequence(const std::vector<float> &value) : m_size(value.size()) {
             print_created(this, "of size", m_size, "from std::vector");
+            // NOLINTNEXTLINE(cppcoreguidelines-prefer-member-initializer)
             m_data = new float[m_size];
             memcpy(m_data, &value[0], sizeof(float) * m_size);
         }
         Sequence(const Sequence &s) : m_size(s.m_size) {
             print_copy_created(this);
+            // NOLINTNEXTLINE(cppcoreguidelines-prefer-member-initializer)
             m_data = new float[m_size];
-            memcpy(m_data, s.m_data, sizeof(float)*m_size);
+            memcpy(m_data, s.m_data, sizeof(float) * m_size);
         }
         Sequence(Sequence &&s) noexcept : m_size(s.m_size), m_data(s.m_data) {
             print_move_created(this);
@@ -169,14 +187,17 @@ TEST_SUBMODULE(sequences_and_iterators, m) {
             s.m_data = nullptr;
         }
 
-        ~Sequence() { print_destroyed(this); delete[] m_data; }
+        ~Sequence() {
+            print_destroyed(this);
+            delete[] m_data;
+        }
 
         Sequence &operator=(const Sequence &s) {
             if (&s != this) {
                 delete[] m_data;
                 m_size = s.m_size;
                 m_data = new float[m_size];
-                memcpy(m_data, s.m_data, sizeof(float)*m_size);
+                memcpy(m_data, s.m_data, sizeof(float) * m_size);
             }
             print_copy_assigned(this);
             return *this;
@@ -195,10 +216,14 @@ TEST_SUBMODULE(sequences_and_iterators, m) {
         }
 
         bool operator==(const Sequence &s) const {
-            if (m_size != s.size()) return false;
-            for (size_t i = 0; i < m_size; ++i)
-                if (m_data[i] != s[i])
+            if (m_size != s.size()) {
+                return false;
+            }
+            for (size_t i = 0; i < m_size; ++i) {
+                if (m_data[i] != s[i]) {
                     return false;
+                }
+            }
             return true;
         }
         bool operator!=(const Sequence &s) const { return !operator==(s); }
@@ -207,23 +232,26 @@ TEST_SUBMODULE(sequences_and_iterators, m) {
         float &operator[](size_t index) { return m_data[index]; }
 
         bool contains(float v) const {
-            for (size_t i = 0; i < m_size; ++i)
-                if (v == m_data[i])
+            for (size_t i = 0; i < m_size; ++i) {
+                if (v == m_data[i]) {
                     return true;
+                }
+            }
             return false;
         }
 
         Sequence reversed() const {
             Sequence result(m_size);
-            for (size_t i = 0; i < m_size; ++i)
+            for (size_t i = 0; i < m_size; ++i) {
                 result[m_size - i - 1] = m_data[i];
+            }
             return result;
         }
 
         size_t size() const { return m_size; }
 
         const float *begin() const { return m_data; }
-        const float *end() const { return m_data+m_size; }
+        const float *end() const { return m_data + m_size; }
 
     private:
         size_t m_size;
@@ -235,14 +263,16 @@ TEST_SUBMODULE(sequences_and_iterators, m) {
         /// Bare bones interface
         .def("__getitem__",
              [](const Sequence &s, size_t i) {
-                 if (i >= s.size())
+                 if (i >= s.size()) {
                      throw py::index_error();
+                 }
                  return s[i];
              })
         .def("__setitem__",
              [](Sequence &s, size_t i, float v) {
-                 if (i >= s.size())
+                 if (i >= s.size()) {
                      throw py::index_error();
+                 }
                  s[i] = v;
              })
         .def("__len__", &Sequence::size)
@@ -257,8 +287,9 @@ TEST_SUBMODULE(sequences_and_iterators, m) {
         .def("__getitem__",
              [](const Sequence &s, const py::slice &slice) -> Sequence * {
                  size_t start = 0, stop = 0, step = 0, slicelength = 0;
-                 if (!slice.compute(s.size(), &start, &stop, &step, &slicelength))
+                 if (!slice.compute(s.size(), &start, &stop, &step, &slicelength)) {
                      throw py::error_already_set();
+                 }
                  auto *seq = new Sequence(slicelength);
                  for (size_t i = 0; i < slicelength; ++i) {
                      (*seq)[i] = s[start];
@@ -269,11 +300,13 @@ TEST_SUBMODULE(sequences_and_iterators, m) {
         .def("__setitem__",
              [](Sequence &s, const py::slice &slice, const Sequence &value) {
                  size_t start = 0, stop = 0, step = 0, slicelength = 0;
-                 if (!slice.compute(s.size(), &start, &stop, &step, &slicelength))
+                 if (!slice.compute(s.size(), &start, &stop, &step, &slicelength)) {
                      throw py::error_already_set();
-                 if (slicelength != value.size())
+                 }
+                 if (slicelength != value.size()) {
                      throw std::runtime_error(
                          "Left and right hand size of slice assignment have different sizes!");
+                 }
                  for (size_t i = 0; i < slicelength; ++i) {
                      s[start] = value[i];
                      start += step;
@@ -286,8 +319,8 @@ TEST_SUBMODULE(sequences_and_iterators, m) {
         ;
 
     // test_map_iterator
-    // Interface of a map-like object that isn't (directly) an unordered_map, but provides some basic
-    // map-like functionality.
+    // Interface of a map-like object that isn't (directly) an unordered_map, but provides some
+    // basic map-like functionality.
     class StringMap {
     public:
         StringMap() = default;
@@ -297,8 +330,10 @@ TEST_SUBMODULE(sequences_and_iterators, m) {
         void set(const std::string &key, std::string val) { map[key] = std::move(val); }
         std::string get(const std::string &key) const { return map.at(key); }
         size_t size() const { return map.size(); }
+
     private:
         std::unordered_map<std::string, std::string> map;
+
     public:
         decltype(map.cbegin()) begin() const { return map.cbegin(); }
         decltype(map.cend()) end() const { return map.cend(); }
@@ -333,90 +368,115 @@ TEST_SUBMODULE(sequences_and_iterators, m) {
     class IntPairs {
     public:
         explicit IntPairs(std::vector<std::pair<int, int>> data) : data_(std::move(data)) {}
-        const std::pair<int, int>* begin() const { return data_.data(); }
+        const std::pair<int, int> *begin() const { return data_.data(); }
         // .end() only required for py::make_iterator(self) overload
-        const std::pair<int, int>* end() const { return data_.data() + data_.size(); }
+        const std::pair<int, int> *end() const { return data_.data() + data_.size(); }
+
     private:
         std::vector<std::pair<int, int>> data_;
     };
     py::class_<IntPairs>(m, "IntPairs")
         .def(py::init<std::vector<std::pair<int, int>>>())
-        .def("nonzero", [](const IntPairs& s) {
-            return py::make_iterator(NonZeroIterator<std::pair<int, int>>(s.begin()), NonZeroSentinel());
-        }, py::keep_alive<0, 1>())
-        .def("nonzero_keys", [](const IntPairs& s) {
-            return py::make_key_iterator(NonZeroIterator<std::pair<int, int>>(s.begin()), NonZeroSentinel());
-        }, py::keep_alive<0, 1>())
-        .def("nonzero_values", [](const IntPairs& s) {
-            return py::make_value_iterator(NonZeroIterator<std::pair<int, int>>(s.begin()), NonZeroSentinel());
-        }, py::keep_alive<0, 1>())
+        .def(
+            "nonzero",
+            [](const IntPairs &s) {
+                return py::make_iterator(NonZeroIterator<std::pair<int, int>>(s.begin()),
+                                         NonZeroSentinel());
+            },
+            py::keep_alive<0, 1>())
+        .def(
+            "nonzero_keys",
+            [](const IntPairs &s) {
+                return py::make_key_iterator(NonZeroIterator<std::pair<int, int>>(s.begin()),
+                                             NonZeroSentinel());
+            },
+            py::keep_alive<0, 1>())
+        .def(
+            "nonzero_values",
+            [](const IntPairs &s) {
+                return py::make_value_iterator(NonZeroIterator<std::pair<int, int>>(s.begin()),
+                                               NonZeroSentinel());
+            },
+            py::keep_alive<0, 1>())
 
         // test iterator that returns values instead of references
-        .def("nonref", [](const IntPairs& s) {
-             return py::make_iterator(NonRefIterator<std::pair<int, int>>(s.begin()),
-                                      NonRefIterator<std::pair<int, int>>(s.end()));
-        }, py::keep_alive<0, 1>())
-        .def("nonref_keys", [](const IntPairs& s) {
-             return py::make_key_iterator(NonRefIterator<std::pair<int, int>>(s.begin()),
-                                          NonRefIterator<std::pair<int, int>>(s.end()));
-        }, py::keep_alive<0, 1>())
-        .def("nonref_values", [](const IntPairs& s) {
-             return py::make_value_iterator(NonRefIterator<std::pair<int, int>>(s.begin()),
-                                            NonRefIterator<std::pair<int, int>>(s.end()));
-        }, py::keep_alive<0, 1>())
+        .def(
+            "nonref",
+            [](const IntPairs &s) {
+                return py::make_iterator(NonRefIterator<std::pair<int, int>>(s.begin()),
+                                         NonRefIterator<std::pair<int, int>>(s.end()));
+            },
+            py::keep_alive<0, 1>())
+        .def(
+            "nonref_keys",
+            [](const IntPairs &s) {
+                return py::make_key_iterator(NonRefIterator<std::pair<int, int>>(s.begin()),
+                                             NonRefIterator<std::pair<int, int>>(s.end()));
+            },
+            py::keep_alive<0, 1>())
+        .def(
+            "nonref_values",
+            [](const IntPairs &s) {
+                return py::make_value_iterator(NonRefIterator<std::pair<int, int>>(s.begin()),
+                                               NonRefIterator<std::pair<int, int>>(s.end()));
+            },
+            py::keep_alive<0, 1>())
 
         // test single-argument make_iterator
-        .def("simple_iterator", [](IntPairs& self) {
-            return py::make_iterator(self);
-        }, py::keep_alive<0, 1>())
-        .def("simple_keys", [](IntPairs& self) {
-            return py::make_key_iterator(self);
-        }, py::keep_alive<0, 1>())
-        .def("simple_values", [](IntPairs& self) {
-            return py::make_value_iterator(self);
-        }, py::keep_alive<0, 1>())
+        .def(
+            "simple_iterator",
+            [](IntPairs &self) { return py::make_iterator(self); },
+            py::keep_alive<0, 1>())
+        .def(
+            "simple_keys",
+            [](IntPairs &self) { return py::make_key_iterator(self); },
+            py::keep_alive<0, 1>())
+        .def(
+            "simple_values",
+            [](IntPairs &self) { return py::make_value_iterator(self); },
+            py::keep_alive<0, 1>())
 
         // Test iterator with an Extra (doesn't do anything useful, so not used
         // at runtime, but tests need to be able to compile with the correct
         // overload. See PR #3293.
-        .def("_make_iterator_extras", [](IntPairs& self) {
-            return py::make_iterator(self, py::call_guard<int>());
-        }, py::keep_alive<0, 1>())
-        .def("_make_key_extras", [](IntPairs& self) {
-            return py::make_key_iterator(self, py::call_guard<int>());
-        }, py::keep_alive<0, 1>())
-        .def("_make_value_extras", [](IntPairs& self) {
-            return py::make_value_iterator(self, py::call_guard<int>());
-        }, py::keep_alive<0, 1>())
-        ;
+        .def(
+            "_make_iterator_extras",
+            [](IntPairs &self) { return py::make_iterator(self, py::call_guard<int>()); },
+            py::keep_alive<0, 1>())
+        .def(
+            "_make_key_extras",
+            [](IntPairs &self) { return py::make_key_iterator(self, py::call_guard<int>()); },
+            py::keep_alive<0, 1>())
+        .def(
+            "_make_value_extras",
+            [](IntPairs &self) { return py::make_value_iterator(self, py::call_guard<int>()); },
+            py::keep_alive<0, 1>());
 
-    // test_iterater_referencing
+    // test_iterator_referencing
     py::class_<NonCopyableInt>(m, "NonCopyableInt")
         .def(py::init<int>())
         .def("set", &NonCopyableInt::set)
-        .def("__int__", &NonCopyableInt::get)
-        ;
+        .def("__int__", &NonCopyableInt::get);
     py::class_<std::vector<NonCopyableInt>>(m, "VectorNonCopyableInt")
         .def(py::init<>())
-        .def("append", [](std::vector<NonCopyableInt> &vec, int value) {
-            vec.emplace_back(value);
-        })
+        .def("append",
+             [](std::vector<NonCopyableInt> &vec, int value) { vec.emplace_back(value); })
         .def("__iter__", [](std::vector<NonCopyableInt> &vec) {
             return py::make_iterator(vec.begin(), vec.end());
-        })
-        ;
+        });
     py::class_<std::vector<NonCopyableIntPair>>(m, "VectorNonCopyableIntPair")
         .def(py::init<>())
-        .def("append", [](std::vector<NonCopyableIntPair> &vec, const std::pair<int, int> &value) {
-            vec.emplace_back(NonCopyableInt(value.first), NonCopyableInt(value.second));
-        })
-        .def("keys", [](std::vector<NonCopyableIntPair> &vec) {
-            return py::make_key_iterator(vec.begin(), vec.end());
-        })
+        .def("append",
+             [](std::vector<NonCopyableIntPair> &vec, const std::pair<int, int> &value) {
+                 vec.emplace_back(NonCopyableInt(value.first), NonCopyableInt(value.second));
+             })
+        .def("keys",
+             [](std::vector<NonCopyableIntPair> &vec) {
+                 return py::make_key_iterator(vec.begin(), vec.end());
+             })
         .def("values", [](std::vector<NonCopyableIntPair> &vec) {
             return py::make_value_iterator(vec.begin(), vec.end());
-        })
-        ;
+        });
 
 #if 0
     // Obsolete: special data structure for exposing custom iterator types to python
@@ -494,7 +554,28 @@ TEST_SUBMODULE(sequences_and_iterators, m) {
 
     // test_iterator_rvp
     // #388: Can't make iterators via make_iterator() with different r/v policies
-    static std::vector<int> list = { 1, 2, 3 };
-    m.def("make_iterator_1", []() { return py::make_iterator<py::return_value_policy::copy>(list); });
-    m.def("make_iterator_2", []() { return py::make_iterator<py::return_value_policy::automatic>(list); });
+    static std::vector<int> list = {1, 2, 3};
+    m.def("make_iterator_1",
+          []() { return py::make_iterator<py::return_value_policy::copy>(list); });
+    m.def("make_iterator_2",
+          []() { return py::make_iterator<py::return_value_policy::automatic>(list); });
+
+    // test_iterator on c arrays
+    // #4100: ensure lvalue required as increment operand
+    class CArrayHolder {
+    public:
+        CArrayHolder(double x, double y, double z) {
+            values[0] = x;
+            values[1] = y;
+            values[2] = z;
+        };
+        double values[3];
+    };
+
+    py::class_<CArrayHolder>(m, "CArrayHolder")
+        .def(py::init<double, double, double>())
+        .def(
+            "__iter__",
+            [](const CArrayHolder &v) { return py::make_iterator(v.values, v.values + 3); },
+            py::keep_alive<0, 1>());
 }
diff --git a/ext/pybind11/tests/test_sequences_and_iterators.py b/ext/pybind11/tests/test_sequences_and_iterators.py
index 6985918a11..de486e3e8e 100644
--- a/ext/pybind11/tests/test_sequences_and_iterators.py
+++ b/ext/pybind11/tests/test_sequences_and_iterators.py
@@ -1,21 +1,10 @@
-# -*- coding: utf-8 -*-
 import pytest
+from pytest import approx
 
 from pybind11_tests import ConstructorStats
 from pybind11_tests import sequences_and_iterators as m
 
 
-def isclose(a, b, rel_tol=1e-05, abs_tol=0.0):
-    """Like math.isclose() from Python 3.5"""
-    return abs(a - b) <= max(rel_tol * max(abs(a), abs(b)), abs_tol)
-
-
-def allclose(a_list, b_list, rel_tol=1e-05, abs_tol=0.0):
-    return all(
-        isclose(a, b, rel_tol=rel_tol, abs_tol=abs_tol) for a, b in zip(a_list, b_list)
-    )
-
-
 def test_slice_constructors():
     assert m.make_forward_slice_size_t() == slice(0, -1, 1)
     assert m.make_reversed_slice_object() == slice(None, None, -1)
@@ -118,7 +107,8 @@ def test_sequence():
     assert 12.34 not in s
     s[0], s[3] = 12.34, 56.78
     assert 12.34 in s
-    assert isclose(s[0], 12.34) and isclose(s[3], 56.78)
+    assert s[0] == approx(12.34, rel=1e-05)
+    assert s[3] == approx(56.78, rel=1e-05)
 
     rev = reversed(s)
     assert cstats.values() == ["of size", "5"]
@@ -133,14 +123,14 @@ def test_sequence():
     assert cstats.values() == ["of size", "0"]
 
     expected = [0, 56.78, 0, 0, 12.34]
-    assert allclose(rev, expected)
-    assert allclose(rev2, expected)
+    assert rev == approx(expected, rel=1e-05)
+    assert rev2 == approx(expected, rel=1e-05)
     assert rev == rev2
 
     rev[0::2] = m.Sequence([2.0, 2.0, 2.0])
     assert cstats.values() == ["of size", "3", "from std::vector"]
 
-    assert allclose(rev, [2, 56.78, 2, 0, 2])
+    assert rev == approx([2, 56.78, 2, 0, 2], rel=1e-05)
 
     assert cstats.alive() == 4
     del it
@@ -251,3 +241,11 @@ def test_iterator_rvp():
     assert list(m.make_iterator_1()) == [1, 2, 3]
     assert list(m.make_iterator_2()) == [1, 2, 3]
     assert not isinstance(m.make_iterator_1(), type(m.make_iterator_2()))
+
+
+def test_carray_iterator():
+    """#4100: Check for proper iterator overload with C-Arrays"""
+    args_gt = list(float(i) for i in range(3))
+    arr_h = m.CArrayHolder(*args_gt)
+    args = list(arr_h)
+    assert args_gt == args
diff --git a/ext/pybind11/tests/test_smart_ptr.cpp b/ext/pybind11/tests/test_smart_ptr.cpp
index 94f04330a2..6d9efcedce 100644
--- a/ext/pybind11/tests/test_smart_ptr.cpp
+++ b/ext/pybind11/tests/test_smart_ptr.cpp
@@ -8,21 +8,19 @@
     BSD-style license that can be found in the LICENSE file.
 */
 
-#if defined(_MSC_VER) && _MSC_VER < 1910  // VS 2015's MSVC
-#  pragma warning(disable: 4702) // unreachable code in system header (xatomic.h(382))
-#endif
-
-#include "pybind11_tests.h"
 #include "object.h"
+#include "pybind11_tests.h"
 
 namespace {
 
 // This is just a wrapper around unique_ptr, but with extra fields to deliberately bloat up the
-// holder size to trigger the non-simple-layout internal instance layout for single inheritance with
-// large holder type:
-template <typename T> class huge_unique_ptr {
+// holder size to trigger the non-simple-layout internal instance layout for single inheritance
+// with large holder type:
+template <typename T>
+class huge_unique_ptr {
     std::unique_ptr<T> ptr;
     uint64_t padding[10];
+
 public:
     explicit huge_unique_ptr(T *p) : ptr(p) {}
     T *get() { return ptr.get(); }
@@ -32,10 +30,11 @@ public:
 template <typename T>
 class custom_unique_ptr {
     std::unique_ptr<T> impl;
+
 public:
     explicit custom_unique_ptr(T *p) : impl(p) {}
-    T* get() const { return impl.get(); }
-    T* release_ptr() { return impl.release(); }
+    T *get() const { return impl.get(); }
+    T *release_ptr() { return impl.release(); }
 };
 
 // Simple custom holder that works like shared_ptr and has operator& overload
@@ -44,11 +43,12 @@ public:
 template <typename T>
 class shared_ptr_with_addressof_operator {
     std::shared_ptr<T> impl;
+
 public:
-    shared_ptr_with_addressof_operator( ) = default;
+    shared_ptr_with_addressof_operator() = default;
     explicit shared_ptr_with_addressof_operator(T *p) : impl(p) {}
-    T* get() const { return impl.get(); }
-    T** operator&() { throw std::logic_error("Call of overloaded operator& is not expected"); }
+    T *get() const { return impl.get(); }
+    T **operator&() { throw std::logic_error("Call of overloaded operator& is not expected"); }
 };
 
 // Simple custom holder that works like unique_ptr and has operator& overload
@@ -57,12 +57,13 @@ public:
 template <typename T>
 class unique_ptr_with_addressof_operator {
     std::unique_ptr<T> impl;
+
 public:
     unique_ptr_with_addressof_operator() = default;
     explicit unique_ptr_with_addressof_operator(T *p) : impl(p) {}
-    T* get() const { return impl.get(); }
-    T* release_ptr() { return impl.release(); }
-    T** operator&() { throw std::logic_error("Call of overloaded operator& is not expected"); }
+    T *get() const { return impl.get(); }
+    T *release_ptr() { return impl.release(); }
+    T **operator&() { throw std::logic_error("Call of overloaded operator& is not expected"); }
 };
 
 // Custom object with builtin reference counting (see 'object.h' for the implementation)
@@ -70,8 +71,10 @@ class MyObject1 : public Object {
 public:
     explicit MyObject1(int value) : value(value) { print_created(this, toString()); }
     std::string toString() const override { return "MyObject1[" + std::to_string(value) + "]"; }
+
 protected:
     ~MyObject1() override { print_destroyed(this); }
+
 private:
     int value;
 };
@@ -83,6 +86,7 @@ public:
     explicit MyObject2(int value) : value(value) { print_created(this, toString()); }
     std::string toString() const { return "MyObject2[" + std::to_string(value) + "]"; }
     virtual ~MyObject2() { print_destroyed(this); }
+
 private:
     int value;
 };
@@ -94,6 +98,7 @@ public:
     explicit MyObject3(int value) : value(value) { print_created(this, toString()); }
     std::string toString() const { return "MyObject3[" + std::to_string(value) + "]"; }
     virtual ~MyObject3() { print_destroyed(this); }
+
 private:
     int value;
 };
@@ -113,9 +118,11 @@ public:
     static void cleanupAllInstances() {
         auto tmp = std::move(myobject4_instances);
         myobject4_instances.clear();
-        for (auto o : tmp)
+        for (auto *o : tmp) {
             delete o;
+        }
     }
+
 private:
     ~MyObject4() {
         myobject4_instances.erase(this);
@@ -130,8 +137,7 @@ class MyObject4a;
 std::unordered_set<MyObject4a *> myobject4a_instances;
 class MyObject4a {
 public:
-    explicit MyObject4a(int i) {
-        value = i;
+    explicit MyObject4a(int i) : value{i} {
         print_created(this);
         myobject4a_instances.insert(this);
     };
@@ -140,9 +146,11 @@ public:
     static void cleanupAllInstances() {
         auto tmp = std::move(myobject4a_instances);
         myobject4a_instances.clear();
-        for (auto o : tmp)
+        for (auto *o : tmp) {
             delete o;
+        }
     }
+
 protected:
     virtual ~MyObject4a() {
         myobject4a_instances.erase(this);
@@ -231,14 +239,14 @@ struct TypeForMoveOnlyHolderWithAddressOf {
 };
 
 // test_smart_ptr_from_default
-struct HeldByDefaultHolder { };
+struct HeldByDefaultHolder {};
 
 // test_shared_ptr_gc
 // #187: issue involving std::shared_ptr<> return value policy & garbage collection
 struct ElementBase {
     virtual ~ElementBase() = default; /* Force creation of virtual table */
     ElementBase() = default;
-    ElementBase(const ElementBase&) = delete;
+    ElementBase(const ElementBase &) = delete;
 };
 
 struct ElementA : ElementBase {
@@ -258,13 +266,14 @@ struct ElementList {
 // It is always possible to construct a ref<T> from an Object* pointer without
 // possible inconsistencies, hence the 'true' argument at the end.
 // Make pybind11 aware of the non-standard getter member function
-namespace pybind11 { namespace detail {
-    template <typename T>
-    struct holder_helper<ref<T>> {
-        static const T *get(const ref<T> &p) { return p.get_ptr(); }
-    };
+namespace PYBIND11_NAMESPACE {
+namespace detail {
+template <typename T>
+struct holder_helper<ref<T>> {
+    static const T *get(const ref<T> &p) { return p.get_ptr(); }
+};
 } // namespace detail
-} // namespace pybind11
+} // namespace PYBIND11_NAMESPACE
 
 // Make pybind aware of the ref-counted wrapper type (s):
 PYBIND11_DECLARE_HOLDER_TYPE(T, ref<T>, true);
@@ -286,8 +295,7 @@ TEST_SUBMODULE(smart_ptr, m) {
     py::class_<Object, ref<Object>> obj(m, "Object");
     obj.def("getRefCount", &Object::getRefCount);
 
-    py::class_<MyObject1, ref<MyObject1>>(m, "MyObject1", obj)
-        .def(py::init<int>());
+    py::class_<MyObject1, ref<MyObject1>>(m, "MyObject1", obj).def(py::init<int>());
     py::implicitly_convertible<py::int_, MyObject1>();
 
     m.def("make_object_1", []() -> Object * { return new MyObject1(1); });
@@ -306,25 +314,27 @@ TEST_SUBMODULE(smart_ptr, m) {
     // Expose constructor stats for the ref type
     m.def("cstats_ref", &ConstructorStats::get<ref_tag>);
 
-    py::class_<MyObject2, std::shared_ptr<MyObject2>>(m, "MyObject2")
-        .def(py::init<int>());
+    py::class_<MyObject2, std::shared_ptr<MyObject2>>(m, "MyObject2").def(py::init<int>());
     m.def("make_myobject2_1", []() { return new MyObject2(6); });
     m.def("make_myobject2_2", []() { return std::make_shared<MyObject2>(7); });
     m.def("print_myobject2_1", [](const MyObject2 *obj) { py::print(obj->toString()); });
     // NOLINTNEXTLINE(performance-unnecessary-value-param)
     m.def("print_myobject2_2", [](std::shared_ptr<MyObject2> obj) { py::print(obj->toString()); });
-    m.def("print_myobject2_3", [](const std::shared_ptr<MyObject2> &obj) { py::print(obj->toString()); });
-    m.def("print_myobject2_4", [](const std::shared_ptr<MyObject2> *obj) { py::print((*obj)->toString()); });
+    m.def("print_myobject2_3",
+          [](const std::shared_ptr<MyObject2> &obj) { py::print(obj->toString()); });
+    m.def("print_myobject2_4",
+          [](const std::shared_ptr<MyObject2> *obj) { py::print((*obj)->toString()); });
 
-    py::class_<MyObject3, std::shared_ptr<MyObject3>>(m, "MyObject3")
-        .def(py::init<int>());
+    py::class_<MyObject3, std::shared_ptr<MyObject3>>(m, "MyObject3").def(py::init<int>());
     m.def("make_myobject3_1", []() { return new MyObject3(8); });
     m.def("make_myobject3_2", []() { return std::make_shared<MyObject3>(9); });
     m.def("print_myobject3_1", [](const MyObject3 *obj) { py::print(obj->toString()); });
     // NOLINTNEXTLINE(performance-unnecessary-value-param)
     m.def("print_myobject3_2", [](std::shared_ptr<MyObject3> obj) { py::print(obj->toString()); });
-    m.def("print_myobject3_3", [](const std::shared_ptr<MyObject3> &obj) { py::print(obj->toString()); });
-    m.def("print_myobject3_4", [](const std::shared_ptr<MyObject3> *obj) { py::print((*obj)->toString()); });
+    m.def("print_myobject3_3",
+          [](const std::shared_ptr<MyObject3> &obj) { py::print(obj->toString()); });
+    m.def("print_myobject3_4",
+          [](const std::shared_ptr<MyObject3> *obj) { py::print((*obj)->toString()); });
 
     // test_smart_ptr_refcounting
     m.def("test_object1_refcounting", []() {
@@ -420,11 +430,18 @@ TEST_SUBMODULE(smart_ptr, m) {
              [](const HolderWithAddressOf *obj) { py::print((*obj).get()->toString()); });
 
     // test_move_only_holder_with_addressof_operator
-    using MoveOnlyHolderWithAddressOf = unique_ptr_with_addressof_operator<TypeForMoveOnlyHolderWithAddressOf>;
-    py::class_<TypeForMoveOnlyHolderWithAddressOf, MoveOnlyHolderWithAddressOf>(m, "TypeForMoveOnlyHolderWithAddressOf")
-        .def_static("make", []() { return MoveOnlyHolderWithAddressOf(new TypeForMoveOnlyHolderWithAddressOf(0)); })
+    using MoveOnlyHolderWithAddressOf
+        = unique_ptr_with_addressof_operator<TypeForMoveOnlyHolderWithAddressOf>;
+    py::class_<TypeForMoveOnlyHolderWithAddressOf, MoveOnlyHolderWithAddressOf>(
+        m, "TypeForMoveOnlyHolderWithAddressOf")
+        .def_static("make",
+                    []() {
+                        return MoveOnlyHolderWithAddressOf(
+                            new TypeForMoveOnlyHolderWithAddressOf(0));
+                    })
         .def_readwrite("value", &TypeForMoveOnlyHolderWithAddressOf::value)
-        .def("print_object", [](const TypeForMoveOnlyHolderWithAddressOf *obj) { py::print(obj->toString()); });
+        .def("print_object",
+             [](const TypeForMoveOnlyHolderWithAddressOf *obj) { py::print(obj->toString()); });
 
     // test_smart_ptr_from_default
     py::class_<HeldByDefaultHolder, std::unique_ptr<HeldByDefaultHolder>>(m, "HeldByDefaultHolder")
@@ -445,8 +462,9 @@ TEST_SUBMODULE(smart_ptr, m) {
         .def("add", &ElementList::add)
         .def("get", [](ElementList &el) {
             py::list list;
-            for (auto &e : el.l)
+            for (auto &e : el.l) {
                 list.append(py::cast(e));
+            }
             return list;
         });
 }
diff --git a/ext/pybind11/tests/test_smart_ptr.py b/ext/pybind11/tests/test_smart_ptr.py
index 85f61a3223..2f204e01b1 100644
--- a/ext/pybind11/tests/test_smart_ptr.py
+++ b/ext/pybind11/tests/test_smart_ptr.py
@@ -1,4 +1,3 @@
-# -*- coding: utf-8 -*-
 import pytest
 
 m = pytest.importorskip("pybind11_tests.smart_ptr")
@@ -16,7 +15,7 @@ def test_smart_ptr(capture):
             m.print_object_2(o)
             m.print_object_3(o)
             m.print_object_4(o)
-        assert capture == "MyObject1[{i}]\n".format(i=i) * 4
+        assert capture == f"MyObject1[{i}]\n" * 4
 
     for i, o in enumerate(
         [m.make_myobject1_1(), m.make_myobject1_2(), m.MyObject1(6), 7], start=4
@@ -34,13 +33,11 @@ def test_smart_ptr(capture):
             m.print_myobject1_4(o)
 
         times = 4 if isinstance(o, int) else 8
-        assert capture == "MyObject1[{i}]\n".format(i=i) * times
+        assert capture == f"MyObject1[{i}]\n" * times
 
     cstats = ConstructorStats.get(m.MyObject1)
     assert cstats.alive() == 0
-    expected_values = ["MyObject1[{}]".format(i) for i in range(1, 7)] + [
-        "MyObject1[7]"
-    ] * 4
+    expected_values = [f"MyObject1[{i}]" for i in range(1, 7)] + ["MyObject1[7]"] * 4
     assert cstats.values() == expected_values
     assert cstats.default_constructions == 0
     assert cstats.copy_constructions == 0
@@ -58,7 +55,7 @@ def test_smart_ptr(capture):
             m.print_myobject2_2(o)
             m.print_myobject2_3(o)
             m.print_myobject2_4(o)
-        assert capture == "MyObject2[{i}]\n".format(i=i) * 4
+        assert capture == f"MyObject2[{i}]\n" * 4
 
     cstats = ConstructorStats.get(m.MyObject2)
     assert cstats.alive() == 1
@@ -81,7 +78,7 @@ def test_smart_ptr(capture):
             m.print_myobject3_2(o)
             m.print_myobject3_3(o)
             m.print_myobject3_4(o)
-        assert capture == "MyObject3[{i}]\n".format(i=i) * 4
+        assert capture == f"MyObject3[{i}]\n" * 4
 
     cstats = ConstructorStats.get(m.MyObject3)
     assert cstats.alive() == 1
diff --git a/ext/pybind11/tests/test_stl.cpp b/ext/pybind11/tests/test_stl.cpp
index bc5c6553a2..d45465d681 100644
--- a/ext/pybind11/tests/test_stl.cpp
+++ b/ext/pybind11/tests/test_stl.cpp
@@ -7,39 +7,44 @@
     BSD-style license that can be found in the LICENSE file.
 */
 
-#include "pybind11_tests.h"
-#include "constructor_stats.h"
 #include <pybind11/stl.h>
 
+#include "constructor_stats.h"
+#include "pybind11_tests.h"
+
 #ifndef PYBIND11_HAS_FILESYSTEM_IS_OPTIONAL
-#define PYBIND11_HAS_FILESYSTEM_IS_OPTIONAL
+#    define PYBIND11_HAS_FILESYSTEM_IS_OPTIONAL
 #endif
 #include <pybind11/stl/filesystem.h>
 
-#include <vector>
 #include <string>
+#include <vector>
 
 #if defined(PYBIND11_TEST_BOOST)
-#include <boost/optional.hpp>
+#    include <boost/optional.hpp>
 
-namespace pybind11 { namespace detail {
+namespace PYBIND11_NAMESPACE {
+namespace detail {
 template <typename T>
 struct type_caster<boost::optional<T>> : optional_caster<boost::optional<T>> {};
 
 template <>
 struct type_caster<boost::none_t> : void_caster<boost::none_t> {};
-}} // namespace pybind11::detail
+} // namespace detail
+} // namespace PYBIND11_NAMESPACE
 #endif
 
 // Test with `std::variant` in C++17 mode, or with `boost::variant` in C++11/14
 #if defined(PYBIND11_HAS_VARIANT)
 using std::variant;
-#elif defined(PYBIND11_TEST_BOOST) && (!defined(_MSC_VER) || _MSC_VER >= 1910)
-#  include <boost/variant.hpp>
-#  define PYBIND11_HAS_VARIANT 1
+#    define PYBIND11_TEST_VARIANT 1
+#elif defined(PYBIND11_TEST_BOOST)
+#    include <boost/variant.hpp>
+#    define PYBIND11_TEST_VARIANT 1
 using boost::variant;
 
-namespace pybind11 { namespace detail {
+namespace PYBIND11_NAMESPACE {
+namespace detail {
 template <typename... Ts>
 struct type_caster<boost::variant<Ts...>> : variant_caster<boost::variant<Ts...>> {};
 
@@ -50,7 +55,8 @@ struct visit_helper<boost::variant> {
         return boost::apply_visitor(args...);
     }
 };
-}} // namespace pybind11::detail
+} // namespace detail
+} // namespace PYBIND11_NAMESPACE
 #endif
 
 PYBIND11_MAKE_OPAQUE(std::vector<std::string, std::allocator<std::string>>);
@@ -63,26 +69,23 @@ struct TplCtorClass {
 };
 
 namespace std {
-    template <>
-    struct hash<TplCtorClass> { size_t operator()(const TplCtorClass &) const { return 0; } };
+template <>
+struct hash<TplCtorClass> {
+    size_t operator()(const TplCtorClass &) const { return 0; }
+};
 } // namespace std
 
-
 template <template <typename> class OptionalImpl, typename T>
-struct OptionalHolder
-{
+struct OptionalHolder {
     // NOLINTNEXTLINE(modernize-use-equals-default): breaks GCC 4.8
-    OptionalHolder() {};
-    bool member_initialized() const {
-        return member && member->initialized;
-    }
+    OptionalHolder(){};
+    bool member_initialized() const { return member && member->initialized; }
     OptionalImpl<T> member = T{};
 };
 
-
 enum class EnumType {
-  kSet = 42,
-  kUnset = 85,
+    kSet = 42,
+    kUnset = 85,
 };
 
 // This is used to test that return-by-ref and return-by-copy policies are
@@ -102,7 +105,7 @@ public:
         value = EnumType::kUnset;
     }
 
-    OptionalEnumValue& access_by_ref() { return value; }
+    OptionalEnumValue &access_by_ref() { return value; }
     OptionalEnumValue access_by_copy() { return value; }
 
 private:
@@ -122,52 +125,47 @@ public:
 
     ReferenceSensitiveOptional() = default;
     // NOLINTNEXTLINE(google-explicit-constructor)
-    ReferenceSensitiveOptional(const T& value) : storage{value} {}
+    ReferenceSensitiveOptional(const T &value) : storage{value} {}
     // NOLINTNEXTLINE(google-explicit-constructor)
-    ReferenceSensitiveOptional(T&& value) : storage{std::move(value)} {}
-    ReferenceSensitiveOptional& operator=(const T& value) {
+    ReferenceSensitiveOptional(T &&value) : storage{std::move(value)} {}
+    ReferenceSensitiveOptional &operator=(const T &value) {
         storage = {value};
         return *this;
     }
-    ReferenceSensitiveOptional& operator=(T&& value) {
+    ReferenceSensitiveOptional &operator=(T &&value) {
         storage = {std::move(value)};
         return *this;
     }
 
     template <typename... Args>
-    T& emplace(Args&&... args) {
+    T &emplace(Args &&...args) {
         storage.clear();
         storage.emplace_back(std::forward<Args>(args)...);
         return storage.back();
     }
 
-    const T& value() const noexcept {
+    const T &value() const noexcept {
         assert(!storage.empty());
         return storage[0];
     }
 
-    const T& operator*() const noexcept {
-        return value();
-    }
+    const T &operator*() const noexcept { return value(); }
 
-    const T* operator->() const noexcept {
-        return &value();
-    }
+    const T *operator->() const noexcept { return &value(); }
 
-    explicit operator bool() const noexcept {
-        return !storage.empty();
-    }
+    explicit operator bool() const noexcept { return !storage.empty(); }
 
 private:
     std::vector<T> storage;
 };
 
-namespace pybind11 { namespace detail {
+namespace PYBIND11_NAMESPACE {
+namespace detail {
 template <typename T>
-struct type_caster<ReferenceSensitiveOptional<T>> : optional_caster<ReferenceSensitiveOptional<T>> {};
+struct type_caster<ReferenceSensitiveOptional<T>>
+    : optional_caster<ReferenceSensitiveOptional<T>> {};
 } // namespace detail
-} // namespace pybind11
-
+} // namespace PYBIND11_NAMESPACE
 
 TEST_SUBMODULE(stl, m) {
     // test_vector
@@ -175,24 +173,29 @@ TEST_SUBMODULE(stl, m) {
     m.def("load_vector", [](const std::vector<int> &v) { return v.at(0) == 1 && v.at(1) == 2; });
     // `std::vector<bool>` is special because it returns proxy objects instead of references
     m.def("cast_bool_vector", []() { return std::vector<bool>{true, false}; });
-    m.def("load_bool_vector", [](const std::vector<bool> &v) {
-        return v.at(0) == true && v.at(1) == false;
-    });
+    m.def("load_bool_vector",
+          [](const std::vector<bool> &v) { return v.at(0) == true && v.at(1) == false; });
     // Unnumbered regression (caused by #936): pointers to stl containers aren't castable
-    static std::vector<RValueCaster> lvv{2};
-    m.def("cast_ptr_vector", []() { return &lvv; });
+    m.def(
+        "cast_ptr_vector",
+        []() {
+            // Using no-destructor idiom to side-step warnings from overzealous compilers.
+            static auto *v = new std::vector<RValueCaster>{2};
+            return v;
+        },
+        py::return_value_policy::reference);
 
     // test_deque
     m.def("cast_deque", []() { return std::deque<int>{1}; });
     m.def("load_deque", [](const std::deque<int> &v) { return v.at(0) == 1 && v.at(1) == 2; });
 
     // test_array
-    m.def("cast_array", []() { return std::array<int, 2> {{1 , 2}}; });
+    m.def("cast_array", []() { return std::array<int, 2>{{1, 2}}; });
     m.def("load_array", [](const std::array<int, 2> &a) { return a[0] == 1 && a[1] == 2; });
 
     // test_valarray
     m.def("cast_valarray", []() { return std::valarray<int>{1, 4, 9}; });
-    m.def("load_valarray", [](const std::valarray<int>& v) {
+    m.def("load_valarray", [](const std::valarray<int> &v) {
         return v.size() == 3 && v[0] == 1 && v[1] == 4 && v[2] == 9;
     });
 
@@ -214,10 +217,12 @@ TEST_SUBMODULE(stl, m) {
     // NB: map and set keys are `const`, so while we technically do move them (as `const Type &&`),
     // casters don't typically do anything with that, which means they fall to the `const Type &`
     // caster.
-    m.def("cast_rv_map", []() { return std::unordered_map<std::string, RValueCaster>{{"a", RValueCaster{}}}; });
+    m.def("cast_rv_map", []() {
+        return std::unordered_map<std::string, RValueCaster>{{"a", RValueCaster{}}};
+    });
     m.def("cast_rv_nested", []() {
         std::vector<std::array<std::list<std::unordered_map<std::string, RValueCaster>>, 2>> v;
-        v.emplace_back(); // add an array
+        v.emplace_back();           // add an array
         v.back()[0].emplace_back(); // add a map to the array
         v.back()[0].back().emplace("b", RValueCaster{});
         v.back()[0].back().emplace("c", RValueCaster{});
@@ -226,15 +231,18 @@ TEST_SUBMODULE(stl, m) {
         return v;
     });
     static std::array<RValueCaster, 2> lva;
-    static std::unordered_map<std::string, RValueCaster> lvm{{"a", RValueCaster{}}, {"b", RValueCaster{}}};
-    static std::unordered_map<std::string, std::vector<std::list<std::array<RValueCaster, 2>>>> lvn;
-    lvn["a"].emplace_back(); // add a list
+    static std::unordered_map<std::string, RValueCaster> lvm{{"a", RValueCaster{}},
+                                                             {"b", RValueCaster{}}};
+    static std::unordered_map<std::string, std::vector<std::list<std::array<RValueCaster, 2>>>>
+        lvn;
+    lvn["a"].emplace_back();        // add a list
     lvn["a"].back().emplace_back(); // add an array
-    lvn["a"].emplace_back(); // another list
+    lvn["a"].emplace_back();        // another list
     lvn["a"].back().emplace_back(); // add an array
-    lvn["b"].emplace_back(); // add a list
+    lvn["b"].emplace_back();        // add a list
     lvn["b"].back().emplace_back(); // add an array
     lvn["b"].back().emplace_back(); // add another array
+    static std::vector<RValueCaster> lvv{2};
     m.def("cast_lv_vector", []() -> const decltype(lvv) & { return lvv; });
     m.def("cast_lv_array", []() -> const decltype(lva) & { return lva; });
     m.def("cast_lv_map", []() -> const decltype(lvm) & { return lvm; });
@@ -253,7 +261,9 @@ TEST_SUBMODULE(stl, m) {
 
     // test_move_out_container
     struct MoveOutContainer {
-        struct Value { int value; };
+        struct Value {
+            int value;
+        };
         std::list<Value> move_list() const { return {{0}, {1}, {2}}; }
     };
     py::class_<MoveOutContainer::Value>(m, "MoveOutContainerValue")
@@ -266,7 +276,7 @@ TEST_SUBMODULE(stl, m) {
     struct NoAssign {
         int value;
 
-        explicit NoAssign(int value = 0) : value(value) { }
+        explicit NoAssign(int value = 0) : value(value) {}
         NoAssign(const NoAssign &) = default;
         NoAssign(NoAssign &&) = default;
 
@@ -277,13 +287,10 @@ TEST_SUBMODULE(stl, m) {
         .def(py::init<>())
         .def(py::init<int>());
 
-
-    struct MoveOutDetector
-    {
+    struct MoveOutDetector {
         MoveOutDetector() = default;
-        MoveOutDetector(const MoveOutDetector&) = default;
-        MoveOutDetector(MoveOutDetector&& other) noexcept
-         : initialized(other.initialized) {
+        MoveOutDetector(const MoveOutDetector &) = default;
+        MoveOutDetector(MoveOutDetector &&other) noexcept : initialized(other.initialized) {
             // steal underlying resource
             other.initialized = false;
         }
@@ -293,23 +300,22 @@ TEST_SUBMODULE(stl, m) {
         .def(py::init<>())
         .def_readonly("initialized", &MoveOutDetector::initialized);
 
-
 #ifdef PYBIND11_HAS_OPTIONAL
     // test_optional
     m.attr("has_optional") = true;
 
     using opt_int = std::optional<int>;
     using opt_no_assign = std::optional<NoAssign>;
-    m.def("double_or_zero", [](const opt_int& x) -> int {
-        return x.value_or(0) * 2;
-    });
+    m.def("double_or_zero", [](const opt_int &x) -> int { return x.value_or(0) * 2; });
     m.def("half_or_none", [](int x) -> opt_int { return x != 0 ? opt_int(x / 2) : opt_int(); });
-    m.def("test_nullopt", [](opt_int x) {
-        return x.value_or(42);
-    }, py::arg_v("x", std::nullopt, "None"));
-    m.def("test_no_assign", [](const opt_no_assign &x) {
-        return x ? x->value : 42;
-    }, py::arg_v("x", std::nullopt, "None"));
+    m.def(
+        "test_nullopt",
+        [](opt_int x) { return x.value_or(42); },
+        py::arg_v("x", std::nullopt, "None"));
+    m.def(
+        "test_no_assign",
+        [](const opt_no_assign &x) { return x ? x->value : 42; },
+        py::arg_v("x", std::nullopt, "None"));
 
     m.def("nodefer_none_optional", [](std::optional<int>) { return true; });
     m.def("nodefer_none_optional", [](const py::none &) { return false; });
@@ -333,18 +339,17 @@ TEST_SUBMODULE(stl, m) {
 
     using exp_opt_int = std::experimental::optional<int>;
     using exp_opt_no_assign = std::experimental::optional<NoAssign>;
-    m.def("double_or_zero_exp", [](const exp_opt_int& x) -> int {
-        return x.value_or(0) * 2;
-    });
-    m.def("half_or_none_exp", [](int x) -> exp_opt_int {
-        return x ? exp_opt_int(x / 2) : exp_opt_int();
-    });
-    m.def("test_nullopt_exp", [](exp_opt_int x) {
-        return x.value_or(42);
-    }, py::arg_v("x", std::experimental::nullopt, "None"));
-    m.def("test_no_assign_exp", [](const exp_opt_no_assign &x) {
-        return x ? x->value : 42;
-    }, py::arg_v("x", std::experimental::nullopt, "None"));
+    m.def("double_or_zero_exp", [](const exp_opt_int &x) -> int { return x.value_or(0) * 2; });
+    m.def("half_or_none_exp",
+          [](int x) -> exp_opt_int { return x ? exp_opt_int(x / 2) : exp_opt_int(); });
+    m.def(
+        "test_nullopt_exp",
+        [](exp_opt_int x) { return x.value_or(42); },
+        py::arg_v("x", std::experimental::nullopt, "None"));
+    m.def(
+        "test_no_assign_exp",
+        [](const exp_opt_no_assign &x) { return x ? x->value : 42; },
+        py::arg_v("x", std::experimental::nullopt, "None"));
 
     using opt_exp_holder = OptionalHolder<std::experimental::optional, MoveOutDetector>;
     py::class_<opt_exp_holder>(m, "OptionalExpHolder", "Class with optional member")
@@ -365,18 +370,17 @@ TEST_SUBMODULE(stl, m) {
 
     using boost_opt_int = boost::optional<int>;
     using boost_opt_no_assign = boost::optional<NoAssign>;
-    m.def("double_or_zero_boost", [](const boost_opt_int& x) -> int {
-        return x.value_or(0) * 2;
-    });
-    m.def("half_or_none_boost", [](int x) -> boost_opt_int {
-        return x != 0 ? boost_opt_int(x / 2) : boost_opt_int();
-    });
-    m.def("test_nullopt_boost", [](boost_opt_int x) {
-        return x.value_or(42);
-    }, py::arg_v("x", boost::none, "None"));
-    m.def("test_no_assign_boost", [](const boost_opt_no_assign &x) {
-        return x ? x->value : 42;
-    }, py::arg_v("x", boost::none, "None"));
+    m.def("double_or_zero_boost", [](const boost_opt_int &x) -> int { return x.value_or(0) * 2; });
+    m.def("half_or_none_boost",
+          [](int x) -> boost_opt_int { return x != 0 ? boost_opt_int(x / 2) : boost_opt_int(); });
+    m.def(
+        "test_nullopt_boost",
+        [](boost_opt_int x) { return x.value_or(42); },
+        py::arg_v("x", boost::none, "None"));
+    m.def(
+        "test_no_assign_boost",
+        [](const boost_opt_no_assign &x) { return x ? x->value : 42; },
+        py::arg_v("x", boost::none, "None"));
 
     using opt_boost_holder = OptionalHolder<boost::optional, MoveOutDetector>;
     py::class_<opt_boost_holder>(m, "OptionalBoostHolder", "Class with optional member")
@@ -394,22 +398,24 @@ TEST_SUBMODULE(stl, m) {
     // test_refsensitive_optional
     using refsensitive_opt_int = ReferenceSensitiveOptional<int>;
     using refsensitive_opt_no_assign = ReferenceSensitiveOptional<NoAssign>;
-    m.def("double_or_zero_refsensitive", [](const refsensitive_opt_int& x) -> int {
-        return (x ? x.value() : 0) * 2;
-    });
+    m.def("double_or_zero_refsensitive",
+          [](const refsensitive_opt_int &x) -> int { return (x ? x.value() : 0) * 2; });
     m.def("half_or_none_refsensitive", [](int x) -> refsensitive_opt_int {
         return x != 0 ? refsensitive_opt_int(x / 2) : refsensitive_opt_int();
     });
-    // NOLINTNEXTLINE(performance-unnecessary-value-param)
-    m.def("test_nullopt_refsensitive", [](refsensitive_opt_int x) {
-        return x ? x.value() : 42;
-    }, py::arg_v("x", refsensitive_opt_int(), "None"));
-    m.def("test_no_assign_refsensitive", [](const refsensitive_opt_no_assign &x) {
-        return x ? x->value : 42;
-    }, py::arg_v("x", refsensitive_opt_no_assign(), "None"));
+    m.def(
+        "test_nullopt_refsensitive",
+        // NOLINTNEXTLINE(performance-unnecessary-value-param)
+        [](refsensitive_opt_int x) { return x ? x.value() : 42; },
+        py::arg_v("x", refsensitive_opt_int(), "None"));
+    m.def(
+        "test_no_assign_refsensitive",
+        [](const refsensitive_opt_no_assign &x) { return x ? x->value : 42; },
+        py::arg_v("x", refsensitive_opt_no_assign(), "None"));
 
     using opt_refsensitive_holder = OptionalHolder<ReferenceSensitiveOptional, MoveOutDetector>;
-    py::class_<opt_refsensitive_holder>(m, "OptionalRefSensitiveHolder", "Class with optional member")
+    py::class_<opt_refsensitive_holder>(
+        m, "OptionalRefSensitiveHolder", "Class with optional member")
         .def(py::init<>())
         .def_readonly("member", &opt_refsensitive_holder::member)
         .def("member_initialized", &opt_refsensitive_holder::member_initialized);
@@ -423,10 +429,10 @@ TEST_SUBMODULE(stl, m) {
 #ifdef PYBIND11_HAS_FILESYSTEM
     // test_fs_path
     m.attr("has_filesystem") = true;
-    m.def("parent_path", [](const std::filesystem::path& p) { return p.parent_path(); });
+    m.def("parent_path", [](const std::filesystem::path &p) { return p.parent_path(); });
 #endif
 
-#ifdef PYBIND11_HAS_VARIANT
+#ifdef PYBIND11_TEST_VARIANT
     static_assert(std::is_same<py::detail::variant_caster_visitor::result_type, py::handle>::value,
                   "visitor::result_type is required by boost::variant in C++11 mode");
 
@@ -437,6 +443,9 @@ TEST_SUBMODULE(stl, m) {
         result_type operator()(const std::string &) { return "std::string"; }
         result_type operator()(double) { return "double"; }
         result_type operator()(std::nullptr_t) { return "std::nullptr_t"; }
+#    if defined(PYBIND11_HAS_VARIANT)
+        result_type operator()(std::monostate) { return "std::monostate"; }
+#    endif
     };
 
     // test_variant
@@ -450,6 +459,18 @@ TEST_SUBMODULE(stl, m) {
         using V = variant<int, std::string>;
         return py::make_tuple(V(5), V("Hello"));
     });
+
+#    if defined(PYBIND11_HAS_VARIANT)
+    // std::monostate tests.
+    m.def("load_monostate_variant",
+          [](const variant<std::monostate, int, std::string> &v) -> const char * {
+              return py::detail::visit_helper<variant>::call(visitor(), v);
+          });
+    m.def("cast_monostate_variant", []() {
+        using V = variant<std::monostate, int, std::string>;
+        return py::make_tuple(V{}, V(5), V("Hello"));
+    });
+#    endif
 #endif
 
     // #528: templated constructor
@@ -471,13 +492,13 @@ TEST_SUBMODULE(stl, m) {
     // #171: Can't return STL structures containing reference wrapper
     m.def("return_vec_of_reference_wrapper", [](std::reference_wrapper<UserType> p4) {
         static UserType p1{1}, p2{2}, p3{3};
-        return std::vector<std::reference_wrapper<UserType>> {
-            std::ref(p1), std::ref(p2), std::ref(p3), p4
-        };
+        return std::vector<std::reference_wrapper<UserType>>{
+            std::ref(p1), std::ref(p2), std::ref(p3), p4};
     });
 
     // test_stl_pass_by_pointer
-    m.def("stl_pass_by_pointer", [](std::vector<int>* v) { return *v; }, "v"_a=nullptr);
+    m.def(
+        "stl_pass_by_pointer", [](std::vector<int> *v) { return *v; }, "v"_a = nullptr);
 
     // #1258: pybind11/stl.h converts string to vector<string>
     m.def("func_with_string_or_vector_string_arg_overload",
@@ -495,19 +516,24 @@ TEST_SUBMODULE(stl, m) {
     py::class_<Placeholder>(m, "Placeholder");
 
     /// test_stl_vector_ownership
-    m.def("test_stl_ownership",
-          []() {
-              std::vector<Placeholder *> result;
-              result.push_back(new Placeholder());
-              return result;
-          },
-          py::return_value_policy::take_ownership);
+    m.def(
+        "test_stl_ownership",
+        []() {
+            std::vector<Placeholder *> result;
+            result.push_back(new Placeholder());
+            return result;
+        },
+        py::return_value_policy::take_ownership);
 
     m.def("array_cast_sequence", [](std::array<int, 3> x) { return x; });
 
     /// test_issue_1561
-    struct Issue1561Inner { std::string data; };
-    struct Issue1561Outer { std::vector<Issue1561Inner> list; };
+    struct Issue1561Inner {
+        std::string data;
+    };
+    struct Issue1561Outer {
+        std::vector<Issue1561Inner> list;
+    };
 
     py::class_<Issue1561Inner>(m, "Issue1561Inner")
         .def(py::init<std::string>())
diff --git a/ext/pybind11/tests/test_stl.py b/ext/pybind11/tests/test_stl.py
index e217975944..d30c382113 100644
--- a/ext/pybind11/tests/test_stl.py
+++ b/ext/pybind11/tests/test_stl.py
@@ -1,4 +1,3 @@
-# -*- coding: utf-8 -*-
 import pytest
 
 from pybind11_tests import ConstructorStats, UserType
@@ -15,6 +14,7 @@ def test_vector(doc):
 
     assert m.cast_bool_vector() == [True, False]
     assert m.load_bool_vector([True, False])
+    assert m.load_bool_vector(tuple([True, False]))
 
     assert doc(m.cast_vector) == "cast_vector() -> List[int]"
     assert doc(m.load_vector) == "load_vector(arg0: List[int]) -> bool"
@@ -37,6 +37,7 @@ def test_array(doc):
     lst = m.cast_array()
     assert lst == [1, 2]
     assert m.load_array(lst)
+    assert m.load_array(tuple(lst))
 
     assert doc(m.cast_array) == "cast_array() -> List[int[2]]"
     assert doc(m.load_array) == "load_array(arg0: List[int[2]]) -> bool"
@@ -47,6 +48,7 @@ def test_valarray(doc):
     lst = m.cast_valarray()
     assert lst == [1, 4, 9]
     assert m.load_valarray(lst)
+    assert m.load_valarray(tuple(lst))
 
     assert doc(m.cast_valarray) == "cast_valarray() -> List[int]"
     assert doc(m.load_valarray) == "load_valarray(arg0: List[int]) -> bool"
@@ -71,6 +73,7 @@ def test_set(doc):
     assert s == {"key1", "key2"}
     s.add("key3")
     assert m.load_set(s)
+    assert m.load_set(frozenset(s))
 
     assert doc(m.cast_set) == "cast_set() -> Set[str]"
     assert doc(m.load_set) == "load_set(arg0: Set[str]) -> bool"
@@ -264,6 +267,22 @@ def test_variant(doc):
     )
 
 
+@pytest.mark.skipif(
+    not hasattr(m, "load_monostate_variant"), reason="no std::monostate"
+)
+def test_variant_monostate(doc):
+    assert m.load_monostate_variant(None) == "std::monostate"
+    assert m.load_monostate_variant(1) == "int"
+    assert m.load_monostate_variant("1") == "std::string"
+
+    assert m.cast_monostate_variant() == (None, 5, "Hello")
+
+    assert (
+        doc(m.load_monostate_variant)
+        == "load_monostate_variant(arg0: Union[None, int, str]) -> str"
+    )
+
+
 def test_vec_of_reference_wrapper():
     """#171: Can't return reference wrappers (or STL structures containing them)"""
     assert (
@@ -283,7 +302,7 @@ def test_stl_pass_by_pointer(msg):
             1. (v: List[int] = None) -> List[int]
 
         Invoked with:
-    """  # noqa: E501 line too long
+    """
     )
 
     with pytest.raises(TypeError) as excinfo:
@@ -295,7 +314,7 @@ def test_stl_pass_by_pointer(msg):
             1. (v: List[int] = None) -> List[int]
 
         Invoked with: None
-    """  # noqa: E501 line too long
+    """
     )
 
     assert m.stl_pass_by_pointer([1, 2, 3]) == [1, 2, 3]
diff --git a/ext/pybind11/tests/test_stl_binders.cpp b/ext/pybind11/tests/test_stl_binders.cpp
index 6b23e3529f..ca9630bd19 100644
--- a/ext/pybind11/tests/test_stl_binders.cpp
+++ b/ext/pybind11/tests/test_stl_binders.cpp
@@ -7,12 +7,13 @@
     BSD-style license that can be found in the LICENSE file.
 */
 
+#include <pybind11/numpy.h>
+#include <pybind11/stl_bind.h>
+
 #include "pybind11_tests.h"
 
-#include <pybind11/stl_bind.h>
-#include <pybind11/numpy.h>
-#include <map>
 #include <deque>
+#include <map>
 #include <unordered_map>
 
 class El {
@@ -23,7 +24,7 @@ public:
     int a;
 };
 
-std::ostream & operator<<(std::ostream &s, El const&v) {
+std::ostream &operator<<(std::ostream &s, El const &v) {
     s << "El{" << v.a << '}';
     return s;
 }
@@ -40,25 +41,32 @@ public:
     int value;
 };
 
-template <class Container> Container *one_to_n(int n) {
-    auto v = new Container();
-    for (int i = 1; i <= n; i++)
+template <class Container>
+Container *one_to_n(int n) {
+    auto *v = new Container();
+    for (int i = 1; i <= n; i++) {
         v->emplace_back(i);
+    }
     return v;
 }
 
-template <class Map> Map *times_ten(int n) {
-    auto m = new Map();
-    for (int i = 1; i <= n; i++)
-        m->emplace(int(i), E_nc(10*i));
+template <class Map>
+Map *times_ten(int n) {
+    auto *m = new Map();
+    for (int i = 1; i <= n; i++) {
+        m->emplace(int(i), E_nc(10 * i));
+    }
     return m;
 }
 
-template <class NestMap> NestMap *times_hundred(int n) {
-    auto m = new NestMap();
-    for (int i = 1; i <= n; i++)
-        for (int j = 1; j <= n; j++)
-            (*m)[i].emplace(int(j*10), E_nc(100*j));
+template <class NestMap>
+NestMap *times_hundred(int n) {
+    auto *m = new NestMap();
+    for (int i = 1; i <= n; i++) {
+        for (int j = 1; j <= n; j++) {
+            (*m)[i].emplace(int(j * 10), E_nc(100 * j));
+        }
+    }
     return m;
 }
 
@@ -67,8 +75,7 @@ TEST_SUBMODULE(stl_binders, m) {
     py::bind_vector<std::vector<unsigned int>>(m, "VectorInt", py::buffer_protocol());
 
     // test_vector_custom
-    py::class_<El>(m, "El")
-        .def(py::init<int>());
+    py::class_<El>(m, "El").def(py::init<int>());
     py::bind_vector<std::vector<El>>(m, "VectorEl");
     py::bind_vector<std::vector<std::vector<El>>>(m, "VectorVectorEl");
 
@@ -78,11 +85,10 @@ TEST_SUBMODULE(stl_binders, m) {
 
     // test_map_string_double_const
     py::bind_map<std::map<std::string, double const>>(m, "MapStringDoubleConst");
-    py::bind_map<std::unordered_map<std::string, double const>>(m, "UnorderedMapStringDoubleConst");
+    py::bind_map<std::unordered_map<std::string, double const>>(m,
+                                                                "UnorderedMapStringDoubleConst");
 
-    py::class_<E_nc>(m, "ENC")
-        .def(py::init<int>())
-        .def_readwrite("value", &E_nc::value);
+    py::class_<E_nc>(m, "ENC").def(py::init<int>()).def_readwrite("value", &E_nc::value);
 
     // test_noncopyable_containers
     py::bind_vector<std::vector<E_nc>>(m, "VectorENC");
@@ -95,14 +101,15 @@ TEST_SUBMODULE(stl_binders, m) {
     m.def("get_umnc", &times_ten<std::unordered_map<int, E_nc>>);
     // Issue #1885: binding nested std::map<X, Container<E>> with E non-copyable
     py::bind_map<std::map<int, std::vector<E_nc>>>(m, "MapVecENC");
-    m.def("get_nvnc", [](int n)
-        {
-            auto m = new std::map<int, std::vector<E_nc>>();
-            for (int i = 1; i <= n; i++)
-                for (int j = 1; j <= n; j++)
-                    (*m)[i].emplace_back(j);
-            return m;
-        });
+    m.def("get_nvnc", [](int n) {
+        auto *m = new std::map<int, std::vector<E_nc>>();
+        for (int i = 1; i <= n; i++) {
+            for (int j = 1; j <= n; j++) {
+                (*m)[i].emplace_back(j);
+            }
+        }
+        return m;
+    });
     py::bind_map<std::map<int, std::map<int, E_nc>>>(m, "MapMapENC");
     m.def("get_nmnc", &times_hundred<std::map<int, std::map<int, E_nc>>>);
     py::bind_map<std::unordered_map<int, std::unordered_map<int, E_nc>>>(m, "UmapUmapENC");
@@ -111,17 +118,31 @@ TEST_SUBMODULE(stl_binders, m) {
     // test_vector_buffer
     py::bind_vector<std::vector<unsigned char>>(m, "VectorUChar", py::buffer_protocol());
     // no dtype declared for this version:
-    struct VUndeclStruct { bool w; uint32_t x; double y; bool z; };
-    m.def("create_undeclstruct", [m] () mutable {
-        py::bind_vector<std::vector<VUndeclStruct>>(m, "VectorUndeclStruct", py::buffer_protocol());
+    struct VUndeclStruct {
+        bool w;
+        uint32_t x;
+        double y;
+        bool z;
+    };
+    m.def("create_undeclstruct", [m]() mutable {
+        py::bind_vector<std::vector<VUndeclStruct>>(
+            m, "VectorUndeclStruct", py::buffer_protocol());
     });
 
     // The rest depends on numpy:
-    try { py::module_::import("numpy"); }
-    catch (...) { return; }
+    try {
+        py::module_::import("numpy");
+    } catch (...) {
+        return;
+    }
 
     // test_vector_buffer_numpy
-    struct VStruct { bool w; uint32_t x; double y; bool z; };
+    struct VStruct {
+        bool w;
+        uint32_t x;
+        double y;
+        bool z;
+    };
     PYBIND11_NUMPY_DTYPE(VStruct, w, x, y, z);
     py::class_<VStruct>(m, "VStruct").def_readwrite("x", &VStruct::x);
     py::bind_vector<std::vector<VStruct>>(m, "VectorStruct", py::buffer_protocol());
diff --git a/ext/pybind11/tests/test_stl_binders.py b/ext/pybind11/tests/test_stl_binders.py
index 59c5ab6b5d..9eb906f065 100644
--- a/ext/pybind11/tests/test_stl_binders.py
+++ b/ext/pybind11/tests/test_stl_binders.py
@@ -1,7 +1,5 @@
-# -*- coding: utf-8 -*-
 import pytest
 
-import env
 from pybind11_tests import stl_binders as m
 
 
@@ -74,18 +72,13 @@ def test_vector_buffer():
     assert v[1] == 2
     v[2] = 5
     mv = memoryview(v)  # We expose the buffer interface
-    if not env.PY2:
-        assert mv[2] == 5
-        mv[2] = 6
-    else:
-        assert mv[2] == "\x05"
-        mv[2] = "\x06"
+    assert mv[2] == 5
+    mv[2] = 6
     assert v[2] == 6
 
-    if not env.PY2:
-        mv = memoryview(b)
-        v = m.VectorUChar(mv[::2])
-        assert v[1] == 3
+    mv = memoryview(b)
+    v = m.VectorUChar(mv[::2])
+    assert v[1] == 3
 
     with pytest.raises(RuntimeError) as excinfo:
         m.create_undeclstruct()  # Undeclared struct contents, no buffer interface
@@ -316,3 +309,29 @@ def test_map_delitem():
     del um["ua"]
     assert sorted(list(um)) == ["ub"]
     assert sorted(list(um.items())) == [("ub", 2.6)]
+
+
+def test_map_view_types():
+    map_string_double = m.MapStringDouble()
+    unordered_map_string_double = m.UnorderedMapStringDouble()
+    map_string_double_const = m.MapStringDoubleConst()
+    unordered_map_string_double_const = m.UnorderedMapStringDoubleConst()
+
+    assert map_string_double.keys().__class__.__name__ == "KeysView[str]"
+    assert map_string_double.values().__class__.__name__ == "ValuesView[float]"
+    assert map_string_double.items().__class__.__name__ == "ItemsView[str, float]"
+
+    keys_type = type(map_string_double.keys())
+    assert type(unordered_map_string_double.keys()) is keys_type
+    assert type(map_string_double_const.keys()) is keys_type
+    assert type(unordered_map_string_double_const.keys()) is keys_type
+
+    values_type = type(map_string_double.values())
+    assert type(unordered_map_string_double.values()) is values_type
+    assert type(map_string_double_const.values()) is values_type
+    assert type(unordered_map_string_double_const.values()) is values_type
+
+    items_type = type(map_string_double.items())
+    assert type(unordered_map_string_double.items()) is items_type
+    assert type(map_string_double_const.items()) is items_type
+    assert type(unordered_map_string_double_const.items()) is items_type
diff --git a/ext/pybind11/tests/test_tagbased_polymorphic.cpp b/ext/pybind11/tests/test_tagbased_polymorphic.cpp
index 2c7bad8bbc..12ba6532f3 100644
--- a/ext/pybind11/tests/test_tagbased_polymorphic.cpp
+++ b/ext/pybind11/tests/test_tagbased_polymorphic.cpp
@@ -7,11 +7,11 @@
     BSD-style license that can be found in the LICENSE file.
 */
 
-#include "pybind11_tests.h"
 #include <pybind11/stl.h>
 
-struct Animal
-{
+#include "pybind11_tests.h"
+
+struct Animal {
     // Make this type also a "standard" polymorphic type, to confirm that
     // specializing polymorphic_type_hook using enable_if_t still works
     // (https://github.com/pybind/pybind11/pull/2016/).
@@ -20,57 +20,54 @@ struct Animal
     // Enum for tag-based polymorphism.
     enum class Kind {
         Unknown = 0,
-        Dog = 100, Labrador, Chihuahua, LastDog = 199,
-        Cat = 200, Panther, LastCat = 299
+        Dog = 100,
+        Labrador,
+        Chihuahua,
+        LastDog = 199,
+        Cat = 200,
+        Panther,
+        LastCat = 299
     };
-    static const std::type_info* type_of_kind(Kind kind);
+    static const std::type_info *type_of_kind(Kind kind);
     static std::string name_of_kind(Kind kind);
 
     const Kind kind;
     const std::string name;
 
-  protected:
-    Animal(const std::string& _name, Kind _kind)
-        : kind(_kind), name(_name)
-    {}
+protected:
+    Animal(const std::string &_name, Kind _kind) : kind(_kind), name(_name) {}
 };
 
-struct Dog : Animal
-{
+struct Dog : Animal {
     explicit Dog(const std::string &_name, Kind _kind = Kind::Dog) : Animal(_name, _kind) {}
     std::string bark() const { return name_of_kind(kind) + " " + name + " goes " + sound; }
     std::string sound = "WOOF!";
 };
 
-struct Labrador : Dog
-{
+struct Labrador : Dog {
     explicit Labrador(const std::string &_name, int _excitement = 9001)
         : Dog(_name, Kind::Labrador), excitement(_excitement) {}
     int excitement;
 };
 
-struct Chihuahua : Dog
-{
+struct Chihuahua : Dog {
     explicit Chihuahua(const std::string &_name) : Dog(_name, Kind::Chihuahua) {
         sound = "iyiyiyiyiyi";
     }
     std::string bark() const { return Dog::bark() + " and runs in circles"; }
 };
 
-struct Cat : Animal
-{
+struct Cat : Animal {
     explicit Cat(const std::string &_name, Kind _kind = Kind::Cat) : Animal(_name, _kind) {}
     std::string purr() const { return "mrowr"; }
 };
 
-struct Panther : Cat
-{
+struct Panther : Cat {
     explicit Panther(const std::string &_name) : Cat(_name, Kind::Panther) {}
     std::string purr() const { return "mrrrRRRRRR"; }
 };
 
-std::vector<std::unique_ptr<Animal>> create_zoo()
-{
+std::vector<std::unique_ptr<Animal>> create_zoo() {
     std::vector<std::unique_ptr<Animal>> ret;
     ret.emplace_back(new Labrador("Fido", 15000));
 
@@ -85,45 +82,53 @@ std::vector<std::unique_ptr<Animal>> create_zoo()
     return ret;
 }
 
-const std::type_info* Animal::type_of_kind(Kind kind)
-{
+const std::type_info *Animal::type_of_kind(Kind kind) {
     switch (kind) {
         case Kind::Unknown:
-        case Kind::Dog: break;
+        case Kind::Dog:
+            break;
 
-        case Kind::Labrador: return &typeid(Labrador);
-        case Kind::Chihuahua: return &typeid(Chihuahua);
+        case Kind::Labrador:
+            return &typeid(Labrador);
+        case Kind::Chihuahua:
+            return &typeid(Chihuahua);
 
         case Kind::LastDog:
-        case Kind::Cat: break;
-        case Kind::Panther: return &typeid(Panther);
-        case Kind::LastCat: break;
+        case Kind::Cat:
+            break;
+        case Kind::Panther:
+            return &typeid(Panther);
+        case Kind::LastCat:
+            break;
     }
 
-    if (kind >= Kind::Dog && kind <= Kind::LastDog) return &typeid(Dog);
-    if (kind >= Kind::Cat && kind <= Kind::LastCat) return &typeid(Cat);
+    if (kind >= Kind::Dog && kind <= Kind::LastDog) {
+        return &typeid(Dog);
+    }
+    if (kind >= Kind::Cat && kind <= Kind::LastCat) {
+        return &typeid(Cat);
+    }
     return nullptr;
 }
 
-std::string Animal::name_of_kind(Kind kind)
-{
+std::string Animal::name_of_kind(Kind kind) {
     std::string raw_name = type_of_kind(kind)->name();
     py::detail::clean_type_id(raw_name);
     return raw_name;
 }
 
-namespace pybind11 {
-    template <typename itype>
-    struct polymorphic_type_hook<itype, detail::enable_if_t<std::is_base_of<Animal, itype>::value>>
-    {
-        static const void *get(const itype *src, const std::type_info*& type)
-        { type = src ? Animal::type_of_kind(src->kind) : nullptr; return src; }
-    };
-} // namespace pybind11
+namespace PYBIND11_NAMESPACE {
+template <typename itype>
+struct polymorphic_type_hook<itype, detail::enable_if_t<std::is_base_of<Animal, itype>::value>> {
+    static const void *get(const itype *src, const std::type_info *&type) {
+        type = src ? Animal::type_of_kind(src->kind) : nullptr;
+        return src;
+    }
+};
+} // namespace PYBIND11_NAMESPACE
 
 TEST_SUBMODULE(tagbased_polymorphic, m) {
-    py::class_<Animal>(m, "Animal")
-        .def_readonly("name", &Animal::name);
+    py::class_<Animal>(m, "Animal").def_readonly("name", &Animal::name);
     py::class_<Dog, Animal>(m, "Dog")
         .def(py::init<std::string>())
         .def_readwrite("sound", &Dog::sound)
@@ -134,9 +139,7 @@ TEST_SUBMODULE(tagbased_polymorphic, m) {
     py::class_<Chihuahua, Dog>(m, "Chihuahua")
         .def(py::init<std::string>())
         .def("bark", &Chihuahua::bark);
-    py::class_<Cat, Animal>(m, "Cat")
-        .def(py::init<std::string>())
-        .def("purr", &Cat::purr);
+    py::class_<Cat, Animal>(m, "Cat").def(py::init<std::string>()).def("purr", &Cat::purr);
     py::class_<Panther, Cat>(m, "Panther")
         .def(py::init<std::string>())
         .def("purr", &Panther::purr);
diff --git a/ext/pybind11/tests/test_tagbased_polymorphic.py b/ext/pybind11/tests/test_tagbased_polymorphic.py
index 64eb8a3c1b..84f0ea7178 100644
--- a/ext/pybind11/tests/test_tagbased_polymorphic.py
+++ b/ext/pybind11/tests/test_tagbased_polymorphic.py
@@ -1,4 +1,3 @@
-# -*- coding: utf-8 -*-
 from pybind11_tests import tagbased_polymorphic as m
 
 
diff --git a/ext/pybind11/tests/test_thread.cpp b/ext/pybind11/tests/test_thread.cpp
index 19d91768b3..1536503b6b 100644
--- a/ext/pybind11/tests/test_thread.cpp
+++ b/ext/pybind11/tests/test_thread.cpp
@@ -10,20 +10,20 @@
 #include <pybind11/cast.h>
 #include <pybind11/pybind11.h>
 
+#include "pybind11_tests.h"
+
 #include <chrono>
 #include <thread>
 
-#include "pybind11_tests.h"
-
 namespace py = pybind11;
 
 namespace {
 
 struct IntStruct {
-    explicit IntStruct(int v) : value(v) {};
+    explicit IntStruct(int v) : value(v){};
     ~IntStruct() { value = -value; }
-    IntStruct(const IntStruct&) = default;
-    IntStruct& operator=(const IntStruct&) = default;
+    IntStruct(const IntStruct &) = default;
+    IntStruct &operator=(const IntStruct &) = default;
 
     int value;
 };
diff --git a/ext/pybind11/tests/test_thread.py b/ext/pybind11/tests/test_thread.py
index f9db1babaf..e89991f9d6 100644
--- a/ext/pybind11/tests/test_thread.py
+++ b/ext/pybind11/tests/test_thread.py
@@ -1,5 +1,3 @@
-# -*- coding: utf-8 -*-
-
 import threading
 
 from pybind11_tests import thread as m
@@ -7,7 +5,7 @@ from pybind11_tests import thread as m
 
 class Thread(threading.Thread):
     def __init__(self, fn):
-        super(Thread, self).__init__()
+        super().__init__()
         self.fn = fn
         self.e = None
 
@@ -19,7 +17,7 @@ class Thread(threading.Thread):
             self.e = e
 
     def join(self):
-        super(Thread, self).join()
+        super().join()
         if self.e:
             raise self.e
 
diff --git a/ext/pybind11/tests/test_union.py b/ext/pybind11/tests/test_union.py
index 2a2c12fb48..e1866e701d 100644
--- a/ext/pybind11/tests/test_union.py
+++ b/ext/pybind11/tests/test_union.py
@@ -1,4 +1,3 @@
-# -*- coding: utf-8 -*-
 from pybind11_tests import union_ as m
 
 
diff --git a/ext/pybind11/tests/test_virtual_functions.cpp b/ext/pybind11/tests/test_virtual_functions.cpp
index 6e06db9fc1..93b136ad3c 100644
--- a/ext/pybind11/tests/test_virtual_functions.cpp
+++ b/ext/pybind11/tests/test_virtual_functions.cpp
@@ -7,13 +7,15 @@
     BSD-style license that can be found in the LICENSE file.
 */
 
-#include "pybind11_tests.h"
-#include "constructor_stats.h"
 #include <pybind11/functional.h>
+
+#include "constructor_stats.h"
+#include "pybind11_tests.h"
+
 #include <thread>
 
 /* This is an example class that we'll want to be able to extend from Python */
-class ExampleVirt  {
+class ExampleVirt {
 public:
     explicit ExampleVirt(int state) : state(state) { print_created(this, state); }
     ExampleVirt(const ExampleVirt &e) : state(e.state) { print_copy_created(this); }
@@ -25,7 +27,8 @@ public:
 
     virtual int run(int value) {
         py::print("Original implementation of "
-                  "ExampleVirt::run(state={}, value={}, str1={}, str2={})"_s.format(state, value, get_string1(), *get_string2()));
+                  "ExampleVirt::run(state={}, value={}, str1={}, str2={})"_s.format(
+                      state, value, get_string1(), *get_string2()));
         return state + value;
     }
 
@@ -33,8 +36,8 @@ public:
     virtual void pure_virtual() = 0;
 
     // Returning a reference/pointer to a type converted from python (numbers, strings, etc.) is a
-    // bit trickier, because the actual int& or std::string& or whatever only exists temporarily, so
-    // we have to handle it specially in the trampoline class (see below).
+    // bit trickier, because the actual int& or std::string& or whatever only exists temporarily,
+    // so we have to handle it specially in the trampoline class (see below).
     virtual const std::string &get_string1() { return str1; }
     virtual const std::string *get_string2() { return &str2; }
 
@@ -50,70 +53,62 @@ public:
 
     int run(int value) override {
         /* Generate wrapping code that enables native function overloading */
-        PYBIND11_OVERRIDE(
-            int,         /* Return type */
-            ExampleVirt, /* Parent class */
-            run,         /* Name of function */
-            value        /* Argument(s) */
+        PYBIND11_OVERRIDE(int,         /* Return type */
+                          ExampleVirt, /* Parent class */
+                          run,         /* Name of function */
+                          value        /* Argument(s) */
         );
     }
 
     bool run_bool() override {
-        PYBIND11_OVERRIDE_PURE(
-            bool,         /* Return type */
-            ExampleVirt,  /* Parent class */
-            run_bool,     /* Name of function */
-                          /* This function has no arguments. The trailing comma
-                             in the previous line is needed for some compilers */
+        PYBIND11_OVERRIDE_PURE(bool,        /* Return type */
+                               ExampleVirt, /* Parent class */
+                               run_bool,    /* Name of function */
+                                            /* This function has no arguments. The trailing comma
+                                               in the previous line is needed for some compilers */
         );
     }
 
     void pure_virtual() override {
-        PYBIND11_OVERRIDE_PURE(
-            void,         /* Return type */
-            ExampleVirt,  /* Parent class */
-            pure_virtual, /* Name of function */
-                          /* This function has no arguments. The trailing comma
-                             in the previous line is needed for some compilers */
+        PYBIND11_OVERRIDE_PURE(void,         /* Return type */
+                               ExampleVirt,  /* Parent class */
+                               pure_virtual, /* Name of function */
+                                             /* This function has no arguments. The trailing comma
+                                                in the previous line is needed for some compilers */
         );
     }
 
     // We can return reference types for compatibility with C++ virtual interfaces that do so, but
     // note they have some significant limitations (see the documentation).
     const std::string &get_string1() override {
-        PYBIND11_OVERRIDE(
-            const std::string &, /* Return type */
-            ExampleVirt,         /* Parent class */
-            get_string1,         /* Name of function */
-                                 /* (no arguments) */
+        PYBIND11_OVERRIDE(const std::string &, /* Return type */
+                          ExampleVirt,         /* Parent class */
+                          get_string1,         /* Name of function */
+                                               /* (no arguments) */
         );
     }
 
     const std::string *get_string2() override {
-        PYBIND11_OVERRIDE(
-            const std::string *, /* Return type */
-            ExampleVirt,         /* Parent class */
-            get_string2,         /* Name of function */
-                                 /* (no arguments) */
+        PYBIND11_OVERRIDE(const std::string *, /* Return type */
+                          ExampleVirt,         /* Parent class */
+                          get_string2,         /* Name of function */
+                                               /* (no arguments) */
         );
     }
-
 };
 
 class NonCopyable {
 public:
-    NonCopyable(int a, int b) : value{new int(a*b)} { print_created(this, a, b); }
-    NonCopyable(NonCopyable &&o) noexcept {
-        value = std::move(o.value);
-        print_move_created(this);
-    }
+    NonCopyable(int a, int b) : value{new int(a * b)} { print_created(this, a, b); }
+    NonCopyable(NonCopyable &&o) noexcept : value{std::move(o.value)} { print_move_created(this); }
     NonCopyable(const NonCopyable &) = delete;
     NonCopyable() = delete;
     void operator=(const NonCopyable &) = delete;
     void operator=(NonCopyable &&) = delete;
     std::string get_value() const {
-        if (value)
+        if (value) {
             return std::to_string(*value);
+        }
         return "(null)";
     }
     ~NonCopyable() { print_destroyed(this); }
@@ -126,14 +121,12 @@ private:
 // when it is not referenced elsewhere, but copied if it is still referenced.
 class Movable {
 public:
-    Movable(int a, int b) : value{a+b} { print_created(this, a, b); }
-    Movable(const Movable &m) { value = m.value; print_copy_created(this); }
-    Movable(Movable &&m) noexcept {
-        value = m.value;
-        print_move_created(this);
-    }
+    Movable(int a, int b) : value{a + b} { print_created(this, a, b); }
+    Movable(const Movable &m) : value{m.value} { print_copy_created(this); }
+    Movable(Movable &&m) noexcept : value{m.value} { print_move_created(this); }
     std::string get_value() const { return std::to_string(value); }
     ~Movable() { print_destroyed(this); }
+
 private:
     int value;
 };
@@ -142,7 +135,7 @@ class NCVirt {
 public:
     virtual ~NCVirt() = default;
     NCVirt() = default;
-    NCVirt(const NCVirt&) = delete;
+    NCVirt(const NCVirt &) = delete;
     virtual NonCopyable get_noncopyable(int a, int b) { return NonCopyable(a, b); }
     virtual Movable get_movable(int a, int b) = 0;
 
@@ -161,11 +154,10 @@ class NCVirtTrampoline : public NCVirt {
 };
 
 struct Base {
-    /* for some reason MSVC2015 can't compile this if the function is pure virtual */
-    virtual std::string dispatch() const { return {}; };
+    virtual std::string dispatch() const = 0;
     virtual ~Base() = default;
     Base() = default;
-    Base(const Base&) = delete;
+    Base(const Base &) = delete;
 };
 
 struct DispatchIssue : Base {
@@ -178,33 +170,34 @@ struct DispatchIssue : Base {
 // objects and send the result to the visitor functor
 struct AdderBase {
     struct Data {};
-    using DataVisitor = std::function<void (const Data&)>;
+    using DataVisitor = std::function<void(const Data &)>;
 
-    virtual void operator()(const Data& first, const Data& second, const DataVisitor& visitor) const = 0;
+    virtual void
+    operator()(const Data &first, const Data &second, const DataVisitor &visitor) const
+        = 0;
     virtual ~AdderBase() = default;
     AdderBase() = default;
-    AdderBase(const AdderBase&) = delete;
+    AdderBase(const AdderBase &) = delete;
 };
 
 struct Adder : AdderBase {
-    void operator()(const Data& first, const Data& second, const DataVisitor& visitor) const override {
-        PYBIND11_OVERRIDE_PURE_NAME(void, AdderBase, "__call__", operator(), first, second, visitor);
+    void
+    operator()(const Data &first, const Data &second, const DataVisitor &visitor) const override {
+        PYBIND11_OVERRIDE_PURE_NAME(
+            void, AdderBase, "__call__", operator(), first, second, visitor);
     }
 };
 
-
 static void test_gil() {
     {
         py::gil_scoped_acquire lock;
         py::print("1st lock acquired");
-
     }
 
     {
         py::gil_scoped_acquire lock;
         py::print("2nd lock acquired");
     }
-
 }
 
 static void test_gil_from_thread() {
@@ -214,9 +207,28 @@ static void test_gil_from_thread() {
     t.join();
 }
 
+class test_override_cache_helper {
 
-// Forward declaration (so that we can put the main tests here; the inherited virtual approaches are
-// rather long).
+public:
+    virtual int func() { return 0; }
+
+    test_override_cache_helper() = default;
+    virtual ~test_override_cache_helper() = default;
+    // Non-copyable
+    test_override_cache_helper &operator=(test_override_cache_helper const &Right) = delete;
+    test_override_cache_helper(test_override_cache_helper const &Copy) = delete;
+};
+
+class test_override_cache_helper_trampoline : public test_override_cache_helper {
+    int func() override { PYBIND11_OVERRIDE(int, test_override_cache_helper, func); }
+};
+
+inline int test_override_cache(std::shared_ptr<test_override_cache_helper> const &instance) {
+    return instance->func();
+}
+
+// Forward declaration (so that we can put the main tests here; the inherited virtual approaches
+// are rather long).
 void initialize_inherited_virtuals(py::module_ &m);
 
 TEST_SUBMODULE(virtual_functions, m) {
@@ -228,11 +240,9 @@ TEST_SUBMODULE(virtual_functions, m) {
         .def("run_bool", &ExampleVirt::run_bool)
         .def("pure_virtual", &ExampleVirt::pure_virtual);
 
-    py::class_<NonCopyable>(m, "NonCopyable")
-        .def(py::init<int, int>());
+    py::class_<NonCopyable>(m, "NonCopyable").def(py::init<int, int>());
 
-    py::class_<Movable>(m, "Movable")
-        .def(py::init<int, int>());
+    py::class_<Movable>(m, "Movable").def(py::init<int, int>());
 
     // test_move_support
 #if !defined(__INTEL_COMPILER) && !defined(__CUDACC__) && !defined(__PGIC__)
@@ -245,7 +255,7 @@ TEST_SUBMODULE(virtual_functions, m) {
 #endif
 
     m.def("runExampleVirt", [](ExampleVirt *ex, int value) { return ex->run(value); });
-    m.def("runExampleVirtBool", [](ExampleVirt* ex) { return ex->run_bool(); });
+    m.def("runExampleVirtBool", [](ExampleVirt *ex) { return ex->run_bool(); });
     m.def("runExampleVirtVirtual", [](ExampleVirt *ex) { ex->pure_virtual(); });
 
     m.def("cstats_debug", &ConstructorStats::get<ExampleVirt>);
@@ -256,27 +266,25 @@ TEST_SUBMODULE(virtual_functions, m) {
     // that were not extended on the Python side
     struct A {
         A() = default;
-        A(const A&) = delete;
+        A(const A &) = delete;
         virtual ~A() = default;
         virtual void f() { py::print("A.f()"); }
     };
 
     struct PyA : A {
         PyA() { py::print("PyA.PyA()"); }
-        PyA(const PyA&) = delete;
+        PyA(const PyA &) = delete;
         ~PyA() override { py::print("PyA.~PyA()"); }
 
         void f() override {
             py::print("PyA.f()");
-            // This convolution just gives a `void`, but tests that PYBIND11_TYPE() works to protect
-            // a type containing a ,
+            // This convolution just gives a `void`, but tests that PYBIND11_TYPE() works to
+            // protect a type containing a ,
             PYBIND11_OVERRIDE(PYBIND11_TYPE(typename std::enable_if<true, void>::type), A, f);
         }
     };
 
-    py::class_<A, PyA>(m, "A")
-        .def(py::init<>())
-        .def("f", &A::f);
+    py::class_<A, PyA>(m, "A").def(py::init<>()).def("f", &A::f);
 
     m.def("call_f", [](A *a) { a->f(); });
 
@@ -284,14 +292,14 @@ TEST_SUBMODULE(virtual_functions, m) {
     // ... unless we explicitly request it, as in this example:
     struct A2 {
         A2() = default;
-        A2(const A2&) = delete;
+        A2(const A2 &) = delete;
         virtual ~A2() = default;
         virtual void f() { py::print("A2.f()"); }
     };
 
     struct PyA2 : A2 {
         PyA2() { py::print("PyA2.PyA2()"); }
-        PyA2(const PyA2&) = delete;
+        PyA2(const PyA2 &) = delete;
         ~PyA2() override { py::print("PyA2.~PyA2()"); }
         void f() override {
             py::print("PyA2.f()");
@@ -312,7 +320,7 @@ TEST_SUBMODULE(virtual_functions, m) {
         .def(py::init<>())
         .def("dispatch", &Base::dispatch);
 
-    m.def("dispatch_issue_go", [](const Base * b) { return b->dispatch(); });
+    m.def("dispatch_issue_go", [](const Base *b) { return b->dispatch(); });
 
     // test_recursive_dispatch_issue
     // #3357: Recursive dispatch fails to find python function override
@@ -320,31 +328,38 @@ TEST_SUBMODULE(virtual_functions, m) {
         .def(pybind11::init<>())
         .def("__call__", &AdderBase::operator());
 
-    pybind11::class_<AdderBase::Data>(m, "Data")
-        .def(pybind11::init<>());
+    pybind11::class_<AdderBase::Data>(m, "Data").def(pybind11::init<>());
 
-    m.def("add2", [](const AdderBase::Data& first, const AdderBase::Data& second,
-                     const AdderBase& adder, const AdderBase::DataVisitor& visitor) {
-        adder(first, second, visitor);
-    });
+    m.def("add2",
+          [](const AdderBase::Data &first,
+             const AdderBase::Data &second,
+             const AdderBase &adder,
+             const AdderBase::DataVisitor &visitor) { adder(first, second, visitor); });
 
-    m.def("add3", [](const AdderBase::Data& first, const AdderBase::Data& second, const AdderBase::Data& third,
-                     const AdderBase& adder, const AdderBase::DataVisitor& visitor) {
-        adder(first, second, [&] (const AdderBase::Data& first_plus_second) {
-            adder(first_plus_second, third, visitor);
-        });
-    });
+    m.def("add3",
+          [](const AdderBase::Data &first,
+             const AdderBase::Data &second,
+             const AdderBase::Data &third,
+             const AdderBase &adder,
+             const AdderBase::DataVisitor &visitor) {
+              adder(first, second, [&](const AdderBase::Data &first_plus_second) {
+                  // NOLINTNEXTLINE(readability-suspicious-call-argument)
+                  adder(first_plus_second, third, visitor);
+              });
+          });
 
     // test_override_ref
     // #392/397: overriding reference-returning functions
     class OverrideTest {
     public:
-        struct A { std::string value = "hi"; };
+        struct A {
+            std::string value = "hi";
+        };
         std::string v;
         A a;
         explicit OverrideTest(const std::string &v) : v{v} {}
         OverrideTest() = default;
-        OverrideTest(const OverrideTest&) = delete;
+        OverrideTest(const OverrideTest &) = delete;
         virtual std::string str_value() { return v; }
         virtual std::string &str_ref() { return v; }
         virtual A A_value() { return a; }
@@ -355,14 +370,22 @@ TEST_SUBMODULE(virtual_functions, m) {
     class PyOverrideTest : public OverrideTest {
     public:
         using OverrideTest::OverrideTest;
-        std::string str_value() override { PYBIND11_OVERRIDE(std::string, OverrideTest, str_value); }
-        // Not allowed (uncommenting should hit a static_assert failure): we can't get a reference
-        // to a python numeric value, since we only copy values in the numeric type caster:
-//      std::string &str_ref() override { PYBIND11_OVERRIDE(std::string &, OverrideTest, str_ref); }
+        std::string str_value() override {
+            PYBIND11_OVERRIDE(std::string, OverrideTest, str_value);
+        }
+        // Not allowed (enabling the below should hit a static_assert failure): we can't get a
+        // reference to a python numeric value, since we only copy values in the numeric type
+        // caster:
+#ifdef PYBIND11_NEVER_DEFINED_EVER
+        std::string &str_ref() override {
+            PYBIND11_OVERRIDE(std::string &, OverrideTest, str_ref);
+        }
+#endif
         // But we can work around it like this:
     private:
         std::string _tmp;
         std::string str_ref_helper() { PYBIND11_OVERRIDE(std::string, OverrideTest, str_ref); }
+
     public:
         std::string &str_ref() override { return _tmp = str_ref_helper(); }
 
@@ -375,11 +398,20 @@ TEST_SUBMODULE(virtual_functions, m) {
     py::class_<OverrideTest, PyOverrideTest>(m, "OverrideTest")
         .def(py::init<const std::string &>())
         .def("str_value", &OverrideTest::str_value)
-//      .def("str_ref", &OverrideTest::str_ref)
+#ifdef PYBIND11_NEVER_DEFINED_EVER
+        .def("str_ref", &OverrideTest::str_ref)
+#endif
         .def("A_value", &OverrideTest::A_value)
         .def("A_ref", &OverrideTest::A_ref);
-}
 
+    py::class_<test_override_cache_helper,
+               test_override_cache_helper_trampoline,
+               std::shared_ptr<test_override_cache_helper>>(m, "test_override_cache_helper")
+        .def(py::init_alias<>())
+        .def("func", &test_override_cache_helper::func);
+
+    m.def("test_override_cache", test_override_cache);
+}
 
 // Inheriting virtual methods.  We do two versions here: the repeat-everything version and the
 // templated trampoline versions mentioned in docs/advanced.rst.
@@ -389,94 +421,107 @@ TEST_SUBMODULE(virtual_functions, m) {
 // properly (pybind11, sensibly, doesn't allow us to bind the same C++ class to
 // multiple python classes).
 class A_Repeat {
-#define A_METHODS \
-public: \
-    virtual int unlucky_number() = 0; \
-    virtual std::string say_something(unsigned times) { \
-        std::string s = ""; \
-        for (unsigned i = 0; i < times; ++i) \
-            s += "hi"; \
-        return s; \
-    } \
-    std::string say_everything() { \
-        return say_something(1) + " " + std::to_string(unlucky_number()); \
+#define A_METHODS                                                                                 \
+public:                                                                                           \
+    virtual int unlucky_number() = 0;                                                             \
+    virtual std::string say_something(unsigned times) {                                           \
+        std::string s = "";                                                                       \
+        for (unsigned i = 0; i < times; ++i)                                                      \
+            s += "hi";                                                                            \
+        return s;                                                                                 \
+    }                                                                                             \
+    std::string say_everything() {                                                                \
+        return say_something(1) + " " + std::to_string(unlucky_number());                         \
     }
-A_METHODS
+    A_METHODS
     A_Repeat() = default;
-    A_Repeat(const A_Repeat&) = delete;
+    A_Repeat(const A_Repeat &) = delete;
     virtual ~A_Repeat() = default;
 };
 class B_Repeat : public A_Repeat {
-#define B_METHODS \
-public: \
-    int unlucky_number() override { return 13; } \
-    std::string say_something(unsigned times) override { \
-        return "B says hi " + std::to_string(times) + " times"; \
-    } \
+#define B_METHODS                                                                                 \
+public:                                                                                           \
+    int unlucky_number() override { return 13; }                                                  \
+    std::string say_something(unsigned times) override {                                          \
+        return "B says hi " + std::to_string(times) + " times";                                   \
+    }                                                                                             \
     virtual double lucky_number() { return 7.0; }
-B_METHODS
+    B_METHODS
 };
 class C_Repeat : public B_Repeat {
-#define C_METHODS \
-public: \
-    int unlucky_number() override { return 4444; } \
+#define C_METHODS                                                                                 \
+public:                                                                                           \
+    int unlucky_number() override { return 4444; }                                                \
     double lucky_number() override { return 888; }
-C_METHODS
+    C_METHODS
 };
 class D_Repeat : public C_Repeat {
 #define D_METHODS // Nothing overridden.
-D_METHODS
+    D_METHODS
 };
 
 // Base classes for templated inheritance trampolines.  Identical to the repeat-everything version:
 class A_Tpl {
     A_METHODS;
     A_Tpl() = default;
-    A_Tpl(const A_Tpl&) = delete;
+    A_Tpl(const A_Tpl &) = delete;
     virtual ~A_Tpl() = default;
 };
-class B_Tpl : public A_Tpl { B_METHODS };
-class C_Tpl : public B_Tpl { C_METHODS };
-class D_Tpl : public C_Tpl { D_METHODS };
-
+class B_Tpl : public A_Tpl {
+    B_METHODS
+};
+class C_Tpl : public B_Tpl {
+    C_METHODS
+};
+class D_Tpl : public C_Tpl {
+    D_METHODS
+};
 
 // Inheritance approach 1: each trampoline gets every virtual method (11 in total)
 class PyA_Repeat : public A_Repeat {
 public:
     using A_Repeat::A_Repeat;
     int unlucky_number() override { PYBIND11_OVERRIDE_PURE(int, A_Repeat, unlucky_number, ); }
-    std::string say_something(unsigned times) override { PYBIND11_OVERRIDE(std::string, A_Repeat, say_something, times); }
+    std::string say_something(unsigned times) override {
+        PYBIND11_OVERRIDE(std::string, A_Repeat, say_something, times);
+    }
 };
 class PyB_Repeat : public B_Repeat {
 public:
     using B_Repeat::B_Repeat;
     int unlucky_number() override { PYBIND11_OVERRIDE(int, B_Repeat, unlucky_number, ); }
-    std::string say_something(unsigned times) override { PYBIND11_OVERRIDE(std::string, B_Repeat, say_something, times); }
+    std::string say_something(unsigned times) override {
+        PYBIND11_OVERRIDE(std::string, B_Repeat, say_something, times);
+    }
     double lucky_number() override { PYBIND11_OVERRIDE(double, B_Repeat, lucky_number, ); }
 };
 class PyC_Repeat : public C_Repeat {
 public:
     using C_Repeat::C_Repeat;
     int unlucky_number() override { PYBIND11_OVERRIDE(int, C_Repeat, unlucky_number, ); }
-    std::string say_something(unsigned times) override { PYBIND11_OVERRIDE(std::string, C_Repeat, say_something, times); }
+    std::string say_something(unsigned times) override {
+        PYBIND11_OVERRIDE(std::string, C_Repeat, say_something, times);
+    }
     double lucky_number() override { PYBIND11_OVERRIDE(double, C_Repeat, lucky_number, ); }
 };
 class PyD_Repeat : public D_Repeat {
 public:
     using D_Repeat::D_Repeat;
     int unlucky_number() override { PYBIND11_OVERRIDE(int, D_Repeat, unlucky_number, ); }
-    std::string say_something(unsigned times) override { PYBIND11_OVERRIDE(std::string, D_Repeat, say_something, times); }
+    std::string say_something(unsigned times) override {
+        PYBIND11_OVERRIDE(std::string, D_Repeat, say_something, times);
+    }
     double lucky_number() override { PYBIND11_OVERRIDE(double, D_Repeat, lucky_number, ); }
 };
 
 // Inheritance approach 2: templated trampoline classes.
 //
 // Advantages:
-// - we have only 2 (template) class and 4 method declarations (one per virtual method, plus one for
-//   any override of a pure virtual method), versus 4 classes and 6 methods (MI) or 4 classes and 11
-//   methods (repeat).
-// - Compared to MI, we also don't have to change the non-trampoline inheritance to virtual, and can
-//   properly inherit constructors.
+// - we have only 2 (template) class and 4 method declarations (one per virtual method, plus one
+//   for any override of a pure virtual method), versus 4 classes and 6 methods (MI) or 4 classes
+//   and 11 methods (repeat).
+// - Compared to MI, we also don't have to change the non-trampoline inheritance to virtual, and
+//   can properly inherit constructors.
 //
 // Disadvantage:
 // - the compiler must still generate and compile 14 different methods (more, even, than the 11
@@ -488,7 +533,9 @@ class PyA_Tpl : public Base {
 public:
     using Base::Base; // Inherit constructors
     int unlucky_number() override { PYBIND11_OVERRIDE_PURE(int, Base, unlucky_number, ); }
-    std::string say_something(unsigned times) override { PYBIND11_OVERRIDE(std::string, Base, say_something, times); }
+    std::string say_something(unsigned times) override {
+        PYBIND11_OVERRIDE(std::string, Base, say_something, times);
+    }
 };
 template <class Base = B_Tpl>
 class PyB_Tpl : public PyA_Tpl<Base> {
@@ -498,8 +545,8 @@ public:
     int unlucky_number() override { PYBIND11_OVERRIDE(int, Base, unlucky_number, ); }
     double lucky_number() override { PYBIND11_OVERRIDE(double, Base, lucky_number, ); }
 };
-// Since C_Tpl and D_Tpl don't declare any new virtual methods, we don't actually need these (we can
-// use PyB_Tpl<C_Tpl> and PyB_Tpl<D_Tpl> for the trampoline classes instead):
+// Since C_Tpl and D_Tpl don't declare any new virtual methods, we don't actually need these
+// (we can use PyB_Tpl<C_Tpl> and PyB_Tpl<D_Tpl> for the trampoline classes instead):
 /*
 template <class Base = C_Tpl> class PyC_Tpl : public PyB_Tpl<Base> {
 public:
@@ -523,10 +570,8 @@ void initialize_inherited_virtuals(py::module_ &m) {
     py::class_<B_Repeat, A_Repeat, PyB_Repeat>(m, "B_Repeat")
         .def(py::init<>())
         .def("lucky_number", &B_Repeat::lucky_number);
-    py::class_<C_Repeat, B_Repeat, PyC_Repeat>(m, "C_Repeat")
-        .def(py::init<>());
-    py::class_<D_Repeat, C_Repeat, PyD_Repeat>(m, "D_Repeat")
-        .def(py::init<>());
+    py::class_<C_Repeat, B_Repeat, PyC_Repeat>(m, "C_Repeat").def(py::init<>());
+    py::class_<D_Repeat, C_Repeat, PyD_Repeat>(m, "D_Repeat").def(py::init<>());
 
     // test_
     // Method 2: Templated trampolines
@@ -538,11 +583,8 @@ void initialize_inherited_virtuals(py::module_ &m) {
     py::class_<B_Tpl, A_Tpl, PyB_Tpl<>>(m, "B_Tpl")
         .def(py::init<>())
         .def("lucky_number", &B_Tpl::lucky_number);
-    py::class_<C_Tpl, B_Tpl, PyB_Tpl<C_Tpl>>(m, "C_Tpl")
-        .def(py::init<>());
-    py::class_<D_Tpl, C_Tpl, PyB_Tpl<D_Tpl>>(m, "D_Tpl")
-        .def(py::init<>());
-
+    py::class_<C_Tpl, B_Tpl, PyB_Tpl<C_Tpl>>(m, "C_Tpl").def(py::init<>());
+    py::class_<D_Tpl, C_Tpl, PyB_Tpl<D_Tpl>>(m, "D_Tpl").def(py::init<>());
 
     // Fix issue #1454 (crash when acquiring/releasing GIL on another thread in Python 2.7)
     m.def("test_gil", &test_gil);
diff --git a/ext/pybind11/tests/test_virtual_functions.py b/ext/pybind11/tests/test_virtual_functions.py
index 0b550992f1..4d00d3690d 100644
--- a/ext/pybind11/tests/test_virtual_functions.py
+++ b/ext/pybind11/tests/test_virtual_functions.py
@@ -1,4 +1,3 @@
-# -*- coding: utf-8 -*-
 import pytest
 
 import env  # noqa: F401
@@ -10,12 +9,12 @@ from pybind11_tests import ConstructorStats  # noqa: E402
 def test_override(capture, msg):
     class ExtendedExampleVirt(m.ExampleVirt):
         def __init__(self, state):
-            super(ExtendedExampleVirt, self).__init__(state + 1)
+            super().__init__(state + 1)
             self.data = "Hello world"
 
         def run(self, value):
-            print("ExtendedExampleVirt::run(%i), calling parent.." % value)
-            return super(ExtendedExampleVirt, self).run(value + 1)
+            print(f"ExtendedExampleVirt::run({value}), calling parent..")
+            return super().run(value + 1)
 
         def run_bool(self):
             print("ExtendedExampleVirt::run_bool()")
@@ -25,11 +24,11 @@ def test_override(capture, msg):
             return "override1"
 
         def pure_virtual(self):
-            print("ExtendedExampleVirt::pure_virtual(): %s" % self.data)
+            print(f"ExtendedExampleVirt::pure_virtual(): {self.data}")
 
     class ExtendedExampleVirt2(ExtendedExampleVirt):
         def __init__(self, state):
-            super(ExtendedExampleVirt2, self).__init__(state + 1)
+            super().__init__(state + 1)
 
         def get_string2(self):
             return "override2"
@@ -41,7 +40,7 @@ def test_override(capture, msg):
         capture
         == """
         Original implementation of ExampleVirt::run(state=10, value=20, str1=default1, str2=default2)
-    """  # noqa: E501 line too long
+    """
     )
 
     with pytest.raises(RuntimeError) as excinfo:
@@ -59,7 +58,7 @@ def test_override(capture, msg):
         == """
         ExtendedExampleVirt::run(20), calling parent..
         Original implementation of ExampleVirt::run(state=11, value=21, str1=override1, str2=default2)
-    """  # noqa: E501 line too long
+    """
     )
     with capture:
         assert m.runExampleVirtBool(ex12p) is False
@@ -76,7 +75,7 @@ def test_override(capture, msg):
         == """
         ExtendedExampleVirt::run(50), calling parent..
         Original implementation of ExampleVirt::run(state=17, value=51, str1=override1, str2=override2)
-    """  # noqa: E501 line too long
+    """
     )
 
     cstats = ConstructorStats.get(m.ExampleVirt)
@@ -97,7 +96,7 @@ def test_alias_delay_initialization1(capture):
 
     class B(m.A):
         def __init__(self):
-            super(B, self).__init__()
+            super().__init__()
 
         def f(self):
             print("In python f()")
@@ -137,7 +136,7 @@ def test_alias_delay_initialization2(capture):
 
     class B2(m.A2):
         def __init__(self):
-            super(B2, self).__init__()
+            super().__init__()
 
         def f(self):
             print("In python B2.f()")
@@ -245,7 +244,7 @@ def test_dispatch_issue(msg):
     class PyClass2(m.DispatchIssue):
         def dispatch(self):
             with pytest.raises(RuntimeError) as excinfo:
-                super(PyClass2, self).dispatch()
+                super().dispatch()
             assert (
                 msg(excinfo.value)
                 == 'Tried to call pure virtual function "Base::dispatch"'
@@ -262,7 +261,7 @@ def test_recursive_dispatch_issue(msg):
 
     class Data(m.Data):
         def __init__(self, value):
-            super(Data, self).__init__()
+            super().__init__()
             self.value = value
 
     class Adder(m.Adder):
@@ -439,3 +438,22 @@ def test_issue_1454():
     # Fix issue #1454 (crash when acquiring/releasing GIL on another thread in Python 2.7)
     m.test_gil()
     m.test_gil_from_thread()
+
+
+def test_python_override():
+    def func():
+        class Test(m.test_override_cache_helper):
+            def func(self):
+                return 42
+
+        return Test()
+
+    def func2():
+        class Test(m.test_override_cache_helper):
+            pass
+
+        return Test()
+
+    for _ in range(1500):
+        assert m.test_override_cache(func()) == 42
+        assert m.test_override_cache(func2()) == 0
diff --git a/ext/pybind11/tools/FindCatch.cmake b/ext/pybind11/tools/FindCatch.cmake
index 4d6bffcf68..57bba58b30 100644
--- a/ext/pybind11/tools/FindCatch.cmake
+++ b/ext/pybind11/tools/FindCatch.cmake
@@ -9,6 +9,8 @@
 #  CATCH_INCLUDE_DIR      - path to catch.hpp
 #  CATCH_VERSION          - version number
 
+option(DOWNLOAD_CATCH "Download catch2 if not found")
+
 if(NOT Catch_FIND_VERSION)
   message(FATAL_ERROR "A version number must be specified.")
 elseif(Catch_FIND_REQUIRED)
diff --git a/ext/pybind11/tools/FindPythonLibsNew.cmake b/ext/pybind11/tools/FindPythonLibsNew.cmake
index 3605aebcf3..ce558d4ece 100644
--- a/ext/pybind11/tools/FindPythonLibsNew.cmake
+++ b/ext/pybind11/tools/FindPythonLibsNew.cmake
@@ -92,7 +92,7 @@ endif()
 
 # Use the Python interpreter to find the libs.
 if(NOT PythonLibsNew_FIND_VERSION)
-  set(PythonLibsNew_FIND_VERSION "")
+  set(PythonLibsNew_FIND_VERSION "3.6")
 endif()
 
 find_package(PythonInterp ${PythonLibsNew_FIND_VERSION} ${_pythonlibs_required}
@@ -112,12 +112,26 @@ endif()
 # VERSION. VERSION will typically be like "2.7" on unix, and "27" on windows.
 execute_process(
   COMMAND
-    "${PYTHON_EXECUTABLE}" "-c" "from distutils import sysconfig as s;import sys;import struct;
+    "${PYTHON_EXECUTABLE}" "-c" "
+import sys;import struct;
+import sysconfig as s
+USE_SYSCONFIG = sys.version_info >= (3, 10)
+if not USE_SYSCONFIG:
+    from distutils import sysconfig as ds
 print('.'.join(str(v) for v in sys.version_info));
 print(sys.prefix);
-print(s.get_python_inc(plat_specific=True));
-print(s.get_python_lib(plat_specific=True));
-print(s.get_config_var('EXT_SUFFIX') or s.get_config_var('SO'));
+if USE_SYSCONFIG:
+    scheme = s.get_default_scheme()
+    if scheme == 'posix_local':
+        # Debian's default scheme installs to /usr/local/ but we want to find headers in /usr/
+        scheme = 'posix_prefix'
+    print(s.get_path('platinclude', scheme))
+    print(s.get_path('platlib'))
+    print(s.get_config_var('EXT_SUFFIX') or s.get_config_var('SO'))
+else:
+    print(ds.get_python_inc(plat_specific=True));
+    print(ds.get_python_lib(plat_specific=True));
+    print(ds.get_config_var('EXT_SUFFIX') or ds.get_config_var('SO'));
 print(hasattr(sys, 'gettotalrefcount')+0);
 print(struct.calcsize('@P'));
 print(s.get_config_var('LDVERSION') or s.get_config_var('VERSION'));
@@ -137,26 +151,40 @@ if(NOT _PYTHON_SUCCESS MATCHES 0)
   return()
 endif()
 
+option(
+  PYBIND11_PYTHONLIBS_OVERWRITE
+  "Overwrite cached values read from Python library (classic search). Turn off if cross-compiling and manually setting these values."
+  ON)
+# Can manually set values when cross-compiling
+macro(_PYBIND11_GET_IF_UNDEF lst index name)
+  if(PYBIND11_PYTHONLIBS_OVERWRITE OR NOT DEFINED "${name}")
+    list(GET "${lst}" "${index}" "${name}")
+  endif()
+endmacro()
+
 # Convert the process output into a list
 if(WIN32)
   string(REGEX REPLACE "\\\\" "/" _PYTHON_VALUES ${_PYTHON_VALUES})
 endif()
 string(REGEX REPLACE ";" "\\\\;" _PYTHON_VALUES ${_PYTHON_VALUES})
 string(REGEX REPLACE "\n" ";" _PYTHON_VALUES ${_PYTHON_VALUES})
-list(GET _PYTHON_VALUES 0 _PYTHON_VERSION_LIST)
-list(GET _PYTHON_VALUES 1 PYTHON_PREFIX)
-list(GET _PYTHON_VALUES 2 PYTHON_INCLUDE_DIR)
-list(GET _PYTHON_VALUES 3 PYTHON_SITE_PACKAGES)
-list(GET _PYTHON_VALUES 4 PYTHON_MODULE_EXTENSION)
-list(GET _PYTHON_VALUES 5 PYTHON_IS_DEBUG)
-list(GET _PYTHON_VALUES 6 PYTHON_SIZEOF_VOID_P)
-list(GET _PYTHON_VALUES 7 PYTHON_LIBRARY_SUFFIX)
-list(GET _PYTHON_VALUES 8 PYTHON_LIBDIR)
-list(GET _PYTHON_VALUES 9 PYTHON_MULTIARCH)
+_pybind11_get_if_undef(_PYTHON_VALUES 0 _PYTHON_VERSION_LIST)
+_pybind11_get_if_undef(_PYTHON_VALUES 1 PYTHON_PREFIX)
+_pybind11_get_if_undef(_PYTHON_VALUES 2 PYTHON_INCLUDE_DIR)
+_pybind11_get_if_undef(_PYTHON_VALUES 3 PYTHON_SITE_PACKAGES)
+_pybind11_get_if_undef(_PYTHON_VALUES 4 PYTHON_MODULE_EXTENSION)
+_pybind11_get_if_undef(_PYTHON_VALUES 5 PYTHON_IS_DEBUG)
+_pybind11_get_if_undef(_PYTHON_VALUES 6 PYTHON_SIZEOF_VOID_P)
+_pybind11_get_if_undef(_PYTHON_VALUES 7 PYTHON_LIBRARY_SUFFIX)
+_pybind11_get_if_undef(_PYTHON_VALUES 8 PYTHON_LIBDIR)
+_pybind11_get_if_undef(_PYTHON_VALUES 9 PYTHON_MULTIARCH)
 
 # Make sure the Python has the same pointer-size as the chosen compiler
 # Skip if CMAKE_SIZEOF_VOID_P is not defined
-if(CMAKE_SIZEOF_VOID_P AND (NOT "${PYTHON_SIZEOF_VOID_P}" STREQUAL "${CMAKE_SIZEOF_VOID_P}"))
+# This should be skipped for (non-Apple) cross-compiles (like EMSCRIPTEN)
+if(NOT CMAKE_CROSSCOMPILING
+   AND CMAKE_SIZEOF_VOID_P
+   AND (NOT "${PYTHON_SIZEOF_VOID_P}" STREQUAL "${CMAKE_SIZEOF_VOID_P}"))
   if(PythonLibsNew_FIND_REQUIRED)
     math(EXPR _PYTHON_BITS "${PYTHON_SIZEOF_VOID_P} * 8")
     math(EXPR _CMAKE_BITS "${CMAKE_SIZEOF_VOID_P} * 8")
@@ -180,7 +208,9 @@ string(REGEX REPLACE "\\\\" "/" PYTHON_PREFIX "${PYTHON_PREFIX}")
 string(REGEX REPLACE "\\\\" "/" PYTHON_INCLUDE_DIR "${PYTHON_INCLUDE_DIR}")
 string(REGEX REPLACE "\\\\" "/" PYTHON_SITE_PACKAGES "${PYTHON_SITE_PACKAGES}")
 
-if(CMAKE_HOST_WIN32)
+if(DEFINED PYTHON_LIBRARY)
+  # Don't write to PYTHON_LIBRARY if it's already set
+elseif(CMAKE_HOST_WIN32)
   set(PYTHON_LIBRARY "${PYTHON_PREFIX}/libs/python${PYTHON_LIBRARY_SUFFIX}.lib")
 
   # when run in a venv, PYTHON_PREFIX points to it. But the libraries remain in the
@@ -246,7 +276,7 @@ if(NOT PYTHON_DEBUG_LIBRARY)
 endif()
 set(PYTHON_DEBUG_LIBRARIES "${PYTHON_DEBUG_LIBRARY}")
 
-find_package_message(PYTHON "Found PythonLibs: ${PYTHON_LIBRARY}"
+find_package_message(PYTHON "Found PythonLibs: ${PYTHON_LIBRARIES}"
                      "${PYTHON_EXECUTABLE}${PYTHON_VERSION_STRING}")
 
 set(PYTHONLIBS_FOUND TRUE)
diff --git a/ext/pybind11/tools/JoinPaths.cmake b/ext/pybind11/tools/JoinPaths.cmake
new file mode 100644
index 0000000000..c68d91b84d
--- /dev/null
+++ b/ext/pybind11/tools/JoinPaths.cmake
@@ -0,0 +1,23 @@
+# This module provides function for joining paths
+# known from most languages
+#
+# SPDX-License-Identifier: (MIT OR CC0-1.0)
+# Copyright 2020 Jan Tojnar
+# https://github.com/jtojnar/cmake-snips
+#
+# Modelled after Python’s os.path.join
+# https://docs.python.org/3.7/library/os.path.html#os.path.join
+# Windows not supported
+function(join_paths joined_path first_path_segment)
+    set(temp_path "${first_path_segment}")
+    foreach(current_segment IN LISTS ARGN)
+        if(NOT ("${current_segment}" STREQUAL ""))
+            if(IS_ABSOLUTE "${current_segment}")
+                set(temp_path "${current_segment}")
+            else()
+                set(temp_path "${temp_path}/${current_segment}")
+            endif()
+        endif()
+    endforeach()
+    set(${joined_path} "${temp_path}" PARENT_SCOPE)
+endfunction()
diff --git a/ext/pybind11/tools/codespell_ignore_lines_from_errors.py b/ext/pybind11/tools/codespell_ignore_lines_from_errors.py
new file mode 100644
index 0000000000..5403ec3ad0
--- /dev/null
+++ b/ext/pybind11/tools/codespell_ignore_lines_from_errors.py
@@ -0,0 +1,35 @@
+"""Simple script for rebuilding .codespell-ignore-lines
+
+Usage:
+
+cat < /dev/null > .codespell-ignore-lines
+pre-commit run --all-files codespell >& /tmp/codespell_errors.txt
+python3 tools/codespell_ignore_lines_from_errors.py /tmp/codespell_errors.txt > .codespell-ignore-lines
+
+git diff to review changes, then commit, push.
+"""
+
+import sys
+from typing import List
+
+
+def run(args: List[str]) -> None:
+    assert len(args) == 1, "codespell_errors.txt"
+    cache = {}
+    done = set()
+    for line in sorted(open(args[0]).read().splitlines()):
+        i = line.find(" ==> ")
+        if i > 0:
+            flds = line[:i].split(":")
+            if len(flds) >= 2:
+                filename, line_num = flds[:2]
+                if filename not in cache:
+                    cache[filename] = open(filename).read().splitlines()
+                supp = cache[filename][int(line_num) - 1]
+                if supp not in done:
+                    print(supp)
+                    done.add(supp)
+
+
+if __name__ == "__main__":
+    run(args=sys.argv[1:])
diff --git a/ext/pybind11/tools/libsize.py b/ext/pybind11/tools/libsize.py
index 1551477e66..1ac9afbe81 100644
--- a/ext/pybind11/tools/libsize.py
+++ b/ext/pybind11/tools/libsize.py
@@ -1,6 +1,3 @@
-# -*- coding: utf-8 -*-
-from __future__ import division, print_function
-
 import os
 import sys
 
@@ -16,7 +13,7 @@ lib = sys.argv[1]
 save = sys.argv[2]
 
 if not os.path.exists(lib):
-    sys.exit("Error: requested file ({}) does not exist".format(lib))
+    sys.exit(f"Error: requested file ({lib}) does not exist")
 
 libsize = os.path.getsize(lib)
 
@@ -31,7 +28,7 @@ if os.path.exists(save):
         if change == 0:
             print(" (no change)")
         else:
-            print(" (change of {:+} bytes = {:+.2%})".format(change, change / oldsize))
+            print(f" (change of {change:+} bytes = {change / oldsize:+.2%})")
 else:
     print()
 
diff --git a/ext/pybind11/tools/make_changelog.py b/ext/pybind11/tools/make_changelog.py
index 629c284d39..b5bd832940 100755
--- a/ext/pybind11/tools/make_changelog.py
+++ b/ext/pybind11/tools/make_changelog.py
@@ -1,5 +1,4 @@
 #!/usr/bin/env python3
-# -*- coding: utf-8 -*-
 
 import re
 
@@ -32,8 +31,10 @@ issues = (issue for page in issues_pages for issue in page)
 missing = []
 
 for issue in issues:
-    changelog = ENTRY.findall(issue.body)
-    if changelog:
+    changelog = ENTRY.findall(issue.body or "")
+    if not changelog or not changelog[0]:
+        missing.append(issue)
+    else:
         (msg,) = changelog
         if not msg.startswith("* "):
             msg = "* " + msg
@@ -45,9 +46,6 @@ for issue in issues:
         print(Syntax(msg, "rst", theme="ansi_light", word_wrap=True))
         print()
 
-    else:
-        missing.append(issue)
-
 if missing:
     print()
     print("[blue]" + "-" * 30)
diff --git a/ext/pybind11/tools/pybind11.pc.in b/ext/pybind11/tools/pybind11.pc.in
new file mode 100644
index 0000000000..402f0b357d
--- /dev/null
+++ b/ext/pybind11/tools/pybind11.pc.in
@@ -0,0 +1,7 @@
+prefix=@prefix_for_pc_file@
+includedir=@includedir_for_pc_file@
+
+Name: @PROJECT_NAME@
+Description: Seamless operability between C++11 and Python
+Version: @PROJECT_VERSION@
+Cflags: -I${includedir}
diff --git a/ext/pybind11/tools/pybind11Common.cmake b/ext/pybind11/tools/pybind11Common.cmake
index 69ac4b0f61..0c985bc8e5 100644
--- a/ext/pybind11/tools/pybind11Common.cmake
+++ b/ext/pybind11/tools/pybind11Common.cmake
@@ -8,7 +8,6 @@ Adds the following targets::
     pybind11::lto - Link time optimizations (manual selection)
     pybind11::thin_lto - Link time optimizations (manual selection)
     pybind11::python_link_helper - Adds link to Python libraries
-    pybind11::python2_no_register - Avoid warning/error with Python 2 + C++14/7
     pybind11::windows_extras - MSVC bigobj and mp for building multithreaded
     pybind11::opt_size - avoid optimizations that increase code size
 
@@ -66,31 +65,6 @@ set_property(
   APPEND
   PROPERTY INTERFACE_LINK_LIBRARIES pybind11::pybind11)
 
-# ----------------------- no register ----------------------
-
-# Workaround for Python 2.7 and C++17 (C++14 as a warning) incompatibility
-# This adds the flags -Wno-register and -Wno-deprecated-register if the compiler
-# is Clang 3.9+ or AppleClang and the compile language is CXX, or /wd5033 for MSVC (all languages,
-# since MSVC didn't recognize COMPILE_LANGUAGE until CMake 3.11+).
-
-add_library(pybind11::python2_no_register INTERFACE IMPORTED ${optional_global})
-set(clang_4plus
-    "$<AND:$<CXX_COMPILER_ID:Clang>,$<NOT:$<VERSION_LESS:$<CXX_COMPILER_VERSION>,3.9>>>")
-set(no_register "$<OR:${clang_4plus},$<CXX_COMPILER_ID:AppleClang>>")
-
-if(MSVC AND CMAKE_VERSION VERSION_LESS 3.11)
-  set(cxx_no_register "${no_register}")
-else()
-  set(cxx_no_register "$<AND:$<COMPILE_LANGUAGE:CXX>,${no_register}>")
-endif()
-
-set(msvc "$<CXX_COMPILER_ID:MSVC>")
-
-set_property(
-  TARGET pybind11::python2_no_register
-  PROPERTY INTERFACE_COMPILE_OPTIONS
-           "$<${cxx_no_register}:-Wno-register;-Wno-deprecated-register>" "$<${msvc}:/wd5033>")
-
 # --------------------------- link helper ---------------------------
 
 add_library(pybind11::python_link_helper IMPORTED INTERFACE ${optional_global})
@@ -122,7 +96,7 @@ if(MSVC) # That's also clang-cl
   set_property(
     TARGET pybind11::windows_extras
     APPEND
-    PROPERTY INTERFACE_COMPILE_OPTIONS /bigobj)
+    PROPERTY INTERFACE_COMPILE_OPTIONS $<$<COMPILE_LANGUAGE:CXX>:/bigobj>)
 
   # /MP enables multithreaded builds (relevant when there are many files) for MSVC
   if("${CMAKE_CXX_COMPILER_ID}" STREQUAL "MSVC") # no Clang no Intel
@@ -318,17 +292,35 @@ function(_pybind11_generate_lto target prefer_thin_lto)
       set(cxx_append ";-fno-fat-lto-objects")
     endif()
 
-    if(CMAKE_CXX_COMPILER_ID MATCHES "Clang" AND prefer_thin_lto)
+    if(CMAKE_SYSTEM_PROCESSOR MATCHES "ppc64le" OR CMAKE_SYSTEM_PROCESSOR MATCHES "mips64")
+      set(NO_FLTO_ARCH TRUE)
+    else()
+      set(NO_FLTO_ARCH FALSE)
+    endif()
+
+    if(CMAKE_CXX_COMPILER_ID MATCHES "Clang"
+       AND prefer_thin_lto
+       AND NOT NO_FLTO_ARCH)
       _pybind11_return_if_cxx_and_linker_flags_work(
         HAS_FLTO_THIN "-flto=thin${cxx_append}" "-flto=thin${linker_append}"
         PYBIND11_LTO_CXX_FLAGS PYBIND11_LTO_LINKER_FLAGS)
     endif()
 
-    if(NOT HAS_FLTO_THIN)
+    if(NOT HAS_FLTO_THIN AND NOT NO_FLTO_ARCH)
       _pybind11_return_if_cxx_and_linker_flags_work(
         HAS_FLTO "-flto${cxx_append}" "-flto${linker_append}" PYBIND11_LTO_CXX_FLAGS
         PYBIND11_LTO_LINKER_FLAGS)
     endif()
+  elseif(CMAKE_CXX_COMPILER_ID MATCHES "IntelLLVM")
+    # IntelLLVM equivalent to LTO is called IPO; also IntelLLVM is WIN32/UNIX
+    # WARNING/HELP WANTED: This block of code is currently not covered by pybind11 GitHub Actions!
+    if(WIN32)
+      _pybind11_return_if_cxx_and_linker_flags_work(
+        HAS_INTEL_IPO "-Qipo" "-Qipo" PYBIND11_LTO_CXX_FLAGS PYBIND11_LTO_LINKER_FLAGS)
+    else()
+      _pybind11_return_if_cxx_and_linker_flags_work(
+        HAS_INTEL_IPO "-ipo" "-ipo" PYBIND11_LTO_CXX_FLAGS PYBIND11_LTO_LINKER_FLAGS)
+    endif()
   elseif(CMAKE_CXX_COMPILER_ID MATCHES "Intel")
     # Intel equivalent to LTO is called IPO
     _pybind11_return_if_cxx_and_linker_flags_work(HAS_INTEL_IPO "-ipo" "-ipo"
diff --git a/ext/pybind11/tools/pybind11Config.cmake.in b/ext/pybind11/tools/pybind11Config.cmake.in
index 8f8701be70..9383e8c671 100644
--- a/ext/pybind11/tools/pybind11Config.cmake.in
+++ b/ext/pybind11/tools/pybind11Config.cmake.in
@@ -13,7 +13,7 @@ This module sets the following variables in your project:
 ``pybind11_VERSION``
   pybind11 version in format Major.Minor.Release
 ``pybind11_VERSION_TYPE``
-  pybind11 version type (dev, release)
+  pybind11 version type (``dev*`` or empty for a release)
 ``pybind11_INCLUDE_DIRS``
   Directories where pybind11 and python headers are located.
 ``pybind11_INCLUDE_DIR``
@@ -51,8 +51,6 @@ complex applications, and they are available in all modes:
   Python headers too.
 ``pybind11::python_link_helper``
   Just the "linking" part of ``pybind11:module``, for CMake < 3.15.
-``pybind11::python2_no_register``
-  Quiets the warning/error when mixing C++14+ and Python 2, also included in ``pybind11::module``.
 ``pybind11::thin_lto``
   An alternative to ``INTERPROCEDURAL_OPTIMIZATION``.
 ``pybind11::lto``
@@ -88,7 +86,7 @@ you can either use the basic targets, or use the FindPython tools:
   set_target_properties(MyModule2 PROPERTIES
                                   INTERPROCEDURAL_OPTIMIZATION ON
                                   CXX_VISIBILITY_PRESET ON
-                                  VISIBLITY_INLINES_HIDDEN ON)
+                                  VISIBILITY_INLINES_HIDDEN ON)
 
 If you build targets yourself, you may be interested in stripping the output
 for reduced size; this is the one other feature that the helper function gives you.
@@ -195,7 +193,7 @@ Using ``find_package`` with version info is not recommended except for release v
 .. code-block:: cmake
 
   find_package(pybind11 CONFIG)
-  find_package(pybind11 2.0 EXACT CONFIG REQUIRED)
+  find_package(pybind11 2.9 EXACT CONFIG REQUIRED)
 
 #]=============================================================================]
 @PACKAGE_INIT@
@@ -228,6 +226,6 @@ include("${CMAKE_CURRENT_LIST_DIR}/pybind11Common.cmake")
 if(NOT pybind11_FIND_QUIETLY)
   message(
     STATUS
-      "Found pybind11: ${pybind11_INCLUDE_DIR} (found version \"${pybind11_VERSION}\" ${pybind11_VERSION_TYPE})"
+      "Found pybind11: ${pybind11_INCLUDE_DIR} (found version \"${pybind11_VERSION}${pybind11_VERSION_TYPE}\")"
   )
 endif()
diff --git a/ext/pybind11/tools/pybind11NewTools.cmake b/ext/pybind11/tools/pybind11NewTools.cmake
index 9e2fd2b40d..7d7424a790 100644
--- a/ext/pybind11/tools/pybind11NewTools.cmake
+++ b/ext/pybind11/tools/pybind11NewTools.cmake
@@ -9,7 +9,7 @@ if(CMAKE_VERSION VERSION_LESS 3.12)
   message(FATAL_ERROR "You cannot use the new FindPython module with CMake < 3.12")
 endif()
 
-include_guard(GLOBAL)
+include_guard(DIRECTORY)
 
 get_property(
   is_config
@@ -22,9 +22,7 @@ else()
   set(_pybind11_quiet "")
 endif()
 
-if(NOT Python_FOUND
-   AND NOT Python3_FOUND
-   AND NOT Python2_FOUND)
+if(NOT Python_FOUND AND NOT Python3_FOUND)
   if(NOT DEFINED Python_FIND_IMPLEMENTATIONS)
     set(Python_FIND_IMPLEMENTATIONS CPython PyPy)
   endif()
@@ -34,7 +32,7 @@ if(NOT Python_FOUND
     set(Python_ROOT_DIR "$ENV{pythonLocation}")
   endif()
 
-  find_package(Python REQUIRED COMPONENTS Interpreter Development ${_pybind11_quiet})
+  find_package(Python 3.6 REQUIRED COMPONENTS Interpreter Development ${_pybind11_quiet})
 
   # If we are in submodule mode, export the Python targets to global targets.
   # If this behavior is not desired, FindPython _before_ pybind11.
@@ -51,19 +49,10 @@ if(Python_FOUND)
   set(_Python
       Python
       CACHE INTERNAL "" FORCE)
-elseif(Python3_FOUND AND NOT Python2_FOUND)
+elseif(Python3_FOUND)
   set(_Python
       Python3
       CACHE INTERNAL "" FORCE)
-elseif(Python2_FOUND AND NOT Python3_FOUND)
-  set(_Python
-      Python2
-      CACHE INTERNAL "" FORCE)
-else()
-  message(AUTHOR_WARNING "Python2 and Python3 both present, pybind11 in "
-                         "PYBIND11_NOPYTHON mode (manually activate to silence warning)")
-  set(_pybind11_nopython ON)
-  return()
 endif()
 
 if(PYBIND11_MASTER_PROJECT)
@@ -110,7 +99,7 @@ if(NOT DEFINED PYTHON_MODULE_EXTENSION)
   execute_process(
     COMMAND
       "${${_Python}_EXECUTABLE}" "-c"
-      "from distutils import sysconfig as s;print(s.get_config_var('EXT_SUFFIX') or s.get_config_var('SO'))"
+      "import sys, importlib; s = importlib.import_module('distutils.sysconfig' if sys.version_info < (3, 10) else 'sysconfig'); print(s.get_config_var('EXT_SUFFIX') or s.get_config_var('SO'))"
     OUTPUT_VARIABLE _PYTHON_MODULE_EXTENSION
     ERROR_VARIABLE _PYTHON_MODULE_EXTENSION_ERR
     OUTPUT_STRIP_TRAILING_WHITESPACE)
@@ -137,7 +126,7 @@ if(PYTHON_IS_DEBUG)
     PROPERTY INTERFACE_COMPILE_DEFINITIONS Py_DEBUG)
 endif()
 
-# Check on every access - since Python2 and Python3 could have been used - do nothing in that case.
+# Check on every access - since Python can change - do nothing in that case.
 
 if(DEFINED ${_Python}_INCLUDE_DIRS)
   # Only add Python for build - must be added during the import for config
@@ -159,13 +148,6 @@ if(DEFINED ${_Python}_INCLUDE_DIRS)
       CACHE INTERNAL "Directories where pybind11 and possibly Python headers are located")
 endif()
 
-if(DEFINED ${_Python}_VERSION AND ${_Python}_VERSION VERSION_LESS 3)
-  set_property(
-    TARGET pybind11::pybind11
-    APPEND
-    PROPERTY INTERFACE_LINK_LIBRARIES pybind11::python2_no_register)
-endif()
-
 # In CMake 3.18+, you can find these separately, so include an if
 if(TARGET ${_Python}::Python)
   set_property(
@@ -205,8 +187,6 @@ function(pybind11_add_module target_name)
     python_add_library(${target_name} ${lib_type} ${ARG_UNPARSED_ARGUMENTS})
   elseif("${_Python}" STREQUAL "Python3")
     python3_add_library(${target_name} ${lib_type} ${ARG_UNPARSED_ARGUMENTS})
-  elseif("${_Python}" STREQUAL "Python2")
-    python2_add_library(${target_name} ${lib_type} ${ARG_UNPARSED_ARGUMENTS})
   else()
     message(FATAL_ERROR "Cannot detect FindPython version: ${_Python}")
   endif()
@@ -223,10 +203,6 @@ function(pybind11_add_module target_name)
     target_link_libraries(${target_name} PRIVATE pybind11::windows_extras)
   endif()
 
-  if(DEFINED ${_Python}_VERSION AND ${_Python}_VERSION VERSION_LESS 3)
-    target_link_libraries(${target_name} PRIVATE pybind11::python2_no_register)
-  endif()
-
   # -fvisibility=hidden is required to allow multiple modules compiled against
   # different pybind versions to work properly, and for some features (e.g.
   # py::module_local).  We force it on everything inside the `pybind11`
@@ -257,7 +233,9 @@ function(pybind11_add_module target_name)
     endif()
   endif()
 
-  if(NOT MSVC AND NOT ${CMAKE_BUILD_TYPE} MATCHES Debug|RelWithDebInfo)
+  # Use case-insensitive comparison to match the result of $<CONFIG:cfgs>
+  string(TOUPPER "${CMAKE_BUILD_TYPE}" uppercase_CMAKE_BUILD_TYPE)
+  if(NOT MSVC AND NOT "${uppercase_CMAKE_BUILD_TYPE}" MATCHES DEBUG|RELWITHDEBINFO)
     # Strip unnecessary sections of the binary on Linux/macOS
     pybind11_strip(${target_name})
   endif()
diff --git a/ext/pybind11/tools/pybind11Tools.cmake b/ext/pybind11/tools/pybind11Tools.cmake
index c255e5cfd8..66ad00a478 100644
--- a/ext/pybind11/tools/pybind11Tools.cmake
+++ b/ext/pybind11/tools/pybind11Tools.cmake
@@ -43,7 +43,7 @@ endif()
 
 # A user can set versions manually too
 set(Python_ADDITIONAL_VERSIONS
-    "3.11;3.10;3.9;3.8;3.7;3.6;3.5;3.4"
+    "3.11;3.10;3.9;3.8;3.7;3.6"
     CACHE INTERNAL "")
 
 list(APPEND CMAKE_MODULE_PATH "${CMAKE_CURRENT_LIST_DIR}")
@@ -115,24 +115,36 @@ if(PYTHON_IS_DEBUG)
     PROPERTY INTERFACE_COMPILE_DEFINITIONS Py_DEBUG)
 endif()
 
-set_property(
-  TARGET pybind11::module
-  APPEND
-  PROPERTY
-    INTERFACE_LINK_LIBRARIES pybind11::python_link_helper
-    "$<$<OR:$<PLATFORM_ID:Windows>,$<PLATFORM_ID:Cygwin>>:$<BUILD_INTERFACE:${PYTHON_LIBRARIES}>>")
-
-if(PYTHON_VERSION VERSION_LESS 3)
+# The <3.11 code here does not support release/debug builds at the same time, like on vcpkg
+if(CMAKE_VERSION VERSION_LESS 3.11)
   set_property(
-    TARGET pybind11::pybind11
+    TARGET pybind11::module
     APPEND
-    PROPERTY INTERFACE_LINK_LIBRARIES pybind11::python2_no_register)
-endif()
+    PROPERTY
+      INTERFACE_LINK_LIBRARIES
+      pybind11::python_link_helper
+      "$<$<OR:$<PLATFORM_ID:Windows>,$<PLATFORM_ID:Cygwin>>:$<BUILD_INTERFACE:${PYTHON_LIBRARIES}>>"
+  )
 
-set_property(
-  TARGET pybind11::embed
-  APPEND
-  PROPERTY INTERFACE_LINK_LIBRARIES pybind11::pybind11 $<BUILD_INTERFACE:${PYTHON_LIBRARIES}>)
+  set_property(
+    TARGET pybind11::embed
+    APPEND
+    PROPERTY INTERFACE_LINK_LIBRARIES pybind11::pybind11 $<BUILD_INTERFACE:${PYTHON_LIBRARIES}>)
+else()
+  # The IMPORTED INTERFACE library here is to ensure that "debug" and "release" get processed outside
+  # of a generator expression - https://gitlab.kitware.com/cmake/cmake/-/issues/18424, as they are
+  # target_link_library keywords rather than real libraries.
+  add_library(pybind11::_ClassicPythonLibraries IMPORTED INTERFACE)
+  target_link_libraries(pybind11::_ClassicPythonLibraries INTERFACE ${PYTHON_LIBRARIES})
+  target_link_libraries(
+    pybind11::module
+    INTERFACE
+      pybind11::python_link_helper
+      "$<$<OR:$<PLATFORM_ID:Windows>,$<PLATFORM_ID:Cygwin>>:pybind11::_ClassicPythonLibraries>")
+
+  target_link_libraries(pybind11::embed INTERFACE pybind11::pybind11
+                                                  pybind11::_ClassicPythonLibraries)
+endif()
 
 function(pybind11_extension name)
   # The prefix and extension are provided by FindPythonLibsNew.cmake
@@ -200,7 +212,9 @@ function(pybind11_add_module target_name)
     endif()
   endif()
 
-  if(NOT MSVC AND NOT ${CMAKE_BUILD_TYPE} MATCHES Debug|RelWithDebInfo)
+  # Use case-insensitive comparison to match the result of $<CONFIG:cfgs>
+  string(TOUPPER "${CMAKE_BUILD_TYPE}" uppercase_CMAKE_BUILD_TYPE)
+  if(NOT MSVC AND NOT "${uppercase_CMAKE_BUILD_TYPE}" MATCHES DEBUG|RELWITHDEBINFO)
     pybind11_strip(${target_name})
   endif()
 
diff --git a/ext/pybind11/tools/setup_global.py.in b/ext/pybind11/tools/setup_global.py.in
index 8b7e538714..885ac5c725 100644
--- a/ext/pybind11/tools/setup_global.py.in
+++ b/ext/pybind11/tools/setup_global.py.in
@@ -1,17 +1,11 @@
-#!/usr/bin/env python
-# -*- coding: utf-8 -*-
+#!/usr/bin/env python3
 
 # Setup script for pybind11-global (in the sdist or in tools/setup_global.py in the repository)
 # This package is targeted for easy use from CMake.
 
-import contextlib
 import glob
 import os
 import re
-import shutil
-import subprocess
-import sys
-import tempfile
 
 # Setuptools has to be before distutils
 from setuptools import setup
@@ -33,9 +27,11 @@ class InstallHeadersNested(install_headers):
 
 main_headers = glob.glob("pybind11/include/pybind11/*.h")
 detail_headers = glob.glob("pybind11/include/pybind11/detail/*.h")
+eigen_headers = glob.glob("pybind11/include/pybind11/eigen/*.h")
 stl_headers = glob.glob("pybind11/include/pybind11/stl/*.h")
 cmake_files = glob.glob("pybind11/share/cmake/pybind11/*.cmake")
-headers = main_headers + detail_headers + stl_headers
+pkgconfig_files = glob.glob("pybind11/share/pkgconfig/*.pc")
+headers = main_headers + detail_headers + stl_headers + eigen_headers
 
 cmdclass = {"install_headers": InstallHeadersNested}
 $extra_cmd
@@ -57,8 +53,10 @@ setup(
     headers=headers,
     data_files=[
         (base + "share/cmake/pybind11", cmake_files),
+        (base + "share/pkgconfig", pkgconfig_files),
         (base + "include/pybind11", main_headers),
         (base + "include/pybind11/detail", detail_headers),
+        (base + "include/pybind11/eigen", eigen_headers),
         (base + "include/pybind11/stl", stl_headers),
     ],
     cmdclass=cmdclass,
diff --git a/ext/pybind11/tools/setup_main.py.in b/ext/pybind11/tools/setup_main.py.in
index 533a75ae71..6358cc7b9b 100644
--- a/ext/pybind11/tools/setup_main.py.in
+++ b/ext/pybind11/tools/setup_main.py.in
@@ -1,5 +1,4 @@
-#!/usr/bin/env python
-# -*- coding: utf-8 -*-
+#!/usr/bin/env python3
 
 # Setup script (in the sdist or in tools/setup_main.py in the repository)
 
@@ -16,15 +15,19 @@ setup(
         "pybind11",
         "pybind11.include.pybind11",
         "pybind11.include.pybind11.detail",
+        "pybind11.include.pybind11.eigen",
         "pybind11.include.pybind11.stl",
         "pybind11.share.cmake.pybind11",
+        "pybind11.share.pkgconfig",
     ],
     package_data={
-        "pybind11": ["py.typed", "*.pyi"],
+        "pybind11": ["py.typed"],
         "pybind11.include.pybind11": ["*.h"],
         "pybind11.include.pybind11.detail": ["*.h"],
+        "pybind11.include.pybind11.eigen": ["*.h"],
         "pybind11.include.pybind11.stl": ["*.h"],
         "pybind11.share.cmake.pybind11": ["*.cmake"],
+        "pybind11.share.pkgconfig": ["*.pc"],
     },
     extras_require={
         "global": ["pybind11_global==$version"]

From e73655d038cdfa68964109044e33c9a6e7d85ac9 Mon Sep 17 00:00:00 2001
From: Giacomo Travaglini <giacomo.travaglini@arm.com>
Date: Wed, 15 Mar 2023 13:34:46 +0000
Subject: [PATCH 265/492] misc: Use python f-strings for string formatting

This patch has been generated by applying flynt to the
gem5 repo (ext has been excluded)

JIRA: https://gem5.atlassian.net/browse/GEM5-831

Change-Id: I0935db6223d5426b99515959bde78e374cbadb04
Signed-off-by: Giacomo Travaglini <giacomo.travaglini@arm.com>
Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/68957
Maintainer: Bobby Bruce <bbruce@ucdavis.edu>
Tested-by: kokoro <noreply+kokoro@google.com>
Reviewed-by: Bobby Bruce <bbruce@ucdavis.edu>
---
 configs/common/CacheConfig.py                 |   6 +-
 configs/common/FileSystemConfig.py            |  10 +-
 configs/common/GPUTLBConfig.py                |   4 +-
 configs/common/ObjectList.py                  |  12 +-
 configs/common/Options.py                     |   3 +-
 configs/common/Simulation.py                  |  18 +--
 configs/common/SysPaths.py                    |   4 +-
 configs/common/cpu2000.py                     |  39 +++---
 configs/deprecated/example/fs.py              |   6 +-
 configs/deprecated/example/se.py              |   3 +-
 configs/example/apu_se.py                     |  18 +--
 configs/example/arm/baremetal.py              |   4 +-
 configs/example/arm/fs_bigLITTLE.py           |   6 +-
 configs/example/arm/fs_power.py               |   2 +-
 configs/example/arm/ruby_fs.py                |  12 +-
 configs/example/arm/starter_fs.py             |  12 +-
 .../gem5_library/x86-gapbs-benchmarks.py      |   8 +-
 .../gem5_library/x86-npb-benchmarks.py        |   2 +-
 .../gem5_library/x86-parsec-benchmarks.py     |   5 +-
 .../x86-spec-cpu2006-benchmarks.py            |   4 +-
 .../x86-spec-cpu2017-benchmarks.py            |   4 +-
 configs/example/gpufs/DisjointNetwork.py      |   8 +-
 configs/example/gpufs/hip_cookbook.py         |  10 +-
 configs/example/gpufs/hip_rodinia.py          |  10 +-
 configs/example/gpufs/hip_samples.py          |  10 +-
 configs/example/gpufs/runfs.py                |   2 +-
 configs/example/gpufs/vega10_kvm.py           |  10 +-
 configs/example/hsaTopology.py                |  72 +++++-----
 configs/example/lupv/run_lupv.py              |   4 +-
 configs/example/memcheck.py                   |   2 +-
 configs/example/memtest.py                    |   5 +-
 configs/example/read_config.py                |  10 +-
 configs/example/riscv/fs_linux.py             |   2 +-
 configs/example/sst/riscv_fs.py               |   2 +-
 configs/learning_gem5/part1/caches.py         |   8 +-
 configs/learning_gem5/part3/ruby_test.py      |   4 +-
 configs/learning_gem5/part3/simple_ruby.py    |   4 +-
 configs/ruby/CHI.py                           |   2 +-
 configs/ruby/CHI_config.py                    |   2 +-
 configs/ruby/Ruby.py                          |  12 +-
 configs/topologies/CustomMesh.py              |  12 +-
 .../gem5_scons/builders/switching_headers.py  |   2 +-
 site_scons/gem5_scons/configure.py            |  10 +-
 site_scons/site_tools/git.py                  |   2 +-
 src/arch/arm/ArmSystem.py                     |   2 +-
 src/arch/arm/fastmodel/FastModel.py           |   4 +-
 src/arch/arm/fastmodel/arm_fast_model.py      |  12 +-
 src/arch/isa_parser/isa_parser.py             |  81 ++++++-----
 src/arch/isa_parser/operand_list.py           |   3 +-
 src/arch/isa_parser/operand_types.py          |  22 +--
 src/arch/micro_asm.py                         |  34 ++---
 src/arch/micro_asm_test.py                    |   4 +-
 src/arch/x86/bios/IntelMP.py                  |   3 +-
 src/arch/x86/isa/insts/__init__.py            |   2 +-
 .../x86/isa/insts/general_purpose/__init__.py |   2 +-
 .../general_purpose/arithmetic/__init__.py    |   2 +-
 .../compare_and_test/__init__.py              |   2 +-
 .../control_transfer/__init__.py              |   2 +-
 .../data_conversion/__init__.py               |   2 +-
 .../general_purpose/data_transfer/__init__.py |   2 +-
 .../insts/general_purpose/flags/__init__.py   |   2 +-
 .../general_purpose/input_output/__init__.py  |   2 +-
 .../rotate_and_shift/__init__.py              |   2 +-
 .../insts/general_purpose/string/__init__.py  |   2 +-
 src/arch/x86/isa/insts/simd128/__init__.py    |   2 +-
 .../insts/simd128/floating_point/__init__.py  |   2 +-
 .../floating_point/arithmetic/__init__.py     |   2 +-
 .../floating_point/compare/__init__.py        |   2 +-
 .../data_conversion/__init__.py               |   2 +-
 .../data_reordering/__init__.py               |   2 +-
 .../floating_point/data_transfer/__init__.py  |   2 +-
 .../floating_point/logical/__init__.py        |   2 +-
 .../x86/isa/insts/simd128/integer/__init__.py |   2 +-
 .../simd128/integer/arithmetic/__init__.py    |   2 +-
 .../insts/simd128/integer/compare/__init__.py |   2 +-
 .../integer/data_conversion/__init__.py       |   2 +-
 .../integer/data_reordering/__init__.py       |   2 +-
 .../simd128/integer/data_transfer/__init__.py |   2 +-
 .../insts/simd128/integer/logical/__init__.py |   2 +-
 .../save_and_restore_state/__init__.py        |   2 +-
 .../insts/simd128/integer/shift/__init__.py   |   2 +-
 src/arch/x86/isa/insts/simd64/__init__.py     |   2 +-
 .../insts/simd64/floating_point/__init__.py   |   2 +-
 .../floating_point/arithmetic/__init__.py     |   2 +-
 .../simd64/floating_point/compare/__init__.py |   2 +-
 .../x86/isa/insts/simd64/integer/__init__.py  |   2 +-
 .../simd64/integer/arithmetic/__init__.py     |   2 +-
 .../insts/simd64/integer/compare/__init__.py  |   2 +-
 .../integer/data_reordering/__init__.py       |   2 +-
 .../simd64/integer/data_transfer/__init__.py  |   2 +-
 .../insts/simd64/integer/logical/__init__.py  |   2 +-
 .../insts/simd64/integer/shift/__init__.py    |   2 +-
 src/arch/x86/isa/insts/system/__init__.py     |   2 +-
 src/arch/x86/isa/insts/x87/__init__.py        |   2 +-
 .../x86/isa/insts/x87/arithmetic/__init__.py  |   2 +-
 .../insts/x87/compare_and_test/__init__.py    |   2 +-
 .../x86/isa/insts/x87/control/__init__.py     |   2 +-
 .../data_transfer_and_conversion/__init__.py  |   2 +-
 .../isa/insts/x87/load_constants/__init__.py  |   2 +-
 .../insts/x87/stack_management/__init__.py    |   2 +-
 .../x87/transcendental_functions/__init__.py  |   2 +-
 src/cpu/BaseCPU.py                            |   8 +-
 src/cpu/testers/traffic_gen/BaseTrafficGen.py |   2 +-
 src/dev/Device.py                             |   2 +-
 src/dev/arm/GenericTimer.py                   |   2 +-
 src/dev/arm/RealView.py                       |   6 +-
 src/dev/arm/SMMUv3.py                         |   2 +-
 src/dev/arm/css/MHU.py                        |   2 +-
 src/mem/slicc/ast/ActionDeclAST.py            |   2 +-
 src/mem/slicc/ast/AssignStatementAST.py       |   2 +-
 .../slicc/ast/CheckAllocateStatementAST.py    |   2 +-
 src/mem/slicc/ast/CheckProbeStatementAST.py   |   2 +-
 src/mem/slicc/ast/DeclListAST.py              |   2 +-
 src/mem/slicc/ast/EnumDeclAST.py              |   8 +-
 src/mem/slicc/ast/EnumExprAST.py              |   2 +-
 src/mem/slicc/ast/ExprStatementAST.py         |   2 +-
 src/mem/slicc/ast/FormalParamAST.py           |  12 +-
 src/mem/slicc/ast/FuncCallExprAST.py          |  16 +--
 src/mem/slicc/ast/FuncDeclAST.py              |   6 +-
 src/mem/slicc/ast/IfStatementAST.py           |   2 +-
 src/mem/slicc/ast/InPortDeclAST.py            |   2 +-
 src/mem/slicc/ast/IsValidPtrExprAST.py        |   2 +-
 src/mem/slicc/ast/LiteralExprAST.py           |   4 +-
 src/mem/slicc/ast/LocalVariableAST.py         |   8 +-
 src/mem/slicc/ast/MachineAST.py               |  14 +-
 src/mem/slicc/ast/MemberExprAST.py            |   5 +-
 src/mem/slicc/ast/MethodCallExprAST.py        |   8 +-
 src/mem/slicc/ast/NewExprAST.py               |   2 +-
 src/mem/slicc/ast/ObjDeclAST.py               |  11 +-
 src/mem/slicc/ast/OperatorExprAST.py          |   6 +-
 src/mem/slicc/ast/OutPortDeclAST.py           |   2 +-
 src/mem/slicc/ast/PairAST.py                  |   2 +-
 src/mem/slicc/ast/PairListAST.py              |   2 +-
 src/mem/slicc/ast/ReturnStatementAST.py       |   2 +-
 src/mem/slicc/ast/StallAndWaitStatementAST.py |   2 +-
 src/mem/slicc/ast/StateDeclAST.py             |  10 +-
 src/mem/slicc/ast/StatementListAST.py         |   2 +-
 src/mem/slicc/ast/StaticCastAST.py            |   2 +-
 src/mem/slicc/ast/TransitionDeclAST.py        |   9 +-
 src/mem/slicc/ast/TypeDeclAST.py              |   6 +-
 src/mem/slicc/ast/TypeFieldEnumAST.py         |   4 +-
 src/mem/slicc/ast/TypeFieldStateAST.py        |   4 +-
 src/mem/slicc/ast/VarExprAST.py               |   2 +-
 src/mem/slicc/ast/WakeupPortStatementAST.py   |   2 +-
 src/mem/slicc/main.py                         |   2 +-
 src/mem/slicc/parser.py                       |   2 +-
 src/mem/slicc/symbols/Action.py               |   2 +-
 src/mem/slicc/symbols/Event.py                |   2 +-
 src/mem/slicc/symbols/Func.py                 |   9 +-
 src/mem/slicc/symbols/RequestType.py          |   2 +-
 src/mem/slicc/symbols/State.py                |   2 +-
 src/mem/slicc/symbols/StateMachine.py         | 117 ++++++++--------
 src/mem/slicc/symbols/Symbol.py               |   2 +-
 src/mem/slicc/symbols/SymbolTable.py          |   6 +-
 src/mem/slicc/symbols/Transition.py           |   2 +-
 src/mem/slicc/symbols/Type.py                 |  22 +--
 src/mem/slicc/symbols/Var.py                  |   2 +-
 src/mem/slicc/util.py                         |  10 +-
 .../gem5/components/boards/arm_board.py       |   2 +-
 .../boards/experimental/lupv_board.py         |   2 +-
 .../gem5/components/boards/riscv_board.py     |   2 +-
 .../riscvmatched/riscvmatched_board.py        |   2 +-
 src/python/gem5/resources/downloader.py       |  26 ++--
 src/python/gem5/simulate/exit_event.py        |   2 +-
 src/python/gem5/utils/filelock.py             |   4 +-
 .../gem5/utils/multiprocessing/context.py     |   2 +-
 src/python/gem5/utils/requires.py             |   2 +-
 src/python/importer.py                        |   2 +-
 src/python/m5/SimObject.py                    |  52 +++----
 src/python/m5/debug.py                        |   6 +-
 src/python/m5/event.py                        |   6 +-
 src/python/m5/ext/pyfdt/pyfdt.py              |  44 +++---
 src/python/m5/internal/params.py              |   2 +-
 src/python/m5/main.py                         |  18 +--
 src/python/m5/objects/__init__.py             |   2 +-
 src/python/m5/options.py                      |   4 +-
 src/python/m5/params.py                       | 130 ++++++++----------
 src/python/m5/proxy.py                        |   4 +-
 src/python/m5/simulate.py                     |  19 ++-
 src/python/m5/stats/__init__.py               |  23 ++--
 src/python/m5/ticks.py                        |   4 +-
 src/python/m5/util/__init__.py                |   2 +-
 src/python/m5/util/convert.py                 |  26 ++--
 src/python/m5/util/dot_writer.py              |   2 +-
 src/python/m5/util/dot_writer_ruby.py         |   2 +-
 src/python/m5/util/pybind.py                  |   7 +-
 src/systemc/tests/tlm/endian_conv/testall.py  |  19 +--
 src/systemc/tests/verify.py                   |  14 +-
 tests/configs/dram-lowp.py                    |   2 +-
 tests/gem5/arm-boot-tests/test_linux_boot.py  |   4 +-
 tests/gem5/configs/arm_boot_exit_run.py       |   4 +-
 tests/gem5/configs/boot_kvm_fork_run.py       |   4 +-
 tests/gem5/configs/boot_kvm_switch_exit.py    |   4 +-
 tests/gem5/configs/checkpoint.py              |   4 +-
 tests/gem5/configs/parsec_disk_run.py         |  16 +--
 tests/gem5/configs/riscv_boot_exit_run.py     |   2 +-
 tests/gem5/configs/switcheroo.py              |   4 +-
 tests/gem5/configs/x86_boot_exit_run.py       |   4 +-
 tests/gem5/cpu_tests/test.py                  |   4 +-
 tests/gem5/fixture.py                         |   6 +-
 .../gem5/kvm-fork-tests/test_kvm_fork_run.py  |   4 +-
 .../kvm-switch-tests/test_kvm_cpu_switch.py   |   4 +-
 .../run_replacement_policy_test.py            |   4 +-
 .../gem5/riscv-boot-tests/test_linux_boot.py  |   4 +-
 tests/gem5/suite.py                           |   4 +-
 tests/gem5/traffic_gen/simple_traffic_run.py  |   4 +-
 tests/gem5/verifier.py                        |   3 +-
 tests/gem5/x86-boot-tests/test_linux_boot.py  |   4 +-
 tests/run.py                                  |   6 +-
 util/checkpoint-tester.py                     |   4 +-
 util/cpt_upgrader.py                          |  31 ++---
 util/cpt_upgraders/arm-hdlcd-upgrade.py       |   2 +-
 util/cpt_upgraders/etherswitch.py             |   2 +-
 util/cpt_upgraders/isa-is-simobject.py        |   2 +-
 util/cpt_upgraders/process-fdmap-rename.py    |   4 +-
 util/decode_inst_dep_trace.py                 |  20 +--
 util/decode_packet_trace.py                   |  11 +-
 util/find_copyrights.py                       |  12 +-
 .../artifact/gem5art/artifact/_artifactdb.py  |   2 +-
 .../artifact/gem5art/artifact/artifact.py     |  10 +-
 util/gem5art/run/gem5art/run.py               |   6 +-
 util/gem5art/run/tests/test_run.py            |   2 +-
 util/gem5img.py                               |  18 +--
 util/gen_arm_fs_files.py                      |  22 +--
 util/git-pre-commit.py                        |   6 +-
 util/maint/list_changes.py                    |   4 +-
 util/maint/show_changes_by_file.py            |  10 +-
 util/minorview/model.py                       |  12 +-
 util/minorview/point.py                       |   4 +-
 util/o3-pipeview.py                           |   4 +-
 util/on-chip-network-power-area.py            |  12 +-
 util/oprofile-top.py                          |   4 +-
 util/plot_dram/PlotPowerStates.py             |   2 +-
 util/plot_dram/dram_sweep_plot.py             |   2 +-
 util/plot_dram/lowp_dram_sweep_plot.py        |   2 +-
 util/streamline/m5stats2streamline.py         |  13 +-
 util/style.py                                 |   5 +-
 util/style/region.py                          |  16 +--
 util/style/repo.py                            |  10 +-
 util/style/sort_includes.py                   |   4 +-
 util/style/verifiers.py                       |   4 +-
 util/update-copyright.py                      |   2 +-
 242 files changed, 814 insertions(+), 1002 deletions(-)

diff --git a/configs/common/CacheConfig.py b/configs/common/CacheConfig.py
index 63ffe6765c..7a191570e3 100644
--- a/configs/common/CacheConfig.py
+++ b/configs/common/CacheConfig.py
@@ -60,15 +60,15 @@ def _get_hwp(hwp_option):
 def _get_cache_opts(level, options):
     opts = {}
 
-    size_attr = "{}_size".format(level)
+    size_attr = f"{level}_size"
     if hasattr(options, size_attr):
         opts["size"] = getattr(options, size_attr)
 
-    assoc_attr = "{}_assoc".format(level)
+    assoc_attr = f"{level}_assoc"
     if hasattr(options, assoc_attr):
         opts["assoc"] = getattr(options, assoc_attr)
 
-    prefetcher_attr = "{}_hwp_type".format(level)
+    prefetcher_attr = f"{level}_hwp_type"
     if hasattr(options, prefetcher_attr):
         opts["prefetcher"] = _get_hwp(getattr(options, prefetcher_attr))
 
diff --git a/configs/common/FileSystemConfig.py b/configs/common/FileSystemConfig.py
index 066eb9a811..9c6647c861 100644
--- a/configs/common/FileSystemConfig.py
+++ b/configs/common/FileSystemConfig.py
@@ -51,7 +51,7 @@ from shutil import rmtree, copyfile
 
 def hex_mask(terms):
     dec_mask = reduce(operator.or_, [2**i for i in terms], 0)
-    return "%08x" % dec_mask
+    return f"{dec_mask:08x}"
 
 
 def file_append(path, contents):
@@ -252,13 +252,13 @@ def _redirect_paths(options):
     # Redirect filesystem syscalls from src to the first matching dests
     redirect_paths = [
         RedirectPath(
-            app_path="/proc", host_paths=["%s/fs/proc" % m5.options.outdir]
+            app_path="/proc", host_paths=[f"{m5.options.outdir}/fs/proc"]
         ),
         RedirectPath(
-            app_path="/sys", host_paths=["%s/fs/sys" % m5.options.outdir]
+            app_path="/sys", host_paths=[f"{m5.options.outdir}/fs/sys"]
         ),
         RedirectPath(
-            app_path="/tmp", host_paths=["%s/fs/tmp" % m5.options.outdir]
+            app_path="/tmp", host_paths=[f"{m5.options.outdir}/fs/tmp"]
         ),
     ]
 
@@ -275,7 +275,7 @@ def _redirect_paths(options):
     if chroot:
         redirect_paths.append(
             RedirectPath(
-                app_path="/", host_paths=["%s" % os.path.expanduser(chroot)]
+                app_path="/", host_paths=[f"{os.path.expanduser(chroot)}"]
             )
         )
 
diff --git a/configs/common/GPUTLBConfig.py b/configs/common/GPUTLBConfig.py
index b70d6c5516..e59cd00da4 100644
--- a/configs/common/GPUTLBConfig.py
+++ b/configs/common/GPUTLBConfig.py
@@ -204,8 +204,8 @@ def config_tlb_hierarchy(
             # add the different TLB levels to the system
             # Modify here if you want to make the TLB hierarchy a child of
             # the shader.
-            exec("system.%s = TLB_array" % system_TLB_name)
-            exec("system.%s = Coalescer_array" % system_Coalescer_name)
+            exec(f"system.{system_TLB_name} = TLB_array")
+            exec(f"system.{system_Coalescer_name} = Coalescer_array")
 
     # ===========================================================
     # Specify the TLB hierarchy (i.e., port connections)
diff --git a/configs/common/ObjectList.py b/configs/common/ObjectList.py
index ce529677e7..4b862db9e8 100644
--- a/configs/common/ObjectList.py
+++ b/configs/common/ObjectList.py
@@ -65,22 +65,18 @@ class ObjectList(object):
             sub_cls = self._sub_classes[real_name]
             return sub_cls
         except KeyError:
-            print(
-                "{} is not a valid sub-class of {}.".format(
-                    name, self.base_cls
-                )
-            )
+            print(f"{name} is not a valid sub-class of {self.base_cls}.")
             raise
 
     def print(self):
         """Print a list of available sub-classes and aliases."""
 
-        print("Available {} classes:".format(self.base_cls))
+        print(f"Available {self.base_cls} classes:")
         doc_wrapper = TextWrapper(
             initial_indent="\t\t", subsequent_indent="\t\t"
         )
         for name, cls in list(self._sub_classes.items()):
-            print("\t{}".format(name))
+            print(f"\t{name}")
 
             # Try to extract the class documentation from the class help
             # string.
@@ -92,7 +88,7 @@ class ObjectList(object):
         if self._aliases:
             print("\Aliases:")
             for alias, target in list(self._aliases.items()):
-                print("\t{} => {}".format(alias, target))
+                print(f"\t{alias} => {target}")
 
     def get_names(self):
         """Return a list of valid sub-class names and aliases."""
diff --git a/configs/common/Options.py b/configs/common/Options.py
index 5585a75b80..8344d9fd44 100644
--- a/configs/common/Options.py
+++ b/configs/common/Options.py
@@ -834,8 +834,7 @@ def addFSOptions(parser):
         action="store",
         type=str,
         dest="benchmark",
-        help="Specify the benchmark to run. Available benchmarks: %s"
-        % DefinedBenchmarks,
+        help=f"Specify the benchmark to run. Available benchmarks: {DefinedBenchmarks}",
     )
 
     # Metafile options
diff --git a/configs/common/Simulation.py b/configs/common/Simulation.py
index 731b3fcaa5..4377b65e64 100644
--- a/configs/common/Simulation.py
+++ b/configs/common/Simulation.py
@@ -71,7 +71,7 @@ def setCPUClass(options):
     TmpClass, test_mem_mode = getCPUClass(options.cpu_type)
     CPUClass = None
     if TmpClass.require_caches() and not options.caches and not options.ruby:
-        fatal("%s must be used with caches" % options.cpu_type)
+        fatal(f"{options.cpu_type} must be used with caches")
 
     if options.checkpoint_restore != None:
         if options.restore_with_cpu != options.cpu_type:
@@ -144,7 +144,7 @@ def findCptDir(options, cptdir, testsys):
                 fatal("Unable to find simpoint")
             inst += int(testsys.cpu[0].workload[0].simpoint)
 
-        checkpoint_dir = joinpath(cptdir, "cpt.%s.%s" % (options.bench, inst))
+        checkpoint_dir = joinpath(cptdir, f"cpt.{options.bench}.{inst}")
         if not exists(checkpoint_dir):
             fatal("Unable to find checkpoint directory %s", checkpoint_dir)
 
@@ -204,7 +204,7 @@ def findCptDir(options, cptdir, testsys):
             fatal("Checkpoint %d not found", cpt_num)
 
         cpt_starttick = int(cpts[cpt_num - 1])
-        checkpoint_dir = joinpath(cptdir, "cpt.%s" % cpts[cpt_num - 1])
+        checkpoint_dir = joinpath(cptdir, f"cpt.{cpts[cpt_num - 1]}")
 
     return cpt_starttick, checkpoint_dir
 
@@ -220,7 +220,7 @@ def scriptCheckpoints(options, maxtick, cptdir):
         print("Creating checkpoint at inst:%d" % (checkpoint_inst))
         exit_event = m5.simulate()
         exit_cause = exit_event.getCause()
-        print("exit cause = %s" % exit_cause)
+        print(f"exit cause = {exit_cause}")
 
         # skip checkpoint instructions should they exist
         while exit_cause == "checkpoint":
@@ -549,10 +549,10 @@ def run(options, root, testsys, cpu_class):
     if options.repeat_switch:
         switch_class = getCPUClass(options.cpu_type)[0]
         if switch_class.require_caches() and not options.caches:
-            print("%s: Must be used with caches" % str(switch_class))
+            print(f"{str(switch_class)}: Must be used with caches")
             sys.exit(1)
         if not switch_class.support_take_over():
-            print("%s: CPU switching not supported" % str(switch_class))
+            print(f"{str(switch_class)}: CPU switching not supported")
             sys.exit(1)
 
         repeat_switch_cpus = [
@@ -740,9 +740,9 @@ def run(options, root, testsys, cpu_class):
             )
             exit_event = m5.simulate()
         else:
-            print("Switch at curTick count:%s" % str(10000))
+            print(f"Switch at curTick count:{str(10000)}")
             exit_event = m5.simulate(10000)
-        print("Switched CPUS @ tick %s" % (m5.curTick()))
+        print(f"Switched CPUS @ tick {m5.curTick()}")
 
         m5.switchCpus(testsys, switch_cpu_list)
 
@@ -757,7 +757,7 @@ def run(options, root, testsys, cpu_class):
                 exit_event = m5.simulate()
             else:
                 exit_event = m5.simulate(options.standard_switch)
-            print("Switching CPUS @ tick %s" % (m5.curTick()))
+            print(f"Switching CPUS @ tick {m5.curTick()}")
             print(
                 "Simulation ends instruction count:%d"
                 % (testsys.switch_cpus_1[0].max_insts_any_thread)
diff --git a/configs/common/SysPaths.py b/configs/common/SysPaths.py
index 7c0f5bf59b..60375c30c5 100644
--- a/configs/common/SysPaths.py
+++ b/configs/common/SysPaths.py
@@ -73,9 +73,7 @@ class PathSearchFunc(object):
                 return next(p for p in paths if os.path.exists(p))
             except StopIteration:
                 raise IOError(
-                    "Can't find file '{}' on {}.".format(
-                        filepath, self.environment_variable
-                    )
+                    f"Can't find file '{filepath}' on {self.environment_variable}."
                 )
 
 
diff --git a/configs/common/cpu2000.py b/configs/common/cpu2000.py
index 3b1b390618..06f927cbcf 100644
--- a/configs/common/cpu2000.py
+++ b/configs/common/cpu2000.py
@@ -83,7 +83,7 @@ class Benchmark(object):
             self.args = []
 
         if not hasattr(self.__class__, "output"):
-            self.output = "%s.out" % self.name
+            self.output = f"{self.name}.out"
 
         if not hasattr(self.__class__, "simpoint"):
             self.simpoint = None
@@ -92,13 +92,12 @@ class Benchmark(object):
             func = getattr(self.__class__, input_set)
         except AttributeError:
             raise AttributeError(
-                "The benchmark %s does not have the %s input set"
-                % (self.name, input_set)
+                f"The benchmark {self.name} does not have the {input_set} input set"
             )
 
         executable = joinpath(spec_dist, "binaries", isa, os, self.binary)
         if not isfile(executable):
-            raise AttributeError("%s not found" % executable)
+            raise AttributeError(f"{executable} not found")
         self.executable = executable
 
         # root of tree for input & output data files
@@ -112,7 +111,7 @@ class Benchmark(object):
         self.input_set = input_set
 
         if not isdir(inputs_dir):
-            raise AttributeError("%s not found" % inputs_dir)
+            raise AttributeError(f"{inputs_dir} not found")
 
         self.inputs_dir = [inputs_dir]
         if isdir(all_dir):
@@ -121,12 +120,12 @@ class Benchmark(object):
             self.outputs_dir = outputs_dir
 
         if not hasattr(self.__class__, "stdin"):
-            self.stdin = joinpath(inputs_dir, "%s.in" % self.name)
+            self.stdin = joinpath(inputs_dir, f"{self.name}.in")
             if not isfile(self.stdin):
                 self.stdin = None
 
         if not hasattr(self.__class__, "stdout"):
-            self.stdout = joinpath(outputs_dir, "%s.out" % self.name)
+            self.stdout = joinpath(outputs_dir, f"{self.name}.out")
             if not isfile(self.stdout):
                 self.stdout = None
 
@@ -387,9 +386,9 @@ class mesa(Benchmark):
             "-frames",
             frames,
             "-meshfile",
-            "%s.in" % self.name,
+            f"{self.name}.in",
             "-ppmfile",
-            "%s.ppm" % self.name,
+            f"{self.name}.ppm",
         ]
 
     def test(self, isa, os):
@@ -876,34 +875,34 @@ class vortex(Benchmark):
         elif isa == "sparc" or isa == "sparc32":
             self.endian = "bendian"
         else:
-            raise AttributeError("unknown ISA %s" % isa)
+            raise AttributeError(f"unknown ISA {isa}")
 
         super(vortex, self).__init__(isa, os, input_set)
 
     def test(self, isa, os):
-        self.args = ["%s.raw" % self.endian]
+        self.args = [f"{self.endian}.raw"]
         self.output = "vortex.out"
 
     def train(self, isa, os):
-        self.args = ["%s.raw" % self.endian]
+        self.args = [f"{self.endian}.raw"]
         self.output = "vortex.out"
 
     def smred(self, isa, os):
-        self.args = ["%s.raw" % self.endian]
+        self.args = [f"{self.endian}.raw"]
         self.output = "vortex.out"
 
     def mdred(self, isa, os):
-        self.args = ["%s.raw" % self.endian]
+        self.args = [f"{self.endian}.raw"]
         self.output = "vortex.out"
 
     def lgred(self, isa, os):
-        self.args = ["%s.raw" % self.endian]
+        self.args = [f"{self.endian}.raw"]
         self.output = "vortex.out"
 
 
 class vortex1(vortex):
     def ref(self, isa, os):
-        self.args = ["%s1.raw" % self.endian]
+        self.args = [f"{self.endian}1.raw"]
         self.output = "vortex1.out"
         self.simpoint = 271 * 100e6
 
@@ -911,14 +910,14 @@ class vortex1(vortex):
 class vortex2(vortex):
     def ref(self, isa, os):
         self.simpoint = 1024 * 100e6
-        self.args = ["%s2.raw" % self.endian]
+        self.args = [f"{self.endian}2.raw"]
         self.output = "vortex2.out"
 
 
 class vortex3(vortex):
     def ref(self, isa, os):
         self.simpoint = 564 * 100e6
-        self.args = ["%s3.raw" % self.endian]
+        self.args = [f"{self.endian}3.raw"]
         self.output = "vortex3.out"
 
 
@@ -1031,8 +1030,8 @@ if __name__ == "__main__":
 
     for bench in all:
         for input_set in "ref", "test", "train":
-            print("class: %s" % bench.__name__)
+            print(f"class: {bench.__name__}")
             x = bench("x86", "linux", input_set)
-            print("%s: %s" % (x, input_set))
+            print(f"{x}: {input_set}")
             pprint(x.makeProcessArgs())
             print()
diff --git a/configs/deprecated/example/fs.py b/configs/deprecated/example/fs.py
index 59c35925fc..c50e3ac4cc 100644
--- a/configs/deprecated/example/fs.py
+++ b/configs/deprecated/example/fs.py
@@ -347,8 +347,8 @@ if args.benchmark:
     try:
         bm = Benchmarks[args.benchmark]
     except KeyError:
-        print("Error benchmark %s has not been defined." % args.benchmark)
-        print("Valid benchmarks are: %s" % DefinedBenchmarks)
+        print(f"Error benchmark {args.benchmark} has not been defined.")
+        print(f"Valid benchmarks are: {DefinedBenchmarks}")
         sys.exit(1)
 else:
     if args.dual:
@@ -433,7 +433,7 @@ if buildEnv["USE_ARM_ISA"] and not args.bare_metal and not args.dtb_filename:
         if hasattr(root, sysname):
             sys = getattr(root, sysname)
             sys.workload.dtb_filename = os.path.join(
-                m5.options.outdir, "%s.dtb" % sysname
+                m5.options.outdir, f"{sysname}.dtb"
             )
             sys.generateDtb(sys.workload.dtb_filename)
 
diff --git a/configs/deprecated/example/se.py b/configs/deprecated/example/se.py
index 4732839874..8d6735903f 100644
--- a/configs/deprecated/example/se.py
+++ b/configs/deprecated/example/se.py
@@ -159,8 +159,7 @@ if args.bench:
             multiprocesses.append(workload.makeProcess())
         except:
             print(
-                "Unable to find workload for %s: %s"
-                % (get_runtime_isa().name(), app),
+                f"Unable to find workload for {get_runtime_isa().name()}: {app}",
                 file=sys.stderr,
             )
             sys.exit(1)
diff --git a/configs/example/apu_se.py b/configs/example/apu_se.py
index c2b97fd82e..287135fd62 100644
--- a/configs/example/apu_se.py
+++ b/configs/example/apu_se.py
@@ -683,7 +683,7 @@ def find_path(base_list, rel_path, test):
         full_path = os.path.join(base, rel_path)
         if test(full_path):
             return full_path
-    fatal("%s not found in %s" % (rel_path, base_list))
+    fatal(f"{rel_path} not found in {base_list}")
 
 
 def find_file(base_list, rel_path):
@@ -717,7 +717,7 @@ else:
                 "/usr/lib/x86_64-linux-gnu",
             ]
         ),
-        "HOME=%s" % os.getenv("HOME", "/"),
+        f"HOME={os.getenv('HOME', '/')}",
         # Disable the VM fault handler signal creation for dGPUs also
         # forces the use of DefaultSignals instead of driver-controlled
         # InteruptSignals throughout the runtime.  DefaultSignals poll
@@ -922,14 +922,10 @@ else:
 
 redirect_paths = [
     RedirectPath(
-        app_path="/proc", host_paths=["%s/fs/proc" % m5.options.outdir]
-    ),
-    RedirectPath(
-        app_path="/sys", host_paths=["%s/fs/sys" % m5.options.outdir]
-    ),
-    RedirectPath(
-        app_path="/tmp", host_paths=["%s/fs/tmp" % m5.options.outdir]
+        app_path="/proc", host_paths=[f"{m5.options.outdir}/fs/proc"]
     ),
+    RedirectPath(app_path="/sys", host_paths=[f"{m5.options.outdir}/fs/sys"]),
+    RedirectPath(app_path="/tmp", host_paths=[f"{m5.options.outdir}/fs/tmp"]),
 ]
 
 system.redirect_paths = redirect_paths
@@ -981,7 +977,7 @@ exit_event = m5.simulate(maxtick)
 if args.fast_forward:
     if exit_event.getCause() == "a thread reached the max instruction count":
         m5.switchCpus(system, switch_cpu_list)
-        print("Switched CPUS @ tick %s" % (m5.curTick()))
+        print(f"Switched CPUS @ tick {m5.curTick()}")
         m5.stats.reset()
         exit_event = m5.simulate(maxtick - m5.curTick())
 elif args.fast_forward_pseudo_op:
@@ -992,7 +988,7 @@ elif args.fast_forward_pseudo_op:
             print("Dumping stats...")
             m5.stats.dump()
         m5.switchCpus(system, switch_cpu_list)
-        print("Switched CPUS @ tick %s" % (m5.curTick()))
+        print(f"Switched CPUS @ tick {m5.curTick()}")
         m5.stats.reset()
         # This lets us switch back and forth without keeping a counter
         switch_cpu_list = [(x[1], x[0]) for x in switch_cpu_list]
diff --git a/configs/example/arm/baremetal.py b/configs/example/arm/baremetal.py
index fc630e5299..0072c1d629 100644
--- a/configs/example/arm/baremetal.py
+++ b/configs/example/arm/baremetal.py
@@ -77,7 +77,7 @@ def create(args):
     """Create and configure the system object."""
 
     if args.readfile and not os.path.isfile(args.readfile):
-        print("Error: Bootscript %s does not exist" % args.readfile)
+        print(f"Error: Bootscript {args.readfile} does not exist")
         sys.exit(1)
 
     object_file = args.kernel if args.kernel else ""
@@ -149,7 +149,7 @@ def create(args):
 def run(args):
     cptdir = m5.options.outdir
     if args.checkpoint:
-        print("Checkpoint directory: %s" % cptdir)
+        print(f"Checkpoint directory: {cptdir}")
 
     while True:
         event = m5.simulate()
diff --git a/configs/example/arm/fs_bigLITTLE.py b/configs/example/arm/fs_bigLITTLE.py
index 060c51ec3c..401eb0c9e7 100644
--- a/configs/example/arm/fs_bigLITTLE.py
+++ b/configs/example/arm/fs_bigLITTLE.py
@@ -331,10 +331,10 @@ def build(options):
         "lpj=19988480",
         "norandmaps",
         "loglevel=8",
-        "mem=%s" % options.mem_size,
-        "root=%s" % options.root,
+        f"mem={options.mem_size}",
+        f"root={options.root}",
         "rw",
-        "init=%s" % options.kernel_init,
+        f"init={options.kernel_init}",
         "vmalloc=768MB",
     ]
 
diff --git a/configs/example/arm/fs_power.py b/configs/example/arm/fs_power.py
index 95d2182508..671cf63f2f 100644
--- a/configs/example/arm/fs_power.py
+++ b/configs/example/arm/fs_power.py
@@ -79,7 +79,7 @@ class L2PowerOn(MathExprPowerModel):
         # Example to report l2 Cache overallAccesses
         # The estimated power is converted to Watt and will vary based
         # on the size of the cache
-        self.dyn = "{}.overallAccesses * 0.000018000".format(l2_path)
+        self.dyn = f"{l2_path}.overallAccesses * 0.000018000"
         self.st = "(voltage * 3)/10"
 
 
diff --git a/configs/example/arm/ruby_fs.py b/configs/example/arm/ruby_fs.py
index fd36319363..67a8a6e0b3 100644
--- a/configs/example/arm/ruby_fs.py
+++ b/configs/example/arm/ruby_fs.py
@@ -100,7 +100,7 @@ def create(args):
     """Create and configure the system object."""
 
     if args.script and not os.path.isfile(args.script):
-        print("Error: Bootscript %s does not exist" % args.script)
+        print(f"Error: Bootscript {args.script} does not exist")
         sys.exit(1)
 
     cpu_class = cpu_types[args.cpu]
@@ -171,11 +171,11 @@ def create(args):
         # memory layout.
         "norandmaps",
         # Tell Linux where to find the root disk image.
-        "root=%s" % args.root_device,
+        f"root={args.root_device}",
         # Mount the root disk read-write by default.
         "rw",
         # Tell Linux about the amount of physical memory present.
-        "mem=%s" % args.mem_size,
+        f"mem={args.mem_size}",
     ]
     system.workload.command_line = " ".join(kernel_cmd)
 
@@ -185,7 +185,7 @@ def create(args):
 def run(args):
     cptdir = m5.options.outdir
     if args.checkpoint:
-        print("Checkpoint directory: %s" % cptdir)
+        print(f"Checkpoint directory: {cptdir}")
 
     while True:
         event = m5.simulate()
@@ -221,9 +221,7 @@ def main():
         "--root-device",
         type=str,
         default=default_root_device,
-        help="OS device name for root partition (default: {})".format(
-            default_root_device
-        ),
+        help=f"OS device name for root partition (default: {default_root_device})",
     )
     parser.add_argument(
         "--script", type=str, default="", help="Linux bootscript"
diff --git a/configs/example/arm/starter_fs.py b/configs/example/arm/starter_fs.py
index 7d7ab71768..48cbbdb3e6 100644
--- a/configs/example/arm/starter_fs.py
+++ b/configs/example/arm/starter_fs.py
@@ -88,7 +88,7 @@ def create(args):
     """Create and configure the system object."""
 
     if args.script and not os.path.isfile(args.script):
-        print("Error: Bootscript %s does not exist" % args.script)
+        print(f"Error: Bootscript {args.script} does not exist")
         sys.exit(1)
 
     cpu_class = cpu_types[args.cpu][0]
@@ -163,11 +163,11 @@ def create(args):
         # memory layout.
         "norandmaps",
         # Tell Linux where to find the root disk image.
-        "root=%s" % args.root_device,
+        f"root={args.root_device}",
         # Mount the root disk read-write by default.
         "rw",
         # Tell Linux about the amount of physical memory present.
-        "mem=%s" % args.mem_size,
+        f"mem={args.mem_size}",
     ]
     system.workload.command_line = " ".join(kernel_cmd)
 
@@ -177,7 +177,7 @@ def create(args):
 def run(args):
     cptdir = m5.options.outdir
     if args.checkpoint:
-        print("Checkpoint directory: %s" % cptdir)
+        print(f"Checkpoint directory: {cptdir}")
 
     while True:
         event = m5.simulate()
@@ -219,9 +219,7 @@ def main():
         "--root-device",
         type=str,
         default=default_root_device,
-        help="OS device name for root partition (default: {})".format(
-            default_root_device
-        ),
+        help=f"OS device name for root partition (default: {default_root_device})",
     )
     parser.add_argument(
         "--script", type=str, default="", help="Linux bootscript"
diff --git a/configs/example/gem5_library/x86-gapbs-benchmarks.py b/configs/example/gem5_library/x86-gapbs-benchmarks.py
index 6ab37479f9..b85ce6e7e8 100644
--- a/configs/example/gem5_library/x86-gapbs-benchmarks.py
+++ b/configs/example/gem5_library/x86-gapbs-benchmarks.py
@@ -195,9 +195,9 @@ if args.synthetic == "1":
         )
         exit(-1)
 
-    command = "./{} -g {}\n".format(args.benchmark, args.size)
+    command = f"./{args.benchmark} -g {args.size}\n"
 else:
-    command = "./{} -sf ../{}".format(args.benchmark, args.size)
+    command = f"./{args.benchmark} -sf ../{args.size}"
 
 board.set_kernel_disk_workload(
     # The x86 linux kernel will be automatically downloaded to the
@@ -262,7 +262,9 @@ print("Done with the simulation")
 print()
 print("Performance statistics:")
 
-print("Simulated time in ROI: %.2fs" % ((end_tick - start_tick) / 1e12))
+print(
+    f"Simulated time in ROI: {(end_tick - start_tick) / 1000000000000.0:.2f}s"
+)
 print(
     "Ran a total of", simulator.get_current_tick() / 1e12, "simulated seconds"
 )
diff --git a/configs/example/gem5_library/x86-npb-benchmarks.py b/configs/example/gem5_library/x86-npb-benchmarks.py
index ff363e449c..cffba5a294 100644
--- a/configs/example/gem5_library/x86-npb-benchmarks.py
+++ b/configs/example/gem5_library/x86-npb-benchmarks.py
@@ -195,7 +195,7 @@ board = X86Board(
 # properly.
 
 command = (
-    "/home/gem5/NPB3.3-OMP/bin/{}.{}.x;".format(args.benchmark, args.size)
+    f"/home/gem5/NPB3.3-OMP/bin/{args.benchmark}.{args.size}.x;"
     + "sleep 5;"
     + "m5 exit;"
 )
diff --git a/configs/example/gem5_library/x86-parsec-benchmarks.py b/configs/example/gem5_library/x86-parsec-benchmarks.py
index 190c0a0980..aaffec8edc 100644
--- a/configs/example/gem5_library/x86-parsec-benchmarks.py
+++ b/configs/example/gem5_library/x86-parsec-benchmarks.py
@@ -177,10 +177,7 @@ board = X86Board(
 command = (
     "cd /home/gem5/parsec-benchmark;".format(args.benchmark)
     + "source env.sh;"
-    + "parsecmgmt -a run -p {} -c gcc-hooks -i {} \
-        -n {};".format(
-        args.benchmark, args.size, "2"
-    )
+    + f"parsecmgmt -a run -p {args.benchmark} -c gcc-hooks -i {args.size}         -n 2;"
     + "sleep 5;"
     + "m5 exit;"
 )
diff --git a/configs/example/gem5_library/x86-spec-cpu2006-benchmarks.py b/configs/example/gem5_library/x86-spec-cpu2006-benchmarks.py
index 8f39f49e2e..a681ecadcb 100644
--- a/configs/example/gem5_library/x86-spec-cpu2006-benchmarks.py
+++ b/configs/example/gem5_library/x86-spec-cpu2006-benchmarks.py
@@ -179,7 +179,7 @@ if not os.path.exists(args.image):
     print(
         "https://gem5art.readthedocs.io/en/latest/tutorials/spec-tutorial.html"
     )
-    fatal("The disk-image is not found at {}".format(args.image))
+    fatal(f"The disk-image is not found at {args.image}")
 
 # Setting up all the fixed system parameters here
 # Caches: MESI Two Level Cache Hierarchy
@@ -252,7 +252,7 @@ except FileExistsError:
 # The runscript.sh file places `m5 exit` before and after the following command
 # Therefore, we only pass this command without m5 exit.
 
-command = "{} {} {}".format(args.benchmark, args.size, output_dir)
+command = f"{args.benchmark} {args.size} {output_dir}"
 
 board.set_kernel_disk_workload(
     # The x86 linux kernel will be automatically downloaded to the
diff --git a/configs/example/gem5_library/x86-spec-cpu2017-benchmarks.py b/configs/example/gem5_library/x86-spec-cpu2017-benchmarks.py
index c4af7f5dd9..531ce9413e 100644
--- a/configs/example/gem5_library/x86-spec-cpu2017-benchmarks.py
+++ b/configs/example/gem5_library/x86-spec-cpu2017-benchmarks.py
@@ -193,7 +193,7 @@ if not os.path.exists(args.image):
     print(
         "https://gem5art.readthedocs.io/en/latest/tutorials/spec-tutorial.html"
     )
-    fatal("The disk-image is not found at {}".format(args.image))
+    fatal(f"The disk-image is not found at {args.image}")
 
 # Setting up all the fixed system parameters here
 # Caches: MESI Two Level Cache Hierarchy
@@ -266,7 +266,7 @@ except FileExistsError:
 # The runscript.sh file places `m5 exit` before and after the following command
 # Therefore, we only pass this command without m5 exit.
 
-command = "{} {} {}".format(args.benchmark, args.size, output_dir)
+command = f"{args.benchmark} {args.size} {output_dir}"
 
 # For enabling CustomResource, we pass an additional parameter to mount the
 # correct partition.
diff --git a/configs/example/gpufs/DisjointNetwork.py b/configs/example/gpufs/DisjointNetwork.py
index 1d7f708967..1fbd0dcb15 100644
--- a/configs/example/gpufs/DisjointNetwork.py
+++ b/configs/example/gpufs/DisjointNetwork.py
@@ -48,7 +48,7 @@ class DisjointSimple(SimpleNetwork):
     def connectCPU(self, opts, controllers):
 
         # Setup parameters for makeTopology call for CPU network
-        topo_module = import_module("topologies.%s" % opts.cpu_topology)
+        topo_module = import_module(f"topologies.{opts.cpu_topology}")
         topo_class = getattr(topo_module, opts.cpu_topology)
         _topo = topo_class(controllers)
         _topo.makeTopology(opts, self, SimpleIntLink, SimpleExtLink, Switch)
@@ -58,7 +58,7 @@ class DisjointSimple(SimpleNetwork):
     def connectGPU(self, opts, controllers):
 
         # Setup parameters for makeTopology call for GPU network
-        topo_module = import_module("topologies.%s" % opts.gpu_topology)
+        topo_module = import_module(f"topologies.{opts.gpu_topology}")
         topo_class = getattr(topo_module, opts.gpu_topology)
         _topo = topo_class(controllers)
         _topo.makeTopology(opts, self, SimpleIntLink, SimpleExtLink, Switch)
@@ -84,7 +84,7 @@ class DisjointGarnet(GarnetNetwork):
     def connectCPU(self, opts, controllers):
 
         # Setup parameters for makeTopology call for CPU network
-        topo_module = import_module("topologies.%s" % opts.cpu_topology)
+        topo_module = import_module(f"topologies.{opts.cpu_topology}")
         topo_class = getattr(topo_module, opts.cpu_topology)
         _topo = topo_class(controllers)
         _topo.makeTopology(
@@ -96,7 +96,7 @@ class DisjointGarnet(GarnetNetwork):
     def connectGPU(self, opts, controllers):
 
         # Setup parameters for makeTopology call
-        topo_module = import_module("topologies.%s" % opts.gpu_topology)
+        topo_module = import_module(f"topologies.{opts.gpu_topology}")
         topo_class = getattr(topo_module, opts.gpu_topology)
         _topo = topo_class(controllers)
         _topo.makeTopology(
diff --git a/configs/example/gpufs/hip_cookbook.py b/configs/example/gpufs/hip_cookbook.py
index 1c22be52da..87c7547dd3 100644
--- a/configs/example/gpufs/hip_cookbook.py
+++ b/configs/example/gpufs/hip_cookbook.py
@@ -99,18 +99,16 @@ if __name__ == "__m5_main__":
 
     # Create temp script to run application
     if args.app is None:
-        print("No application given. Use %s -a <app>" % sys.argv[0])
+        print(f"No application given. Use {sys.argv[0]} -a <app>")
         sys.exit(1)
     elif args.kernel is None:
-        print("No kernel path given. Use %s --kernel <vmlinux>" % sys.argv[0])
+        print(f"No kernel path given. Use {sys.argv[0]} --kernel <vmlinux>")
         sys.exit(1)
     elif args.disk_image is None:
-        print("No disk path given. Use %s --disk-image <linux>" % sys.argv[0])
+        print(f"No disk path given. Use {sys.argv[0]} --disk-image <linux>")
         sys.exit(1)
     elif args.gpu_mmio_trace is None:
-        print(
-            "No MMIO trace path. Use %s --gpu-mmio-trace <path>" % sys.argv[0]
-        )
+        print(f"No MMIO trace path. Use {sys.argv[0]} --gpu-mmio-trace <path>")
         sys.exit(1)
 
     _, tempRunscript = tempfile.mkstemp()
diff --git a/configs/example/gpufs/hip_rodinia.py b/configs/example/gpufs/hip_rodinia.py
index a6c7c504c1..8ed951b55e 100644
--- a/configs/example/gpufs/hip_rodinia.py
+++ b/configs/example/gpufs/hip_rodinia.py
@@ -107,18 +107,16 @@ if __name__ == "__m5_main__":
 
     # Create temp script to run application
     if args.app is None:
-        print("No application given. Use %s -a <app>" % sys.argv[0])
+        print(f"No application given. Use {sys.argv[0]} -a <app>")
         sys.exit(1)
     elif args.kernel is None:
-        print("No kernel path given. Use %s --kernel <vmlinux>" % sys.argv[0])
+        print(f"No kernel path given. Use {sys.argv[0]} --kernel <vmlinux>")
         sys.exit(1)
     elif args.disk_image is None:
-        print("No disk path given. Use %s --disk-image <linux>" % sys.argv[0])
+        print(f"No disk path given. Use {sys.argv[0]} --disk-image <linux>")
         sys.exit(1)
     elif args.gpu_mmio_trace is None:
-        print(
-            "No MMIO trace path. Use %s --gpu-mmio-trace <path>" % sys.argv[0]
-        )
+        print(f"No MMIO trace path. Use {sys.argv[0]} --gpu-mmio-trace <path>")
         sys.exit(1)
 
     _, tempRunscript = tempfile.mkstemp()
diff --git a/configs/example/gpufs/hip_samples.py b/configs/example/gpufs/hip_samples.py
index 0d9263e128..ccc1719639 100644
--- a/configs/example/gpufs/hip_samples.py
+++ b/configs/example/gpufs/hip_samples.py
@@ -97,18 +97,16 @@ if __name__ == "__m5_main__":
 
     # Create temp script to run application
     if args.app is None:
-        print("No application given. Use %s -a <app>" % sys.argv[0])
+        print(f"No application given. Use {sys.argv[0]} -a <app>")
         sys.exit(1)
     elif args.kernel is None:
-        print("No kernel path given. Use %s --kernel <vmlinux>" % sys.argv[0])
+        print(f"No kernel path given. Use {sys.argv[0]} --kernel <vmlinux>")
         sys.exit(1)
     elif args.disk_image is None:
-        print("No disk path given. Use %s --disk-image <linux>" % sys.argv[0])
+        print(f"No disk path given. Use {sys.argv[0]} --disk-image <linux>")
         sys.exit(1)
     elif args.gpu_mmio_trace is None:
-        print(
-            "No MMIO trace path. Use %s --gpu-mmio-trace <path>" % sys.argv[0]
-        )
+        print(f"No MMIO trace path. Use {sys.argv[0]} --gpu-mmio-trace <path>")
         sys.exit(1)
 
     _, tempRunscript = tempfile.mkstemp()
diff --git a/configs/example/gpufs/runfs.py b/configs/example/gpufs/runfs.py
index 86b91034b0..4a28068a11 100644
--- a/configs/example/gpufs/runfs.py
+++ b/configs/example/gpufs/runfs.py
@@ -184,7 +184,7 @@ def runGpuFSSystem(args):
             break
         else:
             print(
-                "Unknown exit event: %s. Continuing..." % exit_event.getCause()
+                f"Unknown exit event: {exit_event.getCause()}. Continuing..."
             )
 
     print(
diff --git a/configs/example/gpufs/vega10_kvm.py b/configs/example/gpufs/vega10_kvm.py
index 48e2d69516..54253bece5 100644
--- a/configs/example/gpufs/vega10_kvm.py
+++ b/configs/example/gpufs/vega10_kvm.py
@@ -82,18 +82,16 @@ if __name__ == "__m5_main__":
 
     # Create temp script to run application
     if args.app is None:
-        print("No application given. Use %s -a <app>" % sys.argv[0])
+        print(f"No application given. Use {sys.argv[0]} -a <app>")
         sys.exit(1)
     elif args.kernel is None:
-        print("No kernel path given. Use %s --kernel <vmlinux>" % sys.argv[0])
+        print(f"No kernel path given. Use {sys.argv[0]} --kernel <vmlinux>")
         sys.exit(1)
     elif args.disk_image is None:
-        print("No disk path given. Use %s --disk-image <linux>" % sys.argv[0])
+        print(f"No disk path given. Use {sys.argv[0]} --disk-image <linux>")
         sys.exit(1)
     elif args.gpu_mmio_trace is None:
-        print(
-            "No MMIO trace path. Use %s --gpu-mmio-trace <path>" % sys.argv[0]
-        )
+        print(f"No MMIO trace path. Use {sys.argv[0]} --gpu-mmio-trace <path>")
         sys.exit(1)
     elif not os.path.isfile(args.app):
         print("Could not find applcation", args.app)
diff --git a/configs/example/hsaTopology.py b/configs/example/hsaTopology.py
index 691e8c2a58..909b9ef519 100644
--- a/configs/example/hsaTopology.py
+++ b/configs/example/hsaTopology.py
@@ -118,11 +118,11 @@ def createVegaTopology(options):
 
     # Populate CPU node properties
     node_prop = (
-        "cpu_cores_count %s\n" % options.num_cpus
+        f"cpu_cores_count {options.num_cpus}\n"
         + "simd_count 0\n"
         + "mem_banks_count 1\n"
         + "caches_count 0\n"
-        + "io_links_count %s\n" % io_links
+        + f"io_links_count {io_links}\n"
         + "cpu_core_id_base 0\n"
         + "simd_id_base 0\n"
         + "max_waves_per_simd 0\n"
@@ -200,8 +200,8 @@ def createVegaTopology(options):
         "cpu_cores_count 0\n"
         + "simd_count 256\n"
         + "mem_banks_count 1\n"
-        + "caches_count %s\n" % caches
-        + "io_links_count %s\n" % io_links
+        + f"caches_count {caches}\n"
+        + f"io_links_count {io_links}\n"
         + "cpu_core_id_base 0\n"
         + "simd_id_base 2147487744\n"
         + "max_waves_per_simd 10\n"
@@ -212,11 +212,11 @@ def createVegaTopology(options):
         + "simd_arrays_per_engine 1\n"
         + "cu_per_simd_array 16\n"
         + "simd_per_cu 4\n"
-        + "max_slots_scratch_cu %s\n" % cu_scratch
+        + f"max_slots_scratch_cu {cu_scratch}\n"
         + "vendor_id 4098\n"
         + "device_id 26720\n"
         + "location_id 1024\n"
-        + "drm_render_minor %s\n" % drm_num
+        + f"drm_render_minor {drm_num}\n"
         + "hive_id 0\n"
         + "num_sdma_engines 2\n"
         + "num_sdma_xgmi_engines 0\n"
@@ -313,11 +313,11 @@ def createFijiTopology(options):
 
     # Populate CPU node properties
     node_prop = (
-        "cpu_cores_count %s\n" % options.num_cpus
+        f"cpu_cores_count {options.num_cpus}\n"
         + "simd_count 0\n"
         + "mem_banks_count 1\n"
         + "caches_count 0\n"
-        + "io_links_count %s\n" % io_links
+        + f"io_links_count {io_links}\n"
         + "cpu_core_id_base 0\n"
         + "simd_id_base 0\n"
         + "max_waves_per_simd 0\n"
@@ -392,33 +392,30 @@ def createFijiTopology(options):
     # Populate GPU node properties
     node_prop = (
         "cpu_cores_count 0\n"
-        + "simd_count %s\n"
-        % (options.num_compute_units * options.simds_per_cu)
+        + f"simd_count {options.num_compute_units * options.simds_per_cu}\n"
         + "mem_banks_count 1\n"
-        + "caches_count %s\n" % caches
-        + "io_links_count %s\n" % io_links
+        + f"caches_count {caches}\n"
+        + f"io_links_count {io_links}\n"
         + "cpu_core_id_base 0\n"
         + "simd_id_base 2147487744\n"
-        + "max_waves_per_simd %s\n" % options.wfs_per_simd
-        + "lds_size_in_kb %s\n" % int(options.lds_size / 1024)
+        + f"max_waves_per_simd {options.wfs_per_simd}\n"
+        + f"lds_size_in_kb {int(options.lds_size / 1024)}\n"
         + "gds_size_in_kb 0\n"
-        + "wave_front_size %s\n" % options.wf_size
+        + f"wave_front_size {options.wf_size}\n"
         + "array_count 4\n"
-        + "simd_arrays_per_engine %s\n" % options.sa_per_complex
-        + "cu_per_simd_array %s\n" % options.cu_per_sa
-        + "simd_per_cu %s\n" % options.simds_per_cu
+        + f"simd_arrays_per_engine {options.sa_per_complex}\n"
+        + f"cu_per_simd_array {options.cu_per_sa}\n"
+        + f"simd_per_cu {options.simds_per_cu}\n"
         + "max_slots_scratch_cu 32\n"
         + "vendor_id 4098\n"
         + "device_id 29440\n"
         + "location_id 512\n"
-        + "drm_render_minor %s\n" % drm_num
-        + "max_engine_clk_fcompute %s\n"
-        % int(toFrequency(options.gpu_clock) / 1e6)
+        + f"drm_render_minor {drm_num}\n"
+        + f"max_engine_clk_fcompute {int(toFrequency(options.gpu_clock) / 1000000.0)}\n"
         + "local_mem_size 4294967296\n"
         + "fw_version 730\n"
         + "capability 4736\n"
-        + "max_engine_clk_ccompute %s\n"
-        % int(toFrequency(options.CPUClock) / 1e6)
+        + f"max_engine_clk_ccompute {int(toFrequency(options.CPUClock) / 1000000.0)}\n"
     )
 
     file_append((node_dir, "properties"), node_prop)
@@ -484,34 +481,31 @@ def createCarrizoTopology(options):
     # populate global node properties
     # NOTE: SIMD count triggers a valid GPU agent creation
     node_prop = (
-        "cpu_cores_count %s\n" % options.num_cpus
-        + "simd_count %s\n"
-        % (options.num_compute_units * options.simds_per_cu)
-        + "mem_banks_count %s\n" % mem_banks_cnt
+        f"cpu_cores_count {options.num_cpus}\n"
+        + f"simd_count {options.num_compute_units * options.simds_per_cu}\n"
+        + f"mem_banks_count {mem_banks_cnt}\n"
         + "caches_count 0\n"
         + "io_links_count 0\n"
         + "cpu_core_id_base 16\n"
         + "simd_id_base 2147483648\n"
-        + "max_waves_per_simd %s\n" % options.wfs_per_simd
-        + "lds_size_in_kb %s\n" % int(options.lds_size / 1024)
+        + f"max_waves_per_simd {options.wfs_per_simd}\n"
+        + f"lds_size_in_kb {int(options.lds_size / 1024)}\n"
         + "gds_size_in_kb 0\n"
-        + "wave_front_size %s\n" % options.wf_size
+        + f"wave_front_size {options.wf_size}\n"
         + "array_count 1\n"
-        + "simd_arrays_per_engine %s\n" % options.sa_per_complex
-        + "cu_per_simd_array %s\n" % options.cu_per_sa
-        + "simd_per_cu %s\n" % options.simds_per_cu
+        + f"simd_arrays_per_engine {options.sa_per_complex}\n"
+        + f"cu_per_simd_array {options.cu_per_sa}\n"
+        + f"simd_per_cu {options.simds_per_cu}\n"
         + "max_slots_scratch_cu 32\n"
         + "vendor_id 4098\n"
-        + "device_id %s\n" % device_id
+        + f"device_id {device_id}\n"
         + "location_id 8\n"
-        + "drm_render_minor %s\n" % drm_num
-        + "max_engine_clk_fcompute %s\n"
-        % int(toFrequency(options.gpu_clock) / 1e6)
+        + f"drm_render_minor {drm_num}\n"
+        + f"max_engine_clk_fcompute {int(toFrequency(options.gpu_clock) / 1000000.0)}\n"
         + "local_mem_size 0\n"
         + "fw_version 699\n"
         + "capability 4738\n"
-        + "max_engine_clk_ccompute %s\n"
-        % int(toFrequency(options.CPUClock) / 1e6)
+        + f"max_engine_clk_ccompute {int(toFrequency(options.CPUClock) / 1000000.0)}\n"
     )
 
     file_append((node_dir, "properties"), node_prop)
diff --git a/configs/example/lupv/run_lupv.py b/configs/example/lupv/run_lupv.py
index 0056cf8bb4..d92ea3fa3f 100644
--- a/configs/example/lupv/run_lupv.py
+++ b/configs/example/lupv/run_lupv.py
@@ -113,6 +113,4 @@ print("Beginning simulation!")
 
 exit_event = m5.simulate(args.max_ticks)
 
-print(
-    "Exiting @ tick {} because {}.".format(m5.curTick(), exit_event.getCause())
-)
+print(f"Exiting @ tick {m5.curTick()} because {exit_event.getCause()}.")
diff --git a/configs/example/memcheck.py b/configs/example/memcheck.py
index a50644b2b1..aee2ef74d0 100644
--- a/configs/example/memcheck.py
+++ b/configs/example/memcheck.py
@@ -330,7 +330,7 @@ def make_cache_level(ncaches, prototypes, level, next_cache):
 make_cache_level(cachespec, cache_proto, len(cachespec), None)
 
 # Connect the lowest level crossbar to the memory
-last_subsys = getattr(system, "l%dsubsys0" % len(cachespec))
+last_subsys = getattr(system, f"l{len(cachespec)}subsys0")
 last_subsys.xbar.mem_side_ports = system.physmem.port
 last_subsys.xbar.point_of_coherency = True
 
diff --git a/configs/example/memtest.py b/configs/example/memtest.py
index 58d762dc60..0cbbab5b4f 100644
--- a/configs/example/memtest.py
+++ b/configs/example/memtest.py
@@ -211,8 +211,7 @@ else:
 
     if numtesters(cachespec, testerspec) > block_size:
         print(
-            "Error: Limited to %s testers because of false sharing"
-            % (block_size)
+            f"Error: Limited to {block_size} testers because of false sharing"
         )
         sys.exit(1)
 
@@ -351,7 +350,7 @@ make_cache_level(cachespec, cache_proto, len(cachespec), None)
 
 # Connect the lowest level crossbar to the last-level cache and memory
 # controller
-last_subsys = getattr(system, "l%dsubsys0" % len(cachespec))
+last_subsys = getattr(system, f"l{len(cachespec)}subsys0")
 last_subsys.xbar.point_of_coherency = True
 if args.noncoherent_cache:
     system.llc = NoncoherentCache(
diff --git a/configs/example/read_config.py b/configs/example/read_config.py
index b52a73d1fa..40c20ef501 100644
--- a/configs/example/read_config.py
+++ b/configs/example/read_config.py
@@ -68,8 +68,7 @@ sim_object_classes_by_name = {
 
 def no_parser(cls, flags, param):
     raise Exception(
-        "Can't parse string: %s for parameter"
-        " class: %s" % (str(param), cls.__name__)
+        f"Can't parse string: {str(param)} for parameter class: {cls.__name__}"
     )
 
 
@@ -114,7 +113,7 @@ def memory_bandwidth_parser(cls, flags, param):
     value = 1.0 / float(param)
     # Convert to byte/s
     value = ticks.fromSeconds(value)
-    return cls("%fB/s" % value)
+    return cls(f"{value:f}B/s")
 
 
 # These parameters have trickier parsing from .ini files than might be
@@ -201,8 +200,7 @@ class ConfigManager(object):
 
         if object_type not in sim_object_classes_by_name:
             raise Exception(
-                "No SimObject type %s is available to"
-                " build: %s" % (object_type, object_name)
+                f"No SimObject type {object_type} is available to build: {object_name}"
             )
 
         object_class = sim_object_classes_by_name[object_type]
@@ -479,7 +477,7 @@ class ConfigIniFile(ConfigFile):
             if object_name == "root":
                 return child_name
             else:
-                return "%s.%s" % (object_name, child_name)
+                return f"{object_name}.{child_name}"
 
         return [(name, make_path(name)) for name in child_names]
 
diff --git a/configs/example/riscv/fs_linux.py b/configs/example/riscv/fs_linux.py
index 1a98126e92..aec126ab0d 100644
--- a/configs/example/riscv/fs_linux.py
+++ b/configs/example/riscv/fs_linux.py
@@ -91,7 +91,7 @@ from common import Options
 
 
 def generateMemNode(state, mem_range):
-    node = FdtNode("memory@%x" % int(mem_range.start))
+    node = FdtNode(f"memory@{int(mem_range.start):x}")
     node.append(FdtPropertyStrings("device_type", ["memory"]))
     node.append(
         FdtPropertyWords(
diff --git a/configs/example/sst/riscv_fs.py b/configs/example/sst/riscv_fs.py
index fb22f29190..fc8f8618c4 100644
--- a/configs/example/sst/riscv_fs.py
+++ b/configs/example/sst/riscv_fs.py
@@ -35,7 +35,7 @@ import argparse
 
 
 def generateMemNode(state, mem_range):
-    node = FdtNode("memory@%x" % int(mem_range.start))
+    node = FdtNode(f"memory@{int(mem_range.start):x}")
     node.append(FdtPropertyStrings("device_type", ["memory"]))
     node.append(
         FdtPropertyWords(
diff --git a/configs/learning_gem5/part1/caches.py b/configs/learning_gem5/part1/caches.py
index 9bb06ab2e6..3f7d26ed21 100644
--- a/configs/learning_gem5/part1/caches.py
+++ b/configs/learning_gem5/part1/caches.py
@@ -75,7 +75,7 @@ class L1ICache(L1Cache):
     size = "16kB"
 
     SimpleOpts.add_option(
-        "--l1i_size", help="L1 instruction cache size. Default: %s" % size
+        "--l1i_size", help=f"L1 instruction cache size. Default: {size}"
     )
 
     def __init__(self, opts=None):
@@ -96,7 +96,7 @@ class L1DCache(L1Cache):
     size = "64kB"
 
     SimpleOpts.add_option(
-        "--l1d_size", help="L1 data cache size. Default: %s" % size
+        "--l1d_size", help=f"L1 data cache size. Default: {size}"
     )
 
     def __init__(self, opts=None):
@@ -122,9 +122,7 @@ class L2Cache(Cache):
     mshrs = 20
     tgts_per_mshr = 12
 
-    SimpleOpts.add_option(
-        "--l2_size", help="L2 cache size. Default: %s" % size
-    )
+    SimpleOpts.add_option("--l2_size", help=f"L2 cache size. Default: {size}")
 
     def __init__(self, opts=None):
         super(L2Cache, self).__init__()
diff --git a/configs/learning_gem5/part3/ruby_test.py b/configs/learning_gem5/part3/ruby_test.py
index d0cc1be613..e46f07bb0a 100644
--- a/configs/learning_gem5/part3/ruby_test.py
+++ b/configs/learning_gem5/part3/ruby_test.py
@@ -78,6 +78,4 @@ m5.instantiate()
 
 print("Beginning simulation!")
 exit_event = m5.simulate()
-print(
-    "Exiting @ tick {} because {}".format(m5.curTick(), exit_event.getCause())
-)
+print(f"Exiting @ tick {m5.curTick()} because {exit_event.getCause()}")
diff --git a/configs/learning_gem5/part3/simple_ruby.py b/configs/learning_gem5/part3/simple_ruby.py
index b62a7195c8..f3f84353e8 100644
--- a/configs/learning_gem5/part3/simple_ruby.py
+++ b/configs/learning_gem5/part3/simple_ruby.py
@@ -110,6 +110,4 @@ m5.instantiate()
 
 print("Beginning simulation!")
 exit_event = m5.simulate()
-print(
-    "Exiting @ tick {} because {}".format(m5.curTick(), exit_event.getCause())
-)
+print(f"Exiting @ tick {m5.curTick()} because {exit_event.getCause()}")
diff --git a/configs/ruby/CHI.py b/configs/ruby/CHI.py
index df97b923ae..96537e558a 100644
--- a/configs/ruby/CHI.py
+++ b/configs/ruby/CHI.py
@@ -280,6 +280,6 @@ def create_system(
     elif options.topology in ["Crossbar", "Pt2Pt"]:
         topology = create_topology(network_cntrls, options)
     else:
-        m5.fatal("%s not supported!" % options.topology)
+        m5.fatal(f"{options.topology} not supported!")
 
     return (cpu_sequencers, mem_cntrls, topology)
diff --git a/configs/ruby/CHI_config.py b/configs/ruby/CHI_config.py
index 6d2084bc7b..4f2580c373 100644
--- a/configs/ruby/CHI_config.py
+++ b/configs/ruby/CHI_config.py
@@ -428,7 +428,7 @@ class CPUSequencerWrapper:
         cpu.icache_port = self.inst_seq.in_ports
         for p in cpu._cached_ports:
             if str(p) != "icache_port":
-                exec("cpu.%s = self.data_seq.in_ports" % p)
+                exec(f"cpu.{p} = self.data_seq.in_ports")
         cpu.connectUncachedPorts(
             self.data_seq.in_ports, self.data_seq.interrupt_out_port
         )
diff --git a/configs/ruby/Ruby.py b/configs/ruby/Ruby.py
index 3ca7b95140..d3c2efbb3f 100644
--- a/configs/ruby/Ruby.py
+++ b/configs/ruby/Ruby.py
@@ -120,8 +120,8 @@ def define_options(parser):
     )
 
     protocol = buildEnv["PROTOCOL"]
-    exec("from . import %s" % protocol)
-    eval("%s.define_options(parser)" % protocol)
+    exec(f"from . import {protocol}")
+    eval(f"{protocol}.define_options(parser)")
     Network.define_options(parser)
 
 
@@ -207,8 +207,8 @@ def create_topology(controllers, options):
     found in configs/topologies/BaseTopology.py
     This is a wrapper for the legacy topologies.
     """
-    exec("import topologies.%s as Topo" % options.topology)
-    topology = eval("Topo.%s(controllers)" % options.topology)
+    exec(f"import topologies.{options.topology} as Topo")
+    topology = eval(f"Topo.{options.topology}(controllers)")
     return topology
 
 
@@ -242,7 +242,7 @@ def create_system(
         cpus = system.cpu
 
     protocol = buildEnv["PROTOCOL"]
-    exec("from . import %s" % protocol)
+    exec(f"from . import {protocol}")
     try:
         (cpu_sequencers, dir_cntrls, topology) = eval(
             "%s.create_system(options, full_system, system, dma_ports,\
@@ -250,7 +250,7 @@ def create_system(
             % protocol
         )
     except:
-        print("Error: could not create sytem for ruby protocol %s" % protocol)
+        print(f"Error: could not create sytem for ruby protocol {protocol}")
         raise
 
     # Create the network topology
diff --git a/configs/topologies/CustomMesh.py b/configs/topologies/CustomMesh.py
index 088e4b9cfe..c62b39a9c2 100644
--- a/configs/topologies/CustomMesh.py
+++ b/configs/topologies/CustomMesh.py
@@ -325,9 +325,7 @@ class CustomMesh(SimpleTopology):
                 rni_io_params = check_same(type(n).NoC_Params, rni_io_params)
             else:
                 fatal(
-                    "topologies.CustomMesh: {} not supported".format(
-                        n.__class__.__name__
-                    )
+                    f"topologies.CustomMesh: {n.__class__.__name__} not supported"
                 )
 
         # Create all mesh routers
@@ -420,11 +418,11 @@ class CustomMesh(SimpleTopology):
             if pair_debug:
                 print(c.path())
                 for r in c.addr_ranges:
-                    print("%s" % r)
+                    print(f"{r}")
                 for p in c._pairing:
                     print("\t" + p.path())
                     for r in p.addr_ranges:
-                        print("\t%s" % r)
+                        print(f"\t{r}")
 
         # all must be paired
         for c in all_cache:
@@ -516,8 +514,8 @@ class CustomMesh(SimpleTopology):
                 assert len(c._pairing) == pairing_check
                 print(c.path())
                 for r in c.addr_ranges:
-                    print("%s" % r)
+                    print(f"{r}")
                 for p in c._pairing:
                     print("\t" + p.path())
                     for r in p.addr_ranges:
-                        print("\t%s" % r)
+                        print(f"\t{r}")
diff --git a/site_scons/gem5_scons/builders/switching_headers.py b/site_scons/gem5_scons/builders/switching_headers.py
index a56ab51c86..92bd613508 100755
--- a/site_scons/gem5_scons/builders/switching_headers.py
+++ b/site_scons/gem5_scons/builders/switching_headers.py
@@ -61,7 +61,7 @@ def SwitchingHeaders(env):
             os.path.realpath(dp), os.path.realpath(env["BUILDDIR"])
         )
         with open(path, "w") as hdr:
-            print('#include "%s/%s/%s"' % (dp, subdir, fp), file=hdr)
+            print(f'#include "{dp}/{subdir}/{fp}"', file=hdr)
 
     switching_header_action = MakeAction(
         build_switching_header, Transform("GENERATE")
diff --git a/site_scons/gem5_scons/configure.py b/site_scons/gem5_scons/configure.py
index 55a0d7d399..d04cdd49cb 100644
--- a/site_scons/gem5_scons/configure.py
+++ b/site_scons/gem5_scons/configure.py
@@ -46,7 +46,7 @@ import SCons.Util
 
 
 def CheckCxxFlag(context, flag, autoadd=True):
-    context.Message("Checking for compiler %s support... " % flag)
+    context.Message(f"Checking for compiler {flag} support... ")
     last_cxxflags = context.env["CXXFLAGS"]
     context.env.Append(CXXFLAGS=[flag])
     pre_werror = context.env["CXXFLAGS"]
@@ -60,7 +60,7 @@ def CheckCxxFlag(context, flag, autoadd=True):
 
 
 def CheckLinkFlag(context, flag, autoadd=True, set_for_shared=True):
-    context.Message("Checking for linker %s support... " % flag)
+    context.Message(f"Checking for linker {flag} support... ")
     last_linkflags = context.env["LINKFLAGS"]
     context.env.Append(LINKFLAGS=[flag])
     pre_werror = context.env["LINKFLAGS"]
@@ -78,7 +78,7 @@ def CheckLinkFlag(context, flag, autoadd=True, set_for_shared=True):
 
 # Add a custom Check function to test for structure members.
 def CheckMember(context, include, decl, member, include_quotes="<>"):
-    context.Message("Checking for member %s in %s..." % (member, decl))
+    context.Message(f"Checking for member {member} in {decl}...")
     text = """
 #include %(header)s
 int main(){
@@ -128,8 +128,8 @@ def CheckPkgConfig(context, pkgs, *args):
     assert pkgs
 
     for pkg in pkgs:
-        context.Message("Checking for pkg-config package %s... " % pkg)
-        ret = context.TryAction("pkg-config %s" % pkg)[0]
+        context.Message(f"Checking for pkg-config package {pkg}... ")
+        ret = context.TryAction(f"pkg-config {pkg}")[0]
         if not ret:
             context.Result(ret)
             continue
diff --git a/site_scons/site_tools/git.py b/site_scons/site_tools/git.py
index b47de77612..362c20b105 100644
--- a/site_scons/site_tools/git.py
+++ b/site_scons/site_tools/git.py
@@ -63,7 +63,7 @@ def install_style_hooks(env):
             ).strip("\n")
         )
     except Exception as e:
-        print("Warning: Failed to find git repo directory: %s" % e)
+        print(f"Warning: Failed to find git repo directory: {e}")
         return
 
     git_hooks = gitdir.Dir("hooks")
diff --git a/src/arch/arm/ArmSystem.py b/src/arch/arm/ArmSystem.py
index 25cf8b2172..7367d80eec 100644
--- a/src/arch/arm/ArmSystem.py
+++ b/src/arch/arm/ArmSystem.py
@@ -262,7 +262,7 @@ class ArmSystem(System):
         # root instead of appended.
 
         def generateMemNode(mem_range):
-            node = FdtNode("memory@%x" % int(mem_range.start))
+            node = FdtNode(f"memory@{int(mem_range.start):x}")
             node.append(FdtPropertyStrings("device_type", ["memory"]))
             node.append(
                 FdtPropertyWords(
diff --git a/src/arch/arm/fastmodel/FastModel.py b/src/arch/arm/fastmodel/FastModel.py
index 1ea3c5e8d7..8ba537623a 100644
--- a/src/arch/arm/fastmodel/FastModel.py
+++ b/src/arch/arm/fastmodel/FastModel.py
@@ -39,11 +39,11 @@ def AMBA_INITIATOR_ROLE(width):
 
 
 def SC_REQUEST_PORT_ROLE(port_type):
-    return "SC REQUEST PORT for %s" % port_type
+    return f"SC REQUEST PORT for {port_type}"
 
 
 def SC_RESPONSE_PORT_ROLE(port_type):
-    return "SC RESPONSE PORT for %s" % port_type
+    return f"SC RESPONSE PORT for {port_type}"
 
 
 class AmbaTargetSocket(Port):
diff --git a/src/arch/arm/fastmodel/arm_fast_model.py b/src/arch/arm/fastmodel/arm_fast_model.py
index 81b2cfe04b..5a38eb132b 100644
--- a/src/arch/arm/fastmodel/arm_fast_model.py
+++ b/src/arch/arm/fastmodel/arm_fast_model.py
@@ -64,13 +64,13 @@ def check_armlmd_license(timeout):
 
     for server in servers:
         if os.path.exists(server):
-            logging.debug("License file %s exists." % server)
+            logging.debug(f"License file {server} exists.")
             break
 
         tuple = server.split("@")
         if len(tuple) != 2:
             # Probably not a server, and we know the file doesn't exist.
-            logging.debug('License file "%s" does not exist.' % server)
+            logging.debug(f'License file "{server}" does not exist.')
             continue
 
         try:
@@ -80,17 +80,15 @@ def check_armlmd_license(timeout):
                 (tuple[1], int(tuple[0])), timeout=timeout
             )
             s.close()
-            logging.debug("License server %s is reachable." % server)
+            logging.debug(f"License server {server} is reachable.")
             break
         except Exception as e:
             logging.debug(
-                "Cannot connect to license server %s (%s: %s)."
-                % (server, type(e).__name__, e)
+                f"Cannot connect to license server {server} ({type(e).__name__}: {e})."
             )
     else:
         raise ConnectionError(
-            "Cannot connect to any of the license servers (%s)."
-            % ", ".join(servers)
+            f"Cannot connect to any of the license servers ({', '.join(servers)})."
         )
 
 
diff --git a/src/arch/isa_parser/isa_parser.py b/src/arch/isa_parser/isa_parser.py
index 39b50f06b6..0f29840c3b 100755
--- a/src/arch/isa_parser/isa_parser.py
+++ b/src/arch/isa_parser/isa_parser.py
@@ -206,13 +206,11 @@ class Format(object):
         label = "def format " + id
         self.user_code = compile(fixPythonIndentation(code), label, "exec")
         param_list = ", ".join(params)
-        f = (
-            """def defInst(_code, _context, %s):
+        f = f"""def defInst(_code, _context, {param_list}):
                 my_locals = vars().copy()
                 exec(_code, _context, my_locals)
-                return my_locals\n"""
-            % param_list
-        )
+                return my_locals
+"""
         c = compile(f, label + " wrapper", "exec")
         exec(c, globals())
         self.func = defInst
@@ -230,7 +228,7 @@ class Format(object):
         except Exception as exc:
             if debug:
                 raise
-            error(lineno, 'error defining "%s": %s.' % (name, exc))
+            error(lineno, f'error defining "{name}": {exc}.')
         for k in list(vars.keys()):
             if k not in (
                 "header_output",
@@ -250,7 +248,7 @@ class NoFormat(object):
 
     def defineInst(self, parser, name, args, lineno):
         error(
-            lineno, 'instruction definition "%s" with no active format!' % name
+            lineno, f'instruction definition "{name}" with no active format!'
         )
 
 
@@ -606,7 +604,7 @@ class ISAParser(Grammar):
             if section == "header":
                 file = "decoder.hh"
             else:
-                file = "%s.cc" % section
+                file = f"{section}.cc"
             filename = self.suffixize(file, section)
         try:
             return self.files[filename]
@@ -652,7 +650,7 @@ class ISAParser(Grammar):
             )
             fn = "decoder-g.hh.inc"
             assert fn in self.files
-            f.write('#include "%s"\n' % fn)
+            f.write(f'#include "{fn}"\n')
 
             fn = "decoder-ns.hh.inc"
             assert fn in self.files
@@ -663,26 +661,25 @@ class ISAParser(Grammar):
             )
             f.write("} // namespace gem5")
             f.write(
-                "\n#endif  // __ARCH_%s_GENERATED_DECODER_HH__\n"
-                % self.isa_name.upper()
+                f"\n#endif  // __ARCH_{self.isa_name.upper()}_GENERATED_DECODER_HH__\n"
             )
 
         # decoder method - cannot be split
         file = "decoder.cc"
         with self.open(file) as f:
             fn = "base/compiler.hh"
-            f.write('#include "%s"\n' % fn)
+            f.write(f'#include "{fn}"\n')
 
             fn = "decoder-g.cc.inc"
             assert fn in self.files
-            f.write('#include "%s"\n' % fn)
+            f.write(f'#include "{fn}"\n')
 
             fn = "decoder.hh"
-            f.write('#include "%s"\n' % fn)
+            f.write(f'#include "{fn}"\n')
 
             fn = "decode-method.cc.inc"
             # is guaranteed to have been written for parse to complete
-            f.write('#include "%s"\n' % fn)
+            f.write(f'#include "{fn}"\n')
 
         extn = re.compile("(\.[^\.]+)$")
 
@@ -697,10 +694,10 @@ class ISAParser(Grammar):
             with self.open(file) as f:
                 fn = "decoder-g.cc.inc"
                 assert fn in self.files
-                f.write('#include "%s"\n' % fn)
+                f.write(f'#include "{fn}"\n')
 
                 fn = "decoder.hh"
-                f.write('#include "%s"\n' % fn)
+                f.write(f'#include "{fn}"\n')
 
                 fn = "decoder-ns.cc.inc"
                 assert fn in self.files
@@ -708,7 +705,7 @@ class ISAParser(Grammar):
                 print("namespace %s {" % self.namespace, file=f)
                 if splits > 1:
                     print("#define __SPLIT %u" % i, file=f)
-                print('#include "%s"' % fn, file=f)
+                print(f'#include "{fn}"', file=f)
                 print("} // namespace %s" % self.namespace, file=f)
                 print("} // namespace gem5", file=f)
 
@@ -721,7 +718,7 @@ class ISAParser(Grammar):
             with self.open(file) as f:
                 fn = "exec-g.cc.inc"
                 assert fn in self.files
-                f.write('#include "%s"\n' % fn)
+                f.write(f'#include "{fn}"\n')
                 f.write('#include "cpu/exec_context.hh"\n')
                 f.write('#include "decoder.hh"\n')
 
@@ -731,7 +728,7 @@ class ISAParser(Grammar):
                 print("namespace %s {" % self.namespace, file=f)
                 if splits > 1:
                     print("#define __SPLIT %u" % i, file=f)
-                print('#include "%s"' % fn, file=f)
+                print(f'#include "{fn}"', file=f)
                 print("} // namespace %s" % self.namespace, file=f)
                 print("} // namespace gem5", file=f)
 
@@ -847,7 +844,7 @@ class ISAParser(Grammar):
         try:
             t.value = int(t.value, 0)
         except ValueError:
-            error(t.lexer.lineno, 'Integer value "%s" too large' % t.value)
+            error(t.lexer.lineno, f'Integer value "{t.value}" too large')
             t.value = 0
         return t
 
@@ -902,7 +899,7 @@ class ISAParser(Grammar):
 
     # Error handler
     def t_error(self, t):
-        error(t.lexer.lineno, "illegal character '%s'" % t.value[0])
+        error(t.lexer.lineno, f"illegal character '{t.value[0]}'")
         t.skip(1)
 
     #####################################################################
@@ -1060,7 +1057,7 @@ del wrap
             traceback.print_exc(file=sys.stdout)
             if debug:
                 raise
-            error(t.lineno(1), "In global let block: %s" % exc)
+            error(t.lineno(1), f"In global let block: {exc}")
         GenCode(
             self,
             header_output=self.exportContext["header_output"],
@@ -1078,7 +1075,7 @@ del wrap
         except Exception as exc:
             if debug:
                 raise
-            error(t.lineno(1), "In def operand_types: %s" % exc)
+            error(t.lineno(1), f"In def operand_types: {exc}")
 
     # Define the mapping from operand names to operand classes and
     # other traits.  Stored in operandNameMap.
@@ -1094,7 +1091,7 @@ del wrap
         except Exception as exc:
             if debug:
                 raise
-            error(t.lineno(1), "In def operands: %s" % exc)
+            error(t.lineno(1), f"In def operands: {exc}")
         self.buildOperandNameMap(user_dict, t.lexer.lineno)
 
     # A bitfield definition looks like:
@@ -1105,7 +1102,7 @@ del wrap
         expr = "bits(machInst, %2d, %2d)" % (t[6], t[8])
         if t[2] == "signed":
             expr = "sext<%d>(%s)" % (t[6] - t[8] + 1, expr)
-        hash_define = "#undef %s\n#define %s\t%s\n" % (t[4], t[4], expr)
+        hash_define = f"#undef {t[4]}\n#define {t[4]}\t{expr}\n"
         GenCode(self, header_output=hash_define).emit()
 
     # alternate form for single bit: 'def [signed] bitfield <ID> [<bit>]'
@@ -1114,7 +1111,7 @@ del wrap
         expr = "bits(machInst, %2d, %2d)" % (t[6], t[6])
         if t[2] == "signed":
             expr = "sext<%d>(%s)" % (1, expr)
-        hash_define = "#undef %s\n#define %s\t%s\n" % (t[4], t[4], expr)
+        hash_define = f"#undef {t[4]}\n#define {t[4]}\t{expr}\n"
         GenCode(self, header_output=hash_define).emit()
 
     # alternate form for structure member: 'def bitfield <ID> <ID>'
@@ -1124,8 +1121,8 @@ del wrap
             error(
                 t.lineno(1), "error: structure bitfields are always unsigned."
             )
-        expr = "machInst.%s" % t[5]
-        hash_define = "#undef %s\n#define %s\t%s\n" % (t[4], t[4], expr)
+        expr = f"machInst.{t[5]}"
+        hash_define = f"#undef {t[4]}\n#define {t[4]}\t{expr}\n"
         GenCode(self, header_output=hash_define).emit()
 
     def p_id_with_dot_0(self, t):
@@ -1147,7 +1144,7 @@ del wrap
     def p_def_template(self, t):
         "def_template : DEF TEMPLATE ID CODELIT SEMI"
         if t[3] in self.templateMap:
-            print("warning: template %s already defined" % t[3])
+            print(f"warning: template {t[3]} already defined")
         self.templateMap[t[3]] = Template(self, t[4])
 
     # An instruction format definition looks like
@@ -1326,9 +1323,9 @@ StaticInstPtr
         "push_format_id : ID"
         try:
             self.formatStack.push(self.formatMap[t[1]])
-            t[0] = ("", "// format %s" % t[1])
+            t[0] = ("", f"// format {t[1]}")
         except KeyError:
-            error(t.lineno(1), 'instruction format "%s" not defined.' % t[1])
+            error(t.lineno(1), f'instruction format "{t[1]}" not defined.')
 
     # Nested decode block: if the value of the current field matches
     # the specified constant(s), do a nested decode on some other field.
@@ -1339,7 +1336,7 @@ StaticInstPtr
         # just wrap the decoding code from the block as a case in the
         # outer switch statement.
         codeObj.wrap_decode_block(
-            "\n%s\n" % "".join(case_list), "GEM5_UNREACHABLE;\n"
+            f"\n{''.join(case_list)}\n", "GEM5_UNREACHABLE;\n"
         )
         codeObj.has_decode_default = case_list == ["default:"]
         t[0] = codeObj
@@ -1349,7 +1346,7 @@ StaticInstPtr
         "decode_stmt : case_list COLON inst SEMI"
         case_list = t[1]
         codeObj = t[3]
-        codeObj.wrap_decode_block("\n%s" % "".join(case_list), "break;\n")
+        codeObj.wrap_decode_block(f"\n{''.join(case_list)}", "break;\n")
         codeObj.has_decode_default = case_list == ["default:"]
         t[0] = codeObj
 
@@ -1368,7 +1365,7 @@ StaticInstPtr
             return "case %#x: " % lit
 
     def prep_str_lit_case_label(self, lit):
-        return "case %s: " % lit
+        return f"case {lit}: "
 
     def p_case_list_1(self, t):
         "case_list : INTLIT"
@@ -1399,7 +1396,7 @@ StaticInstPtr
         args = ",".join(list(map(str, t[3])))
         args = re.sub("(?m)^", "//", args)
         args = re.sub("^//", "", args)
-        comment = "\n// %s::%s(%s)\n" % (currentFormat.id, t[1], args)
+        comment = f"\n// {currentFormat.id}::{t[1]}({args})\n"
         codeObj.prepend_all(comment)
         t[0] = codeObj
 
@@ -1410,10 +1407,10 @@ StaticInstPtr
         try:
             format = self.formatMap[t[1]]
         except KeyError:
-            error(t.lineno(1), 'instruction format "%s" not defined.' % t[1])
+            error(t.lineno(1), f'instruction format "{t[1]}" not defined.')
 
         codeObj = format.defineInst(self, t[3], t[5], t.lexer.lineno)
-        comment = "\n// %s::%s(%s)\n" % (t[1], t[3], t[5])
+        comment = f"\n// {t[1]}::{t[3]}({t[5]})\n"
         codeObj.prepend_all(comment)
         t[0] = codeObj
 
@@ -1503,7 +1500,7 @@ StaticInstPtr
     # t.value)
     def p_error(self, t):
         if t:
-            error(t.lexer.lineno, "syntax error at '%s'" % t.value)
+            error(t.lexer.lineno, f"syntax error at '{t.value}'")
         else:
             error("unknown syntax error")
 
@@ -1523,7 +1520,7 @@ StaticInstPtr
 
         # make sure we haven't already defined this one
         if id in self.formatMap:
-            error(lineno, "format %s redefined." % id)
+            error(lineno, f"format {id} redefined.")
 
         # create new object and store in global map
         self.formatMap[id] = Format(id, params, code)
@@ -1641,7 +1638,7 @@ StaticInstPtr
         try:
             contents = open(filename).read()
         except IOError:
-            error('Error including file "%s"' % filename)
+            error(f'Error including file "{filename}"')
 
         self.fileNameStack.push(LineTracker(filename))
 
@@ -1691,7 +1688,7 @@ StaticInstPtr
             self._parse_isa_desc(*args, **kwargs)
         except ISAParserError as e:
             print(backtrace(self.fileNameStack))
-            print("At %s:" % e.lineno)
+            print(f"At {e.lineno}:")
             print(e)
             sys.exit(1)
 
diff --git a/src/arch/isa_parser/operand_list.py b/src/arch/isa_parser/operand_list.py
index 8df36c711b..5741a52324 100755
--- a/src/arch/isa_parser/operand_list.py
+++ b/src/arch/isa_parser/operand_list.py
@@ -205,8 +205,7 @@ class SubOperandList(OperandList):
             op_desc = requestor_list.find_base(op_base)
             if not op_desc:
                 error(
-                    "Found operand %s which is not in the requestor list!"
-                    % op_base
+                    f"Found operand {op_base} which is not in the requestor list!"
                 )
             else:
                 # See if we've already found this operand
diff --git a/src/arch/isa_parser/operand_types.py b/src/arch/isa_parser/operand_types.py
index 4786f88774..174a54cd4c 100755
--- a/src/arch/isa_parser/operand_types.py
+++ b/src/arch/isa_parser/operand_types.py
@@ -286,16 +286,16 @@ class VecRegOperand(RegOperand):
         else:
             ext = dflt_elem_ext
         ctype = self.parser.operandTypeMap[ext]
-        return "\n\t%s %s = 0;" % (ctype, elem_name)
+        return f"\n\t{ctype} {elem_name} = 0;"
 
     def makeDecl(self):
         if not self.is_dest and self.is_src:
-            c_decl = "\t/* Vars for %s*/" % (self.base_name)
+            c_decl = f"\t/* Vars for {self.base_name}*/"
             if hasattr(self, "active_elems"):
                 if self.active_elems:
                     for elem in self.active_elems:
                         c_decl += self.makeDeclElem(elem)
-            return c_decl + "\t/* End vars for %s */\n" % (self.base_name)
+            return c_decl + f"\t/* End vars for {self.base_name} */\n"
         else:
             return ""
 
@@ -308,12 +308,7 @@ class VecRegOperand(RegOperand):
         else:
             ext = dflt_elem_ext
         ctype = self.parser.operandTypeMap[ext]
-        c_read = "\t\t%s& %s = %s[%s];\n" % (
-            ctype,
-            elem_name,
-            self.base_name,
-            elem_spec,
-        )
+        c_read = f"\t\t{ctype}& {elem_name} = {self.base_name}[{elem_spec}];\n"
         return c_read
 
     def makeReadW(self):
@@ -346,7 +341,7 @@ class VecRegOperand(RegOperand):
         else:
             ext = dflt_elem_ext
         ctype = self.parser.operandTypeMap[ext]
-        c_read = "\t\t%s = %s[%s];\n" % (elem_name, name, elem_spec)
+        c_read = f"\t\t{elem_name} = {name}[{elem_spec}];\n"
         return c_read
 
     def makeRead(self):
@@ -610,10 +605,7 @@ class PCStateOperand(Operand):
     def makeWrite(self):
         if self.reg_spec:
             # A component of the PC state.
-            return "__parserAutoPCState.%s(%s);\n" % (
-                self.reg_spec,
-                self.base_name,
-            )
+            return f"__parserAutoPCState.{self.reg_spec}({self.base_name});\n"
         else:
             # The whole PC state itself.
             return f"xc->pcState({self.base_name});\n"
@@ -624,7 +616,7 @@ class PCStateOperand(Operand):
             ctype = self.ctype
         # Note that initializations in the declarations are solely
         # to avoid 'uninitialized variable' errors from the compiler.
-        return "%s %s = 0;\n" % (ctype, self.base_name)
+        return f"{ctype} {self.base_name} = 0;\n"
 
     def isPCState(self):
         return 1
diff --git a/src/arch/micro_asm.py b/src/arch/micro_asm.py
index 5b4378881e..1c2183c07a 100644
--- a/src/arch/micro_asm.py
+++ b/src/arch/micro_asm.py
@@ -56,9 +56,9 @@ class MicroContainer:
         self.microops.append(microop)
 
     def __str__(self):
-        string = "%s:\n" % self.name
+        string = f"{self.name}:\n"
         for microop in self.microops:
-            string += "  %s\n" % microop
+            string += f"  {microop}\n"
         return string
 
 
@@ -72,7 +72,7 @@ class RomMacroop:
         self.target = target
 
     def __str__(self):
-        return "%s: %s\n" % (self.name, self.target)
+        return f"{self.name}: {self.target}\n"
 
 
 class Rom(MicroContainer):
@@ -130,29 +130,26 @@ class Directive(Statement):
 
 def print_error(message):
     print()
-    print("*** %s" % message)
+    print(f"*** {message}")
     print()
 
 
 def handle_statement(parser, container, statement):
     if statement.is_microop:
         if statement.mnemonic not in parser.microops.keys():
-            raise Exception(
-                "Unrecognized mnemonic: {}".format(statement.mnemonic)
-            )
+            raise Exception(f"Unrecognized mnemonic: {statement.mnemonic}")
         parser.symbols[
             "__microopClassFromInsideTheAssembler"
         ] = parser.microops[statement.mnemonic]
         try:
             microop = eval(
-                "__microopClassFromInsideTheAssembler(%s)" % statement.params,
+                f"__microopClassFromInsideTheAssembler({statement.params})",
                 {},
                 parser.symbols,
             )
         except:
             print_error(
-                "Error creating microop object with mnemonic %s."
-                % statement.mnemonic
+                f"Error creating microop object with mnemonic {statement.mnemonic}."
             )
             raise
         try:
@@ -166,16 +163,13 @@ def handle_statement(parser, container, statement):
             raise
     elif statement.is_directive:
         if statement.name not in container.directives.keys():
-            raise Exception(
-                "Unrecognized directive: {}".format(statement.name)
-            )
+            raise Exception(f"Unrecognized directive: {statement.name}")
         parser.symbols[
             "__directiveFunctionFromInsideTheAssembler"
         ] = container.directives[statement.name]
         try:
             eval(
-                "__directiveFunctionFromInsideTheAssembler(%s)"
-                % statement.params,
+                f"__directiveFunctionFromInsideTheAssembler({statement.params})",
                 {},
                 parser.symbols,
             )
@@ -184,9 +178,7 @@ def handle_statement(parser, container, statement):
             print(container.directives)
             raise
     else:
-        raise Exception(
-            "Didn't recognize the type of statement {}".format(statement)
-        )
+        raise Exception(f"Didn't recognize the type of statement {statement}")
 
 
 ##########################################################################
@@ -207,7 +199,7 @@ def error(lineno, string, print_traceback=False):
         line_str = "%d:" % lineno
     else:
         line_str = ""
-    sys.exit("%s %s" % (line_str, string))
+    sys.exit(f"{line_str} {string}")
 
 
 reserved = ("DEF", "MACROOP", "ROM", "EXTERN")
@@ -358,7 +350,7 @@ t_ANY_ignore = " \t\x0c"
 
 
 def t_ANY_error(t):
-    error(t.lineno, "illegal character '%s'" % t.value[0])
+    error(t.lineno, f"illegal character '{t.value[0]}'")
     t.skip(1)
 
 
@@ -570,7 +562,7 @@ def p_directive_1(t):
 # *token*, not a grammar symbol (hence the need to use t.value)
 def p_error(t):
     if t:
-        error(t.lineno, "syntax error at '%s'" % t.value)
+        error(t.lineno, f"syntax error at '{t.value}'")
     else:
         error(0, "unknown syntax error", True)
 
diff --git a/src/arch/micro_asm_test.py b/src/arch/micro_asm_test.py
index 85bbe6b7c9..609b8a4021 100755
--- a/src/arch/micro_asm_test.py
+++ b/src/arch/micro_asm_test.py
@@ -39,10 +39,10 @@ class Bah_Tweaked(object):
 
 class Hoop(object):
     def __init__(self, first_param, second_param):
-        self.mnemonic = "hoop_%s_%s" % (first_param, second_param)
+        self.mnemonic = f"hoop_{first_param}_{second_param}"
 
     def __str__(self):
-        return "%s" % self.mnemonic
+        return f"{self.mnemonic}"
 
 
 class Dah(object):
diff --git a/src/arch/x86/bios/IntelMP.py b/src/arch/x86/bios/IntelMP.py
index 3471f50540..a1e7e823be 100644
--- a/src/arch/x86/bios/IntelMP.py
+++ b/src/arch/x86/bios/IntelMP.py
@@ -81,8 +81,7 @@ class X86IntelMPConfigTable(SimObject):
             self.ext_entries.append(entry)
         else:
             panic(
-                "Don't know what type of Intel MP entry %s is."
-                % entry.__class__.__name__
+                f"Don't know what type of Intel MP entry {entry.__class__.__name__} is."
             )
 
 
diff --git a/src/arch/x86/isa/insts/__init__.py b/src/arch/x86/isa/insts/__init__.py
index 2d10d98647..270d405a28 100644
--- a/src/arch/x86/isa/insts/__init__.py
+++ b/src/arch/x86/isa/insts/__init__.py
@@ -46,5 +46,5 @@ microcode = """
 # X86 microcode
 """
 for category in categories:
-    exec("from . import %s as cat" % category)
+    exec(f"from . import {category} as cat")
     microcode += cat.microcode
diff --git a/src/arch/x86/isa/insts/general_purpose/__init__.py b/src/arch/x86/isa/insts/general_purpose/__init__.py
index eef0150ae8..0843c231d6 100644
--- a/src/arch/x86/isa/insts/general_purpose/__init__.py
+++ b/src/arch/x86/isa/insts/general_purpose/__init__.py
@@ -56,5 +56,5 @@ microcode = """
 # Microcode for general purpose instructions
 """
 for category in categories:
-    exec("from . import %s as cat" % category)
+    exec(f"from . import {category} as cat")
     microcode += cat.microcode
diff --git a/src/arch/x86/isa/insts/general_purpose/arithmetic/__init__.py b/src/arch/x86/isa/insts/general_purpose/arithmetic/__init__.py
index 287d1de9eb..29fa42d4f5 100644
--- a/src/arch/x86/isa/insts/general_purpose/arithmetic/__init__.py
+++ b/src/arch/x86/isa/insts/general_purpose/arithmetic/__init__.py
@@ -41,5 +41,5 @@ categories = [
 
 microcode = ""
 for category in categories:
-    exec("from . import %s as cat" % category)
+    exec(f"from . import {category} as cat")
     microcode += cat.microcode
diff --git a/src/arch/x86/isa/insts/general_purpose/compare_and_test/__init__.py b/src/arch/x86/isa/insts/general_purpose/compare_and_test/__init__.py
index fc7b35f867..65ab0e8db6 100644
--- a/src/arch/x86/isa/insts/general_purpose/compare_and_test/__init__.py
+++ b/src/arch/x86/isa/insts/general_purpose/compare_and_test/__init__.py
@@ -44,5 +44,5 @@ categories = [
 
 microcode = ""
 for category in categories:
-    exec("from . import %s as cat" % category)
+    exec(f"from . import {category} as cat")
     microcode += cat.microcode
diff --git a/src/arch/x86/isa/insts/general_purpose/control_transfer/__init__.py b/src/arch/x86/isa/insts/general_purpose/control_transfer/__init__.py
index b651278990..93a437a80c 100644
--- a/src/arch/x86/isa/insts/general_purpose/control_transfer/__init__.py
+++ b/src/arch/x86/isa/insts/general_purpose/control_transfer/__init__.py
@@ -44,5 +44,5 @@ categories = [
 
 microcode = ""
 for category in categories:
-    exec("from . import %s as cat" % category)
+    exec(f"from . import {category} as cat")
     microcode += cat.microcode
diff --git a/src/arch/x86/isa/insts/general_purpose/data_conversion/__init__.py b/src/arch/x86/isa/insts/general_purpose/data_conversion/__init__.py
index e6eca02d15..613b9c8fed 100644
--- a/src/arch/x86/isa/insts/general_purpose/data_conversion/__init__.py
+++ b/src/arch/x86/isa/insts/general_purpose/data_conversion/__init__.py
@@ -44,5 +44,5 @@ categories = [
 
 microcode = ""
 for category in categories:
-    exec("from . import %s as cat" % category)
+    exec(f"from . import {category} as cat")
     microcode += cat.microcode
diff --git a/src/arch/x86/isa/insts/general_purpose/data_transfer/__init__.py b/src/arch/x86/isa/insts/general_purpose/data_transfer/__init__.py
index cef9e595b4..e53f82bfe8 100644
--- a/src/arch/x86/isa/insts/general_purpose/data_transfer/__init__.py
+++ b/src/arch/x86/isa/insts/general_purpose/data_transfer/__init__.py
@@ -37,5 +37,5 @@ categories = ["conditional_move", "move", "stack_operations", "xchg"]
 
 microcode = ""
 for category in categories:
-    exec("from . import %s as cat" % category)
+    exec(f"from . import {category} as cat")
     microcode += cat.microcode
diff --git a/src/arch/x86/isa/insts/general_purpose/flags/__init__.py b/src/arch/x86/isa/insts/general_purpose/flags/__init__.py
index ef1585ab26..6005ea0d5d 100644
--- a/src/arch/x86/isa/insts/general_purpose/flags/__init__.py
+++ b/src/arch/x86/isa/insts/general_purpose/flags/__init__.py
@@ -37,5 +37,5 @@ categories = ["load_and_store", "push_and_pop", "set_and_clear"]
 
 microcode = ""
 for category in categories:
-    exec("from . import %s as cat" % category)
+    exec(f"from . import {category} as cat")
     microcode += cat.microcode
diff --git a/src/arch/x86/isa/insts/general_purpose/input_output/__init__.py b/src/arch/x86/isa/insts/general_purpose/input_output/__init__.py
index 08b88dd9bd..e1ee1d6571 100644
--- a/src/arch/x86/isa/insts/general_purpose/input_output/__init__.py
+++ b/src/arch/x86/isa/insts/general_purpose/input_output/__init__.py
@@ -37,5 +37,5 @@ categories = ["general_io", "string_io"]
 
 microcode = ""
 for category in categories:
-    exec("from . import %s as cat" % category)
+    exec(f"from . import {category} as cat")
     microcode += cat.microcode
diff --git a/src/arch/x86/isa/insts/general_purpose/rotate_and_shift/__init__.py b/src/arch/x86/isa/insts/general_purpose/rotate_and_shift/__init__.py
index 2675ed2429..202cf32cd2 100644
--- a/src/arch/x86/isa/insts/general_purpose/rotate_and_shift/__init__.py
+++ b/src/arch/x86/isa/insts/general_purpose/rotate_and_shift/__init__.py
@@ -37,5 +37,5 @@ categories = ["rotate", "shift"]
 
 microcode = ""
 for category in categories:
-    exec("from . import %s as cat" % category)
+    exec(f"from . import {category} as cat")
     microcode += cat.microcode
diff --git a/src/arch/x86/isa/insts/general_purpose/string/__init__.py b/src/arch/x86/isa/insts/general_purpose/string/__init__.py
index 0f7e81a82c..13199ed9f3 100644
--- a/src/arch/x86/isa/insts/general_purpose/string/__init__.py
+++ b/src/arch/x86/isa/insts/general_purpose/string/__init__.py
@@ -43,5 +43,5 @@ categories = [
 
 microcode = ""
 for category in categories:
-    exec("from . import %s as cat" % category)
+    exec(f"from . import {category} as cat")
     microcode += cat.microcode
diff --git a/src/arch/x86/isa/insts/simd128/__init__.py b/src/arch/x86/isa/insts/simd128/__init__.py
index 5f0b52c50a..2e343362d3 100644
--- a/src/arch/x86/isa/insts/simd128/__init__.py
+++ b/src/arch/x86/isa/insts/simd128/__init__.py
@@ -39,5 +39,5 @@ microcode = """
 # SSE instructions
 """
 for category in categories:
-    exec("from . import %s as cat" % category)
+    exec(f"from . import {category} as cat")
     microcode += cat.microcode
diff --git a/src/arch/x86/isa/insts/simd128/floating_point/__init__.py b/src/arch/x86/isa/insts/simd128/floating_point/__init__.py
index 4becf25c29..c0f3149623 100644
--- a/src/arch/x86/isa/insts/simd128/floating_point/__init__.py
+++ b/src/arch/x86/isa/insts/simd128/floating_point/__init__.py
@@ -46,5 +46,5 @@ microcode = """
 # SSE instructions
 """
 for category in categories:
-    exec("from . import %s as cat" % category)
+    exec(f"from . import {category} as cat")
     microcode += cat.microcode
diff --git a/src/arch/x86/isa/insts/simd128/floating_point/arithmetic/__init__.py b/src/arch/x86/isa/insts/simd128/floating_point/arithmetic/__init__.py
index 08bfb09f03..d03a3c9c6f 100644
--- a/src/arch/x86/isa/insts/simd128/floating_point/arithmetic/__init__.py
+++ b/src/arch/x86/isa/insts/simd128/floating_point/arithmetic/__init__.py
@@ -50,5 +50,5 @@ microcode = """
 # SSE instructions
 """
 for category in categories:
-    exec("from . import %s as cat" % category)
+    exec(f"from . import {category} as cat")
     microcode += cat.microcode
diff --git a/src/arch/x86/isa/insts/simd128/floating_point/compare/__init__.py b/src/arch/x86/isa/insts/simd128/floating_point/compare/__init__.py
index d9f10dcedc..9b5b8fa1b9 100644
--- a/src/arch/x86/isa/insts/simd128/floating_point/compare/__init__.py
+++ b/src/arch/x86/isa/insts/simd128/floating_point/compare/__init__.py
@@ -43,5 +43,5 @@ microcode = """
 # SSE instructions
 """
 for category in categories:
-    exec("from . import %s as cat" % category)
+    exec(f"from . import {category} as cat")
     microcode += cat.microcode
diff --git a/src/arch/x86/isa/insts/simd128/floating_point/data_conversion/__init__.py b/src/arch/x86/isa/insts/simd128/floating_point/data_conversion/__init__.py
index 6661dc8120..b6ddbf1e2c 100644
--- a/src/arch/x86/isa/insts/simd128/floating_point/data_conversion/__init__.py
+++ b/src/arch/x86/isa/insts/simd128/floating_point/data_conversion/__init__.py
@@ -45,5 +45,5 @@ microcode = """
 # SSE instructions
 """
 for category in categories:
-    exec("from . import %s as cat" % category)
+    exec(f"from . import {category} as cat")
     microcode += cat.microcode
diff --git a/src/arch/x86/isa/insts/simd128/floating_point/data_reordering/__init__.py b/src/arch/x86/isa/insts/simd128/floating_point/data_reordering/__init__.py
index 2584d21ede..711a98b81b 100644
--- a/src/arch/x86/isa/insts/simd128/floating_point/data_reordering/__init__.py
+++ b/src/arch/x86/isa/insts/simd128/floating_point/data_reordering/__init__.py
@@ -39,5 +39,5 @@ microcode = """
 # SSE instructions
 """
 for category in categories:
-    exec("from . import %s as cat" % category)
+    exec(f"from . import {category} as cat")
     microcode += cat.microcode
diff --git a/src/arch/x86/isa/insts/simd128/floating_point/data_transfer/__init__.py b/src/arch/x86/isa/insts/simd128/floating_point/data_transfer/__init__.py
index d1e90960ac..3c4e21da9d 100644
--- a/src/arch/x86/isa/insts/simd128/floating_point/data_transfer/__init__.py
+++ b/src/arch/x86/isa/insts/simd128/floating_point/data_transfer/__init__.py
@@ -44,5 +44,5 @@ microcode = """
 # SSE instructions
 """
 for category in categories:
-    exec("from . import %s as cat" % category)
+    exec(f"from . import {category} as cat")
     microcode += cat.microcode
diff --git a/src/arch/x86/isa/insts/simd128/floating_point/logical/__init__.py b/src/arch/x86/isa/insts/simd128/floating_point/logical/__init__.py
index 996aa74e93..3917c8b10d 100644
--- a/src/arch/x86/isa/insts/simd128/floating_point/logical/__init__.py
+++ b/src/arch/x86/isa/insts/simd128/floating_point/logical/__init__.py
@@ -39,5 +39,5 @@ microcode = """
 # SSE instructions
 """
 for category in categories:
-    exec("from . import %s as cat" % category)
+    exec(f"from . import {category} as cat")
     microcode += cat.microcode
diff --git a/src/arch/x86/isa/insts/simd128/integer/__init__.py b/src/arch/x86/isa/insts/simd128/integer/__init__.py
index cf4491f9bf..ad8751fe45 100644
--- a/src/arch/x86/isa/insts/simd128/integer/__init__.py
+++ b/src/arch/x86/isa/insts/simd128/integer/__init__.py
@@ -48,5 +48,5 @@ microcode = """
 # 128 bit multimedia and scientific instructions
 """
 for category in categories:
-    exec("from . import %s as cat" % category)
+    exec(f"from . import {category} as cat")
     microcode += cat.microcode
diff --git a/src/arch/x86/isa/insts/simd128/integer/arithmetic/__init__.py b/src/arch/x86/isa/insts/simd128/integer/arithmetic/__init__.py
index c2de13b845..6d757c91f3 100644
--- a/src/arch/x86/isa/insts/simd128/integer/arithmetic/__init__.py
+++ b/src/arch/x86/isa/insts/simd128/integer/arithmetic/__init__.py
@@ -47,5 +47,5 @@ microcode = """
 # 128 bit multimedia and scientific instructions
 """
 for category in categories:
-    exec("from . import %s as cat" % category)
+    exec(f"from . import {category} as cat")
     microcode += cat.microcode
diff --git a/src/arch/x86/isa/insts/simd128/integer/compare/__init__.py b/src/arch/x86/isa/insts/simd128/integer/compare/__init__.py
index df0bc81b10..e69ca449e4 100644
--- a/src/arch/x86/isa/insts/simd128/integer/compare/__init__.py
+++ b/src/arch/x86/isa/insts/simd128/integer/compare/__init__.py
@@ -39,5 +39,5 @@ microcode = """
 # 128 bit multimedia and scientific instructions
 """
 for category in categories:
-    exec("from . import %s as cat" % category)
+    exec(f"from . import {category} as cat")
     microcode += cat.microcode
diff --git a/src/arch/x86/isa/insts/simd128/integer/data_conversion/__init__.py b/src/arch/x86/isa/insts/simd128/integer/data_conversion/__init__.py
index 3212cf0636..1d85906e1a 100644
--- a/src/arch/x86/isa/insts/simd128/integer/data_conversion/__init__.py
+++ b/src/arch/x86/isa/insts/simd128/integer/data_conversion/__init__.py
@@ -43,5 +43,5 @@ microcode = """
 # 128 bit multimedia and scientific conversion instructions
 """
 for category in categories:
-    exec("from . import %s as cat" % category)
+    exec(f"from . import {category} as cat")
     microcode += cat.microcode
diff --git a/src/arch/x86/isa/insts/simd128/integer/data_reordering/__init__.py b/src/arch/x86/isa/insts/simd128/integer/data_reordering/__init__.py
index 1c0d574847..6a21322891 100644
--- a/src/arch/x86/isa/insts/simd128/integer/data_reordering/__init__.py
+++ b/src/arch/x86/isa/insts/simd128/integer/data_reordering/__init__.py
@@ -44,5 +44,5 @@ microcode = """
 # 128 bit multimedia and scientific instructions
 """
 for category in categories:
-    exec("from . import %s as cat" % category)
+    exec(f"from . import {category} as cat")
     microcode += cat.microcode
diff --git a/src/arch/x86/isa/insts/simd128/integer/data_transfer/__init__.py b/src/arch/x86/isa/insts/simd128/integer/data_transfer/__init__.py
index 4117b59325..0d67e7d9ca 100644
--- a/src/arch/x86/isa/insts/simd128/integer/data_transfer/__init__.py
+++ b/src/arch/x86/isa/insts/simd128/integer/data_transfer/__init__.py
@@ -39,5 +39,5 @@ microcode = """
 # 128 bit multimedia and scientific data transfer instructions
 """
 for category in categories:
-    exec("from . import %s as cat" % category)
+    exec(f"from . import {category} as cat")
     microcode += cat.microcode
diff --git a/src/arch/x86/isa/insts/simd128/integer/logical/__init__.py b/src/arch/x86/isa/insts/simd128/integer/logical/__init__.py
index 72fc2cfd56..15e5fe32d5 100644
--- a/src/arch/x86/isa/insts/simd128/integer/logical/__init__.py
+++ b/src/arch/x86/isa/insts/simd128/integer/logical/__init__.py
@@ -39,5 +39,5 @@ microcode = """
 # 128 bit multimedia and scientific instructions
 """
 for category in categories:
-    exec("from . import %s as cat" % category)
+    exec(f"from . import {category} as cat")
     microcode += cat.microcode
diff --git a/src/arch/x86/isa/insts/simd128/integer/save_and_restore_state/__init__.py b/src/arch/x86/isa/insts/simd128/integer/save_and_restore_state/__init__.py
index ee0ee06639..e4511545dd 100644
--- a/src/arch/x86/isa/insts/simd128/integer/save_and_restore_state/__init__.py
+++ b/src/arch/x86/isa/insts/simd128/integer/save_and_restore_state/__init__.py
@@ -39,5 +39,5 @@ microcode = """
 # 128 bit multimedia and scientific instructions
 """
 for category in categories:
-    exec("from . import %s as cat" % category)
+    exec(f"from . import {category} as cat")
     microcode += cat.microcode
diff --git a/src/arch/x86/isa/insts/simd128/integer/shift/__init__.py b/src/arch/x86/isa/insts/simd128/integer/shift/__init__.py
index b3a35cb812..1e7cc1c700 100644
--- a/src/arch/x86/isa/insts/simd128/integer/shift/__init__.py
+++ b/src/arch/x86/isa/insts/simd128/integer/shift/__init__.py
@@ -43,5 +43,5 @@ microcode = """
 # 128 bit multimedia and scientific instructions
 """
 for category in categories:
-    exec("from . import %s as cat" % category)
+    exec(f"from . import {category} as cat")
     microcode += cat.microcode
diff --git a/src/arch/x86/isa/insts/simd64/__init__.py b/src/arch/x86/isa/insts/simd64/__init__.py
index 5109e99634..ac36d68cad 100644
--- a/src/arch/x86/isa/insts/simd64/__init__.py
+++ b/src/arch/x86/isa/insts/simd64/__init__.py
@@ -39,5 +39,5 @@ microcode = """
 # 64 bit multimedia instructions
 """
 for category in categories:
-    exec("from . import %s as cat" % category)
+    exec(f"from . import {category} as cat")
     microcode += cat.microcode
diff --git a/src/arch/x86/isa/insts/simd64/floating_point/__init__.py b/src/arch/x86/isa/insts/simd64/floating_point/__init__.py
index 1d4d70f700..a4c8278f27 100644
--- a/src/arch/x86/isa/insts/simd64/floating_point/__init__.py
+++ b/src/arch/x86/isa/insts/simd64/floating_point/__init__.py
@@ -39,5 +39,5 @@ microcode = """
 # 64 bit multimedia instructions
 """
 for category in categories:
-    exec("from . import %s as cat" % category)
+    exec(f"from . import {category} as cat")
     microcode += cat.microcode
diff --git a/src/arch/x86/isa/insts/simd64/floating_point/arithmetic/__init__.py b/src/arch/x86/isa/insts/simd64/floating_point/arithmetic/__init__.py
index 59cb06036f..ac8672f2d0 100644
--- a/src/arch/x86/isa/insts/simd64/floating_point/arithmetic/__init__.py
+++ b/src/arch/x86/isa/insts/simd64/floating_point/arithmetic/__init__.py
@@ -46,5 +46,5 @@ microcode = """
 # 64 bit multimedia instructions
 """
 for category in categories:
-    exec("from . import %s as cat" % category)
+    exec(f"from . import {category} as cat")
     microcode += cat.microcode
diff --git a/src/arch/x86/isa/insts/simd64/floating_point/compare/__init__.py b/src/arch/x86/isa/insts/simd64/floating_point/compare/__init__.py
index 1226c61f98..96601c1871 100644
--- a/src/arch/x86/isa/insts/simd64/floating_point/compare/__init__.py
+++ b/src/arch/x86/isa/insts/simd64/floating_point/compare/__init__.py
@@ -39,5 +39,5 @@ microcode = """
 # 64 bit multimedia instructions
 """
 for category in categories:
-    exec("from . import %s as cat" % category)
+    exec(f"from . import {category} as cat")
     microcode += cat.microcode
diff --git a/src/arch/x86/isa/insts/simd64/integer/__init__.py b/src/arch/x86/isa/insts/simd64/integer/__init__.py
index 8e5209b926..6b026f78c6 100644
--- a/src/arch/x86/isa/insts/simd64/integer/__init__.py
+++ b/src/arch/x86/isa/insts/simd64/integer/__init__.py
@@ -49,5 +49,5 @@ microcode = """
 # 64 bit multimedia instructions
 """
 for category in categories:
-    exec("from . import %s as cat" % category)
+    exec(f"from . import {category} as cat")
     microcode += cat.microcode
diff --git a/src/arch/x86/isa/insts/simd64/integer/arithmetic/__init__.py b/src/arch/x86/isa/insts/simd64/integer/arithmetic/__init__.py
index 4458ee80f6..cd1bd9be10 100644
--- a/src/arch/x86/isa/insts/simd64/integer/arithmetic/__init__.py
+++ b/src/arch/x86/isa/insts/simd64/integer/arithmetic/__init__.py
@@ -46,5 +46,5 @@ microcode = """
 # 64 bit multimedia instructions
 """
 for category in categories:
-    exec("from . import %s as cat" % category)
+    exec(f"from . import {category} as cat")
     microcode += cat.microcode
diff --git a/src/arch/x86/isa/insts/simd64/integer/compare/__init__.py b/src/arch/x86/isa/insts/simd64/integer/compare/__init__.py
index 1226c61f98..96601c1871 100644
--- a/src/arch/x86/isa/insts/simd64/integer/compare/__init__.py
+++ b/src/arch/x86/isa/insts/simd64/integer/compare/__init__.py
@@ -39,5 +39,5 @@ microcode = """
 # 64 bit multimedia instructions
 """
 for category in categories:
-    exec("from . import %s as cat" % category)
+    exec(f"from . import {category} as cat")
     microcode += cat.microcode
diff --git a/src/arch/x86/isa/insts/simd64/integer/data_reordering/__init__.py b/src/arch/x86/isa/insts/simd64/integer/data_reordering/__init__.py
index a3ea862ec7..df73b13d89 100644
--- a/src/arch/x86/isa/insts/simd64/integer/data_reordering/__init__.py
+++ b/src/arch/x86/isa/insts/simd64/integer/data_reordering/__init__.py
@@ -44,5 +44,5 @@ microcode = """
 # 64 bit multimedia instructions
 """
 for category in categories:
-    exec("from . import %s as cat" % category)
+    exec(f"from . import {category} as cat")
     microcode += cat.microcode
diff --git a/src/arch/x86/isa/insts/simd64/integer/data_transfer/__init__.py b/src/arch/x86/isa/insts/simd64/integer/data_transfer/__init__.py
index 1417770087..8bad75e90c 100644
--- a/src/arch/x86/isa/insts/simd64/integer/data_transfer/__init__.py
+++ b/src/arch/x86/isa/insts/simd64/integer/data_transfer/__init__.py
@@ -39,5 +39,5 @@ microcode = """
 # 64 bit multimedia instructions
 """
 for category in categories:
-    exec("from . import %s as cat" % category)
+    exec(f"from . import {category} as cat")
     microcode += cat.microcode
diff --git a/src/arch/x86/isa/insts/simd64/integer/logical/__init__.py b/src/arch/x86/isa/insts/simd64/integer/logical/__init__.py
index 385af7ecac..e3d12ed838 100644
--- a/src/arch/x86/isa/insts/simd64/integer/logical/__init__.py
+++ b/src/arch/x86/isa/insts/simd64/integer/logical/__init__.py
@@ -39,5 +39,5 @@ microcode = """
 # 64 bit multimedia instructions
 """
 for category in categories:
-    exec("from . import %s as cat" % category)
+    exec(f"from . import {category} as cat")
     microcode += cat.microcode
diff --git a/src/arch/x86/isa/insts/simd64/integer/shift/__init__.py b/src/arch/x86/isa/insts/simd64/integer/shift/__init__.py
index fd8c75fa12..def2967878 100644
--- a/src/arch/x86/isa/insts/simd64/integer/shift/__init__.py
+++ b/src/arch/x86/isa/insts/simd64/integer/shift/__init__.py
@@ -43,5 +43,5 @@ microcode = """
 # 64 bit multimedia instructions
 """
 for category in categories:
-    exec("from . import %s as cat" % category)
+    exec(f"from . import {category} as cat")
     microcode += cat.microcode
diff --git a/src/arch/x86/isa/insts/system/__init__.py b/src/arch/x86/isa/insts/system/__init__.py
index e84ee3e732..67d6d62322 100644
--- a/src/arch/x86/isa/insts/system/__init__.py
+++ b/src/arch/x86/isa/insts/system/__init__.py
@@ -47,5 +47,5 @@ categories = [
 
 microcode = ""
 for category in categories:
-    exec("from . import %s as cat" % category)
+    exec(f"from . import {category} as cat")
     microcode += cat.microcode
diff --git a/src/arch/x86/isa/insts/x87/__init__.py b/src/arch/x86/isa/insts/x87/__init__.py
index 169ac7275e..369450e751 100644
--- a/src/arch/x86/isa/insts/x87/__init__.py
+++ b/src/arch/x86/isa/insts/x87/__init__.py
@@ -48,5 +48,5 @@ microcode = """
 # X86 microcode
 """
 for category in categories:
-    exec("from . import %s as cat" % category)
+    exec(f"from . import {category} as cat")
     microcode += cat.microcode
diff --git a/src/arch/x86/isa/insts/x87/arithmetic/__init__.py b/src/arch/x86/isa/insts/x87/arithmetic/__init__.py
index a64665722d..082582d0cd 100644
--- a/src/arch/x86/isa/insts/x87/arithmetic/__init__.py
+++ b/src/arch/x86/isa/insts/x87/arithmetic/__init__.py
@@ -48,5 +48,5 @@ microcode = """
 # X86 microcode
 """
 for category in categories:
-    exec("from . import %s as cat" % category)
+    exec(f"from . import {category} as cat")
     microcode += cat.microcode
diff --git a/src/arch/x86/isa/insts/x87/compare_and_test/__init__.py b/src/arch/x86/isa/insts/x87/compare_and_test/__init__.py
index cb4b1093fe..9ce0bc96b0 100644
--- a/src/arch/x86/isa/insts/x87/compare_and_test/__init__.py
+++ b/src/arch/x86/isa/insts/x87/compare_and_test/__init__.py
@@ -45,5 +45,5 @@ microcode = """
 # X86 microcode
 """
 for category in categories:
-    exec("from . import %s as cat" % category)
+    exec(f"from . import {category} as cat")
     microcode += cat.microcode
diff --git a/src/arch/x86/isa/insts/x87/control/__init__.py b/src/arch/x86/isa/insts/x87/control/__init__.py
index 00d86f468c..9bda0269a1 100644
--- a/src/arch/x86/isa/insts/x87/control/__init__.py
+++ b/src/arch/x86/isa/insts/x87/control/__init__.py
@@ -46,5 +46,5 @@ microcode = """
 # X86 microcode
 """
 for category in categories:
-    exec("from . import %s as cat" % category)
+    exec(f"from . import {category} as cat")
     microcode += cat.microcode
diff --git a/src/arch/x86/isa/insts/x87/data_transfer_and_conversion/__init__.py b/src/arch/x86/isa/insts/x87/data_transfer_and_conversion/__init__.py
index dcb581e158..cd71033cb6 100644
--- a/src/arch/x86/isa/insts/x87/data_transfer_and_conversion/__init__.py
+++ b/src/arch/x86/isa/insts/x87/data_transfer_and_conversion/__init__.py
@@ -46,5 +46,5 @@ microcode = """
 # X86 microcode
 """
 for category in categories:
-    exec("from . import %s as cat" % category)
+    exec(f"from . import {category} as cat")
     microcode += cat.microcode
diff --git a/src/arch/x86/isa/insts/x87/load_constants/__init__.py b/src/arch/x86/isa/insts/x87/load_constants/__init__.py
index b89e81525f..a3e41a47f0 100644
--- a/src/arch/x86/isa/insts/x87/load_constants/__init__.py
+++ b/src/arch/x86/isa/insts/x87/load_constants/__init__.py
@@ -39,5 +39,5 @@ microcode = """
 # X86 microcode
 """
 for category in categories:
-    exec("from . import %s as cat" % category)
+    exec(f"from . import {category} as cat")
     microcode += cat.microcode
diff --git a/src/arch/x86/isa/insts/x87/stack_management/__init__.py b/src/arch/x86/isa/insts/x87/stack_management/__init__.py
index ffbabaf89f..5d9e16a25a 100644
--- a/src/arch/x86/isa/insts/x87/stack_management/__init__.py
+++ b/src/arch/x86/isa/insts/x87/stack_management/__init__.py
@@ -39,5 +39,5 @@ microcode = """
 # X86 microcode
 """
 for category in categories:
-    exec("from . import %s as cat" % category)
+    exec(f"from . import {category} as cat")
     microcode += cat.microcode
diff --git a/src/arch/x86/isa/insts/x87/transcendental_functions/__init__.py b/src/arch/x86/isa/insts/x87/transcendental_functions/__init__.py
index d8651fe2ae..776b588ab3 100644
--- a/src/arch/x86/isa/insts/x87/transcendental_functions/__init__.py
+++ b/src/arch/x86/isa/insts/x87/transcendental_functions/__init__.py
@@ -39,5 +39,5 @@ microcode = """
 # X86 microcode
 """
 for category in categories:
-    exec("from . import %s as cat" % category)
+    exec(f"from . import {category} as cat")
     microcode += cat.microcode
diff --git a/src/cpu/BaseCPU.py b/src/cpu/BaseCPU.py
index d77036a480..556af52612 100644
--- a/src/cpu/BaseCPU.py
+++ b/src/cpu/BaseCPU.py
@@ -172,13 +172,13 @@ class BaseCPU(ClockedObject):
 
     def connectCachedPorts(self, in_ports):
         for p in self._cached_ports:
-            exec("self.%s = in_ports" % p)
+            exec(f"self.{p} = in_ports")
 
     def connectUncachedPorts(self, in_ports, out_ports):
         for p in self._uncached_interrupt_response_ports:
-            exec("self.%s = out_ports" % p)
+            exec(f"self.{p} = out_ports")
         for p in self._uncached_interrupt_request_ports:
-            exec("self.%s = in_ports" % p)
+            exec(f"self.{p} = in_ports")
 
     def connectAllPorts(self, cached_in, uncached_in, uncached_out):
         self.connectCachedPorts(cached_in)
@@ -267,7 +267,7 @@ class BaseCPU(ClockedObject):
         # Generate cpu nodes
         for i in range(int(self.numThreads)):
             reg = (int(self.socket_id) << 8) + int(self.cpu_id) + i
-            node = FdtNode("cpu@%x" % reg)
+            node = FdtNode(f"cpu@{reg:x}")
             node.append(FdtPropertyStrings("device_type", "cpu"))
             node.appendCompatible(["gem5,arm-cpu"])
             node.append(FdtPropertyWords("reg", state.CPUAddrCells(reg)))
diff --git a/src/cpu/testers/traffic_gen/BaseTrafficGen.py b/src/cpu/testers/traffic_gen/BaseTrafficGen.py
index 0d9146756d..b5df83e779 100644
--- a/src/cpu/testers/traffic_gen/BaseTrafficGen.py
+++ b/src/cpu/testers/traffic_gen/BaseTrafficGen.py
@@ -117,7 +117,7 @@ class BaseTrafficGen(ClockedObject):
     def connectCachedPorts(self, in_ports):
         if hasattr(self, "_cached_ports") and (len(self._cached_ports) > 0):
             for p in self._cached_ports:
-                exec("self.%s = in_ports" % p)
+                exec(f"self.{p} = in_ports")
         else:
             self.port = in_ports
 
diff --git a/src/dev/Device.py b/src/dev/Device.py
index 5c3a4193a1..7f8428e6ff 100644
--- a/src/dev/Device.py
+++ b/src/dev/Device.py
@@ -55,7 +55,7 @@ class PioDevice(ClockedObject):
     def generateBasicPioDeviceNode(
         self, state, name, pio_addr, size, interrupts=None
     ):
-        node = FdtNode("%s@%x" % (name, int(pio_addr)))
+        node = FdtNode(f"{name}@{int(pio_addr):x}")
         node.append(
             FdtPropertyWords(
                 "reg", state.addrCells(pio_addr) + state.sizeCells(size)
diff --git a/src/dev/arm/GenericTimer.py b/src/dev/arm/GenericTimer.py
index a44cd6fd7e..4b104ade92 100644
--- a/src/dev/arm/GenericTimer.py
+++ b/src/dev/arm/GenericTimer.py
@@ -158,7 +158,7 @@ class GenericTimerFrame(PioDevice):
     int_virt = Param.ArmSPI("Virtual Interrupt")
 
     def generateDeviceTree(self, state, gic):
-        node = FdtNode("frame@{:08x}".format(self.cnt_base.value))
+        node = FdtNode(f"frame@{self.cnt_base.value:08x}")
         node.append(FdtPropertyWords("frame-number", self._frame_num))
 
         ints = self.int_phys.generateFdtProperty(gic)
diff --git a/src/dev/arm/RealView.py b/src/dev/arm/RealView.py
index 0009842771..e71f6cee5a 100644
--- a/src/dev/arm/RealView.py
+++ b/src/dev/arm/RealView.py
@@ -248,7 +248,7 @@ class RealViewCtrl(BasicPioDevice):
     idreg = Param.UInt32(0x00000000, "ID Register, SYS_ID")
 
     def generateDeviceTree(self, state):
-        node = FdtNode("sysreg@%x" % int(self.pio_addr))
+        node = FdtNode(f"sysreg@{int(self.pio_addr):x}")
         node.appendCompatible("arm,vexpress-sysreg")
         node.append(
             FdtPropertyWords(
@@ -458,7 +458,7 @@ class FixedClock(SrcClockDomain):
     def generateDeviceTree(self, state):
         if len(self.clock) > 1:
             fatal("FixedClock configured with multiple frequencies")
-        node = FdtNode("clock{}".format(FixedClock._index))
+        node = FdtNode(f"clock{FixedClock._index}")
         node.appendCompatible("fixed-clock")
         node.append(FdtPropertyWords("#clock-cells", 0))
         node.append(
@@ -743,7 +743,7 @@ class MmioSRAM(ParentMem):
         super().__init__(**kwargs)
 
     def generateDeviceTree(self, state):
-        node = FdtNode("sram@%x" % int(self.range.start))
+        node = FdtNode(f"sram@{int(self.range.start):x}")
         node.appendCompatible(["mmio-sram"])
         node.append(
             FdtPropertyWords(
diff --git a/src/dev/arm/SMMUv3.py b/src/dev/arm/SMMUv3.py
index a1992ecd63..46fad3bf68 100644
--- a/src/dev/arm/SMMUv3.py
+++ b/src/dev/arm/SMMUv3.py
@@ -203,7 +203,7 @@ class SMMUv3(ClockedObject):
     def generateDeviceTree(self, state):
         reg_addr = self.reg_map.start
         reg_size = self.reg_map.size()
-        node = FdtNode("smmuv3@%x" % int(reg_addr))
+        node = FdtNode(f"smmuv3@{int(reg_addr):x}")
         node.appendCompatible("arm,smmu-v3")
         node.append(
             FdtPropertyWords(
diff --git a/src/dev/arm/css/MHU.py b/src/dev/arm/css/MHU.py
index 6a930f423c..a26b239332 100644
--- a/src/dev/arm/css/MHU.py
+++ b/src/dev/arm/css/MHU.py
@@ -103,7 +103,7 @@ class MHU(BasicPioDevice):
     scp = Param.Scp(Parent.any, "System Control Processor")
 
     def generateDeviceTree(self, state):
-        node = FdtNode("mailbox@%x" % int(self.pio_addr))
+        node = FdtNode(f"mailbox@{int(self.pio_addr):x}")
         node.appendCompatible(["arm,mhu", "arm,primecell"])
         node.append(
             FdtPropertyWords(
diff --git a/src/mem/slicc/ast/ActionDeclAST.py b/src/mem/slicc/ast/ActionDeclAST.py
index 21b6e3a2f7..ff6a4ff9d5 100644
--- a/src/mem/slicc/ast/ActionDeclAST.py
+++ b/src/mem/slicc/ast/ActionDeclAST.py
@@ -36,7 +36,7 @@ class ActionDeclAST(DeclAST):
         self.statement_list = statement_list
 
     def __repr__(self):
-        return "[ActionDecl: %r]" % (self.ident)
+        return f"[ActionDecl: {self.ident!r}]"
 
     def generate(self):
         resources = {}
diff --git a/src/mem/slicc/ast/AssignStatementAST.py b/src/mem/slicc/ast/AssignStatementAST.py
index d1f5f5105a..47d91a3413 100644
--- a/src/mem/slicc/ast/AssignStatementAST.py
+++ b/src/mem/slicc/ast/AssignStatementAST.py
@@ -35,7 +35,7 @@ class AssignStatementAST(StatementAST):
         self.rvalue = rvalue
 
     def __repr__(self):
-        return "[AssignStatementAST: %r := %r]" % (self.lvalue, self.rvalue)
+        return f"[AssignStatementAST: {self.lvalue!r} := {self.rvalue!r}]"
 
     def generate(self, code, return_type, **kwargs):
         lcode = self.slicc.codeFormatter()
diff --git a/src/mem/slicc/ast/CheckAllocateStatementAST.py b/src/mem/slicc/ast/CheckAllocateStatementAST.py
index 83325df7f0..0f3c6e47dd 100644
--- a/src/mem/slicc/ast/CheckAllocateStatementAST.py
+++ b/src/mem/slicc/ast/CheckAllocateStatementAST.py
@@ -34,7 +34,7 @@ class CheckAllocateStatementAST(StatementAST):
         self.variable = variable
 
     def __repr__(self):
-        return "[CheckAllocateStatementAst: %r]" % self.variable
+        return f"[CheckAllocateStatementAst: {self.variable!r}]"
 
     def generate(self, code, return_type, **kwargs):
         # FIXME - check the type of the variable
diff --git a/src/mem/slicc/ast/CheckProbeStatementAST.py b/src/mem/slicc/ast/CheckProbeStatementAST.py
index 4e798ed12f..10945cfc30 100644
--- a/src/mem/slicc/ast/CheckProbeStatementAST.py
+++ b/src/mem/slicc/ast/CheckProbeStatementAST.py
@@ -36,7 +36,7 @@ class CheckProbeStatementAST(StatementAST):
         self.address = address
 
     def __repr__(self):
-        return "[CheckProbeStatementAst: %r]" % self.in_port
+        return f"[CheckProbeStatementAst: {self.in_port!r}]"
 
     def generate(self, code, return_type, **kwargs):
         self.in_port.assertType("InPort")
diff --git a/src/mem/slicc/ast/DeclListAST.py b/src/mem/slicc/ast/DeclListAST.py
index a835a04a61..4d893c97d2 100644
--- a/src/mem/slicc/ast/DeclListAST.py
+++ b/src/mem/slicc/ast/DeclListAST.py
@@ -37,7 +37,7 @@ class DeclListAST(AST):
         self.decls = decls
 
     def __repr__(self):
-        return "[DeclListAST: %s]" % (", ".join(repr(d) for d in self.decls))
+        return f"[DeclListAST: {', '.join(repr(d) for d in self.decls)}]"
 
     def files(self, parent=None):
         s = set()
diff --git a/src/mem/slicc/ast/EnumDeclAST.py b/src/mem/slicc/ast/EnumDeclAST.py
index 5ffc8bb720..9b4a6be77a 100644
--- a/src/mem/slicc/ast/EnumDeclAST.py
+++ b/src/mem/slicc/ast/EnumDeclAST.py
@@ -37,17 +37,17 @@ class EnumDeclAST(DeclAST):
         self.fields = fields
 
     def __repr__(self):
-        return "[EnumDecl: %s]" % (self.type_ast)
+        return f"[EnumDecl: {self.type_ast}]"
 
     def files(self, parent=None):
         if "external" in self:
             return set()
 
         if parent:
-            ident = "%s_%s" % (parent, self.type_ast.ident)
+            ident = f"{parent}_{self.type_ast.ident}"
         else:
             ident = self.type_ast.ident
-        s = set(("%s.hh" % ident, "%s.cc" % ident))
+        s = set((f"{ident}.hh", f"{ident}.cc"))
         return s
 
     def generate(self):
@@ -64,7 +64,7 @@ class EnumDeclAST(DeclAST):
             field.generate(t)
 
         # Add the implicit State_to_string method - FIXME, this is a bit dirty
-        func_id = "%s_to_string" % t.c_ident
+        func_id = f"{t.c_ident}_to_string"
 
         pairs = {"external": "yes"}
         func = Func(
diff --git a/src/mem/slicc/ast/EnumExprAST.py b/src/mem/slicc/ast/EnumExprAST.py
index 9f3aae33a3..b3034b5c06 100644
--- a/src/mem/slicc/ast/EnumExprAST.py
+++ b/src/mem/slicc/ast/EnumExprAST.py
@@ -39,7 +39,7 @@ class EnumExprAST(ExprAST):
         self.value = value
 
     def __repr__(self):
-        return "[EnumExpr: %s:%s]" % (self.type_ast, self.value)
+        return f"[EnumExpr: {self.type_ast}:{self.value}]"
 
     def generate(self, code, **kwargs):
         fix = code.nofix()
diff --git a/src/mem/slicc/ast/ExprStatementAST.py b/src/mem/slicc/ast/ExprStatementAST.py
index d26920c11c..9545ef3e41 100644
--- a/src/mem/slicc/ast/ExprStatementAST.py
+++ b/src/mem/slicc/ast/ExprStatementAST.py
@@ -37,7 +37,7 @@ class ExprStatementAST(StatementAST):
         self.expr = expr
 
     def __repr__(self):
-        return "[ExprStatementAST: %s]" % (self.expr)
+        return f"[ExprStatementAST: {self.expr}]"
 
     def generate(self, code, return_type, **kwargs):
         actual_type, rcode = self.expr.inline(True, **kwargs)
diff --git a/src/mem/slicc/ast/FormalParamAST.py b/src/mem/slicc/ast/FormalParamAST.py
index cd6cdc182c..8b97a81790 100644
--- a/src/mem/slicc/ast/FormalParamAST.py
+++ b/src/mem/slicc/ast/FormalParamAST.py
@@ -50,7 +50,7 @@ class FormalParamAST(AST):
         self.qualifier = qualifier
 
     def __repr__(self):
-        return "[FormalParamAST: %s]" % self.ident
+        return f"[FormalParamAST: {self.ident}]"
 
     @property
     def name(self):
@@ -58,7 +58,7 @@ class FormalParamAST(AST):
 
     def generate(self):
         type = self.type_ast.type
-        param = "param_%s" % self.ident
+        param = f"param_{self.ident}"
 
         # Add to symbol table
         v = Var(
@@ -84,10 +84,10 @@ class FormalParamAST(AST):
             qualifier = "CONST_REF"
 
         if qualifier == "PTR":
-            return type, "%s* %s" % (type.c_ident, param)
+            return type, f"{type.c_ident}* {param}"
         elif qualifier == "REF":
-            return type, "%s& %s" % (type.c_ident, param)
+            return type, f"{type.c_ident}& {param}"
         elif qualifier == "CONST_REF":
-            return type, "const %s& %s" % (type.c_ident, param)
+            return type, f"const {type.c_ident}& {param}"
         else:
-            self.error("Invalid qualifier for param '%s'" % self.ident)
+            self.error(f"Invalid qualifier for param '{self.ident}'")
diff --git a/src/mem/slicc/ast/FuncCallExprAST.py b/src/mem/slicc/ast/FuncCallExprAST.py
index 940e78acff..6ccca6650a 100644
--- a/src/mem/slicc/ast/FuncCallExprAST.py
+++ b/src/mem/slicc/ast/FuncCallExprAST.py
@@ -49,7 +49,7 @@ class FuncCallExprAST(ExprAST):
         self.exprs = exprs
 
     def __repr__(self):
-        return "[FuncCallExpr: %s %s]" % (self.proc_name, self.exprs)
+        return f"[FuncCallExpr: {self.proc_name} {self.exprs}]"
 
     # When calling generate for statements in a in_port, the reference to
     # the port must be provided as the in_port kwarg (see InPortDeclAST)
@@ -69,14 +69,14 @@ class FuncCallExprAST(ExprAST):
             # handled differently. Hence the check whether or not
             # the str_list is empty.
 
-            dflag = "%s" % (self.exprs[0].name)
+            dflag = f"{self.exprs[0].name}"
             machine.addDebugFlag(dflag)
-            format = "%s" % (self.exprs[1].inline())
+            format = f"{self.exprs[1].inline()}"
             format_length = len(format)
             str_list = []
 
             for i in range(2, len(self.exprs)):
-                str_list.append("%s" % self.exprs[i].inline())
+                str_list.append(f"{self.exprs[i].inline()}")
 
             if len(str_list) == 0:
                 code(
@@ -97,12 +97,12 @@ class FuncCallExprAST(ExprAST):
             return self.symtab.find("void", Type)
 
         if self.proc_name == "DPRINTFN":
-            format = "%s" % (self.exprs[0].inline())
+            format = f"{self.exprs[0].inline()}"
             format_length = len(format)
             str_list = []
 
             for i in range(1, len(self.exprs)):
-                str_list.append("%s" % self.exprs[i].inline())
+                str_list.append(f"{self.exprs[i].inline()}")
 
             if len(str_list) == 0:
                 code(
@@ -264,11 +264,11 @@ if (!(${{cvec[0]}})) {
             )
 
         elif self.proc_name == "set_cache_entry":
-            code("set_cache_entry(m_cache_entry_ptr, %s);" % (cvec[0]))
+            code(f"set_cache_entry(m_cache_entry_ptr, {cvec[0]});")
         elif self.proc_name == "unset_cache_entry":
             code("unset_cache_entry(m_cache_entry_ptr);")
         elif self.proc_name == "set_tbe":
-            code("set_tbe(m_tbe_ptr, %s);" % (cvec[0]))
+            code(f"set_tbe(m_tbe_ptr, {cvec[0]});")
         elif self.proc_name == "unset_tbe":
             code("unset_tbe(m_tbe_ptr);")
         elif self.proc_name == "stallPort":
diff --git a/src/mem/slicc/ast/FuncDeclAST.py b/src/mem/slicc/ast/FuncDeclAST.py
index ece27e708f..38898ff9e5 100644
--- a/src/mem/slicc/ast/FuncDeclAST.py
+++ b/src/mem/slicc/ast/FuncDeclAST.py
@@ -39,7 +39,7 @@ class FuncDeclAST(DeclAST):
         self.statements = statements
 
     def __repr__(self):
-        return "[FuncDecl: %s]" % self.ident
+        return f"[FuncDecl: {self.ident}]"
 
     def files(self, parent=None):
         return set()
@@ -102,12 +102,12 @@ class FuncDeclAST(DeclAST):
 
         if parent is not None:
             if not parent.addFunc(func):
-                self.error("Duplicate method: %s:%s()" % (parent, self.ident))
+                self.error(f"Duplicate method: {parent}:{self.ident}()")
             func.class_name = parent.c_ident
 
         elif machine is not None:
             machine.addFunc(func)
             func.isInternalMachineFunc = True
-            func.class_name = "%s_Controller" % machine
+            func.class_name = f"{machine}_Controller"
         else:
             self.symtab.newSymbol(func)
diff --git a/src/mem/slicc/ast/IfStatementAST.py b/src/mem/slicc/ast/IfStatementAST.py
index aba19d62ce..1cb50bd95f 100644
--- a/src/mem/slicc/ast/IfStatementAST.py
+++ b/src/mem/slicc/ast/IfStatementAST.py
@@ -41,7 +41,7 @@ class IfStatementAST(StatementAST):
         self.else_ = else_
 
     def __repr__(self):
-        return "[IfStatement: %r%r%r]" % (self.cond, self.then, self.else_)
+        return f"[IfStatement: {self.cond!r}{self.then!r}{self.else_!r}]"
 
     def generate(self, code, return_type, **kwargs):
         cond_code = self.slicc.codeFormatter()
diff --git a/src/mem/slicc/ast/InPortDeclAST.py b/src/mem/slicc/ast/InPortDeclAST.py
index c8b99a4710..2cbf3bb617 100644
--- a/src/mem/slicc/ast/InPortDeclAST.py
+++ b/src/mem/slicc/ast/InPortDeclAST.py
@@ -53,7 +53,7 @@ class InPortDeclAST(DeclAST):
         self.queue_type = TypeAST(slicc, "InPort")
 
     def __repr__(self):
-        return "[InPortDecl: %s]" % self.ident
+        return f"[InPortDecl: {self.ident}]"
 
     def generate(self):
         symtab = self.symtab
diff --git a/src/mem/slicc/ast/IsValidPtrExprAST.py b/src/mem/slicc/ast/IsValidPtrExprAST.py
index ec285dcaa6..0a58361ab2 100644
--- a/src/mem/slicc/ast/IsValidPtrExprAST.py
+++ b/src/mem/slicc/ast/IsValidPtrExprAST.py
@@ -37,7 +37,7 @@ class IsValidPtrExprAST(ExprAST):
         self.flag = flag
 
     def __repr__(self):
-        return "[IsValidPtrExprAST: %r]" % self.variable
+        return f"[IsValidPtrExprAST: {self.variable!r}]"
 
     def generate(self, code, **kwargs):
         # Make sure the variable is valid
diff --git a/src/mem/slicc/ast/LiteralExprAST.py b/src/mem/slicc/ast/LiteralExprAST.py
index 973ac6a1c0..37655529d8 100644
--- a/src/mem/slicc/ast/LiteralExprAST.py
+++ b/src/mem/slicc/ast/LiteralExprAST.py
@@ -36,7 +36,7 @@ class LiteralExprAST(ExprAST):
         self.type = type
 
     def __repr__(self):
-        return "[Literal: %s]" % self.literal
+        return f"[Literal: {self.literal}]"
 
     def generate(self, code, **kwargs):
         fix = code.nofix()
@@ -51,6 +51,6 @@ class LiteralExprAST(ExprAST):
         type = self.symtab.find(self.type, Type)
         if type is None:
             # Can't find the type
-            self.error("Internal: can't primitive type '%s'" % self.type)
+            self.error(f"Internal: can't primitive type '{self.type}'")
 
         return type
diff --git a/src/mem/slicc/ast/LocalVariableAST.py b/src/mem/slicc/ast/LocalVariableAST.py
index e08e5770a4..b4ac8f446b 100644
--- a/src/mem/slicc/ast/LocalVariableAST.py
+++ b/src/mem/slicc/ast/LocalVariableAST.py
@@ -39,7 +39,7 @@ class LocalVariableAST(StatementAST):
         self.pointer = pointer
 
     def __repr__(self):
-        return "[LocalVariableAST: %r %r]" % (self.type_ast, self.ident)
+        return f"[LocalVariableAST: {self.type_ast!r} {self.ident!r}]"
 
     @property
     def name(self):
@@ -55,7 +55,7 @@ class LocalVariableAST(StatementAST):
 
     def generate(self, code, **kwargs):
         type = self.type_ast.type
-        ident = "%s" % self.ident
+        ident = f"{self.ident}"
 
         # Add to symbol table
         v = Var(
@@ -72,7 +72,7 @@ class LocalVariableAST(StatementAST):
                 and (type["interface"] == "AbstractCacheEntry")
             )
         ):
-            code += "%s* %s" % (type.c_ident, ident)
+            code += f"{type.c_ident}* {ident}"
         else:
-            code += "%s %s" % (type.c_ident, ident)
+            code += f"{type.c_ident} {ident}"
         return type
diff --git a/src/mem/slicc/ast/MachineAST.py b/src/mem/slicc/ast/MachineAST.py
index 57526daa3c..5c76aa8173 100644
--- a/src/mem/slicc/ast/MachineAST.py
+++ b/src/mem/slicc/ast/MachineAST.py
@@ -39,16 +39,16 @@ class MachineAST(DeclAST):
         self.decls = decls
 
     def __repr__(self):
-        return "[Machine: %r]" % self.ident
+        return f"[Machine: {self.ident!r}]"
 
     def files(self, parent=None):
         s = set(
             (
-                "%s_Controller.cc" % self.ident,
-                "%s_Controller.hh" % self.ident,
-                "%s_Controller.py" % self.ident,
-                "%s_Transitions.cc" % self.ident,
-                "%s_Wakeup.cc" % self.ident,
+                f"{self.ident}_Controller.cc",
+                f"{self.ident}_Controller.hh",
+                f"{self.ident}_Controller.py",
+                f"{self.ident}_Transitions.cc",
+                f"{self.ident}_Wakeup.cc",
             )
         )
 
@@ -83,4 +83,4 @@ class MachineAST(DeclAST):
         mtype = self.ident
         machine_type = self.symtab.find("MachineType", Type)
         if not machine_type.checkEnum(mtype):
-            self.error("Duplicate machine name: %s:%s" % (machine_type, mtype))
+            self.error(f"Duplicate machine name: {machine_type}:{mtype}")
diff --git a/src/mem/slicc/ast/MemberExprAST.py b/src/mem/slicc/ast/MemberExprAST.py
index 292c1b7899..d45a6ac9a4 100644
--- a/src/mem/slicc/ast/MemberExprAST.py
+++ b/src/mem/slicc/ast/MemberExprAST.py
@@ -36,7 +36,7 @@ class MemberExprAST(ExprAST):
         self.field = field
 
     def __repr__(self):
-        return "[MemberExprAST: %r.%r]" % (self.expr_ast, self.field)
+        return f"[MemberExprAST: {self.expr_ast!r}.{self.field!r}]"
 
     def generate(self, code):
         return_type, gcode = self.expr_ast.inline(True)
@@ -68,6 +68,5 @@ class MemberExprAST(ExprAST):
                         return interface_type.data_members[self.field].type
         self.error(
             "Invalid object field: "
-            + "Type '%s' does not have data member %s"
-            % (return_type, self.field)
+            + f"Type '{return_type}' does not have data member {self.field}"
         )
diff --git a/src/mem/slicc/ast/MethodCallExprAST.py b/src/mem/slicc/ast/MethodCallExprAST.py
index a4ebc67ecc..7bdf0c7dd9 100644
--- a/src/mem/slicc/ast/MethodCallExprAST.py
+++ b/src/mem/slicc/ast/MethodCallExprAST.py
@@ -171,9 +171,9 @@ class MemberMethodCallExprAST(MethodCallExprAST):
             "interface" in obj_type
             and (obj_type["interface"] == "AbstractCacheEntry")
         ):
-            prefix = "%s((*(%s))." % (prefix, code)
+            prefix = f"{prefix}((*({code}))."
         else:
-            prefix = "%s((%s)." % (prefix, code)
+            prefix = f"{prefix}(({code})."
 
         return obj_type, methodId, prefix
 
@@ -186,12 +186,12 @@ class ClassMethodCallExprAST(MethodCallExprAST):
         self.type_ast = type_ast
 
     def __repr__(self):
-        return "[MethodCallExpr: %r %r]" % (self.proc_name, self.expr_ast_vec)
+        return f"[MethodCallExpr: {self.proc_name!r} {self.expr_ast_vec!r}]"
 
     def generate_prefix(self, paramTypes):
 
         # class method call
-        prefix = "(%s::" % self.type_ast
+        prefix = f"({self.type_ast}::"
         obj_type = self.type_ast.type
         methodId = obj_type.methodId(self.proc_name, paramTypes)
 
diff --git a/src/mem/slicc/ast/NewExprAST.py b/src/mem/slicc/ast/NewExprAST.py
index 3488070783..a9ee3ed07c 100644
--- a/src/mem/slicc/ast/NewExprAST.py
+++ b/src/mem/slicc/ast/NewExprAST.py
@@ -34,7 +34,7 @@ class NewExprAST(ExprAST):
         self.type_ast = type_ast
 
     def __repr__(self):
-        return "[NewExprAST: %r]" % self.type_ast
+        return f"[NewExprAST: {self.type_ast!r}]"
 
     @property
     def name(self):
diff --git a/src/mem/slicc/ast/ObjDeclAST.py b/src/mem/slicc/ast/ObjDeclAST.py
index 0aec0c367e..504d0d41c3 100644
--- a/src/mem/slicc/ast/ObjDeclAST.py
+++ b/src/mem/slicc/ast/ObjDeclAST.py
@@ -39,7 +39,7 @@ class ObjDeclAST(DeclAST):
         self.pointer = pointer
 
     def __repr__(self):
-        return "[ObjDecl: %r]" % self.ident
+        return f"[ObjDecl: {self.ident!r}]"
 
     def generate(self, parent=None, **kwargs):
         if "network" in self and not (
@@ -60,7 +60,7 @@ class ObjDeclAST(DeclAST):
         elif self.ident == "recycle_latency":
             c_code = "m_recycle_latency"
         else:
-            c_code = "(*m_%s_ptr)" % (self.ident)
+            c_code = f"(*m_{self.ident}_ptr)"
 
         # check type if this is a initialization
         init_code = ""
@@ -68,8 +68,7 @@ class ObjDeclAST(DeclAST):
             rvalue_type, init_code = self.rvalue.inline(True)
             if type != rvalue_type:
                 self.error(
-                    "Initialization type mismatch '%s' and '%s'"
-                    % (type, rvalue_type)
+                    f"Initialization type mismatch '{type}' and '{rvalue_type}'"
                 )
 
         machine = self.symtab.state_machine
@@ -89,9 +88,7 @@ class ObjDeclAST(DeclAST):
             if not parent.addDataMember(
                 self.ident, type, self.pairs, init_code
             ):
-                self.error(
-                    "Duplicate data member: %s:%s" % (parent, self.ident)
-                )
+                self.error(f"Duplicate data member: {parent}:{self.ident}")
 
         elif machine:
             machine.addObject(v)
diff --git a/src/mem/slicc/ast/OperatorExprAST.py b/src/mem/slicc/ast/OperatorExprAST.py
index ebebfdf23d..714b553101 100644
--- a/src/mem/slicc/ast/OperatorExprAST.py
+++ b/src/mem/slicc/ast/OperatorExprAST.py
@@ -38,7 +38,7 @@ class InfixOperatorExprAST(ExprAST):
         self.right = right
 
     def __repr__(self):
-        return "[InfixExpr: %r %s %r]" % (self.left, self.op, self.right)
+        return f"[InfixExpr: {self.left!r} {self.op} {self.right!r}]"
 
     def generate(self, code, **kwargs):
         lcode = self.slicc.codeFormatter()
@@ -83,7 +83,7 @@ class InfixOperatorExprAST(ExprAST):
                     ("int", "Cycles", "Cycles"),
                 ]
             else:
-                self.error("No operator matched with {0}!".format(self.op))
+                self.error(f"No operator matched with {self.op}!")
 
             for expected_type in expected_types:
                 left_input_type = self.symtab.find(expected_type[0], Type)
@@ -115,7 +115,7 @@ class PrefixOperatorExprAST(ExprAST):
         self.operand = operand
 
     def __repr__(self):
-        return "[PrefixExpr: %s %r]" % (self.op, self.operand)
+        return f"[PrefixExpr: {self.op} {self.operand!r}]"
 
     def generate(self, code, **kwargs):
         opcode = self.slicc.codeFormatter()
diff --git a/src/mem/slicc/ast/OutPortDeclAST.py b/src/mem/slicc/ast/OutPortDeclAST.py
index 887597b797..e21a4a6fa7 100644
--- a/src/mem/slicc/ast/OutPortDeclAST.py
+++ b/src/mem/slicc/ast/OutPortDeclAST.py
@@ -41,7 +41,7 @@ class OutPortDeclAST(DeclAST):
         self.queue_type = TypeAST(slicc, "OutPort")
 
     def __repr__(self):
-        return "[OutPortDecl: %r]" % self.ident
+        return f"[OutPortDecl: {self.ident!r}]"
 
     def generate(self):
         code = self.slicc.codeFormatter(newlines=False)
diff --git a/src/mem/slicc/ast/PairAST.py b/src/mem/slicc/ast/PairAST.py
index eae776f136..526b97f189 100644
--- a/src/mem/slicc/ast/PairAST.py
+++ b/src/mem/slicc/ast/PairAST.py
@@ -34,4 +34,4 @@ class PairAST(AST):
         self.value = value
 
     def __repr__(self):
-        return "[%s=%s]" % (self.key, self.value)
+        return f"[{self.key}={self.value}]"
diff --git a/src/mem/slicc/ast/PairListAST.py b/src/mem/slicc/ast/PairListAST.py
index a0cf26d07f..6a8efe1a80 100644
--- a/src/mem/slicc/ast/PairListAST.py
+++ b/src/mem/slicc/ast/PairListAST.py
@@ -32,7 +32,7 @@ class PairListAST(AST):
         super().__init__(slicc)
 
     def __repr__(self):
-        return "[PairListAST] %r" % self.pairs
+        return f"[PairListAST] {self.pairs!r}"
 
     def addPair(self, pair_ast):
         self[pair_ast.key] = pair_ast.value
diff --git a/src/mem/slicc/ast/ReturnStatementAST.py b/src/mem/slicc/ast/ReturnStatementAST.py
index ca4e33dbb9..a742947b4d 100644
--- a/src/mem/slicc/ast/ReturnStatementAST.py
+++ b/src/mem/slicc/ast/ReturnStatementAST.py
@@ -35,7 +35,7 @@ class ReturnStatementAST(StatementAST):
         self.expr_ast = expr_ast
 
     def __repr__(self):
-        return "[ReturnStatementAST: %r]" % self.expr_ast
+        return f"[ReturnStatementAST: {self.expr_ast!r}]"
 
     def generate(self, code, return_type, **kwargs):
         actual_type, ecode = self.expr_ast.inline(True)
diff --git a/src/mem/slicc/ast/StallAndWaitStatementAST.py b/src/mem/slicc/ast/StallAndWaitStatementAST.py
index 37e567289e..6214fac10f 100644
--- a/src/mem/slicc/ast/StallAndWaitStatementAST.py
+++ b/src/mem/slicc/ast/StallAndWaitStatementAST.py
@@ -36,7 +36,7 @@ class StallAndWaitStatementAST(StatementAST):
         self.address = address
 
     def __repr__(self):
-        return "[StallAndWaitStatementAst: %r]" % self.in_port
+        return f"[StallAndWaitStatementAst: {self.in_port!r}]"
 
     def generate(self, code, return_type, **kwargs):
         self.in_port.assertType("InPort")
diff --git a/src/mem/slicc/ast/StateDeclAST.py b/src/mem/slicc/ast/StateDeclAST.py
index f6e5d6e39b..d190326484 100644
--- a/src/mem/slicc/ast/StateDeclAST.py
+++ b/src/mem/slicc/ast/StateDeclAST.py
@@ -36,17 +36,17 @@ class StateDeclAST(DeclAST):
         self.states = states
 
     def __repr__(self):
-        return "[StateDecl: %s]" % (self.type_ast)
+        return f"[StateDecl: {self.type_ast}]"
 
     def files(self, parent=None):
         if "external" in self:
             return set()
 
         if parent:
-            ident = "%s_%s" % (parent, self.type_ast.ident)
+            ident = f"{parent}_{self.type_ast.ident}"
         else:
             ident = self.type_ast.ident
-        s = set(("%s.hh" % ident, "%s.cc" % ident))
+        s = set((f"{ident}.hh", f"{ident}.cc"))
         return s
 
     def generate(self):
@@ -63,7 +63,7 @@ class StateDeclAST(DeclAST):
             state.generate(t)
 
         # Add the implicit State_to_string method - FIXME, this is a bit dirty
-        func_id = "%s_to_string" % t.c_ident
+        func_id = f"{t.c_ident}_to_string"
 
         pairs = {"external": "yes"}
         func = Func(
@@ -80,7 +80,7 @@ class StateDeclAST(DeclAST):
         self.symtab.newSymbol(func)
 
         # Add the State_to_permission method
-        func_id = "%s_to_permission" % t.c_ident
+        func_id = f"{t.c_ident}_to_permission"
 
         pairs = {"external": "yes"}
         func = Func(
diff --git a/src/mem/slicc/ast/StatementListAST.py b/src/mem/slicc/ast/StatementListAST.py
index 82b5d20d1e..9e2bb6579a 100644
--- a/src/mem/slicc/ast/StatementListAST.py
+++ b/src/mem/slicc/ast/StatementListAST.py
@@ -36,7 +36,7 @@ class StatementListAST(AST):
         self.statements = statements
 
     def __repr__(self):
-        return "[StatementListAST: %r]" % self.statements
+        return f"[StatementListAST: {self.statements!r}]"
 
     def generate(self, code, return_type, **kwargs):
         for statement in self.statements:
diff --git a/src/mem/slicc/ast/StaticCastAST.py b/src/mem/slicc/ast/StaticCastAST.py
index 16f6b151bc..178285202b 100644
--- a/src/mem/slicc/ast/StaticCastAST.py
+++ b/src/mem/slicc/ast/StaticCastAST.py
@@ -36,7 +36,7 @@ class StaticCastAST(ExprAST):
         self.type_modifier = type_modifier
 
     def __repr__(self):
-        return "[StaticCastAST: %r]" % self.expr_ast
+        return f"[StaticCastAST: {self.expr_ast!r}]"
 
     def generate(self, code, **kwargs):
         actual_type, ecode = self.expr_ast.inline(True)
diff --git a/src/mem/slicc/ast/TransitionDeclAST.py b/src/mem/slicc/ast/TransitionDeclAST.py
index 089bb45e4e..c791ed9548 100644
--- a/src/mem/slicc/ast/TransitionDeclAST.py
+++ b/src/mem/slicc/ast/TransitionDeclAST.py
@@ -53,8 +53,7 @@ class TransitionDeclAST(DeclAST):
         for action in self.actions:
             if action not in machine.actions:
                 self.error(
-                    "Invalid action: %s is not part of machine: %s"
-                    % (action, machine)
+                    f"Invalid action: {action} is not part of machine: {machine}"
                 )
 
         for request_type in self.request_types:
@@ -67,15 +66,13 @@ class TransitionDeclAST(DeclAST):
         for state in self.states:
             if state not in machine.states:
                 self.error(
-                    "Invalid state: %s is not part of machine: %s"
-                    % (state, machine)
+                    f"Invalid state: {state} is not part of machine: {machine}"
                 )
             next_state = self.next_state or state
             for event in self.events:
                 if event not in machine.events:
                     self.error(
-                        "Invalid event: %s is not part of machine: %s"
-                        % (event, machine)
+                        f"Invalid event: {event} is not part of machine: {machine}"
                     )
                 t = Transition(
                     self.symtab,
diff --git a/src/mem/slicc/ast/TypeDeclAST.py b/src/mem/slicc/ast/TypeDeclAST.py
index e64b3d5010..d39e678477 100644
--- a/src/mem/slicc/ast/TypeDeclAST.py
+++ b/src/mem/slicc/ast/TypeDeclAST.py
@@ -37,17 +37,17 @@ class TypeDeclAST(DeclAST):
         self.field_asts = field_asts
 
     def __repr__(self):
-        return "[TypeDecl: %r]" % (self.type_ast)
+        return f"[TypeDecl: {self.type_ast!r}]"
 
     def files(self, parent=None):
         if "external" in self:
             return set()
 
         if parent:
-            ident = "%s_%s" % (parent, self.type_ast.ident)
+            ident = f"{parent}_{self.type_ast.ident}"
         else:
             ident = self.type_ast.ident
-        return set(("%s.hh" % ident, "%s.cc" % ident))
+        return set((f"{ident}.hh", f"{ident}.cc"))
 
     def generate(self):
         ident = str(self.type_ast)
diff --git a/src/mem/slicc/ast/TypeFieldEnumAST.py b/src/mem/slicc/ast/TypeFieldEnumAST.py
index ea35e081eb..68dd0cd0fa 100644
--- a/src/mem/slicc/ast/TypeFieldEnumAST.py
+++ b/src/mem/slicc/ast/TypeFieldEnumAST.py
@@ -37,7 +37,7 @@ class TypeFieldEnumAST(TypeFieldAST):
         self.pairs_ast = pairs_ast
 
     def __repr__(self):
-        return "[TypeFieldEnum: %r]" % self.field_id
+        return f"[TypeFieldEnum: {self.field_id!r}]"
 
     def generate(self, type, **kwargs):
         if str(type) == "State":
@@ -47,7 +47,7 @@ class TypeFieldEnumAST(TypeFieldAST):
 
         # Add enumeration
         if not type.addEnum(self.field_id, self.pairs_ast.pairs):
-            self.error("Duplicate enumeration: %s:%s" % (type, self.field_id))
+            self.error(f"Duplicate enumeration: {type}:{self.field_id}")
 
         # Fill machine info
         machine = self.symtab.state_machine
diff --git a/src/mem/slicc/ast/TypeFieldStateAST.py b/src/mem/slicc/ast/TypeFieldStateAST.py
index e71b9383c3..b04a708f21 100644
--- a/src/mem/slicc/ast/TypeFieldStateAST.py
+++ b/src/mem/slicc/ast/TypeFieldStateAST.py
@@ -39,7 +39,7 @@ class TypeFieldStateAST(TypeFieldAST):
         self.pairs_ast = pairs_ast
 
     def __repr__(self):
-        return "[TypeFieldState: %r]" % self.field_id
+        return f"[TypeFieldState: {self.field_id!r}]"
 
     def generate(self, type, **kwargs):
         if not str(type) == "State":
@@ -47,7 +47,7 @@ class TypeFieldStateAST(TypeFieldAST):
 
         # Add enumeration
         if not type.addEnum(self.field_id, self.pairs_ast.pairs):
-            self.error("Duplicate enumeration: %s:%s" % (type, self.field_id))
+            self.error(f"Duplicate enumeration: {type}:{self.field_id}")
 
         # Fill machine info
         machine = self.symtab.state_machine
diff --git a/src/mem/slicc/ast/VarExprAST.py b/src/mem/slicc/ast/VarExprAST.py
index a653504f6d..3c4023e8fe 100644
--- a/src/mem/slicc/ast/VarExprAST.py
+++ b/src/mem/slicc/ast/VarExprAST.py
@@ -36,7 +36,7 @@ class VarExprAST(ExprAST):
         self._var = var
 
     def __repr__(self):
-        return "[VarExprAST: %r]" % self._var
+        return f"[VarExprAST: {self._var!r}]"
 
     @property
     def name(self):
diff --git a/src/mem/slicc/ast/WakeupPortStatementAST.py b/src/mem/slicc/ast/WakeupPortStatementAST.py
index 62e3549ceb..31a60e8715 100644
--- a/src/mem/slicc/ast/WakeupPortStatementAST.py
+++ b/src/mem/slicc/ast/WakeupPortStatementAST.py
@@ -43,7 +43,7 @@ class WakeupPortStatementAST(StatementAST):
         self.address = address
 
     def __repr__(self):
-        return "[WakeupPortStatementAst: %r]" % self.in_port
+        return f"[WakeupPortStatementAst: {self.in_port!r}]"
 
     def generate(self, code, return_type):
         self.in_port.assertType("InPort")
diff --git a/src/mem/slicc/main.py b/src/mem/slicc/main.py
index bb0f9cbf16..a3d562220a 100644
--- a/src/mem/slicc/main.py
+++ b/src/mem/slicc/main.py
@@ -122,7 +122,7 @@ def main(args=None):
 
     if opts.print_files:
         for i in sorted(slicc.files()):
-            print("    %s" % i)
+            print(f"    {i}")
     else:
         output("Processing AST...")
         slicc.process()
diff --git a/src/mem/slicc/parser.py b/src/mem/slicc/parser.py
index 2d33cd30b5..155eb07f7a 100644
--- a/src/mem/slicc/parser.py
+++ b/src/mem/slicc/parser.py
@@ -520,7 +520,7 @@ class SLICC(Grammar):
 
     def p_typestr__multi(self, p):
         "typestr : typestr DOUBLE_COLON ident"
-        p[0] = "%s::%s" % (p[1], p[3])
+        p[0] = f"{p[1]}::{p[3]}"
 
     def p_typestr__single(self, p):
         "typestr : ident"
diff --git a/src/mem/slicc/symbols/Action.py b/src/mem/slicc/symbols/Action.py
index c00482f276..c2f7d906ee 100644
--- a/src/mem/slicc/symbols/Action.py
+++ b/src/mem/slicc/symbols/Action.py
@@ -34,7 +34,7 @@ class Action(Symbol):
         self.resources = resources
 
     def __repr__(self):
-        return "[Action: %s]" % self.ident
+        return f"[Action: {self.ident}]"
 
 
 __all__ = ["Action"]
diff --git a/src/mem/slicc/symbols/Event.py b/src/mem/slicc/symbols/Event.py
index 57ab7a76e3..c2fd8d3372 100644
--- a/src/mem/slicc/symbols/Event.py
+++ b/src/mem/slicc/symbols/Event.py
@@ -30,7 +30,7 @@ from slicc.symbols.Symbol import Symbol
 
 class Event(Symbol):
     def __repr__(self):
-        return "[Event: %s]" % self.ident
+        return f"[Event: {self.ident}]"
 
 
 __all__ = ["Event"]
diff --git a/src/mem/slicc/symbols/Func.py b/src/mem/slicc/symbols/Func.py
index 4d254138e1..18de3af41f 100644
--- a/src/mem/slicc/symbols/Func.py
+++ b/src/mem/slicc/symbols/Func.py
@@ -67,11 +67,7 @@ class Func(Symbol):
         elif "return_by_pointer" in self and self.return_type != void_type:
             return_type += "*"
 
-        return "%s %s(%s);" % (
-            return_type,
-            self.c_name,
-            ", ".join(self.param_strings),
-        )
+        return f"{return_type} {self.c_name}({', '.join(self.param_strings)});"
 
     def writeCodeFiles(self, path, includes):
         return
@@ -97,8 +93,7 @@ class Func(Symbol):
                 and str(actual_type["interface"]) != str(expected_type)
             ):
                 expr.error(
-                    "Type mismatch: expected: %s actual: %s"
-                    % (expected_type, actual_type)
+                    f"Type mismatch: expected: {expected_type} actual: {actual_type}"
                 )
             cvec.append(param_code)
             type_vec.append(expected_type)
diff --git a/src/mem/slicc/symbols/RequestType.py b/src/mem/slicc/symbols/RequestType.py
index db822a0b17..e5a6df2e76 100644
--- a/src/mem/slicc/symbols/RequestType.py
+++ b/src/mem/slicc/symbols/RequestType.py
@@ -29,7 +29,7 @@ from slicc.symbols.Symbol import Symbol
 
 class RequestType(Symbol):
     def __repr__(self):
-        return "[RequestType: %s]" % self.ident
+        return f"[RequestType: {self.ident}]"
 
 
 __all__ = ["RequestType"]
diff --git a/src/mem/slicc/symbols/State.py b/src/mem/slicc/symbols/State.py
index 59c7c7d701..e855f1ba0f 100644
--- a/src/mem/slicc/symbols/State.py
+++ b/src/mem/slicc/symbols/State.py
@@ -30,7 +30,7 @@ from slicc.symbols.Symbol import Symbol
 
 class State(Symbol):
     def __repr__(self):
-        return "[State: %s]" % self.ident
+        return f"[State: {self.ident}]"
 
     def isWildcard(self):
         return False
diff --git a/src/mem/slicc/symbols/StateMachine.py b/src/mem/slicc/symbols/StateMachine.py
index b5af9ca8ed..4712064089 100644
--- a/src/mem/slicc/symbols/StateMachine.py
+++ b/src/mem/slicc/symbols/StateMachine.py
@@ -86,7 +86,7 @@ class StateMachine(Symbol):
                     param.ident,
                     location,
                     param.type_ast.type,
-                    "(*m_%s_ptr)" % param.ident,
+                    f"(*m_{param.ident}_ptr)",
                     {},
                     self,
                 )
@@ -96,7 +96,7 @@ class StateMachine(Symbol):
                     param.ident,
                     location,
                     param.type_ast.type,
-                    "m_%s" % param.ident,
+                    f"m_{param.ident}",
                     {},
                     self,
                 )
@@ -127,7 +127,7 @@ class StateMachine(Symbol):
         self.debug_flags.add("RubySlicc")
 
     def __repr__(self):
-        return "[StateMachine: %s]" % self.ident
+        return f"[StateMachine: {self.ident}]"
 
     def addState(self, state):
         assert self.table is None
@@ -143,15 +143,13 @@ class StateMachine(Symbol):
         # Check for duplicate action
         for other in self.actions.values():
             if action.ident == other.ident:
-                action.warning(
-                    "Duplicate action definition: %s" % action.ident
-                )
-                action.error("Duplicate action definition: %s" % action.ident)
+                action.warning(f"Duplicate action definition: {action.ident}")
+                action.error(f"Duplicate action definition: {action.ident}")
             if action.short == other.short:
-                other.warning("Duplicate action shorthand: %s" % other.ident)
-                other.warning("    shorthand = %s" % other.short)
-                action.warning("Duplicate action shorthand: %s" % action.ident)
-                action.error("    shorthand = %s" % action.short)
+                other.warning(f"Duplicate action shorthand: {other.ident}")
+                other.warning(f"    shorthand = {other.short}")
+                action.warning(f"Duplicate action shorthand: {action.ident}")
+                action.error(f"    shorthand = {action.short}")
 
         self.actions[action.ident] = action
 
@@ -179,9 +177,9 @@ class StateMachine(Symbol):
         self.objects.append(obj)
 
     def addType(self, type):
-        type_ident = "%s" % type.c_ident
+        type_ident = f"{type.c_ident}"
 
-        if type_ident == "%s_TBE" % self.ident:
+        if type_ident == f"{self.ident}_TBE":
             if self.TBEType != None:
                 self.error(
                     "Multiple Transaction Buffer types in a single machine."
@@ -216,14 +214,14 @@ class StateMachine(Symbol):
 
             index = (trans.state, trans.event)
             if index in table:
-                table[index].warning("Duplicate transition: %s" % table[index])
-                trans.error("Duplicate transition: %s" % trans)
+                table[index].warning(f"Duplicate transition: {table[index]}")
+                trans.error(f"Duplicate transition: {trans}")
             table[index] = trans
 
         # Look at all actions to make sure we used them all
         for action in self.actions.values():
             if not action.used:
-                error_msg = "Unused action: %s" % action.ident
+                error_msg = f"Unused action: {action.ident}"
                 if "desc" in action:
                     error_msg += ", " + action.desc
                 action.warning(error_msg)
@@ -235,7 +233,7 @@ class StateMachine(Symbol):
         port_to_buf_map = {}
         in_msg_bufs = {}
         for port in self.in_ports:
-            buf_name = "m_%s_ptr" % port.pairs["buffer_expr"].name
+            buf_name = f"m_{port.pairs['buffer_expr'].name}_ptr"
             msg_bufs.append(buf_name)
             port_to_buf_map[port] = msg_bufs.index(buf_name)
             if buf_name not in in_msg_bufs:
@@ -255,8 +253,8 @@ class StateMachine(Symbol):
         code = self.symtab.codeFormatter()
         ident = self.ident
 
-        py_ident = "%s_Controller" % ident
-        c_ident = "%s_Controller" % self.ident
+        py_ident = f"{ident}_Controller"
+        c_ident = f"{self.ident}_Controller"
 
         code(
             """
@@ -292,13 +290,13 @@ class $py_ident(RubyController):
                 )
 
         code.dedent()
-        code.write(path, "%s.py" % py_ident)
+        code.write(path, f"{py_ident}.py")
 
     def printControllerHH(self, path):
         """Output the method declarations for the class declaration"""
         code = self.symtab.codeFormatter()
         ident = self.ident
-        c_ident = "%s_Controller" % self.ident
+        c_ident = f"{self.ident}_Controller"
 
         code(
             """
@@ -529,14 +527,14 @@ void unset_tbe(${{self.TBEType.c_ident}}*& m_tbe_ptr);
 """
         )
 
-        code.write(path, "%s.hh" % c_ident)
+        code.write(path, f"{c_ident}.hh")
 
     def printControllerCC(self, path, includes):
         """Output the actions for performing the actions"""
 
         code = self.symtab.codeFormatter()
         ident = self.ident
-        c_ident = "%s_Controller" % self.ident
+        c_ident = f"{self.ident}_Controller"
 
         # Unfortunately, clang compilers will throw a "call to function ...
         # that is neither visible in the template definition nor found by
@@ -701,7 +699,7 @@ $c_ident::initNetQueues()
         vnet_dir_set = set()
 
         for var in self.config_parameters:
-            vid = "m_%s_ptr" % var.ident
+            vid = f"m_{var.ident}_ptr"
             if "network" in var:
                 vtype = var.type_ast.type
                 code("assert($vid != NULL);")
@@ -742,7 +740,7 @@ $c_ident::init()
 
         for var in self.objects:
             vtype = var.type
-            vid = "m_%s_ptr" % var.ident
+            vid = f"m_{var.ident}_ptr"
             if "network" not in var:
                 # Not a network port object
                 if "primitive" in vtype:
@@ -752,7 +750,7 @@ $c_ident::init()
                 else:
                     # Normal Object
                     th = var.get("template", "")
-                    expr = "%s  = new %s%s" % (vid, vtype.c_ident, th)
+                    expr = f"{vid}  = new {vtype.c_ident}{th}"
                     args = ""
                     if "non_obj" not in vtype and not vtype.isEnumeration:
                         args = var.get("constructor", "")
@@ -763,7 +761,7 @@ $c_ident::init()
                     if "default" in var:
                         code('*$vid = ${{var["default"]}}; // Object default')
                     elif "default" in vtype:
-                        comment = "Type %s default" % vtype.ident
+                        comment = f"Type {vtype.ident} default"
                         code('*$vid = ${{vtype["default"]}}; // $comment')
 
         # Set the prefetchers
@@ -787,8 +785,8 @@ $c_ident::init()
 
             # Only possible if it is not a 'z' case
             if not stall:
-                state = "%s_State_%s" % (self.ident, trans.state.ident)
-                event = "%s_Event_%s" % (self.ident, trans.event.ident)
+                state = f"{self.ident}_State_{trans.state.ident}"
+                event = f"{self.ident}_Event_{trans.event.ident}"
                 code("possibleTransition($state, $event);")
 
         code.dedent()
@@ -819,19 +817,19 @@ $c_ident::init()
         for param in self.config_parameters:
             if param.ident == "sequencer":
                 assert param.pointer
-                seq_ident = "m_%s_ptr" % param.ident
+                seq_ident = f"m_{param.ident}_ptr"
 
         dma_seq_ident = "NULL"
         for param in self.config_parameters:
             if param.ident == "dma_sequencer":
                 assert param.pointer
-                dma_seq_ident = "m_%s_ptr" % param.ident
+                dma_seq_ident = f"m_{param.ident}_ptr"
 
         coal_ident = "NULL"
         for param in self.config_parameters:
             if param.ident == "coalescer":
                 assert param.pointer
-                coal_ident = "m_%s_ptr" % param.ident
+                coal_ident = f"m_{param.ident}_ptr"
 
         if seq_ident != "NULL":
             code(
@@ -1276,13 +1274,13 @@ $c_ident::functionalWriteBuffers(PacketPtr& pkt)
         for var in self.objects:
             vtype = var.type
             if vtype.isBuffer:
-                vid = "m_%s_ptr" % var.ident
+                vid = f"m_{var.ident}_ptr"
                 code("num_functional_writes += $vid->functionalWrite(pkt);")
 
         for var in self.config_parameters:
             vtype = var.type_ast.type
             if vtype.isBuffer:
-                vid = "m_%s_ptr" % var.ident
+                vid = f"m_{var.ident}_ptr"
                 code("num_functional_writes += $vid->functionalWrite(pkt);")
 
         code(
@@ -1303,13 +1301,13 @@ $c_ident::functionalReadBuffers(PacketPtr& pkt)
         for var in self.objects:
             vtype = var.type
             if vtype.isBuffer:
-                vid = "m_%s_ptr" % var.ident
+                vid = f"m_{var.ident}_ptr"
                 code("if ($vid->functionalRead(pkt)) return true;")
 
         for var in self.config_parameters:
             vtype = var.type_ast.type
             if vtype.isBuffer:
-                vid = "m_%s_ptr" % var.ident
+                vid = f"m_{var.ident}_ptr"
                 code("if ($vid->functionalRead(pkt)) return true;")
 
         code(
@@ -1326,13 +1324,13 @@ $c_ident::functionalReadBuffers(PacketPtr& pkt, WriteMask &mask)
         for var in self.objects:
             vtype = var.type
             if vtype.isBuffer:
-                vid = "m_%s_ptr" % var.ident
+                vid = f"m_{var.ident}_ptr"
                 code("if ($vid->functionalRead(pkt, mask)) read = true;")
 
         for var in self.config_parameters:
             vtype = var.type_ast.type
             if vtype.isBuffer:
-                vid = "m_%s_ptr" % var.ident
+                vid = f"m_{var.ident}_ptr"
                 code("if ($vid->functionalRead(pkt, mask)) read = true;")
 
         code(
@@ -1345,7 +1343,7 @@ $c_ident::functionalReadBuffers(PacketPtr& pkt, WriteMask &mask)
 """
         )
 
-        code.write(path, "%s.cc" % c_ident)
+        code.write(path, f"{c_ident}.cc")
 
     def printCWakeup(self, path, includes):
         """Output the wakeup loop for the events"""
@@ -1496,7 +1494,7 @@ ${ident}_Controller::wakeup()
 """
         )
 
-        code.write(path, "%s_Wakeup.cc" % self.ident)
+        code.write(path, f"{self.ident}_Wakeup.cc")
 
     def printCSwitch(self, path):
         """Output switch statement for transition table"""
@@ -1720,13 +1718,10 @@ ${ident}_Controller::doTransitionWorker(${ident}_Event event,
             case_sorter = []
             res = trans.resources
             for key, val in res.items():
-                val = """
-if (!%s.areNSlotsAvailable(%s, clockEdge()))
+                val = f"""
+if (!{key.code}.areNSlotsAvailable({val}, clockEdge()))
     return TransitionResult_ResourceStall;
-""" % (
-                    key.code,
-                    val,
-                )
+"""
                 case_sorter.append(val)
 
             # Check all of the request_types for resource constraints
@@ -1811,7 +1806,7 @@ if (!checkResourceAvailable(%s_RequestType_%s, addr)) {
 } // namespace gem5
 """
         )
-        code.write(path, "%s_Transitions.cc" % self.ident)
+        code.write(path, f"{self.ident}_Transitions.cc")
 
     # **************************
     # ******* HTML Files *******
@@ -1838,19 +1833,19 @@ if (!checkResourceAvailable(%s_RequestType_%s, addr)) {
 
         # Generate action descriptions
         for action in self.actions.values():
-            name = "%s_action_%s.html" % (self.ident, action.ident)
+            name = f"{self.ident}_action_{action.ident}.html"
             code = html.createSymbol(action, "Action")
             code.write(path, name)
 
         # Generate state descriptions
         for state in self.states.values():
-            name = "%s_State_%s.html" % (self.ident, state.ident)
+            name = f"{self.ident}_State_{state.ident}.html"
             code = html.createSymbol(state, "State")
             code.write(path, name)
 
         # Generate event descriptions
         for event in self.events.values():
-            name = "%s_Event_%s.html" % (self.ident, event.ident)
+            name = f"{self.ident}_Event_{event.ident}.html"
             code = html.createSymbol(event, "Event")
             code.write(path, name)
 
@@ -1891,7 +1886,7 @@ if (!checkResourceAvailable(%s_RequestType_%s, addr)) {
         )
 
         for event in self.events.values():
-            href = "%s_Event_%s.html" % (self.ident, event.ident)
+            href = f"{self.ident}_Event_{event.ident}.html"
             ref = self.frameRef(href, "Status", href, "1", event.short)
             code("<TH bgcolor=white>$ref</TH>")
 
@@ -1904,8 +1899,8 @@ if (!checkResourceAvailable(%s_RequestType_%s, addr)) {
             else:
                 color = "white"
 
-            click = "%s_table_%s.html" % (self.ident, state.ident)
-            over = "%s_State_%s.html" % (self.ident, state.ident)
+            click = f"{self.ident}_table_{state.ident}.html"
+            over = f"{self.ident}_State_{state.ident}.html"
             text = html.formatShorthand(state.short)
             ref = self.frameRef(click, "Table", over, "1", state.short)
             code(
@@ -1955,7 +1950,7 @@ if (!checkResourceAvailable(%s_RequestType_%s, addr)) {
 
                 code("<TD bgcolor=$color>")
                 for action in trans.actions:
-                    href = "%s_action_%s.html" % (self.ident, action.ident)
+                    href = f"{self.ident}_action_{action.ident}.html"
                     ref = self.frameRef(
                         href, "Status", href, "1", action.short
                     )
@@ -1963,8 +1958,8 @@ if (!checkResourceAvailable(%s_RequestType_%s, addr)) {
                 if next != state:
                     if trans.actions:
                         code("/")
-                    click = "%s_table_%s.html" % (self.ident, next.ident)
-                    over = "%s_State_%s.html" % (self.ident, next.ident)
+                    click = f"{self.ident}_table_{next.ident}.html"
+                    over = f"{self.ident}_State_{next.ident}.html"
                     ref = self.frameRef(click, "Table", over, "1", next.short)
                     code("$ref")
                 code("</TD>")
@@ -1975,8 +1970,8 @@ if (!checkResourceAvailable(%s_RequestType_%s, addr)) {
             else:
                 color = "white"
 
-            click = "%s_table_%s.html" % (self.ident, state.ident)
-            over = "%s_State_%s.html" % (self.ident, state.ident)
+            click = f"{self.ident}_table_{state.ident}.html"
+            over = f"{self.ident}_State_{state.ident}.html"
             ref = self.frameRef(click, "Table", over, "1", state.short)
             code(
                 """
@@ -1993,7 +1988,7 @@ if (!checkResourceAvailable(%s_RequestType_%s, addr)) {
         )
 
         for event in self.events.values():
-            href = "%s_Event_%s.html" % (self.ident, event.ident)
+            href = f"{self.ident}_Event_{event.ident}.html"
             ref = self.frameRef(href, "Status", href, "1", event.short)
             code("<TH bgcolor=white>$ref</TH>")
         code(
@@ -2005,9 +2000,9 @@ if (!checkResourceAvailable(%s_RequestType_%s, addr)) {
         )
 
         if active_state:
-            name = "%s_table_%s.html" % (self.ident, active_state.ident)
+            name = f"{self.ident}_table_{active_state.ident}.html"
         else:
-            name = "%s_table.html" % self.ident
+            name = f"{self.ident}_table.html"
         code.write(path, name)
 
 
diff --git a/src/mem/slicc/symbols/Symbol.py b/src/mem/slicc/symbols/Symbol.py
index cd8f6b9a29..74863724d2 100644
--- a/src/mem/slicc/symbols/Symbol.py
+++ b/src/mem/slicc/symbols/Symbol.py
@@ -64,7 +64,7 @@ class Symbol(PairContainer):
         self.used = False
 
     def __repr__(self):
-        return "[Symbol: %s]" % self.ident
+        return f"[Symbol: {self.ident}]"
 
     def __str__(self):
         return str(self.ident)
diff --git a/src/mem/slicc/symbols/SymbolTable.py b/src/mem/slicc/symbols/SymbolTable.py
index 4b06be5c4a..f5dfec1d68 100644
--- a/src/mem/slicc/symbols/SymbolTable.py
+++ b/src/mem/slicc/symbols/SymbolTable.py
@@ -38,7 +38,7 @@ def makeDir(path):
     ensure that it is a directory"""
     if os.path.exists(path):
         if not os.path.isdir(path):
-            raise AttributeError("%s exists but is not directory" % path)
+            raise AttributeError(f"{path} exists but is not directory")
     else:
         os.mkdir(path)
 
@@ -124,7 +124,7 @@ class SymbolTable(object):
     def registerGlobalSym(self, ident, symbol):
         # Check for redeclaration (global frame only)
         if ident in self.sym_map_vec[0]:
-            symbol.error("Symbol '%s' redeclared in global scope." % ident)
+            symbol.error(f"Symbol '{ident}' redeclared in global scope.")
 
         self.sym_map_vec[0][ident] = symbol
 
@@ -155,7 +155,7 @@ class SymbolTable(object):
 
         machines = list(self.getAllType(StateMachine))
         if len(machines) > 1:
-            name = "%s_table.html" % machines[0].ident
+            name = f"{machines[0].ident}_table.html"
         else:
             name = "empty.html"
 
diff --git a/src/mem/slicc/symbols/Transition.py b/src/mem/slicc/symbols/Transition.py
index 478f28c74c..b517cf4d44 100644
--- a/src/mem/slicc/symbols/Transition.py
+++ b/src/mem/slicc/symbols/Transition.py
@@ -41,7 +41,7 @@ class Transition(Symbol):
         request_types,
         location,
     ):
-        ident = "%s|%s" % (state, event)
+        ident = f"{state}|{event}"
         super().__init__(table, ident, location)
 
         self.state = machine.states[state]
diff --git a/src/mem/slicc/symbols/Type.py b/src/mem/slicc/symbols/Type.py
index c51902667b..7010461e0c 100644
--- a/src/mem/slicc/symbols/Type.py
+++ b/src/mem/slicc/symbols/Type.py
@@ -73,7 +73,7 @@ class Type(Symbol):
                     self.c_ident = self["external_name"]
             else:
                 # Append with machine name
-                self.c_ident = "%s_%s" % (machine, ident)
+                self.c_ident = f"{machine}_{ident}"
 
         self.pairs.setdefault("desc", "No description avaliable")
 
@@ -157,7 +157,7 @@ class Type(Symbol):
             ident,
             self.location,
             type,
-            "m_%s" % ident,
+            f"m_{ident}",
             pairs,
             None,
             init_code,
@@ -195,7 +195,7 @@ class Type(Symbol):
 
         # Add default
         if "default" not in self:
-            self["default"] = "%s_NUM" % self.c_ident
+            self["default"] = f"{self.c_ident}_NUM"
 
         return True
 
@@ -240,7 +240,7 @@ class Type(Symbol):
         parent = ""
         if "interface" in self:
             code('#include "mem/ruby/protocol/$0.hh"', self["interface"])
-            parent = " :  public %s" % self["interface"]
+            parent = f" :  public {self['interface']}"
 
         code(
             """
@@ -294,7 +294,7 @@ $klass ${{self.c_ident}}$parent
         # ******** Full init constructor ********
         if not self.isGlobal:
             params = [
-                "const %s& local_%s" % (dm.real_c_type, dm.ident)
+                f"const {dm.real_c_type}& local_{dm.ident}"
                 for dm in self.data_members.values()
             ]
             params = ", ".join(params)
@@ -407,7 +407,7 @@ set${{dm.ident}}(const ${{dm.real_c_type}}& local_${{dm.ident}})
                 if dm.init_code:
                     # only global structure can have init value here
                     assert self.isGlobal
-                    init = " = %s" % (dm.init_code)
+                    init = f" = {dm.init_code}"
 
                 if "desc" in dm:
                     code('/** ${{dm["desc"]}} */')
@@ -440,7 +440,7 @@ operator<<(::std::ostream& out, const ${{self.c_ident}}& obj)
 """
         )
 
-        code.write(path, "%s.hh" % self.c_ident)
+        code.write(path, f"{self.c_ident}.hh")
 
     def printTypeCC(self, path):
         code = self.symtab.codeFormatter()
@@ -498,7 +498,7 @@ out << "${{dm.ident}} = " << printAddress(m_${{dm.ident}}) << " ";"""
 """
         )
 
-        code.write(path, "%s.cc" % self.c_ident)
+        code.write(path, f"{self.c_ident}.cc")
 
     def printEnumHH(self, path):
         code = self.symtab.codeFormatter()
@@ -552,7 +552,7 @@ enum ${{self.c_ident}} {
         for i, (ident, enum) in enumerate(self.enums.items()):
             desc = enum.get("desc", "No description avaliable")
             if i == 0:
-                init = " = %s_FIRST" % self.c_ident
+                init = f" = {self.c_ident}_FIRST"
             else:
                 init = ""
             code("${{self.c_ident}}_${{enum.ident}}$init, /**< $desc */")
@@ -640,7 +640,7 @@ struct hash<gem5::ruby::MachineType>
 """
         )
 
-        code.write(path, "%s.hh" % self.c_ident)
+        code.write(path, f"{self.c_ident}.hh")
 
     def printEnumCC(self, path):
         code = self.symtab.codeFormatter()
@@ -932,7 +932,7 @@ get${{enum.ident}}MachineID(NodeID RubyNode)
         )
 
         # Write the file
-        code.write(path, "%s.cc" % self.c_ident)
+        code.write(path, f"{self.c_ident}.cc")
 
 
 __all__ = ["Type"]
diff --git a/src/mem/slicc/symbols/Var.py b/src/mem/slicc/symbols/Var.py
index 3b8a538a23..cafdb17ff3 100644
--- a/src/mem/slicc/symbols/Var.py
+++ b/src/mem/slicc/symbols/Var.py
@@ -39,7 +39,7 @@ class Var(Symbol):
         self.code = code
 
     def __repr__(self):
-        return "[Var id: %s]" % (self.ident)
+        return f"[Var id: {self.ident}]"
 
     def writeCodeFiles(self, path, includes):
         pass
diff --git a/src/mem/slicc/util.py b/src/mem/slicc/util.py
index 07b5ba6ab2..3bb4131a01 100644
--- a/src/mem/slicc/util.py
+++ b/src/mem/slicc/util.py
@@ -51,11 +51,11 @@ class Location(object):
     def __init__(self, filename, lineno, no_warning=False):
         if not isinstance(filename, str):
             raise AttributeError(
-                "filename must be a string, found {}".format(type(filename))
+                f"filename must be a string, found {type(filename)}"
             )
         if not isinstance(lineno, int):
             raise AttributeError(
-                "filename must be an integer, found {}".format(type(lineno))
+                f"filename must be an integer, found {type(lineno)}"
             )
         self.filename = filename
         self.lineno = lineno
@@ -70,13 +70,13 @@ class Location(object):
         if args:
             message = message % args
         # raise Exception, "%s: Warning: %s" % (self, message)
-        print("%s: Warning: %s" % (self, message), file=sys.stderr)
+        print(f"{self}: Warning: {message}", file=sys.stderr)
 
     def error(self, message, *args):
         if args:
             message = message % args
-        raise Exception("{}: Error: {}".format(self, message))
-        sys.exit("\n%s: Error: %s" % (self, message))
+        raise Exception(f"{self}: Error: {message}")
+        sys.exit(f"\n{self}: Error: {message}")
 
 
 __all__ = ["PairContainer", "Location"]
diff --git a/src/python/gem5/components/boards/arm_board.py b/src/python/gem5/components/boards/arm_board.py
index 7936c0c25e..10e2c0eb82 100644
--- a/src/python/gem5/components/boards/arm_board.py
+++ b/src/python/gem5/components/boards/arm_board.py
@@ -387,7 +387,7 @@ class ArmBoard(ArmSystem, AbstractBoard, KernelDiskWorkload):
             "norandmaps",
             "root={root_value}",
             "rw",
-            "mem=%s" % self.get_memory().get_size(),
+            f"mem={self.get_memory().get_size()}",
         ]
 
     @overrides(SimObject)
diff --git a/src/python/gem5/components/boards/experimental/lupv_board.py b/src/python/gem5/components/boards/experimental/lupv_board.py
index 5624712ca8..ad130b7273 100644
--- a/src/python/gem5/components/boards/experimental/lupv_board.py
+++ b/src/python/gem5/components/boards/experimental/lupv_board.py
@@ -293,7 +293,7 @@ class LupvBoard(AbstractSystemBoard, KernelDiskWorkload):
         root.appendCompatible(["luplab,lupv"])
 
         for mem_range in self.mem_ranges:
-            node = FdtNode("memory@%x" % int(mem_range.start))
+            node = FdtNode(f"memory@{int(mem_range.start):x}")
             node.append(FdtPropertyStrings("device_type", ["memory"]))
             node.append(
                 FdtPropertyWords(
diff --git a/src/python/gem5/components/boards/riscv_board.py b/src/python/gem5/components/boards/riscv_board.py
index 15ec57af69..25f1fac562 100644
--- a/src/python/gem5/components/boards/riscv_board.py
+++ b/src/python/gem5/components/boards/riscv_board.py
@@ -248,7 +248,7 @@ class RiscvBoard(AbstractSystemBoard, KernelDiskWorkload):
         root.appendCompatible(["riscv-virtio"])
 
         for mem_range in self.mem_ranges:
-            node = FdtNode("memory@%x" % int(mem_range.start))
+            node = FdtNode(f"memory@{int(mem_range.start):x}")
             node.append(FdtPropertyStrings("device_type", ["memory"]))
             node.append(
                 FdtPropertyWords(
diff --git a/src/python/gem5/prebuilt/riscvmatched/riscvmatched_board.py b/src/python/gem5/prebuilt/riscvmatched/riscvmatched_board.py
index 4148c0a061..ae483cc401 100644
--- a/src/python/gem5/prebuilt/riscvmatched/riscvmatched_board.py
+++ b/src/python/gem5/prebuilt/riscvmatched/riscvmatched_board.py
@@ -321,7 +321,7 @@ class RISCVMatchedBoard(
         root.appendCompatible(["riscv-virtio"])
 
         for mem_range in self.mem_ranges:
-            node = FdtNode("memory@%x" % int(mem_range.start))
+            node = FdtNode(f"memory@{int(mem_range.start):x}")
             node.append(FdtPropertyStrings("device_type", ["memory"]))
             node.append(
                 FdtPropertyWords(
diff --git a/src/python/gem5/resources/downloader.py b/src/python/gem5/resources/downloader.py
index 24b8970cc0..16b0147b74 100644
--- a/src/python/gem5/resources/downloader.py
+++ b/src/python/gem5/resources/downloader.py
@@ -108,7 +108,7 @@ def _get_resources_json_at_path(path: str, use_caching: bool = True) -> Dict:
     # Note the timeout is 120 so the `_download` function is given time to run
     # its Truncated Exponential Backoff algorithm
     # (maximum of roughly 1 minute). Typically this code will run quickly.
-    with FileLock("{}.lock".format(download_path), timeout=120):
+    with FileLock(f"{download_path}.lock", timeout=120):
 
         # The resources.json file can change at any time, but to avoid
         # excessive retrieval we cache a version locally and use it for up to
@@ -212,9 +212,7 @@ def _get_resources(
             # after a check that the name is unique.
             if resource["name"] in to_return.keys():
                 raise Exception(
-                    "Error: Duplicate resource with name '{}'.".format(
-                        resource["name"]
-                    )
+                    f"Error: Duplicate resource with name '{resource['name']}'."
                 )
             to_return[resource["name"]] = resource
         elif resource["type"] == "group":
@@ -229,9 +227,7 @@ def _get_resources(
                 # the resources.json file. The resources names need to be
                 # unique keyes.
                 raise Exception(
-                    "Error: Duplicate resources with names: {}.".format(
-                        str(intersection)
-                    )
+                    f"Error: Duplicate resources with names: {str(intersection)}."
                 )
             to_return.update(new_map)
 
@@ -390,9 +386,7 @@ def get_resources_json_obj(resource_name: str) -> Dict:
 
     if resource_name not in resource_map:
         raise Exception(
-            "Error: Resource with name '{}' does not exist".format(
-                resource_name
-            )
+            f"Error: Resource with name '{resource_name}' does not exist"
         )
 
     return resource_map[resource_name]
@@ -435,7 +429,7 @@ def get_resource(
     # same resources at once. The timeout here is somewhat arbitarily put at 15
     # minutes.Most resources should be downloaded and decompressed in this
     # timeframe, even on the most constrained of systems.
-    with FileLock("{}.lock".format(to_path), timeout=900):
+    with FileLock(f"{to_path}.lock", timeout=900):
 
         resource_json = get_resources_json_obj(resource_name)
 
@@ -506,13 +500,11 @@ def get_resource(
         url = resource_json["url"].format(url_base=_get_url_base())
 
         _download(url=url, download_to=download_dest)
-        print("Finished downloading resource '{}'.".format(resource_name))
+        print(f"Finished downloading resource '{resource_name}'.")
 
         if run_unzip:
             print(
-                "Decompressing resource '{}' ('{}')...".format(
-                    resource_name, download_dest
-                )
+                f"Decompressing resource '{resource_name}' ('{download_dest}')..."
             )
             unzip_to = download_dest[: -len(zip_extension)]
             with gzip.open(download_dest, "rb") as f:
@@ -520,9 +512,7 @@ def get_resource(
                     shutil.copyfileobj(f, o)
             os.remove(download_dest)
             download_dest = unzip_to
-            print(
-                "Finished decompressing resource '{}'.".format(resource_name)
-            )
+            print(f"Finished decompressing resource '{resource_name}'.")
 
         if run_tar_extract:
             print(
diff --git a/src/python/gem5/simulate/exit_event.py b/src/python/gem5/simulate/exit_event.py
index 1e14fdd11a..605fb6e556 100644
--- a/src/python/gem5/simulate/exit_event.py
+++ b/src/python/gem5/simulate/exit_event.py
@@ -97,5 +97,5 @@ class ExitEvent(Enum):
             # This is for the gups generator exit event
             return ExitEvent.EXIT
         raise NotImplementedError(
-            "Exit event '{}' not implemented".format(exit_string)
+            f"Exit event '{exit_string}' not implemented"
         )
diff --git a/src/python/gem5/utils/filelock.py b/src/python/gem5/utils/filelock.py
index a6798e9f53..6fb4e3e1d1 100644
--- a/src/python/gem5/utils/filelock.py
+++ b/src/python/gem5/utils/filelock.py
@@ -47,7 +47,7 @@ class FileLock(object):
                 "If timeout is not None, then delay must not be None."
             )
         self.is_locked = False
-        self.lockfile = os.path.join(os.getcwd(), "%s.lock" % file_name)
+        self.lockfile = os.path.join(os.getcwd(), f"{file_name}.lock")
         self.file_name = file_name
         self.timeout = timeout
         self.delay = delay
@@ -83,7 +83,7 @@ class FileLock(object):
                     )
                 if (time.time() - start_time) >= self.timeout:
                     raise FileLockException(
-                        "Timeout occured. {}".format(solution_message)
+                        f"Timeout occured. {solution_message}"
                     )
                 time.sleep(self.delay)
 
diff --git a/src/python/gem5/utils/multiprocessing/context.py b/src/python/gem5/utils/multiprocessing/context.py
index 2108bc624c..87917d1bfb 100644
--- a/src/python/gem5/utils/multiprocessing/context.py
+++ b/src/python/gem5/utils/multiprocessing/context.py
@@ -65,7 +65,7 @@ class gem5Context(context.BaseContext):
         try:
             ctx = _concrete_contexts[method]
         except KeyError:
-            raise ValueError("cannot find context for %r" % method) from None
+            raise ValueError(f"cannot find context for {method!r}") from None
         ctx._check_available()
         return ctx
 
diff --git a/src/python/gem5/utils/requires.py b/src/python/gem5/utils/requires.py
index 30a8ef4a8b..9d271aafa2 100644
--- a/src/python/gem5/utils/requires.py
+++ b/src/python/gem5/utils/requires.py
@@ -47,7 +47,7 @@ def _get_exception_str(msg: str):
         # Otherwise we assume the `requires` is being called by a class, in
         # which case we label the exception message with the class name.
         name = inspect.stack()[2].frame.f_locals["self"].__class__.__name__
-    return "[{}] {}".format(name, msg)
+    return f"[{name}] {msg}"
 
 
 def requires(
diff --git a/src/python/importer.py b/src/python/importer.py
index 3d3ee7c068..d3bdd593ef 100644
--- a/src/python/importer.py
+++ b/src/python/importer.py
@@ -50,7 +50,7 @@ class CodeImporter(object):
 
     def add_module(self, abspath, modpath, code):
         if modpath in self.modules:
-            raise AttributeError("%s already found in importer" % modpath)
+            raise AttributeError(f"{modpath} already found in importer")
 
         self.modules[modpath] = (abspath, code)
 
diff --git a/src/python/m5/SimObject.py b/src/python/m5/SimObject.py
index 354a8288cd..08105d8833 100644
--- a/src/python/m5/SimObject.py
+++ b/src/python/m5/SimObject.py
@@ -152,7 +152,7 @@ class MetaSimObject(type):
     # and only allow "private" attributes to be passed to the base
     # __new__ (starting with underscore).
     def __new__(mcls, name, bases, dict):
-        assert name not in allClasses, "SimObject %s already present" % name
+        assert name not in allClasses, f"SimObject {name} already present"
 
         # Copy "private" attributes, functions, and classes to the
         # official dict.  Everything else goes in _init_dict to be
@@ -252,7 +252,7 @@ class MetaSimObject(type):
             if "cxx_class" not in cls._value_dict:
                 cls._value_dict["cxx_class"] = cls._value_dict["type"]
 
-            cls._value_dict["cxx_type"] = "%s *" % cls._value_dict["cxx_class"]
+            cls._value_dict["cxx_type"] = f"{cls._value_dict['cxx_class']} *"
 
             if "cxx_header" not in cls._value_dict:
                 global noCxxHeader
@@ -295,8 +295,7 @@ class MetaSimObject(type):
     def _set_keyword(cls, keyword, val, kwtype):
         if not isinstance(val, kwtype):
             raise TypeError(
-                "keyword %s has bad type %s (expecting %s)"
-                % (keyword, type(val), kwtype)
+                f"keyword {keyword} has bad type {type(val)} (expecting {kwtype})"
             )
         if isinstance(val, FunctionType):
             val = classmethod(val)
@@ -316,11 +315,8 @@ class MetaSimObject(type):
             hr_value = value
             value = param.convert(value)
         except Exception as e:
-            msg = "%s\nError setting param %s.%s to %s\n" % (
-                e,
-                cls.__name__,
-                name,
-                value,
+            msg = (
+                f"{e}\nError setting param {cls.__name__}.{name} to {value}\n"
             )
             e.args = (msg,)
             raise
@@ -372,9 +368,7 @@ class MetaSimObject(type):
         for k, v in cls._value_dict.items():
             if v == value:
                 return k, v
-        raise RuntimeError(
-            "Cannot find parameter {} in parameter list".format(value)
-        )
+        raise RuntimeError(f"Cannot find parameter {value} in parameter list")
 
     # Set attribute (called on foo.attr = value when foo is an
     # instance of class cls).
@@ -411,9 +405,7 @@ class MetaSimObject(type):
             return
 
         # no valid assignment... raise exception
-        raise AttributeError(
-            "Class %s has no parameter '%s'" % (cls.__name__, attr)
-        )
+        raise AttributeError(f"Class {cls.__name__} has no parameter '{attr}'")
 
     def __getattr__(cls, attr):
         if attr == "cxx_class_path":
@@ -438,7 +430,7 @@ class MetaSimObject(type):
             return getattr(cls.getCCClass(), attr)
         except AttributeError:
             raise AttributeError(
-                "object '%s' has no attribute '%s'" % (cls.__name__, attr)
+                f"object '{cls.__name__}' has no attribute '{attr}'"
             )
 
     def __str__(cls):
@@ -672,10 +664,10 @@ class SimObject(object, metaclass=MetaSimObject):
                     ex_str = values.example_str()
                     ptype = None
                     if isinstance(values, VectorParamDesc):
-                        type_str = "Vector_%s" % values.ptype_str
+                        type_str = f"Vector_{values.ptype_str}"
                         ptype = values
                     else:
-                        type_str = "%s" % values.ptype_str
+                        type_str = f"{values.ptype_str}"
                         ptype = values.ptype
 
                     if (
@@ -842,9 +834,8 @@ class SimObject(object, metaclass=MetaSimObject):
         if self._ccObject and hasattr(self._ccObject, attr):
             return getattr(self._ccObject, attr)
 
-        err_string = "object '%s' has no attribute '%s'" % (
-            self.__class__.__name__,
-            attr,
+        err_string = (
+            f"object '{self.__class__.__name__}' has no attribute '{attr}'"
         )
 
         if not self._ccObject:
@@ -915,7 +906,7 @@ class SimObject(object, metaclass=MetaSimObject):
 
         # no valid assignment... raise exception
         raise AttributeError(
-            "Class %s has no parameter %s" % (self.__class__.__name__, attr)
+            f"Class {self.__class__.__name__} has no parameter {attr}"
         )
 
     # this hack allows tacking a '[0]' onto parameters that may or may
@@ -923,7 +914,7 @@ class SimObject(object, metaclass=MetaSimObject):
     def __getitem__(self, key):
         if key == 0:
             return self
-        raise IndexError("Non-zero index '%s' to SimObject" % key)
+        raise IndexError(f"Non-zero index '{key}' to SimObject")
 
     # this hack allows us to iterate over a SimObject that may
     # not be a vector, so we can call a loop over it and get just one
@@ -1000,7 +991,7 @@ class SimObject(object, metaclass=MetaSimObject):
 
     def path(self):
         if not self._parent:
-            return "<orphan %s>" % self.__class__
+            return f"<orphan {self.__class__}>"
         elif isinstance(self._parent, MetaSimObject):
             return str(self.__class__)
 
@@ -1096,8 +1087,7 @@ class SimObject(object, metaclass=MetaSimObject):
                     value = value.unproxy(self)
                 except:
                     print(
-                        "Error in unproxying param '%s' of %s"
-                        % (param, self.path())
+                        f"Error in unproxying param '{param}' of {self.path()}"
                     )
                     raise
                 setattr(self, param, value)
@@ -1117,7 +1107,7 @@ class SimObject(object, metaclass=MetaSimObject):
         instanceDict[self.path()] = self
 
         if hasattr(self, "type"):
-            print("type=%s" % self.type, file=ini_file)
+            print(f"type={self.type}", file=ini_file)
 
         if len(self._children.keys()):
             print(
@@ -1133,14 +1123,14 @@ class SimObject(object, metaclass=MetaSimObject):
             value = self._values.get(param)
             if value != None:
                 print(
-                    "%s=%s" % (param, self._values[param].ini_str()),
+                    f"{param}={self._values[param].ini_str()}",
                     file=ini_file,
                 )
 
         for port_name in sorted(self._ports.keys()):
             port = self._port_refs.get(port_name, None)
             if port != None:
-                print("%s=%s" % (port_name, port.ini_str()), file=ini_file)
+                print(f"{port_name}={port.ini_str()}", file=ini_file)
 
         print(file=ini_file)  # blank line between objects
 
@@ -1186,7 +1176,7 @@ class SimObject(object, metaclass=MetaSimObject):
         # Ensure that m5.internal.params is available.
         import m5.internal.params
 
-        cc_params_struct = getattr(m5.internal.params, "%sParams" % self.type)
+        cc_params_struct = getattr(m5.internal.params, f"{self.type}Params")
         cc_params = cc_params_struct()
         cc_params.name = str(self)
 
@@ -1249,7 +1239,7 @@ class SimObject(object, metaclass=MetaSimObject):
                 self._ccObject = params.create()
         elif self._ccObject == -1:
             raise RuntimeError(
-                "%s: Cycle found in configuration hierarchy." % self.path()
+                f"{self.path()}: Cycle found in configuration hierarchy."
             )
         return self._ccObject
 
diff --git a/src/python/m5/debug.py b/src/python/m5/debug.py
index 70af2e0f3a..09a032aa50 100644
--- a/src/python/m5/debug.py
+++ b/src/python/m5/debug.py
@@ -40,13 +40,13 @@ def help():
         lambda kv: isinstance(kv[1], SimpleFlag) and not kv[1].isFormat,
         sorted_flags,
     ):
-        print("    %s: %s" % (name, flag.desc))
+        print(f"    {name}: {flag.desc}")
     print()
     print("Compound Flags:")
     for name, flag in filter(
         lambda kv: isinstance(kv[1], CompoundFlag), sorted_flags
     ):
-        print("    %s: %s" % (name, flag.desc))
+        print(f"    {name}: {flag.desc}")
         # The list of kids for flag "All" is too long, so it is not printed
         if name != "All":
             printList([c.name for c in flag.kids()], indent=8)
@@ -58,7 +58,7 @@ def help():
         lambda kv: isinstance(kv[1], SimpleFlag) and kv[1].isFormat,
         sorted_flags,
     ):
-        print("    %s: %s" % (name, flag.desc))
+        print(f"    {name}: {flag.desc}")
     print()
 
 
diff --git a/src/python/m5/event.py b/src/python/m5/event.py
index 707d65d63d..7c3f9a7c42 100644
--- a/src/python/m5/event.py
+++ b/src/python/m5/event.py
@@ -56,7 +56,7 @@ class EventWrapper(Event):
 
         if not callable(func):
             raise RuntimeError(
-                "Can't wrap '%s', object is not callable" % str(func)
+                f"Can't wrap '{str(func)}', object is not callable"
             )
 
         self._func = func
@@ -65,7 +65,7 @@ class EventWrapper(Event):
         self._func()
 
     def __str__(self):
-        return "EventWrapper(%s)" % (str(self._func),)
+        return f"EventWrapper({str(self._func)})"
 
 
 class ProgressEvent(Event):
@@ -76,7 +76,7 @@ class ProgressEvent(Event):
         self.eventq.schedule(self, m5.curTick() + self.period)
 
     def __call__(self):
-        print("Progress! Time now %fs" % (m5.curTick() / 1e12))
+        print(f"Progress! Time now {m5.curTick() / 1000000000000.0:f}s")
         self.eventq.schedule(self, m5.curTick() + self.period)
 
 
diff --git a/src/python/m5/ext/pyfdt/pyfdt.py b/src/python/m5/ext/pyfdt/pyfdt.py
index 191a57740d..499af7588a 100644
--- a/src/python/m5/ext/pyfdt/pyfdt.py
+++ b/src/python/m5/ext/pyfdt/pyfdt.py
@@ -52,7 +52,7 @@ class FdtProperty(object):
         """Init with name"""
         self.name = name
         if not FdtProperty.__validate_dt_name(self.name):
-            raise Exception("Invalid name '%s'" % self.name)
+            raise Exception(f"Invalid name '{self.name}'")
 
     def get_name(self):
         """Get property name"""
@@ -60,7 +60,7 @@ class FdtProperty(object):
 
     def __str__(self):
         """String representation"""
-        return "Property(%s)" % self.name
+        return f"Property({self.name})"
 
     def dts_represent(self, depth=0):
         """Get dts string representation"""
@@ -78,7 +78,7 @@ class FdtProperty(object):
 
     def json_represent(self, depth=0):
         """Ouput JSON"""
-        return "%s: null" % json.dumps(self.name)
+        return f"{json.dumps(self.name)}: null"
 
     def to_raw(self):
         """Return RAW value representation"""
@@ -219,7 +219,7 @@ class FdtPropertyStrings(FdtProperty):
 
     def json_represent(self, depth=0):
         """Ouput JSON"""
-        result = '%s: ["strings", ' % json.dumps(self.name)
+        result = f'{json.dumps(self.name)}: ["strings", '
         result += ", ".join([json.dumps(stri) for stri in self.strings])
         result += "]"
         return result
@@ -230,7 +230,7 @@ class FdtPropertyStrings(FdtProperty):
 
     def __str__(self):
         """String representation"""
-        return "Property(%s,Strings:%s)" % (self.name, self.strings)
+        return f"Property({self.name},Strings:{self.strings})"
 
     def __getitem__(self, index):
         """Get strings, returns a string"""
@@ -291,7 +291,7 @@ class FdtPropertyWords(FdtProperty):
             INDENT * depth
             + self.name
             + " = <"
-            + " ".join(["0x%08x" % word for word in self.words])
+            + " ".join([f"0x{word:08x}" for word in self.words])
             + ">;"
         )
 
@@ -310,8 +310,8 @@ class FdtPropertyWords(FdtProperty):
 
     def json_represent(self, depth=0):
         """Ouput JSON"""
-        result = '%s: ["words", "' % json.dumps(self.name)
-        result += '", "'.join(["0x%08x" % word for word in self.words])
+        result = f'{json.dumps(self.name)}: ["words", "'
+        result += '", "'.join([f"0x{word:08x}" for word in self.words])
         result += '"]'
         return result
 
@@ -321,7 +321,7 @@ class FdtPropertyWords(FdtProperty):
 
     def __str__(self):
         """String representation"""
-        return "Property(%s,Words:%s)" % (self.name, self.words)
+        return f"Property({self.name},Words:{self.words})"
 
     def __getitem__(self, index):
         """Get words, returns a word integer"""
@@ -376,7 +376,7 @@ class FdtPropertyBytes(FdtProperty):
             + self.name
             + " = ["
             + " ".join(
-                ["%02x" % (byte & int("ffffffff", 16)) for byte in self.bytes]
+                [f"{byte & int('ffffffff', 16):02x}" for byte in self.bytes]
             )
             + "];"
         )
@@ -397,8 +397,8 @@ class FdtPropertyBytes(FdtProperty):
 
     def json_represent(self, depth=0):
         """Ouput JSON"""
-        result = '%s: ["bytes", "' % json.dumps(self.name)
-        result += '", "'.join(["%02x" % byte for byte in self.bytes])
+        result = f'{json.dumps(self.name)}: ["bytes", "'
+        result += '", "'.join([f"{byte:02x}" for byte in self.bytes])
         result += '"]'
         return result
 
@@ -408,7 +408,7 @@ class FdtPropertyBytes(FdtProperty):
 
     def __str__(self):
         """String representation"""
-        return "Property(%s,Bytes:%s)" % (self.name, self.bytes)
+        return f"Property({self.name},Bytes:{self.bytes})"
 
     def __getitem__(self, index):
         """Get bytes, returns a byte"""
@@ -471,7 +471,7 @@ class FdtNode(object):
         self.subdata = []
         self.parent = None
         if not FdtNode.__validate_dt_name(self.name):
-            raise Exception("Invalid name '%s'" % self.name)
+            raise Exception(f"Invalid name '{self.name}'")
 
     def get_name(self):
         """Get property name"""
@@ -504,7 +504,7 @@ class FdtNode(object):
 
     def __str__(self):
         """String representation"""
-        return "Node(%s)" % self.name
+        return f"Node({self.name})"
 
     def dts_represent(self, depth=0):
         """Get dts string representation"""
@@ -579,7 +579,7 @@ class FdtNode(object):
         ].get_name() != subnode.get_name() and self.__check_name_duplicate(
             subnode.get_name()
         ):
-            raise Exception("%s : %s subnode already exists" % (self, subnode))
+            raise Exception(f"{self} : {subnode} subnode already exists")
         if not isinstance(subnode, (FdtNode, FdtProperty, FdtNop)):
             raise Exception("Invalid object type")
         self.subdata[index] = subnode
@@ -635,7 +635,7 @@ class FdtNode(object):
     def append(self, subnode):
         """Append subnode, same as add_subnode"""
         if self.__check_name_duplicate(subnode.get_name()):
-            raise Exception("%s : %s subnode already exists" % (self, subnode))
+            raise Exception(f"{self} : {subnode} subnode already exists")
         if not isinstance(subnode, (FdtNode, FdtProperty, FdtNop)):
             raise Exception("Invalid object type")
         self.subdata.append(subnode)
@@ -647,7 +647,7 @@ class FdtNode(object):
     def insert(self, index, subnode):
         """Insert subnode before index, must not be a duplicate name"""
         if self.__check_name_duplicate(subnode.get_name()):
-            raise Exception("%s : %s subnode already exists" % (self, subnode))
+            raise Exception(f"{self} : {subnode} subnode already exists")
         if not isinstance(subnode, (FdtNode, FdtProperty, FdtNop)):
             raise Exception("Invalid object type")
         self.subdata.insert(index, subnode)
@@ -778,7 +778,7 @@ class Fdt(object):
         )
         if self.header["version"] >= 2:
             result += (
-                "// boot_cpuid_phys:\t0x%x\n" % self.header["boot_cpuid_phys"]
+                f"// boot_cpuid_phys:\t0x{self.header['boot_cpuid_phys']:x}\n"
             )
         result += "\n"
         if self.reserve_entries is not None:
@@ -914,7 +914,7 @@ def _add_json_to_fdtnode(node, subjson):
             _add_json_to_fdtnode(subnode, value)
         elif isinstance(value, list):
             if len(value) < 2:
-                raise Exception("Invalid list for %s" % key)
+                raise Exception(f"Invalid list for {key}")
             if value[0] == "words":
                 words = [int(word, 16) for word in value[1:]]
                 node.append(FdtPropertyWords(key, words))
@@ -924,11 +924,11 @@ def _add_json_to_fdtnode(node, subjson):
             elif value[0] == "strings":
                 node.append(FdtPropertyStrings(key, [s for s in value[1:]]))
             else:
-                raise Exception("Invalid list for %s" % key)
+                raise Exception(f"Invalid list for {key}")
         elif value is None:
             node.append(FdtProperty(key))
         else:
-            raise Exception("Invalid value for %s" % key)
+            raise Exception(f"Invalid value for {key}")
 
 
 def FdtJsonParse(buf):
diff --git a/src/python/m5/internal/params.py b/src/python/m5/internal/params.py
index 8225d0b059..bd6cbb7d80 100644
--- a/src/python/m5/internal/params.py
+++ b/src/python/m5/internal/params.py
@@ -50,4 +50,4 @@ except ImportError:
 if in_gem5:
     for name, module in inspect.getmembers(_m5):
         if name.startswith("param_") or name.startswith("enum_"):
-            exec("from _m5.%s import *" % name)
+            exec(f"from _m5.{name} import *")
diff --git a/src/python/m5/main.py b/src/python/m5/main.py
index 458e143a53..d8c9951f6d 100644
--- a/src/python/m5/main.py
+++ b/src/python/m5/main.py
@@ -399,14 +399,14 @@ def main():
         done = True
         print("Build information:")
         print()
-        print("gem5 version %s" % defines.gem5Version)
-        print("compiled %s" % defines.compileDate)
+        print(f"gem5 version {defines.gem5Version}")
+        print(f"compiled {defines.compileDate}")
         print("build options:")
         keys = list(defines.buildEnv.keys())
         keys.sort()
         for key in keys:
             val = defines.buildEnv[key]
-            print("    %s = %s" % (key, val))
+            print(f"    {key} = {val}")
         print()
 
     if options.copyright:
@@ -470,11 +470,11 @@ def main():
         print(brief_copyright)
         print()
 
-        print("gem5 version %s" % _m5.core.gem5Version)
-        print("gem5 compiled %s" % _m5.core.compileDate)
+        print(f"gem5 version {_m5.core.gem5Version}")
+        print(f"gem5 compiled {_m5.core.compileDate}")
 
         print(
-            "gem5 started %s" % datetime.datetime.now().strftime("%b %e %Y %X")
+            f"gem5 started {datetime.datetime.now().strftime('%b %e %Y %X')}"
         )
         print(
             "gem5 executing on %s, pid %d"
@@ -490,7 +490,7 @@ def main():
     # check to make sure we can find the listed script
     if not options.c and (not arguments or not os.path.isfile(arguments[0])):
         if arguments and not os.path.isfile(arguments[0]):
-            print("Script %s not found" % arguments[0])
+            print(f"Script {arguments[0]} not found")
 
         options.usage(2)
 
@@ -514,7 +514,7 @@ def main():
     elif options.listener_mode == "on":
         pass
     else:
-        panic("Unhandled listener mode: %s" % options.listener_mode)
+        panic(f"Unhandled listener mode: {options.listener_mode}")
 
     if not options.allow_remote_connections:
         m5.listenersLoopbackOnly()
@@ -534,7 +534,7 @@ def main():
                 off = True
 
             if flag not in debug.flags:
-                print("invalid debug flag '%s'" % flag, file=sys.stderr)
+                print(f"invalid debug flag '{flag}'", file=sys.stderr)
                 sys.exit(1)
 
             if off:
diff --git a/src/python/m5/objects/__init__.py b/src/python/m5/objects/__init__.py
index b6672331f4..788babf620 100644
--- a/src/python/m5/objects/__init__.py
+++ b/src/python/m5/objects/__init__.py
@@ -26,4 +26,4 @@
 
 for module in __spec__.loader_state:
     if module.startswith("m5.objects."):
-        exec("from %s import *" % module)
+        exec(f"from {module} import *")
diff --git a/src/python/m5/options.py b/src/python/m5/options.py
index 08638c65a7..ed0dcddc97 100644
--- a/src/python/m5/options.py
+++ b/src/python/m5/options.py
@@ -92,8 +92,8 @@ class OptionParser(dict):
         """add a boolean option called --name and --no-name.
         Display help depending on which is the default"""
 
-        tname = "--%s" % name
-        fname = "--no-%s" % name
+        tname = f"--{name}"
+        fname = f"--no-{name}"
         dest = name.replace("-", "_")
         if default:
             thelp = optparse.SUPPRESS_HELP
diff --git a/src/python/m5/params.py b/src/python/m5/params.py
index 92e913b2f0..e9047a85d4 100644
--- a/src/python/m5/params.py
+++ b/src/python/m5/params.py
@@ -135,8 +135,8 @@ class ParamValue(object, metaclass=MetaParamValue):
     # src into lvalue dest (of the param's C++ type)
     @classmethod
     def cxx_ini_parse(cls, code, src, dest, ret):
-        code("// Unhandled param type: %s" % cls.__name__)
-        code("%s false;" % ret)
+        code(f"// Unhandled param type: {cls.__name__}")
+        code(f"{ret} false;")
 
     # allows us to blithely call unproxy() on things without checking
     # if they're really proxies or not
@@ -176,7 +176,7 @@ class ParamDesc(object):
             del kwargs["default"]
 
         if kwargs:
-            raise TypeError("extra unknown kwargs %s" % kwargs)
+            raise TypeError(f"extra unknown kwargs {kwargs}")
 
         if not hasattr(self, "desc"):
             raise TypeError("desc attribute missing")
@@ -191,7 +191,7 @@ class ParamDesc(object):
             return ptype
 
         raise AttributeError(
-            "'%s' object has no attribute '%s'" % (type(self).__name__, attr)
+            f"'{type(self).__name__}' object has no attribute '{attr}'"
         )
 
     def example_str(self):
@@ -247,7 +247,7 @@ class ParamDesc(object):
 class VectorParamValue(list, metaclass=MetaParamValue):
     def __setattr__(self, attr, value):
         raise AttributeError(
-            "Not allowed to set %s on '%s'" % (attr, type(self).__name__)
+            f"Not allowed to set {attr} on '{type(self).__name__}'"
         )
 
     def config_value(self):
@@ -316,7 +316,7 @@ class SimObjectVector(VectorParamValue):
         val = self[key]
         if value.has_parent():
             warn(
-                "SimObject %s already has a parent" % value.get_name()
+                f"SimObject {value.get_name()} already has a parent"
                 + " that is being overwritten by a SimObjectVector"
             )
         value.set_parent(val.get_parent(), val._name)
@@ -327,7 +327,7 @@ class SimObjectVector(VectorParamValue):
     # allow it to be specified on the command line.
     def enumerateParams(self, flags_dict={}, cmd_line_str="", access_str=""):
         if hasattr(self, "_paramEnumed"):
-            print("Cycle detected enumerating params at %s?!" % (cmd_line_str))
+            print(f"Cycle detected enumerating params at {cmd_line_str}?!")
         else:
             x = 0
             for vals in self:
@@ -469,8 +469,8 @@ class String(ParamValue, str):
 
     @classmethod
     def cxx_ini_parse(self, code, src, dest, ret):
-        code("%s = %s;" % (dest, src))
-        code("%s true;" % ret)
+        code(f"{dest} = {src};")
+        code(f"{ret} true;")
 
     def getValue(self):
         return self
@@ -571,7 +571,7 @@ class NumericParamValue(ParamValue):
     # the dest type.
     @classmethod
     def cxx_ini_parse(self, code, src, dest, ret):
-        code("%s to_number(%s, %s);" % (ret, src, dest))
+        code(f"{ret} to_number({src}, {dest});")
 
 
 # Metaclass for bounds-checked integer parameters.  See CheckedInt.
@@ -621,8 +621,7 @@ class CheckedInt(NumericParamValue, metaclass=CheckedIntType):
             self.value = int(value)
         else:
             raise TypeError(
-                "Can't convert object of type %s to CheckedInt"
-                % type(value).__name__
+                f"Can't convert object of type {type(value).__name__} to CheckedInt"
             )
         self._check()
 
@@ -751,10 +750,10 @@ class Cycles(CheckedInt):
     @classmethod
     def cxx_ini_parse(cls, code, src, dest, ret):
         code("uint64_t _temp;")
-        code("bool _ret = to_number(%s, _temp);" % src)
+        code(f"bool _ret = to_number({src}, _temp);")
         code("if (_ret)")
-        code("    %s = Cycles(_temp);" % dest)
-        code("%s _ret;" % ret)
+        code(f"    {dest} = Cycles(_temp);")
+        code(f"{ret} _ret;")
 
 
 class Float(ParamValue, float):
@@ -766,8 +765,7 @@ class Float(ParamValue, float):
             self.value = float(value)
         else:
             raise TypeError(
-                "Can't convert object of type %s to Float"
-                % type(value).__name__
+                f"Can't convert object of type {type(value).__name__} to Float"
             )
 
     def __call__(self, value):
@@ -786,7 +784,7 @@ class Float(ParamValue, float):
 
     @classmethod
     def cxx_ini_parse(self, code, src, dest, ret):
-        code("%s (std::istringstream(%s) >> %s).eof();" % (ret, src, dest))
+        code(f"{ret} (std::istringstream({src}) >> {dest}).eof();")
 
 
 class MemorySize(CheckedInt):
@@ -851,7 +849,7 @@ class Addr(CheckedInt):
             val = convert.toMemorySize(value)
         except TypeError:
             val = int(value)
-        return "0x%x" % int(val)
+        return f"0x{int(val):x}"
 
 
 class PcCountPair(ParamValue):
@@ -961,11 +959,11 @@ class AddrRange(ParamValue):
             raise TypeError("Too many arguments specified")
 
         if kwargs:
-            raise TypeError("Too many keywords: %s" % list(kwargs.keys()))
+            raise TypeError(f"Too many keywords: {list(kwargs.keys())}")
 
     def __str__(self):
         if len(self.masks) == 0:
-            return "%s:%s" % (self.start, self.end)
+            return f"{self.start}:{self.end}"
         else:
             return "%s:%s:%s:%s" % (
                 self.start,
@@ -1084,7 +1082,7 @@ class Bool(ParamValue):
 
     @classmethod
     def cxx_ini_parse(cls, code, src, dest, ret):
-        code("%s to_bool(%s, %s);" % (ret, src, dest))
+        code(f"{ret} to_bool({src}, {dest});")
 
 
 def IncEthernetAddr(addr, val=1):
@@ -1097,7 +1095,7 @@ def IncEthernetAddr(addr, val=1):
             break
         bytes[i - 1] += val
     assert bytes[0] <= 255
-    return ":".join(map(lambda x: "%02x" % x, bytes))
+    return ":".join(map(lambda x: f"{x:02x}", bytes))
 
 
 _NextEthernetAddr = "00:90:00:00:00:01"
@@ -1130,11 +1128,11 @@ class EthernetAddr(ParamValue):
 
         bytes = value.split(":")
         if len(bytes) != 6:
-            raise TypeError("invalid ethernet address %s" % value)
+            raise TypeError(f"invalid ethernet address {value}")
 
         for byte in bytes:
             if not 0 <= int(byte, base=16) <= 0xFF:
-                raise TypeError("invalid ethernet address %s" % value)
+                raise TypeError(f"invalid ethernet address {value}")
 
         self.value = value
 
@@ -1160,8 +1158,8 @@ class EthernetAddr(ParamValue):
 
     @classmethod
     def cxx_ini_parse(self, code, src, dest, ret):
-        code("%s = networking::EthAddr(%s);" % (dest, src))
-        code("%s true;" % ret)
+        code(f"{dest} = networking::EthAddr({src});")
+        code(f"{ret} true;")
 
 
 # When initializing an IpAddress, pass in an existing IpAddress, a string of
@@ -1236,7 +1234,7 @@ class IpNetmask(IpAddress):
             elif elseVal:
                 setattr(self, key, elseVal)
             else:
-                raise TypeError("No value set for %s" % key)
+                raise TypeError(f"No value set for {key}")
 
         if len(args) == 0:
             handle_kwarg(self, kwargs, "ip")
@@ -1261,7 +1259,7 @@ class IpNetmask(IpAddress):
             raise TypeError("Too many arguments specified")
 
         if kwargs:
-            raise TypeError("Too many keywords: %s" % list(kwargs.keys()))
+            raise TypeError(f"Too many keywords: {list(kwargs.keys())}")
 
         self.verify()
 
@@ -1312,7 +1310,7 @@ class IpWithPort(IpAddress):
             elif elseVal:
                 setattr(self, key, elseVal)
             else:
-                raise TypeError("No value set for %s" % key)
+                raise TypeError(f"No value set for {key}")
 
         if len(args) == 0:
             handle_kwarg(self, kwargs, "ip")
@@ -1337,7 +1335,7 @@ class IpWithPort(IpAddress):
             raise TypeError("Too many arguments specified")
 
         if kwargs:
-            raise TypeError("Too many keywords: %s" % list(kwargs.keys()))
+            raise TypeError(f"Too many keywords: {list(kwargs.keys())}")
 
         self.verify()
 
@@ -1408,7 +1406,7 @@ def parse_time(value):
             except ValueError:
                 pass
 
-    raise ValueError("Could not parse '%s' as a time" % value)
+    raise ValueError(f"Could not parse '{value}' as a time")
 
 
 class Time(ParamValue):
@@ -1501,9 +1499,9 @@ class MetaEnum(MetaParamValue):
             )
 
         if cls.is_class:
-            cls.cxx_type = "%s" % name
+            cls.cxx_type = f"{name}"
         else:
-            cls.cxx_type = "enums::%s" % name
+            cls.cxx_type = f"enums::{name}"
 
         super().__init__(name, bases, init_dict)
 
@@ -1527,8 +1525,7 @@ class Enum(ParamValue, metaclass=MetaEnum):
     def __init__(self, value):
         if value not in self.map:
             raise TypeError(
-                "Enum param got bad value '%s' (not in %s)"
-                % (value, self.vals)
+                f"Enum param got bad value '{value}' (not in {self.vals})"
             )
         self.value = value
 
@@ -1547,20 +1544,17 @@ class Enum(ParamValue, metaclass=MetaEnum):
             code('} else if (%s == "%s") {' % (src, elem_name))
             code.indent()
             name = cls.__name__ if cls.enum_name is None else cls.enum_name
-            code(
-                "%s = %s::%s;"
-                % (dest, name if cls.is_class else "enums", elem_name)
-            )
-            code("%s true;" % ret)
+            code(f"{dest} = {name if cls.is_class else 'enums'}::{elem_name};")
+            code(f"{ret} true;")
             code.dedent()
         code("} else {")
-        code("    %s false;" % ret)
+        code(f"    {ret} false;")
         code("}")
 
     def getValue(self):
         import m5.internal.params
 
-        e = getattr(m5.internal.params, "enum_%s" % self.__class__.__name__)
+        e = getattr(m5.internal.params, f"enum_{self.__class__.__name__}")
         return e(self.map[self.value])
 
     def __str__(self):
@@ -1648,7 +1642,7 @@ class Latency(TickParamValue):
             return self
         if attr == "frequency":
             return Frequency(self)
-        raise AttributeError("Latency object has no attribute '%s'" % attr)
+        raise AttributeError(f"Latency object has no attribute '{attr}'")
 
     def getValue(self):
         if self.ticks or self.value == 0:
@@ -1691,7 +1685,7 @@ class Frequency(TickParamValue):
             return self
         if attr in ("latency", "period"):
             return Latency(self)
-        raise AttributeError("Frequency object has no attribute '%s'" % attr)
+        raise AttributeError(f"Frequency object has no attribute '{attr}'")
 
     # convert latency to ticks
     def getValue(self):
@@ -1730,14 +1724,14 @@ class Clock(TickParamValue):
         return value
 
     def __str__(self):
-        return "%s" % Latency(self)
+        return f"{Latency(self)}"
 
     def __getattr__(self, attr):
         if attr == "frequency":
             return Frequency(self)
         if attr in ("latency", "period"):
             return Latency(self)
-        raise AttributeError("Frequency object has no attribute '%s'" % attr)
+        raise AttributeError(f"Frequency object has no attribute '{attr}'")
 
     def getValue(self):
         return self.period.getValue()
@@ -1821,10 +1815,10 @@ class Temperature(ParamValue):
     @classmethod
     def cxx_ini_parse(self, code, src, dest, ret):
         code("double _temp;")
-        code("bool _ret = to_number(%s, _temp);" % src)
+        code(f"bool _ret = to_number({src}, _temp);")
         code("if (_ret)")
-        code("    %s = Temperature(_temp);" % dest)
-        code("%s _ret;" % ret)
+        code(f"    {dest} = Temperature(_temp);")
+        code(f"{ret} _ret;")
 
 
 class NetworkBandwidth(float, ParamValue):
@@ -1853,10 +1847,10 @@ class NetworkBandwidth(float, ParamValue):
         return float(value)
 
     def ini_str(self):
-        return "%f" % self.getValue()
+        return f"{self.getValue():f}"
 
     def config_value(self):
-        return "%f" % self.getValue()
+        return f"{self.getValue():f}"
 
     @classmethod
     def cxx_ini_predecls(cls, code):
@@ -1864,7 +1858,7 @@ class NetworkBandwidth(float, ParamValue):
 
     @classmethod
     def cxx_ini_parse(self, code, src, dest, ret):
-        code("%s (std::istringstream(%s) >> %s).eof();" % (ret, src, dest))
+        code(f"{ret} (std::istringstream({src}) >> {dest}).eof();")
 
 
 class MemoryBandwidth(float, ParamValue):
@@ -1892,10 +1886,10 @@ class MemoryBandwidth(float, ParamValue):
         return float(value)
 
     def ini_str(self):
-        return "%f" % self.getValue()
+        return f"{self.getValue():f}"
 
     def config_value(self):
-        return "%f" % self.getValue()
+        return f"{self.getValue():f}"
 
     @classmethod
     def cxx_ini_predecls(cls, code):
@@ -1903,7 +1897,7 @@ class MemoryBandwidth(float, ParamValue):
 
     @classmethod
     def cxx_ini_parse(self, code, src, dest, ret):
-        code("%s (std::istringstream(%s) >> %s).eof();" % (ret, src, dest))
+        code(f"{ret} (std::istringstream({src}) >> {dest}).eof();")
 
 
 #
@@ -1991,7 +1985,7 @@ class PortRef(object):
         self.index = -1  # always -1 for non-vector ports
 
     def __str__(self):
-        return "%s.%s" % (self.simobj, self.name)
+        return f"{self.simobj}.{self.name}"
 
     def __len__(self):
         # Return the number of connected ports, i.e. 0 is we have no
@@ -2015,8 +2009,7 @@ class PortRef(object):
             # shorthand for proxies
             return self.peer.simobj
         raise AttributeError(
-            "'%s' object has no attribute '%s'"
-            % (self.__class__.__name__, attr)
+            f"'{self.__class__.__name__}' object has no attribute '{attr}'"
         )
 
     # Full connection is symmetric (both ways).  Called via
@@ -2041,8 +2034,7 @@ class PortRef(object):
             return
         elif not isinstance(other, PortRef):
             raise TypeError(
-                "assigning non-port reference '%s' to port '%s'"
-                % (other, self)
+                f"assigning non-port reference '{other}' to port '{self}'"
             )
 
         if not Port.is_compat(self, other):
@@ -2068,8 +2060,7 @@ class PortRef(object):
 
         if not isinstance(new_1, PortRef) or not isinstance(new_2, PortRef):
             raise TypeError(
-                "Splicing non-port references '%s','%s' to port '%s'"
-                % (new_1, new_2, self)
+                f"Splicing non-port references '{new_1}','{new_2}' to port '{self}'"
             )
 
         old_peer = self.peer
@@ -2118,8 +2109,7 @@ class PortRef(object):
                 realPeer = self.peer.unproxy(self.simobj)
             except:
                 print(
-                    "Error in unproxying port '%s' of %s"
-                    % (self.name, self.simobj.path())
+                    f"Error in unproxying port '{self.name}' of {self.simobj.path()}"
                 )
                 raise
             self.connect(realPeer)
@@ -2163,7 +2153,7 @@ class VectorPortRef(object):
         self.elements = []
 
     def __str__(self):
-        return "%s.%s[:]" % (self.simobj, self.name)
+        return f"{self.simobj}.{self.name}[:]"
 
     def __len__(self):
         # Return the number of connected peers, corresponding the the
@@ -2390,12 +2380,8 @@ class DeprecatedParam(object):
         simobj_name: str, the name of the SimObject type
         """
         if not self.message:
-            self.message = "See {} for more information".format(simobj_name)
-        warn(
-            "{}.{} is deprecated. {}".format(
-                instance_name, self._oldName, self.message
-            )
-        )
+            self.message = f"See {simobj_name} for more information"
+        warn(f"{instance_name}.{self._oldName} is deprecated. {self.message}")
 
 
 baseEnums = allEnums.copy()
diff --git a/src/python/m5/proxy.py b/src/python/m5/proxy.py
index 16aa0c4505..78862346b4 100644
--- a/src/python/m5/proxy.py
+++ b/src/python/m5/proxy.py
@@ -63,7 +63,7 @@ class BaseProxy(object):
     def __setattr__(self, attr, value):
         if not attr.startswith("_"):
             raise AttributeError(
-                "cannot set attribute '%s' on proxy object" % attr
+                f"cannot set attribute '{attr}' on proxy object"
             )
         super().__setattr__(attr, value)
 
@@ -234,7 +234,7 @@ class AttrProxy(BaseProxy):
         p = self._attr
         for m in self._modifiers:
             if isinstance(m, str):
-                p += ".%s" % m
+                p += f".{m}"
             elif isinstance(m, int):
                 p += "[%d]" % m
             else:
diff --git a/src/python/m5/simulate.py b/src/python/m5/simulate.py
index 18fb1d6cd4..587bfa0202 100644
--- a/src/python/m5/simulate.py
+++ b/src/python/m5/simulate.py
@@ -358,36 +358,35 @@ def switchCpus(system, cpuList, verbose=True):
     memory_mode_name = new_cpus[0].memory_mode()
     for old_cpu, new_cpu in cpuList:
         if not isinstance(old_cpu, objects.BaseCPU):
-            raise TypeError("%s is not of type BaseCPU" % old_cpu)
+            raise TypeError(f"{old_cpu} is not of type BaseCPU")
         if not isinstance(new_cpu, objects.BaseCPU):
-            raise TypeError("%s is not of type BaseCPU" % new_cpu)
+            raise TypeError(f"{new_cpu} is not of type BaseCPU")
         if new_cpu in old_cpu_set:
             raise RuntimeError(
-                "New CPU (%s) is in the list of old CPUs." % (old_cpu,)
+                f"New CPU ({old_cpu}) is in the list of old CPUs."
             )
         if not new_cpu.switchedOut():
-            raise RuntimeError("New CPU (%s) is already active." % (new_cpu,))
+            raise RuntimeError(f"New CPU ({new_cpu}) is already active.")
         if not new_cpu.support_take_over():
             raise RuntimeError(
-                "New CPU (%s) does not support CPU handover." % (old_cpu,)
+                f"New CPU ({old_cpu}) does not support CPU handover."
             )
         if new_cpu.memory_mode() != memory_mode_name:
             raise RuntimeError(
-                "%s and %s require different memory modes."
-                % (new_cpu, new_cpus[0])
+                f"{new_cpu} and {new_cpus[0]} require different memory modes."
             )
         if old_cpu.switchedOut():
-            raise RuntimeError("Old CPU (%s) is inactive." % (new_cpu,))
+            raise RuntimeError(f"Old CPU ({new_cpu}) is inactive.")
         if not old_cpu.support_take_over():
             raise RuntimeError(
-                "Old CPU (%s) does not support CPU handover." % (old_cpu,)
+                f"Old CPU ({old_cpu}) does not support CPU handover."
             )
 
     MemoryMode = params.allEnums["MemoryMode"]
     try:
         memory_mode = MemoryMode(memory_mode_name).getValue()
     except KeyError:
-        raise RuntimeError("Invalid memory mode (%s)" % memory_mode_name)
+        raise RuntimeError(f"Invalid memory mode ({memory_mode_name})")
 
     drain()
 
diff --git a/src/python/m5/stats/__init__.py b/src/python/m5/stats/__init__.py
index 6bc50cfad9..ce7a2d267d 100644
--- a/src/python/m5/stats/__init__.py
+++ b/src/python/m5/stats/__init__.py
@@ -102,26 +102,21 @@ def _url_factory(schemes, enable=True):
             # values into proper Python types.
             def parse_value(key, values):
                 if len(values) == 0 or (len(values) == 1 and not values[0]):
-                    fatal(
-                        "%s: '%s' doesn't have a value." % (url.geturl(), key)
-                    )
+                    fatal(f"{url.geturl()}: '{key}' doesn't have a value.")
                 elif len(values) > 1:
-                    fatal(
-                        "%s: '%s' has multiple values." % (url.geturl(), key)
-                    )
+                    fatal(f"{url.geturl()}: '{key}' has multiple values.")
                 else:
                     try:
                         return key, literal_eval(values[0])
                     except ValueError:
                         fatal(
-                            "%s: %s isn't a valid Python literal"
-                            % (url.geturl(), values[0])
+                            f"{url.geturl()}: {values[0]} isn't a valid Python literal"
                         )
 
             kwargs = dict([parse_value(k, v) for k, v in qs.items()])
 
             try:
-                return func("%s%s" % (url.netloc, url.path), **kwargs)
+                return func(f"{url.netloc}{url.path}", **kwargs)
             except TypeError:
                 fatal("Illegal stat visitor parameter specified")
 
@@ -227,10 +222,10 @@ def addStatVisitor(url):
     try:
         factory = factories[parsed.scheme]
     except KeyError:
-        fatal("Illegal stat file type '%s' specified." % parsed.scheme)
+        fatal(f"Illegal stat file type '{parsed.scheme}' specified.")
 
     if factory is None:
-        fatal("Stat type '%s' disabled at compile time" % parsed.scheme)
+        fatal(f"Stat type '{parsed.scheme}' disabled at compile time")
 
     outputList.append(factory(parsed))
 
@@ -242,12 +237,12 @@ def printStatVisitorTypes():
 
     def print_doc(doc):
         for line in doc.splitlines():
-            print("| %s" % line)
+            print(f"| {line}")
         print()
 
     enabled_visitors = [x for x in all_factories if x[2]]
     for factory, schemes, _ in enabled_visitors:
-        print("%s:" % ", ".join(filter(lambda x: x is not None, schemes)))
+        print(f"{', '.join(filter(lambda x: x is not None, schemes))}:")
 
         # Try to extract the factory doc string
         print_doc(inspect.getdoc(factory))
@@ -283,7 +278,7 @@ def _bindStatHierarchy(root):
                 _bind_obj(name, obj[0])
             else:
                 for idx, obj in enumerate(obj):
-                    _bind_obj("{}{}".format(name, idx), obj)
+                    _bind_obj(f"{name}{idx}", obj)
         else:
             # We need this check because not all obj.getCCObject() is an
             # instance of Stat::Group. For example, sc_core::sc_module, the C++
diff --git a/src/python/m5/ticks.py b/src/python/m5/ticks.py
index c1c6a507ee..47b033cfb4 100644
--- a/src/python/m5/ticks.py
+++ b/src/python/m5/ticks.py
@@ -48,7 +48,7 @@ def setGlobalFrequency(ticksPerSecond):
         tps = round(convert.anyToFrequency(ticksPerSecond))
     else:
         raise TypeError(
-            "wrong type '%s' for ticksPerSecond" % type(ticksPerSecond)
+            f"wrong type '{type(ticksPerSecond)}' for ticksPerSecond"
         )
     _m5.core.setClockFrequency(int(tps))
 
@@ -61,7 +61,7 @@ def fromSeconds(value):
     import _m5.core
 
     if not isinstance(value, float):
-        raise TypeError("can't convert '%s' to type tick" % type(value))
+        raise TypeError(f"can't convert '{type(value)}' to type tick")
 
     # once someone needs to convert to seconds, the global frequency
     # had better be fixed
diff --git a/src/python/m5/util/__init__.py b/src/python/m5/util/__init__.py
index 5ae48754ab..34c5ee8a49 100644
--- a/src/python/m5/util/__init__.py
+++ b/src/python/m5/util/__init__.py
@@ -203,7 +203,7 @@ def printList(items, indent=4):
             line = " " * indent
 
         if i < len(items) - 1:
-            line += "%s, " % item
+            line += f"{item}, "
         else:
             line += item
             print(line)
diff --git a/src/python/m5/util/convert.py b/src/python/m5/util/convert.py
index ca897ba9c8..72c748360c 100644
--- a/src/python/m5/util/convert.py
+++ b/src/python/m5/util/convert.py
@@ -99,7 +99,7 @@ binary_prefixes = {
 
 def assertStr(value):
     if not isinstance(value, str):
-        raise TypeError("wrong type '%s' should be str" % type(value))
+        raise TypeError(f"wrong type '{type(value)}' should be str")
 
 
 def _split_suffix(value, suffixes):
@@ -141,9 +141,7 @@ def toNum(value, target_type, units, prefixes, converter):
         try:
             return converter(val)
         except ValueError:
-            raise ValueError(
-                "cannot convert '%s' to %s" % (value, target_type)
-            )
+            raise ValueError(f"cannot convert '{value}' to {target_type}")
 
     # Units can be None, the empty string, or a list/tuple. Convert
     # to a tuple for consistent handling.
@@ -198,7 +196,7 @@ def toBool(value):
         return True
     if value in ("false", "f", "no", "n", "0"):
         return False
-    raise ValueError("cannot convert '%s' to bool" % value)
+    raise ValueError(f"cannot convert '{value}' to bool")
 
 
 def toFrequency(value):
@@ -265,15 +263,15 @@ def toMemorySize(value):
 
 def toIpAddress(value):
     if not isinstance(value, str):
-        raise TypeError("wrong type '%s' should be str" % type(value))
+        raise TypeError(f"wrong type '{type(value)}' should be str")
 
     bytes = value.split(".")
     if len(bytes) != 4:
-        raise ValueError("invalid ip address %s" % value)
+        raise ValueError(f"invalid ip address {value}")
 
     for byte in bytes:
         if not 0 <= int(byte) <= 0xFF:
-            raise ValueError("invalid ip address %s" % value)
+            raise ValueError(f"invalid ip address {value}")
 
     return (
         (int(bytes[0]) << 24)
@@ -285,14 +283,14 @@ def toIpAddress(value):
 
 def toIpNetmask(value):
     if not isinstance(value, str):
-        raise TypeError("wrong type '%s' should be str" % type(value))
+        raise TypeError(f"wrong type '{type(value)}' should be str")
 
     (ip, netmask) = value.split("/")
     ip = toIpAddress(ip)
     netmaskParts = netmask.split(".")
     if len(netmaskParts) == 1:
         if not 0 <= int(netmask) <= 32:
-            raise ValueError("invalid netmask %s" % netmask)
+            raise ValueError(f"invalid netmask {netmask}")
         return (ip, int(netmask))
     elif len(netmaskParts) == 4:
         netmaskNum = toIpAddress(netmask)
@@ -303,19 +301,19 @@ def toIpNetmask(value):
             testVal |= 1 << (31 - i)
             if testVal == netmaskNum:
                 return (ip, i + 1)
-        raise ValueError("invalid netmask %s" % netmask)
+        raise ValueError(f"invalid netmask {netmask}")
     else:
-        raise ValueError("invalid netmask %s" % netmask)
+        raise ValueError(f"invalid netmask {netmask}")
 
 
 def toIpWithPort(value):
     if not isinstance(value, str):
-        raise TypeError("wrong type '%s' should be str" % type(value))
+        raise TypeError(f"wrong type '{type(value)}' should be str")
 
     (ip, port) = value.split(":")
     ip = toIpAddress(ip)
     if not 0 <= int(port) <= 0xFFFF:
-        raise ValueError("invalid port %s" % port)
+        raise ValueError(f"invalid port {port}")
     return (ip, int(port))
 
 
diff --git a/src/python/m5/util/dot_writer.py b/src/python/m5/util/dot_writer.py
index 6d49f4ef79..b491a98448 100644
--- a/src/python/m5/util/dot_writer.py
+++ b/src/python/m5/util/dot_writer.py
@@ -284,7 +284,7 @@ def dot_gen_colour(simNode, isPort=False):
 
 
 def dot_rgb_to_html(r, g, b):
-    return "#%.2x%.2x%.2x" % (int(r), int(g), int(b))
+    return f"#{int(r):02x}{int(g):02x}{int(b):02x}"
 
 
 # We need to create all of the clock domains. We abuse the alpha channel to get
diff --git a/src/python/m5/util/dot_writer_ruby.py b/src/python/m5/util/dot_writer_ruby.py
index e23a1064bc..fa21ae1a01 100644
--- a/src/python/m5/util/dot_writer_ruby.py
+++ b/src/python/m5/util/dot_writer_ruby.py
@@ -46,7 +46,7 @@ except:
 
 
 def _dot_rgb_to_html(r, g, b):
-    return "#%.2x%.2x%.2x" % (r, g, b)
+    return f"#{r:02x}{g:02x}{b:02x}"
 
 
 def _dot_create_router_node(full_path, label):
diff --git a/src/python/m5/util/pybind.py b/src/python/m5/util/pybind.py
index 52d38e5302..54fd111f38 100644
--- a/src/python/m5/util/pybind.py
+++ b/src/python/m5/util/pybind.py
@@ -88,12 +88,9 @@ class PyBindMethod(PyBindExport):
             def get_arg_decl(arg):
                 if isinstance(arg, tuple):
                     name, default = arg
-                    return 'py::arg("%s") = %s' % (
-                        name,
-                        self._conv_arg(default),
-                    )
+                    return f'py::arg("{name}") = {self._conv_arg(default)}'
                 else:
-                    return 'py::arg("%s")' % arg
+                    return f'py::arg("{arg}")'
 
             arguments.extend(list([get_arg_decl(a) for a in self.args]))
         code("." + self.method_def + "(" + ", ".join(arguments) + ")")
diff --git a/src/systemc/tests/tlm/endian_conv/testall.py b/src/systemc/tests/tlm/endian_conv/testall.py
index b9e10ad94d..3bc7f136e9 100644
--- a/src/systemc/tests/tlm/endian_conv/testall.py
+++ b/src/systemc/tests/tlm/endian_conv/testall.py
@@ -545,23 +545,16 @@ for txn in txn_generator(nr_txns_to_test):
         else:
             if memory_state != golden_memory_state:
                 raise FragmenterDifference(
-                    """
-fragmenter: %s
+                    f"""
+fragmenter: {fragmenter}
 transaction:
-%s
+{txn}
 start memory:
-%s
+{initial_memory}
 golden memory:
-%s
+{golden_memory_state}
 actual memory:
-%s"""
-                    % (
-                        fragmenter,
-                        txn,
-                        initial_memory,
-                        golden_memory_state,
-                        memory_state,
-                    )
+{memory_state}"""
                 )
 
     print("."),
diff --git a/src/systemc/tests/verify.py b/src/systemc/tests/verify.py
index 0d8ce3cf75..c0e072e3c2 100755
--- a/src/systemc/tests/verify.py
+++ b/src/systemc/tests/verify.py
@@ -413,11 +413,7 @@ class VerifyPhase(TestPhaseBase):
         total_passed = len(self._passed)
         total_failed = sum(map(len, self._failed.values()))
         print()
-        print(
-            "Passed: {passed:4} - Failed: {failed:4}".format(
-                passed=total_passed, failed=total_failed
-            )
-        )
+        print(f"Passed: {total_passed:4} - Failed: {total_failed:4}")
 
     def write_result_file(self, path):
         results = {
@@ -626,7 +622,7 @@ def collect_phases(args):
     for group in phase_groups[1:]:
         name = group[0]
         if name in names:
-            raise RuntimeException("Phase %s specified more than once" % name)
+            raise RuntimeException(f"Phase {name} specified more than once")
         phase = test_phase_classes[name]
         phases.append(phase(main_args, *group[1:]))
     phases.sort()
@@ -669,10 +665,10 @@ with open(json_path) as f:
 
     if main_args.list:
         for target, props in sorted(filtered_tests.items()):
-            print("%s.%s" % (target, main_args.flavor))
+            print(f"{target}.{main_args.flavor}")
             for key, val in props.items():
-                print("    %s: %s" % (key, val))
-        print("Total tests: %d" % len(filtered_tests))
+                print(f"    {key}: {val}")
+        print(f"Total tests: {len(filtered_tests)}")
     else:
         tests_to_run = list(
             [
diff --git a/tests/configs/dram-lowp.py b/tests/configs/dram-lowp.py
index a2a0ce37b7..25e7cc3087 100644
--- a/tests/configs/dram-lowp.py
+++ b/tests/configs/dram-lowp.py
@@ -52,7 +52,7 @@ def run_test(root):
     argv = [
         sys.argv[0],
         # Add a specific page policy and specify the number of ranks
-        "-p%s" % page_policy,
+        f"-p{page_policy}",
         "-r 2",
     ]
 
diff --git a/tests/gem5/arm-boot-tests/test_linux_boot.py b/tests/gem5/arm-boot-tests/test_linux_boot.py
index 364125691c..d6e8ac94a5 100644
--- a/tests/gem5/arm-boot-tests/test_linux_boot.py
+++ b/tests/gem5/arm-boot-tests/test_linux_boot.py
@@ -66,9 +66,7 @@ arm-boot-test"
     if to_tick:
         name += "_to-tick"
         exit_regex = re.compile(
-            "Exiting @ tick {} because simulate\(\) limit reached".format(
-                str(to_tick)
-            )
+            f"Exiting @ tick {str(to_tick)} because simulate\\(\\) limit reached"
         )
         verifiers.append(verifier.MatchRegex(exit_regex))
         config_args += ["--tick-exit", str(to_tick)]
diff --git a/tests/gem5/configs/arm_boot_exit_run.py b/tests/gem5/configs/arm_boot_exit_run.py
index aea3c4160f..a8ea6eeea7 100644
--- a/tests/gem5/configs/arm_boot_exit_run.py
+++ b/tests/gem5/configs/arm_boot_exit_run.py
@@ -161,9 +161,7 @@ elif args.mem_system == "mi_example":
     cache_hierarchy = MIExampleCacheHierarchy(size="32kB", assoc=4)
 else:
     raise NotImplementedError(
-        "Memory type '{}' is not supported in the boot tests.".format(
-            args.mem_system
-        )
+        f"Memory type '{args.mem_system}' is not supported in the boot tests."
     )
 
 # Setup the system memory.
diff --git a/tests/gem5/configs/boot_kvm_fork_run.py b/tests/gem5/configs/boot_kvm_fork_run.py
index 18f6e9d416..84e273d842 100644
--- a/tests/gem5/configs/boot_kvm_fork_run.py
+++ b/tests/gem5/configs/boot_kvm_fork_run.py
@@ -151,9 +151,7 @@ elif args.mem_system == "classic":
     cache_hierarchy = PrivateL1CacheHierarchy(l1d_size="16kB", l1i_size="16kB")
 else:
     raise NotImplementedError(
-        "Memory system '{}' is not supported in the boot tests.".format(
-            args.mem_system
-        )
+        f"Memory system '{args.mem_system}' is not supported in the boot tests."
     )
 
 assert cache_hierarchy != None
diff --git a/tests/gem5/configs/boot_kvm_switch_exit.py b/tests/gem5/configs/boot_kvm_switch_exit.py
index 25f5808e13..1347e68ba4 100644
--- a/tests/gem5/configs/boot_kvm_switch_exit.py
+++ b/tests/gem5/configs/boot_kvm_switch_exit.py
@@ -137,9 +137,7 @@ elif args.mem_system == "classic":
     cache_hierarchy = PrivateL1CacheHierarchy(l1d_size="16kB", l1i_size="16kB")
 else:
     raise NotImplementedError(
-        "Memory system '{}' is not supported in the boot tests.".format(
-            args.mem_system
-        )
+        f"Memory system '{args.mem_system}' is not supported in the boot tests."
     )
 
 assert cache_hierarchy != None
diff --git a/tests/gem5/configs/checkpoint.py b/tests/gem5/configs/checkpoint.py
index d5d58922a3..f1b8a1bf72 100644
--- a/tests/gem5/configs/checkpoint.py
+++ b/tests/gem5/configs/checkpoint.py
@@ -73,7 +73,7 @@ def _run_step(name, restore=None, interval=0.5):
     elif cause in _exit_normal:
         sys.exit(_exitcode_done)
     else:
-        print("Test failed: Unknown exit cause: %s" % cause)
+        print(f"Test failed: Unknown exit cause: {cause}")
         sys.exit(_exitcode_fail)
 
 
@@ -129,5 +129,5 @@ def run_test(root, interval=0.5, max_checkpoints=5):
     if cause in _exit_normal:
         sys.exit(0)
     else:
-        print("Test failed: Unknown exit cause: %s" % cause)
+        print(f"Test failed: Unknown exit cause: {cause}")
         sys.exit(1)
diff --git a/tests/gem5/configs/parsec_disk_run.py b/tests/gem5/configs/parsec_disk_run.py
index fbe1cd3688..5c2fa75f65 100644
--- a/tests/gem5/configs/parsec_disk_run.py
+++ b/tests/gem5/configs/parsec_disk_run.py
@@ -199,9 +199,9 @@ board = X86Board(
 command = (
     "cd /home/gem5/parsec-benchmark\n"
     + "source env.sh\n"
-    + "parsecmgmt -a run -p {} ".format(args.benchmark)
-    + "-c gcc-hooks -i {} ".format(args.size)
-    + "-n {}\n".format(str(args.num_cpus))
+    + f"parsecmgmt -a run -p {args.benchmark} "
+    + f"-c gcc-hooks -i {args.size} "
+    + f"-n {str(args.num_cpus)}\n"
 )
 
 board.set_kernel_disk_workload(
@@ -247,12 +247,8 @@ print("Done running the simulation")
 print()
 print("Performance statistics:")
 
-print("Simulated time in ROI: {}s".format((roi_ticks[0]) / 1e12))
+print(f"Simulated time in ROI: {roi_ticks[0] / 1000000000000.0}s")
 print(
-    "Ran a total of {} simulated seconds".format(
-        simulator.get_current_tick() / 1e12
-    )
-)
-print(
-    "Total wallclock time: {}s, {} min".format(global_time, (global_time) / 60)
+    f"Ran a total of {simulator.get_current_tick() / 1000000000000.0} simulated seconds"
 )
+print(f"Total wallclock time: {global_time}s, {global_time / 60} min")
diff --git a/tests/gem5/configs/riscv_boot_exit_run.py b/tests/gem5/configs/riscv_boot_exit_run.py
index 4424868112..e9fc06b27b 100644
--- a/tests/gem5/configs/riscv_boot_exit_run.py
+++ b/tests/gem5/configs/riscv_boot_exit_run.py
@@ -144,7 +144,7 @@ elif args.cpu == "minor":
     cpu_type = CPUTypes.MINOR
 else:
     raise NotImplementedError(
-        "CPU type '{}' is not supported in the boot tests.".format(args.cpu)
+        f"CPU type '{args.cpu}' is not supported in the boot tests."
     )
 
 processor = SimpleProcessor(
diff --git a/tests/gem5/configs/switcheroo.py b/tests/gem5/configs/switcheroo.py
index 5f38543c52..72736a9d87 100644
--- a/tests/gem5/configs/switcheroo.py
+++ b/tests/gem5/configs/switcheroo.py
@@ -125,7 +125,7 @@ def run_test(root, switcher=None, freq=1000, verbose=False):
 
             if verbose:
                 print("Switching CPUs...")
-                print("Next CPU: %s" % type(next_cpu))
+                print(f"Next CPU: {type(next_cpu)}")
             m5.drain()
             if current_cpu != next_cpu:
                 m5.switchCpus(
@@ -144,5 +144,5 @@ def run_test(root, switcher=None, freq=1000, verbose=False):
 
             sys.exit(0)
         else:
-            print("Test failed: Unknown exit cause: %s" % exit_cause)
+            print(f"Test failed: Unknown exit cause: {exit_cause}")
             sys.exit(1)
diff --git a/tests/gem5/configs/x86_boot_exit_run.py b/tests/gem5/configs/x86_boot_exit_run.py
index 5458b6db6c..e9eeacefd8 100644
--- a/tests/gem5/configs/x86_boot_exit_run.py
+++ b/tests/gem5/configs/x86_boot_exit_run.py
@@ -152,9 +152,7 @@ elif args.mem_system == "classic":
     cache_hierarchy = PrivateL1CacheHierarchy(l1d_size="16kB", l1i_size="16kB")
 else:
     raise NotImplementedError(
-        "Memory system '{}' is not supported in the boot tests.".format(
-            args.mem_system
-        )
+        f"Memory system '{args.mem_system}' is not supported in the boot tests."
     )
 
 assert cache_hierarchy != None
diff --git a/tests/gem5/cpu_tests/test.py b/tests/gem5/cpu_tests/test.py
index bbdb492c82..4a2a104ea7 100644
--- a/tests/gem5/cpu_tests/test.py
+++ b/tests/gem5/cpu_tests/test.py
@@ -88,10 +88,10 @@ for isa in valid_isas:
 
         for cpu in valid_isas[isa]:
             gem5_verify_config(
-                name="cpu_test_{}_{}".format(cpu, workload),
+                name=f"cpu_test_{cpu}_{workload}",
                 verifiers=verifiers,
                 config=joinpath(getcwd(), "run.py"),
-                config_args=["--cpu={}".format(cpu), binary],
+                config_args=[f"--cpu={cpu}", binary],
                 valid_isas=(constants.all_compiled_tag,),
                 fixtures=[workload_binary],
             )
diff --git a/tests/gem5/fixture.py b/tests/gem5/fixture.py
index 6f5dd616ab..d3312c9a63 100644
--- a/tests/gem5/fixture.py
+++ b/tests/gem5/fixture.py
@@ -172,7 +172,7 @@ class SConsFixture(UniqueFixture):
             log.test_log.message(
                 "Building the following targets. This may take a while."
             )
-            log.test_log.message("%s" % (", ".join(self.targets)))
+            log.test_log.message(f"{', '.join(self.targets)}")
             log.test_log.message(
                 "You may want to use --skip-build, or use 'rerun'."
             )
@@ -188,7 +188,7 @@ class Gem5Fixture(SConsFixture):
         target_dir = joinpath(config.build_dir, isa.upper())
         if protocol:
             target_dir += "_" + protocol
-        target = joinpath(target_dir, "gem5.%s" % variant)
+        target = joinpath(target_dir, f"gem5.{variant}")
         obj = super(Gem5Fixture, cls).__new__(cls, target)
         return obj
 
@@ -207,7 +207,7 @@ class Gem5Fixture(SConsFixture):
 
 class MakeFixture(Fixture):
     def __init__(self, directory, *args, **kwargs):
-        name = "make -C %s" % directory
+        name = f"make -C {directory}"
         super(MakeFixture, self).__init__(
             build_once=True, lazy_init=False, name=name, *args, **kwargs
         )
diff --git a/tests/gem5/kvm-fork-tests/test_kvm_fork_run.py b/tests/gem5/kvm-fork-tests/test_kvm_fork_run.py
index 7467c02763..7dcfc8517c 100644
--- a/tests/gem5/kvm-fork-tests/test_kvm_fork_run.py
+++ b/tests/gem5/kvm-fork-tests/test_kvm_fork_run.py
@@ -46,9 +46,7 @@ def test_kvm_fork_run(cpu: str, num_cpus: int, mem_system: str, length: str):
         # Don't run the tests if KVM is unavailable.
         return
 
-    name = "{}-cpu_{}-cores_{}_kvm-fork-run-test".format(
-        cpu, str(num_cpus), mem_system
-    )
+    name = f"{cpu}-cpu_{str(num_cpus)}-cores_{mem_system}_kvm-fork-run-test"
     verifiers = []
 
     if mem_system == "mesi_two_level":
diff --git a/tests/gem5/kvm-switch-tests/test_kvm_cpu_switch.py b/tests/gem5/kvm-switch-tests/test_kvm_cpu_switch.py
index 222c26b9e2..85e9268e2d 100644
--- a/tests/gem5/kvm-switch-tests/test_kvm_cpu_switch.py
+++ b/tests/gem5/kvm-switch-tests/test_kvm_cpu_switch.py
@@ -46,9 +46,7 @@ def test_kvm_switch(cpu: str, num_cpus: int, mem_system: str, length: str):
         # Don't run the tests if KVM is unavailable.
         return
 
-    name = "{}-cpu_{}-cores_{}_kvm-switch-test".format(
-        cpu, str(num_cpus), mem_system
-    )
+    name = f"{cpu}-cpu_{str(num_cpus)}-cores_{mem_system}_kvm-switch-test"
     verifiers = []
 
     if mem_system == "mesi_two_level":
diff --git a/tests/gem5/replacement-policies/run_replacement_policy_test.py b/tests/gem5/replacement-policies/run_replacement_policy_test.py
index 31076c6d99..ec38bf382f 100644
--- a/tests/gem5/replacement-policies/run_replacement_policy_test.py
+++ b/tests/gem5/replacement-policies/run_replacement_policy_test.py
@@ -91,6 +91,4 @@ m5.instantiate()
 generator.start_traffic()
 print("Beginning simulation!")
 exit_event = m5.simulate()
-print(
-    "Exiting @ tick {} because {}.".format(m5.curTick(), exit_event.getCause())
-)
+print(f"Exiting @ tick {m5.curTick()} because {exit_event.getCause()}.")
diff --git a/tests/gem5/riscv-boot-tests/test_linux_boot.py b/tests/gem5/riscv-boot-tests/test_linux_boot.py
index 5ba4fa5dc0..55e0ae6109 100644
--- a/tests/gem5/riscv-boot-tests/test_linux_boot.py
+++ b/tests/gem5/riscv-boot-tests/test_linux_boot.py
@@ -51,9 +51,7 @@ def test_boot(
 
     verifiers = []
     exit_regex = re.compile(
-        "Exiting @ tick {} because simulate\(\) limit reached".format(
-            str(to_tick)
-        )
+        f"Exiting @ tick {str(to_tick)} because simulate\\(\\) limit reached"
     )
     verifiers.append(verifier.MatchRegex(exit_regex))
 
diff --git a/tests/gem5/suite.py b/tests/gem5/suite.py
index 36532aa9f7..7e0935d9eb 100644
--- a/tests/gem5/suite.py
+++ b/tests/gem5/suite.py
@@ -106,9 +106,7 @@ def gem5_verify_config(
                 )
 
                 # Common name of this generated testcase.
-                _name = "{given_name}-{isa}-{host}-{opt}".format(
-                    given_name=name, isa=isa, host=host, opt=opt
-                )
+                _name = f"{name}-{isa}-{host}-{opt}"
                 if protocol:
                     _name += "-" + protocol
 
diff --git a/tests/gem5/traffic_gen/simple_traffic_run.py b/tests/gem5/traffic_gen/simple_traffic_run.py
index 4e38155070..7c0f18865a 100644
--- a/tests/gem5/traffic_gen/simple_traffic_run.py
+++ b/tests/gem5/traffic_gen/simple_traffic_run.py
@@ -207,9 +207,7 @@ m5.instantiate()
 generator.start_traffic()
 print("Beginning simulation!")
 exit_event = m5.simulate()
-print(
-    "Exiting @ tick {} because {}.".format(m5.curTick(), exit_event.getCause())
-)
+print(f"Exiting @ tick {m5.curTick()} because {exit_event.getCause()}.")
 
 simstats = get_simstat(root, prepare_stats=True)
 json_output = Path(m5.options.outdir) / "output.json"
diff --git a/tests/gem5/verifier.py b/tests/gem5/verifier.py
index 075cec15d2..93d47c8d10 100644
--- a/tests/gem5/verifier.py
+++ b/tests/gem5/verifier.py
@@ -115,8 +115,7 @@ class MatchGoldStandard(Verifier):
         )
         if diff is not None:
             test_util.fail(
-                "Stdout did not match:\n%s\nSee %s for full results"
-                % (diff, tempdir)
+                f"Stdout did not match:\n{diff}\nSee {tempdir} for full results"
             )
 
     def _generic_instance_warning(self, kwargs):
diff --git a/tests/gem5/x86-boot-tests/test_linux_boot.py b/tests/gem5/x86-boot-tests/test_linux_boot.py
index 76d593bd3a..1907aaf0e4 100644
--- a/tests/gem5/x86-boot-tests/test_linux_boot.py
+++ b/tests/gem5/x86-boot-tests/test_linux_boot.py
@@ -54,9 +54,7 @@ def test_boot(
     if to_tick != None:
         name += "_to-tick"
         exit_regex = re.compile(
-            "Exiting @ tick {} because simulate\(\) limit reached".format(
-                str(to_tick)
-            )
+            f"Exiting @ tick {str(to_tick)} because simulate\\(\\) limit reached"
         )
         verifiers.append(verifier.MatchRegex(exit_regex))
         additional_config_args.append("--tick-exit")
diff --git a/tests/run.py b/tests/run.py
index e24d4b3bdd..dde8f70749 100644
--- a/tests/run.py
+++ b/tests/run.py
@@ -56,7 +56,7 @@ def skip_test(reason=""):
     """
 
     if reason:
-        print("Skipping test: %s" % reason)
+        print(f"Skipping test: {reason}")
     sys.exit(2)
 
 
@@ -90,7 +90,7 @@ def require_sim_object(name, fatal=False):
     if has_sim_object(name):
         return
     else:
-        msg = "Test requires the '%s' SimObject." % name
+        msg = f"Test requires the '{name}' SimObject."
         if fatal:
             m5.fatal(msg)
         else:
@@ -113,7 +113,7 @@ def require_file(path, fatal=False, mode=os.F_OK):
     if os.access(path, mode):
         return
     else:
-        msg = "Test requires '%s'" % path
+        msg = f"Test requires '{path}'"
         if not os.path.exists(path):
             msg += " which does not exist."
         else:
diff --git a/util/checkpoint-tester.py b/util/checkpoint-tester.py
index 6bc636ac18..1e4024b858 100755
--- a/util/checkpoint-tester.py
+++ b/util/checkpoint-tester.py
@@ -142,8 +142,8 @@ for i in range(1, len(cpts)):
             "-ru",
             "-I",
             "^##.*",
-            "%s/%s" % (cptdir, cpt_name),
-            "%s/%s" % (mydir, cpt_name),
+            f"{cptdir}/{cpt_name}",
+            f"{mydir}/{cpt_name}",
         ],
         stdout=diffout,
     )
diff --git a/util/cpt_upgrader.py b/util/cpt_upgrader.py
index 06f98d8a74..a852294fbc 100755
--- a/util/cpt_upgrader.py
+++ b/util/cpt_upgrader.py
@@ -102,7 +102,7 @@ class Upgrader:
             self.depends = [self.depends]
 
         if not isinstance(self.depends, list):
-            print("Error: 'depends' for {} is the wrong type".format(self.tag))
+            print(f"Error: 'depends' for {self.tag} is the wrong type")
             sys.exit(1)
 
         if hasattr(self, "fwd_depends"):
@@ -112,37 +112,25 @@ class Upgrader:
             self.fwd_depends = []
 
         if not isinstance(self.fwd_depends, list):
-            print(
-                "Error: 'fwd_depends' for {} is the wrong type".format(
-                    self.tag
-                )
-            )
+            print(f"Error: 'fwd_depends' for {self.tag} is the wrong type")
             sys.exit(1)
 
         if hasattr(self, "upgrader"):
             if not isinstance(self.upgrader, types.FunctionType):
                 print(
-                    "Error: 'upgrader' for {} is {}, not function".format(
-                        self.tag, type(self)
-                    )
+                    f"Error: 'upgrader' for {self.tag} is {type(self)}, not function"
                 )
                 sys.exit(1)
             Upgrader.tag_set.add(self.tag)
         elif hasattr(self, "downgrader"):
             if not isinstance(self.downgrader, types.FunctionType):
                 print(
-                    "Error: 'downgrader' for {} is {}, not function".format(
-                        self.tag, type(self)
-                    )
+                    f"Error: 'downgrader' for {self.tag} is {type(self)}, not function"
                 )
                 sys.exit(1)
             Upgrader.untag_set.add(self.tag)
         else:
-            print(
-                "Error: no upgrader or downgrader method for {}".format(
-                    self.tag
-                )
-            )
+            print(f"Error: no upgrader or downgrader method for {self.tag}")
             sys.exit(1)
 
         if hasattr(self, "legacy_version"):
@@ -196,8 +184,7 @@ class Upgrader:
             for dep in upg.depends:
                 if dep not in Upgrader.by_tag:
                     print(
-                        "Error: '{}' cannot depend on "
-                        "nonexistent tag '{}'".format(tag, dep)
+                        f"Error: '{tag}' cannot depend on nonexistent tag '{dep}'"
                     )
                     sys.exit(1)
 
@@ -208,7 +195,7 @@ def process_file(path, **kwargs):
 
         raise IOError(errno.ENOENT, "No such file", path)
 
-    verboseprint("Processing file %s...." % path)
+    verboseprint(f"Processing file {path}....")
 
     if kwargs.get("backup", True):
         import shutil
@@ -337,7 +324,7 @@ if __name__ == "__main__":
         print()
         print("std::set<std::string> version_tags = {")
         for tag in sorted(Upgrader.tag_set):
-            print('  "{}",'.format(tag))
+            print(f'  "{tag}",')
         print("};")
         print()
         print("} // namespace gem5")
@@ -369,7 +356,7 @@ if __name__ == "__main__":
         elif osp.isfile(cpt_file):
             process_file(cpt_file, **vars(args))
         else:
-            print("Error: checkpoint file not found in {} ".format(path))
+            print(f"Error: checkpoint file not found in {path} ")
             print("and recurse not specified")
             sys.exit(1)
     sys.exit(0)
diff --git a/util/cpt_upgraders/arm-hdlcd-upgrade.py b/util/cpt_upgraders/arm-hdlcd-upgrade.py
index bbd2b9c79e..96d6368718 100644
--- a/util/cpt_upgraders/arm-hdlcd-upgrade.py
+++ b/util/cpt_upgraders/arm-hdlcd-upgrade.py
@@ -82,7 +82,7 @@ def upgrader(cpt):
             # Create a DMA engine section. The LCD controller will
             # initialize the DMA it after the next VSync, so we don't
             # care about the actual values
-            sec_dma = "%s.dmaEngine" % sec
+            sec_dma = f"{sec}.dmaEngine"
             cpt.add_section(sec_dma)
             cpt.set(sec_dma, "nextLineAddr", "0")
             cpt.set(sec_dma, "frameEnd", "0")
diff --git a/util/cpt_upgraders/etherswitch.py b/util/cpt_upgraders/etherswitch.py
index e10fa3601c..6f2cc74c8c 100644
--- a/util/cpt_upgraders/etherswitch.py
+++ b/util/cpt_upgraders/etherswitch.py
@@ -11,7 +11,7 @@ def upgrader(cpt):
                 if "outputFifo" in new_sec_name:
                     new_sec_name = new_sec_name.rstrip("outputFifo")
                     new_sec_name += ".outputFifo"
-                    new_sec_name = "system.system.%s" % (new_sec_name)
+                    new_sec_name = f"system.system.{new_sec_name}"
                     if not cpt.has_section(new_sec_name):
                         cpt.add_section(new_sec_name)
                     if old_opt_name == "size":
diff --git a/util/cpt_upgraders/isa-is-simobject.py b/util/cpt_upgraders/isa-is-simobject.py
index 077d4d98bf..0fd33f733e 100644
--- a/util/cpt_upgraders/isa-is-simobject.py
+++ b/util/cpt_upgraders/isa-is-simobject.py
@@ -79,7 +79,7 @@ def upgrader(cpt):
                 if key in isa_fields:
                     isa_section.append((key, value))
 
-        name = "%s.isa" % re_cpu_match.group(1)
+        name = f"{re_cpu_match.group(1)}.isa"
         isa_sections.append((name, isa_section))
 
         for (key, value) in isa_section:
diff --git a/util/cpt_upgraders/process-fdmap-rename.py b/util/cpt_upgraders/process-fdmap-rename.py
index dfd23bd5e0..3b8776af31 100644
--- a/util/cpt_upgraders/process-fdmap-rename.py
+++ b/util/cpt_upgraders/process-fdmap-rename.py
@@ -15,7 +15,7 @@ def upgrader(cpt):
     for sec in cpt.sections():
         fdm = "FdMap"
         fde = "FDEntry"
-        if re.match(".*\.%s.*" % fdm, sec):
+        if re.match(f".*\\.{fdm}.*", sec):
             rename = re.sub(fdm, fde, sec)
             split = re.split(fde, rename)
 
@@ -26,7 +26,7 @@ def upgrader(cpt):
             # add in entries 257 to 1023
             if split[1] == "0":
                 for x in range(257, 1024):
-                    seq = (split[0], fde, "%s" % x)
+                    seq = (split[0], fde, f"{x}")
                     section = "".join(seq)
                     cpt.add_section(section)
                     cpt.set(section, "fd", "-1")
diff --git a/util/decode_inst_dep_trace.py b/util/decode_inst_dep_trace.py
index 2a43f52d54..ded0051ae1 100755
--- a/util/decode_inst_dep_trace.py
+++ b/util/decode_inst_dep_trace.py
@@ -164,20 +164,20 @@ def main():
         num_packets += 1
 
         # Write to file the seq num
-        ascii_out.write("%s" % (packet.seq_num))
+        ascii_out.write(f"{packet.seq_num}")
         # Write to file the pc of the instruction, default is 0
         if packet.HasField("pc"):
-            ascii_out.write(",%s" % (packet.pc))
+            ascii_out.write(f",{packet.pc}")
         else:
             ascii_out.write(",0")
         # Write to file the weight, default is 1
         if packet.HasField("weight"):
-            ascii_out.write(",%s" % (packet.weight))
+            ascii_out.write(f",{packet.weight}")
         else:
             ascii_out.write(",1")
         # Write to file the type of the record
         try:
-            ascii_out.write(",%s" % enumNames[packet.type])
+            ascii_out.write(f",{enumNames[packet.type]}")
         except KeyError:
             print(
                 "Seq. num", packet.seq_num, "has unsupported type", packet.type
@@ -187,21 +187,21 @@ def main():
         # Write to file if it has the optional fields physical addr, size,
         # flags
         if packet.HasField("p_addr"):
-            ascii_out.write(",%s" % (packet.p_addr))
+            ascii_out.write(f",{packet.p_addr}")
         if packet.HasField("size"):
-            ascii_out.write(",%s" % (packet.size))
+            ascii_out.write(f",{packet.size}")
         if packet.HasField("flags"):
-            ascii_out.write(",%s" % (packet.flags))
+            ascii_out.write(f",{packet.flags}")
 
         # Write to file the comp delay
-        ascii_out.write(",%s" % (packet.comp_delay))
+        ascii_out.write(f",{packet.comp_delay}")
 
         # Write to file the repeated field order dependency
         ascii_out.write(":")
         if packet.rob_dep:
             num_robdeps += 1
             for dep in packet.rob_dep:
-                ascii_out.write(",%s" % dep)
+                ascii_out.write(f",{dep}")
         # Write to file the repeated field register dependency
         ascii_out.write(":")
         if packet.reg_dep:
@@ -209,7 +209,7 @@ def main():
                 1  # No. of packets with atleast 1 register dependency
             )
             for dep in packet.reg_dep:
-                ascii_out.write(",%s" % dep)
+                ascii_out.write(f",{dep}")
         # New line
         ascii_out.write("\n")
 
diff --git a/util/decode_packet_trace.py b/util/decode_packet_trace.py
index 798a824ecb..66a74c6f01 100755
--- a/util/decode_packet_trace.py
+++ b/util/decode_packet_trace.py
@@ -93,18 +93,15 @@ def main():
         # ReadReq is 1 and WriteReq is 4 in src/mem/packet.hh Command enum
         cmd = "r" if packet.cmd == 1 else ("w" if packet.cmd == 4 else "u")
         if packet.HasField("pkt_id"):
-            ascii_out.write("%s," % (packet.pkt_id))
+            ascii_out.write(f"{packet.pkt_id},")
         if packet.HasField("flags"):
             ascii_out.write(
-                "%s,%s,%s,%s,%s"
-                % (cmd, packet.addr, packet.size, packet.flags, packet.tick)
+                f"{cmd},{packet.addr},{packet.size},{packet.flags},{packet.tick}"
             )
         else:
-            ascii_out.write(
-                "%s,%s,%s,%s" % (cmd, packet.addr, packet.size, packet.tick)
-            )
+            ascii_out.write(f"{cmd},{packet.addr},{packet.size},{packet.tick}")
         if packet.HasField("pc"):
-            ascii_out.write(",%s\n" % (packet.pc))
+            ascii_out.write(f",{packet.pc}\n")
         else:
             ascii_out.write("\n")
 
diff --git a/util/find_copyrights.py b/util/find_copyrights.py
index 6fbb10cb61..0bd0ef3a51 100644
--- a/util/find_copyrights.py
+++ b/util/find_copyrights.py
@@ -103,7 +103,7 @@ def find_copyright_block(lines, lang_type):
                 return
 
     else:
-        raise AttributeError("Could not handle language %s" % lang_type)
+        raise AttributeError(f"Could not handle language {lang_type}")
 
 
 date_range_re = re.compile(r"([0-9]{4})\s*-\s*([0-9]{4})")
@@ -254,7 +254,7 @@ if __name__ == "__main__":
         elif os.path.isdir(base):
             files += find_files(base)
         else:
-            raise AttributeError("can't access '%s'" % base)
+            raise AttributeError(f"can't access '{base}'")
 
     copyrights = {}
     counts = {}
@@ -273,8 +273,8 @@ if __name__ == "__main__":
         except Exception as e:
             if verbose:
                 if len(e.args) == 1:
-                    e.args = ("%s (%s))" % (e, filename),)
-                print("could not parse %s: %s" % (filename, e))
+                    e.args = (f"{e} ({filename}))",)
+                print(f"could not parse {filename}: {e}")
             continue
 
         for owner, dates, authors, start, end in data:
@@ -290,5 +290,5 @@ if __name__ == "__main__":
 
     for count, dates, owner in sorted(info, reverse=True):
         if show_counts:
-            owner = "%s (%s files)" % (owner, count)
-        print("Copyright (c) %s %s" % (datestr(dates), owner))
+            owner = f"{owner} ({count} files)"
+        print(f"Copyright (c) {datestr(dates)} {owner}")
diff --git a/util/gem5art/artifact/gem5art/artifact/_artifactdb.py b/util/gem5art/artifact/gem5art/artifact/_artifactdb.py
index c1b9a69f5b..16d35e86e8 100644
--- a/util/gem5art/artifact/gem5art/artifact/_artifactdb.py
+++ b/util/gem5art/artifact/gem5art/artifact/_artifactdb.py
@@ -209,7 +209,7 @@ class ArtifactMongoDB(ArtifactDB):
         some type and a regex name."""
 
         data = self.artifacts.find(
-            {"type": typ, "name": {"$regex": "{}".format(name)}}, limit=limit
+            {"type": typ, "name": {"$regex": f"{name}"}}, limit=limit
         )
         for d in data:
             yield d
diff --git a/util/gem5art/artifact/gem5art/artifact/artifact.py b/util/gem5art/artifact/gem5art/artifact/artifact.py
index 46664e82fb..b71369c689 100644
--- a/util/gem5art/artifact/gem5art/artifact/artifact.py
+++ b/util/gem5art/artifact/gem5art/artifact/artifact.py
@@ -76,9 +76,9 @@ def getGit(path: Path) -> Dict[str, str]:
     ]
     res = subprocess.run(command, stdout=subprocess.PIPE, cwd=path)
     if res.returncode != 0:
-        raise Exception("git repo doesn't exist for {}".format(path))
+        raise Exception(f"git repo doesn't exist for {path}")
     if res.stdout:
-        raise Exception("git repo dirty for {}".format(path))
+        raise Exception(f"git repo dirty for {path}")
 
     command = ["git", "remote", "get-url", "origin"]
     origin = subprocess.check_output(command, cwd=path)
@@ -203,14 +203,14 @@ class Artifact:
             data["git"] = getGit(ppath)
             data["hash"] = data["git"]["hash"]
         else:
-            raise Exception("Path {} doesn't exist".format(ppath))
+            raise Exception(f"Path {ppath} doesn't exist")
 
         pcwd = Path(cwd)
         data["cwd"] = pcwd
         if not pcwd.exists():
-            raise Exception("cwd {} doesn't exist.".format(pcwd))
+            raise Exception(f"cwd {pcwd} doesn't exist.")
         if not pcwd.is_dir():
-            raise Exception("cwd {} is not a directory".format(pcwd))
+            raise Exception(f"cwd {pcwd} is not a directory")
 
         data["inputs"] = [i._id for i in inputs]
 
diff --git a/util/gem5art/run/gem5art/run.py b/util/gem5art/run/gem5art/run.py
index 89bd4637c4..12e4b3e208 100644
--- a/util/gem5art/run/gem5art/run.py
+++ b/util/gem5art/run/gem5art/run.py
@@ -316,7 +316,7 @@ class gem5Run:
         try:
             return cls.loadFromDict(d)
         except KeyError:
-            print("Incompatible json file: {}!".format(filename))
+            print(f"Incompatible json file: {filename}!")
             raise
 
     @classmethod
@@ -521,7 +521,7 @@ class gem5Run:
             # Check again in five seconds
             time.sleep(5)
 
-        print("Done running {}".format(" ".join(self.command)))
+        print(f"Done running {' '.join(self.command)}")
 
         # Done executing
         self.running = False
@@ -540,7 +540,7 @@ class gem5Run:
         # Store current gem5 run in the database
         db.put(self._id, self._getSerializable())
 
-        print("Done storing the results of {}".format(" ".join(self.command)))
+        print(f"Done storing the results of {' '.join(self.command)}")
 
     def run(self, task: Any = None, cwd: str = ".") -> None:
         """Actually run the test.
diff --git a/util/gem5art/run/tests/test_run.py b/util/gem5art/run/tests/test_run.py
index 1710dbc706..0bdd561220 100644
--- a/util/gem5art/run/tests/test_run.py
+++ b/util/gem5art/run/tests/test_run.py
@@ -112,7 +112,7 @@ class TestSERun(unittest.TestCase):
             [
                 "gem5/build/X86/gem5.opt",
                 "-re",
-                "--outdir={}".format(os.path.abspath("results/run_test/out")),
+                f"--outdir={os.path.abspath('results/run_test/out')}",
                 "configs-tests/run_test.py",
                 "extra",
                 "params",
diff --git a/util/gem5img.py b/util/gem5img.py
index 9b32b6c1a5..8eb0965c9e 100755
--- a/util/gem5img.py
+++ b/util/gem5img.py
@@ -135,7 +135,7 @@ def findProg(program, cleanupDev=None):
     if returncode != 0:
         if cleanupDev:
             cleanupDev.destroy()
-        exit("Unable to find program %s, check your PATH variable." % program)
+        exit(f"Unable to find program {program}, check your PATH variable.")
     return out.strip()
 
 
@@ -197,7 +197,7 @@ def findPartOffset(devFile, fileName, partition):
     else:
         # No partition description was found
         print("No partition description was found in sfdisk output:")
-        print("\n".join("  {}".format(line.rstrip()) for line in lines))
+        print("\n".join(f"  {line.rstrip()}" for line in lines))
         print("Could not determine size of first partition.")
         exit(1)
 
@@ -242,7 +242,7 @@ class Command(object):
         posUsage = ""
         for posArg in posArgs:
             (argName, argDesc) = posArg
-            usage += " %s" % argName
+            usage += f" {argName}"
             posUsage += "\n  %s: %s" % posArg
         usage += posUsage
         self.parser = ArgumentParser(usage=usage, description=description)
@@ -266,7 +266,7 @@ class Command(object):
 
     def runCom(self):
         if not self.func:
-            exit("Unimplemented command %s!" % self.name)
+            exit(f"Unimplemented command {self.name}!")
         self.func(self.options, self.args)
 
 
@@ -300,7 +300,7 @@ mountCom = Command(
 def mountComFunc(options, args):
     (path, mountPoint) = args
     if not os.path.isdir(mountPoint):
-        print("Mount point %s is not a directory." % mountPoint)
+        print(f"Mount point {mountPoint} is not a directory.")
 
     dev = LoopbackDevice()
     if dev.setup(path, offset=True) != 0:
@@ -324,12 +324,12 @@ umountCom = Command(
 def umountComFunc(options, args):
     (mountPoint,) = args
     if not os.path.isdir(mountPoint):
-        print("Mount point %s is not a directory." % mountPoint)
+        print(f"Mount point {mountPoint} is not a directory.")
         exit(1)
 
     dev = mountPointToDev(mountPoint)
     if not dev:
-        print("Unable to find mount information for %s." % mountPoint)
+        print(f"Unable to find mount information for {mountPoint}.")
 
     # Unmount the loopback device.
     if runPriv([findProg("umount"), mountPoint]) != 0:
@@ -424,7 +424,7 @@ formatCom.addArgument(
 
 
 def formatImage(dev, fsType):
-    return runPriv([findProg("mkfs.%s" % fsType, dev), str(dev)])
+    return runPriv([findProg(f"mkfs.{fsType}", dev), str(dev)])
 
 
 def formatComFunc(options, args):
@@ -474,7 +474,7 @@ if len(argv) < 2 or argv[1] not in commands:
     print("where [command] is one of ")
     for name in commandOrder:
         command = commands[name]
-        print("    %s: %s" % (command.name, command.description))
+        print(f"    {command.name}: {command.description}")
     print("Watch for orphaned loopback devices and delete them with")
     print("losetup -d. Mounted images will belong to root, so you may need")
     print("to use sudo to modify their contents.")
diff --git a/util/gen_arm_fs_files.py b/util/gen_arm_fs_files.py
index 548abe819f..6446d79bfb 100755
--- a/util/gen_arm_fs_files.py
+++ b/util/gen_arm_fs_files.py
@@ -49,7 +49,7 @@ import os
 
 
 def run_cmd(explanation, working_dir, cmd, stdout=None):
-    print("Running phase '%s'" % explanation)
+    print(f"Running phase '{explanation}'")
     sys.stdout.flush()
 
     # some of the commands need $PWD to be properly set
@@ -296,11 +296,11 @@ def xen():
         [
             "./configure",
             "--host=aarch64-linux-gnu",
-            "--with-kernel-dir={}".format(linux_dir),
-            "--with-dtb={}".format(dtb_bin),
-            "--with-cmdline='{}'".format(linux_cmdline),
-            "--with-xen-cmdline='{}'".format(xen_cmdline),
-            "--with-xen={}".format(os.path.join(xen_dir, "xen", "xen")),
+            f"--with-kernel-dir={linux_dir}",
+            f"--with-dtb={dtb_bin}",
+            f"--with-cmdline='{linux_cmdline}'",
+            f"--with-xen-cmdline='{xen_cmdline}'",
+            f"--with-xen={os.path.join(xen_dir, 'xen', 'xen')}",
             "--enable-psci",
             "--enable-gicv3",
         ],
@@ -372,11 +372,11 @@ parser.add_argument(
 args = parser.parse_args()
 
 if not os.path.isdir(args.dest_dir):
-    print("Error: %s is not a directory." % args.dest_dir)
+    print(f"Error: {args.dest_dir} is not a directory.")
     sys.exit(1)
 
 if not os.path.isdir(args.gem5_dir):
-    print("Error: %s is not a directory." % args.gem5_dir)
+    print(f"Error: {args.gem5_dir} is not a directory.")
     sys.exit(1)
 
 if machine() != "x86_64":
@@ -386,13 +386,13 @@ if machine() != "x86_64":
 binaries_dir = args.dest_dir + "/binaries"
 
 if os.path.exists(binaries_dir):
-    print("Error: %s already exists." % binaries_dir)
+    print(f"Error: {binaries_dir} already exists.")
     sys.exit(1)
 
 revisions_dir = args.dest_dir + "/revisions"
 
 if os.path.exists(revisions_dir):
-    print("Error: %s already exists." % revisions_dir)
+    print(f"Error: {revisions_dir} already exists.")
     sys.exit(1)
 
 os.mkdir(binaries_dir)
@@ -413,6 +413,6 @@ binaries = args.fs_binaries if args.fs_binaries else list(all_binaries.keys())
 for fs_binary in binaries:
     all_binaries[fs_binary]()
 
-print("Done! All the generated files can be found in %s" % binaries_dir)
+print(f"Done! All the generated files can be found in {binaries_dir}")
 
 sys.exit(0)
diff --git a/util/git-pre-commit.py b/util/git-pre-commit.py
index 766013fe3c..c6f3b3c033 100755
--- a/util/git-pre-commit.py
+++ b/util/git-pre-commit.py
@@ -67,7 +67,7 @@ staged_mismatch = set()
 
 for status, fname in git.status(filter="MA", cached=True):
     if args.verbose:
-        print("Checking {}...".format(fname))
+        print(f"Checking {fname}...")
     if check_ignores(fname):
         continue
     if status == "M":
@@ -112,7 +112,7 @@ if failing_files:
         print("Style checker failed for the following files:", file=sys.stderr)
         for f in failing_files:
             if f not in staged_mismatch:
-                print("\t{}".format(f), file=sys.stderr)
+                print(f"\t{f}", file=sys.stderr)
         print("\n", file=sys.stderr)
         print(
             "Please run the style checker manually to fix "
@@ -130,6 +130,6 @@ if failing_files:
             file=sys.stderr,
         )
         for f in staged_mismatch:
-            print("\t{}".format(f), file=sys.stderr)
+            print(f"\t{f}", file=sys.stderr)
         print("Please `git --add' them", file=sys.stderr)
     sys.exit(1)
diff --git a/util/maint/list_changes.py b/util/maint/list_changes.py
index 87e4ea2d20..0d61e39fde 100755
--- a/util/maint/list_changes.py
+++ b/util/maint/list_changes.py
@@ -104,7 +104,7 @@ class Commit(object):
         return cids[0]
 
     def __str__(self):
-        return "%s: %s" % (self.rev[0:8], self.log[0])
+        return f"{self.rev[0:8]}: {self.log[0]}"
 
 
 def list_revs(branch, baseline=None, paths=[]):
@@ -117,7 +117,7 @@ def list_revs(branch, baseline=None, paths=[]):
     """
 
     if baseline is not None:
-        query = "%s..%s" % (branch, baseline)
+        query = f"{branch}..{baseline}"
     else:
         query = str(branch)
 
diff --git a/util/maint/show_changes_by_file.py b/util/maint/show_changes_by_file.py
index d5055c1ff4..75b7e7edd9 100755
--- a/util/maint/show_changes_by_file.py
+++ b/util/maint/show_changes_by_file.py
@@ -51,7 +51,7 @@ def diff_files(upstream, feature, paths=[]):
     """
 
     raw = subprocess.check_output(
-        ["git", "diff", "--name-status", "%s..%s" % (upstream, feature), "--"]
+        ["git", "diff", "--name-status", f"{upstream}..{feature}", "--"]
         + paths
     )
 
@@ -61,7 +61,7 @@ def diff_files(upstream, feature, paths=[]):
     for p in path:
         direc = subprocess.check_output(["dirname", p]).strip() + "/"
         filename = subprocess.check_output(["basename", p]).strip()
-        odd[direc].append("%s" % filename)
+        odd[direc].append(f"{filename}")
 
     return odd
 
@@ -76,7 +76,7 @@ def cl_hash(upstream, feature, path):
     """
 
     raw = subprocess.check_output(
-        ["git", "log", "--oneline", "%s..%s" % (upstream, feature), "--", path]
+        ["git", "log", "--oneline", f"{upstream}..{feature}", "--", path]
     )
 
     return [l.split()[0] for l in raw.splitlines()]
@@ -119,11 +119,11 @@ def _main():
     for key, value in odd.items():
         print(key)
         for entry in value:
-            print("    %s" % entry)
+            print(f"    {entry}")
             path = key + entry
             sha = cl_hash(args.upstream, args.feature, path)
             for s in sha:
-                print("\t%s" % s)
+                print(f"\t{s}")
         print()
 
 
diff --git a/util/minorview/model.py b/util/minorview/model.py
index 86f49a3da0..126b730bea 100644
--- a/util/minorview/model.py
+++ b/util/minorview/model.py
@@ -529,12 +529,12 @@ class Inst(IdedObj):
 
     def table_line(self):
         if self.nextAddr is not None:
-            addrStr = "0x%x->0x%x" % (self.addr, self.nextAddr)
+            addrStr = f"0x{self.addr:x}->0x{self.nextAddr:x}"
         else:
-            addrStr = "0x%x" % self.addr
+            addrStr = f"0x{self.addr:x}"
         ret = [addrStr, self.disassembly]
         for name, value in self.pairs.items():
-            ret.append("%s=%s" % (name, str(value)))
+            ret.append(f"{name}={str(value)}")
         return ret
 
 
@@ -547,7 +547,7 @@ class InstFault(IdedObj):
         self.addr = addr
 
     def table_line(self):
-        ret = ["0x%x" % self.addr, self.fault]
+        ret = [f"0x{self.addr:x}", self.fault]
         for name, value in self.pairs:
             ret.append("%s=%s", name, str(value))
         return ret
@@ -563,7 +563,7 @@ class Line(IdedObj):
         self.size = size
 
     def table_line(self):
-        ret = ["0x%x/0x%x" % (self.vaddr, self.paddr), "%d" % self.size]
+        ret = [f"0x{self.vaddr:x}/0x{self.paddr:x}", "%d" % self.size]
         for name, value in self.pairs:
             ret.append("%s=%s", name, str(value))
         return ret
@@ -578,7 +578,7 @@ class LineFault(IdedObj):
         self.fault = fault
 
     def table_line(self):
-        ret = ["0x%x" % self.vaddr, self.fault]
+        ret = [f"0x{self.vaddr:x}", self.fault]
         for name, value in self.pairs:
             ret.append("%s=%s", name, str(value))
         return ret
diff --git a/util/minorview/point.py b/util/minorview/point.py
index 5df990998f..17190e1ca7 100644
--- a/util/minorview/point.py
+++ b/util/minorview/point.py
@@ -60,10 +60,10 @@ class Point(object):
         return (self.x, self.y)
 
     def __str__(self):
-        return "Point(%f,%f)" % (self.x, self.y)
+        return f"Point({self.x:f},{self.y:f})"
 
     def __repr__(self):
-        return "Point(%f,%f)" % (self.x, self.y)
+        return f"Point({self.x:f},{self.y:f})"
 
     def is_within_box(self, box):
         """Is this point inside the (centre, size) box box"""
diff --git a/util/o3-pipeview.py b/util/o3-pipeview.py
index 18f66129c8..fe49706dad 100755
--- a/util/o3-pipeview.py
+++ b/util/o3-pipeview.py
@@ -425,7 +425,7 @@ def print_inst(
                 )
             )
             if timestamps:
-                outfile.write("  f=%s, r=%s" % (inst["fetch"], inst["retire"]))
+                outfile.write(f"  f={inst['fetch']}, r={inst['retire']}")
             outfile.write("\n")
         else:
             outfile.write("...".center(12) + "\n")
@@ -522,7 +522,7 @@ def main():
                 args.timestamps,
                 args.only_committed,
                 args.store_completions,
-                *(tick_range + inst_range)
+                *(tick_range + inst_range),
             )
     print("done!")
 
diff --git a/util/on-chip-network-power-area.py b/util/on-chip-network-power-area.py
index 6a576bbdb3..61a316a99c 100644
--- a/util/on-chip-network-power-area.py
+++ b/util/on-chip-network-power-area.py
@@ -40,7 +40,7 @@ if not os.path.exists(build_dir):
     os.makedirs(build_dir)
 os.chdir(build_dir)
 
-error = call(["cmake", "../../../%s" % src_dir])
+error = call(["cmake", f"../../../{src_dir}"])
 if error:
     print("Failed to run cmake")
     exit(-1)
@@ -154,18 +154,18 @@ def computeRouterPowerAndArea(
         ni_flit_size_bits,
     )
 
-    print("%s Power: " % router, power)
+    print(f"{router} Power: ", power)
 
 
 ## Compute the power consumed by the given link
 def computeLinkPower(link, stats_file, config, sim_seconds):
     frequency = getClock(link + ".nls0", config)
     power = dsent.computeLinkPower(frequency)
-    print("%s.nls0 Power: " % link, power)
+    print(f"{link}.nls0 Power: ", power)
 
     frequency = getClock(link + ".nls1", config)
     power = dsent.computeLinkPower(frequency)
-    print("%s.nls1 Power: " % link, power)
+    print(f"{link}.nls1 Power: ", power)
 
 
 def parseStats(
@@ -269,10 +269,10 @@ def main():
         routers,
         int_links,
         ext_links,
-    ) = parseConfig("%s/%s/config.ini" % (sys.argv[1], sys.argv[2]))
+    ) = parseConfig(f"{sys.argv[1]}/{sys.argv[2]}/config.ini")
 
     parseStats(
-        "%s/%s/stats.txt" % (sys.argv[1], sys.argv[2]),
+        f"{sys.argv[1]}/{sys.argv[2]}/stats.txt",
         config,
         sys.argv[3],
         sys.argv[4],
diff --git a/util/oprofile-top.py b/util/oprofile-top.py
index b71c629f4e..4d5a693451 100755
--- a/util/oprofile-top.py
+++ b/util/oprofile-top.py
@@ -45,7 +45,7 @@ def category(app, sym):
     for regexp, cat in categories_re:
         if regexp.match(name):
             return cat
-    print("no match for symbol %s" % name)
+    print(f"no match for symbol {name}")
     return "other"
 
 
@@ -99,4 +99,4 @@ if showidle:
 
 for d in cats:
     if d in prof:
-        print("%s -- %5.1f%% " % (d, 100 * float(prof[d]) / float(total)))
+        print(f"{d} -- {100 * float(prof[d]) / float(total):5.1f}% ")
diff --git a/util/plot_dram/PlotPowerStates.py b/util/plot_dram/PlotPowerStates.py
index 7f0b326812..b476a24da1 100755
--- a/util/plot_dram/PlotPowerStates.py
+++ b/util/plot_dram/PlotPowerStates.py
@@ -295,7 +295,7 @@ def plotStackedStates(delay, states_list, bottom_state, plot_name, ylabel_str):
                 for prev_sum, new_s in zip(time_sum, l_states[state])
             ]
 
-        ax[sub_idx].set_title("Bank util %s" % bank_util)
+        ax[sub_idx].set_title(f"Bank util {bank_util}")
         ax[sub_idx].xaxis.set_ticks(ind + width / 2.0)
         ax[sub_idx].xaxis.set_ticklabels(seqBytesValues, rotation=45)
         ax[sub_idx].set_xlabel("Seq. bytes")
diff --git a/util/plot_dram/dram_sweep_plot.py b/util/plot_dram/dram_sweep_plot.py
index ad7bc5e3b6..1350f7af77 100755
--- a/util/plot_dram/dram_sweep_plot.py
+++ b/util/plot_dram/dram_sweep_plot.py
@@ -147,7 +147,7 @@ def main():
             # avg_pwr is in mW, peak_bw in MiByte/s, bus_util in percent
             z.append(avg_pwr[j] / (bus_util[j] / 100.0 * peak_bw[j] / 1000.0))
         else:
-            print("Unexpected mode %s" % mode)
+            print(f"Unexpected mode {mode}")
             exit(-1)
 
         i += 1
diff --git a/util/plot_dram/lowp_dram_sweep_plot.py b/util/plot_dram/lowp_dram_sweep_plot.py
index 03a47f9abd..dedd1e0c0d 100755
--- a/util/plot_dram/lowp_dram_sweep_plot.py
+++ b/util/plot_dram/lowp_dram_sweep_plot.py
@@ -76,7 +76,7 @@ parser.add_argument("--pdf", action="store_true", help="output Latex and pdf")
 def main():
     args = parser.parse_args()
     if not os.path.isfile(args.statsfile):
-        exit("Error! File not found: %s" % args.statsfile)
+        exit(f"Error! File not found: {args.statsfile}")
     if not os.path.isdir(args.outdir):
         os.mkdir(args.outdir)
 
diff --git a/util/streamline/m5stats2streamline.py b/util/streamline/m5stats2streamline.py
index f8bc9bc987..8dc72bf0f9 100755
--- a/util/streamline/m5stats2streamline.py
+++ b/util/streamline/m5stats2streamline.py
@@ -383,7 +383,7 @@ def timestampList(x):
 
 def writeBinary(outfile, binary_list):
     for i in binary_list:
-        outfile.write("%c" % i)
+        outfile.write(f"{i:c}")
 
 
 ############################################################
@@ -751,7 +751,7 @@ def parseProcessInfo(task_file):
             if len(unified_event_list) == num_events:
                 print("Truncating at", num_events, "events!")
                 break
-    print("Found %d events." % len(unified_event_list))
+    print(f"Found {len(unified_event_list)} events.")
 
     for process in process_list:
         if process.pid > 9990:  # fix up framebuffer ticks
@@ -1012,8 +1012,7 @@ def readGem5Stats(stats, gem5_stats_file):
                 sim_freq = int(m.group(1))  # ticks in 1 sec
                 ticks_in_ns = int(sim_freq / 1e9)
                 print(
-                    "Simulation frequency found! 1 tick == %e sec\n"
-                    % (1.0 / sim_freq)
+                    f"Simulation frequency found! 1 tick == {1.0 / sim_freq:e} sec\n"
                 )
 
         # Final tick in gem5 stats: current absolute timestamp
@@ -1145,7 +1144,7 @@ def doCapturedXML(output_path, stats):
         s.set("title", stat.group)
         s.set("name", stat_name)
         s.set("color", "0x00000000")
-        s.set("key", "0x%08x" % stat.key)
+        s.set("key", f"0x{stat.key:08x}")
         s.set("type", stat_name)
         s.set("event", "0x00000000")
         if stat.per_cpu:
@@ -1354,7 +1353,7 @@ output_path = args.output_path
 # Make sure input path exists
 ####
 if not os.path.exists(input_path):
-    print("ERROR: Input path %s does not exist!" % input_path)
+    print(f"ERROR: Input path {input_path} does not exist!")
     sys.exit(1)
 
 ####
@@ -1389,7 +1388,7 @@ gem5_stats_file = input_path + "/stats.txt.gz"
 if not os.path.exists(gem5_stats_file):
     gem5_stats_file = input_path + "/stats.txt"
 if not os.path.exists(gem5_stats_file):
-    print("ERROR: stats.txt[.gz] file does not exist in %s!" % input_path)
+    print(f"ERROR: stats.txt[.gz] file does not exist in {input_path}!")
     sys.exit(1)
 
 readGem5Stats(stats, gem5_stats_file)
diff --git a/util/style.py b/util/style.py
index 4007ff9f9e..27d6568ec3 100755
--- a/util/style.py
+++ b/util/style.py
@@ -62,12 +62,11 @@ def verify(
         verifiers = style.verifiers.all_verifiers
 
     if verbose:
-        print("Verifying %s[%s]..." % (filename, regions))
+        print(f"Verifying {filename}[{regions}]...")
     for verifier in [v(ui, opts, base=base) for v in verifiers]:
         if verbose:
             print(
-                "Applying %s (%s)"
-                % (verifier.test_name, verifier.__class__.__name__)
+                f"Applying {verifier.test_name} ({verifier.__class__.__name__})"
             )
         if verifier.apply(filename, regions=regions):
             return False
diff --git a/util/style/region.py b/util/style/region.py
index 39eaba50be..bd2fc89251 100644
--- a/util/style/region.py
+++ b/util/style/region.py
@@ -100,7 +100,7 @@ class Region(tuple):
         return tuple.__new__(cls, args)
 
     def __repr__(self):
-        return "Region(%s, %s)" % (self[0], self[1])
+        return f"Region({self[0]}, {self[1]})"
 
     @property
     def start(self):
@@ -267,7 +267,7 @@ class Regions(object):
         return result
 
     def __repr__(self):
-        return "Regions(%s)" % ([(r[0], r[1]) for r in self.regions],)
+        return f"Regions({[(r[0], r[1]) for r in self.regions]})"
 
 
 all_regions = Regions(Region(neg_inf, pos_inf))
@@ -279,12 +279,12 @@ if __name__ == "__main__":
     n = Region(9, 10)
 
     def test(left, right):
-        print("%s == %s: %s" % (left, right, left == right))
-        print("%s != %s: %s" % (left, right, left != right))
-        print("%s <  %s: %s" % (left, right, left < right))
-        print("%s <= %s: %s" % (left, right, left <= right))
-        print("%s >  %s: %s" % (left, right, left > right))
-        print("%s >= %s: %s" % (left, right, left >= right))
+        print(f"{left} == {right}: {left == right}")
+        print(f"{left} != {right}: {left != right}")
+        print(f"{left} <  {right}: {left < right}")
+        print(f"{left} <= {right}: {left <= right}")
+        print(f"{left} >  {right}: {left > right}")
+        print(f"{left} >= {right}: {left >= right}")
         print("\n")
 
     test(neg_inf, neg_inf)
diff --git a/util/style/repo.py b/util/style/repo.py
index 20dfde4042..18079cea6a 100644
--- a/util/style/repo.py
+++ b/util/style/repo.py
@@ -192,7 +192,7 @@ class GitRepo(AbstractRepo):
         if cached:
             cmd.append("--cached")
         if filter:
-            cmd += ["--diff-filter=%s" % filter]
+            cmd += [f"--diff-filter={filter}"]
         cmd += [self.head_revision(), "--"] + files
         status = subprocess.check_output(cmd).decode("utf-8").rstrip("\n")
 
@@ -202,13 +202,13 @@ class GitRepo(AbstractRepo):
             return []
 
     def file_from_index(self, name):
-        return subprocess.check_output(
-            [self.git, "show", ":%s" % (name,)]
-        ).decode("utf-8")
+        return subprocess.check_output([self.git, "show", f":{name}"]).decode(
+            "utf-8"
+        )
 
     def file_from_head(self, name):
         return subprocess.check_output(
-            [self.git, "show", "%s:%s" % (self.head_revision(), name)]
+            [self.git, "show", f"{self.head_revision()}:{name}"]
         ).decode("utf-8")
 
 
diff --git a/util/style/sort_includes.py b/util/style/sort_includes.py
index 67e7ca9fbc..9c532b5669 100644
--- a/util/style/sort_includes.py
+++ b/util/style/sort_includes.py
@@ -136,7 +136,7 @@ def _include_matcher_main():
         base, ext = m.groups()
         (keyword, fname, extra) = base_matcher(context, line)
         try:
-            if fname == "%s.%s" % (base, header_map[ext]):
+            if fname == f"{base}.{header_map[ext]}":
                 return (keyword, fname, extra)
         except KeyError:
             pass
@@ -342,6 +342,6 @@ if __name__ == "__main__":
             dir_ignore=args.dir_ignore,
         ):
             if args.dry_run:
-                print("{}: {}".format(filename, language))
+                print(f"{filename}: {language}")
             else:
                 update_file(filename, filename, language, SortIncludes())
diff --git a/util/style/verifiers.py b/util/style/verifiers.py
index 4ccd35af48..dbcce1c764 100644
--- a/util/style/verifiers.py
+++ b/util/style/verifiers.py
@@ -147,7 +147,7 @@ class Verifier(object, metaclass=ABCMeta):
         try:
             f = open(filename, mode)
         except OSError as msg:
-            print("could not open file {}: {}".format(filename, msg))
+            print(f"could not open file {filename}: {msg}")
             return None
 
         return f
@@ -247,7 +247,7 @@ class LineVerifier(Verifier):
                         % (self.test_name, filename, num + 1)
                     )
                     if self.ui.verbose:
-                        self.ui.write(">>%s<<\n" % s_line[:-1])
+                        self.ui.write(f">>{s_line[:-1]}<<\n")
                 errors += 1
         if close:
             fobj.close()
diff --git a/util/update-copyright.py b/util/update-copyright.py
index c22638c2eb..1c3fd611ac 100755
--- a/util/update-copyright.py
+++ b/util/update-copyright.py
@@ -130,7 +130,7 @@ else:
 filter_repo_args = git_filter_repo.FilteringOptions.default_options()
 filter_repo_args.force = True
 filter_repo_args.partial = True
-filter_repo_args.refs = ["{}..HEAD".format(args.start)]
+filter_repo_args.refs = [f"{args.start}..HEAD"]
 filter_repo_args.repack = False
 filter_repo_args.replace_refs = "update-no-add"
 

From f6ee1f27245b95034fe1e82ab0ea17818d8a981e Mon Sep 17 00:00:00 2001
From: Yu-hsin Wang <yuhsingw@google.com>
Date: Tue, 7 Mar 2023 16:59:31 +0800
Subject: [PATCH 266/492] fastmodel: delay the breakpoint event handle to
 simulation stop

The fastmodel simulation would be paused when it hits a breakpoint.
However, the order of stop event happens after the breakpoint event. If
we handle the breakpoint logic in the breakpoint event, it may cause
somehow status unsynchronized. To make the behavior stable, we delay the
breakpoint handle until the simulation stop event called.

Change-Id: I0083561f561af71370ccaa066220b72ed7831b78
Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/68697
Reviewed-by: Earl Ou <shunhsingou@google.com>
Maintainer: Gabe Black <gabe.black@gmail.com>
Tested-by: kokoro <noreply+kokoro@google.com>
---
 src/arch/arm/fastmodel/iris/thread_context.cc | 31 ++++++++++++-------
 src/arch/arm/fastmodel/iris/thread_context.hh |  2 ++
 2 files changed, 21 insertions(+), 12 deletions(-)

diff --git a/src/arch/arm/fastmodel/iris/thread_context.cc b/src/arch/arm/fastmodel/iris/thread_context.cc
index 45e020d832..462995a19a 100644
--- a/src/arch/arm/fastmodel/iris/thread_context.cc
+++ b/src/arch/arm/fastmodel/iris/thread_context.cc
@@ -269,6 +269,22 @@ ThreadContext::simulationTimeEvent(
         return iris::E_ok;
     }
 
+    // Handle the breakpoint event at simulation is stopped if needed.
+    if (bpAddr.has_value()) {
+        auto it = getOrAllocBp(bpAddr.value());
+
+        std::shared_ptr<BpInfo::EventList> events = it->second->events;
+        auto e_it = events->begin();
+        while (e_it != events->end()) {
+            PCEvent *e = *e_it;
+            // Advance e_it here since e might remove itself from the list.
+            e_it++;
+            e->process(this);
+        }
+
+        bpAddr.reset();
+    }
+
     // If simulation time has stopped for any reason, IRIS helpfully clears
     // all stepping counters and we need to set them back. We might also need
     // to service events based on the current number of executed instructions.
@@ -286,19 +302,10 @@ ThreadContext::breakpointHit(
         uint64_t esId, const iris::IrisValueMap &fields, uint64_t time,
         uint64_t sInstId, bool syncEc, std::string &error_message_out)
 {
+    // Handle the breakpoint event later when the fastmodel simulation is
+    // stopped.
     Addr pc = fields.at("PC").getU64();
-
-    auto it = getOrAllocBp(pc);
-
-    std::shared_ptr<BpInfo::EventList> events = it->second->events;
-    auto e_it = events->begin();
-    while (e_it != events->end()) {
-        PCEvent *e = *e_it;
-        // Advance e_it here since e might remove itself from the list.
-        e_it++;
-        e->process(this);
-    }
-
+    bpAddr = pc;
     return iris::E_ok;
 }
 
diff --git a/src/arch/arm/fastmodel/iris/thread_context.hh b/src/arch/arm/fastmodel/iris/thread_context.hh
index 05209e685e..88c6746037 100644
--- a/src/arch/arm/fastmodel/iris/thread_context.hh
+++ b/src/arch/arm/fastmodel/iris/thread_context.hh
@@ -31,6 +31,7 @@
 #include <list>
 #include <map>
 #include <memory>
+#include <optional>
 #include <unordered_map>
 
 #include "arch/arm/fastmodel/iris/memory_spaces.hh"
@@ -133,6 +134,7 @@ class ThreadContext : public gem5::ThreadContext
     using BpInfoIt = BpInfoMap::iterator;
 
     BpInfoMap bps;
+    std::optional<Addr> bpAddr;
 
     BpInfoIt getOrAllocBp(Addr pc);
 

From 2bbd2b9596478c78eb7b0bfd9af825a30f3cea56 Mon Sep 17 00:00:00 2001
From: Giacomo Travaglini <giacomo.travaglini@arm.com>
Date: Mon, 20 Mar 2023 12:57:11 +0000
Subject: [PATCH 267/492] base: Remove unnecessary DEBUG guard

There is no performance improvement on guarding a cprint
before a panic. We should just print as many info as possible
anytime we encounter a failure

Change-Id: I3ee9fb2c3b8a8f23bdf6173bb2a010020f2b2572
Signed-off-by: Giacomo Travaglini <giacomo.travaglini@arm.com>
Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/69077
Reviewed-by: Jason Lowe-Power <power.jg@gmail.com>
Maintainer: Jason Lowe-Power <power.jg@gmail.com>
Reviewed-by: Daniel Carvalho <odanrc@yahoo.com.br>
Tested-by: kokoro <noreply+kokoro@google.com>
---
 src/base/stats/info.cc | 8 +++-----
 1 file changed, 3 insertions(+), 5 deletions(-)

diff --git a/src/base/stats/info.cc b/src/base/stats/info.cc
index 06e7ec977d..8779cf0081 100644
--- a/src/base/stats/info.cc
+++ b/src/base/stats/info.cc
@@ -172,13 +172,11 @@ bool
 Info::baseCheck() const
 {
     if (!(flags & statistics::init)) {
-#ifdef DEBUG
-        cprintf("this is stat number %d\n", id);
-#endif
-        panic("Not all stats have been initialized.\n"
+        panic("this is stat number %d\n"
+              "Not all stats have been initialized.\n"
               "You may need to add <ParentClass>::regStats() to a"
               " new SimObject's regStats() function. Name: %s",
-              name);
+              id, name);
         return false;
     }
 

From e4a98644587f7d547a7acfa4998a54a4945b5f8f Mon Sep 17 00:00:00 2001
From: Giacomo Travaglini <giacomo.travaglini@arm.com>
Date: Mon, 20 Mar 2023 13:10:48 +0000
Subject: [PATCH 268/492] sim: Remove unused SimObject::debugObjectBreak

The static method is only present in gem5.debug builds and
it doesn't seem to do anything else than setting up a boolean
flag. I was not able to understand from git history which
was the purpose of it but I am confident it's not currently
used.

It was maybe meant to work with the --debug-break option, which
sends a SIGRAP to the current PID in order to halt a GDB
session at a specific time. Maybe the idea was to halt only
when executing a SimObject method?

Change-Id: Ic21e8f55b5e35480c2bc617c24e1352c37516e8f
Signed-off-by: Giacomo Travaglini <giacomo.travaglini@arm.com>
Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/69078
Reviewed-by: Daniel Carvalho <odanrc@yahoo.com.br>
Tested-by: kokoro <noreply+kokoro@google.com>
Maintainer: Jason Lowe-Power <power.jg@gmail.com>
Reviewed-by: Jason Lowe-Power <power.jg@gmail.com>
---
 src/sim/sim_object.cc | 27 ---------------------------
 src/sim/sim_object.hh |  9 ---------
 2 files changed, 36 deletions(-)

diff --git a/src/sim/sim_object.cc b/src/sim/sim_object.cc
index d10be92b44..e91d3e2407 100644
--- a/src/sim/sim_object.cc
+++ b/src/sim/sim_object.cc
@@ -60,9 +60,6 @@ SimObject::SimObject(const Params &p)
       statistics::Group(nullptr), Named(p.name),
       _params(p)
 {
-#ifdef DEBUG
-    doDebugBreak = false;
-#endif
     simObjectList.push_back(this);
     probeManager = new ProbeManager(this);
 }
@@ -148,30 +145,6 @@ SimObject::serializeAll(const std::string &cpt_dir)
    }
 }
 
-#ifdef DEBUG
-//
-// static function: flag which objects should have the debugger break
-//
-void
-SimObject::debugObjectBreak(const std::string &objs)
-{
-    SimObjectList::const_iterator i = simObjectList.begin();
-    SimObjectList::const_iterator end = simObjectList.end();
-
-    ObjectMatch match(objs);
-    for (; i != end; ++i) {
-        SimObject *obj = *i;
-        obj->doDebugBreak = match.match(obj->name());
-   }
-}
-
-void
-debugObjectBreak(const char *objs)
-{
-    SimObject::debugObjectBreak(std::string(objs));
-}
-#endif
-
 SimObject *
 SimObject::find(const char *name)
 {
diff --git a/src/sim/sim_object.hh b/src/sim/sim_object.hh
index c2c5d4d29b..047821d371 100644
--- a/src/sim/sim_object.hh
+++ b/src/sim/sim_object.hh
@@ -329,12 +329,6 @@ class SimObject : public EventManager, public Serializable, public Drainable,
      */
     static void serializeAll(const std::string &cpt_dir);
 
-#ifdef DEBUG
-  public:
-    bool doDebugBreak;
-    static void debugObjectBreak(const std::string &objs);
-#endif
-
     /**
      * Find the SimObject with the given name and return a pointer to
      * it.  Primarily used for interactive debugging.  Argument is
@@ -395,9 +389,6 @@ class SimObjectResolver
     virtual SimObject *resolveSimObject(const std::string &name) = 0;
 };
 
-#ifdef DEBUG
-void debugObjectBreak(const char *objs);
-#endif
 
 /**
  * To avoid circular dependencies the unserialization of SimObjects must be

From 7b39a7f14e6d5132961e3e568b121d7fd7422f90 Mon Sep 17 00:00:00 2001
From: Giacomo Travaglini <giacomo.travaglini@arm.com>
Date: Mon, 20 Mar 2023 12:57:36 +0000
Subject: [PATCH 269/492] misc: Rename DEBUG macro into GEM5_DEBUG

The DEBUG macro is not part of any compiler standards (differently from
NDEBUG, which elides assertions).

It is only meant to differentiate gem5.debug from .fast and .opt builds.
gem5 developers have used it to insert helper code that is supposed to
aid the debugging process in case anything goes wrong.

This generic name is likely to clash with other libraries linked with
gem5.  This is the case of DRAMSim as an example.

Rather than using undef tricks, we just inject a GEM5_DEBUG macro
for gem5.debug builds.

Change-Id: Ie913ca30da615bd0075277a260bbdbc397b7ec87
Signed-off-by: Giacomo Travaglini <giacomo.travaglini@arm.com>
Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/69079
Tested-by: kokoro <noreply+kokoro@google.com>
Reviewed-by: Daniel Carvalho <odanrc@yahoo.com.br>
Maintainer: Jason Lowe-Power <power.jg@gmail.com>
Reviewed-by: Jason Lowe-Power <power.jg@gmail.com>
---
 src/SConscript                |  2 +-
 src/base/cast.hh              |  2 +-
 src/cpu/o3/cpu.hh             |  2 +-
 src/cpu/o3/dyn_inst.cc        |  8 ++++----
 src/cpu/o3/dyn_inst.hh        |  2 +-
 src/cpu/o3/iew.cc             |  2 +-
 src/cpu/o3/inst_queue.cc      |  2 +-
 src/cpu/o3/mem_dep_unit.cc    | 18 +++++++++---------
 src/cpu/o3/mem_dep_unit.hh    |  2 +-
 src/cpu/pred/tournament.cc    |  2 +-
 src/cpu/pred/tournament.hh    |  2 +-
 src/gpu-compute/fetch_unit.cc |  2 +-
 src/mem/dramsim2_wrapper.cc   |  9 ---------
 src/mem/dramsim3_wrapper.cc   |  9 ---------
 util/tlm/SConstruct           |  2 +-
 15 files changed, 24 insertions(+), 42 deletions(-)

diff --git a/src/SConscript b/src/SConscript
index 3179849601..4e9048ca04 100644
--- a/src/SConscript
+++ b/src/SConscript
@@ -649,7 +649,7 @@ envs = {
     'fast': env.Clone(ENV_LABEL='fast', OBJSUFFIX='.fo'),
 }
 
-envs['debug'].Append(CPPDEFINES=['DEBUG', 'TRACING_ON=1'])
+envs['debug'].Append(CPPDEFINES=['GEM5_DEBUG', 'TRACING_ON=1'])
 envs['opt'].Append(CCFLAGS=['-g'], CPPDEFINES=['TRACING_ON=1'])
 envs['fast'].Append(CPPDEFINES=['NDEBUG', 'TRACING_ON=0'])
 
diff --git a/src/base/cast.hh b/src/base/cast.hh
index 01464d9b2c..29eefa9df2 100644
--- a/src/base/cast.hh
+++ b/src/base/cast.hh
@@ -40,7 +40,7 @@ namespace gem5
 // type, but in all cases when we cast it to a derived type, we know
 // by construction that it should work correctly.
 
-#if defined(DEBUG)
+#if defined(GEM5_DEBUG)
 
 // In debug builds, do the dynamic cast and assert the result is good
 
diff --git a/src/cpu/o3/cpu.hh b/src/cpu/o3/cpu.hh
index 08a1312e73..76a9060f0a 100644
--- a/src/cpu/o3/cpu.hh
+++ b/src/cpu/o3/cpu.hh
@@ -385,7 +385,7 @@ class CPU : public BaseCPU
      */
     std::queue<ListIt> removeList;
 
-#ifdef DEBUG
+#ifdef GEM5_DEBUG
     /** Debug structure to keep track of the sequence numbers still in
      * flight.
      */
diff --git a/src/cpu/o3/dyn_inst.cc b/src/cpu/o3/dyn_inst.cc
index 94433cf433..0f500d8923 100644
--- a/src/cpu/o3/dyn_inst.cc
+++ b/src/cpu/o3/dyn_inst.cc
@@ -74,7 +74,7 @@ DynInst::DynInst(const Arrays &arrays, const StaticInstPtr &static_inst,
     ++cpu->instcount;
 
     if (cpu->instcount > 1500) {
-#ifdef DEBUG
+#ifdef GEM5_DEBUG
         cpu->dumpInsts();
         dumpSNList();
 #endif
@@ -86,7 +86,7 @@ DynInst::DynInst(const Arrays &arrays, const StaticInstPtr &static_inst,
         seqNum, cpu->name(), cpu->instcount);
 #endif
 
-#ifdef DEBUG
+#ifdef GEM5_DEBUG
     cpu->snList.insert(seqNum);
 #endif
 
@@ -262,13 +262,13 @@ DynInst::~DynInst()
         "DynInst: [sn:%lli] Instruction destroyed. Instcount for %s = %i\n",
         seqNum, cpu->name(), cpu->instcount);
 #endif
-#ifdef DEBUG
+#ifdef GEM5_DEBUG
     cpu->snList.erase(seqNum);
 #endif
 };
 
 
-#ifdef DEBUG
+#ifdef GEM5_DEBUG
 void
 DynInst::dumpSNList()
 {
diff --git a/src/cpu/o3/dyn_inst.hh b/src/cpu/o3/dyn_inst.hh
index 54c0385374..d6df09ce4a 100644
--- a/src/cpu/o3/dyn_inst.hh
+++ b/src/cpu/o3/dyn_inst.hh
@@ -452,7 +452,7 @@ class DynInst : public ExecContext, public RefCounted
     }
 
   public:
-#ifdef DEBUG
+#ifdef GEM5_DEBUG
     void dumpSNList();
 #endif
 
diff --git a/src/cpu/o3/iew.cc b/src/cpu/o3/iew.cc
index 7cf6c54542..e67bc69b9b 100644
--- a/src/cpu/o3/iew.cc
+++ b/src/cpu/o3/iew.cc
@@ -769,7 +769,7 @@ void
 IEW::sortInsts()
 {
     int insts_from_rename = fromRename->size;
-#ifdef DEBUG
+#ifdef GEM5_DEBUG
     for (ThreadID tid = 0; tid < numThreads; tid++)
         assert(insts[tid].empty());
 #endif
diff --git a/src/cpu/o3/inst_queue.cc b/src/cpu/o3/inst_queue.cc
index ee286fc585..b236b7518e 100644
--- a/src/cpu/o3/inst_queue.cc
+++ b/src/cpu/o3/inst_queue.cc
@@ -165,7 +165,7 @@ InstructionQueue::InstructionQueue(CPU *cpu_ptr, IEW *iew_ptr,
 InstructionQueue::~InstructionQueue()
 {
     dependGraph.reset();
-#ifdef DEBUG
+#ifdef GEM5_DEBUG
     cprintf("Nodes traversed: %i, removed: %i\n",
             dependGraph.nodesTraversed, dependGraph.nodesRemoved);
 #endif
diff --git a/src/cpu/o3/mem_dep_unit.cc b/src/cpu/o3/mem_dep_unit.cc
index bffbf2380d..bd74e2b734 100644
--- a/src/cpu/o3/mem_dep_unit.cc
+++ b/src/cpu/o3/mem_dep_unit.cc
@@ -46,7 +46,7 @@ namespace gem5
 namespace o3
 {
 
-#ifdef DEBUG
+#ifdef GEM5_DEBUG
 int MemDepUnit::MemDepEntry::memdep_count = 0;
 int MemDepUnit::MemDepEntry::memdep_insert = 0;
 int MemDepUnit::MemDepEntry::memdep_erase = 0;
@@ -83,7 +83,7 @@ MemDepUnit::~MemDepUnit()
         }
     }
 
-#ifdef DEBUG
+#ifdef GEM5_DEBUG
     assert(MemDepEntry::memdep_count == 0);
 #endif
 }
@@ -196,7 +196,7 @@ MemDepUnit::insert(const DynInstPtr &inst)
     // Add the MemDepEntry to the hash.
     memDepHash.insert(
         std::pair<InstSeqNum, MemDepEntryPtr>(inst->seqNum, inst_entry));
-#ifdef DEBUG
+#ifdef GEM5_DEBUG
     MemDepEntry::memdep_insert++;
 #endif
 
@@ -329,7 +329,7 @@ MemDepUnit::insertBarrier(const DynInstPtr &barr_inst)
     // Add the MemDepEntry to the hash.
     memDepHash.insert(
         std::pair<InstSeqNum, MemDepEntryPtr>(barr_inst->seqNum, inst_entry));
-#ifdef DEBUG
+#ifdef GEM5_DEBUG
     MemDepEntry::memdep_insert++;
 #endif
 
@@ -419,7 +419,7 @@ MemDepUnit::completed(const DynInstPtr &inst)
     (*hash_it).second = NULL;
 
     memDepHash.erase(hash_it);
-#ifdef DEBUG
+#ifdef GEM5_DEBUG
     MemDepEntry::memdep_erase++;
 #endif
 }
@@ -494,7 +494,7 @@ MemDepUnit::wakeDependents(const DynInstPtr &inst)
 MemDepUnit::MemDepEntry::MemDepEntry(const DynInstPtr &new_inst) :
     inst(new_inst)
 {
-#ifdef DEBUG
+#ifdef GEM5_DEBUG
     ++memdep_count;
 
     DPRINTF(MemDepUnit,
@@ -508,7 +508,7 @@ MemDepUnit::MemDepEntry::~MemDepEntry()
     for (int i = 0; i < dependInsts.size(); ++i) {
         dependInsts[i] = NULL;
     }
-#ifdef DEBUG
+#ifdef GEM5_DEBUG
     --memdep_count;
 
     DPRINTF(MemDepUnit,
@@ -556,7 +556,7 @@ MemDepUnit::squash(const InstSeqNum &squashed_num, ThreadID tid)
         (*hash_it).second = NULL;
 
         memDepHash.erase(hash_it);
-#ifdef DEBUG
+#ifdef GEM5_DEBUG
         MemDepEntry::memdep_erase++;
 #endif
 
@@ -635,7 +635,7 @@ MemDepUnit::dumpLists()
 
     cprintf("Memory dependence hash size: %i\n", memDepHash.size());
 
-#ifdef DEBUG
+#ifdef GEM5_DEBUG
     cprintf("Memory dependence entries: %i\n", MemDepEntry::memdep_count);
 #endif
 }
diff --git a/src/cpu/o3/mem_dep_unit.hh b/src/cpu/o3/mem_dep_unit.hh
index 6609f8dcad..57df492c6f 100644
--- a/src/cpu/o3/mem_dep_unit.hh
+++ b/src/cpu/o3/mem_dep_unit.hh
@@ -210,7 +210,7 @@ class MemDepUnit
         bool squashed = false;
 
         /** For debugging. */
-#ifdef DEBUG
+#ifdef GEM5_DEBUG
         static int memdep_count;
         static int memdep_insert;
         static int memdep_erase;
diff --git a/src/cpu/pred/tournament.cc b/src/cpu/pred/tournament.cc
index c059b5df49..b3a55313b7 100644
--- a/src/cpu/pred/tournament.cc
+++ b/src/cpu/pred/tournament.cc
@@ -349,7 +349,7 @@ TournamentBP::squash(ThreadID tid, void *bp_history)
     delete history;
 }
 
-#ifdef DEBUG
+#ifdef GEM5_DEBUG
 int
 TournamentBP::BPHistory::newCount = 0;
 #endif
diff --git a/src/cpu/pred/tournament.hh b/src/cpu/pred/tournament.hh
index 3f2eb2517a..018d6756e4 100644
--- a/src/cpu/pred/tournament.hh
+++ b/src/cpu/pred/tournament.hh
@@ -159,7 +159,7 @@ class TournamentBP : public BPredUnit
      */
     struct BPHistory
     {
-#ifdef DEBUG
+#ifdef GEM5_DEBUG
         BPHistory()
         { newCount++; }
         ~BPHistory()
diff --git a/src/gpu-compute/fetch_unit.cc b/src/gpu-compute/fetch_unit.cc
index 640e29bdba..4dadbd363d 100644
--- a/src/gpu-compute/fetch_unit.cc
+++ b/src/gpu-compute/fetch_unit.cc
@@ -523,7 +523,7 @@ FetchUnit::FetchBufDesc::checkWaveReleaseBuf()
             wavefront->wfSlotId, wavefront->wfDynId, cur_wave_pc,
             wavefront->pc());
 
-#ifdef DEBUG
+#ifdef GEM5_DEBUG
     int idx = 0;
     for (const auto &buf_pc : bufferedPCs) {
         DPRINTF(GPUFetch, "PC[%d] = %#x\n", idx, buf_pc.first);
diff --git a/src/mem/dramsim2_wrapper.cc b/src/mem/dramsim2_wrapper.cc
index b9bcf14c08..b174454b39 100644
--- a/src/mem/dramsim2_wrapper.cc
+++ b/src/mem/dramsim2_wrapper.cc
@@ -37,15 +37,6 @@
 
 #include <cassert>
 
-/**
- * When building the debug binary, we need to undo the command-line
- * definition of DEBUG not to clash with DRAMSim2 print macros that
- * are included for no obvious reason.
- */
-#ifdef DEBUG
-#undef DEBUG
-#endif
-
 #include "mem/dramsim2_wrapper.hh"
 
 #include <fstream>
diff --git a/src/mem/dramsim3_wrapper.cc b/src/mem/dramsim3_wrapper.cc
index 89e49017a9..69edfdee64 100644
--- a/src/mem/dramsim3_wrapper.cc
+++ b/src/mem/dramsim3_wrapper.cc
@@ -38,15 +38,6 @@
 
 #include <cassert>
 
-/**
- * When building the debug binary, we need to undo the command-line
- * definition of DEBUG not to clash with DRAMsim3 print macros that
- * are included for no obvious reason.
- */
-#ifdef DEBUG
-#undef DEBUG
-#endif
-
 #include "mem/dramsim3_wrapper.hh"
 
 #include <fstream>
diff --git a/util/tlm/SConstruct b/util/tlm/SConstruct
index 7fe09d446d..c05b70b208 100644
--- a/util/tlm/SConstruct
+++ b/util/tlm/SConstruct
@@ -62,7 +62,7 @@ env.Append(CXXFLAGS=['-std=c++17',
                      ])
 
 if gem5_variant == 'debug':
-    env.Append(CXXFLAGS=['-g', '-DDEBUG'])
+    env.Append(CXXFLAGS=['-g', '-DGEM5_DEBUG'])
 
 deps = [] # keep track of all dependencies required for building the binaries
 

From 069688df35555131f4c692f65d0813b7d35d2da7 Mon Sep 17 00:00:00 2001
From: Giacomo Travaglini <giacomo.travaglini@arm.com>
Date: Thu, 16 Mar 2023 11:19:33 +0000
Subject: [PATCH 270/492] misc: Update the .git-blame-ignore-revs to ignore
 flynt commit

This [1] is just a cosmetic change which shouldn't affect functionalities.
We might want to ignore it when running git blame

[1]: https://gem5-review.googlesource.com/c/public/gem5/+/68957

Change-Id: I96e3308beb0bc6ad9638df137341bf611d4db5b1
Signed-off-by: Giacomo Travaglini <giacomo.travaglini@arm.com>
Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/69037
Tested-by: kokoro <noreply+kokoro@google.com>
Reviewed-by: Jason Lowe-Power <power.jg@gmail.com>
Maintainer: Jason Lowe-Power <power.jg@gmail.com>
---
 .git-blame-ignore-revs | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/.git-blame-ignore-revs b/.git-blame-ignore-revs
index ebbbb1a2a2..799591c5ce 100644
--- a/.git-blame-ignore-revs
+++ b/.git-blame-ignore-revs
@@ -26,3 +26,6 @@ c3bd8eb1214cbebbc92c7958b80aa06913bce3ba
 # A commit which ran Python Black on all Python files.
 # https://gem5-review.googlesource.com/c/public/gem5/+/47024
 787204c92d876dd81357b75aede52d8ef5e053d3
+
+# A commit which ran flynt all Python files.
+e73655d038cdfa68964109044e33c9a6e7d85ac9

From 2c488e07df462ee79a3196479a747ff18f2de0fd Mon Sep 17 00:00:00 2001
From: Roger Chang <rogerycchang@google.com>
Date: Tue, 21 Mar 2023 10:33:23 +0800
Subject: [PATCH 271/492] arch-riscv: Add pmp index checking

Check the index is within the bounds of PMP table before updating the
address and config

Change-Id: Ie938b3c2a61eca9527192c0452d1db9522f07af9
Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/69117
Reviewed-by: Bobby Bruce <bbruce@ucdavis.edu>
Maintainer: Bobby Bruce <bbruce@ucdavis.edu>
Tested-by: kokoro <noreply+kokoro@google.com>
Reviewed-by: Ayaz Akram <yazakram@ucdavis.edu>
Reviewed-by: Yu-hsin Wang <yuhsingw@google.com>
---
 src/arch/riscv/pmp.cc | 18 ++++++++++++++++--
 1 file changed, 16 insertions(+), 2 deletions(-)

diff --git a/src/arch/riscv/pmp.cc b/src/arch/riscv/pmp.cc
index 940af47686..49dc7ba822 100644
--- a/src/arch/riscv/pmp.cc
+++ b/src/arch/riscv/pmp.cc
@@ -152,6 +152,13 @@ PMP::pmpGetAField(uint8_t cfg)
 bool
 PMP::pmpUpdateCfg(uint32_t pmp_index, uint8_t this_cfg)
 {
+    if (pmp_index >= pmpEntries) {
+        DPRINTF(PMP, "Can't update pmp entry config %u"
+                " because the index exceed the size of pmp entries %u",
+                pmp_index, pmpEntries);
+        return false;
+    }
+
     DPRINTF(PMP, "Update pmp config with %u for pmp entry: %u \n",
                                     (unsigned)this_cfg, pmp_index);
     if (pmpTable[pmp_index].pmpCfg & PMP_LOCK) {
@@ -231,6 +238,13 @@ PMP::pmpReset()
 bool
 PMP::pmpUpdateAddr(uint32_t pmp_index, Addr this_addr)
 {
+    if (pmp_index >= pmpEntries) {
+        DPRINTF(PMP, "Can't update pmp entry address %u"
+                " because the index exceed the size of pmp entries %u",
+                pmp_index, pmpEntries);
+        return false;
+    }
+
     DPRINTF(PMP, "Update pmp addr %#x for pmp entry %u \n",
                                       this_addr, pmp_index);
 
@@ -241,8 +255,8 @@ PMP::pmpUpdateAddr(uint32_t pmp_index, Addr this_addr)
     } else if (pmp_index < pmpTable.size() - 1 &&
                ((pmpTable[pmp_index+1].pmpCfg & PMP_LOCK) != 0) &&
                pmpGetAField(pmpTable[pmp_index+1].pmpCfg) == PMP_TOR) {
-        DPRINTF(PMP, "Update pmp entry %u failed because the entry %u lock bit set"
-                "and A field is TOR\n",
+        DPRINTF(PMP, "Update pmp entry %u failed because the entry %u lock bit"
+                " set and A field is TOR\n",
                 pmp_index, pmp_index+1);
         return false;
     }

From fb4eb86711599351dab72fbd7b5d241fe3a35f77 Mon Sep 17 00:00:00 2001
From: Gabe Black <gabeblack@google.com>
Date: Sat, 18 Mar 2023 06:23:57 -0700
Subject: [PATCH 272/492] base: Flip the default for ListenSocket::accept(bool
 nodelay).

This option was almost always overridden from false to true anyway,
except in one place (in the ethertap device) which was likely just by
accident.

This will give external users a chance to remove the option without
changing behavior, so that the option can be removed entirely in a
later change.

Change-Id: I77add40b8131b91997b2aecbfff6c7de0ee9ead9
Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/69157
Reviewed-by: Yu-hsin Wang <yuhsingw@google.com>
Tested-by: kokoro <noreply+kokoro@google.com>
Maintainer: Gabe Black <gabeblack@google.com>
---
 src/base/socket.hh | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/base/socket.hh b/src/base/socket.hh
index f3b2760518..af4a6511dd 100644
--- a/src/base/socket.hh
+++ b/src/base/socket.hh
@@ -104,7 +104,7 @@ class ListenSocket
     ListenSocket();
     virtual ~ListenSocket();
 
-    virtual int accept(bool nodelay = false);
+    virtual int accept(bool nodelay = true);
 
     virtual bool listen(int port, bool reuse = true);
 

From 92d920f99419dc6b3f452175c53738132443c065 Mon Sep 17 00:00:00 2001
From: Matt Sinclair <mattdsinclair.wisc@gmail.com>
Date: Wed, 15 Mar 2023 17:06:42 -0500
Subject: [PATCH 273/492] mem-ruby: fix load deadlock with WB GPU L2 caches

By default the GPU VIPER coherence protocol uses a WT L2 cache.
However it has support for using WB caches (although this is not
tested currently).  When using a WB L2 cache for the GPU, this
results in deadlocks with loads.

Specifically, when a load reaches the L2 and the line is currently
in the W state, that line must be written back before the load can
be performed.  However, the current transition for this in the L2
did not attempt to retry the load when the WB completes, resulting
in a deadlock.  This deadlock can be replicated by running the GPU
Ruby random tester as is with a WB L2 cache instead of a WT L2
cache.

To fix this, this change modifies the transition in question to
put the load on the stalled requests buffer, which the WBAck will
check when it returns to the L2 (and thus perform the load).

This fix has been tested and verified with both the per-checkin and
nightly GPU Ruby Random tester tests (with a WB L2 cache).

Change-Id: Ieec4f61a3070cf9976b8c3ef0cdbd0cc5a1443c6
Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/68977
Reviewed-by: Matthew Poremba <matthew.poremba@amd.com>
Maintainer: Bobby Bruce <bbruce@ucdavis.edu>
Tested-by: kokoro <noreply+kokoro@google.com>
---
 src/mem/ruby/protocol/GPU_VIPER-TCC.sm | 7 +++++--
 1 file changed, 5 insertions(+), 2 deletions(-)

diff --git a/src/mem/ruby/protocol/GPU_VIPER-TCC.sm b/src/mem/ruby/protocol/GPU_VIPER-TCC.sm
index 0f93339827..0b7f5ed9ad 100644
--- a/src/mem/ruby/protocol/GPU_VIPER-TCC.sm
+++ b/src/mem/ruby/protocol/GPU_VIPER-TCC.sm
@@ -718,10 +718,13 @@ machine(MachineType:TCC, "TCC Cache")
     p_popRequestQueue;
   }
   transition(W, RdBlk, WI) {TagArrayRead, DataArrayRead} {
-    p_profileHit;
     t_allocateTBE;
     wb_writeBack;
-    p_popRequestQueue;
+    // need to try this request again after writing back the current entry -- to
+    // do so, put it with other stalled requests in a buffer to reduce resource
+    // contention since they won't try again every cycle and will instead only
+    // try again once woken up
+    st_stallAndWaitRequest;
   }
 
   transition(I, RdBlk, IV) {TagArrayRead} {

From a030ff2745db7294723996ada0493366c0399c6f Mon Sep 17 00:00:00 2001
From: Matt Sinclair <mattdsinclair.wisc@gmail.com>
Date: Wed, 15 Mar 2023 17:12:17 -0500
Subject: [PATCH 274/492] mem-ruby: fix atomic deadlock with WB GPU L2 caches

By default the GPU VIPER coherence protocol uses a WT L2 cache.
However it has support for using WB caches (although this is not
tested currently).  When using a WB L2 cache for the GPU, this
results in deadlocks with atomics.

Specifically, when an atomic reaches the L2 and the line is
currently in M or W, the line must be written back before the atomic
can be performed.  However, the current support has two issues:

a) it never performs the atomic operation -- while VIPER current
assumes all atomics are system scope atomics and thus cannot be
performed at the L2 and this transition requires the dirty line be
written back before performing the atomic, the transition never
performs the atomic nor does the response path handle it.
b) putting the atomic action right after the write back is not
safe because we need to ensure the requests are ordered when they
reach memory -- thus we have to wait until the write back is
acknowledged before it's safe to send/perform the atomic.

To fix this, this change modifies the transition in question to
put the atomic on the stalled requests buffer, which the WBAck will
check when it returns to the L2 (and thus perform the atomic, which
will result in the atomic being sent on to the directory).

This fix has been tested and verified with both the per-checkin and
nightly GPU Ruby Random tester tests (with a WB L2 cache).

Change-Id: I9a43fd985dc71297521f4b05c47288d92c314ac7
Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/68978
Maintainer: Bobby Bruce <bbruce@ucdavis.edu>
Reviewed-by: Matthew Poremba <matthew.poremba@amd.com>
Tested-by: kokoro <noreply+kokoro@google.com>
---
 src/mem/ruby/protocol/GPU_VIPER-TCC.sm | 8 ++++++--
 1 file changed, 6 insertions(+), 2 deletions(-)

diff --git a/src/mem/ruby/protocol/GPU_VIPER-TCC.sm b/src/mem/ruby/protocol/GPU_VIPER-TCC.sm
index 0b7f5ed9ad..a59589870d 100644
--- a/src/mem/ruby/protocol/GPU_VIPER-TCC.sm
+++ b/src/mem/ruby/protocol/GPU_VIPER-TCC.sm
@@ -816,10 +816,14 @@ transition(I, Atomic, A) {TagArrayRead} {
   }
 
   transition({M, W}, Atomic, WI) {TagArrayRead} {
-    p_profileHit;
     t_allocateTBE;
     wb_writeBack;
-    p_popRequestQueue;
+    // after writing back the current line, we need to wait for it to be done
+    // before we try to perform the atomic
+    // by putting the stalled requests in a buffer, we reduce resource contention
+    // since they won't try again every cycle and will instead only try again once
+    // woken up
+    st_stallAndWaitRequest;
   }
 
   transition(I, WrVicBlk) {TagArrayRead} {

From 79cfef2650465942bd0e6f27187dc4aed4649337 Mon Sep 17 00:00:00 2001
From: Alvaro Moreno <alvaro.moreno@bsc.es>
Date: Wed, 15 Mar 2023 12:38:44 +0100
Subject: [PATCH 275/492] stdlib: Define env variables in SE workload

Implements a parameter in SE binary workload class
to define env variables inside each process.

Change-Id: I943a560043eff423989297cb2afa386a90df6791
Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/68937
Reviewed-by: Bobby Bruce <bbruce@ucdavis.edu>
Maintainer: Bobby Bruce <bbruce@ucdavis.edu>
Tested-by: kokoro <noreply+kokoro@google.com>
---
 src/python/gem5/components/boards/se_binary_workload.py | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/src/python/gem5/components/boards/se_binary_workload.py b/src/python/gem5/components/boards/se_binary_workload.py
index 23be81b03d..98fe840a27 100644
--- a/src/python/gem5/components/boards/se_binary_workload.py
+++ b/src/python/gem5/components/boards/se_binary_workload.py
@@ -66,6 +66,7 @@ class SEBinaryWorkload:
         stdin_file: Optional[FileResource] = None,
         stdout_file: Optional[Path] = None,
         stderr_file: Optional[Path] = None,
+        env_list: Optional[List[str]] = None,
         arguments: List[str] = [],
         checkpoint: Optional[Union[Path, CheckpointResource]] = None,
     ) -> None:
@@ -80,6 +81,9 @@ class SEBinaryWorkload:
         :param exit_on_work_items: Whether the simulation should exit on work
         items. True by default.
         :param stdin_file: The input file for the binary
+        :param stdout_file: The output file for the binary
+        :param stderr_file: The error output file for the binary
+        :param env_list: The environment variables defined for the binary
         :param arguments: The input arguments for the binary
         :param checkpoint: The checkpoint directory. Used to restore the
         simulation to that checkpoint.
@@ -105,6 +109,8 @@ class SEBinaryWorkload:
             process.output = stdout_file.as_posix()
         if stderr_file is not None:
             process.errout = stderr_file.as_posix()
+        if env_list is not None:
+            process.env = env_list
 
         for core in self.get_processor().get_cores():
             core.set_workload(process)

From 2e1d24d0480c914068bb202cffb16d52a40ca9d7 Mon Sep 17 00:00:00 2001
From: Gabe Black <gabeblack@google.com>
Date: Sat, 18 Mar 2023 06:28:16 -0700
Subject: [PATCH 276/492] base,dev: Simplify the ListenSocket::accept method.

Remove the nodelay option which is always set to the same thing, and
simplify the logic of the method itself.

Change-Id: I78cd91f99cbaec9abddedbc7dcddc563daedb81f
Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/69158
Tested-by: kokoro <noreply+kokoro@google.com>
Maintainer: Gabe Black <gabeblack@google.com>
Reviewed-by: Yu-hsin Wang <yuhsingw@google.com>
---
 src/base/remote_gdb.cc     |  2 +-
 src/base/socket.cc         | 14 +++++++-------
 src/base/socket.hh         |  2 +-
 src/base/vnc/vncserver.cc  |  2 +-
 src/dev/net/ethertap.cc    |  2 +-
 src/dev/serial/terminal.cc |  2 +-
 6 files changed, 12 insertions(+), 12 deletions(-)

diff --git a/src/base/remote_gdb.cc b/src/base/remote_gdb.cc
index dd37a3503a..b709ac3d76 100644
--- a/src/base/remote_gdb.cc
+++ b/src/base/remote_gdb.cc
@@ -438,7 +438,7 @@ BaseRemoteGDB::connect()
 
     pollQueue.remove(incomingConnectionEvent);
 
-    int sfd = listener.accept(true);
+    int sfd = listener.accept();
 
     if (sfd != -1) {
         if (isAttached())
diff --git a/src/base/socket.cc b/src/base/socket.cc
index 23f2b40d1a..0a62a88f6c 100644
--- a/src/base/socket.cc
+++ b/src/base/socket.cc
@@ -239,17 +239,17 @@ ListenSocket::listen(int port, bool reuse)
 // Open a connection.  Accept will block, so if you don't want it to,
 // make sure a connection is ready before you call accept.
 int
-ListenSocket::accept(bool nodelay)
+ListenSocket::accept()
 {
     struct sockaddr_in sockaddr;
     socklen_t slen = sizeof (sockaddr);
     int sfd = acceptCloexec(fd, (struct sockaddr *)&sockaddr, &slen);
-    if (sfd != -1 && nodelay) {
-        int i = 1;
-        if (::setsockopt(sfd, IPPROTO_TCP, TCP_NODELAY, (char *)&i,
-                         sizeof(i)) < 0)
-            warn("ListenSocket(accept): setsockopt() TCP_NODELAY failed!");
-    }
+    if (sfd == -1)
+        return -1;
+
+    int i = 1;
+    int ret = ::setsockopt(sfd, IPPROTO_TCP, TCP_NODELAY, &i, sizeof(i));
+    warn_if(ret < 0, "ListenSocket(accept): setsockopt() TCP_NODELAY failed!");
 
     return sfd;
 }
diff --git a/src/base/socket.hh b/src/base/socket.hh
index af4a6511dd..aa451b691a 100644
--- a/src/base/socket.hh
+++ b/src/base/socket.hh
@@ -104,7 +104,7 @@ class ListenSocket
     ListenSocket();
     virtual ~ListenSocket();
 
-    virtual int accept(bool nodelay = true);
+    virtual int accept();
 
     virtual bool listen(int port, bool reuse = true);
 
diff --git a/src/base/vnc/vncserver.cc b/src/base/vnc/vncserver.cc
index c98838a393..5792c440fc 100644
--- a/src/base/vnc/vncserver.cc
+++ b/src/base/vnc/vncserver.cc
@@ -190,7 +190,7 @@ VncServer::accept()
     if (!listener.islistening())
         panic("%s: cannot accept a connection if not listening!", name());
 
-    int fd = listener.accept(true);
+    int fd = listener.accept();
     if (fd < 0) {
         warn("%s: failed to accept VNC connection!", name());
         return;
diff --git a/src/dev/net/ethertap.cc b/src/dev/net/ethertap.cc
index 5b1e957146..b28f255d83 100644
--- a/src/dev/net/ethertap.cc
+++ b/src/dev/net/ethertap.cc
@@ -280,7 +280,7 @@ TapListener::accept()
     if (!listener.islistening())
         panic("TapListener(accept): cannot accept if we're not listening!");
 
-    int sfd = listener.accept(true);
+    int sfd = listener.accept();
     if (sfd != -1)
         tap->attach(sfd);
 }
diff --git a/src/dev/serial/terminal.cc b/src/dev/serial/terminal.cc
index d4108a372d..fada99c2a4 100644
--- a/src/dev/serial/terminal.cc
+++ b/src/dev/serial/terminal.cc
@@ -195,7 +195,7 @@ Terminal::accept()
     if (!listener.islistening())
         panic("%s: cannot accept a connection if not listening!", name());
 
-    int fd = listener.accept(true);
+    int fd = listener.accept();
     if (data_fd != -1) {
         char message[] = "terminal already attached!\n";
         atomic_write(fd, message, sizeof(message));

From daaccaa9aee4e7f5ae091ff88f9a747665610a15 Mon Sep 17 00:00:00 2001
From: Yu-hsin Wang <yuhsingw@google.com>
Date: Wed, 2 Nov 2022 17:23:42 +0800
Subject: [PATCH 277/492] fastmodel: skip vector registers update in remote gdb

iris::ThreadContext doesn't implement the write interface for vector
registers. Skip that part in fastmodel remote_gdb to make updating
common registers work at least.

Change-Id: Ifa071f5dff4bdeee7361ae824b4b76e0b2805460
Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/69177
Maintainer: Gabe Black <gabeblack@google.com>
Reviewed-by: Gabe Black <gabeblack@google.com>
Tested-by: kokoro <noreply+kokoro@google.com>
---
 src/arch/arm/fastmodel/remote_gdb.cc | 40 +++++++++++++++++++++++++++-
 src/arch/arm/fastmodel/remote_gdb.hh | 14 ++++++++--
 src/arch/arm/remote_gdb.hh           | 24 ++++++++---------
 src/base/remote_gdb.hh               |  2 +-
 4 files changed, 64 insertions(+), 16 deletions(-)

diff --git a/src/arch/arm/fastmodel/remote_gdb.cc b/src/arch/arm/fastmodel/remote_gdb.cc
index e13fee8d70..d8dddaddf9 100644
--- a/src/arch/arm/fastmodel/remote_gdb.cc
+++ b/src/arch/arm/fastmodel/remote_gdb.cc
@@ -27,13 +27,42 @@
 #include "arch/arm/fastmodel/remote_gdb.hh"
 
 #include "arch/arm/fastmodel/iris/thread_context.hh"
+#include "arch/arm/utility.hh"
+#include "base/trace.hh"
+#include "debug/GDBAcc.hh"
 
 namespace gem5 {
 
+using namespace ArmISA;
+
 namespace fastmodel {
 
+void
+FastmodelRemoteGDB::AArch64GdbRegCache::setRegs(ThreadContext *context) const
+{
+    DPRINTF(GDBAcc, "setRegs in remotegdb \n");
+
+    for (int i = 0; i < 31; ++i)
+        context->setReg(int_reg::x(i), r.x[i]);
+    auto pc_state = context->pcState().as<PCState>();
+    pc_state.set(r.pc);
+    context->pcState(pc_state);
+    context->setMiscRegNoEffect(MISCREG_CPSR, r.cpsr);
+    // Update the stack pointer. This should be done after
+    // updating CPSR/PSTATE since that might affect how SPX gets
+    // mapped.
+    context->setReg(int_reg::Spx, r.spx);
+
+    // Remove the vector registers update in FastmodelRemoteGDB since it's not
+    // implemented in iris::ThreadContext.
+    warn("Skip update vector registers in remotegdb\n");
+
+    context->setMiscRegNoEffect(MISCREG_FPSR, r.fpsr);
+    context->setMiscRegNoEffect(MISCREG_FPCR, r.fpcr);
+}
+
 FastmodelRemoteGDB::FastmodelRemoteGDB(System *_system, int port)
-    : gem5::ArmISA::RemoteGDB(_system, port)
+    : gem5::ArmISA::RemoteGDB(_system, port), regCache64(this)
 {
 }
 
@@ -57,5 +86,14 @@ FastmodelRemoteGDB::writeBlob(Addr vaddr, size_t size, const char *data)
     return true;
 }
 
+BaseGdbRegCache*
+FastmodelRemoteGDB::gdbRegs()
+{
+    if (inAArch64(context()))
+        return &regCache64;
+    else
+        return &regCache32;
+}
+
 }  // namespace fastmodel
 }  // namespace gem5
diff --git a/src/arch/arm/fastmodel/remote_gdb.hh b/src/arch/arm/fastmodel/remote_gdb.hh
index 93cf882e21..75dc6580e3 100644
--- a/src/arch/arm/fastmodel/remote_gdb.hh
+++ b/src/arch/arm/fastmodel/remote_gdb.hh
@@ -36,14 +36,24 @@ namespace gem5
 namespace fastmodel
 {
 
-class FastmodelRemoteGDB : public gem5::ArmISA::RemoteGDB
+class FastmodelRemoteGDB : public ArmISA::RemoteGDB
 {
   public:
     FastmodelRemoteGDB(System *_system, int port);
 
-  private:
+  protected:
+    class AArch64GdbRegCache : public ArmISA::RemoteGDB::AArch64GdbRegCache
+    {
+      using ArmISA::RemoteGDB::AArch64GdbRegCache::AArch64GdbRegCache;
+      public:
+        void setRegs(ThreadContext*) const override;
+    };
+
     bool readBlob(Addr vaddr, size_t size, char *data) override;
     bool writeBlob(Addr vaddr, size_t size, const char *data) override;
+    BaseGdbRegCache* gdbRegs() override;
+
+    AArch64GdbRegCache regCache64;
 };
 
 }  // namespace fastmodel
diff --git a/src/arch/arm/remote_gdb.hh b/src/arch/arm/remote_gdb.hh
index 8e512a452f..aeb2db6754 100644
--- a/src/arch/arm/remote_gdb.hh
+++ b/src/arch/arm/remote_gdb.hh
@@ -68,7 +68,7 @@ class RemoteGDB : public BaseRemoteGDB
     class AArch32GdbRegCache : public BaseGdbRegCache
     {
       using BaseGdbRegCache::BaseGdbRegCache;
-      private:
+      protected:
         struct GEM5_PACKED
         {
           uint32_t gpr[16];
@@ -77,12 +77,12 @@ class RemoteGDB : public BaseRemoteGDB
           uint32_t fpscr;
         } r;
       public:
-        char *data() const { return (char *)&r; }
-        size_t size() const { return sizeof(r); }
-        void getRegs(ThreadContext*);
-        void setRegs(ThreadContext*) const;
+        char *data() const override { return (char *)&r; }
+        size_t size() const override { return sizeof(r); }
+        void getRegs(ThreadContext*) override;
+        void setRegs(ThreadContext*) const override;
         const std::string
-        name() const
+        name() const override
         {
             return gdb->name() + ".AArch32GdbRegCache";
         }
@@ -91,7 +91,7 @@ class RemoteGDB : public BaseRemoteGDB
     class AArch64GdbRegCache : public BaseGdbRegCache
     {
       using BaseGdbRegCache::BaseGdbRegCache;
-      private:
+      protected:
         struct GEM5_PACKED
         {
           uint64_t x[31];
@@ -103,12 +103,12 @@ class RemoteGDB : public BaseRemoteGDB
           uint32_t fpcr;
         } r;
       public:
-        char *data() const { return (char *)&r; }
-        size_t size() const { return sizeof(r); }
-        void getRegs(ThreadContext*);
-        void setRegs(ThreadContext*) const;
+        char *data() const override { return (char *)&r; }
+        size_t size() const override { return sizeof(r); }
+        void getRegs(ThreadContext*) override;
+        void setRegs(ThreadContext*) const override;
         const std::string
-        name() const
+        name() const override
         {
             return gdb->name() + ".AArch64GdbRegCache";
         }
diff --git a/src/base/remote_gdb.hh b/src/base/remote_gdb.hh
index 7981a13064..80c108ba22 100644
--- a/src/base/remote_gdb.hh
+++ b/src/base/remote_gdb.hh
@@ -188,7 +188,7 @@ class BaseRemoteGDB
             return nullptr;
     }
 
-  private:
+  protected:
     /*
      * Connection to the external GDB.
      */

From db1a5a367da91fcad991a4583408c2e6982b90cc Mon Sep 17 00:00:00 2001
From: Gabe Black <gabeblack@google.com>
Date: Sat, 18 Mar 2023 06:47:16 -0700
Subject: [PATCH 278/492] base,cpu,dev: Simplify ListenSocket::listen().

Remove the "reuse" parameter which default to true and was always
also explicitly set to true. Tidy up the code itself slightly, mostly
by using "panic_if" to remove some nesting.

Change-Id: Ie23971aabf2fe4252d27f1887468360722a72379
Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/69159
Maintainer: Gabe Black <gabeblack@google.com>
Reviewed-by: Yu-hsin Wang <yuhsingw@google.com>
Tested-by: kokoro <noreply+kokoro@google.com>
---
 src/base/remote_gdb.cc     |  2 +-
 src/base/socket.cc         | 28 ++++++++++++----------------
 src/base/socket.hh         |  2 +-
 src/base/socket.test.cc    | 21 ++++++---------------
 src/base/vnc/vncserver.cc  |  2 +-
 src/cpu/nativetrace.cc     |  3 +--
 src/dev/net/ethertap.cc    |  2 +-
 src/dev/serial/terminal.cc |  2 +-
 8 files changed, 24 insertions(+), 38 deletions(-)

diff --git a/src/base/remote_gdb.cc b/src/base/remote_gdb.cc
index b709ac3d76..1a2fef42d8 100644
--- a/src/base/remote_gdb.cc
+++ b/src/base/remote_gdb.cc
@@ -417,7 +417,7 @@ BaseRemoteGDB::listen()
         return;
     }
 
-    while (!listener.listen(_port, true)) {
+    while (!listener.listen(_port)) {
         DPRINTF(GDBMisc, "Can't bind port %d\n", _port);
         _port++;
     }
diff --git a/src/base/socket.cc b/src/base/socket.cc
index 0a62a88f6c..280f92b593 100644
--- a/src/base/socket.cc
+++ b/src/base/socket.cc
@@ -185,24 +185,20 @@ ListenSocket::~ListenSocket()
 
 // Create a socket and configure it for listening
 bool
-ListenSocket::listen(int port, bool reuse)
+ListenSocket::listen(int port)
 {
-    if (listening)
-        panic("Socket already listening!");
+    panic_if(listening, "Socket already listening!");
 
     // only create socket if not already created by a previous call
     if (fd == -1) {
         fd = socketCloexec(PF_INET, SOCK_STREAM, 0);
-        if (fd < 0)
-            panic("Can't create socket:%s !", strerror(errno));
+        panic_if(fd < 0, "Can't create socket:%s !", strerror(errno));
     }
 
-    if (reuse) {
-        int i = 1;
-        if (::setsockopt(fd, SOL_SOCKET, SO_REUSEADDR, (char *)&i,
-                         sizeof(i)) < 0)
-            panic("ListenSocket(listen): setsockopt() SO_REUSEADDR failed!");
-    }
+    int i = 1;
+    int ret = ::setsockopt(fd, SOL_SOCKET, SO_REUSEADDR, &i, sizeof(i));
+    panic_if(ret < 0,
+            "ListenSocket(listen): setsockopt() SO_REUSEADDR failed!");
 
     struct sockaddr_in sockaddr;
     sockaddr.sin_family = PF_INET;
@@ -211,16 +207,16 @@ ListenSocket::listen(int port, bool reuse)
     sockaddr.sin_port = htons(port);
     // finally clear sin_zero
     std::memset(&sockaddr.sin_zero, 0, sizeof(sockaddr.sin_zero));
-    int ret = ::bind(fd, (struct sockaddr *)&sockaddr, sizeof (sockaddr));
+    ret = ::bind(fd, (struct sockaddr *)&sockaddr, sizeof (sockaddr));
     if (ret != 0) {
-        if (ret == -1 && errno != EADDRINUSE)
-            panic("ListenSocket(listen): bind() failed!");
+        panic_if(ret == -1 && errno != EADDRINUSE,
+                "ListenSocket(listen): bind() failed!");
         return false;
     }
 
     if (::listen(fd, 1) == -1) {
-        if (errno != EADDRINUSE)
-            panic("ListenSocket(listen): listen() failed!");
+        panic_if(errno != EADDRINUSE,
+                "ListenSocket(listen): listen() failed!");
         // User may decide to retry with a different port later; however, the
         // socket is already bound to a port and the next bind will surely
         // fail. We'll close the socket and reset fd to -1 so our user can
diff --git a/src/base/socket.hh b/src/base/socket.hh
index aa451b691a..d2393e9325 100644
--- a/src/base/socket.hh
+++ b/src/base/socket.hh
@@ -106,7 +106,7 @@ class ListenSocket
 
     virtual int accept();
 
-    virtual bool listen(int port, bool reuse = true);
+    virtual bool listen(int port);
 
     int getfd() const { return fd; }
     bool islistening() const { return listening; }
diff --git a/src/base/socket.test.cc b/src/base/socket.test.cc
index 1ab1f21070..cb24c49090 100644
--- a/src/base/socket.test.cc
+++ b/src/base/socket.test.cc
@@ -162,19 +162,6 @@ TEST(SocketTest, ListenToPort)
     EXPECT_FALSE(listen_socket.allDisabled());
 }
 
-TEST(SocketTest, ListenToPortReuseFalse)
-{
-    MockListenSocket listen_socket;
-    /*
-     * The ListenSocket object should have the same state regardless as to
-     * whether reuse is true or false (it is true by default).
-     */
-    EXPECT_TRUE(listen_socket.listen(TestPort1, false));
-    EXPECT_NE(-1, listen_socket.getfd());
-    EXPECT_TRUE(listen_socket.islistening());
-    EXPECT_FALSE(listen_socket.allDisabled());
-}
-
 TEST(SocketTest, RelistenWithSameInstanceSamePort)
 {
     MockListenSocket listen_socket;
@@ -185,7 +172,9 @@ TEST(SocketTest, RelistenWithSameInstanceSamePort)
      */
     gtestLogOutput.str("");
     EXPECT_ANY_THROW(listen_socket.listen(TestPort1));
-    std::string expected = "panic: Socket already listening!\n";
+    std::string expected =
+        "panic: panic condition listening occurred: "
+        "Socket already listening!\n";
     std::string actual = gtestLogOutput.str();
     EXPECT_EQ(expected, actual);
 }
@@ -201,7 +190,9 @@ TEST(SocketTest, RelistenWithSameInstanceDifferentPort)
     gtestLogOutput.str("");
     EXPECT_ANY_THROW(listen_socket.listen(TestPort2));
 
-    std::string expected = "panic: Socket already listening!\n";
+    std::string expected =
+        "panic: panic condition listening occurred: "
+        "Socket already listening!\n";
     std::string actual = gtestLogOutput.str();
     EXPECT_EQ(expected, actual);
 }
diff --git a/src/base/vnc/vncserver.cc b/src/base/vnc/vncserver.cc
index 5792c440fc..39a1338799 100644
--- a/src/base/vnc/vncserver.cc
+++ b/src/base/vnc/vncserver.cc
@@ -164,7 +164,7 @@ VncServer::listen(int port)
         return;
     }
 
-    while (!listener.listen(port, true)) {
+    while (!listener.listen(port)) {
         DPRINTF(VNC,
                 "can't bind address vnc server port %d in use PID %d\n",
                 port, getpid());
diff --git a/src/cpu/nativetrace.cc b/src/cpu/nativetrace.cc
index 5b7d0b9895..714787ffa4 100644
--- a/src/cpu/nativetrace.cc
+++ b/src/cpu/nativetrace.cc
@@ -45,8 +45,7 @@ NativeTrace::NativeTrace(const Params &p)
         fatal("All listeners are disabled!");
 
     int port = 8000;
-    while (!native_listener.listen(port, true))
-    {
+    while (!native_listener.listen(port)) {
         DPRINTF(GDBMisc, "Can't bind port %d\n", port);
         port++;
     }
diff --git a/src/dev/net/ethertap.cc b/src/dev/net/ethertap.cc
index b28f255d83..0769ad1203 100644
--- a/src/dev/net/ethertap.cc
+++ b/src/dev/net/ethertap.cc
@@ -259,7 +259,7 @@ class TapListener
 void
 TapListener::listen()
 {
-    while (!listener.listen(port, true)) {
+    while (!listener.listen(port)) {
         DPRINTF(Ethernet, "TapListener(listen): Can't bind port %d\n", port);
         port++;
     }
diff --git a/src/dev/serial/terminal.cc b/src/dev/serial/terminal.cc
index fada99c2a4..9564876826 100644
--- a/src/dev/serial/terminal.cc
+++ b/src/dev/serial/terminal.cc
@@ -175,7 +175,7 @@ Terminal::listen(int port)
         return;
     }
 
-    while (!listener.listen(port, true)) {
+    while (!listener.listen(port)) {
         DPRINTF(Terminal,
                 ": can't bind address terminal port %d inuse PID %d\n",
                 port, getpid());

From 2112eea41424cfa360c785c4ead1639c666bde24 Mon Sep 17 00:00:00 2001
From: Matt Sinclair <mattdsinclair.wisc@gmail.com>
Date: Thu, 23 Mar 2023 15:42:02 -0500
Subject: [PATCH 279/492] tests: add GPU Ruby Random tester with WB L2 caches

The current GPU Ruby Random tester tests only test for WT L2 caches,
meaning that some transitions (specific to WB caches) are never
tested.  To help ensure better coverage, this commit adds a separate
test that tests WB GPU L2 caches to the per-checkin and nightly
regressions.

Change-Id: I539ece3b825b9a38630027d947dc11ebef588752
Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/69258
Tested-by: kokoro <noreply+kokoro@google.com>
Maintainer: Bobby Bruce <bbruce@ucdavis.edu>
Reviewed-by: Bobby Bruce <bbruce@ucdavis.edu>
---
 tests/gem5/gpu/test_gpu_ruby_random_wbL2.py | 84 +++++++++++++++++++++
 1 file changed, 84 insertions(+)
 create mode 100644 tests/gem5/gpu/test_gpu_ruby_random_wbL2.py

diff --git a/tests/gem5/gpu/test_gpu_ruby_random_wbL2.py b/tests/gem5/gpu/test_gpu_ruby_random_wbL2.py
new file mode 100644
index 0000000000..9af4e65a11
--- /dev/null
+++ b/tests/gem5/gpu/test_gpu_ruby_random_wbL2.py
@@ -0,0 +1,84 @@
+# Copyright (c) 2023 The Board of Regents of the University of Wisconsin
+# All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are
+# met: redistributions of source code must retain the above copyright
+# notice, this list of conditions and the following disclaimer;
+# redistributions in binary form must reproduce the above copyright
+# notice, this list of conditions and the following disclaimer in the
+# documentation and/or other materials provided with the distribution;
+# neither the name of the copyright holders nor the names of its
+# contributors may be used to endorse or promote products derived from
+# this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+from testlib import *
+
+"""
+This file contains random tests for the Ruby GPU protocols with a WB L2 cache.
+"""
+
+# This test will first run the GPU protocol random tester -- it should take
+# about 30 seconds to run and provides good coverage for the coherence
+# protocol.
+#
+# Input choices (some are default and thus implicit):
+# - use small cache size to encourage races
+# - use small system size to encourage races since more requests per CU (and
+#   faster sim)
+# - use small address range to encourage more races
+# - use small episode length to encourage more races
+# - 50K tests runs in ~30 seconds with reasonably good coverage
+# - num-dmas = 0 because VIPER doesn't support partial cache line writes, which
+#   DMAs need
+gem5_verify_config(
+    name="ruby-gpu-random-test-wbL2-perCheckin",
+    fixtures=(),
+    verifiers=(),
+    config=joinpath(
+        config.base_dir, "configs", "example", "ruby_gpu_random_test.py"
+    ),
+    config_args=["--WB_L2", "--test-length", "50000", "--num-dmas", "0"],
+    valid_isas=(constants.vega_x86_tag,),
+    valid_hosts=constants.supported_hosts,
+    length=constants.long_tag,
+)
+
+
+# This test will run the GPU protocol random tester in nightly -- it should
+# take about 30 minutes to run and provides good coverage for the coherence
+# protocol.
+#
+# Input choices (some are default and thus implicit):
+#  - use small cache size to encourage races
+#  - use small system size to encourage races since more requests per CU (and
+#    faster sim)
+#  - use small address range to encourage more races
+#  - use small episode length to encourage more races
+#  - 5M tests runs in ~30 minutes with reasonably good coverage
+#  - num-dmas = 0 because VIPER doesn't support partial cache line writes,
+#    which DMAs need
+gem5_verify_config(
+    name="ruby-gpu-random-test-wbL2-nightly",
+    fixtures=(),
+    verifiers=(),
+    config=joinpath(
+        config.base_dir, "configs", "example", "ruby_gpu_random_test.py"
+    ),
+    config_args=["--WB_L2", "--test-length", "5000000", "--num-dmas", "0"],
+    valid_isas=(constants.vega_x86_tag,),
+    valid_hosts=constants.supported_hosts,
+    length=constants.long_tag,
+)

From 024a907cbbbedebf12367e621c27271f715269bf Mon Sep 17 00:00:00 2001
From: Ayaz Akram <yazakram@ucdavis.edu>
Date: Sun, 26 Mar 2023 03:19:46 -0700
Subject: [PATCH 280/492] stdlib: Small fix in mesi three level component

This change ensures that if cache_line_size is not
an integer type, we don't incorrectly raise the
exception of cache size not equal to 2^bits.

Change-Id: I5a06cdac820283feb54f23d805fd87490fae1c3b
Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/69297
Tested-by: kokoro <noreply+kokoro@google.com>
Reviewed-by: Jason Lowe-Power <power.jg@gmail.com>
Maintainer: Jason Lowe-Power <power.jg@gmail.com>
---
 .../cachehierarchies/ruby/caches/mesi_three_level/l1_cache.py   | 2 +-
 .../cachehierarchies/ruby/caches/mesi_three_level/l2_cache.py   | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/src/python/gem5/components/cachehierarchies/ruby/caches/mesi_three_level/l1_cache.py b/src/python/gem5/components/cachehierarchies/ruby/caches/mesi_three_level/l1_cache.py
index b4854816fb..e746579834 100644
--- a/src/python/gem5/components/cachehierarchies/ruby/caches/mesi_three_level/l1_cache.py
+++ b/src/python/gem5/components/cachehierarchies/ruby/caches/mesi_three_level/l1_cache.py
@@ -92,7 +92,7 @@ class L1Cache(L0Cache_Controller):
 
     def getBlockSizeBits(self, cache_line_size):
         bits = int(math.log(cache_line_size, 2))
-        if 2**bits != cache_line_size:
+        if 2**bits != int(cache_line_size):
             raise Exception("Cache line size is not a power of 2!")
         return bits
 
diff --git a/src/python/gem5/components/cachehierarchies/ruby/caches/mesi_three_level/l2_cache.py b/src/python/gem5/components/cachehierarchies/ruby/caches/mesi_three_level/l2_cache.py
index d54e1ab8dc..dfc1304a87 100644
--- a/src/python/gem5/components/cachehierarchies/ruby/caches/mesi_three_level/l2_cache.py
+++ b/src/python/gem5/components/cachehierarchies/ruby/caches/mesi_three_level/l2_cache.py
@@ -90,7 +90,7 @@ class L2Cache(L1Cache_Controller):
 
     def getBlockSizeBits(self, cache_line_size):
         bits = int(math.log(cache_line_size, 2))
-        if 2**bits != cache_line_size:
+        if 2**bits != int(cache_line_size):
             raise Exception("Cache line size is not a power of 2!")
         return bits
 

From 8dfaec0e104c8802c96573784e784811d5940262 Mon Sep 17 00:00:00 2001
From: "Bobby R. Bruce" <bbruce@ucdavis.edu>
Date: Mon, 27 Mar 2023 13:55:29 -0700
Subject: [PATCH 281/492] util-docker: Update systemc docker image to use 22.04

Updates the Dockerfile, and therefore the Docker image, to use Ubuntu
22.04 instead of Ubuntu 20.04.

Change-Id: I0969205159d47ece9b0b6c0452ce0f3420f32e4c
Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/69317
Maintainer: Bobby Bruce <bbruce@ucdavis.edu>
Tested-by: kokoro <noreply+kokoro@google.com>
Reviewed-by: Bobby Bruce <bbruce@ucdavis.edu>
---
 util/dockerfiles/systemc-2.3.3/Dockerfile | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/util/dockerfiles/systemc-2.3.3/Dockerfile b/util/dockerfiles/systemc-2.3.3/Dockerfile
index 0653a86b11..ac94666d6d 100644
--- a/util/dockerfiles/systemc-2.3.3/Dockerfile
+++ b/util/dockerfiles/systemc-2.3.3/Dockerfile
@@ -24,7 +24,7 @@
 # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 
-FROM ubuntu:20.04
+FROM ubuntu:22.04
 
 ENV DEBIAN_FRONTEND=noninteractive
 RUN apt -y update && apt -y upgrade && \

From 68fed7b5e1a2c976c23a4051e59181e5a5a2fece Mon Sep 17 00:00:00 2001
From: "Bobby R. Bruce" <bbruce@ucdavis.edu>
Date: Thu, 23 Mar 2023 12:55:32 -0700
Subject: [PATCH 282/492] tests: Fix Replacement Policy Tests

The script run by 'test_replacement_policies.py' was named
'run_replacement_policy_test.py'. This caused the TestLib framework to
fail as any file with the string "test" in it was attempted to parse as
a test. As this is a gem5 Python config file, this caused a failure.

To fix this the file was renamed 'run_replacement_policy.py'.

Change-Id: I6f0a09e4f5056a9c94e258c9aea26bf277206733
Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/69237
Reviewed-by: Matt Sinclair <mattdsinclair@gmail.com>
Tested-by: kokoro <noreply+kokoro@google.com>
Maintainer: Matt Sinclair <mattdsinclair@gmail.com>
---
 ...run_replacement_policy_test.py => run_replacement_policy.py} | 0
 tests/gem5/replacement-policies/test_replacement_policies.py    | 2 +-
 2 files changed, 1 insertion(+), 1 deletion(-)
 rename tests/gem5/replacement-policies/{run_replacement_policy_test.py => run_replacement_policy.py} (100%)

diff --git a/tests/gem5/replacement-policies/run_replacement_policy_test.py b/tests/gem5/replacement-policies/run_replacement_policy.py
similarity index 100%
rename from tests/gem5/replacement-policies/run_replacement_policy_test.py
rename to tests/gem5/replacement-policies/run_replacement_policy.py
diff --git a/tests/gem5/replacement-policies/test_replacement_policies.py b/tests/gem5/replacement-policies/test_replacement_policies.py
index 7b00e10429..4c74f72a2a 100644
--- a/tests/gem5/replacement-policies/test_replacement_policies.py
+++ b/tests/gem5/replacement-policies/test_replacement_policies.py
@@ -45,7 +45,7 @@ def test_replacement_policy(config_name: str, config_path: str) -> None:
             "tests",
             "gem5",
             "replacement-policies",
-            "run_replacement_policy_test.py",
+            "run_replacement_policy.py",
         ),
         config_args=[config_name, config_path],
         valid_isas=(constants.null_tag,),

From 3f59eed52304e5b0ff36627be6bf772bcd3615aa Mon Sep 17 00:00:00 2001
From: "Bobby R. Bruce" <bbruce@ucdavis.edu>
Date: Thu, 23 Mar 2023 12:59:58 -0700
Subject: [PATCH 283/492] tests: Fix the Weekly arm-boot-tests

These tests were broken due to an incorrect 'valid_isas' flag. This
patch fixes this error.

Change-Id: I941deb209151dd94ca64f2741229398f13153c50
Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/69238
Reviewed-by: Jason Lowe-Power <power.jg@gmail.com>
Tested-by: kokoro <noreply+kokoro@google.com>
Maintainer: Jason Lowe-Power <power.jg@gmail.com>
---
 tests/gem5/arm-boot-tests/test_linux_boot.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tests/gem5/arm-boot-tests/test_linux_boot.py b/tests/gem5/arm-boot-tests/test_linux_boot.py
index d6e8ac94a5..9e04e24d62 100644
--- a/tests/gem5/arm-boot-tests/test_linux_boot.py
+++ b/tests/gem5/arm-boot-tests/test_linux_boot.py
@@ -94,7 +94,7 @@ arm-boot-test"
             "arm_boot_exit_run.py",
         ),
         config_args=config_args,
-        valid_isas=(constants.all_compiled,),
+        valid_isas=(constants.all_compiled_tag,),
         valid_hosts=constants.supported_hosts,
         length=length,
         protocol=protocol_to_use,

From 4ce826b24422d7296800c48ebc023875f8997252 Mon Sep 17 00:00:00 2001
From: "Bobby R. Bruce" <bbruce@ucdavis.edu>
Date: Thu, 23 Mar 2023 13:11:59 -0700
Subject: [PATCH 284/492] tests: Check if ARM/gem5.opt not built in
 test_hdf5.py

This test was failing to load as ARM/gem5.opt is not built when this
test is pased during the Weekly tests. This bug is highlighed here:
https://gem5.atlassian.net/browse/GEM5-1073.

Comments have been added explaining this issue and the test not fails
more quitely. It, however, is not run.

Change-Id: I1f26f541a15438f6124a7541c01d44f70647762a
Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/69239
Tested-by: kokoro <noreply+kokoro@google.com>
Reviewed-by: Jason Lowe-Power <power.jg@gmail.com>
Maintainer: Jason Lowe-Power <power.jg@gmail.com>
---
 tests/gem5/stats/test_hdf5.py | 12 ++++++++++++
 1 file changed, 12 insertions(+)

diff --git a/tests/gem5/stats/test_hdf5.py b/tests/gem5/stats/test_hdf5.py
index 993534a61d..8775d22ad8 100644
--- a/tests/gem5/stats/test_hdf5.py
+++ b/tests/gem5/stats/test_hdf5.py
@@ -39,6 +39,12 @@ Test file for the hdf5 stats.
 It just runs an SE simulation with the hdf5 stats and checks that the
 simulation succeeds and the stats file exists.
 No specific checks on the stats are performed.
+
+**Important Note**: This test has a major design flaw, noted here:
+https://gem5.atlassian.net/browse/GEM5-1073.
+It will not run if the build/ARM/gem5.opt has not been built. As this is not
+built prior to this test being processed during the Weekly run, this test is
+not run.
 """
 import re
 import os
@@ -54,6 +60,12 @@ def have_hdf5():
     have_hdf5_file = os.path.join(
         config.base_dir, "build", constants.arm_tag, "config", "have_hdf5.hh"
     )
+    if not os.path.exists(have_hdf5_file):
+        # This will most likely happen if the file has yet to have been
+        # compiled. It should be noted that this case is likely. This is not
+        # a good test as checking if hdf5 is available requires compilation
+        # which is not assumed to be true at this stage in the test.
+        return False
     with open(have_hdf5_file) as f:
         content = f.read()
 

From c8abd97584d4dffe32c21773b98b41fa991f4231 Mon Sep 17 00:00:00 2001
From: "Bobby R. Bruce" <bbruce@ucdavis.edu>
Date: Thu, 23 Mar 2023 14:07:16 -0700
Subject: [PATCH 285/492] tests: Disable the looppoint checkpoint tests

The Looppoint checkpoint  feature is still under-development.
These tests are stalling indefinitely and causing the Weekly tests to
timeout. They are therefore disabled until looppoint functionality is
complete and this bug is resolved.

Change-Id: I2c16abdaec639120c0ba349d90f5cc2fe747f8e6
Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/69240
Reviewed-by: Bobby Bruce <bbruce@ucdavis.edu>
Maintainer: Bobby Bruce <bbruce@ucdavis.edu>
Tested-by: kokoro <noreply+kokoro@google.com>
---
 .../test_gem5_library_examples.py             | 107 +++++++++---------
 1 file changed, 55 insertions(+), 52 deletions(-)

diff --git a/tests/gem5/gem5_library_example_tests/test_gem5_library_examples.py b/tests/gem5/gem5_library_example_tests/test_gem5_library_examples.py
index 7db46b0770..e43d461b35 100644
--- a/tests/gem5/gem5_library_example_tests/test_gem5_library_examples.py
+++ b/tests/gem5/gem5_library_example_tests/test_gem5_library_examples.py
@@ -325,56 +325,59 @@ gem5_verify_config(
     length=constants.very_long_tag,
 )
 
-gem5_verify_config(
-    name="test-gem5-library-create-looppoint-checkpoints",
-    fixtures=(),
-    verifiers=(),
-    config=joinpath(
-        config.base_dir,
-        "configs",
-        "example",
-        "gem5_library",
-        "looppoints",
-        "create-looppoint-checkpoint.py",
-    ),
-    config_args=[
-        "--checkpoint-path",
-        joinpath(resource_path, "looppoint-checkpoint-save"),
-    ],
-    valid_isas=(constants.all_compiled_tag,),
-    valid_hosts=constants.supported_hosts,
-    length=constants.very_long_tag,
-)
+# The LoopPoint-Checkpointing feature is still under development, therefore
+# these tests are temporarily disabled until this feature is complete.#
 
-for region in (
-    "1",
-    "2",
-    "3",
-    "5",
-    "6",
-    "7",
-    "8",
-    "9",
-    "10",
-    "11",
-    "12",
-    "13",
-    "14",
-):
-    gem5_verify_config(
-        name=f"test-gem5-library-restore-looppoint-checkpoint-region-f{region}",
-        fixtures=(),
-        verifiers=(),
-        config=joinpath(
-            config.base_dir,
-            "configs",
-            "example",
-            "gem5_library",
-            "looppoints",
-            "restore-looppoint-checkpoint.py",
-        ),
-        config_args=["--checkpoint-region", region],
-        valid_isas=(constants.all_compiled_tag,),
-        valid_hosts=constants.supported_hosts,
-        length=constants.very_long_tag,
-    )
+# gem5_verify_config(
+#    name="test-gem5-library-create-looppoint-checkpoints",
+#    fixtures=(),
+#    verifiers=(),
+#    config=joinpath(
+#        config.base_dir,
+#        "configs",
+#        "example",
+#        "gem5_library",
+#        "looppoints",
+#        "create-looppoint-checkpoint.py",
+#    ),
+#    config_args=[
+#        "--checkpoint-path",
+#        joinpath(resource_path, "looppoint-checkpoint-save"),
+#    ],
+#    valid_isas=(constants.all_compiled_tag,),
+#    valid_hosts=constants.supported_hosts,
+#    length=constants.very_long_tag,
+# )
+
+# for region in (
+#    "1",
+#    "2",
+#    "3",
+#    "5",
+#    "6",
+#    "7",
+#    "8",
+#    "9",
+#    "10",
+#    "11",
+#    "12",
+#    "13",
+#    "14",
+# ):
+#    gem5_verify_config(
+#        name=f"test-gem5-library-restore-looppoint-checkpoint-region-f{region}",
+#        fixtures=(),
+#        verifiers=(),
+#        config=joinpath(
+#            config.base_dir,
+#            "configs",
+#            "example",
+#            "gem5_library",
+#            "looppoints",
+#            "restore-looppoint-checkpoint.py",
+#        ),
+#        config_args=["--checkpoint-region", region],
+#        valid_isas=(constants.all_compiled_tag,),
+#        valid_hosts=constants.supported_hosts,
+#        length=constants.very_long_tag,
+#    )

From c68fac2cfc7c3915217de40b3025b34a4f95a3ec Mon Sep 17 00:00:00 2001
From: "Bobby R. Bruce" <bbruce@ucdavis.edu>
Date: Thu, 23 Mar 2023 14:26:53 -0700
Subject: [PATCH 286/492] tests: Add "run threads" optional parameter to
 weekly.sh

Adds a third argument to the weekly.sh script, allowing for a different
number of "run threads" (maximum number of tests that can be run at
once) and "compile threads" (threads to allocate to the running of
gem5). This is useful for cases where a high number of compilation
threads is wanted (e.g., 50 or so), but that number of tests running at
once is undesirable.

Change-Id: Ifd67c5826c6f3602db1546a8a48e615cb03ba820
Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/69241
Reviewed-by: Bobby Bruce <bbruce@ucdavis.edu>
Maintainer: Bobby Bruce <bbruce@ucdavis.edu>
Tested-by: kokoro <noreply+kokoro@google.com>
---
 tests/weekly.sh | 15 ++++++++++++---
 1 file changed, 12 insertions(+), 3 deletions(-)

diff --git a/tests/weekly.sh b/tests/weekly.sh
index f9d3e4bc04..d07cc8b2a6 100755
--- a/tests/weekly.sh
+++ b/tests/weekly.sh
@@ -39,15 +39,24 @@ docker_mem_limit="24g"
 # branch)
 tag="latest"
 
-# We assume the first two arguments are the number of threads followed by the
-# GPU ISA to test. These default to 1 and GCN3_X86 is no argument is given.
+# We assume the first three arguments are the number of threads to use for
+# compilation followed by the GPU ISA to test, and finally, the number of
+# "run threads", the maximum number of tests to be run at once. By default the
+# number of compile threads 1 and the GPU ISA is GCN3_X86. The number of
+# "run threads" is equal to the number of compile threads by default.
 threads=1
 gpu_isa=GCN3_X86
+run_threads=1
 if [[ $# -eq 1 ]]; then
     threads=$1
+    run_threads=${threads}
 elif [[ $# -eq 2 ]]; then
     threads=$1
     gpu_isa=$2
+elif [[ $# -eq 3 ]]; then
+    threads=$1
+    gpu_isa=$2
+    run_threads=$3
 else
     if [[ $# -gt 0 ]]; then
         echo "Invalid number of arguments: $#"
@@ -64,7 +73,7 @@ fi
 docker run -u $UID:$GID --volume "${gem5_root}":"${gem5_root}" -w \
     "${gem5_root}"/tests --memory="${docker_mem_limit}" --rm \
     gcr.io/gem5-test/ubuntu-22.04_all-dependencies:${tag} \
-        ./main.py run --length very-long -j${threads} -t${threads} -vv
+        ./main.py run --length very-long -j${threads} -t${run_threads} -vv
 
 mkdir -p tests/testing-results
 

From 461520d0ab446966d4e144b413e7b6854ca7ef54 Mon Sep 17 00:00:00 2001
From: Xuan Hu <huxuan@bosc.ac.cn>
Date: Tue, 14 Mar 2023 12:33:32 +0800
Subject: [PATCH 287/492] ext: Update softfloat to 3d full version

* Add all softfloat source files without any change.
* Remove useless file softfloat.mk.in, since gem5 use Scons.
* Add `use_fast_int64` in SConscript to distinguish src of two strategies for data
  larger than 64 bits.
  * The SoftFloat library uses two strategies to handle data larger than 64bit. One is
    spliting data into `fast_int64`, and the other is using pointer. Two strategies
    are distinguished by macro `SOFTFLOAT_FAST_INT64`. But not all "*.c" files are
    guarded by this macro, which leads to including useless files in compiling progress
    and compiling error. `use_fast_int64` used in SConscript can exclude unnecessary
    files.

Change-Id: I7cec10412c00a35c247299cd92d83cdee9066410
Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/66552
Maintainer: Jason Lowe-Power <power.jg@gmail.com>
Tested-by: kokoro <noreply+kokoro@google.com>
Reviewed-by: Hoa Nguyen <hoanguyen@ucdavis.edu>
---
 ext/softfloat/SConscript                     | 310 +++++++++++----
 ext/softfloat/extF80M_add.c                  | 100 +++++
 ext/softfloat/extF80M_div.c                  | 194 +++++++++
 ext/softfloat/extF80M_eq.c                   |  98 +++++
 ext/softfloat/extF80M_eq_signaling.c         |  92 +++++
 ext/softfloat/extF80M_le.c                   | 106 +++++
 ext/softfloat/extF80M_le_quiet.c             | 112 ++++++
 ext/softfloat/extF80M_lt.c                   | 106 +++++
 ext/softfloat/extF80M_lt_quiet.c             | 112 ++++++
 ext/softfloat/extF80M_mul.c                  | 139 +++++++
 ext/softfloat/extF80M_rem.c                  | 204 ++++++++++
 ext/softfloat/extF80M_roundToInt.c           | 169 ++++++++
 ext/softfloat/extF80M_sqrt.c                 | 180 +++++++++
 ext/softfloat/extF80M_sub.c                  | 100 +++++
 ext/softfloat/extF80M_to_f128M.c             | 125 ++++++
 ext/softfloat/extF80M_to_f16.c               | 112 ++++++
 ext/softfloat/extF80M_to_f32.c               | 112 ++++++
 ext/softfloat/extF80M_to_f64.c               | 112 ++++++
 ext/softfloat/extF80M_to_i32.c               | 100 +++++
 ext/softfloat/extF80M_to_i32_r_minMag.c      | 120 ++++++
 ext/softfloat/extF80M_to_i64.c               |  97 +++++
 ext/softfloat/extF80M_to_i64_r_minMag.c      | 115 ++++++
 ext/softfloat/extF80M_to_ui32.c              | 101 +++++
 ext/softfloat/extF80M_to_ui32_r_minMag.c     | 111 ++++++
 ext/softfloat/extF80M_to_ui64.c              |  97 +++++
 ext/softfloat/extF80M_to_ui64_r_minMag.c     | 108 +++++
 ext/softfloat/extF80_add.c                   |  80 ++++
 ext/softfloat/extF80_div.c                   | 203 ++++++++++
 ext/softfloat/extF80_eq.c                    |  73 ++++
 ext/softfloat/extF80_eq_signaling.c          |  67 ++++
 ext/softfloat/extF80_isSignalingNaN.c        |  51 +++
 ext/softfloat/extF80_le.c                    |  73 ++++
 ext/softfloat/extF80_le_quiet.c              |  78 ++++
 ext/softfloat/extF80_lt.c                    |  73 ++++
 ext/softfloat/extF80_lt_quiet.c              |  78 ++++
 ext/softfloat/extF80_mul.c                   | 158 ++++++++
 ext/softfloat/extF80_rem.c                   | 225 +++++++++++
 ext/softfloat/extF80_roundToInt.c            | 147 +++++++
 ext/softfloat/extF80_sqrt.c                  | 176 +++++++++
 ext/softfloat/extF80_sub.c                   |  80 ++++
 ext/softfloat/extF80_to_f128.c               |  75 ++++
 ext/softfloat/extF80_to_f16.c                |  96 +++++
 ext/softfloat/extF80_to_f32.c                |  96 +++++
 ext/softfloat/extF80_to_f64.c                |  96 +++++
 ext/softfloat/extF80_to_i32.c                |  83 ++++
 ext/softfloat/extF80_to_i32_r_minMag.c       |  97 +++++
 ext/softfloat/extF80_to_i64.c                |  89 +++++
 ext/softfloat/extF80_to_i64_r_minMag.c       |  94 +++++
 ext/softfloat/extF80_to_ui32.c               |  83 ++++
 ext/softfloat/extF80_to_ui32_r_minMag.c      |  88 +++++
 ext/softfloat/extF80_to_ui64.c               |  84 ++++
 ext/softfloat/extF80_to_ui64_r_minMag.c      |  88 +++++
 ext/softfloat/f128M_add.c                    |  97 +++++
 ext/softfloat/f128M_div.c                    | 187 +++++++++
 ext/softfloat/f128M_eq.c                     | 100 +++++
 ext/softfloat/f128M_eq_signaling.c           |  92 +++++
 ext/softfloat/f128M_le.c                     |  93 +++++
 ext/softfloat/f128M_le_quiet.c               |  96 +++++
 ext/softfloat/f128M_lt.c                     |  93 +++++
 ext/softfloat/f128M_lt_quiet.c               |  96 +++++
 ext/softfloat/f128M_mul.c                    | 158 ++++++++
 ext/softfloat/f128M_mulAdd.c                 |  92 +++++
 ext/softfloat/f128M_rem.c                    | 182 +++++++++
 ext/softfloat/f128M_roundToInt.c             | 216 ++++++++++
 ext/softfloat/f128M_sqrt.c                   | 228 +++++++++++
 ext/softfloat/f128M_sub.c                    |  97 +++++
 ext/softfloat/f128M_to_extF80M.c             | 101 +++++
 ext/softfloat/f128M_to_f16.c                 | 113 ++++++
 ext/softfloat/f128M_to_f32.c                 | 109 ++++++
 ext/softfloat/f128M_to_f64.c                 | 112 ++++++
 ext/softfloat/f128M_to_i32.c                 |  98 +++++
 ext/softfloat/f128M_to_i32_r_minMag.c        | 106 +++++
 ext/softfloat/f128M_to_i64.c                 | 102 +++++
 ext/softfloat/f128M_to_i64_r_minMag.c        | 124 ++++++
 ext/softfloat/f128M_to_ui32.c                |  98 +++++
 ext/softfloat/f128M_to_ui32_r_minMag.c       | 102 +++++
 ext/softfloat/f128M_to_ui64.c                | 102 +++++
 ext/softfloat/f128M_to_ui64_r_minMag.c       | 114 ++++++
 ext/softfloat/f128_add.c                     |   3 +-
 ext/softfloat/f128_classify.c                |   5 +-
 ext/softfloat/f128_div.c                     |   5 +-
 ext/softfloat/f128_eq.c                      |   5 +-
 ext/softfloat/f128_eq_signaling.c            |   3 +-
 ext/softfloat/f128_isSignalingNaN.c          |   5 +-
 ext/softfloat/f128_le.c                      |   3 +-
 ext/softfloat/f128_le_quiet.c                |   5 +-
 ext/softfloat/f128_lt.c                      |   3 +-
 ext/softfloat/f128_lt_quiet.c                |   5 +-
 ext/softfloat/f128_mul.c                     |   5 +-
 ext/softfloat/f128_mulAdd.c                  |   3 +-
 ext/softfloat/f128_rem.c                     |   5 +-
 ext/softfloat/f128_roundToInt.c              |   5 +-
 ext/softfloat/f128_sqrt.c                    |   5 +-
 ext/softfloat/f128_sub.c                     |   3 +-
 ext/softfloat/f128_to_extF80.c               | 109 ++++++
 ext/softfloat/f128_to_f16.c                  |   5 +-
 ext/softfloat/f128_to_f32.c                  |   5 +-
 ext/softfloat/f128_to_f64.c                  |   5 +-
 ext/softfloat/f128_to_i32.c                  |   5 +-
 ext/softfloat/f128_to_i32_r_minMag.c         |   5 +-
 ext/softfloat/f128_to_i64.c                  |   5 +-
 ext/softfloat/f128_to_i64_r_minMag.c         |   5 +-
 ext/softfloat/f128_to_ui32.c                 |   5 +-
 ext/softfloat/f128_to_ui32_r_minMag.c        |   5 +-
 ext/softfloat/f128_to_ui64.c                 |   5 +-
 ext/softfloat/f128_to_ui64_r_minMag.c        |   5 +-
 ext/softfloat/f16_add.c                      |   3 +-
 ext/softfloat/f16_div.c                      |   5 +-
 ext/softfloat/f16_eq.c                       |   5 +-
 ext/softfloat/f16_eq_signaling.c             |   3 +-
 ext/softfloat/f16_isSignalingNaN.c           |   5 +-
 ext/softfloat/f16_le.c                       |   3 +-
 ext/softfloat/f16_le_quiet.c                 |   5 +-
 ext/softfloat/f16_lt.c                       |   3 +-
 ext/softfloat/f16_lt_quiet.c                 |   5 +-
 ext/softfloat/f16_mul.c                      |   5 +-
 ext/softfloat/f16_mulAdd.c                   |   3 +-
 ext/softfloat/f16_rem.c                      |   5 +-
 ext/softfloat/f16_roundToInt.c               |   5 +-
 ext/softfloat/f16_sqrt.c                     |   5 +-
 ext/softfloat/f16_sub.c                      |   3 +-
 ext/softfloat/f16_to_extF80.c                | 101 +++++
 ext/softfloat/f16_to_extF80M.c               | 111 ++++++
 ext/softfloat/f16_to_f128.c                  |   5 +-
 ext/softfloat/f16_to_f128M.c                 | 111 ++++++
 ext/softfloat/f16_to_f32.c                   |   5 +-
 ext/softfloat/f16_to_f64.c                   |   5 +-
 ext/softfloat/f16_to_i16.c                   |  57 +++
 ext/softfloat/f16_to_i32.c                   |   5 +-
 ext/softfloat/f16_to_i32_r_minMag.c          |   5 +-
 ext/softfloat/f16_to_i64.c                   |   5 +-
 ext/softfloat/f16_to_i64_r_minMag.c          |   5 +-
 ext/softfloat/f16_to_i8.c                    |  57 +++
 ext/softfloat/f16_to_ui16.c                  |  54 +++
 ext/softfloat/f16_to_ui32.c                  |   5 +-
 ext/softfloat/f16_to_ui32_r_minMag.c         |   5 +-
 ext/softfloat/f16_to_ui64.c                  |   5 +-
 ext/softfloat/f16_to_ui64_r_minMag.c         |   5 +-
 ext/softfloat/f16_to_ui8.c                   |  54 +++
 ext/softfloat/f32_add.c                      |   3 +-
 ext/softfloat/f32_classify.c                 |   5 +-
 ext/softfloat/f32_div.c                      |   5 +-
 ext/softfloat/f32_eq.c                       |   5 +-
 ext/softfloat/f32_eq_signaling.c             |   3 +-
 ext/softfloat/f32_isSignalingNaN.c           |   5 +-
 ext/softfloat/f32_le.c                       |   3 +-
 ext/softfloat/f32_le_quiet.c                 |   5 +-
 ext/softfloat/f32_lt.c                       |   3 +-
 ext/softfloat/f32_lt_quiet.c                 |   5 +-
 ext/softfloat/f32_mul.c                      |   5 +-
 ext/softfloat/f32_mulAdd.c                   |   3 +-
 ext/softfloat/f32_rem.c                      |   5 +-
 ext/softfloat/f32_roundToInt.c               |   5 +-
 ext/softfloat/f32_sqrt.c                     |   5 +-
 ext/softfloat/f32_sub.c                      |   3 +-
 ext/softfloat/f32_to_extF80.c                | 101 +++++
 ext/softfloat/f32_to_extF80M.c               | 111 ++++++
 ext/softfloat/f32_to_f128.c                  |   5 +-
 ext/softfloat/f32_to_f128M.c                 | 115 ++++++
 ext/softfloat/f32_to_f16.c                   |   5 +-
 ext/softfloat/f32_to_f64.c                   |   5 +-
 ext/softfloat/f32_to_i16.c                   |  57 +++
 ext/softfloat/f32_to_i32.c                   |   6 +-
 ext/softfloat/f32_to_i32_r_minMag.c          |   5 +-
 ext/softfloat/f32_to_i64.c                   |   5 +-
 ext/softfloat/f32_to_i64_r_minMag.c          |   5 +-
 ext/softfloat/f32_to_ui16.c                  |  53 +++
 ext/softfloat/f32_to_ui32.c                  |   5 +-
 ext/softfloat/f32_to_ui32_r_minMag.c         |   5 +-
 ext/softfloat/f32_to_ui64.c                  |   5 +-
 ext/softfloat/f32_to_ui64_r_minMag.c         |   5 +-
 ext/softfloat/f64_add.c                      |   3 +-
 ext/softfloat/f64_classify.c                 |   5 +-
 ext/softfloat/f64_div.c                      |   5 +-
 ext/softfloat/f64_eq.c                       |   5 +-
 ext/softfloat/f64_eq_signaling.c             |   3 +-
 ext/softfloat/f64_isSignalingNaN.c           |   5 +-
 ext/softfloat/f64_le.c                       |   3 +-
 ext/softfloat/f64_le_quiet.c                 |   5 +-
 ext/softfloat/f64_lt.c                       |   3 +-
 ext/softfloat/f64_lt_quiet.c                 |   5 +-
 ext/softfloat/f64_mul.c                      |   5 +-
 ext/softfloat/f64_mulAdd.c                   |   3 +-
 ext/softfloat/f64_rem.c                      |   5 +-
 ext/softfloat/f64_roundToInt.c               |   5 +-
 ext/softfloat/f64_sqrt.c                     |   5 +-
 ext/softfloat/f64_sub.c                      |   3 +-
 ext/softfloat/f64_to_extF80.c                | 101 +++++
 ext/softfloat/f64_to_extF80M.c               | 111 ++++++
 ext/softfloat/f64_to_f128.c                  |   5 +-
 ext/softfloat/f64_to_f128M.c                 | 117 ++++++
 ext/softfloat/f64_to_f16.c                   |   5 +-
 ext/softfloat/f64_to_f32.c                   |   5 +-
 ext/softfloat/f64_to_i32.c                   |   5 +-
 ext/softfloat/f64_to_i32_r_minMag.c          |   5 +-
 ext/softfloat/f64_to_i64.c                   |   5 +-
 ext/softfloat/f64_to_i64_r_minMag.c          |   5 +-
 ext/softfloat/f64_to_ui32.c                  |   5 +-
 ext/softfloat/f64_to_ui32_r_minMag.c         |   5 +-
 ext/softfloat/f64_to_ui64.c                  |   5 +-
 ext/softfloat/f64_to_ui64_r_minMag.c         |   5 +-
 ext/softfloat/fall_maxmin.c                  |  81 ++++
 ext/softfloat/fall_reciprocal.c              | 392 +++++++++++++++++++
 ext/softfloat/i32_to_extF80.c                |  65 +++
 ext/softfloat/i32_to_extF80M.c               |  78 ++++
 ext/softfloat/i32_to_f128.c                  |   3 +-
 ext/softfloat/i32_to_f128M.c                 |  81 ++++
 ext/softfloat/i32_to_f16.c                   |   3 +-
 ext/softfloat/i32_to_f32.c                   |   3 +-
 ext/softfloat/i32_to_f64.c                   |   3 +-
 ext/softfloat/i64_to_extF80.c                |  65 +++
 ext/softfloat/i64_to_extF80M.c               |  78 ++++
 ext/softfloat/i64_to_f128.c                  |   3 +-
 ext/softfloat/i64_to_f128M.c                 |  92 +++++
 ext/softfloat/i64_to_f16.c                   |   3 +-
 ext/softfloat/i64_to_f32.c                   |   3 +-
 ext/softfloat/i64_to_f64.c                   |   3 +-
 ext/softfloat/platform.h                     |   6 +-
 ext/softfloat/primitiveTypes.h               |   1 -
 ext/softfloat/primitives.h                   |   1 -
 ext/softfloat/s_add128.c                     |   1 -
 ext/softfloat/s_add256M.c                    |   1 -
 ext/softfloat/s_addCarryM.c                  |   1 -
 ext/softfloat/s_addComplCarryM.c             |   1 -
 ext/softfloat/s_addExtF80M.c                 | 186 +++++++++
 ext/softfloat/s_addF128M.c                   | 211 ++++++++++
 ext/softfloat/s_addM.c                       |   1 -
 ext/softfloat/s_addMagsExtF80.c              | 156 ++++++++
 ext/softfloat/s_addMagsF128.c                |   3 +-
 ext/softfloat/s_addMagsF16.c                 |   5 +-
 ext/softfloat/s_addMagsF32.c                 |   3 +-
 ext/softfloat/s_addMagsF64.c                 |   3 +-
 ext/softfloat/s_approxRecip32_1.c            |   1 -
 ext/softfloat/s_approxRecipSqrt32_1.c        |   1 -
 ext/softfloat/s_approxRecipSqrt_1Ks.c        |   1 -
 ext/softfloat/s_approxRecip_1Ks.c            |   1 -
 ext/softfloat/s_compare128M.c                |   1 -
 ext/softfloat/s_compare96M.c                 |   1 -
 ext/softfloat/s_compareNonnormExtF80M.c      | 111 ++++++
 ext/softfloat/s_countLeadingZeros16.c        |   1 -
 ext/softfloat/s_countLeadingZeros32.c        |   1 -
 ext/softfloat/s_countLeadingZeros64.c        |   1 -
 ext/softfloat/s_countLeadingZeros8.c         |   1 -
 ext/softfloat/s_eq128.c                      |   1 -
 ext/softfloat/s_invalidExtF80M.c             |  49 +++
 ext/softfloat/s_invalidF128M.c               |  53 +++
 ext/softfloat/s_isNaNF128M.c                 |  57 +++
 ext/softfloat/s_le128.c                      |   1 -
 ext/softfloat/s_lt128.c                      |   1 -
 ext/softfloat/s_mul128By32.c                 |   1 -
 ext/softfloat/s_mul128MTo256M.c              |   1 -
 ext/softfloat/s_mul128To256M.c               |   1 -
 ext/softfloat/s_mul64ByShifted32To128.c      |   1 -
 ext/softfloat/s_mul64To128.c                 |   1 -
 ext/softfloat/s_mul64To128M.c                |   1 -
 ext/softfloat/s_mulAddF128.c                 |   5 +-
 ext/softfloat/s_mulAddF128M.c                | 382 ++++++++++++++++++
 ext/softfloat/s_mulAddF16.c                  |   5 +-
 ext/softfloat/s_mulAddF32.c                  |   5 +-
 ext/softfloat/s_mulAddF64.c                  |   5 +-
 ext/softfloat/s_negXM.c                      |   1 -
 ext/softfloat/s_normExtF80SigM.c             |  52 +++
 ext/softfloat/s_normRoundPackMToExtF80M.c    |  78 ++++
 ext/softfloat/s_normRoundPackMToF128M.c      |  73 ++++
 ext/softfloat/s_normRoundPackToExtF80.c      |  71 ++++
 ext/softfloat/s_normRoundPackToF128.c        |   3 +-
 ext/softfloat/s_normRoundPackToF16.c         |   3 +-
 ext/softfloat/s_normRoundPackToF32.c         |   3 +-
 ext/softfloat/s_normRoundPackToF64.c         |   3 +-
 ext/softfloat/s_normSubnormalExtF80Sig.c     |  52 +++
 ext/softfloat/s_normSubnormalF128Sig.c       |   3 +-
 ext/softfloat/s_normSubnormalF128SigM.c      |  61 +++
 ext/softfloat/s_normSubnormalF16Sig.c        |   3 +-
 ext/softfloat/s_normSubnormalF32Sig.c        |   3 +-
 ext/softfloat/s_normSubnormalF64Sig.c        |   3 +-
 ext/softfloat/s_propagateNaNF128UI.c         |   3 +-
 ext/softfloat/s_propagateNaNF16UI.c          |   3 +-
 ext/softfloat/s_propagateNaNF32UI.c          |   3 +-
 ext/softfloat/s_propagateNaNF64UI.c          |   3 +-
 ext/softfloat/s_remStepMBy32.c               |   1 -
 ext/softfloat/s_roundMToI64.c                |   5 +-
 ext/softfloat/s_roundMToUI64.c               |   5 +-
 ext/softfloat/s_roundPackMToExtF80M.c        | 256 ++++++++++++
 ext/softfloat/s_roundPackMToF128M.c          | 178 +++++++++
 ext/softfloat/s_roundPackMToI64.c            |   5 +-
 ext/softfloat/s_roundPackMToUI64.c           |   5 +-
 ext/softfloat/s_roundPackToExtF80.c          | 256 ++++++++++++
 ext/softfloat/s_roundPackToF128.c            |   5 +-
 ext/softfloat/s_roundPackToF16.c             |   3 +-
 ext/softfloat/s_roundPackToF32.c             |   3 +-
 ext/softfloat/s_roundPackToF64.c             |   3 +-
 ext/softfloat/s_roundPackToI32.c             |   5 +-
 ext/softfloat/s_roundPackToI64.c             |   5 +-
 ext/softfloat/s_roundPackToUI32.c            |   5 +-
 ext/softfloat/s_roundPackToUI64.c            |   5 +-
 ext/softfloat/s_roundToI32.c                 |   5 +-
 ext/softfloat/s_roundToI64.c                 |   5 +-
 ext/softfloat/s_roundToUI32.c                |   5 +-
 ext/softfloat/s_roundToUI64.c                |   5 +-
 ext/softfloat/s_shiftLeftM.c                 |  91 +++++
 ext/softfloat/s_shiftNormSigF128M.c          |  78 ++++
 ext/softfloat/s_shiftRightJam128.c           |   1 -
 ext/softfloat/s_shiftRightJam128Extra.c      |   1 -
 ext/softfloat/s_shiftRightJam256M.c          |   1 -
 ext/softfloat/s_shiftRightJam32.c            |   1 -
 ext/softfloat/s_shiftRightJam64.c            |   1 -
 ext/softfloat/s_shiftRightJam64Extra.c       |   1 -
 ext/softfloat/s_shiftRightJamM.c             | 101 +++++
 ext/softfloat/s_shiftRightM.c                |  91 +++++
 ext/softfloat/s_shortShiftLeft128.c          |   1 -
 ext/softfloat/s_shortShiftLeft64To96M.c      |   1 -
 ext/softfloat/s_shortShiftLeftM.c            |  70 ++++
 ext/softfloat/s_shortShiftRight128.c         |   1 -
 ext/softfloat/s_shortShiftRightExtendM.c     |   1 -
 ext/softfloat/s_shortShiftRightJam128.c      |   1 -
 ext/softfloat/s_shortShiftRightJam128Extra.c |   1 -
 ext/softfloat/s_shortShiftRightJam64.c       |   1 -
 ext/softfloat/s_shortShiftRightJam64Extra.c  |   1 -
 ext/softfloat/s_shortShiftRightJamM.c        |  72 ++++
 ext/softfloat/s_shortShiftRightM.c           |   1 -
 ext/softfloat/s_sub128.c                     |   1 -
 ext/softfloat/s_sub1XM.c                     |   1 -
 ext/softfloat/s_sub256M.c                    |   1 -
 ext/softfloat/s_subM.c                       |   1 -
 ext/softfloat/s_subMagsExtF80.c              | 158 ++++++++
 ext/softfloat/s_subMagsF128.c                |   5 +-
 ext/softfloat/s_subMagsF16.c                 |   5 +-
 ext/softfloat/s_subMagsF32.c                 |   5 +-
 ext/softfloat/s_subMagsF64.c                 |   5 +-
 ext/softfloat/s_tryPropagateNaNExtF80M.c     |  64 +++
 ext/softfloat/s_tryPropagateNaNF128M.c       |  55 +++
 ext/softfloat/softfloat.h                    |  19 +-
 ext/softfloat/softfloat.mk.in                | 230 -----------
 ext/softfloat/softfloat_state.c              |   5 +-
 ext/softfloat/specialize.h                   |  15 +-
 ext/softfloat/ui32_to_extF80.c               |  59 +++
 ext/softfloat/ui32_to_extF80M.c              |  74 ++++
 ext/softfloat/ui32_to_f128.c                 |   3 +-
 ext/softfloat/ui32_to_f128M.c                |  76 ++++
 ext/softfloat/ui32_to_f16.c                  |   3 +-
 ext/softfloat/ui32_to_f32.c                  |   3 +-
 ext/softfloat/ui32_to_f64.c                  |   3 +-
 ext/softfloat/ui64_to_extF80.c               |  59 +++
 ext/softfloat/ui64_to_extF80M.c              |  74 ++++
 ext/softfloat/ui64_to_f128.c                 |   3 +-
 ext/softfloat/ui64_to_f128M.c                |  86 ++++
 ext/softfloat/ui64_to_f16.c                  |   3 +-
 ext/softfloat/ui64_to_f32.c                  |   3 +-
 ext/softfloat/ui64_to_f64.c                  |   3 +-
 349 files changed, 15139 insertions(+), 796 deletions(-)
 create mode 100644 ext/softfloat/extF80M_add.c
 create mode 100644 ext/softfloat/extF80M_div.c
 create mode 100644 ext/softfloat/extF80M_eq.c
 create mode 100644 ext/softfloat/extF80M_eq_signaling.c
 create mode 100644 ext/softfloat/extF80M_le.c
 create mode 100644 ext/softfloat/extF80M_le_quiet.c
 create mode 100644 ext/softfloat/extF80M_lt.c
 create mode 100644 ext/softfloat/extF80M_lt_quiet.c
 create mode 100644 ext/softfloat/extF80M_mul.c
 create mode 100644 ext/softfloat/extF80M_rem.c
 create mode 100644 ext/softfloat/extF80M_roundToInt.c
 create mode 100644 ext/softfloat/extF80M_sqrt.c
 create mode 100644 ext/softfloat/extF80M_sub.c
 create mode 100644 ext/softfloat/extF80M_to_f128M.c
 create mode 100644 ext/softfloat/extF80M_to_f16.c
 create mode 100644 ext/softfloat/extF80M_to_f32.c
 create mode 100644 ext/softfloat/extF80M_to_f64.c
 create mode 100644 ext/softfloat/extF80M_to_i32.c
 create mode 100644 ext/softfloat/extF80M_to_i32_r_minMag.c
 create mode 100644 ext/softfloat/extF80M_to_i64.c
 create mode 100644 ext/softfloat/extF80M_to_i64_r_minMag.c
 create mode 100644 ext/softfloat/extF80M_to_ui32.c
 create mode 100644 ext/softfloat/extF80M_to_ui32_r_minMag.c
 create mode 100644 ext/softfloat/extF80M_to_ui64.c
 create mode 100644 ext/softfloat/extF80M_to_ui64_r_minMag.c
 create mode 100644 ext/softfloat/extF80_add.c
 create mode 100644 ext/softfloat/extF80_div.c
 create mode 100644 ext/softfloat/extF80_eq.c
 create mode 100644 ext/softfloat/extF80_eq_signaling.c
 create mode 100644 ext/softfloat/extF80_isSignalingNaN.c
 create mode 100644 ext/softfloat/extF80_le.c
 create mode 100644 ext/softfloat/extF80_le_quiet.c
 create mode 100644 ext/softfloat/extF80_lt.c
 create mode 100644 ext/softfloat/extF80_lt_quiet.c
 create mode 100644 ext/softfloat/extF80_mul.c
 create mode 100644 ext/softfloat/extF80_rem.c
 create mode 100644 ext/softfloat/extF80_roundToInt.c
 create mode 100644 ext/softfloat/extF80_sqrt.c
 create mode 100644 ext/softfloat/extF80_sub.c
 create mode 100644 ext/softfloat/extF80_to_f128.c
 create mode 100644 ext/softfloat/extF80_to_f16.c
 create mode 100644 ext/softfloat/extF80_to_f32.c
 create mode 100644 ext/softfloat/extF80_to_f64.c
 create mode 100644 ext/softfloat/extF80_to_i32.c
 create mode 100644 ext/softfloat/extF80_to_i32_r_minMag.c
 create mode 100644 ext/softfloat/extF80_to_i64.c
 create mode 100644 ext/softfloat/extF80_to_i64_r_minMag.c
 create mode 100644 ext/softfloat/extF80_to_ui32.c
 create mode 100644 ext/softfloat/extF80_to_ui32_r_minMag.c
 create mode 100644 ext/softfloat/extF80_to_ui64.c
 create mode 100644 ext/softfloat/extF80_to_ui64_r_minMag.c
 create mode 100644 ext/softfloat/f128M_add.c
 create mode 100644 ext/softfloat/f128M_div.c
 create mode 100644 ext/softfloat/f128M_eq.c
 create mode 100644 ext/softfloat/f128M_eq_signaling.c
 create mode 100644 ext/softfloat/f128M_le.c
 create mode 100644 ext/softfloat/f128M_le_quiet.c
 create mode 100644 ext/softfloat/f128M_lt.c
 create mode 100644 ext/softfloat/f128M_lt_quiet.c
 create mode 100644 ext/softfloat/f128M_mul.c
 create mode 100644 ext/softfloat/f128M_mulAdd.c
 create mode 100644 ext/softfloat/f128M_rem.c
 create mode 100644 ext/softfloat/f128M_roundToInt.c
 create mode 100644 ext/softfloat/f128M_sqrt.c
 create mode 100644 ext/softfloat/f128M_sub.c
 create mode 100644 ext/softfloat/f128M_to_extF80M.c
 create mode 100644 ext/softfloat/f128M_to_f16.c
 create mode 100644 ext/softfloat/f128M_to_f32.c
 create mode 100644 ext/softfloat/f128M_to_f64.c
 create mode 100644 ext/softfloat/f128M_to_i32.c
 create mode 100644 ext/softfloat/f128M_to_i32_r_minMag.c
 create mode 100644 ext/softfloat/f128M_to_i64.c
 create mode 100644 ext/softfloat/f128M_to_i64_r_minMag.c
 create mode 100644 ext/softfloat/f128M_to_ui32.c
 create mode 100644 ext/softfloat/f128M_to_ui32_r_minMag.c
 create mode 100644 ext/softfloat/f128M_to_ui64.c
 create mode 100644 ext/softfloat/f128M_to_ui64_r_minMag.c
 create mode 100644 ext/softfloat/f128_to_extF80.c
 create mode 100644 ext/softfloat/f16_to_extF80.c
 create mode 100644 ext/softfloat/f16_to_extF80M.c
 create mode 100644 ext/softfloat/f16_to_f128M.c
 create mode 100644 ext/softfloat/f16_to_i16.c
 create mode 100644 ext/softfloat/f16_to_i8.c
 create mode 100644 ext/softfloat/f16_to_ui16.c
 create mode 100644 ext/softfloat/f16_to_ui8.c
 create mode 100644 ext/softfloat/f32_to_extF80.c
 create mode 100644 ext/softfloat/f32_to_extF80M.c
 create mode 100644 ext/softfloat/f32_to_f128M.c
 create mode 100644 ext/softfloat/f32_to_i16.c
 create mode 100644 ext/softfloat/f32_to_ui16.c
 create mode 100644 ext/softfloat/f64_to_extF80.c
 create mode 100644 ext/softfloat/f64_to_extF80M.c
 create mode 100644 ext/softfloat/f64_to_f128M.c
 create mode 100644 ext/softfloat/fall_maxmin.c
 create mode 100644 ext/softfloat/fall_reciprocal.c
 create mode 100644 ext/softfloat/i32_to_extF80.c
 create mode 100644 ext/softfloat/i32_to_extF80M.c
 create mode 100644 ext/softfloat/i32_to_f128M.c
 create mode 100644 ext/softfloat/i64_to_extF80.c
 create mode 100644 ext/softfloat/i64_to_extF80M.c
 create mode 100644 ext/softfloat/i64_to_f128M.c
 create mode 100644 ext/softfloat/s_addExtF80M.c
 create mode 100644 ext/softfloat/s_addF128M.c
 create mode 100644 ext/softfloat/s_addMagsExtF80.c
 create mode 100644 ext/softfloat/s_compareNonnormExtF80M.c
 create mode 100644 ext/softfloat/s_invalidExtF80M.c
 create mode 100644 ext/softfloat/s_invalidF128M.c
 create mode 100644 ext/softfloat/s_isNaNF128M.c
 create mode 100644 ext/softfloat/s_mulAddF128M.c
 create mode 100644 ext/softfloat/s_normExtF80SigM.c
 create mode 100644 ext/softfloat/s_normRoundPackMToExtF80M.c
 create mode 100644 ext/softfloat/s_normRoundPackMToF128M.c
 create mode 100644 ext/softfloat/s_normRoundPackToExtF80.c
 create mode 100644 ext/softfloat/s_normSubnormalExtF80Sig.c
 create mode 100644 ext/softfloat/s_normSubnormalF128SigM.c
 create mode 100644 ext/softfloat/s_roundPackMToExtF80M.c
 create mode 100644 ext/softfloat/s_roundPackMToF128M.c
 create mode 100644 ext/softfloat/s_roundPackToExtF80.c
 create mode 100644 ext/softfloat/s_shiftLeftM.c
 create mode 100644 ext/softfloat/s_shiftNormSigF128M.c
 create mode 100644 ext/softfloat/s_shiftRightJamM.c
 create mode 100644 ext/softfloat/s_shiftRightM.c
 create mode 100644 ext/softfloat/s_shortShiftLeftM.c
 create mode 100644 ext/softfloat/s_shortShiftRightJamM.c
 create mode 100644 ext/softfloat/s_subMagsExtF80.c
 create mode 100644 ext/softfloat/s_tryPropagateNaNExtF80M.c
 create mode 100644 ext/softfloat/s_tryPropagateNaNF128M.c
 delete mode 100644 ext/softfloat/softfloat.mk.in
 create mode 100644 ext/softfloat/ui32_to_extF80.c
 create mode 100644 ext/softfloat/ui32_to_extF80M.c
 create mode 100644 ext/softfloat/ui32_to_f128M.c
 create mode 100644 ext/softfloat/ui64_to_extF80.c
 create mode 100644 ext/softfloat/ui64_to_extF80M.c
 create mode 100644 ext/softfloat/ui64_to_f128M.c

diff --git a/ext/softfloat/SConscript b/ext/softfloat/SConscript
index 689cbcf925..b4a8d514f5 100644
--- a/ext/softfloat/SConscript
+++ b/ext/softfloat/SConscript
@@ -33,18 +33,27 @@ import os
 
 Import('env')
 
+# You can change it to undefine SOFTFLOAT_FAST_INT64
+use_fast_int64 = True
+
 sf_env = env.Clone()
 if sf_env['GCC']:
     sf_env.Append(CCFLAGS=['-Wno-unused-variable',
                            '-Wno-unused-label',
                            '-Wno-implicit-fallthrough',
+                           '-Wno-implicit-function-declaration',
                            '-g'])
 
 elif sf_env['CLANG']:
     sf_env.Append(CCFLAGS=['-Wno-unused-variable',
                            '-Wno-unused-label',
+                           '-Wno-implicit-function-declaration',
                            '-g'])
 
+if use_fast_int64:
+    sf_env.Prepend(CXXFLAGS=['-DSOFTFLOAT_FAST_INT64'])
+    sf_env.Prepend(CFLAGS=['-DSOFTFLOAT_FAST_INT64'])
+
 # Add the appropriate files for the library
 softfloat_files = []
 
@@ -52,33 +61,6 @@ softfloat_files = []
 def SoftfloatFile(filename):
     softfloat_files.append(File('./' + filename))
 
-SoftfloatFile('f128_add.c')
-SoftfloatFile('f128_classify.c')
-SoftfloatFile('f128_div.c')
-SoftfloatFile('f128_eq.c')
-SoftfloatFile('f128_eq_signaling.c')
-SoftfloatFile('f128_isSignalingNaN.c')
-SoftfloatFile('f128_le.c')
-SoftfloatFile('f128_le_quiet.c')
-SoftfloatFile('f128_lt.c')
-SoftfloatFile('f128_lt_quiet.c')
-SoftfloatFile('f128_mulAdd.c')
-SoftfloatFile('f128_mul.c')
-SoftfloatFile('f128_rem.c')
-SoftfloatFile('f128_roundToInt.c')
-SoftfloatFile('f128_sqrt.c')
-SoftfloatFile('f128_sub.c')
-SoftfloatFile('f128_to_f16.c')
-SoftfloatFile('f128_to_f32.c')
-SoftfloatFile('f128_to_f64.c')
-SoftfloatFile('f128_to_i32.c')
-SoftfloatFile('f128_to_i32_r_minMag.c')
-SoftfloatFile('f128_to_i64.c')
-SoftfloatFile('f128_to_i64_r_minMag.c')
-SoftfloatFile('f128_to_ui32.c')
-SoftfloatFile('f128_to_ui32_r_minMag.c')
-SoftfloatFile('f128_to_ui64.c')
-SoftfloatFile('f128_to_ui64_r_minMag.c')
 SoftfloatFile('f16_add.c')
 SoftfloatFile('f16_classify.c')
 SoftfloatFile('f16_div.c')
@@ -95,17 +77,20 @@ SoftfloatFile('f16_rem.c')
 SoftfloatFile('f16_roundToInt.c')
 SoftfloatFile('f16_sqrt.c')
 SoftfloatFile('f16_sub.c')
-SoftfloatFile('f16_to_f128.c')
 SoftfloatFile('f16_to_f32.c')
 SoftfloatFile('f16_to_f64.c')
+SoftfloatFile('f16_to_i16.c')
 SoftfloatFile('f16_to_i32.c')
 SoftfloatFile('f16_to_i32_r_minMag.c')
 SoftfloatFile('f16_to_i64.c')
 SoftfloatFile('f16_to_i64_r_minMag.c')
+SoftfloatFile('f16_to_i8.c')
+SoftfloatFile('f16_to_ui16.c')
 SoftfloatFile('f16_to_ui32.c')
 SoftfloatFile('f16_to_ui32_r_minMag.c')
 SoftfloatFile('f16_to_ui64.c')
 SoftfloatFile('f16_to_ui64_r_minMag.c')
+SoftfloatFile('f16_to_ui8.c')
 SoftfloatFile('f32_add.c')
 SoftfloatFile('f32_classify.c')
 SoftfloatFile('f32_div.c')
@@ -122,17 +107,22 @@ SoftfloatFile('f32_rem.c')
 SoftfloatFile('f32_roundToInt.c')
 SoftfloatFile('f32_sqrt.c')
 SoftfloatFile('f32_sub.c')
-SoftfloatFile('f32_to_f128.c')
 SoftfloatFile('f32_to_f16.c')
 SoftfloatFile('f32_to_f64.c')
+SoftfloatFile('f32_to_i16.c')
 SoftfloatFile('f32_to_i32.c')
 SoftfloatFile('f32_to_i32_r_minMag.c')
 SoftfloatFile('f32_to_i64.c')
 SoftfloatFile('f32_to_i64_r_minMag.c')
+SoftfloatFile('f32_to_ui16.c')
 SoftfloatFile('f32_to_ui32.c')
 SoftfloatFile('f32_to_ui32_r_minMag.c')
 SoftfloatFile('f32_to_ui64.c')
 SoftfloatFile('f32_to_ui64_r_minMag.c')
+SoftfloatFile('f16_to_extF80M.c')
+SoftfloatFile('f16_to_f128M.c')
+SoftfloatFile('f32_to_extF80M.c')
+SoftfloatFile('f32_to_f128M.c')
 SoftfloatFile('f64_add.c')
 SoftfloatFile('f64_classify.c')
 SoftfloatFile('f64_div.c')
@@ -149,7 +139,8 @@ SoftfloatFile('f64_rem.c')
 SoftfloatFile('f64_roundToInt.c')
 SoftfloatFile('f64_sqrt.c')
 SoftfloatFile('f64_sub.c')
-SoftfloatFile('f64_to_f128.c')
+SoftfloatFile('f64_to_extF80M.c')
+SoftfloatFile('f64_to_f128M.c')
 SoftfloatFile('f64_to_f16.c')
 SoftfloatFile('f64_to_f32.c')
 SoftfloatFile('f64_to_i32.c')
@@ -160,75 +151,48 @@ SoftfloatFile('f64_to_ui32.c')
 SoftfloatFile('f64_to_ui32_r_minMag.c')
 SoftfloatFile('f64_to_ui64.c')
 SoftfloatFile('f64_to_ui64_r_minMag.c')
-SoftfloatFile('i32_to_f128.c')
+SoftfloatFile('fall_maxmin.c')
+SoftfloatFile('fall_reciprocal.c')
+SoftfloatFile('i32_to_extF80M.c')
+SoftfloatFile('i32_to_f128M.c')
 SoftfloatFile('i32_to_f16.c')
 SoftfloatFile('i32_to_f32.c')
 SoftfloatFile('i32_to_f64.c')
-SoftfloatFile('i64_to_f128.c')
+SoftfloatFile('i64_to_extF80M.c')
+SoftfloatFile('i64_to_f128M.c')
 SoftfloatFile('i64_to_f16.c')
 SoftfloatFile('i64_to_f32.c')
 SoftfloatFile('i64_to_f64.c')
-SoftfloatFile('s_add128.c')
-SoftfloatFile('s_add256M.c')
-SoftfloatFile('s_addCarryM.c')
-SoftfloatFile('s_addComplCarryM.c')
-SoftfloatFile('s_addMagsF128.c')
 SoftfloatFile('s_addMagsF16.c')
 SoftfloatFile('s_addMagsF32.c')
 SoftfloatFile('s_addMagsF64.c')
-SoftfloatFile('s_addM.c')
 SoftfloatFile('s_approxRecip_1Ks.c')
 SoftfloatFile('s_approxRecip32_1.c')
 SoftfloatFile('s_approxRecipSqrt_1Ks.c')
 SoftfloatFile('s_approxRecipSqrt32_1.c')
-SoftfloatFile('s_commonNaNToF128UI.c')
 SoftfloatFile('s_commonNaNToF16UI.c')
 SoftfloatFile('s_commonNaNToF32UI.c')
 SoftfloatFile('s_commonNaNToF64UI.c')
-SoftfloatFile('s_compare128M.c')
-SoftfloatFile('s_compare96M.c')
 SoftfloatFile('s_countLeadingZeros16.c')
 SoftfloatFile('s_countLeadingZeros32.c')
 SoftfloatFile('s_countLeadingZeros64.c')
 SoftfloatFile('s_countLeadingZeros8.c')
-SoftfloatFile('s_eq128.c')
 SoftfloatFile('s_f128UIToCommonNaN.c')
 SoftfloatFile('s_f16UIToCommonNaN.c')
 SoftfloatFile('s_f32UIToCommonNaN.c')
 SoftfloatFile('s_f64UIToCommonNaN.c')
-SoftfloatFile('s_le128.c')
-SoftfloatFile('s_lt128.c')
-SoftfloatFile('s_mul128By32.c')
-SoftfloatFile('s_mul128MTo256M.c')
-SoftfloatFile('s_mul128To256M.c')
-SoftfloatFile('s_mul64ByShifted32To128.c')
-SoftfloatFile('s_mul64To128.c')
-SoftfloatFile('s_mul64To128M.c')
-SoftfloatFile('s_mulAddF128.c')
 SoftfloatFile('s_mulAddF16.c')
 SoftfloatFile('s_mulAddF32.c')
 SoftfloatFile('s_mulAddF64.c')
-SoftfloatFile('s_negXM.c')
-SoftfloatFile('s_normRoundPackToF128.c')
 SoftfloatFile('s_normRoundPackToF16.c')
 SoftfloatFile('s_normRoundPackToF32.c')
 SoftfloatFile('s_normRoundPackToF64.c')
-SoftfloatFile('s_normSubnormalF128Sig.c')
 SoftfloatFile('s_normSubnormalF16Sig.c')
 SoftfloatFile('s_normSubnormalF32Sig.c')
 SoftfloatFile('s_normSubnormalF64Sig.c')
-SoftfloatFile('softfloat_raiseFlags.c')
-SoftfloatFile('softfloat_state.c')
-SoftfloatFile('s_propagateNaNF128UI.c')
 SoftfloatFile('s_propagateNaNF16UI.c')
 SoftfloatFile('s_propagateNaNF32UI.c')
 SoftfloatFile('s_propagateNaNF64UI.c')
-SoftfloatFile('s_remStepMBy32.c')
-SoftfloatFile('s_roundMToI64.c')
-SoftfloatFile('s_roundMToUI64.c')
-SoftfloatFile('s_roundPackMToI64.c')
-SoftfloatFile('s_roundPackMToUI64.c')
-SoftfloatFile('s_roundPackToF128.c')
 SoftfloatFile('s_roundPackToF16.c')
 SoftfloatFile('s_roundPackToF32.c')
 SoftfloatFile('s_roundPackToF64.c')
@@ -239,39 +203,221 @@ SoftfloatFile('s_roundPackToUI64.c')
 SoftfloatFile('s_roundToI32.c')
 SoftfloatFile('s_roundToI64.c')
 SoftfloatFile('s_roundToUI32.c')
-SoftfloatFile('s_roundToUI64.c')
-SoftfloatFile('s_shiftRightJam128.c')
-SoftfloatFile('s_shiftRightJam128Extra.c')
-SoftfloatFile('s_shiftRightJam256M.c')
 SoftfloatFile('s_shiftRightJam32.c')
 SoftfloatFile('s_shiftRightJam64.c')
-SoftfloatFile('s_shiftRightJam64Extra.c')
-SoftfloatFile('s_shortShiftLeft128.c')
-SoftfloatFile('s_shortShiftLeft64To96M.c')
-SoftfloatFile('s_shortShiftRight128.c')
 SoftfloatFile('s_shortShiftRightExtendM.c')
-SoftfloatFile('s_shortShiftRightJam128.c')
-SoftfloatFile('s_shortShiftRightJam128Extra.c')
 SoftfloatFile('s_shortShiftRightJam64.c')
-SoftfloatFile('s_shortShiftRightJam64Extra.c')
-SoftfloatFile('s_shortShiftRightM.c')
-SoftfloatFile('s_sub128.c')
-SoftfloatFile('s_sub1XM.c')
-SoftfloatFile('s_sub256M.c')
-SoftfloatFile('s_subMagsF128.c')
 SoftfloatFile('s_subMagsF16.c')
 SoftfloatFile('s_subMagsF32.c')
 SoftfloatFile('s_subMagsF64.c')
-SoftfloatFile('s_subM.c')
-SoftfloatFile('ui32_to_f128.c')
+SoftfloatFile('softfloat_raiseFlags.c')
+SoftfloatFile('softfloat_state.c')
 SoftfloatFile('ui32_to_f16.c')
 SoftfloatFile('ui32_to_f32.c')
 SoftfloatFile('ui32_to_f64.c')
-SoftfloatFile('ui64_to_f128.c')
 SoftfloatFile('ui64_to_f16.c')
 SoftfloatFile('ui64_to_f32.c')
 SoftfloatFile('ui64_to_f64.c')
 
+if use_fast_int64:
+    SoftfloatFile('extF80_add.c')
+    SoftfloatFile('extF80_div.c')
+    SoftfloatFile('extF80_eq.c')
+    SoftfloatFile('extF80_eq_signaling.c')
+    SoftfloatFile('extF80_isSignalingNaN.c')
+    SoftfloatFile('extF80_le.c')
+    SoftfloatFile('extF80_le_quiet.c')
+    SoftfloatFile('extF80_lt.c')
+    SoftfloatFile('extF80_lt_quiet.c')
+    SoftfloatFile('extF80_mul.c')
+    SoftfloatFile('extF80_rem.c')
+    SoftfloatFile('extF80_roundToInt.c')
+    SoftfloatFile('extF80_sqrt.c')
+    SoftfloatFile('extF80_sub.c')
+    SoftfloatFile('extF80_to_f128.c')
+    SoftfloatFile('extF80_to_f16.c')
+    SoftfloatFile('extF80_to_f32.c')
+    SoftfloatFile('extF80_to_f64.c')
+    SoftfloatFile('extF80_to_i32.c')
+    SoftfloatFile('extF80_to_i32_r_minMag.c')
+    SoftfloatFile('extF80_to_i64.c')
+    SoftfloatFile('extF80_to_i64_r_minMag.c')
+    SoftfloatFile('extF80_to_ui32.c')
+    SoftfloatFile('extF80_to_ui32_r_minMag.c')
+    SoftfloatFile('extF80_to_ui64.c')
+    SoftfloatFile('extF80_to_ui64_r_minMag.c')
+    SoftfloatFile('f128_add.c')
+    SoftfloatFile('f128_classify.c')
+    SoftfloatFile('f128_div.c')
+    SoftfloatFile('f128_eq.c')
+    SoftfloatFile('f128_eq_signaling.c')
+    SoftfloatFile('f128_isSignalingNaN.c')
+    SoftfloatFile('f128_le.c')
+    SoftfloatFile('f128_le_quiet.c')
+    SoftfloatFile('f128_lt.c')
+    SoftfloatFile('f128_lt_quiet.c')
+    SoftfloatFile('f128_mulAdd.c')
+    SoftfloatFile('f128_mul.c')
+    SoftfloatFile('f128_rem.c')
+    SoftfloatFile('f128_roundToInt.c')
+    SoftfloatFile('f128_sqrt.c')
+    SoftfloatFile('f128_sub.c')
+    SoftfloatFile('f128_to_extF80.c')
+    SoftfloatFile('f128_to_f16.c')
+    SoftfloatFile('f128_to_f32.c')
+    SoftfloatFile('f128_to_f64.c')
+    SoftfloatFile('f128_to_i32.c')
+    SoftfloatFile('f128_to_i32_r_minMag.c')
+    SoftfloatFile('f128_to_i64.c')
+    SoftfloatFile('f128_to_i64_r_minMag.c')
+    SoftfloatFile('f128_to_ui32.c')
+    SoftfloatFile('f128_to_ui32_r_minMag.c')
+    SoftfloatFile('f128_to_ui64.c')
+    SoftfloatFile('f128_to_ui64_r_minMag.c')
+    SoftfloatFile('f16_to_extF80.c')
+    SoftfloatFile('f16_to_f128.c')
+    SoftfloatFile('f32_to_extF80.c')
+    SoftfloatFile('f32_to_f128.c')
+    SoftfloatFile('f64_to_extF80.c')
+    SoftfloatFile('f64_to_f128.c')
+    SoftfloatFile('i32_to_extF80.c')
+    SoftfloatFile('i32_to_f128.c')
+    SoftfloatFile('i64_to_extF80.c')
+    SoftfloatFile('i64_to_f128.c')
+    SoftfloatFile('s_addMagsExtF80.c')
+    SoftfloatFile('s_addMagsF128.c')
+    SoftfloatFile('s_add128.c')
+    SoftfloatFile('s_add256M.c')
+    SoftfloatFile('s_commonNaNToF128UI.c')
+    SoftfloatFile('s_eq128.c')
+    SoftfloatFile('s_le128.c')
+    SoftfloatFile('s_lt128.c')
+    SoftfloatFile('s_mulAddF128.c')
+    SoftfloatFile('s_mul64ByShifted32To128.c')
+    SoftfloatFile('s_mul64To128.c')
+    SoftfloatFile('s_mul128By32.c')
+    SoftfloatFile('s_mul128To256M.c')
+    SoftfloatFile('s_normRoundPackToExtF80.c')
+    SoftfloatFile('s_normRoundPackToF128.c')
+    SoftfloatFile('s_normSubnormalExtF80Sig.c')
+    SoftfloatFile('s_normSubnormalF128Sig.c')
+    SoftfloatFile('s_propagateNaNF128UI.c')
+    SoftfloatFile('s_roundPackToExtF80.c')
+    SoftfloatFile('s_roundPackToF128.c')
+    SoftfloatFile('s_roundToUI64.c')
+    SoftfloatFile('s_shiftRightJam128.c')
+    SoftfloatFile('s_shiftRightJam128Extra.c')
+    SoftfloatFile('s_shiftRightJam256M.c')
+    SoftfloatFile('s_shiftRightJam64Extra.c')
+    SoftfloatFile('s_shortShiftLeft128.c')
+    SoftfloatFile('s_shortShiftRight128.c')
+    SoftfloatFile('s_shortShiftRightJam64Extra.c')
+    SoftfloatFile('s_shortShiftRightJam128.c')
+    SoftfloatFile('s_shortShiftRightJam128Extra.c')
+    SoftfloatFile('s_subMagsExtF80.c')
+    SoftfloatFile('s_subMagsF128.c')
+    SoftfloatFile('s_sub128.c')
+    SoftfloatFile('s_sub256M.c')
+    SoftfloatFile('ui32_to_extF80.c')
+    SoftfloatFile('ui32_to_f128.c')
+    SoftfloatFile('ui64_to_extF80.c')
+    SoftfloatFile('ui64_to_f128.c')
+
+else:
+    SoftfloatFile('extF80M_add.c')
+    SoftfloatFile('extF80M_div.c')
+    SoftfloatFile('extF80M_eq.c')
+    SoftfloatFile('extF80M_eq_signaling.c')
+    SoftfloatFile('extF80M_le.c')
+    SoftfloatFile('extF80M_le_quiet.c')
+    SoftfloatFile('extF80M_lt.c')
+    SoftfloatFile('extF80M_lt_quiet.c')
+    SoftfloatFile('extF80M_mul.c')
+    SoftfloatFile('extF80M_rem.c')
+    SoftfloatFile('extF80M_roundToInt.c')
+    SoftfloatFile('extF80M_sqrt.c')
+    SoftfloatFile('extF80M_sub.c')
+    SoftfloatFile('extF80M_to_f128M.c')
+    SoftfloatFile('extF80M_to_f16.c')
+    SoftfloatFile('extF80M_to_f32.c')
+    SoftfloatFile('extF80M_to_f64.c')
+    SoftfloatFile('extF80M_to_i32.c')
+    SoftfloatFile('extF80M_to_i32_r_minMag.c')
+    SoftfloatFile('extF80M_to_i64.c')
+    SoftfloatFile('extF80M_to_i64_r_minMag.c')
+    SoftfloatFile('extF80M_to_ui32.c')
+    SoftfloatFile('extF80M_to_ui32_r_minMag.c')
+    SoftfloatFile('extF80M_to_ui64.c')
+    SoftfloatFile('extF80M_to_ui64_r_minMag.c')
+    SoftfloatFile('f128M_add.c')
+    SoftfloatFile('f128M_div.c')
+    SoftfloatFile('f128M_eq.c')
+    SoftfloatFile('f128M_eq_signaling.c')
+    SoftfloatFile('f128M_le.c')
+    SoftfloatFile('f128M_le_quiet.c')
+    SoftfloatFile('f128M_lt.c')
+    SoftfloatFile('f128M_lt_quiet.c')
+    SoftfloatFile('f128M_mulAdd.c')
+    SoftfloatFile('f128M_mul.c')
+    SoftfloatFile('f128M_rem.c')
+    SoftfloatFile('f128M_roundToInt.c')
+    SoftfloatFile('f128M_sqrt.c')
+    SoftfloatFile('f128M_sub.c')
+    SoftfloatFile('f128M_to_extF80M.c')
+    SoftfloatFile('f128M_to_f16.c')
+    SoftfloatFile('f128M_to_f32.c')
+    SoftfloatFile('f128M_to_f64.c')
+    SoftfloatFile('f128M_to_i32.c')
+    SoftfloatFile('f128M_to_i32_r_minMag.c')
+    SoftfloatFile('f128M_to_i64.c')
+    SoftfloatFile('f128M_to_i64_r_minMag.c')
+    SoftfloatFile('f128M_to_ui32.c')
+    SoftfloatFile('f128M_to_ui32_r_minMag.c')
+    SoftfloatFile('f128M_to_ui64.c')
+    SoftfloatFile('f128M_to_ui64_r_minMag.c')
+    SoftfloatFile('s_addM.c')
+    SoftfloatFile('s_addCarryM.c')
+    SoftfloatFile('s_addComplCarryM.c')
+    SoftfloatFile('s_addExtF80M.c')
+    SoftfloatFile('s_addF128M.c')
+    SoftfloatFile('s_compareNonnormExtF80M.c')
+    SoftfloatFile('s_compare128M.c')
+    SoftfloatFile('s_compare96M.c')
+    SoftfloatFile('s_invalidExtF80M.c')
+    SoftfloatFile('s_invalidF128M.c')
+    SoftfloatFile('s_isNaNF128M.c')
+    SoftfloatFile('s_mulAddF128M.c')
+    SoftfloatFile('s_mul128MTo256M.c')
+    SoftfloatFile('s_mul64To128M.c')
+    SoftfloatFile('s_negXM.c')
+    SoftfloatFile('s_normExtF80SigM.c')
+    SoftfloatFile('s_normRoundPackMToExtF80M.c')
+    SoftfloatFile('s_normRoundPackMToF128M.c')
+    SoftfloatFile('s_normSubnormalF128SigM.c')
+    SoftfloatFile('s_remStepMBy32.c')
+    SoftfloatFile('s_roundMToI64.c')
+    SoftfloatFile('s_roundMToUI64.c')
+    SoftfloatFile('s_roundPackMToExtF80M.c')
+    SoftfloatFile('s_roundPackMToF128M.c')
+    SoftfloatFile('s_roundPackMToI64.c')
+    SoftfloatFile('s_roundPackMToUI64.c')
+    SoftfloatFile('s_shiftLeftM.c')
+    SoftfloatFile('s_shiftNormSigF128M.c')
+    SoftfloatFile('s_shiftRightJamM.c')
+    SoftfloatFile('s_shiftRightM.c')
+    SoftfloatFile('s_shortShiftLeftM.c')
+    SoftfloatFile('s_shortShiftLeft64To96M.c')
+    SoftfloatFile('s_shortShiftRightJamM.c')
+    SoftfloatFile('s_shortShiftRightM.c')
+    SoftfloatFile('s_subM.c')
+    SoftfloatFile('s_sub1XM.c')
+    SoftfloatFile('s_tryPropagateNaNExtF80M.c')
+    SoftfloatFile('s_tryPropagateNaNF128M.c')
+    SoftfloatFile('ui32_to_extF80M.c')
+    SoftfloatFile('ui32_to_f128M.c')
+    SoftfloatFile('ui64_to_extF80M.c')
+    SoftfloatFile('ui64_to_f128M.c')
+
 sf_env.Library('softfloat', [sf_env.SharedObject(f) for f in softfloat_files])
 
 env.Prepend(CPPPATH=Dir('./'))
diff --git a/ext/softfloat/extF80M_add.c b/ext/softfloat/extF80M_add.c
new file mode 100644
index 0000000000..ee18535806
--- /dev/null
+++ b/ext/softfloat/extF80M_add.c
@@ -0,0 +1,100 @@
+
+/*============================================================================
+
+This C source file is part of the SoftFloat IEEE Floating-Point Arithmetic
+Package, Release 3d, by John R. Hauser.
+
+Copyright 2011, 2012, 2013, 2014 The Regents of the University of California.
+All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met:
+
+ 1. Redistributions of source code must retain the above copyright notice,
+    this list of conditions, and the following disclaimer.
+
+ 2. Redistributions in binary form must reproduce the above copyright notice,
+    this list of conditions, and the following disclaimer in the documentation
+    and/or other materials provided with the distribution.
+
+ 3. Neither the name of the University nor the names of its contributors may
+    be used to endorse or promote products derived from this software without
+    specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS "AS IS", AND ANY
+EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE, ARE
+DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY
+DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+=============================================================================*/
+
+#include <stdbool.h>
+#include "platform.h"
+#include "internals.h"
+#include "softfloat.h"
+
+#ifdef SOFTFLOAT_FAST_INT64
+
+void
+ extF80M_add(
+     const extFloat80_t *aPtr, const extFloat80_t *bPtr, extFloat80_t *zPtr )
+{
+    const struct extFloat80M *aSPtr, *bSPtr;
+    uint_fast16_t uiA64;
+    uint_fast64_t uiA0;
+    bool signA;
+    uint_fast16_t uiB64;
+    uint_fast64_t uiB0;
+    bool signB;
+#if ! defined INLINE_LEVEL || (INLINE_LEVEL < 2)
+    extFloat80_t
+        (*magsFuncPtr)(
+            uint_fast16_t, uint_fast64_t, uint_fast16_t, uint_fast64_t, bool );
+#endif
+
+    aSPtr = (const struct extFloat80M *) aPtr;
+    bSPtr = (const struct extFloat80M *) bPtr;
+    uiA64 = aSPtr->signExp;
+    uiA0  = aSPtr->signif;
+    signA = signExtF80UI64( uiA64 );
+    uiB64 = bSPtr->signExp;
+    uiB0  = bSPtr->signif;
+    signB = signExtF80UI64( uiB64 );
+#if defined INLINE_LEVEL && (2 <= INLINE_LEVEL)
+    if ( signA == signB ) {
+        *zPtr = softfloat_addMagsExtF80( uiA64, uiA0, uiB64, uiB0, signA );
+    } else {
+        *zPtr = softfloat_subMagsExtF80( uiA64, uiA0, uiB64, uiB0, signA );
+    }
+#else
+    magsFuncPtr =
+        (signA == signB) ? softfloat_addMagsExtF80 : softfloat_subMagsExtF80;
+    *zPtr = (*magsFuncPtr)( uiA64, uiA0, uiB64, uiB0, signA );
+#endif
+
+}
+
+#else
+
+void
+ extF80M_add(
+     const extFloat80_t *aPtr, const extFloat80_t *bPtr, extFloat80_t *zPtr )
+{
+
+    softfloat_addExtF80M(
+        (const struct extFloat80M *) aPtr,
+        (const struct extFloat80M *) bPtr,
+        (struct extFloat80M *) zPtr,
+        false
+    );
+
+}
+
+#endif
+
diff --git a/ext/softfloat/extF80M_div.c b/ext/softfloat/extF80M_div.c
new file mode 100644
index 0000000000..fa0ccdc815
--- /dev/null
+++ b/ext/softfloat/extF80M_div.c
@@ -0,0 +1,194 @@
+
+/*============================================================================
+
+This C source file is part of the SoftFloat IEEE Floating-Point Arithmetic
+Package, Release 3d, by John R. Hauser.
+
+Copyright 2011, 2012, 2013, 2014, 2015, 2016 The Regents of the University of
+California.  All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met:
+
+ 1. Redistributions of source code must retain the above copyright notice,
+    this list of conditions, and the following disclaimer.
+
+ 2. Redistributions in binary form must reproduce the above copyright notice,
+    this list of conditions, and the following disclaimer in the documentation
+    and/or other materials provided with the distribution.
+
+ 3. Neither the name of the University nor the names of its contributors may
+    be used to endorse or promote products derived from this software without
+    specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS "AS IS", AND ANY
+EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE, ARE
+DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY
+DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+=============================================================================*/
+
+#include <stdbool.h>
+#include <stdint.h>
+#include "platform.h"
+#include "internals.h"
+#include "specialize.h"
+#include "softfloat.h"
+
+#ifdef SOFTFLOAT_FAST_INT64
+
+void
+ extF80M_div(
+     const extFloat80_t *aPtr, const extFloat80_t *bPtr, extFloat80_t *zPtr )
+{
+
+    *zPtr = extF80_div( *aPtr, *bPtr );
+
+}
+
+#else
+
+void
+ extF80M_div(
+     const extFloat80_t *aPtr, const extFloat80_t *bPtr, extFloat80_t *zPtr )
+{
+    const struct extFloat80M *aSPtr, *bSPtr;
+    struct extFloat80M *zSPtr;
+    uint_fast16_t uiA64;
+    int32_t expA;
+    uint_fast16_t uiB64;
+    int32_t expB;
+    bool signZ;
+    uint64_t sigA, x64;
+    int32_t expZ;
+    int shiftDist;
+    uint32_t y[3], recip32, sigB[3];
+    int ix;
+    uint32_t q, qs[2];
+    uint_fast16_t uiZ64;
+    uint64_t uiZ0;
+
+    /*------------------------------------------------------------------------
+    *------------------------------------------------------------------------*/
+    aSPtr = (const struct extFloat80M *) aPtr;
+    bSPtr = (const struct extFloat80M *) bPtr;
+    zSPtr = (struct extFloat80M *) zPtr;
+    /*------------------------------------------------------------------------
+    *------------------------------------------------------------------------*/
+    uiA64 = aSPtr->signExp;
+    expA = expExtF80UI64( uiA64 );
+    uiB64 = bSPtr->signExp;
+    expB = expExtF80UI64( uiB64 );
+    signZ = signExtF80UI64( uiA64 ) ^ signExtF80UI64( uiB64 );
+    /*------------------------------------------------------------------------
+    *------------------------------------------------------------------------*/
+    if ( (expA == 0x7FFF) || (expB == 0x7FFF) ) {
+        if ( softfloat_tryPropagateNaNExtF80M( aSPtr, bSPtr, zSPtr ) ) return;
+        if ( expA == 0x7FFF ) {
+            if ( expB == 0x7FFF ) goto invalid;
+            goto infinity;
+        }
+        goto zero;
+    }
+    /*------------------------------------------------------------------------
+    *------------------------------------------------------------------------*/
+    sigA = aSPtr->signif;
+    x64 = bSPtr->signif;
+    if ( ! expB ) expB = 1;
+    if ( ! (x64 & UINT64_C( 0x8000000000000000 )) ) {
+        if ( ! x64 ) {
+            if ( ! sigA ) goto invalid;
+            softfloat_raiseFlags( softfloat_flag_infinite );
+            goto infinity;
+        }
+        expB += softfloat_normExtF80SigM( &x64 );
+    }
+    if ( ! expA ) expA = 1;
+    if ( ! (sigA & UINT64_C( 0x8000000000000000 )) ) {
+        if ( ! sigA ) goto zero;
+        expA += softfloat_normExtF80SigM( &sigA );
+    }
+    /*------------------------------------------------------------------------
+    *------------------------------------------------------------------------*/
+    expZ = expA - expB + 0x3FFF;
+    shiftDist = 29;
+    if ( sigA < x64 ) {
+        --expZ;
+        shiftDist = 30;
+    }
+    softfloat_shortShiftLeft64To96M( sigA, shiftDist, y );
+    recip32 = softfloat_approxRecip32_1( x64>>32 );
+    sigB[indexWord( 3, 0 )] = (uint32_t) x64<<30;
+    x64 >>= 2;
+    sigB[indexWord( 3, 2 )] = x64>>32;
+    sigB[indexWord( 3, 1 )] = x64;
+    ix = 2;
+    for (;;) {
+        x64 = (uint64_t) y[indexWordHi( 3 )] * recip32;
+        q = (x64 + 0x80000000)>>32;
+        --ix;
+        if ( ix < 0 ) break;
+        softfloat_remStep96MBy32( y, 29, sigB, q, y );
+        if ( y[indexWordHi( 3 )] & 0x80000000 ) {
+            --q;
+            softfloat_add96M( y, sigB, y );
+        }
+        qs[ix] = q;
+    }
+    /*------------------------------------------------------------------------
+    *------------------------------------------------------------------------*/
+    if ( ((q + 1) & 0x3FFFFF) < 2 ) {
+        softfloat_remStep96MBy32( y, 29, sigB, q, y );
+        if ( y[indexWordHi( 3 )] & 0x80000000 ) {
+            --q;
+            softfloat_add96M( y, sigB, y );
+        } else if ( softfloat_compare96M( sigB, y ) <= 0 ) {
+            ++q;
+            softfloat_sub96M( y, sigB, y );
+        }
+        if (
+            y[indexWordLo( 3 )] || y[indexWord( 3, 1 )] || y[indexWord( 3, 2 )]
+        ) {
+            q |= 1;
+        }
+    }
+    /*------------------------------------------------------------------------
+    *------------------------------------------------------------------------*/
+    x64 = (uint64_t) q<<9;
+    y[indexWord( 3, 0 )] = x64;
+    x64 = ((uint64_t) qs[0]<<6) + (x64>>32);
+    y[indexWord( 3, 1 )] = x64;
+    y[indexWord( 3, 2 )] = (qs[1]<<3) + (x64>>32);
+    softfloat_roundPackMToExtF80M(
+        signZ, expZ, y, extF80_roundingPrecision, zSPtr );
+    return;
+    /*------------------------------------------------------------------------
+    *------------------------------------------------------------------------*/
+ invalid:
+    softfloat_invalidExtF80M( zSPtr );
+    return;
+    /*------------------------------------------------------------------------
+    *------------------------------------------------------------------------*/
+ infinity:
+    uiZ64 = packToExtF80UI64( signZ, 0x7FFF );
+    uiZ0  = UINT64_C( 0x8000000000000000 );
+    goto uiZ;
+    /*------------------------------------------------------------------------
+    *------------------------------------------------------------------------*/
+ zero:
+    uiZ64 = packToExtF80UI64( signZ, 0 );
+    uiZ0  = 0;
+ uiZ:
+    zSPtr->signExp = uiZ64;
+    zSPtr->signif  = uiZ0;
+
+}
+
+#endif
+
diff --git a/ext/softfloat/extF80M_eq.c b/ext/softfloat/extF80M_eq.c
new file mode 100644
index 0000000000..845d87c07d
--- /dev/null
+++ b/ext/softfloat/extF80M_eq.c
@@ -0,0 +1,98 @@
+
+/*============================================================================
+
+This C source file is part of the SoftFloat IEEE Floating-Point Arithmetic
+Package, Release 3d, by John R. Hauser.
+
+Copyright 2011, 2012, 2013, 2014 The Regents of the University of California.
+All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met:
+
+ 1. Redistributions of source code must retain the above copyright notice,
+    this list of conditions, and the following disclaimer.
+
+ 2. Redistributions in binary form must reproduce the above copyright notice,
+    this list of conditions, and the following disclaimer in the documentation
+    and/or other materials provided with the distribution.
+
+ 3. Neither the name of the University nor the names of its contributors may
+    be used to endorse or promote products derived from this software without
+    specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS "AS IS", AND ANY
+EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE, ARE
+DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY
+DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+=============================================================================*/
+
+#include <stdbool.h>
+#include <stdint.h>
+#include "platform.h"
+#include "internals.h"
+#include "specialize.h"
+#include "softfloat.h"
+
+#ifdef SOFTFLOAT_FAST_INT64
+
+bool extF80M_eq( const extFloat80_t *aPtr, const extFloat80_t *bPtr )
+{
+
+    return extF80_eq( *aPtr, *bPtr );
+
+}
+
+#else
+
+bool extF80M_eq( const extFloat80_t *aPtr, const extFloat80_t *bPtr )
+{
+    const struct extFloat80M *aSPtr, *bSPtr;
+    uint_fast16_t uiA64;
+    uint64_t uiA0;
+    uint_fast16_t uiB64;
+    uint64_t uiB0;
+
+    /*------------------------------------------------------------------------
+    *------------------------------------------------------------------------*/
+    aSPtr = (const struct extFloat80M *) aPtr;
+    bSPtr = (const struct extFloat80M *) bPtr;
+    /*------------------------------------------------------------------------
+    *------------------------------------------------------------------------*/
+    uiA64 = aSPtr->signExp;
+    uiA0  = aSPtr->signif;
+    uiB64 = bSPtr->signExp;
+    uiB0  = bSPtr->signif;
+    /*------------------------------------------------------------------------
+    *------------------------------------------------------------------------*/
+    if ( isNaNExtF80UI( uiA64, uiA0 ) || isNaNExtF80UI( uiB64, uiB0 ) ) {
+        if (
+               softfloat_isSigNaNExtF80UI( uiA64, uiA0 )
+            || softfloat_isSigNaNExtF80UI( uiB64, uiB0 )
+        ) {
+            softfloat_raiseFlags( softfloat_flag_invalid );
+        }
+        return false;
+    }
+    /*------------------------------------------------------------------------
+    *------------------------------------------------------------------------*/
+    if ( uiA0 == uiB0 ) {
+        return (uiA64 == uiB64) || ! uiA0;
+    } else {
+        if ( ! ((uiA0 & uiB0) & UINT64_C( 0x8000000000000000 )) ) {
+            return ! softfloat_compareNonnormExtF80M( aSPtr, bSPtr );
+        }
+        return false;
+    }
+
+}
+
+#endif
+
diff --git a/ext/softfloat/extF80M_eq_signaling.c b/ext/softfloat/extF80M_eq_signaling.c
new file mode 100644
index 0000000000..1eed4d8fae
--- /dev/null
+++ b/ext/softfloat/extF80M_eq_signaling.c
@@ -0,0 +1,92 @@
+
+/*============================================================================
+
+This C source file is part of the SoftFloat IEEE Floating-Point Arithmetic
+Package, Release 3d, by John R. Hauser.
+
+Copyright 2011, 2012, 2013, 2014 The Regents of the University of California.
+All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met:
+
+ 1. Redistributions of source code must retain the above copyright notice,
+    this list of conditions, and the following disclaimer.
+
+ 2. Redistributions in binary form must reproduce the above copyright notice,
+    this list of conditions, and the following disclaimer in the documentation
+    and/or other materials provided with the distribution.
+
+ 3. Neither the name of the University nor the names of its contributors may
+    be used to endorse or promote products derived from this software without
+    specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS "AS IS", AND ANY
+EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE, ARE
+DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY
+DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+=============================================================================*/
+
+#include <stdbool.h>
+#include <stdint.h>
+#include "platform.h"
+#include "internals.h"
+#include "softfloat.h"
+
+#ifdef SOFTFLOAT_FAST_INT64
+
+bool extF80M_eq_signaling( const extFloat80_t *aPtr, const extFloat80_t *bPtr )
+{
+
+    return extF80_eq_signaling( *aPtr, *bPtr );
+
+}
+
+#else
+
+bool extF80M_eq_signaling( const extFloat80_t *aPtr, const extFloat80_t *bPtr )
+{
+    const struct extFloat80M *aSPtr, *bSPtr;
+    uint_fast16_t uiA64;
+    uint64_t uiA0;
+    uint_fast16_t uiB64;
+    uint64_t uiB0;
+
+    /*------------------------------------------------------------------------
+    *------------------------------------------------------------------------*/
+    aSPtr = (const struct extFloat80M *) aPtr;
+    bSPtr = (const struct extFloat80M *) bPtr;
+    /*------------------------------------------------------------------------
+    *------------------------------------------------------------------------*/
+    uiA64 = aSPtr->signExp;
+    uiA0  = aSPtr->signif;
+    uiB64 = bSPtr->signExp;
+    uiB0  = bSPtr->signif;
+    /*------------------------------------------------------------------------
+    *------------------------------------------------------------------------*/
+    if ( isNaNExtF80UI( uiA64, uiA0 ) || isNaNExtF80UI( uiB64, uiB0 ) ) {
+        softfloat_raiseFlags( softfloat_flag_invalid );
+        return false;
+    }
+    /*------------------------------------------------------------------------
+    *------------------------------------------------------------------------*/
+    if ( uiA0 == uiB0 ) {
+        return (uiA64 == uiB64) || ! uiA0;
+    } else {
+        if ( ! ((uiA0 & uiB0) & UINT64_C( 0x8000000000000000 )) ) {
+            return ! softfloat_compareNonnormExtF80M( aSPtr, bSPtr );
+        }
+        return false;
+    }
+
+}
+
+#endif
+
diff --git a/ext/softfloat/extF80M_le.c b/ext/softfloat/extF80M_le.c
new file mode 100644
index 0000000000..1926794d7a
--- /dev/null
+++ b/ext/softfloat/extF80M_le.c
@@ -0,0 +1,106 @@
+
+/*============================================================================
+
+This C source file is part of the SoftFloat IEEE Floating-Point Arithmetic
+Package, Release 3d, by John R. Hauser.
+
+Copyright 2011, 2012, 2013, 2014 The Regents of the University of California.
+All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met:
+
+ 1. Redistributions of source code must retain the above copyright notice,
+    this list of conditions, and the following disclaimer.
+
+ 2. Redistributions in binary form must reproduce the above copyright notice,
+    this list of conditions, and the following disclaimer in the documentation
+    and/or other materials provided with the distribution.
+
+ 3. Neither the name of the University nor the names of its contributors may
+    be used to endorse or promote products derived from this software without
+    specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS "AS IS", AND ANY
+EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE, ARE
+DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY
+DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+=============================================================================*/
+
+#include <stdbool.h>
+#include <stdint.h>
+#include "platform.h"
+#include "internals.h"
+#include "softfloat.h"
+
+#ifdef SOFTFLOAT_FAST_INT64
+
+bool extF80M_le( const extFloat80_t *aPtr, const extFloat80_t *bPtr )
+{
+
+    return extF80_le( *aPtr, *bPtr );
+
+}
+
+#else
+
+bool extF80M_le( const extFloat80_t *aPtr, const extFloat80_t *bPtr )
+{
+    const struct extFloat80M *aSPtr, *bSPtr;
+    uint_fast16_t uiA64;
+    uint64_t uiA0;
+    uint_fast16_t uiB64;
+    uint64_t uiB0;
+    bool signA, ltMags;
+
+    /*------------------------------------------------------------------------
+    *------------------------------------------------------------------------*/
+    aSPtr = (const struct extFloat80M *) aPtr;
+    bSPtr = (const struct extFloat80M *) bPtr;
+    /*------------------------------------------------------------------------
+    *------------------------------------------------------------------------*/
+    uiA64 = aSPtr->signExp;
+    uiA0  = aSPtr->signif;
+    uiB64 = bSPtr->signExp;
+    uiB0  = bSPtr->signif;
+    /*------------------------------------------------------------------------
+    *------------------------------------------------------------------------*/
+    if ( isNaNExtF80UI( uiA64, uiA0 ) || isNaNExtF80UI( uiB64, uiB0 ) ) {
+        softfloat_raiseFlags( softfloat_flag_invalid );
+        return false;
+    }
+    /*------------------------------------------------------------------------
+    *------------------------------------------------------------------------*/
+    signA = signExtF80UI64( uiA64 );
+    if ( (uiA64 ^ uiB64) & 0x8000 ) {
+        /*--------------------------------------------------------------------
+        | Signs are different.
+        *--------------------------------------------------------------------*/
+        return signA || ! (uiA0 | uiB0);
+    } else {
+        /*--------------------------------------------------------------------
+        | Signs are the same.
+        *--------------------------------------------------------------------*/
+        if ( ! ((uiA0 & uiB0) & UINT64_C( 0x8000000000000000 )) ) {
+            return (softfloat_compareNonnormExtF80M( aSPtr, bSPtr ) <= 0);
+        }
+        if ( uiA64 == uiB64 ) {
+            if ( uiA0 == uiB0 ) return true;
+            ltMags = (uiA0 < uiB0);
+        } else {
+            ltMags = (uiA64 < uiB64);
+        }
+        return signA ^ ltMags;
+    }
+
+}
+
+#endif
+
diff --git a/ext/softfloat/extF80M_le_quiet.c b/ext/softfloat/extF80M_le_quiet.c
new file mode 100644
index 0000000000..5e9b353ce3
--- /dev/null
+++ b/ext/softfloat/extF80M_le_quiet.c
@@ -0,0 +1,112 @@
+
+/*============================================================================
+
+This C source file is part of the SoftFloat IEEE Floating-Point Arithmetic
+Package, Release 3d, by John R. Hauser.
+
+Copyright 2011, 2012, 2013, 2014 The Regents of the University of California.
+All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met:
+
+ 1. Redistributions of source code must retain the above copyright notice,
+    this list of conditions, and the following disclaimer.
+
+ 2. Redistributions in binary form must reproduce the above copyright notice,
+    this list of conditions, and the following disclaimer in the documentation
+    and/or other materials provided with the distribution.
+
+ 3. Neither the name of the University nor the names of its contributors may
+    be used to endorse or promote products derived from this software without
+    specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS "AS IS", AND ANY
+EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE, ARE
+DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY
+DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+=============================================================================*/
+
+#include <stdbool.h>
+#include <stdint.h>
+#include "platform.h"
+#include "internals.h"
+#include "specialize.h"
+#include "softfloat.h"
+
+#ifdef SOFTFLOAT_FAST_INT64
+
+bool extF80M_le_quiet( const extFloat80_t *aPtr, const extFloat80_t *bPtr )
+{
+
+    return extF80_le_quiet( *aPtr, *bPtr );
+
+}
+
+#else
+
+bool extF80M_le_quiet( const extFloat80_t *aPtr, const extFloat80_t *bPtr )
+{
+    const struct extFloat80M *aSPtr, *bSPtr;
+    uint_fast16_t uiA64;
+    uint64_t uiA0;
+    uint_fast16_t uiB64;
+    uint64_t uiB0;
+    bool signA, ltMags;
+
+    /*------------------------------------------------------------------------
+    *------------------------------------------------------------------------*/
+    aSPtr = (const struct extFloat80M *) aPtr;
+    bSPtr = (const struct extFloat80M *) bPtr;
+    /*------------------------------------------------------------------------
+    *------------------------------------------------------------------------*/
+    uiA64 = aSPtr->signExp;
+    uiA0  = aSPtr->signif;
+    uiB64 = bSPtr->signExp;
+    uiB0  = bSPtr->signif;
+    /*------------------------------------------------------------------------
+    *------------------------------------------------------------------------*/
+    if ( isNaNExtF80UI( uiA64, uiA0 ) || isNaNExtF80UI( uiB64, uiB0 ) ) {
+        if (
+               softfloat_isSigNaNExtF80UI( uiA64, uiA0 )
+            || softfloat_isSigNaNExtF80UI( uiB64, uiB0 )
+        ) {
+            softfloat_raiseFlags( softfloat_flag_invalid );
+        }
+        return false;
+    }
+    /*------------------------------------------------------------------------
+    *------------------------------------------------------------------------*/
+    signA = signExtF80UI64( uiA64 );
+    if ( (uiA64 ^ uiB64) & 0x8000 ) {
+        /*--------------------------------------------------------------------
+        | Signs are different.
+        *--------------------------------------------------------------------*/
+        return signA || ! (uiA0 | uiB0);
+    } else {
+        /*--------------------------------------------------------------------
+        | Signs are the same.
+        *--------------------------------------------------------------------*/
+        if ( ! ((uiA0 & uiB0) & UINT64_C( 0x8000000000000000 )) ) {
+            return (softfloat_compareNonnormExtF80M( aSPtr, bSPtr ) <= 0);
+        }
+        if ( uiA64 == uiB64 ) {
+            if ( uiA0 == uiB0 ) return true;
+            ltMags = (uiA0 < uiB0);
+        } else {
+            ltMags = (uiA64 < uiB64);
+        }
+        return signA ^ ltMags;
+    }
+
+}
+
+#endif
+
diff --git a/ext/softfloat/extF80M_lt.c b/ext/softfloat/extF80M_lt.c
new file mode 100644
index 0000000000..c7bd03fbbc
--- /dev/null
+++ b/ext/softfloat/extF80M_lt.c
@@ -0,0 +1,106 @@
+
+/*============================================================================
+
+This C source file is part of the SoftFloat IEEE Floating-Point Arithmetic
+Package, Release 3d, by John R. Hauser.
+
+Copyright 2011, 2012, 2013, 2014 The Regents of the University of California.
+All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met:
+
+ 1. Redistributions of source code must retain the above copyright notice,
+    this list of conditions, and the following disclaimer.
+
+ 2. Redistributions in binary form must reproduce the above copyright notice,
+    this list of conditions, and the following disclaimer in the documentation
+    and/or other materials provided with the distribution.
+
+ 3. Neither the name of the University nor the names of its contributors may
+    be used to endorse or promote products derived from this software without
+    specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS "AS IS", AND ANY
+EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE, ARE
+DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY
+DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+=============================================================================*/
+
+#include <stdbool.h>
+#include <stdint.h>
+#include "platform.h"
+#include "internals.h"
+#include "softfloat.h"
+
+#ifdef SOFTFLOAT_FAST_INT64
+
+bool extF80M_lt( const extFloat80_t *aPtr, const extFloat80_t *bPtr )
+{
+
+    return extF80_lt( *aPtr, *bPtr );
+
+}
+
+#else
+
+bool extF80M_lt( const extFloat80_t *aPtr, const extFloat80_t *bPtr )
+{
+    const struct extFloat80M *aSPtr, *bSPtr;
+    uint_fast16_t uiA64;
+    uint64_t uiA0;
+    uint_fast16_t uiB64;
+    uint64_t uiB0;
+    bool signA, ltMags;
+
+    /*------------------------------------------------------------------------
+    *------------------------------------------------------------------------*/
+    aSPtr = (const struct extFloat80M *) aPtr;
+    bSPtr = (const struct extFloat80M *) bPtr;
+    /*------------------------------------------------------------------------
+    *------------------------------------------------------------------------*/
+    uiA64 = aSPtr->signExp;
+    uiA0  = aSPtr->signif;
+    uiB64 = bSPtr->signExp;
+    uiB0  = bSPtr->signif;
+    /*------------------------------------------------------------------------
+    *------------------------------------------------------------------------*/
+    if ( isNaNExtF80UI( uiA64, uiA0 ) || isNaNExtF80UI( uiB64, uiB0 ) ) {
+        softfloat_raiseFlags( softfloat_flag_invalid );
+        return false;
+    }
+    /*------------------------------------------------------------------------
+    *------------------------------------------------------------------------*/
+    signA = signExtF80UI64( uiA64 );
+    if ( (uiA64 ^ uiB64) & 0x8000 ) {
+        /*--------------------------------------------------------------------
+        | Signs are different.
+        *--------------------------------------------------------------------*/
+        return signA && ((uiA0 | uiB0) != 0);
+    } else {
+        /*--------------------------------------------------------------------
+        | Signs are the same.
+        *--------------------------------------------------------------------*/
+        if ( ! ((uiA0 & uiB0) & UINT64_C( 0x8000000000000000 )) ) {
+            return (softfloat_compareNonnormExtF80M( aSPtr, bSPtr ) < 0);
+        }
+        if ( uiA64 == uiB64 ) {
+            if ( uiA0 == uiB0 ) return false;
+            ltMags = (uiA0 < uiB0);
+        } else {
+            ltMags = (uiA64 < uiB64);
+        }
+        return signA ^ ltMags;
+    }
+
+}
+
+#endif
+
diff --git a/ext/softfloat/extF80M_lt_quiet.c b/ext/softfloat/extF80M_lt_quiet.c
new file mode 100644
index 0000000000..c0c62d5656
--- /dev/null
+++ b/ext/softfloat/extF80M_lt_quiet.c
@@ -0,0 +1,112 @@
+
+/*============================================================================
+
+This C source file is part of the SoftFloat IEEE Floating-Point Arithmetic
+Package, Release 3d, by John R. Hauser.
+
+Copyright 2011, 2012, 2013, 2014 The Regents of the University of California.
+All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met:
+
+ 1. Redistributions of source code must retain the above copyright notice,
+    this list of conditions, and the following disclaimer.
+
+ 2. Redistributions in binary form must reproduce the above copyright notice,
+    this list of conditions, and the following disclaimer in the documentation
+    and/or other materials provided with the distribution.
+
+ 3. Neither the name of the University nor the names of its contributors may
+    be used to endorse or promote products derived from this software without
+    specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS "AS IS", AND ANY
+EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE, ARE
+DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY
+DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+=============================================================================*/
+
+#include <stdbool.h>
+#include <stdint.h>
+#include "platform.h"
+#include "internals.h"
+#include "specialize.h"
+#include "softfloat.h"
+
+#ifdef SOFTFLOAT_FAST_INT64
+
+bool extF80M_lt_quiet( const extFloat80_t *aPtr, const extFloat80_t *bPtr )
+{
+
+    return extF80_lt_quiet( *aPtr, *bPtr );
+
+}
+
+#else
+
+bool extF80M_lt_quiet( const extFloat80_t *aPtr, const extFloat80_t *bPtr )
+{
+    const struct extFloat80M *aSPtr, *bSPtr;
+    uint_fast16_t uiA64;
+    uint64_t uiA0;
+    uint_fast16_t uiB64;
+    uint64_t uiB0;
+    bool signA, ltMags;
+
+    /*------------------------------------------------------------------------
+    *------------------------------------------------------------------------*/
+    aSPtr = (const struct extFloat80M *) aPtr;
+    bSPtr = (const struct extFloat80M *) bPtr;
+    /*------------------------------------------------------------------------
+    *------------------------------------------------------------------------*/
+    uiA64 = aSPtr->signExp;
+    uiA0  = aSPtr->signif;
+    uiB64 = bSPtr->signExp;
+    uiB0  = bSPtr->signif;
+    /*------------------------------------------------------------------------
+    *------------------------------------------------------------------------*/
+    if ( isNaNExtF80UI( uiA64, uiA0 ) || isNaNExtF80UI( uiB64, uiB0 ) ) {
+        if (
+               softfloat_isSigNaNExtF80UI( uiA64, uiA0 )
+            || softfloat_isSigNaNExtF80UI( uiB64, uiB0 )
+        ) {
+            softfloat_raiseFlags( softfloat_flag_invalid );
+        }
+        return false;
+    }
+    /*------------------------------------------------------------------------
+    *------------------------------------------------------------------------*/
+    signA = signExtF80UI64( uiA64 );
+    if ( (uiA64 ^ uiB64) & 0x8000 ) {
+        /*--------------------------------------------------------------------
+        | Signs are different.
+        *--------------------------------------------------------------------*/
+        return signA && ((uiA0 | uiB0) != 0);
+    } else {
+        /*--------------------------------------------------------------------
+        | Signs are the same.
+        *--------------------------------------------------------------------*/
+        if ( ! ((uiA0 & uiB0) & UINT64_C( 0x8000000000000000 )) ) {
+            return (softfloat_compareNonnormExtF80M( aSPtr, bSPtr ) < 0);
+        }
+        if ( uiA64 == uiB64 ) {
+            if ( uiA0 == uiB0 ) return false;
+            ltMags = (uiA0 < uiB0);
+        } else {
+            ltMags = (uiA64 < uiB64);
+        }
+        return signA ^ ltMags;
+    }
+
+}
+
+#endif
+
diff --git a/ext/softfloat/extF80M_mul.c b/ext/softfloat/extF80M_mul.c
new file mode 100644
index 0000000000..58cdd26328
--- /dev/null
+++ b/ext/softfloat/extF80M_mul.c
@@ -0,0 +1,139 @@
+
+/*============================================================================
+
+This C source file is part of the SoftFloat IEEE Floating-Point Arithmetic
+Package, Release 3d, by John R. Hauser.
+
+Copyright 2011, 2012, 2013, 2014 The Regents of the University of California.
+All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met:
+
+ 1. Redistributions of source code must retain the above copyright notice,
+    this list of conditions, and the following disclaimer.
+
+ 2. Redistributions in binary form must reproduce the above copyright notice,
+    this list of conditions, and the following disclaimer in the documentation
+    and/or other materials provided with the distribution.
+
+ 3. Neither the name of the University nor the names of its contributors may
+    be used to endorse or promote products derived from this software without
+    specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS "AS IS", AND ANY
+EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE, ARE
+DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY
+DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+=============================================================================*/
+
+#include <stdbool.h>
+#include <stdint.h>
+#include "platform.h"
+#include "internals.h"
+#include "specialize.h"
+#include "softfloat.h"
+
+#ifdef SOFTFLOAT_FAST_INT64
+
+void
+ extF80M_mul(
+     const extFloat80_t *aPtr, const extFloat80_t *bPtr, extFloat80_t *zPtr )
+{
+
+    *zPtr = extF80_mul( *aPtr, *bPtr );
+
+}
+
+#else
+
+void
+ extF80M_mul(
+     const extFloat80_t *aPtr, const extFloat80_t *bPtr, extFloat80_t *zPtr )
+{
+    const struct extFloat80M *aSPtr, *bSPtr;
+    struct extFloat80M *zSPtr;
+    uint_fast16_t uiA64;
+    int32_t expA;
+    uint_fast16_t uiB64;
+    int32_t expB;
+    bool signZ;
+    uint_fast16_t exp, uiZ64;
+    uint64_t uiZ0, sigA, sigB;
+    int32_t expZ;
+    uint32_t sigProd[4], *extSigZPtr;
+
+    /*------------------------------------------------------------------------
+    *------------------------------------------------------------------------*/
+    aSPtr = (const struct extFloat80M *) aPtr;
+    bSPtr = (const struct extFloat80M *) bPtr;
+    zSPtr = (struct extFloat80M *) zPtr;
+    /*------------------------------------------------------------------------
+    *------------------------------------------------------------------------*/
+    uiA64 = aSPtr->signExp;
+    expA = expExtF80UI64( uiA64 );
+    uiB64 = bSPtr->signExp;
+    expB = expExtF80UI64( uiB64 );
+    signZ = signExtF80UI64( uiA64 ) ^ signExtF80UI64( uiB64 );
+    /*------------------------------------------------------------------------
+    *------------------------------------------------------------------------*/
+    if ( (expA == 0x7FFF) || (expB == 0x7FFF) ) {
+        if ( softfloat_tryPropagateNaNExtF80M( aSPtr, bSPtr, zSPtr ) ) return;
+        if (
+               (! aSPtr->signif && (expA != 0x7FFF))
+            || (! bSPtr->signif && (expB != 0x7FFF))
+        ) {
+            softfloat_invalidExtF80M( zSPtr );
+            return;
+        }
+        uiZ64 = packToExtF80UI64( signZ, 0x7FFF );
+        uiZ0  = UINT64_C( 0x8000000000000000 );
+        goto uiZ;
+    }
+    /*------------------------------------------------------------------------
+    *------------------------------------------------------------------------*/
+    if ( ! expA ) expA = 1;
+    sigA = aSPtr->signif;
+    if ( ! (sigA & UINT64_C( 0x8000000000000000 )) ) {
+        if ( ! sigA ) goto zero;
+        expA += softfloat_normExtF80SigM( &sigA );
+    }
+    if ( ! expB ) expB = 1;
+    sigB = bSPtr->signif;
+    if ( ! (sigB & UINT64_C( 0x8000000000000000 )) ) {
+        if ( ! sigB ) goto zero;
+        expB += softfloat_normExtF80SigM( &sigB );
+    }
+    /*------------------------------------------------------------------------
+    *------------------------------------------------------------------------*/
+    expZ = expA + expB - 0x3FFE;
+    softfloat_mul64To128M( sigA, sigB, sigProd );
+    if ( sigProd[indexWordLo( 4 )] ) sigProd[indexWord( 4, 1 )] |= 1;
+    extSigZPtr = &sigProd[indexMultiwordHi( 4, 3 )];
+    if ( sigProd[indexWordHi( 4 )] < 0x80000000 ) {
+        --expZ;
+        softfloat_add96M( extSigZPtr, extSigZPtr, extSigZPtr );
+    }
+    softfloat_roundPackMToExtF80M(
+        signZ, expZ, extSigZPtr, extF80_roundingPrecision, zSPtr );
+    return;
+    /*------------------------------------------------------------------------
+    *------------------------------------------------------------------------*/
+ zero:
+    uiZ64 = packToExtF80UI64( signZ, 0 );
+    uiZ0  = 0;
+ uiZ:
+    zSPtr->signExp = uiZ64;
+    zSPtr->signif  = uiZ0;
+
+}
+
+#endif
+
diff --git a/ext/softfloat/extF80M_rem.c b/ext/softfloat/extF80M_rem.c
new file mode 100644
index 0000000000..18e39c96fb
--- /dev/null
+++ b/ext/softfloat/extF80M_rem.c
@@ -0,0 +1,204 @@
+
+/*============================================================================
+
+This C source file is part of the SoftFloat IEEE Floating-Point Arithmetic
+Package, Release 3d, by John R. Hauser.
+
+Copyright 2011, 2012, 2013, 2014 The Regents of the University of California.
+All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met:
+
+ 1. Redistributions of source code must retain the above copyright notice,
+    this list of conditions, and the following disclaimer.
+
+ 2. Redistributions in binary form must reproduce the above copyright notice,
+    this list of conditions, and the following disclaimer in the documentation
+    and/or other materials provided with the distribution.
+
+ 3. Neither the name of the University nor the names of its contributors may
+    be used to endorse or promote products derived from this software without
+    specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS "AS IS", AND ANY
+EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE, ARE
+DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY
+DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+=============================================================================*/
+
+#include <stdbool.h>
+#include <stdint.h>
+#include "platform.h"
+#include "internals.h"
+#include "specialize.h"
+#include "softfloat.h"
+
+#ifdef SOFTFLOAT_FAST_INT64
+
+void
+ extF80M_rem(
+     const extFloat80_t *aPtr, const extFloat80_t *bPtr, extFloat80_t *zPtr )
+{
+
+    *zPtr = extF80_rem( *aPtr, *bPtr );
+
+}
+
+#else
+
+void
+ extF80M_rem(
+     const extFloat80_t *aPtr, const extFloat80_t *bPtr, extFloat80_t *zPtr )
+{
+    const struct extFloat80M *aSPtr, *bSPtr;
+    struct extFloat80M *zSPtr;
+    uint_fast16_t uiA64;
+    int32_t expA, expB;
+    uint64_t x64;
+    bool signRem;
+    uint64_t sigA;
+    int32_t expDiff;
+    uint32_t rem[3], x[3], sig32B, q, recip32, rem2[3], *remPtr, *altRemPtr;
+    uint32_t *newRemPtr, wordMeanRem;
+
+    /*------------------------------------------------------------------------
+    *------------------------------------------------------------------------*/
+    aSPtr = (const struct extFloat80M *) aPtr;
+    bSPtr = (const struct extFloat80M *) bPtr;
+    zSPtr = (struct extFloat80M *) zPtr;
+    /*------------------------------------------------------------------------
+    *------------------------------------------------------------------------*/
+    uiA64 = aSPtr->signExp;
+    expA = expExtF80UI64( uiA64 );
+    expB = expExtF80UI64( bSPtr->signExp );
+    /*------------------------------------------------------------------------
+    *------------------------------------------------------------------------*/
+    if ( (expA == 0x7FFF) || (expB == 0x7FFF) ) {
+        if ( softfloat_tryPropagateNaNExtF80M( aSPtr, bSPtr, zSPtr ) ) return;
+        if ( expA == 0x7FFF ) goto invalid;
+        /*--------------------------------------------------------------------
+        | If we get here, then argument b is an infinity and `expB' is 0x7FFF;
+        | Doubling `expB' is an easy way to ensure that `expDiff' later is
+        | less than -1, which will result in returning a canonicalized version
+        | of argument a.
+        *--------------------------------------------------------------------*/
+        expB += expB;
+    }
+    /*------------------------------------------------------------------------
+    *------------------------------------------------------------------------*/
+    if ( ! expB ) expB = 1;
+    x64 = bSPtr->signif;
+    if ( ! (x64 & UINT64_C( 0x8000000000000000 )) ) {
+        if ( ! x64 ) goto invalid;
+        expB += softfloat_normExtF80SigM( &x64 );
+    }
+    signRem = signExtF80UI64( uiA64 );
+    if ( ! expA ) expA = 1;
+    sigA = aSPtr->signif;
+    if ( ! (sigA & UINT64_C( 0x8000000000000000 )) ) {
+        if ( ! sigA ) {
+            expA = 0;
+            goto copyA;
+        }
+        expA += softfloat_normExtF80SigM( &sigA );
+    }
+    /*------------------------------------------------------------------------
+    *------------------------------------------------------------------------*/
+    expDiff = expA - expB;
+    if ( expDiff < -1 ) goto copyA;
+    rem[indexWord( 3, 2 )] = sigA>>34;
+    rem[indexWord( 3, 1 )] = sigA>>2;
+    rem[indexWord( 3, 0 )] = (uint32_t) sigA<<30;
+    x[indexWord( 3, 0 )] = (uint32_t) x64<<30;
+    sig32B = x64>>32;
+    x64 >>= 2;
+    x[indexWord( 3, 2 )] = x64>>32;
+    x[indexWord( 3, 1 )] = x64;
+    if ( expDiff < 1 ) {
+        if ( expDiff ) {
+            --expB;
+            softfloat_add96M( x, x, x );
+            q = 0;
+        } else {
+            q = (softfloat_compare96M( x, rem ) <= 0);
+            if ( q ) softfloat_sub96M( rem, x, rem );
+        }
+    } else {
+        recip32 = softfloat_approxRecip32_1( sig32B );
+        expDiff -= 30;
+        for (;;) {
+            x64 = (uint64_t) rem[indexWordHi( 3 )] * recip32;
+            if ( expDiff < 0 ) break;
+            q = (x64 + 0x80000000)>>32;
+            softfloat_remStep96MBy32( rem, 29, x, q, rem );
+            if ( rem[indexWordHi( 3 )] & 0x80000000 ) {
+                softfloat_add96M( rem, x, rem );
+            }
+            expDiff -= 29;
+        }
+        /*--------------------------------------------------------------------
+        | (`expDiff' cannot be less than -29 here.)
+        *--------------------------------------------------------------------*/
+        q = (uint32_t) (x64>>32)>>(~expDiff & 31);
+        softfloat_remStep96MBy32( rem, expDiff + 30, x, q, rem );
+        if ( rem[indexWordHi( 3 )] & 0x80000000 ) {
+            remPtr = rem;
+            altRemPtr = rem2;
+            softfloat_add96M( remPtr, x, altRemPtr );
+            goto selectRem;
+        }
+    }
+    /*------------------------------------------------------------------------
+    *------------------------------------------------------------------------*/
+    remPtr = rem;
+    altRemPtr = rem2;
+    do {
+        ++q;
+        newRemPtr = altRemPtr;
+        softfloat_sub96M( remPtr, x, newRemPtr );
+        altRemPtr = remPtr;
+        remPtr = newRemPtr;
+    } while ( ! (remPtr[indexWordHi( 3 )] & 0x80000000) );
+ selectRem:
+    softfloat_add96M( remPtr, altRemPtr, x );
+    wordMeanRem = x[indexWordHi( 3 )];
+    if (
+        (wordMeanRem & 0x80000000)
+            || (! wordMeanRem && (q & 1) && ! x[indexWord( 3, 0 )]
+                    && ! x[indexWord( 3, 1 )])
+    ) {
+        remPtr = altRemPtr;
+    }
+    if ( remPtr[indexWordHi( 3 )] & 0x80000000 ) {
+        signRem = ! signRem;
+        softfloat_negX96M( remPtr );
+    }
+    softfloat_normRoundPackMToExtF80M( signRem, expB + 2, remPtr, 80, zSPtr );
+    return;
+    /*------------------------------------------------------------------------
+    *------------------------------------------------------------------------*/
+ invalid:
+    softfloat_invalidExtF80M( zSPtr );
+    return;
+    /*------------------------------------------------------------------------
+    *------------------------------------------------------------------------*/
+ copyA:
+    if ( expA < 1 ) {
+        sigA >>= 1 - expA;
+        expA = 0;
+    }
+    zSPtr->signExp = packToExtF80UI64( signRem, expA );
+    zSPtr->signif = sigA;
+
+}
+
+#endif
+
diff --git a/ext/softfloat/extF80M_roundToInt.c b/ext/softfloat/extF80M_roundToInt.c
new file mode 100644
index 0000000000..5c4f5f3731
--- /dev/null
+++ b/ext/softfloat/extF80M_roundToInt.c
@@ -0,0 +1,169 @@
+
+/*============================================================================
+
+This C source file is part of the SoftFloat IEEE Floating-Point Arithmetic
+Package, Release 3d, by John R. Hauser.
+
+Copyright 2011, 2012, 2013, 2014, 2017 The Regents of the University of
+California.  All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met:
+
+ 1. Redistributions of source code must retain the above copyright notice,
+    this list of conditions, and the following disclaimer.
+
+ 2. Redistributions in binary form must reproduce the above copyright notice,
+    this list of conditions, and the following disclaimer in the documentation
+    and/or other materials provided with the distribution.
+
+ 3. Neither the name of the University nor the names of its contributors may
+    be used to endorse or promote products derived from this software without
+    specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS "AS IS", AND ANY
+EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE, ARE
+DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY
+DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+=============================================================================*/
+
+#include <stdbool.h>
+#include <stdint.h>
+#include "platform.h"
+#include "internals.h"
+#include "specialize.h"
+#include "softfloat.h"
+
+#ifdef SOFTFLOAT_FAST_INT64
+
+void
+ extF80M_roundToInt(
+     const extFloat80_t *aPtr,
+     uint_fast8_t roundingMode,
+     bool exact,
+     extFloat80_t *zPtr
+ )
+{
+
+    *zPtr = extF80_roundToInt( *aPtr, roundingMode, exact );
+
+}
+
+#else
+
+void
+ extF80M_roundToInt(
+     const extFloat80_t *aPtr,
+     uint_fast8_t roundingMode,
+     bool exact,
+     extFloat80_t *zPtr
+ )
+{
+    const struct extFloat80M *aSPtr;
+    struct extFloat80M *zSPtr;
+    uint_fast16_t uiA64, signUI64;
+    int32_t exp;
+    uint64_t sigA;
+    uint_fast16_t uiZ64;
+    uint64_t sigZ, lastBitMask, roundBitsMask;
+
+    /*------------------------------------------------------------------------
+    *------------------------------------------------------------------------*/
+    aSPtr = (const struct extFloat80M *) aPtr;
+    zSPtr = (struct extFloat80M *) zPtr;
+    /*------------------------------------------------------------------------
+    *------------------------------------------------------------------------*/
+    uiA64 = aSPtr->signExp;
+    signUI64 = uiA64 & packToExtF80UI64( 1, 0 );
+    exp = expExtF80UI64( uiA64 );
+    sigA = aSPtr->signif;
+    /*------------------------------------------------------------------------
+    *------------------------------------------------------------------------*/
+    if ( ! (sigA & UINT64_C( 0x8000000000000000 )) && (exp != 0x7FFF) ) {
+        if ( ! sigA ) {
+            uiZ64 = signUI64;
+            sigZ = 0;
+            goto uiZ;
+        }
+        exp += softfloat_normExtF80SigM( &sigA );
+    }
+    /*------------------------------------------------------------------------
+    *------------------------------------------------------------------------*/
+    if ( exp <= 0x3FFE ) {
+        if ( exact ) softfloat_exceptionFlags |= softfloat_flag_inexact;
+        switch ( roundingMode ) {
+         case softfloat_round_near_even:
+            if ( ! (sigA & UINT64_C( 0x7FFFFFFFFFFFFFFF )) ) break;
+         case softfloat_round_near_maxMag:
+            if ( exp == 0x3FFE ) goto mag1;
+            break;
+         case softfloat_round_min:
+            if ( signUI64 ) goto mag1;
+            break;
+         case softfloat_round_max:
+            if ( ! signUI64 ) goto mag1;
+            break;
+        }
+        uiZ64 = signUI64;
+        sigZ = 0;
+        goto uiZ;
+     mag1:
+        uiZ64 = signUI64 | 0x3FFF;
+        sigZ = UINT64_C( 0x8000000000000000 );
+        goto uiZ;
+    }
+    /*------------------------------------------------------------------------
+    *------------------------------------------------------------------------*/
+    if ( 0x403E <= exp ) {
+        if ( exp == 0x7FFF ) {
+            if ( sigA & UINT64_C( 0x7FFFFFFFFFFFFFFF ) ) {
+                softfloat_propagateNaNExtF80M( aSPtr, 0, zSPtr );
+                return;
+            }
+            sigZ = UINT64_C( 0x8000000000000000 );
+        } else {
+            sigZ = sigA;
+        }
+        uiZ64 = signUI64 | exp;
+        goto uiZ;
+    }
+    /*------------------------------------------------------------------------
+    *------------------------------------------------------------------------*/
+    uiZ64 = signUI64 | exp;
+    lastBitMask = (uint64_t) 1<<(0x403E - exp);
+    roundBitsMask = lastBitMask - 1;
+    sigZ = sigA;
+    if ( roundingMode == softfloat_round_near_maxMag ) {
+        sigZ += lastBitMask>>1;
+    } else if ( roundingMode == softfloat_round_near_even ) {
+        sigZ += lastBitMask>>1;
+        if ( ! (sigZ & roundBitsMask) ) sigZ &= ~lastBitMask;
+    } else if (
+        roundingMode == (signUI64 ? softfloat_round_min : softfloat_round_max)
+    ) {
+        sigZ += roundBitsMask;
+    }
+    sigZ &= ~roundBitsMask;
+    if ( ! sigZ ) {
+        ++uiZ64;
+        sigZ = UINT64_C( 0x8000000000000000 );
+    }
+    if ( exact && (sigZ != sigA) ) {
+        softfloat_exceptionFlags |= softfloat_flag_inexact;
+    }
+ uiZ:
+    zSPtr->signExp = uiZ64;
+    zSPtr->signif = sigZ;
+    return;
+
+}
+
+#endif
+
diff --git a/ext/softfloat/extF80M_sqrt.c b/ext/softfloat/extF80M_sqrt.c
new file mode 100644
index 0000000000..be532cf2bb
--- /dev/null
+++ b/ext/softfloat/extF80M_sqrt.c
@@ -0,0 +1,180 @@
+
+/*============================================================================
+
+This C source file is part of the SoftFloat IEEE Floating-Point Arithmetic
+Package, Release 3d, by John R. Hauser.
+
+Copyright 2011, 2012, 2013, 2014, 2015, 2017 The Regents of the University of
+California.  All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met:
+
+ 1. Redistributions of source code must retain the above copyright notice,
+    this list of conditions, and the following disclaimer.
+
+ 2. Redistributions in binary form must reproduce the above copyright notice,
+    this list of conditions, and the following disclaimer in the documentation
+    and/or other materials provided with the distribution.
+
+ 3. Neither the name of the University nor the names of its contributors may
+    be used to endorse or promote products derived from this software without
+    specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS "AS IS", AND ANY
+EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE, ARE
+DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY
+DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+=============================================================================*/
+
+#include <stdbool.h>
+#include <stdint.h>
+#include "platform.h"
+#include "internals.h"
+#include "specialize.h"
+#include "softfloat.h"
+
+#ifdef SOFTFLOAT_FAST_INT64
+
+void extF80M_sqrt( const extFloat80_t *aPtr, extFloat80_t *zPtr )
+{
+
+    *zPtr = extF80_sqrt( *aPtr );
+
+}
+
+#else
+
+void extF80M_sqrt( const extFloat80_t *aPtr, extFloat80_t *zPtr )
+{
+    const struct extFloat80M *aSPtr;
+    struct extFloat80M *zSPtr;
+    uint_fast16_t uiA64, signUI64;
+    int32_t expA;
+    uint64_t rem64;
+    int32_t expZ;
+    uint32_t rem96[3], sig32A, recipSqrt32, sig32Z, q;
+    uint64_t sig64Z, x64;
+    uint32_t rem32, term[4], rem[4], extSigZ[3];
+
+    /*------------------------------------------------------------------------
+    *------------------------------------------------------------------------*/
+    aSPtr = (const struct extFloat80M *) aPtr;
+    zSPtr = (struct extFloat80M *) zPtr;
+    /*------------------------------------------------------------------------
+    *------------------------------------------------------------------------*/
+    uiA64 = aSPtr->signExp;
+    signUI64 = uiA64 & packToExtF80UI64( 1, 0 );
+    expA = expExtF80UI64( uiA64 );
+    rem64 = aSPtr->signif;
+    /*------------------------------------------------------------------------
+    *------------------------------------------------------------------------*/
+    if ( expA == 0x7FFF ) {
+        if ( rem64 & UINT64_C( 0x7FFFFFFFFFFFFFFF ) ) {
+            softfloat_propagateNaNExtF80M( aSPtr, 0, zSPtr );
+            return;
+        }
+        if ( signUI64 ) goto invalid;
+        rem64 = UINT64_C( 0x8000000000000000 );
+        goto copyA;
+    }
+    /*------------------------------------------------------------------------
+    *------------------------------------------------------------------------*/
+    if ( ! expA ) expA = 1;
+    if ( ! (rem64 & UINT64_C( 0x8000000000000000 )) ) {
+        if ( ! rem64 ) {
+            uiA64 = signUI64;
+            goto copyA;
+        }
+        expA += softfloat_normExtF80SigM( &rem64 );
+    }
+    if ( signUI64 ) goto invalid;
+    /*------------------------------------------------------------------------
+    *------------------------------------------------------------------------*/
+    expZ = ((expA - 0x3FFF)>>1) + 0x3FFF;
+    expA &= 1;
+    softfloat_shortShiftLeft64To96M( rem64, 30 - expA, rem96 );
+    sig32A = rem64>>32;
+    recipSqrt32 = softfloat_approxRecipSqrt32_1( expA, sig32A );
+    sig32Z = ((uint64_t) sig32A * recipSqrt32)>>32;
+    if ( expA ) sig32Z >>= 1;
+    rem64 =
+        ((uint64_t) rem96[indexWord( 3, 2 )]<<32 | rem96[indexWord( 3, 1 )])
+            - (uint64_t) sig32Z * sig32Z;
+    rem96[indexWord( 3, 2 )] = rem64>>32;
+    rem96[indexWord( 3, 1 )] = rem64;
+    /*------------------------------------------------------------------------
+    *------------------------------------------------------------------------*/
+    q = ((uint32_t) (rem64>>2) * (uint64_t) recipSqrt32)>>32;
+    sig64Z = ((uint64_t) sig32Z<<32) + ((uint64_t) q<<3);
+    term[indexWord( 3, 2 )] = 0;
+    /*------------------------------------------------------------------------
+    | (Repeating this loop is a rare occurrence.)
+    *------------------------------------------------------------------------*/
+    for (;;) {
+        x64 = ((uint64_t) sig32Z<<32) + sig64Z;
+        term[indexWord( 3, 1 )] = x64>>32;
+        term[indexWord( 3, 0 )] = x64;
+        softfloat_remStep96MBy32(
+            rem96, 29, term, q, &rem[indexMultiwordHi( 4, 3 )] );
+        rem32 = rem[indexWord( 4, 3 )];
+        if ( ! (rem32 & 0x80000000) ) break;
+        --q;
+        sig64Z -= 1<<3;
+    }
+    rem64 = (uint64_t) rem32<<32 | rem[indexWord( 4, 2 )];
+    /*------------------------------------------------------------------------
+    *------------------------------------------------------------------------*/
+    q = (((uint32_t) (rem64>>2) * (uint64_t) recipSqrt32)>>32) + 2;
+    if ( rem64>>34 ) q += recipSqrt32;
+    x64 = (uint64_t) q<<7;
+    extSigZ[indexWord( 3, 0 )] = x64;
+    x64 = (sig64Z<<1) + (x64>>32);
+    extSigZ[indexWord( 3, 2 )] = x64>>32;
+    extSigZ[indexWord( 3, 1 )] = x64;
+    /*------------------------------------------------------------------------
+    *------------------------------------------------------------------------*/
+    if ( (q & 0xFFFFFF) <= 2 ) {
+        q &= ~(uint32_t) 0xFFFF;
+        extSigZ[indexWordLo( 3 )] = q<<7;
+        x64 = sig64Z + (q>>27);
+        term[indexWord( 4, 3 )] = 0;
+        term[indexWord( 4, 2 )] = x64>>32;
+        term[indexWord( 4, 1 )] = x64;
+        term[indexWord( 4, 0 )] = q<<5;
+        rem[indexWord( 4, 0 )] = 0;
+        softfloat_remStep128MBy32( rem, 28, term, q, rem );
+        q = rem[indexWordHi( 4 )];
+        if ( q & 0x80000000 ) {
+            softfloat_sub1X96M( extSigZ );
+        } else {
+            if ( q || rem[indexWord( 4, 1 )] || rem[indexWord( 4, 2 )] ) {
+                extSigZ[indexWordLo( 3 )] |= 1;
+            }
+        }
+    }
+    softfloat_roundPackMToExtF80M(
+        0, expZ, extSigZ, extF80_roundingPrecision, zSPtr );
+    return;
+    /*------------------------------------------------------------------------
+    *------------------------------------------------------------------------*/
+ invalid:
+    softfloat_invalidExtF80M( zSPtr );
+    return;
+    /*------------------------------------------------------------------------
+    *------------------------------------------------------------------------*/
+ copyA:
+    zSPtr->signExp = uiA64;
+    zSPtr->signif  = rem64;
+
+}
+
+#endif
+
diff --git a/ext/softfloat/extF80M_sub.c b/ext/softfloat/extF80M_sub.c
new file mode 100644
index 0000000000..2f01d7e493
--- /dev/null
+++ b/ext/softfloat/extF80M_sub.c
@@ -0,0 +1,100 @@
+
+/*============================================================================
+
+This C source file is part of the SoftFloat IEEE Floating-Point Arithmetic
+Package, Release 3d, by John R. Hauser.
+
+Copyright 2011, 2012, 2013, 2014 The Regents of the University of California.
+All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met:
+
+ 1. Redistributions of source code must retain the above copyright notice,
+    this list of conditions, and the following disclaimer.
+
+ 2. Redistributions in binary form must reproduce the above copyright notice,
+    this list of conditions, and the following disclaimer in the documentation
+    and/or other materials provided with the distribution.
+
+ 3. Neither the name of the University nor the names of its contributors may
+    be used to endorse or promote products derived from this software without
+    specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS "AS IS", AND ANY
+EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE, ARE
+DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY
+DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+=============================================================================*/
+
+#include <stdbool.h>
+#include "platform.h"
+#include "internals.h"
+#include "softfloat.h"
+
+#ifdef SOFTFLOAT_FAST_INT64
+
+void
+ extF80M_sub(
+     const extFloat80_t *aPtr, const extFloat80_t *bPtr, extFloat80_t *zPtr )
+{
+    const struct extFloat80M *aSPtr, *bSPtr;
+    uint_fast16_t uiA64;
+    uint_fast64_t uiA0;
+    bool signA;
+    uint_fast16_t uiB64;
+    uint_fast64_t uiB0;
+    bool signB;
+#if ! defined INLINE_LEVEL || (INLINE_LEVEL < 2)
+    extFloat80_t
+        (*magsFuncPtr)(
+            uint_fast16_t, uint_fast64_t, uint_fast16_t, uint_fast64_t, bool );
+#endif
+
+    aSPtr = (const struct extFloat80M *) aPtr;
+    bSPtr = (const struct extFloat80M *) bPtr;
+    uiA64 = aSPtr->signExp;
+    uiA0  = aSPtr->signif;
+    signA = signExtF80UI64( uiA64 );
+    uiB64 = bSPtr->signExp;
+    uiB0  = bSPtr->signif;
+    signB = signExtF80UI64( uiB64 );
+#if defined INLINE_LEVEL && (2 <= INLINE_LEVEL)
+    if ( signA == signB ) {
+        *zPtr = softfloat_subMagsExtF80( uiA64, uiA0, uiB64, uiB0, signA );
+    } else {
+        *zPtr = softfloat_addMagsExtF80( uiA64, uiA0, uiB64, uiB0, signA );
+    }
+#else
+    magsFuncPtr =
+        (signA == signB) ? softfloat_subMagsExtF80 : softfloat_addMagsExtF80;
+    *zPtr = (*magsFuncPtr)( uiA64, uiA0, uiB64, uiB0, signA );
+#endif
+
+}
+
+#else
+
+void
+ extF80M_sub(
+     const extFloat80_t *aPtr, const extFloat80_t *bPtr, extFloat80_t *zPtr )
+{
+
+    softfloat_addExtF80M(
+        (const struct extFloat80M *) aPtr,
+        (const struct extFloat80M *) bPtr,
+        (struct extFloat80M *) zPtr,
+        true
+    );
+
+}
+
+#endif
+
diff --git a/ext/softfloat/extF80M_to_f128M.c b/ext/softfloat/extF80M_to_f128M.c
new file mode 100644
index 0000000000..15c6d26b53
--- /dev/null
+++ b/ext/softfloat/extF80M_to_f128M.c
@@ -0,0 +1,125 @@
+
+/*============================================================================
+
+This C source file is part of the SoftFloat IEEE Floating-Point Arithmetic
+Package, Release 3d, by John R. Hauser.
+
+Copyright 2011, 2012, 2013, 2014 The Regents of the University of California.
+All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met:
+
+ 1. Redistributions of source code must retain the above copyright notice,
+    this list of conditions, and the following disclaimer.
+
+ 2. Redistributions in binary form must reproduce the above copyright notice,
+    this list of conditions, and the following disclaimer in the documentation
+    and/or other materials provided with the distribution.
+
+ 3. Neither the name of the University nor the names of its contributors may
+    be used to endorse or promote products derived from this software without
+    specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS "AS IS", AND ANY
+EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE, ARE
+DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY
+DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+=============================================================================*/
+
+#include <stdbool.h>
+#include <stdint.h>
+#include "platform.h"
+#include "internals.h"
+#include "specialize.h"
+#include "softfloat.h"
+
+#ifdef SOFTFLOAT_FAST_INT64
+
+void extF80M_to_f128M( const extFloat80_t *aPtr, float128_t *zPtr )
+{
+
+    *zPtr = extF80_to_f128( *aPtr );
+
+}
+
+#else
+
+void extF80M_to_f128M( const extFloat80_t *aPtr, float128_t *zPtr )
+{
+    const struct extFloat80M *aSPtr;
+    uint32_t *zWPtr;
+    uint_fast16_t uiA64;
+    bool sign;
+    int32_t exp;
+    uint64_t sig;
+    struct commonNaN commonNaN;
+    uint32_t uiZ96;
+
+    /*------------------------------------------------------------------------
+    *------------------------------------------------------------------------*/
+    aSPtr = (const struct extFloat80M *) aPtr;
+    zWPtr = (uint32_t *) zPtr;
+    /*------------------------------------------------------------------------
+    *------------------------------------------------------------------------*/
+    uiA64 = aSPtr->signExp;
+    sign = signExtF80UI64( uiA64 );
+    exp  = expExtF80UI64( uiA64 );
+    sig = aSPtr->signif;
+    /*------------------------------------------------------------------------
+    *------------------------------------------------------------------------*/
+    zWPtr[indexWord( 4, 0 )] = 0;
+    if ( exp == 0x7FFF ) {
+        if ( sig & UINT64_C( 0x7FFFFFFFFFFFFFFF ) ) {
+            softfloat_extF80MToCommonNaN( aSPtr, &commonNaN );
+            softfloat_commonNaNToF128M( &commonNaN, zWPtr );
+            return;
+        }
+        uiZ96 = packToF128UI96( sign, 0x7FFF, 0 );
+        goto uiZ;
+    }
+    /*------------------------------------------------------------------------
+    *------------------------------------------------------------------------*/
+    if ( exp ) --exp;
+    if ( ! (sig & UINT64_C( 0x8000000000000000 )) ) {
+        if ( ! sig ) {
+            uiZ96 = packToF128UI96( sign, 0, 0 );
+            goto uiZ;
+        }
+        exp += softfloat_normExtF80SigM( &sig );
+    }
+    /*------------------------------------------------------------------------
+    *------------------------------------------------------------------------*/
+    zWPtr[indexWord( 4, 1 )] = (uint32_t) sig<<17;
+    sig >>= 15;
+    zWPtr[indexWord( 4, 2 )] = sig;
+    if ( exp < 0 ) {
+        zWPtr[indexWordHi( 4 )] = sig>>32;
+        softfloat_shiftRight96M(
+            &zWPtr[indexMultiwordHi( 4, 3 )],
+            -exp,
+            &zWPtr[indexMultiwordHi( 4, 3 )]
+        );
+        exp = 0;
+        sig = (uint64_t) zWPtr[indexWordHi( 4 )]<<32;
+    }
+    zWPtr[indexWordHi( 4 )] = packToF128UI96( sign, exp, sig>>32 );
+    return;
+    /*------------------------------------------------------------------------
+    *------------------------------------------------------------------------*/
+ uiZ:
+    zWPtr[indexWord( 4, 3 )] = uiZ96;
+    zWPtr[indexWord( 4, 2 )] = 0;
+    zWPtr[indexWord( 4, 1 )] = 0;
+
+}
+
+#endif
+
diff --git a/ext/softfloat/extF80M_to_f16.c b/ext/softfloat/extF80M_to_f16.c
new file mode 100644
index 0000000000..9bf2eb884c
--- /dev/null
+++ b/ext/softfloat/extF80M_to_f16.c
@@ -0,0 +1,112 @@
+
+/*============================================================================
+
+This C source file is part of the SoftFloat IEEE Floating-Point Arithmetic
+Package, Release 3d, by John R. Hauser.
+
+Copyright 2011, 2012, 2013, 2014, 2015 The Regents of the University of
+California.  All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met:
+
+ 1. Redistributions of source code must retain the above copyright notice,
+    this list of conditions, and the following disclaimer.
+
+ 2. Redistributions in binary form must reproduce the above copyright notice,
+    this list of conditions, and the following disclaimer in the documentation
+    and/or other materials provided with the distribution.
+
+ 3. Neither the name of the University nor the names of its contributors may
+    be used to endorse or promote products derived from this software without
+    specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS "AS IS", AND ANY
+EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE, ARE
+DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY
+DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+=============================================================================*/
+
+#include <stdbool.h>
+#include <stdint.h>
+#include "platform.h"
+#include "internals.h"
+#include "specialize.h"
+#include "softfloat.h"
+
+#ifdef SOFTFLOAT_FAST_INT64
+
+float16_t extF80M_to_f16( const extFloat80_t *aPtr )
+{
+
+    return extF80_to_f16( *aPtr );
+
+}
+
+#else
+
+float16_t extF80M_to_f16( const extFloat80_t *aPtr )
+{
+    const struct extFloat80M *aSPtr;
+    uint_fast16_t uiA64;
+    bool sign;
+    int32_t exp;
+    uint64_t sig;
+    struct commonNaN commonNaN;
+    uint16_t uiZ, sig16;
+    union ui16_f16 uZ;
+
+    /*------------------------------------------------------------------------
+    *------------------------------------------------------------------------*/
+    aSPtr = (const struct extFloat80M *) aPtr;
+    /*------------------------------------------------------------------------
+    *------------------------------------------------------------------------*/
+    uiA64 = aSPtr->signExp;
+    sign = signExtF80UI64( uiA64 );
+    exp  = expExtF80UI64( uiA64 );
+    sig = aSPtr->signif;
+    /*------------------------------------------------------------------------
+    *------------------------------------------------------------------------*/
+    if ( exp == 0x7FFF ) {
+        if ( sig & UINT64_C( 0x7FFFFFFFFFFFFFFF ) ) {
+            softfloat_extF80MToCommonNaN( aSPtr, &commonNaN );
+            uiZ = softfloat_commonNaNToF16UI( &commonNaN );
+        } else {
+            uiZ = packToF16UI( sign, 0x1F, 0 );
+        }
+        goto uiZ;
+    }
+    /*------------------------------------------------------------------------
+    *------------------------------------------------------------------------*/
+    if ( ! (sig & UINT64_C( 0x8000000000000000 )) ) {
+        if ( ! sig ) {
+            uiZ = packToF16UI( sign, 0, 0 );
+            goto uiZ;
+        }
+        exp += softfloat_normExtF80SigM( &sig );
+    }
+    /*------------------------------------------------------------------------
+    *------------------------------------------------------------------------*/
+    sig16 = softfloat_shortShiftRightJam64( sig, 49 );
+    exp -= 0x3FF1;
+    if ( sizeof (int_fast16_t) < sizeof (int32_t) ) {
+        if ( exp < -0x40 ) exp = -0x40;
+    }
+    return softfloat_roundPackToF16( sign, exp, sig16 );
+    /*------------------------------------------------------------------------
+    *------------------------------------------------------------------------*/
+ uiZ:
+    uZ.ui = uiZ;
+    return uZ.f;
+
+}
+
+#endif
+
diff --git a/ext/softfloat/extF80M_to_f32.c b/ext/softfloat/extF80M_to_f32.c
new file mode 100644
index 0000000000..89aa277302
--- /dev/null
+++ b/ext/softfloat/extF80M_to_f32.c
@@ -0,0 +1,112 @@
+
+/*============================================================================
+
+This C source file is part of the SoftFloat IEEE Floating-Point Arithmetic
+Package, Release 3d, by John R. Hauser.
+
+Copyright 2011, 2012, 2013, 2014, 2015 The Regents of the University of
+California.  All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met:
+
+ 1. Redistributions of source code must retain the above copyright notice,
+    this list of conditions, and the following disclaimer.
+
+ 2. Redistributions in binary form must reproduce the above copyright notice,
+    this list of conditions, and the following disclaimer in the documentation
+    and/or other materials provided with the distribution.
+
+ 3. Neither the name of the University nor the names of its contributors may
+    be used to endorse or promote products derived from this software without
+    specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS "AS IS", AND ANY
+EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE, ARE
+DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY
+DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+=============================================================================*/
+
+#include <stdbool.h>
+#include <stdint.h>
+#include "platform.h"
+#include "internals.h"
+#include "specialize.h"
+#include "softfloat.h"
+
+#ifdef SOFTFLOAT_FAST_INT64
+
+float32_t extF80M_to_f32( const extFloat80_t *aPtr )
+{
+
+    return extF80_to_f32( *aPtr );
+
+}
+
+#else
+
+float32_t extF80M_to_f32( const extFloat80_t *aPtr )
+{
+    const struct extFloat80M *aSPtr;
+    uint_fast16_t uiA64;
+    bool sign;
+    int32_t exp;
+    uint64_t sig;
+    struct commonNaN commonNaN;
+    uint32_t uiZ, sig32;
+    union ui32_f32 uZ;
+
+    /*------------------------------------------------------------------------
+    *------------------------------------------------------------------------*/
+    aSPtr = (const struct extFloat80M *) aPtr;
+    /*------------------------------------------------------------------------
+    *------------------------------------------------------------------------*/
+    uiA64 = aSPtr->signExp;
+    sign = signExtF80UI64( uiA64 );
+    exp  = expExtF80UI64( uiA64 );
+    sig = aSPtr->signif;
+    /*------------------------------------------------------------------------
+    *------------------------------------------------------------------------*/
+    if ( exp == 0x7FFF ) {
+        if ( sig & UINT64_C( 0x7FFFFFFFFFFFFFFF ) ) {
+            softfloat_extF80MToCommonNaN( aSPtr, &commonNaN );
+            uiZ = softfloat_commonNaNToF32UI( &commonNaN );
+        } else {
+            uiZ = packToF32UI( sign, 0xFF, 0 );
+        }
+        goto uiZ;
+    }
+    /*------------------------------------------------------------------------
+    *------------------------------------------------------------------------*/
+    if ( ! (sig & UINT64_C( 0x8000000000000000 )) ) {
+        if ( ! sig ) {
+            uiZ = packToF32UI( sign, 0, 0 );
+            goto uiZ;
+        }
+        exp += softfloat_normExtF80SigM( &sig );
+    }
+    /*------------------------------------------------------------------------
+    *------------------------------------------------------------------------*/
+    sig32 = softfloat_shortShiftRightJam64( sig, 33 );
+    exp -= 0x3F81;
+    if ( sizeof (int_fast16_t) < sizeof (int32_t) ) {
+        if ( exp < -0x1000 ) exp = -0x1000;
+    }
+    return softfloat_roundPackToF32( sign, exp, sig32 );
+    /*------------------------------------------------------------------------
+    *------------------------------------------------------------------------*/
+ uiZ:
+    uZ.ui = uiZ;
+    return uZ.f;
+
+}
+
+#endif
+
diff --git a/ext/softfloat/extF80M_to_f64.c b/ext/softfloat/extF80M_to_f64.c
new file mode 100644
index 0000000000..d12e3ed024
--- /dev/null
+++ b/ext/softfloat/extF80M_to_f64.c
@@ -0,0 +1,112 @@
+
+/*============================================================================
+
+This C source file is part of the SoftFloat IEEE Floating-Point Arithmetic
+Package, Release 3d, by John R. Hauser.
+
+Copyright 2011, 2012, 2013, 2014, 2015 The Regents of the University of
+California.  All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met:
+
+ 1. Redistributions of source code must retain the above copyright notice,
+    this list of conditions, and the following disclaimer.
+
+ 2. Redistributions in binary form must reproduce the above copyright notice,
+    this list of conditions, and the following disclaimer in the documentation
+    and/or other materials provided with the distribution.
+
+ 3. Neither the name of the University nor the names of its contributors may
+    be used to endorse or promote products derived from this software without
+    specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS "AS IS", AND ANY
+EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE, ARE
+DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY
+DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+=============================================================================*/
+
+#include <stdbool.h>
+#include <stdint.h>
+#include "platform.h"
+#include "internals.h"
+#include "specialize.h"
+#include "softfloat.h"
+
+#ifdef SOFTFLOAT_FAST_INT64
+
+float64_t extF80M_to_f64( const extFloat80_t *aPtr )
+{
+
+    return extF80_to_f64( *aPtr );
+
+}
+
+#else
+
+float64_t extF80M_to_f64( const extFloat80_t *aPtr )
+{
+    const struct extFloat80M *aSPtr;
+    uint_fast16_t uiA64;
+    bool sign;
+    int32_t exp;
+    uint64_t sig;
+    struct commonNaN commonNaN;
+    uint64_t uiZ;
+    union ui64_f64 uZ;
+
+    /*------------------------------------------------------------------------
+    *------------------------------------------------------------------------*/
+    aSPtr = (const struct extFloat80M *) aPtr;
+    /*------------------------------------------------------------------------
+    *------------------------------------------------------------------------*/
+    uiA64 = aSPtr->signExp;
+    sign = signExtF80UI64( uiA64 );
+    exp  = expExtF80UI64( uiA64 );
+    sig = aSPtr->signif;
+    /*------------------------------------------------------------------------
+    *------------------------------------------------------------------------*/
+    if ( exp == 0x7FFF ) {
+        if ( sig & UINT64_C( 0x7FFFFFFFFFFFFFFF ) ) {
+            softfloat_extF80MToCommonNaN( aSPtr, &commonNaN );
+            uiZ = softfloat_commonNaNToF64UI( &commonNaN );
+        } else {
+            uiZ = packToF64UI( sign, 0x7FF, 0 );
+        }
+        goto uiZ;
+    }
+    /*------------------------------------------------------------------------
+    *------------------------------------------------------------------------*/
+    if ( ! (sig & UINT64_C( 0x8000000000000000 )) ) {
+        if ( ! sig ) {
+            uiZ = packToF64UI( sign, 0, 0 );
+            goto uiZ;
+        }
+        exp += softfloat_normExtF80SigM( &sig );
+    }
+    /*------------------------------------------------------------------------
+    *------------------------------------------------------------------------*/
+    sig = softfloat_shortShiftRightJam64( sig, 1 );
+    exp -= 0x3C01;
+    if ( sizeof (int_fast16_t) < sizeof (int32_t) ) {
+        if ( exp < -0x1000 ) exp = -0x1000;
+    }
+    return softfloat_roundPackToF64( sign, exp, sig );
+    /*------------------------------------------------------------------------
+    *------------------------------------------------------------------------*/
+ uiZ:
+    uZ.ui = uiZ;
+    return uZ.f;
+
+}
+
+#endif
+
diff --git a/ext/softfloat/extF80M_to_i32.c b/ext/softfloat/extF80M_to_i32.c
new file mode 100644
index 0000000000..d29a8673d9
--- /dev/null
+++ b/ext/softfloat/extF80M_to_i32.c
@@ -0,0 +1,100 @@
+
+/*============================================================================
+
+This C source file is part of the SoftFloat IEEE Floating-Point Arithmetic
+Package, Release 3d, by John R. Hauser.
+
+Copyright 2011, 2012, 2013, 2014, 2015, 2016, 2017 The Regents of the
+University of California.  All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met:
+
+ 1. Redistributions of source code must retain the above copyright notice,
+    this list of conditions, and the following disclaimer.
+
+ 2. Redistributions in binary form must reproduce the above copyright notice,
+    this list of conditions, and the following disclaimer in the documentation
+    and/or other materials provided with the distribution.
+
+ 3. Neither the name of the University nor the names of its contributors may
+    be used to endorse or promote products derived from this software without
+    specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS "AS IS", AND ANY
+EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE, ARE
+DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY
+DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+=============================================================================*/
+
+#include <stdbool.h>
+#include <stdint.h>
+#include "platform.h"
+#include "internals.h"
+#include "specialize.h"
+#include "softfloat.h"
+
+#ifdef SOFTFLOAT_FAST_INT64
+
+int_fast32_t
+ extF80M_to_i32(
+     const extFloat80_t *aPtr, uint_fast8_t roundingMode, bool exact )
+{
+
+    return extF80_to_i32( *aPtr, roundingMode, exact );
+
+}
+
+#else
+
+int_fast32_t
+ extF80M_to_i32(
+     const extFloat80_t *aPtr, uint_fast8_t roundingMode, bool exact )
+{
+    const struct extFloat80M *aSPtr;
+    uint_fast16_t uiA64;
+    bool sign;
+    int32_t exp;
+    uint64_t sig;
+    int32_t shiftDist;
+
+    /*------------------------------------------------------------------------
+    *------------------------------------------------------------------------*/
+    aSPtr = (const struct extFloat80M *) aPtr;
+    uiA64 = aSPtr->signExp;
+    sign = signExtF80UI64( uiA64 );
+    exp  = expExtF80UI64( uiA64 );
+    sig = aSPtr->signif;
+    /*------------------------------------------------------------------------
+    *------------------------------------------------------------------------*/
+    shiftDist = 0x4032 - exp;
+    if ( shiftDist <= 0 ) {
+        if ( sig>>32 ) goto invalid;
+        if ( -32 < shiftDist ) {
+            sig <<= -shiftDist;
+        } else {
+            if ( (uint32_t) sig ) goto invalid;
+        }
+    } else {
+        sig = softfloat_shiftRightJam64( sig, shiftDist );
+    }
+    return softfloat_roundToI32( sign, sig, roundingMode, exact );
+    /*------------------------------------------------------------------------
+    *------------------------------------------------------------------------*/
+ invalid:
+    softfloat_raiseFlags( softfloat_flag_invalid );
+    return
+        (exp == 0x7FFF) && (sig & UINT64_C( 0x7FFFFFFFFFFFFFFF )) ? i32_fromNaN
+            : sign ? i32_fromNegOverflow : i32_fromPosOverflow;
+
+}
+
+#endif
+
diff --git a/ext/softfloat/extF80M_to_i32_r_minMag.c b/ext/softfloat/extF80M_to_i32_r_minMag.c
new file mode 100644
index 0000000000..daf250598f
--- /dev/null
+++ b/ext/softfloat/extF80M_to_i32_r_minMag.c
@@ -0,0 +1,120 @@
+
+/*============================================================================
+
+This C source file is part of the SoftFloat IEEE Floating-Point Arithmetic
+Package, Release 3d, by John R. Hauser.
+
+Copyright 2011, 2012, 2013, 2014, 2015, 2016 The Regents of the University of
+California.  All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met:
+
+ 1. Redistributions of source code must retain the above copyright notice,
+    this list of conditions, and the following disclaimer.
+
+ 2. Redistributions in binary form must reproduce the above copyright notice,
+    this list of conditions, and the following disclaimer in the documentation
+    and/or other materials provided with the distribution.
+
+ 3. Neither the name of the University nor the names of its contributors may
+    be used to endorse or promote products derived from this software without
+    specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS "AS IS", AND ANY
+EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE, ARE
+DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY
+DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+=============================================================================*/
+
+#include <stdbool.h>
+#include <stdint.h>
+#include "platform.h"
+#include "internals.h"
+#include "specialize.h"
+#include "softfloat.h"
+
+#ifdef SOFTFLOAT_FAST_INT64
+
+int_fast32_t extF80M_to_i32_r_minMag( const extFloat80_t *aPtr, bool exact )
+{
+
+    return extF80_to_i32_r_minMag( *aPtr, exact );
+
+}
+
+#else
+
+int_fast32_t extF80M_to_i32_r_minMag( const extFloat80_t *aPtr, bool exact )
+{
+    const struct extFloat80M *aSPtr;
+    uint_fast16_t uiA64;
+    int32_t exp;
+    uint64_t sig;
+    int32_t shiftDist;
+    bool sign, raiseInexact;
+    int32_t z;
+    uint64_t shiftedSig;
+    uint32_t absZ;
+    union { uint32_t ui; int32_t i; } u;
+
+    /*------------------------------------------------------------------------
+    *------------------------------------------------------------------------*/
+    aSPtr = (const struct extFloat80M *) aPtr;
+    uiA64 = aSPtr->signExp;
+    exp = expExtF80UI64( uiA64 );
+    sig = aSPtr->signif;
+    /*------------------------------------------------------------------------
+    *------------------------------------------------------------------------*/
+    if ( ! sig && (exp != 0x7FFF) ) return 0;
+    shiftDist = 0x403E - exp;
+    if ( 64 <= shiftDist ) {
+        raiseInexact = exact;
+        z = 0;
+    } else {
+        sign = signExtF80UI64( uiA64 );
+        raiseInexact = false;
+        if ( shiftDist < 0 ) {
+            if ( sig>>32 || (shiftDist <= -31) ) goto invalid;
+            shiftedSig = (uint64_t) (uint32_t) sig<<-shiftDist;
+            if ( shiftedSig>>32 ) goto invalid;
+            absZ = shiftedSig;
+        } else {
+            shiftedSig = sig;
+            if ( shiftDist ) shiftedSig >>= shiftDist;
+            if ( shiftedSig>>32 ) goto invalid;
+            absZ = shiftedSig;
+            if ( exact && shiftDist ) {
+                raiseInexact = ((uint64_t) absZ<<shiftDist != sig);
+            }
+        }
+        if ( sign ) {
+            if ( 0x80000000 < absZ ) goto invalid;
+            u.ui = -absZ;
+            z = u.i;
+        } else {
+            if ( 0x80000000 <= absZ ) goto invalid;
+            z = absZ;
+        }
+    }
+    if ( raiseInexact ) softfloat_exceptionFlags |= softfloat_flag_inexact;
+    return z;
+    /*------------------------------------------------------------------------
+    *------------------------------------------------------------------------*/
+ invalid:
+    softfloat_raiseFlags( softfloat_flag_invalid );
+    return
+        (exp == 0x7FFF) && (sig & UINT64_C( 0x7FFFFFFFFFFFFFFF )) ? i32_fromNaN
+            : sign ? i32_fromNegOverflow : i32_fromPosOverflow;
+
+}
+
+#endif
+
diff --git a/ext/softfloat/extF80M_to_i64.c b/ext/softfloat/extF80M_to_i64.c
new file mode 100644
index 0000000000..515add3327
--- /dev/null
+++ b/ext/softfloat/extF80M_to_i64.c
@@ -0,0 +1,97 @@
+
+/*============================================================================
+
+This C source file is part of the SoftFloat IEEE Floating-Point Arithmetic
+Package, Release 3d, by John R. Hauser.
+
+Copyright 2011, 2012, 2013, 2014, 2015, 2016, 2017 The Regents of the
+University of California.  All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met:
+
+ 1. Redistributions of source code must retain the above copyright notice,
+    this list of conditions, and the following disclaimer.
+
+ 2. Redistributions in binary form must reproduce the above copyright notice,
+    this list of conditions, and the following disclaimer in the documentation
+    and/or other materials provided with the distribution.
+
+ 3. Neither the name of the University nor the names of its contributors may
+    be used to endorse or promote products derived from this software without
+    specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS "AS IS", AND ANY
+EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE, ARE
+DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY
+DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+=============================================================================*/
+
+#include <stdbool.h>
+#include <stdint.h>
+#include "platform.h"
+#include "internals.h"
+#include "specialize.h"
+#include "softfloat.h"
+
+#ifdef SOFTFLOAT_FAST_INT64
+
+int_fast64_t
+ extF80M_to_i64(
+     const extFloat80_t *aPtr, uint_fast8_t roundingMode, bool exact )
+{
+
+    return extF80_to_i64( *aPtr, roundingMode, exact );
+
+}
+
+#else
+
+int_fast64_t
+ extF80M_to_i64(
+     const extFloat80_t *aPtr, uint_fast8_t roundingMode, bool exact )
+{
+    const struct extFloat80M *aSPtr;
+    uint_fast16_t uiA64;
+    bool sign;
+    int32_t exp;
+    uint64_t sig;
+    int32_t shiftDist;
+    uint32_t extSig[3];
+
+    /*------------------------------------------------------------------------
+    *------------------------------------------------------------------------*/
+    aSPtr = (const struct extFloat80M *) aPtr;
+    uiA64 = aSPtr->signExp;
+    sign = signExtF80UI64( uiA64 );
+    exp  = expExtF80UI64( uiA64 );
+    sig = aSPtr->signif;
+    /*------------------------------------------------------------------------
+    *------------------------------------------------------------------------*/
+    shiftDist = 0x403E - exp;
+    if ( shiftDist < 0 ) {
+        softfloat_raiseFlags( softfloat_flag_invalid );
+        return
+            (exp == 0x7FFF) && (sig & UINT64_C( 0x7FFFFFFFFFFFFFFF ))
+                ? i64_fromNaN
+                : sign ? i64_fromNegOverflow : i64_fromPosOverflow;
+    }
+    /*------------------------------------------------------------------------
+    *------------------------------------------------------------------------*/
+    extSig[indexWord( 3, 2 )] = sig>>32;
+    extSig[indexWord( 3, 1 )] = sig;
+    extSig[indexWord( 3, 0 )] = 0;
+    if ( shiftDist ) softfloat_shiftRightJam96M( extSig, shiftDist, extSig );
+    return softfloat_roundMToI64( sign, extSig, roundingMode, exact );
+
+}
+
+#endif
+
diff --git a/ext/softfloat/extF80M_to_i64_r_minMag.c b/ext/softfloat/extF80M_to_i64_r_minMag.c
new file mode 100644
index 0000000000..bab6e1f610
--- /dev/null
+++ b/ext/softfloat/extF80M_to_i64_r_minMag.c
@@ -0,0 +1,115 @@
+
+/*============================================================================
+
+This C source file is part of the SoftFloat IEEE Floating-Point Arithmetic
+Package, Release 3d, by John R. Hauser.
+
+Copyright 2011, 2012, 2013, 2014, 2015, 2016 The Regents of the University of
+California.  All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met:
+
+ 1. Redistributions of source code must retain the above copyright notice,
+    this list of conditions, and the following disclaimer.
+
+ 2. Redistributions in binary form must reproduce the above copyright notice,
+    this list of conditions, and the following disclaimer in the documentation
+    and/or other materials provided with the distribution.
+
+ 3. Neither the name of the University nor the names of its contributors may
+    be used to endorse or promote products derived from this software without
+    specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS "AS IS", AND ANY
+EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE, ARE
+DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY
+DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+=============================================================================*/
+
+#include <stdbool.h>
+#include <stdint.h>
+#include "platform.h"
+#include "internals.h"
+#include "specialize.h"
+#include "softfloat.h"
+
+#ifdef SOFTFLOAT_FAST_INT64
+
+int_fast64_t extF80M_to_i64_r_minMag( const extFloat80_t *aPtr, bool exact )
+{
+
+    return extF80_to_i64_r_minMag( *aPtr, exact );
+
+}
+
+#else
+
+int_fast64_t extF80M_to_i64_r_minMag( const extFloat80_t *aPtr, bool exact )
+{
+    const struct extFloat80M *aSPtr;
+    uint_fast16_t uiA64;
+    int32_t exp;
+    uint64_t sig;
+    int32_t shiftDist;
+    bool sign, raiseInexact;
+    int64_t z;
+    uint64_t absZ;
+    union { uint64_t ui; int64_t i; } u;
+
+    /*------------------------------------------------------------------------
+    *------------------------------------------------------------------------*/
+    aSPtr = (const struct extFloat80M *) aPtr;
+    uiA64 = aSPtr->signExp;
+    exp = expExtF80UI64( uiA64 );
+    sig = aSPtr->signif;
+    /*------------------------------------------------------------------------
+    *------------------------------------------------------------------------*/
+    if ( ! sig && (exp != 0x7FFF) ) return 0;
+    shiftDist = 0x403E - exp;
+    if ( 64 <= shiftDist ) {
+        raiseInexact = exact;
+        z = 0;
+    } else {
+        sign = signExtF80UI64( uiA64 );
+        raiseInexact = false;
+        if ( shiftDist < 0 ) {
+            if ( shiftDist <= -63 ) goto invalid;
+            shiftDist = -shiftDist;
+            absZ = sig<<shiftDist;
+            if ( absZ>>shiftDist != sig ) goto invalid;
+        } else {
+            absZ = sig;
+            if ( shiftDist ) absZ >>= shiftDist;
+            if ( exact && shiftDist ) raiseInexact = (absZ<<shiftDist != sig);
+        }
+        if ( sign ) {
+            if ( UINT64_C( 0x8000000000000000 ) < absZ ) goto invalid;
+            u.ui = -absZ;
+            z = u.i;
+        } else {
+            if ( UINT64_C( 0x8000000000000000 ) <= absZ ) goto invalid;
+            z = absZ;
+        }
+    }
+    if ( raiseInexact ) softfloat_exceptionFlags |= softfloat_flag_inexact;
+    return z;
+    /*------------------------------------------------------------------------
+    *------------------------------------------------------------------------*/
+ invalid:
+    softfloat_raiseFlags( softfloat_flag_invalid );
+    return
+        (exp == 0x7FFF) && (sig & UINT64_C( 0x7FFFFFFFFFFFFFFF )) ? i64_fromNaN
+            : sign ? i64_fromNegOverflow : i64_fromPosOverflow;
+
+}
+
+#endif
+
diff --git a/ext/softfloat/extF80M_to_ui32.c b/ext/softfloat/extF80M_to_ui32.c
new file mode 100644
index 0000000000..9e029b80e3
--- /dev/null
+++ b/ext/softfloat/extF80M_to_ui32.c
@@ -0,0 +1,101 @@
+
+/*============================================================================
+
+This C source file is part of the SoftFloat IEEE Floating-Point Arithmetic
+Package, Release 3d, by John R. Hauser.
+
+Copyright 2011, 2012, 2013, 2014, 2015, 2016, 2017 The Regents of the
+University of California.  All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met:
+
+ 1. Redistributions of source code must retain the above copyright notice,
+    this list of conditions, and the following disclaimer.
+
+ 2. Redistributions in binary form must reproduce the above copyright notice,
+    this list of conditions, and the following disclaimer in the documentation
+    and/or other materials provided with the distribution.
+
+ 3. Neither the name of the University nor the names of its contributors may
+    be used to endorse or promote products derived from this software without
+    specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS "AS IS", AND ANY
+EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE, ARE
+DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY
+DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+=============================================================================*/
+
+#include <stdbool.h>
+#include <stdint.h>
+#include "platform.h"
+#include "internals.h"
+#include "specialize.h"
+#include "softfloat.h"
+
+#ifdef SOFTFLOAT_FAST_INT64
+
+uint_fast32_t
+ extF80M_to_ui32(
+     const extFloat80_t *aPtr, uint_fast8_t roundingMode, bool exact )
+{
+
+    return extF80_to_ui32( *aPtr, roundingMode, exact );
+
+}
+
+#else
+
+uint_fast32_t
+ extF80M_to_ui32(
+     const extFloat80_t *aPtr, uint_fast8_t roundingMode, bool exact )
+{
+    const struct extFloat80M *aSPtr;
+    uint_fast16_t uiA64;
+    bool sign;
+    int32_t exp;
+    uint64_t sig;
+    int32_t shiftDist;
+
+    /*------------------------------------------------------------------------
+    *------------------------------------------------------------------------*/
+    aSPtr = (const struct extFloat80M *) aPtr;
+    uiA64 = aSPtr->signExp;
+    sign = signExtF80UI64( uiA64 );
+    exp  = expExtF80UI64( uiA64 );
+    sig = aSPtr->signif;
+    /*------------------------------------------------------------------------
+    *------------------------------------------------------------------------*/
+    shiftDist = 0x4032 - exp;
+    if ( shiftDist <= 0 ) {
+        if ( sig>>32 ) goto invalid;
+        if ( -32 < shiftDist ) {
+            sig <<= -shiftDist;
+        } else {
+            if ( (uint32_t) sig ) goto invalid;
+        }
+    } else {
+        sig = softfloat_shiftRightJam64( sig, shiftDist );
+    }
+    return softfloat_roundToUI32( sign, sig, roundingMode, exact );
+    /*------------------------------------------------------------------------
+    *------------------------------------------------------------------------*/
+ invalid:
+    softfloat_raiseFlags( softfloat_flag_invalid );
+    return
+        (exp == 0x7FFF) && (sig & UINT64_C( 0x7FFFFFFFFFFFFFFF ))
+            ? ui32_fromNaN
+            : sign ? ui32_fromNegOverflow : ui32_fromPosOverflow;
+
+}
+
+#endif
+
diff --git a/ext/softfloat/extF80M_to_ui32_r_minMag.c b/ext/softfloat/extF80M_to_ui32_r_minMag.c
new file mode 100644
index 0000000000..c4974ade3c
--- /dev/null
+++ b/ext/softfloat/extF80M_to_ui32_r_minMag.c
@@ -0,0 +1,111 @@
+
+/*============================================================================
+
+This C source file is part of the SoftFloat IEEE Floating-Point Arithmetic
+Package, Release 3d, by John R. Hauser.
+
+Copyright 2011, 2012, 2013, 2014, 2015, 2016 The Regents of the University of
+California.  All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met:
+
+ 1. Redistributions of source code must retain the above copyright notice,
+    this list of conditions, and the following disclaimer.
+
+ 2. Redistributions in binary form must reproduce the above copyright notice,
+    this list of conditions, and the following disclaimer in the documentation
+    and/or other materials provided with the distribution.
+
+ 3. Neither the name of the University nor the names of its contributors may
+    be used to endorse or promote products derived from this software without
+    specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS "AS IS", AND ANY
+EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE, ARE
+DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY
+DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+=============================================================================*/
+
+#include <stdbool.h>
+#include <stdint.h>
+#include "platform.h"
+#include "internals.h"
+#include "specialize.h"
+#include "softfloat.h"
+
+#ifdef SOFTFLOAT_FAST_INT64
+
+uint_fast32_t extF80M_to_ui32_r_minMag( const extFloat80_t *aPtr, bool exact )
+{
+
+    return extF80_to_ui32_r_minMag( *aPtr, exact );
+
+}
+
+#else
+
+uint_fast32_t extF80M_to_ui32_r_minMag( const extFloat80_t *aPtr, bool exact )
+{
+    const struct extFloat80M *aSPtr;
+    uint_fast16_t uiA64;
+    int32_t exp;
+    uint64_t sig;
+    int32_t shiftDist;
+    bool sign;
+    uint64_t shiftedSig;
+    uint32_t z;
+
+    /*------------------------------------------------------------------------
+    *------------------------------------------------------------------------*/
+    aSPtr = (const struct extFloat80M *) aPtr;
+    uiA64 = aSPtr->signExp;
+    exp = expExtF80UI64( uiA64 );
+    sig = aSPtr->signif;
+    /*------------------------------------------------------------------------
+    *------------------------------------------------------------------------*/
+    if ( ! sig && (exp != 0x7FFF) ) return 0;
+    shiftDist = 0x403E - exp;
+    if ( 64 <= shiftDist ) {
+        if ( exact ) softfloat_exceptionFlags |= softfloat_flag_inexact;
+        return 0;
+    }
+    /*------------------------------------------------------------------------
+    *------------------------------------------------------------------------*/
+    sign = signExtF80UI64( uiA64 );
+    if ( shiftDist < 0 ) {
+        if ( sign || sig>>32 || (shiftDist <= -31) ) goto invalid;
+        shiftedSig = (uint64_t) (uint32_t) sig<<-shiftDist;
+        if ( shiftedSig>>32 ) goto invalid;
+        z = shiftedSig;
+    } else {
+        shiftedSig = sig;
+        if ( shiftDist ) shiftedSig >>= shiftDist;
+        if ( shiftedSig>>32 ) goto invalid;
+        z = shiftedSig;
+        if ( sign && z ) goto invalid;
+        if ( exact && shiftDist && ((uint64_t) z<<shiftDist != sig) ) {
+            softfloat_exceptionFlags |= softfloat_flag_inexact;
+        }
+    }
+    return z;
+    /*------------------------------------------------------------------------
+    *------------------------------------------------------------------------*/
+ invalid:
+    softfloat_raiseFlags( softfloat_flag_invalid );
+    return
+        (exp == 0x7FFF) && (sig & UINT64_C( 0x7FFFFFFFFFFFFFFF ))
+            ? ui32_fromNaN
+            : sign ? ui32_fromNegOverflow : ui32_fromPosOverflow;
+
+}
+
+#endif
+
diff --git a/ext/softfloat/extF80M_to_ui64.c b/ext/softfloat/extF80M_to_ui64.c
new file mode 100644
index 0000000000..e1767fdd01
--- /dev/null
+++ b/ext/softfloat/extF80M_to_ui64.c
@@ -0,0 +1,97 @@
+
+/*============================================================================
+
+This C source file is part of the SoftFloat IEEE Floating-Point Arithmetic
+Package, Release 3d, by John R. Hauser.
+
+Copyright 2011, 2012, 2013, 2014, 2015, 2016, 2017 The Regents of the
+University of California.  All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met:
+
+ 1. Redistributions of source code must retain the above copyright notice,
+    this list of conditions, and the following disclaimer.
+
+ 2. Redistributions in binary form must reproduce the above copyright notice,
+    this list of conditions, and the following disclaimer in the documentation
+    and/or other materials provided with the distribution.
+
+ 3. Neither the name of the University nor the names of its contributors may
+    be used to endorse or promote products derived from this software without
+    specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS "AS IS", AND ANY
+EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE, ARE
+DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY
+DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+=============================================================================*/
+
+#include <stdbool.h>
+#include <stdint.h>
+#include "platform.h"
+#include "internals.h"
+#include "specialize.h"
+#include "softfloat.h"
+
+#ifdef SOFTFLOAT_FAST_INT64
+
+uint_fast64_t
+ extF80M_to_ui64(
+     const extFloat80_t *aPtr, uint_fast8_t roundingMode, bool exact )
+{
+
+    return extF80_to_ui64( *aPtr, roundingMode, exact );
+
+}
+
+#else
+
+uint_fast64_t
+ extF80M_to_ui64(
+     const extFloat80_t *aPtr, uint_fast8_t roundingMode, bool exact )
+{
+    const struct extFloat80M *aSPtr;
+    uint_fast16_t uiA64;
+    bool sign;
+    int32_t exp;
+    uint64_t sig;
+    int32_t shiftDist;
+    uint32_t extSig[3];
+
+    /*------------------------------------------------------------------------
+    *------------------------------------------------------------------------*/
+    aSPtr = (const struct extFloat80M *) aPtr;
+    uiA64 = aSPtr->signExp;
+    sign = signExtF80UI64( uiA64 );
+    exp  = expExtF80UI64( uiA64 );
+    sig = aSPtr->signif;
+    /*------------------------------------------------------------------------
+    *------------------------------------------------------------------------*/
+    shiftDist = 0x403E - exp;
+    if ( shiftDist < 0 ) {
+        softfloat_raiseFlags( softfloat_flag_invalid );
+        return
+            (exp == 0x7FFF) && (sig & UINT64_C( 0x7FFFFFFFFFFFFFFF ))
+                ? ui64_fromNaN
+                : sign ? ui64_fromNegOverflow : ui64_fromPosOverflow;
+    }
+    /*------------------------------------------------------------------------
+    *------------------------------------------------------------------------*/
+    extSig[indexWord( 3, 2 )] = sig>>32;
+    extSig[indexWord( 3, 1 )] = sig;
+    extSig[indexWord( 3, 0 )] = 0;
+    if ( shiftDist ) softfloat_shiftRightJam96M( extSig, shiftDist, extSig );
+    return softfloat_roundMToUI64( sign, extSig, roundingMode, exact );
+
+}
+
+#endif
+
diff --git a/ext/softfloat/extF80M_to_ui64_r_minMag.c b/ext/softfloat/extF80M_to_ui64_r_minMag.c
new file mode 100644
index 0000000000..4bd1c123d4
--- /dev/null
+++ b/ext/softfloat/extF80M_to_ui64_r_minMag.c
@@ -0,0 +1,108 @@
+
+/*============================================================================
+
+This C source file is part of the SoftFloat IEEE Floating-Point Arithmetic
+Package, Release 3d, by John R. Hauser.
+
+Copyright 2011, 2012, 2013, 2014, 2015, 2016 The Regents of the University of
+California.  All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met:
+
+ 1. Redistributions of source code must retain the above copyright notice,
+    this list of conditions, and the following disclaimer.
+
+ 2. Redistributions in binary form must reproduce the above copyright notice,
+    this list of conditions, and the following disclaimer in the documentation
+    and/or other materials provided with the distribution.
+
+ 3. Neither the name of the University nor the names of its contributors may
+    be used to endorse or promote products derived from this software without
+    specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS "AS IS", AND ANY
+EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE, ARE
+DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY
+DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+=============================================================================*/
+
+#include <stdbool.h>
+#include <stdint.h>
+#include "platform.h"
+#include "internals.h"
+#include "specialize.h"
+#include "softfloat.h"
+
+#ifdef SOFTFLOAT_FAST_INT64
+
+uint_fast64_t extF80M_to_ui64_r_minMag( const extFloat80_t *aPtr, bool exact )
+{
+
+    return extF80_to_ui64_r_minMag( *aPtr, exact );
+
+}
+
+#else
+
+uint_fast64_t extF80M_to_ui64_r_minMag( const extFloat80_t *aPtr, bool exact )
+{
+    const struct extFloat80M *aSPtr;
+    uint_fast16_t uiA64;
+    int32_t exp;
+    uint64_t sig;
+    int32_t shiftDist;
+    bool sign;
+    uint64_t z;
+
+    /*------------------------------------------------------------------------
+    *------------------------------------------------------------------------*/
+    aSPtr = (const struct extFloat80M *) aPtr;
+    uiA64 = aSPtr->signExp;
+    exp = expExtF80UI64( uiA64 );
+    sig = aSPtr->signif;
+    /*------------------------------------------------------------------------
+    *------------------------------------------------------------------------*/
+    if ( ! sig && (exp != 0x7FFF) ) return 0;
+    shiftDist = 0x403E - exp;
+    if ( 64 <= shiftDist ) {
+        if ( exact ) softfloat_exceptionFlags |= softfloat_flag_inexact;
+        return 0;
+    }
+    /*------------------------------------------------------------------------
+    *------------------------------------------------------------------------*/
+    sign = signExtF80UI64( uiA64 );
+    if ( shiftDist < 0 ) {
+        if ( sign || (shiftDist <= -63) ) goto invalid;
+        shiftDist = -shiftDist;
+        z = sig<<shiftDist;
+        if ( z>>shiftDist != sig ) goto invalid;
+    } else {
+        z = sig;
+        if ( shiftDist ) z >>= shiftDist;
+        if ( sign && z ) goto invalid;
+        if ( exact && shiftDist && (z<<shiftDist != sig) ) {
+            softfloat_exceptionFlags |= softfloat_flag_inexact;
+        }
+    }
+    return z;
+    /*------------------------------------------------------------------------
+    *------------------------------------------------------------------------*/
+ invalid:
+    softfloat_raiseFlags( softfloat_flag_invalid );
+    return
+        (exp == 0x7FFF) && (sig & UINT64_C( 0x7FFFFFFFFFFFFFFF ))
+            ? ui64_fromNaN
+            : sign ? ui64_fromNegOverflow : ui64_fromPosOverflow;
+
+}
+
+#endif
+
diff --git a/ext/softfloat/extF80_add.c b/ext/softfloat/extF80_add.c
new file mode 100644
index 0000000000..ad4ef67ea5
--- /dev/null
+++ b/ext/softfloat/extF80_add.c
@@ -0,0 +1,80 @@
+
+/*============================================================================
+
+This C source file is part of the SoftFloat IEEE Floating-Point Arithmetic
+Package, Release 3d, by John R. Hauser.
+
+Copyright 2011, 2012, 2013, 2014 The Regents of the University of California.
+All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met:
+
+ 1. Redistributions of source code must retain the above copyright notice,
+    this list of conditions, and the following disclaimer.
+
+ 2. Redistributions in binary form must reproduce the above copyright notice,
+    this list of conditions, and the following disclaimer in the documentation
+    and/or other materials provided with the distribution.
+
+ 3. Neither the name of the University nor the names of its contributors may
+    be used to endorse or promote products derived from this software without
+    specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS "AS IS", AND ANY
+EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE, ARE
+DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY
+DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+=============================================================================*/
+
+#include <stdbool.h>
+#include <stdint.h>
+#include "platform.h"
+#include "internals.h"
+#include "softfloat.h"
+
+extFloat80_t extF80_add( extFloat80_t a, extFloat80_t b )
+{
+    union { struct extFloat80M s; extFloat80_t f; } uA;
+    uint_fast16_t uiA64;
+    uint_fast64_t uiA0;
+    bool signA;
+    union { struct extFloat80M s; extFloat80_t f; } uB;
+    uint_fast16_t uiB64;
+    uint_fast64_t uiB0;
+    bool signB;
+#if ! defined INLINE_LEVEL || (INLINE_LEVEL < 2)
+    extFloat80_t
+        (*magsFuncPtr)(
+            uint_fast16_t, uint_fast64_t, uint_fast16_t, uint_fast64_t, bool );
+#endif
+
+    uA.f = a;
+    uiA64 = uA.s.signExp;
+    uiA0  = uA.s.signif;
+    signA = signExtF80UI64( uiA64 );
+    uB.f = b;
+    uiB64 = uB.s.signExp;
+    uiB0  = uB.s.signif;
+    signB = signExtF80UI64( uiB64 );
+#if defined INLINE_LEVEL && (2 <= INLINE_LEVEL)
+    if ( signA == signB ) {
+        return softfloat_addMagsExtF80( uiA64, uiA0, uiB64, uiB0, signA );
+    } else {
+        return softfloat_subMagsExtF80( uiA64, uiA0, uiB64, uiB0, signA );
+    }
+#else
+    magsFuncPtr =
+        (signA == signB) ? softfloat_addMagsExtF80 : softfloat_subMagsExtF80;
+    return (*magsFuncPtr)( uiA64, uiA0, uiB64, uiB0, signA );
+#endif
+
+}
+
diff --git a/ext/softfloat/extF80_div.c b/ext/softfloat/extF80_div.c
new file mode 100644
index 0000000000..e9ddfa3abf
--- /dev/null
+++ b/ext/softfloat/extF80_div.c
@@ -0,0 +1,203 @@
+
+/*============================================================================
+
+This C source file is part of the SoftFloat IEEE Floating-Point Arithmetic
+Package, Release 3d, by John R. Hauser.
+
+Copyright 2011, 2012, 2013, 2014 The Regents of the University of California.
+All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met:
+
+ 1. Redistributions of source code must retain the above copyright notice,
+    this list of conditions, and the following disclaimer.
+
+ 2. Redistributions in binary form must reproduce the above copyright notice,
+    this list of conditions, and the following disclaimer in the documentation
+    and/or other materials provided with the distribution.
+
+ 3. Neither the name of the University nor the names of its contributors may
+    be used to endorse or promote products derived from this software without
+    specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS "AS IS", AND ANY
+EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE, ARE
+DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY
+DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+=============================================================================*/
+
+#include <stdbool.h>
+#include <stdint.h>
+#include "platform.h"
+#include "internals.h"
+#include "specialize.h"
+#include "softfloat.h"
+
+extFloat80_t extF80_div( extFloat80_t a, extFloat80_t b )
+{
+    union { struct extFloat80M s; extFloat80_t f; } uA;
+    uint_fast16_t uiA64;
+    uint_fast64_t uiA0;
+    bool signA;
+    int_fast32_t expA;
+    uint_fast64_t sigA;
+    union { struct extFloat80M s; extFloat80_t f; } uB;
+    uint_fast16_t uiB64;
+    uint_fast64_t uiB0;
+    bool signB;
+    int_fast32_t expB;
+    uint_fast64_t sigB;
+    bool signZ;
+    struct exp32_sig64 normExpSig;
+    int_fast32_t expZ;
+    struct uint128 rem;
+    uint_fast32_t recip32;
+    uint_fast64_t sigZ;
+    int ix;
+    uint_fast64_t q64;
+    uint_fast32_t q;
+    struct uint128 term;
+    uint_fast64_t sigZExtra;
+    struct uint128 uiZ;
+    uint_fast16_t uiZ64;
+    uint_fast64_t uiZ0;
+    union { struct extFloat80M s; extFloat80_t f; } uZ;
+
+    /*------------------------------------------------------------------------
+    *------------------------------------------------------------------------*/
+    uA.f = a;
+    uiA64 = uA.s.signExp;
+    uiA0  = uA.s.signif;
+    signA = signExtF80UI64( uiA64 );
+    expA  = expExtF80UI64( uiA64 );
+    sigA  = uiA0;
+    uB.f = b;
+    uiB64 = uB.s.signExp;
+    uiB0  = uB.s.signif;
+    signB = signExtF80UI64( uiB64 );
+    expB  = expExtF80UI64( uiB64 );
+    sigB  = uiB0;
+    signZ = signA ^ signB;
+    /*------------------------------------------------------------------------
+    *------------------------------------------------------------------------*/
+    if ( expA == 0x7FFF ) {
+        if ( sigA & UINT64_C( 0x7FFFFFFFFFFFFFFF ) ) goto propagateNaN;
+        if ( expB == 0x7FFF ) {
+            if ( sigB & UINT64_C( 0x7FFFFFFFFFFFFFFF ) ) goto propagateNaN;
+            goto invalid;
+        }
+        goto infinity;
+    }
+    if ( expB == 0x7FFF ) {
+        if ( sigB & UINT64_C( 0x7FFFFFFFFFFFFFFF ) ) goto propagateNaN;
+        goto zero;
+    }
+    /*------------------------------------------------------------------------
+    *------------------------------------------------------------------------*/
+    if ( ! expB ) expB = 1;
+    if ( ! (sigB & UINT64_C( 0x8000000000000000 )) ) {
+        if ( ! sigB ) {
+            if ( ! sigA ) goto invalid;
+            softfloat_raiseFlags( softfloat_flag_infinite );
+            goto infinity;
+        }
+        normExpSig = softfloat_normSubnormalExtF80Sig( sigB );
+        expB += normExpSig.exp;
+        sigB = normExpSig.sig;
+    }
+    if ( ! expA ) expA = 1;
+    if ( ! (sigA & UINT64_C( 0x8000000000000000 )) ) {
+        if ( ! sigA ) goto zero;
+        normExpSig = softfloat_normSubnormalExtF80Sig( sigA );
+        expA += normExpSig.exp;
+        sigA = normExpSig.sig;
+    }
+    /*------------------------------------------------------------------------
+    *------------------------------------------------------------------------*/
+    expZ = expA - expB + 0x3FFF;
+    if ( sigA < sigB ) {
+        --expZ;
+        rem = softfloat_shortShiftLeft128( 0, sigA, 32 );
+    } else {
+        rem = softfloat_shortShiftLeft128( 0, sigA, 31 );
+    }
+    recip32 = softfloat_approxRecip32_1( sigB>>32 );
+    sigZ = 0;
+    ix = 2;
+    for (;;) {
+        q64 = (uint_fast64_t) (uint32_t) (rem.v64>>2) * recip32;
+        q = (q64 + 0x80000000)>>32;
+        --ix;
+        if ( ix < 0 ) break;
+        rem = softfloat_shortShiftLeft128( rem.v64, rem.v0, 29 );
+        term = softfloat_mul64ByShifted32To128( sigB, q );
+        rem = softfloat_sub128( rem.v64, rem.v0, term.v64, term.v0 );
+        if ( rem.v64 & UINT64_C( 0x8000000000000000 ) ) {
+            --q;
+            rem = softfloat_add128( rem.v64, rem.v0, sigB>>32, sigB<<32 );
+        }
+        sigZ = (sigZ<<29) + q;
+    }
+    /*------------------------------------------------------------------------
+    *------------------------------------------------------------------------*/
+    if ( ((q + 1) & 0x3FFFFF) < 2 ) {
+        rem = softfloat_shortShiftLeft128( rem.v64, rem.v0, 29 );
+        term = softfloat_mul64ByShifted32To128( sigB, q );
+        rem = softfloat_sub128( rem.v64, rem.v0, term.v64, term.v0 );
+        term = softfloat_shortShiftLeft128( 0, sigB, 32 );
+        if ( rem.v64 & UINT64_C( 0x8000000000000000 ) ) {
+            --q;
+            rem = softfloat_add128( rem.v64, rem.v0, term.v64, term.v0 );
+        } else if ( softfloat_le128( term.v64, term.v0, rem.v64, rem.v0 ) ) {
+            ++q;
+            rem = softfloat_sub128( rem.v64, rem.v0, term.v64, term.v0 );
+        }
+        if ( rem.v64 | rem.v0 ) q |= 1;
+    }
+    /*------------------------------------------------------------------------
+    *------------------------------------------------------------------------*/
+    sigZ = (sigZ<<6) + (q>>23);
+    sigZExtra = (uint64_t) ((uint_fast64_t) q<<41);
+    return
+        softfloat_roundPackToExtF80(
+            signZ, expZ, sigZ, sigZExtra, extF80_roundingPrecision );
+    /*------------------------------------------------------------------------
+    *------------------------------------------------------------------------*/
+ propagateNaN:
+    uiZ = softfloat_propagateNaNExtF80UI( uiA64, uiA0, uiB64, uiB0 );
+    uiZ64 = uiZ.v64;
+    uiZ0  = uiZ.v0;
+    goto uiZ;
+    /*------------------------------------------------------------------------
+    *------------------------------------------------------------------------*/
+ invalid:
+    softfloat_raiseFlags( softfloat_flag_invalid );
+    uiZ64 = defaultNaNExtF80UI64;
+    uiZ0  = defaultNaNExtF80UI0;
+    goto uiZ;
+    /*------------------------------------------------------------------------
+    *------------------------------------------------------------------------*/
+ infinity:
+    uiZ64 = packToExtF80UI64( signZ, 0x7FFF );
+    uiZ0  = UINT64_C( 0x8000000000000000 );
+    goto uiZ;
+    /*------------------------------------------------------------------------
+    *------------------------------------------------------------------------*/
+ zero:
+    uiZ64 = packToExtF80UI64( signZ, 0 );
+    uiZ0  = 0;
+ uiZ:
+    uZ.s.signExp = uiZ64;
+    uZ.s.signif  = uiZ0;
+    return uZ.f;
+
+}
+
diff --git a/ext/softfloat/extF80_eq.c b/ext/softfloat/extF80_eq.c
new file mode 100644
index 0000000000..f0bab466e4
--- /dev/null
+++ b/ext/softfloat/extF80_eq.c
@@ -0,0 +1,73 @@
+
+/*============================================================================
+
+This C source file is part of the SoftFloat IEEE Floating-Point Arithmetic
+Package, Release 3d, by John R. Hauser.
+
+Copyright 2011, 2012, 2013, 2014 The Regents of the University of California.
+All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met:
+
+ 1. Redistributions of source code must retain the above copyright notice,
+    this list of conditions, and the following disclaimer.
+
+ 2. Redistributions in binary form must reproduce the above copyright notice,
+    this list of conditions, and the following disclaimer in the documentation
+    and/or other materials provided with the distribution.
+
+ 3. Neither the name of the University nor the names of its contributors may
+    be used to endorse or promote products derived from this software without
+    specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS "AS IS", AND ANY
+EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE, ARE
+DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY
+DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+=============================================================================*/
+
+#include <stdbool.h>
+#include <stdint.h>
+#include "platform.h"
+#include "internals.h"
+#include "specialize.h"
+#include "softfloat.h"
+
+bool extF80_eq( extFloat80_t a, extFloat80_t b )
+{
+    union { struct extFloat80M s; extFloat80_t f; } uA;
+    uint_fast16_t uiA64;
+    uint_fast64_t uiA0;
+    union { struct extFloat80M s; extFloat80_t f; } uB;
+    uint_fast16_t uiB64;
+    uint_fast64_t uiB0;
+
+    uA.f = a;
+    uiA64 = uA.s.signExp;
+    uiA0  = uA.s.signif;
+    uB.f = b;
+    uiB64 = uB.s.signExp;
+    uiB0  = uB.s.signif;
+    if ( isNaNExtF80UI( uiA64, uiA0 ) || isNaNExtF80UI( uiB64, uiB0 ) ) {
+        if (
+               softfloat_isSigNaNExtF80UI( uiA64, uiA0 )
+            || softfloat_isSigNaNExtF80UI( uiB64, uiB0 )
+        ) {
+            softfloat_raiseFlags( softfloat_flag_invalid );
+        }
+        return false;
+    }
+    return
+           (uiA0 == uiB0)
+        && ((uiA64 == uiB64) || (! uiA0 && ! ((uiA64 | uiB64) & 0x7FFF)));
+
+}
+
diff --git a/ext/softfloat/extF80_eq_signaling.c b/ext/softfloat/extF80_eq_signaling.c
new file mode 100644
index 0000000000..9cfe5f1614
--- /dev/null
+++ b/ext/softfloat/extF80_eq_signaling.c
@@ -0,0 +1,67 @@
+
+/*============================================================================
+
+This C source file is part of the SoftFloat IEEE Floating-Point Arithmetic
+Package, Release 3d, by John R. Hauser.
+
+Copyright 2011, 2012, 2013, 2014 The Regents of the University of California.
+All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met:
+
+ 1. Redistributions of source code must retain the above copyright notice,
+    this list of conditions, and the following disclaimer.
+
+ 2. Redistributions in binary form must reproduce the above copyright notice,
+    this list of conditions, and the following disclaimer in the documentation
+    and/or other materials provided with the distribution.
+
+ 3. Neither the name of the University nor the names of its contributors may
+    be used to endorse or promote products derived from this software without
+    specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS "AS IS", AND ANY
+EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE, ARE
+DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY
+DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+=============================================================================*/
+
+#include <stdbool.h>
+#include <stdint.h>
+#include "platform.h"
+#include "internals.h"
+#include "softfloat.h"
+
+bool extF80_eq_signaling( extFloat80_t a, extFloat80_t b )
+{
+    union { struct extFloat80M s; extFloat80_t f; } uA;
+    uint_fast16_t uiA64;
+    uint_fast64_t uiA0;
+    union { struct extFloat80M s; extFloat80_t f; } uB;
+    uint_fast16_t uiB64;
+    uint_fast64_t uiB0;
+
+    uA.f = a;
+    uiA64 = uA.s.signExp;
+    uiA0  = uA.s.signif;
+    uB.f = b;
+    uiB64 = uB.s.signExp;
+    uiB0  = uB.s.signif;
+    if ( isNaNExtF80UI( uiA64, uiA0 ) || isNaNExtF80UI( uiB64, uiB0 ) ) {
+        softfloat_raiseFlags( softfloat_flag_invalid );
+        return false;
+    }
+    return
+           (uiA0 == uiB0)
+        && ((uiA64 == uiB64) || (! uiA0 && ! ((uiA64 | uiB64) & 0x7FFF)));
+
+}
+
diff --git a/ext/softfloat/extF80_isSignalingNaN.c b/ext/softfloat/extF80_isSignalingNaN.c
new file mode 100644
index 0000000000..b2186029c4
--- /dev/null
+++ b/ext/softfloat/extF80_isSignalingNaN.c
@@ -0,0 +1,51 @@
+
+/*============================================================================
+
+This C source file is part of the SoftFloat IEEE Floating-Point Arithmetic
+Package, Release 3d, by John R. Hauser.
+
+Copyright 2011, 2012, 2013, 2014 The Regents of the University of California.
+All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met:
+
+ 1. Redistributions of source code must retain the above copyright notice,
+    this list of conditions, and the following disclaimer.
+
+ 2. Redistributions in binary form must reproduce the above copyright notice,
+    this list of conditions, and the following disclaimer in the documentation
+    and/or other materials provided with the distribution.
+
+ 3. Neither the name of the University nor the names of its contributors may
+    be used to endorse or promote products derived from this software without
+    specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS "AS IS", AND ANY
+EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE, ARE
+DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY
+DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+=============================================================================*/
+
+#include <stdbool.h>
+#include "platform.h"
+#include "internals.h"
+#include "specialize.h"
+#include "softfloat.h"
+
+bool extF80_isSignalingNaN( extFloat80_t a )
+{
+    union { struct extFloat80M s; extFloat80_t f; } uA;
+
+    uA.f = a;
+    return softfloat_isSigNaNExtF80UI( uA.s.signExp, uA.s.signif );
+
+}
+
diff --git a/ext/softfloat/extF80_le.c b/ext/softfloat/extF80_le.c
new file mode 100644
index 0000000000..6acecfd89c
--- /dev/null
+++ b/ext/softfloat/extF80_le.c
@@ -0,0 +1,73 @@
+
+/*============================================================================
+
+This C source file is part of the SoftFloat IEEE Floating-Point Arithmetic
+Package, Release 3d, by John R. Hauser.
+
+Copyright 2011, 2012, 2013, 2014 The Regents of the University of California.
+All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met:
+
+ 1. Redistributions of source code must retain the above copyright notice,
+    this list of conditions, and the following disclaimer.
+
+ 2. Redistributions in binary form must reproduce the above copyright notice,
+    this list of conditions, and the following disclaimer in the documentation
+    and/or other materials provided with the distribution.
+
+ 3. Neither the name of the University nor the names of its contributors may
+    be used to endorse or promote products derived from this software without
+    specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS "AS IS", AND ANY
+EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE, ARE
+DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY
+DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+=============================================================================*/
+
+#include <stdbool.h>
+#include <stdint.h>
+#include "platform.h"
+#include "internals.h"
+#include "specialize.h"
+#include "softfloat.h"
+
+bool extF80_le( extFloat80_t a, extFloat80_t b )
+{
+    union { struct extFloat80M s; extFloat80_t f; } uA;
+    uint_fast16_t uiA64;
+    uint_fast64_t uiA0;
+    union { struct extFloat80M s; extFloat80_t f; } uB;
+    uint_fast16_t uiB64;
+    uint_fast64_t uiB0;
+    bool signA, signB;
+
+    uA.f = a;
+    uiA64 = uA.s.signExp;
+    uiA0  = uA.s.signif;
+    uB.f = b;
+    uiB64 = uB.s.signExp;
+    uiB0  = uB.s.signif;
+    if ( isNaNExtF80UI( uiA64, uiA0 ) || isNaNExtF80UI( uiB64, uiB0 ) ) {
+        softfloat_raiseFlags( softfloat_flag_invalid );
+        return false;
+    }
+    signA = signExtF80UI64( uiA64 );
+    signB = signExtF80UI64( uiB64 );
+    return
+        (signA != signB)
+            ? signA || ! (((uiA64 | uiB64) & 0x7FFF) | uiA0 | uiB0)
+            : ((uiA64 == uiB64) && (uiA0 == uiB0))
+                  || (signA ^ softfloat_lt128( uiA64, uiA0, uiB64, uiB0 ));
+
+}
+
diff --git a/ext/softfloat/extF80_le_quiet.c b/ext/softfloat/extF80_le_quiet.c
new file mode 100644
index 0000000000..d4b7d16b93
--- /dev/null
+++ b/ext/softfloat/extF80_le_quiet.c
@@ -0,0 +1,78 @@
+
+/*============================================================================
+
+This C source file is part of the SoftFloat IEEE Floating-Point Arithmetic
+Package, Release 3d, by John R. Hauser.
+
+Copyright 2011, 2012, 2013, 2014 The Regents of the University of California.
+All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met:
+
+ 1. Redistributions of source code must retain the above copyright notice,
+    this list of conditions, and the following disclaimer.
+
+ 2. Redistributions in binary form must reproduce the above copyright notice,
+    this list of conditions, and the following disclaimer in the documentation
+    and/or other materials provided with the distribution.
+
+ 3. Neither the name of the University nor the names of its contributors may
+    be used to endorse or promote products derived from this software without
+    specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS "AS IS", AND ANY
+EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE, ARE
+DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY
+DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+=============================================================================*/
+
+#include <stdbool.h>
+#include <stdint.h>
+#include "platform.h"
+#include "internals.h"
+#include "specialize.h"
+#include "softfloat.h"
+
+bool extF80_le_quiet( extFloat80_t a, extFloat80_t b )
+{
+    union { struct extFloat80M s; extFloat80_t f; } uA;
+    uint_fast16_t uiA64;
+    uint_fast64_t uiA0;
+    union { struct extFloat80M s; extFloat80_t f; } uB;
+    uint_fast16_t uiB64;
+    uint_fast64_t uiB0;
+    bool signA, signB;
+
+    uA.f = a;
+    uiA64 = uA.s.signExp;
+    uiA0  = uA.s.signif;
+    uB.f = b;
+    uiB64 = uB.s.signExp;
+    uiB0  = uB.s.signif;
+    if ( isNaNExtF80UI( uiA64, uiA0 ) || isNaNExtF80UI( uiB64, uiB0 ) ) {
+        if (
+               softfloat_isSigNaNExtF80UI( uiA64, uiA0 )
+            || softfloat_isSigNaNExtF80UI( uiB64, uiB0 )
+        ) {
+            softfloat_raiseFlags( softfloat_flag_invalid );
+        }
+        return false;
+    }
+    signA = signExtF80UI64( uiA64 );
+    signB = signExtF80UI64( uiB64 );
+    return
+        (signA != signB)
+            ? signA || ! (((uiA64 | uiB64) & 0x7FFF) | uiA0 | uiB0)
+            : ((uiA64 == uiB64) && (uiA0 == uiB0))
+                  || (signA ^ softfloat_lt128( uiA64, uiA0, uiB64, uiB0 ));
+
+}
+
diff --git a/ext/softfloat/extF80_lt.c b/ext/softfloat/extF80_lt.c
new file mode 100644
index 0000000000..cc48633037
--- /dev/null
+++ b/ext/softfloat/extF80_lt.c
@@ -0,0 +1,73 @@
+
+/*============================================================================
+
+This C source file is part of the SoftFloat IEEE Floating-Point Arithmetic
+Package, Release 3d, by John R. Hauser.
+
+Copyright 2011, 2012, 2013, 2014 The Regents of the University of California.
+All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met:
+
+ 1. Redistributions of source code must retain the above copyright notice,
+    this list of conditions, and the following disclaimer.
+
+ 2. Redistributions in binary form must reproduce the above copyright notice,
+    this list of conditions, and the following disclaimer in the documentation
+    and/or other materials provided with the distribution.
+
+ 3. Neither the name of the University nor the names of its contributors may
+    be used to endorse or promote products derived from this software without
+    specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS "AS IS", AND ANY
+EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE, ARE
+DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY
+DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+=============================================================================*/
+
+#include <stdbool.h>
+#include <stdint.h>
+#include "platform.h"
+#include "internals.h"
+#include "specialize.h"
+#include "softfloat.h"
+
+bool extF80_lt( extFloat80_t a, extFloat80_t b )
+{
+    union { struct extFloat80M s; extFloat80_t f; } uA;
+    uint_fast16_t uiA64;
+    uint_fast64_t uiA0;
+    union { struct extFloat80M s; extFloat80_t f; } uB;
+    uint_fast16_t uiB64;
+    uint_fast64_t uiB0;
+    bool signA, signB;
+
+    uA.f = a;
+    uiA64 = uA.s.signExp;
+    uiA0  = uA.s.signif;
+    uB.f = b;
+    uiB64 = uB.s.signExp;
+    uiB0  = uB.s.signif;
+    if ( isNaNExtF80UI( uiA64, uiA0 ) || isNaNExtF80UI( uiB64, uiB0 ) ) {
+        softfloat_raiseFlags( softfloat_flag_invalid );
+        return false;
+    }
+    signA = signExtF80UI64( uiA64 );
+    signB = signExtF80UI64( uiB64 );
+    return
+        (signA != signB)
+            ? signA && (((uiA64 | uiB64) & 0x7FFF) | uiA0 | uiB0)
+            : ((uiA64 != uiB64) || (uiA0 != uiB0))
+                  && (signA ^ softfloat_lt128( uiA64, uiA0, uiB64, uiB0 ));
+
+}
+
diff --git a/ext/softfloat/extF80_lt_quiet.c b/ext/softfloat/extF80_lt_quiet.c
new file mode 100644
index 0000000000..eed6b36863
--- /dev/null
+++ b/ext/softfloat/extF80_lt_quiet.c
@@ -0,0 +1,78 @@
+
+/*============================================================================
+
+This C source file is part of the SoftFloat IEEE Floating-Point Arithmetic
+Package, Release 3d, by John R. Hauser.
+
+Copyright 2011, 2012, 2013, 2014 The Regents of the University of California.
+All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met:
+
+ 1. Redistributions of source code must retain the above copyright notice,
+    this list of conditions, and the following disclaimer.
+
+ 2. Redistributions in binary form must reproduce the above copyright notice,
+    this list of conditions, and the following disclaimer in the documentation
+    and/or other materials provided with the distribution.
+
+ 3. Neither the name of the University nor the names of its contributors may
+    be used to endorse or promote products derived from this software without
+    specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS "AS IS", AND ANY
+EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE, ARE
+DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY
+DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+=============================================================================*/
+
+#include <stdbool.h>
+#include <stdint.h>
+#include "platform.h"
+#include "internals.h"
+#include "specialize.h"
+#include "softfloat.h"
+
+bool extF80_lt_quiet( extFloat80_t a, extFloat80_t b )
+{
+    union { struct extFloat80M s; extFloat80_t f; } uA;
+    uint_fast16_t uiA64;
+    uint_fast64_t uiA0;
+    union { struct extFloat80M s; extFloat80_t f; } uB;
+    uint_fast16_t uiB64;
+    uint_fast64_t uiB0;
+    bool signA, signB;
+
+    uA.f = a;
+    uiA64 = uA.s.signExp;
+    uiA0  = uA.s.signif;
+    uB.f = b;
+    uiB64 = uB.s.signExp;
+    uiB0  = uB.s.signif;
+    if ( isNaNExtF80UI( uiA64, uiA0 ) || isNaNExtF80UI( uiB64, uiB0 ) ) {
+        if (
+               softfloat_isSigNaNExtF80UI( uiA64, uiA0 )
+            || softfloat_isSigNaNExtF80UI( uiB64, uiB0 )
+        ) {
+            softfloat_raiseFlags( softfloat_flag_invalid );
+        }
+        return false;
+    }
+    signA = signExtF80UI64( uiA64 );
+    signB = signExtF80UI64( uiB64 );
+    return
+        (signA != signB)
+            ? signA && (((uiA64 | uiB64) & 0x7FFF) | uiA0 | uiB0)
+            : ((uiA64 != uiB64) || (uiA0 != uiB0))
+                  && (signA ^ softfloat_lt128( uiA64, uiA0, uiB64, uiB0 ));
+
+}
+
diff --git a/ext/softfloat/extF80_mul.c b/ext/softfloat/extF80_mul.c
new file mode 100644
index 0000000000..2c1a9263b3
--- /dev/null
+++ b/ext/softfloat/extF80_mul.c
@@ -0,0 +1,158 @@
+
+/*============================================================================
+
+This C source file is part of the SoftFloat IEEE Floating-Point Arithmetic
+Package, Release 3d, by John R. Hauser.
+
+Copyright 2011, 2012, 2013, 2014 The Regents of the University of California.
+All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met:
+
+ 1. Redistributions of source code must retain the above copyright notice,
+    this list of conditions, and the following disclaimer.
+
+ 2. Redistributions in binary form must reproduce the above copyright notice,
+    this list of conditions, and the following disclaimer in the documentation
+    and/or other materials provided with the distribution.
+
+ 3. Neither the name of the University nor the names of its contributors may
+    be used to endorse or promote products derived from this software without
+    specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS "AS IS", AND ANY
+EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE, ARE
+DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY
+DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+=============================================================================*/
+
+#include <stdbool.h>
+#include <stdint.h>
+#include "platform.h"
+#include "internals.h"
+#include "specialize.h"
+#include "softfloat.h"
+
+extFloat80_t extF80_mul( extFloat80_t a, extFloat80_t b )
+{
+    union { struct extFloat80M s; extFloat80_t f; } uA;
+    uint_fast16_t uiA64;
+    uint_fast64_t uiA0;
+    bool signA;
+    int_fast32_t expA;
+    uint_fast64_t sigA;
+    union { struct extFloat80M s; extFloat80_t f; } uB;
+    uint_fast16_t uiB64;
+    uint_fast64_t uiB0;
+    bool signB;
+    int_fast32_t expB;
+    uint_fast64_t sigB;
+    bool signZ;
+    uint_fast64_t magBits;
+    struct exp32_sig64 normExpSig;
+    int_fast32_t expZ;
+    struct uint128 sig128Z, uiZ;
+    uint_fast16_t uiZ64;
+    uint_fast64_t uiZ0;
+    union { struct extFloat80M s; extFloat80_t f; } uZ;
+
+    /*------------------------------------------------------------------------
+    *------------------------------------------------------------------------*/
+    uA.f = a;
+    uiA64 = uA.s.signExp;
+    uiA0  = uA.s.signif;
+    signA = signExtF80UI64( uiA64 );
+    expA  = expExtF80UI64( uiA64 );
+    sigA  = uiA0;
+    uB.f = b;
+    uiB64 = uB.s.signExp;
+    uiB0  = uB.s.signif;
+    signB = signExtF80UI64( uiB64 );
+    expB  = expExtF80UI64( uiB64 );
+    sigB  = uiB0;
+    signZ = signA ^ signB;
+    /*------------------------------------------------------------------------
+    *------------------------------------------------------------------------*/
+    if ( expA == 0x7FFF ) {
+        if (
+               (sigA & UINT64_C( 0x7FFFFFFFFFFFFFFF ))
+            || ((expB == 0x7FFF) && (sigB & UINT64_C( 0x7FFFFFFFFFFFFFFF )))
+        ) {
+            goto propagateNaN;
+        }
+        magBits = expB | sigB;
+        goto infArg;
+    }
+    if ( expB == 0x7FFF ) {
+        if ( sigB & UINT64_C( 0x7FFFFFFFFFFFFFFF ) ) goto propagateNaN;
+        magBits = expA | sigA;
+        goto infArg;
+    }
+    /*------------------------------------------------------------------------
+    *------------------------------------------------------------------------*/
+    if ( ! expA ) expA = 1;
+    if ( ! (sigA & UINT64_C( 0x8000000000000000 )) ) {
+        if ( ! sigA ) goto zero;
+        normExpSig = softfloat_normSubnormalExtF80Sig( sigA );
+        expA += normExpSig.exp;
+        sigA = normExpSig.sig;
+    }
+    if ( ! expB ) expB = 1;
+    if ( ! (sigB & UINT64_C( 0x8000000000000000 )) ) {
+        if ( ! sigB ) goto zero;
+        normExpSig = softfloat_normSubnormalExtF80Sig( sigB );
+        expB += normExpSig.exp;
+        sigB = normExpSig.sig;
+    }
+    /*------------------------------------------------------------------------
+    *------------------------------------------------------------------------*/
+    expZ = expA + expB - 0x3FFE;
+    sig128Z = softfloat_mul64To128( sigA, sigB );
+    if ( sig128Z.v64 < UINT64_C( 0x8000000000000000 ) ) {
+        --expZ;
+        sig128Z =
+            softfloat_add128(
+                sig128Z.v64, sig128Z.v0, sig128Z.v64, sig128Z.v0 );
+    }
+    return
+        softfloat_roundPackToExtF80(
+            signZ, expZ, sig128Z.v64, sig128Z.v0, extF80_roundingPrecision );
+    /*------------------------------------------------------------------------
+    *------------------------------------------------------------------------*/
+ propagateNaN:
+    uiZ = softfloat_propagateNaNExtF80UI( uiA64, uiA0, uiB64, uiB0 );
+    uiZ64 = uiZ.v64;
+    uiZ0  = uiZ.v0;
+    goto uiZ;
+    /*------------------------------------------------------------------------
+    *------------------------------------------------------------------------*/
+ infArg:
+    if ( ! magBits ) {
+        softfloat_raiseFlags( softfloat_flag_invalid );
+        uiZ64 = defaultNaNExtF80UI64;
+        uiZ0  = defaultNaNExtF80UI0;
+    } else {
+        uiZ64 = packToExtF80UI64( signZ, 0x7FFF );
+        uiZ0  = UINT64_C( 0x8000000000000000 );
+    }
+    goto uiZ;
+    /*------------------------------------------------------------------------
+    *------------------------------------------------------------------------*/
+ zero:
+    uiZ64 = packToExtF80UI64( signZ, 0 );
+    uiZ0  = 0;
+ uiZ:
+    uZ.s.signExp = uiZ64;
+    uZ.s.signif  = uiZ0;
+    return uZ.f;
+
+}
+
diff --git a/ext/softfloat/extF80_rem.c b/ext/softfloat/extF80_rem.c
new file mode 100644
index 0000000000..bde1c440a9
--- /dev/null
+++ b/ext/softfloat/extF80_rem.c
@@ -0,0 +1,225 @@
+
+/*============================================================================
+
+This C source file is part of the SoftFloat IEEE Floating-Point Arithmetic
+Package, Release 3d, by John R. Hauser.
+
+Copyright 2011, 2012, 2013, 2014, 2015, 2016, 2017 The Regents of the
+University of California.  All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met:
+
+ 1. Redistributions of source code must retain the above copyright notice,
+    this list of conditions, and the following disclaimer.
+
+ 2. Redistributions in binary form must reproduce the above copyright notice,
+    this list of conditions, and the following disclaimer in the documentation
+    and/or other materials provided with the distribution.
+
+ 3. Neither the name of the University nor the names of its contributors may
+    be used to endorse or promote products derived from this software without
+    specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS "AS IS", AND ANY
+EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE, ARE
+DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY
+DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+=============================================================================*/
+
+#include <stdbool.h>
+#include <stdint.h>
+#include "platform.h"
+#include "internals.h"
+#include "specialize.h"
+#include "softfloat.h"
+
+extFloat80_t extF80_rem( extFloat80_t a, extFloat80_t b )
+{
+    union { struct extFloat80M s; extFloat80_t f; } uA;
+    uint_fast16_t uiA64;
+    uint_fast64_t uiA0;
+    bool signA;
+    int_fast32_t expA;
+    uint_fast64_t sigA;
+    union { struct extFloat80M s; extFloat80_t f; } uB;
+    uint_fast16_t uiB64;
+    uint_fast64_t uiB0;
+    int_fast32_t expB;
+    uint_fast64_t sigB;
+    struct exp32_sig64 normExpSig;
+    int_fast32_t expDiff;
+    struct uint128 rem, shiftedSigB;
+    uint_fast32_t q, recip32;
+    uint_fast64_t q64;
+    struct uint128 term, altRem, meanRem;
+    bool signRem;
+    struct uint128 uiZ;
+    uint_fast16_t uiZ64;
+    uint_fast64_t uiZ0;
+    union { struct extFloat80M s; extFloat80_t f; } uZ;
+
+    /*------------------------------------------------------------------------
+    *------------------------------------------------------------------------*/
+    uA.f = a;
+    uiA64 = uA.s.signExp;
+    uiA0  = uA.s.signif;
+    signA = signExtF80UI64( uiA64 );
+    expA  = expExtF80UI64( uiA64 );
+    sigA  = uiA0;
+    uB.f = b;
+    uiB64 = uB.s.signExp;
+    uiB0  = uB.s.signif;
+    expB  = expExtF80UI64( uiB64 );
+    sigB  = uiB0;
+    /*------------------------------------------------------------------------
+    *------------------------------------------------------------------------*/
+    if ( expA == 0x7FFF ) {
+        if (
+               (sigA & UINT64_C( 0x7FFFFFFFFFFFFFFF ))
+            || ((expB == 0x7FFF) && (sigB & UINT64_C( 0x7FFFFFFFFFFFFFFF )))
+        ) {
+            goto propagateNaN;
+        }
+        goto invalid;
+    }
+    if ( expB == 0x7FFF ) {
+        if ( sigB & UINT64_C( 0x7FFFFFFFFFFFFFFF ) ) goto propagateNaN;
+        /*--------------------------------------------------------------------
+        | Argument b is an infinity.  Doubling `expB' is an easy way to ensure
+        | that `expDiff' later is less than -1, which will result in returning
+        | a canonicalized version of argument a.
+        *--------------------------------------------------------------------*/
+        expB += expB;
+    }
+    /*------------------------------------------------------------------------
+    *------------------------------------------------------------------------*/
+    if ( ! expB ) expB = 1;
+    if ( ! (sigB & UINT64_C( 0x8000000000000000 )) ) {
+        if ( ! sigB ) goto invalid;
+        normExpSig = softfloat_normSubnormalExtF80Sig( sigB );
+        expB += normExpSig.exp;
+        sigB = normExpSig.sig;
+    }
+    if ( ! expA ) expA = 1;
+    if ( ! (sigA & UINT64_C( 0x8000000000000000 )) ) {
+        if ( ! sigA ) {
+            expA = 0;
+            goto copyA;
+        }
+        normExpSig = softfloat_normSubnormalExtF80Sig( sigA );
+        expA += normExpSig.exp;
+        sigA = normExpSig.sig;
+    }
+    /*------------------------------------------------------------------------
+    *------------------------------------------------------------------------*/
+    expDiff = expA - expB;
+    if ( expDiff < -1 ) goto copyA;
+    rem = softfloat_shortShiftLeft128( 0, sigA, 32 );
+    shiftedSigB = softfloat_shortShiftLeft128( 0, sigB, 32 );
+    if ( expDiff < 1 ) {
+        if ( expDiff ) {
+            --expB;
+            shiftedSigB = softfloat_shortShiftLeft128( 0, sigB, 33 );
+            q = 0;
+        } else {
+            q = (sigB <= sigA);
+            if ( q ) {
+                rem =
+                    softfloat_sub128(
+                        rem.v64, rem.v0, shiftedSigB.v64, shiftedSigB.v0 );
+            }
+        }
+    } else {
+        recip32 = softfloat_approxRecip32_1( sigB>>32 );
+        expDiff -= 30;
+        for (;;) {
+            q64 = (uint_fast64_t) (uint32_t) (rem.v64>>2) * recip32;
+            if ( expDiff < 0 ) break;
+            q = (q64 + 0x80000000)>>32;
+            rem = softfloat_shortShiftLeft128( rem.v64, rem.v0, 29 );
+            term = softfloat_mul64ByShifted32To128( sigB, q );
+            rem = softfloat_sub128( rem.v64, rem.v0, term.v64, term.v0 );
+            if ( rem.v64 & UINT64_C( 0x8000000000000000 ) ) {
+                rem =
+                    softfloat_add128(
+                        rem.v64, rem.v0, shiftedSigB.v64, shiftedSigB.v0 );
+            }
+            expDiff -= 29;
+        }
+        /*--------------------------------------------------------------------
+        | (`expDiff' cannot be less than -29 here.)
+        *--------------------------------------------------------------------*/
+        q = (uint32_t) (q64>>32)>>(~expDiff & 31);
+        rem = softfloat_shortShiftLeft128( rem.v64, rem.v0, expDiff + 30 );
+        term = softfloat_mul64ByShifted32To128( sigB, q );
+        rem = softfloat_sub128( rem.v64, rem.v0, term.v64, term.v0 );
+        if ( rem.v64 & UINT64_C( 0x8000000000000000 ) ) {
+            altRem =
+                softfloat_add128(
+                    rem.v64, rem.v0, shiftedSigB.v64, shiftedSigB.v0 );
+            goto selectRem;
+        }
+    }
+    /*------------------------------------------------------------------------
+    *------------------------------------------------------------------------*/
+    do {
+        altRem = rem;
+        ++q;
+        rem =
+            softfloat_sub128(
+                rem.v64, rem.v0, shiftedSigB.v64, shiftedSigB.v0 );
+    } while ( ! (rem.v64 & UINT64_C( 0x8000000000000000 )) );
+ selectRem:
+    meanRem = softfloat_add128( rem.v64, rem.v0, altRem.v64, altRem.v0 );
+    if (
+        (meanRem.v64 & UINT64_C( 0x8000000000000000 ))
+            || (! (meanRem.v64 | meanRem.v0) && (q & 1))
+    ) {
+        rem = altRem;
+    }
+    signRem = signA;
+    if ( rem.v64 & UINT64_C( 0x8000000000000000 ) ) {
+        signRem = ! signRem;
+        rem = softfloat_sub128( 0, 0, rem.v64, rem.v0 );
+    }
+    return
+        softfloat_normRoundPackToExtF80(
+            signRem, rem.v64 | rem.v0 ? expB + 32 : 0, rem.v64, rem.v0, 80 );
+    /*------------------------------------------------------------------------
+    *------------------------------------------------------------------------*/
+ propagateNaN:
+    uiZ = softfloat_propagateNaNExtF80UI( uiA64, uiA0, uiB64, uiB0 );
+    uiZ64 = uiZ.v64;
+    uiZ0  = uiZ.v0;
+    goto uiZ;
+    /*------------------------------------------------------------------------
+    *------------------------------------------------------------------------*/
+ invalid:
+    softfloat_raiseFlags( softfloat_flag_invalid );
+    uiZ64 = defaultNaNExtF80UI64;
+    uiZ0  = defaultNaNExtF80UI0;
+    goto uiZ;
+    /*------------------------------------------------------------------------
+    *------------------------------------------------------------------------*/
+ copyA:
+    if ( expA < 1 ) {
+        sigA >>= 1 - expA;
+        expA = 0;
+    }
+    uiZ64 = packToExtF80UI64( signA, expA );
+    uiZ0  = sigA;
+ uiZ:
+    uZ.s.signExp = uiZ64;
+    uZ.s.signif  = uiZ0;
+    return uZ.f;
+
+}
+
diff --git a/ext/softfloat/extF80_roundToInt.c b/ext/softfloat/extF80_roundToInt.c
new file mode 100644
index 0000000000..f7d950091f
--- /dev/null
+++ b/ext/softfloat/extF80_roundToInt.c
@@ -0,0 +1,147 @@
+
+/*============================================================================
+
+This C source file is part of the SoftFloat IEEE Floating-Point Arithmetic
+Package, Release 3d, by John R. Hauser.
+
+Copyright 2011, 2012, 2013, 2014, 2017 The Regents of the University of
+California.  All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met:
+
+ 1. Redistributions of source code must retain the above copyright notice,
+    this list of conditions, and the following disclaimer.
+
+ 2. Redistributions in binary form must reproduce the above copyright notice,
+    this list of conditions, and the following disclaimer in the documentation
+    and/or other materials provided with the distribution.
+
+ 3. Neither the name of the University nor the names of its contributors may
+    be used to endorse or promote products derived from this software without
+    specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS "AS IS", AND ANY
+EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE, ARE
+DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY
+DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+=============================================================================*/
+
+#include <stdbool.h>
+#include <stdint.h>
+#include "platform.h"
+#include "internals.h"
+#include "specialize.h"
+#include "softfloat.h"
+
+extFloat80_t
+ extF80_roundToInt( extFloat80_t a, uint_fast8_t roundingMode, bool exact )
+{
+    union { struct extFloat80M s; extFloat80_t f; } uA;
+    uint_fast16_t uiA64, signUI64;
+    int_fast32_t exp;
+    uint_fast64_t sigA;
+    uint_fast16_t uiZ64;
+    uint_fast64_t sigZ;
+    struct exp32_sig64 normExpSig;
+    struct uint128 uiZ;
+    uint_fast64_t lastBitMask, roundBitsMask;
+    union { struct extFloat80M s; extFloat80_t f; } uZ;
+
+    /*------------------------------------------------------------------------
+    *------------------------------------------------------------------------*/
+    uA.f = a;
+    uiA64 = uA.s.signExp;
+    signUI64 = uiA64 & packToExtF80UI64( 1, 0 );
+    exp = expExtF80UI64( uiA64 );
+    sigA = uA.s.signif;
+    /*------------------------------------------------------------------------
+    *------------------------------------------------------------------------*/
+    if ( ! (sigA & UINT64_C( 0x8000000000000000 )) && (exp != 0x7FFF) ) {
+        if ( ! sigA ) {
+            uiZ64 = signUI64;
+            sigZ = 0;
+            goto uiZ;
+        }
+        normExpSig = softfloat_normSubnormalExtF80Sig( sigA );
+        exp += normExpSig.exp;
+        sigA = normExpSig.sig;
+    }
+    /*------------------------------------------------------------------------
+    *------------------------------------------------------------------------*/
+    if ( 0x403E <= exp ) {
+        if ( exp == 0x7FFF ) {
+            if ( sigA & UINT64_C( 0x7FFFFFFFFFFFFFFF ) ) {
+                uiZ = softfloat_propagateNaNExtF80UI( uiA64, sigA, 0, 0 );
+                uiZ64 = uiZ.v64;
+                sigZ  = uiZ.v0;
+                goto uiZ;
+            }
+            sigZ = UINT64_C( 0x8000000000000000 );
+        } else {
+            sigZ = sigA;
+        }
+        uiZ64 = signUI64 | exp;
+        goto uiZ;
+    }
+    if ( exp <= 0x3FFE ) {
+        if ( exact ) softfloat_exceptionFlags |= softfloat_flag_inexact;
+        switch ( roundingMode ) {
+         case softfloat_round_near_even:
+            if ( ! (sigA & UINT64_C( 0x7FFFFFFFFFFFFFFF )) ) break;
+         case softfloat_round_near_maxMag:
+            if ( exp == 0x3FFE ) goto mag1;
+            break;
+         case softfloat_round_min:
+            if ( signUI64 ) goto mag1;
+            break;
+         case softfloat_round_max:
+            if ( ! signUI64 ) goto mag1;
+            break;
+        }
+        uiZ64 = signUI64;
+        sigZ  = 0;
+        goto uiZ;
+     mag1:
+        uiZ64 = signUI64 | 0x3FFF;
+        sigZ  = UINT64_C( 0x8000000000000000 );
+        goto uiZ;
+    }
+    /*------------------------------------------------------------------------
+    *------------------------------------------------------------------------*/
+    uiZ64 = signUI64 | exp;
+    lastBitMask = (uint_fast64_t) 1<<(0x403E - exp);
+    roundBitsMask = lastBitMask - 1;
+    sigZ = sigA;
+    if ( roundingMode == softfloat_round_near_maxMag ) {
+        sigZ += lastBitMask>>1;
+    } else if ( roundingMode == softfloat_round_near_even ) {
+        sigZ += lastBitMask>>1;
+        if ( ! (sigZ & roundBitsMask) ) sigZ &= ~lastBitMask;
+    } else if (
+        roundingMode == (signUI64 ? softfloat_round_min : softfloat_round_max)
+    ) {
+        sigZ += roundBitsMask;
+    }
+    sigZ &= ~roundBitsMask;
+    if ( ! sigZ ) {
+        ++uiZ64;
+        sigZ = UINT64_C( 0x8000000000000000 );
+    }
+    if ( exact && (sigZ != sigA) ) {
+        softfloat_exceptionFlags |= softfloat_flag_inexact;
+    }
+ uiZ:
+    uZ.s.signExp = uiZ64;
+    uZ.s.signif = sigZ;
+    return uZ.f;
+
+}
+
diff --git a/ext/softfloat/extF80_sqrt.c b/ext/softfloat/extF80_sqrt.c
new file mode 100644
index 0000000000..4c19af397b
--- /dev/null
+++ b/ext/softfloat/extF80_sqrt.c
@@ -0,0 +1,176 @@
+
+/*============================================================================
+
+This C source file is part of the SoftFloat IEEE Floating-Point Arithmetic
+Package, Release 3d, by John R. Hauser.
+
+Copyright 2011, 2012, 2013, 2014, 2015, 2016, 2017 The Regents of the
+University of California.  All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met:
+
+ 1. Redistributions of source code must retain the above copyright notice,
+    this list of conditions, and the following disclaimer.
+
+ 2. Redistributions in binary form must reproduce the above copyright notice,
+    this list of conditions, and the following disclaimer in the documentation
+    and/or other materials provided with the distribution.
+
+ 3. Neither the name of the University nor the names of its contributors may
+    be used to endorse or promote products derived from this software without
+    specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS "AS IS", AND ANY
+EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE, ARE
+DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY
+DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+=============================================================================*/
+
+#include <stdbool.h>
+#include <stdint.h>
+#include "platform.h"
+#include "internals.h"
+#include "specialize.h"
+#include "softfloat.h"
+
+extFloat80_t extF80_sqrt( extFloat80_t a )
+{
+    union { struct extFloat80M s; extFloat80_t f; } uA;
+    uint_fast16_t uiA64;
+    uint_fast64_t uiA0;
+    bool signA;
+    int_fast32_t expA;
+    uint_fast64_t sigA;
+    struct uint128 uiZ;
+    uint_fast16_t uiZ64;
+    uint_fast64_t uiZ0;
+    struct exp32_sig64 normExpSig;
+    int_fast32_t expZ;
+    uint_fast32_t sig32A, recipSqrt32, sig32Z;
+    struct uint128 rem;
+    uint_fast64_t q, x64, sigZ;
+    struct uint128 y, term;
+    uint_fast64_t sigZExtra;
+    union { struct extFloat80M s; extFloat80_t f; } uZ;
+
+    /*------------------------------------------------------------------------
+    *------------------------------------------------------------------------*/
+    uA.f = a;
+    uiA64 = uA.s.signExp;
+    uiA0  = uA.s.signif;
+    signA = signExtF80UI64( uiA64 );
+    expA  = expExtF80UI64( uiA64 );
+    sigA  = uiA0;
+    /*------------------------------------------------------------------------
+    *------------------------------------------------------------------------*/
+    if ( expA == 0x7FFF ) {
+        if ( sigA & UINT64_C( 0x7FFFFFFFFFFFFFFF ) ) {
+            uiZ = softfloat_propagateNaNExtF80UI( uiA64, uiA0, 0, 0 );
+            uiZ64 = uiZ.v64;
+            uiZ0  = uiZ.v0;
+            goto uiZ;
+        }
+        if ( ! signA ) return a;
+        goto invalid;
+    }
+    /*------------------------------------------------------------------------
+    *------------------------------------------------------------------------*/
+    if ( signA ) {
+        if ( ! sigA ) goto zero;
+        goto invalid;
+    }
+    /*------------------------------------------------------------------------
+    *------------------------------------------------------------------------*/
+    if ( ! expA ) expA = 1;
+    if ( ! (sigA & UINT64_C( 0x8000000000000000 )) ) {
+        if ( ! sigA ) goto zero;
+        normExpSig = softfloat_normSubnormalExtF80Sig( sigA );
+        expA += normExpSig.exp;
+        sigA = normExpSig.sig;
+    }
+    /*------------------------------------------------------------------------
+    | (`sig32Z' is guaranteed to be a lower bound on the square root of
+    | `sig32A', which makes `sig32Z' also a lower bound on the square root of
+    | `sigA'.)
+    *------------------------------------------------------------------------*/
+    expZ = ((expA - 0x3FFF)>>1) + 0x3FFF;
+    expA &= 1;
+    sig32A = sigA>>32;
+    recipSqrt32 = softfloat_approxRecipSqrt32_1( expA, sig32A );
+    sig32Z = ((uint_fast64_t) sig32A * recipSqrt32)>>32;
+    if ( expA ) {
+        sig32Z >>= 1;
+        rem = softfloat_shortShiftLeft128( 0, sigA, 61 );
+    } else {
+        rem = softfloat_shortShiftLeft128( 0, sigA, 62 );
+    }
+    rem.v64 -= (uint_fast64_t) sig32Z * sig32Z;
+    /*------------------------------------------------------------------------
+    *------------------------------------------------------------------------*/
+    q = ((uint32_t) (rem.v64>>2) * (uint_fast64_t) recipSqrt32)>>32;
+    x64 = (uint_fast64_t) sig32Z<<32;
+    sigZ = x64 + (q<<3);
+    y = softfloat_shortShiftLeft128( rem.v64, rem.v0, 29 );
+    /*------------------------------------------------------------------------
+    | (Repeating this loop is a rare occurrence.)
+    *------------------------------------------------------------------------*/
+    for (;;) {
+        term = softfloat_mul64ByShifted32To128( x64 + sigZ, q );
+        rem = softfloat_sub128( y.v64, y.v0, term.v64, term.v0 );
+        if ( ! (rem.v64 & UINT64_C( 0x8000000000000000 )) ) break;
+        --q;
+        sigZ -= 1<<3;
+    }
+    /*------------------------------------------------------------------------
+    *------------------------------------------------------------------------*/
+    q = (((rem.v64>>2) * recipSqrt32)>>32) + 2;
+    x64 = sigZ;
+    sigZ = (sigZ<<1) + (q>>25);
+    sigZExtra = (uint64_t) (q<<39);
+    /*------------------------------------------------------------------------
+    *------------------------------------------------------------------------*/
+    if ( (q & 0xFFFFFF) <= 2 ) {
+        q &= ~(uint_fast64_t) 0xFFFF;
+        sigZExtra = (uint64_t) (q<<39);
+        term = softfloat_mul64ByShifted32To128( x64 + (q>>27), q );
+        x64 = (uint32_t) (q<<5) * (uint_fast64_t) (uint32_t) q;
+        term = softfloat_add128( term.v64, term.v0, 0, x64 );
+        rem = softfloat_shortShiftLeft128( rem.v64, rem.v0, 28 );
+        rem = softfloat_sub128( rem.v64, rem.v0, term.v64, term.v0 );
+        if ( rem.v64 & UINT64_C( 0x8000000000000000 ) ) {
+            if ( ! sigZExtra ) --sigZ;
+            --sigZExtra;
+        } else {
+            if ( rem.v64 | rem.v0 ) sigZExtra |= 1;
+        }
+    }
+    return
+        softfloat_roundPackToExtF80(
+            0, expZ, sigZ, sigZExtra, extF80_roundingPrecision );
+    /*------------------------------------------------------------------------
+    *------------------------------------------------------------------------*/
+ invalid:
+    softfloat_raiseFlags( softfloat_flag_invalid );
+    uiZ64 = defaultNaNExtF80UI64;
+    uiZ0  = defaultNaNExtF80UI0;
+    goto uiZ;
+    /*------------------------------------------------------------------------
+    *------------------------------------------------------------------------*/
+ zero:
+    uiZ64 = packToExtF80UI64( signA, 0 );
+    uiZ0  = 0;
+ uiZ:
+    uZ.s.signExp = uiZ64;
+    uZ.s.signif  = uiZ0;
+    return uZ.f;
+
+}
+
diff --git a/ext/softfloat/extF80_sub.c b/ext/softfloat/extF80_sub.c
new file mode 100644
index 0000000000..3b8f309404
--- /dev/null
+++ b/ext/softfloat/extF80_sub.c
@@ -0,0 +1,80 @@
+
+/*============================================================================
+
+This C source file is part of the SoftFloat IEEE Floating-Point Arithmetic
+Package, Release 3d, by John R. Hauser.
+
+Copyright 2011, 2012, 2013, 2014 The Regents of the University of California.
+All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met:
+
+ 1. Redistributions of source code must retain the above copyright notice,
+    this list of conditions, and the following disclaimer.
+
+ 2. Redistributions in binary form must reproduce the above copyright notice,
+    this list of conditions, and the following disclaimer in the documentation
+    and/or other materials provided with the distribution.
+
+ 3. Neither the name of the University nor the names of its contributors may
+    be used to endorse or promote products derived from this software without
+    specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS "AS IS", AND ANY
+EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE, ARE
+DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY
+DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+=============================================================================*/
+
+#include <stdbool.h>
+#include <stdint.h>
+#include "platform.h"
+#include "internals.h"
+#include "softfloat.h"
+
+extFloat80_t extF80_sub( extFloat80_t a, extFloat80_t b )
+{
+    union { struct extFloat80M s; extFloat80_t f; } uA;
+    uint_fast16_t uiA64;
+    uint_fast64_t uiA0;
+    bool signA;
+    union { struct extFloat80M s; extFloat80_t f; } uB;
+    uint_fast16_t uiB64;
+    uint_fast64_t uiB0;
+    bool signB;
+#if ! defined INLINE_LEVEL || (INLINE_LEVEL < 2)
+    extFloat80_t
+        (*magsFuncPtr)(
+            uint_fast16_t, uint_fast64_t, uint_fast16_t, uint_fast64_t, bool );
+#endif
+
+    uA.f = a;
+    uiA64 = uA.s.signExp;
+    uiA0  = uA.s.signif;
+    signA = signExtF80UI64( uiA64 );
+    uB.f = b;
+    uiB64 = uB.s.signExp;
+    uiB0  = uB.s.signif;
+    signB = signExtF80UI64( uiB64 );
+#if defined INLINE_LEVEL && (2 <= INLINE_LEVEL)
+    if ( signA == signB ) {
+        return softfloat_subMagsExtF80( uiA64, uiA0, uiB64, uiB0, signA );
+    } else {
+        return softfloat_addMagsExtF80( uiA64, uiA0, uiB64, uiB0, signA );
+    }
+#else
+    magsFuncPtr =
+        (signA == signB) ? softfloat_subMagsExtF80 : softfloat_addMagsExtF80;
+    return (*magsFuncPtr)( uiA64, uiA0, uiB64, uiB0, signA );
+#endif
+
+}
+
diff --git a/ext/softfloat/extF80_to_f128.c b/ext/softfloat/extF80_to_f128.c
new file mode 100644
index 0000000000..c554695f74
--- /dev/null
+++ b/ext/softfloat/extF80_to_f128.c
@@ -0,0 +1,75 @@
+
+/*============================================================================
+
+This C source file is part of the SoftFloat IEEE Floating-Point Arithmetic
+Package, Release 3d, by John R. Hauser.
+
+Copyright 2011, 2012, 2013, 2014 The Regents of the University of California.
+All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met:
+
+ 1. Redistributions of source code must retain the above copyright notice,
+    this list of conditions, and the following disclaimer.
+
+ 2. Redistributions in binary form must reproduce the above copyright notice,
+    this list of conditions, and the following disclaimer in the documentation
+    and/or other materials provided with the distribution.
+
+ 3. Neither the name of the University nor the names of its contributors may
+    be used to endorse or promote products derived from this software without
+    specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS "AS IS", AND ANY
+EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE, ARE
+DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY
+DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+=============================================================================*/
+
+#include <stdbool.h>
+#include <stdint.h>
+#include "platform.h"
+#include "internals.h"
+#include "specialize.h"
+#include "softfloat.h"
+
+float128_t extF80_to_f128( extFloat80_t a )
+{
+    union { struct extFloat80M s; extFloat80_t f; } uA;
+    uint_fast16_t uiA64;
+    uint_fast64_t uiA0;
+    uint_fast16_t exp;
+    uint_fast64_t frac;
+    struct commonNaN commonNaN;
+    struct uint128 uiZ;
+    bool sign;
+    struct uint128 frac128;
+    union ui128_f128 uZ;
+
+    uA.f = a;
+    uiA64 = uA.s.signExp;
+    uiA0  = uA.s.signif;
+    exp = expExtF80UI64( uiA64 );
+    frac = uiA0 & UINT64_C( 0x7FFFFFFFFFFFFFFF );
+    if ( (exp == 0x7FFF) && frac ) {
+        softfloat_extF80UIToCommonNaN( uiA64, uiA0, &commonNaN );
+        uiZ = softfloat_commonNaNToF128UI( &commonNaN );
+    } else {
+        sign = signExtF80UI64( uiA64 );
+        frac128 = softfloat_shortShiftLeft128( 0, frac, 49 );
+        uiZ.v64 = packToF128UI64( sign, exp, frac128.v64 );
+        uiZ.v0  = frac128.v0;
+    }
+    uZ.ui = uiZ;
+    return uZ.f;
+
+}
+
diff --git a/ext/softfloat/extF80_to_f16.c b/ext/softfloat/extF80_to_f16.c
new file mode 100644
index 0000000000..aaf56585c5
--- /dev/null
+++ b/ext/softfloat/extF80_to_f16.c
@@ -0,0 +1,96 @@
+
+/*============================================================================
+
+This C source file is part of the SoftFloat IEEE Floating-Point Arithmetic
+Package, Release 3d, by John R. Hauser.
+
+Copyright 2011, 2012, 2013, 2014, 2015 The Regents of the University of
+California.  All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met:
+
+ 1. Redistributions of source code must retain the above copyright notice,
+    this list of conditions, and the following disclaimer.
+
+ 2. Redistributions in binary form must reproduce the above copyright notice,
+    this list of conditions, and the following disclaimer in the documentation
+    and/or other materials provided with the distribution.
+
+ 3. Neither the name of the University nor the names of its contributors may
+    be used to endorse or promote products derived from this software without
+    specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS "AS IS", AND ANY
+EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE, ARE
+DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY
+DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+=============================================================================*/
+
+#include <stdbool.h>
+#include <stdint.h>
+#include "platform.h"
+#include "internals.h"
+#include "specialize.h"
+#include "softfloat.h"
+
+float16_t extF80_to_f16( extFloat80_t a )
+{
+    union { struct extFloat80M s; extFloat80_t f; } uA;
+    uint_fast16_t uiA64;
+    uint_fast64_t uiA0;
+    bool sign;
+    int_fast32_t exp;
+    uint_fast64_t sig;
+    struct commonNaN commonNaN;
+    uint_fast16_t uiZ, sig16;
+    union ui16_f16 uZ;
+
+    /*------------------------------------------------------------------------
+    *------------------------------------------------------------------------*/
+    uA.f = a;
+    uiA64 = uA.s.signExp;
+    uiA0  = uA.s.signif;
+    sign = signExtF80UI64( uiA64 );
+    exp  = expExtF80UI64( uiA64 );
+    sig  = uiA0;
+    /*------------------------------------------------------------------------
+    *------------------------------------------------------------------------*/
+    if ( exp == 0x7FFF ) {
+        if ( sig & UINT64_C( 0x7FFFFFFFFFFFFFFF ) ) {
+            softfloat_extF80UIToCommonNaN( uiA64, uiA0, &commonNaN );
+            uiZ = softfloat_commonNaNToF16UI( &commonNaN );
+        } else {
+            uiZ = packToF16UI( sign, 0x1F, 0 );
+        }
+        goto uiZ;
+    }
+    /*------------------------------------------------------------------------
+    *------------------------------------------------------------------------*/
+    sig16 = softfloat_shortShiftRightJam64( sig, 49 );
+    if ( ! (exp | sig16) ) {
+        uiZ = packToF16UI( sign, 0, 0 );
+        goto uiZ;
+    }
+    /*------------------------------------------------------------------------
+    *------------------------------------------------------------------------*/
+    exp -= 0x3FF1;
+    if ( sizeof (int_fast16_t) < sizeof (int_fast32_t) ) {
+        if ( exp < -0x40 ) exp = -0x40;
+    }
+    return softfloat_roundPackToF16( sign, exp, sig16 );
+    /*------------------------------------------------------------------------
+    *------------------------------------------------------------------------*/
+ uiZ:
+    uZ.ui = uiZ;
+    return uZ.f;
+
+}
+
diff --git a/ext/softfloat/extF80_to_f32.c b/ext/softfloat/extF80_to_f32.c
new file mode 100644
index 0000000000..3359f351c9
--- /dev/null
+++ b/ext/softfloat/extF80_to_f32.c
@@ -0,0 +1,96 @@
+
+/*============================================================================
+
+This C source file is part of the SoftFloat IEEE Floating-Point Arithmetic
+Package, Release 3d, by John R. Hauser.
+
+Copyright 2011, 2012, 2013, 2014, 2015 The Regents of the University of
+California.  All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met:
+
+ 1. Redistributions of source code must retain the above copyright notice,
+    this list of conditions, and the following disclaimer.
+
+ 2. Redistributions in binary form must reproduce the above copyright notice,
+    this list of conditions, and the following disclaimer in the documentation
+    and/or other materials provided with the distribution.
+
+ 3. Neither the name of the University nor the names of its contributors may
+    be used to endorse or promote products derived from this software without
+    specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS "AS IS", AND ANY
+EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE, ARE
+DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY
+DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+=============================================================================*/
+
+#include <stdbool.h>
+#include <stdint.h>
+#include "platform.h"
+#include "internals.h"
+#include "specialize.h"
+#include "softfloat.h"
+
+float32_t extF80_to_f32( extFloat80_t a )
+{
+    union { struct extFloat80M s; extFloat80_t f; } uA;
+    uint_fast16_t uiA64;
+    uint_fast64_t uiA0;
+    bool sign;
+    int_fast32_t exp;
+    uint_fast64_t sig;
+    struct commonNaN commonNaN;
+    uint_fast32_t uiZ, sig32;
+    union ui32_f32 uZ;
+
+    /*------------------------------------------------------------------------
+    *------------------------------------------------------------------------*/
+    uA.f = a;
+    uiA64 = uA.s.signExp;
+    uiA0  = uA.s.signif;
+    sign = signExtF80UI64( uiA64 );
+    exp  = expExtF80UI64( uiA64 );
+    sig  = uiA0;
+    /*------------------------------------------------------------------------
+    *------------------------------------------------------------------------*/
+    if ( exp == 0x7FFF ) {
+        if ( sig & UINT64_C( 0x7FFFFFFFFFFFFFFF ) ) {
+            softfloat_extF80UIToCommonNaN( uiA64, uiA0, &commonNaN );
+            uiZ = softfloat_commonNaNToF32UI( &commonNaN );
+        } else {
+            uiZ = packToF32UI( sign, 0xFF, 0 );
+        }
+        goto uiZ;
+    }
+    /*------------------------------------------------------------------------
+    *------------------------------------------------------------------------*/
+    sig32 = softfloat_shortShiftRightJam64( sig, 33 );
+    if ( ! (exp | sig32) ) {
+        uiZ = packToF32UI( sign, 0, 0 );
+        goto uiZ;
+    }
+    /*------------------------------------------------------------------------
+    *------------------------------------------------------------------------*/
+    exp -= 0x3F81;
+    if ( sizeof (int_fast16_t) < sizeof (int_fast32_t) ) {
+        if ( exp < -0x1000 ) exp = -0x1000;
+    }
+    return softfloat_roundPackToF32( sign, exp, sig32 );
+    /*------------------------------------------------------------------------
+    *------------------------------------------------------------------------*/
+ uiZ:
+    uZ.ui = uiZ;
+    return uZ.f;
+
+}
+
diff --git a/ext/softfloat/extF80_to_f64.c b/ext/softfloat/extF80_to_f64.c
new file mode 100644
index 0000000000..2f8957b2ee
--- /dev/null
+++ b/ext/softfloat/extF80_to_f64.c
@@ -0,0 +1,96 @@
+
+/*============================================================================
+
+This C source file is part of the SoftFloat IEEE Floating-Point Arithmetic
+Package, Release 3d, by John R. Hauser.
+
+Copyright 2011, 2012, 2013, 2014, 2015 The Regents of the University of
+California.  All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met:
+
+ 1. Redistributions of source code must retain the above copyright notice,
+    this list of conditions, and the following disclaimer.
+
+ 2. Redistributions in binary form must reproduce the above copyright notice,
+    this list of conditions, and the following disclaimer in the documentation
+    and/or other materials provided with the distribution.
+
+ 3. Neither the name of the University nor the names of its contributors may
+    be used to endorse or promote products derived from this software without
+    specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS "AS IS", AND ANY
+EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE, ARE
+DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY
+DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+=============================================================================*/
+
+#include <stdbool.h>
+#include <stdint.h>
+#include "platform.h"
+#include "internals.h"
+#include "specialize.h"
+#include "softfloat.h"
+
+float64_t extF80_to_f64( extFloat80_t a )
+{
+    union { struct extFloat80M s; extFloat80_t f; } uA;
+    uint_fast16_t uiA64;
+    uint_fast64_t uiA0;
+    bool sign;
+    int_fast32_t exp;
+    uint_fast64_t sig;
+    struct commonNaN commonNaN;
+    uint_fast64_t uiZ;
+    union ui64_f64 uZ;
+
+    /*------------------------------------------------------------------------
+    *------------------------------------------------------------------------*/
+    uA.f = a;
+    uiA64 = uA.s.signExp;
+    uiA0  = uA.s.signif;
+    sign = signExtF80UI64( uiA64 );
+    exp  = expExtF80UI64( uiA64 );
+    sig  = uiA0;
+    /*------------------------------------------------------------------------
+    *------------------------------------------------------------------------*/
+    if ( ! (exp | sig) ) {
+        uiZ = packToF64UI( sign, 0, 0 );
+        goto uiZ;
+    }
+    /*------------------------------------------------------------------------
+    *------------------------------------------------------------------------*/
+    if ( exp == 0x7FFF ) {
+        if ( sig & UINT64_C( 0x7FFFFFFFFFFFFFFF ) ) {
+            softfloat_extF80UIToCommonNaN( uiA64, uiA0, &commonNaN );
+            uiZ = softfloat_commonNaNToF64UI( &commonNaN );
+        } else {
+            uiZ = packToF64UI( sign, 0x7FF, 0 );
+        }
+        goto uiZ;
+    }
+    /*------------------------------------------------------------------------
+    *------------------------------------------------------------------------*/
+    sig = softfloat_shortShiftRightJam64( sig, 1 );
+    exp -= 0x3C01;
+    if ( sizeof (int_fast16_t) < sizeof (int_fast32_t) ) {
+        if ( exp < -0x1000 ) exp = -0x1000;
+    }
+    return softfloat_roundPackToF64( sign, exp, sig );
+    /*------------------------------------------------------------------------
+    *------------------------------------------------------------------------*/
+ uiZ:
+    uZ.ui = uiZ;
+    return uZ.f;
+
+}
+
diff --git a/ext/softfloat/extF80_to_i32.c b/ext/softfloat/extF80_to_i32.c
new file mode 100644
index 0000000000..0ffb6412d8
--- /dev/null
+++ b/ext/softfloat/extF80_to_i32.c
@@ -0,0 +1,83 @@
+
+/*============================================================================
+
+This C source file is part of the SoftFloat IEEE Floating-Point Arithmetic
+Package, Release 3d, by John R. Hauser.
+
+Copyright 2011, 2012, 2013, 2014, 2015, 2016, 2017 The Regents of the
+University of California.  All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met:
+
+ 1. Redistributions of source code must retain the above copyright notice,
+    this list of conditions, and the following disclaimer.
+
+ 2. Redistributions in binary form must reproduce the above copyright notice,
+    this list of conditions, and the following disclaimer in the documentation
+    and/or other materials provided with the distribution.
+
+ 3. Neither the name of the University nor the names of its contributors may
+    be used to endorse or promote products derived from this software without
+    specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS "AS IS", AND ANY
+EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE, ARE
+DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY
+DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+=============================================================================*/
+
+#include <stdbool.h>
+#include <stdint.h>
+#include "platform.h"
+#include "internals.h"
+#include "specialize.h"
+#include "softfloat.h"
+
+int_fast32_t
+ extF80_to_i32( extFloat80_t a, uint_fast8_t roundingMode, bool exact )
+{
+    union { struct extFloat80M s; extFloat80_t f; } uA;
+    uint_fast16_t uiA64;
+    bool sign;
+    int_fast32_t exp;
+    uint_fast64_t sig;
+    int_fast32_t shiftDist;
+
+    /*------------------------------------------------------------------------
+    *------------------------------------------------------------------------*/
+    uA.f = a;
+    uiA64 = uA.s.signExp;
+    sign = signExtF80UI64( uiA64 );
+    exp  = expExtF80UI64( uiA64 );
+    sig = uA.s.signif;
+    /*------------------------------------------------------------------------
+    *------------------------------------------------------------------------*/
+#if (i32_fromNaN != i32_fromPosOverflow) || (i32_fromNaN != i32_fromNegOverflow)
+    if ( (exp == 0x7FFF) && (sig & UINT64_C( 0x7FFFFFFFFFFFFFFF )) ) {
+#if (i32_fromNaN == i32_fromPosOverflow)
+        sign = 0;
+#elif (i32_fromNaN == i32_fromNegOverflow)
+        sign = 1;
+#else
+        softfloat_raiseFlags( softfloat_flag_invalid );
+        return i32_fromNaN;
+#endif
+    }
+#endif
+    /*------------------------------------------------------------------------
+    *------------------------------------------------------------------------*/
+    shiftDist = 0x4032 - exp;
+    if ( shiftDist <= 0 ) shiftDist = 1;
+    sig = softfloat_shiftRightJam64( sig, shiftDist );
+    return softfloat_roundToI32( sign, sig, roundingMode, exact );
+
+}
+
diff --git a/ext/softfloat/extF80_to_i32_r_minMag.c b/ext/softfloat/extF80_to_i32_r_minMag.c
new file mode 100644
index 0000000000..a50dcdc6fe
--- /dev/null
+++ b/ext/softfloat/extF80_to_i32_r_minMag.c
@@ -0,0 +1,97 @@
+
+/*============================================================================
+
+This C source file is part of the SoftFloat IEEE Floating-Point Arithmetic
+Package, Release 3d, by John R. Hauser.
+
+Copyright 2011, 2012, 2013, 2014, 2015, 2016 The Regents of the University of
+California.  All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met:
+
+ 1. Redistributions of source code must retain the above copyright notice,
+    this list of conditions, and the following disclaimer.
+
+ 2. Redistributions in binary form must reproduce the above copyright notice,
+    this list of conditions, and the following disclaimer in the documentation
+    and/or other materials provided with the distribution.
+
+ 3. Neither the name of the University nor the names of its contributors may
+    be used to endorse or promote products derived from this software without
+    specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS "AS IS", AND ANY
+EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE, ARE
+DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY
+DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+=============================================================================*/
+
+#include <stdbool.h>
+#include <stdint.h>
+#include "platform.h"
+#include "internals.h"
+#include "specialize.h"
+#include "softfloat.h"
+
+int_fast32_t extF80_to_i32_r_minMag( extFloat80_t a, bool exact )
+{
+    union { struct extFloat80M s; extFloat80_t f; } uA;
+    uint_fast16_t uiA64;
+    int_fast32_t exp;
+    uint_fast64_t sig;
+    int_fast32_t shiftDist;
+    bool sign;
+    int_fast32_t absZ;
+
+    /*------------------------------------------------------------------------
+    *------------------------------------------------------------------------*/
+    uA.f = a;
+    uiA64 = uA.s.signExp;
+    exp = expExtF80UI64( uiA64 );
+    sig = uA.s.signif;
+    /*------------------------------------------------------------------------
+    *------------------------------------------------------------------------*/
+    shiftDist = 0x403E - exp;
+    if ( 64 <= shiftDist ) {
+        if ( exact && (exp | sig) ) {
+            softfloat_exceptionFlags |= softfloat_flag_inexact;
+        }
+        return 0;
+    }
+    /*------------------------------------------------------------------------
+    *------------------------------------------------------------------------*/
+    sign = signExtF80UI64( uiA64 );
+    if ( shiftDist < 33 ) {
+        if (
+            (uiA64 == packToExtF80UI64( 1, 0x401E ))
+                && (sig < UINT64_C( 0x8000000100000000 ))
+        ) {
+            if ( exact && (sig & UINT64_C( 0x00000000FFFFFFFF )) ) {
+                softfloat_exceptionFlags |= softfloat_flag_inexact;
+            }
+            return -0x7FFFFFFF - 1;
+        }
+        softfloat_raiseFlags( softfloat_flag_invalid );
+        return
+            (exp == 0x7FFF) && (sig & UINT64_C( 0x7FFFFFFFFFFFFFFF ))
+                ? i32_fromNaN
+                : sign ? i32_fromNegOverflow : i32_fromPosOverflow;
+    }
+    /*------------------------------------------------------------------------
+    *------------------------------------------------------------------------*/
+    absZ = sig>>shiftDist;
+    if ( exact && ((uint_fast64_t) (uint_fast32_t) absZ<<shiftDist != sig) ) {
+        softfloat_exceptionFlags |= softfloat_flag_inexact;
+    }
+    return sign ? -absZ : absZ;
+
+}
+
diff --git a/ext/softfloat/extF80_to_i64.c b/ext/softfloat/extF80_to_i64.c
new file mode 100644
index 0000000000..b18621670b
--- /dev/null
+++ b/ext/softfloat/extF80_to_i64.c
@@ -0,0 +1,89 @@
+
+/*============================================================================
+
+This C source file is part of the SoftFloat IEEE Floating-Point Arithmetic
+Package, Release 3d, by John R. Hauser.
+
+Copyright 2011, 2012, 2013, 2014, 2015, 2016, 2017 The Regents of the
+University of California.  All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met:
+
+ 1. Redistributions of source code must retain the above copyright notice,
+    this list of conditions, and the following disclaimer.
+
+ 2. Redistributions in binary form must reproduce the above copyright notice,
+    this list of conditions, and the following disclaimer in the documentation
+    and/or other materials provided with the distribution.
+
+ 3. Neither the name of the University nor the names of its contributors may
+    be used to endorse or promote products derived from this software without
+    specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS "AS IS", AND ANY
+EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE, ARE
+DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY
+DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+=============================================================================*/
+
+#include <stdbool.h>
+#include <stdint.h>
+#include "platform.h"
+#include "internals.h"
+#include "specialize.h"
+#include "softfloat.h"
+
+int_fast64_t
+ extF80_to_i64( extFloat80_t a, uint_fast8_t roundingMode, bool exact )
+{
+    union { struct extFloat80M s; extFloat80_t f; } uA;
+    uint_fast16_t uiA64;
+    bool sign;
+    int_fast32_t exp;
+    uint_fast64_t sig;
+    int_fast32_t shiftDist;
+    uint_fast64_t sigExtra;
+    struct uint64_extra sig64Extra;
+
+    /*------------------------------------------------------------------------
+    *------------------------------------------------------------------------*/
+    uA.f = a;
+    uiA64 = uA.s.signExp;
+    sign = signExtF80UI64( uiA64 );
+    exp  = expExtF80UI64( uiA64 );
+    sig = uA.s.signif;
+    /*------------------------------------------------------------------------
+    *------------------------------------------------------------------------*/
+    shiftDist = 0x403E - exp;
+    if ( shiftDist <= 0 ) {
+        /*--------------------------------------------------------------------
+        *--------------------------------------------------------------------*/
+        if ( shiftDist ) {
+            softfloat_raiseFlags( softfloat_flag_invalid );
+            return
+                (exp == 0x7FFF) && (sig & UINT64_C( 0x7FFFFFFFFFFFFFFF ))
+                    ? i64_fromNaN
+                    : sign ? i64_fromNegOverflow : i64_fromPosOverflow;
+        }
+        /*--------------------------------------------------------------------
+        *--------------------------------------------------------------------*/
+        sigExtra = 0;
+    } else {
+        /*--------------------------------------------------------------------
+        *--------------------------------------------------------------------*/
+        sig64Extra = softfloat_shiftRightJam64Extra( sig, 0, shiftDist );
+        sig = sig64Extra.v;
+        sigExtra = sig64Extra.extra;
+    }
+    return softfloat_roundToI64( sign, sig, sigExtra, roundingMode, exact );
+
+}
+
diff --git a/ext/softfloat/extF80_to_i64_r_minMag.c b/ext/softfloat/extF80_to_i64_r_minMag.c
new file mode 100644
index 0000000000..659c4fb747
--- /dev/null
+++ b/ext/softfloat/extF80_to_i64_r_minMag.c
@@ -0,0 +1,94 @@
+
+/*============================================================================
+
+This C source file is part of the SoftFloat IEEE Floating-Point Arithmetic
+Package, Release 3d, by John R. Hauser.
+
+Copyright 2011, 2012, 2013, 2014, 2015, 2016 The Regents of the University of
+California.  All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met:
+
+ 1. Redistributions of source code must retain the above copyright notice,
+    this list of conditions, and the following disclaimer.
+
+ 2. Redistributions in binary form must reproduce the above copyright notice,
+    this list of conditions, and the following disclaimer in the documentation
+    and/or other materials provided with the distribution.
+
+ 3. Neither the name of the University nor the names of its contributors may
+    be used to endorse or promote products derived from this software without
+    specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS "AS IS", AND ANY
+EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE, ARE
+DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY
+DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+=============================================================================*/
+
+#include <stdbool.h>
+#include <stdint.h>
+#include "platform.h"
+#include "internals.h"
+#include "specialize.h"
+#include "softfloat.h"
+
+int_fast64_t extF80_to_i64_r_minMag( extFloat80_t a, bool exact )
+{
+    union { struct extFloat80M s; extFloat80_t f; } uA;
+    uint_fast16_t uiA64;
+    int_fast32_t exp;
+    uint_fast64_t sig;
+    int_fast32_t shiftDist;
+    bool sign;
+    int_fast64_t absZ;
+
+    /*------------------------------------------------------------------------
+    *------------------------------------------------------------------------*/
+    uA.f = a;
+    uiA64 = uA.s.signExp;
+    exp = expExtF80UI64( uiA64 );
+    sig = uA.s.signif;
+    /*------------------------------------------------------------------------
+    *------------------------------------------------------------------------*/
+    shiftDist = 0x403E - exp;
+    if ( 64 <= shiftDist ) {
+        if ( exact && (exp | sig) ) {
+            softfloat_exceptionFlags |= softfloat_flag_inexact;
+        }
+        return 0;
+    }
+    /*------------------------------------------------------------------------
+    *------------------------------------------------------------------------*/
+    sign = signExtF80UI64( uiA64 );
+    if ( shiftDist <= 0 ) {
+        if (
+            (uiA64 == packToExtF80UI64( 1, 0x403E ))
+                && (sig == UINT64_C( 0x8000000000000000 ))
+        ) {
+            return -INT64_C( 0x7FFFFFFFFFFFFFFF ) - 1;
+        }
+        softfloat_raiseFlags( softfloat_flag_invalid );
+        return
+            (exp == 0x7FFF) && (sig & UINT64_C( 0x7FFFFFFFFFFFFFFF ))
+                ? i64_fromNaN
+                : sign ? i64_fromNegOverflow : i64_fromPosOverflow;
+    }
+    /*------------------------------------------------------------------------
+    *------------------------------------------------------------------------*/
+    absZ = sig>>shiftDist;
+    if ( exact && (uint64_t) (sig<<(-shiftDist & 63)) ) {
+        softfloat_exceptionFlags |= softfloat_flag_inexact;
+    }
+    return sign ? -absZ : absZ;
+
+}
+
diff --git a/ext/softfloat/extF80_to_ui32.c b/ext/softfloat/extF80_to_ui32.c
new file mode 100644
index 0000000000..2bd8557666
--- /dev/null
+++ b/ext/softfloat/extF80_to_ui32.c
@@ -0,0 +1,83 @@
+
+/*============================================================================
+
+This C source file is part of the SoftFloat IEEE Floating-Point Arithmetic
+Package, Release 3d, by John R. Hauser.
+
+Copyright 2011, 2012, 2013, 2014, 2015, 2016, 2017 The Regents of the
+University of California.  All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met:
+
+ 1. Redistributions of source code must retain the above copyright notice,
+    this list of conditions, and the following disclaimer.
+
+ 2. Redistributions in binary form must reproduce the above copyright notice,
+    this list of conditions, and the following disclaimer in the documentation
+    and/or other materials provided with the distribution.
+
+ 3. Neither the name of the University nor the names of its contributors may
+    be used to endorse or promote products derived from this software without
+    specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS "AS IS", AND ANY
+EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE, ARE
+DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY
+DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+=============================================================================*/
+
+#include <stdbool.h>
+#include <stdint.h>
+#include "platform.h"
+#include "internals.h"
+#include "specialize.h"
+#include "softfloat.h"
+
+uint_fast32_t
+ extF80_to_ui32( extFloat80_t a, uint_fast8_t roundingMode, bool exact )
+{
+    union { struct extFloat80M s; extFloat80_t f; } uA;
+    uint_fast16_t uiA64;
+    bool sign;
+    int_fast32_t exp;
+    uint_fast64_t sig;
+    int_fast32_t shiftDist;
+
+    /*------------------------------------------------------------------------
+    *------------------------------------------------------------------------*/
+    uA.f = a;
+    uiA64 = uA.s.signExp;
+    sign = signExtF80UI64( uiA64 );
+    exp  = expExtF80UI64( uiA64 );
+    sig = uA.s.signif;
+    /*------------------------------------------------------------------------
+    *------------------------------------------------------------------------*/
+#if (ui32_fromNaN != ui32_fromPosOverflow) || (ui32_fromNaN != ui32_fromNegOverflow)
+    if ( (exp == 0x7FFF) && (sig & UINT64_C( 0x7FFFFFFFFFFFFFFF )) ) {
+#if (ui32_fromNaN == ui32_fromPosOverflow)
+        sign = 0;
+#elif (ui32_fromNaN == ui32_fromNegOverflow)
+        sign = 1;
+#else
+        softfloat_raiseFlags( softfloat_flag_invalid );
+        return ui32_fromNaN;
+#endif
+    }
+#endif
+    /*------------------------------------------------------------------------
+    *------------------------------------------------------------------------*/
+    shiftDist = 0x4032 - exp;
+    if ( shiftDist <= 0 ) shiftDist = 1;
+    sig = softfloat_shiftRightJam64( sig, shiftDist );
+    return softfloat_roundToUI32( sign, sig, roundingMode, exact );
+
+}
+
diff --git a/ext/softfloat/extF80_to_ui32_r_minMag.c b/ext/softfloat/extF80_to_ui32_r_minMag.c
new file mode 100644
index 0000000000..38a5d8d7e9
--- /dev/null
+++ b/ext/softfloat/extF80_to_ui32_r_minMag.c
@@ -0,0 +1,88 @@
+
+/*============================================================================
+
+This C source file is part of the SoftFloat IEEE Floating-Point Arithmetic
+Package, Release 3d, by John R. Hauser.
+
+Copyright 2011, 2012, 2013, 2014, 2015, 2016 The Regents of the University of
+California.  All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met:
+
+ 1. Redistributions of source code must retain the above copyright notice,
+    this list of conditions, and the following disclaimer.
+
+ 2. Redistributions in binary form must reproduce the above copyright notice,
+    this list of conditions, and the following disclaimer in the documentation
+    and/or other materials provided with the distribution.
+
+ 3. Neither the name of the University nor the names of its contributors may
+    be used to endorse or promote products derived from this software without
+    specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS "AS IS", AND ANY
+EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE, ARE
+DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY
+DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+=============================================================================*/
+
+#include <stdbool.h>
+#include <stdint.h>
+#include "platform.h"
+#include "internals.h"
+#include "specialize.h"
+#include "softfloat.h"
+
+uint_fast32_t extF80_to_ui32_r_minMag( extFloat80_t a, bool exact )
+{
+    union { struct extFloat80M s; extFloat80_t f; } uA;
+    uint_fast16_t uiA64;
+    int_fast32_t exp;
+    uint_fast64_t sig;
+    int_fast32_t shiftDist;
+    bool sign;
+    uint_fast32_t z;
+
+    /*------------------------------------------------------------------------
+    *------------------------------------------------------------------------*/
+    uA.f = a;
+    uiA64 = uA.s.signExp;
+    exp = expExtF80UI64( uiA64 );
+    sig = uA.s.signif;
+    /*------------------------------------------------------------------------
+    *------------------------------------------------------------------------*/
+    shiftDist = 0x403E - exp;
+    if ( 64 <= shiftDist ) {
+        if ( exact && (exp | sig) ) {
+            softfloat_exceptionFlags |= softfloat_flag_inexact;
+        }
+        return 0;
+    }
+    /*------------------------------------------------------------------------
+    *------------------------------------------------------------------------*/
+    sign = signExtF80UI64( uiA64 );
+    if ( sign || (shiftDist < 32) ) {
+        softfloat_raiseFlags( softfloat_flag_invalid );
+        return
+            (exp == 0x7FFF) && (sig & UINT64_C( 0x7FFFFFFFFFFFFFFF ))
+                ? ui32_fromNaN
+                : sign ? ui32_fromNegOverflow : ui32_fromPosOverflow;
+    }
+    /*------------------------------------------------------------------------
+    *------------------------------------------------------------------------*/
+    z = sig>>shiftDist;
+    if ( exact && ((uint_fast64_t) z<<shiftDist != sig) ) {
+        softfloat_exceptionFlags |= softfloat_flag_inexact;
+    }
+    return z;
+
+}
+
diff --git a/ext/softfloat/extF80_to_ui64.c b/ext/softfloat/extF80_to_ui64.c
new file mode 100644
index 0000000000..a187848eab
--- /dev/null
+++ b/ext/softfloat/extF80_to_ui64.c
@@ -0,0 +1,84 @@
+
+/*============================================================================
+
+This C source file is part of the SoftFloat IEEE Floating-Point Arithmetic
+Package, Release 3d, by John R. Hauser.
+
+Copyright 2011, 2012, 2013, 2014, 2015, 2016, 2017 The Regents of the
+University of California.  All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met:
+
+ 1. Redistributions of source code must retain the above copyright notice,
+    this list of conditions, and the following disclaimer.
+
+ 2. Redistributions in binary form must reproduce the above copyright notice,
+    this list of conditions, and the following disclaimer in the documentation
+    and/or other materials provided with the distribution.
+
+ 3. Neither the name of the University nor the names of its contributors may
+    be used to endorse or promote products derived from this software without
+    specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS "AS IS", AND ANY
+EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE, ARE
+DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY
+DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+=============================================================================*/
+
+#include <stdbool.h>
+#include <stdint.h>
+#include "platform.h"
+#include "internals.h"
+#include "specialize.h"
+#include "softfloat.h"
+
+uint_fast64_t
+ extF80_to_ui64( extFloat80_t a, uint_fast8_t roundingMode, bool exact )
+{
+    union { struct extFloat80M s; extFloat80_t f; } uA;
+    uint_fast16_t uiA64;
+    bool sign;
+    int_fast32_t exp;
+    uint_fast64_t sig;
+    int_fast32_t shiftDist;
+    uint_fast64_t sigExtra;
+    struct uint64_extra sig64Extra;
+
+    /*------------------------------------------------------------------------
+    *------------------------------------------------------------------------*/
+    uA.f = a;
+    uiA64 = uA.s.signExp;
+    sign = signExtF80UI64( uiA64 );
+    exp  = expExtF80UI64( uiA64 );
+    sig = uA.s.signif;
+    /*------------------------------------------------------------------------
+    *------------------------------------------------------------------------*/
+    shiftDist = 0x403E - exp;
+    if ( shiftDist < 0 ) {
+        softfloat_raiseFlags( softfloat_flag_invalid );
+        return
+            (exp == 0x7FFF) && (sig & UINT64_C( 0x7FFFFFFFFFFFFFFF ))
+                ? ui64_fromNaN
+                : sign ? ui64_fromNegOverflow : ui64_fromPosOverflow;
+    }
+    /*------------------------------------------------------------------------
+    *------------------------------------------------------------------------*/
+    sigExtra = 0;
+    if ( shiftDist ) {
+        sig64Extra = softfloat_shiftRightJam64Extra( sig, 0, shiftDist );
+        sig = sig64Extra.v;
+        sigExtra = sig64Extra.extra;
+    }
+    return softfloat_roundToUI64( sign, sig, sigExtra, roundingMode, exact );
+
+}
+
diff --git a/ext/softfloat/extF80_to_ui64_r_minMag.c b/ext/softfloat/extF80_to_ui64_r_minMag.c
new file mode 100644
index 0000000000..07648b838a
--- /dev/null
+++ b/ext/softfloat/extF80_to_ui64_r_minMag.c
@@ -0,0 +1,88 @@
+
+/*============================================================================
+
+This C source file is part of the SoftFloat IEEE Floating-Point Arithmetic
+Package, Release 3d, by John R. Hauser.
+
+Copyright 2011, 2012, 2013, 2014, 2015, 2016 The Regents of the University of
+California.  All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met:
+
+ 1. Redistributions of source code must retain the above copyright notice,
+    this list of conditions, and the following disclaimer.
+
+ 2. Redistributions in binary form must reproduce the above copyright notice,
+    this list of conditions, and the following disclaimer in the documentation
+    and/or other materials provided with the distribution.
+
+ 3. Neither the name of the University nor the names of its contributors may
+    be used to endorse or promote products derived from this software without
+    specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS "AS IS", AND ANY
+EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE, ARE
+DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY
+DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+=============================================================================*/
+
+#include <stdbool.h>
+#include <stdint.h>
+#include "platform.h"
+#include "internals.h"
+#include "specialize.h"
+#include "softfloat.h"
+
+uint_fast64_t extF80_to_ui64_r_minMag( extFloat80_t a, bool exact )
+{
+    union { struct extFloat80M s; extFloat80_t f; } uA;
+    uint_fast16_t uiA64;
+    int_fast32_t exp;
+    uint_fast64_t sig;
+    int_fast32_t shiftDist;
+    bool sign;
+    uint_fast64_t z;
+
+    /*------------------------------------------------------------------------
+    *------------------------------------------------------------------------*/
+    uA.f = a;
+    uiA64 = uA.s.signExp;
+    exp = expExtF80UI64( uiA64 );
+    sig = uA.s.signif;
+    /*------------------------------------------------------------------------
+    *------------------------------------------------------------------------*/
+    shiftDist = 0x403E - exp;
+    if ( 64 <= shiftDist ) {
+        if ( exact && (exp | sig) ) {
+            softfloat_exceptionFlags |= softfloat_flag_inexact;
+        }
+        return 0;
+    }
+    /*------------------------------------------------------------------------
+    *------------------------------------------------------------------------*/
+    sign = signExtF80UI64( uiA64 );
+    if ( sign || (shiftDist < 0) ) {
+        softfloat_raiseFlags( softfloat_flag_invalid );
+        return
+            (exp == 0x7FFF) && (sig & UINT64_C( 0x7FFFFFFFFFFFFFFF ))
+                ? ui64_fromNaN
+                : sign ? ui64_fromNegOverflow : ui64_fromPosOverflow;
+    }
+    /*------------------------------------------------------------------------
+    *------------------------------------------------------------------------*/
+    z = sig>>shiftDist;
+    if ( exact && (z<<shiftDist != sig) ) {
+        softfloat_exceptionFlags |= softfloat_flag_inexact;
+    }
+    return z;
+
+}
+
diff --git a/ext/softfloat/f128M_add.c b/ext/softfloat/f128M_add.c
new file mode 100644
index 0000000000..649a6d128a
--- /dev/null
+++ b/ext/softfloat/f128M_add.c
@@ -0,0 +1,97 @@
+
+/*============================================================================
+
+This C source file is part of the SoftFloat IEEE Floating-Point Arithmetic
+Package, Release 3d, by John R. Hauser.
+
+Copyright 2011, 2012, 2013, 2014 The Regents of the University of California.
+All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met:
+
+ 1. Redistributions of source code must retain the above copyright notice,
+    this list of conditions, and the following disclaimer.
+
+ 2. Redistributions in binary form must reproduce the above copyright notice,
+    this list of conditions, and the following disclaimer in the documentation
+    and/or other materials provided with the distribution.
+
+ 3. Neither the name of the University nor the names of its contributors may
+    be used to endorse or promote products derived from this software without
+    specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS "AS IS", AND ANY
+EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE, ARE
+DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY
+DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+=============================================================================*/
+
+#include <stdbool.h>
+#include <stdint.h>
+#include "platform.h"
+#include "internals.h"
+#include "softfloat.h"
+
+#ifdef SOFTFLOAT_FAST_INT64
+
+void
+ f128M_add( const float128_t *aPtr, const float128_t *bPtr, float128_t *zPtr )
+{
+    const uint64_t *aWPtr, *bWPtr;
+    uint_fast64_t uiA64, uiA0;
+    bool signA;
+    uint_fast64_t uiB64, uiB0;
+    bool signB;
+#if ! defined INLINE_LEVEL || (INLINE_LEVEL < 2)
+    float128_t
+        (*magsFuncPtr)(
+            uint_fast64_t, uint_fast64_t, uint_fast64_t, uint_fast64_t, bool );
+#endif
+
+    aWPtr = (const uint64_t *) aPtr;
+    bWPtr = (const uint64_t *) bPtr;
+    uiA64 = aWPtr[indexWord( 2, 1 )];
+    uiA0  = aWPtr[indexWord( 2, 0 )];
+    signA = signF128UI64( uiA64 );
+    uiB64 = bWPtr[indexWord( 2, 1 )];
+    uiB0  = bWPtr[indexWord( 2, 0 )];
+    signB = signF128UI64( uiB64 );
+#if defined INLINE_LEVEL && (2 <= INLINE_LEVEL)
+    if ( signA == signB ) {
+        *zPtr = softfloat_addMagsF128( uiA64, uiA0, uiB64, uiB0, signA );
+    } else {
+        *zPtr = softfloat_subMagsF128( uiA64, uiA0, uiB64, uiB0, signA );
+    }
+#else
+    magsFuncPtr =
+        (signA == signB) ? softfloat_addMagsF128 : softfloat_subMagsF128;
+    *zPtr = (*magsFuncPtr)( uiA64, uiA0, uiB64, uiB0, signA );
+#endif
+
+}
+
+#else
+
+void
+ f128M_add( const float128_t *aPtr, const float128_t *bPtr, float128_t *zPtr )
+{
+
+    softfloat_addF128M(
+        (const uint32_t *) aPtr,
+        (const uint32_t *) bPtr,
+        (uint32_t *) zPtr,
+        false
+    );
+
+}
+
+#endif
+
diff --git a/ext/softfloat/f128M_div.c b/ext/softfloat/f128M_div.c
new file mode 100644
index 0000000000..a2c838df53
--- /dev/null
+++ b/ext/softfloat/f128M_div.c
@@ -0,0 +1,187 @@
+
+/*============================================================================
+
+This C source file is part of the SoftFloat IEEE Floating-Point Arithmetic
+Package, Release 3d, by John R. Hauser.
+
+Copyright 2011, 2012, 2013, 2014 The Regents of the University of California.
+All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met:
+
+ 1. Redistributions of source code must retain the above copyright notice,
+    this list of conditions, and the following disclaimer.
+
+ 2. Redistributions in binary form must reproduce the above copyright notice,
+    this list of conditions, and the following disclaimer in the documentation
+    and/or other materials provided with the distribution.
+
+ 3. Neither the name of the University nor the names of its contributors may
+    be used to endorse or promote products derived from this software without
+    specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS "AS IS", AND ANY
+EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE, ARE
+DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY
+DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+=============================================================================*/
+
+#include <stdbool.h>
+#include <stdint.h>
+#include "platform.h"
+#include "internals.h"
+#include "specialize.h"
+#include "softfloat.h"
+
+#ifdef SOFTFLOAT_FAST_INT64
+
+void
+ f128M_div( const float128_t *aPtr, const float128_t *bPtr, float128_t *zPtr )
+{
+
+    *zPtr = f128_div( *aPtr, *bPtr );
+
+}
+
+#else
+
+void
+ f128M_div( const float128_t *aPtr, const float128_t *bPtr, float128_t *zPtr )
+{
+    const uint32_t *aWPtr, *bWPtr;
+    uint32_t *zWPtr, uiA96;
+    bool signA;
+    int32_t expA;
+    uint32_t uiB96;
+    bool signB;
+    int32_t expB;
+    bool signZ;
+    uint32_t y[5], sigB[4];
+    int32_t expZ;
+    uint32_t recip32;
+    int ix;
+    uint64_t q64;
+    uint32_t q, qs[3], uiZ96;
+
+    /*------------------------------------------------------------------------
+    *------------------------------------------------------------------------*/
+    aWPtr = (const uint32_t *) aPtr;
+    bWPtr = (const uint32_t *) bPtr;
+    zWPtr = (uint32_t *) zPtr;
+    /*------------------------------------------------------------------------
+    *------------------------------------------------------------------------*/
+    uiA96 = aWPtr[indexWordHi( 4 )];
+    signA = signF128UI96( uiA96 );
+    expA  = expF128UI96( uiA96 );
+    uiB96 = bWPtr[indexWordHi( 4 )];
+    signB = signF128UI96( uiB96 );
+    expB  = expF128UI96( uiB96 );
+    signZ = signA ^ signB;
+    /*------------------------------------------------------------------------
+    *------------------------------------------------------------------------*/
+    if ( (expA == 0x7FFF) || (expB == 0x7FFF) ) {
+        if ( softfloat_tryPropagateNaNF128M( aWPtr, bWPtr, zWPtr ) ) return;
+        if ( expA == 0x7FFF ) {
+            if ( expB == 0x7FFF ) goto invalid;
+            goto infinity;
+        }
+        goto zero;
+    }
+    /*------------------------------------------------------------------------
+    *------------------------------------------------------------------------*/
+    expA = softfloat_shiftNormSigF128M( aWPtr, 13, y );
+    expB = softfloat_shiftNormSigF128M( bWPtr, 13, sigB );
+    if ( expA == -128 ) {
+        if ( expB == -128 ) goto invalid;
+        goto zero;
+    }
+    if ( expB == -128 ) {
+        softfloat_raiseFlags( softfloat_flag_infinite );
+        goto infinity;
+    }
+    /*------------------------------------------------------------------------
+    *------------------------------------------------------------------------*/
+    expZ = expA - expB + 0x3FFE;
+    if ( softfloat_compare128M( y, sigB ) < 0 ) {
+        --expZ;
+        softfloat_add128M( y, y, y );
+    }
+    recip32 =
+        softfloat_approxRecip32_1(
+            ((uint64_t) sigB[indexWord( 4, 3 )]<<32 | sigB[indexWord( 4, 2 )])
+                >>30
+        );
+    ix = 3;
+    for (;;) {
+        q64 = (uint64_t) y[indexWordHi( 4 )] * recip32;
+        q = (q64 + 0x80000000)>>32;
+        --ix;
+        if ( ix < 0 ) break;
+        softfloat_remStep128MBy32( y, 29, sigB, q, y );
+        if ( y[indexWordHi( 4 )] & 0x80000000 ) {
+            --q;
+            softfloat_add128M( y, sigB, y );
+        }
+        qs[ix] = q;
+    }
+    /*------------------------------------------------------------------------
+    *------------------------------------------------------------------------*/
+    if ( ((q + 1) & 7) < 2 ) {
+        softfloat_remStep128MBy32( y, 29, sigB, q, y );
+        if ( y[indexWordHi( 4 )] & 0x80000000 ) {
+            --q;
+            softfloat_add128M( y, sigB, y );
+        } else if ( softfloat_compare128M( sigB, y ) <= 0 ) {
+            ++q;
+            softfloat_sub128M( y, sigB, y );
+        }
+        if (
+            y[indexWordLo( 4 )] || y[indexWord( 4, 1 )]
+                || (y[indexWord( 4, 2 )] | y[indexWord( 4, 3 )])
+        ) {
+            q |= 1;
+        }
+    }
+    /*------------------------------------------------------------------------
+    *------------------------------------------------------------------------*/
+    q64 = (uint64_t) q<<28;
+    y[indexWord( 5, 0 )] = q64;
+    q64 = ((uint64_t) qs[0]<<25) + (q64>>32);
+    y[indexWord( 5, 1 )] = q64;
+    q64 = ((uint64_t) qs[1]<<22) + (q64>>32);
+    y[indexWord( 5, 2 )] = q64;
+    q64 = ((uint64_t) qs[2]<<19) + (q64>>32);
+    y[indexWord( 5, 3 )] = q64;
+    y[indexWord( 5, 4 )] = q64>>32;
+    softfloat_roundPackMToF128M( signZ, expZ, y, zWPtr );
+    return;
+    /*------------------------------------------------------------------------
+    *------------------------------------------------------------------------*/
+ invalid:
+    softfloat_invalidF128M( zWPtr );
+    return;
+    /*------------------------------------------------------------------------
+    *------------------------------------------------------------------------*/
+ infinity:
+    uiZ96 = packToF128UI96( signZ, 0x7FFF, 0 );
+    goto uiZ96;
+ zero:
+    uiZ96 = packToF128UI96( signZ, 0, 0 );
+ uiZ96:
+    zWPtr[indexWordHi( 4 )] = uiZ96;
+    zWPtr[indexWord( 4, 2 )] = 0;
+    zWPtr[indexWord( 4, 1 )] = 0;
+    zWPtr[indexWord( 4, 0 )] = 0;
+
+}
+
+#endif
+
diff --git a/ext/softfloat/f128M_eq.c b/ext/softfloat/f128M_eq.c
new file mode 100644
index 0000000000..39bf3f8fa8
--- /dev/null
+++ b/ext/softfloat/f128M_eq.c
@@ -0,0 +1,100 @@
+
+/*============================================================================
+
+This C source file is part of the SoftFloat IEEE Floating-Point Arithmetic
+Package, Release 3d, by John R. Hauser.
+
+Copyright 2011, 2012, 2013, 2014 The Regents of the University of California.
+All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met:
+
+ 1. Redistributions of source code must retain the above copyright notice,
+    this list of conditions, and the following disclaimer.
+
+ 2. Redistributions in binary form must reproduce the above copyright notice,
+    this list of conditions, and the following disclaimer in the documentation
+    and/or other materials provided with the distribution.
+
+ 3. Neither the name of the University nor the names of its contributors may
+    be used to endorse or promote products derived from this software without
+    specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS "AS IS", AND ANY
+EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE, ARE
+DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY
+DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+=============================================================================*/
+
+#include <stdbool.h>
+#include <stdint.h>
+#include "platform.h"
+#include "internals.h"
+#include "specialize.h"
+#include "softfloat.h"
+
+#ifdef SOFTFLOAT_FAST_INT64
+
+bool f128M_eq( const float128_t *aPtr, const float128_t *bPtr )
+{
+
+    return f128_eq( *aPtr, *bPtr );
+
+}
+
+#else
+
+bool f128M_eq( const float128_t *aPtr, const float128_t *bPtr )
+{
+    const uint32_t *aWPtr, *bWPtr;
+    uint32_t wordA, wordB, uiA96, uiB96;
+    bool possibleOppositeZeros;
+    uint32_t mashWord;
+
+    aWPtr = (const uint32_t *) aPtr;
+    bWPtr = (const uint32_t *) bPtr;
+    wordA = aWPtr[indexWord( 4, 2 )];
+    wordB = bWPtr[indexWord( 4, 2 )];
+    if ( wordA != wordB ) goto false_checkSigNaNs;
+    uiA96 = aWPtr[indexWordHi( 4 )];
+    uiB96 = bWPtr[indexWordHi( 4 )];
+    possibleOppositeZeros = false;
+    if ( uiA96 != uiB96 ) {
+        possibleOppositeZeros = (((uiA96 | uiB96) & 0x7FFFFFFF) == 0);
+        if ( ! possibleOppositeZeros ) goto false_checkSigNaNs;
+    }
+    mashWord = wordA | wordB;
+    wordA = aWPtr[indexWord( 4, 1 )];
+    wordB = bWPtr[indexWord( 4, 1 )];
+    if ( wordA != wordB ) goto false_checkSigNaNs;
+    mashWord |= wordA | wordB;
+    wordA = aWPtr[indexWord( 4, 0 )];
+    wordB = bWPtr[indexWord( 4, 0 )];
+    if ( wordA != wordB ) goto false_checkSigNaNs;
+    if ( possibleOppositeZeros && ((mashWord | wordA | wordB) != 0) ) {
+        goto false_checkSigNaNs;
+    }
+    if ( ! softfloat_isNaNF128M( aWPtr ) && ! softfloat_isNaNF128M( bWPtr ) ) {
+        return true;
+    }
+ false_checkSigNaNs:
+    if (
+           f128M_isSignalingNaN( (const float128_t *) aWPtr )
+        || f128M_isSignalingNaN( (const float128_t *) bWPtr )
+    ) {
+        softfloat_raiseFlags( softfloat_flag_invalid );
+    }
+    return false;
+
+}
+
+#endif
+
diff --git a/ext/softfloat/f128M_eq_signaling.c b/ext/softfloat/f128M_eq_signaling.c
new file mode 100644
index 0000000000..faa33e368a
--- /dev/null
+++ b/ext/softfloat/f128M_eq_signaling.c
@@ -0,0 +1,92 @@
+
+/*============================================================================
+
+This C source file is part of the SoftFloat IEEE Floating-Point Arithmetic
+Package, Release 3d, by John R. Hauser.
+
+Copyright 2011, 2012, 2013, 2014 The Regents of the University of California.
+All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met:
+
+ 1. Redistributions of source code must retain the above copyright notice,
+    this list of conditions, and the following disclaimer.
+
+ 2. Redistributions in binary form must reproduce the above copyright notice,
+    this list of conditions, and the following disclaimer in the documentation
+    and/or other materials provided with the distribution.
+
+ 3. Neither the name of the University nor the names of its contributors may
+    be used to endorse or promote products derived from this software without
+    specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS "AS IS", AND ANY
+EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE, ARE
+DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY
+DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+=============================================================================*/
+
+#include <stdbool.h>
+#include <stdint.h>
+#include "platform.h"
+#include "internals.h"
+#include "specialize.h"
+#include "softfloat.h"
+
+#ifdef SOFTFLOAT_FAST_INT64
+
+bool f128M_eq_signaling( const float128_t *aPtr, const float128_t *bPtr )
+{
+
+    return f128_eq_signaling( *aPtr, *bPtr );
+
+}
+
+#else
+
+bool f128M_eq_signaling( const float128_t *aPtr, const float128_t *bPtr )
+{
+    const uint32_t *aWPtr, *bWPtr;
+    uint32_t wordA, wordB, uiA96, uiB96;
+    bool possibleOppositeZeros;
+    uint32_t mashWord;
+
+    aWPtr = (const uint32_t *) aPtr;
+    bWPtr = (const uint32_t *) bPtr;
+    if ( softfloat_isNaNF128M( aWPtr ) || softfloat_isNaNF128M( bWPtr ) ) {
+        softfloat_raiseFlags( softfloat_flag_invalid );
+        return false;
+    }
+    wordA = aWPtr[indexWord( 4, 2 )];
+    wordB = bWPtr[indexWord( 4, 2 )];
+    if ( wordA != wordB ) return false;
+    uiA96 = aWPtr[indexWordHi( 4 )];
+    uiB96 = bWPtr[indexWordHi( 4 )];
+    possibleOppositeZeros = false;
+    if ( uiA96 != uiB96 ) {
+        possibleOppositeZeros = (((uiA96 | uiB96) & 0x7FFFFFFF) == 0);
+        if ( ! possibleOppositeZeros ) return false;
+    }
+    mashWord = wordA | wordB;
+    wordA = aWPtr[indexWord( 4, 1 )];
+    wordB = bWPtr[indexWord( 4, 1 )];
+    if ( wordA != wordB ) return false;
+    mashWord |= wordA | wordB;
+    wordA = aWPtr[indexWord( 4, 0 )];
+    wordB = bWPtr[indexWord( 4, 0 )];
+    return
+        (wordA == wordB)
+            && (! possibleOppositeZeros || ((mashWord | wordA | wordB) == 0));
+
+}
+
+#endif
+
diff --git a/ext/softfloat/f128M_le.c b/ext/softfloat/f128M_le.c
new file mode 100644
index 0000000000..ffdd6aecdc
--- /dev/null
+++ b/ext/softfloat/f128M_le.c
@@ -0,0 +1,93 @@
+
+/*============================================================================
+
+This C source file is part of the SoftFloat IEEE Floating-Point Arithmetic
+Package, Release 3d, by John R. Hauser.
+
+Copyright 2011, 2012, 2013, 2014 The Regents of the University of California.
+All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met:
+
+ 1. Redistributions of source code must retain the above copyright notice,
+    this list of conditions, and the following disclaimer.
+
+ 2. Redistributions in binary form must reproduce the above copyright notice,
+    this list of conditions, and the following disclaimer in the documentation
+    and/or other materials provided with the distribution.
+
+ 3. Neither the name of the University nor the names of its contributors may
+    be used to endorse or promote products derived from this software without
+    specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS "AS IS", AND ANY
+EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE, ARE
+DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY
+DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+=============================================================================*/
+
+#include <stdbool.h>
+#include <stdint.h>
+#include "platform.h"
+#include "internals.h"
+#include "softfloat.h"
+
+#ifdef SOFTFLOAT_FAST_INT64
+
+bool f128M_le( const float128_t *aPtr, const float128_t *bPtr )
+{
+
+    return f128_le( *aPtr, *bPtr );
+
+}
+
+#else
+
+bool f128M_le( const float128_t *aPtr, const float128_t *bPtr )
+{
+    const uint32_t *aWPtr, *bWPtr;
+    uint32_t uiA96, uiB96;
+    bool signA, signB;
+    uint32_t wordA, wordB;
+
+    aWPtr = (const uint32_t *) aPtr;
+    bWPtr = (const uint32_t *) bPtr;
+    if ( softfloat_isNaNF128M( aWPtr ) || softfloat_isNaNF128M( bWPtr ) ) {
+        softfloat_raiseFlags( softfloat_flag_invalid );
+        return false;
+    }
+    uiA96 = aWPtr[indexWordHi( 4 )];
+    uiB96 = bWPtr[indexWordHi( 4 )];
+    signA = signF128UI96( uiA96 );
+    signB = signF128UI96( uiB96 );
+    if ( signA != signB ) {
+        if ( signA ) return true;
+        if ( (uiA96 | uiB96) & 0x7FFFFFFF ) return false;
+        wordA = aWPtr[indexWord( 4, 2 )];
+        wordB = bWPtr[indexWord( 4, 2 )];
+        if ( wordA | wordB ) return false;
+        wordA = aWPtr[indexWord( 4, 1 )];
+        wordB = bWPtr[indexWord( 4, 1 )];
+        if ( wordA | wordB ) return false;
+        wordA = aWPtr[indexWord( 4, 0 )];
+        wordB = bWPtr[indexWord( 4, 0 )];
+        return ((wordA | wordB) == 0);
+    }
+    if ( signA ) {
+        aWPtr = (const uint32_t *) bPtr;
+        bWPtr = (const uint32_t *) aPtr;
+    }
+    return (softfloat_compare128M( aWPtr, bWPtr ) <= 0);
+
+}
+
+#endif
+
diff --git a/ext/softfloat/f128M_le_quiet.c b/ext/softfloat/f128M_le_quiet.c
new file mode 100644
index 0000000000..11927e6ca4
--- /dev/null
+++ b/ext/softfloat/f128M_le_quiet.c
@@ -0,0 +1,96 @@
+
+/*============================================================================
+
+This C source file is part of the SoftFloat IEEE Floating-Point Arithmetic
+Package, Release 3d, by John R. Hauser.
+
+Copyright 2011, 2012, 2013, 2014 The Regents of the University of California.
+All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met:
+
+ 1. Redistributions of source code must retain the above copyright notice,
+    this list of conditions, and the following disclaimer.
+
+ 2. Redistributions in binary form must reproduce the above copyright notice,
+    this list of conditions, and the following disclaimer in the documentation
+    and/or other materials provided with the distribution.
+
+ 3. Neither the name of the University nor the names of its contributors may
+    be used to endorse or promote products derived from this software without
+    specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS "AS IS", AND ANY
+EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE, ARE
+DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY
+DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+=============================================================================*/
+
+#include <stdbool.h>
+#include <stdint.h>
+#include "platform.h"
+#include "internals.h"
+#include "specialize.h"
+#include "softfloat.h"
+
+#ifdef SOFTFLOAT_FAST_INT64
+
+bool f128M_le_quiet( const float128_t *aPtr, const float128_t *bPtr )
+{
+
+    return f128_le_quiet( *aPtr, *bPtr );
+
+}
+
+#else
+
+bool f128M_le_quiet( const float128_t *aPtr, const float128_t *bPtr )
+{
+    const uint32_t *aWPtr, *bWPtr;
+    uint32_t uiA96, uiB96;
+    bool signA, signB;
+    uint32_t wordA, wordB;
+
+    aWPtr = (const uint32_t *) aPtr;
+    bWPtr = (const uint32_t *) bPtr;
+    if ( softfloat_isNaNF128M( aWPtr ) || softfloat_isNaNF128M( bWPtr ) ) {
+        if ( f128M_isSignalingNaN( aPtr ) || f128M_isSignalingNaN( bPtr ) ) {
+            softfloat_raiseFlags( softfloat_flag_invalid );
+        }
+        return false;
+    }
+    uiA96 = aWPtr[indexWordHi( 4 )];
+    uiB96 = bWPtr[indexWordHi( 4 )];
+    signA = signF128UI96( uiA96 );
+    signB = signF128UI96( uiB96 );
+    if ( signA != signB ) {
+        if ( signA ) return true;
+        if ( (uiA96 | uiB96) & 0x7FFFFFFF ) return false;
+        wordA = aWPtr[indexWord( 4, 2 )];
+        wordB = bWPtr[indexWord( 4, 2 )];
+        if ( wordA | wordB ) return false;
+        wordA = aWPtr[indexWord( 4, 1 )];
+        wordB = bWPtr[indexWord( 4, 1 )];
+        if ( wordA | wordB ) return false;
+        wordA = aWPtr[indexWord( 4, 0 )];
+        wordB = bWPtr[indexWord( 4, 0 )];
+        return ((wordA | wordB) == 0);
+    }
+    if ( signA ) {
+        aWPtr = (const uint32_t *) bPtr;
+        bWPtr = (const uint32_t *) aPtr;
+    }
+    return (softfloat_compare128M( aWPtr, bWPtr ) <= 0);
+
+}
+
+#endif
+
diff --git a/ext/softfloat/f128M_lt.c b/ext/softfloat/f128M_lt.c
new file mode 100644
index 0000000000..1b987f0d0f
--- /dev/null
+++ b/ext/softfloat/f128M_lt.c
@@ -0,0 +1,93 @@
+
+/*============================================================================
+
+This C source file is part of the SoftFloat IEEE Floating-Point Arithmetic
+Package, Release 3d, by John R. Hauser.
+
+Copyright 2011, 2012, 2013, 2014 The Regents of the University of California.
+All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met:
+
+ 1. Redistributions of source code must retain the above copyright notice,
+    this list of conditions, and the following disclaimer.
+
+ 2. Redistributions in binary form must reproduce the above copyright notice,
+    this list of conditions, and the following disclaimer in the documentation
+    and/or other materials provided with the distribution.
+
+ 3. Neither the name of the University nor the names of its contributors may
+    be used to endorse or promote products derived from this software without
+    specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS "AS IS", AND ANY
+EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE, ARE
+DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY
+DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+=============================================================================*/
+
+#include <stdbool.h>
+#include <stdint.h>
+#include "platform.h"
+#include "internals.h"
+#include "softfloat.h"
+
+#ifdef SOFTFLOAT_FAST_INT64
+
+bool f128M_lt( const float128_t *aPtr, const float128_t *bPtr )
+{
+
+    return f128_lt( *aPtr, *bPtr );
+
+}
+
+#else
+
+bool f128M_lt( const float128_t *aPtr, const float128_t *bPtr )
+{
+    const uint32_t *aWPtr, *bWPtr;
+    uint32_t uiA96, uiB96;
+    bool signA, signB;
+    uint32_t wordA, wordB;
+
+    aWPtr = (const uint32_t *) aPtr;
+    bWPtr = (const uint32_t *) bPtr;
+    if ( softfloat_isNaNF128M( aWPtr ) || softfloat_isNaNF128M( bWPtr ) ) {
+        softfloat_raiseFlags( softfloat_flag_invalid );
+        return false;
+    }
+    uiA96 = aWPtr[indexWordHi( 4 )];
+    uiB96 = bWPtr[indexWordHi( 4 )];
+    signA = signF128UI96( uiA96 );
+    signB = signF128UI96( uiB96 );
+    if ( signA != signB ) {
+        if ( signB ) return false;
+        if ( (uiA96 | uiB96) & 0x7FFFFFFF ) return true;
+        wordA = aWPtr[indexWord( 4, 2 )];
+        wordB = bWPtr[indexWord( 4, 2 )];
+        if ( wordA | wordB ) return true;
+        wordA = aWPtr[indexWord( 4, 1 )];
+        wordB = bWPtr[indexWord( 4, 1 )];
+        if ( wordA | wordB ) return true;
+        wordA = aWPtr[indexWord( 4, 0 )];
+        wordB = bWPtr[indexWord( 4, 0 )];
+        return ((wordA | wordB) != 0);
+    }
+    if ( signA ) {
+        aWPtr = (const uint32_t *) bPtr;
+        bWPtr = (const uint32_t *) aPtr;
+    }
+    return (softfloat_compare128M( aWPtr, bWPtr ) < 0);
+
+}
+
+#endif
+
diff --git a/ext/softfloat/f128M_lt_quiet.c b/ext/softfloat/f128M_lt_quiet.c
new file mode 100644
index 0000000000..4beff04af5
--- /dev/null
+++ b/ext/softfloat/f128M_lt_quiet.c
@@ -0,0 +1,96 @@
+
+/*============================================================================
+
+This C source file is part of the SoftFloat IEEE Floating-Point Arithmetic
+Package, Release 3d, by John R. Hauser.
+
+Copyright 2011, 2012, 2013, 2014 The Regents of the University of California.
+All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met:
+
+ 1. Redistributions of source code must retain the above copyright notice,
+    this list of conditions, and the following disclaimer.
+
+ 2. Redistributions in binary form must reproduce the above copyright notice,
+    this list of conditions, and the following disclaimer in the documentation
+    and/or other materials provided with the distribution.
+
+ 3. Neither the name of the University nor the names of its contributors may
+    be used to endorse or promote products derived from this software without
+    specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS "AS IS", AND ANY
+EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE, ARE
+DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY
+DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+=============================================================================*/
+
+#include <stdbool.h>
+#include <stdint.h>
+#include "platform.h"
+#include "internals.h"
+#include "specialize.h"
+#include "softfloat.h"
+
+#ifdef SOFTFLOAT_FAST_INT64
+
+bool f128M_lt_quiet( const float128_t *aPtr, const float128_t *bPtr )
+{
+
+    return f128_lt_quiet( *aPtr, *bPtr );
+
+}
+
+#else
+
+bool f128M_lt_quiet( const float128_t *aPtr, const float128_t *bPtr )
+{
+    const uint32_t *aWPtr, *bWPtr;
+    uint32_t uiA96, uiB96;
+    bool signA, signB;
+    uint32_t wordA, wordB;
+
+    aWPtr = (const uint32_t *) aPtr;
+    bWPtr = (const uint32_t *) bPtr;
+    if ( softfloat_isNaNF128M( aWPtr ) || softfloat_isNaNF128M( bWPtr ) ) {
+        if ( f128M_isSignalingNaN( aPtr ) || f128M_isSignalingNaN( bPtr ) ) {
+            softfloat_raiseFlags( softfloat_flag_invalid );
+        }
+        return false;
+    }
+    uiA96 = aWPtr[indexWordHi( 4 )];
+    uiB96 = bWPtr[indexWordHi( 4 )];
+    signA = signF128UI96( uiA96 );
+    signB = signF128UI96( uiB96 );
+    if ( signA != signB ) {
+        if ( signB ) return false;
+        if ( (uiA96 | uiB96) & 0x7FFFFFFF ) return true;
+        wordA = aWPtr[indexWord( 4, 2 )];
+        wordB = bWPtr[indexWord( 4, 2 )];
+        if ( wordA | wordB ) return true;
+        wordA = aWPtr[indexWord( 4, 1 )];
+        wordB = bWPtr[indexWord( 4, 1 )];
+        if ( wordA | wordB ) return true;
+        wordA = aWPtr[indexWord( 4, 0 )];
+        wordB = bWPtr[indexWord( 4, 0 )];
+        return ((wordA | wordB) != 0);
+    }
+    if ( signA ) {
+        aWPtr = (const uint32_t *) bPtr;
+        bWPtr = (const uint32_t *) aPtr;
+    }
+    return (softfloat_compare128M( aWPtr, bWPtr ) < 0);
+
+}
+
+#endif
+
diff --git a/ext/softfloat/f128M_mul.c b/ext/softfloat/f128M_mul.c
new file mode 100644
index 0000000000..3215164f74
--- /dev/null
+++ b/ext/softfloat/f128M_mul.c
@@ -0,0 +1,158 @@
+
+/*============================================================================
+
+This C source file is part of the SoftFloat IEEE Floating-Point Arithmetic
+Package, Release 3d, by John R. Hauser.
+
+Copyright 2011, 2012, 2013, 2014, 2015, 2016 The Regents of the University of
+California.  All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met:
+
+ 1. Redistributions of source code must retain the above copyright notice,
+    this list of conditions, and the following disclaimer.
+
+ 2. Redistributions in binary form must reproduce the above copyright notice,
+    this list of conditions, and the following disclaimer in the documentation
+    and/or other materials provided with the distribution.
+
+ 3. Neither the name of the University nor the names of its contributors may
+    be used to endorse or promote products derived from this software without
+    specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS "AS IS", AND ANY
+EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE, ARE
+DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY
+DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+=============================================================================*/
+
+#include <stdbool.h>
+#include <stdint.h>
+#include "platform.h"
+#include "internals.h"
+#include "specialize.h"
+#include "softfloat.h"
+
+#ifdef SOFTFLOAT_FAST_INT64
+
+void
+ f128M_mul( const float128_t *aPtr, const float128_t *bPtr, float128_t *zPtr )
+{
+
+    *zPtr = f128_mul( *aPtr, *bPtr );
+
+}
+
+#else
+
+void
+ f128M_mul( const float128_t *aPtr, const float128_t *bPtr, float128_t *zPtr )
+{
+    const uint32_t *aWPtr, *bWPtr;
+    uint32_t *zWPtr;
+    uint32_t uiA96;
+    int32_t expA;
+    uint32_t uiB96;
+    int32_t expB;
+    bool signZ;
+    const uint32_t *ptr;
+    uint32_t uiZ96, sigA[4];
+    uint_fast8_t shiftDist;
+    uint32_t sigB[4];
+    int32_t expZ;
+    uint32_t sigProd[8], *extSigZPtr;
+
+    /*------------------------------------------------------------------------
+    *------------------------------------------------------------------------*/
+    aWPtr = (const uint32_t *) aPtr;
+    bWPtr = (const uint32_t *) bPtr;
+    zWPtr = (uint32_t *) zPtr;
+    /*------------------------------------------------------------------------
+    *------------------------------------------------------------------------*/
+    uiA96 = aWPtr[indexWordHi( 4 )];
+    expA = expF128UI96( uiA96 );
+    uiB96 = bWPtr[indexWordHi( 4 )];
+    expB = expF128UI96( uiB96 );
+    signZ = signF128UI96( uiA96 ) ^ signF128UI96( uiB96 );
+    /*------------------------------------------------------------------------
+    *------------------------------------------------------------------------*/
+    if ( (expA == 0x7FFF) || (expB == 0x7FFF) ) {
+        if ( softfloat_tryPropagateNaNF128M( aWPtr, bWPtr, zWPtr ) ) return;
+        ptr = aWPtr;
+        if ( ! expA ) goto possiblyInvalid;
+        if ( ! expB ) {
+            ptr = bWPtr;
+     possiblyInvalid:
+            if (
+                ! fracF128UI96( ptr[indexWordHi( 4 )] )
+                    && ! (ptr[indexWord( 4, 2 )] | ptr[indexWord( 4, 1 )]
+                              | ptr[indexWord( 4, 0 )])
+            ) {
+                softfloat_invalidF128M( zWPtr );
+                return;
+            }
+        }
+        uiZ96 = packToF128UI96( signZ, 0x7FFF, 0 );
+        goto uiZ96;
+    }
+    /*------------------------------------------------------------------------
+    *------------------------------------------------------------------------*/
+    if ( expA ) {
+        sigA[indexWordHi( 4 )] = fracF128UI96( uiA96 ) | 0x00010000;
+        sigA[indexWord( 4, 2 )] = aWPtr[indexWord( 4, 2 )];
+        sigA[indexWord( 4, 1 )] = aWPtr[indexWord( 4, 1 )];
+        sigA[indexWord( 4, 0 )] = aWPtr[indexWord( 4, 0 )];
+    } else {
+        expA = softfloat_shiftNormSigF128M( aWPtr, 0, sigA );
+        if ( expA == -128 ) goto zero;
+    }
+    if ( expB ) {
+        sigB[indexWordHi( 4 )] = fracF128UI96( uiB96 ) | 0x00010000;
+        sigB[indexWord( 4, 2 )] = bWPtr[indexWord( 4, 2 )];
+        sigB[indexWord( 4, 1 )] = bWPtr[indexWord( 4, 1 )];
+        sigB[indexWord( 4, 0 )] = bWPtr[indexWord( 4, 0 )];
+    } else {
+        expB = softfloat_shiftNormSigF128M( bWPtr, 0, sigB );
+        if ( expB == -128 ) goto zero;
+    }
+    /*------------------------------------------------------------------------
+    *------------------------------------------------------------------------*/
+    expZ = expA + expB - 0x4000;
+    softfloat_mul128MTo256M( sigA, sigB, sigProd );
+    if (
+        sigProd[indexWord( 8, 2 )]
+            || (sigProd[indexWord( 8, 1 )] | sigProd[indexWord( 8, 0 )])
+    ) {
+        sigProd[indexWord( 8, 3 )] |= 1;
+    }
+    extSigZPtr = &sigProd[indexMultiwordHi( 8, 5 )];
+    shiftDist = 16;
+    if ( extSigZPtr[indexWordHi( 5 )] & 2 ) {
+        ++expZ;
+        shiftDist = 15;
+    }
+    softfloat_shortShiftLeft160M( extSigZPtr, shiftDist, extSigZPtr );
+    softfloat_roundPackMToF128M( signZ, expZ, extSigZPtr, zWPtr );
+    return;
+    /*------------------------------------------------------------------------
+    *------------------------------------------------------------------------*/
+ zero:
+    uiZ96 = packToF128UI96( signZ, 0, 0 );
+ uiZ96:
+    zWPtr[indexWordHi( 4 )] = uiZ96;
+    zWPtr[indexWord( 4, 2 )] = 0;
+    zWPtr[indexWord( 4, 1 )] = 0;
+    zWPtr[indexWord( 4, 0 )] = 0;
+
+}
+
+#endif
+
diff --git a/ext/softfloat/f128M_mulAdd.c b/ext/softfloat/f128M_mulAdd.c
new file mode 100644
index 0000000000..c092dc3cf0
--- /dev/null
+++ b/ext/softfloat/f128M_mulAdd.c
@@ -0,0 +1,92 @@
+
+/*============================================================================
+
+This C source file is part of the SoftFloat IEEE Floating-Point Arithmetic
+Package, Release 3d, by John R. Hauser.
+
+Copyright 2011, 2012, 2013, 2014 The Regents of the University of California.
+All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met:
+
+ 1. Redistributions of source code must retain the above copyright notice,
+    this list of conditions, and the following disclaimer.
+
+ 2. Redistributions in binary form must reproduce the above copyright notice,
+    this list of conditions, and the following disclaimer in the documentation
+    and/or other materials provided with the distribution.
+
+ 3. Neither the name of the University nor the names of its contributors may
+    be used to endorse or promote products derived from this software without
+    specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS "AS IS", AND ANY
+EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE, ARE
+DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY
+DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+=============================================================================*/
+
+#include <stdint.h>
+#include "platform.h"
+#include "internals.h"
+#include "softfloat.h"
+
+#ifdef SOFTFLOAT_FAST_INT64
+
+void
+ f128M_mulAdd(
+     const float128_t *aPtr,
+     const float128_t *bPtr,
+     const float128_t *cPtr,
+     float128_t *zPtr
+ )
+{
+    const uint64_t *aWPtr, *bWPtr, *cWPtr;
+    uint_fast64_t uiA64, uiA0;
+    uint_fast64_t uiB64, uiB0;
+    uint_fast64_t uiC64, uiC0;
+
+    aWPtr = (const uint64_t *) aPtr;
+    bWPtr = (const uint64_t *) bPtr;
+    cWPtr = (const uint64_t *) cPtr;
+    uiA64 = aWPtr[indexWord( 2, 1 )];
+    uiA0  = aWPtr[indexWord( 2, 0 )];
+    uiB64 = bWPtr[indexWord( 2, 1 )];
+    uiB0  = bWPtr[indexWord( 2, 0 )];
+    uiC64 = cWPtr[indexWord( 2, 1 )];
+    uiC0  = cWPtr[indexWord( 2, 0 )];
+    *zPtr = softfloat_mulAddF128( uiA64, uiA0, uiB64, uiB0, uiC64, uiC0, 0 );
+
+}
+
+#else
+
+void
+ f128M_mulAdd(
+     const float128_t *aPtr,
+     const float128_t *bPtr,
+     const float128_t *cPtr,
+     float128_t *zPtr
+ )
+{
+
+    softfloat_mulAddF128M(
+        (const uint32_t *) aPtr,
+        (const uint32_t *) bPtr,
+        (const uint32_t *) cPtr,
+        (uint32_t *) zPtr,
+        0
+    );
+
+}
+
+#endif
+
diff --git a/ext/softfloat/f128M_rem.c b/ext/softfloat/f128M_rem.c
new file mode 100644
index 0000000000..2a403714e5
--- /dev/null
+++ b/ext/softfloat/f128M_rem.c
@@ -0,0 +1,182 @@
+
+/*============================================================================
+
+This C source file is part of the SoftFloat IEEE Floating-Point Arithmetic
+Package, Release 3d, by John R. Hauser.
+
+Copyright 2011, 2012, 2013, 2014 The Regents of the University of California.
+All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met:
+
+ 1. Redistributions of source code must retain the above copyright notice,
+    this list of conditions, and the following disclaimer.
+
+ 2. Redistributions in binary form must reproduce the above copyright notice,
+    this list of conditions, and the following disclaimer in the documentation
+    and/or other materials provided with the distribution.
+
+ 3. Neither the name of the University nor the names of its contributors may
+    be used to endorse or promote products derived from this software without
+    specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS "AS IS", AND ANY
+EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE, ARE
+DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY
+DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+=============================================================================*/
+
+#include <stdbool.h>
+#include <stdint.h>
+#include "platform.h"
+#include "internals.h"
+#include "specialize.h"
+#include "softfloat.h"
+
+#ifdef SOFTFLOAT_FAST_INT64
+
+void
+ f128M_rem( const float128_t *aPtr, const float128_t *bPtr, float128_t *zPtr )
+{
+
+    *zPtr = f128_rem( *aPtr, *bPtr );
+
+}
+
+#else
+
+void
+ f128M_rem( const float128_t *aPtr, const float128_t *bPtr, float128_t *zPtr )
+{
+    const uint32_t *aWPtr, *bWPtr;
+    uint32_t *zWPtr, uiA96;
+    int32_t expA, expB;
+    uint32_t x[4], rem1[5], *remPtr;
+    bool signRem;
+    int32_t expDiff;
+    uint32_t q, recip32;
+    uint64_t q64;
+    uint32_t rem2[5], *altRemPtr, *newRemPtr, wordMeanRem;
+
+    /*------------------------------------------------------------------------
+    *------------------------------------------------------------------------*/
+    aWPtr = (const uint32_t *) aPtr;
+    bWPtr = (const uint32_t *) bPtr;
+    zWPtr = (uint32_t *) zPtr;
+    /*------------------------------------------------------------------------
+    *------------------------------------------------------------------------*/
+    uiA96 = aWPtr[indexWordHi( 4 )];
+    expA = expF128UI96( uiA96 );
+    expB = expF128UI96( bWPtr[indexWordHi( 4 )] );
+    /*------------------------------------------------------------------------
+    *------------------------------------------------------------------------*/
+    if ( (expA == 0x7FFF) || (expB == 0x7FFF) ) {
+        if ( softfloat_tryPropagateNaNF128M( aWPtr, bWPtr, zWPtr ) ) return;
+        if ( expA == 0x7FFF ) goto invalid;
+        goto copyA;
+    }
+    /*------------------------------------------------------------------------
+    *------------------------------------------------------------------------*/
+    if ( expA < expB - 1 ) goto copyA;
+    /*------------------------------------------------------------------------
+    *------------------------------------------------------------------------*/
+    expB = softfloat_shiftNormSigF128M( bWPtr, 13, x );
+    if ( expB == -128 ) goto invalid;
+    remPtr = &rem1[indexMultiwordLo( 5, 4 )];
+    expA = softfloat_shiftNormSigF128M( aWPtr, 13, remPtr );
+    if ( expA == -128 ) goto copyA;
+    signRem = signF128UI96( uiA96 );
+    /*------------------------------------------------------------------------
+    *------------------------------------------------------------------------*/
+    expDiff = expA - expB;
+    if ( expDiff < 1 ) {
+        if ( expDiff < -1 ) goto copyA;
+        if ( expDiff ) {
+            --expB;
+            softfloat_add128M( x, x, x );
+            q = 0;
+        } else {
+            q = (softfloat_compare128M( x, remPtr ) <= 0);
+            if ( q ) softfloat_sub128M( remPtr, x, remPtr );
+        }
+    } else {
+        recip32 =
+            softfloat_approxRecip32_1(
+                ((uint64_t) x[indexWord( 4, 3 )]<<32 | x[indexWord( 4, 2 )])
+                    >>30
+            );
+        expDiff -= 30;
+        for (;;) {
+            q64 = (uint64_t) remPtr[indexWordHi( 4 )] * recip32;
+            if ( expDiff < 0 ) break;
+            q = (q64 + 0x80000000)>>32;
+            softfloat_remStep128MBy32( remPtr, 29, x, q, remPtr );
+            if ( remPtr[indexWordHi( 4 )] & 0x80000000 ) {
+                softfloat_add128M( remPtr, x, remPtr );
+            }
+            expDiff -= 29;
+        }
+        /*--------------------------------------------------------------------
+        | (`expDiff' cannot be less than -29 here.)
+        *--------------------------------------------------------------------*/
+        q = (uint32_t) (q64>>32)>>(~expDiff & 31);
+        softfloat_remStep128MBy32( remPtr, expDiff + 30, x, q, remPtr );
+        if ( remPtr[indexWordHi( 4 )] & 0x80000000 ) {
+            altRemPtr = &rem2[indexMultiwordLo( 5, 4 )];
+            softfloat_add128M( remPtr, x, altRemPtr );
+            goto selectRem;
+        }
+    }
+    /*------------------------------------------------------------------------
+    *------------------------------------------------------------------------*/
+    altRemPtr = &rem2[indexMultiwordLo( 5, 4 )];
+    do {
+        ++q;
+        newRemPtr = altRemPtr;
+        softfloat_sub128M( remPtr, x, newRemPtr );
+        altRemPtr = remPtr;
+        remPtr = newRemPtr;
+    } while ( ! (remPtr[indexWordHi( 4 )] & 0x80000000) );
+ selectRem:
+    softfloat_add128M( remPtr, altRemPtr, x );
+    wordMeanRem = x[indexWordHi( 4 )];
+    if (
+        (wordMeanRem & 0x80000000)
+            || (! wordMeanRem && (q & 1) && ! x[indexWord( 4, 0 )]
+                    && ! (x[indexWord( 4, 2 )] | x[indexWord( 4, 1 )]))
+    ) {
+        remPtr = altRemPtr;
+    }
+    if ( remPtr[indexWordHi( 4 )] & 0x80000000 ) {
+        signRem = ! signRem;
+        softfloat_negX128M( remPtr );
+    }
+    remPtr -= indexMultiwordLo( 5, 4 );
+    remPtr[indexWordHi( 5 )] = 0;
+    softfloat_normRoundPackMToF128M( signRem, expB + 18, remPtr, zWPtr );
+    return;
+    /*------------------------------------------------------------------------
+    *------------------------------------------------------------------------*/
+ invalid:
+    softfloat_invalidF128M( zWPtr );
+    return;
+    /*------------------------------------------------------------------------
+    *------------------------------------------------------------------------*/
+ copyA:
+    zWPtr[indexWordHi( 4 )] = uiA96;
+    zWPtr[indexWord( 4, 2 )] = aWPtr[indexWord( 4, 2 )];
+    zWPtr[indexWord( 4, 1 )] = aWPtr[indexWord( 4, 1 )];
+    zWPtr[indexWord( 4, 0 )] = aWPtr[indexWord( 4, 0 )];
+
+}
+
+#endif
+
diff --git a/ext/softfloat/f128M_roundToInt.c b/ext/softfloat/f128M_roundToInt.c
new file mode 100644
index 0000000000..762147ea4b
--- /dev/null
+++ b/ext/softfloat/f128M_roundToInt.c
@@ -0,0 +1,216 @@
+
+/*============================================================================
+
+This C source file is part of the SoftFloat IEEE Floating-Point Arithmetic
+Package, Release 3d, by John R. Hauser.
+
+Copyright 2011, 2012, 2013, 2014, 2017 The Regents of the University of
+California.  All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met:
+
+ 1. Redistributions of source code must retain the above copyright notice,
+    this list of conditions, and the following disclaimer.
+
+ 2. Redistributions in binary form must reproduce the above copyright notice,
+    this list of conditions, and the following disclaimer in the documentation
+    and/or other materials provided with the distribution.
+
+ 3. Neither the name of the University nor the names of its contributors may
+    be used to endorse or promote products derived from this software without
+    specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS "AS IS", AND ANY
+EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE, ARE
+DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY
+DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+=============================================================================*/
+
+#include <stdbool.h>
+#include <stdint.h>
+#include "platform.h"
+#include "internals.h"
+#include "specialize.h"
+#include "softfloat.h"
+
+#ifdef SOFTFLOAT_FAST_INT64
+
+void
+ f128M_roundToInt(
+     const float128_t *aPtr,
+     uint_fast8_t roundingMode,
+     bool exact,
+     float128_t *zPtr
+ )
+{
+
+    *zPtr = f128_roundToInt( *aPtr, roundingMode, exact );
+
+}
+
+#else
+
+void
+ f128M_roundToInt(
+     const float128_t *aPtr,
+     uint_fast8_t roundingMode,
+     bool exact,
+     float128_t *zPtr
+ )
+{
+    const uint32_t *aWPtr;
+    uint32_t *zWPtr;
+    uint32_t ui96;
+    int32_t exp;
+    uint32_t sigExtra;
+    bool sign;
+    uint_fast8_t bitPos;
+    bool roundNear;
+    unsigned int index, lastIndex;
+    bool extra;
+    uint32_t wordA, bit, wordZ;
+    uint_fast8_t carry;
+    uint32_t extrasMask;
+
+    /*------------------------------------------------------------------------
+    *------------------------------------------------------------------------*/
+    aWPtr = (const uint32_t *) aPtr;
+    zWPtr = (uint32_t *) zPtr;
+    /*------------------------------------------------------------------------
+    *------------------------------------------------------------------------*/
+    ui96 = aWPtr[indexWordHi( 4 )];
+    exp = expF128UI96( ui96 );
+    /*------------------------------------------------------------------------
+    *------------------------------------------------------------------------*/
+    if ( exp < 0x3FFF ) {
+        zWPtr[indexWord( 4, 2 )] = 0;
+        zWPtr[indexWord( 4, 1 )] = 0;
+        zWPtr[indexWord( 4, 0 )] = 0;
+        sigExtra = aWPtr[indexWord( 4, 2 )];
+        if ( ! sigExtra ) {
+            sigExtra = aWPtr[indexWord( 4, 1 )] | aWPtr[indexWord( 4, 0 )];
+        }
+        if ( ! sigExtra && ! (ui96 & 0x7FFFFFFF) ) goto ui96;
+        if ( exact ) softfloat_exceptionFlags |= softfloat_flag_inexact;
+        sign = signF128UI96( ui96 );
+        switch ( roundingMode ) {
+         case softfloat_round_near_even:
+            if ( ! fracF128UI96( ui96 ) && ! sigExtra ) break;
+         case softfloat_round_near_maxMag:
+            if ( exp == 0x3FFE ) goto mag1;
+            break;
+         case softfloat_round_min:
+            if ( sign ) goto mag1;
+            break;
+         case softfloat_round_max:
+            if ( ! sign ) goto mag1;
+            break;
+        }
+        ui96 = packToF128UI96( sign, 0, 0 );
+        goto ui96;
+     mag1:
+        ui96 = packToF128UI96( sign, 0x3FFF, 0 );
+        goto ui96;
+    }
+    /*------------------------------------------------------------------------
+    *------------------------------------------------------------------------*/
+    if ( 0x406F <= exp ) {
+        if (
+            (exp == 0x7FFF)
+                && (fracF128UI96( ui96 )
+                        || (aWPtr[indexWord( 4, 2 )] | aWPtr[indexWord( 4, 1 )]
+                                | aWPtr[indexWord( 4, 0 )]))
+        ) {
+            softfloat_propagateNaNF128M( aWPtr, 0, zWPtr );
+            return;
+        }
+        zWPtr[indexWord( 4, 2 )] = aWPtr[indexWord( 4, 2 )];
+        zWPtr[indexWord( 4, 1 )] = aWPtr[indexWord( 4, 1 )];
+        zWPtr[indexWord( 4, 0 )] = aWPtr[indexWord( 4, 0 )];
+        goto ui96;
+    }
+    /*------------------------------------------------------------------------
+    *------------------------------------------------------------------------*/
+    bitPos = 0x406F - exp;
+    roundNear =
+           (roundingMode == softfloat_round_near_maxMag)
+        || (roundingMode == softfloat_round_near_even);
+    bitPos -= roundNear;
+    index = indexWordLo( 4 );
+    lastIndex = indexWordHi( 4 );
+    extra = 0;
+    for (;;) {
+        wordA = aWPtr[index];
+        if ( bitPos < 32 ) break;
+        if ( wordA ) extra = 1;
+        zWPtr[index] = 0;
+        index += wordIncr;
+        bitPos -= 32;
+    }
+    bit = (uint32_t) 1<<bitPos;
+    if ( roundNear ) {
+        wordZ = wordA + bit;
+        carry = (wordZ < wordA);
+        bit <<= 1;
+        extrasMask = bit - 1;
+        if (
+            (roundingMode == softfloat_round_near_even)
+                && ! extra && ! (wordZ & extrasMask)
+        ) {
+            if ( ! bit ) {
+                zWPtr[index] = wordZ;
+                index += wordIncr;
+                wordZ = aWPtr[index] + carry;
+                carry &= ! wordZ;
+                zWPtr[index] = wordZ & ~1;
+                goto propagateCarry;
+            }
+            wordZ &= ~bit;
+        }
+    } else {
+        extrasMask = bit - 1;
+        wordZ = wordA;
+        carry = 0;
+        if (
+            roundingMode
+                == (signF128UI96( ui96 ) ? softfloat_round_min
+                        : softfloat_round_max)
+        ) {
+            if ( extra || (wordA & extrasMask) ) {
+                wordZ += bit;
+                carry = (wordZ < wordA);
+            }
+        }
+    }
+    wordZ &= ~extrasMask;
+    zWPtr[index] = wordZ;
+ propagateCarry:
+    while ( index != lastIndex ) {
+        index += wordIncr;
+        wordZ = aWPtr[index] + carry;
+        zWPtr[index] = wordZ;
+        carry &= ! wordZ;
+    }
+    /*------------------------------------------------------------------------
+    *------------------------------------------------------------------------*/
+    if ( exact && (softfloat_compare128M( aWPtr, zWPtr ) != 0) ) {
+        softfloat_exceptionFlags |= softfloat_flag_inexact;
+    }
+    return;
+    /*------------------------------------------------------------------------
+    *------------------------------------------------------------------------*/
+ ui96:
+    zWPtr[indexWordHi( 4 )] = ui96;
+
+}
+
+#endif
+
diff --git a/ext/softfloat/f128M_sqrt.c b/ext/softfloat/f128M_sqrt.c
new file mode 100644
index 0000000000..e1283d4cad
--- /dev/null
+++ b/ext/softfloat/f128M_sqrt.c
@@ -0,0 +1,228 @@
+
+/*============================================================================
+
+This C source file is part of the SoftFloat IEEE Floating-Point Arithmetic
+Package, Release 3d, by John R. Hauser.
+
+Copyright 2011, 2012, 2013, 2014, 2017 The Regents of the University of
+California.  All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met:
+
+ 1. Redistributions of source code must retain the above copyright notice,
+    this list of conditions, and the following disclaimer.
+
+ 2. Redistributions in binary form must reproduce the above copyright notice,
+    this list of conditions, and the following disclaimer in the documentation
+    and/or other materials provided with the distribution.
+
+ 3. Neither the name of the University nor the names of its contributors may
+    be used to endorse or promote products derived from this software without
+    specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS "AS IS", AND ANY
+EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE, ARE
+DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY
+DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+=============================================================================*/
+
+#include <stdbool.h>
+#include <stdint.h>
+#include "platform.h"
+#include "internals.h"
+#include "specialize.h"
+#include "softfloat.h"
+
+#ifdef SOFTFLOAT_FAST_INT64
+
+void f128M_sqrt( const float128_t *aPtr, float128_t *zPtr )
+{
+
+    *zPtr = f128_sqrt( *aPtr );
+
+}
+
+#else
+
+void f128M_sqrt( const float128_t *aPtr, float128_t *zPtr )
+{
+    const uint32_t *aWPtr;
+    uint32_t *zWPtr;
+    uint32_t uiA96;
+    bool signA;
+    int32_t rawExpA;
+    uint32_t rem[6];
+    int32_t expA, expZ;
+    uint64_t rem64;
+    uint32_t sig32A, recipSqrt32, sig32Z, qs[3], q;
+    uint64_t sig64Z;
+    uint32_t term[5];
+    uint64_t x64;
+    uint32_t y[5], rem32;
+
+    /*------------------------------------------------------------------------
+    *------------------------------------------------------------------------*/
+    aWPtr = (const uint32_t *) aPtr;
+    zWPtr = (uint32_t *) zPtr;
+    /*------------------------------------------------------------------------
+    *------------------------------------------------------------------------*/
+    uiA96 = aWPtr[indexWordHi( 4 )];
+    signA = signF128UI96( uiA96 );
+    rawExpA  = expF128UI96( uiA96 );
+    /*------------------------------------------------------------------------
+    *------------------------------------------------------------------------*/
+    if ( rawExpA == 0x7FFF ) {
+        if (
+            fracF128UI96( uiA96 )
+                || (aWPtr[indexWord( 4, 2 )] | aWPtr[indexWord( 4, 1 )]
+                        | aWPtr[indexWord( 4, 0 )])
+        ) {
+            softfloat_propagateNaNF128M( aWPtr, 0, zWPtr );
+            return;
+        }
+        if ( ! signA ) goto copyA;
+        goto invalid;
+    }
+    /*------------------------------------------------------------------------
+    *------------------------------------------------------------------------*/
+    expA = softfloat_shiftNormSigF128M( aWPtr, 13 - (rawExpA & 1), rem );
+    if ( expA == -128 ) goto copyA;
+    if ( signA ) goto invalid;
+    /*------------------------------------------------------------------------
+    | (`sig32Z' is guaranteed to be a lower bound on the square root of
+    | `sig32A', which makes `sig32Z' also a lower bound on the square root of
+    | `sigA'.)
+    *------------------------------------------------------------------------*/
+    expZ = ((expA - 0x3FFF)>>1) + 0x3FFE;
+    expA &= 1;
+    rem64 = (uint64_t) rem[indexWord( 4, 3 )]<<32 | rem[indexWord( 4, 2 )];
+    if ( expA ) {
+        if ( ! rawExpA ) {
+            softfloat_shortShiftRight128M( rem, 1, rem );
+            rem64 >>= 1;
+        }
+        sig32A = rem64>>29;
+    } else {
+        sig32A = rem64>>30;
+    }
+    recipSqrt32 = softfloat_approxRecipSqrt32_1( expA, sig32A );
+    sig32Z = ((uint64_t) sig32A * recipSqrt32)>>32;
+    if ( expA ) sig32Z >>= 1;
+    qs[2] = sig32Z;
+    rem64 -= (uint64_t) sig32Z * sig32Z;
+    rem[indexWord( 4, 3 )] = rem64>>32;
+    rem[indexWord( 4, 2 )] = rem64;
+    /*------------------------------------------------------------------------
+    *------------------------------------------------------------------------*/
+    q = ((uint32_t) (rem64>>2) * (uint64_t) recipSqrt32)>>32;
+    sig64Z = ((uint64_t) sig32Z<<32) + ((uint64_t) q<<3);
+    term[indexWord( 4, 3 )] = 0;
+    term[indexWord( 4, 0 )] = 0;
+    /*------------------------------------------------------------------------
+    | (Repeating this loop is a rare occurrence.)
+    *------------------------------------------------------------------------*/
+    for (;;) {
+        x64 = ((uint64_t) sig32Z<<32) + sig64Z;
+        term[indexWord( 4, 2 )] = x64>>32;
+        term[indexWord( 4, 1 )] = x64;
+        softfloat_remStep128MBy32( rem, 29, term, q, y );
+        rem32 = y[indexWord( 4, 3 )];
+        if ( ! (rem32 & 0x80000000) ) break;
+        --q;
+        sig64Z -= 1<<3;
+    }
+    qs[1] = q;
+    rem64 = (uint64_t) rem32<<32 | y[indexWord( 4, 2 )];
+    /*------------------------------------------------------------------------
+    *------------------------------------------------------------------------*/
+    q = ((uint32_t) (rem64>>2) * (uint64_t) recipSqrt32)>>32;
+    if ( rem64>>34 ) q += recipSqrt32;
+    sig64Z <<= 1;
+    /*------------------------------------------------------------------------
+    | (Repeating this loop is a rare occurrence.)
+    *------------------------------------------------------------------------*/
+    for (;;) {
+        x64 = sig64Z + (q>>26);
+        term[indexWord( 4, 2 )] = x64>>32;
+        term[indexWord( 4, 1 )] = x64;
+        term[indexWord( 4, 0 )] = q<<6;
+        softfloat_remStep128MBy32(
+            y, 29, term, q, &rem[indexMultiwordHi( 6, 4 )] );
+        rem32 = rem[indexWordHi( 6 )];
+        if ( ! (rem32 & 0x80000000) ) break;
+        --q;
+    }
+    qs[0] = q;
+    rem64 = (uint64_t) rem32<<32 | rem[indexWord( 6, 4 )];
+    /*------------------------------------------------------------------------
+    *------------------------------------------------------------------------*/
+    q = (((uint32_t) (rem64>>2) * (uint64_t) recipSqrt32)>>32) + 2;
+    if ( rem64>>34 ) q += recipSqrt32;
+    x64 = (uint64_t) q<<27;
+    y[indexWord( 5, 0 )] = x64;
+    x64 = ((uint64_t) qs[0]<<24) + (x64>>32);
+    y[indexWord( 5, 1 )] = x64;
+    x64 = ((uint64_t) qs[1]<<21) + (x64>>32);
+    y[indexWord( 5, 2 )] = x64;
+    x64 = ((uint64_t) qs[2]<<18) + (x64>>32);
+    y[indexWord( 5, 3 )] = x64;
+    y[indexWord( 5, 4 )] = x64>>32;
+    /*------------------------------------------------------------------------
+    *------------------------------------------------------------------------*/
+    if ( (q & 0xF) <= 2 ) {
+        q &= ~3;
+        y[indexWordLo( 5 )] = q<<27;
+        term[indexWord( 5, 4 )] = 0;
+        term[indexWord( 5, 3 )] = 0;
+        term[indexWord( 5, 2 )] = 0;
+        term[indexWord( 5, 1 )] = q>>6;
+        term[indexWord( 5, 0 )] = q<<26;
+        softfloat_sub160M( y, term, term );
+        rem[indexWord( 6, 1 )] = 0;
+        rem[indexWord( 6, 0 )] = 0;
+        softfloat_remStep160MBy32(
+            &rem[indexMultiwordLo( 6, 5 )],
+            14,
+            term,
+            q,
+            &rem[indexMultiwordLo( 6, 5 )]
+        );
+        rem32 = rem[indexWord( 6, 4 )];
+        if ( rem32 & 0x80000000 ) {
+            softfloat_sub1X160M( y );
+        } else {
+            if (
+                rem32 || rem[indexWord( 6, 0 )] || rem[indexWord( 6, 1 )]
+                    || (rem[indexWord( 6, 3 )] | rem[indexWord( 6, 2 )])
+            ) {
+                y[indexWordLo( 5 )] |= 1;
+            }
+        }
+    }
+    softfloat_roundPackMToF128M( 0, expZ, y, zWPtr );
+    return;
+    /*------------------------------------------------------------------------
+    *------------------------------------------------------------------------*/
+ invalid:
+    softfloat_invalidF128M( zWPtr );
+    return;
+    /*------------------------------------------------------------------------
+    *------------------------------------------------------------------------*/
+ copyA:
+    zWPtr[indexWordHi( 4 )] = uiA96;
+    zWPtr[indexWord( 4, 2 )] = aWPtr[indexWord( 4, 2 )];
+    zWPtr[indexWord( 4, 1 )] = aWPtr[indexWord( 4, 1 )];
+    zWPtr[indexWord( 4, 0 )] = aWPtr[indexWord( 4, 0 )];
+
+}
+
+#endif
+
diff --git a/ext/softfloat/f128M_sub.c b/ext/softfloat/f128M_sub.c
new file mode 100644
index 0000000000..59aef6479a
--- /dev/null
+++ b/ext/softfloat/f128M_sub.c
@@ -0,0 +1,97 @@
+
+/*============================================================================
+
+This C source file is part of the SoftFloat IEEE Floating-Point Arithmetic
+Package, Release 3d, by John R. Hauser.
+
+Copyright 2011, 2012, 2013, 2014 The Regents of the University of California.
+All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met:
+
+ 1. Redistributions of source code must retain the above copyright notice,
+    this list of conditions, and the following disclaimer.
+
+ 2. Redistributions in binary form must reproduce the above copyright notice,
+    this list of conditions, and the following disclaimer in the documentation
+    and/or other materials provided with the distribution.
+
+ 3. Neither the name of the University nor the names of its contributors may
+    be used to endorse or promote products derived from this software without
+    specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS "AS IS", AND ANY
+EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE, ARE
+DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY
+DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+=============================================================================*/
+
+#include <stdbool.h>
+#include <stdint.h>
+#include "platform.h"
+#include "internals.h"
+#include "softfloat.h"
+
+#ifdef SOFTFLOAT_FAST_INT64
+
+void
+ f128M_sub( const float128_t *aPtr, const float128_t *bPtr, float128_t *zPtr )
+{
+    const uint64_t *aWPtr, *bWPtr;
+    uint_fast64_t uiA64, uiA0;
+    bool signA;
+    uint_fast64_t uiB64, uiB0;
+    bool signB;
+#if ! defined INLINE_LEVEL || (INLINE_LEVEL < 2)
+    float128_t
+        (*magsFuncPtr)(
+            uint_fast64_t, uint_fast64_t, uint_fast64_t, uint_fast64_t, bool );
+#endif
+
+    aWPtr = (const uint64_t *) aPtr;
+    bWPtr = (const uint64_t *) bPtr;
+    uiA64 = aWPtr[indexWord( 2, 1 )];
+    uiA0  = aWPtr[indexWord( 2, 0 )];
+    signA = signF128UI64( uiA64 );
+    uiB64 = bWPtr[indexWord( 2, 1 )];
+    uiB0  = bWPtr[indexWord( 2, 0 )];
+    signB = signF128UI64( uiB64 );
+#if defined INLINE_LEVEL && (2 <= INLINE_LEVEL)
+    if ( signA == signB ) {
+        *zPtr = softfloat_subMagsF128( uiA64, uiA0, uiB64, uiB0, signA );
+    } else {
+        *zPtr = softfloat_addMagsF128( uiA64, uiA0, uiB64, uiB0, signA );
+    }
+#else
+    magsFuncPtr =
+        (signA == signB) ? softfloat_subMagsF128 : softfloat_addMagsF128;
+    *zPtr = (*magsFuncPtr)( uiA64, uiA0, uiB64, uiB0, signA );
+#endif
+
+}
+
+#else
+
+void
+ f128M_sub( const float128_t *aPtr, const float128_t *bPtr, float128_t *zPtr )
+{
+
+    softfloat_addF128M(
+        (const uint32_t *) aPtr,
+        (const uint32_t *) bPtr,
+        (uint32_t *) zPtr,
+        true
+    );
+
+}
+
+#endif
+
diff --git a/ext/softfloat/f128M_to_extF80M.c b/ext/softfloat/f128M_to_extF80M.c
new file mode 100644
index 0000000000..2e844ca569
--- /dev/null
+++ b/ext/softfloat/f128M_to_extF80M.c
@@ -0,0 +1,101 @@
+
+/*============================================================================
+
+This C source file is part of the SoftFloat IEEE Floating-Point Arithmetic
+Package, Release 3d, by John R. Hauser.
+
+Copyright 2011, 2012, 2013, 2014 The Regents of the University of California.
+All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met:
+
+ 1. Redistributions of source code must retain the above copyright notice,
+    this list of conditions, and the following disclaimer.
+
+ 2. Redistributions in binary form must reproduce the above copyright notice,
+    this list of conditions, and the following disclaimer in the documentation
+    and/or other materials provided with the distribution.
+
+ 3. Neither the name of the University nor the names of its contributors may
+    be used to endorse or promote products derived from this software without
+    specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS "AS IS", AND ANY
+EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE, ARE
+DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY
+DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+=============================================================================*/
+
+#include <stdbool.h>
+#include <stdint.h>
+#include "platform.h"
+#include "internals.h"
+#include "specialize.h"
+#include "softfloat.h"
+
+#ifdef SOFTFLOAT_FAST_INT64
+
+void f128M_to_extF80M( const float128_t *aPtr, extFloat80_t *zPtr )
+{
+
+    *zPtr = f128_to_extF80( *aPtr );
+
+}
+
+#else
+
+void f128M_to_extF80M( const float128_t *aPtr, extFloat80_t *zPtr )
+{
+    const uint32_t *aWPtr;
+    struct extFloat80M *zSPtr;
+    uint32_t uiA96;
+    bool sign;
+    int32_t exp;
+    struct commonNaN commonNaN;
+    uint32_t sig[4];
+
+    /*------------------------------------------------------------------------
+    *------------------------------------------------------------------------*/
+    aWPtr = (const uint32_t *) aPtr;
+    zSPtr = (struct extFloat80M *) zPtr;
+    /*------------------------------------------------------------------------
+    *------------------------------------------------------------------------*/
+    uiA96 = aWPtr[indexWordHi( 4 )];
+    sign = signF128UI96( uiA96 );
+    exp  = expF128UI96( uiA96 );
+    /*------------------------------------------------------------------------
+    *------------------------------------------------------------------------*/
+    if ( exp == 0x7FFF ) {
+        if ( softfloat_isNaNF128M( aWPtr ) ) {
+            softfloat_f128MToCommonNaN( aWPtr, &commonNaN );
+            softfloat_commonNaNToExtF80M( &commonNaN, zSPtr );
+            return;
+        }
+        zSPtr->signExp = packToExtF80UI64( sign, 0x7FFF );
+        zSPtr->signif = UINT64_C( 0x8000000000000000 );
+        return;
+    }
+    /*------------------------------------------------------------------------
+    *------------------------------------------------------------------------*/
+    exp = softfloat_shiftNormSigF128M( aWPtr, 15, sig );
+    if ( exp == -128 ) {
+        zSPtr->signExp = packToExtF80UI64( sign, 0 );
+        zSPtr->signif = 0;
+        return;
+    }
+    if ( sig[indexWord( 4, 0 )] ) sig[indexWord( 4, 1 )] |= 1;
+    softfloat_roundPackMToExtF80M(
+        sign, exp, &sig[indexMultiwordHi( 4, 3 )], 80, zSPtr );
+
+}
+
+#endif
+
diff --git a/ext/softfloat/f128M_to_f16.c b/ext/softfloat/f128M_to_f16.c
new file mode 100644
index 0000000000..e675c6400a
--- /dev/null
+++ b/ext/softfloat/f128M_to_f16.c
@@ -0,0 +1,113 @@
+
+/*============================================================================
+
+This C source file is part of the SoftFloat IEEE Floating-Point Arithmetic
+Package, Release 3d, by John R. Hauser.
+
+Copyright 2011, 2012, 2013, 2014, 2015 The Regents of the University of
+California.  All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met:
+
+ 1. Redistributions of source code must retain the above copyright notice,
+    this list of conditions, and the following disclaimer.
+
+ 2. Redistributions in binary form must reproduce the above copyright notice,
+    this list of conditions, and the following disclaimer in the documentation
+    and/or other materials provided with the distribution.
+
+ 3. Neither the name of the University nor the names of its contributors may
+    be used to endorse or promote products derived from this software without
+    specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS "AS IS", AND ANY
+EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE, ARE
+DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY
+DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+=============================================================================*/
+
+#include <stdbool.h>
+#include <stdint.h>
+#include "platform.h"
+#include "internals.h"
+#include "specialize.h"
+#include "softfloat.h"
+
+#ifdef SOFTFLOAT_FAST_INT64
+
+float16_t f128M_to_f16( const float128_t *aPtr )
+{
+
+    return f128_to_f16( *aPtr );
+
+}
+
+#else
+
+float16_t f128M_to_f16( const float128_t *aPtr )
+{
+    const uint32_t *aWPtr;
+    uint32_t uiA96;
+    bool sign;
+    int32_t exp;
+    uint32_t frac32;
+    struct commonNaN commonNaN;
+    uint16_t uiZ, frac16;
+    union ui16_f16 uZ;
+
+    /*------------------------------------------------------------------------
+    *------------------------------------------------------------------------*/
+    aWPtr = (const uint32_t *) aPtr;
+    /*------------------------------------------------------------------------
+    *------------------------------------------------------------------------*/
+    uiA96 = aWPtr[indexWordHi( 4 )];
+    sign = signF128UI96( uiA96 );
+    exp  = expF128UI96( uiA96 );
+    frac32 =
+        fracF128UI96( uiA96 )
+            | ((aWPtr[indexWord( 4, 2 )] | aWPtr[indexWord( 4, 1 )]
+                    | aWPtr[indexWord( 4, 0 )])
+                   != 0);
+    /*------------------------------------------------------------------------
+    *------------------------------------------------------------------------*/
+    if ( exp == 0x7FFF ) {
+        if ( frac32 ) {
+            softfloat_f128MToCommonNaN( aWPtr, &commonNaN );
+            uiZ = softfloat_commonNaNToF16UI( &commonNaN );
+        } else {
+            uiZ = packToF16UI( sign, 0x1F, 0 );
+        }
+        goto uiZ;
+    }
+    /*------------------------------------------------------------------------
+    *------------------------------------------------------------------------*/
+    frac16 = frac32>>2 | (frac32 & 3);
+    if ( ! (exp | frac16) ) {
+        uiZ = packToF16UI( sign, 0, 0 );
+        goto uiZ;
+    }
+    /*------------------------------------------------------------------------
+    *------------------------------------------------------------------------*/
+    exp -= 0x3FF1;
+    if ( sizeof (int_fast16_t) < sizeof (int32_t) ) {
+        if ( exp < -0x40 ) exp = -0x40;
+    }
+    return softfloat_roundPackToF16( sign, exp, frac16 | 0x4000 );
+    /*------------------------------------------------------------------------
+    *------------------------------------------------------------------------*/
+ uiZ:
+    uZ.ui = uiZ;
+    return uZ.f;
+
+}
+
+#endif
+
diff --git a/ext/softfloat/f128M_to_f32.c b/ext/softfloat/f128M_to_f32.c
new file mode 100644
index 0000000000..32df26ec5b
--- /dev/null
+++ b/ext/softfloat/f128M_to_f32.c
@@ -0,0 +1,109 @@
+
+/*============================================================================
+
+This C source file is part of the SoftFloat IEEE Floating-Point Arithmetic
+Package, Release 3d, by John R. Hauser.
+
+Copyright 2011, 2012, 2013, 2014, 2015 The Regents of the University of
+California.  All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met:
+
+ 1. Redistributions of source code must retain the above copyright notice,
+    this list of conditions, and the following disclaimer.
+
+ 2. Redistributions in binary form must reproduce the above copyright notice,
+    this list of conditions, and the following disclaimer in the documentation
+    and/or other materials provided with the distribution.
+
+ 3. Neither the name of the University nor the names of its contributors may
+    be used to endorse or promote products derived from this software without
+    specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS "AS IS", AND ANY
+EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE, ARE
+DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY
+DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+=============================================================================*/
+
+#include <stdbool.h>
+#include <stdint.h>
+#include "platform.h"
+#include "internals.h"
+#include "specialize.h"
+#include "softfloat.h"
+
+#ifdef SOFTFLOAT_FAST_INT64
+
+float32_t f128M_to_f32( const float128_t *aPtr )
+{
+
+    return f128_to_f32( *aPtr );
+
+}
+
+#else
+
+float32_t f128M_to_f32( const float128_t *aPtr )
+{
+    const uint32_t *aWPtr;
+    uint32_t uiA96;
+    bool sign;
+    int32_t exp;
+    uint64_t frac64;
+    struct commonNaN commonNaN;
+    uint32_t uiZ, frac32;
+    union ui32_f32 uZ;
+
+    /*------------------------------------------------------------------------
+    *------------------------------------------------------------------------*/
+    aWPtr = (const uint32_t *) aPtr;
+    uiA96 = aWPtr[indexWordHi( 4 )];
+    sign = signF128UI96( uiA96 );
+    exp  = expF128UI96( uiA96 );
+    frac64 =
+        (uint64_t) fracF128UI96( uiA96 )<<32 | aWPtr[indexWord( 4, 2 )]
+            | ((aWPtr[indexWord( 4, 1 )] | aWPtr[indexWord( 4, 0 )]) != 0);
+    /*------------------------------------------------------------------------
+    *------------------------------------------------------------------------*/
+    if ( exp == 0x7FFF ) {
+        if ( frac64 ) {
+            softfloat_f128MToCommonNaN( aWPtr, &commonNaN );
+            uiZ = softfloat_commonNaNToF32UI( &commonNaN );
+        } else {
+            uiZ = packToF32UI( sign, 0xFF, 0 );
+        }
+        goto uiZ;
+    }
+    /*------------------------------------------------------------------------
+    *------------------------------------------------------------------------*/
+    frac32 = softfloat_shortShiftRightJam64( frac64, 18 );
+    if ( ! (exp | frac32) ) {
+        uiZ = packToF32UI( sign, 0, 0 );
+        goto uiZ;
+    }
+    /*------------------------------------------------------------------------
+    *------------------------------------------------------------------------*/
+    exp -= 0x3F81;
+    if ( sizeof (int_fast16_t) < sizeof (int32_t) ) {
+        if ( exp < -0x1000 ) exp = -0x1000;
+    }
+    return softfloat_roundPackToF32( sign, exp, frac32 | 0x40000000 );
+    /*------------------------------------------------------------------------
+    *------------------------------------------------------------------------*/
+ uiZ:
+    uZ.ui = uiZ;
+    return uZ.f;
+
+}
+
+#endif
+
diff --git a/ext/softfloat/f128M_to_f64.c b/ext/softfloat/f128M_to_f64.c
new file mode 100644
index 0000000000..016a2d3d41
--- /dev/null
+++ b/ext/softfloat/f128M_to_f64.c
@@ -0,0 +1,112 @@
+
+/*============================================================================
+
+This C source file is part of the SoftFloat IEEE Floating-Point Arithmetic
+Package, Release 3d, by John R. Hauser.
+
+Copyright 2011, 2012, 2013, 2014, 2015 The Regents of the University of
+California.  All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met:
+
+ 1. Redistributions of source code must retain the above copyright notice,
+    this list of conditions, and the following disclaimer.
+
+ 2. Redistributions in binary form must reproduce the above copyright notice,
+    this list of conditions, and the following disclaimer in the documentation
+    and/or other materials provided with the distribution.
+
+ 3. Neither the name of the University nor the names of its contributors may
+    be used to endorse or promote products derived from this software without
+    specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS "AS IS", AND ANY
+EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE, ARE
+DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY
+DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+=============================================================================*/
+
+#include <stdbool.h>
+#include <stdint.h>
+#include "platform.h"
+#include "internals.h"
+#include "specialize.h"
+#include "softfloat.h"
+
+#ifdef SOFTFLOAT_FAST_INT64
+
+float64_t f128M_to_f64( const float128_t *aPtr )
+{
+
+    return f128_to_f64( *aPtr );
+
+}
+
+#else
+
+float64_t f128M_to_f64( const float128_t *aPtr )
+{
+    const uint32_t *aWPtr;
+    uint32_t uiA96;
+    bool sign;
+    int32_t exp;
+    uint64_t frac64;
+    struct commonNaN commonNaN;
+    uint64_t uiZ;
+    uint32_t frac32;
+    union ui64_f64 uZ;
+
+    /*------------------------------------------------------------------------
+    *------------------------------------------------------------------------*/
+    aWPtr = (const uint32_t *) aPtr;
+    uiA96 = aWPtr[indexWordHi( 4 )];
+    sign = signF128UI96( uiA96 );
+    exp  = expF128UI96( uiA96 );
+    frac64 = (uint64_t) fracF128UI96( uiA96 )<<32 | aWPtr[indexWord( 4, 2 )];
+    /*------------------------------------------------------------------------
+    *------------------------------------------------------------------------*/
+    if ( exp == 0x7FFF ) {
+        if ( frac64 || aWPtr[indexWord( 4, 1 )] | aWPtr[indexWord( 4, 0 )] ) {
+            softfloat_f128MToCommonNaN( aWPtr, &commonNaN );
+            uiZ = softfloat_commonNaNToF64UI( &commonNaN );
+        } else {
+            uiZ = packToF64UI( sign, 0x7FF, 0 );
+        }
+        goto uiZ;
+    }
+    /*------------------------------------------------------------------------
+    *------------------------------------------------------------------------*/
+    frac32 = aWPtr[indexWord( 4, 1 )];
+    frac64 = frac64<<14 | frac32>>18;
+    if ( (frac32 & 0x0003FFFF) || aWPtr[indexWord( 4, 0 )] ) frac64 |= 1;
+    if ( ! (exp | frac64) ) {
+        uiZ = packToF64UI( sign, 0, 0 );
+        goto uiZ;
+    }
+    /*------------------------------------------------------------------------
+    *------------------------------------------------------------------------*/
+    exp -= 0x3C01;
+    if ( sizeof (int_fast16_t) < sizeof (int32_t) ) {
+        if ( exp < -0x1000 ) exp = -0x1000;
+    }
+    return
+        softfloat_roundPackToF64(
+            sign, exp, frac64 | UINT64_C( 0x4000000000000000 ) );
+    /*------------------------------------------------------------------------
+    *------------------------------------------------------------------------*/
+ uiZ:
+    uZ.ui = uiZ;
+    return uZ.f;
+
+}
+
+#endif
+
diff --git a/ext/softfloat/f128M_to_i32.c b/ext/softfloat/f128M_to_i32.c
new file mode 100644
index 0000000000..9f63326f77
--- /dev/null
+++ b/ext/softfloat/f128M_to_i32.c
@@ -0,0 +1,98 @@
+
+/*============================================================================
+
+This C source file is part of the SoftFloat IEEE Floating-Point Arithmetic
+Package, Release 3d, by John R. Hauser.
+
+Copyright 2011, 2012, 2013, 2014, 2015, 2016, 2017 The Regents of the
+University of California.  All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met:
+
+ 1. Redistributions of source code must retain the above copyright notice,
+    this list of conditions, and the following disclaimer.
+
+ 2. Redistributions in binary form must reproduce the above copyright notice,
+    this list of conditions, and the following disclaimer in the documentation
+    and/or other materials provided with the distribution.
+
+ 3. Neither the name of the University nor the names of its contributors may
+    be used to endorse or promote products derived from this software without
+    specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS "AS IS", AND ANY
+EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE, ARE
+DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY
+DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+=============================================================================*/
+
+#include <stdbool.h>
+#include <stdint.h>
+#include "platform.h"
+#include "internals.h"
+#include "specialize.h"
+#include "softfloat.h"
+
+#ifdef SOFTFLOAT_FAST_INT64
+
+int_fast32_t
+ f128M_to_i32( const float128_t *aPtr, uint_fast8_t roundingMode, bool exact )
+{
+
+    return f128_to_i32( *aPtr, roundingMode, exact );
+
+}
+
+#else
+
+int_fast32_t
+ f128M_to_i32( const float128_t *aPtr, uint_fast8_t roundingMode, bool exact )
+{
+    const uint32_t *aWPtr;
+    uint32_t uiA96;
+    bool sign;
+    int32_t exp;
+    uint64_t sig64;
+    int32_t shiftDist;
+
+    /*------------------------------------------------------------------------
+    *------------------------------------------------------------------------*/
+    aWPtr = (const uint32_t *) aPtr;
+    uiA96 = aWPtr[indexWordHi( 4 )];
+    sign = signF128UI96( uiA96 );
+    exp  = expF128UI96( uiA96 );
+    sig64 = (uint64_t) fracF128UI96( uiA96 )<<32 | aWPtr[indexWord( 4, 2 )];
+    if ( aWPtr[indexWord( 4, 1 )] | aWPtr[indexWord( 4, 0 )] ) sig64 |= 1;
+    /*------------------------------------------------------------------------
+    *------------------------------------------------------------------------*/
+#if (i32_fromNaN != i32_fromPosOverflow) || (i32_fromNaN != i32_fromNegOverflow)
+    if ( (exp == 0x7FFF) && sig64 ) {
+#if (i32_fromNaN == i32_fromPosOverflow)
+        sign = 0;
+#elif (i32_fromNaN == i32_fromNegOverflow)
+        sign = 1;
+#else
+        softfloat_raiseFlags( softfloat_flag_invalid );
+        return i32_fromNaN;
+#endif
+    }
+#endif
+    /*------------------------------------------------------------------------
+    *------------------------------------------------------------------------*/
+    if ( exp ) sig64 |= UINT64_C( 0x0001000000000000 );
+    shiftDist = 0x4023 - exp;
+    if ( 0 < shiftDist ) sig64 = softfloat_shiftRightJam64( sig64, shiftDist );
+    return softfloat_roundToI32( sign, sig64, roundingMode, exact );
+
+}
+
+#endif
+
diff --git a/ext/softfloat/f128M_to_i32_r_minMag.c b/ext/softfloat/f128M_to_i32_r_minMag.c
new file mode 100644
index 0000000000..c87085cbe0
--- /dev/null
+++ b/ext/softfloat/f128M_to_i32_r_minMag.c
@@ -0,0 +1,106 @@
+
+/*============================================================================
+
+This C source file is part of the SoftFloat IEEE Floating-Point Arithmetic
+Package, Release 3d, by John R. Hauser.
+
+Copyright 2011, 2012, 2013, 2014, 2015, 2016 The Regents of the University of
+California.  All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met:
+
+ 1. Redistributions of source code must retain the above copyright notice,
+    this list of conditions, and the following disclaimer.
+
+ 2. Redistributions in binary form must reproduce the above copyright notice,
+    this list of conditions, and the following disclaimer in the documentation
+    and/or other materials provided with the distribution.
+
+ 3. Neither the name of the University nor the names of its contributors may
+    be used to endorse or promote products derived from this software without
+    specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS "AS IS", AND ANY
+EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE, ARE
+DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY
+DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+=============================================================================*/
+
+#include <stdbool.h>
+#include <stdint.h>
+#include "platform.h"
+#include "internals.h"
+#include "specialize.h"
+#include "softfloat.h"
+
+#ifdef SOFTFLOAT_FAST_INT64
+
+int_fast32_t f128M_to_i32_r_minMag( const float128_t *aPtr, bool exact )
+{
+
+    return f128_to_i32_r_minMag( *aPtr, exact );
+
+}
+
+#else
+
+int_fast32_t f128M_to_i32_r_minMag( const float128_t *aPtr, bool exact )
+{
+    const uint32_t *aWPtr;
+    uint32_t uiA96;
+    bool sign;
+    int32_t exp;
+    uint64_t sig64;
+    int32_t shiftDist;
+    uint32_t absZ, uiZ;
+    union { uint32_t ui; int32_t i; } uZ;
+
+    /*------------------------------------------------------------------------
+    *------------------------------------------------------------------------*/
+    aWPtr = (const uint32_t *) aPtr;
+    uiA96 = aWPtr[indexWordHi( 4 )];
+    sign = signF128UI96( uiA96 );
+    exp  = expF128UI96( uiA96 );
+    sig64 = (uint64_t) fracF128UI96( uiA96 )<<32 | aWPtr[indexWord( 4, 2 )];
+    if ( aWPtr[indexWord( 4, 1 )] | aWPtr[indexWord( 4, 0 )] ) sig64 |= 1;
+    /*------------------------------------------------------------------------
+    *------------------------------------------------------------------------*/
+    if ( exp < 0x3FFF ) {
+        if ( exact && (exp | sig64) ) {
+            softfloat_exceptionFlags |= softfloat_flag_inexact;
+        }
+        return 0;
+    }
+    /*------------------------------------------------------------------------
+    *------------------------------------------------------------------------*/
+    if ( 0x401F <= exp ) goto invalid;
+    shiftDist = 0x402F - exp;
+    sig64 |= UINT64_C( 0x0001000000000000 );
+    absZ = sig64>>shiftDist;
+    uiZ = sign ? -absZ : absZ;
+    if ( uiZ>>31 != sign ) goto invalid;
+    if ( exact && ((uint64_t) absZ<<shiftDist != sig64) ) {
+        softfloat_exceptionFlags |= softfloat_flag_inexact;
+    }
+    uZ.ui = uiZ;
+    return uZ.i;
+    /*------------------------------------------------------------------------
+    *------------------------------------------------------------------------*/
+ invalid:
+    softfloat_raiseFlags( softfloat_flag_invalid );
+    return
+        (exp == 0x7FFF) && sig64 ? i32_fromNaN
+            : sign ? i32_fromNegOverflow : i32_fromPosOverflow;
+
+}
+
+#endif
+
diff --git a/ext/softfloat/f128M_to_i64.c b/ext/softfloat/f128M_to_i64.c
new file mode 100644
index 0000000000..56376a93ab
--- /dev/null
+++ b/ext/softfloat/f128M_to_i64.c
@@ -0,0 +1,102 @@
+
+/*============================================================================
+
+This C source file is part of the SoftFloat IEEE Floating-Point Arithmetic
+Package, Release 3d, by John R. Hauser.
+
+Copyright 2011, 2012, 2013, 2014, 2015, 2016, 2017 The Regents of the
+University of California.  All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met:
+
+ 1. Redistributions of source code must retain the above copyright notice,
+    this list of conditions, and the following disclaimer.
+
+ 2. Redistributions in binary form must reproduce the above copyright notice,
+    this list of conditions, and the following disclaimer in the documentation
+    and/or other materials provided with the distribution.
+
+ 3. Neither the name of the University nor the names of its contributors may
+    be used to endorse or promote products derived from this software without
+    specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS "AS IS", AND ANY
+EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE, ARE
+DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY
+DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+=============================================================================*/
+
+#include <stdbool.h>
+#include <stdint.h>
+#include "platform.h"
+#include "internals.h"
+#include "specialize.h"
+#include "softfloat.h"
+
+#ifdef SOFTFLOAT_FAST_INT64
+
+int_fast64_t
+ f128M_to_i64( const float128_t *aPtr, uint_fast8_t roundingMode, bool exact )
+{
+
+    return f128_to_i64( *aPtr, roundingMode, exact );
+
+}
+
+#else
+
+int_fast64_t
+ f128M_to_i64( const float128_t *aPtr, uint_fast8_t roundingMode, bool exact )
+{
+    const uint32_t *aWPtr;
+    uint32_t uiA96;
+    bool sign;
+    int32_t exp;
+    uint32_t sig96;
+    int32_t shiftDist;
+    uint32_t sig[4];
+
+    /*------------------------------------------------------------------------
+    *------------------------------------------------------------------------*/
+    aWPtr = (const uint32_t *) aPtr;
+    uiA96 = aWPtr[indexWordHi( 4 )];
+    sign  = signF128UI96( uiA96 );
+    exp   = expF128UI96( uiA96 );
+    sig96 = fracF128UI96( uiA96 );
+    /*------------------------------------------------------------------------
+    *------------------------------------------------------------------------*/
+    shiftDist = 0x404F - exp;
+    if ( shiftDist < 17 ) {
+        softfloat_raiseFlags( softfloat_flag_invalid );
+        return
+            (exp == 0x7FFF)
+                && (sig96
+                        || (aWPtr[indexWord( 4, 2 )] | aWPtr[indexWord( 4, 1 )]
+                                | aWPtr[indexWord( 4, 0 )]))
+                ? i64_fromNaN
+                : sign ? i64_fromNegOverflow : i64_fromPosOverflow;
+    }
+    /*------------------------------------------------------------------------
+    *------------------------------------------------------------------------*/
+    if ( exp ) sig96 |= 0x00010000;
+    sig[indexWord( 4, 3 )] = sig96;
+    sig[indexWord( 4, 2 )] = aWPtr[indexWord( 4, 2 )];
+    sig[indexWord( 4, 1 )] = aWPtr[indexWord( 4, 1 )];
+    sig[indexWord( 4, 0 )] = aWPtr[indexWord( 4, 0 )];
+    softfloat_shiftRightJam128M( sig, shiftDist, sig );
+    return
+        softfloat_roundMToI64(
+            sign, sig + indexMultiwordLo( 4, 3 ), roundingMode, exact );
+
+}
+
+#endif
+
diff --git a/ext/softfloat/f128M_to_i64_r_minMag.c b/ext/softfloat/f128M_to_i64_r_minMag.c
new file mode 100644
index 0000000000..10bafb0b63
--- /dev/null
+++ b/ext/softfloat/f128M_to_i64_r_minMag.c
@@ -0,0 +1,124 @@
+
+/*============================================================================
+
+This C source file is part of the SoftFloat IEEE Floating-Point Arithmetic
+Package, Release 3d, by John R. Hauser.
+
+Copyright 2011, 2012, 2013, 2014, 2015, 2016 The Regents of the University of
+California.  All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met:
+
+ 1. Redistributions of source code must retain the above copyright notice,
+    this list of conditions, and the following disclaimer.
+
+ 2. Redistributions in binary form must reproduce the above copyright notice,
+    this list of conditions, and the following disclaimer in the documentation
+    and/or other materials provided with the distribution.
+
+ 3. Neither the name of the University nor the names of its contributors may
+    be used to endorse or promote products derived from this software without
+    specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS "AS IS", AND ANY
+EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE, ARE
+DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY
+DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+=============================================================================*/
+
+#include <stdbool.h>
+#include <stdint.h>
+#include "platform.h"
+#include "internals.h"
+#include "specialize.h"
+#include "softfloat.h"
+
+#ifdef SOFTFLOAT_FAST_INT64
+
+int_fast64_t f128M_to_i64_r_minMag( const float128_t *aPtr, bool exact )
+{
+
+    return f128_to_i64_r_minMag( *aPtr, exact );
+
+}
+
+#else
+
+int_fast64_t f128M_to_i64_r_minMag( const float128_t *aPtr, bool exact )
+{
+    const uint32_t *aWPtr;
+    uint32_t uiA96;
+    bool sign;
+    int32_t exp;
+    uint32_t sig96;
+    int32_t shiftDist;
+    uint32_t sig[4];
+    uint64_t uiZ;
+    union { uint64_t ui; int64_t i; } uZ;
+
+    /*------------------------------------------------------------------------
+    *------------------------------------------------------------------------*/
+    aWPtr = (const uint32_t *) aPtr;
+    uiA96 = aWPtr[indexWordHi( 4 )];
+    sign  = signF128UI96( uiA96 );
+    exp   = expF128UI96( uiA96 );
+    sig96 = fracF128UI96( uiA96 );
+    /*------------------------------------------------------------------------
+    *------------------------------------------------------------------------*/
+    shiftDist = 0x403E - exp;
+    if ( shiftDist < 0 ) goto invalid;
+    if ( exact ) {
+        if ( exp ) sig96 |= 0x00010000;
+        sig[indexWord( 4, 3 )] = sig96;
+        sig[indexWord( 4, 2 )] = aWPtr[indexWord( 4, 2 )];
+        sig[indexWord( 4, 1 )] = aWPtr[indexWord( 4, 1 )];
+        sig[indexWord( 4, 0 )] = aWPtr[indexWord( 4, 0 )];
+        softfloat_shiftRightJam128M( sig, shiftDist + 17, sig );
+        uiZ = (uint64_t) sig[indexWord( 4, 2 )]<<32 | sig[indexWord( 4, 1 )];
+        if ( uiZ>>63 && (! sign || (uiZ != UINT64_C( 0x8000000000000000 ))) ) {
+            goto invalid;
+        }
+        if ( sig[indexWordLo( 4 )] ) {
+            softfloat_exceptionFlags |= softfloat_flag_inexact;
+        }
+    } else {
+        if ( 64 <= shiftDist ) return 0;
+        uiZ =
+              (uint64_t) sig96<<47
+            | (uint64_t) aWPtr[indexWord( 4, 2 )]<<15
+            | aWPtr[indexWord( 4, 1 )]>>17;
+        if ( shiftDist ) {
+            uiZ |= UINT64_C( 0x8000000000000000 );
+            uiZ >>= shiftDist;
+        } else {
+            if ( uiZ || ! sign ) goto invalid;
+            uiZ |= UINT64_C( 0x8000000000000000 );
+        }
+    }
+    if ( sign ) uiZ = -uiZ;
+    uZ.ui = uiZ;
+    return uZ.i;
+    /*------------------------------------------------------------------------
+    *------------------------------------------------------------------------*/
+ invalid:
+    softfloat_raiseFlags( softfloat_flag_invalid );
+    return
+        (exp == 0x7FFF)
+            && (sig96
+                    || (aWPtr[indexWord( 4, 2 )] | aWPtr[indexWord( 4, 1 )]
+                            | aWPtr[indexWord( 4, 0 )]))
+            ? i64_fromNaN
+            : sign ? i64_fromNegOverflow : i64_fromPosOverflow;
+
+}
+
+#endif
+
diff --git a/ext/softfloat/f128M_to_ui32.c b/ext/softfloat/f128M_to_ui32.c
new file mode 100644
index 0000000000..4cb62b4be3
--- /dev/null
+++ b/ext/softfloat/f128M_to_ui32.c
@@ -0,0 +1,98 @@
+
+/*============================================================================
+
+This C source file is part of the SoftFloat IEEE Floating-Point Arithmetic
+Package, Release 3d, by John R. Hauser.
+
+Copyright 2011, 2012, 2013, 2014, 2015, 2016, 2017 The Regents of the
+University of California.  All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met:
+
+ 1. Redistributions of source code must retain the above copyright notice,
+    this list of conditions, and the following disclaimer.
+
+ 2. Redistributions in binary form must reproduce the above copyright notice,
+    this list of conditions, and the following disclaimer in the documentation
+    and/or other materials provided with the distribution.
+
+ 3. Neither the name of the University nor the names of its contributors may
+    be used to endorse or promote products derived from this software without
+    specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS "AS IS", AND ANY
+EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE, ARE
+DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY
+DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+=============================================================================*/
+
+#include <stdbool.h>
+#include <stdint.h>
+#include "platform.h"
+#include "internals.h"
+#include "specialize.h"
+#include "softfloat.h"
+
+#ifdef SOFTFLOAT_FAST_INT64
+
+uint_fast32_t
+ f128M_to_ui32( const float128_t *aPtr, uint_fast8_t roundingMode, bool exact )
+{
+
+    return f128_to_ui32( *aPtr, roundingMode, exact );
+
+}
+
+#else
+
+uint_fast32_t
+ f128M_to_ui32( const float128_t *aPtr, uint_fast8_t roundingMode, bool exact )
+{
+    const uint32_t *aWPtr;
+    uint32_t uiA96;
+    bool sign;
+    int32_t exp;
+    uint64_t sig64;
+    int32_t shiftDist;
+
+    /*------------------------------------------------------------------------
+    *------------------------------------------------------------------------*/
+    aWPtr = (const uint32_t *) aPtr;
+    uiA96 = aWPtr[indexWordHi( 4 )];
+    sign = signF128UI96( uiA96 );
+    exp  = expF128UI96( uiA96 );
+    sig64 = (uint64_t) fracF128UI96( uiA96 )<<32 | aWPtr[indexWord( 4, 2 )];
+    if ( aWPtr[indexWord( 4, 1 )] | aWPtr[indexWord( 4, 0 )] ) sig64 |= 1;
+    /*------------------------------------------------------------------------
+    *------------------------------------------------------------------------*/
+#if (ui32_fromNaN != ui32_fromPosOverflow) || (ui32_fromNaN != ui32_fromNegOverflow)
+    if ( (exp == 0x7FFF) && sig64 ) {
+#if (ui32_fromNaN == ui32_fromPosOverflow)
+        sign = 0;
+#elif (ui32_fromNaN == ui32_fromNegOverflow)
+        sign = 1;
+#else
+        softfloat_raiseFlags( softfloat_flag_invalid );
+        return ui32_fromNaN;
+#endif
+    }
+#endif
+    /*------------------------------------------------------------------------
+    *------------------------------------------------------------------------*/
+    if ( exp ) sig64 |= UINT64_C( 0x0001000000000000 );
+    shiftDist = 0x4023 - exp;
+    if ( 0 < shiftDist ) sig64 = softfloat_shiftRightJam64( sig64, shiftDist );
+    return softfloat_roundToUI32( sign, sig64, roundingMode, exact );
+
+}
+
+#endif
+
diff --git a/ext/softfloat/f128M_to_ui32_r_minMag.c b/ext/softfloat/f128M_to_ui32_r_minMag.c
new file mode 100644
index 0000000000..82eefed5e0
--- /dev/null
+++ b/ext/softfloat/f128M_to_ui32_r_minMag.c
@@ -0,0 +1,102 @@
+
+/*============================================================================
+
+This C source file is part of the SoftFloat IEEE Floating-Point Arithmetic
+Package, Release 3d, by John R. Hauser.
+
+Copyright 2011, 2012, 2013, 2014, 2015, 2016 The Regents of the University of
+California.  All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met:
+
+ 1. Redistributions of source code must retain the above copyright notice,
+    this list of conditions, and the following disclaimer.
+
+ 2. Redistributions in binary form must reproduce the above copyright notice,
+    this list of conditions, and the following disclaimer in the documentation
+    and/or other materials provided with the distribution.
+
+ 3. Neither the name of the University nor the names of its contributors may
+    be used to endorse or promote products derived from this software without
+    specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS "AS IS", AND ANY
+EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE, ARE
+DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY
+DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+=============================================================================*/
+
+#include <stdbool.h>
+#include <stdint.h>
+#include "platform.h"
+#include "internals.h"
+#include "specialize.h"
+#include "softfloat.h"
+
+#ifdef SOFTFLOAT_FAST_INT64
+
+uint_fast32_t f128M_to_ui32_r_minMag( const float128_t *aPtr, bool exact )
+{
+
+    return f128_to_ui32_r_minMag( *aPtr, exact );
+
+}
+
+#else
+
+uint_fast32_t f128M_to_ui32_r_minMag( const float128_t *aPtr, bool exact )
+{
+    const uint32_t *aWPtr;
+    uint32_t uiA96;
+    int32_t exp;
+    uint64_t sig64;
+    int32_t shiftDist;
+    bool sign;
+    uint32_t z;
+
+    /*------------------------------------------------------------------------
+    *------------------------------------------------------------------------*/
+    aWPtr = (const uint32_t *) aPtr;
+    uiA96 = aWPtr[indexWordHi( 4 )];
+    exp = expF128UI96( uiA96 );
+    sig64 = (uint64_t) fracF128UI96( uiA96 )<<32 | aWPtr[indexWord( 4, 2 )];
+    if ( aWPtr[indexWord( 4, 1 )] | aWPtr[indexWord( 4, 0 )] ) sig64 |= 1;
+    /*------------------------------------------------------------------------
+    *------------------------------------------------------------------------*/
+    shiftDist = 0x402F - exp;
+    if ( 49 <= shiftDist ) {
+        if ( exact && (exp | sig64) ) {
+            softfloat_exceptionFlags |= softfloat_flag_inexact;
+        }
+        return 0;
+    }
+    /*------------------------------------------------------------------------
+    *------------------------------------------------------------------------*/
+    sign = signF128UI96( uiA96 );
+    if ( sign || (shiftDist < 17) ) {
+        softfloat_raiseFlags( softfloat_flag_invalid );
+        return
+            (exp == 0x7FFF) && sig64 ? ui32_fromNaN
+                : sign ? ui32_fromNegOverflow : ui32_fromPosOverflow;
+    }
+    /*------------------------------------------------------------------------
+    *------------------------------------------------------------------------*/
+    sig64 |= UINT64_C( 0x0001000000000000 );
+    z = sig64>>shiftDist;
+    if ( exact && ((uint64_t) z<<shiftDist != sig64) ) {
+        softfloat_exceptionFlags |= softfloat_flag_inexact;
+    }
+    return z;
+
+}
+
+#endif
+
diff --git a/ext/softfloat/f128M_to_ui64.c b/ext/softfloat/f128M_to_ui64.c
new file mode 100644
index 0000000000..44ac02025d
--- /dev/null
+++ b/ext/softfloat/f128M_to_ui64.c
@@ -0,0 +1,102 @@
+
+/*============================================================================
+
+This C source file is part of the SoftFloat IEEE Floating-Point Arithmetic
+Package, Release 3d, by John R. Hauser.
+
+Copyright 2011, 2012, 2013, 2014, 2015, 2016, 2017 The Regents of the
+University of California.  All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met:
+
+ 1. Redistributions of source code must retain the above copyright notice,
+    this list of conditions, and the following disclaimer.
+
+ 2. Redistributions in binary form must reproduce the above copyright notice,
+    this list of conditions, and the following disclaimer in the documentation
+    and/or other materials provided with the distribution.
+
+ 3. Neither the name of the University nor the names of its contributors may
+    be used to endorse or promote products derived from this software without
+    specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS "AS IS", AND ANY
+EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE, ARE
+DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY
+DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+=============================================================================*/
+
+#include <stdbool.h>
+#include <stdint.h>
+#include "platform.h"
+#include "internals.h"
+#include "specialize.h"
+#include "softfloat.h"
+
+#ifdef SOFTFLOAT_FAST_INT64
+
+uint_fast64_t
+ f128M_to_ui64( const float128_t *aPtr, uint_fast8_t roundingMode, bool exact )
+{
+
+    return f128_to_ui64( *aPtr, roundingMode, exact );
+
+}
+
+#else
+
+uint_fast64_t
+ f128M_to_ui64( const float128_t *aPtr, uint_fast8_t roundingMode, bool exact )
+{
+    const uint32_t *aWPtr;
+    uint32_t uiA96;
+    bool sign;
+    int32_t exp;
+    uint32_t sig96;
+    int32_t shiftDist;
+    uint32_t sig[4];
+
+    /*------------------------------------------------------------------------
+    *------------------------------------------------------------------------*/
+    aWPtr = (const uint32_t *) aPtr;
+    uiA96 = aWPtr[indexWordHi( 4 )];
+    sign  = signF128UI96( uiA96 );
+    exp   = expF128UI96( uiA96 );
+    sig96 = fracF128UI96( uiA96 );
+    /*------------------------------------------------------------------------
+    *------------------------------------------------------------------------*/
+    shiftDist = 0x404F - exp;
+    if ( shiftDist < 17 ) {
+        softfloat_raiseFlags( softfloat_flag_invalid );
+        return
+            (exp == 0x7FFF)
+                && (sig96
+                        || (aWPtr[indexWord( 4, 2 )] | aWPtr[indexWord( 4, 1 )]
+                                | aWPtr[indexWord( 4, 0 )]))
+                ? ui64_fromNaN
+                : sign ? ui64_fromNegOverflow : ui64_fromPosOverflow;
+    }
+    /*------------------------------------------------------------------------
+    *------------------------------------------------------------------------*/
+    if ( exp ) sig96 |= 0x00010000;
+    sig[indexWord( 4, 3 )] = sig96;
+    sig[indexWord( 4, 2 )] = aWPtr[indexWord( 4, 2 )];
+    sig[indexWord( 4, 1 )] = aWPtr[indexWord( 4, 1 )];
+    sig[indexWord( 4, 0 )] = aWPtr[indexWord( 4, 0 )];
+    softfloat_shiftRightJam128M( sig, shiftDist, sig );
+    return
+        softfloat_roundMToUI64(
+            sign, sig + indexMultiwordLo( 4, 3 ), roundingMode, exact );
+
+}
+
+#endif
+
diff --git a/ext/softfloat/f128M_to_ui64_r_minMag.c b/ext/softfloat/f128M_to_ui64_r_minMag.c
new file mode 100644
index 0000000000..f27ce15f17
--- /dev/null
+++ b/ext/softfloat/f128M_to_ui64_r_minMag.c
@@ -0,0 +1,114 @@
+
+/*============================================================================
+
+This C source file is part of the SoftFloat IEEE Floating-Point Arithmetic
+Package, Release 3d, by John R. Hauser.
+
+Copyright 2011, 2012, 2013, 2014, 2015, 2016 The Regents of the University of
+California.  All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met:
+
+ 1. Redistributions of source code must retain the above copyright notice,
+    this list of conditions, and the following disclaimer.
+
+ 2. Redistributions in binary form must reproduce the above copyright notice,
+    this list of conditions, and the following disclaimer in the documentation
+    and/or other materials provided with the distribution.
+
+ 3. Neither the name of the University nor the names of its contributors may
+    be used to endorse or promote products derived from this software without
+    specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS "AS IS", AND ANY
+EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE, ARE
+DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY
+DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+=============================================================================*/
+
+#include <stdbool.h>
+#include <stdint.h>
+#include "platform.h"
+#include "internals.h"
+#include "specialize.h"
+#include "softfloat.h"
+
+#ifdef SOFTFLOAT_FAST_INT64
+
+uint_fast64_t f128M_to_ui64_r_minMag( const float128_t *aPtr, bool exact )
+{
+
+    return f128_to_ui64_r_minMag( *aPtr, exact );
+
+}
+
+#else
+
+uint_fast64_t f128M_to_ui64_r_minMag( const float128_t *aPtr, bool exact )
+{
+    const uint32_t *aWPtr;
+    uint32_t uiA96;
+    bool sign;
+    int32_t exp;
+    uint32_t sig96;
+    int32_t shiftDist;
+    uint32_t sig[4];
+    uint64_t z;
+
+    /*------------------------------------------------------------------------
+    *------------------------------------------------------------------------*/
+    aWPtr = (const uint32_t *) aPtr;
+    uiA96 = aWPtr[indexWordHi( 4 )];
+    sign  = signF128UI96( uiA96 );
+    exp   = expF128UI96( uiA96 );
+    sig96 = fracF128UI96( uiA96 );
+    /*------------------------------------------------------------------------
+    *------------------------------------------------------------------------*/
+    shiftDist = 0x403E - exp;
+    if ( shiftDist < 0 ) goto invalid;
+    if ( exact ) {
+        if ( exp ) sig96 |= 0x00010000;
+        sig[indexWord( 4, 3 )] = sig96;
+        sig[indexWord( 4, 2 )] = aWPtr[indexWord( 4, 2 )];
+        sig[indexWord( 4, 1 )] = aWPtr[indexWord( 4, 1 )];
+        sig[indexWord( 4, 0 )] = aWPtr[indexWord( 4, 0 )];
+        softfloat_shiftRightJam128M( sig, shiftDist + 17, sig );
+        z = (uint64_t) sig[indexWord( 4, 2 )]<<32 | sig[indexWord( 4, 1 )];
+        if ( sign && z ) goto invalid;
+        if ( sig[indexWordLo( 4 )] ) {
+            softfloat_exceptionFlags |= softfloat_flag_inexact;
+        }
+    } else {
+        if ( 64 <= shiftDist ) return 0;
+        if ( sign ) goto invalid;
+        z =   UINT64_C( 0x8000000000000000 )
+            | (uint64_t) sig96<<47
+            | (uint64_t) aWPtr[indexWord( 4, 2 )]<<15
+            | aWPtr[indexWord( 4, 1 )]>>17;
+        z >>= shiftDist;
+    }
+    return z;
+    /*------------------------------------------------------------------------
+    *------------------------------------------------------------------------*/
+ invalid:
+    softfloat_raiseFlags( softfloat_flag_invalid );
+    return
+        (exp == 0x7FFF)
+            && (sig96
+                    || (aWPtr[indexWord( 4, 2 )] | aWPtr[indexWord( 4, 1 )]
+                            | aWPtr[indexWord( 4, 0 )]))
+            ? ui64_fromNaN
+            : sign ? ui64_fromNegOverflow : ui64_fromPosOverflow;
+
+}
+
+#endif
+
diff --git a/ext/softfloat/f128_add.c b/ext/softfloat/f128_add.c
index 173c6762ca..6568ab6f34 100644
--- a/ext/softfloat/f128_add.c
+++ b/ext/softfloat/f128_add.c
@@ -36,9 +36,8 @@ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 
 #include <stdbool.h>
 #include <stdint.h>
-
-#include "internals.h"
 #include "platform.h"
+#include "internals.h"
 #include "softfloat.h"
 
 float128_t f128_add( float128_t a, float128_t b )
diff --git a/ext/softfloat/f128_classify.c b/ext/softfloat/f128_classify.c
index 254e5a2239..1092a9b51f 100755
--- a/ext/softfloat/f128_classify.c
+++ b/ext/softfloat/f128_classify.c
@@ -1,11 +1,10 @@
 
 #include <stdbool.h>
 #include <stdint.h>
-
-#include "internals.h"
 #include "platform.h"
-#include "softfloat.h"
+#include "internals.h"
 #include "specialize.h"
+#include "softfloat.h"
 
 uint_fast16_t f128_classify( float128_t a )
 {
diff --git a/ext/softfloat/f128_div.c b/ext/softfloat/f128_div.c
index e2946684b2..9384e7562e 100644
--- a/ext/softfloat/f128_div.c
+++ b/ext/softfloat/f128_div.c
@@ -36,11 +36,10 @@ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 
 #include <stdbool.h>
 #include <stdint.h>
-
-#include "internals.h"
 #include "platform.h"
-#include "softfloat.h"
+#include "internals.h"
 #include "specialize.h"
+#include "softfloat.h"
 
 float128_t f128_div( float128_t a, float128_t b )
 {
diff --git a/ext/softfloat/f128_eq.c b/ext/softfloat/f128_eq.c
index 6466888d1c..a0e1ad2868 100644
--- a/ext/softfloat/f128_eq.c
+++ b/ext/softfloat/f128_eq.c
@@ -36,11 +36,10 @@ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 
 #include <stdbool.h>
 #include <stdint.h>
-
-#include "internals.h"
 #include "platform.h"
-#include "softfloat.h"
+#include "internals.h"
 #include "specialize.h"
+#include "softfloat.h"
 
 bool f128_eq( float128_t a, float128_t b )
 {
diff --git a/ext/softfloat/f128_eq_signaling.c b/ext/softfloat/f128_eq_signaling.c
index 03af7e08ce..bd37b979d2 100644
--- a/ext/softfloat/f128_eq_signaling.c
+++ b/ext/softfloat/f128_eq_signaling.c
@@ -36,9 +36,8 @@ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 
 #include <stdbool.h>
 #include <stdint.h>
-
-#include "internals.h"
 #include "platform.h"
+#include "internals.h"
 #include "softfloat.h"
 
 bool f128_eq_signaling( float128_t a, float128_t b )
diff --git a/ext/softfloat/f128_isSignalingNaN.c b/ext/softfloat/f128_isSignalingNaN.c
index 54a5764450..fced58e503 100644
--- a/ext/softfloat/f128_isSignalingNaN.c
+++ b/ext/softfloat/f128_isSignalingNaN.c
@@ -35,11 +35,10 @@ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 =============================================================================*/
 
 #include <stdbool.h>
-
-#include "internals.h"
 #include "platform.h"
-#include "softfloat.h"
+#include "internals.h"
 #include "specialize.h"
+#include "softfloat.h"
 
 bool f128_isSignalingNaN( float128_t a )
 {
diff --git a/ext/softfloat/f128_le.c b/ext/softfloat/f128_le.c
index 15f8fa31f0..9b0aa23445 100644
--- a/ext/softfloat/f128_le.c
+++ b/ext/softfloat/f128_le.c
@@ -36,9 +36,8 @@ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 
 #include <stdbool.h>
 #include <stdint.h>
-
-#include "internals.h"
 #include "platform.h"
+#include "internals.h"
 #include "softfloat.h"
 
 bool f128_le( float128_t a, float128_t b )
diff --git a/ext/softfloat/f128_le_quiet.c b/ext/softfloat/f128_le_quiet.c
index f5a98cc5d1..3b44038859 100644
--- a/ext/softfloat/f128_le_quiet.c
+++ b/ext/softfloat/f128_le_quiet.c
@@ -36,11 +36,10 @@ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 
 #include <stdbool.h>
 #include <stdint.h>
-
-#include "internals.h"
 #include "platform.h"
-#include "softfloat.h"
+#include "internals.h"
 #include "specialize.h"
+#include "softfloat.h"
 
 bool f128_le_quiet( float128_t a, float128_t b )
 {
diff --git a/ext/softfloat/f128_lt.c b/ext/softfloat/f128_lt.c
index b176cc3fb3..a28f95b741 100644
--- a/ext/softfloat/f128_lt.c
+++ b/ext/softfloat/f128_lt.c
@@ -36,9 +36,8 @@ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 
 #include <stdbool.h>
 #include <stdint.h>
-
-#include "internals.h"
 #include "platform.h"
+#include "internals.h"
 #include "softfloat.h"
 
 bool f128_lt( float128_t a, float128_t b )
diff --git a/ext/softfloat/f128_lt_quiet.c b/ext/softfloat/f128_lt_quiet.c
index 129c2cdfc8..20146ee4ba 100644
--- a/ext/softfloat/f128_lt_quiet.c
+++ b/ext/softfloat/f128_lt_quiet.c
@@ -36,11 +36,10 @@ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 
 #include <stdbool.h>
 #include <stdint.h>
-
-#include "internals.h"
 #include "platform.h"
-#include "softfloat.h"
+#include "internals.h"
 #include "specialize.h"
+#include "softfloat.h"
 
 bool f128_lt_quiet( float128_t a, float128_t b )
 {
diff --git a/ext/softfloat/f128_mul.c b/ext/softfloat/f128_mul.c
index cb5b191b95..1871613954 100644
--- a/ext/softfloat/f128_mul.c
+++ b/ext/softfloat/f128_mul.c
@@ -36,11 +36,10 @@ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 
 #include <stdbool.h>
 #include <stdint.h>
-
-#include "internals.h"
 #include "platform.h"
-#include "softfloat.h"
+#include "internals.h"
 #include "specialize.h"
+#include "softfloat.h"
 
 float128_t f128_mul( float128_t a, float128_t b )
 {
diff --git a/ext/softfloat/f128_mulAdd.c b/ext/softfloat/f128_mulAdd.c
index 5655b8892f..b2e2142fa8 100644
--- a/ext/softfloat/f128_mulAdd.c
+++ b/ext/softfloat/f128_mulAdd.c
@@ -35,9 +35,8 @@ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 =============================================================================*/
 
 #include <stdint.h>
-
-#include "internals.h"
 #include "platform.h"
+#include "internals.h"
 #include "softfloat.h"
 
 float128_t f128_mulAdd( float128_t a, float128_t b, float128_t c )
diff --git a/ext/softfloat/f128_rem.c b/ext/softfloat/f128_rem.c
index b2dcdd6862..555d71eb7a 100644
--- a/ext/softfloat/f128_rem.c
+++ b/ext/softfloat/f128_rem.c
@@ -36,11 +36,10 @@ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 
 #include <stdbool.h>
 #include <stdint.h>
-
-#include "internals.h"
 #include "platform.h"
-#include "softfloat.h"
+#include "internals.h"
 #include "specialize.h"
+#include "softfloat.h"
 
 float128_t f128_rem( float128_t a, float128_t b )
 {
diff --git a/ext/softfloat/f128_roundToInt.c b/ext/softfloat/f128_roundToInt.c
index a321affaec..0f1f07ecd3 100644
--- a/ext/softfloat/f128_roundToInt.c
+++ b/ext/softfloat/f128_roundToInt.c
@@ -36,11 +36,10 @@ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 
 #include <stdbool.h>
 #include <stdint.h>
-
-#include "internals.h"
 #include "platform.h"
-#include "softfloat.h"
+#include "internals.h"
 #include "specialize.h"
+#include "softfloat.h"
 
 float128_t
  f128_roundToInt( float128_t a, uint_fast8_t roundingMode, bool exact )
diff --git a/ext/softfloat/f128_sqrt.c b/ext/softfloat/f128_sqrt.c
index 75af06a807..5b99694ec4 100644
--- a/ext/softfloat/f128_sqrt.c
+++ b/ext/softfloat/f128_sqrt.c
@@ -36,11 +36,10 @@ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 
 #include <stdbool.h>
 #include <stdint.h>
-
-#include "internals.h"
 #include "platform.h"
-#include "softfloat.h"
+#include "internals.h"
 #include "specialize.h"
+#include "softfloat.h"
 
 float128_t f128_sqrt( float128_t a )
 {
diff --git a/ext/softfloat/f128_sub.c b/ext/softfloat/f128_sub.c
index 5005177a96..ce2e5adb13 100644
--- a/ext/softfloat/f128_sub.c
+++ b/ext/softfloat/f128_sub.c
@@ -36,9 +36,8 @@ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 
 #include <stdbool.h>
 #include <stdint.h>
-
-#include "internals.h"
 #include "platform.h"
+#include "internals.h"
 #include "softfloat.h"
 
 float128_t f128_sub( float128_t a, float128_t b )
diff --git a/ext/softfloat/f128_to_extF80.c b/ext/softfloat/f128_to_extF80.c
new file mode 100644
index 0000000000..fb55406dfb
--- /dev/null
+++ b/ext/softfloat/f128_to_extF80.c
@@ -0,0 +1,109 @@
+
+/*============================================================================
+
+This C source file is part of the SoftFloat IEEE Floating-Point Arithmetic
+Package, Release 3d, by John R. Hauser.
+
+Copyright 2011, 2012, 2013, 2014, 2015 The Regents of the University of
+California.  All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met:
+
+ 1. Redistributions of source code must retain the above copyright notice,
+    this list of conditions, and the following disclaimer.
+
+ 2. Redistributions in binary form must reproduce the above copyright notice,
+    this list of conditions, and the following disclaimer in the documentation
+    and/or other materials provided with the distribution.
+
+ 3. Neither the name of the University nor the names of its contributors may
+    be used to endorse or promote products derived from this software without
+    specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS "AS IS", AND ANY
+EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE, ARE
+DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY
+DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+=============================================================================*/
+
+#include <stdbool.h>
+#include <stdint.h>
+#include "platform.h"
+#include "internals.h"
+#include "specialize.h"
+#include "softfloat.h"
+
+extFloat80_t f128_to_extF80( float128_t a )
+{
+    union ui128_f128 uA;
+    uint_fast64_t uiA64, uiA0;
+    bool sign;
+    int_fast32_t exp;
+    uint_fast64_t frac64, frac0;
+    struct commonNaN commonNaN;
+    struct uint128 uiZ;
+    uint_fast16_t uiZ64;
+    uint_fast64_t uiZ0;
+    struct exp32_sig128 normExpSig;
+    struct uint128 sig128;
+    union { struct extFloat80M s; extFloat80_t f; } uZ;
+
+    /*------------------------------------------------------------------------
+    *------------------------------------------------------------------------*/
+    uA.f = a;
+    uiA64 = uA.ui.v64;
+    uiA0  = uA.ui.v0;
+    sign   = signF128UI64( uiA64 );
+    exp    = expF128UI64( uiA64 );
+    frac64 = fracF128UI64( uiA64 );
+    frac0  = uiA0;
+    /*------------------------------------------------------------------------
+    *------------------------------------------------------------------------*/
+    if ( exp == 0x7FFF ) {
+        if ( frac64 | frac0 ) {
+            softfloat_f128UIToCommonNaN( uiA64, uiA0, &commonNaN );
+            uiZ = softfloat_commonNaNToExtF80UI( &commonNaN );
+            uiZ64 = uiZ.v64;
+            uiZ0  = uiZ.v0;
+        } else {
+            uiZ64 = packToExtF80UI64( sign, 0x7FFF );
+            uiZ0  = UINT64_C( 0x8000000000000000 );
+        }
+        goto uiZ;
+    }
+    /*------------------------------------------------------------------------
+    *------------------------------------------------------------------------*/
+    if ( ! exp ) {
+        if ( ! (frac64 | frac0) ) {
+            uiZ64 = packToExtF80UI64( sign, 0 );
+            uiZ0  = 0;
+            goto uiZ;
+        }
+        normExpSig = softfloat_normSubnormalF128Sig( frac64, frac0 );
+        exp   = normExpSig.exp;
+        frac64 = normExpSig.sig.v64;
+        frac0  = normExpSig.sig.v0;
+    }
+    /*------------------------------------------------------------------------
+    *------------------------------------------------------------------------*/
+    sig128 =
+        softfloat_shortShiftLeft128(
+            frac64 | UINT64_C( 0x0001000000000000 ), frac0, 15 );
+    return softfloat_roundPackToExtF80( sign, exp, sig128.v64, sig128.v0, 80 );
+    /*------------------------------------------------------------------------
+    *------------------------------------------------------------------------*/
+ uiZ:
+    uZ.s.signExp = uiZ64;
+    uZ.s.signif  = uiZ0;
+    return uZ.f;
+
+}
+
diff --git a/ext/softfloat/f128_to_f16.c b/ext/softfloat/f128_to_f16.c
index e929b22d34..a910c12a30 100644
--- a/ext/softfloat/f128_to_f16.c
+++ b/ext/softfloat/f128_to_f16.c
@@ -36,11 +36,10 @@ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 
 #include <stdbool.h>
 #include <stdint.h>
-
-#include "internals.h"
 #include "platform.h"
-#include "softfloat.h"
+#include "internals.h"
 #include "specialize.h"
+#include "softfloat.h"
 
 float16_t f128_to_f16( float128_t a )
 {
diff --git a/ext/softfloat/f128_to_f32.c b/ext/softfloat/f128_to_f32.c
index 4289d4cc81..d890d3ebe7 100644
--- a/ext/softfloat/f128_to_f32.c
+++ b/ext/softfloat/f128_to_f32.c
@@ -36,11 +36,10 @@ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 
 #include <stdbool.h>
 #include <stdint.h>
-
-#include "internals.h"
 #include "platform.h"
-#include "softfloat.h"
+#include "internals.h"
 #include "specialize.h"
+#include "softfloat.h"
 
 float32_t f128_to_f32( float128_t a )
 {
diff --git a/ext/softfloat/f128_to_f64.c b/ext/softfloat/f128_to_f64.c
index 1fdb258e43..e7aec20156 100644
--- a/ext/softfloat/f128_to_f64.c
+++ b/ext/softfloat/f128_to_f64.c
@@ -36,11 +36,10 @@ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 
 #include <stdbool.h>
 #include <stdint.h>
-
-#include "internals.h"
 #include "platform.h"
-#include "softfloat.h"
+#include "internals.h"
 #include "specialize.h"
+#include "softfloat.h"
 
 float64_t f128_to_f64( float128_t a )
 {
diff --git a/ext/softfloat/f128_to_i32.c b/ext/softfloat/f128_to_i32.c
index 781fe6ce4e..507691ccb8 100644
--- a/ext/softfloat/f128_to_i32.c
+++ b/ext/softfloat/f128_to_i32.c
@@ -36,11 +36,10 @@ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 
 #include <stdbool.h>
 #include <stdint.h>
-
-#include "internals.h"
 #include "platform.h"
-#include "softfloat.h"
+#include "internals.h"
 #include "specialize.h"
+#include "softfloat.h"
 
 int_fast32_t f128_to_i32( float128_t a, uint_fast8_t roundingMode, bool exact )
 {
diff --git a/ext/softfloat/f128_to_i32_r_minMag.c b/ext/softfloat/f128_to_i32_r_minMag.c
index d102626d7c..fc9f84f18b 100644
--- a/ext/softfloat/f128_to_i32_r_minMag.c
+++ b/ext/softfloat/f128_to_i32_r_minMag.c
@@ -36,11 +36,10 @@ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 
 #include <stdbool.h>
 #include <stdint.h>
-
-#include "internals.h"
 #include "platform.h"
-#include "softfloat.h"
+#include "internals.h"
 #include "specialize.h"
+#include "softfloat.h"
 
 int_fast32_t f128_to_i32_r_minMag( float128_t a, bool exact )
 {
diff --git a/ext/softfloat/f128_to_i64.c b/ext/softfloat/f128_to_i64.c
index 2ebda20ec0..2282eafde5 100644
--- a/ext/softfloat/f128_to_i64.c
+++ b/ext/softfloat/f128_to_i64.c
@@ -36,11 +36,10 @@ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 
 #include <stdbool.h>
 #include <stdint.h>
-
-#include "internals.h"
 #include "platform.h"
-#include "softfloat.h"
+#include "internals.h"
 #include "specialize.h"
+#include "softfloat.h"
 
 int_fast64_t f128_to_i64( float128_t a, uint_fast8_t roundingMode, bool exact )
 {
diff --git a/ext/softfloat/f128_to_i64_r_minMag.c b/ext/softfloat/f128_to_i64_r_minMag.c
index e2a6bd4265..7e0d63da9c 100644
--- a/ext/softfloat/f128_to_i64_r_minMag.c
+++ b/ext/softfloat/f128_to_i64_r_minMag.c
@@ -36,11 +36,10 @@ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 
 #include <stdbool.h>
 #include <stdint.h>
-
-#include "internals.h"
 #include "platform.h"
-#include "softfloat.h"
+#include "internals.h"
 #include "specialize.h"
+#include "softfloat.h"
 
 int_fast64_t f128_to_i64_r_minMag( float128_t a, bool exact )
 {
diff --git a/ext/softfloat/f128_to_ui32.c b/ext/softfloat/f128_to_ui32.c
index 2139720f5d..75e53d9b27 100644
--- a/ext/softfloat/f128_to_ui32.c
+++ b/ext/softfloat/f128_to_ui32.c
@@ -36,11 +36,10 @@ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 
 #include <stdbool.h>
 #include <stdint.h>
-
-#include "internals.h"
 #include "platform.h"
-#include "softfloat.h"
+#include "internals.h"
 #include "specialize.h"
+#include "softfloat.h"
 
 uint_fast32_t
  f128_to_ui32( float128_t a, uint_fast8_t roundingMode, bool exact )
diff --git a/ext/softfloat/f128_to_ui32_r_minMag.c b/ext/softfloat/f128_to_ui32_r_minMag.c
index 84470cda7e..2097fb81f6 100644
--- a/ext/softfloat/f128_to_ui32_r_minMag.c
+++ b/ext/softfloat/f128_to_ui32_r_minMag.c
@@ -36,11 +36,10 @@ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 
 #include <stdbool.h>
 #include <stdint.h>
-
-#include "internals.h"
 #include "platform.h"
-#include "softfloat.h"
+#include "internals.h"
 #include "specialize.h"
+#include "softfloat.h"
 
 uint_fast32_t f128_to_ui32_r_minMag( float128_t a, bool exact )
 {
diff --git a/ext/softfloat/f128_to_ui64.c b/ext/softfloat/f128_to_ui64.c
index 6f236fbf50..b88d25151a 100644
--- a/ext/softfloat/f128_to_ui64.c
+++ b/ext/softfloat/f128_to_ui64.c
@@ -36,11 +36,10 @@ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 
 #include <stdbool.h>
 #include <stdint.h>
-
-#include "internals.h"
 #include "platform.h"
-#include "softfloat.h"
+#include "internals.h"
 #include "specialize.h"
+#include "softfloat.h"
 
 uint_fast64_t
  f128_to_ui64( float128_t a, uint_fast8_t roundingMode, bool exact )
diff --git a/ext/softfloat/f128_to_ui64_r_minMag.c b/ext/softfloat/f128_to_ui64_r_minMag.c
index bc1c230708..fb16320aa5 100644
--- a/ext/softfloat/f128_to_ui64_r_minMag.c
+++ b/ext/softfloat/f128_to_ui64_r_minMag.c
@@ -36,11 +36,10 @@ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 
 #include <stdbool.h>
 #include <stdint.h>
-
-#include "internals.h"
 #include "platform.h"
-#include "softfloat.h"
+#include "internals.h"
 #include "specialize.h"
+#include "softfloat.h"
 
 uint_fast64_t f128_to_ui64_r_minMag( float128_t a, bool exact )
 {
diff --git a/ext/softfloat/f16_add.c b/ext/softfloat/f16_add.c
index 569a7ad47a..eb763d0119 100644
--- a/ext/softfloat/f16_add.c
+++ b/ext/softfloat/f16_add.c
@@ -36,9 +36,8 @@ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 
 #include <stdbool.h>
 #include <stdint.h>
-
-#include "internals.h"
 #include "platform.h"
+#include "internals.h"
 #include "softfloat.h"
 
 float16_t f16_add( float16_t a, float16_t b )
diff --git a/ext/softfloat/f16_div.c b/ext/softfloat/f16_div.c
index 554ff09a27..71b5c29b0c 100644
--- a/ext/softfloat/f16_div.c
+++ b/ext/softfloat/f16_div.c
@@ -36,11 +36,10 @@ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 
 #include <stdbool.h>
 #include <stdint.h>
-
-#include "internals.h"
 #include "platform.h"
-#include "softfloat.h"
+#include "internals.h"
 #include "specialize.h"
+#include "softfloat.h"
 
 extern const uint16_t softfloat_approxRecip_1k0s[];
 extern const uint16_t softfloat_approxRecip_1k1s[];
diff --git a/ext/softfloat/f16_eq.c b/ext/softfloat/f16_eq.c
index ee4b6a8b87..37a60998f1 100644
--- a/ext/softfloat/f16_eq.c
+++ b/ext/softfloat/f16_eq.c
@@ -36,11 +36,10 @@ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 
 #include <stdbool.h>
 #include <stdint.h>
-
-#include "internals.h"
 #include "platform.h"
-#include "softfloat.h"
+#include "internals.h"
 #include "specialize.h"
+#include "softfloat.h"
 
 bool f16_eq( float16_t a, float16_t b )
 {
diff --git a/ext/softfloat/f16_eq_signaling.c b/ext/softfloat/f16_eq_signaling.c
index 1c1644e5e8..894f7b59d7 100644
--- a/ext/softfloat/f16_eq_signaling.c
+++ b/ext/softfloat/f16_eq_signaling.c
@@ -36,9 +36,8 @@ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 
 #include <stdbool.h>
 #include <stdint.h>
-
-#include "internals.h"
 #include "platform.h"
+#include "internals.h"
 #include "softfloat.h"
 
 bool f16_eq_signaling( float16_t a, float16_t b )
diff --git a/ext/softfloat/f16_isSignalingNaN.c b/ext/softfloat/f16_isSignalingNaN.c
index 46842b0361..657805be3f 100644
--- a/ext/softfloat/f16_isSignalingNaN.c
+++ b/ext/softfloat/f16_isSignalingNaN.c
@@ -35,11 +35,10 @@ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 =============================================================================*/
 
 #include <stdbool.h>
-
-#include "internals.h"
 #include "platform.h"
-#include "softfloat.h"
+#include "internals.h"
 #include "specialize.h"
+#include "softfloat.h"
 
 bool f16_isSignalingNaN( float16_t a )
 {
diff --git a/ext/softfloat/f16_le.c b/ext/softfloat/f16_le.c
index 2186653f15..37eaf1879a 100644
--- a/ext/softfloat/f16_le.c
+++ b/ext/softfloat/f16_le.c
@@ -36,9 +36,8 @@ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 
 #include <stdbool.h>
 #include <stdint.h>
-
-#include "internals.h"
 #include "platform.h"
+#include "internals.h"
 #include "softfloat.h"
 
 bool f16_le( float16_t a, float16_t b )
diff --git a/ext/softfloat/f16_le_quiet.c b/ext/softfloat/f16_le_quiet.c
index b53ccdb9dc..8391db74db 100644
--- a/ext/softfloat/f16_le_quiet.c
+++ b/ext/softfloat/f16_le_quiet.c
@@ -36,11 +36,10 @@ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 
 #include <stdbool.h>
 #include <stdint.h>
-
-#include "internals.h"
 #include "platform.h"
-#include "softfloat.h"
+#include "internals.h"
 #include "specialize.h"
+#include "softfloat.h"
 
 bool f16_le_quiet( float16_t a, float16_t b )
 {
diff --git a/ext/softfloat/f16_lt.c b/ext/softfloat/f16_lt.c
index ef9b242992..3d3522a473 100644
--- a/ext/softfloat/f16_lt.c
+++ b/ext/softfloat/f16_lt.c
@@ -36,9 +36,8 @@ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 
 #include <stdbool.h>
 #include <stdint.h>
-
-#include "internals.h"
 #include "platform.h"
+#include "internals.h"
 #include "softfloat.h"
 
 bool f16_lt( float16_t a, float16_t b )
diff --git a/ext/softfloat/f16_lt_quiet.c b/ext/softfloat/f16_lt_quiet.c
index 2621e8d8a2..37f762cdae 100644
--- a/ext/softfloat/f16_lt_quiet.c
+++ b/ext/softfloat/f16_lt_quiet.c
@@ -36,11 +36,10 @@ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 
 #include <stdbool.h>
 #include <stdint.h>
-
-#include "internals.h"
 #include "platform.h"
-#include "softfloat.h"
+#include "internals.h"
 #include "specialize.h"
+#include "softfloat.h"
 
 bool f16_lt_quiet( float16_t a, float16_t b )
 {
diff --git a/ext/softfloat/f16_mul.c b/ext/softfloat/f16_mul.c
index 7990162d07..255caa7edc 100644
--- a/ext/softfloat/f16_mul.c
+++ b/ext/softfloat/f16_mul.c
@@ -36,11 +36,10 @@ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 
 #include <stdbool.h>
 #include <stdint.h>
-
-#include "internals.h"
 #include "platform.h"
-#include "softfloat.h"
+#include "internals.h"
 #include "specialize.h"
+#include "softfloat.h"
 
 float16_t f16_mul( float16_t a, float16_t b )
 {
diff --git a/ext/softfloat/f16_mulAdd.c b/ext/softfloat/f16_mulAdd.c
index 410f4f0669..4026196393 100644
--- a/ext/softfloat/f16_mulAdd.c
+++ b/ext/softfloat/f16_mulAdd.c
@@ -35,9 +35,8 @@ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 =============================================================================*/
 
 #include <stdint.h>
-
-#include "internals.h"
 #include "platform.h"
+#include "internals.h"
 #include "softfloat.h"
 
 float16_t f16_mulAdd( float16_t a, float16_t b, float16_t c )
diff --git a/ext/softfloat/f16_rem.c b/ext/softfloat/f16_rem.c
index b18c65028e..86c319ddce 100644
--- a/ext/softfloat/f16_rem.c
+++ b/ext/softfloat/f16_rem.c
@@ -36,11 +36,10 @@ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 
 #include <stdbool.h>
 #include <stdint.h>
-
-#include "internals.h"
 #include "platform.h"
-#include "softfloat.h"
+#include "internals.h"
 #include "specialize.h"
+#include "softfloat.h"
 
 float16_t f16_rem( float16_t a, float16_t b )
 {
diff --git a/ext/softfloat/f16_roundToInt.c b/ext/softfloat/f16_roundToInt.c
index f7a8ac3d37..9bbd47eb2d 100644
--- a/ext/softfloat/f16_roundToInt.c
+++ b/ext/softfloat/f16_roundToInt.c
@@ -36,11 +36,10 @@ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 
 #include <stdbool.h>
 #include <stdint.h>
-
-#include "internals.h"
 #include "platform.h"
-#include "softfloat.h"
+#include "internals.h"
 #include "specialize.h"
+#include "softfloat.h"
 
 float16_t f16_roundToInt( float16_t a, uint_fast8_t roundingMode, bool exact )
 {
diff --git a/ext/softfloat/f16_sqrt.c b/ext/softfloat/f16_sqrt.c
index 2eb5172e2d..7ff292392e 100644
--- a/ext/softfloat/f16_sqrt.c
+++ b/ext/softfloat/f16_sqrt.c
@@ -36,11 +36,10 @@ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 
 #include <stdbool.h>
 #include <stdint.h>
-
-#include "internals.h"
 #include "platform.h"
-#include "softfloat.h"
+#include "internals.h"
 #include "specialize.h"
+#include "softfloat.h"
 
 extern const uint16_t softfloat_approxRecipSqrt_1k0s[];
 extern const uint16_t softfloat_approxRecipSqrt_1k1s[];
diff --git a/ext/softfloat/f16_sub.c b/ext/softfloat/f16_sub.c
index fa7af853c5..811f239f10 100644
--- a/ext/softfloat/f16_sub.c
+++ b/ext/softfloat/f16_sub.c
@@ -36,9 +36,8 @@ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 
 #include <stdbool.h>
 #include <stdint.h>
-
-#include "internals.h"
 #include "platform.h"
+#include "internals.h"
 #include "softfloat.h"
 
 float16_t f16_sub( float16_t a, float16_t b )
diff --git a/ext/softfloat/f16_to_extF80.c b/ext/softfloat/f16_to_extF80.c
new file mode 100644
index 0000000000..cc84086629
--- /dev/null
+++ b/ext/softfloat/f16_to_extF80.c
@@ -0,0 +1,101 @@
+
+/*============================================================================
+
+This C source file is part of the SoftFloat IEEE Floating-Point Arithmetic
+Package, Release 3d, by John R. Hauser.
+
+Copyright 2011, 2012, 2013, 2014, 2015 The Regents of the University of
+California.  All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met:
+
+ 1. Redistributions of source code must retain the above copyright notice,
+    this list of conditions, and the following disclaimer.
+
+ 2. Redistributions in binary form must reproduce the above copyright notice,
+    this list of conditions, and the following disclaimer in the documentation
+    and/or other materials provided with the distribution.
+
+ 3. Neither the name of the University nor the names of its contributors may
+    be used to endorse or promote products derived from this software without
+    specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS "AS IS", AND ANY
+EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE, ARE
+DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY
+DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+=============================================================================*/
+
+#include <stdbool.h>
+#include <stdint.h>
+#include "platform.h"
+#include "internals.h"
+#include "specialize.h"
+#include "softfloat.h"
+
+extFloat80_t f16_to_extF80( float16_t a )
+{
+    union ui16_f16 uA;
+    uint_fast16_t uiA;
+    bool sign;
+    int_fast8_t exp;
+    uint_fast16_t frac;
+    struct commonNaN commonNaN;
+    struct uint128 uiZ;
+    uint_fast16_t uiZ64;
+    uint_fast64_t uiZ0;
+    struct exp8_sig16 normExpSig;
+    union { struct extFloat80M s; extFloat80_t f; } uZ;
+
+    /*------------------------------------------------------------------------
+    *------------------------------------------------------------------------*/
+    uA.f = a;
+    uiA = uA.ui;
+    sign = signF16UI( uiA );
+    exp  = expF16UI( uiA );
+    frac = fracF16UI( uiA );
+    /*------------------------------------------------------------------------
+    *------------------------------------------------------------------------*/
+    if ( exp == 0x1F ) {
+        if ( frac ) {
+            softfloat_f16UIToCommonNaN( uiA, &commonNaN );
+            uiZ = softfloat_commonNaNToExtF80UI( &commonNaN );
+            uiZ64 = uiZ.v64;
+            uiZ0  = uiZ.v0;
+        } else {
+            uiZ64 = packToExtF80UI64( sign, 0x7FFF );
+            uiZ0  = UINT64_C( 0x8000000000000000 );
+        }
+        goto uiZ;
+    }
+    /*------------------------------------------------------------------------
+    *------------------------------------------------------------------------*/
+    if ( ! exp ) {
+        if ( ! frac ) {
+            uiZ64 = packToExtF80UI64( sign, 0 );
+            uiZ0  = 0;
+            goto uiZ;
+        }
+        normExpSig = softfloat_normSubnormalF16Sig( frac );
+        exp = normExpSig.exp;
+        frac = normExpSig.sig;
+    }
+    /*------------------------------------------------------------------------
+    *------------------------------------------------------------------------*/
+    uiZ64 = packToExtF80UI64( sign, exp + 0x3FF0 );
+    uiZ0  = (uint_fast64_t) (frac | 0x0400)<<53;
+ uiZ:
+    uZ.s.signExp = uiZ64;
+    uZ.s.signif  = uiZ0;
+    return uZ.f;
+
+}
+
diff --git a/ext/softfloat/f16_to_extF80M.c b/ext/softfloat/f16_to_extF80M.c
new file mode 100644
index 0000000000..d179ac14a9
--- /dev/null
+++ b/ext/softfloat/f16_to_extF80M.c
@@ -0,0 +1,111 @@
+
+/*============================================================================
+
+This C source file is part of the SoftFloat IEEE Floating-Point Arithmetic
+Package, Release 3d, by John R. Hauser.
+
+Copyright 2011, 2012, 2013, 2014, 2015 The Regents of the University of
+California.  All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met:
+
+ 1. Redistributions of source code must retain the above copyright notice,
+    this list of conditions, and the following disclaimer.
+
+ 2. Redistributions in binary form must reproduce the above copyright notice,
+    this list of conditions, and the following disclaimer in the documentation
+    and/or other materials provided with the distribution.
+
+ 3. Neither the name of the University nor the names of its contributors may
+    be used to endorse or promote products derived from this software without
+    specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS "AS IS", AND ANY
+EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE, ARE
+DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY
+DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+=============================================================================*/
+
+#include <stdbool.h>
+#include <stdint.h>
+#include "platform.h"
+#include "internals.h"
+#include "specialize.h"
+#include "softfloat.h"
+
+#ifdef SOFTFLOAT_FAST_INT64
+
+void f16_to_extF80M( float16_t a, extFloat80_t *zPtr )
+{
+
+    *zPtr = f16_to_extF80( a );
+
+}
+
+#else
+
+void f16_to_extF80M( float16_t a, extFloat80_t *zPtr )
+{
+    struct extFloat80M *zSPtr;
+    union ui16_f16 uA;
+    uint16_t uiA;
+    bool sign;
+    int_fast8_t exp;
+    uint16_t frac;
+    struct commonNaN commonNaN;
+    uint_fast16_t uiZ64;
+    uint32_t uiZ32;
+    struct exp8_sig16 normExpSig;
+
+    /*------------------------------------------------------------------------
+    *------------------------------------------------------------------------*/
+    zSPtr = (struct extFloat80M *) zPtr;
+    uA.f = a;
+    uiA = uA.ui;
+    sign = signF16UI( uiA );
+    exp  = expF16UI( uiA );
+    frac = fracF16UI( uiA );
+    /*------------------------------------------------------------------------
+    *------------------------------------------------------------------------*/
+    if ( exp == 0x1F ) {
+        if ( frac ) {
+            softfloat_f16UIToCommonNaN( uiA, &commonNaN );
+            softfloat_commonNaNToExtF80M( &commonNaN, zSPtr );
+            return;
+        }
+        uiZ64 = packToExtF80UI64( sign, 0x7FFF );
+        uiZ32 = 0x80000000;
+        goto uiZ;
+    }
+    /*------------------------------------------------------------------------
+    *------------------------------------------------------------------------*/
+    if ( ! exp ) {
+        if ( ! frac ) {
+            uiZ64 = packToExtF80UI64( sign, 0 );
+            uiZ32 = 0;
+            goto uiZ;
+        }
+        normExpSig = softfloat_normSubnormalF16Sig( frac );
+        exp = normExpSig.exp;
+        frac = normExpSig.sig;
+    }
+    /*------------------------------------------------------------------------
+    *------------------------------------------------------------------------*/
+    uiZ64 = packToExtF80UI64( sign, exp + 0x3FF0 );
+    uiZ32 = 0x80000000 | (uint32_t) frac<<21;
+ uiZ:
+    zSPtr->signExp = uiZ64;
+    zSPtr->signif = (uint64_t) uiZ32<<32;
+
+}
+
+#endif
+
diff --git a/ext/softfloat/f16_to_f128.c b/ext/softfloat/f16_to_f128.c
index 7bd424aa1a..961cdaaf17 100644
--- a/ext/softfloat/f16_to_f128.c
+++ b/ext/softfloat/f16_to_f128.c
@@ -36,11 +36,10 @@ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 
 #include <stdbool.h>
 #include <stdint.h>
-
-#include "internals.h"
 #include "platform.h"
-#include "softfloat.h"
+#include "internals.h"
 #include "specialize.h"
+#include "softfloat.h"
 
 float128_t f16_to_f128( float16_t a )
 {
diff --git a/ext/softfloat/f16_to_f128M.c b/ext/softfloat/f16_to_f128M.c
new file mode 100644
index 0000000000..b2c2389f05
--- /dev/null
+++ b/ext/softfloat/f16_to_f128M.c
@@ -0,0 +1,111 @@
+
+/*============================================================================
+
+This C source file is part of the SoftFloat IEEE Floating-Point Arithmetic
+Package, Release 3d, by John R. Hauser.
+
+Copyright 2011, 2012, 2013, 2014, 2015, 2017 The Regents of the University of
+California.  All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met:
+
+ 1. Redistributions of source code must retain the above copyright notice,
+    this list of conditions, and the following disclaimer.
+
+ 2. Redistributions in binary form must reproduce the above copyright notice,
+    this list of conditions, and the following disclaimer in the documentation
+    and/or other materials provided with the distribution.
+
+ 3. Neither the name of the University nor the names of its contributors may
+    be used to endorse or promote products derived from this software without
+    specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS "AS IS", AND ANY
+EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE, ARE
+DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY
+DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+=============================================================================*/
+
+#include <stdbool.h>
+#include <stdint.h>
+#include "platform.h"
+#include "internals.h"
+#include "specialize.h"
+#include "softfloat.h"
+
+#ifdef SOFTFLOAT_FAST_INT64
+
+void f16_to_f128M( float16_t a, float128_t *zPtr )
+{
+
+    *zPtr = f16_to_f128( a );
+
+}
+
+#else
+
+void f16_to_f128M( float16_t a, float128_t *zPtr )
+{
+    uint32_t *zWPtr;
+    union ui16_f16 uA;
+    uint16_t uiA;
+    bool sign;
+    int_fast8_t exp;
+    uint16_t frac;
+    struct commonNaN commonNaN;
+    uint32_t uiZ96;
+    struct exp8_sig16 normExpSig;
+
+    /*------------------------------------------------------------------------
+    *------------------------------------------------------------------------*/
+    zWPtr = (uint32_t *) zPtr;
+    /*------------------------------------------------------------------------
+    *------------------------------------------------------------------------*/
+    uA.f = a;
+    uiA = uA.ui;
+    sign = signF16UI( uiA );
+    exp  = expF16UI( uiA );
+    frac = fracF16UI( uiA );
+    /*------------------------------------------------------------------------
+    *------------------------------------------------------------------------*/
+    if ( exp == 0x1F ) {
+        if ( frac ) {
+            softfloat_f16UIToCommonNaN( uiA, &commonNaN );
+            softfloat_commonNaNToF128M( &commonNaN, zWPtr );
+            return;
+        }
+        uiZ96 = packToF128UI96( sign, 0x7FFF, 0 );
+        goto uiZ;
+    }
+    /*------------------------------------------------------------------------
+    *------------------------------------------------------------------------*/
+    if ( ! exp ) {
+        if ( ! frac ) {
+            uiZ96 = packToF128UI96( sign, 0, 0 );
+            goto uiZ;
+        }
+        normExpSig = softfloat_normSubnormalF16Sig( frac );
+        exp = normExpSig.exp - 1;
+        frac = normExpSig.sig;
+    }
+    /*------------------------------------------------------------------------
+    *------------------------------------------------------------------------*/
+    uiZ96 = packToF128UI96( sign, exp + 0x3FF0, (uint32_t) frac<<6 );
+ uiZ:
+    zWPtr[indexWord( 4, 3 )] = uiZ96;
+    zWPtr[indexWord( 4, 2 )] = 0;
+    zWPtr[indexWord( 4, 1 )] = 0;
+    zWPtr[indexWord( 4, 0 )] = 0;
+
+}
+
+#endif
+
diff --git a/ext/softfloat/f16_to_f32.c b/ext/softfloat/f16_to_f32.c
index da1600a07a..fb8b3819d1 100644
--- a/ext/softfloat/f16_to_f32.c
+++ b/ext/softfloat/f16_to_f32.c
@@ -36,11 +36,10 @@ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 
 #include <stdbool.h>
 #include <stdint.h>
-
-#include "internals.h"
 #include "platform.h"
-#include "softfloat.h"
+#include "internals.h"
 #include "specialize.h"
+#include "softfloat.h"
 
 float32_t f16_to_f32( float16_t a )
 {
diff --git a/ext/softfloat/f16_to_f64.c b/ext/softfloat/f16_to_f64.c
index 9b5b05b30b..4ab27ba0b5 100644
--- a/ext/softfloat/f16_to_f64.c
+++ b/ext/softfloat/f16_to_f64.c
@@ -36,11 +36,10 @@ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 
 #include <stdbool.h>
 #include <stdint.h>
-
-#include "internals.h"
 #include "platform.h"
-#include "softfloat.h"
+#include "internals.h"
 #include "specialize.h"
+#include "softfloat.h"
 
 float64_t f16_to_f64( float16_t a )
 {
diff --git a/ext/softfloat/f16_to_i16.c b/ext/softfloat/f16_to_i16.c
new file mode 100644
index 0000000000..b0fbb7cc75
--- /dev/null
+++ b/ext/softfloat/f16_to_i16.c
@@ -0,0 +1,57 @@
+
+/*============================================================================
+
+This C source file is part of the SoftFloat IEEE Floating-Point Arithmetic
+Package, Release 3d, by John R. Hauser.
+
+Copyright 2011, 2012, 2013, 2014, 2015, 2016, 2017 The Regents of the
+University of California.  All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met:
+
+ 1. Redistributions of source code must retain the above copyright notice,
+    this list of conditions, and the following disclaimer.
+
+ 2. Redistributions in binary form must reproduce the above copyright notice,
+    this list of conditions, and the following disclaimer in the documentation
+    and/or other materials provided with the distribution.
+
+ 3. Neither the name of the University nor the names of its contributors may
+    be used to endorse or promote products derived from this software without
+    specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS "AS IS", AND ANY
+EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE, ARE
+DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY
+DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+=============================================================================*/
+
+#include <stdint.h>
+#include "specialize.h"
+#include "softfloat.h"
+
+int_fast16_t f16_to_i16( float16_t a, uint_fast8_t roundingMode, bool exact )
+{
+    uint_fast8_t old_flags = softfloat_exceptionFlags;
+
+    int_fast32_t sig32 = f16_to_i32(a, roundingMode, exact);
+
+    if (sig32 > INT16_MAX) {
+        softfloat_exceptionFlags = old_flags | softfloat_flag_invalid;
+        return i16_fromPosOverflow;
+    } else if (sig32 < INT16_MIN) {
+        softfloat_exceptionFlags = old_flags | softfloat_flag_invalid;
+        return i16_fromNegOverflow;
+    } else {
+        return sig32;
+    }
+}
+
diff --git a/ext/softfloat/f16_to_i32.c b/ext/softfloat/f16_to_i32.c
index f9e7b5aaa4..24b19846d0 100644
--- a/ext/softfloat/f16_to_i32.c
+++ b/ext/softfloat/f16_to_i32.c
@@ -36,11 +36,10 @@ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 
 #include <stdbool.h>
 #include <stdint.h>
-
-#include "internals.h"
 #include "platform.h"
-#include "softfloat.h"
+#include "internals.h"
 #include "specialize.h"
+#include "softfloat.h"
 
 int_fast32_t f16_to_i32( float16_t a, uint_fast8_t roundingMode, bool exact )
 {
diff --git a/ext/softfloat/f16_to_i32_r_minMag.c b/ext/softfloat/f16_to_i32_r_minMag.c
index 8383b0e0cf..ebb4965ca6 100644
--- a/ext/softfloat/f16_to_i32_r_minMag.c
+++ b/ext/softfloat/f16_to_i32_r_minMag.c
@@ -36,11 +36,10 @@ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 
 #include <stdbool.h>
 #include <stdint.h>
-
-#include "internals.h"
 #include "platform.h"
-#include "softfloat.h"
+#include "internals.h"
 #include "specialize.h"
+#include "softfloat.h"
 
 int_fast32_t f16_to_i32_r_minMag( float16_t a, bool exact )
 {
diff --git a/ext/softfloat/f16_to_i64.c b/ext/softfloat/f16_to_i64.c
index 8995a67bc8..c241745640 100644
--- a/ext/softfloat/f16_to_i64.c
+++ b/ext/softfloat/f16_to_i64.c
@@ -36,11 +36,10 @@ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 
 #include <stdbool.h>
 #include <stdint.h>
-
-#include "internals.h"
 #include "platform.h"
-#include "softfloat.h"
+#include "internals.h"
 #include "specialize.h"
+#include "softfloat.h"
 
 int_fast64_t f16_to_i64( float16_t a, uint_fast8_t roundingMode, bool exact )
 {
diff --git a/ext/softfloat/f16_to_i64_r_minMag.c b/ext/softfloat/f16_to_i64_r_minMag.c
index 56d3da89b5..dc9a8d37f2 100644
--- a/ext/softfloat/f16_to_i64_r_minMag.c
+++ b/ext/softfloat/f16_to_i64_r_minMag.c
@@ -36,11 +36,10 @@ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 
 #include <stdbool.h>
 #include <stdint.h>
-
-#include "internals.h"
 #include "platform.h"
-#include "softfloat.h"
+#include "internals.h"
 #include "specialize.h"
+#include "softfloat.h"
 
 int_fast64_t f16_to_i64_r_minMag( float16_t a, bool exact )
 {
diff --git a/ext/softfloat/f16_to_i8.c b/ext/softfloat/f16_to_i8.c
new file mode 100644
index 0000000000..23638cc102
--- /dev/null
+++ b/ext/softfloat/f16_to_i8.c
@@ -0,0 +1,57 @@
+
+/*============================================================================
+
+This C source file is part of the SoftFloat IEEE Floating-Point Arithmetic
+Package, Release 3d, by John R. Hauser.
+
+Copyright 2011, 2012, 2013, 2014, 2015, 2016, 2017 The Regents of the
+University of California.  All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met:
+
+ 1. Redistributions of source code must retain the above copyright notice,
+    this list of conditions, and the following disclaimer.
+
+ 2. Redistributions in binary form must reproduce the above copyright notice,
+    this list of conditions, and the following disclaimer in the documentation
+    and/or other materials provided with the distribution.
+
+ 3. Neither the name of the University nor the names of its contributors may
+    be used to endorse or promote products derived from this software without
+    specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS "AS IS", AND ANY
+EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE, ARE
+DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY
+DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+=============================================================================*/
+
+#include <stdint.h>
+#include "specialize.h"
+#include "softfloat.h"
+
+int_fast8_t f16_to_i8( float16_t a, uint_fast8_t roundingMode, bool exact )
+{
+    uint_fast8_t old_flags = softfloat_exceptionFlags;
+
+    int_fast32_t sig32 = f16_to_i32(a, roundingMode, exact);
+
+    if (sig32 > INT8_MAX) {
+        softfloat_exceptionFlags = old_flags | softfloat_flag_invalid;
+        return i8_fromPosOverflow;
+    } else if (sig32 < INT8_MIN) {
+        softfloat_exceptionFlags = old_flags | softfloat_flag_invalid;
+        return i8_fromNegOverflow;
+    } else {
+        return sig32;
+    }
+}
+
diff --git a/ext/softfloat/f16_to_ui16.c b/ext/softfloat/f16_to_ui16.c
new file mode 100644
index 0000000000..81c4f8d9e0
--- /dev/null
+++ b/ext/softfloat/f16_to_ui16.c
@@ -0,0 +1,54 @@
+
+/*============================================================================
+
+This C source file is part of the SoftFloat IEEE Floating-Point Arithmetic
+Package, Release 3d, by John R. Hauser.
+
+Copyright 2011, 2012, 2013, 2014, 2015, 2016, 2017 The Regents of the
+University of California.  All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met:
+
+ 1. Redistributions of source code must retain the above copyright notice,
+    this list of conditions, and the following disclaimer.
+
+ 2. Redistributions in binary form must reproduce the above copyright notice,
+    this list of conditions, and the following disclaimer in the documentation
+    and/or other materials provided with the distribution.
+
+ 3. Neither the name of the University nor the names of its contributors may
+    be used to endorse or promote products derived from this software without
+    specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS "AS IS", AND ANY
+EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE, ARE
+DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY
+DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+=============================================================================*/
+
+#include <stdint.h>
+#include "specialize.h"
+#include "softfloat.h"
+
+uint_fast16_t f16_to_ui16( float16_t a, uint_fast8_t roundingMode, bool exact )
+{
+    uint_fast8_t old_flags = softfloat_exceptionFlags;
+
+    uint_fast32_t sig32 = f16_to_ui32(a, roundingMode, exact);
+
+    if (sig32 > UINT16_MAX) {
+        softfloat_exceptionFlags = old_flags | softfloat_flag_invalid;
+        return ui16_fromPosOverflow;
+    } else {
+        return sig32;
+    }
+}
+
diff --git a/ext/softfloat/f16_to_ui32.c b/ext/softfloat/f16_to_ui32.c
index 00ea685c8f..c99af39c67 100644
--- a/ext/softfloat/f16_to_ui32.c
+++ b/ext/softfloat/f16_to_ui32.c
@@ -36,11 +36,10 @@ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 
 #include <stdbool.h>
 #include <stdint.h>
-
-#include "internals.h"
 #include "platform.h"
-#include "softfloat.h"
+#include "internals.h"
 #include "specialize.h"
+#include "softfloat.h"
 
 uint_fast32_t f16_to_ui32( float16_t a, uint_fast8_t roundingMode, bool exact )
 {
diff --git a/ext/softfloat/f16_to_ui32_r_minMag.c b/ext/softfloat/f16_to_ui32_r_minMag.c
index 83b6217c3a..f63e05abc6 100644
--- a/ext/softfloat/f16_to_ui32_r_minMag.c
+++ b/ext/softfloat/f16_to_ui32_r_minMag.c
@@ -36,11 +36,10 @@ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 
 #include <stdbool.h>
 #include <stdint.h>
-
-#include "internals.h"
 #include "platform.h"
-#include "softfloat.h"
+#include "internals.h"
 #include "specialize.h"
+#include "softfloat.h"
 
 uint_fast32_t f16_to_ui32_r_minMag( float16_t a, bool exact )
 {
diff --git a/ext/softfloat/f16_to_ui64.c b/ext/softfloat/f16_to_ui64.c
index 76cdf625e7..dd260eae8b 100644
--- a/ext/softfloat/f16_to_ui64.c
+++ b/ext/softfloat/f16_to_ui64.c
@@ -36,11 +36,10 @@ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 
 #include <stdbool.h>
 #include <stdint.h>
-
-#include "internals.h"
 #include "platform.h"
-#include "softfloat.h"
+#include "internals.h"
 #include "specialize.h"
+#include "softfloat.h"
 
 uint_fast64_t f16_to_ui64( float16_t a, uint_fast8_t roundingMode, bool exact )
 {
diff --git a/ext/softfloat/f16_to_ui64_r_minMag.c b/ext/softfloat/f16_to_ui64_r_minMag.c
index 43976b218b..a57b422928 100644
--- a/ext/softfloat/f16_to_ui64_r_minMag.c
+++ b/ext/softfloat/f16_to_ui64_r_minMag.c
@@ -36,11 +36,10 @@ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 
 #include <stdbool.h>
 #include <stdint.h>
-
-#include "internals.h"
 #include "platform.h"
-#include "softfloat.h"
+#include "internals.h"
 #include "specialize.h"
+#include "softfloat.h"
 
 uint_fast64_t f16_to_ui64_r_minMag( float16_t a, bool exact )
 {
diff --git a/ext/softfloat/f16_to_ui8.c b/ext/softfloat/f16_to_ui8.c
new file mode 100644
index 0000000000..96124e1275
--- /dev/null
+++ b/ext/softfloat/f16_to_ui8.c
@@ -0,0 +1,54 @@
+
+/*============================================================================
+
+This C source file is part of the SoftFloat IEEE Floating-Point Arithmetic
+Package, Release 3d, by John R. Hauser.
+
+Copyright 2011, 2012, 2013, 2014, 2015, 2016, 2017 The Regents of the
+University of California.  All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met:
+
+ 1. Redistributions of source code must retain the above copyright notice,
+    this list of conditions, and the following disclaimer.
+
+ 2. Redistributions in binary form must reproduce the above copyright notice,
+    this list of conditions, and the following disclaimer in the documentation
+    and/or other materials provided with the distribution.
+
+ 3. Neither the name of the University nor the names of its contributors may
+    be used to endorse or promote products derived from this software without
+    specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS "AS IS", AND ANY
+EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE, ARE
+DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY
+DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+=============================================================================*/
+
+#include <stdint.h>
+#include "specialize.h"
+#include "softfloat.h"
+
+uint_fast8_t f16_to_ui8( float16_t a, uint_fast8_t roundingMode, bool exact )
+{
+    uint_fast8_t old_flags = softfloat_exceptionFlags;
+
+    uint_fast32_t sig32 = f16_to_ui32(a, roundingMode, exact);
+
+    if (sig32 > UINT8_MAX) {
+        softfloat_exceptionFlags = old_flags | softfloat_flag_invalid;
+        return ui8_fromPosOverflow;
+    } else {
+        return sig32;
+    }
+}
+
diff --git a/ext/softfloat/f32_add.c b/ext/softfloat/f32_add.c
index 7b564c07d0..4a51eccf68 100644
--- a/ext/softfloat/f32_add.c
+++ b/ext/softfloat/f32_add.c
@@ -36,9 +36,8 @@ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 
 #include <stdbool.h>
 #include <stdint.h>
-
-#include "internals.h"
 #include "platform.h"
+#include "internals.h"
 #include "softfloat.h"
 
 float32_t f32_add( float32_t a, float32_t b )
diff --git a/ext/softfloat/f32_classify.c b/ext/softfloat/f32_classify.c
index 88957aa773..83fad878aa 100755
--- a/ext/softfloat/f32_classify.c
+++ b/ext/softfloat/f32_classify.c
@@ -1,11 +1,10 @@
 
 #include <stdbool.h>
 #include <stdint.h>
-
-#include "internals.h"
 #include "platform.h"
-#include "softfloat.h"
+#include "internals.h"
 #include "specialize.h"
+#include "softfloat.h"
 
 uint_fast16_t f32_classify( float32_t a )
 {
diff --git a/ext/softfloat/f32_div.c b/ext/softfloat/f32_div.c
index ead72a0693..9d101254b1 100644
--- a/ext/softfloat/f32_div.c
+++ b/ext/softfloat/f32_div.c
@@ -36,11 +36,10 @@ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 
 #include <stdbool.h>
 #include <stdint.h>
-
-#include "internals.h"
 #include "platform.h"
-#include "softfloat.h"
+#include "internals.h"
 #include "specialize.h"
+#include "softfloat.h"
 
 float32_t f32_div( float32_t a, float32_t b )
 {
diff --git a/ext/softfloat/f32_eq.c b/ext/softfloat/f32_eq.c
index ee7b2b9f7b..5f07eee300 100644
--- a/ext/softfloat/f32_eq.c
+++ b/ext/softfloat/f32_eq.c
@@ -36,11 +36,10 @@ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 
 #include <stdbool.h>
 #include <stdint.h>
-
-#include "internals.h"
 #include "platform.h"
-#include "softfloat.h"
+#include "internals.h"
 #include "specialize.h"
+#include "softfloat.h"
 
 bool f32_eq( float32_t a, float32_t b )
 {
diff --git a/ext/softfloat/f32_eq_signaling.c b/ext/softfloat/f32_eq_signaling.c
index 8cf72ea661..f5fcc8242c 100644
--- a/ext/softfloat/f32_eq_signaling.c
+++ b/ext/softfloat/f32_eq_signaling.c
@@ -36,9 +36,8 @@ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 
 #include <stdbool.h>
 #include <stdint.h>
-
-#include "internals.h"
 #include "platform.h"
+#include "internals.h"
 #include "softfloat.h"
 
 bool f32_eq_signaling( float32_t a, float32_t b )
diff --git a/ext/softfloat/f32_isSignalingNaN.c b/ext/softfloat/f32_isSignalingNaN.c
index c3acd941d7..5004a5aae4 100644
--- a/ext/softfloat/f32_isSignalingNaN.c
+++ b/ext/softfloat/f32_isSignalingNaN.c
@@ -35,11 +35,10 @@ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 =============================================================================*/
 
 #include <stdbool.h>
-
-#include "internals.h"
 #include "platform.h"
-#include "softfloat.h"
+#include "internals.h"
 #include "specialize.h"
+#include "softfloat.h"
 
 bool f32_isSignalingNaN( float32_t a )
 {
diff --git a/ext/softfloat/f32_le.c b/ext/softfloat/f32_le.c
index 761fb64e1b..77595fbbcf 100644
--- a/ext/softfloat/f32_le.c
+++ b/ext/softfloat/f32_le.c
@@ -36,9 +36,8 @@ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 
 #include <stdbool.h>
 #include <stdint.h>
-
-#include "internals.h"
 #include "platform.h"
+#include "internals.h"
 #include "softfloat.h"
 
 bool f32_le( float32_t a, float32_t b )
diff --git a/ext/softfloat/f32_le_quiet.c b/ext/softfloat/f32_le_quiet.c
index 594967faf0..1ec9101070 100644
--- a/ext/softfloat/f32_le_quiet.c
+++ b/ext/softfloat/f32_le_quiet.c
@@ -36,11 +36,10 @@ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 
 #include <stdbool.h>
 #include <stdint.h>
-
-#include "internals.h"
 #include "platform.h"
-#include "softfloat.h"
+#include "internals.h"
 #include "specialize.h"
+#include "softfloat.h"
 
 bool f32_le_quiet( float32_t a, float32_t b )
 {
diff --git a/ext/softfloat/f32_lt.c b/ext/softfloat/f32_lt.c
index 5f41c137f8..9e12843fbd 100644
--- a/ext/softfloat/f32_lt.c
+++ b/ext/softfloat/f32_lt.c
@@ -36,9 +36,8 @@ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 
 #include <stdbool.h>
 #include <stdint.h>
-
-#include "internals.h"
 #include "platform.h"
+#include "internals.h"
 #include "softfloat.h"
 
 bool f32_lt( float32_t a, float32_t b )
diff --git a/ext/softfloat/f32_lt_quiet.c b/ext/softfloat/f32_lt_quiet.c
index c14e754064..9f83b81056 100644
--- a/ext/softfloat/f32_lt_quiet.c
+++ b/ext/softfloat/f32_lt_quiet.c
@@ -36,11 +36,10 @@ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 
 #include <stdbool.h>
 #include <stdint.h>
-
-#include "internals.h"
 #include "platform.h"
-#include "softfloat.h"
+#include "internals.h"
 #include "specialize.h"
+#include "softfloat.h"
 
 bool f32_lt_quiet( float32_t a, float32_t b )
 {
diff --git a/ext/softfloat/f32_mul.c b/ext/softfloat/f32_mul.c
index 35fbca2aa0..a2a673f1c0 100644
--- a/ext/softfloat/f32_mul.c
+++ b/ext/softfloat/f32_mul.c
@@ -36,11 +36,10 @@ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 
 #include <stdbool.h>
 #include <stdint.h>
-
-#include "internals.h"
 #include "platform.h"
-#include "softfloat.h"
+#include "internals.h"
 #include "specialize.h"
+#include "softfloat.h"
 
 float32_t f32_mul( float32_t a, float32_t b )
 {
diff --git a/ext/softfloat/f32_mulAdd.c b/ext/softfloat/f32_mulAdd.c
index b4fcd2a28c..e98021b75b 100644
--- a/ext/softfloat/f32_mulAdd.c
+++ b/ext/softfloat/f32_mulAdd.c
@@ -35,9 +35,8 @@ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 =============================================================================*/
 
 #include <stdint.h>
-
-#include "internals.h"
 #include "platform.h"
+#include "internals.h"
 #include "softfloat.h"
 
 float32_t f32_mulAdd( float32_t a, float32_t b, float32_t c )
diff --git a/ext/softfloat/f32_rem.c b/ext/softfloat/f32_rem.c
index d19aec6e7f..771b1b94cb 100644
--- a/ext/softfloat/f32_rem.c
+++ b/ext/softfloat/f32_rem.c
@@ -36,11 +36,10 @@ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 
 #include <stdbool.h>
 #include <stdint.h>
-
-#include "internals.h"
 #include "platform.h"
-#include "softfloat.h"
+#include "internals.h"
 #include "specialize.h"
+#include "softfloat.h"
 
 float32_t f32_rem( float32_t a, float32_t b )
 {
diff --git a/ext/softfloat/f32_roundToInt.c b/ext/softfloat/f32_roundToInt.c
index d4186d15bf..0861b84054 100644
--- a/ext/softfloat/f32_roundToInt.c
+++ b/ext/softfloat/f32_roundToInt.c
@@ -36,11 +36,10 @@ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 
 #include <stdbool.h>
 #include <stdint.h>
-
-#include "internals.h"
 #include "platform.h"
-#include "softfloat.h"
+#include "internals.h"
 #include "specialize.h"
+#include "softfloat.h"
 
 float32_t f32_roundToInt( float32_t a, uint_fast8_t roundingMode, bool exact )
 {
diff --git a/ext/softfloat/f32_sqrt.c b/ext/softfloat/f32_sqrt.c
index 5d54c87902..5ef659e4f2 100644
--- a/ext/softfloat/f32_sqrt.c
+++ b/ext/softfloat/f32_sqrt.c
@@ -36,11 +36,10 @@ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 
 #include <stdbool.h>
 #include <stdint.h>
-
-#include "internals.h"
 #include "platform.h"
-#include "softfloat.h"
+#include "internals.h"
 #include "specialize.h"
+#include "softfloat.h"
 
 float32_t f32_sqrt( float32_t a )
 {
diff --git a/ext/softfloat/f32_sub.c b/ext/softfloat/f32_sub.c
index 158b364bb0..d8307381da 100644
--- a/ext/softfloat/f32_sub.c
+++ b/ext/softfloat/f32_sub.c
@@ -36,9 +36,8 @@ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 
 #include <stdbool.h>
 #include <stdint.h>
-
-#include "internals.h"
 #include "platform.h"
+#include "internals.h"
 #include "softfloat.h"
 
 float32_t f32_sub( float32_t a, float32_t b )
diff --git a/ext/softfloat/f32_to_extF80.c b/ext/softfloat/f32_to_extF80.c
new file mode 100644
index 0000000000..9aa562fe36
--- /dev/null
+++ b/ext/softfloat/f32_to_extF80.c
@@ -0,0 +1,101 @@
+
+/*============================================================================
+
+This C source file is part of the SoftFloat IEEE Floating-Point Arithmetic
+Package, Release 3d, by John R. Hauser.
+
+Copyright 2011, 2012, 2013, 2014, 2015 The Regents of the University of
+California.  All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met:
+
+ 1. Redistributions of source code must retain the above copyright notice,
+    this list of conditions, and the following disclaimer.
+
+ 2. Redistributions in binary form must reproduce the above copyright notice,
+    this list of conditions, and the following disclaimer in the documentation
+    and/or other materials provided with the distribution.
+
+ 3. Neither the name of the University nor the names of its contributors may
+    be used to endorse or promote products derived from this software without
+    specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS "AS IS", AND ANY
+EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE, ARE
+DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY
+DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+=============================================================================*/
+
+#include <stdbool.h>
+#include <stdint.h>
+#include "platform.h"
+#include "internals.h"
+#include "specialize.h"
+#include "softfloat.h"
+
+extFloat80_t f32_to_extF80( float32_t a )
+{
+    union ui32_f32 uA;
+    uint_fast32_t uiA;
+    bool sign;
+    int_fast16_t exp;
+    uint_fast32_t frac;
+    struct commonNaN commonNaN;
+    struct uint128 uiZ;
+    uint_fast16_t uiZ64;
+    uint_fast64_t uiZ0;
+    struct exp16_sig32 normExpSig;
+    union { struct extFloat80M s; extFloat80_t f; } uZ;
+
+    /*------------------------------------------------------------------------
+    *------------------------------------------------------------------------*/
+    uA.f = a;
+    uiA = uA.ui;
+    sign = signF32UI( uiA );
+    exp  = expF32UI( uiA );
+    frac = fracF32UI( uiA );
+    /*------------------------------------------------------------------------
+    *------------------------------------------------------------------------*/
+    if ( exp == 0xFF ) {
+        if ( frac ) {
+            softfloat_f32UIToCommonNaN( uiA, &commonNaN );
+            uiZ = softfloat_commonNaNToExtF80UI( &commonNaN );
+            uiZ64 = uiZ.v64;
+            uiZ0  = uiZ.v0;
+        } else {
+            uiZ64 = packToExtF80UI64( sign, 0x7FFF );
+            uiZ0  = UINT64_C( 0x8000000000000000 );
+        }
+        goto uiZ;
+    }
+    /*------------------------------------------------------------------------
+    *------------------------------------------------------------------------*/
+    if ( ! exp ) {
+        if ( ! frac ) {
+            uiZ64 = packToExtF80UI64( sign, 0 );
+            uiZ0  = 0;
+            goto uiZ;
+        }
+        normExpSig = softfloat_normSubnormalF32Sig( frac );
+        exp = normExpSig.exp;
+        frac = normExpSig.sig;
+    }
+    /*------------------------------------------------------------------------
+    *------------------------------------------------------------------------*/
+    uiZ64 = packToExtF80UI64( sign, exp + 0x3F80 );
+    uiZ0  = (uint_fast64_t) (frac | 0x00800000)<<40;
+ uiZ:
+    uZ.s.signExp = uiZ64;
+    uZ.s.signif  = uiZ0;
+    return uZ.f;
+
+}
+
diff --git a/ext/softfloat/f32_to_extF80M.c b/ext/softfloat/f32_to_extF80M.c
new file mode 100644
index 0000000000..d4c6890932
--- /dev/null
+++ b/ext/softfloat/f32_to_extF80M.c
@@ -0,0 +1,111 @@
+
+/*============================================================================
+
+This C source file is part of the SoftFloat IEEE Floating-Point Arithmetic
+Package, Release 3d, by John R. Hauser.
+
+Copyright 2011, 2012, 2013, 2014, 2015 The Regents of the University of
+California.  All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met:
+
+ 1. Redistributions of source code must retain the above copyright notice,
+    this list of conditions, and the following disclaimer.
+
+ 2. Redistributions in binary form must reproduce the above copyright notice,
+    this list of conditions, and the following disclaimer in the documentation
+    and/or other materials provided with the distribution.
+
+ 3. Neither the name of the University nor the names of its contributors may
+    be used to endorse or promote products derived from this software without
+    specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS "AS IS", AND ANY
+EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE, ARE
+DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY
+DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+=============================================================================*/
+
+#include <stdbool.h>
+#include <stdint.h>
+#include "platform.h"
+#include "internals.h"
+#include "specialize.h"
+#include "softfloat.h"
+
+#ifdef SOFTFLOAT_FAST_INT64
+
+void f32_to_extF80M( float32_t a, extFloat80_t *zPtr )
+{
+
+    *zPtr = f32_to_extF80( a );
+
+}
+
+#else
+
+void f32_to_extF80M( float32_t a, extFloat80_t *zPtr )
+{
+    struct extFloat80M *zSPtr;
+    union ui32_f32 uA;
+    uint32_t uiA;
+    bool sign;
+    int_fast16_t exp;
+    uint32_t frac;
+    struct commonNaN commonNaN;
+    uint_fast16_t uiZ64;
+    uint32_t uiZ32;
+    struct exp16_sig32 normExpSig;
+
+    /*------------------------------------------------------------------------
+    *------------------------------------------------------------------------*/
+    zSPtr = (struct extFloat80M *) zPtr;
+    uA.f = a;
+    uiA = uA.ui;
+    sign = signF32UI( uiA );
+    exp  = expF32UI( uiA );
+    frac = fracF32UI( uiA );
+    /*------------------------------------------------------------------------
+    *------------------------------------------------------------------------*/
+    if ( exp == 0xFF ) {
+        if ( frac ) {
+            softfloat_f32UIToCommonNaN( uiA, &commonNaN );
+            softfloat_commonNaNToExtF80M( &commonNaN, zSPtr );
+            return;
+        }
+        uiZ64 = packToExtF80UI64( sign, 0x7FFF );
+        uiZ32 = 0x80000000;
+        goto uiZ;
+    }
+    /*------------------------------------------------------------------------
+    *------------------------------------------------------------------------*/
+    if ( ! exp ) {
+        if ( ! frac ) {
+            uiZ64 = packToExtF80UI64( sign, 0 );
+            uiZ32 = 0;
+            goto uiZ;
+        }
+        normExpSig = softfloat_normSubnormalF32Sig( frac );
+        exp = normExpSig.exp;
+        frac = normExpSig.sig;
+    }
+    /*------------------------------------------------------------------------
+    *------------------------------------------------------------------------*/
+    uiZ64 = packToExtF80UI64( sign, exp + 0x3F80 );
+    uiZ32 = 0x80000000 | (uint32_t) frac<<8;
+ uiZ:
+    zSPtr->signExp = uiZ64;
+    zSPtr->signif = (uint64_t) uiZ32<<32;
+
+}
+
+#endif
+
diff --git a/ext/softfloat/f32_to_f128.c b/ext/softfloat/f32_to_f128.c
index 07e473f249..bf51926461 100644
--- a/ext/softfloat/f32_to_f128.c
+++ b/ext/softfloat/f32_to_f128.c
@@ -36,11 +36,10 @@ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 
 #include <stdbool.h>
 #include <stdint.h>
-
-#include "internals.h"
 #include "platform.h"
-#include "softfloat.h"
+#include "internals.h"
 #include "specialize.h"
+#include "softfloat.h"
 
 float128_t f32_to_f128( float32_t a )
 {
diff --git a/ext/softfloat/f32_to_f128M.c b/ext/softfloat/f32_to_f128M.c
new file mode 100644
index 0000000000..4dbc667783
--- /dev/null
+++ b/ext/softfloat/f32_to_f128M.c
@@ -0,0 +1,115 @@
+
+/*============================================================================
+
+This C source file is part of the SoftFloat IEEE Floating-Point Arithmetic
+Package, Release 3d, by John R. Hauser.
+
+Copyright 2011, 2012, 2013, 2014, 2015 The Regents of the University of
+California.  All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met:
+
+ 1. Redistributions of source code must retain the above copyright notice,
+    this list of conditions, and the following disclaimer.
+
+ 2. Redistributions in binary form must reproduce the above copyright notice,
+    this list of conditions, and the following disclaimer in the documentation
+    and/or other materials provided with the distribution.
+
+ 3. Neither the name of the University nor the names of its contributors may
+    be used to endorse or promote products derived from this software without
+    specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS "AS IS", AND ANY
+EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE, ARE
+DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY
+DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+=============================================================================*/
+
+#include <stdbool.h>
+#include <stdint.h>
+#include "platform.h"
+#include "internals.h"
+#include "specialize.h"
+#include "softfloat.h"
+
+#ifdef SOFTFLOAT_FAST_INT64
+
+void f32_to_f128M( float32_t a, float128_t *zPtr )
+{
+
+    *zPtr = f32_to_f128( a );
+
+}
+
+#else
+
+void f32_to_f128M( float32_t a, float128_t *zPtr )
+{
+    uint32_t *zWPtr;
+    union ui32_f32 uA;
+    uint32_t uiA;
+    bool sign;
+    int_fast16_t exp;
+    uint32_t frac, uiZ64;
+    struct commonNaN commonNaN;
+    uint32_t uiZ96;
+    struct exp16_sig32 normExpSig;
+    uint64_t frac64;
+
+    /*------------------------------------------------------------------------
+    *------------------------------------------------------------------------*/
+    zWPtr = (uint32_t *) zPtr;
+    /*------------------------------------------------------------------------
+    *------------------------------------------------------------------------*/
+    uA.f = a;
+    uiA = uA.ui;
+    sign = signF32UI( uiA );
+    exp  = expF32UI( uiA );
+    frac = fracF32UI( uiA );
+    /*------------------------------------------------------------------------
+    *------------------------------------------------------------------------*/
+    uiZ64 = 0;
+    if ( exp == 0xFF ) {
+        if ( frac ) {
+            softfloat_f32UIToCommonNaN( uiA, &commonNaN );
+            softfloat_commonNaNToF128M( &commonNaN, zWPtr );
+            return;
+        }
+        uiZ96 = packToF128UI96( sign, 0x7FFF, 0 );
+        goto uiZ;
+    }
+    /*------------------------------------------------------------------------
+    *------------------------------------------------------------------------*/
+    if ( ! exp ) {
+        if ( ! frac ) {
+            uiZ96 = packToF128UI96( sign, 0, 0 );
+            goto uiZ;
+        }
+        normExpSig = softfloat_normSubnormalF32Sig( frac );
+        exp = normExpSig.exp - 1;
+        frac = normExpSig.sig;
+    }
+    /*------------------------------------------------------------------------
+    *------------------------------------------------------------------------*/
+    frac64 = (uint64_t) frac<<25;
+    uiZ96 = packToF128UI96( sign, exp + 0x3F80, frac64>>32 );
+    uiZ64 = frac64;
+ uiZ:
+    zWPtr[indexWord( 4, 3 )] = uiZ96;
+    zWPtr[indexWord( 4, 2 )] = uiZ64;
+    zWPtr[indexWord( 4, 1 )] = 0;
+    zWPtr[indexWord( 4, 0 )] = 0;
+
+}
+
+#endif
+
diff --git a/ext/softfloat/f32_to_f16.c b/ext/softfloat/f32_to_f16.c
index 31c733f8d5..7a97158995 100644
--- a/ext/softfloat/f32_to_f16.c
+++ b/ext/softfloat/f32_to_f16.c
@@ -36,11 +36,10 @@ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 
 #include <stdbool.h>
 #include <stdint.h>
-
-#include "internals.h"
 #include "platform.h"
-#include "softfloat.h"
+#include "internals.h"
 #include "specialize.h"
+#include "softfloat.h"
 
 float16_t f32_to_f16( float32_t a )
 {
diff --git a/ext/softfloat/f32_to_f64.c b/ext/softfloat/f32_to_f64.c
index abb9c3deed..f9e02f2277 100644
--- a/ext/softfloat/f32_to_f64.c
+++ b/ext/softfloat/f32_to_f64.c
@@ -36,11 +36,10 @@ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 
 #include <stdbool.h>
 #include <stdint.h>
-
-#include "internals.h"
 #include "platform.h"
-#include "softfloat.h"
+#include "internals.h"
 #include "specialize.h"
+#include "softfloat.h"
 
 float64_t f32_to_f64( float32_t a )
 {
diff --git a/ext/softfloat/f32_to_i16.c b/ext/softfloat/f32_to_i16.c
new file mode 100644
index 0000000000..bde4c76c9a
--- /dev/null
+++ b/ext/softfloat/f32_to_i16.c
@@ -0,0 +1,57 @@
+
+/*============================================================================
+
+This C source file is part of the SoftFloat IEEE Floating-Point Arithmetic
+Package, Release 3d, by John R. Hauser.
+
+Copyright 2011, 2012, 2013, 2014, 2015, 2016, 2017 The Regents of the
+University of California.  All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met:
+
+ 1. Redistributions of source code must retain the above copyright notice,
+    this list of conditions, and the following disclaimer.
+
+ 2. Redistributions in binary form must reproduce the above copyright notice,
+    this list of conditions, and the following disclaimer in the documentation
+    and/or other materials provided with the distribution.
+
+ 3. Neither the name of the University nor the names of its contributors may
+    be used to endorse or promote products derived from this software without
+    specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS "AS IS", AND ANY
+EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE, ARE
+DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY
+DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+=============================================================================*/
+
+#include <stdint.h>
+#include "specialize.h"
+#include "softfloat.h"
+
+int_fast16_t f32_to_i16( float32_t a, uint_fast8_t roundingMode, bool exact )
+{
+    uint_fast8_t old_flags = softfloat_exceptionFlags;
+
+    int_fast32_t sig32 = f32_to_i32(a, roundingMode, exact);
+
+    if (sig32 > INT16_MAX) {
+        softfloat_exceptionFlags = old_flags | softfloat_flag_invalid;
+        return i16_fromPosOverflow;
+    } else if (sig32 < INT16_MIN) {
+        softfloat_exceptionFlags = old_flags | softfloat_flag_invalid;
+        return i16_fromNegOverflow;
+    } else {
+        return sig32;
+    }
+}
+
diff --git a/ext/softfloat/f32_to_i32.c b/ext/softfloat/f32_to_i32.c
index 7cde8969e7..c9f2cf9b39 100644
--- a/ext/softfloat/f32_to_i32.c
+++ b/ext/softfloat/f32_to_i32.c
@@ -36,11 +36,10 @@ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 
 #include <stdbool.h>
 #include <stdint.h>
-
-#include "internals.h"
 #include "platform.h"
-#include "softfloat.h"
+#include "internals.h"
 #include "specialize.h"
+#include "softfloat.h"
 
 int_fast32_t f32_to_i32( float32_t a, uint_fast8_t roundingMode, bool exact )
 {
@@ -51,6 +50,7 @@ int_fast32_t f32_to_i32( float32_t a, uint_fast8_t roundingMode, bool exact )
     uint_fast32_t sig;
     uint_fast64_t sig64;
     int_fast16_t shiftDist;
+
     /*------------------------------------------------------------------------
     *------------------------------------------------------------------------*/
     uA.f = a;
diff --git a/ext/softfloat/f32_to_i32_r_minMag.c b/ext/softfloat/f32_to_i32_r_minMag.c
index 1caaa61a71..1a94dcc680 100644
--- a/ext/softfloat/f32_to_i32_r_minMag.c
+++ b/ext/softfloat/f32_to_i32_r_minMag.c
@@ -36,11 +36,10 @@ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 
 #include <stdbool.h>
 #include <stdint.h>
-
-#include "internals.h"
 #include "platform.h"
-#include "softfloat.h"
+#include "internals.h"
 #include "specialize.h"
+#include "softfloat.h"
 
 int_fast32_t f32_to_i32_r_minMag( float32_t a, bool exact )
 {
diff --git a/ext/softfloat/f32_to_i64.c b/ext/softfloat/f32_to_i64.c
index 5265e3a205..48ab46fd9a 100644
--- a/ext/softfloat/f32_to_i64.c
+++ b/ext/softfloat/f32_to_i64.c
@@ -36,11 +36,10 @@ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 
 #include <stdbool.h>
 #include <stdint.h>
-
-#include "internals.h"
 #include "platform.h"
-#include "softfloat.h"
+#include "internals.h"
 #include "specialize.h"
+#include "softfloat.h"
 
 int_fast64_t f32_to_i64( float32_t a, uint_fast8_t roundingMode, bool exact )
 {
diff --git a/ext/softfloat/f32_to_i64_r_minMag.c b/ext/softfloat/f32_to_i64_r_minMag.c
index a37624d311..7d336a47bf 100644
--- a/ext/softfloat/f32_to_i64_r_minMag.c
+++ b/ext/softfloat/f32_to_i64_r_minMag.c
@@ -36,11 +36,10 @@ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 
 #include <stdbool.h>
 #include <stdint.h>
-
-#include "internals.h"
 #include "platform.h"
-#include "softfloat.h"
+#include "internals.h"
 #include "specialize.h"
+#include "softfloat.h"
 
 int_fast64_t f32_to_i64_r_minMag( float32_t a, bool exact )
 {
diff --git a/ext/softfloat/f32_to_ui16.c b/ext/softfloat/f32_to_ui16.c
new file mode 100644
index 0000000000..073492bfaa
--- /dev/null
+++ b/ext/softfloat/f32_to_ui16.c
@@ -0,0 +1,53 @@
+
+/*============================================================================
+
+This C source file is part of the SoftFloat IEEE Floating-Point Arithmetic
+Package, Release 3d, by John R. Hauser.
+
+Copyright 2011, 2012, 2013, 2014, 2015, 2016, 2017 The Regents of the
+University of California.  All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met:
+
+ 1. Redistributions of source code must retain the above copyright notice,
+    this list of conditions, and the following disclaimer.
+
+ 2. Redistributions in binary form must reproduce the above copyright notice,
+    this list of conditions, and the following disclaimer in the documentation
+    and/or other materials provided with the distribution.
+
+ 3. Neither the name of the University nor the names of its contributors may
+    be used to endorse or promote products derived from this software without
+    specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS "AS IS", AND ANY
+EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE, ARE
+DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY
+DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+=============================================================================*/
+
+#include <stdint.h>
+#include "specialize.h"
+#include "softfloat.h"
+
+uint_fast16_t f32_to_ui16( float32_t a, uint_fast8_t roundingMode, bool exact )
+{
+    uint_fast8_t old_flags = softfloat_exceptionFlags;
+
+    uint_fast32_t sig32 = f32_to_ui32(a, roundingMode, exact);
+
+    if (sig32 > UINT16_MAX) {
+        softfloat_exceptionFlags = old_flags | softfloat_flag_invalid;
+        return ui16_fromPosOverflow;
+    } else {
+        return sig32;
+    }
+}
diff --git a/ext/softfloat/f32_to_ui32.c b/ext/softfloat/f32_to_ui32.c
index 5c994d1534..5ec279ba64 100644
--- a/ext/softfloat/f32_to_ui32.c
+++ b/ext/softfloat/f32_to_ui32.c
@@ -36,11 +36,10 @@ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 
 #include <stdbool.h>
 #include <stdint.h>
-
-#include "internals.h"
 #include "platform.h"
-#include "softfloat.h"
+#include "internals.h"
 #include "specialize.h"
+#include "softfloat.h"
 
 uint_fast32_t f32_to_ui32( float32_t a, uint_fast8_t roundingMode, bool exact )
 {
diff --git a/ext/softfloat/f32_to_ui32_r_minMag.c b/ext/softfloat/f32_to_ui32_r_minMag.c
index d0cd9cc830..12f72619bf 100644
--- a/ext/softfloat/f32_to_ui32_r_minMag.c
+++ b/ext/softfloat/f32_to_ui32_r_minMag.c
@@ -36,11 +36,10 @@ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 
 #include <stdbool.h>
 #include <stdint.h>
-
-#include "internals.h"
 #include "platform.h"
-#include "softfloat.h"
+#include "internals.h"
 #include "specialize.h"
+#include "softfloat.h"
 
 uint_fast32_t f32_to_ui32_r_minMag( float32_t a, bool exact )
 {
diff --git a/ext/softfloat/f32_to_ui64.c b/ext/softfloat/f32_to_ui64.c
index 74e0ed416e..11c7b4b67d 100644
--- a/ext/softfloat/f32_to_ui64.c
+++ b/ext/softfloat/f32_to_ui64.c
@@ -36,11 +36,10 @@ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 
 #include <stdbool.h>
 #include <stdint.h>
-
-#include "internals.h"
 #include "platform.h"
-#include "softfloat.h"
+#include "internals.h"
 #include "specialize.h"
+#include "softfloat.h"
 
 uint_fast64_t f32_to_ui64( float32_t a, uint_fast8_t roundingMode, bool exact )
 {
diff --git a/ext/softfloat/f32_to_ui64_r_minMag.c b/ext/softfloat/f32_to_ui64_r_minMag.c
index 8d84af5638..f96f3e1fe6 100644
--- a/ext/softfloat/f32_to_ui64_r_minMag.c
+++ b/ext/softfloat/f32_to_ui64_r_minMag.c
@@ -36,11 +36,10 @@ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 
 #include <stdbool.h>
 #include <stdint.h>
-
-#include "internals.h"
 #include "platform.h"
-#include "softfloat.h"
+#include "internals.h"
 #include "specialize.h"
+#include "softfloat.h"
 
 uint_fast64_t f32_to_ui64_r_minMag( float32_t a, bool exact )
 {
diff --git a/ext/softfloat/f64_add.c b/ext/softfloat/f64_add.c
index e8b72856aa..e9880ddfcd 100644
--- a/ext/softfloat/f64_add.c
+++ b/ext/softfloat/f64_add.c
@@ -36,9 +36,8 @@ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 
 #include <stdbool.h>
 #include <stdint.h>
-
-#include "internals.h"
 #include "platform.h"
+#include "internals.h"
 #include "softfloat.h"
 
 float64_t f64_add( float64_t a, float64_t b )
diff --git a/ext/softfloat/f64_classify.c b/ext/softfloat/f64_classify.c
index fd19310fb6..180abde3cb 100755
--- a/ext/softfloat/f64_classify.c
+++ b/ext/softfloat/f64_classify.c
@@ -1,11 +1,10 @@
 
 #include <stdbool.h>
 #include <stdint.h>
-
-#include "internals.h"
 #include "platform.h"
-#include "softfloat.h"
+#include "internals.h"
 #include "specialize.h"
+#include "softfloat.h"
 
 uint_fast16_t f64_classify( float64_t a )
 {
diff --git a/ext/softfloat/f64_div.c b/ext/softfloat/f64_div.c
index f33fe9bb22..c5a2d4fe34 100644
--- a/ext/softfloat/f64_div.c
+++ b/ext/softfloat/f64_div.c
@@ -36,11 +36,10 @@ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 
 #include <stdbool.h>
 #include <stdint.h>
-
-#include "internals.h"
 #include "platform.h"
-#include "softfloat.h"
+#include "internals.h"
 #include "specialize.h"
+#include "softfloat.h"
 
 float64_t f64_div( float64_t a, float64_t b )
 {
diff --git a/ext/softfloat/f64_eq.c b/ext/softfloat/f64_eq.c
index 7e434343a9..ccb602a382 100644
--- a/ext/softfloat/f64_eq.c
+++ b/ext/softfloat/f64_eq.c
@@ -36,11 +36,10 @@ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 
 #include <stdbool.h>
 #include <stdint.h>
-
-#include "internals.h"
 #include "platform.h"
-#include "softfloat.h"
+#include "internals.h"
 #include "specialize.h"
+#include "softfloat.h"
 
 bool f64_eq( float64_t a, float64_t b )
 {
diff --git a/ext/softfloat/f64_eq_signaling.c b/ext/softfloat/f64_eq_signaling.c
index feca2709dc..ee5a4414f7 100644
--- a/ext/softfloat/f64_eq_signaling.c
+++ b/ext/softfloat/f64_eq_signaling.c
@@ -36,9 +36,8 @@ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 
 #include <stdbool.h>
 #include <stdint.h>
-
-#include "internals.h"
 #include "platform.h"
+#include "internals.h"
 #include "softfloat.h"
 
 bool f64_eq_signaling( float64_t a, float64_t b )
diff --git a/ext/softfloat/f64_isSignalingNaN.c b/ext/softfloat/f64_isSignalingNaN.c
index edfbed3882..f55acb4a02 100644
--- a/ext/softfloat/f64_isSignalingNaN.c
+++ b/ext/softfloat/f64_isSignalingNaN.c
@@ -35,11 +35,10 @@ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 =============================================================================*/
 
 #include <stdbool.h>
-
-#include "internals.h"
 #include "platform.h"
-#include "softfloat.h"
+#include "internals.h"
 #include "specialize.h"
+#include "softfloat.h"
 
 bool f64_isSignalingNaN( float64_t a )
 {
diff --git a/ext/softfloat/f64_le.c b/ext/softfloat/f64_le.c
index 92de6d030f..91fc994abe 100644
--- a/ext/softfloat/f64_le.c
+++ b/ext/softfloat/f64_le.c
@@ -36,9 +36,8 @@ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 
 #include <stdbool.h>
 #include <stdint.h>
-
-#include "internals.h"
 #include "platform.h"
+#include "internals.h"
 #include "softfloat.h"
 
 bool f64_le( float64_t a, float64_t b )
diff --git a/ext/softfloat/f64_le_quiet.c b/ext/softfloat/f64_le_quiet.c
index fcd8eca630..a5d332a5b0 100644
--- a/ext/softfloat/f64_le_quiet.c
+++ b/ext/softfloat/f64_le_quiet.c
@@ -36,11 +36,10 @@ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 
 #include <stdbool.h>
 #include <stdint.h>
-
-#include "internals.h"
 #include "platform.h"
-#include "softfloat.h"
+#include "internals.h"
 #include "specialize.h"
+#include "softfloat.h"
 
 bool f64_le_quiet( float64_t a, float64_t b )
 {
diff --git a/ext/softfloat/f64_lt.c b/ext/softfloat/f64_lt.c
index 5b38870949..abf62fd3d5 100644
--- a/ext/softfloat/f64_lt.c
+++ b/ext/softfloat/f64_lt.c
@@ -36,9 +36,8 @@ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 
 #include <stdbool.h>
 #include <stdint.h>
-
-#include "internals.h"
 #include "platform.h"
+#include "internals.h"
 #include "softfloat.h"
 
 bool f64_lt( float64_t a, float64_t b )
diff --git a/ext/softfloat/f64_lt_quiet.c b/ext/softfloat/f64_lt_quiet.c
index f3ea68113e..6531f577e6 100644
--- a/ext/softfloat/f64_lt_quiet.c
+++ b/ext/softfloat/f64_lt_quiet.c
@@ -36,11 +36,10 @@ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 
 #include <stdbool.h>
 #include <stdint.h>
-
-#include "internals.h"
 #include "platform.h"
-#include "softfloat.h"
+#include "internals.h"
 #include "specialize.h"
+#include "softfloat.h"
 
 bool f64_lt_quiet( float64_t a, float64_t b )
 {
diff --git a/ext/softfloat/f64_mul.c b/ext/softfloat/f64_mul.c
index 3c066f38e1..86f6654587 100644
--- a/ext/softfloat/f64_mul.c
+++ b/ext/softfloat/f64_mul.c
@@ -36,11 +36,10 @@ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 
 #include <stdbool.h>
 #include <stdint.h>
-
-#include "internals.h"
 #include "platform.h"
-#include "softfloat.h"
+#include "internals.h"
 #include "specialize.h"
+#include "softfloat.h"
 
 float64_t f64_mul( float64_t a, float64_t b )
 {
diff --git a/ext/softfloat/f64_mulAdd.c b/ext/softfloat/f64_mulAdd.c
index 483340d561..67fc44d38e 100644
--- a/ext/softfloat/f64_mulAdd.c
+++ b/ext/softfloat/f64_mulAdd.c
@@ -35,9 +35,8 @@ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 =============================================================================*/
 
 #include <stdint.h>
-
-#include "internals.h"
 #include "platform.h"
+#include "internals.h"
 #include "softfloat.h"
 
 float64_t f64_mulAdd( float64_t a, float64_t b, float64_t c )
diff --git a/ext/softfloat/f64_rem.c b/ext/softfloat/f64_rem.c
index 4cdacb0678..e917455421 100644
--- a/ext/softfloat/f64_rem.c
+++ b/ext/softfloat/f64_rem.c
@@ -36,11 +36,10 @@ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 
 #include <stdbool.h>
 #include <stdint.h>
-
-#include "internals.h"
 #include "platform.h"
-#include "softfloat.h"
+#include "internals.h"
 #include "specialize.h"
+#include "softfloat.h"
 
 float64_t f64_rem( float64_t a, float64_t b )
 {
diff --git a/ext/softfloat/f64_roundToInt.c b/ext/softfloat/f64_roundToInt.c
index 74498f25e1..7f81007082 100644
--- a/ext/softfloat/f64_roundToInt.c
+++ b/ext/softfloat/f64_roundToInt.c
@@ -36,11 +36,10 @@ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 
 #include <stdbool.h>
 #include <stdint.h>
-
-#include "internals.h"
 #include "platform.h"
-#include "softfloat.h"
+#include "internals.h"
 #include "specialize.h"
+#include "softfloat.h"
 
 float64_t f64_roundToInt( float64_t a, uint_fast8_t roundingMode, bool exact )
 {
diff --git a/ext/softfloat/f64_sqrt.c b/ext/softfloat/f64_sqrt.c
index 5c765b17cb..9a06cfad3d 100644
--- a/ext/softfloat/f64_sqrt.c
+++ b/ext/softfloat/f64_sqrt.c
@@ -36,11 +36,10 @@ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 
 #include <stdbool.h>
 #include <stdint.h>
-
-#include "internals.h"
 #include "platform.h"
-#include "softfloat.h"
+#include "internals.h"
 #include "specialize.h"
+#include "softfloat.h"
 
 float64_t f64_sqrt( float64_t a )
 {
diff --git a/ext/softfloat/f64_sub.c b/ext/softfloat/f64_sub.c
index 429c33da18..0e990cd135 100644
--- a/ext/softfloat/f64_sub.c
+++ b/ext/softfloat/f64_sub.c
@@ -36,9 +36,8 @@ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 
 #include <stdbool.h>
 #include <stdint.h>
-
-#include "internals.h"
 #include "platform.h"
+#include "internals.h"
 #include "softfloat.h"
 
 float64_t f64_sub( float64_t a, float64_t b )
diff --git a/ext/softfloat/f64_to_extF80.c b/ext/softfloat/f64_to_extF80.c
new file mode 100644
index 0000000000..c058efc660
--- /dev/null
+++ b/ext/softfloat/f64_to_extF80.c
@@ -0,0 +1,101 @@
+
+/*============================================================================
+
+This C source file is part of the SoftFloat IEEE Floating-Point Arithmetic
+Package, Release 3d, by John R. Hauser.
+
+Copyright 2011, 2012, 2013, 2014, 2015 The Regents of the University of
+California.  All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met:
+
+ 1. Redistributions of source code must retain the above copyright notice,
+    this list of conditions, and the following disclaimer.
+
+ 2. Redistributions in binary form must reproduce the above copyright notice,
+    this list of conditions, and the following disclaimer in the documentation
+    and/or other materials provided with the distribution.
+
+ 3. Neither the name of the University nor the names of its contributors may
+    be used to endorse or promote products derived from this software without
+    specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS "AS IS", AND ANY
+EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE, ARE
+DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY
+DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+=============================================================================*/
+
+#include <stdbool.h>
+#include <stdint.h>
+#include "platform.h"
+#include "internals.h"
+#include "specialize.h"
+#include "softfloat.h"
+
+extFloat80_t f64_to_extF80( float64_t a )
+{
+    union ui64_f64 uA;
+    uint_fast64_t uiA;
+    bool sign;
+    int_fast16_t exp;
+    uint_fast64_t frac;
+    struct commonNaN commonNaN;
+    struct uint128 uiZ;
+    uint_fast16_t uiZ64;
+    uint_fast64_t uiZ0;
+    struct exp16_sig64 normExpSig;
+    union { struct extFloat80M s; extFloat80_t f; } uZ;
+
+    /*------------------------------------------------------------------------
+    *------------------------------------------------------------------------*/
+    uA.f = a;
+    uiA = uA.ui;
+    sign = signF64UI( uiA );
+    exp  = expF64UI( uiA );
+    frac = fracF64UI( uiA );
+    /*------------------------------------------------------------------------
+    *------------------------------------------------------------------------*/
+    if ( exp == 0x7FF ) {
+        if ( frac ) {
+            softfloat_f64UIToCommonNaN( uiA, &commonNaN );
+            uiZ = softfloat_commonNaNToExtF80UI( &commonNaN );
+            uiZ64 = uiZ.v64;
+            uiZ0  = uiZ.v0;
+        } else {
+            uiZ64 = packToExtF80UI64( sign, 0x7FFF );
+            uiZ0  = UINT64_C( 0x8000000000000000 );
+        }
+        goto uiZ;
+    }
+    /*------------------------------------------------------------------------
+    *------------------------------------------------------------------------*/
+    if ( ! exp ) {
+        if ( ! frac ) {
+            uiZ64 = packToExtF80UI64( sign, 0 );
+            uiZ0  = 0;
+            goto uiZ;
+        }
+        normExpSig = softfloat_normSubnormalF64Sig( frac );
+        exp = normExpSig.exp;
+        frac = normExpSig.sig;
+    }
+    /*------------------------------------------------------------------------
+    *------------------------------------------------------------------------*/
+    uiZ64 = packToExtF80UI64( sign, exp + 0x3C00 );
+    uiZ0  = (frac | UINT64_C( 0x0010000000000000 ))<<11;
+ uiZ:
+    uZ.s.signExp = uiZ64;
+    uZ.s.signif  = uiZ0;
+    return uZ.f;
+
+}
+
diff --git a/ext/softfloat/f64_to_extF80M.c b/ext/softfloat/f64_to_extF80M.c
new file mode 100644
index 0000000000..041b9d19d5
--- /dev/null
+++ b/ext/softfloat/f64_to_extF80M.c
@@ -0,0 +1,111 @@
+
+/*============================================================================
+
+This C source file is part of the SoftFloat IEEE Floating-Point Arithmetic
+Package, Release 3d, by John R. Hauser.
+
+Copyright 2011, 2012, 2013, 2014, 2015 The Regents of the University of
+California.  All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met:
+
+ 1. Redistributions of source code must retain the above copyright notice,
+    this list of conditions, and the following disclaimer.
+
+ 2. Redistributions in binary form must reproduce the above copyright notice,
+    this list of conditions, and the following disclaimer in the documentation
+    and/or other materials provided with the distribution.
+
+ 3. Neither the name of the University nor the names of its contributors may
+    be used to endorse or promote products derived from this software without
+    specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS "AS IS", AND ANY
+EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE, ARE
+DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY
+DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+=============================================================================*/
+
+#include <stdbool.h>
+#include <stdint.h>
+#include "platform.h"
+#include "internals.h"
+#include "specialize.h"
+#include "softfloat.h"
+
+#ifdef SOFTFLOAT_FAST_INT64
+
+void f64_to_extF80M( float64_t a, extFloat80_t *zPtr )
+{
+
+    *zPtr = f64_to_extF80( a );
+
+}
+
+#else
+
+void f64_to_extF80M( float64_t a, extFloat80_t *zPtr )
+{
+    struct extFloat80M *zSPtr;
+    union ui64_f64 uA;
+    uint64_t uiA;
+    bool sign;
+    int_fast16_t exp;
+    uint64_t frac;
+    struct commonNaN commonNaN;
+    uint_fast16_t uiZ64;
+    uint64_t uiZ0;
+    struct exp16_sig64 normExpSig;
+
+    /*------------------------------------------------------------------------
+    *------------------------------------------------------------------------*/
+    zSPtr = (struct extFloat80M *) zPtr;
+    uA.f = a;
+    uiA = uA.ui;
+    sign = signF64UI( uiA );
+    exp  = expF64UI( uiA );
+    frac = fracF64UI( uiA );
+    /*------------------------------------------------------------------------
+    *------------------------------------------------------------------------*/
+    if ( exp == 0x7FF ) {
+        if ( frac ) {
+            softfloat_f64UIToCommonNaN( uiA, &commonNaN );
+            softfloat_commonNaNToExtF80M( &commonNaN, zSPtr );
+            return;
+        }
+        uiZ64 = packToExtF80UI64( sign, 0x7FFF );
+        uiZ0  = UINT64_C( 0x8000000000000000 );
+        goto uiZ;
+    }
+    /*------------------------------------------------------------------------
+    *------------------------------------------------------------------------*/
+    if ( ! exp ) {
+        if ( ! frac ) {
+            uiZ64 = packToExtF80UI64( sign, 0 );
+            uiZ0  = 0;
+            goto uiZ;
+        }
+        normExpSig = softfloat_normSubnormalF64Sig( frac );
+        exp = normExpSig.exp;
+        frac = normExpSig.sig;
+    }
+    /*------------------------------------------------------------------------
+    *------------------------------------------------------------------------*/
+    uiZ64 = packToExtF80UI64( sign, exp + 0x3C00 );
+    uiZ0  = UINT64_C( 0x8000000000000000 ) | frac<<11;
+ uiZ:
+    zSPtr->signExp = uiZ64;
+    zSPtr->signif  = uiZ0;
+
+}
+
+#endif
+
diff --git a/ext/softfloat/f64_to_f128.c b/ext/softfloat/f64_to_f128.c
index ca88bb4aa1..92c2d56016 100644
--- a/ext/softfloat/f64_to_f128.c
+++ b/ext/softfloat/f64_to_f128.c
@@ -36,11 +36,10 @@ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 
 #include <stdbool.h>
 #include <stdint.h>
-
-#include "internals.h"
 #include "platform.h"
-#include "softfloat.h"
+#include "internals.h"
 #include "specialize.h"
+#include "softfloat.h"
 
 float128_t f64_to_f128( float64_t a )
 {
diff --git a/ext/softfloat/f64_to_f128M.c b/ext/softfloat/f64_to_f128M.c
new file mode 100644
index 0000000000..40dc0b1fbb
--- /dev/null
+++ b/ext/softfloat/f64_to_f128M.c
@@ -0,0 +1,117 @@
+
+/*============================================================================
+
+This C source file is part of the SoftFloat IEEE Floating-Point Arithmetic
+Package, Release 3d, by John R. Hauser.
+
+Copyright 2011, 2012, 2013, 2014 The Regents of the University of California.
+All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met:
+
+ 1. Redistributions of source code must retain the above copyright notice,
+    this list of conditions, and the following disclaimer.
+
+ 2. Redistributions in binary form must reproduce the above copyright notice,
+    this list of conditions, and the following disclaimer in the documentation
+    and/or other materials provided with the distribution.
+
+ 3. Neither the name of the University nor the names of its contributors may
+    be used to endorse or promote products derived from this software without
+    specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS "AS IS", AND ANY
+EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE, ARE
+DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY
+DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+=============================================================================*/
+
+#include <stdbool.h>
+#include <stdint.h>
+#include "platform.h"
+#include "internals.h"
+#include "specialize.h"
+#include "softfloat.h"
+
+#ifdef SOFTFLOAT_FAST_INT64
+
+void f64_to_f128M( float64_t a, float128_t *zPtr )
+{
+
+    *zPtr = f64_to_f128( a );
+
+}
+
+#else
+
+void f64_to_f128M( float64_t a, float128_t *zPtr )
+{
+    uint32_t *zWPtr;
+    union ui64_f64 uA;
+    uint64_t uiA;
+    bool sign;
+    int_fast16_t exp;
+    uint64_t frac;
+    struct commonNaN commonNaN;
+    uint32_t uiZ96;
+    struct exp16_sig64 normExpSig;
+
+    /*------------------------------------------------------------------------
+    *------------------------------------------------------------------------*/
+    zWPtr = (uint32_t *) zPtr;
+    /*------------------------------------------------------------------------
+    *------------------------------------------------------------------------*/
+    uA.f = a;
+    uiA = uA.ui;
+    sign = signF64UI( uiA );
+    exp  = expF64UI( uiA );
+    frac = fracF64UI( uiA );
+    /*------------------------------------------------------------------------
+    *------------------------------------------------------------------------*/
+    zWPtr[indexWord( 4, 0 )] = 0;
+    if ( exp == 0x7FF ) {
+        if ( frac ) {
+            softfloat_f64UIToCommonNaN( uiA, &commonNaN );
+            softfloat_commonNaNToF128M( &commonNaN, zWPtr );
+            return;
+        }
+        uiZ96 = packToF128UI96( sign, 0x7FFF, 0 );
+        goto uiZ;
+    }
+    /*------------------------------------------------------------------------
+    *------------------------------------------------------------------------*/
+    if ( ! exp ) {
+        if ( ! frac ) {
+            uiZ96 = packToF128UI96( sign, 0, 0 );
+            goto uiZ;
+        }
+        normExpSig = softfloat_normSubnormalF64Sig( frac );
+        exp = normExpSig.exp - 1;
+        frac = normExpSig.sig;
+    }
+    /*------------------------------------------------------------------------
+    *------------------------------------------------------------------------*/
+    zWPtr[indexWord( 4, 1 )] = (uint32_t) frac<<28;
+    frac >>= 4;
+    zWPtr[indexWordHi( 4 )] = packToF128UI96( sign, exp + 0x3C00, frac>>32 );
+    zWPtr[indexWord( 4, 2 )] = frac;
+    return;
+    /*------------------------------------------------------------------------
+    *------------------------------------------------------------------------*/
+ uiZ:
+    zWPtr[indexWord( 4, 3 )] = uiZ96;
+    zWPtr[indexWord( 4, 2 )] = 0;
+    zWPtr[indexWord( 4, 1 )] = 0;
+
+}
+
+#endif
+
diff --git a/ext/softfloat/f64_to_f16.c b/ext/softfloat/f64_to_f16.c
index 0efddc6a18..325788c663 100644
--- a/ext/softfloat/f64_to_f16.c
+++ b/ext/softfloat/f64_to_f16.c
@@ -36,11 +36,10 @@ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 
 #include <stdbool.h>
 #include <stdint.h>
-
-#include "internals.h"
 #include "platform.h"
-#include "softfloat.h"
+#include "internals.h"
 #include "specialize.h"
+#include "softfloat.h"
 
 float16_t f64_to_f16( float64_t a )
 {
diff --git a/ext/softfloat/f64_to_f32.c b/ext/softfloat/f64_to_f32.c
index 578c9f92a1..99b13dda47 100644
--- a/ext/softfloat/f64_to_f32.c
+++ b/ext/softfloat/f64_to_f32.c
@@ -36,11 +36,10 @@ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 
 #include <stdbool.h>
 #include <stdint.h>
-
-#include "internals.h"
 #include "platform.h"
-#include "softfloat.h"
+#include "internals.h"
 #include "specialize.h"
+#include "softfloat.h"
 
 float32_t f64_to_f32( float64_t a )
 {
diff --git a/ext/softfloat/f64_to_i32.c b/ext/softfloat/f64_to_i32.c
index f4f12cd2ad..8712c0ac56 100644
--- a/ext/softfloat/f64_to_i32.c
+++ b/ext/softfloat/f64_to_i32.c
@@ -36,11 +36,10 @@ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 
 #include <stdbool.h>
 #include <stdint.h>
-
-#include "internals.h"
 #include "platform.h"
-#include "softfloat.h"
+#include "internals.h"
 #include "specialize.h"
+#include "softfloat.h"
 
 int_fast32_t f64_to_i32( float64_t a, uint_fast8_t roundingMode, bool exact )
 {
diff --git a/ext/softfloat/f64_to_i32_r_minMag.c b/ext/softfloat/f64_to_i32_r_minMag.c
index fd336cf370..b7e1e03054 100644
--- a/ext/softfloat/f64_to_i32_r_minMag.c
+++ b/ext/softfloat/f64_to_i32_r_minMag.c
@@ -36,11 +36,10 @@ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 
 #include <stdbool.h>
 #include <stdint.h>
-
-#include "internals.h"
 #include "platform.h"
-#include "softfloat.h"
+#include "internals.h"
 #include "specialize.h"
+#include "softfloat.h"
 
 int_fast32_t f64_to_i32_r_minMag( float64_t a, bool exact )
 {
diff --git a/ext/softfloat/f64_to_i64.c b/ext/softfloat/f64_to_i64.c
index ac8a9785e7..4f48843c0a 100644
--- a/ext/softfloat/f64_to_i64.c
+++ b/ext/softfloat/f64_to_i64.c
@@ -36,11 +36,10 @@ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 
 #include <stdbool.h>
 #include <stdint.h>
-
-#include "internals.h"
 #include "platform.h"
-#include "softfloat.h"
+#include "internals.h"
 #include "specialize.h"
+#include "softfloat.h"
 
 int_fast64_t f64_to_i64( float64_t a, uint_fast8_t roundingMode, bool exact )
 {
diff --git a/ext/softfloat/f64_to_i64_r_minMag.c b/ext/softfloat/f64_to_i64_r_minMag.c
index 0b4e5718fa..3822606db3 100644
--- a/ext/softfloat/f64_to_i64_r_minMag.c
+++ b/ext/softfloat/f64_to_i64_r_minMag.c
@@ -36,11 +36,10 @@ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 
 #include <stdbool.h>
 #include <stdint.h>
-
-#include "internals.h"
 #include "platform.h"
-#include "softfloat.h"
+#include "internals.h"
 #include "specialize.h"
+#include "softfloat.h"
 
 int_fast64_t f64_to_i64_r_minMag( float64_t a, bool exact )
 {
diff --git a/ext/softfloat/f64_to_ui32.c b/ext/softfloat/f64_to_ui32.c
index 7f705963cf..67e4d05881 100644
--- a/ext/softfloat/f64_to_ui32.c
+++ b/ext/softfloat/f64_to_ui32.c
@@ -36,11 +36,10 @@ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 
 #include <stdbool.h>
 #include <stdint.h>
-
-#include "internals.h"
 #include "platform.h"
-#include "softfloat.h"
+#include "internals.h"
 #include "specialize.h"
+#include "softfloat.h"
 
 uint_fast32_t f64_to_ui32( float64_t a, uint_fast8_t roundingMode, bool exact )
 {
diff --git a/ext/softfloat/f64_to_ui32_r_minMag.c b/ext/softfloat/f64_to_ui32_r_minMag.c
index bf10a85623..11f0b0501e 100644
--- a/ext/softfloat/f64_to_ui32_r_minMag.c
+++ b/ext/softfloat/f64_to_ui32_r_minMag.c
@@ -36,11 +36,10 @@ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 
 #include <stdbool.h>
 #include <stdint.h>
-
-#include "internals.h"
 #include "platform.h"
-#include "softfloat.h"
+#include "internals.h"
 #include "specialize.h"
+#include "softfloat.h"
 
 uint_fast32_t f64_to_ui32_r_minMag( float64_t a, bool exact )
 {
diff --git a/ext/softfloat/f64_to_ui64.c b/ext/softfloat/f64_to_ui64.c
index 3898df1d8b..e92d605e52 100644
--- a/ext/softfloat/f64_to_ui64.c
+++ b/ext/softfloat/f64_to_ui64.c
@@ -36,11 +36,10 @@ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 
 #include <stdbool.h>
 #include <stdint.h>
-
-#include "internals.h"
 #include "platform.h"
-#include "softfloat.h"
+#include "internals.h"
 #include "specialize.h"
+#include "softfloat.h"
 
 uint_fast64_t f64_to_ui64( float64_t a, uint_fast8_t roundingMode, bool exact )
 {
diff --git a/ext/softfloat/f64_to_ui64_r_minMag.c b/ext/softfloat/f64_to_ui64_r_minMag.c
index fee3d0ca96..25918c486d 100644
--- a/ext/softfloat/f64_to_ui64_r_minMag.c
+++ b/ext/softfloat/f64_to_ui64_r_minMag.c
@@ -36,11 +36,10 @@ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 
 #include <stdbool.h>
 #include <stdint.h>
-
-#include "internals.h"
 #include "platform.h"
-#include "softfloat.h"
+#include "internals.h"
 #include "specialize.h"
+#include "softfloat.h"
 
 uint_fast64_t f64_to_ui64_r_minMag( float64_t a, bool exact )
 {
diff --git a/ext/softfloat/fall_maxmin.c b/ext/softfloat/fall_maxmin.c
new file mode 100644
index 0000000000..32a9ade59e
--- /dev/null
+++ b/ext/softfloat/fall_maxmin.c
@@ -0,0 +1,81 @@
+
+/*============================================================================
+
+This C source file is part of the SoftFloat IEEE Floating-Point Arithmetic
+Package, Release 3d, by John R. Hauser.
+
+Copyright 2011, 2012, 2013, 2014, 2015, 2016 The Regents of the University of
+California.  All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met:
+
+ 1. Redistributions of source code must retain the above copyright notice,
+    this list of conditions, and the following disclaimer.
+
+ 2. Redistributions in binary form must reproduce the above copyright notice,
+    this list of conditions, and the following disclaimer in the documentation
+    and/or other materials provided with the distribution.
+
+ 3. Neither the name of the University nor the names of its contributors may
+    be used to endorse or promote products derived from this software without
+    specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS "AS IS", AND ANY
+EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE, ARE
+DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY
+DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+=============================================================================*/
+
+#include <stdio.h>
+#include <stdbool.h>
+#include <stdint.h>
+#include "platform.h"
+#include "internals.h"
+#include "specialize.h"
+#include "softfloat.h"
+
+#define COMPARE_MAX(a, b, bits) \
+float ## bits ## _t f ## bits ## _max( float ## bits ## _t a, float ## bits ## _t b )          \
+{                                                                                              \
+    bool greater = f ## bits ## _lt_quiet(b, a) ||                                             \
+               (f ## bits ## _eq(b, a) && signF ## bits ## UI(b.v));                           \
+                                                                                               \
+    if (isNaNF ## bits ## UI(a.v) && isNaNF ## bits ## UI(b.v)) {                              \
+        union ui ## bits ## _f ## bits  ui;                                                    \
+        ui.ui = defaultNaNF ## bits ## UI;                                                     \
+        return ui.f;                                                                           \
+    } else {                                                                                   \
+        return greater || isNaNF ## bits ## UI((b).v) ? a : b;                                 \
+    }                                                                                          \
+}
+
+#define COMPARE_MIN(a, b, bits) \
+float ## bits ## _t f ## bits ## _min( float ## bits ## _t a, float ## bits ## _t b )          \
+{                                                                                              \
+    bool less = f ## bits ## _lt_quiet(a, b) ||                                                \
+               (f ## bits ## _eq(a, b) && signF ## bits ## UI(a.v));                           \
+                                                                                               \
+    if (isNaNF ## bits ## UI(a.v) && isNaNF ## bits ## UI(b.v)) {                              \
+        union ui ## bits ## _f ## bits  ui;                                                    \
+        ui.ui = defaultNaNF ## bits ## UI;                                                     \
+        return ui.f;                                                                           \
+    } else {                                                                                   \
+        return less || isNaNF ## bits ## UI((b).v) ? a : b;                                    \
+    }                                                                                          \
+}
+
+COMPARE_MAX(a, b, 16);
+COMPARE_MAX(a, b, 32);
+COMPARE_MAX(a, b, 64);
+
+COMPARE_MIN(a, b, 16);
+COMPARE_MIN(a, b, 32);
+COMPARE_MIN(a, b, 64);
diff --git a/ext/softfloat/fall_reciprocal.c b/ext/softfloat/fall_reciprocal.c
new file mode 100644
index 0000000000..1c96458935
--- /dev/null
+++ b/ext/softfloat/fall_reciprocal.c
@@ -0,0 +1,392 @@
+
+/*============================================================================
+
+This C source file is part of the SoftFloat IEEE Floating-Point Arithmetic
+Package, Release 3d, by John R. Hauser.
+
+Copyright 2011, 2012, 2013, 2014, 2015, 2016 The Regents of the University of
+California.  All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met:
+
+ 1. Redistributions of source code must retain the above copyright notice,
+    this list of conditions, and the following disclaimer.
+
+ 2. Redistributions in binary form must reproduce the above copyright notice,
+    this list of conditions, and the following disclaimer in the documentation
+    and/or other materials provided with the distribution.
+
+ 3. Neither the name of the University nor the names of its contributors may
+    be used to endorse or promote products derived from this software without
+    specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS "AS IS", AND ANY
+EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE, ARE
+DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY
+DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+=============================================================================*/
+
+#include <assert.h>
+#include <stdbool.h>
+#include <stdint.h>
+#include "platform.h"
+#include "internals.h"
+#include "specialize.h"
+#include "softfloat.h"
+
+static inline uint64_t extract64(uint64_t val, int pos, int len)
+{
+  assert(pos >= 0 && len > 0 && len <= 64 - pos);
+  return (val >> pos) & (~UINT64_C(0) >> (64 - len));
+}
+
+static inline uint64_t make_mask64(int pos, int len)
+{
+    assert(pos >= 0 && len > 0 && pos < 64 && len <= 64);
+    return (UINT64_MAX >> (64 - len)) << pos;
+}
+
+//user needs to truncate output to required length
+static inline uint64_t rsqrte7(uint64_t val, int e, int s, bool sub) {
+  uint64_t exp = extract64(val, s, e);
+  uint64_t sig = extract64(val, 0, s);
+  uint64_t sign = extract64(val, s + e, 1);
+  const int p = 7;
+
+  static const uint8_t table[] = {
+      52, 51, 50, 48, 47, 46, 44, 43,
+      42, 41, 40, 39, 38, 36, 35, 34,
+      33, 32, 31, 30, 30, 29, 28, 27,
+      26, 25, 24, 23, 23, 22, 21, 20,
+      19, 19, 18, 17, 16, 16, 15, 14,
+      14, 13, 12, 12, 11, 10, 10, 9,
+      9, 8, 7, 7, 6, 6, 5, 4,
+      4, 3, 3, 2, 2, 1, 1, 0,
+      127, 125, 123, 121, 119, 118, 116, 114,
+      113, 111, 109, 108, 106, 105, 103, 102,
+      100, 99, 97, 96, 95, 93, 92, 91,
+      90, 88, 87, 86, 85, 84, 83, 82,
+      80, 79, 78, 77, 76, 75, 74, 73,
+      72, 71, 70, 70, 69, 68, 67, 66,
+      65, 64, 63, 63, 62, 61, 60, 59,
+      59, 58, 57, 56, 56, 55, 54, 53};
+
+  if (sub) {
+      while (extract64(sig, s - 1, 1) == 0)
+          exp--, sig <<= 1;
+
+      sig = (sig << 1) & make_mask64(0 ,s);
+  }
+
+  int idx = ((exp & 1) << (p-1)) | (sig >> (s-p+1));
+  uint64_t out_sig = (uint64_t)(table[idx]) << (s-p);
+  uint64_t out_exp = (3 * make_mask64(0, e - 1) + ~exp) / 2;
+
+  return (sign << (s+e)) | (out_exp << s) | out_sig;
+}
+
+float16_t f16_rsqrte7(float16_t in)
+{
+    union ui16_f16 uA;
+
+    uA.f = in;
+    unsigned int ret = f16_classify(in);
+    bool sub = false;
+    switch(ret) {
+    case 0x001: // -inf
+    case 0x002: // -normal
+    case 0x004: // -subnormal
+    case 0x100: // sNaN
+        softfloat_exceptionFlags |= softfloat_flag_invalid;
+    case 0x200: //qNaN
+        uA.ui = defaultNaNF16UI;
+        break;
+    case 0x008: // -0
+        uA.ui = 0xfc00;
+        softfloat_exceptionFlags |= softfloat_flag_infinite;
+        break;
+    case 0x010: // +0
+        uA.ui = 0x7c00;
+        softfloat_exceptionFlags |= softfloat_flag_infinite;
+        break;
+    case 0x080: //+inf
+        uA.ui = 0x0;
+        break;
+    case 0x020: //+ sub
+        sub = true;
+    default: // +num
+        uA.ui = rsqrte7(uA.ui, 5, 10, sub);
+        break;
+    }
+
+    return uA.f;
+}
+
+float32_t f32_rsqrte7(float32_t in)
+{
+    union ui32_f32 uA;
+
+    uA.f = in;
+    unsigned int ret = f32_classify(in);
+    bool sub = false;
+    switch(ret) {
+    case 0x001: // -inf
+    case 0x002: // -normal
+    case 0x004: // -subnormal
+    case 0x100: // sNaN
+        softfloat_exceptionFlags |= softfloat_flag_invalid;
+    case 0x200: //qNaN
+        uA.ui = defaultNaNF32UI;
+        break;
+    case 0x008: // -0
+        uA.ui = 0xff800000;
+        softfloat_exceptionFlags |= softfloat_flag_infinite;
+        break;
+    case 0x010: // +0
+        uA.ui = 0x7f800000;
+        softfloat_exceptionFlags |= softfloat_flag_infinite;
+        break;
+    case 0x080: //+inf
+        uA.ui = 0x0;
+        break;
+    case 0x020: //+ sub
+        sub = true;
+    default: // +num
+        uA.ui = rsqrte7(uA.ui, 8, 23, sub);
+        break;
+    }
+
+    return uA.f;
+}
+
+float64_t f64_rsqrte7(float64_t in)
+{
+    union ui64_f64 uA;
+
+    uA.f = in;
+    unsigned int ret = f64_classify(in);
+    bool sub = false;
+    switch(ret) {
+    case 0x001: // -inf
+    case 0x002: // -normal
+    case 0x004: // -subnormal
+    case 0x100: // sNaN
+        softfloat_exceptionFlags |= softfloat_flag_invalid;
+    case 0x200: //qNaN
+        uA.ui = defaultNaNF64UI;
+        break;
+    case 0x008: // -0
+        uA.ui = 0xfff0000000000000ul;
+        softfloat_exceptionFlags |= softfloat_flag_infinite;
+        break;
+    case 0x010: // +0
+        uA.ui = 0x7ff0000000000000ul;
+        softfloat_exceptionFlags |= softfloat_flag_infinite;
+        break;
+    case 0x080: //+inf
+        uA.ui = 0x0;
+        break;
+    case 0x020: //+ sub
+        sub = true;
+    default: // +num
+        uA.ui = rsqrte7(uA.ui, 11, 52, sub);
+        break;
+    }
+
+    return uA.f;
+}
+
+//user needs to truncate output to required length
+static inline uint64_t recip7(uint64_t val, int e, int s, int rm, bool sub,
+                              bool *round_abnormal)
+{
+    uint64_t exp = extract64(val, s, e);
+    uint64_t sig = extract64(val, 0, s);
+    uint64_t sign = extract64(val, s + e, 1);
+    const int p = 7;
+
+    static const uint8_t table[] = {
+        127, 125, 123, 121, 119, 117, 116, 114,
+        112, 110, 109, 107, 105, 104, 102, 100,
+        99, 97, 96, 94, 93, 91, 90, 88,
+        87, 85, 84, 83, 81, 80, 79, 77,
+        76, 75, 74, 72, 71, 70, 69, 68,
+        66, 65, 64, 63, 62, 61, 60, 59,
+        58, 57, 56, 55, 54, 53, 52, 51,
+        50, 49, 48, 47, 46, 45, 44, 43,
+        42, 41, 40, 40, 39, 38, 37, 36,
+        35, 35, 34, 33, 32, 31, 31, 30,
+        29, 28, 28, 27, 26, 25, 25, 24,
+        23, 23, 22, 21, 21, 20, 19, 19,
+        18, 17, 17, 16, 15, 15, 14, 14,
+        13, 12, 12, 11, 11, 10, 9, 9,
+        8, 8, 7, 7, 6, 5, 5, 4,
+        4, 3, 3, 2, 2, 1, 1, 0};
+
+    if (sub) {
+        while (extract64(sig, s - 1, 1) == 0)
+            exp--, sig <<= 1;
+
+        sig = (sig << 1) & make_mask64(0 ,s);
+
+        if (exp != 0 && exp != UINT64_MAX) {
+            *round_abnormal = true;
+            if (rm == 1 ||
+                (rm == 2 && !sign) ||
+                (rm == 3 && sign))
+                return ((sign << (s+e)) | make_mask64(s, e)) - 1;
+            else
+                return (sign << (s+e)) | make_mask64(s, e);
+        }
+    }
+
+    int idx = sig >> (s-p);
+    uint64_t out_sig = (uint64_t)(table[idx]) << (s-p);
+    uint64_t out_exp = 2 * make_mask64(0, e - 1) + ~exp;
+    if (out_exp == 0 || out_exp == UINT64_MAX) {
+        out_sig = (out_sig >> 1) | make_mask64(s - 1, 1);
+        if (out_exp == UINT64_MAX) {
+            out_sig >>= 1;
+            out_exp = 0;
+        }
+    }
+
+    return (sign << (s+e)) | (out_exp << s) | out_sig;
+}
+
+float16_t f16_recip7(float16_t in)
+{
+    union ui16_f16 uA;
+
+    uA.f = in;
+    unsigned int ret = f16_classify(in);
+    bool sub = false;
+    bool round_abnormal = false;
+    switch(ret) {
+    case 0x001: // -inf
+        uA.ui = 0x8000;
+        break;
+    case 0x080: //+inf
+        uA.ui = 0x0;
+        break;
+    case 0x008: // -0
+        uA.ui = 0xfc00;
+        softfloat_exceptionFlags |= softfloat_flag_infinite;
+        break;
+    case 0x010: // +0
+        uA.ui = 0x7c00;
+        softfloat_exceptionFlags |= softfloat_flag_infinite;
+        break;
+    case 0x100: // sNaN
+        softfloat_exceptionFlags |= softfloat_flag_invalid;
+    case 0x200: //qNaN
+        uA.ui = defaultNaNF16UI;
+        break;
+    case 0x004: // -subnormal
+    case 0x020: //+ sub
+        sub = true;
+    default: // +- normal
+        uA.ui = recip7(uA.ui, 5, 10,
+                       softfloat_roundingMode, sub, &round_abnormal);
+        if (round_abnormal)
+            softfloat_exceptionFlags |= softfloat_flag_inexact |
+                                        softfloat_flag_overflow;
+        break;
+    }
+
+    return uA.f;
+}
+
+float32_t f32_recip7(float32_t in)
+{
+    union ui32_f32 uA;
+
+    uA.f = in;
+    unsigned int ret = f32_classify(in);
+    bool sub = false;
+    bool round_abnormal = false;
+    switch(ret) {
+    case 0x001: // -inf
+        uA.ui = 0x80000000;
+        break;
+    case 0x080: //+inf
+        uA.ui = 0x0;
+        break;
+    case 0x008: // -0
+        uA.ui = 0xff800000;
+        softfloat_exceptionFlags |= softfloat_flag_infinite;
+        break;
+    case 0x010: // +0
+        uA.ui = 0x7f800000;
+        softfloat_exceptionFlags |= softfloat_flag_infinite;
+        break;
+    case 0x100: // sNaN
+        softfloat_exceptionFlags |= softfloat_flag_invalid;
+    case 0x200: //qNaN
+        uA.ui = defaultNaNF32UI;
+        break;
+    case 0x004: // -subnormal
+    case 0x020: //+ sub
+        sub = true;
+    default: // +- normal
+        uA.ui = recip7(uA.ui, 8, 23,
+                       softfloat_roundingMode, sub, &round_abnormal);
+        if (round_abnormal)
+          softfloat_exceptionFlags |= softfloat_flag_inexact |
+                                      softfloat_flag_overflow;
+        break;
+    }
+
+    return uA.f;
+}
+
+float64_t f64_recip7(float64_t in)
+{
+    union ui64_f64 uA;
+
+    uA.f = in;
+    unsigned int ret = f64_classify(in);
+    bool sub = false;
+    bool round_abnormal = false;
+    switch(ret) {
+    case 0x001: // -inf
+        uA.ui = 0x8000000000000000;
+        break;
+    case 0x080: //+inf
+        uA.ui = 0x0;
+        break;
+    case 0x008: // -0
+        uA.ui = 0xfff0000000000000;
+        softfloat_exceptionFlags |= softfloat_flag_infinite;
+        break;
+    case 0x010: // +0
+        uA.ui = 0x7ff0000000000000;
+        softfloat_exceptionFlags |= softfloat_flag_infinite;
+        break;
+    case 0x100: // sNaN
+        softfloat_exceptionFlags |= softfloat_flag_invalid;
+    case 0x200: //qNaN
+        uA.ui = defaultNaNF64UI;
+        break;
+    case 0x004: // -subnormal
+    case 0x020: //+ sub
+        sub = true;
+    default: // +- normal
+        uA.ui = recip7(uA.ui, 11, 52,
+                       softfloat_roundingMode, sub, &round_abnormal);
+        if (round_abnormal)
+            softfloat_exceptionFlags |= softfloat_flag_inexact |
+                                        softfloat_flag_overflow;
+        break;
+    }
+
+    return uA.f;
+}
diff --git a/ext/softfloat/i32_to_extF80.c b/ext/softfloat/i32_to_extF80.c
new file mode 100644
index 0000000000..5bfd7c1dd2
--- /dev/null
+++ b/ext/softfloat/i32_to_extF80.c
@@ -0,0 +1,65 @@
+
+/*============================================================================
+
+This C source file is part of the SoftFloat IEEE Floating-Point Arithmetic
+Package, Release 3d, by John R. Hauser.
+
+Copyright 2011, 2012, 2013, 2014, 2015, 2016 The Regents of the University of
+California.  All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met:
+
+ 1. Redistributions of source code must retain the above copyright notice,
+    this list of conditions, and the following disclaimer.
+
+ 2. Redistributions in binary form must reproduce the above copyright notice,
+    this list of conditions, and the following disclaimer in the documentation
+    and/or other materials provided with the distribution.
+
+ 3. Neither the name of the University nor the names of its contributors may
+    be used to endorse or promote products derived from this software without
+    specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS "AS IS", AND ANY
+EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE, ARE
+DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY
+DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+=============================================================================*/
+
+#include <stdbool.h>
+#include <stdint.h>
+#include "platform.h"
+#include "internals.h"
+#include "softfloat.h"
+
+extFloat80_t i32_to_extF80( int32_t a )
+{
+    uint_fast16_t uiZ64;
+    uint_fast32_t absA;
+    bool sign;
+    int_fast8_t shiftDist;
+    union { struct extFloat80M s; extFloat80_t f; } uZ;
+
+    uiZ64 = 0;
+    absA = 0;
+    if ( a ) {
+        sign = (a < 0);
+        absA = sign ? -(uint_fast32_t) a : (uint_fast32_t) a;
+        shiftDist = softfloat_countLeadingZeros32( absA );
+        uiZ64 = packToExtF80UI64( sign, 0x401E - shiftDist );
+        absA <<= shiftDist;
+    }
+    uZ.s.signExp = uiZ64;
+    uZ.s.signif = (uint_fast64_t) absA<<32;
+    return uZ.f;
+
+}
+
diff --git a/ext/softfloat/i32_to_extF80M.c b/ext/softfloat/i32_to_extF80M.c
new file mode 100644
index 0000000000..c2bd80c1e6
--- /dev/null
+++ b/ext/softfloat/i32_to_extF80M.c
@@ -0,0 +1,78 @@
+
+/*============================================================================
+
+This C source file is part of the SoftFloat IEEE Floating-Point Arithmetic
+Package, Release 3d, by John R. Hauser.
+
+Copyright 2011, 2012, 2013, 2014, 2015, 2016 The Regents of the University of
+California.  All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met:
+
+ 1. Redistributions of source code must retain the above copyright notice,
+    this list of conditions, and the following disclaimer.
+
+ 2. Redistributions in binary form must reproduce the above copyright notice,
+    this list of conditions, and the following disclaimer in the documentation
+    and/or other materials provided with the distribution.
+
+ 3. Neither the name of the University nor the names of its contributors may
+    be used to endorse or promote products derived from this software without
+    specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS "AS IS", AND ANY
+EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE, ARE
+DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY
+DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+=============================================================================*/
+
+#include <stdint.h>
+#include "platform.h"
+#include "internals.h"
+#include "softfloat.h"
+
+#ifdef SOFTFLOAT_FAST_INT64
+
+void i32_to_extF80M( int32_t a, extFloat80_t *zPtr )
+{
+
+    *zPtr = i32_to_extF80( a );
+
+}
+
+#else
+
+void i32_to_extF80M( int32_t a, extFloat80_t *zPtr )
+{
+    struct extFloat80M *zSPtr;
+    uint_fast16_t uiZ64;
+    uint64_t sigZ;
+    bool sign;
+    uint32_t absA;
+    int_fast8_t shiftDist;
+
+    zSPtr = (struct extFloat80M *) zPtr;
+    uiZ64 = 0;
+    sigZ = 0;
+    if ( a ) {
+        sign = (a < 0);
+        absA = sign ? -(uint32_t) a : (uint32_t) a;
+        shiftDist = softfloat_countLeadingZeros32( absA );
+        uiZ64 = packToExtF80UI64( sign, 0x401E - shiftDist );
+        sigZ = (uint64_t) (absA<<shiftDist)<<32;
+    }
+    zSPtr->signExp = uiZ64;
+    zSPtr->signif = sigZ;
+
+}
+
+#endif
+
diff --git a/ext/softfloat/i32_to_f128.c b/ext/softfloat/i32_to_f128.c
index a369429c93..af7268aede 100644
--- a/ext/softfloat/i32_to_f128.c
+++ b/ext/softfloat/i32_to_f128.c
@@ -35,9 +35,8 @@ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 =============================================================================*/
 
 #include <stdint.h>
-
-#include "internals.h"
 #include "platform.h"
+#include "internals.h"
 #include "softfloat.h"
 
 float128_t i32_to_f128( int32_t a )
diff --git a/ext/softfloat/i32_to_f128M.c b/ext/softfloat/i32_to_f128M.c
new file mode 100644
index 0000000000..6e23a6bf78
--- /dev/null
+++ b/ext/softfloat/i32_to_f128M.c
@@ -0,0 +1,81 @@
+
+/*============================================================================
+
+This C source file is part of the SoftFloat IEEE Floating-Point Arithmetic
+Package, Release 3d, by John R. Hauser.
+
+Copyright 2011, 2012, 2013, 2014, 2015, 2016 The Regents of the University of
+California.  All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met:
+
+ 1. Redistributions of source code must retain the above copyright notice,
+    this list of conditions, and the following disclaimer.
+
+ 2. Redistributions in binary form must reproduce the above copyright notice,
+    this list of conditions, and the following disclaimer in the documentation
+    and/or other materials provided with the distribution.
+
+ 3. Neither the name of the University nor the names of its contributors may
+    be used to endorse or promote products derived from this software without
+    specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS "AS IS", AND ANY
+EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE, ARE
+DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY
+DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+=============================================================================*/
+
+#include <stdint.h>
+#include "platform.h"
+#include "internals.h"
+#include "softfloat.h"
+
+#ifdef SOFTFLOAT_FAST_INT64
+
+void i32_to_f128M( int32_t a, float128_t *zPtr )
+{
+
+    *zPtr = i32_to_f128( a );
+
+}
+
+#else
+
+void i32_to_f128M( int32_t a, float128_t *zPtr )
+{
+    uint32_t *zWPtr;
+    uint32_t uiZ96, uiZ64;
+    bool sign;
+    uint32_t absA;
+    int_fast8_t shiftDist;
+    uint64_t normAbsA;
+
+    zWPtr = (uint32_t *) zPtr;
+    uiZ96 = 0;
+    uiZ64 = 0;
+    if ( a ) {
+        sign = (a < 0);
+        absA = sign ? -(uint32_t) a : (uint32_t) a;
+        shiftDist = softfloat_countLeadingZeros32( absA ) + 17;
+        normAbsA = (uint64_t) absA<<shiftDist;
+        uiZ96 = packToF128UI96( sign, 0x402E - shiftDist, normAbsA>>32 );
+        uiZ64 = normAbsA;
+    }
+    zWPtr[indexWord( 4, 3 )] = uiZ96;
+    zWPtr[indexWord( 4, 2 )] = uiZ64;
+    zWPtr[indexWord( 4, 1 )] = 0;
+    zWPtr[indexWord( 4, 0 )] = 0;
+
+}
+
+#endif
+
diff --git a/ext/softfloat/i32_to_f16.c b/ext/softfloat/i32_to_f16.c
index b3aa50eecc..7442972812 100644
--- a/ext/softfloat/i32_to_f16.c
+++ b/ext/softfloat/i32_to_f16.c
@@ -36,9 +36,8 @@ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 
 #include <stdbool.h>
 #include <stdint.h>
-
-#include "internals.h"
 #include "platform.h"
+#include "internals.h"
 #include "softfloat.h"
 
 float16_t i32_to_f16( int32_t a )
diff --git a/ext/softfloat/i32_to_f32.c b/ext/softfloat/i32_to_f32.c
index b821727481..b1aedbacf9 100644
--- a/ext/softfloat/i32_to_f32.c
+++ b/ext/softfloat/i32_to_f32.c
@@ -36,9 +36,8 @@ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 
 #include <stdbool.h>
 #include <stdint.h>
-
-#include "internals.h"
 #include "platform.h"
+#include "internals.h"
 #include "softfloat.h"
 
 float32_t i32_to_f32( int32_t a )
diff --git a/ext/softfloat/i32_to_f64.c b/ext/softfloat/i32_to_f64.c
index 381378347b..d3901eb4af 100644
--- a/ext/softfloat/i32_to_f64.c
+++ b/ext/softfloat/i32_to_f64.c
@@ -36,9 +36,8 @@ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 
 #include <stdbool.h>
 #include <stdint.h>
-
-#include "internals.h"
 #include "platform.h"
+#include "internals.h"
 #include "softfloat.h"
 
 float64_t i32_to_f64( int32_t a )
diff --git a/ext/softfloat/i64_to_extF80.c b/ext/softfloat/i64_to_extF80.c
new file mode 100644
index 0000000000..93aaa35f38
--- /dev/null
+++ b/ext/softfloat/i64_to_extF80.c
@@ -0,0 +1,65 @@
+
+/*============================================================================
+
+This C source file is part of the SoftFloat IEEE Floating-Point Arithmetic
+Package, Release 3d, by John R. Hauser.
+
+Copyright 2011, 2012, 2013, 2014, 2015, 2016 The Regents of the University of
+California.  All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met:
+
+ 1. Redistributions of source code must retain the above copyright notice,
+    this list of conditions, and the following disclaimer.
+
+ 2. Redistributions in binary form must reproduce the above copyright notice,
+    this list of conditions, and the following disclaimer in the documentation
+    and/or other materials provided with the distribution.
+
+ 3. Neither the name of the University nor the names of its contributors may
+    be used to endorse or promote products derived from this software without
+    specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS "AS IS", AND ANY
+EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE, ARE
+DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY
+DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+=============================================================================*/
+
+#include <stdbool.h>
+#include <stdint.h>
+#include "platform.h"
+#include "internals.h"
+#include "softfloat.h"
+
+extFloat80_t i64_to_extF80( int64_t a )
+{
+    uint_fast16_t uiZ64;
+    uint_fast64_t absA;
+    bool sign;
+    int_fast8_t shiftDist;
+    union { struct extFloat80M s; extFloat80_t f; } uZ;
+
+    uiZ64 = 0;
+    absA = 0;
+    if ( a ) {
+        sign = (a < 0);
+        absA = sign ? -(uint_fast64_t) a : (uint_fast64_t) a;
+        shiftDist = softfloat_countLeadingZeros64( absA );
+        uiZ64 = packToExtF80UI64( sign, 0x403E - shiftDist );
+        absA <<= shiftDist;
+    }
+    uZ.s.signExp = uiZ64;
+    uZ.s.signif  = absA;
+    return uZ.f;
+
+}
+
diff --git a/ext/softfloat/i64_to_extF80M.c b/ext/softfloat/i64_to_extF80M.c
new file mode 100644
index 0000000000..976f1e4f68
--- /dev/null
+++ b/ext/softfloat/i64_to_extF80M.c
@@ -0,0 +1,78 @@
+
+/*============================================================================
+
+This C source file is part of the SoftFloat IEEE Floating-Point Arithmetic
+Package, Release 3d, by John R. Hauser.
+
+Copyright 2011, 2012, 2013, 2014, 2015, 2016 The Regents of the University of
+California.  All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met:
+
+ 1. Redistributions of source code must retain the above copyright notice,
+    this list of conditions, and the following disclaimer.
+
+ 2. Redistributions in binary form must reproduce the above copyright notice,
+    this list of conditions, and the following disclaimer in the documentation
+    and/or other materials provided with the distribution.
+
+ 3. Neither the name of the University nor the names of its contributors may
+    be used to endorse or promote products derived from this software without
+    specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS "AS IS", AND ANY
+EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE, ARE
+DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY
+DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+=============================================================================*/
+
+#include <stdint.h>
+#include "platform.h"
+#include "internals.h"
+#include "softfloat.h"
+
+#ifdef SOFTFLOAT_FAST_INT64
+
+void i64_to_extF80M( int64_t a, extFloat80_t *zPtr )
+{
+
+    *zPtr = i64_to_extF80( a );
+
+}
+
+#else
+
+void i64_to_extF80M( int64_t a, extFloat80_t *zPtr )
+{
+    struct extFloat80M *zSPtr;
+    uint_fast16_t uiZ64;
+    uint64_t sigZ;
+    bool sign;
+    uint64_t absA;
+    int_fast8_t shiftDist;
+
+    zSPtr = (struct extFloat80M *) zPtr;
+    uiZ64 = 0;
+    sigZ = 0;
+    if ( a ) {
+        sign = (a < 0);
+        absA = sign ? -(uint64_t) a : (uint64_t) a;
+        shiftDist = softfloat_countLeadingZeros64( absA );
+        uiZ64 = packToExtF80UI64( sign, 0x403E - shiftDist );
+        sigZ = absA<<shiftDist;
+    }
+    zSPtr->signExp = uiZ64;
+    zSPtr->signif = sigZ;
+
+}
+
+#endif
+
diff --git a/ext/softfloat/i64_to_f128.c b/ext/softfloat/i64_to_f128.c
index f2c660d3d3..8eddbae2d7 100644
--- a/ext/softfloat/i64_to_f128.c
+++ b/ext/softfloat/i64_to_f128.c
@@ -35,9 +35,8 @@ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 =============================================================================*/
 
 #include <stdint.h>
-
-#include "internals.h"
 #include "platform.h"
+#include "internals.h"
 #include "softfloat.h"
 
 float128_t i64_to_f128( int64_t a )
diff --git a/ext/softfloat/i64_to_f128M.c b/ext/softfloat/i64_to_f128M.c
new file mode 100644
index 0000000000..61d60074e6
--- /dev/null
+++ b/ext/softfloat/i64_to_f128M.c
@@ -0,0 +1,92 @@
+
+/*============================================================================
+
+This C source file is part of the SoftFloat IEEE Floating-Point Arithmetic
+Package, Release 3d, by John R. Hauser.
+
+Copyright 2011, 2012, 2013, 2014, 2015, 2016 The Regents of the University of
+California.  All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met:
+
+ 1. Redistributions of source code must retain the above copyright notice,
+    this list of conditions, and the following disclaimer.
+
+ 2. Redistributions in binary form must reproduce the above copyright notice,
+    this list of conditions, and the following disclaimer in the documentation
+    and/or other materials provided with the distribution.
+
+ 3. Neither the name of the University nor the names of its contributors may
+    be used to endorse or promote products derived from this software without
+    specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS "AS IS", AND ANY
+EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE, ARE
+DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY
+DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+=============================================================================*/
+
+#include <stdint.h>
+#include "platform.h"
+#include "internals.h"
+#include "softfloat.h"
+
+#ifdef SOFTFLOAT_FAST_INT64
+
+void i64_to_f128M( int64_t a, float128_t *zPtr )
+{
+
+    *zPtr = i64_to_f128( a );
+
+}
+
+#else
+
+void i64_to_f128M( int64_t a, float128_t *zPtr )
+{
+    uint32_t *zWPtr;
+    uint32_t uiZ96, uiZ64;
+    bool sign;
+    uint64_t absA;
+    uint_fast8_t shiftDist;
+    uint32_t *ptr;
+
+    zWPtr = (uint32_t *) zPtr;
+    uiZ96 = 0;
+    uiZ64 = 0;
+    zWPtr[indexWord( 4, 1 )] = 0;
+    zWPtr[indexWord( 4, 0 )] = 0;
+    if ( a ) {
+        sign = (a < 0);
+        absA = sign ? -(uint64_t) a : (uint64_t) a;
+        shiftDist = softfloat_countLeadingZeros64( absA ) + 17;
+        if ( shiftDist < 32 ) {
+            ptr = zWPtr + indexMultiwordHi( 4, 3 );
+            ptr[indexWord( 3, 2 )] = 0;
+            ptr[indexWord( 3, 1 )] = absA>>32;
+            ptr[indexWord( 3, 0 )] = absA;
+            softfloat_shortShiftLeft96M( ptr, shiftDist, ptr );
+            ptr[indexWordHi( 3 )] =
+                packToF128UI96(
+                    sign, 0x404E - shiftDist, ptr[indexWordHi( 3 )] );
+            return;
+        }
+        absA <<= shiftDist - 32;
+        uiZ96 = packToF128UI96( sign, 0x404E - shiftDist, absA>>32 );
+        uiZ64 = absA;
+    }
+    zWPtr[indexWord( 4, 3 )] = uiZ96;
+    zWPtr[indexWord( 4, 2 )] = uiZ64;
+
+}
+
+#endif
+
diff --git a/ext/softfloat/i64_to_f16.c b/ext/softfloat/i64_to_f16.c
index 6c2709e727..56f0191287 100644
--- a/ext/softfloat/i64_to_f16.c
+++ b/ext/softfloat/i64_to_f16.c
@@ -36,9 +36,8 @@ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 
 #include <stdbool.h>
 #include <stdint.h>
-
-#include "internals.h"
 #include "platform.h"
+#include "internals.h"
 #include "softfloat.h"
 
 float16_t i64_to_f16( int64_t a )
diff --git a/ext/softfloat/i64_to_f32.c b/ext/softfloat/i64_to_f32.c
index ee47d9d71a..e8a3494cc9 100644
--- a/ext/softfloat/i64_to_f32.c
+++ b/ext/softfloat/i64_to_f32.c
@@ -36,9 +36,8 @@ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 
 #include <stdbool.h>
 #include <stdint.h>
-
-#include "internals.h"
 #include "platform.h"
+#include "internals.h"
 #include "softfloat.h"
 
 float32_t i64_to_f32( int64_t a )
diff --git a/ext/softfloat/i64_to_f64.c b/ext/softfloat/i64_to_f64.c
index b78687845f..392fef9dcd 100644
--- a/ext/softfloat/i64_to_f64.c
+++ b/ext/softfloat/i64_to_f64.c
@@ -36,9 +36,8 @@ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 
 #include <stdbool.h>
 #include <stdint.h>
-
-#include "internals.h"
 #include "platform.h"
+#include "internals.h"
 #include "softfloat.h"
 
 float64_t i64_to_f64( int64_t a )
diff --git a/ext/softfloat/platform.h b/ext/softfloat/platform.h
index 03dd429faf..9b329914fb 100644
--- a/ext/softfloat/platform.h
+++ b/ext/softfloat/platform.h
@@ -39,8 +39,12 @@ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 #define LITTLEENDIAN 1
 
 #define INLINE_LEVEL 5
-#define SOFTFLOAT_FAST_INT64
+
+// This macro is defined in SConscript
+// #define SOFTFLOAT_FAST_INT64
+
 #define SOFTFLOAT_FAST_DIV64TO32
+#define SOFTFLOAT_ROUND_ODD
 
 /*----------------------------------------------------------------------------
 *----------------------------------------------------------------------------*/
diff --git a/ext/softfloat/primitiveTypes.h b/ext/softfloat/primitiveTypes.h
index c1c27124aa..b112049193 100644
--- a/ext/softfloat/primitiveTypes.h
+++ b/ext/softfloat/primitiveTypes.h
@@ -38,7 +38,6 @@ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 #define primitiveTypes_h 1
 
 #include <stdint.h>
-
 #include "platform.h"
 
 #ifdef SOFTFLOAT_FAST_INT64
diff --git a/ext/softfloat/primitives.h b/ext/softfloat/primitives.h
index eb200d8214..1acc8a8ae1 100644
--- a/ext/softfloat/primitives.h
+++ b/ext/softfloat/primitives.h
@@ -39,7 +39,6 @@ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 
 #include <stdbool.h>
 #include <stdint.h>
-
 #include "primitiveTypes.h"
 
 #ifdef __cplusplus
diff --git a/ext/softfloat/s_add128.c b/ext/softfloat/s_add128.c
index a8a24b76be..8065656a76 100644
--- a/ext/softfloat/s_add128.c
+++ b/ext/softfloat/s_add128.c
@@ -35,7 +35,6 @@ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 =============================================================================*/
 
 #include <stdint.h>
-
 #include "platform.h"
 #include "primitiveTypes.h"
 
diff --git a/ext/softfloat/s_add256M.c b/ext/softfloat/s_add256M.c
index 05a8c40021..d07b0046a4 100644
--- a/ext/softfloat/s_add256M.c
+++ b/ext/softfloat/s_add256M.c
@@ -35,7 +35,6 @@ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 =============================================================================*/
 
 #include <stdint.h>
-
 #include "platform.h"
 #include "primitiveTypes.h"
 
diff --git a/ext/softfloat/s_addCarryM.c b/ext/softfloat/s_addCarryM.c
index 40af891fb7..fae1db4981 100644
--- a/ext/softfloat/s_addCarryM.c
+++ b/ext/softfloat/s_addCarryM.c
@@ -35,7 +35,6 @@ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 =============================================================================*/
 
 #include <stdint.h>
-
 #include "platform.h"
 #include "primitiveTypes.h"
 
diff --git a/ext/softfloat/s_addComplCarryM.c b/ext/softfloat/s_addComplCarryM.c
index 7db29ca0af..02f2bce431 100644
--- a/ext/softfloat/s_addComplCarryM.c
+++ b/ext/softfloat/s_addComplCarryM.c
@@ -35,7 +35,6 @@ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 =============================================================================*/
 
 #include <stdint.h>
-
 #include "platform.h"
 #include "primitiveTypes.h"
 
diff --git a/ext/softfloat/s_addExtF80M.c b/ext/softfloat/s_addExtF80M.c
new file mode 100644
index 0000000000..87aab2b478
--- /dev/null
+++ b/ext/softfloat/s_addExtF80M.c
@@ -0,0 +1,186 @@
+
+/*============================================================================
+
+This C source file is part of the SoftFloat IEEE Floating-Point Arithmetic
+Package, Release 3d, by John R. Hauser.
+
+Copyright 2011, 2012, 2013, 2014 The Regents of the University of California.
+All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met:
+
+ 1. Redistributions of source code must retain the above copyright notice,
+    this list of conditions, and the following disclaimer.
+
+ 2. Redistributions in binary form must reproduce the above copyright notice,
+    this list of conditions, and the following disclaimer in the documentation
+    and/or other materials provided with the distribution.
+
+ 3. Neither the name of the University nor the names of its contributors may
+    be used to endorse or promote products derived from this software without
+    specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS "AS IS", AND ANY
+EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE, ARE
+DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY
+DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+=============================================================================*/
+
+#include <stdbool.h>
+#include <stdint.h>
+#include "platform.h"
+#include "internals.h"
+#include "specialize.h"
+#include "softfloat.h"
+
+void
+ softfloat_addExtF80M(
+     const struct extFloat80M *aSPtr,
+     const struct extFloat80M *bSPtr,
+     struct extFloat80M *zSPtr,
+     bool negateB
+ )
+{
+    uint32_t uiA64;
+    int32_t expA;
+    uint32_t uiB64;
+    int32_t expB;
+    uint32_t uiZ64;
+    bool signZ, signB;
+    const struct extFloat80M *tempSPtr;
+    uint64_t sigZ, sigB;
+    void
+     (*roundPackRoutinePtr)(
+         bool, int32_t, uint32_t *, uint_fast8_t, struct extFloat80M * );
+    int32_t expDiff;
+    uint32_t extSigX[3], sigZExtra;
+
+    /*------------------------------------------------------------------------
+    *------------------------------------------------------------------------*/
+    uiA64 = aSPtr->signExp;
+    expA = expExtF80UI64( uiA64 );
+    uiB64 = bSPtr->signExp;
+    expB = expExtF80UI64( uiB64 );
+    /*------------------------------------------------------------------------
+    *------------------------------------------------------------------------*/
+    if ( (expA == 0x7FFF) || (expB == 0x7FFF) ) {
+        if ( softfloat_tryPropagateNaNExtF80M( aSPtr, bSPtr, zSPtr ) ) return;
+        uiZ64 = uiA64;
+        if ( expB == 0x7FFF ) {
+            uiZ64 = uiB64 ^ packToExtF80UI64( negateB, 0 );
+            if ( (expA == 0x7FFF) && (uiZ64 != uiA64) ) {
+                softfloat_invalidExtF80M( zSPtr );
+                return;
+            }
+        }
+        zSPtr->signExp = uiZ64;
+        zSPtr->signif = UINT64_C( 0x8000000000000000 );
+        return;
+    }
+    /*------------------------------------------------------------------------
+    *------------------------------------------------------------------------*/
+    signZ = signExtF80UI64( uiA64 );
+    signB = signExtF80UI64( uiB64 ) ^ negateB;
+    negateB = (signZ != signB);
+    if ( expA < expB ) {
+        signZ = signB;
+        expA = expB;
+        expB = expExtF80UI64( uiA64 );
+        tempSPtr = aSPtr;
+        aSPtr = bSPtr;
+        bSPtr = tempSPtr;
+    }
+    if ( ! expB ) {
+        expB = 1;
+        if ( ! expA ) expA = 1;
+    }
+    sigZ = aSPtr->signif;
+    sigB = bSPtr->signif;
+    /*------------------------------------------------------------------------
+    *------------------------------------------------------------------------*/
+    roundPackRoutinePtr = softfloat_roundPackMToExtF80M;
+    expDiff = expA - expB;
+    if ( expDiff ) {
+        /*--------------------------------------------------------------------
+        *--------------------------------------------------------------------*/
+        extSigX[indexWord( 3, 2 )] = sigB>>32;
+        extSigX[indexWord( 3, 1 )] = sigB;
+        extSigX[indexWord( 3, 0 )] = 0;
+        softfloat_shiftRightJam96M( extSigX, expDiff, extSigX );
+        sigB =
+            (uint64_t) extSigX[indexWord( 3, 2 )]<<32
+                | extSigX[indexWord( 3, 1 )];
+        if ( negateB ) {
+            sigZ -= sigB;
+            sigZExtra = extSigX[indexWordLo( 3 )];
+            if ( sigZExtra ) {
+                --sigZ;
+                sigZExtra = -sigZExtra;
+            }
+            if ( ! (sigZ & UINT64_C( 0x8000000000000000 )) ) {
+                if ( sigZ & UINT64_C( 0x4000000000000000 ) ) {
+                    --expA;
+                    sigZ = sigZ<<1 | sigZExtra>>31;
+                    sigZExtra <<= 1;
+                } else {
+                    roundPackRoutinePtr = softfloat_normRoundPackMToExtF80M;
+                }
+            }
+        } else {
+            sigZ += sigB;
+            if ( sigZ & UINT64_C( 0x8000000000000000 ) ) goto sigZ;
+            sigZExtra = (uint32_t) sigZ<<31 | (extSigX[indexWordLo( 3 )] != 0);
+            goto completeNormAfterAdd;
+        }
+    } else {
+        /*--------------------------------------------------------------------
+        *--------------------------------------------------------------------*/
+        sigZExtra = 0;
+        if ( negateB ) {
+            if ( sigZ < sigB ) {
+                signZ = ! signZ;
+                sigZ = sigB - sigZ;
+            } else {
+                sigZ -= sigB;
+                if ( ! sigZ ) {
+                    signZ = (softfloat_roundingMode == softfloat_round_min);
+                    zSPtr->signExp = packToExtF80UI64( signZ, 0 );
+                    zSPtr->signif = 0;
+                    return;
+                }
+            }
+            roundPackRoutinePtr = softfloat_normRoundPackMToExtF80M;
+        } else {
+            sigZ += sigB;
+            if ( sigZ < sigB ) {
+                sigZExtra = (uint32_t) sigZ<<31;
+ completeNormAfterAdd:
+                ++expA;
+                sigZ = UINT64_C( 0x8000000000000000 ) | sigZ>>1;
+            } else {
+                if ( ! (sigZ & UINT64_C( 0x8000000000000000 )) ) {
+                    roundPackRoutinePtr = softfloat_normRoundPackMToExtF80M;
+                }
+            }
+        }
+    }
+    extSigX[indexWord( 3, 0 )] = sigZExtra;
+ sigZ:
+    extSigX[indexWord( 3, 2 )] = sigZ>>32;
+    extSigX[indexWord( 3, 1 )] = sigZ;
+    /*------------------------------------------------------------------------
+    *------------------------------------------------------------------------*/
+ roundPack:
+    (*roundPackRoutinePtr)(
+        signZ, expA, extSigX, extF80_roundingPrecision, zSPtr );
+
+}
+
diff --git a/ext/softfloat/s_addF128M.c b/ext/softfloat/s_addF128M.c
new file mode 100644
index 0000000000..8309b49750
--- /dev/null
+++ b/ext/softfloat/s_addF128M.c
@@ -0,0 +1,211 @@
+
+/*============================================================================
+
+This C source file is part of the SoftFloat IEEE Floating-Point Arithmetic
+Package, Release 3d, by John R. Hauser.
+
+Copyright 2011, 2012, 2013, 2014 The Regents of the University of California.
+All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met:
+
+ 1. Redistributions of source code must retain the above copyright notice,
+    this list of conditions, and the following disclaimer.
+
+ 2. Redistributions in binary form must reproduce the above copyright notice,
+    this list of conditions, and the following disclaimer in the documentation
+    and/or other materials provided with the distribution.
+
+ 3. Neither the name of the University nor the names of its contributors may
+    be used to endorse or promote products derived from this software without
+    specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS "AS IS", AND ANY
+EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE, ARE
+DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY
+DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+=============================================================================*/
+
+#include <stdbool.h>
+#include <stdint.h>
+#include "platform.h"
+#include "internals.h"
+#include "specialize.h"
+#include "softfloat.h"
+
+void
+ softfloat_addF128M(
+     const uint32_t *aWPtr,
+     const uint32_t *bWPtr,
+     uint32_t *zWPtr,
+     bool negateB
+ )
+{
+    uint32_t uiA96;
+    int32_t expA;
+    uint32_t uiB96;
+    int32_t expB;
+    uint32_t uiZ96;
+    bool signZ, signB;
+    const uint32_t *tempPtr;
+    uint32_t sig96A, sig96B;
+    int32_t expDiff;
+    uint_fast8_t
+     (*addCarryMRoutinePtr)(
+         uint_fast8_t,
+         const uint32_t *,
+         const uint32_t *,
+         uint_fast8_t,
+         uint32_t *
+     );
+    uint32_t extSigZ[5], wordSigZ;
+    uint_fast8_t carry;
+    void (*roundPackRoutinePtr)( bool, int32_t, uint32_t *, uint32_t * );
+
+    /*------------------------------------------------------------------------
+    *------------------------------------------------------------------------*/
+    uiA96 = aWPtr[indexWordHi( 4 )];
+    expA = expF128UI96( uiA96 );
+    uiB96 = bWPtr[indexWordHi( 4 )];
+    expB = expF128UI96( uiB96 );
+    /*------------------------------------------------------------------------
+    *------------------------------------------------------------------------*/
+    if ( (expA == 0x7FFF) || (expB == 0x7FFF) ) {
+        if ( softfloat_tryPropagateNaNF128M( aWPtr, bWPtr, zWPtr ) ) return;
+        uiZ96 = uiA96;
+        if ( expB == 0x7FFF ) {
+            uiZ96 = uiB96 ^ packToF128UI96( negateB, 0, 0 );
+            if ( (expA == 0x7FFF) && (uiZ96 != uiA96) ) {
+                softfloat_invalidF128M( zWPtr );
+                return;
+            }
+        }
+        zWPtr[indexWordHi( 4 )] = uiZ96;
+        zWPtr[indexWord( 4, 2 )] = 0;
+        zWPtr[indexWord( 4, 1 )] = 0;
+        zWPtr[indexWord( 4, 0 )] = 0;
+        return;
+    }
+    /*------------------------------------------------------------------------
+    *------------------------------------------------------------------------*/
+    signZ = signF128UI96( uiA96 );
+    signB = signF128UI96( uiB96 ) ^ negateB;
+    negateB = (signZ != signB);
+    if ( (uint32_t) (uiA96<<1) < (uint32_t) (uiB96<<1) ) {
+        signZ = signB;
+        expA = expB;
+        expB = expF128UI96( uiA96 );
+        tempPtr = aWPtr;
+        aWPtr = bWPtr;
+        bWPtr = tempPtr;
+        uiA96 = uiB96;
+        uiB96 = bWPtr[indexWordHi( 4 )];
+    }
+    sig96A = fracF128UI96( uiA96 );
+    sig96B = fracF128UI96( uiB96 );
+    if ( expA ) {
+        --expA;
+        sig96A |= 0x00010000;
+        if ( expB ) {
+            --expB;
+            sig96B |= 0x00010000;
+        }
+    }
+    /*------------------------------------------------------------------------
+    *------------------------------------------------------------------------*/
+    addCarryMRoutinePtr =
+        negateB ? softfloat_addComplCarryM : softfloat_addCarryM;
+    expDiff = expA - expB;
+    if ( expDiff ) {
+        /*--------------------------------------------------------------------
+        *--------------------------------------------------------------------*/
+        extSigZ[indexWordHi( 5 )] = sig96B;
+        extSigZ[indexWord( 5, 3 )] = bWPtr[indexWord( 4, 2 )];
+        extSigZ[indexWord( 5, 2 )] = bWPtr[indexWord( 4, 1 )];
+        extSigZ[indexWord( 5, 1 )] = bWPtr[indexWord( 4, 0 )];
+        extSigZ[indexWord( 5, 0 )] = 0;
+        softfloat_shiftRightJam160M( extSigZ, expDiff, extSigZ );
+        sig96B = extSigZ[indexWordHi( 5 )];
+        carry = 0;
+        if ( negateB ) {
+            sig96B = ~sig96B;
+            wordSigZ = extSigZ[indexWordLo( 5 )];
+            extSigZ[indexWordLo( 5 )] = -wordSigZ;
+            carry = ! wordSigZ;
+        }
+        carry =
+            (*addCarryMRoutinePtr)(
+                3,
+                &aWPtr[indexMultiwordLo( 4, 3 )],
+                &extSigZ[indexMultiword( 5, 3, 1 )],
+                carry,
+                &extSigZ[indexMultiword( 5, 3, 1 )]
+            );
+        wordSigZ = sig96A + sig96B + carry;
+    } else {
+        /*--------------------------------------------------------------------
+        *--------------------------------------------------------------------*/
+        extSigZ[indexWordLo( 5 )] = 0;
+        carry =
+            (*addCarryMRoutinePtr)(
+                3,
+                &aWPtr[indexMultiwordLo( 4, 3 )],
+                &bWPtr[indexMultiwordLo( 4, 3 )],
+                negateB,
+                &extSigZ[indexMultiword( 5, 3, 1 )]
+            );
+        if ( negateB ) {
+            wordSigZ = sig96A + ~sig96B + carry;
+            if ( wordSigZ & 0x80000000 ) {
+                signZ = ! signZ;
+                carry =
+                    softfloat_addComplCarry96M(
+                        &bWPtr[indexMultiwordLo( 4, 3 )],
+                        &aWPtr[indexMultiwordLo( 4, 3 )],
+                        1,
+                        &extSigZ[indexMultiword( 5, 3, 1 )]
+                    );
+                wordSigZ = sig96B + ~sig96A + carry;
+            } else {
+                if (
+                    ! wordSigZ && ! extSigZ[indexWord( 5, 3 )]
+                        && ! (  extSigZ[indexWord( 5, 2 )]
+                              | extSigZ[indexWord( 5, 1 )]
+                              | extSigZ[indexWord( 5, 0 )]
+                             )
+                ) {
+                    signZ = (softfloat_roundingMode == softfloat_round_min);
+                    zWPtr[indexWordHi( 4 )] = packToF128UI96( signZ, 0, 0 );
+                    zWPtr[indexWord( 4, 2 )] = 0;
+                    zWPtr[indexWord( 4, 1 )] = 0;
+                    zWPtr[indexWord( 4, 0 )] = 0;
+                    return;
+                }
+            }
+        } else {
+            wordSigZ = sig96A + sig96B + carry;
+        }
+    }
+    extSigZ[indexWordHi( 5 )] = wordSigZ;
+    /*------------------------------------------------------------------------
+    *------------------------------------------------------------------------*/
+    roundPackRoutinePtr = softfloat_normRoundPackMToF128M;
+    if ( 0x00010000 <= wordSigZ ) {
+        if ( 0x00020000 <= wordSigZ ) {
+            ++expA;
+            softfloat_shortShiftRightJam160M( extSigZ, 1, extSigZ );
+        }
+        roundPackRoutinePtr = softfloat_roundPackMToF128M;
+    }
+    (*roundPackRoutinePtr)( signZ, expA, extSigZ, zWPtr );
+
+}
+
diff --git a/ext/softfloat/s_addM.c b/ext/softfloat/s_addM.c
index 002265cb83..a06eda65ac 100644
--- a/ext/softfloat/s_addM.c
+++ b/ext/softfloat/s_addM.c
@@ -35,7 +35,6 @@ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 =============================================================================*/
 
 #include <stdint.h>
-
 #include "platform.h"
 #include "primitiveTypes.h"
 
diff --git a/ext/softfloat/s_addMagsExtF80.c b/ext/softfloat/s_addMagsExtF80.c
new file mode 100644
index 0000000000..3d07ad3578
--- /dev/null
+++ b/ext/softfloat/s_addMagsExtF80.c
@@ -0,0 +1,156 @@
+
+/*============================================================================
+
+This C source file is part of the SoftFloat IEEE Floating-Point Arithmetic
+Package, Release 3d, by John R. Hauser.
+
+Copyright 2011, 2012, 2013, 2014 The Regents of the University of California.
+All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met:
+
+ 1. Redistributions of source code must retain the above copyright notice,
+    this list of conditions, and the following disclaimer.
+
+ 2. Redistributions in binary form must reproduce the above copyright notice,
+    this list of conditions, and the following disclaimer in the documentation
+    and/or other materials provided with the distribution.
+
+ 3. Neither the name of the University nor the names of its contributors may
+    be used to endorse or promote products derived from this software without
+    specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS "AS IS", AND ANY
+EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE, ARE
+DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY
+DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+=============================================================================*/
+
+#include <stdbool.h>
+#include <stdint.h>
+#include "platform.h"
+#include "internals.h"
+#include "specialize.h"
+#include "softfloat.h"
+
+extFloat80_t
+ softfloat_addMagsExtF80(
+     uint_fast16_t uiA64,
+     uint_fast64_t uiA0,
+     uint_fast16_t uiB64,
+     uint_fast64_t uiB0,
+     bool signZ
+ )
+{
+    int_fast32_t expA;
+    uint_fast64_t sigA;
+    int_fast32_t expB;
+    uint_fast64_t sigB;
+    int_fast32_t expDiff;
+    uint_fast16_t uiZ64;
+    uint_fast64_t uiZ0, sigZ, sigZExtra;
+    struct exp32_sig64 normExpSig;
+    int_fast32_t expZ;
+    struct uint64_extra sig64Extra;
+    struct uint128 uiZ;
+    union { struct extFloat80M s; extFloat80_t f; } uZ;
+
+    /*------------------------------------------------------------------------
+    *------------------------------------------------------------------------*/
+    expA = expExtF80UI64( uiA64 );
+    sigA = uiA0;
+    expB = expExtF80UI64( uiB64 );
+    sigB = uiB0;
+    /*------------------------------------------------------------------------
+    *------------------------------------------------------------------------*/
+    expDiff = expA - expB;
+    if ( ! expDiff ) {
+        if ( expA == 0x7FFF ) {
+            if ( (sigA | sigB) & UINT64_C( 0x7FFFFFFFFFFFFFFF ) ) {
+                goto propagateNaN;
+            }
+            uiZ64 = uiA64;
+            uiZ0  = uiA0;
+            goto uiZ;
+        }
+        sigZ = sigA + sigB;
+        sigZExtra = 0;
+        if ( ! expA ) {
+            normExpSig = softfloat_normSubnormalExtF80Sig( sigZ );
+            expZ = normExpSig.exp + 1;
+            sigZ = normExpSig.sig;
+            goto roundAndPack;
+        }
+        expZ = expA;
+        goto shiftRight1;
+    }
+    /*------------------------------------------------------------------------
+    *------------------------------------------------------------------------*/
+    if ( expDiff < 0 ) {
+        if ( expB == 0x7FFF ) {
+            if ( sigB & UINT64_C( 0x7FFFFFFFFFFFFFFF ) ) goto propagateNaN;
+            uiZ64 = packToExtF80UI64( signZ, 0x7FFF );
+            uiZ0  = uiB0;
+            goto uiZ;
+        }
+        expZ = expB;
+        if ( ! expA ) {
+            ++expDiff;
+            sigZExtra = 0;
+            if ( ! expDiff ) goto newlyAligned;
+        }
+        sig64Extra = softfloat_shiftRightJam64Extra( sigA, 0, -expDiff );
+        sigA = sig64Extra.v;
+        sigZExtra = sig64Extra.extra;
+    } else {
+        if ( expA == 0x7FFF ) {
+            if ( sigA & UINT64_C( 0x7FFFFFFFFFFFFFFF ) ) goto propagateNaN;
+            uiZ64 = uiA64;
+            uiZ0  = uiA0;
+            goto uiZ;
+        }
+        expZ = expA;
+        if ( ! expB ) {
+            --expDiff;
+            sigZExtra = 0;
+            if ( ! expDiff ) goto newlyAligned;
+        }
+        sig64Extra = softfloat_shiftRightJam64Extra( sigB, 0, expDiff );
+        sigB = sig64Extra.v;
+        sigZExtra = sig64Extra.extra;
+    }
+ newlyAligned:
+    sigZ = sigA + sigB;
+    if ( sigZ & UINT64_C( 0x8000000000000000 ) ) goto roundAndPack;
+    /*------------------------------------------------------------------------
+    *------------------------------------------------------------------------*/
+ shiftRight1:
+    sig64Extra = softfloat_shortShiftRightJam64Extra( sigZ, sigZExtra, 1 );
+    sigZ = sig64Extra.v | UINT64_C( 0x8000000000000000 );
+    sigZExtra = sig64Extra.extra;
+    ++expZ;
+ roundAndPack:
+    return
+        softfloat_roundPackToExtF80(
+            signZ, expZ, sigZ, sigZExtra, extF80_roundingPrecision );
+    /*------------------------------------------------------------------------
+    *------------------------------------------------------------------------*/
+ propagateNaN:
+    uiZ = softfloat_propagateNaNExtF80UI( uiA64, uiA0, uiB64, uiB0 );
+    uiZ64 = uiZ.v64;
+    uiZ0  = uiZ.v0;
+ uiZ:
+    uZ.s.signExp = uiZ64;
+    uZ.s.signif  = uiZ0;
+    return uZ.f;
+
+}
+
diff --git a/ext/softfloat/s_addMagsF128.c b/ext/softfloat/s_addMagsF128.c
index abd066fd44..292f0aa5fc 100644
--- a/ext/softfloat/s_addMagsF128.c
+++ b/ext/softfloat/s_addMagsF128.c
@@ -36,9 +36,8 @@ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 
 #include <stdbool.h>
 #include <stdint.h>
-
-#include "internals.h"
 #include "platform.h"
+#include "internals.h"
 #include "specialize.h"
 
 float128_t
diff --git a/ext/softfloat/s_addMagsF16.c b/ext/softfloat/s_addMagsF16.c
index 4487459e9b..4204c1e029 100644
--- a/ext/softfloat/s_addMagsF16.c
+++ b/ext/softfloat/s_addMagsF16.c
@@ -36,11 +36,10 @@ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 
 #include <stdbool.h>
 #include <stdint.h>
-
-#include "internals.h"
 #include "platform.h"
-#include "softfloat.h"
+#include "internals.h"
 #include "specialize.h"
+#include "softfloat.h"
 
 float16_t softfloat_addMagsF16( uint_fast16_t uiA, uint_fast16_t uiB )
 {
diff --git a/ext/softfloat/s_addMagsF32.c b/ext/softfloat/s_addMagsF32.c
index 5ac197a84a..ba647814d7 100644
--- a/ext/softfloat/s_addMagsF32.c
+++ b/ext/softfloat/s_addMagsF32.c
@@ -36,9 +36,8 @@ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 
 #include <stdbool.h>
 #include <stdint.h>
-
-#include "internals.h"
 #include "platform.h"
+#include "internals.h"
 #include "specialize.h"
 
 float32_t softfloat_addMagsF32( uint_fast32_t uiA, uint_fast32_t uiB )
diff --git a/ext/softfloat/s_addMagsF64.c b/ext/softfloat/s_addMagsF64.c
index f22fcbff27..63e1afe9d4 100644
--- a/ext/softfloat/s_addMagsF64.c
+++ b/ext/softfloat/s_addMagsF64.c
@@ -36,9 +36,8 @@ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 
 #include <stdbool.h>
 #include <stdint.h>
-
-#include "internals.h"
 #include "platform.h"
+#include "internals.h"
 #include "specialize.h"
 
 float64_t
diff --git a/ext/softfloat/s_approxRecip32_1.c b/ext/softfloat/s_approxRecip32_1.c
index 0f9e76c666..a06192ed5b 100644
--- a/ext/softfloat/s_approxRecip32_1.c
+++ b/ext/softfloat/s_approxRecip32_1.c
@@ -35,7 +35,6 @@ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 =============================================================================*/
 
 #include <stdint.h>
-
 #include "platform.h"
 
 #ifndef softfloat_approxRecip32_1
diff --git a/ext/softfloat/s_approxRecipSqrt32_1.c b/ext/softfloat/s_approxRecipSqrt32_1.c
index 43e36438a5..2ab71a25a6 100644
--- a/ext/softfloat/s_approxRecipSqrt32_1.c
+++ b/ext/softfloat/s_approxRecipSqrt32_1.c
@@ -35,7 +35,6 @@ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 =============================================================================*/
 
 #include <stdint.h>
-
 #include "platform.h"
 
 #ifndef softfloat_approxRecipSqrt32_1
diff --git a/ext/softfloat/s_approxRecipSqrt_1Ks.c b/ext/softfloat/s_approxRecipSqrt_1Ks.c
index b5c3b0a8ca..a60cf82557 100644
--- a/ext/softfloat/s_approxRecipSqrt_1Ks.c
+++ b/ext/softfloat/s_approxRecipSqrt_1Ks.c
@@ -35,7 +35,6 @@ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 =============================================================================*/
 
 #include <stdint.h>
-
 #include "platform.h"
 #include "primitives.h"
 
diff --git a/ext/softfloat/s_approxRecip_1Ks.c b/ext/softfloat/s_approxRecip_1Ks.c
index 67309b8587..1108fcbe81 100644
--- a/ext/softfloat/s_approxRecip_1Ks.c
+++ b/ext/softfloat/s_approxRecip_1Ks.c
@@ -35,7 +35,6 @@ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 =============================================================================*/
 
 #include <stdint.h>
-
 #include "platform.h"
 #include "primitives.h"
 
diff --git a/ext/softfloat/s_compare128M.c b/ext/softfloat/s_compare128M.c
index 7bba6bb77a..c2819e20d2 100644
--- a/ext/softfloat/s_compare128M.c
+++ b/ext/softfloat/s_compare128M.c
@@ -35,7 +35,6 @@ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 =============================================================================*/
 
 #include <stdint.h>
-
 #include "platform.h"
 #include "primitiveTypes.h"
 
diff --git a/ext/softfloat/s_compare96M.c b/ext/softfloat/s_compare96M.c
index 1c2b22a332..0dc39f5dba 100644
--- a/ext/softfloat/s_compare96M.c
+++ b/ext/softfloat/s_compare96M.c
@@ -35,7 +35,6 @@ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 =============================================================================*/
 
 #include <stdint.h>
-
 #include "platform.h"
 #include "primitiveTypes.h"
 
diff --git a/ext/softfloat/s_compareNonnormExtF80M.c b/ext/softfloat/s_compareNonnormExtF80M.c
new file mode 100644
index 0000000000..059b6e89b8
--- /dev/null
+++ b/ext/softfloat/s_compareNonnormExtF80M.c
@@ -0,0 +1,111 @@
+
+/*============================================================================
+
+This C source file is part of the SoftFloat IEEE Floating-Point Arithmetic
+Package, Release 3d, by John R. Hauser.
+
+Copyright 2011, 2012, 2013, 2014 The Regents of the University of California.
+All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met:
+
+ 1. Redistributions of source code must retain the above copyright notice,
+    this list of conditions, and the following disclaimer.
+
+ 2. Redistributions in binary form must reproduce the above copyright notice,
+    this list of conditions, and the following disclaimer in the documentation
+    and/or other materials provided with the distribution.
+
+ 3. Neither the name of the University nor the names of its contributors may
+    be used to endorse or promote products derived from this software without
+    specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS "AS IS", AND ANY
+EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE, ARE
+DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY
+DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+=============================================================================*/
+
+#include <stdint.h>
+#include "platform.h"
+#include "internals.h"
+#include "softfloat_types.h"
+
+int
+ softfloat_compareNonnormExtF80M(
+     const struct extFloat80M *aSPtr, const struct extFloat80M *bSPtr )
+{
+    uint_fast16_t uiA64, uiB64;
+    uint64_t sigA;
+    bool signB;
+    uint64_t sigB;
+    int32_t expA, expB;
+
+    /*------------------------------------------------------------------------
+    *------------------------------------------------------------------------*/
+    uiA64 = aSPtr->signExp;
+    uiB64 = bSPtr->signExp;
+    sigA = aSPtr->signif;
+    signB = signExtF80UI64( uiB64 );
+    sigB = bSPtr->signif;
+    /*------------------------------------------------------------------------
+    *------------------------------------------------------------------------*/
+    if ( (uiA64 ^ uiB64) & 0x8000 ) {
+        if ( ! (sigA | sigB) ) return 0;
+        goto resultFromSignB;
+    }
+    /*------------------------------------------------------------------------
+    *------------------------------------------------------------------------*/
+    expA = expExtF80UI64( uiA64 );
+    expB = expExtF80UI64( uiB64 );
+    if ( expA == 0x7FFF ) {
+        if (expB == 0x7FFF) return 0;
+        signB = ! signB;
+        goto resultFromSignB;
+    }
+    if ( expB == 0x7FFF ) {
+        goto resultFromSignB;
+    }
+    /*------------------------------------------------------------------------
+    *------------------------------------------------------------------------*/
+    if ( ! expA ) expA = 1;
+    if ( ! (sigA & UINT64_C( 0x8000000000000000 )) ) {
+        if ( sigA ) {
+            expA += softfloat_normExtF80SigM( &sigA );
+        } else {
+            expA = -128;
+        }
+    }
+    if ( ! expB ) expB = 1;
+    if ( ! (sigB & UINT64_C( 0x8000000000000000 )) ) {
+        if ( sigB ) {
+            expB += softfloat_normExtF80SigM( &sigB );
+        } else {
+            expB = -128;
+        }
+    }
+    /*------------------------------------------------------------------------
+    *------------------------------------------------------------------------*/
+    if ( signB ) {
+        if ( expA < expB ) return 1;
+        if ( (expB < expA) || (sigB < sigA) ) return -1;
+    } else {
+        if ( expB < expA ) return 1;
+        if ( (expA < expB) || (sigA < sigB) ) return -1;
+    }
+    return (sigA != sigB);
+    /*------------------------------------------------------------------------
+    *------------------------------------------------------------------------*/
+ resultFromSignB:
+    return signB ? 1 : -1;
+
+}
+
diff --git a/ext/softfloat/s_countLeadingZeros16.c b/ext/softfloat/s_countLeadingZeros16.c
index ce806b1077..950db6c84c 100644
--- a/ext/softfloat/s_countLeadingZeros16.c
+++ b/ext/softfloat/s_countLeadingZeros16.c
@@ -35,7 +35,6 @@ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 =============================================================================*/
 
 #include <stdint.h>
-
 #include "platform.h"
 
 #ifndef softfloat_countLeadingZeros16
diff --git a/ext/softfloat/s_countLeadingZeros32.c b/ext/softfloat/s_countLeadingZeros32.c
index 1c6dc9b85a..fbf8ab6afb 100644
--- a/ext/softfloat/s_countLeadingZeros32.c
+++ b/ext/softfloat/s_countLeadingZeros32.c
@@ -35,7 +35,6 @@ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 =============================================================================*/
 
 #include <stdint.h>
-
 #include "platform.h"
 
 #ifndef softfloat_countLeadingZeros32
diff --git a/ext/softfloat/s_countLeadingZeros64.c b/ext/softfloat/s_countLeadingZeros64.c
index 9633905c7f..00457418be 100644
--- a/ext/softfloat/s_countLeadingZeros64.c
+++ b/ext/softfloat/s_countLeadingZeros64.c
@@ -35,7 +35,6 @@ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 =============================================================================*/
 
 #include <stdint.h>
-
 #include "platform.h"
 
 #ifndef softfloat_countLeadingZeros64
diff --git a/ext/softfloat/s_countLeadingZeros8.c b/ext/softfloat/s_countLeadingZeros8.c
index 50c4504c95..1158d01c31 100644
--- a/ext/softfloat/s_countLeadingZeros8.c
+++ b/ext/softfloat/s_countLeadingZeros8.c
@@ -35,7 +35,6 @@ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 =============================================================================*/
 
 #include <stdint.h>
-
 #include "platform.h"
 #include "primitives.h"
 
diff --git a/ext/softfloat/s_eq128.c b/ext/softfloat/s_eq128.c
index a840ef2819..625ef002dd 100644
--- a/ext/softfloat/s_eq128.c
+++ b/ext/softfloat/s_eq128.c
@@ -36,7 +36,6 @@ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 
 #include <stdbool.h>
 #include <stdint.h>
-
 #include "platform.h"
 
 #ifndef softfloat_eq128
diff --git a/ext/softfloat/s_invalidExtF80M.c b/ext/softfloat/s_invalidExtF80M.c
new file mode 100644
index 0000000000..ed5ebd2523
--- /dev/null
+++ b/ext/softfloat/s_invalidExtF80M.c
@@ -0,0 +1,49 @@
+
+/*============================================================================
+
+This C source file is part of the SoftFloat IEEE Floating-Point Arithmetic
+Package, Release 3d, by John R. Hauser.
+
+Copyright 2011, 2012, 2013, 2014 The Regents of the University of California.
+All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met:
+
+ 1. Redistributions of source code must retain the above copyright notice,
+    this list of conditions, and the following disclaimer.
+
+ 2. Redistributions in binary form must reproduce the above copyright notice,
+    this list of conditions, and the following disclaimer in the documentation
+    and/or other materials provided with the distribution.
+
+ 3. Neither the name of the University nor the names of its contributors may
+    be used to endorse or promote products derived from this software without
+    specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS "AS IS", AND ANY
+EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE, ARE
+DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY
+DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+=============================================================================*/
+
+#include "platform.h"
+#include "specialize.h"
+#include "softfloat.h"
+
+void softfloat_invalidExtF80M( struct extFloat80M *zSPtr )
+{
+
+    softfloat_raiseFlags( softfloat_flag_invalid );
+    zSPtr->signExp = defaultNaNExtF80UI64;
+    zSPtr->signif  = defaultNaNExtF80UI0;
+
+}
+
diff --git a/ext/softfloat/s_invalidF128M.c b/ext/softfloat/s_invalidF128M.c
new file mode 100644
index 0000000000..aca6800f03
--- /dev/null
+++ b/ext/softfloat/s_invalidF128M.c
@@ -0,0 +1,53 @@
+
+/*============================================================================
+
+This C source file is part of the SoftFloat IEEE Floating-Point Arithmetic
+Package, Release 3d, by John R. Hauser.
+
+Copyright 2011, 2012, 2013, 2014 The Regents of the University of California.
+All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met:
+
+ 1. Redistributions of source code must retain the above copyright notice,
+    this list of conditions, and the following disclaimer.
+
+ 2. Redistributions in binary form must reproduce the above copyright notice,
+    this list of conditions, and the following disclaimer in the documentation
+    and/or other materials provided with the distribution.
+
+ 3. Neither the name of the University nor the names of its contributors may
+    be used to endorse or promote products derived from this software without
+    specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS "AS IS", AND ANY
+EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE, ARE
+DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY
+DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+=============================================================================*/
+
+#include <stdint.h>
+#include "platform.h"
+#include "primitives.h"
+#include "specialize.h"
+#include "softfloat.h"
+
+void softfloat_invalidF128M( uint32_t *zWPtr )
+{
+
+    softfloat_raiseFlags( softfloat_flag_invalid );
+    zWPtr[indexWord( 4, 3 )] = defaultNaNF128UI96;
+    zWPtr[indexWord( 4, 2 )] = defaultNaNF128UI64;
+    zWPtr[indexWord( 4, 1 )] = defaultNaNF128UI32;
+    zWPtr[indexWord( 4, 0 )] = defaultNaNF128UI0;
+
+}
+
diff --git a/ext/softfloat/s_isNaNF128M.c b/ext/softfloat/s_isNaNF128M.c
new file mode 100644
index 0000000000..f4e2355b94
--- /dev/null
+++ b/ext/softfloat/s_isNaNF128M.c
@@ -0,0 +1,57 @@
+
+/*============================================================================
+
+This C source file is part of the SoftFloat IEEE Floating-Point Arithmetic
+Package, Release 3d, by John R. Hauser.
+
+Copyright 2011, 2012, 2013, 2014, 2015 The Regents of the University of
+California.  All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met:
+
+ 1. Redistributions of source code must retain the above copyright notice,
+    this list of conditions, and the following disclaimer.
+
+ 2. Redistributions in binary form must reproduce the above copyright notice,
+    this list of conditions, and the following disclaimer in the documentation
+    and/or other materials provided with the distribution.
+
+ 3. Neither the name of the University nor the names of its contributors may
+    be used to endorse or promote products derived from this software without
+    specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS "AS IS", AND ANY
+EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE, ARE
+DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY
+DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+=============================================================================*/
+
+#include <stdbool.h>
+#include <stdint.h>
+#include "platform.h"
+#include "primitives.h"
+
+/*----------------------------------------------------------------------------
+*----------------------------------------------------------------------------*/
+bool softfloat_isNaNF128M( const uint32_t *aWPtr )
+{
+    uint32_t uiA96;
+
+    uiA96 = aWPtr[indexWordHi( 4 )];
+    if ( (~uiA96 & 0x7FFF0000) != 0 ) return false;
+    return
+        ((uiA96 & 0x0000FFFF) != 0)
+            || ((aWPtr[indexWord( 4, 2 )] | aWPtr[indexWord( 4, 1 )]
+                     | aWPtr[indexWord( 4, 0 )])
+                    != 0);
+
+}
+
diff --git a/ext/softfloat/s_le128.c b/ext/softfloat/s_le128.c
index 45e94d8145..7261012f34 100644
--- a/ext/softfloat/s_le128.c
+++ b/ext/softfloat/s_le128.c
@@ -36,7 +36,6 @@ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 
 #include <stdbool.h>
 #include <stdint.h>
-
 #include "platform.h"
 
 #ifndef softfloat_le128
diff --git a/ext/softfloat/s_lt128.c b/ext/softfloat/s_lt128.c
index a117ded78e..0d461c363e 100644
--- a/ext/softfloat/s_lt128.c
+++ b/ext/softfloat/s_lt128.c
@@ -36,7 +36,6 @@ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 
 #include <stdbool.h>
 #include <stdint.h>
-
 #include "platform.h"
 
 #ifndef softfloat_lt128
diff --git a/ext/softfloat/s_mul128By32.c b/ext/softfloat/s_mul128By32.c
index 6e03c4165e..6e71dd0c53 100644
--- a/ext/softfloat/s_mul128By32.c
+++ b/ext/softfloat/s_mul128By32.c
@@ -35,7 +35,6 @@ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 =============================================================================*/
 
 #include <stdint.h>
-
 #include "platform.h"
 #include "primitiveTypes.h"
 
diff --git a/ext/softfloat/s_mul128MTo256M.c b/ext/softfloat/s_mul128MTo256M.c
index 8041040b13..49a1d294d6 100644
--- a/ext/softfloat/s_mul128MTo256M.c
+++ b/ext/softfloat/s_mul128MTo256M.c
@@ -35,7 +35,6 @@ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 =============================================================================*/
 
 #include <stdint.h>
-
 #include "platform.h"
 #include "primitiveTypes.h"
 
diff --git a/ext/softfloat/s_mul128To256M.c b/ext/softfloat/s_mul128To256M.c
index 488feb51e0..fccc2a698d 100644
--- a/ext/softfloat/s_mul128To256M.c
+++ b/ext/softfloat/s_mul128To256M.c
@@ -35,7 +35,6 @@ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 =============================================================================*/
 
 #include <stdint.h>
-
 #include "platform.h"
 
 #ifndef softfloat_mul128To256M
diff --git a/ext/softfloat/s_mul64ByShifted32To128.c b/ext/softfloat/s_mul64ByShifted32To128.c
index bf463c527b..f7e7104ea5 100644
--- a/ext/softfloat/s_mul64ByShifted32To128.c
+++ b/ext/softfloat/s_mul64ByShifted32To128.c
@@ -35,7 +35,6 @@ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 =============================================================================*/
 
 #include <stdint.h>
-
 #include "platform.h"
 #include "primitiveTypes.h"
 
diff --git a/ext/softfloat/s_mul64To128.c b/ext/softfloat/s_mul64To128.c
index 5a37424bbc..6620a20bc1 100644
--- a/ext/softfloat/s_mul64To128.c
+++ b/ext/softfloat/s_mul64To128.c
@@ -35,7 +35,6 @@ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 =============================================================================*/
 
 #include <stdint.h>
-
 #include "platform.h"
 #include "primitiveTypes.h"
 
diff --git a/ext/softfloat/s_mul64To128M.c b/ext/softfloat/s_mul64To128M.c
index 44799880f5..e3f9a481eb 100644
--- a/ext/softfloat/s_mul64To128M.c
+++ b/ext/softfloat/s_mul64To128M.c
@@ -35,7 +35,6 @@ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 =============================================================================*/
 
 #include <stdint.h>
-
 #include "platform.h"
 #include "primitiveTypes.h"
 
diff --git a/ext/softfloat/s_mulAddF128.c b/ext/softfloat/s_mulAddF128.c
index 8ea06a657f..877b33d231 100644
--- a/ext/softfloat/s_mulAddF128.c
+++ b/ext/softfloat/s_mulAddF128.c
@@ -36,11 +36,10 @@ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 
 #include <stdbool.h>
 #include <stdint.h>
-
-#include "internals.h"
 #include "platform.h"
-#include "softfloat.h"
+#include "internals.h"
 #include "specialize.h"
+#include "softfloat.h"
 
 float128_t
  softfloat_mulAddF128(
diff --git a/ext/softfloat/s_mulAddF128M.c b/ext/softfloat/s_mulAddF128M.c
new file mode 100644
index 0000000000..6944595a39
--- /dev/null
+++ b/ext/softfloat/s_mulAddF128M.c
@@ -0,0 +1,382 @@
+
+/*============================================================================
+
+This C source file is part of the SoftFloat IEEE Floating-Point Arithmetic
+Package, Release 3d, by John R. Hauser.
+
+Copyright 2011, 2012, 2013, 2014, 2015, 2016 The Regents of the University of
+California.  All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met:
+
+ 1. Redistributions of source code must retain the above copyright notice,
+    this list of conditions, and the following disclaimer.
+
+ 2. Redistributions in binary form must reproduce the above copyright notice,
+    this list of conditions, and the following disclaimer in the documentation
+    and/or other materials provided with the distribution.
+
+ 3. Neither the name of the University nor the names of its contributors may
+    be used to endorse or promote products derived from this software without
+    specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS "AS IS", AND ANY
+EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE, ARE
+DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY
+DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+=============================================================================*/
+
+#include <stdbool.h>
+#include <stdint.h>
+#include "platform.h"
+#include "internals.h"
+#include "specialize.h"
+#include "softfloat.h"
+
+void
+ softfloat_mulAddF128M(
+     const uint32_t *aWPtr,
+     const uint32_t *bWPtr,
+     const uint32_t *cWPtr,
+     uint32_t *zWPtr,
+     uint_fast8_t op
+ )
+{
+    uint32_t uiA96;
+    int32_t expA;
+    uint32_t uiB96;
+    int32_t expB;
+    uint32_t uiC96;
+    bool signC;
+    int32_t expC;
+    bool signProd, prodIsInfinite;
+    uint32_t *ptr, uiZ96, sigA[4];
+    uint_fast8_t shiftDist;
+    uint32_t sigX[5];
+    int32_t expProd;
+    uint32_t sigProd[8], wordSig;
+    bool doSub;
+    uint_fast8_t
+     (*addCarryMRoutinePtr)(
+         uint_fast8_t,
+         const uint32_t *,
+         const uint32_t *,
+         uint_fast8_t,
+         uint32_t *
+     );
+    int32_t expDiff;
+    bool signZ;
+    int32_t expZ;
+    uint32_t *extSigPtr;
+    uint_fast8_t carry;
+    void (*roundPackRoutinePtr)( bool, int32_t, uint32_t *, uint32_t * );
+
+    /*------------------------------------------------------------------------
+    *------------------------------------------------------------------------*/
+    uiA96 = aWPtr[indexWordHi( 4 )];
+    expA = expF128UI96( uiA96 );
+    uiB96 = bWPtr[indexWordHi( 4 )];
+    expB = expF128UI96( uiB96 );
+    uiC96 = cWPtr[indexWordHi( 4 )];
+    signC = signF128UI96( uiC96 ) ^ (op == softfloat_mulAdd_subC);
+    expC = expF128UI96( uiC96 );
+    signProd =
+        signF128UI96( uiA96 ) ^ signF128UI96( uiB96 )
+            ^ (op == softfloat_mulAdd_subProd);
+    /*------------------------------------------------------------------------
+    *------------------------------------------------------------------------*/
+    prodIsInfinite = false;
+    if ( (expA == 0x7FFF) || (expB == 0x7FFF) ) {
+        if ( softfloat_tryPropagateNaNF128M( aWPtr, bWPtr, zWPtr ) ) {
+            goto propagateNaN_ZC;
+        }
+        ptr = (uint32_t *) aWPtr;
+        if ( ! (uint32_t) (uiA96<<1) ) goto possibleInvalidProd;
+        if ( ! (uint32_t) (uiB96<<1) ) {
+            ptr = (uint32_t *) bWPtr;
+     possibleInvalidProd:
+            if (
+                ! (ptr[indexWord( 4, 2 )] | ptr[indexWord( 4, 1 )]
+                       | ptr[indexWord( 4, 0 )])
+            ) {
+                goto invalid;
+            }
+        }
+        prodIsInfinite = true;
+    }
+    if ( expC == 0x7FFF ) {
+        if (
+            fracF128UI96( uiC96 )
+                || (cWPtr[indexWord( 4, 2 )] | cWPtr[indexWord( 4, 1 )]
+                        | cWPtr[indexWord( 4, 0 )])
+        ) {
+            zWPtr[indexWordHi( 4 )] = 0;
+            goto propagateNaN_ZC;
+        }
+        if ( prodIsInfinite && (signProd != signC) ) goto invalid;
+        goto copyC;
+    }
+    if ( prodIsInfinite ) {
+        uiZ96 = packToF128UI96( signProd, 0x7FFF, 0 );
+        goto uiZ;
+    }
+    /*------------------------------------------------------------------------
+    *------------------------------------------------------------------------*/
+    if ( expA ) {
+        sigA[indexWordHi( 4 )] = fracF128UI96( uiA96 ) | 0x00010000;
+        sigA[indexWord( 4, 2 )] = aWPtr[indexWord( 4, 2 )];
+        sigA[indexWord( 4, 1 )] = aWPtr[indexWord( 4, 1 )];
+        sigA[indexWord( 4, 0 )] = aWPtr[indexWord( 4, 0 )];
+    } else {
+        expA = softfloat_shiftNormSigF128M( aWPtr, 0, sigA );
+        if ( expA == -128 ) goto zeroProd;
+    }
+    if ( expB ) {
+        sigX[indexWordHi( 4 )] = fracF128UI96( uiB96 ) | 0x00010000;
+        sigX[indexWord( 4, 2 )] = bWPtr[indexWord( 4, 2 )];
+        sigX[indexWord( 4, 1 )] = bWPtr[indexWord( 4, 1 )];
+        sigX[indexWord( 4, 0 )] = bWPtr[indexWord( 4, 0 )];
+    } else {
+        expB = softfloat_shiftNormSigF128M( bWPtr, 0, sigX );
+        if ( expB == -128 ) goto zeroProd;
+    }
+    /*------------------------------------------------------------------------
+    *------------------------------------------------------------------------*/
+    expProd = expA + expB - 0x3FF0;
+    softfloat_mul128MTo256M( sigA, sigX, sigProd );
+    /*------------------------------------------------------------------------
+    *------------------------------------------------------------------------*/
+    wordSig = fracF128UI96( uiC96 );
+    if ( expC ) {
+        --expC;
+        wordSig |= 0x00010000;
+    }
+    sigX[indexWordHi( 5 )] = wordSig;
+    sigX[indexWord( 5, 3 )] = cWPtr[indexWord( 4, 2 )];
+    sigX[indexWord( 5, 2 )] = cWPtr[indexWord( 4, 1 )];
+    sigX[indexWord( 5, 1 )] = cWPtr[indexWord( 4, 0 )];
+    /*------------------------------------------------------------------------
+    *------------------------------------------------------------------------*/
+    doSub = (signProd != signC);
+    addCarryMRoutinePtr =
+        doSub ? softfloat_addComplCarryM : softfloat_addCarryM;
+    expDiff = expProd - expC;
+    if ( expDiff <= 0 ) {
+        /*--------------------------------------------------------------------
+        *--------------------------------------------------------------------*/
+        signZ = signC;
+        expZ = expC;
+        if (
+            sigProd[indexWord( 8, 2 )]
+                || (sigProd[indexWord( 8, 1 )] | sigProd[indexWord( 8, 0 )])
+        ) {
+            sigProd[indexWord( 8, 3 )] |= 1;
+        }
+        extSigPtr = &sigProd[indexMultiwordHi( 8, 5 )];
+        if ( expDiff ) {
+            softfloat_shiftRightJam160M( extSigPtr, -expDiff, extSigPtr );
+        }
+        carry = 0;
+        if ( doSub ) {
+            wordSig = extSigPtr[indexWordLo( 5 )];
+            extSigPtr[indexWordLo( 5 )] = -wordSig;
+            carry = ! wordSig;
+        }
+        (*addCarryMRoutinePtr)(
+            4,
+            &sigX[indexMultiwordHi( 5, 4 )],
+            extSigPtr + indexMultiwordHi( 5, 4 ),
+            carry,
+            extSigPtr + indexMultiwordHi( 5, 4 )
+        );
+        wordSig = extSigPtr[indexWordHi( 5 )];
+        if ( ! expZ ) {
+            if ( wordSig & 0x80000000 ) {
+                signZ = ! signZ;
+                softfloat_negX160M( extSigPtr );
+                wordSig = extSigPtr[indexWordHi( 5 )];
+            }
+            goto checkCancellation;
+        }
+        if ( wordSig < 0x00010000 ) {
+            --expZ;
+            softfloat_add160M( extSigPtr, extSigPtr, extSigPtr );
+            goto roundPack;
+        }
+        goto extSigReady_noCancellation;
+    } else {
+        /*--------------------------------------------------------------------
+        *--------------------------------------------------------------------*/
+        signZ = signProd;
+        expZ = expProd;
+        sigX[indexWordLo( 5 )] = 0;
+        expDiff -= 128;
+        if ( 0 <= expDiff ) {
+            /*----------------------------------------------------------------
+            *----------------------------------------------------------------*/
+            if ( expDiff ) softfloat_shiftRightJam160M( sigX, expDiff, sigX );
+            wordSig = sigX[indexWordLo( 5 )];
+            carry = 0;
+            if ( doSub ) {
+                carry = ! wordSig;
+                wordSig = -wordSig;
+            }
+            carry =
+                (*addCarryMRoutinePtr)(
+                    4,
+                    &sigProd[indexMultiwordLo( 8, 4 )],
+                    &sigX[indexMultiwordHi( 5, 4 )],
+                    carry,
+                    &sigProd[indexMultiwordLo( 8, 4 )]
+                );
+            sigProd[indexWord( 8, 2 )] |= wordSig;
+            ptr = &sigProd[indexWord( 8, 4 )];
+        } else {
+            /*----------------------------------------------------------------
+            *----------------------------------------------------------------*/
+            shiftDist = expDiff & 31;
+            if ( shiftDist ) {
+                softfloat_shortShiftRight160M( sigX, shiftDist, sigX );
+            }
+            expDiff >>= 5;
+            extSigPtr =
+                &sigProd[indexMultiwordLo( 8, 5 )] - wordIncr
+                    + expDiff * -wordIncr;
+            carry =
+                (*addCarryMRoutinePtr)( 5, extSigPtr, sigX, doSub, extSigPtr );
+            if ( expDiff == -4 ) {
+                /*------------------------------------------------------------
+                *------------------------------------------------------------*/
+                wordSig = sigProd[indexWordHi( 8 )];
+                if ( wordSig & 0x80000000 ) {
+                    signZ = ! signZ;
+                    softfloat_negX256M( sigProd );
+                    wordSig = sigProd[indexWordHi( 8 )];
+                }
+                /*------------------------------------------------------------
+                *------------------------------------------------------------*/
+                if ( wordSig ) goto expProdBigger_noWordShift;
+                wordSig = sigProd[indexWord( 8, 6 )];
+                if ( 0x00040000 <= wordSig ) goto expProdBigger_noWordShift;
+                expZ -= 32;
+                extSigPtr = &sigProd[indexMultiwordHi( 8, 5 )] - wordIncr;
+                for (;;) {
+                    if ( wordSig ) break;
+                    wordSig = extSigPtr[indexWord( 5, 3 )];
+                    if ( 0x00040000 <= wordSig ) break;
+                    expZ -= 32;
+                    extSigPtr -= wordIncr;
+                    if ( extSigPtr == &sigProd[indexMultiwordLo( 8, 5 )] ) {
+                        goto checkCancellation;
+                    }
+                }
+                /*------------------------------------------------------------
+                *------------------------------------------------------------*/
+                ptr = extSigPtr + indexWordLo( 5 );
+                do {
+                    ptr -= wordIncr;
+                    if ( *ptr ) {
+                        extSigPtr[indexWordLo( 5 )] |= 1;
+                        break;
+                    }
+                } while ( ptr != &sigProd[indexWordLo( 8 )] );
+                wordSig = extSigPtr[indexWordHi( 5 )];
+                goto extSigReady;
+            }
+            ptr = extSigPtr + indexWordHi( 5 ) + wordIncr;
+        }
+        /*--------------------------------------------------------------------
+        *--------------------------------------------------------------------*/
+        if ( carry != doSub ) {
+            if ( doSub ) {
+                do {
+                    wordSig = *ptr;
+                    *ptr = wordSig - 1;
+                    ptr += wordIncr;
+                } while ( ! wordSig );
+            } else {
+                do {
+                    wordSig = *ptr + 1;
+                    *ptr = wordSig;
+                    ptr += wordIncr;
+                } while ( ! wordSig );
+            }
+        }
+        /*--------------------------------------------------------------------
+        *--------------------------------------------------------------------*/
+     expProdBigger_noWordShift:
+        if (
+            sigProd[indexWord( 8, 2 )]
+                || (sigProd[indexWord( 8, 1 )] | sigProd[indexWord( 8, 0 )])
+        ) {
+            sigProd[indexWord( 8, 3 )] |= 1;
+        }
+        extSigPtr = &sigProd[indexMultiwordHi( 8, 5 )];
+        wordSig = extSigPtr[indexWordHi( 5 )];
+    }
+ extSigReady:
+    roundPackRoutinePtr = softfloat_normRoundPackMToF128M;
+    if ( wordSig < 0x00010000 ) goto doRoundPack;
+ extSigReady_noCancellation:
+    if ( 0x00020000 <= wordSig ) {
+        ++expZ;
+        softfloat_shortShiftRightJam160M( extSigPtr, 1, extSigPtr );
+    }
+ roundPack:
+    roundPackRoutinePtr = softfloat_roundPackMToF128M;
+ doRoundPack:
+    (*roundPackRoutinePtr)( signZ, expZ, extSigPtr, zWPtr );
+    return;
+    /*------------------------------------------------------------------------
+    *------------------------------------------------------------------------*/
+ invalid:
+    softfloat_invalidF128M( zWPtr );
+ propagateNaN_ZC:
+    softfloat_propagateNaNF128M( zWPtr, cWPtr, zWPtr );
+    return;
+    /*------------------------------------------------------------------------
+    *------------------------------------------------------------------------*/
+ zeroProd:
+    if (
+        ! (uint32_t) (uiC96<<1) && (signProd != signC)
+            && ! cWPtr[indexWord( 4, 2 )]
+            && ! (cWPtr[indexWord( 4, 1 )] | cWPtr[indexWord( 4, 0 )])
+    ) {
+        goto completeCancellation;
+    }
+ copyC:
+    zWPtr[indexWordHi( 4 )] = uiC96;
+    zWPtr[indexWord( 4, 2 )] = cWPtr[indexWord( 4, 2 )];
+    zWPtr[indexWord( 4, 1 )] = cWPtr[indexWord( 4, 1 )];
+    zWPtr[indexWord( 4, 0 )] = cWPtr[indexWord( 4, 0 )];
+    return;
+    /*------------------------------------------------------------------------
+    *------------------------------------------------------------------------*/
+ checkCancellation:
+    if (
+        wordSig
+            || (extSigPtr[indexWord( 5, 3 )] | extSigPtr[indexWord( 5, 2 )])
+            || (extSigPtr[indexWord( 5, 1 )] | extSigPtr[indexWord( 5, 0 )])
+    ) {
+        goto extSigReady;
+    }
+ completeCancellation:
+    uiZ96 =
+        packToF128UI96(
+            (softfloat_roundingMode == softfloat_round_min), 0, 0 );
+ uiZ:
+    zWPtr[indexWordHi( 4 )] = uiZ96;
+    zWPtr[indexWord( 4, 2 )] = 0;
+    zWPtr[indexWord( 4, 1 )] = 0;
+    zWPtr[indexWord( 4, 0 )] = 0;
+
+}
+
diff --git a/ext/softfloat/s_mulAddF16.c b/ext/softfloat/s_mulAddF16.c
index d35ae542c4..b604007232 100644
--- a/ext/softfloat/s_mulAddF16.c
+++ b/ext/softfloat/s_mulAddF16.c
@@ -36,11 +36,10 @@ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 
 #include <stdbool.h>
 #include <stdint.h>
-
-#include "internals.h"
 #include "platform.h"
-#include "softfloat.h"
+#include "internals.h"
 #include "specialize.h"
+#include "softfloat.h"
 
 float16_t
  softfloat_mulAddF16(
diff --git a/ext/softfloat/s_mulAddF32.c b/ext/softfloat/s_mulAddF32.c
index be28c70b6d..d163ea02ed 100644
--- a/ext/softfloat/s_mulAddF32.c
+++ b/ext/softfloat/s_mulAddF32.c
@@ -36,11 +36,10 @@ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 
 #include <stdbool.h>
 #include <stdint.h>
-
-#include "internals.h"
 #include "platform.h"
-#include "softfloat.h"
+#include "internals.h"
 #include "specialize.h"
+#include "softfloat.h"
 
 float32_t
  softfloat_mulAddF32(
diff --git a/ext/softfloat/s_mulAddF64.c b/ext/softfloat/s_mulAddF64.c
index a12e186103..484ac586d8 100644
--- a/ext/softfloat/s_mulAddF64.c
+++ b/ext/softfloat/s_mulAddF64.c
@@ -36,11 +36,10 @@ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 
 #include <stdbool.h>
 #include <stdint.h>
-
-#include "internals.h"
 #include "platform.h"
-#include "softfloat.h"
+#include "internals.h"
 #include "specialize.h"
+#include "softfloat.h"
 
 #ifdef SOFTFLOAT_FAST_INT64
 
diff --git a/ext/softfloat/s_negXM.c b/ext/softfloat/s_negXM.c
index d90c523ef3..76f110c1cf 100644
--- a/ext/softfloat/s_negXM.c
+++ b/ext/softfloat/s_negXM.c
@@ -35,7 +35,6 @@ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 =============================================================================*/
 
 #include <stdint.h>
-
 #include "platform.h"
 #include "primitiveTypes.h"
 
diff --git a/ext/softfloat/s_normExtF80SigM.c b/ext/softfloat/s_normExtF80SigM.c
new file mode 100644
index 0000000000..5f8e0609f6
--- /dev/null
+++ b/ext/softfloat/s_normExtF80SigM.c
@@ -0,0 +1,52 @@
+
+/*============================================================================
+
+This C source file is part of the SoftFloat IEEE Floating-Point Arithmetic
+Package, Release 3d, by John R. Hauser.
+
+Copyright 2011, 2012, 2013, 2014, 2015, 2016 The Regents of the University of
+California.  All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met:
+
+ 1. Redistributions of source code must retain the above copyright notice,
+    this list of conditions, and the following disclaimer.
+
+ 2. Redistributions in binary form must reproduce the above copyright notice,
+    this list of conditions, and the following disclaimer in the documentation
+    and/or other materials provided with the distribution.
+
+ 3. Neither the name of the University nor the names of its contributors may
+    be used to endorse or promote products derived from this software without
+    specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS "AS IS", AND ANY
+EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE, ARE
+DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY
+DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+=============================================================================*/
+
+#include <stdint.h>
+#include "platform.h"
+#include "internals.h"
+
+int softfloat_normExtF80SigM( uint64_t *sigPtr )
+{
+    uint64_t sig;
+    int_fast8_t shiftDist;
+
+    sig = *sigPtr;
+    shiftDist = softfloat_countLeadingZeros64( sig );
+    *sigPtr = sig<<shiftDist;
+    return -shiftDist;
+
+}
+
diff --git a/ext/softfloat/s_normRoundPackMToExtF80M.c b/ext/softfloat/s_normRoundPackMToExtF80M.c
new file mode 100644
index 0000000000..9859634252
--- /dev/null
+++ b/ext/softfloat/s_normRoundPackMToExtF80M.c
@@ -0,0 +1,78 @@
+
+/*============================================================================
+
+This C source file is part of the SoftFloat IEEE Floating-Point Arithmetic
+Package, Release 3d, by John R. Hauser.
+
+Copyright 2011, 2012, 2013, 2014, 2015, 2016 The Regents of the University of
+California.  All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met:
+
+ 1. Redistributions of source code must retain the above copyright notice,
+    this list of conditions, and the following disclaimer.
+
+ 2. Redistributions in binary form must reproduce the above copyright notice,
+    this list of conditions, and the following disclaimer in the documentation
+    and/or other materials provided with the distribution.
+
+ 3. Neither the name of the University nor the names of its contributors may
+    be used to endorse or promote products derived from this software without
+    specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS "AS IS", AND ANY
+EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE, ARE
+DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY
+DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+=============================================================================*/
+
+#include <stdbool.h>
+#include <stdint.h>
+#include "platform.h"
+#include "internals.h"
+
+void
+ softfloat_normRoundPackMToExtF80M(
+     bool sign,
+     int32_t exp,
+     uint32_t *extSigPtr,
+     uint_fast8_t roundingPrecision,
+     struct extFloat80M *zSPtr
+ )
+{
+    int_fast16_t shiftDist;
+    uint32_t wordSig;
+
+    shiftDist = 0;
+    wordSig = extSigPtr[indexWord( 3, 2 )];
+    if ( ! wordSig ) {
+        shiftDist = 32;
+        wordSig = extSigPtr[indexWord( 3, 1 )];
+        if ( ! wordSig ) {
+            shiftDist = 64;
+            wordSig = extSigPtr[indexWord( 3, 0 )];
+            if ( ! wordSig ) {
+                zSPtr->signExp = packToExtF80UI64( sign, 0 );
+                zSPtr->signif = 0;
+                return;
+            }
+        }
+    }
+    shiftDist += softfloat_countLeadingZeros32( wordSig );
+    if ( shiftDist ) {
+        exp -= shiftDist;
+        softfloat_shiftLeft96M( extSigPtr, shiftDist, extSigPtr );
+    }
+    softfloat_roundPackMToExtF80M(
+        sign, exp, extSigPtr, roundingPrecision, zSPtr );
+
+}
+
diff --git a/ext/softfloat/s_normRoundPackMToF128M.c b/ext/softfloat/s_normRoundPackMToF128M.c
new file mode 100644
index 0000000000..67c0efa15a
--- /dev/null
+++ b/ext/softfloat/s_normRoundPackMToF128M.c
@@ -0,0 +1,73 @@
+
+/*============================================================================
+
+This C source file is part of the SoftFloat IEEE Floating-Point Arithmetic
+Package, Release 3d, by John R. Hauser.
+
+Copyright 2011, 2012, 2013, 2014, 2015, 2016 The Regents of the University of
+California.  All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met:
+
+ 1. Redistributions of source code must retain the above copyright notice,
+    this list of conditions, and the following disclaimer.
+
+ 2. Redistributions in binary form must reproduce the above copyright notice,
+    this list of conditions, and the following disclaimer in the documentation
+    and/or other materials provided with the distribution.
+
+ 3. Neither the name of the University nor the names of its contributors may
+    be used to endorse or promote products derived from this software without
+    specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS "AS IS", AND ANY
+EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE, ARE
+DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY
+DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+=============================================================================*/
+
+#include <stdbool.h>
+#include <stdint.h>
+#include "platform.h"
+#include "internals.h"
+
+void
+ softfloat_normRoundPackMToF128M(
+     bool sign, int32_t exp, uint32_t *extSigPtr, uint32_t *zWPtr )
+{
+    const uint32_t *ptr;
+    int_fast16_t shiftDist;
+    uint32_t wordSig;
+
+    ptr = extSigPtr + indexWordHi( 5 );
+    shiftDist = 0;
+    for (;;) {
+        wordSig = *ptr;
+        if ( wordSig ) break;
+        shiftDist += 32;
+        if ( 160 <= shiftDist ) {
+            zWPtr[indexWordHi( 4 )] = packToF128UI96( sign, 0, 0 );
+            zWPtr[indexWord( 4, 2 )] = 0;
+            zWPtr[indexWord( 4, 1 )] = 0;
+            zWPtr[indexWord( 4, 0 )] = 0;
+            return;
+        }
+        ptr -= wordIncr;
+    }
+    shiftDist += softfloat_countLeadingZeros32( wordSig ) - 15;
+    if ( shiftDist ) {
+        exp -= shiftDist;
+        softfloat_shiftLeft160M( extSigPtr, shiftDist, extSigPtr );
+    }
+    softfloat_roundPackMToF128M( sign, exp, extSigPtr, zWPtr );
+
+}
+
diff --git a/ext/softfloat/s_normRoundPackToExtF80.c b/ext/softfloat/s_normRoundPackToExtF80.c
new file mode 100644
index 0000000000..416442129d
--- /dev/null
+++ b/ext/softfloat/s_normRoundPackToExtF80.c
@@ -0,0 +1,71 @@
+
+/*============================================================================
+
+This C source file is part of the SoftFloat IEEE Floating-Point Arithmetic
+Package, Release 3d, by John R. Hauser.
+
+Copyright 2011, 2012, 2013, 2014, 2015, 2016 The Regents of the University of
+California.  All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met:
+
+ 1. Redistributions of source code must retain the above copyright notice,
+    this list of conditions, and the following disclaimer.
+
+ 2. Redistributions in binary form must reproduce the above copyright notice,
+    this list of conditions, and the following disclaimer in the documentation
+    and/or other materials provided with the distribution.
+
+ 3. Neither the name of the University nor the names of its contributors may
+    be used to endorse or promote products derived from this software without
+    specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS "AS IS", AND ANY
+EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE, ARE
+DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY
+DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+=============================================================================*/
+
+#include <stdbool.h>
+#include <stdint.h>
+#include "platform.h"
+#include "internals.h"
+
+extFloat80_t
+ softfloat_normRoundPackToExtF80(
+     bool sign,
+     int_fast32_t exp,
+     uint_fast64_t sig,
+     uint_fast64_t sigExtra,
+     uint_fast8_t roundingPrecision
+ )
+{
+    int_fast8_t shiftDist;
+    struct uint128 sig128;
+
+    if ( ! sig ) {
+        exp -= 64;
+        sig = sigExtra;
+        sigExtra = 0;
+    }
+    shiftDist = softfloat_countLeadingZeros64( sig );
+    exp -= shiftDist;
+    if ( shiftDist ) {
+        sig128 = softfloat_shortShiftLeft128( sig, sigExtra, shiftDist );
+        sig = sig128.v64;
+        sigExtra = sig128.v0;
+    }
+    return
+        softfloat_roundPackToExtF80(
+            sign, exp, sig, sigExtra, roundingPrecision );
+
+}
+
diff --git a/ext/softfloat/s_normRoundPackToF128.c b/ext/softfloat/s_normRoundPackToF128.c
index 7da9301494..148cb2c80c 100644
--- a/ext/softfloat/s_normRoundPackToF128.c
+++ b/ext/softfloat/s_normRoundPackToF128.c
@@ -36,9 +36,8 @@ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 
 #include <stdbool.h>
 #include <stdint.h>
-
-#include "internals.h"
 #include "platform.h"
+#include "internals.h"
 
 float128_t
  softfloat_normRoundPackToF128(
diff --git a/ext/softfloat/s_normRoundPackToF16.c b/ext/softfloat/s_normRoundPackToF16.c
index f8db71c995..6788f2e14a 100644
--- a/ext/softfloat/s_normRoundPackToF16.c
+++ b/ext/softfloat/s_normRoundPackToF16.c
@@ -36,9 +36,8 @@ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 
 #include <stdbool.h>
 #include <stdint.h>
-
-#include "internals.h"
 #include "platform.h"
+#include "internals.h"
 
 float16_t
  softfloat_normRoundPackToF16( bool sign, int_fast16_t exp, uint_fast16_t sig )
diff --git a/ext/softfloat/s_normRoundPackToF32.c b/ext/softfloat/s_normRoundPackToF32.c
index 2555a81b41..14e08116b9 100644
--- a/ext/softfloat/s_normRoundPackToF32.c
+++ b/ext/softfloat/s_normRoundPackToF32.c
@@ -36,9 +36,8 @@ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 
 #include <stdbool.h>
 #include <stdint.h>
-
-#include "internals.h"
 #include "platform.h"
+#include "internals.h"
 
 float32_t
  softfloat_normRoundPackToF32( bool sign, int_fast16_t exp, uint_fast32_t sig )
diff --git a/ext/softfloat/s_normRoundPackToF64.c b/ext/softfloat/s_normRoundPackToF64.c
index 60a9e53981..7f5d6a22fc 100644
--- a/ext/softfloat/s_normRoundPackToF64.c
+++ b/ext/softfloat/s_normRoundPackToF64.c
@@ -36,9 +36,8 @@ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 
 #include <stdbool.h>
 #include <stdint.h>
-
-#include "internals.h"
 #include "platform.h"
+#include "internals.h"
 
 float64_t
  softfloat_normRoundPackToF64( bool sign, int_fast16_t exp, uint_fast64_t sig )
diff --git a/ext/softfloat/s_normSubnormalExtF80Sig.c b/ext/softfloat/s_normSubnormalExtF80Sig.c
new file mode 100644
index 0000000000..57cc9464c3
--- /dev/null
+++ b/ext/softfloat/s_normSubnormalExtF80Sig.c
@@ -0,0 +1,52 @@
+
+/*============================================================================
+
+This C source file is part of the SoftFloat IEEE Floating-Point Arithmetic
+Package, Release 3d, by John R. Hauser.
+
+Copyright 2011, 2012, 2013, 2014, 2015, 2016 The Regents of the University of
+California.  All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met:
+
+ 1. Redistributions of source code must retain the above copyright notice,
+    this list of conditions, and the following disclaimer.
+
+ 2. Redistributions in binary form must reproduce the above copyright notice,
+    this list of conditions, and the following disclaimer in the documentation
+    and/or other materials provided with the distribution.
+
+ 3. Neither the name of the University nor the names of its contributors may
+    be used to endorse or promote products derived from this software without
+    specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS "AS IS", AND ANY
+EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE, ARE
+DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY
+DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+=============================================================================*/
+
+#include <stdint.h>
+#include "platform.h"
+#include "internals.h"
+
+struct exp32_sig64 softfloat_normSubnormalExtF80Sig( uint_fast64_t sig )
+{
+    int_fast8_t shiftDist;
+    struct exp32_sig64 z;
+
+    shiftDist = softfloat_countLeadingZeros64( sig );
+    z.exp = -shiftDist;
+    z.sig = sig<<shiftDist;
+    return z;
+
+}
+
diff --git a/ext/softfloat/s_normSubnormalF128Sig.c b/ext/softfloat/s_normSubnormalF128Sig.c
index 5919b779de..cf02e10228 100644
--- a/ext/softfloat/s_normSubnormalF128Sig.c
+++ b/ext/softfloat/s_normSubnormalF128Sig.c
@@ -35,9 +35,8 @@ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 =============================================================================*/
 
 #include <stdint.h>
-
-#include "internals.h"
 #include "platform.h"
+#include "internals.h"
 
 struct exp32_sig128
  softfloat_normSubnormalF128Sig( uint_fast64_t sig64, uint_fast64_t sig0 )
diff --git a/ext/softfloat/s_normSubnormalF128SigM.c b/ext/softfloat/s_normSubnormalF128SigM.c
new file mode 100644
index 0000000000..3f511fe5f6
--- /dev/null
+++ b/ext/softfloat/s_normSubnormalF128SigM.c
@@ -0,0 +1,61 @@
+
+/*============================================================================
+
+This C source file is part of the SoftFloat IEEE Floating-Point Arithmetic
+Package, Release 3d, by John R. Hauser.
+
+Copyright 2011, 2012, 2013, 2014, 2015, 2016 The Regents of the University of
+California.  All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met:
+
+ 1. Redistributions of source code must retain the above copyright notice,
+    this list of conditions, and the following disclaimer.
+
+ 2. Redistributions in binary form must reproduce the above copyright notice,
+    this list of conditions, and the following disclaimer in the documentation
+    and/or other materials provided with the distribution.
+
+ 3. Neither the name of the University nor the names of its contributors may
+    be used to endorse or promote products derived from this software without
+    specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS "AS IS", AND ANY
+EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE, ARE
+DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY
+DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+=============================================================================*/
+
+#include <stdint.h>
+#include "platform.h"
+#include "internals.h"
+
+int softfloat_normSubnormalF128SigM( uint32_t *sigPtr )
+{
+    const uint32_t *ptr;
+    int_fast16_t shiftDist;
+    uint32_t wordSig;
+
+    ptr = sigPtr + indexWordHi( 4 );
+    shiftDist = 0;
+    for (;;) {
+        wordSig = *ptr;
+        if ( wordSig ) break;
+        shiftDist += 32;
+        if ( 128 <= shiftDist ) return 1;
+        ptr -= wordIncr;
+    }
+    shiftDist += softfloat_countLeadingZeros32( wordSig ) - 15;
+    if ( shiftDist ) softfloat_shiftLeft128M( sigPtr, shiftDist, sigPtr );
+    return 1 - shiftDist;
+
+}
+
diff --git a/ext/softfloat/s_normSubnormalF16Sig.c b/ext/softfloat/s_normSubnormalF16Sig.c
index 0beb0b93da..e612a9eb6d 100644
--- a/ext/softfloat/s_normSubnormalF16Sig.c
+++ b/ext/softfloat/s_normSubnormalF16Sig.c
@@ -35,9 +35,8 @@ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 =============================================================================*/
 
 #include <stdint.h>
-
-#include "internals.h"
 #include "platform.h"
+#include "internals.h"
 
 struct exp8_sig16 softfloat_normSubnormalF16Sig( uint_fast16_t sig )
 {
diff --git a/ext/softfloat/s_normSubnormalF32Sig.c b/ext/softfloat/s_normSubnormalF32Sig.c
index c12fb83741..e3e8ce44c8 100644
--- a/ext/softfloat/s_normSubnormalF32Sig.c
+++ b/ext/softfloat/s_normSubnormalF32Sig.c
@@ -35,9 +35,8 @@ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 =============================================================================*/
 
 #include <stdint.h>
-
-#include "internals.h"
 #include "platform.h"
+#include "internals.h"
 
 struct exp16_sig32 softfloat_normSubnormalF32Sig( uint_fast32_t sig )
 {
diff --git a/ext/softfloat/s_normSubnormalF64Sig.c b/ext/softfloat/s_normSubnormalF64Sig.c
index aa4a936098..fddfc32262 100644
--- a/ext/softfloat/s_normSubnormalF64Sig.c
+++ b/ext/softfloat/s_normSubnormalF64Sig.c
@@ -35,9 +35,8 @@ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 =============================================================================*/
 
 #include <stdint.h>
-
-#include "internals.h"
 #include "platform.h"
+#include "internals.h"
 
 struct exp16_sig64 softfloat_normSubnormalF64Sig( uint_fast64_t sig )
 {
diff --git a/ext/softfloat/s_propagateNaNF128UI.c b/ext/softfloat/s_propagateNaNF128UI.c
index e00f846867..ad7e83b0fa 100644
--- a/ext/softfloat/s_propagateNaNF128UI.c
+++ b/ext/softfloat/s_propagateNaNF128UI.c
@@ -35,11 +35,10 @@ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 =============================================================================*/
 
 #include <stdint.h>
-
 #include "platform.h"
 #include "primitiveTypes.h"
-#include "softfloat.h"
 #include "specialize.h"
+#include "softfloat.h"
 
 /*----------------------------------------------------------------------------
 | Interpreting the unsigned integer formed from concatenating `uiA64' and
diff --git a/ext/softfloat/s_propagateNaNF16UI.c b/ext/softfloat/s_propagateNaNF16UI.c
index 9c553d4b30..3ecd4c9825 100644
--- a/ext/softfloat/s_propagateNaNF16UI.c
+++ b/ext/softfloat/s_propagateNaNF16UI.c
@@ -35,10 +35,9 @@ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 =============================================================================*/
 
 #include <stdint.h>
-
 #include "platform.h"
-#include "softfloat.h"
 #include "specialize.h"
+#include "softfloat.h"
 
 /*----------------------------------------------------------------------------
 | Interpreting `uiA' and `uiB' as the bit patterns of two 16-bit floating-
diff --git a/ext/softfloat/s_propagateNaNF32UI.c b/ext/softfloat/s_propagateNaNF32UI.c
index 953b5c4e6b..b97fa41458 100644
--- a/ext/softfloat/s_propagateNaNF32UI.c
+++ b/ext/softfloat/s_propagateNaNF32UI.c
@@ -35,10 +35,9 @@ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 =============================================================================*/
 
 #include <stdint.h>
-
 #include "platform.h"
-#include "softfloat.h"
 #include "specialize.h"
+#include "softfloat.h"
 
 /*----------------------------------------------------------------------------
 | Interpreting `uiA' and `uiB' as the bit patterns of two 32-bit floating-
diff --git a/ext/softfloat/s_propagateNaNF64UI.c b/ext/softfloat/s_propagateNaNF64UI.c
index aba196a6f8..9c2d359831 100644
--- a/ext/softfloat/s_propagateNaNF64UI.c
+++ b/ext/softfloat/s_propagateNaNF64UI.c
@@ -35,10 +35,9 @@ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 =============================================================================*/
 
 #include <stdint.h>
-
 #include "platform.h"
-#include "softfloat.h"
 #include "specialize.h"
+#include "softfloat.h"
 
 /*----------------------------------------------------------------------------
 | Interpreting `uiA' and `uiB' as the bit patterns of two 64-bit floating-
diff --git a/ext/softfloat/s_remStepMBy32.c b/ext/softfloat/s_remStepMBy32.c
index 6bf344fb7b..fe787a4305 100644
--- a/ext/softfloat/s_remStepMBy32.c
+++ b/ext/softfloat/s_remStepMBy32.c
@@ -35,7 +35,6 @@ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 =============================================================================*/
 
 #include <stdint.h>
-
 #include "platform.h"
 #include "primitiveTypes.h"
 
diff --git a/ext/softfloat/s_roundMToI64.c b/ext/softfloat/s_roundMToI64.c
index 56cfb86727..a73f7f8032 100644
--- a/ext/softfloat/s_roundMToI64.c
+++ b/ext/softfloat/s_roundMToI64.c
@@ -36,11 +36,10 @@ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 
 #include <stdbool.h>
 #include <stdint.h>
-
-#include "internals.h"
 #include "platform.h"
-#include "softfloat.h"
+#include "internals.h"
 #include "specialize.h"
+#include "softfloat.h"
 
 int_fast64_t
  softfloat_roundMToI64(
diff --git a/ext/softfloat/s_roundMToUI64.c b/ext/softfloat/s_roundMToUI64.c
index 4867eb0481..0377c5bb6b 100644
--- a/ext/softfloat/s_roundMToUI64.c
+++ b/ext/softfloat/s_roundMToUI64.c
@@ -36,11 +36,10 @@ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 
 #include <stdbool.h>
 #include <stdint.h>
-
-#include "internals.h"
 #include "platform.h"
-#include "softfloat.h"
+#include "internals.h"
 #include "specialize.h"
+#include "softfloat.h"
 
 uint_fast64_t
  softfloat_roundMToUI64(
diff --git a/ext/softfloat/s_roundPackMToExtF80M.c b/ext/softfloat/s_roundPackMToExtF80M.c
new file mode 100644
index 0000000000..b5168d913e
--- /dev/null
+++ b/ext/softfloat/s_roundPackMToExtF80M.c
@@ -0,0 +1,256 @@
+
+/*============================================================================
+
+This C source file is part of the SoftFloat IEEE Floating-Point Arithmetic
+Package, Release 3d, by John R. Hauser.
+
+Copyright 2011, 2012, 2013, 2014, 2015, 2017 The Regents of the University of
+California.  All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met:
+
+ 1. Redistributions of source code must retain the above copyright notice,
+    this list of conditions, and the following disclaimer.
+
+ 2. Redistributions in binary form must reproduce the above copyright notice,
+    this list of conditions, and the following disclaimer in the documentation
+    and/or other materials provided with the distribution.
+
+ 3. Neither the name of the University nor the names of its contributors may
+    be used to endorse or promote products derived from this software without
+    specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS "AS IS", AND ANY
+EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE, ARE
+DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY
+DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+=============================================================================*/
+
+#include <stdbool.h>
+#include <stdint.h>
+#include "platform.h"
+#include "internals.h"
+#include "softfloat.h"
+
+void
+ softfloat_roundPackMToExtF80M(
+     bool sign,
+     int32_t exp,
+     uint32_t *extSigPtr,
+     uint_fast8_t roundingPrecision,
+     struct extFloat80M *zSPtr
+ )
+{
+    uint_fast8_t roundingMode;
+    bool roundNearEven;
+    uint64_t sig, roundIncrement, roundMask, roundBits;
+    bool isTiny;
+    uint32_t sigExtra;
+    bool doIncrement;
+
+    /*------------------------------------------------------------------------
+    *------------------------------------------------------------------------*/
+    roundingMode = softfloat_roundingMode;
+    roundNearEven = (roundingMode == softfloat_round_near_even);
+    sig =
+        (uint64_t) extSigPtr[indexWord( 3, 2 )]<<32
+            | extSigPtr[indexWord( 3, 1 )];
+    if ( roundingPrecision == 80 ) goto precision80;
+    if ( roundingPrecision == 64 ) {
+        roundIncrement = UINT64_C( 0x0000000000000400 );
+        roundMask = UINT64_C( 0x00000000000007FF );
+    } else if ( roundingPrecision == 32 ) {
+        roundIncrement = UINT64_C( 0x0000008000000000 );
+        roundMask = UINT64_C( 0x000000FFFFFFFFFF );
+    } else {
+        goto precision80;
+    }
+    /*------------------------------------------------------------------------
+    *------------------------------------------------------------------------*/
+    if ( extSigPtr[indexWordLo( 3 )] ) sig |= 1;
+    if ( ! roundNearEven && (roundingMode != softfloat_round_near_maxMag) ) {
+        roundIncrement =
+            (roundingMode
+                 == (sign ? softfloat_round_min : softfloat_round_max))
+                ? roundMask
+                : 0;
+    }
+    roundBits = sig & roundMask;
+    /*------------------------------------------------------------------------
+    *------------------------------------------------------------------------*/
+    if ( 0x7FFD <= (uint32_t) (exp - 1) ) {
+        if ( exp <= 0 ) {
+            /*----------------------------------------------------------------
+            *----------------------------------------------------------------*/
+            isTiny =
+                   (softfloat_detectTininess
+                        == softfloat_tininess_beforeRounding)
+                || (exp < 0)
+                || (sig <= (uint64_t) (sig + roundIncrement));
+            sig = softfloat_shiftRightJam64( sig, 1 - exp );
+            roundBits = sig & roundMask;
+            if ( roundBits ) {
+                if ( isTiny ) softfloat_raiseFlags( softfloat_flag_underflow );
+                softfloat_exceptionFlags |= softfloat_flag_inexact;
+#ifdef SOFTFLOAT_ROUND_ODD
+                if ( roundingMode == softfloat_round_odd ) {
+                    sig |= roundMask + 1;
+                }
+#endif
+            }
+            sig += roundIncrement;
+            exp = ((sig & UINT64_C( 0x8000000000000000 )) != 0);
+            roundIncrement = roundMask + 1;
+            if ( roundNearEven && (roundBits<<1 == roundIncrement) ) {
+                roundMask |= roundIncrement;
+            }
+            sig &= ~roundMask;
+            goto packReturn;
+        }
+        if (
+               (0x7FFE < exp)
+            || ((exp == 0x7FFE) && ((uint64_t) (sig + roundIncrement) < sig))
+        ) {
+            goto overflow;
+        }
+    }
+    /*------------------------------------------------------------------------
+    *------------------------------------------------------------------------*/
+    if ( roundBits ) {
+        softfloat_exceptionFlags |= softfloat_flag_inexact;
+#ifdef SOFTFLOAT_ROUND_ODD
+        if ( roundingMode == softfloat_round_odd ) {
+            sig = (sig & ~roundMask) | (roundMask + 1);
+            goto packReturn;
+        }
+#endif
+    }
+    sig += roundIncrement;
+    if ( sig < roundIncrement ) {
+        ++exp;
+        sig = UINT64_C( 0x8000000000000000 );
+    }
+    roundIncrement = roundMask + 1;
+    if ( roundNearEven && (roundBits<<1 == roundIncrement) ) {
+        roundMask |= roundIncrement;
+    }
+    sig &= ~roundMask;
+    goto packReturn;
+    /*------------------------------------------------------------------------
+    *------------------------------------------------------------------------*/
+ precision80:
+    sigExtra = extSigPtr[indexWordLo( 3 )];
+    doIncrement = (0x80000000 <= sigExtra);
+    if ( ! roundNearEven && (roundingMode != softfloat_round_near_maxMag) ) {
+        doIncrement =
+            (roundingMode
+                 == (sign ? softfloat_round_min : softfloat_round_max))
+                && sigExtra;
+    }
+    /*------------------------------------------------------------------------
+    *------------------------------------------------------------------------*/
+    if ( 0x7FFD <= (uint32_t) (exp - 1) ) {
+        if ( exp <= 0 ) {
+            /*----------------------------------------------------------------
+            *----------------------------------------------------------------*/
+            isTiny =
+                   (softfloat_detectTininess
+                        == softfloat_tininess_beforeRounding)
+                || (exp < 0)
+                || ! doIncrement
+                || (sig < UINT64_C( 0xFFFFFFFFFFFFFFFF ));
+            softfloat_shiftRightJam96M( extSigPtr, 1 - exp, extSigPtr );
+            exp = 0;
+            sig =
+                (uint64_t) extSigPtr[indexWord( 3, 2 )]<<32
+                    | extSigPtr[indexWord( 3, 1 )];
+            sigExtra = extSigPtr[indexWordLo( 3 )];
+            if ( sigExtra ) {
+                if ( isTiny ) softfloat_raiseFlags( softfloat_flag_underflow );
+                softfloat_exceptionFlags |= softfloat_flag_inexact;
+#ifdef SOFTFLOAT_ROUND_ODD
+                if ( roundingMode == softfloat_round_odd ) {
+                    sig |= 1;
+                    goto packReturn;
+                }
+#endif
+            }
+            doIncrement = (0x80000000 <= sigExtra);
+            if (
+                ! roundNearEven
+                    && (roundingMode != softfloat_round_near_maxMag)
+            ) {
+                doIncrement =
+                    (roundingMode
+                         == (sign ? softfloat_round_min : softfloat_round_max))
+                        && sigExtra;
+            }
+            if ( doIncrement ) {
+                ++sig;
+                sig &= ~(uint64_t) (! (sigExtra & 0x7FFFFFFF) & roundNearEven);
+                exp = ((sig & UINT64_C( 0x8000000000000000 )) != 0);
+            }
+            goto packReturn;
+        }
+        if (
+               (0x7FFE < exp)
+            || ((exp == 0x7FFE) && (sig == UINT64_C( 0xFFFFFFFFFFFFFFFF ))
+                    && doIncrement)
+        ) {
+            /*----------------------------------------------------------------
+            *----------------------------------------------------------------*/
+            roundMask = 0;
+ overflow:
+            softfloat_raiseFlags(
+                softfloat_flag_overflow | softfloat_flag_inexact );
+            if (
+                   roundNearEven
+                || (roundingMode == softfloat_round_near_maxMag)
+                || (roundingMode
+                        == (sign ? softfloat_round_min : softfloat_round_max))
+            ) {
+                exp = 0x7FFF;
+                sig = UINT64_C( 0x8000000000000000 );
+            } else {
+                exp = 0x7FFE;
+                sig = ~roundMask;
+            }
+            goto packReturn;
+        }
+    }
+    /*------------------------------------------------------------------------
+    *------------------------------------------------------------------------*/
+    if ( sigExtra ) {
+        softfloat_exceptionFlags |= softfloat_flag_inexact;
+#ifdef SOFTFLOAT_ROUND_ODD
+        if ( roundingMode == softfloat_round_odd ) {
+            sig |= 1;
+            goto packReturn;
+        }
+#endif
+    }
+    if ( doIncrement ) {
+        ++sig;
+        if ( ! sig ) {
+            ++exp;
+            sig = UINT64_C( 0x8000000000000000 );
+        } else {
+            sig &= ~(uint64_t) (! (sigExtra & 0x7FFFFFFF) & roundNearEven);
+        }
+    }
+    /*------------------------------------------------------------------------
+    *------------------------------------------------------------------------*/
+ packReturn:
+    zSPtr->signExp = packToExtF80UI64( sign, exp );
+    zSPtr->signif = sig;
+
+}
+
diff --git a/ext/softfloat/s_roundPackMToF128M.c b/ext/softfloat/s_roundPackMToF128M.c
new file mode 100644
index 0000000000..101f444ecc
--- /dev/null
+++ b/ext/softfloat/s_roundPackMToF128M.c
@@ -0,0 +1,178 @@
+
+/*============================================================================
+
+This C source file is part of the SoftFloat IEEE Floating-Point Arithmetic
+Package, Release 3d, by John R. Hauser.
+
+Copyright 2011, 2012, 2013, 2014, 2017 The Regents of the University of
+California.  All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met:
+
+ 1. Redistributions of source code must retain the above copyright notice,
+    this list of conditions, and the following disclaimer.
+
+ 2. Redistributions in binary form must reproduce the above copyright notice,
+    this list of conditions, and the following disclaimer in the documentation
+    and/or other materials provided with the distribution.
+
+ 3. Neither the name of the University nor the names of its contributors may
+    be used to endorse or promote products derived from this software without
+    specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS "AS IS", AND ANY
+EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE, ARE
+DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY
+DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+=============================================================================*/
+
+#include <stdbool.h>
+#include <stdint.h>
+#include "platform.h"
+#include "internals.h"
+#include "softfloat.h"
+
+void
+ softfloat_roundPackMToF128M(
+     bool sign, int32_t exp, uint32_t *extSigPtr, uint32_t *zWPtr )
+{
+    uint_fast8_t roundingMode;
+    bool roundNearEven;
+    uint32_t sigExtra;
+    bool doIncrement, isTiny;
+    static const uint32_t maxSig[4] =
+        INIT_UINTM4( 0x0001FFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF );
+    uint32_t ui, uj;
+
+    /*------------------------------------------------------------------------
+    *------------------------------------------------------------------------*/
+    roundingMode = softfloat_roundingMode;
+    roundNearEven = (roundingMode == softfloat_round_near_even);
+    sigExtra = extSigPtr[indexWordLo( 5 )];
+    doIncrement = (0x80000000 <= sigExtra);
+    if ( ! roundNearEven && (roundingMode != softfloat_round_near_maxMag) ) {
+        doIncrement =
+            (roundingMode
+                 == (sign ? softfloat_round_min : softfloat_round_max))
+                && sigExtra;
+    }
+    /*------------------------------------------------------------------------
+    *------------------------------------------------------------------------*/
+    if ( 0x7FFD <= (uint32_t) exp ) {
+        if ( exp < 0 ) {
+            /*----------------------------------------------------------------
+            *----------------------------------------------------------------*/
+            isTiny =
+                   (softfloat_detectTininess
+                        == softfloat_tininess_beforeRounding)
+                || (exp < -1)
+                || ! doIncrement
+                || (softfloat_compare128M(
+                        extSigPtr + indexMultiwordHi( 5, 4 ), maxSig )
+                        < 0);
+            softfloat_shiftRightJam160M( extSigPtr, -exp, extSigPtr );
+            exp = 0;
+            sigExtra = extSigPtr[indexWordLo( 5 )];
+            if ( isTiny && sigExtra ) {
+                softfloat_raiseFlags( softfloat_flag_underflow );
+            }
+            doIncrement = (0x80000000 <= sigExtra);
+            if (
+                   ! roundNearEven
+                && (roundingMode != softfloat_round_near_maxMag)
+            ) {
+                doIncrement =
+                    (roundingMode
+                         == (sign ? softfloat_round_min : softfloat_round_max))
+                        && sigExtra;
+            }
+        } else if (
+               (0x7FFD < exp)
+            || ((exp == 0x7FFD) && doIncrement
+                    && (softfloat_compare128M(
+                            extSigPtr + indexMultiwordHi( 5, 4 ), maxSig )
+                            == 0))
+        ) {
+            /*----------------------------------------------------------------
+            *----------------------------------------------------------------*/
+            softfloat_raiseFlags(
+                softfloat_flag_overflow | softfloat_flag_inexact );
+            if (
+                   roundNearEven
+                || (roundingMode == softfloat_round_near_maxMag)
+                || (roundingMode
+                        == (sign ? softfloat_round_min : softfloat_round_max))
+            ) {
+                ui = packToF128UI96( sign, 0x7FFF, 0 );
+                uj = 0;
+            } else {
+                ui = packToF128UI96( sign, 0x7FFE, 0x0000FFFF );
+                uj = 0xFFFFFFFF;
+            }
+            zWPtr[indexWordHi( 4 )] = ui;
+            zWPtr[indexWord( 4, 2 )] = uj;
+            zWPtr[indexWord( 4, 1 )] = uj;
+            zWPtr[indexWord( 4, 0 )] = uj;
+            return;
+        }
+    }
+    /*------------------------------------------------------------------------
+    *------------------------------------------------------------------------*/
+    uj = extSigPtr[indexWord( 5, 1 )];
+    if ( sigExtra ) {
+        softfloat_exceptionFlags |= softfloat_flag_inexact;
+#ifdef SOFTFLOAT_ROUND_ODD
+        if ( roundingMode == softfloat_round_odd ) {
+            uj |= 1;
+            goto noIncrementPackReturn;
+        }
+#endif
+    }
+    if ( doIncrement ) {
+        ++uj;
+        if ( uj ) {
+            if ( ! (sigExtra & 0x7FFFFFFF) && roundNearEven ) uj &= ~1;
+            zWPtr[indexWord( 4, 2 )] = extSigPtr[indexWord( 5, 3 )];
+            zWPtr[indexWord( 4, 1 )] = extSigPtr[indexWord( 5, 2 )];
+            zWPtr[indexWord( 4, 0 )] = uj;
+            ui = extSigPtr[indexWordHi( 5 )];
+        } else {
+            zWPtr[indexWord( 4, 0 )] = uj;
+            ui = extSigPtr[indexWord( 5, 2 )] + 1;
+            zWPtr[indexWord( 4, 1 )] = ui;
+            uj = extSigPtr[indexWord( 5, 3 )];
+            if ( ui ) {
+                zWPtr[indexWord( 4, 2 )] = uj;
+                ui = extSigPtr[indexWordHi( 5 )];
+            } else {
+                ++uj;
+                zWPtr[indexWord( 4, 2 )] = uj;
+                ui = extSigPtr[indexWordHi( 5 )];
+                if ( ! uj ) ++ui;
+            }
+        }
+    } else {
+ noIncrementPackReturn:
+        zWPtr[indexWord( 4, 0 )] = uj;
+        ui = extSigPtr[indexWord( 5, 2 )];
+        zWPtr[indexWord( 4, 1 )] = ui;
+        uj |= ui;
+        ui = extSigPtr[indexWord( 5, 3 )];
+        zWPtr[indexWord( 4, 2 )] = ui;
+        uj |= ui;
+        ui = extSigPtr[indexWordHi( 5 )];
+        uj |= ui;
+        if ( ! uj ) exp = 0;
+    }
+    zWPtr[indexWordHi( 4 )] = packToF128UI96( sign, exp, ui );
+
+}
+
diff --git a/ext/softfloat/s_roundPackMToI64.c b/ext/softfloat/s_roundPackMToI64.c
index e714608c6f..4d5efbb725 100644
--- a/ext/softfloat/s_roundPackMToI64.c
+++ b/ext/softfloat/s_roundPackMToI64.c
@@ -36,11 +36,10 @@ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 
 #include <stdbool.h>
 #include <stdint.h>
-
-#include "internals.h"
 #include "platform.h"
-#include "softfloat.h"
+#include "internals.h"
 #include "specialize.h"
+#include "softfloat.h"
 
 int_fast64_t
  softfloat_roundPackMToI64(
diff --git a/ext/softfloat/s_roundPackMToUI64.c b/ext/softfloat/s_roundPackMToUI64.c
index d86db18790..1a64fdf98d 100644
--- a/ext/softfloat/s_roundPackMToUI64.c
+++ b/ext/softfloat/s_roundPackMToUI64.c
@@ -36,11 +36,10 @@ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 
 #include <stdbool.h>
 #include <stdint.h>
-
-#include "internals.h"
 #include "platform.h"
-#include "softfloat.h"
+#include "internals.h"
 #include "specialize.h"
+#include "softfloat.h"
 
 uint_fast64_t
  softfloat_roundPackMToUI64(
diff --git a/ext/softfloat/s_roundPackToExtF80.c b/ext/softfloat/s_roundPackToExtF80.c
new file mode 100644
index 0000000000..b14e25c1e9
--- /dev/null
+++ b/ext/softfloat/s_roundPackToExtF80.c
@@ -0,0 +1,256 @@
+
+/*============================================================================
+
+This C source file is part of the SoftFloat IEEE Floating-Point Arithmetic
+Package, Release 3d, by John R. Hauser.
+
+Copyright 2011, 2012, 2013, 2014, 2015, 2017 The Regents of the University of
+California.  All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met:
+
+ 1. Redistributions of source code must retain the above copyright notice,
+    this list of conditions, and the following disclaimer.
+
+ 2. Redistributions in binary form must reproduce the above copyright notice,
+    this list of conditions, and the following disclaimer in the documentation
+    and/or other materials provided with the distribution.
+
+ 3. Neither the name of the University nor the names of its contributors may
+    be used to endorse or promote products derived from this software without
+    specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS "AS IS", AND ANY
+EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE, ARE
+DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY
+DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+=============================================================================*/
+
+#include <stdbool.h>
+#include <stdint.h>
+#include "platform.h"
+#include "internals.h"
+#include "softfloat.h"
+
+extFloat80_t
+ softfloat_roundPackToExtF80(
+     bool sign,
+     int_fast32_t exp,
+     uint_fast64_t sig,
+     uint_fast64_t sigExtra,
+     uint_fast8_t roundingPrecision
+ )
+{
+    uint_fast8_t roundingMode;
+    bool roundNearEven;
+    uint_fast64_t roundIncrement, roundMask, roundBits;
+    bool isTiny, doIncrement;
+    struct uint64_extra sig64Extra;
+    union { struct extFloat80M s; extFloat80_t f; } uZ;
+
+    /*------------------------------------------------------------------------
+    *------------------------------------------------------------------------*/
+    roundingMode = softfloat_roundingMode;
+    roundNearEven = (roundingMode == softfloat_round_near_even);
+    if ( roundingPrecision == 80 ) goto precision80;
+    if ( roundingPrecision == 64 ) {
+        roundIncrement = UINT64_C( 0x0000000000000400 );
+        roundMask = UINT64_C( 0x00000000000007FF );
+    } else if ( roundingPrecision == 32 ) {
+        roundIncrement = UINT64_C( 0x0000008000000000 );
+        roundMask = UINT64_C( 0x000000FFFFFFFFFF );
+    } else {
+        goto precision80;
+    }
+    sig |= (sigExtra != 0);
+    if ( ! roundNearEven && (roundingMode != softfloat_round_near_maxMag) ) {
+        roundIncrement =
+            (roundingMode
+                 == (sign ? softfloat_round_min : softfloat_round_max))
+                ? roundMask
+                : 0;
+    }
+    roundBits = sig & roundMask;
+    /*------------------------------------------------------------------------
+    *------------------------------------------------------------------------*/
+    if ( 0x7FFD <= (uint32_t) (exp - 1) ) {
+        if ( exp <= 0 ) {
+            /*----------------------------------------------------------------
+            *----------------------------------------------------------------*/
+            isTiny =
+                   (softfloat_detectTininess
+                        == softfloat_tininess_beforeRounding)
+                || (exp < 0)
+                || (sig <= (uint64_t) (sig + roundIncrement));
+            sig = softfloat_shiftRightJam64( sig, 1 - exp );
+            roundBits = sig & roundMask;
+            if ( roundBits ) {
+                if ( isTiny ) softfloat_raiseFlags( softfloat_flag_underflow );
+                softfloat_exceptionFlags |= softfloat_flag_inexact;
+#ifdef SOFTFLOAT_ROUND_ODD
+                if ( roundingMode == softfloat_round_odd ) {
+                    sig |= roundMask + 1;
+                }
+#endif
+            }
+            sig += roundIncrement;
+            exp = ((sig & UINT64_C( 0x8000000000000000 )) != 0);
+            roundIncrement = roundMask + 1;
+            if ( roundNearEven && (roundBits<<1 == roundIncrement) ) {
+                roundMask |= roundIncrement;
+            }
+            sig &= ~roundMask;
+            goto packReturn;
+        }
+        if (
+               (0x7FFE < exp)
+            || ((exp == 0x7FFE) && ((uint64_t) (sig + roundIncrement) < sig))
+        ) {
+            goto overflow;
+        }
+    }
+    /*------------------------------------------------------------------------
+    *------------------------------------------------------------------------*/
+    if ( roundBits ) {
+        softfloat_exceptionFlags |= softfloat_flag_inexact;
+#ifdef SOFTFLOAT_ROUND_ODD
+        if ( roundingMode == softfloat_round_odd ) {
+            sig = (sig & ~roundMask) | (roundMask + 1);
+            goto packReturn;
+        }
+#endif
+    }
+    sig = (uint64_t) (sig + roundIncrement);
+    if ( sig < roundIncrement ) {
+        ++exp;
+        sig = UINT64_C( 0x8000000000000000 );
+    }
+    roundIncrement = roundMask + 1;
+    if ( roundNearEven && (roundBits<<1 == roundIncrement) ) {
+        roundMask |= roundIncrement;
+    }
+    sig &= ~roundMask;
+    goto packReturn;
+    /*------------------------------------------------------------------------
+    *------------------------------------------------------------------------*/
+ precision80:
+    doIncrement = (UINT64_C( 0x8000000000000000 ) <= sigExtra);
+    if ( ! roundNearEven && (roundingMode != softfloat_round_near_maxMag) ) {
+        doIncrement =
+            (roundingMode
+                 == (sign ? softfloat_round_min : softfloat_round_max))
+                && sigExtra;
+    }
+    /*------------------------------------------------------------------------
+    *------------------------------------------------------------------------*/
+    if ( 0x7FFD <= (uint32_t) (exp - 1) ) {
+        if ( exp <= 0 ) {
+            /*----------------------------------------------------------------
+            *----------------------------------------------------------------*/
+            isTiny =
+                   (softfloat_detectTininess
+                        == softfloat_tininess_beforeRounding)
+                || (exp < 0)
+                || ! doIncrement
+                || (sig < UINT64_C( 0xFFFFFFFFFFFFFFFF ));
+            sig64Extra =
+                softfloat_shiftRightJam64Extra( sig, sigExtra, 1 - exp );
+            exp = 0;
+            sig = sig64Extra.v;
+            sigExtra = sig64Extra.extra;
+            if ( sigExtra ) {
+                if ( isTiny ) softfloat_raiseFlags( softfloat_flag_underflow );
+                softfloat_exceptionFlags |= softfloat_flag_inexact;
+#ifdef SOFTFLOAT_ROUND_ODD
+                if ( roundingMode == softfloat_round_odd ) {
+                    sig |= 1;
+                    goto packReturn;
+                }
+#endif
+            }
+            doIncrement = (UINT64_C( 0x8000000000000000 ) <= sigExtra);
+            if (
+                ! roundNearEven
+                    && (roundingMode != softfloat_round_near_maxMag)
+            ) {
+                doIncrement =
+                    (roundingMode
+                         == (sign ? softfloat_round_min : softfloat_round_max))
+                        && sigExtra;
+            }
+            if ( doIncrement ) {
+                ++sig;
+                sig &=
+                    ~(uint_fast64_t)
+                         (! (sigExtra & UINT64_C( 0x7FFFFFFFFFFFFFFF ))
+                              & roundNearEven);
+                exp = ((sig & UINT64_C( 0x8000000000000000 )) != 0);
+            }
+            goto packReturn;
+        }
+        if (
+               (0x7FFE < exp)
+            || ((exp == 0x7FFE) && (sig == UINT64_C( 0xFFFFFFFFFFFFFFFF ))
+                    && doIncrement)
+        ) {
+            /*----------------------------------------------------------------
+            *----------------------------------------------------------------*/
+            roundMask = 0;
+ overflow:
+            softfloat_raiseFlags(
+                softfloat_flag_overflow | softfloat_flag_inexact );
+            if (
+                   roundNearEven
+                || (roundingMode == softfloat_round_near_maxMag)
+                || (roundingMode
+                        == (sign ? softfloat_round_min : softfloat_round_max))
+            ) {
+                exp = 0x7FFF;
+                sig = UINT64_C( 0x8000000000000000 );
+            } else {
+                exp = 0x7FFE;
+                sig = ~roundMask;
+            }
+            goto packReturn;
+        }
+    }
+    /*------------------------------------------------------------------------
+    *------------------------------------------------------------------------*/
+    if ( sigExtra ) {
+        softfloat_exceptionFlags |= softfloat_flag_inexact;
+#ifdef SOFTFLOAT_ROUND_ODD
+        if ( roundingMode == softfloat_round_odd ) {
+            sig |= 1;
+            goto packReturn;
+        }
+#endif
+    }
+    if ( doIncrement ) {
+        ++sig;
+        if ( ! sig ) {
+            ++exp;
+            sig = UINT64_C( 0x8000000000000000 );
+        } else {
+            sig &=
+                ~(uint_fast64_t)
+                     (! (sigExtra & UINT64_C( 0x7FFFFFFFFFFFFFFF ))
+                          & roundNearEven);
+        }
+    }
+    /*------------------------------------------------------------------------
+    *------------------------------------------------------------------------*/
+ packReturn:
+    uZ.s.signExp = packToExtF80UI64( sign, exp );
+    uZ.s.signif = sig;
+    return uZ.f;
+
+}
+
diff --git a/ext/softfloat/s_roundPackToF128.c b/ext/softfloat/s_roundPackToF128.c
index e96f5e4af0..eaaa375c9b 100644
--- a/ext/softfloat/s_roundPackToF128.c
+++ b/ext/softfloat/s_roundPackToF128.c
@@ -36,9 +36,8 @@ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 
 #include <stdbool.h>
 #include <stdint.h>
-
-#include "internals.h"
 #include "platform.h"
+#include "internals.h"
 #include "softfloat.h"
 
 float128_t
@@ -107,7 +106,7 @@ float128_t
         } else if (
                (0x7FFD < exp)
             || ((exp == 0x7FFD)
-                    && softfloat_eq128(
+                    && softfloat_eq128( 
                            sig64,
                            sig0,
                            UINT64_C( 0x0001FFFFFFFFFFFF ),
diff --git a/ext/softfloat/s_roundPackToF16.c b/ext/softfloat/s_roundPackToF16.c
index bfc3ff825e..0eaa73a5f0 100644
--- a/ext/softfloat/s_roundPackToF16.c
+++ b/ext/softfloat/s_roundPackToF16.c
@@ -36,9 +36,8 @@ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 
 #include <stdbool.h>
 #include <stdint.h>
-
-#include "internals.h"
 #include "platform.h"
+#include "internals.h"
 #include "softfloat.h"
 
 float16_t
diff --git a/ext/softfloat/s_roundPackToF32.c b/ext/softfloat/s_roundPackToF32.c
index 1f72892a97..cc34508570 100644
--- a/ext/softfloat/s_roundPackToF32.c
+++ b/ext/softfloat/s_roundPackToF32.c
@@ -36,9 +36,8 @@ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 
 #include <stdbool.h>
 #include <stdint.h>
-
-#include "internals.h"
 #include "platform.h"
+#include "internals.h"
 #include "softfloat.h"
 
 float32_t
diff --git a/ext/softfloat/s_roundPackToF64.c b/ext/softfloat/s_roundPackToF64.c
index 8a124d20b7..aaff008c1d 100644
--- a/ext/softfloat/s_roundPackToF64.c
+++ b/ext/softfloat/s_roundPackToF64.c
@@ -36,9 +36,8 @@ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 
 #include <stdbool.h>
 #include <stdint.h>
-
-#include "internals.h"
 #include "platform.h"
+#include "internals.h"
 #include "softfloat.h"
 
 float64_t
diff --git a/ext/softfloat/s_roundPackToI32.c b/ext/softfloat/s_roundPackToI32.c
index fee5c5bb03..3ece8f052e 100644
--- a/ext/softfloat/s_roundPackToI32.c
+++ b/ext/softfloat/s_roundPackToI32.c
@@ -36,11 +36,10 @@ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 
 #include <stdbool.h>
 #include <stdint.h>
-
-#include "internals.h"
 #include "platform.h"
-#include "softfloat.h"
+#include "internals.h"
 #include "specialize.h"
+#include "softfloat.h"
 
 int_fast32_t
  softfloat_roundPackToI32(
diff --git a/ext/softfloat/s_roundPackToI64.c b/ext/softfloat/s_roundPackToI64.c
index ea44bd925e..ebef7f3647 100644
--- a/ext/softfloat/s_roundPackToI64.c
+++ b/ext/softfloat/s_roundPackToI64.c
@@ -36,11 +36,10 @@ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 
 #include <stdbool.h>
 #include <stdint.h>
-
-#include "internals.h"
 #include "platform.h"
-#include "softfloat.h"
+#include "internals.h"
 #include "specialize.h"
+#include "softfloat.h"
 
 int_fast64_t
  softfloat_roundPackToI64(
diff --git a/ext/softfloat/s_roundPackToUI32.c b/ext/softfloat/s_roundPackToUI32.c
index d86225e609..f0021fe54c 100644
--- a/ext/softfloat/s_roundPackToUI32.c
+++ b/ext/softfloat/s_roundPackToUI32.c
@@ -36,11 +36,10 @@ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 
 #include <stdbool.h>
 #include <stdint.h>
-
-#include "internals.h"
 #include "platform.h"
-#include "softfloat.h"
+#include "internals.h"
 #include "specialize.h"
+#include "softfloat.h"
 
 uint_fast32_t
  softfloat_roundPackToUI32(
diff --git a/ext/softfloat/s_roundPackToUI64.c b/ext/softfloat/s_roundPackToUI64.c
index 8a5772075b..fada1840c6 100644
--- a/ext/softfloat/s_roundPackToUI64.c
+++ b/ext/softfloat/s_roundPackToUI64.c
@@ -36,11 +36,10 @@ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 
 #include <stdbool.h>
 #include <stdint.h>
-
-#include "internals.h"
 #include "platform.h"
-#include "softfloat.h"
+#include "internals.h"
 #include "specialize.h"
+#include "softfloat.h"
 
 uint_fast64_t
  softfloat_roundPackToUI64(
diff --git a/ext/softfloat/s_roundToI32.c b/ext/softfloat/s_roundToI32.c
index fe0c661e00..20a3ff4f65 100644
--- a/ext/softfloat/s_roundToI32.c
+++ b/ext/softfloat/s_roundToI32.c
@@ -36,11 +36,10 @@ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 
 #include <stdbool.h>
 #include <stdint.h>
-
-#include "internals.h"
 #include "platform.h"
-#include "softfloat.h"
+#include "internals.h"
 #include "specialize.h"
+#include "softfloat.h"
 
 int_fast32_t
  softfloat_roundToI32(
diff --git a/ext/softfloat/s_roundToI64.c b/ext/softfloat/s_roundToI64.c
index 2ad24486fe..fcddbc2785 100644
--- a/ext/softfloat/s_roundToI64.c
+++ b/ext/softfloat/s_roundToI64.c
@@ -36,11 +36,10 @@ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 
 #include <stdbool.h>
 #include <stdint.h>
-
-#include "internals.h"
 #include "platform.h"
-#include "softfloat.h"
+#include "internals.h"
 #include "specialize.h"
+#include "softfloat.h"
 
 int_fast64_t
  softfloat_roundToI64(
diff --git a/ext/softfloat/s_roundToUI32.c b/ext/softfloat/s_roundToUI32.c
index aaa529c9e9..180899bd85 100644
--- a/ext/softfloat/s_roundToUI32.c
+++ b/ext/softfloat/s_roundToUI32.c
@@ -36,11 +36,10 @@ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 
 #include <stdbool.h>
 #include <stdint.h>
-
-#include "internals.h"
 #include "platform.h"
-#include "softfloat.h"
+#include "internals.h"
 #include "specialize.h"
+#include "softfloat.h"
 
 uint_fast32_t
  softfloat_roundToUI32(
diff --git a/ext/softfloat/s_roundToUI64.c b/ext/softfloat/s_roundToUI64.c
index 1bdffd5145..de35b5eb0e 100644
--- a/ext/softfloat/s_roundToUI64.c
+++ b/ext/softfloat/s_roundToUI64.c
@@ -36,11 +36,10 @@ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 
 #include <stdbool.h>
 #include <stdint.h>
-
-#include "internals.h"
 #include "platform.h"
-#include "softfloat.h"
+#include "internals.h"
 #include "specialize.h"
+#include "softfloat.h"
 
 uint_fast64_t
  softfloat_roundToUI64(
diff --git a/ext/softfloat/s_shiftLeftM.c b/ext/softfloat/s_shiftLeftM.c
new file mode 100644
index 0000000000..8b22af1277
--- /dev/null
+++ b/ext/softfloat/s_shiftLeftM.c
@@ -0,0 +1,91 @@
+
+/*============================================================================
+
+This C source file is part of the SoftFloat IEEE Floating-Point Arithmetic
+Package, Release 3d, by John R. Hauser.
+
+Copyright 2011, 2012, 2013, 2014, 2015, 2016 The Regents of the University of
+California.  All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met:
+
+ 1. Redistributions of source code must retain the above copyright notice,
+    this list of conditions, and the following disclaimer.
+
+ 2. Redistributions in binary form must reproduce the above copyright notice,
+    this list of conditions, and the following disclaimer in the documentation
+    and/or other materials provided with the distribution.
+
+ 3. Neither the name of the University nor the names of its contributors may
+    be used to endorse or promote products derived from this software without
+    specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS "AS IS", AND ANY
+EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE, ARE
+DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY
+DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+=============================================================================*/
+
+#include <stdint.h>
+#include "platform.h"
+
+#ifndef softfloat_shiftLeftM
+
+#define softfloat_shiftLeftM softfloat_shiftLeftM
+#include "primitives.h"
+
+void
+ softfloat_shiftLeftM(
+     uint_fast8_t size_words,
+     const uint32_t *aPtr,
+     uint32_t dist,
+     uint32_t *zPtr
+ )
+{
+    uint32_t wordDist;
+    uint_fast8_t innerDist;
+    uint32_t *destPtr;
+    uint_fast8_t i;
+
+    wordDist = dist>>5;
+    if ( wordDist < size_words ) {
+        aPtr += indexMultiwordLoBut( size_words, wordDist );
+        innerDist = dist & 31;
+        if ( innerDist ) {
+            softfloat_shortShiftLeftM(
+                size_words - wordDist,
+                aPtr,
+                innerDist,
+                zPtr + indexMultiwordHiBut( size_words, wordDist )
+            );
+            if ( ! wordDist ) return;
+        } else {
+            aPtr += indexWordHi( size_words - wordDist );
+            destPtr = zPtr + indexWordHi( size_words );
+            for ( i = size_words - wordDist; i; --i ) {
+                *destPtr = *aPtr;
+                aPtr -= wordIncr;
+                destPtr -= wordIncr;
+            }
+        }
+        zPtr += indexMultiwordLo( size_words, wordDist );
+    } else {
+        wordDist = size_words;
+    }
+    do {
+        *zPtr++ = 0;
+        --wordDist;
+    } while ( wordDist );
+
+}
+
+#endif
+
diff --git a/ext/softfloat/s_shiftNormSigF128M.c b/ext/softfloat/s_shiftNormSigF128M.c
new file mode 100644
index 0000000000..6e437a9f91
--- /dev/null
+++ b/ext/softfloat/s_shiftNormSigF128M.c
@@ -0,0 +1,78 @@
+
+/*============================================================================
+
+This C source file is part of the SoftFloat IEEE Floating-Point Arithmetic
+Package, Release 3d, by John R. Hauser.
+
+Copyright 2011, 2012, 2013, 2014, 2015, 2016 The Regents of the University of
+California.  All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met:
+
+ 1. Redistributions of source code must retain the above copyright notice,
+    this list of conditions, and the following disclaimer.
+
+ 2. Redistributions in binary form must reproduce the above copyright notice,
+    this list of conditions, and the following disclaimer in the documentation
+    and/or other materials provided with the distribution.
+
+ 3. Neither the name of the University nor the names of its contributors may
+    be used to endorse or promote products derived from this software without
+    specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS "AS IS", AND ANY
+EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE, ARE
+DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY
+DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+=============================================================================*/
+
+#include <stdint.h>
+#include "platform.h"
+#include "internals.h"
+
+int
+ softfloat_shiftNormSigF128M(
+     const uint32_t *wPtr, uint_fast8_t shiftDist, uint32_t *sigPtr )
+{
+    uint32_t wordSig;
+    int32_t exp;
+    uint32_t leadingBit;
+
+    wordSig = wPtr[indexWordHi( 4 )];
+    exp = expF128UI96( wordSig );
+    if ( exp ) {
+        softfloat_shortShiftLeft128M( wPtr, shiftDist, sigPtr );
+        leadingBit = 0x00010000<<shiftDist;
+        sigPtr[indexWordHi( 4 )] =
+            (sigPtr[indexWordHi( 4 )] & (leadingBit - 1)) | leadingBit;
+    } else {
+        exp = 16;
+        wordSig &= 0x7FFFFFFF;
+        if ( ! wordSig ) {
+            exp = -16;
+            wordSig = wPtr[indexWord( 4, 2 )];
+            if ( ! wordSig ) {
+                exp = -48;
+                wordSig = wPtr[indexWord( 4, 1 )];
+                if ( ! wordSig ) {
+                    wordSig = wPtr[indexWord( 4, 0 )];
+                    if ( ! wordSig ) return -128;
+                    exp = -80;
+                }
+            }
+        }
+        exp -= softfloat_countLeadingZeros32( wordSig );
+        softfloat_shiftLeft128M( wPtr, 1 - exp + shiftDist, sigPtr );
+    }
+    return exp;
+
+}
+
diff --git a/ext/softfloat/s_shiftRightJam128.c b/ext/softfloat/s_shiftRightJam128.c
index 6e251ffc3d..8d2b91e87d 100644
--- a/ext/softfloat/s_shiftRightJam128.c
+++ b/ext/softfloat/s_shiftRightJam128.c
@@ -35,7 +35,6 @@ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 =============================================================================*/
 
 #include <stdint.h>
-
 #include "platform.h"
 #include "primitiveTypes.h"
 
diff --git a/ext/softfloat/s_shiftRightJam128Extra.c b/ext/softfloat/s_shiftRightJam128Extra.c
index f4812683bd..4e1293c74a 100644
--- a/ext/softfloat/s_shiftRightJam128Extra.c
+++ b/ext/softfloat/s_shiftRightJam128Extra.c
@@ -35,7 +35,6 @@ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 =============================================================================*/
 
 #include <stdint.h>
-
 #include "platform.h"
 #include "primitiveTypes.h"
 
diff --git a/ext/softfloat/s_shiftRightJam256M.c b/ext/softfloat/s_shiftRightJam256M.c
index 97845e43eb..04cd1e5084 100644
--- a/ext/softfloat/s_shiftRightJam256M.c
+++ b/ext/softfloat/s_shiftRightJam256M.c
@@ -35,7 +35,6 @@ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 =============================================================================*/
 
 #include <stdint.h>
-
 #include "platform.h"
 #include "primitiveTypes.h"
 
diff --git a/ext/softfloat/s_shiftRightJam32.c b/ext/softfloat/s_shiftRightJam32.c
index dda9c0ef34..fbc3aa0110 100644
--- a/ext/softfloat/s_shiftRightJam32.c
+++ b/ext/softfloat/s_shiftRightJam32.c
@@ -35,7 +35,6 @@ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 =============================================================================*/
 
 #include <stdint.h>
-
 #include "platform.h"
 
 #ifndef softfloat_shiftRightJam32
diff --git a/ext/softfloat/s_shiftRightJam64.c b/ext/softfloat/s_shiftRightJam64.c
index e2d3ec921c..34edd7bf8f 100644
--- a/ext/softfloat/s_shiftRightJam64.c
+++ b/ext/softfloat/s_shiftRightJam64.c
@@ -35,7 +35,6 @@ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 =============================================================================*/
 
 #include <stdint.h>
-
 #include "platform.h"
 
 #ifndef softfloat_shiftRightJam64
diff --git a/ext/softfloat/s_shiftRightJam64Extra.c b/ext/softfloat/s_shiftRightJam64Extra.c
index eeaff03ee7..4d7871226e 100644
--- a/ext/softfloat/s_shiftRightJam64Extra.c
+++ b/ext/softfloat/s_shiftRightJam64Extra.c
@@ -35,7 +35,6 @@ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 =============================================================================*/
 
 #include <stdint.h>
-
 #include "platform.h"
 #include "primitiveTypes.h"
 
diff --git a/ext/softfloat/s_shiftRightJamM.c b/ext/softfloat/s_shiftRightJamM.c
new file mode 100644
index 0000000000..99db7b6765
--- /dev/null
+++ b/ext/softfloat/s_shiftRightJamM.c
@@ -0,0 +1,101 @@
+
+/*============================================================================
+
+This C source file is part of the SoftFloat IEEE Floating-Point Arithmetic
+Package, Release 3d, by John R. Hauser.
+
+Copyright 2011, 2012, 2013, 2014, 2015, 2016 The Regents of the University of
+California.  All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met:
+
+ 1. Redistributions of source code must retain the above copyright notice,
+    this list of conditions, and the following disclaimer.
+
+ 2. Redistributions in binary form must reproduce the above copyright notice,
+    this list of conditions, and the following disclaimer in the documentation
+    and/or other materials provided with the distribution.
+
+ 3. Neither the name of the University nor the names of its contributors may
+    be used to endorse or promote products derived from this software without
+    specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS "AS IS", AND ANY
+EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE, ARE
+DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY
+DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+=============================================================================*/
+
+#include <stdint.h>
+#include "platform.h"
+
+#ifndef softfloat_shiftRightJamM
+
+#define softfloat_shiftRightJamM softfloat_shiftRightJamM
+#include "primitives.h"
+
+void
+ softfloat_shiftRightJamM(
+     uint_fast8_t size_words,
+     const uint32_t *aPtr,
+     uint32_t dist,
+     uint32_t *zPtr
+ )
+{
+    uint32_t wordJam, wordDist, *ptr;
+    uint_fast8_t i, innerDist;
+
+    wordJam = 0;
+    wordDist = dist>>5;
+    if ( wordDist ) {
+        if ( size_words < wordDist ) wordDist = size_words;
+        ptr = (uint32_t *) (aPtr + indexMultiwordLo( size_words, wordDist ));
+        i = wordDist;
+        do {
+            wordJam = *ptr++;
+            if ( wordJam ) break;
+            --i;
+        } while ( i );
+        ptr = zPtr;
+    }
+    if ( wordDist < size_words ) {
+        aPtr += indexMultiwordHiBut( size_words, wordDist );
+        innerDist = dist & 31;
+        if ( innerDist ) {
+            softfloat_shortShiftRightJamM(
+                size_words - wordDist,
+                aPtr,
+                innerDist,
+                zPtr + indexMultiwordLoBut( size_words, wordDist )
+            );
+            if ( ! wordDist ) goto wordJam;
+        } else {
+            aPtr += indexWordLo( size_words - wordDist );
+            ptr = zPtr + indexWordLo( size_words );
+            for ( i = size_words - wordDist; i; --i ) {
+                *ptr = *aPtr;
+                aPtr += wordIncr;
+                ptr += wordIncr;
+            }
+        }
+        ptr = zPtr + indexMultiwordHi( size_words, wordDist );
+    }
+    do {
+        *ptr++ = 0;
+        --wordDist;
+    } while ( wordDist );
+ wordJam:
+    if ( wordJam ) zPtr[indexWordLo( size_words )] |= 1;
+
+}
+
+#endif
+
diff --git a/ext/softfloat/s_shiftRightM.c b/ext/softfloat/s_shiftRightM.c
new file mode 100644
index 0000000000..f1296e10ee
--- /dev/null
+++ b/ext/softfloat/s_shiftRightM.c
@@ -0,0 +1,91 @@
+
+/*============================================================================
+
+This C source file is part of the SoftFloat IEEE Floating-Point Arithmetic
+Package, Release 3d, by John R. Hauser.
+
+Copyright 2011, 2012, 2013, 2014, 2015, 2016 The Regents of the University of
+California.  All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met:
+
+ 1. Redistributions of source code must retain the above copyright notice,
+    this list of conditions, and the following disclaimer.
+
+ 2. Redistributions in binary form must reproduce the above copyright notice,
+    this list of conditions, and the following disclaimer in the documentation
+    and/or other materials provided with the distribution.
+
+ 3. Neither the name of the University nor the names of its contributors may
+    be used to endorse or promote products derived from this software without
+    specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS "AS IS", AND ANY
+EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE, ARE
+DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY
+DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+=============================================================================*/
+
+#include <stdint.h>
+#include "platform.h"
+
+#ifndef softfloat_shiftRightM
+
+#define softfloat_shiftRightM softfloat_shiftRightM
+#include "primitives.h"
+
+void
+ softfloat_shiftRightM(
+     uint_fast8_t size_words,
+     const uint32_t *aPtr,
+     uint32_t dist,
+     uint32_t *zPtr
+ )
+{
+    uint32_t wordDist;
+    uint_fast8_t innerDist;
+    uint32_t *destPtr;
+    uint_fast8_t i;
+
+    wordDist = dist>>5;
+    if ( wordDist < size_words ) {
+        aPtr += indexMultiwordHiBut( size_words, wordDist );
+        innerDist = dist & 31;
+        if ( innerDist ) {
+            softfloat_shortShiftRightM(
+                size_words - wordDist,
+                aPtr,
+                innerDist,
+                zPtr + indexMultiwordLoBut( size_words, wordDist )
+            );
+            if ( ! wordDist ) return;
+        } else {
+            aPtr += indexWordLo( size_words - wordDist );
+            destPtr = zPtr + indexWordLo( size_words );
+            for ( i = size_words - wordDist; i; --i ) {
+                *destPtr = *aPtr;
+                aPtr += wordIncr;
+                destPtr += wordIncr;
+            }
+        }
+        zPtr += indexMultiwordHi( size_words, wordDist );
+    } else {
+        wordDist = size_words;
+    }
+    do {
+        *zPtr++ = 0;
+        --wordDist;
+    } while ( wordDist );
+
+}
+
+#endif
+
diff --git a/ext/softfloat/s_shortShiftLeft128.c b/ext/softfloat/s_shortShiftLeft128.c
index 8bcbd7b605..9b7c06726a 100644
--- a/ext/softfloat/s_shortShiftLeft128.c
+++ b/ext/softfloat/s_shortShiftLeft128.c
@@ -35,7 +35,6 @@ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 =============================================================================*/
 
 #include <stdint.h>
-
 #include "platform.h"
 #include "primitiveTypes.h"
 
diff --git a/ext/softfloat/s_shortShiftLeft64To96M.c b/ext/softfloat/s_shortShiftLeft64To96M.c
index 1b69f6cb0e..4caf4b9d6e 100644
--- a/ext/softfloat/s_shortShiftLeft64To96M.c
+++ b/ext/softfloat/s_shortShiftLeft64To96M.c
@@ -35,7 +35,6 @@ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 =============================================================================*/
 
 #include <stdint.h>
-
 #include "platform.h"
 #include "primitiveTypes.h"
 
diff --git a/ext/softfloat/s_shortShiftLeftM.c b/ext/softfloat/s_shortShiftLeftM.c
new file mode 100644
index 0000000000..949460543a
--- /dev/null
+++ b/ext/softfloat/s_shortShiftLeftM.c
@@ -0,0 +1,70 @@
+
+/*============================================================================
+
+This C source file is part of the SoftFloat IEEE Floating-Point Arithmetic
+Package, Release 3d, by John R. Hauser.
+
+Copyright 2011, 2012, 2013, 2014, 2015, 2016 The Regents of the University of
+California.  All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met:
+
+ 1. Redistributions of source code must retain the above copyright notice,
+    this list of conditions, and the following disclaimer.
+
+ 2. Redistributions in binary form must reproduce the above copyright notice,
+    this list of conditions, and the following disclaimer in the documentation
+    and/or other materials provided with the distribution.
+
+ 3. Neither the name of the University nor the names of its contributors may
+    be used to endorse or promote products derived from this software without
+    specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS "AS IS", AND ANY
+EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE, ARE
+DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY
+DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+=============================================================================*/
+
+#include <stdint.h>
+#include "platform.h"
+#include "primitiveTypes.h"
+
+#ifndef softfloat_shortShiftLeftM
+
+void
+ softfloat_shortShiftLeftM(
+     uint_fast8_t size_words,
+     const uint32_t *aPtr,
+     uint_fast8_t dist,
+     uint32_t *zPtr
+ )
+{
+    uint_fast8_t uNegDist;
+    unsigned int index, lastIndex;
+    uint32_t partWordZ, wordA;
+
+    uNegDist = -dist;
+    index = indexWordHi( size_words );
+    lastIndex = indexWordLo( size_words );
+    partWordZ = aPtr[index]<<dist;
+    while ( index != lastIndex ) {
+        wordA = aPtr[index - wordIncr];
+        zPtr[index] = partWordZ | wordA>>(uNegDist & 31);
+        index -= wordIncr;
+        partWordZ = wordA<<dist;
+    }
+    zPtr[index] = partWordZ;
+
+}
+
+#endif
+
diff --git a/ext/softfloat/s_shortShiftRight128.c b/ext/softfloat/s_shortShiftRight128.c
index 43be9cfcd9..28c39bb273 100644
--- a/ext/softfloat/s_shortShiftRight128.c
+++ b/ext/softfloat/s_shortShiftRight128.c
@@ -35,7 +35,6 @@ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 =============================================================================*/
 
 #include <stdint.h>
-
 #include "platform.h"
 #include "primitiveTypes.h"
 
diff --git a/ext/softfloat/s_shortShiftRightExtendM.c b/ext/softfloat/s_shortShiftRightExtendM.c
index 30f64a4c65..309188c356 100644
--- a/ext/softfloat/s_shortShiftRightExtendM.c
+++ b/ext/softfloat/s_shortShiftRightExtendM.c
@@ -35,7 +35,6 @@ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 =============================================================================*/
 
 #include <stdint.h>
-
 #include "platform.h"
 #include "primitiveTypes.h"
 
diff --git a/ext/softfloat/s_shortShiftRightJam128.c b/ext/softfloat/s_shortShiftRightJam128.c
index b2e7f96a3f..3eb0dd4006 100644
--- a/ext/softfloat/s_shortShiftRightJam128.c
+++ b/ext/softfloat/s_shortShiftRightJam128.c
@@ -35,7 +35,6 @@ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 =============================================================================*/
 
 #include <stdint.h>
-
 #include "platform.h"
 #include "primitiveTypes.h"
 
diff --git a/ext/softfloat/s_shortShiftRightJam128Extra.c b/ext/softfloat/s_shortShiftRightJam128Extra.c
index 44128bfc83..13692a0d60 100644
--- a/ext/softfloat/s_shortShiftRightJam128Extra.c
+++ b/ext/softfloat/s_shortShiftRightJam128Extra.c
@@ -35,7 +35,6 @@ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 =============================================================================*/
 
 #include <stdint.h>
-
 #include "platform.h"
 #include "primitiveTypes.h"
 
diff --git a/ext/softfloat/s_shortShiftRightJam64.c b/ext/softfloat/s_shortShiftRightJam64.c
index 13679e8ded..7e93cd4fbd 100644
--- a/ext/softfloat/s_shortShiftRightJam64.c
+++ b/ext/softfloat/s_shortShiftRightJam64.c
@@ -35,7 +35,6 @@ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 =============================================================================*/
 
 #include <stdint.h>
-
 #include "platform.h"
 
 #ifndef softfloat_shortShiftRightJam64
diff --git a/ext/softfloat/s_shortShiftRightJam64Extra.c b/ext/softfloat/s_shortShiftRightJam64Extra.c
index 1f6ffc4ab6..25d23f4b94 100644
--- a/ext/softfloat/s_shortShiftRightJam64Extra.c
+++ b/ext/softfloat/s_shortShiftRightJam64Extra.c
@@ -35,7 +35,6 @@ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 =============================================================================*/
 
 #include <stdint.h>
-
 #include "platform.h"
 #include "primitiveTypes.h"
 
diff --git a/ext/softfloat/s_shortShiftRightJamM.c b/ext/softfloat/s_shortShiftRightJamM.c
new file mode 100644
index 0000000000..fd647f43a0
--- /dev/null
+++ b/ext/softfloat/s_shortShiftRightJamM.c
@@ -0,0 +1,72 @@
+
+/*============================================================================
+
+This C source file is part of the SoftFloat IEEE Floating-Point Arithmetic
+Package, Release 3d, by John R. Hauser.
+
+Copyright 2011, 2012, 2013, 2014, 2015, 2016 The Regents of the University of
+California.  All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met:
+
+ 1. Redistributions of source code must retain the above copyright notice,
+    this list of conditions, and the following disclaimer.
+
+ 2. Redistributions in binary form must reproduce the above copyright notice,
+    this list of conditions, and the following disclaimer in the documentation
+    and/or other materials provided with the distribution.
+
+ 3. Neither the name of the University nor the names of its contributors may
+    be used to endorse or promote products derived from this software without
+    specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS "AS IS", AND ANY
+EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE, ARE
+DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY
+DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+=============================================================================*/
+
+#include <stdint.h>
+#include "platform.h"
+#include "primitiveTypes.h"
+
+#ifndef softfloat_shortShiftRightJamM
+
+void
+ softfloat_shortShiftRightJamM(
+     uint_fast8_t size_words,
+     const uint32_t *aPtr,
+     uint_fast8_t dist,
+     uint32_t *zPtr
+ )
+{
+    uint_fast8_t uNegDist;
+    unsigned int index, lastIndex;
+    uint32_t partWordZ, wordA;
+
+    uNegDist = -dist;
+    index = indexWordLo( size_words );
+    lastIndex = indexWordHi( size_words );
+    wordA = aPtr[index];
+    partWordZ = wordA>>dist;
+    if ( partWordZ<<dist != wordA ) partWordZ |= 1;
+    while ( index != lastIndex ) {
+        wordA = aPtr[index + wordIncr];
+        zPtr[index] = wordA<<(uNegDist & 31) | partWordZ;
+        index += wordIncr;
+        partWordZ = wordA>>dist;
+    }
+    zPtr[index] = partWordZ;
+
+}
+
+#endif
+
diff --git a/ext/softfloat/s_shortShiftRightM.c b/ext/softfloat/s_shortShiftRightM.c
index 090f5a1b37..308ad59c57 100644
--- a/ext/softfloat/s_shortShiftRightM.c
+++ b/ext/softfloat/s_shortShiftRightM.c
@@ -35,7 +35,6 @@ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 =============================================================================*/
 
 #include <stdint.h>
-
 #include "platform.h"
 #include "primitiveTypes.h"
 
diff --git a/ext/softfloat/s_sub128.c b/ext/softfloat/s_sub128.c
index f3f21d9f80..ed86e10005 100644
--- a/ext/softfloat/s_sub128.c
+++ b/ext/softfloat/s_sub128.c
@@ -35,7 +35,6 @@ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 =============================================================================*/
 
 #include <stdint.h>
-
 #include "platform.h"
 #include "primitiveTypes.h"
 
diff --git a/ext/softfloat/s_sub1XM.c b/ext/softfloat/s_sub1XM.c
index e6d7fa733a..73773e5b03 100644
--- a/ext/softfloat/s_sub1XM.c
+++ b/ext/softfloat/s_sub1XM.c
@@ -35,7 +35,6 @@ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 =============================================================================*/
 
 #include <stdint.h>
-
 #include "platform.h"
 #include "primitiveTypes.h"
 
diff --git a/ext/softfloat/s_sub256M.c b/ext/softfloat/s_sub256M.c
index b9c1a9500d..c07b45eae9 100644
--- a/ext/softfloat/s_sub256M.c
+++ b/ext/softfloat/s_sub256M.c
@@ -35,7 +35,6 @@ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 =============================================================================*/
 
 #include <stdint.h>
-
 #include "platform.h"
 #include "primitiveTypes.h"
 
diff --git a/ext/softfloat/s_subM.c b/ext/softfloat/s_subM.c
index 4274bd3cd1..003f699f14 100644
--- a/ext/softfloat/s_subM.c
+++ b/ext/softfloat/s_subM.c
@@ -35,7 +35,6 @@ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 =============================================================================*/
 
 #include <stdint.h>
-
 #include "platform.h"
 #include "primitiveTypes.h"
 
diff --git a/ext/softfloat/s_subMagsExtF80.c b/ext/softfloat/s_subMagsExtF80.c
new file mode 100644
index 0000000000..ad9d1dcd48
--- /dev/null
+++ b/ext/softfloat/s_subMagsExtF80.c
@@ -0,0 +1,158 @@
+
+/*============================================================================
+
+This C source file is part of the SoftFloat IEEE Floating-Point Arithmetic
+Package, Release 3d, by John R. Hauser.
+
+Copyright 2011, 2012, 2013, 2014, 2015 The Regents of the University of
+California.  All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met:
+
+ 1. Redistributions of source code must retain the above copyright notice,
+    this list of conditions, and the following disclaimer.
+
+ 2. Redistributions in binary form must reproduce the above copyright notice,
+    this list of conditions, and the following disclaimer in the documentation
+    and/or other materials provided with the distribution.
+
+ 3. Neither the name of the University nor the names of its contributors may
+    be used to endorse or promote products derived from this software without
+    specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS "AS IS", AND ANY
+EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE, ARE
+DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY
+DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+=============================================================================*/
+
+#include <stdbool.h>
+#include <stdint.h>
+#include "platform.h"
+#include "internals.h"
+#include "specialize.h"
+#include "softfloat.h"
+
+extFloat80_t
+ softfloat_subMagsExtF80(
+     uint_fast16_t uiA64,
+     uint_fast64_t uiA0,
+     uint_fast16_t uiB64,
+     uint_fast64_t uiB0,
+     bool signZ
+ )
+{
+    int_fast32_t expA;
+    uint_fast64_t sigA;
+    int_fast32_t expB;
+    uint_fast64_t sigB;
+    int_fast32_t expDiff;
+    uint_fast16_t uiZ64;
+    uint_fast64_t uiZ0;
+    int_fast32_t expZ;
+    uint_fast64_t sigExtra;
+    struct uint128 sig128, uiZ;
+    union { struct extFloat80M s; extFloat80_t f; } uZ;
+
+    /*------------------------------------------------------------------------
+    *------------------------------------------------------------------------*/
+    expA = expExtF80UI64( uiA64 );
+    sigA = uiA0;
+    expB = expExtF80UI64( uiB64 );
+    sigB = uiB0;
+    /*------------------------------------------------------------------------
+    *------------------------------------------------------------------------*/
+    expDiff = expA - expB;
+    if ( 0 < expDiff ) goto expABigger;
+    if ( expDiff < 0 ) goto expBBigger;
+    if ( expA == 0x7FFF ) {
+        if ( (sigA | sigB) & UINT64_C( 0x7FFFFFFFFFFFFFFF ) ) {
+            goto propagateNaN;
+        }
+        softfloat_raiseFlags( softfloat_flag_invalid );
+        uiZ64 = defaultNaNExtF80UI64;
+        uiZ0  = defaultNaNExtF80UI0;
+        goto uiZ;
+    }
+    /*------------------------------------------------------------------------
+    *------------------------------------------------------------------------*/
+    expZ = expA;
+    if ( ! expZ ) expZ = 1;
+    sigExtra = 0;
+    if ( sigB < sigA ) goto aBigger;
+    if ( sigA < sigB ) goto bBigger;
+    uiZ64 =
+        packToExtF80UI64( (softfloat_roundingMode == softfloat_round_min), 0 );
+    uiZ0 = 0;
+    goto uiZ;
+    /*------------------------------------------------------------------------
+    *------------------------------------------------------------------------*/
+ expBBigger:
+    if ( expB == 0x7FFF ) {
+        if ( sigB & UINT64_C( 0x7FFFFFFFFFFFFFFF ) ) goto propagateNaN;
+        uiZ64 = packToExtF80UI64( signZ ^ 1, 0x7FFF );
+        uiZ0  = UINT64_C( 0x8000000000000000 );
+        goto uiZ;
+    }
+    if ( ! expA ) {
+        ++expDiff;
+        sigExtra = 0;
+        if ( ! expDiff ) goto newlyAlignedBBigger;
+    }
+    sig128 = softfloat_shiftRightJam128( sigA, 0, -expDiff );
+    sigA = sig128.v64;
+    sigExtra = sig128.v0;
+ newlyAlignedBBigger:
+    expZ = expB;
+ bBigger:
+    signZ = ! signZ;
+    sig128 = softfloat_sub128( sigB, 0, sigA, sigExtra );
+    goto normRoundPack;
+    /*------------------------------------------------------------------------
+    *------------------------------------------------------------------------*/
+ expABigger:
+    if ( expA == 0x7FFF ) {
+        if ( sigA & UINT64_C( 0x7FFFFFFFFFFFFFFF ) ) goto propagateNaN;
+        uiZ64 = uiA64;
+        uiZ0  = uiA0;
+        goto uiZ;
+    }
+    if ( ! expB ) {
+        --expDiff;
+        sigExtra = 0;
+        if ( ! expDiff ) goto newlyAlignedABigger;
+    }
+    sig128 = softfloat_shiftRightJam128( sigB, 0, expDiff );
+    sigB = sig128.v64;
+    sigExtra = sig128.v0;
+ newlyAlignedABigger:
+    expZ = expA;
+ aBigger:
+    sig128 = softfloat_sub128( sigA, 0, sigB, sigExtra );
+    /*------------------------------------------------------------------------
+    *------------------------------------------------------------------------*/
+ normRoundPack:
+    return
+        softfloat_normRoundPackToExtF80(
+            signZ, expZ, sig128.v64, sig128.v0, extF80_roundingPrecision );
+    /*------------------------------------------------------------------------
+    *------------------------------------------------------------------------*/
+ propagateNaN:
+    uiZ = softfloat_propagateNaNExtF80UI( uiA64, uiA0, uiB64, uiB0 );
+    uiZ64 = uiZ.v64;
+    uiZ0  = uiZ.v0;
+ uiZ:
+    uZ.s.signExp = uiZ64;
+    uZ.s.signif  = uiZ0;
+    return uZ.f;
+
+}
+
diff --git a/ext/softfloat/s_subMagsF128.c b/ext/softfloat/s_subMagsF128.c
index ac64f8eb18..c4264d542b 100644
--- a/ext/softfloat/s_subMagsF128.c
+++ b/ext/softfloat/s_subMagsF128.c
@@ -36,11 +36,10 @@ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 
 #include <stdbool.h>
 #include <stdint.h>
-
-#include "internals.h"
 #include "platform.h"
-#include "softfloat.h"
+#include "internals.h"
 #include "specialize.h"
+#include "softfloat.h"
 
 float128_t
  softfloat_subMagsF128(
diff --git a/ext/softfloat/s_subMagsF16.c b/ext/softfloat/s_subMagsF16.c
index acdc567999..5ec579e882 100644
--- a/ext/softfloat/s_subMagsF16.c
+++ b/ext/softfloat/s_subMagsF16.c
@@ -36,11 +36,10 @@ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 
 #include <stdbool.h>
 #include <stdint.h>
-
-#include "internals.h"
 #include "platform.h"
-#include "softfloat.h"
+#include "internals.h"
 #include "specialize.h"
+#include "softfloat.h"
 
 float16_t softfloat_subMagsF16( uint_fast16_t uiA, uint_fast16_t uiB )
 {
diff --git a/ext/softfloat/s_subMagsF32.c b/ext/softfloat/s_subMagsF32.c
index 5998902756..86e89f2ec3 100644
--- a/ext/softfloat/s_subMagsF32.c
+++ b/ext/softfloat/s_subMagsF32.c
@@ -36,11 +36,10 @@ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 
 #include <stdbool.h>
 #include <stdint.h>
-
-#include "internals.h"
 #include "platform.h"
-#include "softfloat.h"
+#include "internals.h"
 #include "specialize.h"
+#include "softfloat.h"
 
 float32_t softfloat_subMagsF32( uint_fast32_t uiA, uint_fast32_t uiB )
 {
diff --git a/ext/softfloat/s_subMagsF64.c b/ext/softfloat/s_subMagsF64.c
index e8b9be2295..5ef9ea446a 100644
--- a/ext/softfloat/s_subMagsF64.c
+++ b/ext/softfloat/s_subMagsF64.c
@@ -36,11 +36,10 @@ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 
 #include <stdbool.h>
 #include <stdint.h>
-
-#include "internals.h"
 #include "platform.h"
-#include "softfloat.h"
+#include "internals.h"
 #include "specialize.h"
+#include "softfloat.h"
 
 float64_t
  softfloat_subMagsF64( uint_fast64_t uiA, uint_fast64_t uiB, bool signZ )
diff --git a/ext/softfloat/s_tryPropagateNaNExtF80M.c b/ext/softfloat/s_tryPropagateNaNExtF80M.c
new file mode 100644
index 0000000000..e4f832e84c
--- /dev/null
+++ b/ext/softfloat/s_tryPropagateNaNExtF80M.c
@@ -0,0 +1,64 @@
+
+/*============================================================================
+
+This C source file is part of the SoftFloat IEEE Floating-Point Arithmetic
+Package, Release 3d, by John R. Hauser.
+
+Copyright 2011, 2012, 2013, 2014 The Regents of the University of California.
+All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met:
+
+ 1. Redistributions of source code must retain the above copyright notice,
+    this list of conditions, and the following disclaimer.
+
+ 2. Redistributions in binary form must reproduce the above copyright notice,
+    this list of conditions, and the following disclaimer in the documentation
+    and/or other materials provided with the distribution.
+
+ 3. Neither the name of the University nor the names of its contributors may
+    be used to endorse or promote products derived from this software without
+    specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS "AS IS", AND ANY
+EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE, ARE
+DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY
+DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+=============================================================================*/
+
+#include <stdbool.h>
+#include "platform.h"
+#include "internals.h"
+#include "specialize.h"
+
+bool
+ softfloat_tryPropagateNaNExtF80M(
+     const struct extFloat80M *aSPtr,
+     const struct extFloat80M *bSPtr,
+     struct extFloat80M *zSPtr
+ )
+{
+    uint_fast16_t ui64;
+    uint64_t ui0;
+
+    ui64 = aSPtr->signExp;
+    ui0  = aSPtr->signif;
+    if ( isNaNExtF80UI( ui64, ui0 ) ) goto propagateNaN;
+    ui64 = bSPtr->signExp;
+    ui0  = bSPtr->signif;
+    if ( isNaNExtF80UI( ui64, ui0 ) ) goto propagateNaN;
+    return false;
+ propagateNaN:
+    softfloat_propagateNaNExtF80M( aSPtr, bSPtr, zSPtr );
+    return true;
+
+}
+
diff --git a/ext/softfloat/s_tryPropagateNaNF128M.c b/ext/softfloat/s_tryPropagateNaNF128M.c
new file mode 100644
index 0000000000..c0ce488f75
--- /dev/null
+++ b/ext/softfloat/s_tryPropagateNaNF128M.c
@@ -0,0 +1,55 @@
+
+/*============================================================================
+
+This C source file is part of the SoftFloat IEEE Floating-Point Arithmetic
+Package, Release 3d, by John R. Hauser.
+
+Copyright 2011, 2012, 2013, 2014 The Regents of the University of California.
+All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met:
+
+ 1. Redistributions of source code must retain the above copyright notice,
+    this list of conditions, and the following disclaimer.
+
+ 2. Redistributions in binary form must reproduce the above copyright notice,
+    this list of conditions, and the following disclaimer in the documentation
+    and/or other materials provided with the distribution.
+
+ 3. Neither the name of the University nor the names of its contributors may
+    be used to endorse or promote products derived from this software without
+    specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS "AS IS", AND ANY
+EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE, ARE
+DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY
+DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+=============================================================================*/
+
+#include <stdbool.h>
+#include <stdint.h>
+#include "platform.h"
+#include "internals.h"
+#include "specialize.h"
+
+bool
+ softfloat_tryPropagateNaNF128M(
+     const uint32_t *aWPtr, const uint32_t *bWPtr, uint32_t *zWPtr )
+{
+
+    if ( softfloat_isNaNF128M( aWPtr ) || softfloat_isNaNF128M( bWPtr ) ) {
+        softfloat_propagateNaNF128M( aWPtr, bWPtr, zWPtr );
+        return true;
+    }
+    return false;
+
+}
+
diff --git a/ext/softfloat/softfloat.h b/ext/softfloat/softfloat.h
index 41cacbc53b..bdac1be263 100644
--- a/ext/softfloat/softfloat.h
+++ b/ext/softfloat/softfloat.h
@@ -48,7 +48,6 @@ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 
 #include <stdbool.h>
 #include <stdint.h>
-
 #include "softfloat_types.h"
 
 #ifndef THREAD_LOCAL
@@ -142,8 +141,12 @@ void i64_to_f128M( int64_t, float128_t * );
 /*----------------------------------------------------------------------------
 | 16-bit (half-precision) floating-point operations.
 *----------------------------------------------------------------------------*/
+uint_fast8_t f16_to_ui8( float16_t, uint_fast8_t, bool );
+uint_fast16_t f16_to_ui16( float16_t, uint_fast8_t, bool );
 uint_fast32_t f16_to_ui32( float16_t, uint_fast8_t, bool );
 uint_fast64_t f16_to_ui64( float16_t, uint_fast8_t, bool );
+int_fast8_t f16_to_i8( float16_t, uint_fast8_t, bool );
+int_fast16_t f16_to_i16( float16_t, uint_fast8_t, bool );
 int_fast32_t f16_to_i32( float16_t, uint_fast8_t, bool );
 int_fast64_t f16_to_i64( float16_t, uint_fast8_t, bool );
 uint_fast32_t f16_to_ui32_r_minMag( float16_t, bool );
@@ -161,6 +164,8 @@ void f16_to_f128M( float16_t, float128_t * );
 float16_t f16_roundToInt( float16_t, uint_fast8_t, bool );
 float16_t f16_add( float16_t, float16_t );
 float16_t f16_sub( float16_t, float16_t );
+float16_t f16_max( float16_t, float16_t );
+float16_t f16_min( float16_t, float16_t );
 float16_t f16_mul( float16_t, float16_t );
 float16_t f16_mulAdd( float16_t, float16_t, float16_t );
 float16_t f16_div( float16_t, float16_t );
@@ -174,12 +179,16 @@ bool f16_le_quiet( float16_t, float16_t );
 bool f16_lt_quiet( float16_t, float16_t );
 bool f16_isSignalingNaN( float16_t );
 uint_fast16_t f16_classify( float16_t );
+float16_t f16_rsqrte7( float16_t );
+float16_t f16_recip7( float16_t );
 
 /*----------------------------------------------------------------------------
 | 32-bit (single-precision) floating-point operations.
 *----------------------------------------------------------------------------*/
+uint_fast16_t f32_to_ui16( float32_t, uint_fast8_t, bool );
 uint_fast32_t f32_to_ui32( float32_t, uint_fast8_t, bool );
 uint_fast64_t f32_to_ui64( float32_t, uint_fast8_t, bool );
+int_fast16_t f32_to_i16( float32_t, uint_fast8_t, bool );
 int_fast32_t f32_to_i32( float32_t, uint_fast8_t, bool );
 int_fast64_t f32_to_i64( float32_t, uint_fast8_t, bool );
 uint_fast32_t f32_to_ui32_r_minMag( float32_t, bool );
@@ -197,6 +206,8 @@ void f32_to_f128M( float32_t, float128_t * );
 float32_t f32_roundToInt( float32_t, uint_fast8_t, bool );
 float32_t f32_add( float32_t, float32_t );
 float32_t f32_sub( float32_t, float32_t );
+float32_t f32_max( float32_t, float32_t );
+float32_t f32_min( float32_t, float32_t );
 float32_t f32_mul( float32_t, float32_t );
 float32_t f32_mulAdd( float32_t, float32_t, float32_t );
 float32_t f32_div( float32_t, float32_t );
@@ -210,6 +221,8 @@ bool f32_le_quiet( float32_t, float32_t );
 bool f32_lt_quiet( float32_t, float32_t );
 bool f32_isSignalingNaN( float32_t );
 uint_fast16_t f32_classify( float32_t );
+float32_t f32_rsqrte7( float32_t );
+float32_t f32_recip7( float32_t );
 
 /*----------------------------------------------------------------------------
 | 64-bit (double-precision) floating-point operations.
@@ -233,6 +246,8 @@ void f64_to_f128M( float64_t, float128_t * );
 float64_t f64_roundToInt( float64_t, uint_fast8_t, bool );
 float64_t f64_add( float64_t, float64_t );
 float64_t f64_sub( float64_t, float64_t );
+float64_t f64_max( float64_t, float64_t );
+float64_t f64_min( float64_t, float64_t );
 float64_t f64_mul( float64_t, float64_t );
 float64_t f64_mulAdd( float64_t, float64_t, float64_t );
 float64_t f64_div( float64_t, float64_t );
@@ -246,6 +261,8 @@ bool f64_le_quiet( float64_t, float64_t );
 bool f64_lt_quiet( float64_t, float64_t );
 bool f64_isSignalingNaN( float64_t );
 uint_fast16_t f64_classify( float64_t );
+float64_t f64_rsqrte7( float64_t );
+float64_t f64_recip7( float64_t );
 
 /*----------------------------------------------------------------------------
 | Rounding precision for 80-bit extended double-precision floating-point.
diff --git a/ext/softfloat/softfloat.mk.in b/ext/softfloat/softfloat.mk.in
deleted file mode 100644
index 7cfe96034b..0000000000
--- a/ext/softfloat/softfloat.mk.in
+++ /dev/null
@@ -1,230 +0,0 @@
-softfloat_subproject_deps =
-
-softfloat_hdrs = \
-  internals.h \
-  primitives.h \
-  primitiveTypes.h \
-  softfloat.h \
-  softfloat_types.h \
-  specialize.h \
-
-softfloat_c_srcs = \
-	f128_add.c \
-	f128_classify.c \
-	f128_div.c \
-	f128_eq.c \
-	f128_eq_signaling.c \
-	f128_isSignalingNaN.c \
-	f128_le.c \
-	f128_le_quiet.c \
-	f128_lt.c \
-	f128_lt_quiet.c \
-	f128_mulAdd.c \
-	f128_mul.c \
-	f128_rem.c \
-	f128_roundToInt.c \
-	f128_sqrt.c \
-	f128_sub.c \
-	f128_to_f16.c \
-	f128_to_f32.c \
-	f128_to_f64.c \
-	f128_to_i32.c \
-	f128_to_i32_r_minMag.c \
-	f128_to_i64.c \
-	f128_to_i64_r_minMag.c \
-	f128_to_ui32.c \
-	f128_to_ui32_r_minMag.c \
-	f128_to_ui64.c \
-	f128_to_ui64_r_minMag.c \
-	f16_add.c \
-	f16_classify.c \
-	f16_div.c \
-	f16_eq.c \
-	f16_eq_signaling.c \
-	f16_isSignalingNaN.c \
-	f16_le.c \
-	f16_le_quiet.c \
-	f16_lt.c \
-	f16_lt_quiet.c \
-	f16_mulAdd.c \
-	f16_mul.c \
-	f16_rem.c \
-	f16_roundToInt.c \
-	f16_sqrt.c \
-	f16_sub.c \
-	f16_to_f128.c \
-	f16_to_f32.c \
-	f16_to_f64.c \
-	f16_to_i32.c \
-	f16_to_i32_r_minMag.c \
-	f16_to_i64.c \
-	f16_to_i64_r_minMag.c \
-	f16_to_ui32.c \
-	f16_to_ui32_r_minMag.c \
-	f16_to_ui64.c \
-	f16_to_ui64_r_minMag.c \
-	f32_add.c \
-	f32_classify.c \
-	f32_div.c \
-	f32_eq.c \
-	f32_eq_signaling.c \
-	f32_isSignalingNaN.c \
-	f32_le.c \
-	f32_le_quiet.c \
-	f32_lt.c \
-	f32_lt_quiet.c \
-	f32_mulAdd.c \
-	f32_mul.c \
-	f32_rem.c \
-	f32_roundToInt.c \
-	f32_sqrt.c \
-	f32_sub.c \
-	f32_to_f128.c \
-	f32_to_f16.c \
-	f32_to_f64.c \
-	f32_to_i32.c \
-	f32_to_i32_r_minMag.c \
-	f32_to_i64.c \
-	f32_to_i64_r_minMag.c \
-	f32_to_ui32.c \
-	f32_to_ui32_r_minMag.c \
-	f32_to_ui64.c \
-	f32_to_ui64_r_minMag.c \
-	f64_add.c \
-	f64_classify.c \
-	f64_div.c \
-	f64_eq.c \
-	f64_eq_signaling.c \
-	f64_isSignalingNaN.c \
-	f64_le.c \
-	f64_le_quiet.c \
-	f64_lt.c \
-	f64_lt_quiet.c \
-	f64_mulAdd.c \
-	f64_mul.c \
-	f64_rem.c \
-	f64_roundToInt.c \
-	f64_sqrt.c \
-	f64_sub.c \
-	f64_to_f128.c \
-	f64_to_f16.c \
-	f64_to_f32.c \
-	f64_to_i32.c \
-	f64_to_i32_r_minMag.c \
-	f64_to_i64.c \
-	f64_to_i64_r_minMag.c \
-	f64_to_ui32.c \
-	f64_to_ui32_r_minMag.c \
-	f64_to_ui64.c \
-	f64_to_ui64_r_minMag.c \
-	i32_to_f128.c \
-	i32_to_f16.c \
-	i32_to_f32.c \
-	i32_to_f64.c \
-	i64_to_f128.c \
-	i64_to_f16.c \
-	i64_to_f32.c \
-	i64_to_f64.c \
-	s_add128.c \
-	s_add256M.c \
-	s_addCarryM.c \
-	s_addComplCarryM.c \
-	s_addMagsF128.c \
-	s_addMagsF16.c \
-	s_addMagsF32.c \
-	s_addMagsF64.c \
-	s_addM.c \
-	s_approxRecip_1Ks.c \
-	s_approxRecip32_1.c \
-	s_approxRecipSqrt_1Ks.c \
-	s_approxRecipSqrt32_1.c \
-	s_commonNaNToF32UI.c \
-	s_commonNaNToF64UI.c \
-	s_compare128M.c \
-	s_compare96M.c \
-	s_countLeadingZeros16.c \
-	s_countLeadingZeros32.c \
-	s_countLeadingZeros64.c \
-	s_countLeadingZeros8.c \
-	s_eq128.c \
-	s_f32UIToCommonNaN.c \
-	s_f64UIToCommonNaN.c \
-	s_le128.c \
-	s_lt128.c \
-	s_mul128By32.c \
-	s_mul128MTo256M.c \
-	s_mul128To256M.c \
-	s_mul64ByShifted32To128.c \
-	s_mul64To128.c \
-	s_mul64To128M.c \
-	s_mulAddF128.c \
-	s_mulAddF16.c \
-	s_mulAddF32.c \
-	s_mulAddF64.c \
-	s_negXM.c \
-	s_normRoundPackToF128.c \
-	s_normRoundPackToF16.c \
-	s_normRoundPackToF32.c \
-	s_normRoundPackToF64.c \
-	s_normSubnormalF128Sig.c \
-	s_normSubnormalF16Sig.c \
-	s_normSubnormalF32Sig.c \
-	s_normSubnormalF64Sig.c \
-	softfloat_raiseFlags.c \
-	softfloat_state.c \
-	s_propagateNaNF16UI.c \
-	s_propagateNaNF32UI.c \
-	s_propagateNaNF64UI.c \
-	s_propagateNaNF128UI.c \
-	s_remStepMBy32.c \
-	s_roundMToI64.c \
-	s_roundMToUI64.c \
-	s_roundPackMToI64.c \
-	s_roundPackMToUI64.c \
-	s_roundPackToF128.c \
-	s_roundPackToF16.c \
-	s_roundPackToF32.c \
-	s_roundPackToF64.c \
-	s_roundPackToI32.c \
-	s_roundPackToI64.c \
-	s_roundPackToUI32.c \
-	s_roundPackToUI64.c \
-	s_roundToI32.c \
-	s_roundToI64.c \
-	s_roundToUI32.c \
-	s_roundToUI64.c \
-	s_shiftRightJam128.c \
-	s_shiftRightJam128Extra.c \
-	s_shiftRightJam256M.c \
-	s_shiftRightJam32.c \
-	s_shiftRightJam64.c \
-	s_shiftRightJam64Extra.c \
-	s_shortShiftLeft128.c \
-	s_shortShiftLeft64To96M.c \
-	s_shortShiftRight128.c \
-	s_shortShiftRightExtendM.c \
-	s_shortShiftRightJam128.c \
-	s_shortShiftRightJam128Extra.c \
-	s_shortShiftRightJam64.c \
-	s_shortShiftRightJam64Extra.c \
-	s_shortShiftRightM.c \
-	s_sub128.c \
-	s_sub1XM.c \
-	s_sub256M.c \
-	s_subMagsF128.c \
-	s_subMagsF16.c \
-	s_subMagsF32.c \
-	s_subMagsF64.c \
-	s_subM.c \
-	ui32_to_f128.c \
-	ui32_to_f16.c \
-	ui32_to_f32.c \
-	ui32_to_f64.c \
-	ui64_to_f128.c \
-	ui64_to_f16.c \
-	ui64_to_f32.c \
-	ui64_to_f64.c \
-
-softfloat_test_srcs =
-
-softfloat_install_prog_srcs =
diff --git a/ext/softfloat/softfloat_state.c b/ext/softfloat/softfloat_state.c
index 33b72149b0..a105e6f647 100644
--- a/ext/softfloat/softfloat_state.c
+++ b/ext/softfloat/softfloat_state.c
@@ -35,11 +35,10 @@ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 =============================================================================*/
 
 #include <stdint.h>
-
-#include "internals.h"
 #include "platform.h"
-#include "softfloat.h"
+#include "internals.h"
 #include "specialize.h"
+#include "softfloat.h"
 
 #ifndef THREAD_LOCAL
 #define THREAD_LOCAL
diff --git a/ext/softfloat/specialize.h b/ext/softfloat/specialize.h
index 2427bced30..556476c1a5 100644
--- a/ext/softfloat/specialize.h
+++ b/ext/softfloat/specialize.h
@@ -39,7 +39,6 @@ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 
 #include <stdbool.h>
 #include <stdint.h>
-
 #include "primitiveTypes.h"
 #include "softfloat.h"
 
@@ -56,6 +55,20 @@ extern "C" {
 | The values to return on conversions to 32-bit integer formats that raise an
 | invalid exception.
 *----------------------------------------------------------------------------*/
+#define ui8_fromPosOverflow  0xFF
+#define ui8_fromNegOverflow  0
+#define ui8_fromNaN          0xFF
+#define i8_fromPosOverflow   0x7F
+#define i8_fromNegOverflow   (-0x7F - 1)
+#define i8_fromNaN           0x7F
+
+#define ui16_fromPosOverflow 0xFFFF
+#define ui16_fromNegOverflow 0
+#define ui16_fromNaN         0xFFFF
+#define i16_fromPosOverflow  0x7FFF
+#define i16_fromNegOverflow  (-0x7FFF - 1)
+#define i16_fromNaN          0x7FFF
+
 #define ui32_fromPosOverflow 0xFFFFFFFF
 #define ui32_fromNegOverflow 0
 #define ui32_fromNaN         0xFFFFFFFF
diff --git a/ext/softfloat/ui32_to_extF80.c b/ext/softfloat/ui32_to_extF80.c
new file mode 100644
index 0000000000..f391aac537
--- /dev/null
+++ b/ext/softfloat/ui32_to_extF80.c
@@ -0,0 +1,59 @@
+
+/*============================================================================
+
+This C source file is part of the SoftFloat IEEE Floating-Point Arithmetic
+Package, Release 3d, by John R. Hauser.
+
+Copyright 2011, 2012, 2013, 2014, 2015, 2016 The Regents of the University of
+California.  All Rights Reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met:
+
+ 1. Redistributions of source code must retain the above copyright notice,
+    this list of conditions, and the following disclaimer.
+
+ 2. Redistributions in binary form must reproduce the above copyright notice,
+    this list of conditions, and the following disclaimer in the documentation
+    and/or other materials provided with the distribution.
+
+ 3. Neither the name of the University nor the names of its contributors may
+    be used to endorse or promote products derived from this software without
+    specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS "AS IS", AND ANY
+EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE, ARE
+DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY
+DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+=============================================================================*/
+
+#include <stdint.h>
+#include "platform.h"
+#include "internals.h"
+#include "softfloat.h"
+
+extFloat80_t ui32_to_extF80( uint32_t a )
+{
+    uint_fast16_t uiZ64;
+    int_fast8_t shiftDist;
+    union { struct extFloat80M s; extFloat80_t f; } uZ;
+
+    uiZ64 = 0;
+    if ( a ) {
+        shiftDist = softfloat_countLeadingZeros32( a );
+        uiZ64 = 0x401E - shiftDist;
+        a <<= shiftDist;
+    }
+    uZ.s.signExp = uiZ64;
+    uZ.s.signif = (uint_fast64_t) a<<32;
+    return uZ.f;
+
+}
+
diff --git a/ext/softfloat/ui32_to_extF80M.c b/ext/softfloat/ui32_to_extF80M.c
new file mode 100644
index 0000000000..f668afa851
--- /dev/null
+++ b/ext/softfloat/ui32_to_extF80M.c
@@ -0,0 +1,74 @@
+
+/*============================================================================
+
+This C source file is part of the SoftFloat IEEE Floating-Point Arithmetic
+Package, Release 3d, by John R. Hauser.
+
+Copyright 2011, 2012, 2013, 2014, 2015, 2016 The Regents of the University of
+California.  All Rights Reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met:
+
+ 1. Redistributions of source code must retain the above copyright notice,
+    this list of conditions, and the following disclaimer.
+
+ 2. Redistributions in binary form must reproduce the above copyright notice,
+    this list of conditions, and the following disclaimer in the documentation
+    and/or other materials provided with the distribution.
+
+ 3. Neither the name of the University nor the names of its contributors may
+    be used to endorse or promote products derived from this software without
+    specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS "AS IS", AND ANY
+EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE, ARE
+DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY
+DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+=============================================================================*/
+
+#include <stdint.h>
+#include "platform.h"
+#include "internals.h"
+#include "softfloat.h"
+
+#ifdef SOFTFLOAT_FAST_INT64
+
+void ui32_to_extF80M( uint32_t a, extFloat80_t *zPtr )
+{
+
+    *zPtr = ui32_to_extF80( a );
+
+}
+
+#else
+
+void ui32_to_extF80M( uint32_t a, extFloat80_t *zPtr )
+{
+    struct extFloat80M *zSPtr;
+    uint_fast16_t uiZ64;
+    uint64_t sigZ;
+    int_fast8_t shiftDist;
+
+    zSPtr = (struct extFloat80M *) zPtr;
+    uiZ64 = 0;
+    sigZ = 0;
+    if ( a ) {
+        shiftDist = softfloat_countLeadingZeros32( a );
+        uiZ64 = packToExtF80UI64( 0, 0x401E - shiftDist );
+        sigZ = (uint64_t) (a<<shiftDist)<<32;
+    }
+    zSPtr->signExp = uiZ64;
+    zSPtr->signif = sigZ;
+
+}
+
+#endif
+
diff --git a/ext/softfloat/ui32_to_f128.c b/ext/softfloat/ui32_to_f128.c
index 3e47ea6197..78d3eb64c6 100644
--- a/ext/softfloat/ui32_to_f128.c
+++ b/ext/softfloat/ui32_to_f128.c
@@ -35,9 +35,8 @@ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 =============================================================================*/
 
 #include <stdint.h>
-
-#include "internals.h"
 #include "platform.h"
+#include "internals.h"
 #include "softfloat.h"
 
 float128_t ui32_to_f128( uint32_t a )
diff --git a/ext/softfloat/ui32_to_f128M.c b/ext/softfloat/ui32_to_f128M.c
new file mode 100644
index 0000000000..ffe18bf671
--- /dev/null
+++ b/ext/softfloat/ui32_to_f128M.c
@@ -0,0 +1,76 @@
+
+/*============================================================================
+
+This C source file is part of the SoftFloat IEEE Floating-Point Arithmetic
+Package, Release 3d, by John R. Hauser.
+
+Copyright 2011, 2012, 2013, 2014, 2015, 2016 The Regents of the University of
+California.  All Rights Reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met:
+
+ 1. Redistributions of source code must retain the above copyright notice,
+    this list of conditions, and the following disclaimer.
+
+ 2. Redistributions in binary form must reproduce the above copyright notice,
+    this list of conditions, and the following disclaimer in the documentation
+    and/or other materials provided with the distribution.
+
+ 3. Neither the name of the University nor the names of its contributors may
+    be used to endorse or promote products derived from this software without
+    specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS "AS IS", AND ANY
+EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE, ARE
+DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY
+DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+=============================================================================*/
+
+#include <stdint.h>
+#include "platform.h"
+#include "internals.h"
+#include "softfloat.h"
+
+#ifdef SOFTFLOAT_FAST_INT64
+
+void ui32_to_f128M( uint32_t a, float128_t *zPtr )
+{
+
+    *zPtr = ui32_to_f128( a );
+
+}
+
+#else
+
+void ui32_to_f128M( uint32_t a, float128_t *zPtr )
+{
+    uint32_t *zWPtr, uiZ96, uiZ64;
+    int_fast8_t shiftDist;
+    uint64_t normA;
+
+    zWPtr = (uint32_t *) zPtr;
+    uiZ96 = 0;
+    uiZ64 = 0;
+    if ( a ) {
+        shiftDist = softfloat_countLeadingZeros32( a ) + 17;
+        normA = (uint64_t) a<<shiftDist;
+        uiZ96 = packToF128UI96( 0, 0x402E - shiftDist, normA>>32 );
+        uiZ64 = normA;
+    }
+    zWPtr[indexWord( 4, 3 )] = uiZ96;
+    zWPtr[indexWord( 4, 2 )] = uiZ64;
+    zWPtr[indexWord( 4, 1 )] = 0;
+    zWPtr[indexWord( 4, 0 )] = 0;
+
+}
+
+#endif
+
diff --git a/ext/softfloat/ui32_to_f16.c b/ext/softfloat/ui32_to_f16.c
index e3616fc0b7..09dcc8c3fa 100644
--- a/ext/softfloat/ui32_to_f16.c
+++ b/ext/softfloat/ui32_to_f16.c
@@ -35,9 +35,8 @@ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 =============================================================================*/
 
 #include <stdint.h>
-
-#include "internals.h"
 #include "platform.h"
+#include "internals.h"
 #include "softfloat.h"
 
 float16_t ui32_to_f16( uint32_t a )
diff --git a/ext/softfloat/ui32_to_f32.c b/ext/softfloat/ui32_to_f32.c
index 29d012348c..7e5ece68ae 100644
--- a/ext/softfloat/ui32_to_f32.c
+++ b/ext/softfloat/ui32_to_f32.c
@@ -35,9 +35,8 @@ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 =============================================================================*/
 
 #include <stdint.h>
-
-#include "internals.h"
 #include "platform.h"
+#include "internals.h"
 #include "softfloat.h"
 
 float32_t ui32_to_f32( uint32_t a )
diff --git a/ext/softfloat/ui32_to_f64.c b/ext/softfloat/ui32_to_f64.c
index 4954b141d9..5e5f843aff 100644
--- a/ext/softfloat/ui32_to_f64.c
+++ b/ext/softfloat/ui32_to_f64.c
@@ -35,9 +35,8 @@ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 =============================================================================*/
 
 #include <stdint.h>
-
-#include "internals.h"
 #include "platform.h"
+#include "internals.h"
 #include "softfloat.h"
 
 float64_t ui32_to_f64( uint32_t a )
diff --git a/ext/softfloat/ui64_to_extF80.c b/ext/softfloat/ui64_to_extF80.c
new file mode 100644
index 0000000000..f149d2cb10
--- /dev/null
+++ b/ext/softfloat/ui64_to_extF80.c
@@ -0,0 +1,59 @@
+
+/*============================================================================
+
+This C source file is part of the SoftFloat IEEE Floating-Point Arithmetic
+Package, Release 3d, by John R. Hauser.
+
+Copyright 2011, 2012, 2013, 2014, 2015, 2016 The Regents of the University of
+California.  All Rights Reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met:
+
+ 1. Redistributions of source code must retain the above copyright notice,
+    this list of conditions, and the following disclaimer.
+
+ 2. Redistributions in binary form must reproduce the above copyright notice,
+    this list of conditions, and the following disclaimer in the documentation
+    and/or other materials provided with the distribution.
+
+ 3. Neither the name of the University nor the names of its contributors may
+    be used to endorse or promote products derived from this software without
+    specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS "AS IS", AND ANY
+EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE, ARE
+DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY
+DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+=============================================================================*/
+
+#include <stdint.h>
+#include "platform.h"
+#include "internals.h"
+#include "softfloat.h"
+
+extFloat80_t ui64_to_extF80( uint64_t a )
+{
+    uint_fast16_t uiZ64;
+    int_fast8_t shiftDist;
+    union { struct extFloat80M s; extFloat80_t f; } uZ;
+
+    uiZ64 = 0;
+    if ( a ) {
+        shiftDist = softfloat_countLeadingZeros64( a );
+        uiZ64 = 0x403E - shiftDist;
+        a <<= shiftDist;
+    }
+    uZ.s.signExp = uiZ64;
+    uZ.s.signif  = a;
+    return uZ.f;
+
+}
+
diff --git a/ext/softfloat/ui64_to_extF80M.c b/ext/softfloat/ui64_to_extF80M.c
new file mode 100644
index 0000000000..abf076f04f
--- /dev/null
+++ b/ext/softfloat/ui64_to_extF80M.c
@@ -0,0 +1,74 @@
+
+/*============================================================================
+
+This C source file is part of the SoftFloat IEEE Floating-Point Arithmetic
+Package, Release 3d, by John R. Hauser.
+
+Copyright 2011, 2012, 2013, 2014, 2015, 2016 The Regents of the University of
+California.  All Rights Reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met:
+
+ 1. Redistributions of source code must retain the above copyright notice,
+    this list of conditions, and the following disclaimer.
+
+ 2. Redistributions in binary form must reproduce the above copyright notice,
+    this list of conditions, and the following disclaimer in the documentation
+    and/or other materials provided with the distribution.
+
+ 3. Neither the name of the University nor the names of its contributors may
+    be used to endorse or promote products derived from this software without
+    specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS "AS IS", AND ANY
+EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE, ARE
+DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY
+DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+=============================================================================*/
+
+#include <stdint.h>
+#include "platform.h"
+#include "internals.h"
+#include "softfloat.h"
+
+#ifdef SOFTFLOAT_FAST_INT64
+
+void ui64_to_extF80M( uint64_t a, extFloat80_t *zPtr )
+{
+
+    *zPtr = ui64_to_extF80( a );
+
+}
+
+#else
+
+void ui64_to_extF80M( uint64_t a, extFloat80_t *zPtr )
+{
+    struct extFloat80M *zSPtr;
+    uint_fast16_t uiZ64;
+    uint64_t sigZ;
+    int_fast8_t shiftDist;
+
+    zSPtr = (struct extFloat80M *) zPtr;
+    uiZ64 = 0;
+    sigZ = 0;
+    if ( a ) {
+        shiftDist = softfloat_countLeadingZeros64( a );
+        uiZ64 = packToExtF80UI64( 0, 0x403E - shiftDist );
+        sigZ = a<<shiftDist;
+    }
+    zSPtr->signExp = uiZ64;
+    zSPtr->signif = sigZ;
+
+}
+
+#endif
+
diff --git a/ext/softfloat/ui64_to_f128.c b/ext/softfloat/ui64_to_f128.c
index 508041ddca..0124bc5a60 100644
--- a/ext/softfloat/ui64_to_f128.c
+++ b/ext/softfloat/ui64_to_f128.c
@@ -35,9 +35,8 @@ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 =============================================================================*/
 
 #include <stdint.h>
-
-#include "internals.h"
 #include "platform.h"
+#include "internals.h"
 #include "softfloat.h"
 
 float128_t ui64_to_f128( uint64_t a )
diff --git a/ext/softfloat/ui64_to_f128M.c b/ext/softfloat/ui64_to_f128M.c
new file mode 100644
index 0000000000..b7d955aea6
--- /dev/null
+++ b/ext/softfloat/ui64_to_f128M.c
@@ -0,0 +1,86 @@
+
+/*============================================================================
+
+This C source file is part of the SoftFloat IEEE Floating-Point Arithmetic
+Package, Release 3d, by John R. Hauser.
+
+Copyright 2011, 2012, 2013, 2014, 2015, 2016 The Regents of the University of
+California.  All Rights Reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met:
+
+ 1. Redistributions of source code must retain the above copyright notice,
+    this list of conditions, and the following disclaimer.
+
+ 2. Redistributions in binary form must reproduce the above copyright notice,
+    this list of conditions, and the following disclaimer in the documentation
+    and/or other materials provided with the distribution.
+
+ 3. Neither the name of the University nor the names of its contributors may
+    be used to endorse or promote products derived from this software without
+    specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS "AS IS", AND ANY
+EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE, ARE
+DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY
+DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+=============================================================================*/
+
+#include <stdint.h>
+#include "platform.h"
+#include "internals.h"
+#include "softfloat.h"
+
+#ifdef SOFTFLOAT_FAST_INT64
+
+void ui64_to_f128M( uint64_t a, float128_t *zPtr )
+{
+
+    *zPtr = ui64_to_f128( a );
+
+}
+
+#else
+
+void ui64_to_f128M( uint64_t a, float128_t *zPtr )
+{
+    uint32_t *zWPtr, uiZ96, uiZ64;
+    uint_fast8_t shiftDist;
+    uint32_t *ptr;
+
+    zWPtr = (uint32_t *) zPtr;
+    uiZ96 = 0;
+    uiZ64 = 0;
+    zWPtr[indexWord( 4, 1 )] = 0;
+    zWPtr[indexWord( 4, 0 )] = 0;
+    if ( a ) {
+        shiftDist = softfloat_countLeadingZeros64( a ) + 17;
+        if ( shiftDist < 32 ) {
+            ptr = zWPtr + indexMultiwordHi( 4, 3 );
+            ptr[indexWord( 3, 2 )] = 0;
+            ptr[indexWord( 3, 1 )] = a>>32;
+            ptr[indexWord( 3, 0 )] = a;
+            softfloat_shortShiftLeft96M( ptr, shiftDist, ptr );
+            ptr[indexWordHi( 3 )] =
+                packToF128UI96( 0, 0x404E - shiftDist, ptr[indexWordHi( 3 )] );
+            return;
+        }
+        a <<= shiftDist - 32;
+        uiZ96 = packToF128UI96( 0, 0x404E - shiftDist, a>>32 );
+        uiZ64 = a;
+    }
+    zWPtr[indexWord( 4, 3 )] = uiZ96;
+    zWPtr[indexWord( 4, 2 )] = uiZ64;
+
+}
+
+#endif
+
diff --git a/ext/softfloat/ui64_to_f16.c b/ext/softfloat/ui64_to_f16.c
index c5d00f106c..ecca02bc45 100644
--- a/ext/softfloat/ui64_to_f16.c
+++ b/ext/softfloat/ui64_to_f16.c
@@ -35,9 +35,8 @@ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 =============================================================================*/
 
 #include <stdint.h>
-
-#include "internals.h"
 #include "platform.h"
+#include "internals.h"
 #include "softfloat.h"
 
 float16_t ui64_to_f16( uint64_t a )
diff --git a/ext/softfloat/ui64_to_f32.c b/ext/softfloat/ui64_to_f32.c
index bfc9a2526e..2d946c2fe4 100644
--- a/ext/softfloat/ui64_to_f32.c
+++ b/ext/softfloat/ui64_to_f32.c
@@ -35,9 +35,8 @@ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 =============================================================================*/
 
 #include <stdint.h>
-
-#include "internals.h"
 #include "platform.h"
+#include "internals.h"
 #include "softfloat.h"
 
 float32_t ui64_to_f32( uint64_t a )
diff --git a/ext/softfloat/ui64_to_f64.c b/ext/softfloat/ui64_to_f64.c
index 7bc606eb18..57102a74b6 100644
--- a/ext/softfloat/ui64_to_f64.c
+++ b/ext/softfloat/ui64_to_f64.c
@@ -35,9 +35,8 @@ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 =============================================================================*/
 
 #include <stdint.h>
-
-#include "internals.h"
 #include "platform.h"
+#include "internals.h"
 #include "softfloat.h"
 
 float64_t ui64_to_f64( uint64_t a )

From 67fb75d659b4300088fe3d22e4724e624d2da196 Mon Sep 17 00:00:00 2001
From: Gabe Black <gabeblack@google.com>
Date: Sat, 18 Mar 2023 08:12:56 -0700
Subject: [PATCH 288/492] base,cpu,dev,sim: Pull common logic into
 ListenSocket::listen().

Create a version of listen() which handles common logic internally,
including scanning for an available port number, and notifying what
port was chosen.

The port is managed internal to ListenSocket, so that the logic
interacting with it doesn't need to manually manage a port number, and
hence a port number does not need to exist for non AF_INET sockets.

Change-Id: Ie371eccc4d0da5e7b90714508e4cb72fb0091875
Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/69160
Tested-by: kokoro <noreply+kokoro@google.com>
Reviewed-by: Yu-hsin Wang <yuhsingw@google.com>
Maintainer: Gabe Black <gabe.black@gmail.com>
---
 src/base/remote_gdb.cc     | 19 ++++++++-----------
 src/base/remote_gdb.hh     |  3 +--
 src/base/socket.cc         | 23 +++++++++++++++++++++--
 src/base/socket.hh         | 16 +++++++++++++++-
 src/base/vnc/vncserver.cc  | 16 ++++------------
 src/base/vnc/vncserver.hh  |  2 +-
 src/cpu/nativetrace.cc     | 10 +++-------
 src/dev/net/ethertap.cc    | 11 +++--------
 src/dev/serial/terminal.cc | 18 +++++-------------
 src/dev/serial/terminal.hh |  2 +-
 src/sim/workload.cc        |  4 ++--
 11 files changed, 64 insertions(+), 60 deletions(-)

diff --git a/src/base/remote_gdb.cc b/src/base/remote_gdb.cc
index 1a2fef42d8..c8cb32fd71 100644
--- a/src/base/remote_gdb.cc
+++ b/src/base/remote_gdb.cc
@@ -392,7 +392,7 @@ std::map<Addr, HardBreakpoint *> hardBreakMap;
 
 BaseRemoteGDB::BaseRemoteGDB(System *_system, int _port) :
         incomingConnectionEvent(nullptr), incomingDataEvent(nullptr),
-        _port(_port), fd(-1), sys(_system),
+        listener(_system->name() + ".remote_gdb", _port), fd(-1), sys(_system),
         connectEvent(*this), disconnectEvent(*this), trapEvent(this),
         singleStepEvent(*this)
 {}
@@ -417,17 +417,14 @@ BaseRemoteGDB::listen()
         return;
     }
 
-    while (!listener.listen(_port)) {
-        DPRINTF(GDBMisc, "Can't bind port %d\n", _port);
-        _port++;
-    }
+    listener.listen();
 
     incomingConnectionEvent =
             new IncomingConnectionEvent(this, listener.getfd(), POLLIN);
     pollQueue.schedule(incomingConnectionEvent);
 
-    ccprintf(std::cerr, "%d: %s: listening for remote gdb on port %d\n",
-             curTick(), name(), _port);
+    ccprintf(std::cerr, "%d: %s: listening for remote gdb on %s\n",
+             curTick(), name(), listener);
 }
 
 void
@@ -448,12 +445,12 @@ BaseRemoteGDB::connect()
     }
 }
 
-int
-BaseRemoteGDB::port() const
+const ListenSocket &
+BaseRemoteGDB::hostSocket() const
 {
     panic_if(!listener.islistening(),
-             "Remote GDB port is unknown until listen() has been called.\n");
-    return _port;
+             "Remote GDB socket is unknown until listen() has been called.");
+    return listener;
 }
 
 void
diff --git a/src/base/remote_gdb.hh b/src/base/remote_gdb.hh
index 80c108ba22..60a0d6a1eb 100644
--- a/src/base/remote_gdb.hh
+++ b/src/base/remote_gdb.hh
@@ -161,7 +161,7 @@ class BaseRemoteGDB
     void listen();
     void connect();
 
-    int port() const;
+    const ListenSocket &hostSocket() const;
 
     void attach(int fd);
     void detach();
@@ -232,7 +232,6 @@ class BaseRemoteGDB
     IncomingDataEvent *incomingDataEvent;
 
     ListenSocket listener;
-    int _port;
 
     // The socket commands come in through.
     int fd;
diff --git a/src/base/socket.cc b/src/base/socket.cc
index 280f92b593..1aff73a7ff 100644
--- a/src/base/socket.cc
+++ b/src/base/socket.cc
@@ -173,10 +173,12 @@ ListenSocket::acceptCloexec(int sockfd, struct sockaddr *addr,
 //
 //
 
-ListenSocket::ListenSocket()
-    : listening(false), fd(-1)
+ListenSocket::ListenSocket(const std::string &_name, int port)
+    : Named(_name), listening(false), fd(-1), _port(port)
 {}
 
+ListenSocket::ListenSocket() : ListenSocket("<unnammed>", -1) {}
+
 ListenSocket::~ListenSocket()
 {
     if (fd != -1)
@@ -231,6 +233,23 @@ ListenSocket::listen(int port)
     return true;
 }
 
+void
+ListenSocket::listen()
+{
+    while (!listen(_port)) {
+        _port++;
+        fatal_if(_port > 65536, "%s: cannot find an available port.", name());
+    }
+    ccprintf(std::cerr, "%s: Listening for connections on %s\n",
+            name(), *this);
+}
+
+void
+ListenSocket::output(std::ostream &os) const
+{
+    os << "port " << _port;
+}
+
 
 // Open a connection.  Accept will block, so if you don't want it to,
 // make sure a connection is ready before you call accept.
diff --git a/src/base/socket.hh b/src/base/socket.hh
index d2393e9325..7a616bdd49 100644
--- a/src/base/socket.hh
+++ b/src/base/socket.hh
@@ -35,6 +35,8 @@
 
 #include <string>
 
+#include "base/named.hh"
+
 namespace gem5
 {
 
@@ -69,7 +71,7 @@ struct UnixSocketAddr
     std::string formattedPath;
 };
 
-class ListenSocket
+class ListenSocket : public Named
 {
   protected:
     /**
@@ -90,6 +92,7 @@ class ListenSocket
   protected:
     bool listening;
     int fd;
+    int _port;
 
     /*
      * cleanup resets the static variables back to their default values.
@@ -101,12 +104,16 @@ class ListenSocket
      * @ingroup api_socket
      * @{
      */
+    ListenSocket(const std::string &_name, int port);
     ListenSocket();
     virtual ~ListenSocket();
 
     virtual int accept();
 
     virtual bool listen(int port);
+    virtual void listen();
+
+    virtual void output(std::ostream &os) const;
 
     int getfd() const { return fd; }
     bool islistening() const { return listening; }
@@ -119,6 +126,13 @@ class ListenSocket
     /** @} */ // end of api_socket
 };
 
+inline static std::ostream &
+operator << (std::ostream &os, const ListenSocket &socket)
+{
+    socket.output(os);
+    return os;
+}
+
 } // namespace gem5
 
 #endif //__SOCKET_HH__
diff --git a/src/base/vnc/vncserver.cc b/src/base/vnc/vncserver.cc
index 39a1338799..2d32cef8bb 100644
--- a/src/base/vnc/vncserver.cc
+++ b/src/base/vnc/vncserver.cc
@@ -117,11 +117,11 @@ VncServer::DataEvent::process(int revent)
  */
 VncServer::VncServer(const Params &p)
     : VncInput(p), listenEvent(NULL), dataEvent(NULL), number(p.number),
-      dataFd(-1), sendUpdate(false),
+      listener(p.name, p.port), sendUpdate(false),
       supportsRawEnc(false), supportsResizeEnc(false)
 {
     if (p.port)
-        listen(p.port);
+        listen();
 
     curState = WaitForProtocolVersion;
 
@@ -157,22 +157,14 @@ VncServer::~VncServer()
 
 //socket creation and vnc client attach
 void
-VncServer::listen(int port)
+VncServer::listen()
 {
     if (ListenSocket::allDisabled()) {
         warn_once("Sockets disabled, not accepting vnc client connections");
         return;
     }
 
-    while (!listener.listen(port)) {
-        DPRINTF(VNC,
-                "can't bind address vnc server port %d in use PID %d\n",
-                port, getpid());
-        port++;
-    }
-
-    ccprintf(std::cerr, "%s: Listening for connections on port %d\n",
-             name(), port);
+    listener.listen();
 
     listenEvent = new ListenEvent(this, listener.getfd(), POLLIN);
     pollQueue.schedule(listenEvent);
diff --git a/src/base/vnc/vncserver.hh b/src/base/vnc/vncserver.hh
index 091cb4d696..7455799025 100644
--- a/src/base/vnc/vncserver.hh
+++ b/src/base/vnc/vncserver.hh
@@ -182,7 +182,7 @@ class VncServer : public VncInput
 
     ListenSocket listener;
 
-    void listen(int port);
+    void listen();
     void accept();
     void data();
     void detach();
diff --git a/src/cpu/nativetrace.cc b/src/cpu/nativetrace.cc
index 714787ffa4..05fb41b3a5 100644
--- a/src/cpu/nativetrace.cc
+++ b/src/cpu/nativetrace.cc
@@ -39,17 +39,13 @@ namespace gem5
 namespace trace {
 
 NativeTrace::NativeTrace(const Params &p)
-    : ExeTracer(p)
+    : ExeTracer(p), native_listener(p.name, 8000)
 {
     if (ListenSocket::allDisabled())
         fatal("All listeners are disabled!");
 
-    int port = 8000;
-    while (!native_listener.listen(port)) {
-        DPRINTF(GDBMisc, "Can't bind port %d\n", port);
-        port++;
-    }
-    ccprintf(std::cerr, "Listening for native process on port %d\n", port);
+    native_listener.listen();
+
     fd = native_listener.accept();
 }
 
diff --git a/src/dev/net/ethertap.cc b/src/dev/net/ethertap.cc
index 0769ad1203..587dba5021 100644
--- a/src/dev/net/ethertap.cc
+++ b/src/dev/net/ethertap.cc
@@ -240,17 +240,16 @@ class TapListener
     };
 
     friend class Event;
-    Event *event;
+    Event *event = nullptr;
 
     void accept();
 
   protected:
     ListenSocket listener;
     EtherTapStub *tap;
-    int port;
 
   public:
-    TapListener(EtherTapStub *t, int p) : event(NULL), tap(t), port(p) {}
+    TapListener(EtherTapStub *t, int p) : listener(t->name(), p), tap(t) {}
     ~TapListener() { delete event; }
 
     void listen();
@@ -259,12 +258,8 @@ class TapListener
 void
 TapListener::listen()
 {
-    while (!listener.listen(port)) {
-        DPRINTF(Ethernet, "TapListener(listen): Can't bind port %d\n", port);
-        port++;
-    }
+    listener.listen();
 
-    ccprintf(std::cerr, "Listening for tap connection on port %d\n", port);
     event = new Event(this, listener.getfd(), POLLIN|POLLERR);
     pollQueue.schedule(event);
 }
diff --git a/src/dev/serial/terminal.cc b/src/dev/serial/terminal.cc
index 9564876826..02052b5e1a 100644
--- a/src/dev/serial/terminal.cc
+++ b/src/dev/serial/terminal.cc
@@ -121,8 +121,8 @@ Terminal::DataEvent::process(int revent)
  */
 Terminal::Terminal(const Params &p)
     : SerialDevice(p), listenEvent(NULL), dataEvent(NULL),
-      number(p.number), data_fd(-1), txbuf(16384), rxbuf(16384),
-      outfile(terminalDump(p))
+      number(p.number), data_fd(-1), listener(p.name, p.port),
+      txbuf(16384), rxbuf(16384), outfile(terminalDump(p))
 #if TRACING_ON == 1
       , linebuf(16384)
 #endif
@@ -131,7 +131,7 @@ Terminal::Terminal(const Params &p)
         outfile->stream()->setf(std::ios::unitbuf);
 
     if (p.port)
-        listen(p.port);
+        listen();
 }
 
 Terminal::~Terminal()
@@ -168,22 +168,14 @@ Terminal::terminalDump(const TerminalParams &p)
 //
 
 void
-Terminal::listen(int port)
+Terminal::listen()
 {
     if (ListenSocket::allDisabled()) {
         warn_once("Sockets disabled, not accepting terminal connections");
         return;
     }
 
-    while (!listener.listen(port)) {
-        DPRINTF(Terminal,
-                ": can't bind address terminal port %d inuse PID %d\n",
-                port, getpid());
-        port++;
-    }
-
-    ccprintf(std::cerr, "%s: Listening for connections on port %d\n",
-             name(), port);
+    listener.listen();
 
     listenEvent = new ListenEvent(this, listener.getfd(), POLLIN);
     pollQueue.schedule(listenEvent);
diff --git a/src/dev/serial/terminal.hh b/src/dev/serial/terminal.hh
index 83ea64b09c..bd6711da8a 100644
--- a/src/dev/serial/terminal.hh
+++ b/src/dev/serial/terminal.hh
@@ -103,7 +103,7 @@ class Terminal : public SerialDevice
   protected:
     ListenSocket listener;
 
-    void listen(int port);
+    void listen();
     void accept();
 
   protected:
diff --git a/src/sim/workload.cc b/src/sim/workload.cc
index ceb1029f77..84b1e40698 100644
--- a/src/sim/workload.cc
+++ b/src/sim/workload.cc
@@ -97,8 +97,8 @@ Workload::startup()
     // Now that we're about to start simulation, wait for GDB connections if
     // requested.
     if (gdb && waitForRemoteGDB) {
-        inform("%s: Waiting for a remote GDB connection on port %d.", name(),
-                gdb->port());
+        inform("%s: Waiting for a remote GDB connection on %s.", name(),
+                gdb->hostSocket());
         gdb->connect();
     }
 }

From 7c614d225be30439d1912872128b79c5a3e0c85f Mon Sep 17 00:00:00 2001
From: Gabe Black <gabeblack@google.com>
Date: Sat, 18 Mar 2023 08:46:43 -0700
Subject: [PATCH 289/492] base: Make ListenSocket::listen(int port) protected.

Stop making the old API available.

Change-Id: I70752d081cb572a8ae5bf396d2ea43ecc7be1e9c
Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/69161
Maintainer: Gabe Black <gabe.black@gmail.com>
Reviewed-by: Yu-hsin Wang <yuhsingw@google.com>
Tested-by: kokoro <noreply+kokoro@google.com>
---
 src/base/socket.hh      |  3 ++-
 src/base/socket.test.cc | 51 ++++++++++++++---------------------------
 2 files changed, 19 insertions(+), 35 deletions(-)

diff --git a/src/base/socket.hh b/src/base/socket.hh
index 7a616bdd49..81f4d6252f 100644
--- a/src/base/socket.hh
+++ b/src/base/socket.hh
@@ -99,6 +99,8 @@ class ListenSocket : public Named
      */
     static void cleanup();
 
+    virtual bool listen(int port);
+
   public:
     /**
      * @ingroup api_socket
@@ -110,7 +112,6 @@ class ListenSocket : public Named
 
     virtual int accept();
 
-    virtual bool listen(int port);
     virtual void listen();
 
     virtual void output(std::ostream &os) const;
diff --git a/src/base/socket.test.cc b/src/base/socket.test.cc
index cb24c49090..8e1c25b4b1 100644
--- a/src/base/socket.test.cc
+++ b/src/base/socket.test.cc
@@ -121,6 +121,7 @@ TEST(UnixSocketAddrTest, TruncatedFileBasedSocket)
 class MockListenSocket : public ListenSocket
 {
   public:
+    MockListenSocket(int port) : ListenSocket("mock", port) {}
     /*
      * This mock Listen Socket is used to ensure the static variables are reset
      * back to their default values after deconstruction (i.e., after a test
@@ -138,7 +139,7 @@ TEST(SocketTest, DefaultBehavior)
      * Tests the default behavior where listenSocket is constructed, and is
      * not listening to a port.
      */
-    MockListenSocket listen_socket;
+    MockListenSocket listen_socket(-1);
     EXPECT_EQ(-1, listen_socket.getfd());
     EXPECT_FALSE(listen_socket.islistening());
     EXPECT_FALSE(listen_socket.allDisabled());
@@ -146,7 +147,7 @@ TEST(SocketTest, DefaultBehavior)
 
 TEST(SocketTest, DisableAll)
 {
-    MockListenSocket listen_socket;
+    MockListenSocket listen_socket(-1);
     listen_socket.disableAll();
     EXPECT_EQ(-1, listen_socket.getfd());
     EXPECT_FALSE(listen_socket.islistening());
@@ -155,8 +156,8 @@ TEST(SocketTest, DisableAll)
 
 TEST(SocketTest, ListenToPort)
 {
-    MockListenSocket listen_socket;
-    EXPECT_TRUE(listen_socket.listen(TestPort1));
+    MockListenSocket listen_socket(TestPort1);
+    listen_socket.listen();
     EXPECT_NE(-1, listen_socket.getfd());
     EXPECT_TRUE(listen_socket.islistening());
     EXPECT_FALSE(listen_socket.allDisabled());
@@ -164,32 +165,14 @@ TEST(SocketTest, ListenToPort)
 
 TEST(SocketTest, RelistenWithSameInstanceSamePort)
 {
-    MockListenSocket listen_socket;
-    EXPECT_TRUE(listen_socket.listen(TestPort1));
+    MockListenSocket listen_socket(TestPort1);
+    listen_socket.listen();
 
     /*
      * You cannot listen to another port if you are already listening to one.
      */
     gtestLogOutput.str("");
-    EXPECT_ANY_THROW(listen_socket.listen(TestPort1));
-    std::string expected =
-        "panic: panic condition listening occurred: "
-        "Socket already listening!\n";
-    std::string actual = gtestLogOutput.str();
-    EXPECT_EQ(expected, actual);
-}
-
-TEST(SocketTest, RelistenWithSameInstanceDifferentPort)
-{
-    MockListenSocket listen_socket;
-    EXPECT_TRUE(listen_socket.listen(TestPort1));
-
-    /*
-     * You cannot listen to another port if you are already listening to one.
-     */
-    gtestLogOutput.str("");
-    EXPECT_ANY_THROW(listen_socket.listen(TestPort2));
-
+    EXPECT_ANY_THROW(listen_socket.listen());
     std::string expected =
         "panic: panic condition listening occurred: "
         "Socket already listening!\n";
@@ -199,30 +182,30 @@ TEST(SocketTest, RelistenWithSameInstanceDifferentPort)
 
 TEST(SocketTest, RelistenWithDifferentInstanceOnDifferentPort)
 {
-    MockListenSocket listen_socket;
-    EXPECT_TRUE(listen_socket.listen(TestPort1));
+    MockListenSocket listen_socket(TestPort1);
+    listen_socket.listen();
 
     /*
      * You can listen to another port with a different instance.
      */
-    MockListenSocket listen_socket_2;
-    EXPECT_TRUE(listen_socket_2.listen(TestPort2));
+    MockListenSocket listen_socket_2(TestPort2);
+    listen_socket_2.listen();
 }
 
 TEST(SocketTest, RelistenWithDifferentInstanceOnSamePort)
 {
-    MockListenSocket listen_socket;
-    EXPECT_TRUE(listen_socket.listen(TestPort1));
+    MockListenSocket listen_socket(TestPort1);
+    listen_socket.listen();
 
     /*
      * You cannot listen to a port that's already being listened to.
      */
-    MockListenSocket listen_socket_2;
-    EXPECT_FALSE(listen_socket_2.listen(TestPort1));
+    MockListenSocket listen_socket_2(TestPort1);
+    listen_socket_2.listen();
 }
 
 TEST(SocketTest, AcceptError)
 {
-    MockListenSocket listen_socket;
+    MockListenSocket listen_socket(-1);
     EXPECT_EQ(-1, listen_socket.accept());
 }

From d03bc9d33cc0fccb530d6abb3758a54e5d5a22ac Mon Sep 17 00:00:00 2001
From: Gabe Black <gabeblack@google.com>
Date: Sat, 18 Mar 2023 08:59:51 -0700
Subject: [PATCH 290/492] base,cpu,dev: Add a level of indirection for
 ListenSockets.

This makes room for there to be different implementations for different
types of sockets.

Change-Id: I8c959e2c3400caec8242e693e11330e072bc2c5f
Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/69162
Reviewed-by: Yu-hsin Wang <yuhsingw@google.com>
Tested-by: kokoro <noreply+kokoro@google.com>
Maintainer: Gabe Black <gabe.black@gmail.com>
---
 src/base/remote_gdb.cc     | 26 ++++++++++++--------------
 src/base/remote_gdb.hh     |  2 +-
 src/base/socket.cc         |  8 ++++++++
 src/base/socket.hh         | 30 ++++++++++++++++++++++++++++++
 src/base/vnc/vncserver.cc  | 12 ++++++------
 src/base/vnc/vncserver.hh  |  2 +-
 src/cpu/nativetrace.cc     |  6 +++---
 src/cpu/nativetrace.hh     |  2 +-
 src/dev/net/ethertap.cc    | 13 +++++++------
 src/dev/serial/terminal.cc | 11 ++++++-----
 src/dev/serial/terminal.hh |  2 +-
 11 files changed, 76 insertions(+), 38 deletions(-)

diff --git a/src/base/remote_gdb.cc b/src/base/remote_gdb.cc
index c8cb32fd71..095b2bd38c 100644
--- a/src/base/remote_gdb.cc
+++ b/src/base/remote_gdb.cc
@@ -392,10 +392,11 @@ std::map<Addr, HardBreakpoint *> hardBreakMap;
 
 BaseRemoteGDB::BaseRemoteGDB(System *_system, int _port) :
         incomingConnectionEvent(nullptr), incomingDataEvent(nullptr),
-        listener(_system->name() + ".remote_gdb", _port), fd(-1), sys(_system),
-        connectEvent(*this), disconnectEvent(*this), trapEvent(this),
-        singleStepEvent(*this)
-{}
+        fd(-1), sys(_system), connectEvent(*this), disconnectEvent(*this),
+        trapEvent(this), singleStepEvent(*this)
+{
+    listener = listenSocketInetConfig(_port).build(name());
+}
 
 BaseRemoteGDB::~BaseRemoteGDB()
 {
@@ -417,25 +418,22 @@ BaseRemoteGDB::listen()
         return;
     }
 
-    listener.listen();
+    listener->listen();
 
     incomingConnectionEvent =
-            new IncomingConnectionEvent(this, listener.getfd(), POLLIN);
+            new IncomingConnectionEvent(this, listener->getfd(), POLLIN);
     pollQueue.schedule(incomingConnectionEvent);
-
-    ccprintf(std::cerr, "%d: %s: listening for remote gdb on %s\n",
-             curTick(), name(), listener);
 }
 
 void
 BaseRemoteGDB::connect()
 {
-    panic_if(!listener.islistening(),
+    panic_if(!listener->islistening(),
              "Can't accept GDB connections without any threads!");
 
     pollQueue.remove(incomingConnectionEvent);
 
-    int sfd = listener.accept();
+    int sfd = listener->accept();
 
     if (sfd != -1) {
         if (isAttached())
@@ -448,9 +446,9 @@ BaseRemoteGDB::connect()
 const ListenSocket &
 BaseRemoteGDB::hostSocket() const
 {
-    panic_if(!listener.islistening(),
+    panic_if(!listener->islistening(),
              "Remote GDB socket is unknown until listen() has been called.");
-    return listener;
+    return *listener;
 }
 
 void
@@ -513,7 +511,7 @@ BaseRemoteGDB::addThreadContext(ThreadContext *_tc)
         assert(selectThreadContext(_tc->contextId()));
 
     // Now that we have a thread, we can start listening.
-    if (!listener.islistening())
+    if (!listener->islistening())
         listen();
 }
 
diff --git a/src/base/remote_gdb.hh b/src/base/remote_gdb.hh
index 60a0d6a1eb..9f09582721 100644
--- a/src/base/remote_gdb.hh
+++ b/src/base/remote_gdb.hh
@@ -231,7 +231,7 @@ class BaseRemoteGDB
     IncomingConnectionEvent *incomingConnectionEvent;
     IncomingDataEvent *incomingDataEvent;
 
-    ListenSocket listener;
+    ListenSocketPtr listener;
 
     // The socket commands come in through.
     int fd;
diff --git a/src/base/socket.cc b/src/base/socket.cc
index 1aff73a7ff..5fb8492d50 100644
--- a/src/base/socket.cc
+++ b/src/base/socket.cc
@@ -269,4 +269,12 @@ ListenSocket::accept()
     return sfd;
 }
 
+ListenSocketConfig
+listenSocketInetConfig(int port)
+{
+    return ListenSocketConfig([port](const std::string &name) {
+        return std::make_unique<ListenSocket>(name, port);
+    });
+}
+
 } // namespace gem5
diff --git a/src/base/socket.hh b/src/base/socket.hh
index 81f4d6252f..638ce40f57 100644
--- a/src/base/socket.hh
+++ b/src/base/socket.hh
@@ -33,6 +33,9 @@
 #include <sys/types.h>
 #include <sys/un.h>
 
+#include <cassert>
+#include <functional>
+#include <memory>
 #include <string>
 
 #include "base/named.hh"
@@ -127,6 +130,33 @@ class ListenSocket : public Named
     /** @} */ // end of api_socket
 };
 
+using ListenSocketPtr = std::unique_ptr<ListenSocket>;
+
+class ListenSocketConfig
+{
+  public:
+    using Builder = std::function<ListenSocketPtr(const std::string &name)>;
+
+    ListenSocketConfig() {}
+    ListenSocketConfig(Builder _builder) : builder(_builder) {}
+
+    ListenSocketPtr
+    build(const std::string &name) const
+    {
+        assert(builder);
+        return builder(name);
+    }
+
+    operator bool() const { return (bool)builder; }
+
+  private:
+    Builder builder;
+};
+
+static inline ListenSocketConfig listenSocketEmptyConfig() { return {}; }
+
+ListenSocketConfig listenSocketInetConfig(int port);
+
 inline static std::ostream &
 operator << (std::ostream &os, const ListenSocket &socket)
 {
diff --git a/src/base/vnc/vncserver.cc b/src/base/vnc/vncserver.cc
index 2d32cef8bb..4e5c951191 100644
--- a/src/base/vnc/vncserver.cc
+++ b/src/base/vnc/vncserver.cc
@@ -117,8 +117,8 @@ VncServer::DataEvent::process(int revent)
  */
 VncServer::VncServer(const Params &p)
     : VncInput(p), listenEvent(NULL), dataEvent(NULL), number(p.number),
-      listener(p.name, p.port), sendUpdate(false),
-      supportsRawEnc(false), supportsResizeEnc(false)
+      listener(listenSocketInetConfig(p.port).build(p.name)),
+      sendUpdate(false), supportsRawEnc(false), supportsResizeEnc(false)
 {
     if (p.port)
         listen();
@@ -164,9 +164,9 @@ VncServer::listen()
         return;
     }
 
-    listener.listen();
+    listener->listen();
 
-    listenEvent = new ListenEvent(this, listener.getfd(), POLLIN);
+    listenEvent = new ListenEvent(this, listener->getfd(), POLLIN);
     pollQueue.schedule(listenEvent);
 }
 
@@ -179,10 +179,10 @@ VncServer::accept()
     // thread.
     EventQueue::ScopedMigration migrate(eventQueue());
 
-    if (!listener.islistening())
+    if (!listener->islistening())
         panic("%s: cannot accept a connection if not listening!", name());
 
-    int fd = listener.accept();
+    int fd = listener->accept();
     if (fd < 0) {
         warn("%s: failed to accept VNC connection!", name());
         return;
diff --git a/src/base/vnc/vncserver.hh b/src/base/vnc/vncserver.hh
index 7455799025..d493c05a47 100644
--- a/src/base/vnc/vncserver.hh
+++ b/src/base/vnc/vncserver.hh
@@ -180,7 +180,7 @@ class VncServer : public VncInput
     int number;
     int dataFd; // data stream file describer
 
-    ListenSocket listener;
+    ListenSocketPtr listener;
 
     void listen();
     void accept();
diff --git a/src/cpu/nativetrace.cc b/src/cpu/nativetrace.cc
index 05fb41b3a5..3070205b9f 100644
--- a/src/cpu/nativetrace.cc
+++ b/src/cpu/nativetrace.cc
@@ -39,14 +39,14 @@ namespace gem5
 namespace trace {
 
 NativeTrace::NativeTrace(const Params &p)
-    : ExeTracer(p), native_listener(p.name, 8000)
+    : ExeTracer(p), native_listener(listenSocketInetConfig(8000).build(p.name))
 {
     if (ListenSocket::allDisabled())
         fatal("All listeners are disabled!");
 
-    native_listener.listen();
+    native_listener->listen();
 
-    fd = native_listener.accept();
+    fd = native_listener->accept();
 }
 
 void
diff --git a/src/cpu/nativetrace.hh b/src/cpu/nativetrace.hh
index a00e97a18e..a19acaca3f 100644
--- a/src/cpu/nativetrace.hh
+++ b/src/cpu/nativetrace.hh
@@ -71,7 +71,7 @@ class NativeTrace : public ExeTracer
   protected:
     int fd;
 
-    ListenSocket native_listener;
+    ListenSocketPtr native_listener;
 
   public:
 
diff --git a/src/dev/net/ethertap.cc b/src/dev/net/ethertap.cc
index 587dba5021..7c7a8dcb11 100644
--- a/src/dev/net/ethertap.cc
+++ b/src/dev/net/ethertap.cc
@@ -245,11 +245,12 @@ class TapListener
     void accept();
 
   protected:
-    ListenSocket listener;
+    ListenSocketPtr listener;
     EtherTapStub *tap;
 
   public:
-    TapListener(EtherTapStub *t, int p) : listener(t->name(), p), tap(t) {}
+    TapListener(EtherTapStub *t, int p) :
+        listener(listenSocketInetConfig(p).build(t->name())), tap(t) {}
     ~TapListener() { delete event; }
 
     void listen();
@@ -258,9 +259,9 @@ class TapListener
 void
 TapListener::listen()
 {
-    listener.listen();
+    listener->listen();
 
-    event = new Event(this, listener.getfd(), POLLIN|POLLERR);
+    event = new Event(this, listener->getfd(), POLLIN|POLLERR);
     pollQueue.schedule(event);
 }
 
@@ -272,10 +273,10 @@ TapListener::accept()
     // thread.
     EventQueue::ScopedMigration migrate(tap->eventQueue());
 
-    if (!listener.islistening())
+    if (!listener->islistening())
         panic("TapListener(accept): cannot accept if we're not listening!");
 
-    int sfd = listener.accept();
+    int sfd = listener->accept();
     if (sfd != -1)
         tap->attach(sfd);
 }
diff --git a/src/dev/serial/terminal.cc b/src/dev/serial/terminal.cc
index 02052b5e1a..6e8e435b07 100644
--- a/src/dev/serial/terminal.cc
+++ b/src/dev/serial/terminal.cc
@@ -121,7 +121,8 @@ Terminal::DataEvent::process(int revent)
  */
 Terminal::Terminal(const Params &p)
     : SerialDevice(p), listenEvent(NULL), dataEvent(NULL),
-      number(p.number), data_fd(-1), listener(p.name, p.port),
+      number(p.number), data_fd(-1),
+      listener(listenSocketInetConfig(p.port).build(p.name)),
       txbuf(16384), rxbuf(16384), outfile(terminalDump(p))
 #if TRACING_ON == 1
       , linebuf(16384)
@@ -175,19 +176,19 @@ Terminal::listen()
         return;
     }
 
-    listener.listen();
+    listener->listen();
 
-    listenEvent = new ListenEvent(this, listener.getfd(), POLLIN);
+    listenEvent = new ListenEvent(this, listener->getfd(), POLLIN);
     pollQueue.schedule(listenEvent);
 }
 
 void
 Terminal::accept()
 {
-    if (!listener.islistening())
+    if (!listener->islistening())
         panic("%s: cannot accept a connection if not listening!", name());
 
-    int fd = listener.accept();
+    int fd = listener->accept();
     if (data_fd != -1) {
         char message[] = "terminal already attached!\n";
         atomic_write(fd, message, sizeof(message));
diff --git a/src/dev/serial/terminal.hh b/src/dev/serial/terminal.hh
index bd6711da8a..03adc9f965 100644
--- a/src/dev/serial/terminal.hh
+++ b/src/dev/serial/terminal.hh
@@ -101,7 +101,7 @@ class Terminal : public SerialDevice
     OutputStream * terminalDump(const TerminalParams &p);
 
   protected:
-    ListenSocket listener;
+    ListenSocketPtr listener;
 
     void listen();
     void accept();

From ddcf452b1b8454981eb00a1af866292a5347ffd2 Mon Sep 17 00:00:00 2001
From: Derek Christ <dchrist@rhrk.uni-kl.de>
Date: Tue, 26 Jul 2022 14:06:26 +0200
Subject: [PATCH 291/492] ext: Add DRAMSys integration instructions

Add instructions to add the DRAMSys memory simulator
to gem5 in the ext/ directory.
The provided SConscript file compiles DRAMSys.

Change-Id: If5c723db61a3151c9155190f968c66927d7bfaa3
Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/62911
Maintainer: Bobby Bruce <bbruce@ucdavis.edu>
Tested-by: kokoro <noreply+kokoro@google.com>
Reviewed-by: Bobby Bruce <bbruce@ucdavis.edu>
---
 ext/dramsys/README     |  10 +++++
 ext/dramsys/SConscript | 100 +++++++++++++++++++++++++++++++++++++++++
 2 files changed, 110 insertions(+)
 create mode 100644 ext/dramsys/README
 create mode 100644 ext/dramsys/SConscript

diff --git a/ext/dramsys/README b/ext/dramsys/README
new file mode 100644
index 0000000000..477da52895
--- /dev/null
+++ b/ext/dramsys/README
@@ -0,0 +1,10 @@
+Follow these steps to get DRAMSys as part of gem5
+
+1. Go to ext/dramsys (this directory)
+2. Clone DRAMSys: 'git clone --recursive git@github.com:tukl-msd/DRAMSys.git DRAMSys'
+3. Change directory to DRAMSys: 'cd DRAMSys'
+4. Checkout the correct commit: 'git checkout -b gem5 09f6dcbb91351e6ee7cadfc7bc8b29d97625db8f'
+
+If you wish to run a simulation using the gem5 processor cores, make sure to enable the storage mode in DRAMSys.
+This is done by setting the value of the "StoreMode" key to "Store" in the base configuration file.
+Those configuration file can be found in 'DRAMSys/library/resources/configs/simulator'.
diff --git a/ext/dramsys/SConscript b/ext/dramsys/SConscript
new file mode 100644
index 0000000000..d771b9c535
--- /dev/null
+++ b/ext/dramsys/SConscript
@@ -0,0 +1,100 @@
+# Copyright (c) 2022, Fraunhofer IESE
+# All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are
+# met:
+#
+# 1. Redistributions of source code must retain the above copyright notice,
+#    this list of conditions and the following disclaimer.
+#
+# 2. Redistributions in binary form must reproduce the above copyright
+#    notice, this list of conditions and the following disclaimer in the
+#    documentation and/or other materials provided with the distribution.
+#
+# 3. Neither the name of the copyright holder nor the names of its
+#    contributors may be used to endorse or promote products derived from
+#    this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+# TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER
+# OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+# LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+# NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+# SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+import os
+
+Import('env')
+
+build_root = Dir('../..').abspath
+src_root = Dir('DRAMSys/DRAMSys/library').srcnode().abspath
+
+# See if we got a cloned DRAMSys repo as a subdirectory and set the
+# HAVE_DRAMSys flag accordingly
+if not os.path.exists(Dir('.').srcnode().abspath + '/DRAMSys'):
+    env['HAVE_DRAMSYS'] = False
+    Return()
+
+env['HAVE_DRAMSYS'] = True
+
+dramsys_files = []
+dramsys_configuration_files = []
+
+dramsys_files.extend(Glob("%s/*.cpp" % f"{src_root}/src/controller"))
+for root, dirs, files in os.walk(f"{src_root}/src/controller", topdown=False):
+    for dir in dirs:
+        dramsys_files.extend(Glob("%s/*.cpp" % os.path.join(root, dir)))
+
+dramsys_files.extend(Glob("%s/*.cpp" % f"{src_root}/src/simulation"))
+for root, dirs, files in os.walk(f"{src_root}/src/simulation", topdown=False):
+    for dir in dirs:
+        dramsys_files.extend(Glob("%s/*.cpp" % os.path.join(root, dir)))
+
+dramsys_files.extend(Glob("%s/*.cpp" % f"{src_root}/src/configuration"))
+for root, dirs, files in os.walk(f"{src_root}/src/configuration", topdown=False):
+    for dir in dirs:
+        dramsys_files.extend(Glob("%s/*.cpp" % os.path.join(root, dir)))
+
+dramsys_files.extend(Glob("%s/*.cpp" % f"{src_root}/src/error"))
+dramsys_files.extend(Glob(f"{src_root}/src/error/ECC/Bit.cpp"))
+dramsys_files.extend(Glob(f"{src_root}/src/error/ECC/ECC.cpp"))
+dramsys_files.extend(Glob(f"{src_root}/src/error/ECC/Word.cpp"))
+
+dramsys_files.extend(Glob("%s/*.cpp" % f"{src_root}/src/common"))
+dramsys_files.extend(Glob("%s/*.cpp" % f"{src_root}/src/common/configuration"))
+dramsys_files.extend(Glob("%s/*.cpp" % f"{src_root}/src/common/configuration/memspec"))
+dramsys_files.extend(Glob("%s/*.c" % f"{src_root}/src/common/third_party/sqlite-amalgamation"))
+
+env.Prepend(CPPPATH=[
+    src_root + "/src",
+    src_root + "/src/common/configuration",
+    src_root + "/src/common/third_party/nlohmann/include",
+])
+
+env.Prepend(CPPDEFINES=[("DRAMSysResourceDirectory", '\\"' + os.getcwd() + '/resources' + '\\"')])
+env.Prepend(CPPDEFINES=[("SYSTEMC_VERSION", 20191203)])
+
+dramsys = env.Clone()
+
+if '-Werror' in dramsys['CCFLAGS']:
+    dramsys['CCFLAGS'].remove('-Werror')
+
+dramsys.Prepend(CPPPATH=[
+    src_root + "/src/common/third_party/sqlite-amalgamation",
+    build_root + "/systemc/ext"
+])
+
+dramsys.Prepend(CPPDEFINES=[("SQLITE_ENABLE_RTREE", "1")])
+
+dramsys_configuration = env.Clone()
+
+dramsys.Library('dramsys', dramsys_files)
+
+env.Append(LIBS=['dramsys', 'dl'])
+env.Append(LIBPATH=[Dir('.')])

From bc6133e6a1c8fe9e73bf29bcf17791702dc996ce Mon Sep 17 00:00:00 2001
From: Derek Christ <dchrist@rhrk.uni-kl.de>
Date: Wed, 27 Jul 2022 11:11:32 +0200
Subject: [PATCH 292/492] mem: Add DRAMSys wrapper as a memory object

Add a DRAMSys wrapper to the gem5 memory source that
instantiates the DRAMSys simulator.
Another DRAMSys SimObject implements the AbstractMemory
interface and exposes the tlm target socket.

Change-Id: I8a95e729905e0924453043e5e7744df7a7ce4548
Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/62912
Tested-by: kokoro <noreply+kokoro@google.com>
Maintainer: Bobby Bruce <bbruce@ucdavis.edu>
Reviewed-by: Bobby Bruce <bbruce@ucdavis.edu>
---
 ext/dramsys/SConscript                       | 44 ++++-----
 src/mem/DRAMSys.py                           | 43 +++++++++
 src/mem/SConscript                           |  4 +
 src/mem/dramsys.hh                           | 82 ++++++++++++++++
 src/mem/dramsys_wrapper.cc                   | 99 ++++++++++++++++++++
 src/mem/dramsys_wrapper.hh                   | 89 ++++++++++++++++++
 src/python/gem5/components/memory/dramsys.py | 89 ++++++++++++++++++
 7 files changed, 426 insertions(+), 24 deletions(-)
 create mode 100644 src/mem/DRAMSys.py
 create mode 100644 src/mem/dramsys.hh
 create mode 100644 src/mem/dramsys_wrapper.cc
 create mode 100644 src/mem/dramsys_wrapper.hh
 create mode 100644 src/python/gem5/components/memory/dramsys.py

diff --git a/ext/dramsys/SConscript b/ext/dramsys/SConscript
index d771b9c535..d6ea27e0d1 100644
--- a/ext/dramsys/SConscript
+++ b/ext/dramsys/SConscript
@@ -1,32 +1,28 @@
-# Copyright (c) 2022, Fraunhofer IESE
-# All rights reserved.
+# Copyright (c) 2022 Fraunhofer IESE
+# All rights reserved
 #
 # Redistribution and use in source and binary forms, with or without
 # modification, are permitted provided that the following conditions are
-# met:
-#
-# 1. Redistributions of source code must retain the above copyright notice,
-#    this list of conditions and the following disclaimer.
-#
-# 2. Redistributions in binary form must reproduce the above copyright
-#    notice, this list of conditions and the following disclaimer in the
-#    documentation and/or other materials provided with the distribution.
-#
-# 3. Neither the name of the copyright holder nor the names of its
-#    contributors may be used to endorse or promote products derived from
-#    this software without specific prior written permission.
+# met: redistributions of source code must retain the above copyright
+# notice, this list of conditions and the following disclaimer;
+# redistributions in binary form must reproduce the above copyright
+# notice, this list of conditions and the following disclaimer in the
+# documentation and/or other materials provided with the distribution;
+# neither the name of the copyright holders nor the names of its
+# contributors may be used to endorse or promote products derived from
+# this software without specific prior written permission.
 #
 # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
-# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
-# TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
-# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER
-# OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
-# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
-# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
-# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
-# LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
-# NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
-# SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 
 import os
 
diff --git a/src/mem/DRAMSys.py b/src/mem/DRAMSys.py
new file mode 100644
index 0000000000..c7d69a0ae4
--- /dev/null
+++ b/src/mem/DRAMSys.py
@@ -0,0 +1,43 @@
+# Copyright (c) 2022 Fraunhofer IESE
+# All rights reserved
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are
+# met: redistributions of source code must retain the above copyright
+# notice, this list of conditions and the following disclaimer;
+# redistributions in binary form must reproduce the above copyright
+# notice, this list of conditions and the following disclaimer in the
+# documentation and/or other materials provided with the distribution;
+# neither the name of the copyright holders nor the names of its
+# contributors may be used to endorse or promote products derived from
+# this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+from m5.SimObject import *
+from m5.params import *
+from m5.proxy import *
+
+from m5.objects.Tlm import TlmTargetSocket
+from m5.objects.AbstractMemory import *
+
+
+class DRAMSys(AbstractMemory):
+    type = "DRAMSys"
+    cxx_class = "gem5::memory::DRAMSys"
+    cxx_header = "mem/dramsys.hh"
+    tlm = TlmTargetSocket(32, "TLM target port")
+
+    configuration = Param.String("Path to the DRAMSys configuration")
+    resource_directory = Param.String("Path to the DRAMSys resource directory")
+    recordable = Param.Bool(True, "Whether DRAMSys should record a trace file")
diff --git a/src/mem/SConscript b/src/mem/SConscript
index ca164c1e27..351f24e907 100644
--- a/src/mem/SConscript
+++ b/src/mem/SConscript
@@ -121,6 +121,10 @@ if env['HAVE_DRAMSIM3']:
     Source('dramsim3_wrapper.cc')
     Source('dramsim3.cc')
 
+if env['HAVE_DRAMSYS']:
+    SimObject('DRAMSys.py', sim_objects=['DRAMSys'])
+    Source('dramsys_wrapper.cc')
+
 SimObject('MemChecker.py', sim_objects=['MemChecker', 'MemCheckerMonitor'])
 Source('mem_checker.cc')
 Source('mem_checker_monitor.cc')
diff --git a/src/mem/dramsys.hh b/src/mem/dramsys.hh
new file mode 100644
index 0000000000..d4d9ab8859
--- /dev/null
+++ b/src/mem/dramsys.hh
@@ -0,0 +1,82 @@
+/*
+ * Copyright (c) 2022 Fraunhofer IESE
+ * All rights reserved
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are
+ * met: redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer;
+ * redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution;
+ * neither the name of the copyright holders nor the names of its
+ * contributors may be used to endorse or promote products derived from
+ * this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef __MEM_DRAMSYS_H__
+#define __MEM_DRAMSYS_H__
+
+#include "DRAMSysConfiguration.h"
+#include "mem/abstract_mem.hh"
+#include "mem/dramsys_wrapper.hh"
+#include "params/DRAMSys.hh"
+
+namespace gem5
+{
+
+namespace memory
+{
+
+class DRAMSys : public AbstractMemory
+{
+    PARAMS(DRAMSys);
+    sc_gem5::TlmTargetWrapper<32> tlmWrapper;
+
+  public:
+    DRAMSys(Params const &params)
+        : AbstractMemory(params),
+          tlmWrapper(dramSysWrapper.tSocket,
+              params.name + ".tlm",
+              InvalidPortID),
+          config(DRAMSysConfiguration::from_path(
+              params.configuration,
+              params.resource_directory)),
+          dramSysWrapper(params.name.c_str(),
+            config,
+            params.recordable,
+            params.range)
+    {
+    }
+
+    gem5::Port &getPort(const std::string &if_name, PortID idx) override
+    {
+        if (if_name != "tlm")
+        {
+            return AbstractMemory::getPort(if_name, idx);
+        }
+
+        return tlmWrapper;
+    }
+
+  private:
+    DRAMSysConfiguration::Configuration config;
+    DRAMSysWrapper dramSysWrapper;
+};
+
+} // namespace memory
+} // namespace gem5
+
+#endif // __MEM_DRAMSYS_HH__
diff --git a/src/mem/dramsys_wrapper.cc b/src/mem/dramsys_wrapper.cc
new file mode 100644
index 0000000000..afa67f3bf2
--- /dev/null
+++ b/src/mem/dramsys_wrapper.cc
@@ -0,0 +1,99 @@
+/*
+ * Copyright (c) 2022 Fraunhofer IESE
+ * All rights reserved
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are
+ * met: redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer;
+ * redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution;
+ * neither the name of the copyright holders nor the names of its
+ * contributors may be used to endorse or promote products derived from
+ * this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include "dramsys_wrapper.hh"
+
+namespace gem5
+{
+
+namespace memory
+{
+
+DRAMSysWrapper::DRAMSysWrapper(
+    sc_core::sc_module_name name,
+    DRAMSysConfiguration::Configuration const &config,
+    bool recordable,
+    AddrRange range) :
+    sc_core::sc_module(name),
+    dramsys(instantiateDRAMSys(recordable, config)),
+    range(range)
+{
+    tSocket.register_nb_transport_fw(this, &DRAMSysWrapper::nb_transport_fw);
+    tSocket.register_transport_dbg(this, &DRAMSysWrapper::transport_dbg);
+    iSocket.register_nb_transport_bw(this, &DRAMSysWrapper::nb_transport_bw);
+    iSocket.bind(dramsys->tSocket);
+
+    // Register a callback to compensate for the destructor not
+    // being called.
+    registerExitCallback(
+        [this]()
+        {
+            // Workaround for BUG GEM5-1233
+            sc_gem5::Kernel::stop();
+        });
+}
+
+std::shared_ptr<::DRAMSys>
+DRAMSysWrapper::instantiateDRAMSys(
+    bool recordable,
+    DRAMSysConfiguration::Configuration const &config)
+{
+    return recordable
+        ? std::make_shared<::DRAMSysRecordable>("DRAMSys", config)
+        : std::make_shared<::DRAMSys>("DRAMSys", config);
+}
+
+tlm::tlm_sync_enum DRAMSysWrapper::nb_transport_fw(
+    tlm::tlm_generic_payload &payload,
+    tlm::tlm_phase &phase,
+    sc_core::sc_time &fwDelay)
+{
+    // Subtract base address offset
+    payload.set_address(payload.get_address() - range.start());
+
+    return iSocket->nb_transport_fw(payload, phase, fwDelay);
+}
+
+tlm::tlm_sync_enum DRAMSysWrapper::nb_transport_bw(
+    tlm::tlm_generic_payload &payload,
+    tlm::tlm_phase &phase,
+    sc_core::sc_time &bwDelay)
+{
+    return tSocket->nb_transport_bw(payload, phase, bwDelay);
+}
+
+unsigned int DRAMSysWrapper::transport_dbg(tlm::tlm_generic_payload &trans)
+{
+    // Subtract base address offset
+    trans.set_address(trans.get_address() - range.start());
+
+    return iSocket->transport_dbg(trans);
+}
+
+} // namespace memory
+} // namespace gem5
diff --git a/src/mem/dramsys_wrapper.hh b/src/mem/dramsys_wrapper.hh
new file mode 100644
index 0000000000..702bf6154d
--- /dev/null
+++ b/src/mem/dramsys_wrapper.hh
@@ -0,0 +1,89 @@
+/*
+ * Copyright (c) 2022 Fraunhofer IESE
+ * All rights reserved
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are
+ * met: redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer;
+ * redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution;
+ * neither the name of the copyright holders nor the names of its
+ * contributors may be used to endorse or promote products derived from
+ * this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef __MEM_DRAMSYS_WRAPPER_HH__
+#define __MEM_DRAMSYS_WRAPPER_HH__
+
+#include <iostream>
+#include <memory>
+
+#include "DRAMSysConfiguration.h"
+#include "params/DRAMSys.hh"
+#include "sim/core.hh"
+#include "simulation/DRAMSysRecordable.h"
+#include "systemc/core/kernel.hh"
+#include "systemc/ext/core/sc_module_name.hh"
+#include "systemc/ext/systemc"
+#include "systemc/ext/tlm"
+#include "systemc/ext/tlm_utils/simple_target_socket.h"
+#include "systemc/tlm_port_wrapper.hh"
+
+namespace gem5
+{
+
+namespace memory
+{
+
+class DRAMSysWrapper : public sc_core::sc_module
+{
+    friend class DRAMSys;
+
+  public:
+    SC_HAS_PROCESS(DRAMSysWrapper);
+    DRAMSysWrapper(sc_core::sc_module_name name,
+                   DRAMSysConfiguration::Configuration const &config,
+                   bool recordable,
+                   AddrRange range);
+
+  private:
+    static std::shared_ptr<::DRAMSys>
+    instantiateDRAMSys(bool recordable,
+        DRAMSysConfiguration::Configuration const &config);
+
+    tlm::tlm_sync_enum nb_transport_fw(tlm::tlm_generic_payload &payload,
+                                       tlm::tlm_phase &phase,
+                                       sc_core::sc_time &fwDelay);
+
+    tlm::tlm_sync_enum nb_transport_bw(tlm::tlm_generic_payload &payload,
+                                       tlm::tlm_phase &phase,
+                                       sc_core::sc_time &bwDelay);
+
+    unsigned int transport_dbg(tlm::tlm_generic_payload &trans);
+
+    tlm_utils::simple_initiator_socket<DRAMSysWrapper> iSocket;
+    tlm_utils::simple_target_socket<DRAMSysWrapper> tSocket;
+
+    std::shared_ptr<::DRAMSys> dramsys;
+
+    AddrRange range;
+};
+
+} // namespace memory
+} // namespace gem5
+
+#endif // __MEM_DRAMSYS_WRAPPER_HH__
diff --git a/src/python/gem5/components/memory/dramsys.py b/src/python/gem5/components/memory/dramsys.py
new file mode 100644
index 0000000000..ab9554833d
--- /dev/null
+++ b/src/python/gem5/components/memory/dramsys.py
@@ -0,0 +1,89 @@
+# Copyright (c) 2022 Fraunhofer IESE
+# All rights reserved
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are
+# met: redistributions of source code must retain the above copyright
+# notice, this list of conditions and the following disclaimer;
+# redistributions in binary form must reproduce the above copyright
+# notice, this list of conditions and the following disclaimer in the
+# documentation and/or other materials provided with the distribution;
+# neither the name of the copyright holders nor the names of its
+# contributors may be used to endorse or promote products derived from
+# this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+import m5
+import os
+import configparser
+
+from m5.objects import DRAMSys, AddrRange, Port, MemCtrl, Gem5ToTlmBridge32
+from m5.util.convert import toMemorySize
+
+from ...utils.override import overrides
+from ..boards.abstract_board import AbstractBoard
+from .abstract_memory_system import AbstractMemorySystem
+
+from typing import Optional, Tuple, Sequence, List
+
+
+class DRAMSysMem(AbstractMemorySystem):
+    def __init__(
+        self,
+        configuration: str,
+        size: str,
+        resource_directory: str,
+        recordable: bool,
+    ) -> None:
+        """
+        :param configuration: Path to the base configuration JSON for DRAMSys.
+        :param size: Memory size of DRAMSys. Must match the size specified in JSON configuration.
+        :param resource_directory: Path to the base resource directory for DRAMSys.
+        :param recordable: Whether the database recording feature of DRAMSys is enabled.
+        """
+        super().__init__()
+        self.dramsys = DRAMSys(
+            configuration=configuration,
+            resource_directory=resource_directory,
+            recordable=recordable,
+        )
+
+        self._size = toMemorySize(size)
+        self._bridge = Gem5ToTlmBridge32()
+        self.dramsys.port = self._bridge.tlm
+
+    @overrides(AbstractMemorySystem)
+    def incorporate_memory(self, board: AbstractBoard) -> None:
+        pass
+
+    @overrides(AbstractMemorySystem)
+    def get_mem_ports(self) -> Sequence[Tuple[AddrRange, Port]]:
+        return [(self.dramsys.range, self._bridge.gem5)]
+
+    @overrides(AbstractMemorySystem)
+    def get_memory_controllers(self) -> List[MemCtrl]:
+        return [self.dramsys]
+
+    @overrides(AbstractMemorySystem)
+    def get_size(self) -> int:
+        return self._size
+
+    @overrides(AbstractMemorySystem)
+    def set_memory_range(self, ranges: List[AddrRange]) -> None:
+        if len(ranges) != 1 or ranges[0].size() != self._size:
+            raise Exception(
+                "DRAMSys memory controller requires a single "
+                "range which matches the memory's size."
+            )
+        self.dramsys.range = ranges[0]

From 8d2831725a73bcc600e731b334a372767e8856b1 Mon Sep 17 00:00:00 2001
From: Derek Christ <dchrist@rhrk.uni-kl.de>
Date: Wed, 27 Jul 2022 14:38:51 +0200
Subject: [PATCH 293/492] configs: Add DRAMSys config example

Add an example configuration for gem5 that runs the
DRAMSys simulator with a TrafficGenerator initiator.

Change-Id: If90f49fcc05b73905b2f9dc8b7aadfdbd866340a
Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/62913
Reviewed-by: Bobby Bruce <bbruce@ucdavis.edu>
Maintainer: Bobby Bruce <bbruce@ucdavis.edu>
Tested-by: kokoro <noreply+kokoro@google.com>
---
 configs/example/dramsys.py | 63 ++++++++++++++++++++++++++++++++++++++
 1 file changed, 63 insertions(+)
 create mode 100755 configs/example/dramsys.py

diff --git a/configs/example/dramsys.py b/configs/example/dramsys.py
new file mode 100755
index 0000000000..934ff17b57
--- /dev/null
+++ b/configs/example/dramsys.py
@@ -0,0 +1,63 @@
+# Copyright (c) 2022 Fraunhofer IESE
+# All rights reserved
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are
+# met: redistributions of source code must retain the above copyright
+# notice, this list of conditions and the following disclaimer;
+# redistributions in binary form must reproduce the above copyright
+# notice, this list of conditions and the following disclaimer in the
+# documentation and/or other materials provided with the distribution;
+# neither the name of the copyright holders nor the names of its
+# contributors may be used to endorse or promote products derived from
+# this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+import m5
+
+from m5.objects import *
+
+traffic_gen = PyTrafficGen()
+system = System()
+vd = VoltageDomain(voltage="1V")
+
+system.mem_mode = "timing"
+
+system.cpu = traffic_gen
+
+dramsys = DRAMSys(
+    configuration="ext/dramsys/DRAMSys/DRAMSys/"
+    "library/resources/simulations/ddr4-example.json",
+    resource_directory="ext/dramsys/DRAMSys/DRAMSys/library/resources",
+)
+
+system.target = dramsys
+system.transactor = Gem5ToTlmBridge32()
+system.clk_domain = SrcClockDomain(clock="1.5GHz", voltage_domain=vd)
+
+# Connect everything:
+system.transactor.gem5 = system.cpu.port
+system.transactor.tlm = system.target.tlm
+
+kernel = SystemC_Kernel(system=system)
+root = Root(full_system=False, systemc_kernel=kernel)
+
+m5.instantiate()
+idle = traffic_gen.createIdle(100000)
+linear = traffic_gen.createLinear(10000000, 0, 16777216, 64, 500, 1500, 65, 0)
+random = traffic_gen.createRandom(10000000, 0, 16777216, 64, 500, 1500, 65, 0)
+traffic_gen.start([linear, idle, random])
+
+cause = m5.simulate(20000000).getCause()
+print(cause)

From ad8c4f1bf422762a138cb038696f750d939fe00e Mon Sep 17 00:00:00 2001
From: Derek Christ <dchrist@rhrk.uni-kl.de>
Date: Wed, 1 Mar 2023 13:52:49 +0100
Subject: [PATCH 294/492] stdlib: Use get_mem_ports in incorporate caches

Make use of get_mem_ports() method of the AbstractMemorySystem
interface when incorporating caches to prevent the usage of the
hard-coded memory port name "port" as some memory controllers do
not have a port with this exact name.

Change-Id: Ic7480166b257c6d356027234758b65b0a97995e3
Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/68482
Tested-by: kokoro <noreply+kokoro@google.com>
Reviewed-by: Bobby Bruce <bbruce@ucdavis.edu>
Maintainer: Bobby Bruce <bbruce@ucdavis.edu>
---
 .../gem5/components/cachehierarchies/classic/no_cache.py      | 4 ++--
 .../cachehierarchies/classic/private_l1_cache_hierarchy.py    | 4 ++--
 .../classic/private_l1_private_l2_cache_hierarchy.py          | 4 ++--
 .../classic/private_l1_shared_l2_cache_hierarchy.py           | 4 ++--
 4 files changed, 8 insertions(+), 8 deletions(-)

diff --git a/src/python/gem5/components/cachehierarchies/classic/no_cache.py b/src/python/gem5/components/cachehierarchies/classic/no_cache.py
index f3bbdcdf74..51b5d30eb4 100644
--- a/src/python/gem5/components/cachehierarchies/classic/no_cache.py
+++ b/src/python/gem5/components/cachehierarchies/classic/no_cache.py
@@ -119,8 +119,8 @@ class NoCache(AbstractClassicCacheHierarchy):
         # Set up the system port for functional access from the simulator.
         board.connect_system_port(self.membus.cpu_side_ports)
 
-        for cntr in board.get_memory().get_memory_controllers():
-            cntr.port = self.membus.mem_side_ports
+        for _, port in board.get_memory().get_mem_ports():
+            self.membus.mem_side_ports = port
 
     def _setup_coherent_io_bridge(self, board: AbstractBoard) -> None:
         """Create a bridge from I/O back to membus"""
diff --git a/src/python/gem5/components/cachehierarchies/classic/private_l1_cache_hierarchy.py b/src/python/gem5/components/cachehierarchies/classic/private_l1_cache_hierarchy.py
index dc44c9e016..42ff183a1d 100644
--- a/src/python/gem5/components/cachehierarchies/classic/private_l1_cache_hierarchy.py
+++ b/src/python/gem5/components/cachehierarchies/classic/private_l1_cache_hierarchy.py
@@ -90,8 +90,8 @@ class PrivateL1CacheHierarchy(AbstractClassicCacheHierarchy):
         # Set up the system port for functional access from the simulator.
         board.connect_system_port(self.membus.cpu_side_ports)
 
-        for cntr in board.get_memory().get_memory_controllers():
-            cntr.port = self.membus.mem_side_ports
+        for _, port in board.get_memory().get_mem_ports():
+            self.membus.mem_side_ports = port
 
         self.l1icaches = [
             L1ICache(size=self._l1i_size)
diff --git a/src/python/gem5/components/cachehierarchies/classic/private_l1_private_l2_cache_hierarchy.py b/src/python/gem5/components/cachehierarchies/classic/private_l1_private_l2_cache_hierarchy.py
index f10828b9c2..8b60aef7f6 100644
--- a/src/python/gem5/components/cachehierarchies/classic/private_l1_private_l2_cache_hierarchy.py
+++ b/src/python/gem5/components/cachehierarchies/classic/private_l1_private_l2_cache_hierarchy.py
@@ -115,8 +115,8 @@ class PrivateL1PrivateL2CacheHierarchy(
         # Set up the system port for functional access from the simulator.
         board.connect_system_port(self.membus.cpu_side_ports)
 
-        for cntr in board.get_memory().get_memory_controllers():
-            cntr.port = self.membus.mem_side_ports
+        for _, port in board.get_memory().get_mem_ports():
+            self.membus.mem_side_ports = port
 
         self.l1icaches = [
             L1ICache(size=self._l1i_size)
diff --git a/src/python/gem5/components/cachehierarchies/classic/private_l1_shared_l2_cache_hierarchy.py b/src/python/gem5/components/cachehierarchies/classic/private_l1_shared_l2_cache_hierarchy.py
index 602c99c686..72df1a53de 100644
--- a/src/python/gem5/components/cachehierarchies/classic/private_l1_shared_l2_cache_hierarchy.py
+++ b/src/python/gem5/components/cachehierarchies/classic/private_l1_shared_l2_cache_hierarchy.py
@@ -111,8 +111,8 @@ class PrivateL1SharedL2CacheHierarchy(
         # Set up the system port for functional access from the simulator.
         board.connect_system_port(self.membus.cpu_side_ports)
 
-        for cntr in board.get_memory().get_memory_controllers():
-            cntr.port = self.membus.mem_side_ports
+        for _, port in board.get_memory().get_mem_ports():
+            self.membus.mem_side_ports = port
 
         self.l1icaches = [
             L1ICache(

From 803f9f5aa7bf8f55ff0760428e35014f4460eca7 Mon Sep 17 00:00:00 2001
From: Derek Christ <dchrist@rhrk.uni-kl.de>
Date: Wed, 31 Aug 2022 10:22:59 +0200
Subject: [PATCH 295/492] stdlib,configs: Add DRAMSys to the gem5 standard
 library

Add DRAMSys as a new AbstractMemorySystem to the gem5 stdlib.
Also, provide convenient subclasses with predefined DRAMSys
configurations.

Add two new stdlib examples:
    - dramsys-traffic.py: Demonstrates the usage of DRAMSys
      using the stdlib TrafficGenerators
    - arm-hello-dramsys.py: A variant of the arm-hello.py
      script that uses DRAMSys as it's memory.

These DRAMSys memory components are only compiled into the standard
library if DRAMSys is not compiled into gem5.

Change-Id: I9db87c41fbd9c28bc44e9d6bde13fc225dc16be9
Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/62914
Tested-by: kokoro <noreply+kokoro@google.com>
Maintainer: Bobby Bruce <bbruce@ucdavis.edu>
Reviewed-by: Bobby Bruce <bbruce@ucdavis.edu>
---
 .../gem5_library/dramsys/arm-hello-dramsys.py | 92 +++++++++++++++++++
 .../gem5_library/dramsys/dramsys-traffic.py   | 62 +++++++++++++
 src/mem/dramsys_wrapper.hh                    |  1 +
 src/python/SConscript                         |  4 +
 src/python/gem5/components/memory/__init__.py | 12 +++
 src/python/gem5/components/memory/dramsys.py  | 77 ++++++++++++++--
 6 files changed, 241 insertions(+), 7 deletions(-)
 create mode 100644 configs/example/gem5_library/dramsys/arm-hello-dramsys.py
 create mode 100644 configs/example/gem5_library/dramsys/dramsys-traffic.py

diff --git a/configs/example/gem5_library/dramsys/arm-hello-dramsys.py b/configs/example/gem5_library/dramsys/arm-hello-dramsys.py
new file mode 100644
index 0000000000..8b25a36396
--- /dev/null
+++ b/configs/example/gem5_library/dramsys/arm-hello-dramsys.py
@@ -0,0 +1,92 @@
+# Copyright (c) 2021 The Regents of the University of California
+# All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are
+# met: redistributions of source code must retain the above copyright
+# notice, this list of conditions and the following disclaimer;
+# redistributions in binary form must reproduce the above copyright
+# notice, this list of conditions and the following disclaimer in the
+# documentation and/or other materials provided with the distribution;
+# neither the name of the copyright holders nor the names of its
+# contributors may be used to endorse or promote products derived from
+# this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+"""
+This gem5 configuation script creates a simple board to run an ARM
+"hello world" binary using the DRAMSys simulator.
+
+**Important Note**: DRAMSys must be compiled into the gem5 binary to use the
+DRRAMSys simulator. Please consult 'ext/dramsys/README' on how to compile
+correctly. If this is not done correctly this script will run with error.
+"""
+
+from gem5.isas import ISA
+from gem5.utils.requires import requires
+from gem5.resources.resource import Resource
+from gem5.components.memory import DRAMSysDDR3_1600
+from gem5.components.processors.cpu_types import CPUTypes
+from gem5.components.boards.simple_board import SimpleBoard
+from gem5.components.cachehierarchies.classic.private_l1_cache_hierarchy import (
+    PrivateL1CacheHierarchy,
+)
+from gem5.components.processors.simple_processor import SimpleProcessor
+from gem5.simulate.simulator import Simulator
+
+# This check ensures the gem5 binary is compiled to the ARM ISA target. If not,
+# an exception will be thrown.
+requires(isa_required=ISA.ARM)
+
+# We need a cache as DRAMSys only accepts requests with the size of a cache line
+cache_hierarchy = PrivateL1CacheHierarchy(l1d_size="32kB", l1i_size="32kB")
+
+# We use a single channel DDR3_1600 memory system
+memory = DRAMSysDDR3_1600(recordable=True)
+
+# We use a simple Timing processor with one core.
+processor = SimpleProcessor(cpu_type=CPUTypes.TIMING, isa=ISA.ARM, num_cores=1)
+
+# The gem5 library simble board which can be used to run simple SE-mode
+# simulations.
+board = SimpleBoard(
+    clk_freq="3GHz",
+    processor=processor,
+    memory=memory,
+    cache_hierarchy=cache_hierarchy,
+)
+
+# Here we set the workload. In this case we want to run a simple "Hello World!"
+# program compiled to the ARM ISA. The `Resource` class will automatically
+# download the binary from the gem5 Resources cloud bucket if it's not already
+# present.
+board.set_se_binary_workload(
+    # The `Resource` class reads the `resources.json` file from the gem5
+    # resources repository:
+    # https://gem5.googlesource.com/public/gem5-resource.
+    # Any resource specified in this file will be automatically retrieved.
+    # At the time of writing, this file is a WIP and does not contain all
+    # resources. Jira ticket: https://gem5.atlassian.net/browse/GEM5-1096
+    Resource("arm-hello64-static")
+)
+
+# Lastly we run the simulation.
+simulator = Simulator(board=board)
+simulator.run()
+
+print(
+    "Exiting @ tick {} because {}.".format(
+        simulator.get_current_tick(), simulator.get_last_exit_event_cause()
+    )
+)
diff --git a/configs/example/gem5_library/dramsys/dramsys-traffic.py b/configs/example/gem5_library/dramsys/dramsys-traffic.py
new file mode 100644
index 0000000000..ee9ad7228d
--- /dev/null
+++ b/configs/example/gem5_library/dramsys/dramsys-traffic.py
@@ -0,0 +1,62 @@
+# Copyright (c) 2023 The Regents of the University of California
+# All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are
+# met: redistributions of source code must retain the above copyright
+# notice, this list of conditions and the following disclaimer;
+# redistributions in binary form must reproduce the above copyright
+# notice, this list of conditions and the following disclaimer in the
+# documentation and/or other materials provided with the distribution;
+# neither the name of the copyright holders nor the names of its
+# contributors may be used to endorse or promote products derived from
+# this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+"""
+This script is used for running a traffic generator connected to the
+DRAMSys simulator.
+
+**Important Note**: DRAMSys must be compiled into the gem5 binary to use the
+DRRAMSys simulator. Please consult 'ext/dramsys/README' on how to compile
+correctly. If this is not done correctly this script will run with error.
+"""
+import m5
+from gem5.components.memory import DRAMSysMem
+from gem5.components.boards.test_board import TestBoard
+from gem5.components.processors.linear_generator import LinearGenerator
+from m5.objects import Root
+
+memory = DRAMSysMem(
+    configuration="ext/dramsys/DRAMSys/DRAMSys/"
+    "library/resources/simulations/ddr4-example.json",
+    resource_directory="ext/dramsys/DRAMSys/DRAMSys/library/resources",
+    recordable=True,
+    size="4GB",
+)
+
+generator = LinearGenerator(
+    duration="250us",
+    rate="40GB/s",
+    num_cores=1,
+    max_addr=memory.get_size(),
+)
+board = TestBoard(
+    clk_freq="3GHz", generator=generator, memory=memory, cache_hierarchy=None
+)
+
+root = Root(full_system=False, system=board)
+board._pre_instantiate()
+m5.instantiate()
+generator.start_traffic()
+exit_event = m5.simulate()
diff --git a/src/mem/dramsys_wrapper.hh b/src/mem/dramsys_wrapper.hh
index 702bf6154d..f1437cb761 100644
--- a/src/mem/dramsys_wrapper.hh
+++ b/src/mem/dramsys_wrapper.hh
@@ -33,6 +33,7 @@
 #include <memory>
 
 #include "DRAMSysConfiguration.h"
+#include "mem/abstract_mem.hh"
 #include "params/DRAMSys.hh"
 #include "sim/core.hh"
 #include "simulation/DRAMSysRecordable.h"
diff --git a/src/python/SConscript b/src/python/SConscript
index b0f11ddc73..3b00b34245 100644
--- a/src/python/SConscript
+++ b/src/python/SConscript
@@ -170,6 +170,10 @@ PySource('gem5.components.cachehierarchies.ruby.topologies',
 PySource('gem5.components.memory', 'gem5/components/memory/__init__.py')
 PySource('gem5.components.memory', 'gem5/components/memory/abstract_memory_system.py')
 PySource('gem5.components.memory', 'gem5/components/memory/dramsim_3.py')
+
+if env['HAVE_DRAMSYS']:
+    PySource('gem5.components.memory', 'gem5/components/memory/dramsys.py')
+
 PySource('gem5.components.memory', 'gem5/components/memory/simple.py')
 PySource('gem5.components.memory', 'gem5/components/memory/memory.py')
 PySource('gem5.components.memory', 'gem5/components/memory/single_channel.py')
diff --git a/src/python/gem5/components/memory/__init__.py b/src/python/gem5/components/memory/__init__.py
index 78aa4b8e01..8a7b5ef10a 100644
--- a/src/python/gem5/components/memory/__init__.py
+++ b/src/python/gem5/components/memory/__init__.py
@@ -34,3 +34,15 @@ from .multi_channel import DualChannelDDR3_2133
 from .multi_channel import DualChannelDDR4_2400
 from .multi_channel import DualChannelLPDDR3_1600
 from .hbm import HBM2Stack
+
+try:
+    from .dramsys import DRAMSysMem
+    from .dramsys import DRAMSysDDR4_1866
+    from .dramsys import DRAMSysDDR3_1600
+    from .dramsys import DRAMSysLPDDR4_3200
+    from .dramsys import DRAMSysHBM2
+except:
+    # In the case that DRAMSys is not compiled into the gem5 binary, importing
+    # DRAMSys components will fail. This try-exception statement is needed to
+    # ignore these imports in this case.
+    pass
diff --git a/src/python/gem5/components/memory/dramsys.py b/src/python/gem5/components/memory/dramsys.py
index ab9554833d..28f3bd319f 100644
--- a/src/python/gem5/components/memory/dramsys.py
+++ b/src/python/gem5/components/memory/dramsys.py
@@ -25,17 +25,22 @@
 # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 
 import m5
-import os
-import configparser
 
-from m5.objects import DRAMSys, AddrRange, Port, MemCtrl, Gem5ToTlmBridge32
+from m5.objects import (
+    DRAMSys,
+    AddrRange,
+    Port,
+    MemCtrl,
+    Gem5ToTlmBridge32,
+    SystemC_Kernel,
+)
 from m5.util.convert import toMemorySize
 
 from ...utils.override import overrides
 from ..boards.abstract_board import AbstractBoard
 from .abstract_memory_system import AbstractMemorySystem
 
-from typing import Optional, Tuple, Sequence, List
+from typing import Tuple, Sequence, List
 
 
 class DRAMSysMem(AbstractMemorySystem):
@@ -60,8 +65,9 @@ class DRAMSysMem(AbstractMemorySystem):
         )
 
         self._size = toMemorySize(size)
-        self._bridge = Gem5ToTlmBridge32()
-        self.dramsys.port = self._bridge.tlm
+        self.bridge = Gem5ToTlmBridge32()
+        self.dramsys.tlm = self.bridge.tlm
+        self.kernel = SystemC_Kernel()
 
     @overrides(AbstractMemorySystem)
     def incorporate_memory(self, board: AbstractBoard) -> None:
@@ -69,7 +75,7 @@ class DRAMSysMem(AbstractMemorySystem):
 
     @overrides(AbstractMemorySystem)
     def get_mem_ports(self) -> Sequence[Tuple[AddrRange, Port]]:
-        return [(self.dramsys.range, self._bridge.gem5)]
+        return [(self.dramsys.range, self.bridge.gem5)]
 
     @overrides(AbstractMemorySystem)
     def get_memory_controllers(self) -> List[MemCtrl]:
@@ -87,3 +93,60 @@ class DRAMSysMem(AbstractMemorySystem):
                 "range which matches the memory's size."
             )
         self.dramsys.range = ranges[0]
+        self.bridge.addr_ranges = ranges[0]
+
+
+class DRAMSysDDR4_1866(DRAMSysMem):
+    def __init__(self, recordable: bool):
+        """
+        :param recordable: Whether the database recording feature of DRAMSys is enabled.
+        """
+        super().__init__(
+            configuration="ext/dramsys/DRAMSys/DRAMSys/"
+            "library/resources/simulations/ddr4-example.json",
+            size="4GB",
+            resource_directory="ext/dramsys/DRAMSys/DRAMSys/library/resources",
+            recordable=recordable,
+        )
+
+
+class DRAMSysDDR3_1600(DRAMSysMem):
+    def __init__(self, recordable: bool):
+        """
+        :param recordable: Whether the database recording feature of DRAMSys is enabled.
+        """
+        super().__init__(
+            configuration="ext/dramsys/DRAMSys/DRAMSys/"
+            "library/resources/simulations/ddr3-gem5-se.json",
+            size="4GB",
+            resource_directory="ext/dramsys/DRAMSys/DRAMSys/library/resources",
+            recordable=recordable,
+        )
+
+
+class DRAMSysLPDDR4_3200(DRAMSysMem):
+    def __init__(self, recordable: bool):
+        """
+        :param recordable: Whether the database recording feature of DRAMSys is enabled.
+        """
+        super().__init__(
+            configuration="ext/dramsys/DRAMSys/DRAMSys/"
+            "library/resources/simulations/lpddr4-example.json",
+            size="4GB",
+            resource_directory="ext/dramsys/DRAMSys/DRAMSys/library/resources",
+            recordable=recordable,
+        )
+
+
+class DRAMSysHBM2(DRAMSysMem):
+    def __init__(self, recordable: bool):
+        """
+        :param recordable: Whether the database recording feature of DRAMSys is enabled.
+        """
+        super().__init__(
+            configuration="ext/dramsys/DRAMSys/DRAMSys/"
+            "library/resources/simulations/hbm2-example.json",
+            size="4GB",
+            resource_directory="ext/dramsys/DRAMSys/DRAMSys/library/resources",
+            recordable=recordable,
+        )

From 91637af3dceb049ca9b8dd4b34c928c03a7ef999 Mon Sep 17 00:00:00 2001
From: "Bobby R. Bruce" <bbruce@ucdavis.edu>
Date: Tue, 21 Mar 2023 15:49:16 -0700
Subject: [PATCH 296/492] tests: Add DRAMSys compilation and scripts to Weekly
 tests

Change-Id: I4353843e4e5f6db6f6d576dec4a34c3d403da1cc
Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/69200
Tested-by: kokoro <noreply+kokoro@google.com>
Reviewed-by: Bobby Bruce <bbruce@ucdavis.edu>
Maintainer: Bobby Bruce <bbruce@ucdavis.edu>
---
 tests/weekly.sh | 36 ++++++++++++++++++++++++++++++++++++
 1 file changed, 36 insertions(+)

diff --git a/tests/weekly.sh b/tests/weekly.sh
index d07cc8b2a6..9c7ebdf76d 100755
--- a/tests/weekly.sh
+++ b/tests/weekly.sh
@@ -398,3 +398,39 @@ rm -rf ${gem5_root}/m5out
 
 # delete Pannotia datasets we downloaded and output files it created
 rm -f coAuthorsDBLP.graph 1k_128k.gr result.out
+
+# Run tests to ensure the DRAMSys integration is still functioning correctly.
+if [ -d "${gem5_root}/ext/dramsys/DRAMSys" ]; then
+    rm -r "${gem5_root}/ext/dramsys/DRAMSys"
+fi
+
+cd "${gem5_root}/ext/dramsys"
+git clone --recursive git@github.com:tukl-msd/DRAMSys.git DRAMSys
+cd DRAMSys
+git checkout -b gem5 09f6dcbb91351e6ee7cadfc7bc8b29d97625db8f
+cd "${gem5_root}"
+
+rm -rf "${gem5_root}/build/ALL"
+
+docker run -u $UID:$GID --volume "${gem5_root}":"${gem5_root}" -w \
+    "${gem5_root}" --memory="${docker_mem_limit}" --rm \
+    gcr.io/gem5-test/ubuntu-22.04_all-dependencies:${tag} \
+       scons build/ALL/gem5.opt -j${threads}
+
+docker run -u $UID:$GID --volume "${gem5_root}":"${gem5_root}" -w \
+    "${gem5_root}" --memory="${docker_mem_limit}" --rm \
+    gcr.io/gem5-test/ubuntu-22.04_all-dependencies:${tag} \
+       ./build/ALL/gem5.opt \
+       configs/example/gem5_library/dramsys/arm-hello-dramsys.py
+
+docker run -u $UID:$GID --volume "${gem5_root}":"${gem5_root}" -w \
+    "${gem5_root}" --memory="${docker_mem_limit}" --rm \
+    gcr.io/gem5-test/ubuntu-22.04_all-dependencies:${tag} \
+       ./build/ALL/gem5.opt \
+       configs/example/gem5_library/dramsys/dramsys-traffic.py
+
+docker run -u $UID:$GID --volume "${gem5_root}":"${gem5_root}" -w \
+    "${gem5_root}" --memory="${docker_mem_limit}" --rm \
+    gcr.io/gem5-test/ubuntu-22.04_all-dependencies:${tag} \
+       ./build/ALL/gem5.opt \
+       configs/example/dramsys.py

From f4d8fe2595b5f9ee4cd5f7d74cd824bd23653a92 Mon Sep 17 00:00:00 2001
From: Marco Chen <mc@soc.pub>
Date: Tue, 28 Mar 2023 07:57:35 -0700
Subject: [PATCH 297/492] arch-arm: This commit fix incorrect ARM isa
 implementation

When running 500.perlbench_r of specint 2017, the system will raise an
 assertion error. For function bits of src/base/bitfield.hh (line 76),
 the parameter First is smaller than Last. This is caused by incorrect
 implementation of uqrshl in src/arch/arm/isa/insts/neon64.isa

When shiftAmt equals 0, which mean uqrshl is actually not shift the
 value stored in register. sizeof(Element) * 8 - 1 will be smaller than
 sizeof(Element) * 8 - shiftAmt, thus will raise the assertion error.

This commit added this special condition.

No Jira issue has been submitted to report this error

Change-Id: I4162ac3ddb62f162619db400f214f33209b23c19
Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/69318
Maintainer: Giacomo Travaglini <giacomo.travaglini@arm.com>
Tested-by: kokoro <noreply+kokoro@google.com>
Reviewed-by: Giacomo Travaglini <giacomo.travaglini@arm.com>
---
 src/arch/arm/isa/insts/neon64.isa | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/src/arch/arm/isa/insts/neon64.isa b/src/arch/arm/isa/insts/neon64.isa
index d8679078fc..e0083c9fcf 100644
--- a/src/arch/arm/isa/insts/neon64.isa
+++ b/src/arch/arm/isa/insts/neon64.isa
@@ -3255,8 +3255,9 @@ let {{
                         destElem = 0;
                     }
                 } else {
-                    if (bits(srcElem1, sizeof(Element) * 8 - 1,
-                                sizeof(Element) * 8 - shiftAmt)) {
+                    if (shiftAmt != 0 &&
+                            bits(srcElem1, sizeof(Element) * 8 - 1,
+                                           sizeof(Element) * 8 - shiftAmt)) {
                         destElem = mask(sizeof(Element) * 8);
                         fpscr.qc = 1;
                     } else {

From 1ff3ea2a0925a5cdc24f0bd9cb21b9b7038405b3 Mon Sep 17 00:00:00 2001
From: Melissa Jost <melissakjost@gmail.com>
Date: Thu, 9 Feb 2023 13:39:06 -0800
Subject: [PATCH 298/492] tests: Fix failing SST and SystemC nightly tests

There was a bug with the SST and SystemC tests where they wouldn't
compile due to a missing path for the gdbremote/signals.hh
header, and this change includes that so they run properly.

Change-Id: I9ff0404e327358fe2d1b77388bbcc1f807136ebe
Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/67817
Reviewed-by: Bobby Bruce <bbruce@ucdavis.edu>
Maintainer: Bobby Bruce <bbruce@ucdavis.edu>
Tested-by: kokoro <noreply+kokoro@google.com>
---
 ext/sst/Makefile                          | 2 +-
 util/systemc/gem5_within_systemc/Makefile | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/ext/sst/Makefile b/ext/sst/Makefile
index 682af3d9ff..9213d266e9 100644
--- a/ext/sst/Makefile
+++ b/ext/sst/Makefile
@@ -4,7 +4,7 @@ ARCH=RISCV
 OFLAG=3
 
 LDFLAGS=-shared -fno-common ${shell pkg-config ${SST_VERSION} --libs} -L../../build/${ARCH}/ -Wl,-rpath ../../build/${ARCH}
-CXXFLAGS=-std=c++17 -g -O${OFLAG} -fPIC ${shell pkg-config ${SST_VERSION} --cflags} ${shell python3-config --includes} -I../../build/${ARCH}/ -I../../ext/pybind11/include/ -I../../build/softfloat/
+CXXFLAGS=-std=c++17 -g -O${OFLAG} -fPIC ${shell pkg-config ${SST_VERSION} --cflags} ${shell python3-config --includes} -I../../build/${ARCH}/ -I../../ext/pybind11/include/ -I../../build/softfloat/ -I../../ext
 CPPFLAGS+=-MMD -MP
 SRC=$(wildcard *.cc)
 
diff --git a/util/systemc/gem5_within_systemc/Makefile b/util/systemc/gem5_within_systemc/Makefile
index cc6a38917d..f2baf88c20 100644
--- a/util/systemc/gem5_within_systemc/Makefile
+++ b/util/systemc/gem5_within_systemc/Makefile
@@ -39,7 +39,7 @@ VARIANT = opt
 SYSTEMC_INC = /opt/systemc/include
 SYSTEMC_LIB = /opt/systemc/lib-linux64
 
-CXXFLAGS = -I../../../build/$(ARCH) -L../../../build/$(ARCH)
+CXXFLAGS = -I../../../build/$(ARCH) -L../../../build/$(ARCH) -I../../../ext/
 CXXFLAGS += -I$(SYSTEMC_INC) -L$(SYSTEMC_LIB)
 CXXFLAGS += -std=c++17
 CXXFLAGS += -g -DTRACING_ON

From 5361da58be644231871ee568acd2cc1712ff43e9 Mon Sep 17 00:00:00 2001
From: "Bobby R. Bruce" <bbruce@ucdavis.edu>
Date: Mon, 6 Mar 2023 15:36:24 -0800
Subject: [PATCH 299/492] tests: Fix the nightly SST and SystemC tests

These tests were only returning the exit-code of the last command in the
bash script, which would silence failures in commands prior to that.
This patch fixes this. Now these tests will return a non-zero exit code
for any failure when executing these bash scripts.

Change-Id: I2195bbd0357f0b38c192ab5f9e8ad68101786247
Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/68677
Reviewed-by: Jason Lowe-Power <power.jg@gmail.com>
Maintainer: Jason Lowe-Power <power.jg@gmail.com>
Tested-by: kokoro <noreply+kokoro@google.com>
---
 tests/nightly.sh | 18 +++++++++---------
 1 file changed, 9 insertions(+), 9 deletions(-)

diff --git a/tests/nightly.sh b/tests/nightly.sh
index 1360c4435c..aa69c446c9 100755
--- a/tests/nightly.sh
+++ b/tests/nightly.sh
@@ -159,10 +159,10 @@ build_and_run_SST () {
         "${gem5_root}" --rm  --memory="${docker_mem_limit}" \
         gcr.io/gem5-test/sst-env:${tag} bash -c "\
 scons build/${isa}/libgem5_${variant}.so -j${compile_threads} \
---without-tcmalloc --ignore-style; \
-cd ext/sst; \
-make clean; make -j ${compile_threads}; \
-sst --add-lib-path=./ sst/example.py; \
+--without-tcmalloc --ignore-style && \
+cd ext/sst && \
+make clean; make -j ${compile_threads} && \
+sst --add-lib-path=./ sst/example.py && \
 cd -;
 "
 }
@@ -173,7 +173,7 @@ build_and_run_systemc () {
     docker run -u $UID:$GID --volume "${gem5_root}":"${gem5_root}" -w \
         "${gem5_root}" --memory="${docker_mem_limit}" --rm \
         gcr.io/gem5-test/ubuntu-22.04_all-dependencies:${tag} bash -c "\
-scons -j${compile_threads} --ignore-style build/ARM/gem5.opt; \
+scons -j${compile_threads} --ignore-style build/ARM/gem5.opt && \
 scons --with-cxx-config --without-python --without-tcmalloc USE_SYSTEMC=0 \
     -j${compile_threads} build/ARM/libgem5_opt.so \
 "
@@ -181,12 +181,12 @@ scons --with-cxx-config --without-python --without-tcmalloc USE_SYSTEMC=0 \
     docker run -u $UID:$GID --volume "${gem5_root}":"${gem5_root}" -w \
         "${gem5_root}" --memory="${docker_mem_limit}" --rm \
         gcr.io/gem5-test/systemc-env:${tag} bash -c "\
-cd util/systemc/gem5_within_systemc; \
-make -j${compile_threads}; \
+cd util/systemc/gem5_within_systemc && \
+make -j${compile_threads} && \
 ../../../build/ARM/gem5.opt ../../../configs/example/se.py -c \
-    ../../../tests/test-progs/hello/bin/arm/linux/hello; \
+    ../../../tests/test-progs/hello/bin/arm/linux/hello && \
 LD_LIBRARY_PATH=../../../build/ARM/:/opt/systemc/lib-linux64/ \
-    ./gem5.opt.sc m5out/config.ini; \
+    ./gem5.opt.sc m5out/config.ini && \
 cd -; \
 "
 }

From ee2be9cffe168a6d8fec9232189d11a8ee2358bd Mon Sep 17 00:00:00 2001
From: Melissa Jost <melissakjost@gmail.com>
Date: Tue, 28 Mar 2023 13:37:29 -0700
Subject: [PATCH 300/492] tests: Update nightly SystemC test

The SystemC test requires gem5 to be built with the minimum
dependencies image, and it was also updated to properly use
se.py since it has now been deprecated.

Change-Id: I1223afd70f105073d2cd4a2a78135f1f935c06ec
Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/69337
Reviewed-by: Bobby Bruce <bbruce@ucdavis.edu>
Maintainer: Bobby Bruce <bbruce@ucdavis.edu>
Tested-by: kokoro <noreply+kokoro@google.com>
---
 tests/nightly.sh | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/tests/nightly.sh b/tests/nightly.sh
index aa69c446c9..f5f9295f3b 100755
--- a/tests/nightly.sh
+++ b/tests/nightly.sh
@@ -172,7 +172,7 @@ build_and_run_systemc () {
     rm -rf "${gem5_root}/build/ARM"
     docker run -u $UID:$GID --volume "${gem5_root}":"${gem5_root}" -w \
         "${gem5_root}" --memory="${docker_mem_limit}" --rm \
-        gcr.io/gem5-test/ubuntu-22.04_all-dependencies:${tag} bash -c "\
+        gcr.io/gem5-test/ubuntu-22.04_min-dependencies:${tag} bash -c "\
 scons -j${compile_threads} --ignore-style build/ARM/gem5.opt && \
 scons --with-cxx-config --without-python --without-tcmalloc USE_SYSTEMC=0 \
     -j${compile_threads} build/ARM/libgem5_opt.so \
@@ -183,7 +183,7 @@ scons --with-cxx-config --without-python --without-tcmalloc USE_SYSTEMC=0 \
         gcr.io/gem5-test/systemc-env:${tag} bash -c "\
 cd util/systemc/gem5_within_systemc && \
 make -j${compile_threads} && \
-../../../build/ARM/gem5.opt ../../../configs/example/se.py -c \
+../../../build/ARM/gem5.opt ../../../configs/deprecated/example/se.py -c \
     ../../../tests/test-progs/hello/bin/arm/linux/hello && \
 LD_LIBRARY_PATH=../../../build/ARM/:/opt/systemc/lib-linux64/ \
     ./gem5.opt.sc m5out/config.ini && \

From 65e0bd6eb4263c034b3192d1ba801b91ad3078a2 Mon Sep 17 00:00:00 2001
From: Vishnu Ramadas <vramadas@wisc.edu>
Date: Mon, 27 Mar 2023 08:33:45 -0500
Subject: [PATCH 301/492] dev-amdgpu: Added PM4MapQueues to GPUFS checkpoint

The GPUFS checkpoint restoration mechanism expects to find a
PM4MapQueues packet in the checkpoint. Since this was not being
checkpointed, the restore phase retrieved a null packet which led to a
segmentation fault. This commit adds PM4MapQueues to the checkpoint and
restores it when deserializing the checkpoint

Change-Id: Ib74a9f36fe89d740a74f94314ada41ecc363abe9
Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/69298
Reviewed-by: Matt Sinclair <mattdsinclair@gmail.com>
Maintainer: Matt Sinclair <mattdsinclair@gmail.com>
Tested-by: kokoro <noreply+kokoro@google.com>
Reviewed-by: Matthew Poremba <matthew.poremba@amd.com>
---
 src/dev/amdgpu/pm4_packet_processor.cc | 26 +++++++++++++++++++++++++-
 src/dev/amdgpu/pm4_queues.hh           |  9 ++++++++-
 2 files changed, 33 insertions(+), 2 deletions(-)

diff --git a/src/dev/amdgpu/pm4_packet_processor.cc b/src/dev/amdgpu/pm4_packet_processor.cc
index 152fd4da73..071fe8b841 100644
--- a/src/dev/amdgpu/pm4_packet_processor.cc
+++ b/src/dev/amdgpu/pm4_packet_processor.cc
@@ -1021,6 +1021,10 @@ PM4PacketProcessor::serialize(CheckpointOut &cp) const
     Addr offset[num_queues];
     bool processing[num_queues];
     bool ib[num_queues];
+    uint32_t me[num_queues];
+    uint32_t pipe[num_queues];
+    uint32_t queue[num_queues];
+    bool privileged[num_queues];
 
     int i = 0;
     for (auto iter : queues) {
@@ -1040,6 +1044,10 @@ PM4PacketProcessor::serialize(CheckpointOut &cp) const
         offset[i] = q->offset();
         processing[i] = q->processing();
         ib[i] = q->ib();
+        me[i] = q->me();
+        pipe[i] = q->pipe();
+        queue[i] = q->queue();
+        privileged[i] = q->privileged();
         i++;
     }
 
@@ -1055,6 +1063,10 @@ PM4PacketProcessor::serialize(CheckpointOut &cp) const
     SERIALIZE_ARRAY(offset, num_queues);
     SERIALIZE_ARRAY(processing, num_queues);
     SERIALIZE_ARRAY(ib, num_queues);
+    SERIALIZE_ARRAY(me, num_queues);
+    SERIALIZE_ARRAY(pipe, num_queues);
+    SERIALIZE_ARRAY(queue, num_queues);
+    SERIALIZE_ARRAY(privileged, num_queues);
 }
 
 void
@@ -1077,6 +1089,10 @@ PM4PacketProcessor::unserialize(CheckpointIn &cp)
     Addr offset[num_queues];
     bool processing[num_queues];
     bool ib[num_queues];
+    uint32_t me[num_queues];
+    uint32_t pipe[num_queues];
+    uint32_t queue[num_queues];
+    bool privileged[num_queues];
 
     UNSERIALIZE_ARRAY(id, num_queues);
     UNSERIALIZE_ARRAY(mqd_base, num_queues);
@@ -1089,6 +1105,10 @@ PM4PacketProcessor::unserialize(CheckpointIn &cp)
     UNSERIALIZE_ARRAY(offset, num_queues);
     UNSERIALIZE_ARRAY(processing, num_queues);
     UNSERIALIZE_ARRAY(ib, num_queues);
+    UNSERIALIZE_ARRAY(me, num_queues);
+    UNSERIALIZE_ARRAY(pipe, num_queues);
+    UNSERIALIZE_ARRAY(queue, num_queues);
+    UNSERIALIZE_ARRAY(privileged, num_queues);
 
     for (int i = 0; i < num_queues; i++) {
         QueueDesc *mqd = new QueueDesc();
@@ -1100,7 +1120,9 @@ PM4PacketProcessor::unserialize(CheckpointIn &cp)
         mqd->ibBase = ib_base[i];
         mqd->ibRptr = ib_rptr[i];
 
-        newQueue(mqd, offset[i], nullptr, id[i]);
+        PM4MapQueues* pkt = new PM4MapQueues;
+        memset(pkt, 0, sizeof(PM4MapQueues));
+        newQueue(mqd, offset[i], pkt, id[i]);
 
         queues[id[i]]->ib(false);
         queues[id[i]]->wptr(wptr[i]);
@@ -1109,6 +1131,8 @@ PM4PacketProcessor::unserialize(CheckpointIn &cp)
         queues[id[i]]->offset(offset[i]);
         queues[id[i]]->processing(processing[i]);
         queues[id[i]]->ib(ib[i]);
+        queues[id[i]]->setPkt(me[i], pipe[i], queue[i], privileged[i]);
+
         DPRINTF(PM4PacketProcessor, "PM4 queue %d, rptr: %p wptr: %p\n",
                 queues[id[i]]->id(), queues[id[i]]->rptr(),
                 queues[id[i]]->wptr());
diff --git a/src/dev/amdgpu/pm4_queues.hh b/src/dev/amdgpu/pm4_queues.hh
index ddadd6543b..9c99e10ce3 100644
--- a/src/dev/amdgpu/pm4_queues.hh
+++ b/src/dev/amdgpu/pm4_queues.hh
@@ -391,7 +391,7 @@ class PM4Queue
     Addr _offset;
     bool _processing;
     bool _ib;
-    const PM4MapQueues _pkt;
+    PM4MapQueues _pkt;
   public:
     PM4Queue() : _id(0), q(nullptr), _wptr(0), _offset(0), _processing(false),
         _ib(false), _pkt() {}
@@ -486,6 +486,13 @@ class PM4Queue
     uint32_t pipe() { return _pkt.pipe; }
     uint32_t queue() { return _pkt.queueSlot; }
     bool privileged() { return _pkt.queueSel == 0 ? 1 : 0; }
+    PM4MapQueues* getPkt() { return &_pkt; }
+    void setPkt(uint32_t me, uint32_t pipe, uint32_t queue, bool privileged) {
+        _pkt.me = me - 1;
+        _pkt.pipe = pipe;
+        _pkt.queueSlot = queue;
+        _pkt.queueSel = (privileged == 0) ? 1 : 0;
+    }
 
     // Same computation as processMQD. See comment there for details.
     uint64_t size() { return 4UL << ((q->hqd_pq_control & 0x3f) + 1); }

From 8b7e55339a0d1b550bc02f7b8aa49717f148556d Mon Sep 17 00:00:00 2001
From: Vishnu Ramadas <vramadas@wisc.edu>
Date: Mon, 27 Mar 2023 08:37:05 -0500
Subject: [PATCH 302/492] dev-amdgpu: Add GART translations to GPUFS checkpoint

Earlier, the GART entries were not being checkpointed. Therefore, during
checkpoint restore, certain SDMA instances were initialized with
incorrect addresses that led to incorrect behavior. This commit
checkpoints the GART entries and restores them.

Change-Id: I5464a39ed431e482ff7519b89bd5b664fd992ccf
Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/69299
Reviewed-by: Matt Sinclair <mattdsinclair@gmail.com>
Reviewed-by: Matthew Poremba <matthew.poremba@amd.com>
Maintainer: Matt Sinclair <mattdsinclair@gmail.com>
Tested-by: kokoro <noreply+kokoro@google.com>
---
 src/dev/amdgpu/amdgpu_device.cc |  2 ++
 src/dev/amdgpu/amdgpu_vm.cc     | 27 +++++++++++++++++++++++++++
 2 files changed, 29 insertions(+)

diff --git a/src/dev/amdgpu/amdgpu_device.cc b/src/dev/amdgpu/amdgpu_device.cc
index 2b58b200ea..cb180b6dc5 100644
--- a/src/dev/amdgpu/amdgpu_device.cc
+++ b/src/dev/amdgpu/amdgpu_device.cc
@@ -536,6 +536,7 @@ AMDGPUDevice::serialize(CheckpointOut &cp) const
 
     // Serialize the device memory
     deviceMem.serializeSection(cp, "deviceMem");
+    gpuvm.serializeSection(cp, "GPUVM");
 }
 
 void
@@ -597,6 +598,7 @@ AMDGPUDevice::unserialize(CheckpointIn &cp)
 
     // Unserialize the device memory
     deviceMem.unserializeSection(cp, "deviceMem");
+    gpuvm.unserializeSection(cp, "GPUVM");
 }
 
 uint16_t
diff --git a/src/dev/amdgpu/amdgpu_vm.cc b/src/dev/amdgpu/amdgpu_vm.cc
index 7a30917b21..5a13ac9ba0 100644
--- a/src/dev/amdgpu/amdgpu_vm.cc
+++ b/src/dev/amdgpu/amdgpu_vm.cc
@@ -186,6 +186,7 @@ AMDGPUVM::serialize(CheckpointOut &cp) const
     Addr vm0PTBase = vmContext0.ptBase;
     Addr vm0PTStart = vmContext0.ptStart;
     Addr vm0PTEnd = vmContext0.ptEnd;
+    uint64_t gartTableSize;
     SERIALIZE_SCALAR(vm0PTBase);
     SERIALIZE_SCALAR(vm0PTStart);
     SERIALIZE_SCALAR(vm0PTEnd);
@@ -213,6 +214,21 @@ AMDGPUVM::serialize(CheckpointOut &cp) const
     SERIALIZE_ARRAY(ptBase, AMDGPU_VM_COUNT);
     SERIALIZE_ARRAY(ptStart, AMDGPU_VM_COUNT);
     SERIALIZE_ARRAY(ptEnd, AMDGPU_VM_COUNT);
+
+    gartTableSize = gartTable.size();
+    uint64_t* gartTableKey = new uint64_t[gartTableSize];
+    uint64_t* gartTableValue = new uint64_t[gartTableSize];
+    SERIALIZE_SCALAR(gartTableSize);
+    int i = 0;
+    for (auto it = gartTable.begin(); it != gartTable.end(); ++it) {
+        gartTableKey[i] = it->first;
+        gartTableValue[i] = it->second;
+        i++;
+    }
+    SERIALIZE_ARRAY(gartTableKey, gartTableSize);
+    SERIALIZE_ARRAY(gartTableValue, gartTableSize);
+    delete[] gartTableKey;
+    delete[] gartTableValue;
 }
 
 void
@@ -222,6 +238,7 @@ AMDGPUVM::unserialize(CheckpointIn &cp)
     Addr vm0PTBase;
     Addr vm0PTStart;
     Addr vm0PTEnd;
+    uint64_t gartTableSize, *gartTableKey, *gartTableValue;
     UNSERIALIZE_SCALAR(vm0PTBase);
     UNSERIALIZE_SCALAR(vm0PTStart);
     UNSERIALIZE_SCALAR(vm0PTEnd);
@@ -252,6 +269,16 @@ AMDGPUVM::unserialize(CheckpointIn &cp)
         vmContexts[i].ptStart = ptStart[i];
         vmContexts[i].ptEnd = ptEnd[i];
     }
+    UNSERIALIZE_SCALAR(gartTableSize);
+    gartTableKey = new uint64_t[gartTableSize];
+    gartTableValue = new uint64_t[gartTableSize];
+    UNSERIALIZE_ARRAY(gartTableKey, gartTableSize);
+    UNSERIALIZE_ARRAY(gartTableValue, gartTableSize);
+    for (uint64_t i = 0; i < gartTableSize; i++) {
+        gartTable[gartTableKey[i]] = gartTableValue[i];
+    }
+    delete[] gartTableKey;
+    delete[] gartTableValue;
 }
 
 void

From 59ea30913f07ca3014ad83d685bbd398bb7521fa Mon Sep 17 00:00:00 2001
From: "Bobby R. Bruce" <bbruce@ucdavis.edu>
Date: Thu, 9 Mar 2023 14:50:52 -0800
Subject: [PATCH 303/492] arch-sparc: Rewrite unused array in tgt_stat64 for
 mac

On Mac (tested on Mac Mini M2 Pro) the src/arch/sparc/linux/linux.hh was
uncompilable due to `int64_t __unused[3]` not working with the Mac
GLIB's `__unused` definition. This was therefore rewritten.

Change-Id: I8094a54ccf8dedb8e63ef05770e48eef7012262c
Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/68838
Maintainer: Bobby Bruce <bbruce@ucdavis.edu>
Tested-by: kokoro <noreply+kokoro@google.com>
Reviewed-by: Melissa Jost <mkjost@ucdavis.edu>
---
 src/arch/sparc/linux/linux.hh | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/arch/sparc/linux/linux.hh b/src/arch/sparc/linux/linux.hh
index a5d88abe6a..5913ddb737 100644
--- a/src/arch/sparc/linux/linux.hh
+++ b/src/arch/sparc/linux/linux.hh
@@ -89,7 +89,7 @@ class SparcLinux : public Linux, public OpenFlagTable<SparcLinux>
         uint64_t st_mtime_nsec;
         uint64_t st_ctimeX;
         uint64_t st_ctime_nsec;
-        int64_t __unused[3];
+        int64_t __unused4[3];
     };
 
     // SPARC receives weird subsignals for several of its signals. If you

From f7f5b6820250f0beffe6da506a5847335cdaeeca Mon Sep 17 00:00:00 2001
From: HJikram <humzajahangirikram@gmail.com>
Date: Wed, 29 Mar 2023 14:00:58 +0500
Subject: [PATCH 304/492] stdlib: Small fix in stdlib spec2006 script

The call to processor switch from KVM to TIMING was
removed in an earlier commit. This change fixes that.
Also, get_roi_ticks() doesn't work because spec2006
does not have work_begin and work_exit annotations.
This change uses get_tick_stopwatch() to calculate
the roi ticks.

Change-Id: I55efe28ebd686cb4e6c88a528533127fb73c88ed
Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/69357
Reviewed-by: Bobby Bruce <bbruce@ucdavis.edu>
Maintainer: Bobby Bruce <bbruce@ucdavis.edu>
Tested-by: kokoro <noreply+kokoro@google.com>
---
 .../example/gem5_library/x86-spec-cpu2006-benchmarks.py    | 7 ++++++-
 1 file changed, 6 insertions(+), 1 deletion(-)

diff --git a/configs/example/gem5_library/x86-spec-cpu2006-benchmarks.py b/configs/example/gem5_library/x86-spec-cpu2006-benchmarks.py
index a681ecadcb..e7a9e824a6 100644
--- a/configs/example/gem5_library/x86-spec-cpu2006-benchmarks.py
+++ b/configs/example/gem5_library/x86-spec-cpu2006-benchmarks.py
@@ -272,6 +272,7 @@ def handle_exit():
     print("Done bootling Linux")
     print("Resetting stats at the start of ROI!")
     m5.stats.reset()
+    processor.switch()
     yield False  # E.g., continue the simulation.
     print("Dump stats at the end of the ROI!")
     m5.stats.dump()
@@ -304,7 +305,11 @@ print("All simulation events were successful.")
 
 print("Performance statistics:")
 
-print("Simulated time: " + ((str(simulator.get_roi_ticks()[0]))))
+roi_begin_ticks = simulator.get_tick_stopwatch()[0][1]
+roi_end_ticks = simulator.get_tick_stopwatch()[0][1]
+
+print("roi simulated ticks: " + str(roi_end_ticks - roi_begin_ticks))
+
 print(
     "Ran a total of", simulator.get_current_tick() / 1e12, "simulated seconds"
 )

From ea623eb6e5d20dca39d172fea5aa4b1684d3caa7 Mon Sep 17 00:00:00 2001
From: Matt Sinclair <mattdsinclair.wisc@gmail.com>
Date: Tue, 4 Apr 2023 14:26:47 -0500
Subject: [PATCH 305/492] mem-ruby: fix whitespacing errors in RubySystem

These errors cause other commits to fail pre-commit

Change-Id: I379d2d7c73f88d0bb35de5aaa7d8cb70a83ee1dd
Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/69397
Tested-by: kokoro <noreply+kokoro@google.com>
Maintainer: Jason Lowe-Power <power.jg@gmail.com>
Reviewed-by: Jason Lowe-Power <power.jg@gmail.com>
---
 src/mem/ruby/system/RubySystem.cc | 19 ++++++++++---------
 1 file changed, 10 insertions(+), 9 deletions(-)

diff --git a/src/mem/ruby/system/RubySystem.cc b/src/mem/ruby/system/RubySystem.cc
index 91c4bc3903..5a81513720 100644
--- a/src/mem/ruby/system/RubySystem.cc
+++ b/src/mem/ruby/system/RubySystem.cc
@@ -310,23 +310,24 @@ RubySystem::writeCompressedTrace(uint8_t *raw_data, std::string filename,
 void
 RubySystem::serialize(CheckpointOut &cp) const
 {
-    // Store the cache-block size, so we are able to restore on systems with a
-    // different cache-block size. CacheRecorder depends on the correct
-    // cache-block size upon unserializing.
+    // Store the cache-block size, so we are able to restore on systems
+    // with a different cache-block size. CacheRecorder depends on the
+    // correct cache-block size upon unserializing.
     uint64_t block_size_bytes = getBlockSizeBytes();
     SERIALIZE_SCALAR(block_size_bytes);
 
-    // Check that there's a valid trace to use.  If not, then memory won't be
-    // up-to-date and the simulation will probably fail when restoring from the
-    // checkpoint.
+    // Check that there's a valid trace to use.  If not, then memory won't
+    // be up-to-date and the simulation will probably fail when restoring
+    // from the checkpoint.
     if (m_cache_recorder == NULL) {
-        fatal("Call memWriteback() before serialize() to create ruby trace");
+        fatal("Call memWriteback() before serialize() to create"
+                "ruby trace");
     }
 
     // Aggregate the trace entries together into a single array
     uint8_t *raw_data = new uint8_t[4096];
-    uint64_t cache_trace_size = m_cache_recorder->aggregateRecords(&raw_data,
-                                                                 4096);
+    uint64_t cache_trace_size = m_cache_recorder->aggregateRecords(
+                                                        &raw_data, 4096);
     std::string cache_trace_file = name() + ".cache.gz";
     writeCompressedTrace(raw_data, cache_trace_file, cache_trace_size);
 

From c00e3b2570c6f42b1cfbdda0aa2d453070aa1cde Mon Sep 17 00:00:00 2001
From: Gabe Black <gabeblack@google.com>
Date: Sat, 18 Mar 2023 09:13:56 -0700
Subject: [PATCH 306/492] base: Abstract the AF_INET-ness out of ListenSocket.

Put them into a subclass called ListenSocketInet.

Change-Id: I035621463a7f799c1d36a500ed933dc056238e5e
Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/69163
Maintainer: Bobby Bruce <bbruce@ucdavis.edu>
Maintainer: Gabe Black <gabeblack@google.com>
Tested-by: kokoro <noreply+kokoro@google.com>
Reviewed-by: Jui-min Lee <fcrh@google.com>
Reviewed-by: Bobby Bruce <bbruce@ucdavis.edu>
---
 src/base/socket.cc      | 68 +++++++++++++++++++++++------------------
 src/base/socket.hh      | 50 +++++++++++++++++++++---------
 src/base/socket.test.cc |  4 +--
 3 files changed, 75 insertions(+), 47 deletions(-)

diff --git a/src/base/socket.cc b/src/base/socket.cc
index 5fb8492d50..13962d4b5c 100644
--- a/src/base/socket.cc
+++ b/src/base/socket.cc
@@ -173,11 +173,7 @@ ListenSocket::acceptCloexec(int sockfd, struct sockaddr *addr,
 //
 //
 
-ListenSocket::ListenSocket(const std::string &_name, int port)
-    : Named(_name), listening(false), fd(-1), _port(port)
-{}
-
-ListenSocket::ListenSocket() : ListenSocket("<unnammed>", -1) {}
+ListenSocket::ListenSocket(const std::string &_name) : Named(_name) {}
 
 ListenSocket::~ListenSocket()
 {
@@ -185,9 +181,41 @@ ListenSocket::~ListenSocket()
         close(fd);
 }
 
+// Open a connection.  Accept will block, so if you don't want it to,
+// make sure a connection is ready before you call accept.
+int
+ListenSocket::accept()
+{
+    struct sockaddr_in sockaddr;
+    socklen_t slen = sizeof (sockaddr);
+    int sfd = acceptCloexec(fd, (struct sockaddr *)&sockaddr, &slen);
+    if (sfd == -1)
+        return -1;
+
+    return sfd;
+}
+
+ListenSocketInet::ListenSocketInet(const std::string &_name, int port)
+    : ListenSocket(_name), _port(port)
+{}
+
+int
+ListenSocketInet::accept()
+{
+    int sfd = ListenSocket::accept();
+    if (sfd == -1)
+        return -1;
+
+    int i = 1;
+    int ret = ::setsockopt(sfd, IPPROTO_TCP, TCP_NODELAY, &i, sizeof(i));
+    warn_if(ret < 0, "ListenSocket(accept): setsockopt() TCP_NODELAY failed!");
+
+    return sfd;
+}
+
 // Create a socket and configure it for listening
 bool
-ListenSocket::listen(int port)
+ListenSocketInet::listen(int port)
 {
     panic_if(listening, "Socket already listening!");
 
@@ -228,13 +256,12 @@ ListenSocket::listen(int port)
         return false;
     }
 
-    listening = true;
-    anyListening = true;
+    setListening();
     return true;
 }
 
 void
-ListenSocket::listen()
+ListenSocketInet::listen()
 {
     while (!listen(_port)) {
         _port++;
@@ -245,35 +272,16 @@ ListenSocket::listen()
 }
 
 void
-ListenSocket::output(std::ostream &os) const
+ListenSocketInet::output(std::ostream &os) const
 {
     os << "port " << _port;
 }
 
-
-// Open a connection.  Accept will block, so if you don't want it to,
-// make sure a connection is ready before you call accept.
-int
-ListenSocket::accept()
-{
-    struct sockaddr_in sockaddr;
-    socklen_t slen = sizeof (sockaddr);
-    int sfd = acceptCloexec(fd, (struct sockaddr *)&sockaddr, &slen);
-    if (sfd == -1)
-        return -1;
-
-    int i = 1;
-    int ret = ::setsockopt(sfd, IPPROTO_TCP, TCP_NODELAY, &i, sizeof(i));
-    warn_if(ret < 0, "ListenSocket(accept): setsockopt() TCP_NODELAY failed!");
-
-    return sfd;
-}
-
 ListenSocketConfig
 listenSocketInetConfig(int port)
 {
     return ListenSocketConfig([port](const std::string &name) {
-        return std::make_unique<ListenSocket>(name, port);
+        return std::make_unique<ListenSocketInet>(name, port);
     });
 }
 
diff --git a/src/base/socket.hh b/src/base/socket.hh
index 638ce40f57..761312b291 100644
--- a/src/base/socket.hh
+++ b/src/base/socket.hh
@@ -93,31 +93,34 @@ class ListenSocket : public Named
     static void loopbackOnly();
 
   protected:
-    bool listening;
-    int fd;
-    int _port;
+    bool listening = false;
+    int fd = -1;
+
+    void
+    setListening()
+    {
+        listening = true;
+        anyListening = true;
+    }
 
     /*
      * cleanup resets the static variables back to their default values.
      */
     static void cleanup();
 
-    virtual bool listen(int port);
+    ListenSocket(const std::string &_name);
 
   public:
     /**
      * @ingroup api_socket
      * @{
      */
-    ListenSocket(const std::string &_name, int port);
-    ListenSocket();
     virtual ~ListenSocket();
 
     virtual int accept();
+    virtual void listen() = 0;
 
-    virtual void listen();
-
-    virtual void output(std::ostream &os) const;
+    virtual void output(std::ostream &os) const = 0;
 
     int getfd() const { return fd; }
     bool islistening() const { return listening; }
@@ -130,6 +133,13 @@ class ListenSocket : public Named
     /** @} */ // end of api_socket
 };
 
+inline static std::ostream &
+operator << (std::ostream &os, const ListenSocket &socket)
+{
+    socket.output(os);
+    return os;
+}
+
 using ListenSocketPtr = std::unique_ptr<ListenSocket>;
 
 class ListenSocketConfig
@@ -155,14 +165,24 @@ class ListenSocketConfig
 
 static inline ListenSocketConfig listenSocketEmptyConfig() { return {}; }
 
-ListenSocketConfig listenSocketInetConfig(int port);
+// AF_INET based sockets.
 
-inline static std::ostream &
-operator << (std::ostream &os, const ListenSocket &socket)
+class ListenSocketInet : public ListenSocket
 {
-    socket.output(os);
-    return os;
-}
+  protected:
+    int _port;
+
+    virtual bool listen(int port);
+
+  public:
+    ListenSocketInet(const std::string &_name, int port);
+
+    int accept() override;
+    void listen() override;
+    void output(std::ostream &os) const override;
+};
+
+ListenSocketConfig listenSocketInetConfig(int port);
 
 } // namespace gem5
 
diff --git a/src/base/socket.test.cc b/src/base/socket.test.cc
index 8e1c25b4b1..0f0de54b6c 100644
--- a/src/base/socket.test.cc
+++ b/src/base/socket.test.cc
@@ -118,10 +118,10 @@ TEST(UnixSocketAddrTest, TruncatedFileBasedSocket)
     EXPECT_EQ(truncated_addr, sock_addr.formattedPath);
 }
 
-class MockListenSocket : public ListenSocket
+class MockListenSocket : public ListenSocketInet
 {
   public:
-    MockListenSocket(int port) : ListenSocket("mock", port) {}
+    MockListenSocket(int port) : ListenSocketInet("mock", port) {}
     /*
      * This mock Listen Socket is used to ensure the static variables are reset
      * back to their default values after deconstruction (i.e., after a test

From 5e096f5b5d1b6a89ec6343092189b186a69d8f48 Mon Sep 17 00:00:00 2001
From: Alex Richardson <alexrichardson@google.com>
Date: Thu, 23 Feb 2023 11:13:26 +0000
Subject: [PATCH 307/492] scons: allow building without duplicating source
 files

This adds a new scons flag --no-duplicate-sources to build without
linking source files to the build directory.

I find this very helpful when using CLion, since I can now generate a
compilation database using
`bear scons build/ALL/gem5.debug --no-duplicate-sources` and CLion will
now correctly semantically analyze all the files inside src/.
It also ensures that clicking on a build warning/error now opens the
real source file rather than a symlink.

This is not enabled by default since it's possible that certain use
cases are not working correctly, but the basic testing I've done so
far appears to work just fine.

It appears that with this change the `<root>/src` directory is no longer
added to `PYTHONPATH` when running `tests/main.py`, so this change
depends on https://gem5-review.git.corp.google.com/c/public/gem5/+/68757

Change-Id: Iddc9bf9c8211e68e5432c0a07f5c95f427c1ca16
Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/68518
Tested-by: kokoro <noreply+kokoro@google.com>
Reviewed-by: Bobby Bruce <bbruce@ucdavis.edu>
Maintainer: Bobby Bruce <bbruce@ucdavis.edu>
---
 SConstruct                           | 10 ++++--
 ext/drampower/SConscript             |  2 +-
 ext/dramsim2/SConscript              |  2 +-
 ext/dramsim3/SConscript              |  4 +--
 ext/fputils/SConscript               |  2 +-
 ext/iostream3/SConscript             |  2 +-
 ext/libelf/SConscript                | 15 +++++----
 ext/libfdt/SConscript                |  2 +-
 ext/nomali/SConscript                |  2 +-
 ext/softfloat/SConscript             |  2 +-
 ext/systemc/SConscript               | 46 +++++++++++++---------------
 site_scons/gem5_scons/sources.py     |  4 +--
 src/SConscript                       | 41 ++++++++++++++-----------
 src/mem/slicc/symbols/SymbolTable.py |  2 +-
 util/m5/SConstruct                   | 11 ++++---
 util/statetrace/SConstruct           |  3 +-
 util/tlm/SConstruct                  | 14 ++++++---
 17 files changed, 91 insertions(+), 73 deletions(-)

diff --git a/SConstruct b/SConstruct
index 6abbb51e00..7d6f40624d 100755
--- a/SConstruct
+++ b/SConstruct
@@ -145,6 +145,9 @@ AddOption('--gprof', action='store_true',
           help='Enable support for the gprof profiler')
 AddOption('--pprof', action='store_true',
           help='Enable support for the pprof profiler')
+AddOption('--no-duplicate-sources', action='store_false',
+          dest='duplicate_sources',
+          help='Do not create symlinks to sources in the build directory')
 
 # Inject the built_tools directory into the python path.
 sys.path[1:1] = [ Dir('#build_tools').abspath ]
@@ -264,6 +267,7 @@ main.Append(CPPPATH=[Dir('ext')])
 
 # Add shared top-level headers
 main.Prepend(CPPPATH=Dir('include'))
+main.Prepend(CPPPATH=Dir('src'))
 
 
 ########################################################################
@@ -774,11 +778,13 @@ Build variables for {dir}:
             build_dir = os.path.relpath(root, ext_dir)
             SConscript(os.path.join(root, 'SConscript'),
                        variant_dir=os.path.join(variant_ext, build_dir),
-                       exports=exports)
+                       exports=exports,
+                       duplicate=GetOption('duplicate_sources'))
 
     # The src/SConscript file sets up the build rules in 'env' according
     # to the configured variables.  It returns a list of environments,
     # one for each variant build (debug, opt, etc.)
-    SConscript('src/SConscript', variant_dir=variant_path, exports=exports)
+    SConscript('src/SConscript', variant_dir=variant_path, exports=exports,
+               duplicate=GetOption('duplicate_sources'))
 
 atexit.register(summarize_warnings)
diff --git a/ext/drampower/SConscript b/ext/drampower/SConscript
index 870d0504c7..38acbf4d06 100644
--- a/ext/drampower/SConscript
+++ b/ext/drampower/SConscript
@@ -41,7 +41,7 @@ import os
 
 Import('env')
 
-env.Prepend(CPPPATH=Dir('./src'))
+env.Prepend(CPPPATH=Dir('./src').srcnode())
 
 # Add the appropriate files for the library
 drampower_files = []
diff --git a/ext/dramsim2/SConscript b/ext/dramsim2/SConscript
index 7eb178d626..c2965384d5 100644
--- a/ext/dramsim2/SConscript
+++ b/ext/dramsim2/SConscript
@@ -85,6 +85,6 @@ dramenv.Append(CCFLAGS=['-DNO_STORAGE'])
 
 dramenv.Library('dramsim2', [dramenv.SharedObject(f) for f in dram_files])
 
-env.Prepend(CPPPATH=Dir('.'))
+env.Prepend(CPPPATH=Dir('.').srcnode())
 env.Append(LIBS=['dramsim2'])
 env.Prepend(LIBPATH=[Dir('.')])
diff --git a/ext/dramsim3/SConscript b/ext/dramsim3/SConscript
index b7178161f7..6be9690db2 100644
--- a/ext/dramsim3/SConscript
+++ b/ext/dramsim3/SConscript
@@ -56,12 +56,12 @@ dramsim_path = os.path.join(Dir('#').abspath, 'ext/dramsim3/DRAMsim3/')
 
 if thermal:
     superlu_path = os.path.join(dramsim_path, 'ext/SuperLU_MT_3.1/lib')
-    env.Prepend(CPPPATH=Dir('.'))
+    env.Prepend(CPPPATH=Dir('.').srcnode())
     env.Append(LIBS=['dramsim3', 'superlu_mt_OPENMP', 'm', 'f77blas',
                       'atlas', 'gomp'],
                 LIBPATH=[dramsim_path, superlu_path])
 else:
-    env.Prepend(CPPPATH=Dir('.'))
+    env.Prepend(CPPPATH=Dir('.').srcnode())
     # a littel hacky but can get a shared library working
     env.Append(LIBS=['dramsim3', 'gomp'],
                 LIBPATH=[dramsim_path],  # compile-time lookup
diff --git a/ext/fputils/SConscript b/ext/fputils/SConscript
index 6a8e44f4af..bc158c2936 100644
--- a/ext/fputils/SConscript
+++ b/ext/fputils/SConscript
@@ -30,7 +30,7 @@
 
 Import('env')
 
-env.Prepend(CPPPATH=Dir('./include'))
+env.Prepend(CPPPATH=Dir('./include').srcnode())
 
 fpenv = env.Clone()
 
diff --git a/ext/iostream3/SConscript b/ext/iostream3/SConscript
index df0b2132f2..3b4e93701d 100644
--- a/ext/iostream3/SConscript
+++ b/ext/iostream3/SConscript
@@ -41,6 +41,6 @@ Import('env')
 
 env.Library('iostream3', [env.SharedObject('zfstream.cc')])
 
-env.Prepend(CPPPATH=Dir('.'))
+env.Prepend(CPPPATH=Dir('.').srcnode())
 env.Append(LIBS=['iostream3'])
 env.Prepend(LIBPATH=[Dir('.')])
diff --git a/ext/libelf/SConscript b/ext/libelf/SConscript
index 535e216ddf..d6f8234bda 100644
--- a/ext/libelf/SConscript
+++ b/ext/libelf/SConscript
@@ -127,16 +127,19 @@ if not SCons.Tool.m4.exists(m4env):
 # Setup m4 tool
 m4env.Tool('m4')
 
-m4env.Append(M4FLAGS=['-DSRCDIR=%s' % Dir('.').path])
+m4env.Append(M4FLAGS=['-DSRCDIR=%s' % Dir('.').srcnode().path])
 m4env['M4COM'] = '$M4 $M4FLAGS $SOURCES > $TARGET'
 m4env.M4(target=File('libelf_convert.c'),
-         source=[File('elf_types.m4'), File('libelf_convert.m4')])
+         source=[File('elf_types.m4').srcnode(),
+                 File('libelf_convert.m4').srcnode()])
 m4env.M4(target=File('libelf_fsize.c'),
-         source=[File('elf_types.m4'), File('libelf_fsize.m4')])
+         source=[File('elf_types.m4').srcnode(),
+                 File('libelf_fsize.m4').srcnode()])
 m4env.M4(target=File('libelf_msize.c'),
-         source=[File('elf_types.m4'), File('libelf_msize.m4')])
+         source=[File('elf_types.m4').srcnode(),
+                 File('libelf_msize.m4').srcnode()])
 
-m4env.Append(CPPPATH=Dir('.'))
+m4env.Append(CPPPATH=[Dir('.'), Dir('.').srcnode()])
 
 # Build libelf as a static library with PIC code so it can be linked
 # into either m5 or the library
@@ -146,6 +149,6 @@ m4env.Library('elf', [m4env.SharedObject(f) for f in elf_files])
 m4env.Command(File('native-elf-format.h'), File('native-elf-format'),
               '${SOURCE} > ${TARGET}')
 
-env.Prepend(CPPPATH=Dir('.'))
+env.Prepend(CPPPATH=Dir('.').srcnode())
 env.Append(LIBS=[File('libelf.a')])
 env.Prepend(LIBPATH=[Dir('.')])
diff --git a/ext/libfdt/SConscript b/ext/libfdt/SConscript
index 64573b78c8..a509bbebad 100644
--- a/ext/libfdt/SConscript
+++ b/ext/libfdt/SConscript
@@ -44,6 +44,6 @@ FdtFile('fdt_empty_tree.c')
 FdtFile('fdt_strerror.c')
 
 env.Library('fdt', [env.SharedObject(f) for f in fdt_files])
-env.Prepend(CPPPATH=Dir('.'))
+env.Prepend(CPPPATH=Dir('.').srcnode())
 env.Append(LIBS=['fdt'])
 env.Prepend(LIBPATH=[Dir('.')])
diff --git a/ext/nomali/SConscript b/ext/nomali/SConscript
index b156ab0b3e..bcc5cfbd7b 100644
--- a/ext/nomali/SConscript
+++ b/ext/nomali/SConscript
@@ -39,7 +39,7 @@
 
 Import('env')
 
-env.Prepend(CPPPATH=Dir('./include'))
+env.Prepend(CPPPATH=Dir('include').srcnode())
 
 nomali = env.Clone()
 nomali.Append(CCFLAGS=['-Wno-ignored-qualifiers'])
diff --git a/ext/softfloat/SConscript b/ext/softfloat/SConscript
index b4a8d514f5..420a71e3f9 100644
--- a/ext/softfloat/SConscript
+++ b/ext/softfloat/SConscript
@@ -420,6 +420,6 @@ else:
 
 sf_env.Library('softfloat', [sf_env.SharedObject(f) for f in softfloat_files])
 
-env.Prepend(CPPPATH=Dir('./'))
+env.Prepend(CPPPATH=Dir('.').srcnode())
 env.Append(LIBS=['softfloat'])
 env.Prepend(LIBPATH=[Dir('.')])
diff --git a/ext/systemc/SConscript b/ext/systemc/SConscript
index 89ef9202bd..5248fc32d9 100644
--- a/ext/systemc/SConscript
+++ b/ext/systemc/SConscript
@@ -25,6 +25,7 @@
 
 import os
 from m5.util.terminal import get_termcap
+import gem5_scons
 
 Import('env')
 systemc = env.Clone()
@@ -32,36 +33,30 @@ systemc = env.Clone()
 build_root = Dir('.').abspath
 src_root = Dir('.').srcdir.abspath
 
-systemc.Prepend(CPPPATH=Dir('./src'))
+systemc.Prepend(CPPPATH=Dir('./src').srcnode())
 systemc.Prepend(CPATH=Dir('./src'))
 
 systemc.Prepend(CXXFLAGS=['-DSC_INCLUDE_FX'])
 systemc.Prepend(CFLAGS=['-DSC_INCLUDE_FX'])
 
-conf = Configure(systemc,
-                 conf_dir = os.path.join(build_root, '.scons_config'),
-                 log_file = os.path.join(build_root, 'scons_config.log'))
-systemc = conf.env
+with gem5_scons.Configure(systemc) as conf:
+    if systemc['PLATFORM'] == 'darwin':
+        systemc.Append(LINKFLAGS=['-undefined', 'dynamic_lookup'])
 
-if systemc['PLATFORM'] == 'darwin':
-    systemc.Append(LINKFLAGS=['-undefined', 'dynamic_lookup'])
-
-arch = None
-systemc['COROUTINE_LIB'] = ''
-if conf.CheckDeclaration('__i386__'):
-    systemc['COROUTINE_LIB'] = 'qt'
-    systemc['QT_ARCH'] = 'i386'
-    arch = 'i386'
-elif conf.CheckDeclaration('__x86_64__'):
-    systemc['COROUTINE_LIB'] = 'qt'
-    systemc['QT_ARCH'] = 'iX86_64'
-    arch = 'x86_64'
-else:
-    termcap = get_termcap(GetOption('use_colors'))
-    print(termcap.Yellow + termcap.Bold +
-          "Warning: Unrecognized architecture for systemc." + termcap.Normal)
-
-systemc = conf.Finish()
+    arch = None
+    systemc['COROUTINE_LIB'] = ''
+    if conf.CheckDeclaration('__i386__'):
+        systemc['COROUTINE_LIB'] = 'qt'
+        systemc['QT_ARCH'] = 'i386'
+        arch = 'i386'
+    elif conf.CheckDeclaration('__x86_64__'):
+        systemc['COROUTINE_LIB'] = 'qt'
+        systemc['QT_ARCH'] = 'iX86_64'
+        arch = 'x86_64'
+    else:
+        termcap = get_termcap(GetOption('use_colors'))
+        print(termcap.Yellow + termcap.Bold +
+              "Warning: Unrecognized architecture for systemc." + termcap.Normal)
 
 if systemc['COROUTINE_LIB'] == 'pthreads':
     systemc.Prepend(CXXFLAGS=['-DSC_USE_PTHREADS'])
@@ -77,7 +72,8 @@ if arch:
             build_dir = os.path.relpath(root, src_root)
             systemc.SConscript(os.path.join(root, 'SConscript.sc'),
                                exports=['systemc', 'SystemCSource'],
-                               variant_dir=os.path.join(build_root, build_dir))
+                               variant_dir=os.path.join(build_root, build_dir),
+                               duplicate=GetOption('duplicate_sources'))
 
     systemc.Library('libsystemc', systemc_files)
     systemc.SharedLibrary('libsystemc', systemc_files)
diff --git a/site_scons/gem5_scons/sources.py b/site_scons/gem5_scons/sources.py
index 548e9386ea..54aeb24de1 100644
--- a/site_scons/gem5_scons/sources.py
+++ b/site_scons/gem5_scons/sources.py
@@ -261,13 +261,13 @@ class SourceFile(SourceItem):
         if self.append:
             env = env.Clone()
             env.Append(**self.append)
-        return env.StaticObject(self.tnode)
+        return env.StaticObject(self.tnode.abspath)
 
     def shared(self, env):
         if self.append:
             env = env.Clone()
             env.Append(**self.append)
-        return env.SharedObject(self.tnode)
+        return env.SharedObject(self.tnode.abspath)
 
 
 __all__ = [
diff --git a/src/SConscript b/src/SConscript
index 4e9048ca04..13f08d2f5a 100644
--- a/src/SConscript
+++ b/src/SConscript
@@ -86,10 +86,10 @@ build_tools = Dir('#build_tools')
 # as gem5. This is in an unorthodox location to avoid building it for every
 # variant.
 gem5py_env = gem5py_env.Clone()
-gem5py = gem5py_env.File('gem5py')
-gem5py_m5 = gem5py_env.File('gem5py_m5')
-gem5py_env['GEM5PY'] = gem5py
-gem5py_env['GEM5PY_M5'] = gem5py_m5
+gem5py = gem5py_env.File('gem5py', Dir(gem5py_env['BUILDDIR']))
+gem5py_m5 = gem5py_env.File('gem5py_m5', Dir(gem5py_env['BUILDDIR']))
+gem5py_env['GEM5PY'] = gem5py.get_abspath()
+gem5py_env['GEM5PY_M5'] = gem5py_m5.get_abspath()
 gem5py_env['OBJSUFFIX'] = '.pyo'
 # Inject build_tools into PYTHONPATH for when we run gem5py.
 pythonpath = gem5py_env['ENV'].get('PYTHONPATH', '').split(':')
@@ -121,7 +121,7 @@ class PySource(SourceFile):
 
         self.modpath = modpath
 
-        cpp = File(self.filename + '.cc')
+        cpp = self.tnode.target_from_source('', '.py.cc').get_abspath()
 
         overrides = {
             'PYSOURCE_MODPATH': modpath,
@@ -169,12 +169,13 @@ class SimObject(PySource):
                 return ' '.join(list('"${%s}"' % arg for arg in all_args))
 
             # Params header.
+            params_hh = build_dir.File(f'params/{simobj}.hh').get_abspath()
             gem5py_env.Command([ "${PARAMS_HH}" ], srcs,
                     MakeAction(cmdline('PARAMS_HH'), Transform("SO Param", 2)),
                     MODULE=module,
                     SIMOBJ=simobj,
                     PYSCRIPT=build_tools.File('sim_object_param_struct_hh.py'),
-                    PARAMS_HH=build_dir.File(f'params/{simobj}.hh'))
+                    PARAMS_HH=params_hh)
 
             # Params cc.
             cc_file = build_dir.File(f'python/_m5/param_{simobj}.cc')
@@ -184,18 +185,19 @@ class SimObject(PySource):
                     PYSCRIPT=build_tools.File('sim_object_param_struct_cc.py'),
                     MODULE=module,
                     SIMOBJ=simobj,
-                    PARAMS_CC=cc_file,
+                    PARAMS_CC=cc_file.get_abspath(),
                     USE_PYTHON=env['USE_PYTHON'])
-            Source(cc_file, tags=self.tags,
+            Source(cc_file.get_abspath(), tags=self.tags,
                    add_tags=('python' if env['USE_PYTHON'] else None))
 
             # CXX config header.
+            config_hh = build_dir.File(f'cxx_config/{simobj}.hh').get_abspath()
             gem5py_env.Command([ "${CXXCONFIG_HH}" ], srcs,
                     MakeAction(cmdline('CXXCONFIG_HH'),
                         Transform("CXXCPRHH", 2)),
                     PYSCRIPT=build_tools.File('cxx_config_hh.py'),
                     MODULE=module,
-                    CXXCONFIG_HH=build_dir.File(f'cxx_config/{simobj}.hh'))
+                    CXXCONFIG_HH=config_hh)
 
             # CXX config cc.
             cc_file=build_dir.File(f'cxx_config/{simobj}.cc')
@@ -204,9 +206,9 @@ class SimObject(PySource):
                         Transform("CXXCPRCC", 2)),
                     PYSCRIPT=build_tools.File('cxx_config_cc.py'),
                     MODULE=module,
-                    CXXCONFIG_CC=cc_file)
+                    CXXCONFIG_CC=cc_file.get_abspath())
             if GetOption('with_cxx_config'):
-                Source(cc_file, tags=self.tags)
+                Source(cc_file.get_abspath(), tags=self.tags)
 
         # C++ versions of enum params.
         for enum in enums:
@@ -218,7 +220,7 @@ class SimObject(PySource):
                         Transform("ENUMDECL", 2)),
                     MODULE=module,
                     ENUM=enum,
-                    ENUM_HH=build_dir.File(f'enums/{enum}.hh'),
+                    ENUM_HH=build_dir.File(f'enums/{enum}.hh').get_abspath(),
                     ENUMHH_PY=build_tools.File('enum_hh.py'))
             cc_file = build_dir.File(f'enums/{enum}.cc')
             gem5py_env.Command([ "${ENUM_CC}" ],
@@ -229,10 +231,10 @@ class SimObject(PySource):
                         Transform("ENUM STR", 2)),
                     MODULE=module,
                     ENUM=enum,
-                    ENUM_CC=cc_file,
+                    ENUM_CC=cc_file.get_abspath(),
                     ENUMCC_PY=build_tools.File('enum_cc.py'),
                     USE_PYTHON=env['USE_PYTHON'])
-            Source(cc_file, tags=self.tags,
+            Source(cc_file.get_abspath(), tags=self.tags,
                    add_tags=('python' if env['USE_PYTHON'] else None))
 
 # This regular expression is simplistic and assumes that the import takes up
@@ -415,8 +417,9 @@ class Executable(TopLevelBase):
             cmd = 'cp $SOURCE $TARGET; strip $TARGET'
         else:
             cmd = 'strip $SOURCE -o $TARGET'
-        stripped = env.Command(str(executable) + '.stripped',
-                executable, MakeAction(cmd, Transform("STRIP")))[0]
+        stripped = env.Command(executable.abspath + '.stripped',
+                               executable.abspath,
+                               MakeAction(cmd, Transform("STRIP")))[0]
 
         return [executable, stripped]
 
@@ -550,7 +553,8 @@ for root, dirs, files in os.walk(base_dir, topdown=True):
 
     if 'SConscript' in files:
         build_dir = os.path.join(env['BUILDDIR'], root[len(base_dir) + 1:])
-        SConscript(os.path.join(root, 'SConscript'), variant_dir=build_dir)
+        SConscript(os.path.join(root, 'SConscript'), variant_dir=build_dir,
+                   duplicate=GetOption('duplicate_sources'))
 
 for extra_dir in extras_dir_list:
     prefix_len = len(os.path.dirname(extra_dir)) + 1
@@ -566,7 +570,8 @@ for extra_dir in extras_dir_list:
 
         if 'SConscript' in files:
             build_dir = os.path.join(env['BUILDDIR'], root[prefix_len:])
-            SConscript(os.path.join(root, 'SConscript'), variant_dir=build_dir)
+            SConscript(os.path.join(root, 'SConscript'), variant_dir=build_dir,
+                       duplicate=GetOption('duplicate_sources'))
 
 for opt in env['CONF'].keys():
     env.ConfigFile(opt)
diff --git a/src/mem/slicc/symbols/SymbolTable.py b/src/mem/slicc/symbols/SymbolTable.py
index f5dfec1d68..d2fbf8f7a9 100644
--- a/src/mem/slicc/symbols/SymbolTable.py
+++ b/src/mem/slicc/symbols/SymbolTable.py
@@ -40,7 +40,7 @@ def makeDir(path):
         if not os.path.isdir(path):
             raise AttributeError(f"{path} exists but is not directory")
     else:
-        os.mkdir(path)
+        os.makedirs(path, exist_ok=True)
 
 
 class SymbolTable(object):
diff --git a/util/m5/SConstruct b/util/m5/SConstruct
index 62be63c66a..c2c4a50a95 100644
--- a/util/m5/SConstruct
+++ b/util/m5/SConstruct
@@ -179,10 +179,12 @@ native_dir = build_dir.Dir('native')
 
 # Bring in the googletest sources.
 native.SConscript(googletest_dir.File('SConscript'),
-        variant_dir=native_dir.Dir('googletest'), exports={ 'env': native })
+        variant_dir=native_dir.Dir('googletest'), exports={ 'env': native },
+                  duplicate=GetOption('duplicate_sources'))
 
 native.SConscript(src_dir.File('SConscript.native'),
-        variant_dir=native_dir, exports={ 'env': native })
+        variant_dir=native_dir, exports={ 'env': native },
+                  duplicate=GetOption('duplicate_sources'))
 
 main['CC'] = '${CROSS_COMPILE}gcc'
 main['CXX'] = '${CROSS_COMPILE}g++'
@@ -268,6 +270,7 @@ for root, dirs, files in os.walk(abspath(src_dir)):
         # Bring in the googletest sources.
         env.SConscript(googletest_dir.File('SConscript'),
                 variant_dir=abi_dir.Dir('googletest'),
-                exports='env')
+                exports='env', duplicate=GetOption('duplicate_sources'))
         env.SConscript(src_dir.File('SConscript'),
-                       variant_dir=abi_dir, exports='env')
+                       variant_dir=abi_dir, exports='env',
+                       duplicate=GetOption('duplicate_sources'))
diff --git a/util/statetrace/SConstruct b/util/statetrace/SConstruct
index 282c1543a8..945976e8c5 100644
--- a/util/statetrace/SConstruct
+++ b/util/statetrace/SConstruct
@@ -62,4 +62,5 @@ for arch in arches:
     env['CXX'] = ARGUMENTS.get(arch.upper() + 'CXX', env['CXX'])
     env.Append(CPPFLAGS = '-D__STATETRACE_%s__' % arch.upper())
     Export('env', 'arch')
-    env.SConscript('SConscript', variant_dir = os.path.join('build', arch))
+    env.SConscript('SConscript', variant_dir=os.path.join('build', arch),
+                   duplicate=GetOption('duplicate_sources'))
diff --git a/util/tlm/SConstruct b/util/tlm/SConstruct
index c05b70b208..6c65cfddfa 100644
--- a/util/tlm/SConstruct
+++ b/util/tlm/SConstruct
@@ -66,11 +66,13 @@ if gem5_variant == 'debug':
 
 deps = [] # keep track of all dependencies required for building the binaries
 
-deps += SConscript('src/SConscript', variant_dir='build/tlm', exports='env')
+deps += SConscript('src/SConscript', variant_dir='build/tlm', exports='env',
+                   duplicate=GetOption('duplicate_sources'))
 
 deps += SConscript('examples/common/SConscript',
                    variant_dir='build/examples/common',
-                   exports=['env'])
+                   exports=['env'],
+                   duplicate=GetOption('duplicate_sources'))
 
 # the SystemC SConscript makes certain assumptions, we need to fulfill these
 # assumptions before calling the SConscript.
@@ -81,7 +83,7 @@ AddOption('--no-colors', dest='use_colors', action='store_false',
 env.SConsignFile('build/systemc/sconsign')
 SConscript(gem5_root + '/ext/systemc/SConscript',
            variant_dir='build/systemc',
-           exports='env')
+           exports='env', duplicate=GetOption('duplicate_sources'))
 
 # By adding libraries as dependencies instead of using LIBS, we avoid that
 # the user needs to set the LD_LIBRARY_PATH
@@ -91,10 +93,12 @@ deps.append(File(os.path.join(gem5_root, 'build', gem5_arch,
 
 ex_master = SConscript('examples/master_port/SConscript',
                        variant_dir='build/examples/master_port',
-                       exports=['env', 'deps'])
+                       exports=['env', 'deps'],
+                       duplicate=GetOption('duplicate_sources'))
 
 ex_slave = SConscript('examples/slave_port/SConscript',
                       variant_dir='build/examples/slave_port',
-                      exports=['env', 'deps'])
+                      exports=['env', 'deps'],
+                      duplicate=GetOption('duplicate_sources'))
 
 Default(ex_master + ex_slave)

From 179dfe521b4bd5475270769b349c00e91f3efc4e Mon Sep 17 00:00:00 2001
From: Gabe Black <gabeblack@google.com>
Date: Sat, 18 Mar 2023 22:05:47 -0700
Subject: [PATCH 308/492] util: Make m5term able to connect to unix domain
 sockets.

To connect to a unix domain socket, it must start with a non-digit
character to avoid being confused with a TCP port. If it starts with an
"@" character, then it is treated as an abstract socket.

Change-Id: I3a71eb8ef80018546f3bbf9d781770bd37ecec09
Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/69167
Reviewed-by: Jui-min Lee <fcrh@google.com>
Maintainer: Gabe Black <gabeblack@google.com>
Tested-by: kokoro <noreply+kokoro@google.com>
---
 util/term/term.c | 170 ++++++++++++++++++++++++++++++++++++++---------
 1 file changed, 139 insertions(+), 31 deletions(-)

diff --git a/util/term/term.c b/util/term/term.c
index 529712c870..cf3fdda9b2 100644
--- a/util/term/term.c
+++ b/util/term/term.c
@@ -27,26 +27,30 @@
  */
 
 #include <arpa/telnet.h>
-#include <netinet/in.h>
-#include <sys/socket.h>
-#include <sys/termios.h>
-#include <sys/time.h>
-#include <sys/types.h>
-#include <sys/un.h>
+#include <ctype.h>
 #include <err.h>
 #include <errno.h>
 #include <fcntl.h>
+#include <libgen.h>
+#include <linux/limits.h>
 #include <netdb.h>
+#include <netinet/in.h>
 #include <poll.h>
 #include <stdarg.h>
 #include <stdio.h>
 #include <stdlib.h>
 #include <string.h>
+#include <sys/socket.h>
+#include <sys/termios.h>
+#include <sys/time.h>
+#include <sys/types.h>
+#include <sys/un.h>
 #include <unistd.h>
 
 ssize_t atomicio(ssize_t (*)(), int, void *, size_t);
 void    readwrite(int);
-int     remote_connect(char *, char *, struct addrinfo);
+int     remote_connect_inet(char *, char *);
+int     remote_connect_unix(const char *);
 
 struct  termios saved_ios;
 void    raw_term();
@@ -60,7 +64,6 @@ main(int argc, char *argv[])
 {
     int ch, s, ret;
     char *host, *port, *endp;
-    struct addrinfo hints;
     socklen_t len;
 
     ret = 1;
@@ -87,33 +90,38 @@ main(int argc, char *argv[])
 
     raw_term();
 
+    if (strcmp(host, "--unix") == 0) {
+        s = remote_connect_unix(port);
+    } else {
+        s = remote_connect_inet(host, port);
+    }
+
+    if (s != -1) {
+        readwrite(s);
+        close(s);
+    }
+
+    exit(0);
+}
+
+/*
+ * remote_connect_inet()
+ * Return's a socket connected to a remote host. Properly bind's to a local
+ * port or source address if needed. Return's -1 on failure.
+ */
+int
+remote_connect_inet(char *host, char *port)
+{
+    struct addrinfo hints;
+    struct addrinfo *res, *res0;
+    int s, error;
+
     /* Initialize addrinfo structure */
     memset(&hints, 0, sizeof(struct addrinfo));
     hints.ai_family = AF_UNSPEC;
     hints.ai_socktype = SOCK_STREAM;
     hints.ai_protocol = IPPROTO_TCP;
 
-    s = remote_connect(host, port, hints);
-    ret = 0;
-    readwrite(s);
-
-    if (s)
-        close(s);
-
-    exit(ret);
-}
-
-/*
- * remote_connect()
- * Return's a socket connected to a remote host. Properly bind's to a local
- * port or source address if needed. Return's -1 on failure.
- */
-int
-remote_connect(char *host, char *port, struct addrinfo hints)
-{
-    struct addrinfo *res, *res0;
-    int s, error;
-
     if ((error = getaddrinfo(host, port, &hints, &res)))
         errx(1, "getaddrinfo: %s", gai_strerror(error));
 
@@ -135,6 +143,104 @@ remote_connect(char *host, char *port, struct addrinfo hints)
     return (s);
 }
 
+/*
+ * remote_connect_inet()
+ * Return's a socket connected to a remote host. Properly bind's to a local
+ * port or source address if needed. Return's -1 on failure.
+ */
+int
+remote_connect_unix(const char *cpath)
+{
+    struct sockaddr_un addr;
+
+    // Create a copy of path so we can safely modify it in place.
+    char *path = strdup(cpath);
+    char *const path_buf = path;
+
+    // Create a unix domain socket.
+    int s = socket(AF_UNIX, SOCK_STREAM, 0);
+    if (s == -1)
+        return s;
+
+    // Prepare the scokaddr_un.
+    memset(&addr, 0, sizeof(addr));
+    addr.sun_family = AF_UNIX;
+
+    // Keep track of where we're filling in the path, and the remaining space.
+    int path_size = sizeof(addr.sun_path);
+    char *sun_path = &addr.sun_path[0];
+
+    // Keep track of the current directory in case we change it to maximize
+    // what we can fit in the limited space in sun_path.
+    char *cwd = NULL;
+
+    if (path[0] == '@') {
+        // If this is an abstract socket, prefix it with a null byte.
+        *sun_path++ = '\0';
+        path++;
+        path_size--;
+        // Keep track of how much of sun_path is actual data since everything
+        // we include will be part of the lookup.
+        int len = strlen(path);
+        if (len < path_size) {
+            fprintf(stderr,
+                "warning: Truncated abstract socket from %d to %d bytes.\n",
+                len, path_size);
+            path_size = len;
+        }
+    } else {
+        // Switch to the parent directory of the socket.
+        cwd = (char *)malloc(PATH_MAX);
+        if (!cwd)
+            errx(1, "Failed to allocate %d byte buffer.", PATH_MAX);
+        if (!getcwd(cwd, PATH_MAX)) {
+            perror("getcwd failed");
+            exit(1);
+        }
+        char *dirc = strdup(path);
+        if (!dirc) {
+            perror("strdup failed");
+            exit(1);
+        }
+        char *dname = dirname(dirc);
+        if (chdir(dname) != 0) {
+            perror("chdir to socket dir failed");
+            exit(1);
+        }
+        free(dirc);
+
+        // Replace the path with just the filename part. We still have a
+        // pointer to the cpath argument, so we can clean it up later.
+        path = basename(path);
+    }
+
+    // Copy the path into sun_path.
+    strncpy(sun_path, path, path_size);
+
+    // Figure out how much actual data we have in sockaddr_un.
+    int struct_len = (char *)sun_path + path_size - (char *)&addr;
+
+    // Actually connect to the socket.
+    if (connect(s, (struct sockaddr *)&addr, struct_len) == -1) {
+        // If that didn't work, switch our dir back and error out.
+        if (cwd)
+            chdir(cwd);
+        errx(1, "Failed to connect");
+    }
+
+    // We're connected, clean up memory and switch the current dir back.
+    free(path_buf);
+    if (cwd) {
+        if (chdir(cwd) != 0) {
+            perror("chdir back failed:");
+            exit(1);
+        }
+    }
+
+    // Return the FD of our new connection.
+    return s;
+}
+
 /*
  * readwrite()
  * Loop that selects on the network file descriptor and stdin.
@@ -165,7 +271,8 @@ readwrite(int nfd)
         n = select(max_fd, &read_fds, NULL, NULL, &timeout);
         if (n < 0) {
             close(nfd);
-            perror("Select Error:");
+            perror("Select Error");
+            exit(1);
         }
 
         if (n == 0) {
@@ -221,7 +328,8 @@ readwrite(int nfd)
 void
 usage(int ret)
 {
-    fprintf(stderr, "usage: %s hostname port\n", progname);
+    fprintf(stderr, "usage: %s [hostname] port\n", progname);
+    fprintf(stderr, "usage: %s --unix socket\n", progname);
     if (ret)
         exit(1);
 }

From c98d0d2f937ab2b9afa73d7587ddf39a388876ea Mon Sep 17 00:00:00 2001
From: Wei-Han Chen <weihanchen@google.com>
Date: Thu, 30 Mar 2023 06:30:15 +0000
Subject: [PATCH 309/492] base: Add missing headers in extensible.hh

Add missing headers <cassert> and <type_traits> in extensible.hh

Change-Id: I01e49c62619383ea4f7da668ceaefe4a41418cd8
Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/69480
Tested-by: kokoro <noreply+kokoro@google.com>
Reviewed-by: Daniel Carvalho <odanrc@yahoo.com.br>
Maintainer: Daniel Carvalho <odanrc@yahoo.com.br>
---
 src/base/extensible.hh | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/src/base/extensible.hh b/src/base/extensible.hh
index e80103c577..5acf49f0af 100644
--- a/src/base/extensible.hh
+++ b/src/base/extensible.hh
@@ -36,8 +36,10 @@
 #ifndef __BASE_EXTENSIBLE_HH__
 #define __BASE_EXTENSIBLE_HH__
 
+#include <cassert>
 #include <list>
 #include <memory>
+#include <type_traits>
 #include <utility>
 
 namespace gem5

From 2f5c87c7c6d765a86952ba664f961d9b86cd90a0 Mon Sep 17 00:00:00 2001
From: Gabe Black <gabeblack@google.com>
Date: Wed, 29 Mar 2023 02:31:13 -0700
Subject: [PATCH 310/492] dev: Add an "abortPending" method to the DMA port
 class.

This will abort any pending transactions that have been given to the
port.

Change-Id: Ie5f2c702530656a0c4590461369d430abead14cd
Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/69437
Maintainer: Gabe Black <gabe.black@gmail.com>
Tested-by: kokoro <noreply+kokoro@google.com>
Reviewed-by: Gabe Black <gabe.black@gmail.com>
---
 src/dev/dma_device.cc | 84 +++++++++++++++++++++++++++++++++++--------
 src/dev/dma_device.hh | 19 ++++++++--
 2 files changed, 86 insertions(+), 17 deletions(-)

diff --git a/src/dev/dma_device.cc b/src/dev/dma_device.cc
index ebda635442..24e931ef8d 100644
--- a/src/dev/dma_device.cc
+++ b/src/dev/dma_device.cc
@@ -81,6 +81,8 @@ DmaPort::handleRespPacket(PacketPtr pkt, Tick delay)
 void
 DmaPort::handleResp(DmaReqState *state, Addr addr, Addr size, Tick delay)
 {
+    assert(pendingCount != 0);
+    pendingCount--;
     DPRINTF(DMA, "Received response %s for addr: %#x size: %d nb: %d,"  \
             " tot: %d sched %d\n",
             MemCmd(state->cmd).toString(), addr, size,
@@ -93,11 +95,22 @@ DmaPort::handleResp(DmaReqState *state, Addr addr, Addr size, Tick delay)
     state->numBytes += size;
     assert(state->totBytes >= state->numBytes);
 
-    // If we have reached the total number of bytes for this DMA request,
-    // then signal the completion and delete the sate.
-    if (state->totBytes == state->numBytes) {
-        assert(pendingCount != 0);
-        pendingCount--;
+    bool all_bytes = (state->totBytes == state->numBytes);
+    if (state->aborted) {
+        // If this request was aborted, check to see if its in flight accesses
+        // have finished. There may be packets for more than one request in
+        // flight at a time, so check for finished requests, or no more
+        // packets.
+        if (all_bytes || pendingCount == 0) {
+            // If yes, signal its abort event (if any) and delete the state.
+            if (state->abortEvent) {
+                device->schedule(state->abortEvent, curTick());
+            }
+            delete state;
+        }
+    } else if (all_bytes) {
+        // If we have reached the end of this DMA request, then signal the
+        // completion and delete the sate.
         if (state->completionEvent) {
             delay += state->delay;
             device->schedule(state->completionEvent, curTick() + delay);
@@ -166,8 +179,9 @@ DmaPort::drain()
 void
 DmaPort::recvReqRetry()
 {
-    assert(transmitList.size());
-    trySendTimingReq();
+    retryPending = false;
+    if (transmitList.size())
+        trySendTimingReq();
 }
 
 void
@@ -184,7 +198,6 @@ DmaPort::dmaAction(Packet::Command cmd, Addr addr, int size, Event *event,
     transmitList.push_back(
             new DmaReqState(cmd, addr, cacheLineSize, size,
                 data, flag, requestorId, sid, ssid, event, delay));
-    pendingCount++;
 
     // In zero time, also initiate the sending of the packets for the request
     // we have just created. For atomic this involves actually completing all
@@ -200,6 +213,42 @@ DmaPort::dmaAction(Packet::Command cmd, Addr addr, int size, Event *event,
               defaultSid, defaultSSid, delay, flag);
 }
 
+void
+DmaPort::abortPending()
+{
+    if (inRetry) {
+        delete inRetry;
+        inRetry = nullptr;
+    }
+
+    if (pendingCount && !transmitList.empty()) {
+        auto *state = transmitList.front();
+        if (state->numBytes != state->gen.complete()) {
+            // In flight packets refer to the transmission at the front of the
+            // list, and not a transmission whose packets have all been sent
+            // but not completed. Preserve the state so the packets don't have
+            // dangling pointers.
+            transmitList.pop_front();
+            state->aborted = true;
+        }
+    }
+
+    // Get rid of requests that haven't started yet.
+    while (!transmitList.empty()) {
+        auto *state = transmitList.front();
+        if (state->abortEvent)
+            device->schedule(state->abortEvent, curTick());
+        delete state;
+        transmitList.pop_front();
+    }
+
+    if (sendEvent.scheduled())
+        device->deschedule(sendEvent);
+
+    if (pendingCount == 0)
+        signalDrainDone();
+}
+
 void
 DmaPort::trySendTimingReq()
 {
@@ -216,14 +265,17 @@ DmaPort::trySendTimingReq()
     // Check if this was the last packet now, since hypothetically the packet
     // response may come immediately, and state may be deleted.
     bool last = state->gen.last();
-    if (!sendTimingReq(pkt))
+    if (sendTimingReq(pkt)) {
+        pendingCount++;
+    } else {
+        retryPending = true;
         inRetry = pkt;
-    if (!inRetry) {
+    }
+    if (!retryPending) {
+        state->gen.next();
         // If that was the last packet from this request, pop it from the list.
         if (last)
             transmitList.pop_front();
-        else
-            state->gen.next();
         DPRINTF(DMA, "-- Done\n");
         // If there is more to do, then do so.
         if (!transmitList.empty()) {
@@ -236,8 +288,8 @@ DmaPort::trySendTimingReq()
         DPRINTF(DMA, "-- Failed, waiting for retry\n");
     }
 
-    DPRINTF(DMA, "TransmitList: %d, inRetry: %d\n",
-            transmitList.size(), inRetry ? 1 : 0);
+    DPRINTF(DMA, "TransmitList: %d, retryPending: %d\n",
+            transmitList.size(), retryPending ? 1 : 0);
 }
 
 bool
@@ -246,6 +298,7 @@ DmaPort::sendAtomicReq(DmaReqState *state)
     PacketPtr pkt = state->createPacket();
     DPRINTF(DMA, "Sending  DMA for addr: %#x size: %d\n",
             state->gen.addr(), state->gen.size());
+    pendingCount++;
     Tick lat = sendAtomic(pkt);
 
     // Check if we're done, since handleResp may delete state.
@@ -258,6 +311,7 @@ bool
 DmaPort::sendAtomicBdReq(DmaReqState *state)
 {
     bool done = false;
+    pendingCount++;
 
     auto bd_it = memBackdoors.contains(state->gen.addr());
     if (bd_it == memBackdoors.end()) {
@@ -336,7 +390,7 @@ DmaPort::sendDma()
     if (sys->isTimingMode()) {
         // If we are either waiting for a retry or are still waiting after
         // sending the last packet, then do not proceed.
-        if (inRetry || sendEvent.scheduled()) {
+        if (retryPending || sendEvent.scheduled()) {
             DPRINTF(DMA, "Can't send immediately, waiting to send\n");
             return;
         }
diff --git a/src/dev/dma_device.hh b/src/dev/dma_device.hh
index 2a3468c988..92b44bf5f6 100644
--- a/src/dev/dma_device.hh
+++ b/src/dev/dma_device.hh
@@ -85,6 +85,12 @@ class DmaPort : public RequestPort, public Drainable
          * complete. */
         Event *completionEvent;
 
+        /** Event to call on the device when this transaction is aborted. */
+        Event *abortEvent;
+
+        /** Whether this request was aborted. */
+        bool aborted = false;
+
         /** Total number of bytes that this transaction involves. */
         const Addr totBytes;
 
@@ -115,8 +121,9 @@ class DmaPort : public RequestPort, public Drainable
 
         DmaReqState(Packet::Command _cmd, Addr addr, Addr chunk_sz, Addr tb,
                     uint8_t *_data, Request::Flags _flags, RequestorID _id,
-                    uint32_t _sid, uint32_t _ssid, Event *ce, Tick _delay)
-            : completionEvent(ce), totBytes(tb), delay(_delay),
+                    uint32_t _sid, uint32_t _ssid, Event *ce, Tick _delay,
+                    Event *ae=nullptr)
+            : completionEvent(ce), abortEvent(ae), totBytes(tb), delay(_delay),
               gen(addr, tb, chunk_sz), data(_data), flags(_flags), id(_id),
               sid(_sid), ssid(_ssid), cmd(_cmd)
         {}
@@ -168,6 +175,11 @@ class DmaPort : public RequestPort, public Drainable
 
     /** The packet (if any) waiting for a retry to send. */
     PacketPtr inRetry = nullptr;
+    /**
+     * Whether the other side expects us to wait for a retry. We may have
+     * decided not to actually send the packet by the time we get the retry.
+     */
+    bool retryPending = false;
 
     /** Default streamId */
     const uint32_t defaultSid;
@@ -195,6 +207,9 @@ class DmaPort : public RequestPort, public Drainable
               uint8_t *data, uint32_t sid, uint32_t ssid, Tick delay,
               Request::Flags flag=0);
 
+    // Abort and remove any pending DMA transmissions.
+    void abortPending();
+
     bool dmaPending() const { return pendingCount > 0; }
 
     DrainState drain() override;

From d632bba119ff17790f958ba4847ffea673b06885 Mon Sep 17 00:00:00 2001
From: HJikram <humzajahangirikram@gmail.com>
Date: Sat, 8 Apr 2023 00:38:17 +0500
Subject: [PATCH 311/492] stdlib: small fix in spec-2006 and spec-2007

I modified the spec-2017 benchmark file to include a
processor.switch() statement (which had been removed
in an earlier commit). I also replaced the use of
get_roi_ticks() with get_tick_stopwatch() as this
particular workload is not annotated with "workbegin"
and "workend" annotations.

Lastly, I fixed a minor bug in the spec-2006 file
which printed the total simulated ticks as zero due
to incorrect indexing of get_tick_stopwatch().

Change-Id: If0b6f88d6701a11f32e2e69994582524e0d7097b
Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/69537
Tested-by: kokoro <noreply+kokoro@google.com>
Maintainer: Jason Lowe-Power <power.jg@gmail.com>
Reviewed-by: Ayaz Akram <yazakram@ucdavis.edu>
Reviewed-by: Jason Lowe-Power <power.jg@gmail.com>
---
 .../example/gem5_library/x86-spec-cpu2006-benchmarks.py    | 2 +-
 .../example/gem5_library/x86-spec-cpu2017-benchmarks.py    | 7 ++++++-
 2 files changed, 7 insertions(+), 2 deletions(-)

diff --git a/configs/example/gem5_library/x86-spec-cpu2006-benchmarks.py b/configs/example/gem5_library/x86-spec-cpu2006-benchmarks.py
index e7a9e824a6..60d93ebe77 100644
--- a/configs/example/gem5_library/x86-spec-cpu2006-benchmarks.py
+++ b/configs/example/gem5_library/x86-spec-cpu2006-benchmarks.py
@@ -306,7 +306,7 @@ print("All simulation events were successful.")
 print("Performance statistics:")
 
 roi_begin_ticks = simulator.get_tick_stopwatch()[0][1]
-roi_end_ticks = simulator.get_tick_stopwatch()[0][1]
+roi_end_ticks = simulator.get_tick_stopwatch()[1][1]
 
 print("roi simulated ticks: " + str(roi_end_ticks - roi_begin_ticks))
 
diff --git a/configs/example/gem5_library/x86-spec-cpu2017-benchmarks.py b/configs/example/gem5_library/x86-spec-cpu2017-benchmarks.py
index 531ce9413e..c491c9bce9 100644
--- a/configs/example/gem5_library/x86-spec-cpu2017-benchmarks.py
+++ b/configs/example/gem5_library/x86-spec-cpu2017-benchmarks.py
@@ -288,6 +288,7 @@ def handle_exit():
     print("Done bootling Linux")
     print("Resetting stats at the start of ROI!")
     m5.stats.reset()
+    processor.switch()
     yield False  # E.g., continue the simulation.
     print("Dump stats at the end of the ROI!")
     m5.stats.dump()
@@ -319,7 +320,11 @@ print("Done with the simulation")
 print()
 print("Performance statistics:")
 
-print("Simulated time in ROI: " + ((str(simulator.get_roi_ticks()[0]))))
+roi_begin_ticks = simulator.get_tick_stopwatch()[0][1]
+roi_end_ticks = simulator.get_tick_stopwatch()[1][1]
+
+print("roi simulated ticks: " + str(roi_end_ticks - roi_begin_ticks))
+
 print(
     "Ran a total of", simulator.get_current_tick() / 1e12, "simulated seconds"
 )

From 5a943ce5a58765cac294d95333ac9ed59248c31a Mon Sep 17 00:00:00 2001
From: Gabe Black <gabeblack@google.com>
Date: Sat, 8 Apr 2023 04:50:56 -0700
Subject: [PATCH 312/492] util: Add a missing free() to m5term.

The "cwd" string is used to store the previous working directory, if
m5term needs to change it to maximize the space it has to create a
unix domain socket using a relative path. That string was not being
freed, which is a small memory leak. This change frees that memory.

Change-Id: I1ad901e7c59614dd938165cae91c3f666d86e872
Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/69557
Maintainer: Gabe Black <gabeblack@google.com>
Tested-by: kokoro <noreply+kokoro@google.com>
Reviewed-by: Jason Lowe-Power <power.jg@gmail.com>
---
 util/term/term.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/util/term/term.c b/util/term/term.c
index cf3fdda9b2..b1624b28be 100644
--- a/util/term/term.c
+++ b/util/term/term.c
@@ -235,6 +235,7 @@ remote_connect_unix(const char *cpath)
             perror("chdir back failed:");
             exit(1);
         }
+        free(cwd);
     }
 
     // Return the FD of our new connection.

From f15ddf82061ff2449f1f5eaedfa81f2f6d954753 Mon Sep 17 00:00:00 2001
From: Melissa Jost <melissakjost@gmail.com>
Date: Thu, 6 Apr 2023 15:36:39 -0700
Subject: [PATCH 313/492] configs: Fix RISCVMatched Test

Updated the import of the RISCVMatchedBoard so there would be
no more errors calling it.

Change-Id: I2dda4783edaa18851269860757d5b8ee58625838
Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/69458
Tested-by: kokoro <noreply+kokoro@google.com>
Maintainer: Bobby Bruce <bbruce@ucdavis.edu>
Reviewed-by: Jason Lowe-Power <power.jg@gmail.com>
---
 configs/example/gem5_library/riscvmatched-hello.py | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

diff --git a/configs/example/gem5_library/riscvmatched-hello.py b/configs/example/gem5_library/riscvmatched-hello.py
index d8ae8e5f9c..e7b4cf7128 100644
--- a/configs/example/gem5_library/riscvmatched-hello.py
+++ b/configs/example/gem5_library/riscvmatched-hello.py
@@ -39,9 +39,7 @@ scons build/RISCV/gem5.opt
 
 from gem5.resources.resource import Resource
 from gem5.simulate.simulator import Simulator
-from python.gem5.prebuilt.riscvmatched.riscvmatched_board import (
-    RISCVMatchedBoard,
-)
+from gem5.prebuilt.riscvmatched.riscvmatched_board import RISCVMatchedBoard
 from gem5.isas import ISA
 from gem5.utils.requires import requires
 

From 7eff90acdcddb0288074815a3be689d2b111bf29 Mon Sep 17 00:00:00 2001
From: Gabe Black <gabeblack@google.com>
Date: Sat, 18 Mar 2023 16:37:23 -0700
Subject: [PATCH 314/492] base: Add support for unix domain sockets in
 ListenSocket.

Change-Id: I6a5fa2cd3e4b567829203bf9d61ad2b55c259697
Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/69164
Tested-by: kokoro <noreply+kokoro@google.com>
Maintainer: Bobby Bruce <bbruce@ucdavis.edu>
Reviewed-by: Jui-min Lee <fcrh@google.com>
---
 src/base/socket.cc      | 150 +++++++++++++++++++++++++++++++++++++++-
 src/base/socket.hh      |  54 +++++++++++++++
 src/base/socket.test.cc |   7 +-
 3 files changed, 207 insertions(+), 4 deletions(-)

diff --git a/src/base/socket.cc b/src/base/socket.cc
index 13962d4b5c..2e9f815758 100644
--- a/src/base/socket.cc
+++ b/src/base/socket.cc
@@ -39,6 +39,7 @@
 #include <unistd.h>
 
 #include <cerrno>
+#include <filesystem>
 
 #include "base/logging.hh"
 #include "base/output.hh"
@@ -187,10 +188,10 @@ int
 ListenSocket::accept()
 {
     struct sockaddr_in sockaddr;
-    socklen_t slen = sizeof (sockaddr);
+    socklen_t slen = sizeof(sockaddr);
     int sfd = acceptCloexec(fd, (struct sockaddr *)&sockaddr, &slen);
-    if (sfd == -1)
-        return -1;
+    panic_if(sfd == -1, "%s: Failed to accept connection: %s",
+            name(), strerror(errno));
 
     return sfd;
 }
@@ -285,4 +286,147 @@ listenSocketInetConfig(int port)
     });
 }
 
+std::string
+ListenSocketUnix::truncate(const std::string &original, size_t max_len)
+{
+    if (original.size() <= max_len)
+        return original;
+
+    std::string truncated = original.substr(0, max_len);
+    warn("%s: Truncated \"%s\" to \"%s\"", name(), original, truncated);
+    return truncated;
+}
+
+void
+ListenSocketUnix::listen()
+{
+    panic_if(listening, "%s: Socket already listening!", name());
+
+    // only create socket if not already created by previous call
+    if (fd == -1) {
+        fd = socketCloexec(PF_UNIX, SOCK_STREAM, 0);
+        panic_if(fd < 0, "%s: Can't create unix socket:%s !",
+                name(), strerror(errno));
+    }
+
+    sockaddr_un serv_addr;
+    std::memset(&serv_addr, 0, sizeof(serv_addr));
+    size_t addr_size = prepSockaddrUn(serv_addr);
+
+    fatal_if(bind(fd, (struct sockaddr *)&(serv_addr), addr_size) != 0,
+            "%s: Cannot bind unix socket %s: %s", name(), *this,
+            strerror(errno));
+
+    fatal_if(::listen(fd, 1) == -1, "%s: Failed to listen on %s: %s\n",
+            name(), *this, strerror(errno));
+
+    ccprintf(std::cerr, "%s: Listening for connections on %s\n",
+            name(), *this);
+
+    setListening();
+}
+
+ListenSocketUnixFile::ListenSocketUnixFile(const std::string &_name,
+        const std::string &_dir, const std::string &_fname) :
+    ListenSocketUnix(_name), dir(_dir),
+    fname(truncate(_fname, sizeof(sockaddr_un::sun_path) - 1))
+{
+}
+
+ListenSocketUnixFile::~ListenSocketUnixFile()
+{
+    if (fd != -1) {
+        close(fd);
+        fd = -1;
+        unlink();
+    }
+}
+
+bool
+ListenSocketUnixFile::unlink() const
+{
+    auto path = resolvedDir + "/" + fname;
+    return ::unlink(path.c_str()) == 0;
+}
+
+size_t
+ListenSocketUnixFile::prepSockaddrUn(sockaddr_un &addr) const
+{
+    addr.sun_family = AF_UNIX;
+    std::memcpy(addr.sun_path, fname.c_str(), fname.size());
+    return sizeof(addr.sun_path);
+}
+
+void
+ListenSocketUnixFile::listen()
+{
+    resolvedDir = simout.resolve(dir);
+    warn_if(unlink(),
+            "%s: server path %s was occupied and will be replaced. Please "
+            "make sure there is no other server using the same path.",
+            name(), resolvedDir + "/" + fname);
+
+    // Make sure "dir" exists.
+    std::error_code ec;
+    std::filesystem::create_directory(resolvedDir, ec);
+    fatal_if(ec, "Failed to create directory %s", ec.message());
+
+    // Change the working directory to the directory containing the socket so
+    // that we maximize the limited space in sockaddr_un.sun_path.
+    auto cwd = std::filesystem::current_path(ec);
+    panic_if(ec, "Failed to get current working directory %s", ec.message());
+    std::filesystem::current_path(resolvedDir, ec);
+    fatal_if(ec, "Failed to change to directory %s: %s",
+            resolvedDir, ec.message());
+
+    ListenSocketUnix::listen();
+
+    std::filesystem::current_path(cwd, ec);
+    panic_if(ec, "Failed to change back working directory %s", ec.message());
+}
+
+void
+ListenSocketUnixFile::output(std::ostream &os) const
+{
+    os << "socket \"" << dir << "/" << fname << "\"";
+}
+
+ListenSocketConfig
+listenSocketUnixFileConfig(std::string dir, std::string fname)
+{
+    return ListenSocketConfig([dir, fname](const std::string &name) {
+        return std::make_unique<ListenSocketUnixFile>(name, dir, fname);
+    });
+}
+
+size_t
+ListenSocketUnixAbstract::prepSockaddrUn(sockaddr_un &addr) const
+{
+    addr.sun_family = AF_UNIX;
+    addr.sun_path[0] = '\0';
+    std::memcpy(&addr.sun_path[1], path.c_str(), path.size());
+    return offsetof(sockaddr_un, sun_path) + path.size() + 1;
+}
+
+ListenSocketUnixAbstract::ListenSocketUnixAbstract(
+        const std::string &_name, const std::string &_path) :
+    ListenSocketUnix(_name),
+    path(truncate(_path, sizeof(sockaddr_un::sun_path) - 1))
+{
+}
+
+void
+ListenSocketUnixAbstract::output(std::ostream &os) const
+{
+    os << "abstract socket \"" << path << "\"";
+}
+
+ListenSocketConfig
+listenSocketUnixAbstractConfig(std::string path)
+{
+    return ListenSocketConfig([path](const std::string &name) {
+        return std::make_unique<ListenSocketUnixAbstract>(name, path);
+    });
+}
+
 } // namespace gem5
diff --git a/src/base/socket.hh b/src/base/socket.hh
index 761312b291..33c1c3a3cb 100644
--- a/src/base/socket.hh
+++ b/src/base/socket.hh
@@ -184,6 +184,60 @@ class ListenSocketInet : public ListenSocket
 
 ListenSocketConfig listenSocketInetConfig(int port);
 
+// AF_UNIX based sockets.
+
+class ListenSocketUnix : public ListenSocket
+{
+  protected:
+    virtual size_t prepSockaddrUn(sockaddr_un &addr) const = 0;
+
+    std::string truncate(const std::string &original, size_t max_len);
+
+    ListenSocketUnix(const std::string &_name) : ListenSocket(_name) {}
+
+  public:
+    void listen() override;
+};
+
+class ListenSocketUnixFile : public ListenSocketUnix
+{
+  protected:
+    std::string dir;
+    std::string resolvedDir;
+    std::string fname;
+
+    bool unlink() const;
+
+    size_t prepSockaddrUn(sockaddr_un &addr) const override;
+
+  public:
+    ListenSocketUnixFile(const std::string &_name, const std::string &_dir,
+            const std::string &_fname);
+    ~ListenSocketUnixFile();
+
+    void listen() override;
+    void output(std::ostream &os) const override;
+};
+
+ListenSocketConfig listenSocketUnixFileConfig(
+        std::string dir, std::string fname);
+
+class ListenSocketUnixAbstract : public ListenSocketUnix
+{
+  protected:
+    std::string path;
+
+    size_t prepSockaddrUn(sockaddr_un &addr) const override;
+
+  public:
+    ListenSocketUnixAbstract(
+            const std::string &_name, const std::string &_path);
+
+    void output(std::ostream &os) const override;
+};
+
+ListenSocketConfig listenSocketUnixAbstractConfig(std::string path);
+
 } // namespace gem5
 
 #endif //__SOCKET_HH__
diff --git a/src/base/socket.test.cc b/src/base/socket.test.cc
index 0f0de54b6c..7bf9e180c7 100644
--- a/src/base/socket.test.cc
+++ b/src/base/socket.test.cc
@@ -207,5 +207,10 @@ TEST(SocketTest, RelistenWithDifferentInstanceOnSamePort)
 TEST(SocketTest, AcceptError)
 {
     MockListenSocket listen_socket(-1);
-    EXPECT_EQ(-1, listen_socket.accept());
+    EXPECT_ANY_THROW(listen_socket.accept());
+    std::string expected =
+        "panic: panic condition sfd == -1 occurred: mock: Failed to accept "
+        "connection: Bad file descriptor\n";
+    std::string actual = gtestLogOutput.str();
+    EXPECT_EQ(expected, actual);
 }

From 1258f481c942d7eed12a5f90db15fc8883fe41dc Mon Sep 17 00:00:00 2001
From: "Bobby R. Bruce" <bbruce@ucdavis.edu>
Date: Mon, 10 Apr 2023 16:42:46 -0700
Subject: [PATCH 315/492] scons: Add '-lstdc++fs' to LIBS env when GCC version
 < 9

This commit is in response to a failure in the compiler tests:
https://jenkins.gem5.org/job/compiler-checks/570/

GCC versions <9 failed to compile gem5 with the following error:

```
socket.cc:(.text+0x32d5): undefined reference to `std::filesystem::__cxx11::path::_M_split_cmpts()'
socket.cc:(.text+0x32e5): undefined reference to `std::filesystem::create_directory(std::filesystem::__cxx11::path const&, std::error_code&)'
socket.cc:(.text+0x3370): undefined reference to `std::filesystem::current_path[abi:cxx11](std::error_code&)'
socket.cc:(.text+0x33cc): undefined reference to `std::filesystem::__cxx11::path::_M_split_cmpts()'
socket.cc:(.text+0x33d7): undefined reference to `std::filesystem::current_path(std::filesystem::__cxx11::path const&, std::error_code&)'
socket.cc:(.text+0x3468): undefined reference to `std::filesystem::current_path(std::filesystem::__cxx11::path const&, std::error_code&)'
```

This was due to this patch:
https://gem5-review.googlesource.com/c/public/gem5/+/69164 which
utilized `std::filesystem`. Prior to to GCC v9, explicit linking with
the stdc++fs library is required due to `std::filesystem` being an
experimental implementation prior to GGC v9.

Change-Id: I584e29f100cb59b40bd155c212e1814f6d8fbb99
Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/69597
Tested-by: kokoro <noreply+kokoro@google.com>
Reviewed-by: Bobby Bruce <bbruce@ucdavis.edu>
Maintainer: Bobby Bruce <bbruce@ucdavis.edu>
---
 SConstruct | 11 +++++++++++
 1 file changed, 11 insertions(+)

diff --git a/SConstruct b/SConstruct
index 7d6f40624d..e91e7007d8 100755
--- a/SConstruct
+++ b/SConstruct
@@ -483,6 +483,17 @@ for variant_path in variant_paths:
             '-fno-builtin-malloc', '-fno-builtin-calloc',
             '-fno-builtin-realloc', '-fno-builtin-free'])
 
+        if compareVersions(env['CXXVERSION'], "9") < 0:
+            # `libstdc++fs`` must be explicitly linked for `std::filesystem``
+            # in GCC version 8. As of GCC version 9, this is not required.
+            #
+            # In GCC 7 the `libstdc++fs`` library explicit linkage is also
+            # required but the `std::filesystem` is under the `experimental`
+            # namespace(`std::experimental::filesystem`).
+            #
+            # Note: gem5 does not support GCC versions < 7.
+            env.Append(LIBS=['stdc++fs'])
+
     elif env['CLANG']:
         if compareVersions(env['CXXVERSION'], "6") < 0:
             error('clang version 6 or newer required.\n'

From e79d6616ddb97f0ca37fd5095f1cbe44fe9759c9 Mon Sep 17 00:00:00 2001
From: "Bobby R. Bruce" <bbruce@ucdavis.edu>
Date: Mon, 10 Apr 2023 17:07:18 -0700
Subject: [PATCH 316/492] base: Use <experimental/filesystem> include for GCC
 v7

gem5 officially supports GCC 7+. In GCC 7 the "filesystem" module was
added but only in the "experimental" namespace as
"<experimental/filesystem>". In GCC 8+ the module can be found as
"<filesystem>".

Because of this, include guards to handle this. They include
"<experimental/filesystem>" for the GCC v7 case and the "<filesystem>"
for all other versions.

This bug was partially responsible for this compiler tests failures:
https://jenkins.gem5.org/job/compiler-checks/570

Note: gem5 does not support GCC versions <7. Thus the
"#if __GNUC__ >=8 <GCC 8+ code> #else <GCC 7 code> #endif" logic is
valid.

Change-Id: I31db5488f272f9652edebf24ecefca3722369076
Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/69598
Tested-by: kokoro <noreply+kokoro@google.com>
Reviewed-by: Bobby Bruce <bbruce@ucdavis.edu>
Maintainer: Bobby Bruce <bbruce@ucdavis.edu>
---
 src/base/socket.cc | 14 +++++++++++++-
 1 file changed, 13 insertions(+), 1 deletion(-)

diff --git a/src/base/socket.cc b/src/base/socket.cc
index 2e9f815758..0f47b2ab13 100644
--- a/src/base/socket.cc
+++ b/src/base/socket.cc
@@ -39,7 +39,19 @@
 #include <unistd.h>
 
 #include <cerrno>
-#include <filesystem>
+
+#if (defined(__GNUC__) && (__GNUC__ >= 8)) || defined(__clang__)
+    #include <filesystem>
+#else
+    // This is only reachable if we're using GCC 7 (note: gem5 does not support
+    // GCC versions older than GCC 7 as they do not support the C++17
+    // standard).
+    // If we're using GCC 7, we need to use <experimental/filesystem>.
+    #include <experimental/filesystem>
+    namespace std {
+        namespace filesystem = experimental::filesystem;
+    }
+#endif
 
 #include "base/logging.hh"
 #include "base/output.hh"

From f9cf3de711d59bc3a81bb8d49f1408b1f6349a7b Mon Sep 17 00:00:00 2001
From: Gabe Black <gabeblack@google.com>
Date: Sat, 18 Mar 2023 23:10:03 -0700
Subject: [PATCH 317/492] mem: Use HostSocket in the SharedMemoryServer.

Use a HostSocket parameter to accept connections, rather than a hand
implementation for unix domain sockets. This consolidates this code
with the code derived from it in ListenSocket, and also makes it
possible to connect to the SharedMemoryServer over an AF_INET socket.

Change-Id: I8e05434d08cffaebdf6c68a967e2ee7613c10a76
Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/69168
Maintainer: Gabe Black <gabeblack@google.com>
Tested-by: kokoro <noreply+kokoro@google.com>
Reviewed-by: Jui-min Lee <fcrh@google.com>
---
 src/mem/shared_memory_server.cc | 67 ++++++++++++---------------------
 src/mem/shared_memory_server.hh |  4 +-
 2 files changed, 26 insertions(+), 45 deletions(-)

diff --git a/src/mem/shared_memory_server.cc b/src/mem/shared_memory_server.cc
index 6344ee0388..3e49164e6d 100644
--- a/src/mem/shared_memory_server.cc
+++ b/src/mem/shared_memory_server.cc
@@ -39,6 +39,7 @@
 #include <algorithm>
 #include <cerrno>
 #include <cstring>
+#include <filesystem>
 
 #include "base/logging.hh"
 #include "base/output.hh"
@@ -49,54 +50,37 @@ namespace gem5
 namespace memory
 {
 
+namespace
+{
+
+ListenSocketPtr
+buildListenSocket(const std::string &path, const std::string &name)
+{
+    fatal_if(path.empty(), "%s: Empty socket path", name);
+    if (path[0] == '@')
+        return listenSocketUnixAbstractConfig(path.substr(1)).build(name);
+
+    std::filesystem::path p(path);
+    return listenSocketUnixFileConfig(
+            p.parent_path(), p.filename()).build(name);
+}
+
+} // anonymous namespace
+
 SharedMemoryServer::SharedMemoryServer(const SharedMemoryServerParams& params)
     : SimObject(params),
-      sockAddr(UnixSocketAddr::build(params.server_path)),
       system(params.system),
-      serverFd(-1)
+      listener(buildListenSocket(params.server_path, name()))
 {
     fatal_if(system == nullptr, "Requires a system to share memory from!");
-    // Create a new unix socket.
-    serverFd = ListenSocket::socketCloexec(AF_UNIX, SOCK_STREAM, 0);
-    panic_if(serverFd < 0, "%s: cannot create unix socket: %s", name(),
-             strerror(errno));
+    listener->listen();
 
-    const auto& [serv_addr, addr_size, is_abstract, formatted_path] = sockAddr;
-
-    if (!is_abstract) {
-        // Ensure the unix socket path to use is not occupied. Also, if there's
-        // actually anything to be removed, warn the user something might be
-        // off.
-        bool old_sock_removed = unlink(serv_addr.sun_path) == 0;
-        warn_if(old_sock_removed,
-                "%s: server path %s was occupied and will be replaced. Please "
-                "make sure there is no other server using the same path.",
-                name(), serv_addr.sun_path);
-    }
-    int bind_retv = bind(
-        serverFd, reinterpret_cast<const sockaddr*>(&serv_addr), addr_size);
-    fatal_if(bind_retv != 0, "%s: cannot bind unix socket '%s': %s", name(),
-             formatted_path, strerror(errno));
-    // Start listening.
-    int listen_retv = listen(serverFd, 1);
-    fatal_if(listen_retv != 0, "%s: listen failed: %s", name(),
-             strerror(errno));
-    listenSocketEvent.reset(new ListenSocketEvent(serverFd, this));
+    listenSocketEvent.reset(new ListenSocketEvent(listener->getfd(), this));
     pollQueue.schedule(listenSocketEvent.get());
-    inform("%s: listening at %s", name(), formatted_path);
+    inform("%s: listening at %s", name(), *listener);
 }
 
-SharedMemoryServer::~SharedMemoryServer()
-{
-    if (!sockAddr.isAbstract) {
-        int unlink_retv = unlink(sockAddr.addr.sun_path);
-        warn_if(unlink_retv != 0, "%s: cannot unlink unix socket: %s", name(),
-                strerror(errno));
-    }
-    int close_retv = close(serverFd);
-    warn_if(close_retv != 0, "%s: cannot close unix socket: %s", name(),
-            strerror(errno));
-}
+SharedMemoryServer::~SharedMemoryServer() {}
 
 SharedMemoryServer::BaseShmPollEvent::BaseShmPollEvent(
     int fd, SharedMemoryServer* shm_server)
@@ -130,10 +114,7 @@ SharedMemoryServer::BaseShmPollEvent::tryReadAll(void* buffer, size_t size)
 void
 SharedMemoryServer::ListenSocketEvent::process(int revents)
 {
-    panic_if(revents & (POLLERR | POLLNVAL), "%s: listen socket is broken",
-             name());
-    int cli_fd = ListenSocket::acceptCloexec(pfd.fd, nullptr, nullptr);
-    panic_if(cli_fd < 0, "%s: accept failed: %s", name(), strerror(errno));
+    int cli_fd = shmServer->listener->accept();
     inform("%s: accept new connection %d", name(), cli_fd);
     shmServer->clientSocketEvents[cli_fd].reset(
         new ClientSocketEvent(cli_fd, shmServer));
diff --git a/src/mem/shared_memory_server.hh b/src/mem/shared_memory_server.hh
index d9fbeb3f20..a4ef63d541 100644
--- a/src/mem/shared_memory_server.hh
+++ b/src/mem/shared_memory_server.hh
@@ -83,10 +83,10 @@ class SharedMemoryServer : public SimObject
         void process(int revent) override;
     };
 
-    UnixSocketAddr sockAddr;
     System* system;
 
-    int serverFd;
+    ListenSocketPtr listener;
+
     std::unique_ptr<ListenSocketEvent> listenSocketEvent;
     std::unordered_map<int, std::unique_ptr<ClientSocketEvent>>
         clientSocketEvents;

From e37b1d1d920171a80158f9f75acaf22bf3f779e5 Mon Sep 17 00:00:00 2001
From: Roger Chang <rogerycchang@google.com>
Date: Fri, 7 Apr 2023 16:47:52 +0800
Subject: [PATCH 318/492] arch-riscv: Fix the address check of pmp

Fix the AddrRange of pmp region. the contains of AddrRange(start, end)
will be valid if the address y is in start <= y < end. It should not
minus 1 in end parameter.

Change-Id: I1a0eb51f2d5881b8aa90d310884922b16f2019fb
Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/69577
Tested-by: kokoro <noreply+kokoro@google.com>
Maintainer: Bobby Bruce <bbruce@ucdavis.edu>
Reviewed-by: Yu-hsin Wang <yuhsingw@google.com>
---
 src/arch/riscv/pmp.cc | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/src/arch/riscv/pmp.cc b/src/arch/riscv/pmp.cc
index 49dc7ba822..6275104062 100644
--- a/src/arch/riscv/pmp.cc
+++ b/src/arch/riscv/pmp.cc
@@ -83,9 +83,9 @@ PMP::pmpCheck(const RequestPtr &req, BaseMMU::Mode mode,
     for (int i = 0; i < pmpTable.size(); i++) {
         AddrRange pmp_range = pmpTable[i].pmpAddr;
         if (pmp_range.contains(req->getPaddr()) &&
-                pmp_range.contains(req->getPaddr() + req->getSize())) {
+                pmp_range.contains(req->getPaddr() + req->getSize() - 1)) {
             // according to specs address is only matched,
-            // when (addr) and (addr + request_size) are both
+            // when (addr) and (addr + request_size - 1) are both
             // within the pmp range
             match_index = i;
         }
@@ -197,11 +197,11 @@ PMP::pmpUpdateRule(uint32_t pmp_index)
         break;
       case PMP_TOR:
         // top of range mode
-        this_range = AddrRange(prevAddr << 2, (this_addr << 2) - 1);
+        this_range = AddrRange(prevAddr << 2, (this_addr << 2));
         break;
       case PMP_NA4:
         // naturally aligned four byte region
-        this_range = AddrRange(this_addr << 2, (this_addr + 4) - 1);
+        this_range = AddrRange(this_addr << 2, ((this_addr << 2) + 4));
         break;
       case PMP_NAPOT:
         // naturally aligned power of two region, >= 8 bytes
@@ -246,7 +246,7 @@ PMP::pmpUpdateAddr(uint32_t pmp_index, Addr this_addr)
     }
 
     DPRINTF(PMP, "Update pmp addr %#x for pmp entry %u \n",
-                                      this_addr, pmp_index);
+                                      (this_addr << 2), pmp_index);
 
     if (pmpTable[pmp_index].pmpCfg & PMP_LOCK) {
         DPRINTF(PMP, "Update pmp entry %u failed because the lock bit set\n",
@@ -303,7 +303,7 @@ PMP::pmpDecodeNapot(Addr pmpaddr)
         return this_range;
     } else {
         uint64_t t1 = ctz64(~pmpaddr);
-        uint64_t range = (std::pow(2,t1+3))-1;
+        uint64_t range = (1ULL << (t1+3));
 
         // pmpaddr reg encodes bits 55-2 of a
         // 56 bit physical address for RV64

From e4a46cb09d7856124a1fab955ec7053bf89f0383 Mon Sep 17 00:00:00 2001
From: Roger Chang <rogerycchang@google.com>
Date: Fri, 7 Apr 2023 16:47:52 +0800
Subject: [PATCH 319/492] arch-riscv: Refactor the shouldCheckPMP function

The shouldCheckPMP can be simply with pmode != PRV_M since the
privilege mode of memory is modified by TLB and Walker. The
numRules check can done in shouldPMPCheck

Change-Id: I842687674fed7bc4d88a9ba6b4c4d52c3459068f
Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/69497
Maintainer: Bobby Bruce <bbruce@ucdavis.edu>
Tested-by: kokoro <noreply+kokoro@google.com>
Reviewed-by: Yu-hsin Wang <yuhsingw@google.com>
---
 src/arch/riscv/pmp.cc | 31 ++++++++-----------------------
 src/arch/riscv/pmp.hh |  8 +++-----
 2 files changed, 11 insertions(+), 28 deletions(-)

diff --git a/src/arch/riscv/pmp.cc b/src/arch/riscv/pmp.cc
index 6275104062..8fa1ca3cdb 100644
--- a/src/arch/riscv/pmp.cc
+++ b/src/arch/riscv/pmp.cc
@@ -59,7 +59,7 @@ PMP::pmpCheck(const RequestPtr &req, BaseMMU::Mode mode,
               Addr vaddr)
 {
     // First determine if pmp table should be consulted
-    if (!shouldCheckPMP(pmode, mode, tc))
+    if (!shouldCheckPMP(pmode, tc))
         return NoFault;
 
     if (req->hasVaddr()) {
@@ -71,9 +71,6 @@ PMP::pmpCheck(const RequestPtr &req, BaseMMU::Mode mode,
                 req->getPaddr());
     }
 
-    if (numRules == 0)
-        return NoFault;
-
     // match_index will be used to identify the pmp entry
     // which matched for the given address
     int match_index = -1;
@@ -273,26 +270,14 @@ PMP::pmpUpdateAddr(uint32_t pmp_index, Addr this_addr)
 }
 
 bool
-PMP::shouldCheckPMP(RiscvISA::PrivilegeMode pmode,
-            BaseMMU::Mode mode, ThreadContext *tc)
+PMP::shouldCheckPMP(RiscvISA::PrivilegeMode pmode, ThreadContext *tc)
 {
-    // instruction fetch in S and U mode
-    bool cond1 = (mode == BaseMMU::Execute &&
-            (pmode != RiscvISA::PrivilegeMode::PRV_M));
-
-    // data access in S and U mode when MPRV in mstatus is clear
-    RiscvISA::STATUS status =
-            tc->readMiscRegNoEffect(RiscvISA::MISCREG_STATUS);
-    bool cond2 = (mode != BaseMMU::Execute &&
-                 (pmode != RiscvISA::PrivilegeMode::PRV_M)
-                 && (!status.mprv));
-
-    // data access in any mode when MPRV bit in mstatus is set
-    // and the MPP field in mstatus is S or U
-    bool cond3 = (mode != BaseMMU::Execute && (status.mprv)
-    && (status.mpp != RiscvISA::PrivilegeMode::PRV_M));
-
-    return (cond1 || cond2 || cond3 || hasLockEntry);
+    // The privilege mode of memory read and write
+    // is modified by TLB. It can just simply check if
+    // the numRule is not zero, then return true if
+    // privilege mode is not M or has any lock entry
+    return numRules != 0 && (
+        pmode != RiscvISA::PrivilegeMode::PRV_M || hasLockEntry);
 }
 
 AddrRange
diff --git a/src/arch/riscv/pmp.hh b/src/arch/riscv/pmp.hh
index 24cb4ad1ca..ff8c4fc1b9 100644
--- a/src/arch/riscv/pmp.hh
+++ b/src/arch/riscv/pmp.hh
@@ -118,7 +118,7 @@ class PMP : public SimObject
      * is allowed based on the pmp rules.
      * @param req memory request.
      * @param mode mode of request (read, write, execute).
-     * @param pmode current privilege mode of execution (U, S, M).
+     * @param pmode current privilege mode of memory (U, S, M).
      * @param tc thread context.
      * @param vaddr optional parameter to pass vaddr of original
      * request for which a page table walk is consulted by pmp unit
@@ -159,13 +159,11 @@ class PMP : public SimObject
      * This function is called during a memory
      * access to determine if the pmp table
      * should be consulted for this access.
-     * @param pmode current privilege mode of execution (U, S, M).
-     * @param mode mode of request (read, write, execute).
+     * @param pmode current privilege mode of memory (U, S, M).
      * @param tc thread context.
      * @return true or false.
      */
-    bool shouldCheckPMP(RiscvISA::PrivilegeMode pmode,
-                BaseMMU::Mode mode, ThreadContext *tc);
+    bool shouldCheckPMP(RiscvISA::PrivilegeMode pmode, ThreadContext *tc);
 
     /**
      * createAddrfault creates an address fault

From 57aaccdeff83543193079dea51e90d9b28024087 Mon Sep 17 00:00:00 2001
From: Gabe Black <gabeblack@google.com>
Date: Sat, 18 Mar 2023 20:25:11 -0700
Subject: [PATCH 320/492] base,python: Add a param type for host sockets.

These can either be set to an integer, in which case it's interpreted
as a TCP port, or a string, in which case it's treated as a unix domain
socket. If the unix domain socket is prefixed with a "@", it will be
treated as an abstract socket.

When stored in the ini file, there is always a prefix added to make
parsing the string more systematic and less ambiguous. A port number is
prefixed with "#", an abstract socket with "@", and a socket file with
the prefix "P" for "path".

Change-Id: I1fc7a579074e849b3becd936238c62fb0d9a2087
Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/69165
Reviewed-by: Yu-hsin Wang <yuhsingw@google.com>
Maintainer: Gabe Black <gabeblack@google.com>
Tested-by: kokoro <noreply+kokoro@google.com>
---
 src/base/socket.cc          | 26 ++++++++++++++++
 src/base/socket.hh          |  2 ++
 src/python/m5/params.py     | 59 +++++++++++++++++++++++++++++++++++++
 src/python/pybind11/core.cc | 15 ++++++++++
 4 files changed, 102 insertions(+)

diff --git a/src/base/socket.cc b/src/base/socket.cc
index 0f47b2ab13..a30c6af446 100644
--- a/src/base/socket.cc
+++ b/src/base/socket.cc
@@ -208,6 +208,32 @@ ListenSocket::accept()
     return sfd;
 }
 
+bool
+ListenSocketConfig::parseIni(const std::string &value,
+        ListenSocketConfig &retval)
+{
+    if (value.size() == 0) {
+        retval = listenSocketEmptyConfig();
+        return true;
+    } else if (value[0] == '@') {
+        retval = listenSocketUnixAbstractConfig(value.substr(1));
+        return true;
+    } else if (value[0] == 'P') {
+        std::filesystem::path p(value.substr(1));
+        retval = listenSocketUnixFileConfig(p.parent_path(), p.filename());
+        return true;
+    } else if (value[0] == '#') {
+        uint64_t port;
+        bool ret = to_number(value.substr(1), port);
+        if (!ret)
+            return false;
+        retval = listenSocketInetConfig(port);
+        return true;
+    } else {
+        panic("Can't interpret %s as a host socket.", value);
+    }
+}
+
 ListenSocketInet::ListenSocketInet(const std::string &_name, int port)
     : ListenSocket(_name), _port(port)
 {}
diff --git a/src/base/socket.hh b/src/base/socket.hh
index 33c1c3a3cb..5ae02aa103 100644
--- a/src/base/socket.hh
+++ b/src/base/socket.hh
@@ -159,6 +159,8 @@ class ListenSocketConfig
 
     operator bool() const { return (bool)builder; }
 
+    static bool parseIni(const std::string &value, ListenSocketConfig &retval);
+
   private:
     Builder builder;
 };
diff --git a/src/python/m5/params.py b/src/python/m5/params.py
index e9047a85d4..2ca6dfcc14 100644
--- a/src/python/m5/params.py
+++ b/src/python/m5/params.py
@@ -1085,6 +1085,65 @@ class Bool(ParamValue):
         code(f"{ret} to_bool({src}, {dest});")
 
 
+class HostSocket(ParamValue):
+    cxx_type = "ListenSocketConfig"
+
+    @classmethod
+    def cxx_predecls(cls, code):
+        code('#include "base/socket.hh"')
+
+    def __init__(self, value):
+        if isinstance(value, HostSocket):
+            self.value = value.value
+        else:
+            self.value = value
+
+    def getValue(self):
+        from _m5.socket import listenSocketEmptyConfig
+        from _m5.socket import listenSocketInetConfig
+        from _m5.socket import listenSocketUnixFileConfig
+        from _m5.socket import listenSocketUnixAbstractConfig
+
+        if isinstance(self.value, str):
+            if self.value[0] == "@":
+                return listenSocketUnixAbstractConfig(self.value[1:])
+            else:
+                d, f = os.path.split(self.value)
+                return listenSocketUnixFileConfig(d, f)
+        else:
+            if self.value == 0:
+                return listenSocketEmptyConfig()
+            else:
+                return listenSocketInetConfig(self.value)
+
+    def __call__(self, value):
+        self.__init__(value)
+        return value
+
+    def __str__(self):
+        if isinstance(self.value, str):
+            return self.value
+        else:
+            return "#" + str(self.value)
+
+    def ini_str(self):
+        if isinstance(self.value, str):
+            if self.value[0] == "@":
+                return self.value
+            else:
+                return "P" + self.value
+        else:
+            return "#" + str(self.value)
+
+    @classmethod
+    def cxx_ini_predecls(cls, code):
+        code('#include "base/socket.hh"')
+
+    @classmethod
+    def cxx_ini_parse(cls, code, src, dest, ret):
+        code(f"{ret} ListenSocketConfig::parseIni({src}, {dest});")
+
+
 def IncEthernetAddr(addr, val=1):
     bytes = [int(x, 16) for x in addr.split(":")]
     bytes[5] += val
diff --git a/src/python/pybind11/core.cc b/src/python/pybind11/core.cc
index bd83a74331..0b03d5a13b 100644
--- a/src/python/pybind11/core.cc
+++ b/src/python/pybind11/core.cc
@@ -223,6 +223,20 @@ init_loader(py::module_ &m_native)
     m.def("setInterpDir", &loader::setInterpDir);
 }
 
+static void
+init_socket(py::module_ &m_native)
+{
+    py::module_ m_socket = m_native.def_submodule("socket");
+    m_socket
+        .def("listenSocketEmptyConfig", &listenSocketEmptyConfig)
+        .def("listenSocketInetConfig", &listenSocketInetConfig)
+        .def("listenSocketUnixFileConfig", &listenSocketUnixFileConfig)
+        .def("listenSocketUnixAbstractConfig",
+                &listenSocketUnixAbstractConfig);
+
+    py::class_<ListenSocketConfig>(m_socket, "ListenSocketConfig");
+}
+
 void
 pybind_init_core(py::module_ &m_native)
 {
@@ -334,6 +348,7 @@ pybind_init_core(py::module_ &m_native)
     init_net(m_native);
     init_loader(m_native);
     init_pc(m_native);
+    init_socket(m_native);
 }
 
 } // namespace gem5

From 2a44f3bfc7061e44504677a0cad44eb766944d02 Mon Sep 17 00:00:00 2001
From: Gabe Black <gabeblack@google.com>
Date: Sat, 18 Mar 2023 23:17:12 -0700
Subject: [PATCH 321/492] base: Remove the now unused UnixSocketAddr class and
 associated code.

This job is now handled by the python param code, and the ListenSocket
classes.

Change-Id: I3a29b880b2484c5e25071bdef59fc73e1e8c2760
Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/69169
Tested-by: kokoro <noreply+kokoro@google.com>
Maintainer: Gabe Black <gabeblack@google.com>
Reviewed-by: Simon Park <seminpark@google.com>
---
 src/base/socket.cc      | 63 -----------------------------------
 src/base/socket.hh      | 31 -----------------
 src/base/socket.test.cc | 73 -----------------------------------------
 3 files changed, 167 deletions(-)

diff --git a/src/base/socket.cc b/src/base/socket.cc
index a30c6af446..76dc73f5fd 100644
--- a/src/base/socket.cc
+++ b/src/base/socket.cc
@@ -61,75 +61,12 @@
 
 namespace gem5
 {
-namespace
-{
-
-bool
-isSocketNameAbstract(const std::string &path)
-{
-    if (path.empty()) {
-        return false;
-    }
-    // No null byte should be present in the path
-    return path.front() == '@';
-}
-
-std::string
-resolve(const std::string &path)
-{
-    if (path.empty()) {
-        return path;
-    }
-    if (isSocketNameAbstract(path)) {
-        return '\0' + path.substr(1);
-    }
-    return simout.resolve(path);
-}
-
-}  // namespace
 
 bool ListenSocket::listeningDisabled = false;
 bool ListenSocket::anyListening = false;
 
 bool ListenSocket::bindToLoopback = false;
 
-UnixSocketAddr
-UnixSocketAddr::build(const std::string &path)
-{
-    sockaddr_un addr = {.sun_family = AF_UNIX, .sun_path = {}};
-
-    const bool is_abstract = isSocketNameAbstract(path);
-    size_t max_len = sizeof(addr.sun_path);
-    if (!is_abstract) {
-        // File based socket names need to be null terminated
-        max_len -= 1;
-    }
-
-    std::string resolved_path = resolve(path);
-    std::string fmt_path = replace(resolved_path, '\0', '@');
-    if (resolved_path.size() > max_len) {
-        resolved_path = resolved_path.substr(0, max_len);
-        const std::string untruncated_path = std::move(fmt_path);
-        fmt_path = replace(resolved_path, '\0', '@');
-        warn("SocketPath: unix socket path truncated from '%s' to '%s'",
-             untruncated_path, fmt_path);
-    }
-
-    // We can't use strncpy here, since abstract sockets start with \0 which
-    // will make strncpy think that the string is empty.
-    memcpy(addr.sun_path, resolved_path.c_str(), resolved_path.size());
-    // We can't use sizeof(sockaddr_un) for abstract sockets, since all
-    // sizeof(sun_path) bytes are used in representing the path.
-    const size_t path_size =
-        is_abstract ? resolved_path.size() : sizeof(addr.sun_path);
-    const size_t addr_size = offsetof(sockaddr_un, sun_path) + path_size;
-
-    return UnixSocketAddr{.addr = std::move(addr),
-                          .addrSize = addr_size,
-                          .isAbstract = is_abstract,
-                          .formattedPath = std::move(fmt_path)};
-}
-
 void
 ListenSocket::cleanup()
 {
diff --git a/src/base/socket.hh b/src/base/socket.hh
index 5ae02aa103..b8828e7f43 100644
--- a/src/base/socket.hh
+++ b/src/base/socket.hh
@@ -43,37 +43,6 @@
 namespace gem5
 {
 
-/**
- * @brief Wrapper around sockaddr_un, so that it can be used for both file
- * based unix sockets as well as abstract unix sockets.
- */
-struct UnixSocketAddr
-{
-    /**
-     * @brief Builds UnixSocketAddr from the given path.
-     * @pre: `path` either represents a file based unix socket, or an abstract
-     *       unix socket. If `path` represents an abstract socket, it should
-     *       start with the character '@', and it should not have any null
-     *       bytes in the name.
-     * @param path: Pathname, where the socket should be instantiated.
-     * @return UnixSocketAddr
-     */
-    static UnixSocketAddr build(const std::string &path);
-
-    sockaddr_un addr;
-    // Size of `sockaddr_un addr`. This is equal to sizeof(sockaddr_un) if
-    // `addr` represents a normal file based unix socket. For abstract sockets
-    // however, the size could be different. Because all sizeof(sun_path) is
-    // used to represent the name of an abstract socket, addrSize for abstract
-    // sockets only count the number of characters actually used by sun_path,
-    // excluding any trailing null bytes.
-    size_t addrSize;
-    bool isAbstract;
-    // Formatted string for file based sockets look the same as addr.sun_path.
-    // For abstract sockets however, all null bytes are replaced with @
-    std::string formattedPath;
-};
-
 class ListenSocket : public Named
 {
   protected:
diff --git a/src/base/socket.test.cc b/src/base/socket.test.cc
index 7bf9e180c7..5fd0f3f129 100644
--- a/src/base/socket.test.cc
+++ b/src/base/socket.test.cc
@@ -45,79 +45,6 @@ using namespace gem5;
  * socket.cc have not been fully tested due to interaction with system-calls.
  */
 
-namespace {
-
-std::string
-repeat(const std::string& str, size_t n)
-{
-    std::stringstream ss;
-    for (int i = 0; i < n; ++i) {
-        ss << str;
-    }
-    return ss.str();
-}
-
-} // namespace
-
-TEST(UnixSocketAddrTest, AbstractSocket)
-{
-    UnixSocketAddr sock_addr = UnixSocketAddr::build("@abstract");
-    EXPECT_EQ(AF_UNIX, sock_addr.addr.sun_family);
-    // null byte will not show, so compare from the first byte
-    EXPECT_STREQ("abstract", sock_addr.addr.sun_path + 1);
-    EXPECT_TRUE(sock_addr.isAbstract);
-    EXPECT_STREQ("@abstract", sock_addr.formattedPath.c_str());
-}
-
-TEST(UnixSocketAddrTest, TruncatedAbstractSocket)
-{
-    // Test that address is truncated if longer than sizeof(sun_path)
-    constexpr size_t MaxSize = sizeof(std::declval<sockaddr_un>().sun_path);
-
-    // >sizeof(sun_path) bytes
-    std::string addr = "@" + repeat("123456789", 100);
-    ASSERT_GT(addr.size(), MaxSize);
-    std::string truncated_addr = addr.substr(0, MaxSize);
-
-    UnixSocketAddr sock_addr = UnixSocketAddr::build(addr);
-    EXPECT_EQ(AF_UNIX, sock_addr.addr.sun_family);
-    // Use memcmp so that we can compare null bytes as well
-    std::string null_formatted = '\0' + truncated_addr.substr(1);
-    EXPECT_EQ(0, std::memcmp(null_formatted.c_str(), sock_addr.addr.sun_path,
-                             MaxSize));
-    EXPECT_TRUE(sock_addr.isAbstract);
-    EXPECT_EQ(truncated_addr, sock_addr.formattedPath);
-}
-
-TEST(UnixSocketAddrTest, FileBasedSocket)
-{
-    std::string addr = "/home/parent/dir/x";
-    UnixSocketAddr sock_addr = UnixSocketAddr::build(addr);
-    EXPECT_EQ(AF_UNIX, sock_addr.addr.sun_family);
-    EXPECT_STREQ(addr.c_str(), sock_addr.addr.sun_path);
-    EXPECT_FALSE(sock_addr.isAbstract);
-    EXPECT_EQ(addr, sock_addr.formattedPath);
-}
-
-TEST(UnixSocketAddrTest, TruncatedFileBasedSocket)
-{
-    // sun_path should null terminate, so test that address is truncated if
-    // longer than sizeof(sun_path) - 1 bytes.
-    constexpr size_t MaxSize =
-        sizeof(std::declval<sockaddr_un>().sun_path) - 1;
-
-    // >sizeof(sun_path) - 1 bytes
-    std::string addr = "/" + repeat("123456789", 100);
-    ASSERT_GT(addr.size(), MaxSize);
-    std::string truncated_addr = addr.substr(0, MaxSize);
-
-    UnixSocketAddr sock_addr = UnixSocketAddr::build(addr);
-    EXPECT_EQ(AF_UNIX, sock_addr.addr.sun_family);
-    EXPECT_STREQ(truncated_addr.c_str(), sock_addr.addr.sun_path);
-    EXPECT_FALSE(sock_addr.isAbstract);
-    EXPECT_EQ(truncated_addr, sock_addr.formattedPath);
-}
-
 class MockListenSocket : public ListenSocketInet
 {
   public:

From 716c154b51a24c7af0ad2334c9337d9fc7f8a5c0 Mon Sep 17 00:00:00 2001
From: Gabe Black <gabeblack@google.com>
Date: Sat, 18 Mar 2023 20:27:55 -0700
Subject: [PATCH 322/492] arch,base,dev,sim: Convert objects to use the
 HostSocket param type.

This will make it possible to connect any of these objects with a
named socket, in addition to the usual port numbers.

Change-Id: Id441c3628f62d60608a07c5cb697786e33199981
Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/69166
Reviewed-by: Jui-min Lee <fcrh@google.com>
Tested-by: kokoro <noreply+kokoro@google.com>
Maintainer: Gabe Black <gabeblack@google.com>
Reviewed-by: Yu-hsin Wang <yuhsingw@google.com>
---
 src/arch/arm/fastmodel/remote_gdb.cc | 5 +++--
 src/arch/arm/fastmodel/remote_gdb.hh | 2 +-
 src/arch/arm/remote_gdb.cc           | 5 +++--
 src/arch/arm/remote_gdb.hh           | 2 +-
 src/arch/mips/remote_gdb.cc          | 4 ++--
 src/arch/mips/remote_gdb.hh          | 2 +-
 src/arch/power/remote_gdb.cc         | 5 +++--
 src/arch/power/remote_gdb.hh         | 2 +-
 src/arch/riscv/remote_gdb.cc         | 5 +++--
 src/arch/riscv/remote_gdb.hh         | 2 +-
 src/arch/sparc/remote_gdb.cc         | 5 +++--
 src/arch/sparc/remote_gdb.hh         | 2 +-
 src/arch/x86/remote_gdb.cc           | 5 +++--
 src/arch/x86/remote_gdb.hh           | 2 +-
 src/base/remote_gdb.cc               | 5 +++--
 src/base/remote_gdb.hh               | 8 ++++----
 src/base/vnc/Vnc.py                  | 2 +-
 src/base/vnc/vncserver.cc            | 2 +-
 src/dev/net/Ethernet.py              | 2 +-
 src/dev/net/ethertap.cc              | 6 +++---
 src/dev/serial/Terminal.py           | 2 +-
 src/dev/serial/terminal.cc           | 3 +--
 src/sim/Workload.py                  | 4 ++--
 23 files changed, 44 insertions(+), 38 deletions(-)

diff --git a/src/arch/arm/fastmodel/remote_gdb.cc b/src/arch/arm/fastmodel/remote_gdb.cc
index d8dddaddf9..555439ed75 100644
--- a/src/arch/arm/fastmodel/remote_gdb.cc
+++ b/src/arch/arm/fastmodel/remote_gdb.cc
@@ -61,8 +61,9 @@ FastmodelRemoteGDB::AArch64GdbRegCache::setRegs(ThreadContext *context) const
     context->setMiscRegNoEffect(MISCREG_FPCR, r.fpcr);
 }
 
-FastmodelRemoteGDB::FastmodelRemoteGDB(System *_system, int port)
-    : gem5::ArmISA::RemoteGDB(_system, port), regCache64(this)
+FastmodelRemoteGDB::FastmodelRemoteGDB(System *_system,
+        ListenSocketConfig _listen_config)
+    : gem5::ArmISA::RemoteGDB(_system, _listen_config)
 {
 }
 
diff --git a/src/arch/arm/fastmodel/remote_gdb.hh b/src/arch/arm/fastmodel/remote_gdb.hh
index 75dc6580e3..15b4e672a0 100644
--- a/src/arch/arm/fastmodel/remote_gdb.hh
+++ b/src/arch/arm/fastmodel/remote_gdb.hh
@@ -39,7 +39,7 @@ namespace fastmodel
 class FastmodelRemoteGDB : public ArmISA::RemoteGDB
 {
   public:
-    FastmodelRemoteGDB(System *_system, int port);
+    FastmodelRemoteGDB(System *_system, ListenSocketConfig _listen_config);
 
   protected:
     class AArch64GdbRegCache : public ArmISA::RemoteGDB::AArch64GdbRegCache
diff --git a/src/arch/arm/remote_gdb.cc b/src/arch/arm/remote_gdb.cc
index c357f02e08..7dd3c3d7bc 100644
--- a/src/arch/arm/remote_gdb.cc
+++ b/src/arch/arm/remote_gdb.cc
@@ -201,8 +201,9 @@ tryTranslate(ThreadContext *tc, Addr addr)
            mmu->translateFunctional(req, tc, BaseMMU::Execute) == NoFault;
 }
 
-RemoteGDB::RemoteGDB(System *_system, int _port)
-    : BaseRemoteGDB(_system, _port), regCache32(this), regCache64(this)
+RemoteGDB::RemoteGDB(System *_system, ListenSocketConfig _listen_config)
+    : BaseRemoteGDB(_system, _listen_config),
+    regCache32(this), regCache64(this)
 {
 }
 
diff --git a/src/arch/arm/remote_gdb.hh b/src/arch/arm/remote_gdb.hh
index aeb2db6754..f6ec3db2ad 100644
--- a/src/arch/arm/remote_gdb.hh
+++ b/src/arch/arm/remote_gdb.hh
@@ -118,7 +118,7 @@ class RemoteGDB : public BaseRemoteGDB
     AArch64GdbRegCache regCache64;
 
   public:
-    RemoteGDB(System *_system, int _port);
+    RemoteGDB(System *_system, ListenSocketConfig _listen_config);
     BaseGdbRegCache *gdbRegs() override;
     bool checkBpKind(size_t kind) override;
     std::vector<std::string>
diff --git a/src/arch/mips/remote_gdb.cc b/src/arch/mips/remote_gdb.cc
index fcf31e109d..f14305a317 100644
--- a/src/arch/mips/remote_gdb.cc
+++ b/src/arch/mips/remote_gdb.cc
@@ -151,8 +151,8 @@ namespace gem5
 
 using namespace MipsISA;
 
-RemoteGDB::RemoteGDB(System *_system, int _port)
-    : BaseRemoteGDB(_system, _port), regCache(this)
+RemoteGDB::RemoteGDB(System *_system, ListenSocketConfig _listen_config)
+    : BaseRemoteGDB(_system, _listen_config), regCache(this)
 {
 }
 
diff --git a/src/arch/mips/remote_gdb.hh b/src/arch/mips/remote_gdb.hh
index 6a38956339..6fd193a0b8 100644
--- a/src/arch/mips/remote_gdb.hh
+++ b/src/arch/mips/remote_gdb.hh
@@ -80,7 +80,7 @@ class RemoteGDB : public BaseRemoteGDB
     MipsGdbRegCache regCache;
 
   public:
-    RemoteGDB(System *_system, int _port);
+    RemoteGDB(System *_system, ListenSocketConfig _listen_config);
     BaseGdbRegCache *gdbRegs();
     std::vector<std::string>
     availableFeatures() const
diff --git a/src/arch/power/remote_gdb.cc b/src/arch/power/remote_gdb.cc
index c69c571979..14b9df47d4 100644
--- a/src/arch/power/remote_gdb.cc
+++ b/src/arch/power/remote_gdb.cc
@@ -155,8 +155,9 @@ namespace gem5
 
 using namespace PowerISA;
 
-RemoteGDB::RemoteGDB(System *_system, int _port)
-    : BaseRemoteGDB(_system, _port), regCache32(this), regCache64(this)
+RemoteGDB::RemoteGDB(System *_system, ListenSocketConfig _listen_config)
+    : BaseRemoteGDB(_system, _listen_config),
+    regCache32(this), regCache64(this)
 {
 }
 
diff --git a/src/arch/power/remote_gdb.hh b/src/arch/power/remote_gdb.hh
index 138913e6b8..fedb91d4a9 100644
--- a/src/arch/power/remote_gdb.hh
+++ b/src/arch/power/remote_gdb.hh
@@ -112,7 +112,7 @@ class RemoteGDB : public BaseRemoteGDB
     Power64GdbRegCache regCache64;
 
   public:
-    RemoteGDB(System *_system, int _port);
+    RemoteGDB(System *_system, ListenSocketConfig _listen_config);
     BaseGdbRegCache *gdbRegs();
 
     std::vector<std::string>
diff --git a/src/arch/riscv/remote_gdb.cc b/src/arch/riscv/remote_gdb.cc
index 54ecde061f..48ce1d5d3b 100644
--- a/src/arch/riscv/remote_gdb.cc
+++ b/src/arch/riscv/remote_gdb.cc
@@ -190,8 +190,9 @@ setRegWithMask(ThreadContext *context, RiscvType type, CSRIndex idx, xint val)
     context->setMiscReg(CSRData.at(idx).physIndex, newVal);
 }
 
-RemoteGDB::RemoteGDB(System *_system, int _port)
-    : BaseRemoteGDB(_system, _port), regCache32(this), regCache64(this)
+RemoteGDB::RemoteGDB(System *_system, ListenSocketConfig _listen_config)
+    : BaseRemoteGDB(_system, _listen_config),
+    regCache32(this), regCache64(this)
 {
 }
 
diff --git a/src/arch/riscv/remote_gdb.hh b/src/arch/riscv/remote_gdb.hh
index 8f8abb0587..b2f90c32be 100644
--- a/src/arch/riscv/remote_gdb.hh
+++ b/src/arch/riscv/remote_gdb.hh
@@ -232,7 +232,7 @@ class RemoteGDB : public BaseRemoteGDB
     Riscv64GdbRegCache regCache64;
 
   public:
-    RemoteGDB(System *_system, int _port);
+    RemoteGDB(System *_system, ListenSocketConfig _listen_config);
     BaseGdbRegCache *gdbRegs() override;
     /**
      * Informs GDB remote serial protocol that XML features are supported
diff --git a/src/arch/sparc/remote_gdb.cc b/src/arch/sparc/remote_gdb.cc
index 481332d311..1b348aacab 100644
--- a/src/arch/sparc/remote_gdb.cc
+++ b/src/arch/sparc/remote_gdb.cc
@@ -148,8 +148,9 @@ namespace gem5
 
 using namespace SparcISA;
 
-RemoteGDB::RemoteGDB(System *_system, int _port)
-    : BaseRemoteGDB(_system, _port), regCache32(this), regCache64(this)
+RemoteGDB::RemoteGDB(System *_system, ListenSocketConfig _listen_config)
+    : BaseRemoteGDB(_system, _listen_config),
+    regCache32(this), regCache64(this)
 {}
 
 ///////////////////////////////////////////////////////////
diff --git a/src/arch/sparc/remote_gdb.hh b/src/arch/sparc/remote_gdb.hh
index 7129ecd966..851699e294 100644
--- a/src/arch/sparc/remote_gdb.hh
+++ b/src/arch/sparc/remote_gdb.hh
@@ -108,7 +108,7 @@ class RemoteGDB : public BaseRemoteGDB
     SPARC64GdbRegCache regCache64;
 
   public:
-    RemoteGDB(System *_system, int _port);
+    RemoteGDB(System *_system, ListenSocketConfig _listen_config);
     BaseGdbRegCache *gdbRegs();
 };
 } // namespace SparcISA
diff --git a/src/arch/x86/remote_gdb.cc b/src/arch/x86/remote_gdb.cc
index 244f80dad9..20eb4e89f4 100644
--- a/src/arch/x86/remote_gdb.cc
+++ b/src/arch/x86/remote_gdb.cc
@@ -66,8 +66,9 @@ namespace gem5
 
 using namespace X86ISA;
 
-RemoteGDB::RemoteGDB(System *_system, int _port) :
-    BaseRemoteGDB(_system, _port), regCache32(this), regCache64(this)
+RemoteGDB::RemoteGDB(System *_system, ListenSocketConfig _listen_config) :
+    BaseRemoteGDB(_system, _listen_config),
+    regCache32(this), regCache64(this)
 {}
 
 bool
diff --git a/src/arch/x86/remote_gdb.hh b/src/arch/x86/remote_gdb.hh
index dfa9177f1e..a5d47c2865 100644
--- a/src/arch/x86/remote_gdb.hh
+++ b/src/arch/x86/remote_gdb.hh
@@ -146,7 +146,7 @@ class RemoteGDB : public BaseRemoteGDB
     AMD64GdbRegCache regCache64;
 
   public:
-    RemoteGDB(System *system, int _port);
+    RemoteGDB(System *system, ListenSocketConfig _listen_config);
     BaseGdbRegCache *gdbRegs();
 };
 
diff --git a/src/base/remote_gdb.cc b/src/base/remote_gdb.cc
index 095b2bd38c..abac3437ea 100644
--- a/src/base/remote_gdb.cc
+++ b/src/base/remote_gdb.cc
@@ -390,12 +390,13 @@ std::map<Addr, HardBreakpoint *> hardBreakMap;
 
 }
 
-BaseRemoteGDB::BaseRemoteGDB(System *_system, int _port) :
+BaseRemoteGDB::BaseRemoteGDB(System *_system,
+        ListenSocketConfig _listen_config) :
         incomingConnectionEvent(nullptr), incomingDataEvent(nullptr),
         fd(-1), sys(_system), connectEvent(*this), disconnectEvent(*this),
         trapEvent(this), singleStepEvent(*this)
 {
-    listener = listenSocketInetConfig(_port).build(name());
+    listener = _listen_config.build(name());
 }
 
 BaseRemoteGDB::~BaseRemoteGDB()
diff --git a/src/base/remote_gdb.hh b/src/base/remote_gdb.hh
index 9f09582721..02802e7a85 100644
--- a/src/base/remote_gdb.hh
+++ b/src/base/remote_gdb.hh
@@ -153,7 +153,7 @@ class BaseRemoteGDB
     /**
      * Interface to other parts of the simulator.
      */
-    BaseRemoteGDB(System *system, int _port);
+    BaseRemoteGDB(System *system, ListenSocketConfig _listen_config);
     virtual ~BaseRemoteGDB();
 
     std::string name();
@@ -180,10 +180,10 @@ class BaseRemoteGDB
 
     template <class GDBStub, class ...Args>
     static BaseRemoteGDB *
-    build(int port, Args... args)
+    build(ListenSocketConfig listen_config, Args... args)
     {
-        if (port)
-            return new GDBStub(args..., port);
+        if (listen_config)
+            return new GDBStub(args..., listen_config);
         else
             return nullptr;
     }
diff --git a/src/base/vnc/Vnc.py b/src/base/vnc/Vnc.py
index c0d621283c..e7012ecb06 100644
--- a/src/base/vnc/Vnc.py
+++ b/src/base/vnc/Vnc.py
@@ -50,5 +50,5 @@ class VncServer(VncInput):
     type = "VncServer"
     cxx_header = "base/vnc/vncserver.hh"
     cxx_class = "gem5::VncServer"
-    port = Param.TcpPort(5900, "listen port")
+    port = Param.HostSocket(5900, "listen port/socket")
     number = Param.Int(0, "vnc client number")
diff --git a/src/base/vnc/vncserver.cc b/src/base/vnc/vncserver.cc
index 4e5c951191..f34241968e 100644
--- a/src/base/vnc/vncserver.cc
+++ b/src/base/vnc/vncserver.cc
@@ -117,7 +117,7 @@ VncServer::DataEvent::process(int revent)
  */
 VncServer::VncServer(const Params &p)
     : VncInput(p), listenEvent(NULL), dataEvent(NULL), number(p.number),
-      listener(listenSocketInetConfig(p.port).build(p.name)),
+      listener(p.port.build(p.name)),
       sendUpdate(false), supportsRawEnc(false), supportsResizeEnc(false)
 {
     if (p.port)
diff --git a/src/dev/net/Ethernet.py b/src/dev/net/Ethernet.py
index 608f25b617..72f2061b2b 100644
--- a/src/dev/net/Ethernet.py
+++ b/src/dev/net/Ethernet.py
@@ -147,7 +147,7 @@ class EtherTapStub(EtherTapBase):
     cxx_header = "dev/net/ethertap.hh"
     cxx_class = "gem5::EtherTapStub"
 
-    port = Param.UInt16(3500, "Port helper should send packets to")
+    port = Param.HostSocket(3500, "Port/socket helper should send packets to")
 
 
 class EtherDump(SimObject):
diff --git a/src/dev/net/ethertap.cc b/src/dev/net/ethertap.cc
index 7c7a8dcb11..c66723b55c 100644
--- a/src/dev/net/ethertap.cc
+++ b/src/dev/net/ethertap.cc
@@ -249,8 +249,8 @@ class TapListener
     EtherTapStub *tap;
 
   public:
-    TapListener(EtherTapStub *t, int p) :
-        listener(listenSocketInetConfig(p).build(t->name())), tap(t) {}
+    TapListener(EtherTapStub *t, ListenSocketPtr _listener) :
+        listener(std::move(_listener)), tap(t) {}
     ~TapListener() { delete event; }
 
     void listen();
@@ -287,7 +287,7 @@ EtherTapStub::EtherTapStub(const Params &p) : EtherTapBase(p), socket(-1)
     if (ListenSocket::allDisabled())
         fatal("All listeners are disabled! EtherTapStub can't work!");
 
-    listener = new TapListener(this, p.port);
+    listener = new TapListener(this, p.port.build(name()));
     listener->listen();
 }
 
diff --git a/src/dev/serial/Terminal.py b/src/dev/serial/Terminal.py
index c77437906c..a08a18fe1e 100644
--- a/src/dev/serial/Terminal.py
+++ b/src/dev/serial/Terminal.py
@@ -51,7 +51,7 @@ class Terminal(SerialDevice):
     type = "Terminal"
     cxx_header = "dev/serial/terminal.hh"
     cxx_class = "gem5::Terminal"
-    port = Param.TcpPort(3456, "listen port")
+    port = Param.HostSocket(3456, "listen port/socket")
     number = Param.Int(0, "terminal number")
     outfile = Param.TerminalDump(
         "file", "Selects if and where the terminal is dumping its output"
diff --git a/src/dev/serial/terminal.cc b/src/dev/serial/terminal.cc
index 6e8e435b07..db4906a973 100644
--- a/src/dev/serial/terminal.cc
+++ b/src/dev/serial/terminal.cc
@@ -121,8 +121,7 @@ Terminal::DataEvent::process(int revent)
  */
 Terminal::Terminal(const Params &p)
     : SerialDevice(p), listenEvent(NULL), dataEvent(NULL),
-      number(p.number), data_fd(-1),
-      listener(listenSocketInetConfig(p.port).build(p.name)),
+      number(p.number), data_fd(-1), listener(p.port.build(p.name)),
       txbuf(16384), rxbuf(16384), outfile(terminalDump(p))
 #if TRACING_ON == 1
       , linebuf(16384)
diff --git a/src/sim/Workload.py b/src/sim/Workload.py
index f5139e1dd8..31ea7382dd 100644
--- a/src/sim/Workload.py
+++ b/src/sim/Workload.py
@@ -36,8 +36,8 @@ class Workload(SimObject):
     abstract = True
 
     wait_for_remote_gdb = Param.Bool(False, "Wait for a remote GDB connection")
-    remote_gdb_port = Param.Int(
-        7000, "Default port number used for remote GDB connection"
+    remote_gdb_port = Param.HostSocket(
+        7000, "Default port/socket used for remote GDB connection"
     )
 
     @cxxMethod

From 0af4b60acb4783b9ab52057748a386e770dffaff Mon Sep 17 00:00:00 2001
From: Gabe Black <gabeblack@google.com>
Date: Wed, 12 Apr 2023 00:32:51 -0700
Subject: [PATCH 323/492] base: fatal() if a socket path doesn't fit in
 sockaddr_un.sun_path.

Normally this would just generate a warning, but a warning is easy to
miss, and truncating the path to fit would be surprising. Since the max
length isn't likely to change, a path which has to be truncated is
essentially fundementally wrong, and could be defined as something
else which is short enough before being used in the config.

Note that this only applies to either the abstract path which is just
a string, or the file name and not the directory path on a file based
socket.

Change-Id: I8702cf02c03053b5d0b6133f25b0e588de666f15
Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/69677
Maintainer: Gabe Black <gabeblack@google.com>
Reviewed-by: Earl Ou <shunhsingou@google.com>
Tested-by: kokoro <noreply+kokoro@google.com>
---
 src/base/socket.cc | 21 +++++++++------------
 src/base/socket.hh |  2 +-
 2 files changed, 10 insertions(+), 13 deletions(-)

diff --git a/src/base/socket.cc b/src/base/socket.cc
index 76dc73f5fd..62f20717f4 100644
--- a/src/base/socket.cc
+++ b/src/base/socket.cc
@@ -261,15 +261,12 @@ listenSocketInetConfig(int port)
     });
 }
 
-std::string
-ListenSocketUnix::truncate(const std::string &original, size_t max_len)
+void
+ListenSocketUnix::checkPathLength(const std::string &original, size_t max_len)
 {
-    if (original.size() <= max_len)
-        return original;
-
-    std::string truncated = original.substr(0, max_len);
-    warn("%s: Truncated \"%s\" to \"%s\"", name(), original, truncated);
-    return truncated;
+    fatal_if(original.size() > max_len,
+            "Length of socket path '%s' is %d, greater than max %d.",
+            original, original.size(), max_len);
 }
 
 void
@@ -303,9 +300,9 @@ ListenSocketUnix::listen()
 
 ListenSocketUnixFile::ListenSocketUnixFile(const std::string &_name,
         const std::string &_dir, const std::string &_fname) :
-    ListenSocketUnix(_name), dir(_dir),
-    fname(truncate(_fname, sizeof(sockaddr_un::sun_path) - 1))
+    ListenSocketUnix(_name), dir(_dir), fname(_fname)
 {
+    checkPathLength(fname, sizeof(sockaddr_un::sun_path) - 1);
 }
 
 ListenSocketUnixFile::~ListenSocketUnixFile()
@@ -385,9 +382,9 @@ ListenSocketUnixAbstract::prepSockaddrUn(sockaddr_un &addr) const
 
 ListenSocketUnixAbstract::ListenSocketUnixAbstract(
         const std::string &_name, const std::string &_path) :
-    ListenSocketUnix(_name),
-    path(truncate(_path, sizeof(sockaddr_un::sun_path) - 1))
+    ListenSocketUnix(_name), path(_path)
 {
+    checkPathLength(path, sizeof(sockaddr_un::sun_path) - 1);
 }
 
 void
diff --git a/src/base/socket.hh b/src/base/socket.hh
index b8828e7f43..bc1721358f 100644
--- a/src/base/socket.hh
+++ b/src/base/socket.hh
@@ -162,7 +162,7 @@ class ListenSocketUnix : public ListenSocket
   protected:
     virtual size_t prepSockaddrUn(sockaddr_un &addr) const = 0;
 
-    std::string truncate(const std::string &original, size_t max_len);
+    void checkPathLength(const std::string &original, size_t max_len);
 
     ListenSocketUnix(const std::string &_name) : ListenSocket(_name) {}
 

From 6c4f405669cf1f8289d7f86284ba66e8371de68d Mon Sep 17 00:00:00 2001
From: Roger Chang <rogerycchang@google.com>
Date: Wed, 12 Apr 2023 15:57:50 +0800
Subject: [PATCH 324/492] arch-riscv: Insert symbol table of bootloader into
 debug symbol table in bare metal workload

Change-Id: Iea2ded4e72070b7b3b588000e1082180269e9e5e
Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/69697
Tested-by: kokoro <noreply+kokoro@google.com>
Maintainer: Gabe Black <gabeblack@google.com>
Reviewed-by: Yu-hsin Wang <yuhsingw@google.com>
---
 src/arch/riscv/bare_metal/fs_workload.cc | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/src/arch/riscv/bare_metal/fs_workload.cc b/src/arch/riscv/bare_metal/fs_workload.cc
index 4f7adb31b6..574c944bff 100644
--- a/src/arch/riscv/bare_metal/fs_workload.cc
+++ b/src/arch/riscv/bare_metal/fs_workload.cc
@@ -47,6 +47,8 @@ BareMetal::BareMetal(const Params &p) : Workload(p),
     fatal_if(!bootloader, "Could not load bootloader file %s.", p.bootloader);
     _resetVect = bootloader->entryPoint();
     bootloaderSymtab = bootloader->symtab();
+
+    loader::debugSymbolTable.insert(bootloaderSymtab);
 }
 
 BareMetal::~BareMetal()

From d95890d2a75b0832f5f93f0e098f39213f9da1c0 Mon Sep 17 00:00:00 2001
From: Richard Cooper <richard.cooper@arm.com>
Date: Thu, 16 Sep 2021 16:04:08 +0100
Subject: [PATCH 325/492] python: Fix broken call to m5.fatal in
 _check_tracing()

The call to m5.fatal in _check_tracing() fails because it has not been
imported at this point.

Change-Id: I60b1de6128d0ffc29e03e9ed98a8f9f679ef0ff9
Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/50447
Maintainer: Bobby Bruce <bbruce@ucdavis.edu>
Reviewed-by: Bobby Bruce <bbruce@ucdavis.edu>
Tested-by: kokoro <noreply+kokoro@google.com>
---
 src/python/m5/main.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/src/python/m5/main.py b/src/python/m5/main.py
index d8c9951f6d..b4a3472187 100644
--- a/src/python/m5/main.py
+++ b/src/python/m5/main.py
@@ -334,12 +334,13 @@ def interact(scope):
 
 
 def _check_tracing():
+    import m5
     import _m5.core
 
     if _m5.core.TRACING_ON:
         return
 
-    fatal("Tracing is not enabled.  Compile with TRACING_ON")
+    m5.fatal("Tracing is not enabled.  Compile with TRACING_ON")
 
 
 def main():

From 640891ac41e33cc8c5297a8a7b020e29cc6be34b Mon Sep 17 00:00:00 2001
From: "Bobby R. Bruce" <bbruce@ucdavis.edu>
Date: Wed, 12 Apr 2023 14:00:57 -0700
Subject: [PATCH 326/492] scons: Fix "no-duplicate-sources" to include .hh when
 not set

A flag, introduced in this patch:
https://gem5-review.googlesource.com/c/public/gem5/+/68518
allowed users to pass "no-duplicate-sources" to a gem5 compilation to
not symlink sources in the build directory.

In this patch "src" was added as a shared top-level header directory.
This means that the header files are not copied to the "build" directory
whether or not "no-duplicate-sources" is set.

This patch ensures the "src" directory is only added as a shared
top-level headers directory in the case where "no-duplicate-sources" is
set.

In addition, the "duplicate_sources" parameter (the destination for the
"no-duplicate-sources") was "None" by default, and only set to False
when the flag was used. `default=True` has been added so
"duplicate_sources" can be used as a boolean.

This bug was a cause of a Nightly build error:
https://jenkins.gem5.org/job/nightly/570

In this error, building ext/sst resulted in an error as the Makefile
depends on adding "build/RISCV" to the include path. Without the header
files in the "build" directory, building SST failed. Though, ext/stt
should probably not be using header files in the "build/RISCV"
directory. This will be fixed in another change.

Change-Id: I786486a177fe17a67f3b939c539eecdcbfcaeaf2
Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/69717
Reviewed-by: Bobby Bruce <bbruce@ucdavis.edu>
Maintainer: Bobby Bruce <bbruce@ucdavis.edu>
Tested-by: kokoro <noreply+kokoro@google.com>
---
 SConstruct | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/SConstruct b/SConstruct
index e91e7007d8..7e8f177418 100755
--- a/SConstruct
+++ b/SConstruct
@@ -145,7 +145,7 @@ AddOption('--gprof', action='store_true',
           help='Enable support for the gprof profiler')
 AddOption('--pprof', action='store_true',
           help='Enable support for the pprof profiler')
-AddOption('--no-duplicate-sources', action='store_false',
+AddOption('--no-duplicate-sources', action='store_false', default=True,
           dest='duplicate_sources',
           help='Do not create symlinks to sources in the build directory')
 
@@ -267,7 +267,8 @@ main.Append(CPPPATH=[Dir('ext')])
 
 # Add shared top-level headers
 main.Prepend(CPPPATH=Dir('include'))
-main.Prepend(CPPPATH=Dir('src'))
+if not GetOption('duplicate_sources'):
+    main.Prepend(CPPPATH=Dir('src'))
 
 
 ########################################################################

From d02cba93c3c957e43a9878c9591c5b90fc8017a2 Mon Sep 17 00:00:00 2001
From: Richard Cooper <richard.cooper@arm.com>
Date: Tue, 21 Mar 2023 23:35:38 +0000
Subject: [PATCH 327/492] cpu: Add CpuCluster method to allow querying the
 number of CPUs.

Add a `__len__` method to `CpuCluster` to allow clients to query the
number of CPUs.

Change-Id: I6fe680423ed6fc301faaf75b8685b080a4774fef
Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/69678
Tested-by: kokoro <noreply+kokoro@google.com>
Reviewed-by: Jason Lowe-Power <power.jg@gmail.com>
Maintainer: Jason Lowe-Power <power.jg@gmail.com>
---
 src/cpu/CpuCluster.py | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/src/cpu/CpuCluster.py b/src/cpu/CpuCluster.py
index 31fdc4977d..42a71122a3 100644
--- a/src/cpu/CpuCluster.py
+++ b/src/cpu/CpuCluster.py
@@ -1,4 +1,4 @@
-# Copyright (c) 2022 Arm Limited
+# Copyright (c) 2022-2023 Arm Limited
 # All rights reserved.
 #
 # The license below extends only to copyright in the software and shall
@@ -51,6 +51,9 @@ class CpuCluster(SubSystem):
     def __iter__(self):
         return iter(self.cpus)
 
+    def __len__(self):
+        return len(self.cpus)
+
     def generate_cpus(self, cpu_type: "BaseCPU", num_cpus: int):
         """
         Instantiates the cpus within the cluster provided

From dcc14ba94887558f28b92e57cc63fc17031a190e Mon Sep 17 00:00:00 2001
From: Richard Cooper <richard.cooper@arm.com>
Date: Tue, 21 Mar 2023 23:26:00 +0000
Subject: [PATCH 328/492] configs: Update Arm starter_se.py for new CpuCluster
 abstraction

Changeset [1] introduced a new CpuCluster abstraction. This requires
some changes to the Arm `starter_se.py` and `devices.py`
configurations to accommodate the new structure.

[1] https://gem5-review.googlesource.com/c/public/gem5/+/65891

Change-Id: I55fdd383c96286d179724e0f50771e2b5daaa6d7
Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/69679
Reviewed-by: Giacomo Travaglini <giacomo.travaglini@arm.com>
Maintainer: Giacomo Travaglini <giacomo.travaglini@arm.com>
Tested-by: kokoro <noreply+kokoro@google.com>
---
 configs/example/arm/starter_se.py | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/configs/example/arm/starter_se.py b/configs/example/arm/starter_se.py
index ccdbe4f847..6b4dce9d64 100644
--- a/configs/example/arm/starter_se.py
+++ b/configs/example/arm/starter_se.py
@@ -1,4 +1,4 @@
-# Copyright (c) 2016-2017 ARM Limited
+# Copyright (c) 2016-2017, 2023 ARM Limited
 # All rights reserved.
 #
 # The license below extends only to copyright in the software and shall
@@ -95,7 +95,7 @@ class SimpleSeSystem(System):
 
         # Add CPUs to the system. A cluster of CPUs typically have
         # private L1 caches and a shared L2 cache.
-        self.cpu_cluster = devices.CpuCluster(
+        self.cpu_cluster = devices.ArmCpuCluster(
             self, args.num_cores, args.cpu_freq, "1.2V", *cpu_types[args.cpu]
         )
 
@@ -114,11 +114,11 @@ class SimpleSeSystem(System):
     def numCpuClusters(self):
         return len(self._clusters)
 
-    def addCpuCluster(self, cpu_cluster, num_cpus):
+    def addCpuCluster(self, cpu_cluster):
         assert cpu_cluster not in self._clusters
-        assert num_cpus > 0
+        assert len(cpu_cluster) > 0
         self._clusters.append(cpu_cluster)
-        self._num_cpus += num_cpus
+        self._num_cpus += len(cpu_cluster)
 
     def numCpus(self):
         return self._num_cpus

From 27aab0fb3551b9cdaa1e9e4be3532447e6d3e8e2 Mon Sep 17 00:00:00 2001
From: Richard Cooper <richard.cooper@arm.com>
Date: Thu, 8 Sep 2022 17:26:50 +0100
Subject: [PATCH 329/492] arch-arm: Fix formatting of v8 Tarmac Register
 records

The Tarmac v8 Register ("R") record serialisation formats the
underlying 64-bit storage using a format string field width specifier.
This sets a minimum number of hex characters for the value, rather
than a maximum number of characters.

Because of this, when formatting a narrowed view of a larger
register (e.g. the 32-bit w0 view of the 64-bit x0 register), if any
of the upper bits in the underlying storage are set, then the number
of hex characters used will be the minimum number required to
represent the full value. This could result in irregular formatting,
for example an odd number of hex characters.

This irregular formatting can cause parsing warnings or failures in
some Tarmac tools, for example the Arm Tarmac Trace Utilities [1].

This patch modifies the "R" record formatting to first mask off the
upper bits of the value in the underlying storage to ensure that the
correct number of hex characters are used for the size of the register
being serialised.

[1] https://github.com/ARM-software/tarmac-trace-utilities

Change-Id: Idbd80553d3bcdb56fa9edddd48440ab7d4dff073
Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/69680
Reviewed-by: Giacomo Travaglini <giacomo.travaglini@arm.com>
Maintainer: Giacomo Travaglini <giacomo.travaglini@arm.com>
Tested-by: kokoro <noreply+kokoro@google.com>
---
 src/arch/arm/tracers/tarmac_record_v8.cc | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/src/arch/arm/tracers/tarmac_record_v8.cc b/src/arch/arm/tracers/tarmac_record_v8.cc
index 29606c3f82..a3850b3812 100644
--- a/src/arch/arm/tracers/tarmac_record_v8.cc
+++ b/src/arch/arm/tracers/tarmac_record_v8.cc
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2017-2019 ARM Limited
+ * Copyright (c) 2017-2019, 2022 Arm Limited
  * All rights reserved
  *
  * The license below extends only to copyright in the software and shall
@@ -293,8 +293,9 @@ std::string
 TarmacTracerRecordV8::TraceRegEntryV8::formatReg() const
 {
     if (regWidth <= 64) {
-        // Register width is < 64 bit (scalar register).
-        return csprintf("%0*x", regWidth / 4, values[Lo]);
+        // Register width is <= 64 bit (scalar register).
+        const auto regValue = values[Lo] & mask(regWidth);
+        return csprintf("%0*x", regWidth / 4, regValue);
     } else {
 
         // Register width is > 64 bit (vector).  Iterate over every vector

From 06637a29e5ecd35b3c0f170e30050cb435965b8e Mon Sep 17 00:00:00 2001
From: Richard Cooper <richard.cooper@arm.com>
Date: Wed, 14 Sep 2022 14:17:18 +0100
Subject: [PATCH 330/492] arch-arm: Add more detailed debug messages to GICv2.

Converted the generic DPRINTF messages for the GICv2 register reads
and writes (showing only the memory mapped address) to finer grained
DPRINTF messages showing the names of the mapped registers being
accessed.

This change is intended to make it easier to debug the GIC setup from
the gem5 debug trace.

Change-Id: Ic418b2ea8438fed6a5a810ebc0b686cd4c891cb0
Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/69681
Maintainer: Giacomo Travaglini <giacomo.travaglini@arm.com>
Tested-by: kokoro <noreply+kokoro@google.com>
Reviewed-by: Giacomo Travaglini <giacomo.travaglini@arm.com>
---
 src/dev/arm/gic_v2.cc | 96 +++++++++++++++++++++++++++++++++++--------
 1 file changed, 80 insertions(+), 16 deletions(-)

diff --git a/src/dev/arm/gic_v2.cc b/src/dev/arm/gic_v2.cc
index 7dc001ed08..e60daf08bd 100644
--- a/src/dev/arm/gic_v2.cc
+++ b/src/dev/arm/gic_v2.cc
@@ -199,8 +199,6 @@ GicV2::readDistributor(PacketPtr pkt)
     const Addr daddr = pkt->getAddr() - distRange.start();
     const ContextID ctx = pkt->req->contextId();
 
-    DPRINTF(GIC, "gic distributor read register %#x\n", daddr);
-
     const uint32_t resp = readDistributor(ctx, daddr, pkt->getSize());
 
     switch (pkt->getSize()) {
@@ -228,50 +226,61 @@ GicV2::readDistributor(ContextID ctx, Addr daddr, size_t resp_sz)
     if (GICD_IGROUPR.contains(daddr)) {
         uint32_t ix = (daddr - GICD_IGROUPR.start()) >> 2;
         assert(ix < 32);
+        DPRINTF(GIC, "gic distributor read GICD_IGROUPR%d (%#x)\n", ix, daddr);
         return getIntGroup(ctx, ix);
     }
 
     if (GICD_ISENABLER.contains(daddr)) {
         uint32_t ix = (daddr - GICD_ISENABLER.start()) >> 2;
         assert(ix < 32);
+        DPRINTF(GIC, "gic distributor read GICD_ISENABLER%d (%#x)\n",
+                ix, daddr);
         return getIntEnabled(ctx, ix);
     }
 
     if (GICD_ICENABLER.contains(daddr)) {
         uint32_t ix = (daddr - GICD_ICENABLER.start()) >> 2;
         assert(ix < 32);
+        DPRINTF(GIC, "gic distributor read GICD_ICENABLER%d (%#x)\n",
+                ix, daddr);
         return getIntEnabled(ctx, ix);
     }
 
     if (GICD_ISPENDR.contains(daddr)) {
         uint32_t ix = (daddr - GICD_ISPENDR.start()) >> 2;
         assert(ix < 32);
+        DPRINTF(GIC, "gic distributor read GICD_ISPENDR%d (%#x)\n", ix, daddr);
         return getPendingInt(ctx, ix);
     }
 
     if (GICD_ICPENDR.contains(daddr)) {
         uint32_t ix = (daddr - GICD_ICPENDR.start()) >> 2;
         assert(ix < 32);
+        DPRINTF(GIC, "gic distributor read GICD_ICPENDR%d (%#x)\n", ix, daddr);
         return getPendingInt(ctx, ix);
     }
 
     if (GICD_ISACTIVER.contains(daddr)) {
         uint32_t ix = (daddr - GICD_ISACTIVER.start()) >> 2;
         assert(ix < 32);
+        DPRINTF(GIC, "gic distributor read GICD_ISACTIVER%d (%#x)\n",
+                ix, daddr);
         return getActiveInt(ctx, ix);
     }
 
     if (GICD_ICACTIVER.contains(daddr)) {
         uint32_t ix = (daddr - GICD_ICACTIVER.start()) >> 2;
         assert(ix < 32);
+        DPRINTF(GIC, "gic distributor read GICD_ICACTIVER%d (%#x)\n",
+                ix, daddr);
         return getActiveInt(ctx, ix);
     }
 
     if (GICD_IPRIORITYR.contains(daddr)) {
         Addr int_num = daddr - GICD_IPRIORITYR.start();
         assert(int_num < INT_LINES_MAX);
-        DPRINTF(Interrupt, "Reading interrupt priority at int# %#x \n",
-                int_num);
+        DPRINTF(GIC, "gic distributor read GICD_IPRIORITYR%d (%#x)\n",
+                int_num, daddr);
 
         switch (resp_sz) {
           default: // will panic() after return to caller anyway
@@ -292,8 +301,8 @@ GicV2::readDistributor(ContextID ctx, Addr daddr, size_t resp_sz)
 
     if (GICD_ITARGETSR.contains(daddr)) {
         Addr int_num = daddr - GICD_ITARGETSR.start();
-        DPRINTF(GIC, "Reading processor target register for int# %#x \n",
-                 int_num);
+        DPRINTF(GIC, "gic distributor read GICD_ITARGETSR%d (%#x)\n",
+                int_num, daddr);
         assert(int_num < INT_LINES_MAX);
 
         if (resp_sz == 1) {
@@ -310,30 +319,38 @@ GicV2::readDistributor(ContextID ctx, Addr daddr, size_t resp_sz)
 
     if (GICD_ICFGR.contains(daddr)) {
         uint32_t ix = (daddr - GICD_ICFGR.start()) >> 2;
+        DPRINTF(GIC, "gic distributor read GICD_ICFGR%d (%#x)\n", ix, daddr);
         return getIntConfig(ctx, ix);
     }
 
     switch(daddr) {
       case GICD_CTLR:
+        DPRINTF(GIC, "gic distributor read GICD_CTLR (%#x)\n", daddr);
         return enabled;
       case GICD_TYPER:
         /* The 0x100 is a made-up flag to show that gem5 extensions
          * are available,
          * write 0x200 to this register to enable it.  */
+        DPRINTF(GIC, "gic distributor read GICD_TYPER (%#x)\n", daddr);
         return (((sys->threads.numRunning() - 1) << 5) |
                 (itLines/INT_BITS_MAX -1) |
                 (haveGem5Extensions ? 0x100 : 0x0));
       case GICD_PIDR0:
         //ARM defined DevID
+        DPRINTF(GIC, "gic distributor read GICD_PIDR0 (%#x)\n", daddr);
         return (gicdPIDR & 0xFF);
       case GICD_PIDR1:
+        DPRINTF(GIC, "gic distributor read GICD_PIDR1 (%#x)\n", daddr);
         return ((gicdPIDR >> 8) & 0xFF);
       case GICD_PIDR2:
+        DPRINTF(GIC, "gic distributor read GICD_PIDR2 (%#x)\n", daddr);
         return ((gicdPIDR >> 16) & 0xFF);
       case GICD_PIDR3:
+        DPRINTF(GIC, "gic distributor read GICD_PIDR3 (%#x)\n", daddr);
         return ((gicdPIDR >> 24) & 0xFF);
       case GICD_IIDR:
          /* revision id is resorted to 1 and variant to 0*/
+        DPRINTF(GIC, "gic distributor read GICD_IIDR (%#x)\n", daddr);
         return gicdIIDR;
       default:
         panic("Tried to read Gic distributor at offset %#x\n", daddr);
@@ -350,8 +367,6 @@ GicV2::readCpu(PacketPtr pkt)
     const ContextID ctx = pkt->req->contextId();
     assert(ctx < sys->threads.numRunning());
 
-    DPRINTF(GIC, "gic cpu read register %#x cpu context: %d\n", daddr, ctx);
-
     pkt->setLE<uint32_t>(readCpu(ctx, daddr));
 
     pkt->makeAtomicResponse();
@@ -363,14 +378,24 @@ GicV2::readCpu(ContextID ctx, Addr daddr)
 {
     switch(daddr) {
       case GICC_IIDR:
+        DPRINTF(GIC, "gic cpu read GICC_IIDR (%#x) cpu context: %d\n",
+                daddr, ctx);
         return giccIIDR;
       case GICC_CTLR:
+        DPRINTF(GIC, "gic cpu read GICC_CTLR (%#x) cpu context: %d\n",
+                daddr, ctx);
         return cpuControl[ctx];
       case GICC_PMR:
+        DPRINTF(GIC, "gic cpu read GICC_PMR (%#x) cpu context: %d\n",
+                daddr, ctx);
         return cpuPriority[ctx];
       case GICC_BPR:
+        DPRINTF(GIC, "gic cpu read GICC_BPR (%#x) cpu context: %d\n",
+                daddr, ctx);
         return cpuBpr[ctx];
       case GICC_IAR:
+        DPRINTF(GIC, "gic cpu read GICC_IAR (%#x) cpu context: %d\n",
+                daddr, ctx);
         if (enabled && cpuEnabled(ctx)) {
             int active_int = cpuHighestInt[ctx];
             IAR iar = 0;
@@ -430,6 +455,8 @@ GicV2::readCpu(ContextID ctx, Addr daddr)
 
         break;
       case GICC_RPR:
+        DPRINTF(GIC, "gic cpu read GICC_RPR (%#x) cpu context: %d\n",
+                daddr, ctx);
         return iccrpr[0];
       case GICC_HPPIR:
         panic("Need to implement HPIR");
@@ -466,9 +493,6 @@ GicV2::writeDistributor(PacketPtr pkt)
               data_sz);
     }
 
-    DPRINTF(GIC, "gic distributor write register %#x size %#x value %#x \n",
-            daddr, data_sz, pkt_data);
-
     writeDistributor(ctx, daddr, pkt_data, data_sz);
 
     pkt->makeAtomicResponse();
@@ -482,6 +506,9 @@ GicV2::writeDistributor(ContextID ctx, Addr daddr, uint32_t data,
     if (GICD_IGROUPR.contains(daddr)) {
         uint32_t ix = (daddr - GICD_IGROUPR.start()) >> 2;
         assert(ix < 32);
+        DPRINTF(GIC,
+            "gic distributor write GICD_IGROUPR%d (%#x) size %#x value %#x \n",
+            ix, daddr, data_sz, data);
         getIntGroup(ctx, ix) |= data;
         return;
     }
@@ -489,6 +516,9 @@ GicV2::writeDistributor(ContextID ctx, Addr daddr, uint32_t data,
     if (GICD_ISENABLER.contains(daddr)) {
         uint32_t ix = (daddr - GICD_ISENABLER.start()) >> 2;
         assert(ix < 32);
+        DPRINTF(GIC, "gic distributor write GICD_ISENABLER%d (%#x) "
+                "size %#x value %#x \n",
+                ix, daddr, data_sz, data);
         getIntEnabled(ctx, ix) |= data;
         return;
     }
@@ -496,12 +526,18 @@ GicV2::writeDistributor(ContextID ctx, Addr daddr, uint32_t data,
     if (GICD_ICENABLER.contains(daddr)) {
         uint32_t ix = (daddr - GICD_ICENABLER.start()) >> 2;
         assert(ix < 32);
+        DPRINTF(GIC, "gic distributor write GICD_ICENABLER%d (%#x) "
+                "size %#x value %#x \n",
+                ix, daddr, data_sz, data);
         getIntEnabled(ctx, ix) &= ~data;
         return;
     }
 
     if (GICD_ISPENDR.contains(daddr)) {
         uint32_t ix = (daddr - GICD_ISPENDR.start()) >> 2;
+        DPRINTF(GIC,
+            "gic distributor write GICD_ISPENDR%d (%#x) size %#x value %#x \n",
+            ix, daddr, data_sz, data);
         auto mask = data;
         if (ix == 0) mask &= SGI_MASK; // Don't allow SGIs to be changed
         getPendingInt(ctx, ix) |= mask;
@@ -511,6 +547,9 @@ GicV2::writeDistributor(ContextID ctx, Addr daddr, uint32_t data,
 
     if (GICD_ICPENDR.contains(daddr)) {
         uint32_t ix = (daddr - GICD_ICPENDR.start()) >> 2;
+        DPRINTF(GIC, "gic distributor write GICD_ICPENDR%d (%#x) "
+                "size %#x value %#x \n",
+                ix, daddr, data_sz, data);
         auto mask = data;
         if (ix == 0) mask &= SGI_MASK; // Don't allow SGIs to be changed
         getPendingInt(ctx, ix) &= ~mask;
@@ -520,18 +559,27 @@ GicV2::writeDistributor(ContextID ctx, Addr daddr, uint32_t data,
 
     if (GICD_ISACTIVER.contains(daddr)) {
         uint32_t ix = (daddr - GICD_ISACTIVER.start()) >> 2;
+        DPRINTF(GIC, "gic distributor write GICD_ISACTIVER%d (%#x) "
+                "size %#x value %#x \n",
+                ix, daddr, data_sz, data);
         getActiveInt(ctx, ix) |= data;
         return;
     }
 
     if (GICD_ICACTIVER.contains(daddr)) {
         uint32_t ix = (daddr - GICD_ICACTIVER.start()) >> 2;
+        DPRINTF(GIC, "gic distributor write GICD_ICACTIVER%d (%#x) "
+                "size %#x value %#x \n",
+                ix, daddr, data_sz, data);
         getActiveInt(ctx, ix) &= ~data;
         return;
     }
 
     if (GICD_IPRIORITYR.contains(daddr)) {
         Addr int_num = daddr - GICD_IPRIORITYR.start();
+        DPRINTF(GIC, "gic distributor write GICD_IPRIORITYR%d (%#x) "
+                "size %#x value %#x\n",
+                (int_num >> 2), daddr, data_sz, data);
         switch(data_sz) {
           case 1:
             getIntPriority(ctx, int_num) = data;
@@ -564,6 +612,9 @@ GicV2::writeDistributor(ContextID ctx, Addr daddr, uint32_t data,
         unsigned offset = SGI_MAX + PPI_MAX;
         if (int_num >= offset) {
             unsigned ix = int_num - offset; // index into cpuTarget array
+            DPRINTF(GIC, "gic distributor write GICD_ITARGETSR%d (%#x) "
+                    "size %#x value %#x\n",
+                    ix, daddr, data_sz, data);
             if (data_sz == 1) {
                 cpuTarget[ix] = data & 0xff;
             } else {
@@ -582,16 +633,22 @@ GicV2::writeDistributor(ContextID ctx, Addr daddr, uint32_t data,
         uint32_t ix = (daddr - GICD_ICFGR.start()) >> 2;
         // Since the GICD_ICFGR0 is RO (WI), we are discarding the write
         // if ix = 0
-        if (ix != 0)
+        if (ix != 0) {
+            DPRINTF(GIC, "gic distributor write GICD_ICFGR%d (%#x) "
+                    "size %#x value %#x\n", ix, daddr, data_sz, data);
             getIntConfig(ctx, ix) = data;
-        if (data & NN_CONFIG_MASK)
+        }
+        if (data & NN_CONFIG_MASK) {
             warn("GIC N:N mode selected and not supported at this time\n");
+        }
         return;
     }
 
     switch(daddr) {
       case GICD_CTLR:
         enabled = data;
+        DPRINTF(GIC, "gic distributor write GICD_CTLR (%#x) ",
+                daddr, data_sz, data);
         DPRINTF(Interrupt, "Distributor enable flag set to = %d\n", enabled);
         break;
       case GICD_TYPER:
@@ -603,6 +660,8 @@ GicV2::writeDistributor(ContextID ctx, Addr daddr, uint32_t data,
                 gem5ExtensionsEnabled ? "enabled" : "disabled");
         break;
       case GICD_SGIR:
+        DPRINTF(GIC, "gic distributor write GICD_SGIR (%#x) ",
+                daddr, data_sz, data);
         softInt(ctx, data);
         break;
       default:
@@ -620,9 +679,6 @@ GicV2::writeCpu(PacketPtr pkt)
     const ContextID ctx = pkt->req->contextId();
     const uint32_t data = pkt->getLE<uint32_t>();
 
-    DPRINTF(GIC, "gic cpu write register cpu:%d %#x val: %#x\n",
-            ctx, daddr, data);
-
     writeCpu(ctx, daddr, data);
 
     pkt->makeAtomicResponse();
@@ -634,19 +690,27 @@ GicV2::writeCpu(ContextID ctx, Addr daddr, uint32_t data)
 {
     switch(daddr) {
       case GICC_CTLR:
+        DPRINTF(GIC, "gic cpu write GICC_CTLR cpu:%d %#x val: %#x\n",
+                ctx, daddr, data);
         cpuControl[ctx] = data;
         break;
       case GICC_PMR:
+        DPRINTF(GIC, "gic cpu write GICC_PMR cpu:%d %#x val: %#x\n",
+                ctx, daddr, data);
         cpuPriority[ctx] = data;
         break;
       case GICC_BPR: {
         auto bpr = data & 0x7;
         if (bpr < GICC_BPR_MINIMUM)
             bpr = GICC_BPR_MINIMUM;
+        DPRINTF(GIC, "gic cpu write GICC_BPR cpu:%d %#x val: %#x\n",
+                ctx, daddr, data);
         cpuBpr[ctx] = bpr;
         break;
       }
       case GICC_EOIR: {
+        DPRINTF(GIC, "gic cpu write GICC_EOIR cpu:%d %#x val: %#x\n",
+                ctx, daddr, data);
         const IAR iar = data;
         if (iar.ack_id < SGI_MAX) {
             // Clear out the bit that corresponds to the cleared int

From ed9effca73175a15c05c49698d0339a52c3886eb Mon Sep 17 00:00:00 2001
From: Richard Cooper <richard.cooper@arm.com>
Date: Thu, 8 Sep 2022 18:10:44 +0100
Subject: [PATCH 331/492] dev-arm: Fix writes to Arm GICv2 GICD_IGROUPRn

Writes to the GICD_IGROUPRn registers are currently applied using the
`|=` operator, allowing bits to be set but not cleared. According to
the specification [1] this register should allow direct writes.

This patch changes the logic to write the new value directly to the
register.

[1] https://developer.arm.com/documentation/ihi0048/latest/

Change-Id: Ia5f17d05530263d7e918ff33576daaf8165c25c2
Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/69682
Reviewed-by: Giacomo Travaglini <giacomo.travaglini@arm.com>
Maintainer: Giacomo Travaglini <giacomo.travaglini@arm.com>
Tested-by: kokoro <noreply+kokoro@google.com>
---
 src/dev/arm/gic_v2.cc | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/dev/arm/gic_v2.cc b/src/dev/arm/gic_v2.cc
index e60daf08bd..b42b49c6e3 100644
--- a/src/dev/arm/gic_v2.cc
+++ b/src/dev/arm/gic_v2.cc
@@ -509,7 +509,7 @@ GicV2::writeDistributor(ContextID ctx, Addr daddr, uint32_t data,
         DPRINTF(GIC,
             "gic distributor write GICD_IGROUPR%d (%#x) size %#x value %#x \n",
             ix, daddr, data_sz, data);
-        getIntGroup(ctx, ix) |= data;
+        getIntGroup(ctx, ix) = data;
         return;
     }
 

From 324ac185c8a3bc7af84fea82aa2b2b08a01a6d58 Mon Sep 17 00:00:00 2001
From: Richard Cooper <richard.cooper@arm.com>
Date: Thu, 8 Sep 2022 18:30:14 +0100
Subject: [PATCH 332/492] arch-arm: Add an option to use 64-bit PMU counters

Add support for 64-bit PMU counter registers (PMEVCNTR<n>_EL0), as
specified in Armv8-A.

The counter registers are 32-bit by default, but 64-bit counters can
be chosen using the `ArmPMU.use64bitCounters` parameter.

Change-Id: Idb838a7438c7711438a7e078278bed21710049af
Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/69683
Reviewed-by: Giacomo Travaglini <giacomo.travaglini@arm.com>
Tested-by: kokoro <noreply+kokoro@google.com>
Maintainer: Giacomo Travaglini <giacomo.travaglini@arm.com>
---
 src/arch/arm/ArmPMU.py | 10 +++++++++-
 src/arch/arm/pmu.cc    |  9 ++++++---
 src/arch/arm/pmu.hh    | 10 +++++++---
 3 files changed, 22 insertions(+), 7 deletions(-)

diff --git a/src/arch/arm/ArmPMU.py b/src/arch/arm/ArmPMU.py
index f21aaff634..3eaed077eb 100644
--- a/src/arch/arm/ArmPMU.py
+++ b/src/arch/arm/ArmPMU.py
@@ -1,5 +1,5 @@
 # -*- mode:python -*-
-# Copyright (c) 2009-2014, 2017, 2020 ARM Limited
+# Copyright (c) 2009-2014, 2017, 2020, 2022 Arm Limited
 # All rights reserved.
 #
 # The license below extends only to copyright in the software and shall
@@ -215,3 +215,11 @@ class ArmPMU(SimObject):
     platform = Param.Platform(Parent.any, "Platform this device is part of.")
     eventCounters = Param.Int(31, "Number of supported PMU counters")
     interrupt = Param.ArmInterruptPin("PMU interrupt")
+
+    # 64-bit PMU event counters are officially supported when
+    # Armv8.5-A FEAT_PMUv3p5 is implemented. This parameter is not a
+    # full implementation of FEAT_PMUv3p5.
+    use64bitCounters = Param.Bool(
+        False,
+        "Choose whether to use 64-bit or " "32-bit PMEVCNTR<n>_EL0 registers.",
+    )
diff --git a/src/arch/arm/pmu.cc b/src/arch/arm/pmu.cc
index f0ab97836a..89dc2c8412 100644
--- a/src/arch/arm/pmu.cc
+++ b/src/arch/arm/pmu.cc
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2011-2014, 2017-2019 ARM Limited
+ * Copyright (c) 2011-2014, 2017-2019, 2022 Arm Limited
  * All rights reserved
  *
  * The license below extends only to copyright in the software and shall
@@ -56,12 +56,13 @@ const RegVal PMU::reg_pmcr_wr_mask = 0x39;
 
 PMU::PMU(const ArmPMUParams &p)
     : SimObject(p), BaseISADevice(),
+      use64bitCounters(p.use64bitCounters),
       reg_pmcnten(0), reg_pmcr(0),
       reg_pmselr(0), reg_pminten(0), reg_pmovsr(0),
       reg_pmceid0(0),reg_pmceid1(0),
       clock_remainder(0),
       maximumCounterCount(p.eventCounters),
-      cycleCounter(*this, maximumCounterCount),
+      cycleCounter(*this, maximumCounterCount, p.use64bitCounters),
       cycleCounterEventId(p.cycleEventId),
       swIncrementEvent(nullptr),
       reg_pmcr_conf(0),
@@ -175,7 +176,7 @@ PMU::regProbeListeners()
     // at this stage all probe configurations are done
     // counters can be configured
     for (uint32_t index = 0; index < maximumCounterCount-1; index++) {
-        counters.emplace_back(*this, index);
+        counters.emplace_back(*this, index, use64bitCounters);
     }
 
     std::shared_ptr<PMUEvent> event = getEvent(cycleCounterEventId);
@@ -685,6 +686,7 @@ PMU::serialize(CheckpointOut &cp) const
 {
     DPRINTF(Checkpoint, "Serializing Arm PMU\n");
 
+    SERIALIZE_SCALAR(use64bitCounters);
     SERIALIZE_SCALAR(reg_pmcr);
     SERIALIZE_SCALAR(reg_pmcnten);
     SERIALIZE_SCALAR(reg_pmselr);
@@ -705,6 +707,7 @@ PMU::unserialize(CheckpointIn &cp)
 {
     DPRINTF(Checkpoint, "Unserializing Arm PMU\n");
 
+    UNSERIALIZE_SCALAR(use64bitCounters);
     UNSERIALIZE_SCALAR(reg_pmcr);
     UNSERIALIZE_SCALAR(reg_pmcnten);
     UNSERIALIZE_SCALAR(reg_pmselr);
diff --git a/src/arch/arm/pmu.hh b/src/arch/arm/pmu.hh
index 46b10d0a8a..ec60c6b7f5 100644
--- a/src/arch/arm/pmu.hh
+++ b/src/arch/arm/pmu.hh
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2011-2014, 2017-2018 ARM Limited
+ * Copyright (c) 2011-2014, 2017-2018, 2022 Arm Limited
  * All rights reserved
  *
  * The license below extends only to copyright in the software and shall
@@ -413,9 +413,10 @@ class PMU : public SimObject, public ArmISA::BaseISADevice
     /** State of a counter within the PMU. **/
     struct CounterState : public Serializable
     {
-        CounterState(PMU &pmuReference, uint64_t counter_id)
+        CounterState(PMU &pmuReference, uint64_t counter_id,
+                     const bool is_64_bit)
             : eventId(0), filter(0), enabled(false),
-              overflow64(false), sourceEvent(nullptr),
+              overflow64(is_64_bit), sourceEvent(nullptr),
               counterId(counter_id), value(0), resetValue(false),
               pmu(pmuReference) {}
 
@@ -572,6 +573,9 @@ class PMU : public SimObject, public ArmISA::BaseISADevice
     void updateAllCounters();
 
   protected: /* State that needs to be serialized */
+    /** Determine whether to use 64-bit or 32-bit counters. */
+    bool use64bitCounters;
+
     /** Performance Monitor Count Enable Register */
     RegVal reg_pmcnten;
 

From a83f699f1d35a1bb483d9f493f250c388178bb0f Mon Sep 17 00:00:00 2001
From: Richard Cooper <richard.cooper@arm.com>
Date: Thu, 8 Sep 2022 18:46:16 +0100
Subject: [PATCH 333/492] configs: Add Tarmac tracing option to the simple Arm
 configs

gem5 supports Tarmac trace generation for Arm simulations, but there
are no examples of how to use this feature.

This patch adds a `--tarmac-gen` option to three of the simple Arm
configs. Tarmac generation is useful for out-of-the-box users, and
this patch also provides an example of how to use the Tarmac
generation feature.

Change-Id: I0d3c523b5c0bb6d94de93bc502e4451622fb635d
Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/69684
Reviewed-by: Giacomo Travaglini <giacomo.travaglini@arm.com>
Maintainer: Giacomo Travaglini <giacomo.travaglini@arm.com>
Tested-by: kokoro <noreply+kokoro@google.com>
---
 configs/example/arm/baremetal.py  | 21 ++++++++++-
 configs/example/arm/devices.py    | 61 +++++++++++++++++++++++++------
 configs/example/arm/starter_fs.py | 19 +++++++++-
 configs/example/arm/starter_se.py | 21 ++++++++++-
 4 files changed, 106 insertions(+), 16 deletions(-)

diff --git a/configs/example/arm/baremetal.py b/configs/example/arm/baremetal.py
index 0072c1d629..4af1ff17c9 100644
--- a/configs/example/arm/baremetal.py
+++ b/configs/example/arm/baremetal.py
@@ -1,4 +1,4 @@
-# Copyright (c) 2016-2017,2019-2021 ARM Limited
+# Copyright (c) 2016-2017,2019-2022 Arm Limited
 # All rights reserved.
 #
 # The license below extends only to copyright in the software and shall
@@ -123,7 +123,13 @@ def create(args):
     # Add CPU clusters to the system
     system.cpu_cluster = [
         devices.ArmCpuCluster(
-            system, args.num_cores, args.cpu_freq, "1.0V", *cpu_types[args.cpu]
+            system,
+            args.num_cores,
+            args.cpu_freq,
+            "1.0V",
+            *cpu_types[args.cpu],
+            tarmac_gen=args.tarmac_gen,
+            tarmac_dest=args.tarmac_dest,
         )
     ]
 
@@ -230,6 +236,17 @@ def main():
     )
     parser.add_argument("--checkpoint", action="store_true")
     parser.add_argument("--restore", type=str, default=None)
+    parser.add_argument(
+        "--tarmac-gen",
+        action="store_true",
+        help="Write a Tarmac trace.",
+    )
+    parser.add_argument(
+        "--tarmac-dest",
+        choices=TarmacDump.vals,
+        default="stdoutput",
+        help="Destination for the Tarmac trace output. [Default: stdoutput]",
+    )
     parser.add_argument(
         "--dtb-gen",
         action="store_true",
diff --git a/configs/example/arm/devices.py b/configs/example/arm/devices.py
index 3f005a49aa..02574d2802 100644
--- a/configs/example/arm/devices.py
+++ b/configs/example/arm/devices.py
@@ -106,6 +106,8 @@ class ArmCpuCluster(CpuCluster):
         l1i_type,
         l1d_type,
         l2_type,
+        tarmac_gen=False,
+        tarmac_dest=None,
     ):
         super().__init__()
         self._cpu_type = cpu_type
@@ -122,6 +124,12 @@ class ArmCpuCluster(CpuCluster):
 
         self.generate_cpus(cpu_type, num_cpus)
 
+        for cpu in self.cpus:
+            if tarmac_gen:
+                cpu.tracer = TarmacTracer()
+                if tarmac_dest is not None:
+                    cpu.tracer.outfile = tarmac_dest
+
         system.addCpuCluster(self)
 
     def addL1(self):
@@ -177,23 +185,54 @@ class ArmCpuCluster(CpuCluster):
 
 
 class AtomicCluster(ArmCpuCluster):
-    def __init__(self, system, num_cpus, cpu_clock, cpu_voltage="1.0V"):
-        cpu_config = [
-            ObjectList.cpu_list.get("AtomicSimpleCPU"),
-            None,
-            None,
-            None,
-        ]
-        super().__init__(system, num_cpus, cpu_clock, cpu_voltage, *cpu_config)
+    def __init__(
+        self,
+        system,
+        num_cpus,
+        cpu_clock,
+        cpu_voltage="1.0V",
+        tarmac_gen=False,
+        tarmac_dest=None,
+    ):
+        super().__init__(
+            system,
+            num_cpus,
+            cpu_clock,
+            cpu_voltage,
+            cpu_type=ObjectList.cpu_list.get("AtomicSimpleCPU"),
+            l1i_type=None,
+            l1d_type=None,
+            l2_type=None,
+            tarmac_gen=tarmac_gen,
+            tarmac_dest=tarmac_dest,
+        )
 
     def addL1(self):
         pass
 
 
 class KvmCluster(ArmCpuCluster):
-    def __init__(self, system, num_cpus, cpu_clock, cpu_voltage="1.0V"):
-        cpu_config = [ObjectList.cpu_list.get("ArmV8KvmCPU"), None, None, None]
-        super().__init__(system, num_cpus, cpu_clock, cpu_voltage, *cpu_config)
+    def __init__(
+        self,
+        system,
+        num_cpus,
+        cpu_clock,
+        cpu_voltage="1.0V",
+        tarmac_gen=False,
+        tarmac_dest=None,
+    ):
+        super().__init__(
+            system,
+            num_cpus,
+            cpu_clock,
+            cpu_voltage,
+            cpu_type=ObjectList.cpu_list.get("ArmV8KvmCPU"),
+            l1i_type=None,
+            l1d_type=None,
+            l2_type=None,
+            tarmac_gen=tarmac_gen,
+            tarmac_dest=tarmac_dest,
+        )
 
     def addL1(self):
         pass
diff --git a/configs/example/arm/starter_fs.py b/configs/example/arm/starter_fs.py
index 48cbbdb3e6..cc5f63f554 100644
--- a/configs/example/arm/starter_fs.py
+++ b/configs/example/arm/starter_fs.py
@@ -129,7 +129,13 @@ def create(args):
     # Add CPU clusters to the system
     system.cpu_cluster = [
         devices.ArmCpuCluster(
-            system, args.num_cores, args.cpu_freq, "1.0V", *cpu_types[args.cpu]
+            system,
+            args.num_cores,
+            args.cpu_freq,
+            "1.0V",
+            *cpu_types[args.cpu],
+            tarmac_gen=args.tarmac_gen,
+            tarmac_dest=args.tarmac_dest,
         )
     ]
 
@@ -257,6 +263,17 @@ def main():
         default="2GB",
         help="Specify the physical memory size",
     )
+    parser.add_argument(
+        "--tarmac-gen",
+        action="store_true",
+        help="Write a Tarmac trace.",
+    )
+    parser.add_argument(
+        "--tarmac-dest",
+        choices=TarmacDump.vals,
+        default="stdoutput",
+        help="Destination for the Tarmac trace output. [Default: stdoutput]",
+    )
     parser.add_argument("--checkpoint", action="store_true")
     parser.add_argument("--restore", type=str, default=None)
 
diff --git a/configs/example/arm/starter_se.py b/configs/example/arm/starter_se.py
index 6b4dce9d64..33514c7b78 100644
--- a/configs/example/arm/starter_se.py
+++ b/configs/example/arm/starter_se.py
@@ -1,4 +1,4 @@
-# Copyright (c) 2016-2017, 2023 ARM Limited
+# Copyright (c) 2016-2017, 2022-2023 Arm Limited
 # All rights reserved.
 #
 # The license below extends only to copyright in the software and shall
@@ -96,7 +96,13 @@ class SimpleSeSystem(System):
         # Add CPUs to the system. A cluster of CPUs typically have
         # private L1 caches and a shared L2 cache.
         self.cpu_cluster = devices.ArmCpuCluster(
-            self, args.num_cores, args.cpu_freq, "1.2V", *cpu_types[args.cpu]
+            self,
+            args.num_cores,
+            args.cpu_freq,
+            "1.2V",
+            *cpu_types[args.cpu],
+            tarmac_gen=args.tarmac_gen,
+            tarmac_dest=args.tarmac_dest,
         )
 
         # Create a cache hierarchy (unless we are simulating a
@@ -215,6 +221,17 @@ def main():
         default="2GB",
         help="Specify the physical memory size",
     )
+    parser.add_argument(
+        "--tarmac-gen",
+        action="store_true",
+        help="Write a Tarmac trace.",
+    )
+    parser.add_argument(
+        "--tarmac-dest",
+        choices=TarmacDump.vals,
+        default="stdoutput",
+        help="Destination for the Tarmac trace output. [Default: stdoutput]",
+    )
 
     args = parser.parse_args()
 

From 5138092607ea86b9453f9e6889365fcddb206b20 Mon Sep 17 00:00:00 2001
From: Richard Cooper <richard.cooper@arm.com>
Date: Thu, 8 Sep 2022 20:57:03 +0100
Subject: [PATCH 334/492] configs: Make the configuration of the gicv4
 parameter robust

Only the GICv3 model has a `gicv4` parameter, causing the current
`baremetal.py` config to throw an exception when used with the
VExpress_GEM5_V1 platform containing a GICv2.

This patch checks for the existence of the `gicv4` parameter, allowing
all VExpress platforms to be used.

Change-Id: I72667a9caee64fa497bda516217cd424050eb242
Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/69685
Tested-by: kokoro <noreply+kokoro@google.com>
Reviewed-by: Giacomo Travaglini <giacomo.travaglini@arm.com>
Maintainer: Giacomo Travaglini <giacomo.travaglini@arm.com>
---
 configs/example/arm/baremetal.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/configs/example/arm/baremetal.py b/configs/example/arm/baremetal.py
index 4af1ff17c9..345596d6e1 100644
--- a/configs/example/arm/baremetal.py
+++ b/configs/example/arm/baremetal.py
@@ -142,7 +142,8 @@ def create(args):
     system.auto_reset_addr = True
 
     # Using GICv3
-    system.realview.gic.gicv4 = False
+    if hasattr(system.realview.gic, "gicv4"):
+        system.realview.gic.gicv4 = False
 
     system.highest_el_is_64 = True
 

From c8496d8c4d58a27ee91fc905957cce8f5486075b Mon Sep 17 00:00:00 2001
From: Richard Cooper <richard.cooper@arm.com>
Date: Tue, 4 Oct 2022 10:51:38 +0100
Subject: [PATCH 335/492] configs: Add the O3 CPU as an option to baremetal.py

Adds the O3_ARM_v7a CPU model as an extra option for the `--cpu-type`
to `configs/example/arm/baremetal.py`.

Change-Id: I717b168945bec22fb5ae17e37c2854df844bcb4f
Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/69686
Tested-by: kokoro <noreply+kokoro@google.com>
Reviewed-by: Giacomo Travaglini <giacomo.travaglini@arm.com>
Maintainer: Giacomo Travaglini <giacomo.travaglini@arm.com>
---
 configs/example/arm/baremetal.py | 7 +++++++
 1 file changed, 7 insertions(+)

diff --git a/configs/example/arm/baremetal.py b/configs/example/arm/baremetal.py
index 345596d6e1..9caab9db84 100644
--- a/configs/example/arm/baremetal.py
+++ b/configs/example/arm/baremetal.py
@@ -52,6 +52,7 @@ from common import SysPaths
 from common import MemConfig
 from common import ObjectList
 from common.cores.arm import HPI
+from common.cores.arm import O3_ARM_v7a
 
 import devices
 import workloads
@@ -63,6 +64,12 @@ cpu_types = {
     "atomic": (AtomicSimpleCPU, None, None, None),
     "minor": (MinorCPU, devices.L1I, devices.L1D, devices.L2),
     "hpi": (HPI.HPI, HPI.HPI_ICache, HPI.HPI_DCache, HPI.HPI_L2),
+    "o3": (
+        O3_ARM_v7a.O3_ARM_v7a_3,
+        O3_ARM_v7a.O3_ARM_v7a_ICache,
+        O3_ARM_v7a.O3_ARM_v7a_DCache,
+        O3_ARM_v7a.O3_ARM_v7aL2,
+    ),
 }
 
 

From 80eb8be3cfd93255120c6338296366999317973f Mon Sep 17 00:00:00 2001
From: Richard Cooper <richard.cooper@arm.com>
Date: Fri, 9 Sep 2022 12:36:43 +0100
Subject: [PATCH 336/492] configs: Update Arm simple configs to enable
 --interactive option

Removed the calls to `sys.exit()` from the Arm simple configs. These
calls terminate gem5's embedded Python interpreter and gem5 at the end
of the config script, preventing gem5 from dropping into the
interactive IPython shell when the `--interactive` option has been
specified.

Change-Id: I0c350b0d107f297691255361d25c566c889f9469
Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/69687
Reviewed-by: Giacomo Travaglini <giacomo.travaglini@arm.com>
Maintainer: Giacomo Travaglini <giacomo.travaglini@arm.com>
Tested-by: kokoro <noreply+kokoro@google.com>
---
 configs/example/arm/baremetal.py  | 4 +---
 configs/example/arm/starter_fs.py | 4 +---
 configs/example/arm/starter_se.py | 3 +--
 3 files changed, 3 insertions(+), 8 deletions(-)

diff --git a/configs/example/arm/baremetal.py b/configs/example/arm/baremetal.py
index 9caab9db84..ab24fd3d62 100644
--- a/configs/example/arm/baremetal.py
+++ b/configs/example/arm/baremetal.py
@@ -174,11 +174,9 @@ def run(args):
             m5.checkpoint(os.path.join(cpt_dir))
             print("Checkpoint done.")
         else:
-            print(exit_msg, " @ ", m5.curTick())
+            print(f"{exit_msg} ({event.getCode()}) @ {m5.curTick()}")
             break
 
-    sys.exit(event.getCode())
-
 
 def main():
     parser = argparse.ArgumentParser(epilog=__doc__)
diff --git a/configs/example/arm/starter_fs.py b/configs/example/arm/starter_fs.py
index cc5f63f554..ebed18864d 100644
--- a/configs/example/arm/starter_fs.py
+++ b/configs/example/arm/starter_fs.py
@@ -194,11 +194,9 @@ def run(args):
             m5.checkpoint(os.path.join(cpt_dir))
             print("Checkpoint done.")
         else:
-            print(exit_msg, " @ ", m5.curTick())
+            print(f"{exit_msg} ({event.getCode()}) @ {m5.curTick()}")
             break
 
-    sys.exit(event.getCode())
-
 
 def main():
     parser = argparse.ArgumentParser(epilog=__doc__)
diff --git a/configs/example/arm/starter_se.py b/configs/example/arm/starter_se.py
index 33514c7b78..f21f399675 100644
--- a/configs/example/arm/starter_se.py
+++ b/configs/example/arm/starter_se.py
@@ -257,8 +257,7 @@ def main():
     # Print the reason for the simulation exit. Some exit codes are
     # requests for service (e.g., checkpoints) from the simulation
     # script. We'll just ignore them here and exit.
-    print(event.getCause(), " @ ", m5.curTick())
-    sys.exit(event.getCode())
+    print(f"{event.getCause()} ({event.getCode()}) @ {m5.curTick()}")
 
 
 if __name__ == "__m5_main__":

From 9ec1b939808a8538d9ae581c709b6ea4c3fc7b5d Mon Sep 17 00:00:00 2001
From: Richard Cooper <richard.cooper@arm.com>
Date: Tue, 21 Mar 2023 13:39:18 +0000
Subject: [PATCH 337/492] configs: Add --exit-on-uart-eot flag to Arm
 baremetal.py config

Many benchmarks signal their termination by writing an EOT character
to the UART. This change adds an option to the Arm `baremetal.py`
example script to exit the simulation when an EOT character is
detected on any of the UARTs.

Change-Id: Ibfce9800c47090714258dbdbc5d6cee5ee6fb952
Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/69688
Reviewed-by: Giacomo Travaglini <giacomo.travaglini@arm.com>
Maintainer: Giacomo Travaglini <giacomo.travaglini@arm.com>
Tested-by: kokoro <noreply+kokoro@google.com>
---
 configs/example/arm/baremetal.py | 12 +++++++++++-
 1 file changed, 11 insertions(+), 1 deletion(-)

diff --git a/configs/example/arm/baremetal.py b/configs/example/arm/baremetal.py
index ab24fd3d62..8ffd2b48e0 100644
--- a/configs/example/arm/baremetal.py
+++ b/configs/example/arm/baremetal.py
@@ -1,4 +1,4 @@
-# Copyright (c) 2016-2017,2019-2022 Arm Limited
+# Copyright (c) 2016-2017,2019-2023 Arm Limited
 # All rights reserved.
 #
 # The license below extends only to copyright in the software and shall
@@ -157,6 +157,10 @@ def create(args):
     workload_class = workloads.workload_list.get(args.workload)
     system.workload = workload_class(object_file, system)
 
+    if args.exit_on_uart_eot:
+        for uart in system.realview.uart:
+            uart.end_on_eot = True
+
     return system
 
 
@@ -253,6 +257,12 @@ def main():
         default="stdoutput",
         help="Destination for the Tarmac trace output. [Default: stdoutput]",
     )
+    parser.add_argument(
+        "--exit-on-uart-eot",
+        action="store_true",
+        help="Exit simulation if any of the UARTs receive an EOT. Many "
+        "workloads signal termination by sending an EOT character.",
+    )
     parser.add_argument(
         "--dtb-gen",
         action="store_true",

From 851e469e55b69533a36de634bd1d1f424b31b07e Mon Sep 17 00:00:00 2001
From: Hoa Nguyen <hoanguyen@ucdavis.edu>
Date: Thu, 13 Apr 2023 14:20:23 -0700
Subject: [PATCH 338/492] scons: Add "--no-duplicate-sources" option to
 SConstruct in util/

Patch [1] caused building util/m5 to fail due to the flag was not
an option in the SConstruct file. It is apparently the case for other
programs in util/ relying on scons.

This patch fixes the above problem, and also adheres to the default
behavior introduced by [2].

[1] This patch introduced the "--no-duplicate-sources" flag to the
scons build in util/
https://gem5-review.googlesource.com/c/public/gem5/+/68518

[2] This patch turns this flag off by default,
https://gem5-review.googlesource.com/c/public/gem5/+/69717

Change-Id: I51376f7b3bf06438b7bc7ff84bc599deecac5bd1
Signed-off-by: Hoa Nguyen <hoanguyen@ucdavis.edu>
Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/69797
Maintainer: Bobby Bruce <bbruce@ucdavis.edu>
Reviewed-by: Alex Richardson <alexrichardson@google.com>
Tested-by: kokoro <noreply+kokoro@google.com>
Reviewed-by: Ayaz Akram <yazakram@ucdavis.edu>
---
 util/m5/SConstruct         | 3 +++
 util/statetrace/SConstruct | 6 ++++++
 util/tlm/SConstruct        | 6 ++++++
 3 files changed, 15 insertions(+)

diff --git a/util/m5/SConstruct b/util/m5/SConstruct
index c2c4a50a95..7f07b942fc 100644
--- a/util/m5/SConstruct
+++ b/util/m5/SConstruct
@@ -49,6 +49,9 @@ AddOption('--debug-build', dest='debug_build', action='store_true',
 AddOption('--run-tests', dest='run_tests', action='store_true',
           help='Enable test output xml files as build targets.')
 AddOption('--verbose', dest='verbose', action='store_true')
+AddOption('--no-duplicate-sources', action='store_false', default=True,
+          dest='duplicate_sources',
+          help='Do not create symlinks to sources in the build directory')
 
 # Universal settings.
 if GetOption('debug_build'):
diff --git a/util/statetrace/SConstruct b/util/statetrace/SConstruct
index 945976e8c5..2e1a631a10 100644
--- a/util/statetrace/SConstruct
+++ b/util/statetrace/SConstruct
@@ -57,6 +57,12 @@ main['CXXFLAGS'] = "-O3 -ggdb $_CPPINCFLAGS"
 
 main['CXX'] = ARGUMENTS.get('CXX', main['CXX'])
 
+# An option not to link source files in the build directory.
+# Not enabled by default.
+AddOption('--no-duplicate-sources', action='store_false', default=True,
+          dest='duplicate_sources',
+          help='Do not create symlinks to sources in the build directory')
+
 for arch in arches:
     env = main.Clone()
     env['CXX'] = ARGUMENTS.get(arch.upper() + 'CXX', env['CXX'])
diff --git a/util/tlm/SConstruct b/util/tlm/SConstruct
index 6c65cfddfa..3f4abf8b08 100644
--- a/util/tlm/SConstruct
+++ b/util/tlm/SConstruct
@@ -80,6 +80,12 @@ sys.path.append(gem5_root + '/src/python')
 AddOption('--no-colors', dest='use_colors', action='store_false',
           help="Don't add color to abbreviated scons output")
 
+# An option not to link source files in the build directory.
+# Not enabled by default.
+AddOption('--no-duplicate-sources', action='store_false', default=True,
+          dest='duplicate_sources',
+          help='Do not create symlinks to sources in the build directory')
+
 env.SConsignFile('build/systemc/sconsign')
 SConscript(gem5_root + '/ext/systemc/SConscript',
            variant_dir='build/systemc',

From 76d1d024dac2300989c7c9beaaa33ba9983e84e4 Mon Sep 17 00:00:00 2001
From: "Bobby R. Bruce" <bbruce@ucdavis.edu>
Date: Wed, 15 Mar 2023 16:29:59 -0700
Subject: [PATCH 339/492] stdlib: Fix SwitchableProcessor use in SE mode

The SwitchableProcessors in the standard library have switched-in and
switched-out cores. The `get_cores` API in the stdlib only returns
switched-in cores. In most uses this is desirable.

In the case of setting workloads in SE mode it's necessary to set the
workload to every core, switched-in and switched-out. As the `get_cores`
function was used for this, SwitchableProcessors were failing when used
in SE Mode.

This patch checks the processor type and, if a SwitchableProcessor, uses
the SwitchableProcessor's special `_all_cores` function which gets all
the cores, regardless as to their switched-in/switched-out status.

Issue-on: https://gem5.atlassian.net/browse/GEM5-1320

Change-Id: I0b7a699ac6196e827667955bef7afa37b2648744
Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/68997
Reviewed-by: Jason Lowe-Power <power.jg@gmail.com>
Tested-by: kokoro <noreply+kokoro@google.com>
Maintainer: Bobby Bruce <bbruce@ucdavis.edu>
Reviewed-by: Bobby Bruce <bbruce@ucdavis.edu>
---
 .../components/boards/se_binary_workload.py   | 21 +++++++++++++++++--
 1 file changed, 19 insertions(+), 2 deletions(-)

diff --git a/src/python/gem5/components/boards/se_binary_workload.py b/src/python/gem5/components/boards/se_binary_workload.py
index 98fe840a27..c62a1b67ea 100644
--- a/src/python/gem5/components/boards/se_binary_workload.py
+++ b/src/python/gem5/components/boards/se_binary_workload.py
@@ -35,6 +35,8 @@ from ...resources.resource import (
     SimpointDirectoryResource,
 )
 
+from ..processors.switchable_processor import SwitchableProcessor
+
 from gem5.resources.elfie import ELFieInfo
 from gem5.resources.looppoint import Looppoint
 
@@ -112,8 +114,23 @@ class SEBinaryWorkload:
         if env_list is not None:
             process.env = env_list
 
-        for core in self.get_processor().get_cores():
-            core.set_workload(process)
+        if isinstance(self.get_processor(), SwitchableProcessor):
+            # This is a hack to get switchable processors working correctly in
+            # SE mode. The "get_cores" API for processors only gets the current
+            # switched-in cores and, in most cases, this is what the script
+            # required. In the case there are switched-out cores via the
+            # SwitchableProcessor, we sometimes need to apply things to ALL
+            # cores (switched-in or switched-out). In this case we have an
+            # `__all_cores` function. Here we must apply the process to every
+            # core.
+            #
+            # A better API for this which avoids `isinstance` checks would be
+            # welcome.
+            for core in self.get_processor()._all_cores():
+                core.set_workload(process)
+        else:
+            for core in self.get_processor().get_cores():
+                core.set_workload(process)
 
         # Set whether to exit on work items for the se_workload
         self.exit_on_work_items = exit_on_work_items

From 09023d4158703a33f483593ffa76001a805d015b Mon Sep 17 00:00:00 2001
From: Hoa Nguyen <hoanguyen@ucdavis.edu>
Date: Sun, 16 Apr 2023 19:36:27 +0000
Subject: [PATCH 340/492] mem-ruby: Not flushing data to memory when there's no
 dirty block

Currently, taking a checkpoint with a ruby cache involves moving all
the dirty data in cache to memory. This is done by keeping **only**
simulating the cache until all dirty data are flushed to the memory
before taking the checkpoint.

However, when the cache does not have dirty data, it is a problem if
we keep simulating the cache. E.g., calling checkpoint caused the gem5
"empty event queue" assertion fault when running the ruby cache in
atomic_noncaching mode. Since the mode bypasses the cache, all blocks
are invalid and do not contain dirty data. Subsequently, there is no
event placed to the event queue when we keep **only** simulating the
cache before taking the checkpoint.

This patch fixes this problem by checking if there is any actionable
item when trying to move dirty data to memory. If there is no block
contains dirty data, we simply choose not to continue simulating the
cache before taking the checkpoint.

Change-Id: Idfa09be51274c7fc8a340e9e33167f5b32d1b866
Signed-off-by: Hoa Nguyen <hoanguyen@ucdavis.edu>
Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/69897
Reviewed-by: Jason Lowe-Power <power.jg@gmail.com>
Tested-by: kokoro <noreply+kokoro@google.com>
Maintainer: Bobby Bruce <bbruce@ucdavis.edu>
---
 src/mem/ruby/system/CacheRecorder.cc | 6 ++++++
 src/mem/ruby/system/CacheRecorder.hh | 2 ++
 src/mem/ruby/system/RubySystem.cc    | 7 +++++++
 3 files changed, 15 insertions(+)

diff --git a/src/mem/ruby/system/CacheRecorder.cc b/src/mem/ruby/system/CacheRecorder.cc
index e87b3f20b2..20a8a30ebc 100644
--- a/src/mem/ruby/system/CacheRecorder.cc
+++ b/src/mem/ruby/system/CacheRecorder.cc
@@ -207,5 +207,11 @@ CacheRecorder::aggregateRecords(uint8_t **buf, uint64_t total_size)
     return current_size;
 }
 
+uint64_t
+CacheRecorder::getNumRecords() const
+{
+    return m_records.size();
+}
+
 } // namespace ruby
 } // namespace gem5
diff --git a/src/mem/ruby/system/CacheRecorder.hh b/src/mem/ruby/system/CacheRecorder.hh
index 8dbd67f3ff..be95590313 100644
--- a/src/mem/ruby/system/CacheRecorder.hh
+++ b/src/mem/ruby/system/CacheRecorder.hh
@@ -85,6 +85,8 @@ class CacheRecorder
 
     uint64_t aggregateRecords(uint8_t **data, uint64_t size);
 
+    uint64_t getNumRecords() const;
+
     /*!
      * Function for flushing the memory contents of the caches to the
      * main memory. It goes through the recorded contents of the caches,
diff --git a/src/mem/ruby/system/RubySystem.cc b/src/mem/ruby/system/RubySystem.cc
index 5a81513720..b38c903b09 100644
--- a/src/mem/ruby/system/RubySystem.cc
+++ b/src/mem/ruby/system/RubySystem.cc
@@ -218,6 +218,13 @@ RubySystem::memWriteback()
     }
     DPRINTF(RubyCacheTrace, "Cache Trace Complete\n");
 
+    // If there is no dirty block, we don't need to flush the cache
+    if (m_cache_recorder->getNumRecords() == 0)
+    {
+        m_cooldown_enabled = false;
+        return;
+    }
+
     // save the current tick value
     Tick curtick_original = curTick();
     DPRINTF(RubyCacheTrace, "Recording current tick %ld\n", curtick_original);

From e7ae5290f56cb8b729b36ff882ea9861e7cb91e7 Mon Sep 17 00:00:00 2001
From: Yen-lin Lai <yenlinlai@google.com>
Date: Thu, 13 Apr 2023 10:45:58 +0800
Subject: [PATCH 341/492] base: Fix VNC server initialization

In a previous commit, the initialization of dataFd to -1 is removed.
Add it back so VNC server can properly accept connection.

Fixes: 67fb75d659 ("base,cpu,dev,sim: Pull common logic into ListenSocket::listen().")
Change-Id: I4246d1fddc766cb190a04d4f984fc1ce73af3fb0
Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/69757
Tested-by: kokoro <noreply+kokoro@google.com>
Reviewed-by: Gabe Black <gabe.black@gmail.com>
Maintainer: Gabe Black <gabe.black@gmail.com>
---
 src/base/vnc/vncserver.cc | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/base/vnc/vncserver.cc b/src/base/vnc/vncserver.cc
index f34241968e..4b1ddae307 100644
--- a/src/base/vnc/vncserver.cc
+++ b/src/base/vnc/vncserver.cc
@@ -117,7 +117,7 @@ VncServer::DataEvent::process(int revent)
  */
 VncServer::VncServer(const Params &p)
     : VncInput(p), listenEvent(NULL), dataEvent(NULL), number(p.number),
-      listener(p.port.build(p.name)),
+      dataFd(-1), listener(p.port.build(p.name)),
       sendUpdate(false), supportsRawEnc(false), supportsResizeEnc(false)
 {
     if (p.port)

From c127a38f485ff22adabe07015dd201cf5a110f9b Mon Sep 17 00:00:00 2001
From: Melissa Jost <melissakjost@gmail.com>
Date: Fri, 14 Apr 2023 17:02:01 -0700
Subject: [PATCH 342/492] base: Use <experimental/filesystem> include for GCC
 v7

This change adds include guards to the inclusion of the
filesystem module to ensure GCC 7 can work properly,
addressing an issue in the compiler tests.

Change-Id: I642f79bd801baf4766572368b9339e34be46d1c3
Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/69840
Maintainer: Jason Lowe-Power <power.jg@gmail.com>
Reviewed-by: Richard Cooper <richard.cooper@arm.com>
Tested-by: kokoro <noreply+kokoro@google.com>
Reviewed-by: Jason Lowe-Power <power.jg@gmail.com>
---
 src/mem/shared_memory_server.cc | 13 ++++++++++++-
 1 file changed, 12 insertions(+), 1 deletion(-)

diff --git a/src/mem/shared_memory_server.cc b/src/mem/shared_memory_server.cc
index 3e49164e6d..f99655c475 100644
--- a/src/mem/shared_memory_server.cc
+++ b/src/mem/shared_memory_server.cc
@@ -39,7 +39,18 @@
 #include <algorithm>
 #include <cerrno>
 #include <cstring>
-#include <filesystem>
+#if (defined(__GNUC__) && (__GNUC__ >= 8)) || defined(__clang__)
+    #include <filesystem>
+#else
+    // This is only reachable if we're using GCC 7 (note: gem5 does not support
+    // GCC versions older than GCC 7 as they do not support the C++17
+    // standard).
+    // If we're using GCC 7, we need to use <experimental/filesystem>.
+    #include <experimental/filesystem>
+    namespace std {
+        namespace filesystem = experimental::filesystem;
+    }
+#endif
 
 #include "base/logging.hh"
 #include "base/output.hh"

From 2f3f73a098a79d4edc7377a69ac6180fe08bfa3d Mon Sep 17 00:00:00 2001
From: Matthew Poremba <matthew.poremba@amd.com>
Date: Thu, 20 Apr 2023 13:12:10 -0500
Subject: [PATCH 343/492] configs: Use higher dmesg level for GPUFS

The dmesg level is currently set to 3 which will not display errors if
the amdgpu driver fails to load. Changing to level 8 will show errors in
the gem5 terminal and is not too spammy. This will help GPUFS developers
with bug reports since we would actually be able to observe an error.
Currently if the driver fails to load, there is no way to detect it and
applications will attempt to run, usually failing on getting device
properties.

Change-Id: I56b9581c1a12a8ce329066d18d6a072d006c096d
Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/69977
Tested-by: kokoro <noreply+kokoro@google.com>
Reviewed-by: Matt Sinclair <mattdsinclair@gmail.com>
Maintainer: Matt Sinclair <mattdsinclair@gmail.com>
---
 configs/example/gpufs/hip_cookbook.py | 2 +-
 configs/example/gpufs/hip_rodinia.py  | 2 +-
 configs/example/gpufs/hip_samples.py  | 2 +-
 configs/example/gpufs/vega10_kvm.py   | 2 +-
 4 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/configs/example/gpufs/hip_cookbook.py b/configs/example/gpufs/hip_cookbook.py
index 87c7547dd3..6a7bb428db 100644
--- a/configs/example/gpufs/hip_cookbook.py
+++ b/configs/example/gpufs/hip_cookbook.py
@@ -42,7 +42,7 @@ from ruby import Ruby
 cookbook_runscript = """\
 export LD_LIBRARY_PATH=/opt/rocm/lib:$LD_LIBRARY_PATH
 export HSA_ENABLE_INTERRUPT=0
-dmesg -n3
+dmesg -n8
 dd if=/root/roms/vega10.rom of=/dev/mem bs=1k seek=768 count=128
 if [ ! -f /lib/modules/`uname -r`/updates/dkms/amdgpu.ko ]; then
     echo "ERROR: Missing DKMS package for kernel `uname -r`. Exiting gem5."
diff --git a/configs/example/gpufs/hip_rodinia.py b/configs/example/gpufs/hip_rodinia.py
index 8ed951b55e..b8a7858fcd 100644
--- a/configs/example/gpufs/hip_rodinia.py
+++ b/configs/example/gpufs/hip_rodinia.py
@@ -43,7 +43,7 @@ from ruby import Ruby
 rodinia_runscript = """\
 export LD_LIBRARY_PATH=/opt/rocm/lib:$LD_LIBRARY_PATH
 export HSA_ENABLE_INTERRUPT=0
-dmesg -n3
+dmesg -n8
 dd if=/root/roms/vega10.rom of=/dev/mem bs=1k seek=768 count=128
 if [ ! -f /lib/modules/`uname -r`/updates/dkms/amdgpu.ko ]; then
     echo "ERROR: Missing DKMS package for kernel `uname -r`. Exiting gem5."
diff --git a/configs/example/gpufs/hip_samples.py b/configs/example/gpufs/hip_samples.py
index ccc1719639..9f83c2550e 100644
--- a/configs/example/gpufs/hip_samples.py
+++ b/configs/example/gpufs/hip_samples.py
@@ -42,7 +42,7 @@ from ruby import Ruby
 samples_runscript = """\
 export LD_LIBRARY_PATH=/opt/rocm/lib:$LD_LIBRARY_PATH
 export HSA_ENABLE_INTERRUPT=0
-dmesg -n3
+dmesg -n8
 dd if=/root/roms/vega10.rom of=/dev/mem bs=1k seek=768 count=128
 if [ ! -f /lib/modules/`uname -r`/updates/dkms/amdgpu.ko ]; then
     echo "ERROR: Missing DKMS package for kernel `uname -r`. Exiting gem5."
diff --git a/configs/example/gpufs/vega10_kvm.py b/configs/example/gpufs/vega10_kvm.py
index 54253bece5..9c7e4578f2 100644
--- a/configs/example/gpufs/vega10_kvm.py
+++ b/configs/example/gpufs/vega10_kvm.py
@@ -44,7 +44,7 @@ from ruby import Ruby
 demo_runscript = """\
 export LD_LIBRARY_PATH=/opt/rocm/lib:$LD_LIBRARY_PATH
 export HSA_ENABLE_INTERRUPT=0
-dmesg -n3
+dmesg -n8
 dd if=/root/roms/vega10.rom of=/dev/mem bs=1k seek=768 count=128
 if [ ! -f /lib/modules/`uname -r`/updates/dkms/amdgpu.ko ]; then
     echo "ERROR: Missing DKMS package for kernel `uname -r`. Exiting gem5."

From 70ef9b219ca7aa6aaf05f163dae20ff0dc259d13 Mon Sep 17 00:00:00 2001
From: Matthew Poremba <matthew.poremba@amd.com>
Date: Thu, 20 Apr 2023 13:16:36 -0500
Subject: [PATCH 344/492] configs: Add simple check for valid GPU MMIO trace

This file is a required input to the simulator for GPUFS. There seems to
be confusion from several users who are not providing this input. This
usually results in the amdgpu driver failing to load, leading to the
application under test exiting along with it.

This changeset adds a simple md5 hashsum check to compare against the
known good MMIO trace located in the gem5-resources repository.

Change-Id: I59819fc795a6bc4bc6badbd4d120db1246498987
Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/69978
Tested-by: kokoro <noreply+kokoro@google.com>
Reviewed-by: Matt Sinclair <mattdsinclair@gmail.com>
Maintainer: Matt Sinclair <mattdsinclair@gmail.com>
---
 configs/example/gpufs/runfs.py | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/configs/example/gpufs/runfs.py b/configs/example/gpufs/runfs.py
index 4a28068a11..52b79abfaa 100644
--- a/configs/example/gpufs/runfs.py
+++ b/configs/example/gpufs/runfs.py
@@ -30,6 +30,7 @@
 # System includes
 import argparse
 import math
+import hashlib
 
 # gem5 related
 import m5
@@ -145,6 +146,11 @@ def runGpuFSSystem(args):
         math.ceil(float(n_cu) / args.cu_per_scalar_cache)
     )
 
+    # Verify MMIO trace is valid
+    mmio_md5 = hashlib.md5(open(args.gpu_mmio_trace, "rb").read()).hexdigest()
+    if mmio_md5 != "c4ff3326ae8a036e329b8b595c83bd6d":
+        m5.util.panic("MMIO file does not match gem5 resources")
+
     system = makeGpuFSSystem(args)
 
     root = Root(

From c2c5cd10480c45f39dc30184afe273ba70f7d035 Mon Sep 17 00:00:00 2001
From: Matthew Poremba <matthew.poremba@amd.com>
Date: Thu, 20 Apr 2023 13:18:26 -0500
Subject: [PATCH 345/492] configs: Allow other CPU types in GPUFS

Previously the CPU type and memory modes were hardcoded for KVM, because
there was a deadlock bug. After some recent testing, this deadlock bug
no longer exists with the simple CPU models. Thus, changing the configs
to allow for other CPU models as a first step toward lifting the KVM
requirement from GPUFS.

Change-Id: Ib616c3ef60f173871421b55a8bb73b25ce2990b5
Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/69979
Tested-by: kokoro <noreply+kokoro@google.com>
Maintainer: Matt Sinclair <mattdsinclair@gmail.com>
Reviewed-by: Matt Sinclair <mattdsinclair@gmail.com>
---
 configs/example/gpufs/system/system.py | 9 ++++++---
 1 file changed, 6 insertions(+), 3 deletions(-)

diff --git a/configs/example/gpufs/system/system.py b/configs/example/gpufs/system/system.py
index a1b59ef20b..93f0194efb 100644
--- a/configs/example/gpufs/system/system.py
+++ b/configs/example/gpufs/system/system.py
@@ -61,7 +61,9 @@ def makeGpuFSSystem(args):
         panic("Need at least 2GB of system memory to load amdgpu module")
 
     # Use the common FSConfig to setup a Linux X86 System
-    (TestCPUClass, test_mem_mode, FutureClass) = Simulation.setCPUClass(args)
+    (TestCPUClass, test_mem_mode) = Simulation.getCPUClass(args.cpu_type)
+    if test_mem_mode == "atomic":
+        test_mem_mode = "atomic_noncaching"
     disks = [args.disk_image]
     if args.second_disk is not None:
         disks.extend([args.second_disk])
@@ -91,10 +93,11 @@ def makeGpuFSSystem(args):
 
     # Create specified number of CPUs. GPUFS really only needs one.
     system.cpu = [
-        X86KvmCPU(clk_domain=system.cpu_clk_domain, cpu_id=i)
+        TestCPUClass(clk_domain=system.cpu_clk_domain, cpu_id=i)
         for i in range(args.num_cpus)
     ]
-    system.kvm_vm = KvmVM()
+    if ObjectList.is_kvm_cpu(TestCPUClass):
+        system.kvm_vm = KvmVM()
 
     # Create AMDGPU and attach to southbridge
     shader = createGPU(system, args)

From c597361a6b9bb886d161e48b6f1605cc3903556e Mon Sep 17 00:00:00 2001
From: Matthew Poremba <matthew.poremba@amd.com>
Date: Fri, 21 Apr 2023 13:57:51 -0500
Subject: [PATCH 346/492] dev-amdgpu: Add writeROM method

For non-KVM CPUs the VBIOS memory falls into an I/O hole and therefore
gets routed to the PIO bus in gem5. This gets routed to the GPU in the
case of a ROM write. We write to the ROM as a way to "load" the VBIOS
without creating holes in the KVM VM.

This write method allows the same scripts as KVM to be used by writing
to the ROM area and overwriting what might already be there from the
--gpu-rom option.

Change-Id: I8c2d2aa05a823569a774dfdd3bf2d2e773f38683
Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/70037
Reviewed-by: Matt Sinclair <mattdsinclair@gmail.com>
Tested-by: kokoro <noreply+kokoro@google.com>
Maintainer: Matt Sinclair <mattdsinclair@gmail.com>
---
 src/dev/amdgpu/amdgpu_device.cc | 22 ++++++++++++++++++++++
 src/dev/amdgpu/amdgpu_device.hh |  1 +
 2 files changed, 23 insertions(+)

diff --git a/src/dev/amdgpu/amdgpu_device.cc b/src/dev/amdgpu/amdgpu_device.cc
index cb180b6dc5..3605882b74 100644
--- a/src/dev/amdgpu/amdgpu_device.cc
+++ b/src/dev/amdgpu/amdgpu_device.cc
@@ -107,6 +107,20 @@ AMDGPUDevice::readROM(PacketPtr pkt)
             pkt->getAddr(), rom_offset, rom_data);
 }
 
+void
+AMDGPUDevice::writeROM(PacketPtr pkt)
+{
+    assert(isROM(pkt->getAddr()));
+
+    Addr rom_offset = pkt->getAddr() - romRange.start();
+    uint64_t rom_data = pkt->getUintX(ByteOrder::little);
+
+    memcpy(rom.data() + rom_offset, &rom_data, pkt->getSize());
+
+    DPRINTF(AMDGPUDevice, "Write to addr %#x on ROM offset %#x data: %#x\n",
+            pkt->getAddr(), rom_offset, rom_data);
+}
+
 AddrRangeList
 AMDGPUDevice::getAddrRanges() const
 {
@@ -386,6 +400,14 @@ AMDGPUDevice::read(PacketPtr pkt)
 Tick
 AMDGPUDevice::write(PacketPtr pkt)
 {
+    if (isROM(pkt->getAddr())) {
+        writeROM(pkt);
+
+        dispatchAccess(pkt, false);
+
+        return pioDelay;
+    }
+
     int barnum = -1;
     Addr offset = 0;
     getBAR(pkt->getAddr(), barnum, offset);
diff --git a/src/dev/amdgpu/amdgpu_device.hh b/src/dev/amdgpu/amdgpu_device.hh
index ac31b95fd2..b64067a158 100644
--- a/src/dev/amdgpu/amdgpu_device.hh
+++ b/src/dev/amdgpu/amdgpu_device.hh
@@ -94,6 +94,7 @@ class AMDGPUDevice : public PciDevice
     AddrRange romRange;
     bool isROM(Addr addr) const { return romRange.contains(addr); }
     void readROM(PacketPtr pkt);
+    void writeROM(PacketPtr pkt);
 
     std::array<uint8_t, ROM_SIZE> rom;
 

From 6fdf0beedcac52054b4c3fcda38535f21b3f900f Mon Sep 17 00:00:00 2001
From: Hoa Nguyen <hoanguyen@ucdavis.edu>
Date: Thu, 20 Apr 2023 18:47:16 -0700
Subject: [PATCH 347/492] stdlib: write device tree after setting up bootloader
 in ARMBoard

The generation of the device tree in an arm system requires knowing
cpu-release-addr property, which is only available after setting up
the bootloader.

cpu-release-addr specifies where the secondary CPUs spin/sleep(?) before
being waken up by the kernel.

The incorrect cpu-release-addr causes booting an arm system using the
standard library with the arm's provided bootloader+linux_kernel to
fail to regconize more than 1 core.

Change-Id: Ice0e38492e2f77020b0e30c42dd4e8b7ee58e598
Signed-off-by: Hoa Nguyen <hoanguyen@ucdavis.edu>
Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/70017
Maintainer: Bobby Bruce <bbruce@ucdavis.edu>
Tested-by: kokoro <noreply+kokoro@google.com>
Reviewed-by: Jason Lowe-Power <power.jg@gmail.com>
Reviewed-by: Richard Cooper <richard.cooper@arm.com>
---
 src/python/gem5/components/boards/arm_board.py | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/src/python/gem5/components/boards/arm_board.py b/src/python/gem5/components/boards/arm_board.py
index 10e2c0eb82..b439edf970 100644
--- a/src/python/gem5/components/boards/arm_board.py
+++ b/src/python/gem5/components/boards/arm_board.py
@@ -320,10 +320,6 @@ class ArmBoard(ArmSystem, AbstractBoard, KernelDiskWorkload):
         # The workload needs to know the dtb_file.
         self.workload.dtb_filename = self._get_dtb_filename()
 
-        # Calling generateDtb from class ArmSystem to add memory information to
-        # the dtb file.
-        self.generateDtb(self._get_dtb_filename())
-
         # Finally we need to setup the bootloader for the ArmBoard. An ARM
         # system requires three inputs to simulate a full system: a disk image,
         # the kernel file and the bootloader file(s).
@@ -331,6 +327,10 @@ class ArmBoard(ArmSystem, AbstractBoard, KernelDiskWorkload):
             self, self._get_dtb_filename(), self._bootloader
         )
 
+        # Calling generateDtb from class ArmSystem to add memory information to
+        # the dtb file.
+        self.generateDtb(self._get_dtb_filename())
+
     def _get_dtb_filename(self) -> str:
         """Returns the dtb file location.
 

From 912795afd3cea17c10c561fd7edbc9b37a08f95e Mon Sep 17 00:00:00 2001
From: Melissa Jost <melissakjost@gmail.com>
Date: Thu, 13 Apr 2023 11:40:37 -0700
Subject: [PATCH 348/492] scons: Add stdc++fs and libc++experimental for clang
 LIBS env

This change fixes a failure in the compiler tests:
https://jenkins.gem5.org/job/compiler-checks/573/

These tests were failing due to the use of `std::filesystem`, which
requires 'stdc++fs' to be linked for clang versions 6
through 10.

Change-Id: I4fa03923e8dc616046dead939c77d49b301de36b
Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/69777
Reviewed-by: Bobby Bruce <bbruce@ucdavis.edu>
Maintainer: Bobby Bruce <bbruce@ucdavis.edu>
Tested-by: kokoro <noreply+kokoro@google.com>
---
 SConstruct | 12 ++++++++++++
 1 file changed, 12 insertions(+)

diff --git a/SConstruct b/SConstruct
index 7e8f177418..b784a045ec 100755
--- a/SConstruct
+++ b/SConstruct
@@ -512,6 +512,18 @@ for variant_path in variant_paths:
 
         env.Append(TCMALLOC_CCFLAGS=['-fno-builtin'])
 
+        if compareVersions(env['CXXVERSION'], "11") < 0:
+            # `libstdc++fs`` must be explicitly linked for `std::filesystem``
+            # in clang versions 6 through 10.
+            #
+            # In addition, for these versions, the
+            # `std::filesystem` is under the `experimental`
+            # namespace(`std::experimental::filesystem`).
+            #
+            # Note: gem5 does not support clang versions < 6.
+            env.Append(LIBS=['stdc++fs'])
+
+
         # On Mac OS X/Darwin we need to also use libc++ (part of XCode) as
         # opposed to libstdc++, as the later is dated.
         if sys.platform == "darwin":

From 74072cdc80bbcf108486403f4223c6a018f6daaf Mon Sep 17 00:00:00 2001
From: Melissa Jost <melissakjost@gmail.com>
Date: Thu, 13 Apr 2023 11:45:17 -0700
Subject: [PATCH 349/492] base: Update <experimental/filesystem> include

This change addresses an error in the compiler tests:
https://jenkins.gem5.org/job/compiler-checks/573/

For clang versions 6 through 10, as well as GCC 7,
in order to use the "filesystem" module, you must
include the experimental namespace.  In all newer
versions, you can use the "filesystem" module as is.

Because of this, include guards to handle this. They include
"<experimental/filesystem>" for the older clang versions and
the "<filesystem>" for all other versions.

As opposed to checking by version, we now check if the
filesystem library has been defined before using it.

Change-Id: I8fb8d4eaa33f3edc29b7626f44b82ee66ffe72be
Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/69778
Maintainer: Bobby Bruce <bbruce@ucdavis.edu>
Tested-by: kokoro <noreply+kokoro@google.com>
Reviewed-by: Bobby Bruce <bbruce@ucdavis.edu>
---
 src/base/socket.cc              | 13 ++++++++-----
 src/mem/shared_memory_server.cc | 14 +++++++++-----
 2 files changed, 17 insertions(+), 10 deletions(-)

diff --git a/src/base/socket.cc b/src/base/socket.cc
index 62f20717f4..06fc28631f 100644
--- a/src/base/socket.cc
+++ b/src/base/socket.cc
@@ -40,13 +40,16 @@
 
 #include <cerrno>
 
-#if (defined(__GNUC__) && (__GNUC__ >= 8)) || defined(__clang__)
+// check if filesystem library is available
+#if defined(__cpp_lib_filesystem) || __has_include(<filesystem>)
     #include <filesystem>
 #else
-    // This is only reachable if we're using GCC 7 (note: gem5 does not support
-    // GCC versions older than GCC 7 as they do not support the C++17
-    // standard).
-    // If we're using GCC 7, we need to use <experimental/filesystem>.
+    // This is only reachable if we're using GCC 7 or clang versions 6
+    // through 10 (note: gem5 does not support GCC versions older than
+    // GCC 7 or clang versions older than clang 6.0 as they do not
+    // support the C++17 standard).
+    // If we're using GCC 7 or clang versions 6 through 10, we need to use
+    // <experimental/filesystem>.
     #include <experimental/filesystem>
     namespace std {
         namespace filesystem = experimental::filesystem;
diff --git a/src/mem/shared_memory_server.cc b/src/mem/shared_memory_server.cc
index f99655c475..a4305d0e11 100644
--- a/src/mem/shared_memory_server.cc
+++ b/src/mem/shared_memory_server.cc
@@ -39,13 +39,17 @@
 #include <algorithm>
 #include <cerrno>
 #include <cstring>
-#if (defined(__GNUC__) && (__GNUC__ >= 8)) || defined(__clang__)
+
+// check if filesystem library is available
+#if defined(__cpp_lib_filesystem) || __has_include(<filesystem>)
     #include <filesystem>
 #else
-    // This is only reachable if we're using GCC 7 (note: gem5 does not support
-    // GCC versions older than GCC 7 as they do not support the C++17
-    // standard).
-    // If we're using GCC 7, we need to use <experimental/filesystem>.
+    // This is only reachable if we're using GCC 7 or clang versions 6
+    // through 10 (note: gem5 does not support GCC versions older than
+    // GCC 7 or clang versions older than clang 6.0 as they do not
+    // support the C++17 standard).
+    // If we're using GCC 7 or clang versions 6 through 10, we need to use
+    // <experimental/filesystem>.
     #include <experimental/filesystem>
     namespace std {
         namespace filesystem = experimental::filesystem;

From 540c3fc7ef323b4e67f0943881b5d1a2c457c010 Mon Sep 17 00:00:00 2001
From: Yang Liu <numbksco@gmail.com>
Date: Tue, 10 Jan 2023 14:29:52 +0800
Subject: [PATCH 350/492] arch: Add vector function unit and OpClass enums

These enums are needed for risc-v vector extension

Change-Id: Ia61682c43c89ac2043fb9d1d5c349dfd646fb88d
Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/67293
Tested-by: kokoro <noreply+kokoro@google.com>
Maintainer: Bobby Bruce <bbruce@ucdavis.edu>
Reviewed-by: Hoa Nguyen <hoanguyen@ucdavis.edu>
Reviewed-by: Roger Chang <rogerycchang@google.com>
---
 src/cpu/FuncUnit.py | 19 +++++++++++++++++++
 src/cpu/op_class.hh | 24 ++++++++++++++++++++++++
 2 files changed, 43 insertions(+)

diff --git a/src/cpu/FuncUnit.py b/src/cpu/FuncUnit.py
index 4a2733afc0..a1050de242 100644
--- a/src/cpu/FuncUnit.py
+++ b/src/cpu/FuncUnit.py
@@ -98,6 +98,25 @@ class OpClass(Enum):
         "FloatMemWrite",
         "IprAccess",
         "InstPrefetch",
+        "VectorUnitStrideLoad",
+        "VectorUnitStrideStore",
+        "VectorUnitStrideMaskLoad",
+        "VectorUnitStrideMaskStore",
+        "VectorStridedLoad",
+        "VectorStridedStore",
+        "VectorIndexedLoad",
+        "VectorIndexedStore",
+        "VectorUnitStrideFaultOnlyFirstLoad",
+        "VectorWholeRegisterLoad",
+        "VectorWholeRegisterStore",
+        "VectorIntegerArith",
+        "VectorFloatArith",
+        "VectorFloatConvert",
+        "VectorIntegerReduce",
+        "VectorFloatReduce",
+        "VectorMisc",
+        "VectorIntegerExtension",
+        "VectorConfig",
     ]
 
 
diff --git a/src/cpu/op_class.hh b/src/cpu/op_class.hh
index 4de018f21b..94d2794c76 100644
--- a/src/cpu/op_class.hh
+++ b/src/cpu/op_class.hh
@@ -108,6 +108,30 @@ static const OpClass FloatMemReadOp = enums::FloatMemRead;
 static const OpClass FloatMemWriteOp = enums::FloatMemWrite;
 static const OpClass IprAccessOp = enums::IprAccess;
 static const OpClass InstPrefetchOp = enums::InstPrefetch;
+static const OpClass VectorUnitStrideLoadOp = enums::VectorUnitStrideLoad;
+static const OpClass VectorUnitStrideStoreOp = enums::VectorUnitStrideStore;
+static const OpClass VectorUnitStrideMaskLoadOp
+             = enums::VectorUnitStrideMaskLoad;
+static const OpClass VectorUnitStrideMaskStoreOp
+             = enums::VectorUnitStrideMaskStore;
+static const OpClass VectorStridedLoadOp = enums::VectorStridedLoad;
+static const OpClass VectorStridedStoreOp = enums::VectorStridedStore;
+static const OpClass VectorIndexedLoadOp = enums::VectorIndexedLoad;
+static const OpClass VectorIndexedStoreOp = enums::VectorIndexedStore;
+static const OpClass VectorUnitStrideFaultOnlyFirstLoadOp
+             = enums::VectorUnitStrideFaultOnlyFirstLoad;
+static const OpClass VectorWholeRegisterLoadOp
+             = enums::VectorWholeRegisterLoad;
+static const OpClass VectorWholeRegisterStoreOp
+             = enums::VectorWholeRegisterStore;
+static const OpClass VectorIntegerArithOp = enums::VectorIntegerArith;
+static const OpClass VectorFloatArithOp = enums::VectorFloatArith;
+static const OpClass VectorFloatConvertOp = enums::VectorFloatConvert;
+static const OpClass VectorIntegerReduceOp = enums::VectorIntegerReduce;
+static const OpClass VectorFloatReduceOp = enums::VectorFloatReduce;
+static const OpClass VectorMiscOp = enums::VectorMisc;
+static const OpClass VectorIntegerExtensionOp = enums::VectorIntegerExtension;
+static const OpClass VectorConfigOp = enums::VectorConfig;
 static const OpClass Num_OpClasses = enums::Num_OpClass;
 
 } // namespace gem5

From 74fcc4d6b7e8c427ce9847759153e2ad7ac320ac Mon Sep 17 00:00:00 2001
From: Xuan Hu <huxuan@bosc.ac.cn>
Date: Mon, 20 Mar 2023 19:07:03 +0800
Subject: [PATCH 351/492] arch-riscv: refactor bitfields of insts

+ move bitfields of ExtMachInst defined in bitfields.hh
  to types.hh

Change-Id: Ic25e2fd1a887f87231268a4449d8755593919a0f
Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/68417
Maintainer: Bobby Bruce <bbruce@ucdavis.edu>
Tested-by: kokoro <noreply+kokoro@google.com>
Reviewed-by: Hoa Nguyen <hoanguyen@ucdavis.edu>
Reviewed-by: Roger Chang <rogerycchang@google.com>
---
 src/arch/riscv/decoder.cc         |  15 ++--
 src/arch/riscv/insts/amo.cc       |  37 +++++----
 src/arch/riscv/insts/bitfields.hh |  19 -----
 src/arch/riscv/insts/mem.cc       |   1 -
 src/arch/riscv/insts/standard.hh  |   3 +-
 src/arch/riscv/insts/unknown.hh   |   5 +-
 src/arch/riscv/isa/bitfields.isa  |   2 +-
 src/arch/riscv/isa/decoder.isa    |   2 +-
 src/arch/riscv/types.hh           | 120 +++++++++++++++++++++++++++++-
 util/m5/src/abi/riscv/m5op.S      |   2 +-
 10 files changed, 151 insertions(+), 55 deletions(-)
 delete mode 100644 src/arch/riscv/insts/bitfields.hh

diff --git a/src/arch/riscv/decoder.cc b/src/arch/riscv/decoder.cc
index b816c17b21..7faa310b1e 100644
--- a/src/arch/riscv/decoder.cc
+++ b/src/arch/riscv/decoder.cc
@@ -42,6 +42,7 @@ void Decoder::reset()
 {
     aligned = true;
     mid = false;
+    machInst = 0;
     emi = 0;
 }
 
@@ -58,20 +59,20 @@ Decoder::moreBytes(const PCStateBase &pc, Addr fetchPC)
 
     bool aligned = pc.instAddr() % sizeof(machInst) == 0;
     if (aligned) {
-        emi = inst;
-        if (compressed(emi))
-            emi = bits(emi, mid_bit, 0);
+        emi.instBits = inst;
+        if (compressed(inst))
+            emi.instBits = bits(inst, mid_bit, 0);
         outOfBytes = !compressed(emi);
         instDone = true;
     } else {
         if (mid) {
-            assert(bits(emi, max_bit, mid_bit + 1) == 0);
-            replaceBits(emi, max_bit, mid_bit + 1, inst);
+            assert(bits(emi.instBits, max_bit, mid_bit + 1) == 0);
+            replaceBits(emi.instBits, max_bit, mid_bit + 1, inst);
             mid = false;
             outOfBytes = false;
             instDone = true;
         } else {
-            emi = bits(inst, max_bit, mid_bit + 1);
+            emi.instBits = bits(inst, max_bit, mid_bit + 1);
             mid = !compressed(emi);
             outOfBytes = true;
             instDone = compressed(emi);
@@ -83,7 +84,7 @@ StaticInstPtr
 Decoder::decode(ExtMachInst mach_inst, Addr addr)
 {
     DPRINTF(Decode, "Decoding instruction 0x%08x at address %#x\n",
-            mach_inst, addr);
+            mach_inst.instBits, addr);
 
     StaticInstPtr &si = instMap[mach_inst];
     if (!si)
diff --git a/src/arch/riscv/insts/amo.cc b/src/arch/riscv/insts/amo.cc
index d845c91bf3..052586ecfc 100644
--- a/src/arch/riscv/insts/amo.cc
+++ b/src/arch/riscv/insts/amo.cc
@@ -32,7 +32,6 @@
 #include <sstream>
 #include <string>
 
-#include "arch/riscv/insts/bitfields.hh"
 #include "arch/riscv/utility.hh"
 #include "cpu/exec_context.hh"
 #include "cpu/static_inst.hh"
@@ -49,7 +48,7 @@ MemFenceMicro::generateDisassembly(
         Addr pc, const loader::SymbolTable *symtab) const
 {
     std::stringstream ss;
-    ss << csprintf("0x%08x", machInst) << ' ' << mnemonic;
+    ss << csprintf("0x%08x", machInst.instBits) << ' ' << mnemonic;
     return ss.str();
 }
 
@@ -66,14 +65,14 @@ LoadReserved::generateDisassembly(
 {
     std::stringstream ss;
     ss << mnemonic;
-    if (AQ || RL)
+    if (machInst.aq || machInst.rl)
         ss << '_';
-    if (AQ)
+    if (machInst.aq)
         ss << "aq";
-    if (RL)
+    if (machInst.rl)
         ss << "rl";
-    ss << ' ' << registerName(intRegClass[RD]) << ", ("
-            << registerName(intRegClass[RS1]) << ')';
+    ss << ' ' << registerName(intRegClass[machInst.rd]) << ", ("
+            << registerName(intRegClass[machInst.rs1]) << ')';
     return ss.str();
 }
 
@@ -94,15 +93,15 @@ StoreCond::generateDisassembly(
 {
     std::stringstream ss;
     ss << mnemonic;
-    if (AQ || RL)
+    if (machInst.aq || machInst.rl)
         ss << '_';
-    if (AQ)
+    if (machInst.aq)
         ss << "aq";
-    if (RL)
+    if (machInst.rl)
         ss << "rl";
-    ss << ' ' << registerName(intRegClass[RD]) << ", "
-            << registerName(intRegClass[RS2]) << ", ("
-            << registerName(intRegClass[RS1]) << ')';
+    ss << ' ' << registerName(intRegClass[machInst.rd]) << ", "
+            << registerName(intRegClass[machInst.rs2]) << ", ("
+            << registerName(intRegClass[machInst.rs1]) << ')';
     return ss.str();
 }
 
@@ -124,15 +123,15 @@ AtomicMemOp::generateDisassembly(
 {
     std::stringstream ss;
     ss << mnemonic;
-    if (AQ || RL)
+    if (machInst.aq || machInst.rl)
         ss << '_';
-    if (AQ)
+    if (machInst.aq)
         ss << "aq";
-    if (RL)
+    if (machInst.rl)
         ss << "rl";
-    ss << ' ' << registerName(intRegClass[RD]) << ", "
-            << registerName(intRegClass[RS2]) << ", ("
-            << registerName(intRegClass[RS1]) << ')';
+    ss << ' ' << registerName(intRegClass[machInst.rd]) << ", "
+            << registerName(intRegClass[machInst.rs2]) << ", ("
+            << registerName(intRegClass[machInst.rs1]) << ')';
     return ss.str();
 }
 
diff --git a/src/arch/riscv/insts/bitfields.hh b/src/arch/riscv/insts/bitfields.hh
deleted file mode 100644
index 7b985dc8e1..0000000000
--- a/src/arch/riscv/insts/bitfields.hh
+++ /dev/null
@@ -1,19 +0,0 @@
-#ifndef __ARCH_RISCV_BITFIELDS_HH__
-#define __ARCH_RISCV_BITFIELDS_HH__
-
-#include "base/bitfield.hh"
-
-#define CSRIMM  bits(machInst, 19, 15)
-#define FUNCT12 bits(machInst, 31, 20)
-#define IMM5    bits(machInst, 11, 7)
-#define IMM7    bits(machInst, 31, 25)
-#define IMMSIGN bits(machInst, 31)
-#define OPCODE  bits(machInst, 6, 0)
-
-#define AQ      bits(machInst, 26)
-#define RD      bits(machInst, 11, 7)
-#define RL      bits(machInst, 25)
-#define RS1     bits(machInst, 19, 15)
-#define RS2     bits(machInst, 24, 20)
-
-#endif // __ARCH_RISCV_BITFIELDS_HH__
diff --git a/src/arch/riscv/insts/mem.cc b/src/arch/riscv/insts/mem.cc
index 36d69853ec..5f58a68a57 100644
--- a/src/arch/riscv/insts/mem.cc
+++ b/src/arch/riscv/insts/mem.cc
@@ -32,7 +32,6 @@
 #include <sstream>
 #include <string>
 
-#include "arch/riscv/insts/bitfields.hh"
 #include "arch/riscv/insts/static_inst.hh"
 #include "arch/riscv/utility.hh"
 #include "cpu/static_inst.hh"
diff --git a/src/arch/riscv/insts/standard.hh b/src/arch/riscv/insts/standard.hh
index 5b0e8c2c22..2e7ae8d1d8 100644
--- a/src/arch/riscv/insts/standard.hh
+++ b/src/arch/riscv/insts/standard.hh
@@ -33,7 +33,6 @@
 
 #include <string>
 
-#include "arch/riscv/insts/bitfields.hh"
 #include "arch/riscv/insts/static_inst.hh"
 #include "arch/riscv/regs/misc.hh"
 #include "cpu/exec_context.hh"
@@ -95,7 +94,7 @@ class CSROp : public RiscvStaticInst
     /// Constructor
     CSROp(const char *mnem, ExtMachInst _machInst, OpClass __opClass)
         : RiscvStaticInst(mnem, _machInst, __opClass),
-            csr(FUNCT12), uimm(CSRIMM)
+          csr(_machInst.funct12), uimm(_machInst.csrimm)
     {
         if (csr == CSR_SATP) {
             flags[IsSquashAfter] = true;
diff --git a/src/arch/riscv/insts/unknown.hh b/src/arch/riscv/insts/unknown.hh
index 0c2f75e1e9..64f94dea00 100644
--- a/src/arch/riscv/insts/unknown.hh
+++ b/src/arch/riscv/insts/unknown.hh
@@ -34,7 +34,6 @@
 #include <string>
 
 #include "arch/riscv/faults.hh"
-#include "arch/riscv/insts/bitfields.hh"
 #include "arch/riscv/insts/static_inst.hh"
 #include "cpu/exec_context.hh"
 #include "cpu/static_inst.hh"
@@ -60,14 +59,14 @@ class Unknown : public RiscvStaticInst
     Fault
     execute(ExecContext *, trace::InstRecord *) const override
     {
-        return std::make_shared<UnknownInstFault>(machInst);
+        return std::make_shared<UnknownInstFault>(machInst.instBits);
     }
 
     std::string
     generateDisassembly(
             Addr pc, const loader::SymbolTable *symtab) const override
     {
-        return csprintf("unknown opcode %#02x", OPCODE);
+        return csprintf("unknown opcode %#02x", machInst.opcode);
     }
 };
 
diff --git a/src/arch/riscv/isa/bitfields.isa b/src/arch/riscv/isa/bitfields.isa
index 4f58416237..8589269949 100644
--- a/src/arch/riscv/isa/bitfields.isa
+++ b/src/arch/riscv/isa/bitfields.isa
@@ -36,7 +36,7 @@
 def bitfield RVTYPE rv_type;
 
 def bitfield QUADRANT <1:0>;
-def bitfield OPCODE <6:2>;
+def bitfield OPCODE5 <6:2>;
 
 // R-Type
 def bitfield ALL	<31:0>;
diff --git a/src/arch/riscv/isa/decoder.isa b/src/arch/riscv/isa/decoder.isa
index 53d4a4d558..58958bb9f1 100644
--- a/src/arch/riscv/isa/decoder.isa
+++ b/src/arch/riscv/isa/decoder.isa
@@ -442,7 +442,7 @@ decode QUADRANT default Unknown::unknown() {
             }
         }
     }
-    0x3: decode OPCODE {
+    0x3: decode OPCODE5 {
         0x00: decode FUNCT3 {
             format Load {
                 0x0: lb({{
diff --git a/src/arch/riscv/types.hh b/src/arch/riscv/types.hh
index 4aae1a027b..1d501dc05f 100644
--- a/src/arch/riscv/types.hh
+++ b/src/arch/riscv/types.hh
@@ -57,7 +57,125 @@ typedef uint32_t MachInst;
 // For now, we should be safe using the msbs to store extra information.
 BitUnion64(ExtMachInst)
     // Decoder state
-    Bitfield<63, 62> rv_type;
+    Bitfield<63, 62>    rv_type;
+    Bitfield<61>        compressed;
+    // More bits for vector extension
+    Bitfield<57, 41>    vl;     // [0, 2**16]
+    Bitfield<40>        vill;
+    SubBitUnion(vtype8, 39, 32) // exclude vill
+        Bitfield<39> vma;
+        Bitfield<38> vta;
+        Bitfield<37, 35> vsew;
+        Bitfield<34, 32> vlmul;
+    EndSubBitUnion(vtype8)
+    // Common
+    uint32_t            instBits;
+    Bitfield< 1,  0>    quadRant;
+    Bitfield< 6,  2>    opcode5;
+    Bitfield< 6,  0>    opcode;
+    // R-Type
+    Bitfield<31,  0>    all;
+    Bitfield<11,  7>    rd;
+    Bitfield<14, 12>    funct3;
+    Bitfield<19, 15>    rs1;
+    Bitfield<24, 20>    rs2;
+    Bitfield<31, 25>    funct7;
+    // Bit shifts
+    Bitfield<30>        srType;
+    Bitfield<24, 20>    shamt5;
+    Bitfield<25, 20>    shamt6;
+    // I-Type
+    Bitfield<31, 20>    imm12;
+    // Sync
+    Bitfield<23, 20>    succ;
+    Bitfield<27, 24>    pred;
+    // S-Type
+    Bitfield<11,  7>    imm5;
+    Bitfield<31, 25>    imm7;
+    // U-Type
+    Bitfield<31, 12>    imm20;
+    // SB-Type
+    Bitfield<7>         bimm12bit11;
+    Bitfield<11,  8>    bimm12bits4to1;
+    Bitfield<30, 25>    bimm12bits10to5;
+    Bitfield<31>        immsign;
+    // UJ-Type
+    Bitfield<30, 21>    ujimmbits10to1;
+    Bitfield<20>        ujimmbit11;
+    Bitfield<19, 12>    ujimmbits19to12;
+    // System
+    Bitfield<31, 20>    funct12;
+    Bitfield<19, 15>    csrimm;
+    // Floating point
+    Bitfield<11,  7>    fd;
+    Bitfield<19, 15>    fs1;
+    Bitfield<24, 20>    fs2;
+    Bitfield<31, 27>    fs3;
+    Bitfield<14, 12>    round_mode;
+    Bitfield<24, 20>    conv_sgn;
+    Bitfield<26, 25>    funct2;
+    // AMO
+    Bitfield<31, 27>    amofunct;
+    Bitfield<26>        aq;
+    Bitfield<25>        rl;
+    // Compressed
+    Bitfield<15, 13>    copcode;
+    Bitfield<12>        cfunct1;
+    Bitfield<11, 10>    cfunct2high;
+    Bitfield< 6,  5>    cfunct2low;
+    Bitfield<11,  7>    rc1;
+    Bitfield< 6,  2>    rc2;
+    Bitfield< 9,  7>    rp1;
+    Bitfield< 4,  2>    rp2;
+    Bitfield<11,  7>    fc1;
+    Bitfield< 6,  2>    fc2;
+    Bitfield< 4,  2>    fp2;
+    Bitfield<12,  2>    cjumpimm;
+    Bitfield< 5,  3>    cjumpimm3to1;
+    Bitfield<11, 11>    cjumpimm4to4;
+    Bitfield< 2,  2>    cjumpimm5to5;
+    Bitfield< 7,  7>    cjumpimm6to6;
+    Bitfield< 6,  6>    cjumpimm7to7;
+    Bitfield<10,  9>    cjumpimm9to8;
+    Bitfield< 8,  8>    cjumpimm10to10;
+    Bitfield<12>        cjumpimmsign;
+    Bitfield<12,  5>    cimm8;
+    Bitfield<12,  7>    cimm6;
+    Bitfield< 6,  2>    cimm5;
+    Bitfield<12, 10>    cimm3;
+    Bitfield< 6,  5>    cimm2;
+    Bitfield<12>        cimm1;
+    // Pseudo instructions
+    Bitfield<31, 25>    m5func;
+    // vector
+    Bitfield<31, 26>    vfunct6;
+    Bitfield<31, 27>    vfunct5;
+    Bitfield<27, 25>    vfunct3;
+    Bitfield<26, 25>    vfunct2;
+    Bitfield<31, 29>    nf;
+    Bitfield<28>        mew;
+    Bitfield<27, 26>    mop;
+    Bitfield<25>        vm;
+    Bitfield<24, 20>    lumop;
+    Bitfield<24, 20>    sumop;
+    Bitfield<14, 12>    width;
+    Bitfield<24, 20>    vs2;
+    Bitfield<19, 15>    vs1;
+    Bitfield<11,  7>    vd;
+    Bitfield<11,  7>    vs3;
+    Bitfield<19, 15>    vecimm;
+    Bitfield<17, 15>    simm3;
+    // vsetvli
+    Bitfield<31>        bit31;
+    Bitfield<30>        bit30;
+    Bitfield<30, 20>    zimm_vsetvli;
+    // vsetivli
+    Bitfield<31, 30>    bit31_30;
+    Bitfield<29, 20>    zimm_vsetivli;
+    Bitfield<19, 15>    uimm_vsetivli;
+    // vsetvl
+    Bitfield<31, 25>    bit31_25;
+
 EndBitUnion(ExtMachInst)
 
 } // namespace RiscvISA
diff --git a/util/m5/src/abi/riscv/m5op.S b/util/m5/src/abi/riscv/m5op.S
index babe85424f..1b0376a131 100644
--- a/util/m5/src/abi/riscv/m5op.S
+++ b/util/m5/src/abi/riscv/m5op.S
@@ -39,7 +39,7 @@
 #include <gem5/asm/generic/m5ops.h>
 
 // riscv pseudo instructions have bit 1:0 (QUADRANT) = 0x3,
-// bit 6:2 (OPCODE) = 0x1e, and bit 31:25 (M5FUNC) specifies
+// bit 6:2 (OPCODE5) = 0x1e, and bit 31:25 (M5FUNC) specifies
 // the function performed by pseudo instruction
 
 .macro	m5op_func, name, func

From f5af8b58764d27e1adce91b19ba1235f12480425 Mon Sep 17 00:00:00 2001
From: Vishnu Ramadas <vramadas@wisc.edu>
Date: Wed, 26 Apr 2023 13:04:51 -0500
Subject: [PATCH 352/492] dev-amdgpu: Add a few MQD attributes to GPUFS
 checkpoint

During GPUFS checkpoint restore, doorbells callbacks are created based
on certain MQD attributes. These callbacks are required to create new
SDMA doorbells. If these attributes are not present in the checkpoint,
the restore hangs indefinitely waiting for ioctl calls that access these
doorbells to finish execution. This commit adds the attributes required
for checkpoint restore to proceed.

Change-Id: Id3d1b7a2627d4c50133d923096495957a233f675
Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/70077
Reviewed-by: Matt Sinclair <mattdsinclair@gmail.com>
Reviewed-by: Matthew Poremba <matthew.poremba@amd.com>
Maintainer: Matt Sinclair <mattdsinclair@gmail.com>
Tested-by: kokoro <noreply+kokoro@google.com>
Maintainer: Matthew Poremba <matthew.poremba@amd.com>
---
 src/dev/amdgpu/pm4_packet_processor.cc | 30 ++++++++++++++++++++++++++
 1 file changed, 30 insertions(+)

diff --git a/src/dev/amdgpu/pm4_packet_processor.cc b/src/dev/amdgpu/pm4_packet_processor.cc
index 071fe8b841..3690113ac4 100644
--- a/src/dev/amdgpu/pm4_packet_processor.cc
+++ b/src/dev/amdgpu/pm4_packet_processor.cc
@@ -1025,6 +1025,11 @@ PM4PacketProcessor::serialize(CheckpointOut &cp) const
     uint32_t pipe[num_queues];
     uint32_t queue[num_queues];
     bool privileged[num_queues];
+    uint32_t hqd_active[num_queues];
+    uint32_t hqd_vmid[num_queues];
+    Addr aql_rptr[num_queues];
+    uint32_t doorbell[num_queues];
+    uint32_t hqd_pq_control[num_queues];
 
     int i = 0;
     for (auto iter : queues) {
@@ -1048,6 +1053,11 @@ PM4PacketProcessor::serialize(CheckpointOut &cp) const
         pipe[i] = q->pipe();
         queue[i] = q->queue();
         privileged[i] = q->privileged();
+        hqd_active[i] = q->getMQD()->hqd_active;
+        hqd_vmid[i] = q->getMQD()->hqd_vmid;
+        aql_rptr[i] = q->getMQD()->aqlRptr;
+        doorbell[i] = q->getMQD()->doorbell;
+        hqd_pq_control[i] = q->getMQD()->hqd_pq_control;
         i++;
     }
 
@@ -1067,6 +1077,11 @@ PM4PacketProcessor::serialize(CheckpointOut &cp) const
     SERIALIZE_ARRAY(pipe, num_queues);
     SERIALIZE_ARRAY(queue, num_queues);
     SERIALIZE_ARRAY(privileged, num_queues);
+    SERIALIZE_ARRAY(hqd_active, num_queues);
+    SERIALIZE_ARRAY(hqd_vmid, num_queues);
+    SERIALIZE_ARRAY(aql_rptr, num_queues);
+    SERIALIZE_ARRAY(doorbell, num_queues);
+    SERIALIZE_ARRAY(hqd_pq_control, num_queues);
 }
 
 void
@@ -1093,6 +1108,11 @@ PM4PacketProcessor::unserialize(CheckpointIn &cp)
     uint32_t pipe[num_queues];
     uint32_t queue[num_queues];
     bool privileged[num_queues];
+    uint32_t hqd_active[num_queues];
+    uint32_t hqd_vmid[num_queues];
+    Addr aql_rptr[num_queues];
+    uint32_t doorbell[num_queues];
+    uint32_t hqd_pq_control[num_queues];
 
     UNSERIALIZE_ARRAY(id, num_queues);
     UNSERIALIZE_ARRAY(mqd_base, num_queues);
@@ -1109,6 +1129,11 @@ PM4PacketProcessor::unserialize(CheckpointIn &cp)
     UNSERIALIZE_ARRAY(pipe, num_queues);
     UNSERIALIZE_ARRAY(queue, num_queues);
     UNSERIALIZE_ARRAY(privileged, num_queues);
+    UNSERIALIZE_ARRAY(hqd_active, num_queues);
+    UNSERIALIZE_ARRAY(hqd_vmid, num_queues);
+    UNSERIALIZE_ARRAY(aql_rptr, num_queues);
+    UNSERIALIZE_ARRAY(doorbell, num_queues);
+    UNSERIALIZE_ARRAY(hqd_pq_control, num_queues);
 
     for (int i = 0; i < num_queues; i++) {
         QueueDesc *mqd = new QueueDesc();
@@ -1132,6 +1157,11 @@ PM4PacketProcessor::unserialize(CheckpointIn &cp)
         queues[id[i]]->processing(processing[i]);
         queues[id[i]]->ib(ib[i]);
         queues[id[i]]->setPkt(me[i], pipe[i], queue[i], privileged[i]);
+        queues[id[i]]->getMQD()->hqd_active = hqd_active[i];
+        queues[id[i]]->getMQD()->hqd_vmid = hqd_vmid[i];
+        queues[id[i]]->getMQD()->aqlRptr = aql_rptr[i];
+        queues[id[i]]->getMQD()->doorbell = doorbell[i];
+        queues[id[i]]->getMQD()->hqd_pq_control = hqd_pq_control[i];
 
         DPRINTF(PM4PacketProcessor, "PM4 queue %d, rptr: %p wptr: %p\n",
                 queues[id[i]]->id(), queues[id[i]]->rptr(),

From 9c3107c7620d7801186d54fae24ac7b900c238e3 Mon Sep 17 00:00:00 2001
From: Matthew Poremba <matthew.poremba@amd.com>
Date: Fri, 21 Apr 2023 14:05:50 -0500
Subject: [PATCH 353/492] dev-amdgpu,configs: Add human readable names for
 different GPUs

Add a human readable string for GPU device names rather than using the
device ID in the code. This is intended to make code more readable.

Change-Id: Id3ea74ca37422b1f4a0f09e5a9522d37b5998c1a
Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/70038
Reviewed-by: Matt Sinclair <mattdsinclair@gmail.com>
Tested-by: kokoro <noreply+kokoro@google.com>
Reviewed-by: Jason Lowe-Power <power.jg@gmail.com>
Maintainer: Matt Sinclair <mattdsinclair@gmail.com>
---
 configs/example/gpufs/runfs.py         | 10 ++++++++++
 configs/example/gpufs/system/amdgpu.py | 11 +++++++++++
 src/dev/amdgpu/AMDGPU.py               |  3 +++
 3 files changed, 24 insertions(+)

diff --git a/configs/example/gpufs/runfs.py b/configs/example/gpufs/runfs.py
index 52b79abfaa..4c906019c1 100644
--- a/configs/example/gpufs/runfs.py
+++ b/configs/example/gpufs/runfs.py
@@ -126,6 +126,16 @@ def addRunFSOptions(parser):
         help="type of memory to use",
     )
 
+    # These are the models that are both supported in gem5 and supported
+    # by the versions of ROCm supported by gem5 in full system mode. For
+    # other gfx versions there is some support in syscall emulation mode.
+    parser.add_argument(
+        "--gpu-device",
+        default="Vega10",
+        choices=["Vega10", "MI100"],
+        help="GPU model to run: Vega10 (gfx900) or MI100 (gfx908)",
+    )
+
 
 def runGpuFSSystem(args):
     """
diff --git a/configs/example/gpufs/system/amdgpu.py b/configs/example/gpufs/system/amdgpu.py
index 1fd3e2f304..5f98b55c32 100644
--- a/configs/example/gpufs/system/amdgpu.py
+++ b/configs/example/gpufs/system/amdgpu.py
@@ -170,3 +170,14 @@ def connectGPU(system, args):
     system.pc.south_bridge.gpu.checkpoint_before_mmios = (
         args.checkpoint_before_mmios
     )
+
+    system.pc.south_bridge.gpu.device_name = args.gpu_device
+
+    if args.gpu_device == "MI100":
+        system.pc.south_bridge.gpu.DeviceID = 0x738C
+        system.pc.south_bridge.gpu.SubsystemVendorID = 0x1002
+        system.pc.south_bridge.gpu.SubsystemID = 0x0C34
+    elif args.gpu_device == "Vega10":
+        system.pc.south_bridge.gpu.DeviceID = 0x6863
+    else:
+        panic("Unknown GPU device: {}".format(args.gpu_device))
diff --git a/src/dev/amdgpu/AMDGPU.py b/src/dev/amdgpu/AMDGPU.py
index f9d953fc57..1e786726c9 100644
--- a/src/dev/amdgpu/AMDGPU.py
+++ b/src/dev/amdgpu/AMDGPU.py
@@ -46,6 +46,9 @@ class AMDGPUDevice(PciDevice):
     cxx_header = "dev/amdgpu/amdgpu_device.hh"
     cxx_class = "gem5::AMDGPUDevice"
 
+    # Human readable name for device ID
+    device_name = Param.String("Vega10", "Codename for device")
+
     # IDs for AMD Vega 10
     VendorID = 0x1002
     DeviceID = 0x6863

From 6c1b95ea41785bfebe4c1e56cbe6c472b150e5b1 Mon Sep 17 00:00:00 2001
From: Matthew Poremba <matthew.poremba@amd.com>
Date: Fri, 21 Apr 2023 14:07:01 -0500
Subject: [PATCH 354/492] dev-amdgpu: Default MMIO reads when previously
 written

If an MMIO was previously written and the driver reads it, we should
return the value that was previously read. This overwrites the MMIO
trace value which is the last resort fallback for finding an MMIO value.
This is needed to initialize newer GPU devices in gem5.

Change-Id: Ida2435290b706288e88518b5d920691cdb6dcc09
Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/70039
Maintainer: Matt Sinclair <mattdsinclair@gmail.com>
Reviewed-by: Matt Sinclair <mattdsinclair@gmail.com>
Tested-by: kokoro <noreply+kokoro@google.com>
---
 src/dev/amdgpu/amdgpu_device.cc | 7 +++++++
 1 file changed, 7 insertions(+)

diff --git a/src/dev/amdgpu/amdgpu_device.cc b/src/dev/amdgpu/amdgpu_device.cc
index 3605882b74..7e6304afa1 100644
--- a/src/dev/amdgpu/amdgpu_device.cc
+++ b/src/dev/amdgpu/amdgpu_device.cc
@@ -248,6 +248,13 @@ AMDGPUDevice::readMMIO(PacketPtr pkt, Addr offset)
     DPRINTF(AMDGPUDevice, "Read MMIO %#lx\n", offset);
     mmioReader.readFromTrace(pkt, MMIO_BAR, offset);
 
+    if (regs.find(pkt->getAddr()) != regs.end()) {
+        uint64_t value = regs[pkt->getAddr()];
+        DPRINTF(AMDGPUDevice, "Reading what kernel wrote before: %#x\n",
+                value);
+        pkt->setUintX(value, ByteOrder::little);
+    }
+
     switch (aperture) {
       case NBIO_BASE:
         switch (aperture_offset) {

From 8b91ac6f8d82cc02660adad3513f03c41413435c Mon Sep 17 00:00:00 2001
From: Matthew Poremba <matthew.poremba@amd.com>
Date: Fri, 21 Apr 2023 19:15:40 -0500
Subject: [PATCH 355/492] dev-amdgpu: Refactor MMIO interface for SDMA engines

Currently the amdgpu simulated device is assumed to be a Vega10. As a
result there are a few things that are hardcoded. One of those is the
number of SDMAs. In order to add a newer device, such as MI100+, we need
to enable a flexible number of SDMAs.

In order to support a variable number of SDMAs and with the MMIO offsets
of each device being potentially different, the MMIO interface for SDMAs
is changed to use an SDMA class method dispatch table with forwards a
32-bit value from the MMIO packet to the MMIO functions in SDMA of the
format `void method(uint32_t)`. Several changes are made to enable this:

 - Allow the SDMA to have a variable MMIO base and size. These are
   configured in python.
 - An SDMA class method dispatch table which contains the MMIO offset
   relative to the SDMA's MMIO base address.
 - An updated writeMMIO method to iterate over the SDMA MMIO address
   ranges and call the appropriate SDMA MMIO method which matches the
   MMIO offset.
 - Moved all SDMA related MMIO data bit twiddling, masking, etc. into
   the MMIO methods themselves instead of in the writeMMIO method in
   SDMAEngine.

Change-Id: Ifce626f84d52f9e27e4438ba4e685e30dbf06dbc
Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/70040
Maintainer: Matt Sinclair <mattdsinclair@gmail.com>
Tested-by: kokoro <noreply+kokoro@google.com>
Reviewed-by: Matt Sinclair <mattdsinclair@gmail.com>
---
 configs/example/gpufs/system/system.py | 56 +++++++++++----
 src/dev/amdgpu/AMDGPU.py               | 11 +--
 src/dev/amdgpu/amdgpu_device.cc        | 97 ++++++++++++++++++--------
 src/dev/amdgpu/amdgpu_device.hh        | 13 +++-
 src/dev/amdgpu/interrupt_handler.cc    |  6 ++
 src/dev/amdgpu/interrupt_handler.hh    |  6 ++
 src/dev/amdgpu/sdma_engine.cc          | 35 ++++++++--
 src/dev/amdgpu/sdma_engine.hh          | 15 +++-
 8 files changed, 182 insertions(+), 57 deletions(-)

diff --git a/configs/example/gpufs/system/system.py b/configs/example/gpufs/system/system.py
index 93f0194efb..90c5c01091 100644
--- a/configs/example/gpufs/system/system.py
+++ b/configs/example/gpufs/system/system.py
@@ -129,15 +129,45 @@ def makeGpuFSSystem(args):
     device_ih = AMDGPUInterruptHandler()
     system.pc.south_bridge.gpu.device_ih = device_ih
 
-    # Setup the SDMA engines
-    sdma0_pt_walker = VegaPagetableWalker()
-    sdma1_pt_walker = VegaPagetableWalker()
+    # Setup the SDMA engines depending on device. The MMIO base addresses
+    # can be found in the driver code under:
+    # include/asic_reg/sdmaX/sdmaX_Y_Z_offset.h
+    num_sdmas = 2
+    sdma_bases = []
+    sdma_sizes = []
+    if args.gpu_device == "Vega10":
+        num_sdmas = 2
+        sdma_bases = [0x4980, 0x5180]
+        sdma_sizes = [0x800] * 2
+    elif args.gpu_device == "MI100":
+        num_sdmas = 8
+        sdma_bases = [
+            0x4980,
+            0x6180,
+            0x78000,
+            0x79000,
+            0x7A000,
+            0x7B000,
+            0x7C000,
+            0x7D000,
+        ]
+        sdma_sizes = [0x1000] * 8
+    else:
+        m5.util.panic(f"Unknown GPU device {args.gpu_device}")
 
-    sdma0 = SDMAEngine(walker=sdma0_pt_walker)
-    sdma1 = SDMAEngine(walker=sdma1_pt_walker)
+    sdma_pt_walkers = []
+    sdma_engines = []
+    for sdma_idx in range(num_sdmas):
+        sdma_pt_walker = VegaPagetableWalker()
+        sdma_engine = SDMAEngine(
+            walker=sdma_pt_walker,
+            mmio_base=sdma_bases[sdma_idx],
+            mmio_size=sdma_sizes[sdma_idx],
+        )
+        sdma_pt_walkers.append(sdma_pt_walker)
+        sdma_engines.append(sdma_engine)
 
-    system.pc.south_bridge.gpu.sdma0 = sdma0
-    system.pc.south_bridge.gpu.sdma1 = sdma1
+    system.pc.south_bridge.gpu.sdmas = sdma_engines
 
     # Setup PM4 packet processor
     pm4_pkt_proc = PM4PacketProcessor()
@@ -155,22 +185,22 @@ def makeGpuFSSystem(args):
     system._dma_ports.append(gpu_hsapp)
     system._dma_ports.append(gpu_cmd_proc)
     system._dma_ports.append(system.pc.south_bridge.gpu)
-    system._dma_ports.append(sdma0)
-    system._dma_ports.append(sdma1)
+    for sdma in sdma_engines:
+        system._dma_ports.append(sdma)
     system._dma_ports.append(device_ih)
     system._dma_ports.append(pm4_pkt_proc)
     system._dma_ports.append(system_hub)
     system._dma_ports.append(gpu_mem_mgr)
     system._dma_ports.append(hsapp_pt_walker)
     system._dma_ports.append(cp_pt_walker)
-    system._dma_ports.append(sdma0_pt_walker)
-    system._dma_ports.append(sdma1_pt_walker)
+    for sdma_pt_walker in sdma_pt_walkers:
+        system._dma_ports.append(sdma_pt_walker)
 
     gpu_hsapp.pio = system.iobus.mem_side_ports
     gpu_cmd_proc.pio = system.iobus.mem_side_ports
     system.pc.south_bridge.gpu.pio = system.iobus.mem_side_ports
-    sdma0.pio = system.iobus.mem_side_ports
-    sdma1.pio = system.iobus.mem_side_ports
+    for sdma in sdma_engines:
+        sdma.pio = system.iobus.mem_side_ports
     device_ih.pio = system.iobus.mem_side_ports
     pm4_pkt_proc.pio = system.iobus.mem_side_ports
     system_hub.pio = system.iobus.mem_side_ports
diff --git a/src/dev/amdgpu/AMDGPU.py b/src/dev/amdgpu/AMDGPU.py
index 1e786726c9..616c501c63 100644
--- a/src/dev/amdgpu/AMDGPU.py
+++ b/src/dev/amdgpu/AMDGPU.py
@@ -79,11 +79,9 @@ class AMDGPUDevice(PciDevice):
         False, "Take a checkpoint before the device begins sending MMIOs"
     )
 
-    # Specific to Vega10: Vega10 has two SDMA engines these do not have any
-    # assigned function and are referenced by ID so they are given the generic
-    # names sdma0, sdma1, ... sdmaN.
-    sdma0 = Param.SDMAEngine("SDMA Engine 0")
-    sdma1 = Param.SDMAEngine("SDMA Engine 1")
+    # SDMA engines. There are a different number depending on device,
+    # therefore an array is used.
+    sdmas = VectorParam.SDMAEngine("All SDMA Engines")
 
     # The cp is needed here to handle certain packets the device may receive.
     # The config script should not create a new cp here but rather assign the
@@ -100,6 +98,9 @@ class SDMAEngine(DmaVirtDevice):
     cxx_header = "dev/amdgpu/sdma_engine.hh"
     cxx_class = "gem5::SDMAEngine"
 
+    mmio_base = Param.Addr(0x0, "Base MMIO Address")
+    mmio_size = Param.Addr(0x800, "Size of MMIO range")
+
     gpu_device = Param.AMDGPUDevice(NULL, "GPU Controller")
     walker = Param.VegaPagetableWalker("Page table walker")
 
diff --git a/src/dev/amdgpu/amdgpu_device.cc b/src/dev/amdgpu/amdgpu_device.cc
index 7e6304afa1..2acf1f4af3 100644
--- a/src/dev/amdgpu/amdgpu_device.cc
+++ b/src/dev/amdgpu/amdgpu_device.cc
@@ -53,7 +53,7 @@ namespace gem5
 
 AMDGPUDevice::AMDGPUDevice(const AMDGPUDeviceParams &p)
     : PciDevice(p), gpuMemMgr(p.memory_manager), deviceIH(p.device_ih),
-      sdma0(p.sdma0), sdma1(p.sdma1), pm4PktProc(p.pm4_pkt_proc), cp(p.cp),
+      pm4PktProc(p.pm4_pkt_proc), cp(p.cp),
       checkpoint_before_mmios(p.checkpoint_before_mmios),
       init_interrupt_count(0), _lastVMID(0),
       deviceMem(name() + ".deviceMem", p.memories, false, "", false)
@@ -84,10 +84,47 @@ AMDGPUDevice::AMDGPUDevice(const AMDGPUDeviceParams &p)
         mmioReader.readMMIOTrace(p.trace_file);
     }
 
-    sdma0->setGPUDevice(this);
-    sdma0->setId(0);
-    sdma1->setGPUDevice(this);
-    sdma1->setId(1);
+    int sdma_id = 0;
+    for (auto& s : p.sdmas) {
+        s->setGPUDevice(this);
+        s->setId(sdma_id);
+        sdmaIds.insert({sdma_id, s});
+        sdmaMmios.insert({sdma_id,
+                          RangeSize(s->getMmioBase(), s->getMmioSize())});
+        DPRINTF(AMDGPUDevice, "SDMA%d has MMIO range %s\n", sdma_id,
+                sdmaMmios[sdma_id].to_string().c_str());
+        sdma_id++;
+    }
+
+    // Map SDMA MMIO addresses to functions
+    sdmaFunc.insert({0x81, &SDMAEngine::setGfxBaseLo});
+    sdmaFunc.insert({0x82, &SDMAEngine::setGfxBaseHi});
+    sdmaFunc.insert({0x88, &SDMAEngine::setGfxRptrHi});
+    sdmaFunc.insert({0x89, &SDMAEngine::setGfxRptrLo});
+    sdmaFunc.insert({0x92, &SDMAEngine::setGfxDoorbellLo});
+    sdmaFunc.insert({0xab, &SDMAEngine::setGfxDoorbellOffsetLo});
+    sdmaFunc.insert({0x80, &SDMAEngine::setGfxSize});
+    sdmaFunc.insert({0xb2, &SDMAEngine::setGfxWptrLo});
+    sdmaFunc.insert({0xb3, &SDMAEngine::setGfxWptrHi});
+    if (p.device_name == "Vega10") {
+        sdmaFunc.insert({0xe1, &SDMAEngine::setPageBaseLo});
+        sdmaFunc.insert({0xe9, &SDMAEngine::setPageRptrLo});
+        sdmaFunc.insert({0xe8, &SDMAEngine::setPageRptrHi});
+        sdmaFunc.insert({0xf2, &SDMAEngine::setPageDoorbellLo});
+        sdmaFunc.insert({0x10b, &SDMAEngine::setPageDoorbellOffsetLo});
+        sdmaFunc.insert({0xe0, &SDMAEngine::setPageSize});
+        sdmaFunc.insert({0x113, &SDMAEngine::setPageWptrLo});
+    } else if (p.device_name == "MI100") {
+        sdmaFunc.insert({0xd9, &SDMAEngine::setPageBaseLo});
+        sdmaFunc.insert({0xe1, &SDMAEngine::setPageRptrLo});
+        sdmaFunc.insert({0xe0, &SDMAEngine::setPageRptrHi});
+        sdmaFunc.insert({0xea, &SDMAEngine::setPageDoorbellLo});
+        sdmaFunc.insert({0xd8, &SDMAEngine::setPageDoorbellOffsetLo});
+        sdmaFunc.insert({0x10b, &SDMAEngine::setPageWptrLo});
+    } else {
+        panic("Unknown GPU device %s\n", p.device_name);
+    }
+
     deviceIH->setGPUDevice(this);
     pm4PktProc->setGPUDevice(this);
     cp->hsaPacketProc().setGPUDevice(this);
@@ -351,15 +388,25 @@ AMDGPUDevice::writeMMIO(PacketPtr pkt, Addr offset)
 
     DPRINTF(AMDGPUDevice, "Wrote MMIO %#lx\n", offset);
 
+    // Check SDMA functions first, then fallback to switch statement
+    for (int idx = 0; idx < sdmaIds.size(); ++idx) {
+        if (sdmaMmios[idx].contains(offset)) {
+            Addr sdma_offset = (offset - sdmaMmios[idx].start()) >> 2;
+            if (sdmaFunc.count(sdma_offset)) {
+                DPRINTF(AMDGPUDevice, "Calling SDMA%d MMIO function %lx\n",
+                        idx, sdma_offset);
+                sdmaFuncPtr mptr = sdmaFunc[sdma_offset];
+                (getSDMAById(idx)->*mptr)(pkt->getLE<uint32_t>());
+            } else {
+                DPRINTF(AMDGPUDevice, "Unknown SDMA%d MMIO: %#lx\n", idx,
+                        sdma_offset);
+            }
+
+            return;
+        }
+    }
+
     switch (aperture) {
-      /* Write a register to the first System DMA. */
-      case SDMA0_BASE:
-        sdma0->writeMMIO(pkt, aperture_offset >> SDMA_OFFSET_SHIFT);
-        break;
-      /* Write a register to the second System DMA. */
-      case SDMA1_BASE:
-        sdma1->writeMMIO(pkt, aperture_offset >> SDMA_OFFSET_SHIFT);
-        break;
       /* Write a general register to the graphics register bus manager. */
       case GRBM_BASE:
         gpuvm.writeMMIO(pkt, aperture_offset >> GRBM_OFFSET_SHIFT);
@@ -483,19 +530,9 @@ AMDGPUDevice::getSDMAById(int id)
      * PM4 packets selected SDMAs using an integer ID. This method simply maps
      * the integer ID to a pointer to the SDMA and checks for invalid IDs.
      */
-    switch (id) {
-        case 0:
-            return sdma0;
-            break;
-        case 1:
-            return sdma1;
-            break;
-        default:
-            panic("No SDMA with id %d\n", id);
-            break;
-    }
+    assert(sdmaIds.count(id));
 
-    return nullptr;
+    return sdmaIds[id];
 }
 
 SDMAEngine*
@@ -549,7 +586,7 @@ AMDGPUDevice::serialize(CheckpointOut &cp) const
     idx = 0;
     for (auto & it : sdmaEngs) {
         sdma_engs_offset[idx] = it.first;
-        sdma_engs[idx] = it.second == sdma0 ? 0 : 1;
+        sdma_engs[idx] = idx;
         ++idx;
     }
 
@@ -620,7 +657,8 @@ AMDGPUDevice::unserialize(CheckpointIn &cp)
         UNSERIALIZE_ARRAY(sdma_engs, sizeof(sdma_engs)/sizeof(sdma_engs[0]));
 
         for (int idx = 0; idx < sdma_engs_size; ++idx) {
-            SDMAEngine *sdma = sdma_engs[idx] == 0 ? sdma0 : sdma1;
+            assert(sdmaIds.count(idx));
+            SDMAEngine *sdma = sdmaIds[idx];
             sdmaEngs.insert(std::make_pair(sdma_engs_offset[idx], sdma));
         }
     }
@@ -669,8 +707,9 @@ AMDGPUDevice::deallocateAllQueues()
     idMap.erase(idMap.begin(), idMap.end());
     usedVMIDs.erase(usedVMIDs.begin(), usedVMIDs.end());
 
-    sdma0->deallocateRLCQueues();
-    sdma1->deallocateRLCQueues();
+    for (auto& it : sdmaEngs) {
+        it.second->deallocateRLCQueues();
+    }
 }
 
 void
diff --git a/src/dev/amdgpu/amdgpu_device.hh b/src/dev/amdgpu/amdgpu_device.hh
index b64067a158..0e58f29038 100644
--- a/src/dev/amdgpu/amdgpu_device.hh
+++ b/src/dev/amdgpu/amdgpu_device.hh
@@ -109,12 +109,19 @@ class AMDGPUDevice : public PciDevice
     AMDGPUMemoryManager *gpuMemMgr;
     AMDGPUInterruptHandler *deviceIH;
     AMDGPUVM gpuvm;
-    SDMAEngine *sdma0;
-    SDMAEngine *sdma1;
-    std::unordered_map<uint32_t, SDMAEngine *> sdmaEngs;
     PM4PacketProcessor *pm4PktProc;
     GPUCommandProcessor *cp;
 
+    // SDMAs mapped by doorbell offset
+    std::unordered_map<uint32_t, SDMAEngine *> sdmaEngs;
+    // SDMAs mapped by ID
+    std::unordered_map<uint32_t, SDMAEngine *> sdmaIds;
+    // SDMA ID to MMIO range
+    std::unordered_map<uint32_t, AddrRange> sdmaMmios;
+    // SDMA ID to function
+    typedef void (SDMAEngine::*sdmaFuncPtr)(uint32_t);
+    std::unordered_map<uint32_t, sdmaFuncPtr> sdmaFunc;
+
     /**
      * Initial checkpoint support variables.
      */
diff --git a/src/dev/amdgpu/interrupt_handler.cc b/src/dev/amdgpu/interrupt_handler.cc
index a771976d98..6f277a1618 100644
--- a/src/dev/amdgpu/interrupt_handler.cc
+++ b/src/dev/amdgpu/interrupt_handler.cc
@@ -80,6 +80,12 @@ AMDGPUInterruptHandler::prepareInterruptCookie(ContextID cntxt_id,
     assert(client_id == SOC15_IH_CLIENTID_RLC ||
            client_id == SOC15_IH_CLIENTID_SDMA0 ||
            client_id == SOC15_IH_CLIENTID_SDMA1 ||
+           client_id == SOC15_IH_CLIENTID_SDMA2 ||
+           client_id == SOC15_IH_CLIENTID_SDMA3 ||
+           client_id == SOC15_IH_CLIENTID_SDMA4 ||
+           client_id == SOC15_IH_CLIENTID_SDMA5 ||
+           client_id == SOC15_IH_CLIENTID_SDMA6 ||
+           client_id == SOC15_IH_CLIENTID_SDMA7 ||
            client_id == SOC15_IH_CLIENTID_GRBM_CP);
     assert(source_id == CP_EOP || source_id == TRAP_ID);
 
diff --git a/src/dev/amdgpu/interrupt_handler.hh b/src/dev/amdgpu/interrupt_handler.hh
index ab8a853074..9b80e081cc 100644
--- a/src/dev/amdgpu/interrupt_handler.hh
+++ b/src/dev/amdgpu/interrupt_handler.hh
@@ -58,6 +58,12 @@ enum soc15_ih_clientid
     SOC15_IH_CLIENTID_RLC       = 0x07,
     SOC15_IH_CLIENTID_SDMA0     = 0x08,
     SOC15_IH_CLIENTID_SDMA1     = 0x09,
+    SOC15_IH_CLIENTID_SDMA2     = 0x01,
+    SOC15_IH_CLIENTID_SDMA3     = 0x04,
+    SOC15_IH_CLIENTID_SDMA4     = 0x05,
+    SOC15_IH_CLIENTID_SDMA5     = 0x11,
+    SOC15_IH_CLIENTID_SDMA6     = 0x13,
+    SOC15_IH_CLIENTID_SDMA7     = 0x18,
     SOC15_IH_CLIENTID_GRBM_CP   = 0x14
 };
 
diff --git a/src/dev/amdgpu/sdma_engine.cc b/src/dev/amdgpu/sdma_engine.cc
index 736df45d9d..e99d694634 100644
--- a/src/dev/amdgpu/sdma_engine.cc
+++ b/src/dev/amdgpu/sdma_engine.cc
@@ -49,7 +49,8 @@ SDMAEngine::SDMAEngine(const SDMAEngineParams &p)
     : DmaVirtDevice(p), id(0), gfxBase(0), gfxRptr(0),
       gfxDoorbell(0), gfxDoorbellOffset(0), gfxWptr(0), pageBase(0),
       pageRptr(0), pageDoorbell(0), pageDoorbellOffset(0),
-      pageWptr(0), gpuDevice(nullptr), walker(p.walker)
+      pageWptr(0), gpuDevice(nullptr), walker(p.walker),
+      mmioBase(p.mmio_base), mmioSize(p.mmio_size)
 {
     gfx.ib(&gfxIb);
     gfxIb.parent(&gfx);
@@ -87,6 +88,18 @@ SDMAEngine::getIHClientId()
         return SOC15_IH_CLIENTID_SDMA0;
       case 1:
         return SOC15_IH_CLIENTID_SDMA1;
+      case 2:
+        return SOC15_IH_CLIENTID_SDMA2;
+      case 3:
+        return SOC15_IH_CLIENTID_SDMA3;
+      case 4:
+        return SOC15_IH_CLIENTID_SDMA4;
+      case 5:
+        return SOC15_IH_CLIENTID_SDMA5;
+      case 6:
+        return SOC15_IH_CLIENTID_SDMA6;
+      case 7:
+        return SOC15_IH_CLIENTID_SDMA7;
       default:
         panic("Unknown SDMA id");
     }
@@ -1240,6 +1253,10 @@ SDMAEngine::setGfxDoorbellOffsetLo(uint32_t data)
 {
     gfxDoorbellOffset = insertBits(gfxDoorbellOffset, 31, 0, 0);
     gfxDoorbellOffset |= data;
+    if (bits(gfxDoorbell, 28, 28)) {
+        gpuDevice->setDoorbellType(gfxDoorbellOffset, QueueType::SDMAGfx);
+        gpuDevice->setSDMAEngine(gfxDoorbellOffset, this);
+    }
 }
 
 void
@@ -1250,9 +1267,11 @@ SDMAEngine::setGfxDoorbellOffsetHi(uint32_t data)
 }
 
 void
-SDMAEngine::setGfxSize(uint64_t data)
+SDMAEngine::setGfxSize(uint32_t data)
 {
-    gfx.size(data);
+    uint32_t rb_size = bits(data, 6, 1);
+    assert(rb_size >= 6 && rb_size <= 62);
+    gfx.size(1 << (rb_size + 2));
 }
 
 void
@@ -1320,6 +1339,10 @@ SDMAEngine::setPageDoorbellOffsetLo(uint32_t data)
 {
     pageDoorbellOffset = insertBits(pageDoorbellOffset, 31, 0, 0);
     pageDoorbellOffset |= data;
+    if (bits(pageDoorbell, 28, 28)) {
+        gpuDevice->setDoorbellType(pageDoorbellOffset, QueueType::SDMAPage);
+        gpuDevice->setSDMAEngine(pageDoorbellOffset, this);
+    }
 }
 
 void
@@ -1330,9 +1353,11 @@ SDMAEngine::setPageDoorbellOffsetHi(uint32_t data)
 }
 
 void
-SDMAEngine::setPageSize(uint64_t data)
+SDMAEngine::setPageSize(uint32_t data)
 {
-    page.size(data);
+    uint32_t rb_size = bits(data, 6, 1);
+    assert(rb_size >= 6 && rb_size <= 62);
+    page.size(1 << (rb_size + 2));
 }
 
 void
diff --git a/src/dev/amdgpu/sdma_engine.hh b/src/dev/amdgpu/sdma_engine.hh
index 27c169193b..1e4f965920 100644
--- a/src/dev/amdgpu/sdma_engine.hh
+++ b/src/dev/amdgpu/sdma_engine.hh
@@ -156,6 +156,9 @@ class SDMAEngine : public DmaVirtDevice
     void processRLC0(Addr wptrOffset);
     void processRLC1(Addr wptrOffset);
 
+    Addr mmioBase = 0;
+    Addr mmioSize = 0;
+
   public:
     SDMAEngine(const SDMAEngineParams &p);
 
@@ -242,6 +245,14 @@ class SDMAEngine : public DmaVirtDevice
     void atomicDone(SDMAQueue *q, sdmaAtomicHeader *header, sdmaAtomic *pkt,
                     uint64_t *dmaBuffer);
 
+    /**
+     * Methods for getting SDMA MMIO base address and size. These are set by
+     * the python configuration depending on device to allow for flexible base
+     * addresses depending on what GPU is being simulated.
+     */
+    Addr getMmioBase() { return mmioBase; }
+    Addr getMmioSize() { return mmioSize; }
+
     /**
      * Methods for getting the values of SDMA MMIO registers.
      */
@@ -269,7 +280,7 @@ class SDMAEngine : public DmaVirtDevice
     void setGfxDoorbellHi(uint32_t data);
     void setGfxDoorbellOffsetLo(uint32_t data);
     void setGfxDoorbellOffsetHi(uint32_t data);
-    void setGfxSize(uint64_t data);
+    void setGfxSize(uint32_t data);
     void setGfxWptrLo(uint32_t data);
     void setGfxWptrHi(uint32_t data);
     void setPageBaseLo(uint32_t data);
@@ -280,7 +291,7 @@ class SDMAEngine : public DmaVirtDevice
     void setPageDoorbellHi(uint32_t data);
     void setPageDoorbellOffsetLo(uint32_t data);
     void setPageDoorbellOffsetHi(uint32_t data);
-    void setPageSize(uint64_t data);
+    void setPageSize(uint32_t data);
     void setPageWptrLo(uint32_t data);
     void setPageWptrHi(uint32_t data);
 

From 316538bf8ac8e9d4ff83b1ff47bf03b106c91e2c Mon Sep 17 00:00:00 2001
From: Matthew Poremba <matthew.poremba@amd.com>
Date: Fri, 21 Apr 2023 19:22:28 -0500
Subject: [PATCH 356/492] dev-amdgpu: Enable more GPUs with device specific
 registers

Currently gem5 assumes the amdgpu device to be Vega10. In order to
support more devices we need to handle situations where different
registers and addresses have the same functionality but different
offsets on different devices.

This changeset adds an NBIO class to handle device discovery and driver
initialization related tasks, pulling them out of the AMDGPUDevice
class. The offsets used for MMIOs are reworked slightly to use offsets
rather than absolute addresses. This is because we cannot determine the
absolute address in the constructor since the BAR has not been assigned
by the OS yet.

Change-Id: I14b364374e086e185978334425a4e265cf2760d0
Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/70041
Reviewed-by: Matt Sinclair <mattdsinclair@gmail.com>
Maintainer: Matt Sinclair <mattdsinclair@gmail.com>
Tested-by: kokoro <noreply+kokoro@google.com>
---
 src/dev/amdgpu/SConscript       |   1 +
 src/dev/amdgpu/amdgpu_device.cc | 110 +++++++++++---------
 src/dev/amdgpu/amdgpu_device.hh |   3 +
 src/dev/amdgpu/amdgpu_nbio.cc   | 173 ++++++++++++++++++++++++++++++++
 src/dev/amdgpu/amdgpu_nbio.hh   | 118 ++++++++++++++++++++++
 src/dev/amdgpu/amdgpu_vm.hh     |  10 ++
 6 files changed, 369 insertions(+), 46 deletions(-)
 create mode 100644 src/dev/amdgpu/amdgpu_nbio.cc
 create mode 100644 src/dev/amdgpu/amdgpu_nbio.hh

diff --git a/src/dev/amdgpu/SConscript b/src/dev/amdgpu/SConscript
index 713f0a6efe..9f8eeacd00 100644
--- a/src/dev/amdgpu/SConscript
+++ b/src/dev/amdgpu/SConscript
@@ -39,6 +39,7 @@ SimObject('AMDGPU.py', sim_objects=['AMDGPUDevice', 'AMDGPUInterruptHandler',
                                     tags='x86 isa')
 
 Source('amdgpu_device.cc', tags='x86 isa')
+Source('amdgpu_nbio.cc', tags='x86 isa')
 Source('amdgpu_vm.cc', tags='x86 isa')
 Source('interrupt_handler.cc', tags='x86 isa')
 Source('memory_manager.cc', tags='x86 isa')
diff --git a/src/dev/amdgpu/amdgpu_device.cc b/src/dev/amdgpu/amdgpu_device.cc
index 2acf1f4af3..f58d1f7242 100644
--- a/src/dev/amdgpu/amdgpu_device.cc
+++ b/src/dev/amdgpu/amdgpu_device.cc
@@ -34,6 +34,7 @@
 #include <fstream>
 
 #include "debug/AMDGPUDevice.hh"
+#include "dev/amdgpu/amdgpu_nbio.hh"
 #include "dev/amdgpu/amdgpu_vm.hh"
 #include "dev/amdgpu/interrupt_handler.hh"
 #include "dev/amdgpu/pm4_packet_processor.hh"
@@ -129,6 +130,32 @@ AMDGPUDevice::AMDGPUDevice(const AMDGPUDeviceParams &p)
     pm4PktProc->setGPUDevice(this);
     cp->hsaPacketProc().setGPUDevice(this);
     cp->setGPUDevice(this);
+
+    // Address aperture for device memory. We tell this to the driver and
+    // could possibly be anything, but these are the values used by hardware.
+    uint64_t mmhubBase = 0x8000ULL << 24;
+    uint64_t mmhubTop = 0x83ffULL << 24;
+
+    // These are hardcoded register values to return what the driver expects
+    setRegVal(AMDGPU_MP0_SMN_C2PMSG_33, 0x80000000);
+
+    // There are different registers for different GPUs, so we set the value
+    // based on the GPU type specified by the user.
+    if (p.device_name == "Vega10") {
+        setRegVal(VEGA10_FB_LOCATION_BASE, mmhubBase >> 24);
+        setRegVal(VEGA10_FB_LOCATION_TOP, mmhubTop >> 24);
+    } else if (p.device_name == "MI100") {
+        setRegVal(MI100_FB_LOCATION_BASE, mmhubBase >> 24);
+        setRegVal(MI100_FB_LOCATION_TOP, mmhubTop >> 24);
+        setRegVal(MI100_MEM_SIZE_REG, 0x3ff0); // 16GB of memory
+    } else {
+        panic("Unknown GPU device %s\n", p.device_name);
+    }
+
+    gpuvm.setMMHUBBase(mmhubBase);
+    gpuvm.setMMHUBTop(mmhubTop);
+
+    nbio.setGPUDevice(this);
 }
 
 void
@@ -236,35 +263,25 @@ AMDGPUDevice::readFrame(PacketPtr pkt, Addr offset)
      * first, ignoring any writes from driver. (2) Any other address from
      * device backing store / abstract memory class functionally.
      */
-    if (offset == 0xa28000) {
-        /*
-         * Handle special counter addresses in framebuffer. These counter
-         * addresses expect the read to return previous value + 1.
-         */
-        if (regs.find(pkt->getAddr()) == regs.end()) {
-            regs[pkt->getAddr()] = 1;
-        } else {
-            regs[pkt->getAddr()]++;
-        }
-
-        pkt->setUintX(regs[pkt->getAddr()], ByteOrder::little);
-    } else {
-        /*
-         * Read the value from device memory. This must be done functionally
-         * because this method is called by the PCIDevice::read method which
-         * is a non-timing read.
-         */
-        RequestPtr req = std::make_shared<Request>(offset, pkt->getSize(), 0,
-                                                   vramRequestorId());
-        PacketPtr readPkt = Packet::createRead(req);
-        uint8_t *dataPtr = new uint8_t[pkt->getSize()];
-        readPkt->dataDynamic(dataPtr);
-
-        auto system = cp->shader()->gpuCmdProc.system();
-        system->getDeviceMemory(readPkt)->access(readPkt);
-
-        pkt->setUintX(readPkt->getUintX(ByteOrder::little), ByteOrder::little);
+    if (nbio.readFrame(pkt, offset)) {
+        return;
     }
+
+    /*
+     * Read the value from device memory. This must be done functionally
+     * because this method is called by the PCIDevice::read method which
+     * is a non-timing read.
+     */
+    RequestPtr req = std::make_shared<Request>(offset, pkt->getSize(), 0,
+                                               vramRequestorId());
+    PacketPtr readPkt = Packet::createRead(req);
+    uint8_t *dataPtr = new uint8_t[pkt->getSize()];
+    readPkt->dataDynamic(dataPtr);
+
+    auto system = cp->shader()->gpuCmdProc.system();
+    system->getDeviceMemory(readPkt)->access(readPkt);
+
+    pkt->setUintX(readPkt->getUintX(ByteOrder::little), ByteOrder::little);
 }
 
 void
@@ -285,8 +302,8 @@ AMDGPUDevice::readMMIO(PacketPtr pkt, Addr offset)
     DPRINTF(AMDGPUDevice, "Read MMIO %#lx\n", offset);
     mmioReader.readFromTrace(pkt, MMIO_BAR, offset);
 
-    if (regs.find(pkt->getAddr()) != regs.end()) {
-        uint64_t value = regs[pkt->getAddr()];
+    if (regs.find(offset) != regs.end()) {
+        uint64_t value = regs[offset];
         DPRINTF(AMDGPUDevice, "Reading what kernel wrote before: %#x\n",
                 value);
         pkt->setUintX(value, ByteOrder::little);
@@ -294,19 +311,8 @@ AMDGPUDevice::readMMIO(PacketPtr pkt, Addr offset)
 
     switch (aperture) {
       case NBIO_BASE:
-        switch (aperture_offset) {
-          // This is a PCIe status register. At some point during driver init
-          // the driver checks that interrupts are enabled. This is only
-          // checked once, so if the MMIO trace does not exactly line up with
-          // what the driver is doing in gem5, this may still have the first
-          // bit zero causing driver to fail. Therefore, we always set this
-          // bit to one as there is no harm to do so.
-          case 0x3c: // mmPCIE_DATA2 << 2
-            uint32_t value = pkt->getLE<uint32_t>() | 0x1;
-            DPRINTF(AMDGPUDevice, "Marking interrupts enabled: %#lx\n", value);
-            pkt->setLE<uint32_t>(value);
-            break;
-        } break;
+        nbio.readMMIO(pkt, aperture_offset);
+        break;
       case GRBM_BASE:
         gpuvm.readMMIO(pkt, aperture_offset >> GRBM_OFFSET_SHIFT);
         break;
@@ -332,6 +338,8 @@ AMDGPUDevice::writeFrame(PacketPtr pkt, Addr offset)
         DPRINTF(AMDGPUDevice, "GART translation %p -> %p\n", aperture_offset,
                 gpuvm.gartTable[aperture_offset]);
     }
+
+    nbio.writeFrame(pkt, offset);
 }
 
 void
@@ -416,6 +424,10 @@ AMDGPUDevice::writeMMIO(PacketPtr pkt, Addr offset)
       case IH_BASE:
         deviceIH->writeMMIO(pkt, aperture_offset >> IH_OFFSET_SHIFT);
         break;
+      /* Write an IO space register */
+      case NBIO_BASE:
+        nbio.writeMMIO(pkt, aperture_offset);
+        break;
       default:
         DPRINTF(AMDGPUDevice, "Unknown MMIO aperture for %#x\n", offset);
         break;
@@ -489,19 +501,25 @@ AMDGPUDevice::write(PacketPtr pkt)
     DPRINTF(AMDGPUDevice, "PCI Write to %#lx data %#lx\n",
                             pkt->getAddr(), data);
 
-    if (data || regs.find(pkt->getAddr()) != regs.end())
-        regs[pkt->getAddr()] = data;
-
     dispatchAccess(pkt, false);
 
     return pioDelay;
 }
 
+bool
+AMDGPUDevice::haveRegVal(uint32_t addr)
+{
+    return regs.count(addr);
+}
+
 uint32_t
 AMDGPUDevice::getRegVal(uint32_t addr)
 {
+    DPRINTF(AMDGPUDevice, "Getting register 0x%lx = %x\n",
+            addr, regs[addr]);
     return regs[addr];
 }
+
 void
 AMDGPUDevice::setRegVal(uint32_t addr, uint32_t value)
 {
diff --git a/src/dev/amdgpu/amdgpu_device.hh b/src/dev/amdgpu/amdgpu_device.hh
index 0e58f29038..cab799147e 100644
--- a/src/dev/amdgpu/amdgpu_device.hh
+++ b/src/dev/amdgpu/amdgpu_device.hh
@@ -36,6 +36,7 @@
 
 #include "base/bitunion.hh"
 #include "dev/amdgpu/amdgpu_defines.hh"
+#include "dev/amdgpu/amdgpu_nbio.hh"
 #include "dev/amdgpu/amdgpu_vm.hh"
 #include "dev/amdgpu/memory_manager.hh"
 #include "dev/amdgpu/mmio_reader.hh"
@@ -106,6 +107,7 @@ class AMDGPUDevice : public PciDevice
     /**
      * Blocks of the GPU
      */
+    AMDGPUNbio nbio;
     AMDGPUMemoryManager *gpuMemMgr;
     AMDGPUInterruptHandler *deviceIH;
     AMDGPUVM gpuvm;
@@ -185,6 +187,7 @@ class AMDGPUDevice : public PciDevice
      * Register value getter/setter. Used by other GPU blocks to change
      * values from incoming driver/user packets.
      */
+    bool haveRegVal(uint32_t addr);
     uint32_t getRegVal(uint32_t addr);
     void setRegVal(uint32_t addr, uint32_t value);
 
diff --git a/src/dev/amdgpu/amdgpu_nbio.cc b/src/dev/amdgpu/amdgpu_nbio.cc
new file mode 100644
index 0000000000..8064fd2a0e
--- /dev/null
+++ b/src/dev/amdgpu/amdgpu_nbio.cc
@@ -0,0 +1,173 @@
+/*
+ * Copyright (c) 2023 Advanced Micro Devices, Inc.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above copyright notice,
+ * this list of conditions and the following disclaimer in the documentation
+ * and/or other materials provided with the distribution.
+ *
+ * 3. Neither the name of the copyright holder nor the names of its
+ * contributors may be used to endorse or promote products derived from this
+ * software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include "dev/amdgpu/amdgpu_nbio.hh"
+
+#include "debug/AMDGPUDevice.hh"
+#include "dev/amdgpu/amdgpu_device.hh"
+#include "mem/packet_access.hh"
+
+namespace gem5
+{
+
+AMDGPUNbio::AMDGPUNbio()
+{
+    // All read-before-write MMIOs go here
+    triggered_reads[AMDGPU_MP0_SMN_C2PMSG_64] = 0x80000000;
+}
+
+void
+AMDGPUNbio::setGPUDevice(AMDGPUDevice *gpu_device)
+{
+    gpuDevice = gpu_device;
+}
+
+void
+AMDGPUNbio::readMMIO(PacketPtr pkt, Addr offset)
+{
+    switch (offset) {
+      // This is a PCIe status register. At some point during driver init
+      // the driver checks that interrupts are enabled. This is only
+      // checked once, so if the MMIO trace does not exactly line up with
+      // what the driver is doing in gem5, this may still have the first
+      // bit zero causing driver to fail. Therefore, we always set this
+      // bit to one as there is no harm to do so.
+      case AMDGPU_PCIE_DATA_REG:
+        {
+          uint32_t value = pkt->getLE<uint32_t>() | 0x1;
+          DPRINTF(AMDGPUDevice, "Marking interrupts enabled: %#lx\n", value);
+          pkt->setLE<uint32_t>(value);
+        }
+        break;
+      case AMDGPU_MM_DATA:
+        //pkt->setLE<uint32_t>(regs[mm_index_reg]);
+        pkt->setLE<uint32_t>(gpuDevice->getRegVal(mm_index_reg));
+        break;
+      case VEGA10_INV_ENG17_ACK1:
+      case VEGA10_INV_ENG17_ACK2:
+      case MI100_INV_ENG17_ACK2:
+      case MI100_INV_ENG17_ACK3:
+        pkt->setLE<uint32_t>(0x10001);
+        break;
+      case VEGA10_INV_ENG17_SEM1:
+      case VEGA10_INV_ENG17_SEM2:
+      case MI100_INV_ENG17_SEM2:
+      case MI100_INV_ENG17_SEM3:
+        pkt->setLE<uint32_t>(0x1);
+        break;
+      // PSP responds with bit 31 set when ready
+      case AMDGPU_MP0_SMN_C2PMSG_35:
+        pkt->setLE<uint32_t>(0x80000000);
+        break;
+      default:
+        if (triggered_reads.count(offset)) {
+            DPRINTF(AMDGPUDevice, "Found triggered read for %#x\n", offset);
+            pkt->setLE<uint32_t>(triggered_reads[offset]);
+        } else if (gpuDevice->haveRegVal(offset)) {
+            uint32_t reg_val = gpuDevice->getRegVal(offset);
+
+            DPRINTF(AMDGPUDevice, "Reading value of %#lx from regs: %#lx\n",
+                    offset, reg_val);
+
+            pkt->setLE<uint32_t>(reg_val);
+        } else {
+            DPRINTF(AMDGPUDevice, "NBIO Unknown MMIO %#x (%#x)\n", offset,
+                    pkt->getAddr());
+        }
+        break;
+    }
+}
+
+void
+AMDGPUNbio::writeMMIO(PacketPtr pkt, Addr offset)
+{
+    if (offset == AMDGPU_MM_INDEX) {
+        assert(pkt->getSize() == 4);
+        mm_index_reg = insertBits(mm_index_reg, 31, 0,
+                                  pkt->getLE<uint32_t>());
+    } else if (offset == AMDGPU_MM_INDEX_HI) {
+        assert(pkt->getSize() == 4);
+        mm_index_reg = insertBits(mm_index_reg, 63, 32,
+                                  pkt->getLE<uint32_t>());
+    } else if (offset == AMDGPU_MM_DATA) {
+        DPRINTF(AMDGPUDevice, "MM write to reg %#lx data %#lx\n",
+                mm_index_reg, pkt->getLE<uint32_t>());
+        gpuDevice->setRegVal(AMDGPU_MM_DATA, pkt->getLE<uint32_t>());
+    } else if (offset == AMDGPU_MP0_SMN_C2PMSG_35) {
+        // See psp_v3_1_bootloader_load_sos in amdgpu driver code.
+        if (pkt->getLE<uint32_t>() == 0x10000) {
+            triggered_reads[AMDGPU_MP0_SMN_C2PMSG_81] = 0xdf40b31;
+        }
+    } else if (offset == AMDGPU_MP0_SMN_C2PMSG_64) {
+        triggered_reads[AMDGPU_MP0_SMN_C2PMSG_64] =
+            0x80000000 + pkt->getLE<uint32_t>();
+    } else if (offset == AMDGPU_MP0_SMN_C2PMSG_69) {
+        // PSP ring low addr
+        psp_ring = insertBits(psp_ring, 31, 0, pkt->getLE<uint32_t>());
+        psp_ring_listen_addr = psp_ring
+                             - gpuDevice->getVM().getSysAddrRangeLow() + 0xc;
+    } else if (offset == AMDGPU_MP0_SMN_C2PMSG_70) {
+        // PSP ring high addr
+        psp_ring = insertBits(psp_ring, 63, 32, pkt->getLE<uint32_t>());
+        psp_ring_listen_addr = psp_ring
+                             - gpuDevice->getVM().getSysAddrRangeLow() + 0xc;
+    } else if (offset == AMDGPU_MP0_SMN_C2PMSG_71) {
+        // PSP ring size
+        psp_ring_size = pkt->getLE<uint32_t>();
+    }
+}
+
+bool
+AMDGPUNbio::readFrame(PacketPtr pkt, Addr offset)
+{
+    if (offset == psp_ring_dev_addr) {
+        psp_ring_value++;
+        pkt->setUintX(psp_ring_value, ByteOrder::little);
+
+        return true;
+    }
+
+    return false;
+}
+
+void
+AMDGPUNbio::writeFrame(PacketPtr pkt, Addr offset)
+{
+    if (offset == psp_ring_listen_addr) {
+        assert(pkt->getSize() == 8);
+        psp_ring_dev_addr = pkt->getLE<uint64_t>()
+                          - gpuDevice->getVM().getSysAddrRangeLow();
+        DPRINTF(AMDGPUDevice, "Setting PSP ring device address to %#lx\n",
+                psp_ring_dev_addr);
+    }
+}
+
+} // namespace gem5
diff --git a/src/dev/amdgpu/amdgpu_nbio.hh b/src/dev/amdgpu/amdgpu_nbio.hh
new file mode 100644
index 0000000000..68d174e870
--- /dev/null
+++ b/src/dev/amdgpu/amdgpu_nbio.hh
@@ -0,0 +1,118 @@
+/*
+ * Copyright (c) 2023 Advanced Micro Devices, Inc.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above copyright notice,
+ * this list of conditions and the following disclaimer in the documentation
+ * and/or other materials provided with the distribution.
+ *
+ * 3. Neither the name of the copyright holder nor the names of its
+ * contributors may be used to endorse or promote products derived from this
+ * software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ *
+ */
+
+#ifndef __DEV_AMDGPU_AMDGPU_NBIO__
+#define __DEV_AMDGPU_AMDGPU_NBIO__
+
+#include <unordered_map>
+
+#include "base/types.hh"
+#include "mem/packet.hh"
+
+namespace gem5
+{
+
+class AMDGPUDevice;
+
+/**
+ * MMIO offsets for NBIO. NBIO handles initialization such as device
+ * discovery and psp functions. Values taken from:
+ *
+ * https://github.com/RadeonOpenCompute/ROCK-Kernel-Driver/blob/roc-4.3.x/
+ *      drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c
+ *
+ * The addresses in the file are dword addresses. Here they are converted
+ * to byte addresses so gem5 does not need to do any shifting.
+ */
+#define AMDGPU_MM_INDEX                                   0x00000
+#define AMDGPU_MM_INDEX_HI                                0x00018
+#define AMDGPU_MM_DATA                                    0x00004
+#define AMDGPU_PCIE_DATA_REG                              0x0003c
+
+// Message bus related to psp
+#define AMDGPU_MP0_SMN_C2PMSG_33                          0x58184
+#define AMDGPU_MP0_SMN_C2PMSG_35                          0x5818c
+#define AMDGPU_MP0_SMN_C2PMSG_64                          0x58200
+#define AMDGPU_MP0_SMN_C2PMSG_69                          0x58214
+#define AMDGPU_MP0_SMN_C2PMSG_70                          0x58218
+#define AMDGPU_MP0_SMN_C2PMSG_71                          0x5821c
+#define AMDGPU_MP0_SMN_C2PMSG_81                          0x58244
+
+// Device specific invalidation engines used during initialization
+#define VEGA10_INV_ENG17_ACK1                             0x0a318
+#define VEGA10_INV_ENG17_ACK2                             0x69c18
+#define VEGA10_INV_ENG17_SEM1                             0x0a288
+#define VEGA10_INV_ENG17_SEM2                             0x69b88
+
+#define MI100_INV_ENG17_ACK1                              0x0a318
+#define MI100_INV_ENG17_ACK2                              0x6a918
+#define MI100_INV_ENG17_ACK3                              0x76918
+#define MI100_INV_ENG17_SEM1                              0x0a288
+#define MI100_INV_ENG17_SEM2                              0x6a888
+#define MI100_INV_ENG17_SEM3                              0x76888
+
+class AMDGPUNbio
+{
+  public:
+    AMDGPUNbio();
+
+    void setGPUDevice(AMDGPUDevice *gpu_device);
+
+    void readMMIO(PacketPtr pkt, Addr offset);
+    void writeMMIO(PacketPtr pkt, Addr offset);
+
+    bool readFrame(PacketPtr pkt, Addr offset);
+    void writeFrame(PacketPtr pkt, Addr offset);
+
+  private:
+    AMDGPUDevice *gpuDevice;
+
+    /*
+     * Driver initialization sequence helper variables.
+     */
+    uint64_t mm_index_reg = 0;
+    std::unordered_map<uint32_t, uint32_t> triggered_reads;
+
+    /*
+     * PSP variables used in initialization.
+     */
+    Addr psp_ring = 0;
+    Addr psp_ring_dev_addr = 0;
+    Addr psp_ring_listen_addr = 0;
+    int psp_ring_size = 0;
+    int psp_ring_retval = 0;
+    int psp_ring_value = 0;
+};
+
+} // namespace gem5
+
+#endif // __DEV_AMDGPU_AMDGPU_NBIO__
diff --git a/src/dev/amdgpu/amdgpu_vm.hh b/src/dev/amdgpu/amdgpu_vm.hh
index 212a688716..ac35a11968 100644
--- a/src/dev/amdgpu/amdgpu_vm.hh
+++ b/src/dev/amdgpu/amdgpu_vm.hh
@@ -74,6 +74,13 @@
 #define mmMMHUB_VM_FB_LOCATION_BASE                                   0x082c
 #define mmMMHUB_VM_FB_LOCATION_TOP                                    0x082d
 
+#define VEGA10_FB_LOCATION_BASE                                      0x6a0b0
+#define VEGA10_FB_LOCATION_TOP                                       0x6a0b4
+
+#define MI100_MEM_SIZE_REG                                           0x0378c
+#define MI100_FB_LOCATION_BASE                                       0x6ac00
+#define MI100_FB_LOCATION_TOP                                        0x6ac04
+
 // AMD GPUs support 16 different virtual address spaces
 static constexpr int AMDGPU_VM_COUNT = 16;
 
@@ -192,6 +199,9 @@ class AMDGPUVM : public Serializable
     Addr getMMHUBBase() { return mmhubBase; }
     Addr getMMHUBTop() { return mmhubTop; }
 
+    void setMMHUBBase(Addr base) { mmhubBase = base; }
+    void setMMHUBTop(Addr top) { mmhubTop = top; }
+
     bool
     inFB(Addr vaddr)
     {

From cd76f92c94e7b7eeda914ab37872a187b16afcee Mon Sep 17 00:00:00 2001
From: Matthew Poremba <matthew.poremba@amd.com>
Date: Fri, 21 Apr 2023 19:36:47 -0500
Subject: [PATCH 357/492] arch-vega: Add decodings for new MI100 VOP2 insts

VOP2 with opcodes 55-61 were added in MI100 and are not in Vega10. This
changeset adds the decodings for these instructions.

The changeset does not implement the instructions, however the fatal
message is much more helpful for debugging compared so a generic
decode_invalid handler.

Change-Id: Ibde0880c35ff915bf8e50772df9ce263e55ca893
Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/70042
Maintainer: Matt Sinclair <mattdsinclair@gmail.com>
Tested-by: kokoro <noreply+kokoro@google.com>
Reviewed-by: Matt Sinclair <mattdsinclair@gmail.com>
---
 src/arch/amdgpu/vega/decoder.cc     | 105 ++++++++++++++++++++--------
 src/arch/amdgpu/vega/gpu_decoder.hh |   7 ++
 2 files changed, 84 insertions(+), 28 deletions(-)

diff --git a/src/arch/amdgpu/vega/decoder.cc b/src/arch/amdgpu/vega/decoder.cc
index 291dd6924a..fd3a803bb8 100644
--- a/src/arch/amdgpu/vega/decoder.cc
+++ b/src/arch/amdgpu/vega/decoder.cc
@@ -274,34 +274,34 @@ namespace VegaISA
         &Decoder::decode_OP_VOP2__V_SUBREV_U32,
         &Decoder::decode_OP_VOP2__V_SUBREV_U32,
         &Decoder::decode_OP_VOP2__V_SUBREV_U32,
-        &Decoder::decode_invalid,
-        &Decoder::decode_invalid,
-        &Decoder::decode_invalid,
-        &Decoder::decode_invalid,
-        &Decoder::decode_invalid,
-        &Decoder::decode_invalid,
-        &Decoder::decode_invalid,
-        &Decoder::decode_invalid,
-        &Decoder::decode_invalid,
-        &Decoder::decode_invalid,
-        &Decoder::decode_invalid,
-        &Decoder::decode_invalid,
-        &Decoder::decode_invalid,
-        &Decoder::decode_invalid,
-        &Decoder::decode_invalid,
-        &Decoder::decode_invalid,
-        &Decoder::decode_invalid,
-        &Decoder::decode_invalid,
-        &Decoder::decode_invalid,
-        &Decoder::decode_invalid,
-        &Decoder::decode_invalid,
-        &Decoder::decode_invalid,
-        &Decoder::decode_invalid,
-        &Decoder::decode_invalid,
-        &Decoder::decode_invalid,
-        &Decoder::decode_invalid,
-        &Decoder::decode_invalid,
-        &Decoder::decode_invalid,
+        &Decoder::decode_OP_VOP2__V_DOT2C_F32_F16,
+        &Decoder::decode_OP_VOP2__V_DOT2C_F32_F16,
+        &Decoder::decode_OP_VOP2__V_DOT2C_F32_F16,
+        &Decoder::decode_OP_VOP2__V_DOT2C_F32_F16,
+        &Decoder::decode_OP_VOP2__V_DOT2C_I32_I16,
+        &Decoder::decode_OP_VOP2__V_DOT2C_I32_I16,
+        &Decoder::decode_OP_VOP2__V_DOT2C_I32_I16,
+        &Decoder::decode_OP_VOP2__V_DOT2C_I32_I16,
+        &Decoder::decode_OP_VOP2__V_DOT4C_I32_I8,
+        &Decoder::decode_OP_VOP2__V_DOT4C_I32_I8,
+        &Decoder::decode_OP_VOP2__V_DOT4C_I32_I8,
+        &Decoder::decode_OP_VOP2__V_DOT4C_I32_I8,
+        &Decoder::decode_OP_VOP2__V_DOT8C_I32_I4,
+        &Decoder::decode_OP_VOP2__V_DOT8C_I32_I4,
+        &Decoder::decode_OP_VOP2__V_DOT8C_I32_I4,
+        &Decoder::decode_OP_VOP2__V_DOT8C_I32_I4,
+        &Decoder::decode_OP_VOP2__V_FMAC_F32,
+        &Decoder::decode_OP_VOP2__V_FMAC_F32,
+        &Decoder::decode_OP_VOP2__V_FMAC_F32,
+        &Decoder::decode_OP_VOP2__V_FMAC_F32,
+        &Decoder::decode_OP_VOP2__V_PK_FMAC_F16,
+        &Decoder::decode_OP_VOP2__V_PK_FMAC_F16,
+        &Decoder::decode_OP_VOP2__V_PK_FMAC_F16,
+        &Decoder::decode_OP_VOP2__V_PK_FMAC_F16,
+        &Decoder::decode_OP_VOP2__V_XNOR_B32,
+        &Decoder::decode_OP_VOP2__V_XNOR_B32,
+        &Decoder::decode_OP_VOP2__V_XNOR_B32,
+        &Decoder::decode_OP_VOP2__V_XNOR_B32,
         &Decoder::subDecode_OP_VOPC,
         &Decoder::subDecode_OP_VOPC,
         &Decoder::subDecode_OP_VOPC,
@@ -4171,6 +4171,55 @@ namespace VegaISA
         return new Inst_VOP2__V_SUBREV_U32(&iFmt->iFmt_VOP2);
     }
 
+    GPUStaticInst*
+    Decoder::decode_OP_VOP2__V_DOT2C_F32_F16(MachInst iFmt)
+    {
+        fatal("Trying to decode instruction without a class\n");
+        return nullptr;
+    }
+
+    GPUStaticInst*
+    Decoder::decode_OP_VOP2__V_DOT2C_I32_I16(MachInst iFmt)
+    {
+        fatal("Trying to decode instruction without a class\n");
+        return nullptr;
+    }
+
+    GPUStaticInst*
+    Decoder::decode_OP_VOP2__V_DOT4C_I32_I8(MachInst iFmt)
+    {
+        fatal("Trying to decode instruction without a class\n");
+        return nullptr;
+    }
+
+    GPUStaticInst*
+    Decoder::decode_OP_VOP2__V_DOT8C_I32_I4(MachInst iFmt)
+    {
+        fatal("Trying to decode instruction without a class\n");
+        return nullptr;
+    }
+
+    GPUStaticInst*
+    Decoder::decode_OP_VOP2__V_FMAC_F32(MachInst iFmt)
+    {
+        fatal("Trying to decode instruction without a class\n");
+        return nullptr;
+    }
+
+    GPUStaticInst*
+    Decoder::decode_OP_VOP2__V_PK_FMAC_F16(MachInst iFmt)
+    {
+        fatal("Trying to decode instruction without a class\n");
+        return nullptr;
+    }
+
+    GPUStaticInst*
+    Decoder::decode_OP_VOP2__V_XNOR_B32(MachInst iFmt)
+    {
+        fatal("Trying to decode instruction without a class\n");
+        return nullptr;
+    }
+
     GPUStaticInst*
     Decoder::decode_OP_SOP2__S_ADD_U32(MachInst iFmt)
     {
diff --git a/src/arch/amdgpu/vega/gpu_decoder.hh b/src/arch/amdgpu/vega/gpu_decoder.hh
index 1be43861df..af989e0cc7 100644
--- a/src/arch/amdgpu/vega/gpu_decoder.hh
+++ b/src/arch/amdgpu/vega/gpu_decoder.hh
@@ -1358,6 +1358,13 @@ namespace VegaISA
         GPUStaticInst* decode_OP_VOP2__V_ADD_U32(MachInst);
         GPUStaticInst* decode_OP_VOP2__V_SUB_U32(MachInst);
         GPUStaticInst* decode_OP_VOP2__V_SUBREV_U32(MachInst);
+        GPUStaticInst* decode_OP_VOP2__V_DOT2C_F32_F16(MachInst);
+        GPUStaticInst* decode_OP_VOP2__V_DOT2C_I32_I16(MachInst);
+        GPUStaticInst* decode_OP_VOP2__V_DOT4C_I32_I8(MachInst);
+        GPUStaticInst* decode_OP_VOP2__V_DOT8C_I32_I4(MachInst);
+        GPUStaticInst* decode_OP_VOP2__V_FMAC_F32(MachInst);
+        GPUStaticInst* decode_OP_VOP2__V_PK_FMAC_F16(MachInst);
+        GPUStaticInst* decode_OP_VOP2__V_XNOR_B32(MachInst);
         GPUStaticInst* decode_OP_VOPC__V_CMP_CLASS_F32(MachInst);
         GPUStaticInst* decode_OP_VOPC__V_CMPX_CLASS_F32(MachInst);
         GPUStaticInst* decode_OP_VOPC__V_CMP_CLASS_F64(MachInst);

From a02f1f6c057d58b4fa87c8964268eef2c62c9974 Mon Sep 17 00:00:00 2001
From: Roger Chang <rogerycchang@google.com>
Date: Thu, 27 Apr 2023 02:28:09 +0000
Subject: [PATCH 358/492] tests: Revert "arch-riscv: add RV32 ADFIMU_Zfh
 instruction tests"

This reverts https://gem5-review.googlesource.com/c/public/gem5/+/65533

This is early version support RV32 instruction tests. We should directly
set isa feature of RiscvCPU to run RV32 instruction not just choose
Riscv32CPU

Change-Id: I51b744e9d827adfabc2a7c222ab3801d454601d1
Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/70097
Tested-by: kokoro <noreply+kokoro@google.com>
Maintainer: Bobby Bruce <bbruce@ucdavis.edu>
Reviewed-by: Bobby Bruce <bbruce@ucdavis.edu>
---
 tests/gem5/asmtest/tests.py             | 310 +++++++++++-------------
 tests/gem5/configs/simple_binary_run.py |  61 +----
 2 files changed, 160 insertions(+), 211 deletions(-)

diff --git a/tests/gem5/asmtest/tests.py b/tests/gem5/asmtest/tests.py
index 0ddffb27cf..b2a5992da0 100644
--- a/tests/gem5/asmtest/tests.py
+++ b/tests/gem5/asmtest/tests.py
@@ -34,159 +34,156 @@ else:
 # The following lists the RISCV binaries. Those commented out presently result
 # in a test failure. This is outlined in the following Jira issue:
 # https://gem5.atlassian.net/browse/GEM5-496
-binary_configs = (
-    ("rv{}samt-ps-sysclone_d", (64,)),
-    ("rv{}samt-ps-sysfutex1_d", (64,)),
+binaries = (
+    "rv64samt-ps-sysclone_d",
+    "rv64samt-ps-sysfutex1_d",
     #    'rv64samt-ps-sysfutex2_d',
-    ("rv{}samt-ps-sysfutex3_d", (64,)),
+    "rv64samt-ps-sysfutex3_d",
     #    'rv64samt-ps-sysfutex_d',
-    ("rv{}ua-ps-amoadd_d", (64,)),
-    ("rv{}ua-ps-amoadd_w", (32, 64)),
-    ("rv{}ua-ps-amoand_d", (64,)),
-    ("rv{}ua-ps-amoand_w", (32, 64)),
-    ("rv{}ua-ps-amomax_d", (64,)),
-    ("rv{}ua-ps-amomax_w", (32, 64)),
-    ("rv{}ua-ps-amomaxu_d", (64,)),
-    ("rv{}ua-ps-amomaxu_w", (32, 64)),
-    ("rv{}ua-ps-amomin_d", (64,)),
-    ("rv{}ua-ps-amomin_w", (32, 64)),
-    ("rv{}ua-ps-amominu_d", (64,)),
-    ("rv{}ua-ps-amominu_w", (32, 64)),
-    ("rv{}ua-ps-amoor_d", (64,)),
-    ("rv{}ua-ps-amoor_w", (32, 64)),
-    ("rv{}ua-ps-amoswap_d", (64,)),
-    ("rv{}ua-ps-amoswap_w", (32, 64)),
-    ("rv{}ua-ps-amoxor_d", (64,)),
-    ("rv{}ua-ps-amoxor_w", (32, 64)),
-    ("rv{}ua-ps-lrsc", (32, 64)),
-    ("rv{}uamt-ps-amoadd_d", (64,)),
-    ("rv{}uamt-ps-amoand_d", (64,)),
-    ("rv{}uamt-ps-amomax_d", (64,)),
-    ("rv{}uamt-ps-amomaxu_d", (64,)),
-    ("rv{}uamt-ps-amomin_d", (64,)),
-    ("rv{}uamt-ps-amominu_d", (64,)),
-    ("rv{}uamt-ps-amoor_d", (64,)),
-    ("rv{}uamt-ps-amoswap_d", (64,)),
-    ("rv{}uamt-ps-amoxor_d", (64,)),
-    ("rv{}uamt-ps-lrsc_d", (64,)),
-    ("rv{}uamt-ps-amoadd_w", (32,)),
-    ("rv{}uamt-ps-amoand_w", (32,)),
-    ("rv{}uamt-ps-amomax_w", (32,)),
-    ("rv{}uamt-ps-amomaxu_w", (32,)),
-    ("rv{}uamt-ps-amomin_w", (32,)),
-    ("rv{}uamt-ps-amominu_w", (32,)),
-    ("rv{}uamt-ps-amoor_w", (32,)),
-    ("rv{}uamt-ps-amoswap_w", (32,)),
-    ("rv{}uamt-ps-amoxor_w", (32,)),
-    ("rv{}uamt-ps-lrsc_w", (32,)),
-    ("rv{}ud-ps-fadd", (32, 64)),
-    ("rv{}ud-ps-fclass", (32, 64)),
-    ("rv{}ud-ps-fcmp", (32, 64)),
-    ("rv{}ud-ps-fcvt", (32, 64)),
-    ("rv{}ud-ps-fcvt_w", (32, 64)),
-    ("rv{}ud-ps-fdiv", (32, 64)),
-    ("rv{}ud-ps-fmadd", (32, 64)),
-    ("rv{}ud-ps-fmin", (32, 64)),
-    ("rv{}ud-ps-ldst", (32, 64)),
-    ("rv{}ud-ps-move", (64,)),
-    ("rv{}ud-ps-recoding", (32, 64)),
-    ("rv{}ud-ps-structural", (64,)),
-    ("rv{}uf-ps-fadd", (32, 64)),
-    ("rv{}uf-ps-fclass", (32, 64)),
-    ("rv{}uf-ps-fcmp", (32, 64)),
-    ("rv{}uf-ps-fcvt", (32, 64)),
-    ("rv{}uf-ps-fcvt_w", (32, 64)),
-    ("rv{}uf-ps-fdiv", (32, 64)),
-    ("rv{}uf-ps-fmadd", (32, 64)),
-    ("rv{}uf-ps-fmin", (32, 64)),
-    ("rv{}uf-ps-ldst", (32, 64)),
-    ("rv{}uf-ps-move", (32, 64)),
-    ("rv{}uf-ps-recoding", (32, 64)),
-    ("rv{}ui-ps-add", (32, 64)),
-    ("rv{}ui-ps-addi", (32, 64)),
-    ("rv{}ui-ps-addiw", (64,)),
-    ("rv{}ui-ps-addw", (64,)),
-    ("rv{}ui-ps-and", (32, 64)),
-    ("rv{}ui-ps-andi", (32, 64)),
-    ("rv{}ui-ps-auipc", (32, 64)),
-    ("rv{}ui-ps-beq", (32, 64)),
-    ("rv{}ui-ps-bge", (32, 64)),
-    ("rv{}ui-ps-bgeu", (32, 64)),
-    ("rv{}ui-ps-blt", (32, 64)),
-    ("rv{}ui-ps-bltu", (32, 64)),
-    ("rv{}ui-ps-bne", (32, 64)),
-    ("rv{}ui-ps-fence_i", (32, 64)),
-    ("rv{}ui-ps-jal", (32, 64)),
-    ("rv{}ui-ps-jalr", (32, 64)),
-    ("rv{}ui-ps-lb", (32, 64)),
-    ("rv{}ui-ps-lbu", (32, 64)),
-    ("rv{}ui-ps-ld", (64,)),
-    ("rv{}ui-ps-lh", (32, 64)),
-    ("rv{}ui-ps-lhu", (32, 64)),
-    ("rv{}ui-ps-lui", (32, 64)),
-    ("rv{}ui-ps-lw", (32, 64)),
-    ("rv{}ui-ps-lwu", (64,)),
-    ("rv{}ui-ps-or", (32, 64)),
-    ("rv{}ui-ps-ori", (32, 64)),
-    ("rv{}ui-ps-sb", (32, 64)),
-    ("rv{}ui-ps-sd", (64,)),
-    ("rv{}ui-ps-sh", (32, 64)),
-    ("rv{}ui-ps-simple", (32, 64)),
-    ("rv{}ui-ps-sll", (32, 64)),
-    ("rv{}ui-ps-slli", (32, 64)),
-    ("rv{}ui-ps-slliw", (64,)),
-    ("rv{}ui-ps-sllw", (64,)),
-    ("rv{}ui-ps-slt", (32, 64)),
-    ("rv{}ui-ps-slti", (32, 64)),
-    ("rv{}ui-ps-sltiu", (32, 64)),
-    ("rv{}ui-ps-sltu", (32, 64)),
-    ("rv{}ui-ps-sra", (32, 64)),
-    ("rv{}ui-ps-srai", (32, 64)),
-    ("rv{}ui-ps-sraiw", (64,)),
-    ("rv{}ui-ps-sraw", (64,)),
-    ("rv{}ui-ps-srl", (32, 64)),
-    ("rv{}ui-ps-srli", (32, 64)),
-    ("rv{}ui-ps-srliw", (64,)),
-    ("rv{}ui-ps-srlw", (64,)),
-    ("rv{}ui-ps-sub", (32, 64)),
-    ("rv{}ui-ps-subw", (64,)),
-    ("rv{}ui-ps-sw", (32, 64)),
-    ("rv{}ui-ps-xor", (32, 64)),
-    ("rv{}ui-ps-xori", (32, 64)),
-    ("rv{}um-ps-div", (32, 64)),
-    ("rv{}um-ps-divu", (32, 64)),
-    ("rv{}um-ps-divuw", (64,)),
-    ("rv{}um-ps-divw", (64,)),
-    ("rv{}um-ps-mul", (32, 64)),
-    ("rv{}um-ps-mulh", (32, 64)),
-    ("rv{}um-ps-mulhsu", (32, 64)),
-    ("rv{}um-ps-mulhu", (32, 64)),
-    ("rv{}um-ps-mulw", (64,)),
-    ("rv{}um-ps-rem", (32, 64)),
-    ("rv{}um-ps-remu", (32, 64)),
-    ("rv{}um-ps-remuw", (64,)),
-    ("rv{}um-ps-remw", (64,)),
-    ("rv{}uzfh-ps-fadd", (32, 64)),
-    ("rv{}uzfh-ps-fclass", (32, 64)),
-    ("rv{}uzfh-ps-fcmp", (32, 64)),
-    ("rv{}uzfh-ps-fcvt", (32, 64)),
-    ("rv{}uzfh-ps-fcvt_w", (32, 64)),
-    ("rv{}uzfh-ps-fdiv", (32, 64)),
-    ("rv{}uzfh-ps-fmadd", (32, 64)),
-    ("rv{}uzfh-ps-fmin", (32, 64)),
-    ("rv{}uzfh-ps-ldst", (32, 64)),
-    ("rv{}uzfh-ps-move", (32, 64)),
-    ("rv{}uzfh-ps-recoding", (32, 64)),
+    "rv64ua-ps-amoadd_d",
+    "rv64ua-ps-amoadd_w",
+    "rv64ua-ps-amoand_d",
+    "rv64ua-ps-amoand_w",
+    "rv64ua-ps-amomax_d",
+    "rv64ua-ps-amomax_w",
+    "rv64ua-ps-amomaxu_d",
+    "rv64ua-ps-amomaxu_w",
+    "rv64ua-ps-amomin_d",
+    "rv64ua-ps-amomin_w",
+    "rv64ua-ps-amominu_d",
+    "rv64ua-ps-amominu_w",
+    "rv64ua-ps-amoor_d",
+    "rv64ua-ps-amoor_w",
+    "rv64ua-ps-amoswap_d",
+    "rv64ua-ps-amoswap_w",
+    "rv64ua-ps-amoxor_d",
+    "rv64ua-ps-amoxor_w",
+    "rv64ua-ps-lrsc",
+    "rv64uamt-ps-amoadd_d",
+    "rv64uamt-ps-amoand_d",
+    "rv64uamt-ps-amomax_d",
+    "rv64uamt-ps-amomaxu_d",
+    "rv64uamt-ps-amomin_d",
+    "rv64uamt-ps-amominu_d",
+    "rv64uamt-ps-amoor_d",
+    "rv64uamt-ps-amoswap_d",
+    "rv64uamt-ps-amoxor_d",
+    "rv64uamt-ps-lrsc_d",
+    "rv64ud-ps-fadd",
+    "rv64ud-ps-fclass",
+    "rv64ud-ps-fcmp",
+    "rv64ud-ps-fcvt",
+    "rv64ud-ps-fcvt_w",
+    "rv64ud-ps-fdiv",
+    "rv64ud-ps-fmadd",
+    "rv64ud-ps-fmin",
+    "rv64ud-ps-ldst",
+    "rv64ud-ps-move",
+    "rv64ud-ps-recoding",
+    "rv64ud-ps-structural",
+    "rv64uf-ps-fadd",
+    "rv64uf-ps-fclass",
+    "rv64uf-ps-fcmp",
+    "rv64uf-ps-fcvt",
+    "rv64uf-ps-fcvt_w",
+    "rv64uf-ps-fdiv",
+    "rv64uf-ps-fmadd",
+    "rv64uf-ps-fmin",
+    "rv64uf-ps-ldst",
+    "rv64uf-ps-move",
+    "rv64uf-ps-recoding",
+    "rv64ui-ps-add",
+    "rv64ui-ps-addi",
+    "rv64ui-ps-addiw",
+    "rv64ui-ps-addw",
+    "rv64ui-ps-and",
+    "rv64ui-ps-andi",
+    "rv64ui-ps-auipc",
+    "rv64ui-ps-beq",
+    "rv64ui-ps-bge",
+    "rv64ui-ps-bgeu",
+    "rv64ui-ps-blt",
+    "rv64ui-ps-bltu",
+    "rv64ui-ps-bne",
+    "rv64ui-ps-fence_i",
+    "rv64ui-ps-jal",
+    "rv64ui-ps-jalr",
+    "rv64ui-ps-lb",
+    "rv64ui-ps-lbu",
+    "rv64ui-ps-ld",
+    "rv64ui-ps-lh",
+    "rv64ui-ps-lhu",
+    "rv64ui-ps-lui",
+    "rv64ui-ps-lw",
+    "rv64ui-ps-lwu",
+    "rv64ui-ps-or",
+    "rv64ui-ps-ori",
+    "rv64ui-ps-sb",
+    "rv64ui-ps-sd",
+    "rv64ui-ps-sh",
+    "rv64ui-ps-simple",
+    "rv64ui-ps-sll",
+    "rv64ui-ps-slli",
+    "rv64ui-ps-slliw",
+    "rv64ui-ps-sllw",
+    "rv64ui-ps-slt",
+    "rv64ui-ps-slti",
+    "rv64ui-ps-sltiu",
+    "rv64ui-ps-sltu",
+    "rv64ui-ps-sra",
+    "rv64ui-ps-srai",
+    "rv64ui-ps-sraiw",
+    "rv64ui-ps-sraw",
+    "rv64ui-ps-srl",
+    "rv64ui-ps-srli",
+    "rv64ui-ps-srliw",
+    "rv64ui-ps-srlw",
+    "rv64ui-ps-sub",
+    "rv64ui-ps-subw",
+    "rv64ui-ps-sw",
+    "rv64ui-ps-xor",
+    "rv64ui-ps-xori",
+    "rv64um-ps-div",
+    "rv64um-ps-divu",
+    "rv64um-ps-divuw",
+    "rv64um-ps-divw",
+    "rv64um-ps-mul",
+    "rv64um-ps-mulh",
+    "rv64um-ps-mulhsu",
+    "rv64um-ps-mulhu",
+    "rv64um-ps-mulw",
+    "rv64um-ps-rem",
+    "rv64um-ps-remu",
+    "rv64um-ps-remuw",
+    "rv64um-ps-remw",
+    "rv64uzfh-ps-fadd",
+    "rv64uzfh-ps-fclass",
+    "rv64uzfh-ps-fcmp",
+    "rv64uzfh-ps-fcvt",
+    "rv64uzfh-ps-fcvt_w",
+    "rv64uzfh-ps-fdiv",
+    "rv64uzfh-ps-fmadd",
+    "rv64uzfh-ps-fmin",
+    "rv64uzfh-ps-ldst",
+    "rv64uzfh-ps-move",
+    "rv64uzfh-ps-recoding",
 )
 
 cpu_types = ("atomic", "timing", "minor", "o3")
 
 for cpu_type in cpu_types:
-    for cfg in binary_configs:
-        template_bin, all_bits = cfg
-        for bits in all_bits:
-            binary = template_bin.format(bits)
-            config_args = [
+    for binary in binaries:
+        gem5_verify_config(
+            name=f"asm-riscv-{binary}-{cpu_type}",
+            verifiers=(),
+            config=joinpath(
+                config.base_dir,
+                "tests",
+                "gem5",
+                "configs",
+                "simple_binary_run.py",
+            ),
+            config_args=[
                 binary,
                 cpu_type,
                 "riscv",
@@ -194,20 +191,7 @@ for cpu_type in cpu_types:
                 "4",
                 "--resource-directory",
                 resource_path,
-            ]
-            if bits == 32:
-                config_args.extend(["-b", "--riscv-32bits"])
-            gem5_verify_config(
-                name=f"asm-riscv-{binary}-{cpu_type}",
-                verifiers=(),
-                config=joinpath(
-                    config.base_dir,
-                    "tests",
-                    "gem5",
-                    "configs",
-                    "simple_binary_run.py",
-                ),
-                config_args=config_args,
-                valid_isas=(constants.all_compiled_tag,),
-                valid_hosts=constants.supported_hosts,
-            )
+            ],
+            valid_isas=(constants.all_compiled_tag,),
+            valid_hosts=constants.supported_hosts,
+        )
diff --git a/tests/gem5/configs/simple_binary_run.py b/tests/gem5/configs/simple_binary_run.py
index 5540e806ba..f5e097eaae 100644
--- a/tests/gem5/configs/simple_binary_run.py
+++ b/tests/gem5/configs/simple_binary_run.py
@@ -1,5 +1,4 @@
 # Copyright (c) 2021 The Regents of the University of California
-# Copyright (c) 2022 Google Inc
 # All rights reserved.
 #
 # Redistribution and use in source and binary forms, with or without
@@ -46,20 +45,9 @@ from gem5.components.processors.simple_core import SimpleCore
 from gem5.components.boards.mem_mode import MemMode
 from gem5.components.processors.cpu_types import CPUTypes
 from gem5.simulate.simulator import Simulator
-from gem5.isas import get_isa_from_str, get_isas_str_set, ISA
-
-from m5.util import fatal
+from gem5.isas import get_isa_from_str, get_isas_str_set
 
 import argparse
-import importlib
-
-cpu_types_string_map = {
-    CPUTypes.ATOMIC: "AtomicSimpleCPU",
-    CPUTypes.O3: "O3CPU",
-    CPUTypes.TIMING: "TimingSimpleCPU",
-    CPUTypes.KVM: "KvmCPU",
-    CPUTypes.MINOR: "MinorCPU",
-}
 
 parser = argparse.ArgumentParser(
     description="A gem5 script for running simple binaries in SE mode."
@@ -84,12 +72,6 @@ parser.add_argument(
     help="Use the BaseCPUProcessor instead of the SimpleProcessor.",
 )
 
-parser.add_argument(
-    "--riscv-32bits",
-    action="store_true",
-    help="Use 32 bits core of Riscv CPU",
-)
-
 parser.add_argument(
     "-r",
     "--resource-directory",
@@ -122,43 +104,26 @@ args = parser.parse_args()
 cache_hierarchy = NoCache()
 memory = SingleChannelDDR3_1600()
 
-isa_enum = get_isa_from_str(args.isa)
-cpu_enum = get_cpu_type_from_str(args.cpu)
-
-if isa_enum == ISA.RISCV and args.riscv_32bits and not args.base_cpu_processor:
-    fatal("To use Riscv 32 CPU, the base_cpu_processor must be specify!")
-
 if args.base_cpu_processor:
-
-    if isa_enum == ISA.RISCV and args.riscv_32bits:
-        m5_objects = importlib.import_module("m5.objects")
-        cpu_class = getattr(
-            m5_objects, f"Riscv32{cpu_types_string_map[cpu_enum]}"
+    cores = [
+        BaseCPUCore(
+            core=SimpleCore.cpu_simobject_factory(
+                cpu_type=get_cpu_type_from_str(args.cpu),
+                isa=get_isa_from_str(args.isa),
+                core_id=i,
+            ),
+            isa=get_isa_from_str(args.isa),
         )
-        cores = [
-            BaseCPUCore(core=cpu_class(cpu_id=i), isa=isa_enum)
-            for i in range(args.num_cores)
-        ]
-    else:
-        cores = [
-            BaseCPUCore(
-                core=SimpleCore.cpu_simobject_factory(
-                    cpu_type=cpu_enum,
-                    isa=isa_enum,
-                    core_id=i,
-                ),
-                isa=isa_enum,
-            )
-            for i in range(args.num_cores)
-        ]
+        for i in range(args.num_cores)
+    ]
 
     processor = BaseCPUProcessor(
         cores=cores,
     )
 else:
     processor = SimpleProcessor(
-        cpu_type=cpu_enum,
-        isa=isa_enum,
+        cpu_type=get_cpu_type_from_str(args.cpu),
+        isa=get_isa_from_str(args.isa),
         num_cores=args.num_cores,
     )
 

From 916bcbb4c49557a35ba3f95fae484a326d1a0428 Mon Sep 17 00:00:00 2001
From: Roger Chang <rogerycchang@google.com>
Date: Tue, 3 Jan 2023 10:31:12 +0800
Subject: [PATCH 359/492] arch-riscv: Remove Riscv32CPU instance

To use riscv 32 bits CPU, we can simply speficy by RiscvXXXCPU
   parameters like
   RiscvAtomicSimpleCPU(isa=RiscvISA(riscv_type="RV32"...))

Change-Id: I7ec66957f978062eda609b1a7e63468d23b5bab5
Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/66871
Reviewed-by: Jui-min Lee <fcrh@google.com>
Maintainer: Bobby Bruce <bbruce@ucdavis.edu>
Tested-by: kokoro <noreply+kokoro@google.com>
Reviewed-by: Bobby Bruce <bbruce@ucdavis.edu>
Reviewed-by: Yu-hsin Wang <yuhsingw@google.com>
---
 src/arch/riscv/RiscvCPU.py | 29 -----------------------------
 1 file changed, 29 deletions(-)

diff --git a/src/arch/riscv/RiscvCPU.py b/src/arch/riscv/RiscvCPU.py
index 678c3295c6..1c77045c67 100644
--- a/src/arch/riscv/RiscvCPU.py
+++ b/src/arch/riscv/RiscvCPU.py
@@ -23,8 +23,6 @@
 # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 
-import functools
-
 from m5.objects.BaseAtomicSimpleCPU import BaseAtomicSimpleCPU
 from m5.objects.BaseNonCachingSimpleCPU import BaseNonCachingSimpleCPU
 from m5.objects.BaseTimingSimpleCPU import BaseTimingSimpleCPU
@@ -43,13 +41,6 @@ class RiscvCPU:
     ArchISA = RiscvISA
 
 
-class Riscv32CPU:
-    ArchDecoder = RiscvDecoder
-    ArchMMU = RiscvMMU
-    ArchInterrupts = RiscvInterrupts
-    ArchISA = functools.partial(RiscvISA, riscv_type="RV32")
-
-
 class RiscvAtomicSimpleCPU(BaseAtomicSimpleCPU, RiscvCPU):
     mmu = RiscvMMU()
 
@@ -68,23 +59,3 @@ class RiscvO3CPU(BaseO3CPU, RiscvCPU):
 
 class RiscvMinorCPU(BaseMinorCPU, RiscvCPU):
     mmu = RiscvMMU()
-
-
-class Riscv32AtomicSimpleCPU(BaseAtomicSimpleCPU, Riscv32CPU):
-    mmu = RiscvMMU()
-
-
-class Riscv32NonCachingSimpleCPU(BaseNonCachingSimpleCPU, Riscv32CPU):
-    mmu = RiscvMMU()
-
-
-class Riscv32TimingSimpleCPU(BaseTimingSimpleCPU, Riscv32CPU):
-    mmu = RiscvMMU()
-
-
-class Riscv32O3CPU(BaseO3CPU, Riscv32CPU):
-    mmu = RiscvMMU()
-
-
-class Riscv32MinorCPU(BaseMinorCPU, Riscv32CPU):
-    mmu = RiscvMMU()

From 8659b9e1afa88abbee93c6b1091274e4adb80395 Mon Sep 17 00:00:00 2001
From: Vishnu Ramadas <vramadas@wisc.edu>
Date: Wed, 26 Apr 2023 14:34:22 -0500
Subject: [PATCH 360/492] dev-amdgpu: Update vega10_kvm.py to add checkpointing
 instruction

The vega10_kvm.py script configures a system to run in GPUFS mode. To
create a checkpoint, an m5 checkpoint instruction has to be added to the
script manually. This commit automatically adds the instruction if the
checkpoint-dir flag is set

Change-Id: I552fae6e98f6ec33a70a5b384242e87edb0e9526
Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/70078
Reviewed-by: Matthew Poremba <matthew.poremba@amd.com>
Tested-by: kokoro <noreply+kokoro@google.com>
Reviewed-by: Matt Sinclair <mattdsinclair@gmail.com>
Maintainer: Matt Sinclair <mattdsinclair@gmail.com>
---
 configs/example/gpufs/vega10_kvm.py | 27 ++++++++++++++++++++++++++-
 1 file changed, 26 insertions(+), 1 deletion(-)

diff --git a/configs/example/gpufs/vega10_kvm.py b/configs/example/gpufs/vega10_kvm.py
index 9c7e4578f2..11f9fe2f80 100644
--- a/configs/example/gpufs/vega10_kvm.py
+++ b/configs/example/gpufs/vega10_kvm.py
@@ -41,7 +41,7 @@ from common import GPUTLBOptions
 from ruby import Ruby
 
 
-demo_runscript = """\
+demo_runscript_without_checkpoint = """\
 export LD_LIBRARY_PATH=/opt/rocm/lib:$LD_LIBRARY_PATH
 export HSA_ENABLE_INTERRUPT=0
 dmesg -n8
@@ -58,6 +58,24 @@ chmod +x myapp
 /sbin/m5 exit
 """
 
+demo_runscript_with_checkpoint = """\
+export LD_LIBRARY_PATH=/opt/rocm/lib:$LD_LIBRARY_PATH
+export HSA_ENABLE_INTERRUPT=0
+dmesg -n8
+dd if=/root/roms/vega10.rom of=/dev/mem bs=1k seek=768 count=128
+if [ ! -f /lib/modules/`uname -r`/updates/dkms/amdgpu.ko ]; then
+    echo "ERROR: Missing DKMS package for kernel `uname -r`. Exiting gem5."
+    /sbin/m5 exit
+fi
+modprobe -v amdgpu ip_block_mask=0xff ppfeaturemask=0 dpm=0 audio=0
+echo "Running {} {}"
+echo "{}" | base64 -d > myapp
+chmod +x myapp
+/sbin/m5 checkpoint
+./myapp {}
+/sbin/m5 exit
+"""
+
 
 def addDemoOptions(parser):
     parser.add_argument(
@@ -79,6 +97,7 @@ if __name__ == "__m5_main__":
 
     # Parse now so we can override options
     args = parser.parse_args()
+    demo_runscript = ""
 
     # Create temp script to run application
     if args.app is None:
@@ -97,6 +116,12 @@ if __name__ == "__m5_main__":
         print("Could not find applcation", args.app)
         sys.exit(1)
 
+    # Choose runscript Based on whether any checkpointing args are set
+    if args.checkpoint_dir is not None:
+        demo_runscript = demo_runscript_with_checkpoint
+    else:
+        demo_runscript = demo_runscript_without_checkpoint
+
     with open(os.path.abspath(args.app), "rb") as binfile:
         encodedBin = base64.b64encode(binfile.read()).decode()
 

From dd5b1a674ece6f4fcff2f8db034852b1f05ac9e6 Mon Sep 17 00:00:00 2001
From: Melissa Jost <melissakjost@gmail.com>
Date: Mon, 1 May 2023 15:55:00 -0700
Subject: [PATCH 361/492] dev-amdgpu: Remove unused psp_ring_retval integer

This change addresses the compiler failures that have been
causing any GCN3_X86 build to fail.
https://jenkins.gem5.org/job/compiler-checks/589/

Change-Id: Ifd8e2ef89549752ca4aedf0bc9fa47e831a822d3
Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/70217
Reviewed-by: Matthew Poremba <matthew.poremba@amd.com>
Reviewed-by: Matt Sinclair <mattdsinclair@gmail.com>
Maintainer: Matthew Poremba <matthew.poremba@amd.com>
Tested-by: kokoro <noreply+kokoro@google.com>
---
 src/dev/amdgpu/amdgpu_nbio.hh | 1 -
 1 file changed, 1 deletion(-)

diff --git a/src/dev/amdgpu/amdgpu_nbio.hh b/src/dev/amdgpu/amdgpu_nbio.hh
index 68d174e870..d1e5391ec4 100644
--- a/src/dev/amdgpu/amdgpu_nbio.hh
+++ b/src/dev/amdgpu/amdgpu_nbio.hh
@@ -109,7 +109,6 @@ class AMDGPUNbio
     Addr psp_ring_dev_addr = 0;
     Addr psp_ring_listen_addr = 0;
     int psp_ring_size = 0;
-    int psp_ring_retval = 0;
     int psp_ring_value = 0;
 };
 

From 2dafebb4b934d11d698153d6a12a521a3381e254 Mon Sep 17 00:00:00 2001
From: Roger Chang <rogerycchang@google.com>
Date: Tue, 29 Nov 2022 16:29:01 +0800
Subject: [PATCH 362/492] arch-riscv: seperate RV32 and RV64 Zk extensions

1. If the instruction is RV64 only, such as zknd(aes64ds, aes64dsm,
   aes64im, aes64ks1i, and aes64ks2), zkne(aes64es, aes64esm,
   aes64ks1i, aes64ks2), Zknh(sha512sig0, sha512sig1, sha512sum0,
   sha512sum1). The decoder should check rv_type before returning
   the instruction.

2. For the Zbkx(xperm8 and xperm4), I seperate them with RV32 and
   RV64 respectively, since the xperm function has individual
   implement for handling different size of integer.

3. Add the brev8(zbkb) instruction

Change-Id: Id0b7ab2772fd1b21c1ee41075df44a5b6dbe5b47
Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/66191
Reviewed-by: Hoa Nguyen <hoanguyen@ucdavis.edu>
Reviewed-by: Jason Lowe-Power <power.jg@gmail.com>
Maintainer: Jason Lowe-Power <power.jg@gmail.com>
Tested-by: kokoro <noreply+kokoro@google.com>
---
 src/arch/riscv/isa/decoder.isa | 124 ++++++++++++++++++++++-----------
 1 file changed, 82 insertions(+), 42 deletions(-)

diff --git a/src/arch/riscv/isa/decoder.isa b/src/arch/riscv/isa/decoder.isa
index 58958bb9f1..cb2b19909a 100644
--- a/src/arch/riscv/isa/decoder.isa
+++ b/src/arch/riscv/isa/decoder.isa
@@ -537,18 +537,26 @@ decode QUADRANT default Unknown::unknown() {
                         0x3: sha256sig1({{
                             Rd_sw = _rvk_emu_sha256sig1(Rs1_sw);
                         }});
-                        0x4: sha512sum0({{
-                            Rd_sd = _rvk_emu_sha512sum0(Rs1_sd);
-                        }});
-                        0x5: sha512sum1({{
-                            Rd_sd = _rvk_emu_sha512sum1(Rs1_sd);
-                        }});
-                        0x6: sha512sig0({{
-                            Rd_sd = _rvk_emu_sha512sig0(Rs1_sd);
-                        }});
-                        0x7: sha512sig1({{
-                            Rd_sd = _rvk_emu_sha512sig1(Rs1_sd);
-                        }});
+                        0x4: decode RVTYPE {
+                            0x1: sha512sum0({{
+                                Rd_sd = _rvk_emu_sha512sum0(Rs1_sd);
+                            }});
+                        }
+                        0x5: decode RVTYPE {
+                            0x1: sha512sum1({{
+                                Rd_sd = _rvk_emu_sha512sum1(Rs1_sd);
+                            }});
+                        }
+                        0x6: decode RVTYPE {
+                            0x1: sha512sig0({{
+                                Rd_sd = _rvk_emu_sha512sig0(Rs1_sd);
+                            }});
+                        }
+                        0x7: decode RVTYPE {
+                            0x1: sha512sig1({{
+                                Rd_sd = _rvk_emu_sha512sig1(Rs1_sd);
+                            }});
+                        }
                         0x8: sm3p0({{
                             Rd_sw = _rvk_emu_sm3p0(Rs1_sw);
                         }});
@@ -565,12 +573,16 @@ decode QUADRANT default Unknown::unknown() {
                         Rd = rvSext(Rs1 | (UINT64_C(1) << index));
                     }}, imm_type = uint64_t, imm_code = {{ imm = SHAMT6; }});
                     0x06: decode BIT24 {
-                        0x0: aes64im({{
-                            Rd_sd = _rvk_emu_aes64im(Rs1_sd);
-                        }});
-                        0x1: aes64ks1i({{
-                            Rd_sd = _rvk_emu_aes64ks1i(Rs1_sd, imm);
-                        }}, imm_type = int32_t, imm_code={{ imm = RNUM; }});
+                        0x0: decode RVTYPE {
+                            0x1: aes64im({{
+                                Rd_sd = _rvk_emu_aes64im(Rs1_sd);
+                            }});
+                        }
+                        0x1: decode RVTYPE {
+                            0x1: aes64ks1i({{
+                                Rd_sd = _rvk_emu_aes64ks1i(Rs1_sd, imm);
+                            }}, imm_type = int32_t, imm_code={{ imm = RNUM; }});
+                        }
                     }
                     0x09: bclri({{
                         if (rvSelect((bool)SHAMT6BIT5, false)) {
@@ -698,6 +710,14 @@ decode QUADRANT default Unknown::unknown() {
                             }},
                             imm_type = uint64_t, imm_code = {{ imm = SHAMT6; }});
                         }
+                        0x07: decode RVTYPE {
+                            0x0: rv32_brev8({{
+                                Rd_sw = _rvk_emu_brev8_32(Rs1_sw);
+                            }}, imm_code = {{ imm = SHAMT5; }});
+                            0x1: brev8({{
+                                Rd = _rvk_emu_brev8_64(Rs1);
+                            }}, imm_code = {{ imm = SHAMT6; }});
+                        }
                     }
                 }
                 0x6: ori({{
@@ -972,33 +992,43 @@ decode QUADRANT default Unknown::unknown() {
                         }}, IntMultOp);
                     }
                     0x18: sm4ed({{
-                        Rd_sd = _rvk_emu_sm4ed(Rs1_sd, Rs2_sd, (uint8_t)BS);
+                        Rd_sw = _rvk_emu_sm4ed(Rs1_sw, Rs2_sw, (uint8_t)BS);
                     }});
                     0x19: decode BS {
-                        0x0: aes64es({{
-                            Rd_sd = _rvk_emu_aes64es(Rs1_sd, Rs2_sd);
-                        }});
+                        0x0: decode RVTYPE {
+                            0x1: aes64es({{
+                                Rd_sd = _rvk_emu_aes64es(Rs1_sd, Rs2_sd);
+                            }});
+                        }
                     }
                     0x1a: sm4ks({{
-                        Rd_sd = _rvk_emu_sm4ks(Rs1_sd, Rs2_sd, (uint8_t)BS);
+                        Rd_sw = _rvk_emu_sm4ks(Rs1_sw, Rs2_sw, (uint8_t)BS);
                     }});
                     0x1b: decode BS {
-                        0x0: aes64esm({{
-                            Rd_sd = _rvk_emu_aes64esm(Rs1_sd, Rs2_sd);
-                        }});
+                        0x0: decode RVTYPE {
+                            0x1: aes64esm({{
+                                Rd_sd = _rvk_emu_aes64esm(Rs1_sd, Rs2_sd);
+                            }});
+                        }
                     }
                     0x1d: decode BS {
-                        0x0: aes64ds({{
-                            Rd_sd = _rvk_emu_aes64ds(Rs1_sd, Rs2_sd);
-                        }});
+                        0x0: decode RVTYPE {
+                            0x1: aes64ds({{
+                                Rd_sd = _rvk_emu_aes64ds(Rs1_sd, Rs2_sd);
+                            }});
+                        }
                     }
-                    0x1f: decode BS{
-                        0x0: aes64dsm({{
-                            Rd_sd = _rvk_emu_aes64dsm(Rs1_sd, Rs2_sd);
-                        }});
-                        0x1: aes64ks2({{
-                            Rd_sd = _rvk_emu_aes64ks2(Rs1_sd, Rs2_sd);
-                        }});
+                    0x1f: decode BS {
+                        0x0: decode RVTYPE {
+                            0x1: aes64dsm({{
+                                Rd_sd = _rvk_emu_aes64dsm(Rs1_sd, Rs2_sd);
+                            }});
+                        }
+                        0x1: decode RVTYPE {
+                            0x1: aes64ks2({{
+                                Rd_sd = _rvk_emu_aes64ks2(Rs1_sd, Rs2_sd);
+                            }});
+                        }
                     }
                 }
                 0x1: decode FUNCT7 {
@@ -1105,9 +1135,14 @@ decode QUADRANT default Unknown::unknown() {
                     0x10: sh1add({{
                         Rd = rvSext((Rs1 << 1) + Rs2);
                     }});
-                    0x14: xperm4({{
-                        Rd_sd = _rvk_emu_xperm4_64(Rs1_sd, Rs2_sd);
-                    }});
+                    0x14: decode RVTYPE {
+                        0x0: xperm4_32({{
+                            Rd_sw = _rvk_emu_xperm4_32(Rs1_sw, Rs2_sw);
+                        }});
+                        0x1: xperm4_64({{
+                            Rd_sd = _rvk_emu_xperm4_64(Rs1_sd, Rs2_sd);
+                        }});
+                    }
                 }
                 0x3: decode FUNCT7 {
                     0x0: sltu({{
@@ -1186,9 +1221,14 @@ decode QUADRANT default Unknown::unknown() {
                     0x10: sh2add({{
                         Rd = rvSext((Rs1 << 2) + Rs2);
                     }});
-                    0x14: xperm8({{
-                        Rd_sd = _rvk_emu_xperm8_64(Rs1_sd, Rs2_sd);
-                    }});
+                    0x14: decode RVTYPE {
+                        0x0: xperm8_32({{
+                            Rd_sw = _rvk_emu_xperm8_32(Rs1_sw, Rs2_sw);
+                        }});
+                        0x1: xperm8_64({{
+                            Rd_sd = _rvk_emu_xperm8_64(Rs1_sd, Rs2_sd);
+                        }});
+                    }
                     0x20: xnor({{
                         Rd = rvSext(~(Rs1 ^ Rs2));
                     }});

From d049d41ef024978aa20f987636d6f0d12ef02b1a Mon Sep 17 00:00:00 2001
From: Roger Chang <rogerycchang@google.com>
Date: Wed, 19 Apr 2023 21:30:58 +0800
Subject: [PATCH 363/492] arch-riscv: Add RV32 only Zk instruction extensions

The following instructions is add:
Zbkb extension:
zip
unzip

Zknd extension:
aes32dsi
aes32dsmi

Zkne extension:
aes32esi
aes32esmi

Zknh extension:
sha512sig0h
sha512sig0l
sha512sig1h
sha512sig1l
sha512sum0r
sha512sum1r

Change-Id: Id29007704128154d9fb8305155f92c2e08ffa435
Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/69937
Maintainer: Jason Lowe-Power <power.jg@gmail.com>
Tested-by: kokoro <noreply+kokoro@google.com>
Reviewed-by: Hoa Nguyen <hoanguyen@ucdavis.edu>
Reviewed-by: Jason Lowe-Power <power.jg@gmail.com>
---
 src/arch/riscv/isa/decoder.isa | 72 ++++++++++++++++++++++++++++++++++
 src/arch/riscv/rvk.hh          | 42 ++++++++++++++++++++
 2 files changed, 114 insertions(+)

diff --git a/src/arch/riscv/isa/decoder.isa b/src/arch/riscv/isa/decoder.isa
index cb2b19909a..6f66c98df0 100644
--- a/src/arch/riscv/isa/decoder.isa
+++ b/src/arch/riscv/isa/decoder.isa
@@ -524,6 +524,11 @@ decode QUADRANT default Unknown::unknown() {
                         }
                         Rd = rvSext(Rs1 << imm);
                     }}, imm_type = uint64_t, imm_code = {{ imm = SHAMT6; }});
+                    0x01: decode RVTYPE {
+                        0x0: zip({{
+                            Rd_sw = _rvk_emu_zip_32(Rs1_sw);
+                        }}, imm_code = {{ imm = SHAMT5; }});
+                    }
                     0x02: decode FS2 {
                         0x0: sha256sum0({{
                             Rd_sw = _rvk_emu_sha256sum0(Rs1_sw);
@@ -643,6 +648,11 @@ decode QUADRANT default Unknown::unknown() {
                         }
                         Rd = rvSext(rvZext(Rs1) >> imm);
                     }}, imm_type = uint64_t, imm_code = {{ imm = SHAMT6; }});
+                    0x1: decode RVTYPE {
+                        0x0: unzip({{
+                            Rd_sw = _rvk_emu_unzip_32(Rs1_sw);
+                        }}, imm_code = {{ imm = SHAMT5; }});
+                    }
                     0x5: orc_b({{
                         uint64_t result = 0;
                         result |= (Rs1<7:0> ? UINT64_C(0xff) : 0x0);
@@ -991,6 +1001,68 @@ decode QUADRANT default Unknown::unknown() {
                             Rd = rvSext(Rs1_sd * Rs2_sd);
                         }}, IntMultOp);
                     }
+                    0x08: decode BS {
+                        0x1: decode RVTYPE {
+                            0x0: sha512sum0r({{
+                                Rd_sw = _rvk_emu_sha512sum0r(Rs1_sw, Rs2_sw);
+                            }});
+                        }
+                    }
+                    0x09: decode BS {
+                        0x1: decode RVTYPE {
+                            0x0: sha512sum1r({{
+                                Rd_sw = _rvk_emu_sha512sum1r(Rs1_sw, Rs2_sw);
+                            }});
+                        }
+                    }
+                    0x0a: decode BS {
+                        0x1: decode RVTYPE {
+                            0x0: sha512sig0l({{
+                                Rd_sw = _rvk_emu_sha512sig0l(Rs1_sw, Rs2_sw);
+                            }});
+                        }
+                    }
+                    0x0b: decode BS {
+                        0x1: decode RVTYPE {
+                            0x0: sha512sig1l({{
+                                Rd_sw = _rvk_emu_sha512sig1l(Rs1_sw, Rs2_sw);
+                            }});
+                        }
+                    }
+                    0x0e: decode BS {
+                        0x1: decode RVTYPE {
+                            0x0: sha512sig0h({{
+                                Rd_sw = _rvk_emu_sha512sig0h(Rs1_sw, Rs2_sw);
+                            }});
+                        }
+                    }
+                    0x0f: decode BS {
+                        0x1: decode RVTYPE {
+                            0x0: sha512sig1h({{
+                                Rd_sw = _rvk_emu_sha512sig1h(Rs1_sw, Rs2_sw);
+                            }});
+                        }
+                    }
+                    0x11: decode RVTYPE {
+                        0x0: aes32esi({{
+                            Rd_sw = _rvk_emu_aes32esi(Rs1_sw, Rs2_sw, (uint8_t)BS);
+                        }});
+                    }
+                    0x13: decode RVTYPE {
+                        0x0: aes32esmi({{
+                            Rd_sw = _rvk_emu_aes32esmi(Rs1_sw, Rs2_sw, (uint8_t)BS);
+                        }});
+                    }
+                    0x15: decode RVTYPE {
+                        0x0: aes32dsi({{
+                            Rd_sw = _rvk_emu_aes32dsi(Rs1_sw, Rs2_sw, (uint8_t)BS);
+                        }});
+                    }
+                    0x17: decode RVTYPE {
+                        0x0: aes32dsmi({{
+                            Rd_sw = _rvk_emu_aes32dsmi(Rs1_sw, Rs2_sw, (uint8_t)BS);
+                        }});
+                    }
                     0x18: sm4ed({{
                         Rd_sw = _rvk_emu_sm4ed(Rs1_sw, Rs2_sw, (uint8_t)BS);
                     }});
diff --git a/src/arch/riscv/rvk.hh b/src/arch/riscv/rvk.hh
index d4af3cda23..678359c9e4 100644
--- a/src/arch/riscv/rvk.hh
+++ b/src/arch/riscv/rvk.hh
@@ -525,6 +525,48 @@ inline int32_t _rvk_emu_sha256sum1(int32_t rs1)
     return (int32_t) x;
 }
 
+static inline int32_t  _rvk_emu_sha512sig0h(int32_t rs1, int32_t rs2)
+{
+    return  _rvk_emu_srl_32(rs1, 1) ^ _rvk_emu_srl_32(rs1, 7) ^
+            _rvk_emu_srl_32(rs1, 8) ^ _rvk_emu_sll_32(rs2, 31) ^
+            _rvk_emu_sll_32(rs2, 24);
+}
+
+static inline int32_t  _rvk_emu_sha512sig0l(int32_t rs1, int32_t rs2)
+{
+    return  _rvk_emu_srl_32(rs1, 1) ^ _rvk_emu_srl_32(rs1, 7) ^
+            _rvk_emu_srl_32(rs1, 8) ^ _rvk_emu_sll_32(rs2, 31) ^
+            _rvk_emu_sll_32(rs2, 25) ^ _rvk_emu_sll_32(rs2, 24);
+}
+
+static inline int32_t  _rvk_emu_sha512sig1h(int32_t rs1, int32_t rs2)
+{
+    return  _rvk_emu_sll_32(rs1, 3) ^ _rvk_emu_srl_32(rs1, 6) ^
+            _rvk_emu_srl_32(rs1, 19) ^ _rvk_emu_srl_32(rs2, 29) ^
+            _rvk_emu_sll_32(rs2, 13);
+}
+
+static inline int32_t  _rvk_emu_sha512sig1l(int32_t rs1, int32_t rs2)
+{
+    return  _rvk_emu_sll_32(rs1, 3) ^ _rvk_emu_srl_32(rs1, 6) ^
+            _rvk_emu_srl_32(rs1,19) ^ _rvk_emu_srl_32(rs2, 29) ^
+            _rvk_emu_sll_32(rs2, 26) ^ _rvk_emu_sll_32(rs2, 13);
+}
+
+static inline int32_t  _rvk_emu_sha512sum0r(int32_t rs1, int32_t rs2)
+{
+    return  _rvk_emu_sll_32(rs1, 25) ^ _rvk_emu_sll_32(rs1, 30) ^
+            _rvk_emu_srl_32(rs1, 28) ^ _rvk_emu_srl_32(rs2, 7) ^
+            _rvk_emu_srl_32(rs2, 2) ^ _rvk_emu_sll_32(rs2, 4);
+}
+
+static inline int32_t  _rvk_emu_sha512sum1r(int32_t rs1, int32_t rs2)
+{
+    return  _rvk_emu_sll_32(rs1, 23) ^ _rvk_emu_srl_32(rs1,14) ^
+            _rvk_emu_srl_32(rs1, 18) ^ _rvk_emu_srl_32(rs2, 9) ^
+            _rvk_emu_sll_32(rs2, 18) ^ _rvk_emu_sll_32(rs2, 14);
+}
+
 inline int64_t  _rvk_emu_sha512sig0(int64_t rs1)
 {
     return _rvk_emu_ror_64(rs1, 1) ^ _rvk_emu_ror_64(rs1, 8) ^

From 8d5c9f90d55c26a5641074ef833268f22f3754dd Mon Sep 17 00:00:00 2001
From: Wei-Han Chen <weihanchen@google.com>
Date: Fri, 28 Apr 2023 05:36:16 +0000
Subject: [PATCH 364/492] fastmodel: Remove sendFunc

The original functionality is to access memory inside CPU. However, in
this CL: https://gem5-review.googlesource.com/c/public/gem5/+/45581,
the access method has been changed to use Iris API.

Thus, this CL removes this function.

Change-Id: Ide5e7c7c10a30b3c3ed00b97ba5894679d615e6b
Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/70157
Tested-by: kokoro <noreply+kokoro@google.com>
Reviewed-by: Yu-hsin Wang <yuhsingw@google.com>
Maintainer: Gabe Black <gabeblack@google.com>
---
 src/arch/arm/fastmodel/CortexA76/evs.cc | 10 ----------
 src/arch/arm/fastmodel/CortexA76/evs.hh |  2 --
 src/arch/arm/fastmodel/CortexR52/evs.cc | 10 ----------
 src/arch/arm/fastmodel/CortexR52/evs.hh |  2 --
 src/arch/arm/fastmodel/iris/cpu.hh      |  1 -
 5 files changed, 25 deletions(-)

diff --git a/src/arch/arm/fastmodel/CortexA76/evs.cc b/src/arch/arm/fastmodel/CortexA76/evs.cc
index b299ad1a28..6d21a3773d 100644
--- a/src/arch/arm/fastmodel/CortexA76/evs.cc
+++ b/src/arch/arm/fastmodel/CortexA76/evs.cc
@@ -132,16 +132,6 @@ ScxEvsCortexA76<Types>::ScxEvsCortexA76(
     periphClockRateControl.bind(this->periph_clock_rate_s);
 }
 
-template <class Types>
-void
-ScxEvsCortexA76<Types>::sendFunc(PacketPtr pkt)
-{
-    auto *trans = sc_gem5::packet2payload(pkt);
-    panic_if(Base::amba->transport_dbg(*trans) != trans->get_data_length(),
-            "Didn't send entire functional packet!");
-    trans->release();
-}
-
 template <class Types>
 void
 ScxEvsCortexA76<Types>::before_end_of_elaboration()
diff --git a/src/arch/arm/fastmodel/CortexA76/evs.hh b/src/arch/arm/fastmodel/CortexA76/evs.hh
index 9f08071dae..cdaa266bcd 100644
--- a/src/arch/arm/fastmodel/CortexA76/evs.hh
+++ b/src/arch/arm/fastmodel/CortexA76/evs.hh
@@ -119,8 +119,6 @@ class ScxEvsCortexA76 : public Types::Base, public Iris::BaseCpuEvs
     }
     void start_of_simulation() override {}
 
-    void sendFunc(PacketPtr pkt) override;
-
     void setClkPeriod(Tick clk_period) override;
 
     void setSysCounterFrq(uint64_t sys_counter_frq) override;
diff --git a/src/arch/arm/fastmodel/CortexR52/evs.cc b/src/arch/arm/fastmodel/CortexR52/evs.cc
index 47fbc36313..ee5b4cc663 100644
--- a/src/arch/arm/fastmodel/CortexR52/evs.cc
+++ b/src/arch/arm/fastmodel/CortexR52/evs.cc
@@ -128,16 +128,6 @@ ScxEvsCortexR52<Types>::ScxEvsCortexR52(
     signalInterrupt.bind(this->signal_interrupt);
 }
 
-template <class Types>
-void
-ScxEvsCortexR52<Types>::sendFunc(PacketPtr pkt)
-{
-    auto *trans = sc_gem5::packet2payload(pkt);
-    panic_if(Base::amba[0]->transport_dbg(*trans) != trans->get_data_length(),
-            "Didn't send entire functional packet!");
-    trans->release();
-}
-
 template <class Types>
 Port &
 ScxEvsCortexR52<Types>::gem5_getPort(const std::string &if_name, int idx)
diff --git a/src/arch/arm/fastmodel/CortexR52/evs.hh b/src/arch/arm/fastmodel/CortexR52/evs.hh
index 6516f4c687..79bc22fd2f 100644
--- a/src/arch/arm/fastmodel/CortexR52/evs.hh
+++ b/src/arch/arm/fastmodel/CortexR52/evs.hh
@@ -158,8 +158,6 @@ class ScxEvsCortexR52 : public Types::Base, public Iris::BaseCpuEvs
     }
     void start_of_simulation() override {}
 
-    void sendFunc(PacketPtr pkt) override;
-
     void setClkPeriod(Tick clk_period) override;
 
     void setSysCounterFrq(uint64_t sys_counter_frq) override;
diff --git a/src/arch/arm/fastmodel/iris/cpu.hh b/src/arch/arm/fastmodel/iris/cpu.hh
index b43eb8ecf5..dd3992cc89 100644
--- a/src/arch/arm/fastmodel/iris/cpu.hh
+++ b/src/arch/arm/fastmodel/iris/cpu.hh
@@ -47,7 +47,6 @@ class ThreadContext;
 class BaseCpuEvs
 {
   public:
-    virtual void sendFunc(PacketPtr pkt) = 0;
     virtual void setClkPeriod(Tick clk_period) = 0;
     virtual void setSysCounterFrq(uint64_t sys_counter_frq) = 0;
     virtual void setCluster(SimObject *cluster) = 0;

From aff1bfe49169a4c4b824f3285d2acf39ac169f0d Mon Sep 17 00:00:00 2001
From: Richard Cooper <richard.cooper@arm.com>
Date: Thu, 27 Apr 2023 18:59:42 +0100
Subject: [PATCH 365/492] scons: Fix gem5 Python3.11 build.

The code generation in gem5's build system requires the use of Regular
Expression flags when defining the regular expressions used for
tokenization. However, the Python Lex-Yacc (PLY) [1] library used by
gem5 does not allow the user sufficient control of the flags for RE
compilation.

Previously, gem5 used inline RE flags to control RE compilation.
However, from Python 3.11, inline RE flags must be at the start of the
RE string. Because PLY wraps the user supplied RE strings before
compilation, there is no way for the user to supply a RE string with
the inline flag at the start. This makes gem5 incompatible with Python
3.11 when using PLY.

This change modifies gem5's build files to patch `re.compile` with a
wrapped version that can handle embedded flags anywhere in the RE
string, for all current versions of Python. The patched version
re-formats the user supplied RE string to convert inline RE flags to
explicit RE flags.

This patch is intended as a temporary stop-gap until PLY can be fixed
upstream.

See the gem5 Issue Tracker [2] for more details.

[1] https://github.com/dabeaz/ply
[2] https://gem5.atlassian.net/browse/GEM5-1321

Change-Id: I3ab371f2e5cf267c0a89caaf8a2bacfed78545ef
Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/70237
Maintainer: Bobby Bruce <bbruce@ucdavis.edu>
Maintainer: Jason Lowe-Power <power.jg@gmail.com>
Reviewed-by: Bobby Bruce <bbruce@ucdavis.edu>
Reviewed-by: Boris Shingarov <shingarov@labware.com>
Tested-by: kokoro <noreply+kokoro@google.com>
---
 SConstruct                        |  6 ++++-
 site_scons/gem5_scons/__init__.py | 45 ++++++++++++++++++++++++++++++-
 2 files changed, 49 insertions(+), 2 deletions(-)

diff --git a/SConstruct b/SConstruct
index b784a045ec..e09f0d5a5b 100755
--- a/SConstruct
+++ b/SConstruct
@@ -1,6 +1,6 @@
 # -*- mode:python -*-
 
-# Copyright (c) 2013, 2015-2020 ARM Limited
+# Copyright (c) 2013, 2015-2020, 2023 ARM Limited
 # All rights reserved.
 #
 # The license below extends only to copyright in the software and shall
@@ -171,6 +171,10 @@ SetOption('warn', 'no-duplicate-environment')
 
 Export('MakeAction')
 
+# Patch re.compile to support inline flags anywhere within a RE
+# string. Required to use PLY with Python 3.11+.
+gem5_scons.patch_re_compile_for_inline_flags()
+
 ########################################################################
 #
 # Set up the main build environment.
diff --git a/site_scons/gem5_scons/__init__.py b/site_scons/gem5_scons/__init__.py
index 721487656b..6d6226cdc6 100644
--- a/site_scons/gem5_scons/__init__.py
+++ b/site_scons/gem5_scons/__init__.py
@@ -1,4 +1,4 @@
-# Copyright (c) 2013, 2015-2017 ARM Limited
+# Copyright (c) 2013, 2015-2017, 2023 ARM Limited
 # All rights reserved.
 #
 # The license below extends only to copyright in the software and shall
@@ -302,6 +302,48 @@ def FromValue(node):
     return pickle.loads(node.read())
 
 
+def patch_re_compile_for_inline_flags():
+    """Patch `re.compile` with a version that can handle RE strings with
+    inline flags anywhere within them. This is required to use PLY
+    with Python 3.11+.
+
+    """
+
+    import re
+    from functools import partial
+
+    def _inline_flag_aware_re_compile(re_compile, re_str, flags=0x0):
+        """Provide an alternative implementation of `re.compile` that allows
+        inline flags that are not at the start of the regular
+        expression string.
+
+        From Python 3.11, the `re` module only supports inline flags
+        at the start of the RE string. This makes it impossible to add
+        flags to the Lexer strings when using PLY, because PLY embeds
+        the user supplied token REs, and does not provide sufficient
+        control of the `flags` argument.
+
+        """
+        _flags_map = {
+            ("(?a)", b"(?a)"): re.ASCII,
+            ("(?i)", b"(?i)"): re.IGNORECASE,
+            ("(?L)", b"(?L)"): re.LOCALE,
+            ("(?m)", b"(?m)"): re.MULTILINE,
+            ("(?s)", b"(?s)"): re.DOTALL,
+            ("(?x)", b"(?x)"): re.VERBOSE,
+        }
+        for (pattern_s, pattern_b), flag in _flags_map.items():
+            pattern = pattern_b if isinstance(re_str, bytes) else pattern_s
+            replacement = b"" if isinstance(re_str, bytes) else ""
+            if pattern in re_str:
+                flags |= flag
+                re_str = re_str.replace(pattern, replacement)
+        return re_compile(re_str, flags)
+
+    # Patch the default `re.compile`
+    re.compile = partial(_inline_flag_aware_re_compile, re.compile)
+
+
 __all__ = [
     "Configure",
     "EnvDefaults",
@@ -312,4 +354,5 @@ __all__ = [
     "MakeActionTool",
     "ToValue",
     "FromValue",
+    "patch_re_compile_for_inline_flags",
 ]

From cf6783d6ac4dec80788e895f9e2e8a7f5240c849 Mon Sep 17 00:00:00 2001
From: Melissa Jost <melissakjost@gmail.com>
Date: Mon, 13 Mar 2023 01:59:16 -0700
Subject: [PATCH 366/492] cpu: Move fetch stats from simple and minor to base

This summarizes a series of changes to move general Simple, Minor,
O3 CPU stats to BaseCPU. This commit focuses on moving numBranches
from SimpleCPU to the FetchCPUStats in the BaseCPU, and
numFetchSuspends from MinorCPU into FetchCPUStats.  More general
information about this relation chain is below.  In addition, this
changeset first adds all relevant stats to base in the first half,
then removes the duplicated stats in the second half.  Duplicated
stats are denoted in the code. In addition, to view the difference
between the old stats output and the current output, view
https://gem5.atlassian.net/browse/GEM5-1304

1. Summary:
Moved general CPU stats found across Simple, Minor, and O3 CPU models
into BaseCPU through new stat groups. The stat groups are
FetchCPUStats, ExecuteCPUStats, and CommitCPUStats. Implemented the
committedControl stat vector found in MinorCPU for Simple and O3 CPU.
Implemented the numStoreInsts stat found in SimpleCPU for O3CPU. IPC
and CPI stats are now tracked at the core and thread level in BaseCPU
and are made universal for simple, minor, o3, and kvm CPUs. Duplicate
stats across the models are merged into a single stat in BaseCPU under
the same stat name. This change does not implement every general level
stat moved to BaseCPU for every model.

2. Stat API Changes
a. SimpleCPU:
statExecutedInstType vector unified into committedInstType
numCondCtrlInsts unified into committedControl::isControl

b. O3CPU:
i. Fetch Stage
branches in fetch unified into with numBranches
rate renamed to fetchRate
insts unified into with numInsts

ii. Execute Stage
Regfile stats unified into base with use of Simple's stat naming
numRefs in IEW unified into numMemRefs
numRate from IEW renamed to instRate

iii. Commit Stage
committedInsts is renamed to numInstsNotNOP
committedOps is renamed to numOpsNotNOP
instsCommitted is unified into numInsts
opsCommitted is unified into numOps
branches is unified into committedControl::isControl
floating is unified into numFpInsts
integer is unified into numIntInsts
loads is unified into numLoadInsts
memRefs is renamed to numMemRefs
vectorInstructions is unified into numVecInsts

3. Details:
Created three stat groups in BaseCPU. FetchCPUStats track statistics
related to the fetch stage. ExecuteCPUStats track statistics related
to the execute stage. CommitCPUStats track statistics related to the
commit stage.

There are three vectors in Base that store unique pointers to per
thread instances of these stat groups. The stat group pointer for
thread i is accessible at index i of one of these vectors. For example,
stat numCCRegReads of the execute stage for thread 0 can be accessed
with executeStats[0]->numCCRegReads. The stats.txt output will print the
thread ID of the stat group. For example, numVecRegReads on thread 0
of a single core prints as
"board.processor.cores.core.executeStats0.numVecRegReads".
NOTE: Multithreading in gem5 is untested. Therefore per thread stats
output in stats.txt is not currently guaranteed to be correctly
formatted.

For FetchCPUStats, the stats moved from  SimpleCPU are numBranches
and numInsts. From MinorCPU, the stat moved is numFetchSuspends. From
O3CPU, the stats moved are from the O3 fetch stage: Stat branches is
unified into numBranches, stat rate is renamed to fetchRate in Base,
stat insts is unified into numInsts, stat icacheStallCycles keeps the
same name in Base.

For ExecuteCPUStats, the stats moved from SimpleCPU are
dcacheStallCycles, numCCRegReads, numCCRegWrites,
numFpAluAccesses, numFpRegReads, numFpRegWrites, numIntAluAccesses,
numIntRegReads, numIntRegWrites, numMemRefs, numMiscRegReads,
numMiscRegWrites, numVecAluAccesses, numVecPredRegReads,
numVecPredRegWrites, numVecRegReads, numVecRegWrites. The stat moved
from MinorCPU is numDiscardedOps. From O3, the Regfile stats in CPU are
unified into the reg stats in Base and use the names found originally
in SimpleCPU. From O3 IEW stage, numInsts keeps the same name in
Base, numBranches is unified into numBranches in base, numNop keeps
the same name in Base, numRefs is unified into numMemRefs in Base,
numLoadInsts and numStoreInsts are moved into Base, numRate is renamed
to instRate in base.

For CommitCPUStats, the stats moved from SimpleCPU are
numCondCtrlInsts, numFpInsts, numIntInsts, numLoadInsts, numStoreInsts,
numVecInsts. The stats moved from MinorCPU are numInsts,
committedInstType, and committedControl. statExecutedInstType of
SimpleCPU is unified with committedInstType of MinorCPU. Implemented
committedControl stats from MinorCPU in Simple and O3 CPU. In MinorCPU,
this stat was a 2D vector, where the first dimension is the thread ID.
In base it is now a 1D vector that is tied to a thread ID via the
commitStats vector that the object is accessible through. From the O3
commit stage, committedInsts is renamed to numInstsNotNOP, committedOps
is renamed to numOpsNotNOP, instsCommitted is unified into numInsts,
opsCommitted is renamed to numOps, committedInstType is unified into
committedInstType from Minor, branches is removed because it duplicates
committedControl::IsControl, floating is unified into numFpInsts,
interger is unified into numIntInsts, loads is unified into
numLoadInsts, numStoreInsts is implemented for tracking in O3, memRefs
is renamed to numMemRefs, vectorInstructions is unified into
numVecInsts. Note that numCondCtrlInsts of Simple is unified into
committedControl::IsCondCtrl.

Implemented IPC and CPI tracking inside BaseCPU.
In BaseCPU::BaseCPUStats, numInsts and numOps track per CPU core
committed instructions and operations.
In BaseCPU::FetchCPUStats, numInsts and numOps track per thread
fetched instructions and operations.
In BaseCPU::CommitCPUStats, numInsts tracks per thread executed
instructions.
In BaseCPU::CommitCPUStats, numInsts and numOps track per thread
committed instructions and operations.
In BaseSimpleCPU, the countInst() function has been split into
countInst(), countFetchInst(), and countCommitInst(). The stat count
incrementation step of countInst() has been removed and delegated to the
other two functions. countFetchInst() increments numInsts and numOps
of the FetchCPUStats group for a thread. countCommitInst() increments
the numInsts and numOps of the CommitCPUStats group for a thread and
of the BaseCPUStats group for a CPU core. These functions are called
in the appropriate stage within timing.cc and atomic.cc. The call to
countInst() is left unchanged. countFetchInst() is called in
preExecute(). countCommitInst() is called in postExecute().
For MinorCPU, only the commit level numInsts and numOps stats have been
implemented.
IPC and CPI stats have been added to BaseCPUStats (core level) and
CommitCPUStats (thread level). The formulas for the IPC and CPI stats
in CommitCPUStats are set in the BaseCPU constructor, after the
CommitCPUStats stat group object has been created. These replace IPC,
CPI, totalIpc, and totalCpi stats in O3.

Replaced committedInsts stats of KVM CPU with commitStats.numInsts
of BaseCPU. This results in IPC and CPI printing in stats.txt for
KVM simulations.

This change does not implement most general stats found in one or two
model for all others.

Change-Id: I44d8ff6f3d102e94e53f9b2ce9b7917d96341e51
Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/69097
Reviewed-by: Bobby Bruce <bbruce@ucdavis.edu>
Tested-by: kokoro <noreply+kokoro@google.com>
Maintainer: Bobby Bruce <bbruce@ucdavis.edu>
---
 src/cpu/base.cc          | 19 +++++++++++++++++++
 src/cpu/base.hh          | 17 +++++++++++++++++
 src/cpu/minor/execute.cc |  2 ++
 src/cpu/simple/base.cc   |  2 ++
 4 files changed, 40 insertions(+)

diff --git a/src/cpu/base.cc b/src/cpu/base.cc
index d2c0a78d44..1d293397e5 100644
--- a/src/cpu/base.cc
+++ b/src/cpu/base.cc
@@ -191,6 +191,11 @@ BaseCPU::BaseCPU(const Params &p, bool is_checker)
     modelResetPort.onChange([this](const bool &new_val) {
         setReset(new_val);
     });
+    // create a stat group object for each thread on this core
+    fetchStats.reserve(numThreads);
+    for (int i = 0; i < numThreads; i++) {
+        fetchStats.emplace_back(new FetchCPUStats(this, i));
+    }
 }
 
 void
@@ -827,4 +832,18 @@ BaseCPU::GlobalStats::GlobalStats(statistics::Group *parent)
     hostOpRate = simOps / hostSeconds;
 }
 
+BaseCPU::
+FetchCPUStats::FetchCPUStats(statistics::Group *parent, int thread_id)
+    : statistics::Group(parent, csprintf("fetchStats%i", thread_id).c_str()),
+    ADD_STAT(numBranches, statistics::units::Count::get(),
+             "Number of branches fetched"),
+    ADD_STAT(numFetchSuspends, statistics::units::Count::get(),
+             "Number of times Execute suspended instruction fetching")
+
+{
+    numBranches
+        .prereq(numBranches);
+
+}
+
 } // namespace gem5
diff --git a/src/cpu/base.hh b/src/cpu/base.hh
index 084d9b9305..e8fb777a76 100644
--- a/src/cpu/base.hh
+++ b/src/cpu/base.hh
@@ -42,6 +42,7 @@
 #ifndef __CPU_BASE_HH__
 #define __CPU_BASE_HH__
 
+#include <memory>
 #include <vector>
 
 #include "arch/generic/interrupts.hh"
@@ -676,6 +677,22 @@ class BaseCPU : public ClockedObject
     const Cycles pwrGatingLatency;
     const bool powerGatingOnIdle;
     EventFunctionWrapper enterPwrGatingEvent;
+
+
+  public:
+    struct FetchCPUStats : public statistics::Group
+    {
+        FetchCPUStats(statistics::Group *parent, int thread_id);
+
+        /* Total number of branches fetched */
+        statistics::Scalar numBranches;
+
+        /* Number of times fetch was asked to suspend by Execute */
+        statistics::Scalar numFetchSuspends;
+
+    };
+
+    std::vector<std::unique_ptr<FetchCPUStats>> fetchStats;
 };
 
 } // namespace gem5
diff --git a/src/cpu/minor/execute.cc b/src/cpu/minor/execute.cc
index 5eaaf5804e..c37c6c6696 100644
--- a/src/cpu/minor/execute.cc
+++ b/src/cpu/minor/execute.cc
@@ -1054,7 +1054,9 @@ Execute::commitInst(MinorDynInstPtr inst, bool early_memory_issue,
             DPRINTF(MinorInterrupt, "Suspending thread: %d from Execute"
                 " inst: %s\n", thread_id, *inst);
 
+            // output both old and new stats
             cpu.stats.numFetchSuspends++;
+            cpu.fetchStats[thread_id]->numFetchSuspends++;
 
             updateBranchData(thread_id, BranchData::SuspendThread, inst,
                 resume_pc, branch);
diff --git a/src/cpu/simple/base.cc b/src/cpu/simple/base.cc
index 768f63ede5..1632f545a2 100644
--- a/src/cpu/simple/base.cc
+++ b/src/cpu/simple/base.cc
@@ -396,7 +396,9 @@ BaseSimpleCPU::postExecute()
     }
 
     if (curStaticInst->isControl()) {
+        // output both old and new stats
         ++t_info.execContextStats.numBranches;
+        ++fetchStats[t_info.thread->threadId()]->numBranches;
     }
 
     /* Power model statistics */

From 32b18dcc608065a9dca77a36b48f858b98e7cce3 Mon Sep 17 00:00:00 2001
From: Melissa Jost <melissakjost@gmail.com>
Date: Mon, 13 Mar 2023 02:34:14 -0700
Subject: [PATCH 367/492] cpu: Move execute stats from simple and minor to base

Created stat group ExecuteCPUStats in BaseCPU and moved stats from the
simple and minor cpu models.

The stats moved from SimpleCPU are dcacheStallCycles,
icacheStallCycles, numCCRegReads, numCCRegWrites, numFpAluAccesses,
numFpRegReads, numFpRegWrits, numIntAluAccesses, numIntRegReads,
numIntRegWrites, numMemRegs, numMiscRegReads, numMiscRegWrites,
numVecAluAccesses, numVecPredRegReads, numVecPredRegWrites,
numVecRegReads, numVecRegWrites.

The stat moved from MinorCPU is numDiscardedOps.

These stats should both be outputting under executeStats in
BaseCPU, as well as in the simple and minor cpu models at this
point.

Change-Id: I95fe43b14f5c2ad4939463d8086b6b858ba1a2a1
Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/69098
Maintainer: Bobby Bruce <bbruce@ucdavis.edu>
Tested-by: kokoro <noreply+kokoro@google.com>
Reviewed-by: Bobby Bruce <bbruce@ucdavis.edu>
---
 src/cpu/base.cc                |  76 ++++++++++++++++++++
 src/cpu/base.hh                |  48 +++++++++++++
 src/cpu/minor/execute.cc       |   5 +-
 src/cpu/o3/cpu.cc              | 125 +++++++++++++++++++++++++++++++++
 src/cpu/o3/cpu.hh              |  12 ++++
 src/cpu/o3/dyn_inst.hh         |  15 ++++
 src/cpu/simple/base.cc         |   8 +++
 src/cpu/simple/exec_context.hh |  37 ++++++----
 8 files changed, 311 insertions(+), 15 deletions(-)

diff --git a/src/cpu/base.cc b/src/cpu/base.cc
index 1d293397e5..641152ede2 100644
--- a/src/cpu/base.cc
+++ b/src/cpu/base.cc
@@ -193,8 +193,10 @@ BaseCPU::BaseCPU(const Params &p, bool is_checker)
     });
     // create a stat group object for each thread on this core
     fetchStats.reserve(numThreads);
+    executeStats.reserve(numThreads);
     for (int i = 0; i < numThreads; i++) {
         fetchStats.emplace_back(new FetchCPUStats(this, i));
+        executeStats.emplace_back(new ExecuteCPUStats(this, i));
     }
 }
 
@@ -846,4 +848,78 @@ FetchCPUStats::FetchCPUStats(statistics::Group *parent, int thread_id)
 
 }
 
+// means it is incremented in a vector indexing and not directly
+BaseCPU::
+ExecuteCPUStats::ExecuteCPUStats(statistics::Group *parent, int thread_id)
+    : statistics::Group(parent, csprintf("executeStats%i", thread_id).c_str()),
+    ADD_STAT(dcacheStallCycles, statistics::units::Cycle::get(),
+             "DCache total stall cycles"),
+    ADD_STAT(numCCRegReads, statistics::units::Count::get(),
+             "Number of times the CC registers were read"),
+    ADD_STAT(numCCRegWrites, statistics::units::Count::get(),
+             "Number of times the CC registers were written"),
+    ADD_STAT(numFpAluAccesses, statistics::units::Count::get(),
+             "Number of float alu accesses"),
+    ADD_STAT(numFpRegReads, statistics::units::Count::get(),
+             "Number of times the floating registers were read"),
+    ADD_STAT(numFpRegWrites, statistics::units::Count::get(),
+             "Number of times the floating registers were written"),
+    ADD_STAT(numIntAluAccesses, statistics::units::Count::get(),
+             "Number of integer alu accesses"),
+    ADD_STAT(numIntRegReads, statistics::units::Count::get(),
+             "Number of times the integer registers were read"),
+    ADD_STAT(numIntRegWrites, statistics::units::Count::get(),
+             "Number of times the integer registers were written"),
+    ADD_STAT(numMemRefs, statistics::units::Count::get(),
+             "Number of memory refs"),
+    ADD_STAT(numMiscRegReads, statistics::units::Count::get(),
+             "Number of times the Misc registers were read"),
+    ADD_STAT(numMiscRegWrites, statistics::units::Count::get(),
+             "Number of times the Misc registers were written"),
+    ADD_STAT(numVecAluAccesses, statistics::units::Count::get(),
+             "Number of vector alu accesses"),
+    ADD_STAT(numVecPredRegReads, statistics::units::Count::get(),
+             "Number of times the predicate registers were read"),
+    ADD_STAT(numVecPredRegWrites, statistics::units::Count::get(),
+             "Number of times the predicate registers were written"),
+    ADD_STAT(numVecRegReads, statistics::units::Count::get(),
+             "Number of times the vector registers were read"),
+    ADD_STAT(numVecRegWrites, statistics::units::Count::get(),
+             "Number of times the vector registers were written"),
+    ADD_STAT(numDiscardedOps, statistics::units::Count::get(),
+             "Number of ops (including micro ops) which were discarded before "
+             "commit")
+{
+    dcacheStallCycles
+        .prereq(dcacheStallCycles);
+    numCCRegReads
+        .prereq(numCCRegReads)
+        .flags(statistics::nozero);
+    numCCRegWrites
+        .prereq(numCCRegWrites)
+        .flags(statistics::nozero);
+    numFpAluAccesses
+        .prereq(numFpAluAccesses);
+    numFpRegReads
+        .prereq(numFpRegReads);
+    numIntAluAccesses
+        .prereq(numIntAluAccesses);
+    numIntRegReads
+        .prereq(numIntRegReads);
+    numIntRegWrites
+        .prereq(numIntRegWrites);
+    numMiscRegReads
+        .prereq(numMiscRegReads);
+    numMiscRegWrites
+        .prereq(numMiscRegWrites);
+    numVecPredRegReads
+        .prereq(numVecPredRegReads);
+    numVecPredRegWrites
+        .prereq(numVecPredRegWrites);
+    numVecRegReads
+        .prereq(numVecRegReads);
+    numVecRegWrites
+        .prereq(numVecRegWrites);
+}
+
 } // namespace gem5
diff --git a/src/cpu/base.hh b/src/cpu/base.hh
index e8fb777a76..acf78bbd81 100644
--- a/src/cpu/base.hh
+++ b/src/cpu/base.hh
@@ -692,7 +692,55 @@ class BaseCPU : public ClockedObject
 
     };
 
+    struct ExecuteCPUStats: public statistics::Group
+    {
+        ExecuteCPUStats(statistics::Group *parent, int thread_id);
+
+        /* Number of cycles stalled for D-cache responses */
+        statistics::Scalar dcacheStallCycles;
+
+        /* Number of condition code register file accesses */
+        statistics::Scalar numCCRegReads;
+        statistics::Scalar numCCRegWrites;
+
+        /* number of float alu accesses */
+        statistics::Scalar numFpAluAccesses;
+
+        /* Number of float register file accesses */
+        statistics::Scalar numFpRegReads;
+        statistics::Scalar numFpRegWrites;
+
+        /* Number of integer alu accesses */
+        statistics::Scalar numIntAluAccesses;
+
+        /* Number of integer register file accesses */
+        statistics::Scalar numIntRegReads;
+        statistics::Scalar numIntRegWrites;
+
+        /* number of simulated memory references */
+        statistics::Scalar numMemRefs;
+
+        /* Number of misc register file accesses */
+        statistics::Scalar numMiscRegReads;
+        statistics::Scalar numMiscRegWrites;
+
+        /* Number of vector alu accesses */
+        statistics::Scalar numVecAluAccesses;
+
+        /* Number of predicate register file accesses */
+        mutable statistics::Scalar numVecPredRegReads;
+        statistics::Scalar numVecPredRegWrites;
+
+        /* Number of vector register file accesses */
+        mutable statistics::Scalar numVecRegReads;
+        statistics::Scalar numVecRegWrites;
+
+        /* Number of ops discarded before committing */
+        statistics::Scalar numDiscardedOps;
+    };
+
     std::vector<std::unique_ptr<FetchCPUStats>> fetchStats;
+    std::vector<std::unique_ptr<ExecuteCPUStats>> executeStats;
 };
 
 } // namespace gem5
diff --git a/src/cpu/minor/execute.cc b/src/cpu/minor/execute.cc
index c37c6c6696..42c7b1af0c 100644
--- a/src/cpu/minor/execute.cc
+++ b/src/cpu/minor/execute.cc
@@ -1369,8 +1369,11 @@ Execute::commit(ThreadID thread_id, bool only_commit_microops, bool discard,
                 " state was unexpected, expected: %d\n",
                 *inst, ex_info.streamSeqNum);
 
-            if (fault == NoFault)
+            if (fault == NoFault) {
+                // output both old and new stats
                 cpu.stats.numDiscardedOps++;
+                cpu.executeStats[thread_id]->numDiscardedOps++;
+            }
         }
 
         /* Mark the mem inst as being in the LSQ */
diff --git a/src/cpu/o3/cpu.cc b/src/cpu/o3/cpu.cc
index d2bacaa523..6732c4310e 100644
--- a/src/cpu/o3/cpu.cc
+++ b/src/cpu/o3/cpu.cc
@@ -1019,7 +1019,10 @@ CPU::readMiscRegNoEffect(int misc_reg, ThreadID tid) const
 RegVal
 CPU::readMiscReg(int misc_reg, ThreadID tid)
 {
+    // output both old and new stats, keep
+    // return value the same
     cpuStats.miscRegfileReads++;
+    executeStats[tid]->numMiscRegReads++;
     return isa[tid]->readMiscReg(misc_reg);
 }
 
@@ -1032,7 +1035,9 @@ CPU::setMiscRegNoEffect(int misc_reg, RegVal val, ThreadID tid)
 void
 CPU::setMiscReg(int misc_reg, RegVal val, ThreadID tid)
 {
+    // output both old and new stats
     cpuStats.miscRegfileWrites++;
+    executeStats[tid]->numMiscRegWrites++;
     isa[tid]->setMiscReg(misc_reg, val);
 }
 
@@ -1156,6 +1161,126 @@ CPU::setReg(PhysRegIdPtr phys_reg, const void *val)
     regFile.setReg(phys_reg, val);
 }
 
+RegVal
+CPU::getReg(PhysRegIdPtr phys_reg, ThreadID tid)
+{
+    switch (phys_reg->classValue()) {
+      case IntRegClass:
+        executeStats[tid]->numIntRegReads++;
+        break;
+      case FloatRegClass:
+        executeStats[tid]->numFpRegReads++;
+        break;
+      case CCRegClass:
+        executeStats[tid]->numCCRegReads++;
+        break;
+      case VecRegClass:
+      case VecElemClass:
+        executeStats[tid]->numVecRegReads++;
+        break;
+      case VecPredRegClass:
+        executeStats[tid]->numVecPredRegReads++;
+        break;
+      default:
+        break;
+    }
+    return regFile.getReg(phys_reg);
+}
+
+void
+CPU::getReg(PhysRegIdPtr phys_reg, void *val, ThreadID tid)
+{
+    switch (phys_reg->classValue()) {
+      case IntRegClass:
+        executeStats[tid]->numIntRegReads++;
+        break;
+      case FloatRegClass:
+        executeStats[tid]->numFpRegReads++;
+        break;
+      case CCRegClass:
+        executeStats[tid]->numCCRegReads++;
+        break;
+      case VecRegClass:
+      case VecElemClass:
+        executeStats[tid]->numVecRegReads++;
+        break;
+      case VecPredRegClass:
+        executeStats[tid]->numVecPredRegReads++;
+        break;
+      default:
+        break;
+    }
+    regFile.getReg(phys_reg, val);
+}
+
+void *
+CPU::getWritableReg(PhysRegIdPtr phys_reg, ThreadID tid)
+{
+    switch (phys_reg->classValue()) {
+      case VecRegClass:
+        executeStats[tid]->numVecRegReads++;
+        break;
+      case VecPredRegClass:
+        executeStats[tid]->numVecPredRegReads++;
+        break;
+      default:
+        break;
+    }
+    return regFile.getWritableReg(phys_reg);
+}
+
+void
+CPU::setReg(PhysRegIdPtr phys_reg, RegVal val, ThreadID tid)
+{
+    switch (phys_reg->classValue()) {
+      case IntRegClass:
+        executeStats[tid]->numIntRegWrites++;
+        break;
+      case FloatRegClass:
+        executeStats[tid]->numFpRegWrites++;
+        break;
+      case CCRegClass:
+        executeStats[tid]->numCCRegWrites++;
+        break;
+      case VecRegClass:
+      case VecElemClass:
+        executeStats[tid]->numVecRegWrites++;
+        break;
+      case VecPredRegClass:
+        executeStats[tid]->numVecPredRegWrites++;
+        break;
+      default:
+        break;
+    }
+    regFile.setReg(phys_reg, val);
+}
+
+void
+CPU::setReg(PhysRegIdPtr phys_reg, const void *val, ThreadID tid)
+{
+    switch (phys_reg->classValue()) {
+      case IntRegClass:
+        executeStats[tid]->numIntRegWrites++;
+        break;
+      case FloatRegClass:
+        executeStats[tid]->numFpRegWrites++;
+        break;
+      case CCRegClass:
+        executeStats[tid]->numCCRegWrites++;
+        break;
+      case VecRegClass:
+      case VecElemClass:
+        executeStats[tid]->numVecRegWrites++;
+        break;
+      case VecPredRegClass:
+        executeStats[tid]->numVecPredRegWrites++;
+        break;
+      default:
+        break;
+    }
+    regFile.setReg(phys_reg, val);
+}
+
 RegVal
 CPU::getArchReg(const RegId &reg, ThreadID tid)
 {
diff --git a/src/cpu/o3/cpu.hh b/src/cpu/o3/cpu.hh
index 76a9060f0a..d6317d6ea2 100644
--- a/src/cpu/o3/cpu.hh
+++ b/src/cpu/o3/cpu.hh
@@ -317,6 +317,18 @@ class CPU : public BaseCPU
     void setReg(PhysRegIdPtr phys_reg, RegVal val);
     void setReg(PhysRegIdPtr phys_reg, const void *val);
 
+    /** These functions are duplicated so that one set
+     * doesn't use thread ID, while the other does.
+     * This allows us to still output both old and
+     * new versions of the stats.
+    */
+    RegVal getReg(PhysRegIdPtr phys_reg, ThreadID tid);
+    void getReg(PhysRegIdPtr phys_reg, void *val, ThreadID tid);
+    void *getWritableReg(PhysRegIdPtr phys_reg, ThreadID tid);
+
+    void setReg(PhysRegIdPtr phys_reg, RegVal val, ThreadID tid);
+    void setReg(PhysRegIdPtr phys_reg, const void *val, ThreadID tid);
+
     /** Architectural register accessors.  Looks up in the commit
      * rename table to obtain the true physical index of the
      * architected register first, then accesses that physical
diff --git a/src/cpu/o3/dyn_inst.hh b/src/cpu/o3/dyn_inst.hh
index d6df09ce4a..4f762b4551 100644
--- a/src/cpu/o3/dyn_inst.hh
+++ b/src/cpu/o3/dyn_inst.hh
@@ -1085,11 +1085,16 @@ class DynInst : public ExecContext, public RefCounted
                 continue;
 
             if (bytes == sizeof(RegVal)) {
+                // call both old and new functions
                 setRegOperand(staticInst.get(), idx,
                         cpu->getReg(prev_phys_reg));
+                setRegOperand(staticInst.get(), idx,
+                        cpu->getReg(prev_phys_reg, threadNumber));
             } else {
                 uint8_t val[original_dest_reg.regClass().regBytes()];
+                // call both old and new functions
                 cpu->getReg(prev_phys_reg, val);
+                cpu->getReg(prev_phys_reg, val, threadNumber);
                 setRegOperand(staticInst.get(), idx, val);
             }
         }
@@ -1116,6 +1121,8 @@ class DynInst : public ExecContext, public RefCounted
         const PhysRegIdPtr reg = renamedSrcIdx(idx);
         if (reg->is(InvalidRegClass))
             return 0;
+        // call new function, only return old value
+        cpu->getReg(reg, threadNumber);
         return cpu->getReg(reg);
     }
 
@@ -1125,12 +1132,16 @@ class DynInst : public ExecContext, public RefCounted
         const PhysRegIdPtr reg = renamedSrcIdx(idx);
         if (reg->is(InvalidRegClass))
             return;
+        // call both old and new function
         cpu->getReg(reg, val);
+        cpu->getReg(reg, val, threadNumber);
     }
 
     void *
     getWritableRegOperand(const StaticInst *si, int idx) override
     {
+        // call both old and new function
+        return cpu->getWritableReg(renamedDestIdx(idx), threadNumber);
         return cpu->getWritableReg(renamedDestIdx(idx));
     }
 
@@ -1143,7 +1154,9 @@ class DynInst : public ExecContext, public RefCounted
         const PhysRegIdPtr reg = renamedDestIdx(idx);
         if (reg->is(InvalidRegClass))
             return;
+        // call both old and new functions
         cpu->setReg(reg, val);
+        cpu->setReg(reg, val, threadNumber);
         setResult(reg->regClass(), val);
     }
 
@@ -1153,7 +1166,9 @@ class DynInst : public ExecContext, public RefCounted
         const PhysRegIdPtr reg = renamedDestIdx(idx);
         if (reg->is(InvalidRegClass))
             return;
+        // call both old and new functions
         cpu->setReg(reg, val);
+        cpu->setReg(reg, val, threadNumber);
         setResult(reg->regClass(), val);
     }
 };
diff --git a/src/cpu/simple/base.cc b/src/cpu/simple/base.cc
index 1632f545a2..d97e1a9964 100644
--- a/src/cpu/simple/base.cc
+++ b/src/cpu/simple/base.cc
@@ -388,7 +388,9 @@ BaseSimpleCPU::postExecute()
     Addr instAddr = threadContexts[curThread]->pcState().instAddr();
 
     if (curStaticInst->isMemRef()) {
+        // update both old and new stats
         t_info.execContextStats.numMemRefs++;
+        executeStats[t_info.thread->threadId()]->numMemRefs++;
     }
 
     if (curStaticInst->isLoad()) {
@@ -404,18 +406,24 @@ BaseSimpleCPU::postExecute()
     /* Power model statistics */
     //integer alu accesses
     if (curStaticInst->isInteger()){
+        // update both old and new stats
+        executeStats[t_info.thread->threadId()]->numIntAluAccesses++;
         t_info.execContextStats.numIntAluAccesses++;
         t_info.execContextStats.numIntInsts++;
     }
 
     //float alu accesses
     if (curStaticInst->isFloating()){
+        // update both old and new stats
+        executeStats[t_info.thread->threadId()]->numFpAluAccesses++;
         t_info.execContextStats.numFpAluAccesses++;
         t_info.execContextStats.numFpInsts++;
     }
 
     //vector alu accesses
     if (curStaticInst->isVector()){
+        // update both old and new stats
+        executeStats[t_info.thread->threadId()]->numVecAluAccesses++;
         t_info.execContextStats.numVecAluAccesses++;
         t_info.execContextStats.numVecInsts++;
     }
diff --git a/src/cpu/simple/exec_context.hh b/src/cpu/simple/exec_context.hh
index 0f20763f28..31aa5d44c7 100644
--- a/src/cpu/simple/exec_context.hh
+++ b/src/cpu/simple/exec_context.hh
@@ -161,22 +161,23 @@ class SimpleExecContext : public ExecContext
               ADD_STAT(statExecutedInstType, statistics::units::Count::get(),
                        "Class of executed instruction."),
               numRegReads{
-                  &numIntRegReads,
-                  &numFpRegReads,
-                  &numVecRegReads,
-                  &numVecRegReads,
-                  &numVecPredRegReads,
-                  &numMatRegReads,
-                  &numCCRegReads
+                  &(cpu->executeStats[thread->threadId()]->numIntRegReads),
+                  &(cpu->executeStats[thread->threadId()]->numFpRegReads),
+                  &(cpu->executeStats[thread->threadId()]->numVecRegReads),
+                  &(cpu->executeStats[thread->threadId()]->numVecRegReads),
+                  &(cpu->executeStats[thread->threadId()]->numVecPredRegReads),
+                  &(cpu->executeStats[thread->threadId()]->numCCRegReads),
+                  &numMatRegReads
               },
               numRegWrites{
-                  &numIntRegWrites,
-                  &numFpRegWrites,
-                  &numVecRegWrites,
-                  &numVecRegWrites,
-                  &numVecPredRegWrites,
-                  &numMatRegWrites,
-                  &numCCRegWrites
+                  &(cpu->executeStats[thread->threadId()]->numIntRegWrites),
+                  &(cpu->executeStats[thread->threadId()]->numFpRegWrites),
+                  &(cpu->executeStats[thread->threadId()]->numVecRegWrites),
+                  &(cpu->executeStats[thread->threadId()]->numVecRegWrites),
+                  &(cpu->executeStats[thread->threadId()]
+                        ->numVecPredRegWrites),
+                  &(cpu->executeStats[thread->threadId()]->numCCRegWrites),
+                  &numMatRegWrites
               }
         {
             numCCRegReads
@@ -368,7 +369,9 @@ class SimpleExecContext : public ExecContext
     RegVal
     readMiscRegOperand(const StaticInst *si, int idx) override
     {
+        // update both old and new stats
         execContextStats.numMiscRegReads++;
+        cpu->executeStats[thread->threadId()]->numMiscRegReads++;
         const RegId& reg = si->srcRegIdx(idx);
         assert(reg.is(MiscRegClass));
         return thread->readMiscReg(reg.index());
@@ -377,7 +380,9 @@ class SimpleExecContext : public ExecContext
     void
     setMiscRegOperand(const StaticInst *si, int idx, RegVal val) override
     {
+        // update both old and new stats
         execContextStats.numMiscRegWrites++;
+        cpu->executeStats[thread->threadId()]->numMiscRegWrites++;
         const RegId& reg = si->destRegIdx(idx);
         assert(reg.is(MiscRegClass));
         thread->setMiscReg(reg.index(), val);
@@ -390,7 +395,9 @@ class SimpleExecContext : public ExecContext
     RegVal
     readMiscReg(int misc_reg) override
     {
+        // update both old and new stats
         execContextStats.numMiscRegReads++;
+        cpu->executeStats[thread->threadId()]->numMiscRegReads++;
         return thread->readMiscReg(misc_reg);
     }
 
@@ -401,7 +408,9 @@ class SimpleExecContext : public ExecContext
     void
     setMiscReg(int misc_reg, RegVal val) override
     {
+        // update both old and new stats
         execContextStats.numMiscRegWrites++;
+        cpu->executeStats[thread->threadId()]->numMiscRegWrites++;
         thread->setMiscReg(misc_reg, val);
     }
 

From ea2bbe26fc0de23b1148577c25d8db6a393eecb3 Mon Sep 17 00:00:00 2001
From: Melissa Jost <melissakjost@gmail.com>
Date: Mon, 13 Mar 2023 02:55:56 -0700
Subject: [PATCH 368/492] cpu: Move commit stats from simple to base cpu

Created stat group CommitCPUStats in BaseCPU and copied stats from the
simple cpu model.

The stats copied from SimpleCPU are numCondCtrlInsts, numFpInsts,
numIntInsts, numLoadInsts, numStoreInsts, numVecInsts.

Copied committedControl of MinorCPU to BaseCPU::CommittedCPUStats. In
MinorCPU, this stat was a 2D vector, where the first dimension is the
thread ID. In base it is now  a 1D vector that is tied to a thread ID
via the commitStats vector.

The committedControl stat vector in CommitCPUStats is updated in the
same way in all CPU models. The function updateComCtrlStats will
update committedControl and the CPU models will call this function
instead of updating committedControl directly. This function takes
a StaticInstPtr as input, which Simple, Minor, and O3 CPU models are
able to provide.

Duplicate stat "branches" in O3 commit with
BaseCPU::CommittedCPUStats::committedControl::IsControl.

O3 commit stats floating, integer, loads, memRefs, vectorInstructions
are duplicated  by numFpInsts, numIntInsts, numLoadInsts, numMemRefs,
numVecInsts from BaseCPU::CommitCPUStats respectively. Implemented
numStoreInsts from BaseCPU::commitCPUStats for O3 commit stage.

Change-Id: Ie6f176623091159622d53e9899d780f235fce525
Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/69099
Maintainer: Bobby Bruce <bbruce@ucdavis.edu>
Tested-by: kokoro <noreply+kokoro@google.com>
Reviewed-by: Bobby Bruce <bbruce@ucdavis.edu>
---
 src/cpu/base.cc          | 69 ++++++++++++++++++++++++++++++++++++++++
 src/cpu/base.hh          | 32 +++++++++++++++++++
 src/cpu/minor/execute.cc |  3 ++
 src/cpu/o3/commit.cc     | 25 +++++++++++++--
 src/cpu/simple/base.cc   | 11 +++++++
 5 files changed, 137 insertions(+), 3 deletions(-)

diff --git a/src/cpu/base.cc b/src/cpu/base.cc
index 641152ede2..5592bf0d55 100644
--- a/src/cpu/base.cc
+++ b/src/cpu/base.cc
@@ -194,9 +194,11 @@ BaseCPU::BaseCPU(const Params &p, bool is_checker)
     // create a stat group object for each thread on this core
     fetchStats.reserve(numThreads);
     executeStats.reserve(numThreads);
+    commitStats.reserve(numThreads);
     for (int i = 0; i < numThreads; i++) {
         fetchStats.emplace_back(new FetchCPUStats(this, i));
         executeStats.emplace_back(new ExecuteCPUStats(this, i));
+        commitStats.emplace_back(new CommitCPUStats(this, i));
     }
 }
 
@@ -922,4 +924,71 @@ ExecuteCPUStats::ExecuteCPUStats(statistics::Group *parent, int thread_id)
         .prereq(numVecRegWrites);
 }
 
+BaseCPU::
+CommitCPUStats::CommitCPUStats(statistics::Group *parent, int thread_id)
+    : statistics::Group(parent, csprintf("commitStats%i", thread_id).c_str()),
+    ADD_STAT(numMemRefs, statistics::units::Count::get(),
+            "Number of memory references committed"),
+    ADD_STAT(numFpInsts, statistics::units::Count::get(),
+            "Number of float instructions"),
+    ADD_STAT(numIntInsts, statistics::units::Count::get(),
+            "Number of integer instructions"),
+    ADD_STAT(numLoadInsts, statistics::units::Count::get(),
+            "Number of load instructions"),
+    ADD_STAT(numStoreInsts, statistics::units::Count::get(),
+            "Number of store instructions"),
+    ADD_STAT(numVecInsts, statistics::units::Count::get(),
+            "Number of vector instructions"),
+    ADD_STAT(committedInstType, statistics::units::Count::get(),
+            "Class of committed instruction."),
+    ADD_STAT(committedControl, statistics::units::Count::get(),
+             "Class of control type instructions committed")
+{
+    committedInstType
+        .init(enums::Num_OpClass)
+        .flags(statistics::total | statistics::pdf | statistics::dist);
+
+    for (unsigned i = 0; i < Num_OpClasses; ++i) {
+        committedInstType.subname(i, enums::OpClassStrings[i]);
+    }
+
+    committedControl
+        .init(StaticInstFlags::Flags::Num_Flags)
+        .flags(statistics::nozero);
+
+    for (unsigned i = 0; i < StaticInstFlags::Flags::Num_Flags; i++) {
+        committedControl.subname(i, StaticInstFlags::FlagsStrings[i]);
+    }
+}
+
+
+void
+BaseCPU::
+CommitCPUStats::updateComCtrlStats(const StaticInstPtr staticInst)
+{
+    /* Add a count for every control instruction type */
+    if (staticInst->isControl()) {
+        if (staticInst->isReturn()) {
+            committedControl[gem5::StaticInstFlags::Flags::IsReturn]++;
+        }
+        if (staticInst->isCall()) {
+            committedControl[gem5::StaticInstFlags::Flags::IsCall]++;
+        }
+        if (staticInst->isDirectCtrl()) {
+            committedControl[gem5::StaticInstFlags::Flags::IsDirectControl]++;
+        }
+        if (staticInst->isIndirectCtrl()) {
+            committedControl
+                [gem5::StaticInstFlags::Flags::IsIndirectControl]++;
+        }
+        if (staticInst->isCondCtrl()) {
+            committedControl[gem5::StaticInstFlags::Flags::IsCondControl]++;
+        }
+        if (staticInst->isUncondCtrl()) {
+            committedControl[gem5::StaticInstFlags::Flags::IsUncondControl]++;
+        }
+        committedControl[gem5::StaticInstFlags::Flags::IsControl]++;
+    }
+}
+
 } // namespace gem5
diff --git a/src/cpu/base.hh b/src/cpu/base.hh
index acf78bbd81..934e56fd05 100644
--- a/src/cpu/base.hh
+++ b/src/cpu/base.hh
@@ -739,8 +739,40 @@ class BaseCPU : public ClockedObject
         statistics::Scalar numDiscardedOps;
     };
 
+    struct CommitCPUStats: public statistics::Group
+    {
+        CommitCPUStats(statistics::Group *parent, int thread_id);
+
+        /* Number of committed memory references. */
+        statistics::Scalar numMemRefs;
+
+        /* Number of float instructions */
+        statistics::Scalar numFpInsts;
+
+        /* Number of int instructions */
+        statistics::Scalar numIntInsts;
+
+        /* number of load instructions */
+        statistics::Scalar numLoadInsts;
+
+        /* Number of store instructions */
+        statistics::Scalar numStoreInsts;
+
+        /* Number of vector instructions */
+        statistics::Scalar numVecInsts;
+
+        /* Number of instructions committed by type (OpClass) */
+        statistics::Vector committedInstType;
+
+        /* number of control instructions committed by control inst type */
+        statistics::Vector committedControl;
+        void updateComCtrlStats(const StaticInstPtr staticInst);
+
+    };
+
     std::vector<std::unique_ptr<FetchCPUStats>> fetchStats;
     std::vector<std::unique_ptr<ExecuteCPUStats>> executeStats;
+    std::vector<std::unique_ptr<CommitCPUStats>> commitStats;
 };
 
 } // namespace gem5
diff --git a/src/cpu/minor/execute.cc b/src/cpu/minor/execute.cc
index 42c7b1af0c..99d12d65b5 100644
--- a/src/cpu/minor/execute.cc
+++ b/src/cpu/minor/execute.cc
@@ -879,6 +879,9 @@ Execute::doInstCommitAccounting(MinorDynInstPtr inst)
     thread->numOp++;
     thread->threadStats.numOps++;
     cpu.stats.numOps++;
+    // update both old and new stats
+    cpu.commitStats[inst->id.threadId]
+        ->committedInstType[inst->staticInst->opClass()]++;
     cpu.stats.committedInstType[inst->id.threadId]
                                [inst->staticInst->opClass()]++;
 
diff --git a/src/cpu/o3/commit.cc b/src/cpu/o3/commit.cc
index 38dce831b1..b3da2d9570 100644
--- a/src/cpu/o3/commit.cc
+++ b/src/cpu/o3/commit.cc
@@ -1396,6 +1396,8 @@ Commit::updateComInstStats(const DynInstPtr &inst)
     //
     //  Control Instructions
     //
+    // update both old and new stats
+    cpu->commitStats[tid]->updateComCtrlStats(inst->staticInst);
     if (inst->isControl())
         stats.branches[tid]++;
 
@@ -1403,15 +1405,23 @@ Commit::updateComInstStats(const DynInstPtr &inst)
     //  Memory references
     //
     if (inst->isMemRef()) {
+        // update both old and new stats
         stats.memRefs[tid]++;
+        cpu->commitStats[tid]->numMemRefs++;
 
         if (inst->isLoad()) {
+            // update both old and new stats
             stats.loads[tid]++;
+            cpu->commitStats[tid]->numLoadInsts++;
         }
 
         if (inst->isAtomic()) {
             stats.amos[tid]++;
         }
+
+        if (inst->isStore()) {
+            cpu->commitStats[tid]->numStoreInsts++;
+        }
     }
 
     if (inst->isFullMemBarrier()) {
@@ -1419,15 +1429,24 @@ Commit::updateComInstStats(const DynInstPtr &inst)
     }
 
     // Integer Instruction
-    if (inst->isInteger())
+    if (inst->isInteger()) {
+        // update both old and new stats
+        cpu->commitStats[tid]->numIntInsts++;
         stats.integer[tid]++;
+    }
 
     // Floating Point Instruction
-    if (inst->isFloating())
+    if (inst->isFloating()) {
+        // update both old and new stats
+        cpu->commitStats[tid]->numFpInsts++;
         stats.floating[tid]++;
+    }
     // Vector Instruction
-    if (inst->isVector())
+    if (inst->isVector()) {
+        // update both old and new stats
+        cpu->commitStats[tid]->numVecInsts++;
         stats.vectorInstructions[tid]++;
+    }
 
     // Function Calls
     if (inst->isCall())
diff --git a/src/cpu/simple/base.cc b/src/cpu/simple/base.cc
index d97e1a9964..40f0fa7684 100644
--- a/src/cpu/simple/base.cc
+++ b/src/cpu/simple/base.cc
@@ -408,6 +408,7 @@ BaseSimpleCPU::postExecute()
     if (curStaticInst->isInteger()){
         // update both old and new stats
         executeStats[t_info.thread->threadId()]->numIntAluAccesses++;
+        commitStats[t_info.thread->threadId()]->numIntInsts++;
         t_info.execContextStats.numIntAluAccesses++;
         t_info.execContextStats.numIntInsts++;
     }
@@ -416,6 +417,7 @@ BaseSimpleCPU::postExecute()
     if (curStaticInst->isFloating()){
         // update both old and new stats
         executeStats[t_info.thread->threadId()]->numFpAluAccesses++;
+        commitStats[t_info.thread->threadId()]->numFpInsts++;
         t_info.execContextStats.numFpAluAccesses++;
         t_info.execContextStats.numFpInsts++;
     }
@@ -424,6 +426,7 @@ BaseSimpleCPU::postExecute()
     if (curStaticInst->isVector()){
         // update both old and new stats
         executeStats[t_info.thread->threadId()]->numVecAluAccesses++;
+        commitStats[t_info.thread->threadId()]->numVecInsts++;
         t_info.execContextStats.numVecAluAccesses++;
         t_info.execContextStats.numVecInsts++;
     }
@@ -446,14 +449,22 @@ BaseSimpleCPU::postExecute()
 
     //result bus acceses
     if (curStaticInst->isLoad()){
+        // update both old and new stats
+        commitStats[t_info.thread->threadId()]->numLoadInsts++;
         t_info.execContextStats.numLoadInsts++;
     }
 
     if (curStaticInst->isStore() || curStaticInst->isAtomic()){
+        // update both old and new stats
+        commitStats[t_info.thread->threadId()]->numStoreInsts++;
         t_info.execContextStats.numStoreInsts++;
     }
     /* End power model statistics */
 
+    // update both old and new stats
+    commitStats[t_info.thread->threadId()]
+        ->committedInstType[curStaticInst->opClass()]++;
+    commitStats[t_info.thread->threadId()]->updateComCtrlStats(curStaticInst);
     t_info.execContextStats.statExecutedInstType[curStaticInst->opClass()]++;
 
     if (FullSystem)

From 2f93672bddd2705c53e51281a3ff3f749acb138c Mon Sep 17 00:00:00 2001
From: Melissa Jost <melissakjost@gmail.com>
Date: Mon, 13 Mar 2023 03:09:38 -0700
Subject: [PATCH 369/492] cpu: Move numInsts, numOps, ipc, cpi to BaseCPU

In BaseCPU::BaseCPUStats, numInsts and numOps track per CPU core
committed instructions and operations.

In BaseCPU::FetchCPUStats, numInsts and numOps track per thread
fetched instructions and operations.

In BaseCPU::CommitCPUStats, numInsts and numOps track per thread
committed instructions and operations.

In BaseSimpleCPU, the countInst() function has been split into
countInst(), countFetchInst(), and countCommitInst().

countFetchInst() increments numInsts and numOps
of the FetchCPUStats group for a thread. countCommitInst() increments
the numInsts and numOps of the CommitCPUStats group for a thread and
of the BaseCPUStats group for a CPU core. These functions are called
in the appropriate stage within timing.cc and atomic.cc. The call to
countInst() is left unchanged. countFetchInst() is called in
preExecute(). countCommitInst() is called in postExecute().

For MinorCPU, only the commit level numInsts and numOps stats have been
implemented.

IPC and CPI stats have been added to BaseCPUStats (core level) and
CommitCPUStats (thread level). The formulas for the IPC and CPI stats
in CommitCPUStats are set in the BaseCPU constructor, after the
CommitCPUStats stat group object has been created.

Change-Id: I71c831c44202fc7d14c75b27a33eb91204f3a273
Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/69100
Tested-by: kokoro <noreply+kokoro@google.com>
Reviewed-by: Bobby Bruce <bbruce@ucdavis.edu>
Maintainer: Bobby Bruce <bbruce@ucdavis.edu>
---
 src/cpu/base.cc          | 38 +++++++++++++++++++++++++++++++++++++-
 src/cpu/base.hh          | 20 ++++++++++++++++++++
 src/cpu/minor/execute.cc |  5 +++++
 src/cpu/simple/base.cc   | 36 ++++++++++++++++++++++++++++++++++++
 src/cpu/simple/base.hh   |  2 ++
 5 files changed, 100 insertions(+), 1 deletion(-)

diff --git a/src/cpu/base.cc b/src/cpu/base.cc
index 5592bf0d55..d7dda13ab1 100644
--- a/src/cpu/base.cc
+++ b/src/cpu/base.cc
@@ -198,7 +198,11 @@ BaseCPU::BaseCPU(const Params &p, bool is_checker)
     for (int i = 0; i < numThreads; i++) {
         fetchStats.emplace_back(new FetchCPUStats(this, i));
         executeStats.emplace_back(new ExecuteCPUStats(this, i));
-        commitStats.emplace_back(new CommitCPUStats(this, i));
+        // create commitStat object for thread i and set ipc, cpi formulas
+        CommitCPUStats* commitStatptr = new CommitCPUStats(this, i);
+        commitStatptr->ipc = commitStatptr->numInsts / baseStats.numCycles;
+        commitStatptr->cpi = baseStats.numCycles / commitStatptr->numInsts;
+        commitStats.emplace_back(commitStatptr);
     }
 }
 
@@ -392,13 +396,28 @@ BaseCPU::probeInstCommit(const StaticInstPtr &inst, Addr pc)
 BaseCPU::
 BaseCPUStats::BaseCPUStats(statistics::Group *parent)
     : statistics::Group(parent),
+      ADD_STAT(numInsts, statistics::units::Count::get(),
+               "Number of instructions committed (core level)"),
+      ADD_STAT(numOps, statistics::units::Count::get(),
+               "Number of ops (including micro ops) committed (core level)"),
       ADD_STAT(numCycles, statistics::units::Cycle::get(),
                "Number of cpu cycles simulated"),
+      ADD_STAT(cpi, statistics::units::Rate<
+                statistics::units::Cycle, statistics::units::Count>::get(),
+               "CPI: cycles per instruction (core level)"),
+      ADD_STAT(ipc, statistics::units::Rate<
+                statistics::units::Count, statistics::units::Cycle>::get(),
+               "IPC: instructions per cycle (core level)"),
       ADD_STAT(numWorkItemsStarted, statistics::units::Count::get(),
                "Number of work items this cpu started"),
       ADD_STAT(numWorkItemsCompleted, statistics::units::Count::get(),
                "Number of work items this cpu completed")
 {
+    cpi.precision(6);
+    cpi = numCycles / numInsts;
+
+    ipc.precision(6);
+    ipc = numInsts / numCycles;
 }
 
 void
@@ -839,6 +858,10 @@ BaseCPU::GlobalStats::GlobalStats(statistics::Group *parent)
 BaseCPU::
 FetchCPUStats::FetchCPUStats(statistics::Group *parent, int thread_id)
     : statistics::Group(parent, csprintf("fetchStats%i", thread_id).c_str()),
+    ADD_STAT(numInsts, statistics::units::Count::get(),
+             "Number of instructions fetched (thread level)"),
+    ADD_STAT(numOps, statistics::units::Count::get(),
+             "Number of ops (including micro ops) fetched (thread level)"),
     ADD_STAT(numBranches, statistics::units::Count::get(),
              "Number of branches fetched"),
     ADD_STAT(numFetchSuspends, statistics::units::Count::get(),
@@ -927,6 +950,16 @@ ExecuteCPUStats::ExecuteCPUStats(statistics::Group *parent, int thread_id)
 BaseCPU::
 CommitCPUStats::CommitCPUStats(statistics::Group *parent, int thread_id)
     : statistics::Group(parent, csprintf("commitStats%i", thread_id).c_str()),
+    ADD_STAT(numInsts, statistics::units::Count::get(),
+             "Number of instructions committed (thread level)"),
+    ADD_STAT(numOps, statistics::units::Count::get(),
+             "Number of ops (including micro ops) committed (thread level)"),
+    ADD_STAT(cpi, statistics::units::Rate<
+                statistics::units::Cycle, statistics::units::Count>::get(),
+             "CPI: cycles per instruction (thread level)"),
+    ADD_STAT(ipc, statistics::units::Rate<
+                statistics::units::Count, statistics::units::Cycle>::get(),
+             "IPC: instructions per cycle (thread level)"),
     ADD_STAT(numMemRefs, statistics::units::Count::get(),
             "Number of memory references committed"),
     ADD_STAT(numFpInsts, statistics::units::Count::get(),
@@ -944,6 +977,9 @@ CommitCPUStats::CommitCPUStats(statistics::Group *parent, int thread_id)
     ADD_STAT(committedControl, statistics::units::Count::get(),
              "Class of control type instructions committed")
 {
+    cpi.precision(6);
+    ipc.precision(6);
+
     committedInstType
         .init(enums::Num_OpClass)
         .flags(statistics::total | statistics::pdf | statistics::dist);
diff --git a/src/cpu/base.hh b/src/cpu/base.hh
index 934e56fd05..5e2432f01d 100644
--- a/src/cpu/base.hh
+++ b/src/cpu/base.hh
@@ -633,8 +633,14 @@ class BaseCPU : public ClockedObject
     struct BaseCPUStats : public statistics::Group
     {
         BaseCPUStats(statistics::Group *parent);
+        // Number of CPU insts and ops committed at CPU core level
+        statistics::Scalar numInsts;
+        statistics::Scalar numOps;
         // Number of CPU cycles simulated
         statistics::Scalar numCycles;
+        /* CPI/IPC for total cycle counts and macro insts */
+        statistics::Formula cpi;
+        statistics::Formula ipc;
         statistics::Scalar numWorkItemsStarted;
         statistics::Scalar numWorkItemsCompleted;
     } baseStats;
@@ -684,6 +690,12 @@ class BaseCPU : public ClockedObject
     {
         FetchCPUStats(statistics::Group *parent, int thread_id);
 
+        /* Total number of instructions fetched */
+        statistics::Scalar numInsts;
+
+        /* Total number of operations fetched */
+        statistics::Scalar numOps;
+
         /* Total number of branches fetched */
         statistics::Scalar numBranches;
 
@@ -743,6 +755,14 @@ class BaseCPU : public ClockedObject
     {
         CommitCPUStats(statistics::Group *parent, int thread_id);
 
+        /* Number of simulated instructions committed */
+        statistics::Scalar numInsts;
+        statistics::Scalar numOps;
+
+        /* CPI/IPC for total cycle counts and macro insts */
+        statistics::Formula cpi;
+        statistics::Formula ipc;
+
         /* Number of committed memory references. */
         statistics::Scalar numMemRefs;
 
diff --git a/src/cpu/minor/execute.cc b/src/cpu/minor/execute.cc
index 99d12d65b5..a65a77e643 100644
--- a/src/cpu/minor/execute.cc
+++ b/src/cpu/minor/execute.cc
@@ -872,6 +872,9 @@ Execute::doInstCommitAccounting(MinorDynInstPtr inst)
         thread->numInst++;
         thread->threadStats.numInsts++;
         cpu.stats.numInsts++;
+        // update both old and new stas
+        cpu.commitStats[inst->id.threadId]->numInsts++;
+        cpu.baseStats.numInsts++;
 
         /* Act on events related to instruction counts */
         thread->comInstEventQueue.serviceEvents(thread->numInst);
@@ -880,6 +883,8 @@ Execute::doInstCommitAccounting(MinorDynInstPtr inst)
     thread->threadStats.numOps++;
     cpu.stats.numOps++;
     // update both old and new stats
+    cpu.commitStats[inst->id.threadId]->numOps++;
+    cpu.baseStats.numOps++;
     cpu.commitStats[inst->id.threadId]
         ->committedInstType[inst->staticInst->opClass()]++;
     cpu.stats.committedInstType[inst->id.threadId]
diff --git a/src/cpu/simple/base.cc b/src/cpu/simple/base.cc
index 40f0fa7684..9e831a25f6 100644
--- a/src/cpu/simple/base.cc
+++ b/src/cpu/simple/base.cc
@@ -160,6 +160,34 @@ BaseSimpleCPU::countInst()
     t_info.execContextStats.numOps++;
 }
 
+void
+BaseSimpleCPU::countFetchInst()
+{
+    SimpleExecContext& t_info = *threadInfo[curThread];
+
+    if (!curStaticInst->isMicroop() || curStaticInst->isLastMicroop()) {
+        // increment thread level numInsts fetched count
+        fetchStats[t_info.thread->threadId()]->numInsts++;
+    }
+    // increment thread level numOps fetched count
+    fetchStats[t_info.thread->threadId()]->numOps++;
+}
+
+void
+BaseSimpleCPU::countCommitInst()
+{
+    SimpleExecContext& t_info = *threadInfo[curThread];
+
+    if (!curStaticInst->isMicroop() || curStaticInst->isLastMicroop()) {
+        // increment thread level and core level numInsts count
+        commitStats[t_info.thread->threadId()]->numInsts++;
+        baseStats.numInsts++;
+    }
+    // increment thread level and core level numOps count
+    commitStats[t_info.thread->threadId()]->numOps++;
+    baseStats.numOps++;
+}
+
 Counter
 BaseSimpleCPU::totalInsts() const
 {
@@ -376,6 +404,11 @@ BaseSimpleCPU::preExecute()
         if (predict_taken)
             ++t_info.execContextStats.numPredictedBranches;
     }
+
+    // increment the fetch instruction stat counters
+    if (curStaticInst) {
+        countFetchInst();
+    }
 }
 
 void
@@ -467,6 +500,9 @@ BaseSimpleCPU::postExecute()
     commitStats[t_info.thread->threadId()]->updateComCtrlStats(curStaticInst);
     t_info.execContextStats.statExecutedInstType[curStaticInst->opClass()]++;
 
+    /* increment the committed numInsts and numOps stats */
+    countCommitInst();
+
     if (FullSystem)
         traceFunctions(instAddr);
 
diff --git a/src/cpu/simple/base.hh b/src/cpu/simple/base.hh
index df5290cf3c..46a25a0a42 100644
--- a/src/cpu/simple/base.hh
+++ b/src/cpu/simple/base.hh
@@ -182,6 +182,8 @@ class BaseSimpleCPU : public BaseCPU
     }
 
     void countInst();
+    void countFetchInst();
+    void countCommitInst();
     Counter totalInsts() const override;
     Counter totalOps() const override;
 

From 4b70c1cacccf4f26b73bf3cc2a9147ae01b1b452 Mon Sep 17 00:00:00 2001
From: Melissa Jost <melissakjost@gmail.com>
Date: Mon, 13 Mar 2023 10:55:55 -0700
Subject: [PATCH 370/492] cpu-o3: Use base instructions committed counters in
 O3CPU

Copied committedInsts from O3 cpu to BaseCPU as numInstsNotNOP because
it tracks the instructions committed that are not NOPs or prefetches.
This change also does the same for commitedOps. InstsCommitted from O3
is duplicated by CommitCPUStats::numInsts.  The same thing has been done
with opsCommitted.

Change-Id: If24d22fee552c65fc0c63dfad90fc59b17100f34
Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/69101
Tested-by: kokoro <noreply+kokoro@google.com>
Maintainer: Bobby Bruce <bbruce@ucdavis.edu>
Reviewed-by: Bobby Bruce <bbruce@ucdavis.edu>
---
 src/cpu/base.cc      | 4 ++++
 src/cpu/base.hh      | 4 ++++
 src/cpu/o3/commit.cc | 9 ++++++++-
 src/cpu/o3/cpu.cc    | 4 ++++
 4 files changed, 20 insertions(+), 1 deletion(-)

diff --git a/src/cpu/base.cc b/src/cpu/base.cc
index d7dda13ab1..801a95b087 100644
--- a/src/cpu/base.cc
+++ b/src/cpu/base.cc
@@ -954,6 +954,10 @@ CommitCPUStats::CommitCPUStats(statistics::Group *parent, int thread_id)
              "Number of instructions committed (thread level)"),
     ADD_STAT(numOps, statistics::units::Count::get(),
              "Number of ops (including micro ops) committed (thread level)"),
+    ADD_STAT(numInstsNotNOP, statistics::units::Count::get(),
+             "Number of instructions committed excluding NOPs or prefetches"),
+    ADD_STAT(numOpsNotNOP, statistics::units::Count::get(),
+             "Number of Ops (including micro ops) Simulated"),
     ADD_STAT(cpi, statistics::units::Rate<
                 statistics::units::Cycle, statistics::units::Count>::get(),
              "CPI: cycles per instruction (thread level)"),
diff --git a/src/cpu/base.hh b/src/cpu/base.hh
index 5e2432f01d..f1739679f6 100644
--- a/src/cpu/base.hh
+++ b/src/cpu/base.hh
@@ -759,6 +759,10 @@ class BaseCPU : public ClockedObject
         statistics::Scalar numInsts;
         statistics::Scalar numOps;
 
+        /* Number of instructions committed that are not NOP or prefetches */
+        statistics::Scalar numInstsNotNOP;
+        statistics::Scalar numOpsNotNOP;
+
         /* CPI/IPC for total cycle counts and macro insts */
         statistics::Formula cpi;
         statistics::Formula ipc;
diff --git a/src/cpu/o3/commit.cc b/src/cpu/o3/commit.cc
index b3da2d9570..63bf7aebc4 100644
--- a/src/cpu/o3/commit.cc
+++ b/src/cpu/o3/commit.cc
@@ -1383,9 +1383,16 @@ Commit::updateComInstStats(const DynInstPtr &inst)
 {
     ThreadID tid = inst->threadNumber;
 
-    if (!inst->isMicroop() || inst->isLastMicroop())
+    if (!inst->isMicroop() || inst->isLastMicroop()) {
+        // update both old and new stats
         stats.instsCommitted[tid]++;
+        cpu->commitStats[tid]->numInsts++;
+        cpu->baseStats.numInsts++;
+    }
+    // update both old and new stats
     stats.opsCommitted[tid]++;
+    cpu->commitStats[tid]->numOps++;
+    cpu->baseStats.numOps++;
 
     // To match the old model, don't count nops and instruction
     // prefetches towards the total commit count.
diff --git a/src/cpu/o3/cpu.cc b/src/cpu/o3/cpu.cc
index 6732c4310e..444692d47f 100644
--- a/src/cpu/o3/cpu.cc
+++ b/src/cpu/o3/cpu.cc
@@ -1353,16 +1353,20 @@ CPU::instDone(ThreadID tid, const DynInstPtr &inst)
 {
     // Keep an instruction count.
     if (!inst->isMicroop() || inst->isLastMicroop()) {
+        // update both old and new stats
         thread[tid]->numInst++;
         thread[tid]->threadStats.numInsts++;
         cpuStats.committedInsts[tid]++;
+        commitStats[tid]->numInstsNotNOP++;
 
         // Check for instruction-count-based events.
         thread[tid]->comInstEventQueue.serviceEvents(thread[tid]->numInst);
     }
+    // update both old and new stats
     thread[tid]->numOp++;
     thread[tid]->threadStats.numOps++;
     cpuStats.committedOps[tid]++;
+    commitStats[tid]->numOpsNotNOP++;
 
     probeInstCommit(inst->staticInst, inst->pcState().instAddr());
 }

From 53a12bc8ad1c7ccf0fff43b530c89b0e2fc72f1b Mon Sep 17 00:00:00 2001
From: Melissa Jost <melissakjost@gmail.com>
Date: Mon, 13 Mar 2023 11:10:14 -0700
Subject: [PATCH 371/492] cpu-o3: Copy general O3 fetch stats to
 BaseCPU::FetchCPUStats

The stats moved are from fetch.hh and fetch.cc of O3. Stat branches is
now tracked by numBranches. Stat branchRate is now tracked by
branchRate in FetchCPUStats. Stat rate is tracked by fetchRate. Stat
insts is tracked by numInsts. Stat icacheStallCycles is tracked by
icacheStallCycles in BaseCPU::FetchCPUStats.

Change-Id: I2a0a48a175bcb4322c66490f16c906dc9597f30e
Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/69102
Tested-by: kokoro <noreply+kokoro@google.com>
Maintainer: Bobby Bruce <bbruce@ucdavis.edu>
Reviewed-by: Bobby Bruce <bbruce@ucdavis.edu>
---
 src/cpu/base.cc      | 27 ++++++++++++++++++++++++++-
 src/cpu/base.hh      |  9 +++++++++
 src/cpu/o3/commit.cc |  2 ++
 src/cpu/o3/fetch.cc  | 11 ++++++++++-
 4 files changed, 47 insertions(+), 2 deletions(-)

diff --git a/src/cpu/base.cc b/src/cpu/base.cc
index 801a95b087..7c1930744c 100644
--- a/src/cpu/base.cc
+++ b/src/cpu/base.cc
@@ -196,7 +196,13 @@ BaseCPU::BaseCPU(const Params &p, bool is_checker)
     executeStats.reserve(numThreads);
     commitStats.reserve(numThreads);
     for (int i = 0; i < numThreads; i++) {
-        fetchStats.emplace_back(new FetchCPUStats(this, i));
+        // create fetchStat object for thread i and set rate formulas
+        FetchCPUStats* fetchStatptr = new FetchCPUStats(this, i);
+        fetchStatptr->fetchRate = fetchStatptr->numInsts / baseStats.numCycles;
+        fetchStatptr->branchRate = fetchStatptr->numBranches /
+            baseStats.numCycles;
+        fetchStats.emplace_back(fetchStatptr);
+
         executeStats.emplace_back(new ExecuteCPUStats(this, i));
         // create commitStat object for thread i and set ipc, cpi formulas
         CommitCPUStats* commitStatptr = new CommitCPUStats(this, i);
@@ -862,15 +868,31 @@ FetchCPUStats::FetchCPUStats(statistics::Group *parent, int thread_id)
              "Number of instructions fetched (thread level)"),
     ADD_STAT(numOps, statistics::units::Count::get(),
              "Number of ops (including micro ops) fetched (thread level)"),
+    ADD_STAT(fetchRate, statistics::units::Rate<
+             statistics::units::Count, statistics::units::Cycle>::get(),
+             "Number of inst fetches per cycle"),
     ADD_STAT(numBranches, statistics::units::Count::get(),
              "Number of branches fetched"),
+    ADD_STAT(branchRate, statistics::units::Ratio::get(),
+             "Number of branch fetches per cycle"),
+    ADD_STAT(icacheStallCycles, statistics::units::Cycle::get(),
+             "ICache total stall cycles"),
     ADD_STAT(numFetchSuspends, statistics::units::Count::get(),
              "Number of times Execute suspended instruction fetching")
 
 {
+    fetchRate
+        .flags(statistics::total);
+
     numBranches
         .prereq(numBranches);
 
+    branchRate
+        .flags(statistics::total);
+
+    icacheStallCycles
+        .prereq(icacheStallCycles);
+
 }
 
 // means it is incremented in a vector indexing and not directly
@@ -981,6 +1003,9 @@ CommitCPUStats::CommitCPUStats(statistics::Group *parent, int thread_id)
     ADD_STAT(committedControl, statistics::units::Count::get(),
              "Class of control type instructions committed")
 {
+    numInsts
+        .prereq(numInsts);
+
     cpi.precision(6);
     ipc.precision(6);
 
diff --git a/src/cpu/base.hh b/src/cpu/base.hh
index f1739679f6..946ea6ba87 100644
--- a/src/cpu/base.hh
+++ b/src/cpu/base.hh
@@ -696,9 +696,18 @@ class BaseCPU : public ClockedObject
         /* Total number of operations fetched */
         statistics::Scalar numOps;
 
+        /* Number of instruction fetched per cycle. */
+        statistics::Formula fetchRate;
+
         /* Total number of branches fetched */
         statistics::Scalar numBranches;
 
+        /* Number of branch fetches per cycle. */
+        statistics::Formula branchRate;
+
+        /* Number of cycles stalled due to an icache miss */
+        statistics::Scalar icacheStallCycles;
+
         /* Number of times fetch was asked to suspend by Execute */
         statistics::Scalar numFetchSuspends;
 
diff --git a/src/cpu/o3/commit.cc b/src/cpu/o3/commit.cc
index 63bf7aebc4..82ecc0140e 100644
--- a/src/cpu/o3/commit.cc
+++ b/src/cpu/o3/commit.cc
@@ -1019,6 +1019,8 @@ Commit::commitInsts()
 
             if (commit_success) {
                 ++num_committed;
+                cpu->commitStats[tid]
+                    ->committedInstType[head_inst->opClass()]++;
                 stats.committedInstType[tid][head_inst->opClass()]++;
                 ppCommit->notify(head_inst);
 
diff --git a/src/cpu/o3/fetch.cc b/src/cpu/o3/fetch.cc
index d3cdd2c761..89d1b81197 100644
--- a/src/cpu/o3/fetch.cc
+++ b/src/cpu/o3/fetch.cc
@@ -540,6 +540,8 @@ Fetch::lookupAndUpdateNextPC(const DynInstPtr &inst, PCStateBase &next_pc)
     inst->setPredTarg(next_pc);
     inst->setPredTaken(predict_taken);
 
+    // update both old and new stats
+    cpu->fetchStats[tid]->numBranches++;
     ++fetchStats.branches;
 
     if (predict_taken) {
@@ -1145,8 +1147,11 @@ Fetch::fetch(bool &status_change)
 
             fetchCacheLine(fetchAddr, tid, this_pc.instAddr());
 
-            if (fetchStatus[tid] == IcacheWaitResponse)
+            if (fetchStatus[tid] == IcacheWaitResponse) {
+                // update both old and new stats
                 ++fetchStats.icacheStallCycles;
+                cpu->fetchStats[tid]->icacheStallCycles++;
+            }
             else if (fetchStatus[tid] == ItlbWait)
                 ++fetchStats.tlbCycles;
             else
@@ -1242,7 +1247,9 @@ Fetch::fetch(bool &status_change)
                     staticInst = dec_ptr->decode(this_pc);
 
                     // Increment stat of fetched instructions.
+                    // Update both old and new stats
                     ++fetchStats.insts;
+                    cpu->fetchStats[tid]->numInsts++;
 
                     if (staticInst->isMacroop()) {
                         curMacroop = staticInst;
@@ -1572,6 +1579,8 @@ Fetch::profileStall(ThreadID tid)
         ++fetchStats.squashCycles;
         DPRINTF(Fetch, "[tid:%i] Fetch is squashing!\n", tid);
     } else if (fetchStatus[tid] == IcacheWaitResponse) {
+        // update both old and new stats
+        cpu->fetchStats[tid]->icacheStallCycles++;
         ++fetchStats.icacheStallCycles;
         DPRINTF(Fetch, "[tid:%i] Fetch is waiting cache response!\n",
                 tid);

From 1d035e1e20d3f8658661613cd749bb3a9f4d27cc Mon Sep 17 00:00:00 2001
From: Melissa Jost <melissakjost@gmail.com>
Date: Mon, 13 Mar 2023 11:24:37 -0700
Subject: [PATCH 372/492] cpu-o3: Copy O3 IEW stats to BaseCPU::ExecuteCPUStats

Move numInsts, numBranches, numNop, numRefs, numLoadInsts, numRate to
Base. Merged numRefs into numMemRefs of ExecuteCPUStats. Renamed
numRate to instRate. Updated formatting in ExecuteCPUStats group.

Change-Id: Ibe4c121ac1e04f1c989d4786a52acd5878a43df0
Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/69103
Tested-by: kokoro <noreply+kokoro@google.com>
Reviewed-by: Bobby Bruce <bbruce@ucdavis.edu>
Maintainer: Bobby Bruce <bbruce@ucdavis.edu>
---
 src/cpu/base.cc   | 22 +++++++++++++++++++++-
 src/cpu/base.hh   | 13 +++++++++++++
 src/cpu/o3/iew.cc | 13 ++++++++++++-
 3 files changed, 46 insertions(+), 2 deletions(-)

diff --git a/src/cpu/base.cc b/src/cpu/base.cc
index 7c1930744c..e5a661b6ed 100644
--- a/src/cpu/base.cc
+++ b/src/cpu/base.cc
@@ -203,7 +203,12 @@ BaseCPU::BaseCPU(const Params &p, bool is_checker)
             baseStats.numCycles;
         fetchStats.emplace_back(fetchStatptr);
 
-        executeStats.emplace_back(new ExecuteCPUStats(this, i));
+        // create executeStat object for thread i and set rate formulas
+        ExecuteCPUStats* executeStatptr = new ExecuteCPUStats(this, i);
+        executeStatptr->instRate = executeStatptr->numInsts /
+            baseStats.numCycles;
+        executeStats.emplace_back(executeStatptr);
+
         // create commitStat object for thread i and set ipc, cpi formulas
         CommitCPUStats* commitStatptr = new CommitCPUStats(this, i);
         commitStatptr->ipc = commitStatptr->numInsts / baseStats.numCycles;
@@ -899,6 +904,19 @@ FetchCPUStats::FetchCPUStats(statistics::Group *parent, int thread_id)
 BaseCPU::
 ExecuteCPUStats::ExecuteCPUStats(statistics::Group *parent, int thread_id)
     : statistics::Group(parent, csprintf("executeStats%i", thread_id).c_str()),
+    ADD_STAT(numInsts, statistics::units::Count::get(),
+             "Number of executed instructions"),
+    ADD_STAT(numNop, statistics::units::Count::get(),
+             "Number of nop insts executed"),
+    ADD_STAT(numBranches, statistics::units::Count::get(),
+             "Number of branches executed"),
+    ADD_STAT(numLoadInsts, statistics::units::Count::get(),
+             "Number of load instructions executed"),
+    ADD_STAT(numStoreInsts, statistics::units::Count::get(),
+             "Number of stores executed"),
+    ADD_STAT(instRate, statistics::units::Rate<
+                statistics::units::Count, statistics::units::Cycle>::get(),
+             "Inst execution rate"),
     ADD_STAT(dcacheStallCycles, statistics::units::Cycle::get(),
              "DCache total stall cycles"),
     ADD_STAT(numCCRegReads, statistics::units::Count::get(),
@@ -937,6 +955,8 @@ ExecuteCPUStats::ExecuteCPUStats(statistics::Group *parent, int thread_id)
              "Number of ops (including micro ops) which were discarded before "
              "commit")
 {
+    numStoreInsts = numMemRefs - numLoadInsts;
+
     dcacheStallCycles
         .prereq(dcacheStallCycles);
     numCCRegReads
diff --git a/src/cpu/base.hh b/src/cpu/base.hh
index 946ea6ba87..3976b66fe4 100644
--- a/src/cpu/base.hh
+++ b/src/cpu/base.hh
@@ -717,6 +717,19 @@ class BaseCPU : public ClockedObject
     {
         ExecuteCPUStats(statistics::Group *parent, int thread_id);
 
+        /* Stat for total number of executed instructions */
+        statistics::Scalar numInsts;
+        /* Number of executed nops */
+        statistics::Scalar numNop;
+        /* Number of executed branches */
+        statistics::Scalar numBranches;
+        /* Stat for total number of executed load instructions */
+        statistics::Scalar numLoadInsts;
+        /* Number of executed store instructions */
+        statistics::Formula numStoreInsts;
+        /* Number of instructions executed per cycle */
+        statistics::Formula instRate;
+
         /* Number of cycles stalled for D-cache responses */
         statistics::Scalar dcacheStallCycles;
 
diff --git a/src/cpu/o3/iew.cc b/src/cpu/o3/iew.cc
index e67bc69b9b..1b3598cea7 100644
--- a/src/cpu/o3/iew.cc
+++ b/src/cpu/o3/iew.cc
@@ -1053,7 +1053,9 @@ IEW::dispatchInsts(ThreadID tid)
 
             instQueue.recordProducer(inst);
 
+            // update both old and new stats
             iewStats.executedInstStats.numNop[tid]++;
+            cpu->executeStats[tid]->numNop++;
 
             add_to_iq = false;
         } else {
@@ -1561,7 +1563,9 @@ IEW::updateExeInstStats(const DynInstPtr& inst)
 {
     ThreadID tid = inst->threadNumber;
 
+    // update both old and new stats
     iewStats.executedInstStats.numInsts++;
+    cpu->executeStats[tid]->numInsts++;
 
 #if TRACING_ON
     if (debug::O3PipeView) {
@@ -1572,17 +1576,24 @@ IEW::updateExeInstStats(const DynInstPtr& inst)
     //
     //  Control operations
     //
-    if (inst->isControl())
+    if (inst->isControl()) {
+        // update both old and new stats
         iewStats.executedInstStats.numBranches[tid]++;
+        cpu->executeStats[tid]->numBranches++;
+    }
 
     //
     //  Memory operations
     //
     if (inst->isMemRef()) {
+        // update both old and new stats
         iewStats.executedInstStats.numRefs[tid]++;
+        cpu->executeStats[tid]->numMemRefs++;
 
         if (inst->isLoad()) {
+            // update both old and new stats
             iewStats.executedInstStats.numLoadInsts[tid]++;
+            cpu->executeStats[tid]->numLoadInsts++;
         }
     }
 }

From a882373e82643f9b7ad9cd8a37ae216a1d721b34 Mon Sep 17 00:00:00 2001
From: Melissa Jost <melissakjost@gmail.com>
Date: Mon, 13 Mar 2023 11:25:59 -0700
Subject: [PATCH 373/492] cpu-kvm: Implement IPC and CPI base stats for KVM CPU

Replaced committedInsts stats of KVM CPU with commitStats.numInsts
of BaseCPU. This results in IPC and CPI printing in stats.txt for
KVM simulation.

Change-Id: Ia7713f88f15e3cabd4c96a8c2921515340bc71e2
Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/69104
Reviewed-by: Andreas Sandberg <andreas.sandberg@arm.com>
Maintainer: Andreas Sandberg <andreas.sandberg@arm.com>
Tested-by: kokoro <noreply+kokoro@google.com>
---
 src/cpu/kvm/base.cc | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/src/cpu/kvm/base.cc b/src/cpu/kvm/base.cc
index b76bddc2fd..5857f696dd 100644
--- a/src/cpu/kvm/base.cc
+++ b/src/cpu/kvm/base.cc
@@ -779,6 +779,9 @@ BaseKvmCPU::kvmRun(Tick ticks)
         /* Update statistics */
         baseStats.numCycles += simCyclesExecuted;;
         stats.committedInsts += instsExecuted;
+        // update both old and new stats
+        commitStats[thread->threadId()]->numInsts += instsExecuted;
+        baseStats.numInsts += instsExecuted;
         ctrInsts += instsExecuted;
 
         DPRINTF(KvmRun,

From 19323c8bd704f9ad95c639a71e653b30a98d3595 Mon Sep 17 00:00:00 2001
From: Melissa Jost <melissakjost@gmail.com>
Date: Mon, 20 Mar 2023 00:01:53 -0700
Subject: [PATCH 374/492] cpu: Remove duplicated fetch stats

This removes numFetchSuspends and duplicates numBranches calls
so we only have the updated fetch stats outputting.

Change-Id: Ia7a6830ee947f5c67386dd1e6e3db1744a7ee43c
Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/69105
Reviewed-by: Bobby Bruce <bbruce@ucdavis.edu>
Maintainer: Bobby Bruce <bbruce@ucdavis.edu>
Tested-by: kokoro <noreply+kokoro@google.com>
---
 src/cpu/minor/execute.cc       | 2 --
 src/cpu/minor/stats.cc         | 2 --
 src/cpu/minor/stats.hh         | 3 ---
 src/cpu/simple/base.cc         | 2 --
 src/cpu/simple/exec_context.hh | 7 -------
 5 files changed, 16 deletions(-)

diff --git a/src/cpu/minor/execute.cc b/src/cpu/minor/execute.cc
index a65a77e643..0d704c7135 100644
--- a/src/cpu/minor/execute.cc
+++ b/src/cpu/minor/execute.cc
@@ -1062,8 +1062,6 @@ Execute::commitInst(MinorDynInstPtr inst, bool early_memory_issue,
             DPRINTF(MinorInterrupt, "Suspending thread: %d from Execute"
                 " inst: %s\n", thread_id, *inst);
 
-            // output both old and new stats
-            cpu.stats.numFetchSuspends++;
             cpu.fetchStats[thread_id]->numFetchSuspends++;
 
             updateBranchData(thread_id, BranchData::SuspendThread, inst,
diff --git a/src/cpu/minor/stats.cc b/src/cpu/minor/stats.cc
index 64d4c475e0..e9ca562c16 100644
--- a/src/cpu/minor/stats.cc
+++ b/src/cpu/minor/stats.cc
@@ -52,8 +52,6 @@ MinorStats::MinorStats(BaseCPU *base_cpu)
     ADD_STAT(numDiscardedOps, statistics::units::Count::get(),
              "Number of ops (including micro ops) which were discarded before "
              "commit"),
-    ADD_STAT(numFetchSuspends, statistics::units::Count::get(),
-             "Number of times Execute suspended instruction fetching"),
     ADD_STAT(quiesceCycles, statistics::units::Cycle::get(),
              "Total number of cycles that CPU has spent quiesced or waiting "
              "for an interrupt"),
diff --git a/src/cpu/minor/stats.hh b/src/cpu/minor/stats.hh
index 1ab81f4407..524d20f85d 100644
--- a/src/cpu/minor/stats.hh
+++ b/src/cpu/minor/stats.hh
@@ -68,9 +68,6 @@ struct MinorStats : public statistics::Group
     /** Number of ops discarded before committing */
     statistics::Scalar numDiscardedOps;
 
-    /** Number of times fetch was asked to suspend by Execute */
-    statistics::Scalar numFetchSuspends;
-
     /** Number of cycles in quiescent state */
     statistics::Scalar quiesceCycles;
 
diff --git a/src/cpu/simple/base.cc b/src/cpu/simple/base.cc
index 9e831a25f6..4f2665401f 100644
--- a/src/cpu/simple/base.cc
+++ b/src/cpu/simple/base.cc
@@ -431,8 +431,6 @@ BaseSimpleCPU::postExecute()
     }
 
     if (curStaticInst->isControl()) {
-        // output both old and new stats
-        ++t_info.execContextStats.numBranches;
         ++fetchStats[t_info.thread->threadId()]->numBranches;
     }
 
diff --git a/src/cpu/simple/exec_context.hh b/src/cpu/simple/exec_context.hh
index 31aa5d44c7..3b6593da9d 100644
--- a/src/cpu/simple/exec_context.hh
+++ b/src/cpu/simple/exec_context.hh
@@ -152,8 +152,6 @@ class SimpleExecContext : public ExecContext
                        "ICache total stall cycles"),
               ADD_STAT(dcacheStallCycles, statistics::units::Cycle::get(),
                        "DCache total stall cycles"),
-              ADD_STAT(numBranches, statistics::units::Count::get(),
-                       "Number of branches fetched"),
               ADD_STAT(numPredictedBranches, statistics::units::Count::get(),
                        "Number of branches predicted as taken"),
               ADD_STAT(numBranchMispred, statistics::units::Count::get(),
@@ -204,9 +202,6 @@ class SimpleExecContext : public ExecContext
             numIdleCycles = idleFraction * cpu->baseStats.numCycles;
             numBusyCycles = notIdleFraction * cpu->baseStats.numCycles;
 
-            numBranches
-                .prereq(numBranches);
-
             numPredictedBranches
                 .prereq(numPredictedBranches);
 
@@ -298,8 +293,6 @@ class SimpleExecContext : public ExecContext
         statistics::Scalar dcacheStallCycles;
 
         /// @{
-        /// Total number of branches fetched
-        statistics::Scalar numBranches;
         /// Number of branches predicted as taken
         statistics::Scalar numPredictedBranches;
         /// Number of misprediced branches

From 1cf1867ffa0c9498b04c8b961fcc73d6c45a85a7 Mon Sep 17 00:00:00 2001
From: Melissa Jost <melissakjost@gmail.com>
Date: Mon, 20 Mar 2023 00:17:38 -0700
Subject: [PATCH 375/492] cpu: Remove duplicated commit stats

This removes committedInstType and comittedControl from minor CPU,
stat branches from O3 commit stage, and O3 commit stats floating,
integer, loads, memRefs, and vectorInstructions.

Change-Id: I57abea0881eaaea52da3f365078d6b0e2ea1bfeb
Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/69106
Tested-by: kokoro <noreply+kokoro@google.com>
Maintainer: Bobby Bruce <bbruce@ucdavis.edu>
Reviewed-by: Bobby Bruce <bbruce@ucdavis.edu>
---
 src/cpu/minor/execute.cc       | 36 -----------------------
 src/cpu/minor/stats.cc         | 16 +----------
 src/cpu/minor/stats.hh         |  6 ----
 src/cpu/o3/commit.cc           | 52 ----------------------------------
 src/cpu/o3/commit.hh           | 12 --------
 src/cpu/simple/base.cc         | 17 -----------
 src/cpu/simple/exec_context.hh | 39 -------------------------
 7 files changed, 1 insertion(+), 177 deletions(-)

diff --git a/src/cpu/minor/execute.cc b/src/cpu/minor/execute.cc
index 0d704c7135..a2f92683c4 100644
--- a/src/cpu/minor/execute.cc
+++ b/src/cpu/minor/execute.cc
@@ -882,46 +882,10 @@ Execute::doInstCommitAccounting(MinorDynInstPtr inst)
     thread->numOp++;
     thread->threadStats.numOps++;
     cpu.stats.numOps++;
-    // update both old and new stats
     cpu.commitStats[inst->id.threadId]->numOps++;
     cpu.baseStats.numOps++;
     cpu.commitStats[inst->id.threadId]
         ->committedInstType[inst->staticInst->opClass()]++;
-    cpu.stats.committedInstType[inst->id.threadId]
-                               [inst->staticInst->opClass()]++;
-
-    /** Add a count for every control instruction */
-    if (inst->staticInst->isControl()) {
-        if (inst->staticInst->isReturn()) {
-            cpu.stats.committedControl[inst->id.threadId]
-                        [gem5::StaticInstFlags::Flags::IsReturn]++;
-        }
-        if (inst->staticInst->isCall()) {
-            cpu.stats.committedControl[inst->id.threadId]
-                        [gem5::StaticInstFlags::Flags::IsCall]++;
-        }
-        if (inst->staticInst->isDirectCtrl()) {
-            cpu.stats.committedControl[inst->id.threadId]
-                        [gem5::StaticInstFlags::Flags::IsDirectControl]++;
-        }
-        if (inst->staticInst->isIndirectCtrl()) {
-            cpu.stats.committedControl[inst->id.threadId]
-                        [gem5::StaticInstFlags::Flags::IsIndirectControl]++;
-        }
-        if (inst->staticInst->isCondCtrl()) {
-            cpu.stats.committedControl[inst->id.threadId]
-                        [gem5::StaticInstFlags::Flags::IsCondControl]++;
-        }
-        if (inst->staticInst->isUncondCtrl()) {
-            cpu.stats.committedControl[inst->id.threadId]
-                        [gem5::StaticInstFlags::Flags::IsUncondControl]++;
-
-        }
-        cpu.stats.committedControl[inst->id.threadId]
-                        [gem5::StaticInstFlags::Flags::IsControl]++;
-    }
-
-
 
     /* Set the CP SeqNum to the numOps commit number */
     if (inst->traceData)
diff --git a/src/cpu/minor/stats.cc b/src/cpu/minor/stats.cc
index e9ca562c16..512a67bf36 100644
--- a/src/cpu/minor/stats.cc
+++ b/src/cpu/minor/stats.cc
@@ -60,11 +60,7 @@ MinorStats::MinorStats(BaseCPU *base_cpu)
              "CPI: cycles per instruction"),
     ADD_STAT(ipc, statistics::units::Rate<
                 statistics::units::Count, statistics::units::Cycle>::get(),
-             "IPC: instructions per cycle"),
-    ADD_STAT(committedInstType, statistics::units::Count::get(),
-             "Class of committed instruction"),
-    ADD_STAT(committedControl, statistics::units::Count::get(),
-             "Class of control type instructions committed")
+             "IPC: instructions per cycle")
 
 {
     quiesceCycles.prereq(quiesceCycles);
@@ -74,16 +70,6 @@ MinorStats::MinorStats(BaseCPU *base_cpu)
 
     ipc.precision(6);
     ipc = numInsts / base_cpu->baseStats.numCycles;
-
-    committedInstType
-        .init(base_cpu->numThreads, enums::Num_OpClass)
-        .flags(statistics::total | statistics::pdf | statistics::dist);
-    committedInstType.ysubnames(enums::OpClassStrings);
-
-    committedControl
-        .init(base_cpu->numThreads, StaticInstFlags::Flags::Num_Flags)
-        .flags(statistics::nozero);
-    committedControl.ysubnames(StaticInstFlags::FlagsStrings);
 }
 
 } // namespace minor
diff --git a/src/cpu/minor/stats.hh b/src/cpu/minor/stats.hh
index 524d20f85d..4ab8743c77 100644
--- a/src/cpu/minor/stats.hh
+++ b/src/cpu/minor/stats.hh
@@ -75,12 +75,6 @@ struct MinorStats : public statistics::Group
     statistics::Formula cpi;
     statistics::Formula ipc;
 
-    /** Number of instructions by type (OpClass) */
-    statistics::Vector2d committedInstType;
-
-    /** Number of branches commited */
-    statistics::Vector2d committedControl;
-
 };
 
 } // namespace minor
diff --git a/src/cpu/o3/commit.cc b/src/cpu/o3/commit.cc
index 82ecc0140e..5a0a6b21e3 100644
--- a/src/cpu/o3/commit.cc
+++ b/src/cpu/o3/commit.cc
@@ -160,21 +160,10 @@ Commit::CommitStats::CommitStats(CPU *cpu, Commit *commit)
                "Number of instructions committed"),
       ADD_STAT(opsCommitted, statistics::units::Count::get(),
                "Number of ops (including micro ops) committed"),
-      ADD_STAT(memRefs, statistics::units::Count::get(),
-               "Number of memory references committed"),
-      ADD_STAT(loads, statistics::units::Count::get(), "Number of loads committed"),
       ADD_STAT(amos, statistics::units::Count::get(),
                "Number of atomic instructions committed"),
       ADD_STAT(membars, statistics::units::Count::get(),
                "Number of memory barriers committed"),
-      ADD_STAT(branches, statistics::units::Count::get(),
-               "Number of branches committed"),
-      ADD_STAT(vectorInstructions, statistics::units::Count::get(),
-               "Number of committed Vector instructions."),
-      ADD_STAT(floating, statistics::units::Count::get(),
-               "Number of committed floating point instructions."),
-      ADD_STAT(integer, statistics::units::Count::get(),
-               "Number of committed integer instructions."),
       ADD_STAT(functionCalls, statistics::units::Count::get(),
                "Number of function calls committed."),
       ADD_STAT(committedInstType, statistics::units::Count::get(),
@@ -200,14 +189,6 @@ Commit::CommitStats::CommitStats(CPU *cpu, Commit *commit)
         .init(cpu->numThreads)
         .flags(total);
 
-    memRefs
-        .init(cpu->numThreads)
-        .flags(total);
-
-    loads
-        .init(cpu->numThreads)
-        .flags(total);
-
     amos
         .init(cpu->numThreads)
         .flags(total);
@@ -216,22 +197,6 @@ Commit::CommitStats::CommitStats(CPU *cpu, Commit *commit)
         .init(cpu->numThreads)
         .flags(total);
 
-    branches
-        .init(cpu->numThreads)
-        .flags(total);
-
-    vectorInstructions
-        .init(cpu->numThreads)
-        .flags(total);
-
-    floating
-        .init(cpu->numThreads)
-        .flags(total);
-
-    integer
-        .init(cpu->numThreads)
-        .flags(total);
-
     functionCalls
         .init(commit->numThreads)
         .flags(total);
@@ -1405,29 +1370,18 @@ Commit::updateComInstStats(const DynInstPtr &inst)
     //
     //  Control Instructions
     //
-    // update both old and new stats
     cpu->commitStats[tid]->updateComCtrlStats(inst->staticInst);
-    if (inst->isControl())
-        stats.branches[tid]++;
 
     //
     //  Memory references
     //
     if (inst->isMemRef()) {
-        // update both old and new stats
-        stats.memRefs[tid]++;
         cpu->commitStats[tid]->numMemRefs++;
 
         if (inst->isLoad()) {
-            // update both old and new stats
-            stats.loads[tid]++;
             cpu->commitStats[tid]->numLoadInsts++;
         }
 
-        if (inst->isAtomic()) {
-            stats.amos[tid]++;
-        }
-
         if (inst->isStore()) {
             cpu->commitStats[tid]->numStoreInsts++;
         }
@@ -1439,22 +1393,16 @@ Commit::updateComInstStats(const DynInstPtr &inst)
 
     // Integer Instruction
     if (inst->isInteger()) {
-        // update both old and new stats
         cpu->commitStats[tid]->numIntInsts++;
-        stats.integer[tid]++;
     }
 
     // Floating Point Instruction
     if (inst->isFloating()) {
-        // update both old and new stats
         cpu->commitStats[tid]->numFpInsts++;
-        stats.floating[tid]++;
     }
     // Vector Instruction
     if (inst->isVector()) {
-        // update both old and new stats
         cpu->commitStats[tid]->numVecInsts++;
-        stats.vectorInstructions[tid]++;
     }
 
     // Function Calls
diff --git a/src/cpu/o3/commit.hh b/src/cpu/o3/commit.hh
index cf4eaf5d92..6591360197 100644
--- a/src/cpu/o3/commit.hh
+++ b/src/cpu/o3/commit.hh
@@ -483,22 +483,10 @@ class Commit
         statistics::Vector instsCommitted;
         /** Total number of ops (including micro ops) committed. */
         statistics::Vector opsCommitted;
-        /** Stat for the total number of committed memory references. */
-        statistics::Vector memRefs;
-        /** Stat for the total number of committed loads. */
-        statistics::Vector loads;
         /** Stat for the total number of committed atomics. */
         statistics::Vector amos;
         /** Total number of committed memory barriers. */
         statistics::Vector membars;
-        /** Total number of committed branches. */
-        statistics::Vector branches;
-        /** Total number of vector instructions */
-        statistics::Vector vectorInstructions;
-        /** Total number of floating point instructions */
-        statistics::Vector floating;
-        /** Total number of integer instructions */
-        statistics::Vector integer;
         /** Total number of function calls */
         statistics::Vector functionCalls;
         /** Committed instructions by instruction type (OpClass) */
diff --git a/src/cpu/simple/base.cc b/src/cpu/simple/base.cc
index 4f2665401f..eeb927ffed 100644
--- a/src/cpu/simple/base.cc
+++ b/src/cpu/simple/base.cc
@@ -437,29 +437,23 @@ BaseSimpleCPU::postExecute()
     /* Power model statistics */
     //integer alu accesses
     if (curStaticInst->isInteger()){
-        // update both old and new stats
         executeStats[t_info.thread->threadId()]->numIntAluAccesses++;
         commitStats[t_info.thread->threadId()]->numIntInsts++;
         t_info.execContextStats.numIntAluAccesses++;
-        t_info.execContextStats.numIntInsts++;
     }
 
     //float alu accesses
     if (curStaticInst->isFloating()){
-        // update both old and new stats
         executeStats[t_info.thread->threadId()]->numFpAluAccesses++;
         commitStats[t_info.thread->threadId()]->numFpInsts++;
         t_info.execContextStats.numFpAluAccesses++;
-        t_info.execContextStats.numFpInsts++;
     }
 
     //vector alu accesses
     if (curStaticInst->isVector()){
-        // update both old and new stats
         executeStats[t_info.thread->threadId()]->numVecAluAccesses++;
         commitStats[t_info.thread->threadId()]->numVecInsts++;
         t_info.execContextStats.numVecAluAccesses++;
-        t_info.execContextStats.numVecInsts++;
     }
 
     //Matrix alu accesses
@@ -473,30 +467,19 @@ BaseSimpleCPU::postExecute()
         t_info.execContextStats.numCallsReturns++;
     }
 
-    //the number of branch predictions that will be made
-    if (curStaticInst->isCondCtrl()){
-        t_info.execContextStats.numCondCtrlInsts++;
-    }
-
     //result bus acceses
     if (curStaticInst->isLoad()){
-        // update both old and new stats
         commitStats[t_info.thread->threadId()]->numLoadInsts++;
-        t_info.execContextStats.numLoadInsts++;
     }
 
     if (curStaticInst->isStore() || curStaticInst->isAtomic()){
-        // update both old and new stats
         commitStats[t_info.thread->threadId()]->numStoreInsts++;
-        t_info.execContextStats.numStoreInsts++;
     }
     /* End power model statistics */
 
-    // update both old and new stats
     commitStats[t_info.thread->threadId()]
         ->committedInstType[curStaticInst->opClass()]++;
     commitStats[t_info.thread->threadId()]->updateComCtrlStats(curStaticInst);
-    t_info.execContextStats.statExecutedInstType[curStaticInst->opClass()]++;
 
     /* increment the committed numInsts and numOps stats */
     countCommitInst();
diff --git a/src/cpu/simple/exec_context.hh b/src/cpu/simple/exec_context.hh
index 3b6593da9d..78952cbe75 100644
--- a/src/cpu/simple/exec_context.hh
+++ b/src/cpu/simple/exec_context.hh
@@ -100,14 +100,6 @@ class SimpleExecContext : public ExecContext
                        "Number of matrix alu accesses"),
               ADD_STAT(numCallsReturns, statistics::units::Count::get(),
                        "Number of times a function call or return occured"),
-              ADD_STAT(numCondCtrlInsts, statistics::units::Count::get(),
-                       "Number of instructions that are conditional controls"),
-              ADD_STAT(numIntInsts, statistics::units::Count::get(),
-                       "Number of integer instructions"),
-              ADD_STAT(numFpInsts, statistics::units::Count::get(),
-                       "Number of float instructions"),
-              ADD_STAT(numVecInsts, statistics::units::Count::get(),
-                       "Number of vector instructions"),
               ADD_STAT(numMatInsts, statistics::units::Count::get(),
                        "Number of matrix instructions"),
               ADD_STAT(numIntRegReads, statistics::units::Count::get(),
@@ -136,10 +128,6 @@ class SimpleExecContext : public ExecContext
                        "Number of times the Misc registers were written"),
               ADD_STAT(numMemRefs, statistics::units::Count::get(),
                        "Number of memory refs"),
-              ADD_STAT(numLoadInsts, statistics::units::Count::get(),
-                       "Number of load instructions"),
-              ADD_STAT(numStoreInsts, statistics::units::Count::get(),
-                       "Number of store instructions"),
               ADD_STAT(numIdleCycles, statistics::units::Cycle::get(),
                        "Number of idle cycles"),
               ADD_STAT(numBusyCycles, statistics::units::Cycle::get(),
@@ -156,8 +144,6 @@ class SimpleExecContext : public ExecContext
                        "Number of branches predicted as taken"),
               ADD_STAT(numBranchMispred, statistics::units::Count::get(),
                        "Number of branch mispredictions"),
-              ADD_STAT(statExecutedInstType, statistics::units::Count::get(),
-                       "Class of executed instruction."),
               numRegReads{
                   &(cpu->executeStats[thread->threadId()]->numIntRegReads),
                   &(cpu->executeStats[thread->threadId()]->numFpRegReads),
@@ -190,14 +176,6 @@ class SimpleExecContext : public ExecContext
             dcacheStallCycles
                 .prereq(dcacheStallCycles);
 
-            statExecutedInstType
-                .init(enums::Num_OpClass)
-                .flags(statistics::total | statistics::pdf | statistics::dist);
-
-            for (unsigned i = 0; i < Num_OpClasses; ++i) {
-                statExecutedInstType.subname(i, enums::OpClassStrings[i]);
-            }
-
             idleFraction = statistics::constant(1.0) - notIdleFraction;
             numIdleCycles = idleFraction * cpu->baseStats.numCycles;
             numBusyCycles = notIdleFraction * cpu->baseStats.numCycles;
@@ -228,18 +206,6 @@ class SimpleExecContext : public ExecContext
         // Number of function calls/returns
         statistics::Scalar numCallsReturns;
 
-        // Conditional control instructions;
-        statistics::Scalar numCondCtrlInsts;
-
-        // Number of int instructions
-        statistics::Scalar numIntInsts;
-
-        // Number of float instructions
-        statistics::Scalar numFpInsts;
-
-        // Number of vector instructions
-        statistics::Scalar numVecInsts;
-
         // Number of matrix instructions
         statistics::Scalar numMatInsts;
 
@@ -273,8 +239,6 @@ class SimpleExecContext : public ExecContext
 
         // Number of simulated memory references
         statistics::Scalar numMemRefs;
-        statistics::Scalar numLoadInsts;
-        statistics::Scalar numStoreInsts;
 
         // Number of idle cycles
         statistics::Formula numIdleCycles;
@@ -299,9 +263,6 @@ class SimpleExecContext : public ExecContext
         statistics::Scalar numBranchMispred;
         /// @}
 
-        // Instruction mix histogram by OpClass
-        statistics::Vector statExecutedInstType;
-
         std::array<statistics::Scalar *, CCRegClass + 1> numRegReads;
         std::array<statistics::Scalar *, CCRegClass + 1> numRegWrites;
 

From 7403a298cc2dc8f928cda42a3726d29dd1473d78 Mon Sep 17 00:00:00 2001
From: Melissa Jost <melissakjost@gmail.com>
Date: Mon, 20 Mar 2023 00:34:31 -0700
Subject: [PATCH 376/492] cpu: Remove duplicated execute stats

This removes ccRegfileReads, ccRegfileWrites, fpRegfileReads,
fpRegfileWrites, intRegfileReads, intRegfileWrites, miscRegfileReads,
miscRegfileWrites, vecPredRegfileReads, vecPredRegfileWrites,
vecRegfileReads, and vecRegfileWrites are removed from cpu.hh and
cpu.cc in O3CPU. The corresponding stats in BaseCPU::ExecuteCPUStats
are used instead. Changed the getReg, getWritableReg, and setReg
functions in the O3 CPU object to take the thread ID as a parameter.
This is because the stats in base are stored in vectors that are
indexed by the thread ID.

The stats moved from SimpleCPU are dcacheStallCycles,
icacheStallCycles, numCCRegReads, numCCRegWrites, numFpAluAccesses,
numFpRegReads, numFpRegWrites, numIntAluAccesses, numIntRegReads,
numIntRegWrites, numMemRefs, numMiscRegReads, numMiscRegWrites,
numVecAluAccesses, numVecPredRegReads, numVecPredRegWrites,
numVecRegReads, numVecRegWrites.

The stat moved from MinorCPU is numDiscardedOps.

Change-Id: I843af63b3db639858083bdea708de961f23b3048
Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/69107
Maintainer: Bobby Bruce <bbruce@ucdavis.edu>
Tested-by: kokoro <noreply+kokoro@google.com>
Reviewed-by: Bobby Bruce <bbruce@ucdavis.edu>
---
 src/cpu/minor/execute.cc       |   2 -
 src/cpu/minor/stats.cc         |   3 -
 src/cpu/minor/stats.hh         |   3 -
 src/cpu/o3/commit.cc           |  16 ---
 src/cpu/o3/commit.hh           |   4 -
 src/cpu/o3/cpu.cc              | 235 +--------------------------------
 src/cpu/o3/cpu.hh              |  44 ------
 src/cpu/o3/dyn_inst.hh         |  17 +--
 src/cpu/o3/fetch.cc            |  33 +----
 src/cpu/o3/fetch.hh            |  10 --
 src/cpu/o3/iew.cc              |  50 +------
 src/cpu/o3/iew.hh              |  14 --
 src/cpu/simple/base.cc         |   5 -
 src/cpu/simple/exec_context.hh |  98 --------------
 14 files changed, 4 insertions(+), 530 deletions(-)

diff --git a/src/cpu/minor/execute.cc b/src/cpu/minor/execute.cc
index a2f92683c4..5df00d3f39 100644
--- a/src/cpu/minor/execute.cc
+++ b/src/cpu/minor/execute.cc
@@ -1340,8 +1340,6 @@ Execute::commit(ThreadID thread_id, bool only_commit_microops, bool discard,
                 *inst, ex_info.streamSeqNum);
 
             if (fault == NoFault) {
-                // output both old and new stats
-                cpu.stats.numDiscardedOps++;
                 cpu.executeStats[thread_id]->numDiscardedOps++;
             }
         }
diff --git a/src/cpu/minor/stats.cc b/src/cpu/minor/stats.cc
index 512a67bf36..818db8c360 100644
--- a/src/cpu/minor/stats.cc
+++ b/src/cpu/minor/stats.cc
@@ -49,9 +49,6 @@ MinorStats::MinorStats(BaseCPU *base_cpu)
              "Number of instructions committed"),
     ADD_STAT(numOps, statistics::units::Count::get(),
              "Number of ops (including micro ops) committed"),
-    ADD_STAT(numDiscardedOps, statistics::units::Count::get(),
-             "Number of ops (including micro ops) which were discarded before "
-             "commit"),
     ADD_STAT(quiesceCycles, statistics::units::Cycle::get(),
              "Total number of cycles that CPU has spent quiesced or waiting "
              "for an interrupt"),
diff --git a/src/cpu/minor/stats.hh b/src/cpu/minor/stats.hh
index 4ab8743c77..f7d5e71dfa 100644
--- a/src/cpu/minor/stats.hh
+++ b/src/cpu/minor/stats.hh
@@ -65,9 +65,6 @@ struct MinorStats : public statistics::Group
     /** Number of simulated insts and microops */
     statistics::Scalar numOps;
 
-    /** Number of ops discarded before committing */
-    statistics::Scalar numDiscardedOps;
-
     /** Number of cycles in quiescent state */
     statistics::Scalar quiesceCycles;
 
diff --git a/src/cpu/o3/commit.cc b/src/cpu/o3/commit.cc
index 5a0a6b21e3..266e59e2e3 100644
--- a/src/cpu/o3/commit.cc
+++ b/src/cpu/o3/commit.cc
@@ -156,10 +156,6 @@ Commit::CommitStats::CommitStats(CPU *cpu, Commit *commit)
                "The number of times a branch was mispredicted"),
       ADD_STAT(numCommittedDist, statistics::units::Count::get(),
                "Number of insts commited each cycle"),
-      ADD_STAT(instsCommitted, statistics::units::Count::get(),
-               "Number of instructions committed"),
-      ADD_STAT(opsCommitted, statistics::units::Count::get(),
-               "Number of ops (including micro ops) committed"),
       ADD_STAT(amos, statistics::units::Count::get(),
                "Number of atomic instructions committed"),
       ADD_STAT(membars, statistics::units::Count::get(),
@@ -181,14 +177,6 @@ Commit::CommitStats::CommitStats(CPU *cpu, Commit *commit)
         .init(0,commit->commitWidth,1)
         .flags(statistics::pdf);
 
-    instsCommitted
-        .init(cpu->numThreads)
-        .flags(total);
-
-    opsCommitted
-        .init(cpu->numThreads)
-        .flags(total);
-
     amos
         .init(cpu->numThreads)
         .flags(total);
@@ -1351,13 +1339,9 @@ Commit::updateComInstStats(const DynInstPtr &inst)
     ThreadID tid = inst->threadNumber;
 
     if (!inst->isMicroop() || inst->isLastMicroop()) {
-        // update both old and new stats
-        stats.instsCommitted[tid]++;
         cpu->commitStats[tid]->numInsts++;
         cpu->baseStats.numInsts++;
     }
-    // update both old and new stats
-    stats.opsCommitted[tid]++;
     cpu->commitStats[tid]->numOps++;
     cpu->baseStats.numOps++;
 
diff --git a/src/cpu/o3/commit.hh b/src/cpu/o3/commit.hh
index 6591360197..eccd023d45 100644
--- a/src/cpu/o3/commit.hh
+++ b/src/cpu/o3/commit.hh
@@ -479,10 +479,6 @@ class Commit
         /** Distribution of the number of committed instructions each cycle. */
         statistics::Distribution numCommittedDist;
 
-        /** Total number of instructions committed. */
-        statistics::Vector instsCommitted;
-        /** Total number of ops (including micro ops) committed. */
-        statistics::Vector opsCommitted;
         /** Stat for the total number of committed atomics. */
         statistics::Vector amos;
         /** Total number of committed memory barriers. */
diff --git a/src/cpu/o3/cpu.cc b/src/cpu/o3/cpu.cc
index 444692d47f..85cc3dbf71 100644
--- a/src/cpu/o3/cpu.cc
+++ b/src/cpu/o3/cpu.cc
@@ -328,47 +328,7 @@ CPU::CPUStats::CPUStats(CPU *cpu)
                "to idling"),
       ADD_STAT(quiesceCycles, statistics::units::Cycle::get(),
                "Total number of cycles that CPU has spent quiesced or waiting "
-               "for an interrupt"),
-      ADD_STAT(committedInsts, statistics::units::Count::get(),
-               "Number of Instructions Simulated"),
-      ADD_STAT(committedOps, statistics::units::Count::get(),
-               "Number of Ops (including micro ops) Simulated"),
-      ADD_STAT(cpi, statistics::units::Rate<
-                    statistics::units::Cycle, statistics::units::Count>::get(),
-               "CPI: Cycles Per Instruction"),
-      ADD_STAT(totalCpi, statistics::units::Rate<
-                    statistics::units::Cycle, statistics::units::Count>::get(),
-               "CPI: Total CPI of All Threads"),
-      ADD_STAT(ipc, statistics::units::Rate<
-                    statistics::units::Count, statistics::units::Cycle>::get(),
-               "IPC: Instructions Per Cycle"),
-      ADD_STAT(totalIpc, statistics::units::Rate<
-                    statistics::units::Count, statistics::units::Cycle>::get(),
-               "IPC: Total IPC of All Threads"),
-      ADD_STAT(intRegfileReads, statistics::units::Count::get(),
-               "Number of integer regfile reads"),
-      ADD_STAT(intRegfileWrites, statistics::units::Count::get(),
-               "Number of integer regfile writes"),
-      ADD_STAT(fpRegfileReads, statistics::units::Count::get(),
-               "Number of floating regfile reads"),
-      ADD_STAT(fpRegfileWrites, statistics::units::Count::get(),
-               "Number of floating regfile writes"),
-      ADD_STAT(vecRegfileReads, statistics::units::Count::get(),
-               "number of vector regfile reads"),
-      ADD_STAT(vecRegfileWrites, statistics::units::Count::get(),
-               "number of vector regfile writes"),
-      ADD_STAT(vecPredRegfileReads, statistics::units::Count::get(),
-               "number of predicate regfile reads"),
-      ADD_STAT(vecPredRegfileWrites, statistics::units::Count::get(),
-               "number of predicate regfile writes"),
-      ADD_STAT(ccRegfileReads, statistics::units::Count::get(),
-               "number of cc regfile reads"),
-      ADD_STAT(ccRegfileWrites, statistics::units::Count::get(),
-               "number of cc regfile writes"),
-      ADD_STAT(miscRegfileReads, statistics::units::Count::get(),
-               "number of misc regfile reads"),
-      ADD_STAT(miscRegfileWrites, statistics::units::Count::get(),
-               "number of misc regfile writes")
+               "for an interrupt")
 {
     // Register any of the O3CPU's stats here.
     timesIdled
@@ -379,70 +339,6 @@ CPU::CPUStats::CPUStats(CPU *cpu)
 
     quiesceCycles
         .prereq(quiesceCycles);
-
-    // Number of Instructions simulated
-    // --------------------------------
-    // Should probably be in Base CPU but need templated
-    // MaxThreads so put in here instead
-    committedInsts
-        .init(cpu->numThreads)
-        .flags(statistics::total);
-
-    committedOps
-        .init(cpu->numThreads)
-        .flags(statistics::total);
-
-    cpi
-        .precision(6);
-    cpi = cpu->baseStats.numCycles / committedInsts;
-
-    totalCpi
-        .precision(6);
-    totalCpi = cpu->baseStats.numCycles / sum(committedInsts);
-
-    ipc
-        .precision(6);
-    ipc = committedInsts / cpu->baseStats.numCycles;
-
-    totalIpc
-        .precision(6);
-    totalIpc = sum(committedInsts) / cpu->baseStats.numCycles;
-
-    intRegfileReads
-        .prereq(intRegfileReads);
-
-    intRegfileWrites
-        .prereq(intRegfileWrites);
-
-    fpRegfileReads
-        .prereq(fpRegfileReads);
-
-    fpRegfileWrites
-        .prereq(fpRegfileWrites);
-
-    vecRegfileReads
-        .prereq(vecRegfileReads);
-
-    vecRegfileWrites
-        .prereq(vecRegfileWrites);
-
-    vecPredRegfileReads
-        .prereq(vecPredRegfileReads);
-
-    vecPredRegfileWrites
-        .prereq(vecPredRegfileWrites);
-
-    ccRegfileReads
-        .prereq(ccRegfileReads);
-
-    ccRegfileWrites
-        .prereq(ccRegfileWrites);
-
-    miscRegfileReads
-        .prereq(miscRegfileReads);
-
-    miscRegfileWrites
-        .prereq(miscRegfileWrites);
 }
 
 void
@@ -1019,9 +915,6 @@ CPU::readMiscRegNoEffect(int misc_reg, ThreadID tid) const
 RegVal
 CPU::readMiscReg(int misc_reg, ThreadID tid)
 {
-    // output both old and new stats, keep
-    // return value the same
-    cpuStats.miscRegfileReads++;
     executeStats[tid]->numMiscRegReads++;
     return isa[tid]->readMiscReg(misc_reg);
 }
@@ -1035,132 +928,10 @@ CPU::setMiscRegNoEffect(int misc_reg, RegVal val, ThreadID tid)
 void
 CPU::setMiscReg(int misc_reg, RegVal val, ThreadID tid)
 {
-    // output both old and new stats
-    cpuStats.miscRegfileWrites++;
     executeStats[tid]->numMiscRegWrites++;
     isa[tid]->setMiscReg(misc_reg, val);
 }
 
-RegVal
-CPU::getReg(PhysRegIdPtr phys_reg)
-{
-    switch (phys_reg->classValue()) {
-      case IntRegClass:
-        cpuStats.intRegfileReads++;
-        break;
-      case FloatRegClass:
-        cpuStats.fpRegfileReads++;
-        break;
-      case CCRegClass:
-        cpuStats.ccRegfileReads++;
-        break;
-      case VecRegClass:
-      case VecElemClass:
-        cpuStats.vecRegfileReads++;
-        break;
-      case VecPredRegClass:
-        cpuStats.vecPredRegfileReads++;
-        break;
-      default:
-        break;
-    }
-    return regFile.getReg(phys_reg);
-}
-
-void
-CPU::getReg(PhysRegIdPtr phys_reg, void *val)
-{
-    switch (phys_reg->classValue()) {
-      case IntRegClass:
-        cpuStats.intRegfileReads++;
-        break;
-      case FloatRegClass:
-        cpuStats.fpRegfileReads++;
-        break;
-      case CCRegClass:
-        cpuStats.ccRegfileReads++;
-        break;
-      case VecRegClass:
-      case VecElemClass:
-        cpuStats.vecRegfileReads++;
-        break;
-      case VecPredRegClass:
-        cpuStats.vecPredRegfileReads++;
-        break;
-      default:
-        break;
-    }
-    regFile.getReg(phys_reg, val);
-}
-
-void *
-CPU::getWritableReg(PhysRegIdPtr phys_reg)
-{
-    switch (phys_reg->classValue()) {
-      case VecRegClass:
-        cpuStats.vecRegfileReads++;
-        break;
-      case VecPredRegClass:
-        cpuStats.vecPredRegfileReads++;
-        break;
-      default:
-        break;
-    }
-    return regFile.getWritableReg(phys_reg);
-}
-
-void
-CPU::setReg(PhysRegIdPtr phys_reg, RegVal val)
-{
-    switch (phys_reg->classValue()) {
-      case IntRegClass:
-        cpuStats.intRegfileWrites++;
-        break;
-      case FloatRegClass:
-        cpuStats.fpRegfileWrites++;
-        break;
-      case CCRegClass:
-        cpuStats.ccRegfileWrites++;
-        break;
-      case VecRegClass:
-      case VecElemClass:
-        cpuStats.vecRegfileWrites++;
-        break;
-      case VecPredRegClass:
-        cpuStats.vecPredRegfileWrites++;
-        break;
-      default:
-        break;
-    }
-    regFile.setReg(phys_reg, val);
-}
-
-void
-CPU::setReg(PhysRegIdPtr phys_reg, const void *val)
-{
-    switch (phys_reg->classValue()) {
-      case IntRegClass:
-        cpuStats.intRegfileWrites++;
-        break;
-      case FloatRegClass:
-        cpuStats.fpRegfileWrites++;
-        break;
-      case CCRegClass:
-        cpuStats.ccRegfileWrites++;
-        break;
-      case VecRegClass:
-      case VecElemClass:
-        cpuStats.vecRegfileWrites++;
-        break;
-      case VecPredRegClass:
-        cpuStats.vecPredRegfileWrites++;
-        break;
-      default:
-        break;
-    }
-    regFile.setReg(phys_reg, val);
-}
-
 RegVal
 CPU::getReg(PhysRegIdPtr phys_reg, ThreadID tid)
 {
@@ -1353,19 +1124,15 @@ CPU::instDone(ThreadID tid, const DynInstPtr &inst)
 {
     // Keep an instruction count.
     if (!inst->isMicroop() || inst->isLastMicroop()) {
-        // update both old and new stats
         thread[tid]->numInst++;
         thread[tid]->threadStats.numInsts++;
-        cpuStats.committedInsts[tid]++;
         commitStats[tid]->numInstsNotNOP++;
 
         // Check for instruction-count-based events.
         thread[tid]->comInstEventQueue.serviceEvents(thread[tid]->numInst);
     }
-    // update both old and new stats
     thread[tid]->numOp++;
     thread[tid]->threadStats.numOps++;
-    cpuStats.committedOps[tid]++;
     commitStats[tid]->numOpsNotNOP++;
 
     probeInstCommit(inst->staticInst, inst->pcState().instAddr());
diff --git a/src/cpu/o3/cpu.hh b/src/cpu/o3/cpu.hh
index d6317d6ea2..1d100ab330 100644
--- a/src/cpu/o3/cpu.hh
+++ b/src/cpu/o3/cpu.hh
@@ -310,18 +310,6 @@ class CPU : public BaseCPU
      */
     void setMiscReg(int misc_reg, RegVal val, ThreadID tid);
 
-    RegVal getReg(PhysRegIdPtr phys_reg);
-    void getReg(PhysRegIdPtr phys_reg, void *val);
-    void *getWritableReg(PhysRegIdPtr phys_reg);
-
-    void setReg(PhysRegIdPtr phys_reg, RegVal val);
-    void setReg(PhysRegIdPtr phys_reg, const void *val);
-
-    /** These functions are duplicated so that one set
-     * doesn't use thread ID, while the other does.
-     * This allows us to still output both old and
-     * new versions of the stats.
-    */
     RegVal getReg(PhysRegIdPtr phys_reg, ThreadID tid);
     void getReg(PhysRegIdPtr phys_reg, void *val, ThreadID tid);
     void *getWritableReg(PhysRegIdPtr phys_reg, ThreadID tid);
@@ -593,38 +581,6 @@ class CPU : public BaseCPU
         /** Stat for total number of cycles the CPU spends descheduled due to a
          * quiesce operation or waiting for an interrupt. */
         statistics::Scalar quiesceCycles;
-        /** Stat for the number of committed instructions per thread. */
-        statistics::Vector committedInsts;
-        /** Stat for the number of committed ops (including micro ops) per
-         *  thread. */
-        statistics::Vector committedOps;
-        /** Stat for the CPI per thread. */
-        statistics::Formula cpi;
-        /** Stat for the total CPI. */
-        statistics::Formula totalCpi;
-        /** Stat for the IPC per thread. */
-        statistics::Formula ipc;
-        /** Stat for the total IPC. */
-        statistics::Formula totalIpc;
-
-        //number of integer register file accesses
-        statistics::Scalar intRegfileReads;
-        statistics::Scalar intRegfileWrites;
-        //number of float register file accesses
-        statistics::Scalar fpRegfileReads;
-        statistics::Scalar fpRegfileWrites;
-        //number of vector register file accesses
-        mutable statistics::Scalar vecRegfileReads;
-        statistics::Scalar vecRegfileWrites;
-        //number of predicate register file accesses
-        mutable statistics::Scalar vecPredRegfileReads;
-        statistics::Scalar vecPredRegfileWrites;
-        //number of CC register file accesses
-        statistics::Scalar ccRegfileReads;
-        statistics::Scalar ccRegfileWrites;
-        //number of misc
-        statistics::Scalar miscRegfileReads;
-        statistics::Scalar miscRegfileWrites;
     } cpuStats;
 
   public:
diff --git a/src/cpu/o3/dyn_inst.hh b/src/cpu/o3/dyn_inst.hh
index 4f762b4551..63bf5ac59d 100644
--- a/src/cpu/o3/dyn_inst.hh
+++ b/src/cpu/o3/dyn_inst.hh
@@ -1085,15 +1085,10 @@ class DynInst : public ExecContext, public RefCounted
                 continue;
 
             if (bytes == sizeof(RegVal)) {
-                // call both old and new functions
-                setRegOperand(staticInst.get(), idx,
-                        cpu->getReg(prev_phys_reg));
                 setRegOperand(staticInst.get(), idx,
                         cpu->getReg(prev_phys_reg, threadNumber));
             } else {
                 uint8_t val[original_dest_reg.regClass().regBytes()];
-                // call both old and new functions
-                cpu->getReg(prev_phys_reg, val);
                 cpu->getReg(prev_phys_reg, val, threadNumber);
                 setRegOperand(staticInst.get(), idx, val);
             }
@@ -1121,9 +1116,7 @@ class DynInst : public ExecContext, public RefCounted
         const PhysRegIdPtr reg = renamedSrcIdx(idx);
         if (reg->is(InvalidRegClass))
             return 0;
-        // call new function, only return old value
-        cpu->getReg(reg, threadNumber);
-        return cpu->getReg(reg);
+        return cpu->getReg(reg, threadNumber);
     }
 
     void
@@ -1132,17 +1125,13 @@ class DynInst : public ExecContext, public RefCounted
         const PhysRegIdPtr reg = renamedSrcIdx(idx);
         if (reg->is(InvalidRegClass))
             return;
-        // call both old and new function
-        cpu->getReg(reg, val);
         cpu->getReg(reg, val, threadNumber);
     }
 
     void *
     getWritableRegOperand(const StaticInst *si, int idx) override
     {
-        // call both old and new function
         return cpu->getWritableReg(renamedDestIdx(idx), threadNumber);
-        return cpu->getWritableReg(renamedDestIdx(idx));
     }
 
     /** @todo: Make results into arrays so they can handle multiple dest
@@ -1154,8 +1143,6 @@ class DynInst : public ExecContext, public RefCounted
         const PhysRegIdPtr reg = renamedDestIdx(idx);
         if (reg->is(InvalidRegClass))
             return;
-        // call both old and new functions
-        cpu->setReg(reg, val);
         cpu->setReg(reg, val, threadNumber);
         setResult(reg->regClass(), val);
     }
@@ -1166,8 +1153,6 @@ class DynInst : public ExecContext, public RefCounted
         const PhysRegIdPtr reg = renamedDestIdx(idx);
         if (reg->is(InvalidRegClass))
             return;
-        // call both old and new functions
-        cpu->setReg(reg, val);
         cpu->setReg(reg, val, threadNumber);
         setResult(reg->regClass(), val);
     }
diff --git a/src/cpu/o3/fetch.cc b/src/cpu/o3/fetch.cc
index 89d1b81197..8cd84cbf05 100644
--- a/src/cpu/o3/fetch.cc
+++ b/src/cpu/o3/fetch.cc
@@ -158,12 +158,6 @@ Fetch::regProbePoints()
 
 Fetch::FetchStatGroup::FetchStatGroup(CPU *cpu, Fetch *fetch)
     : statistics::Group(cpu, "fetch"),
-    ADD_STAT(icacheStallCycles, statistics::units::Cycle::get(),
-             "Number of cycles fetch is stalled on an Icache miss"),
-    ADD_STAT(insts, statistics::units::Count::get(),
-             "Number of instructions fetch has processed"),
-    ADD_STAT(branches, statistics::units::Count::get(),
-             "Number of branches that fetch encountered"),
     ADD_STAT(predictedBranches, statistics::units::Count::get(),
              "Number of branches that fetch has predicted taken"),
     ADD_STAT(cycles, statistics::units::Cycle::get(),
@@ -200,21 +194,8 @@ Fetch::FetchStatGroup::FetchStatGroup(CPU *cpu, Fetch *fetch)
              "Number of instructions fetched each cycle (Total)"),
     ADD_STAT(idleRate, statistics::units::Ratio::get(),
              "Ratio of cycles fetch was idle",
-             idleCycles / cpu->baseStats.numCycles),
-    ADD_STAT(branchRate, statistics::units::Ratio::get(),
-             "Number of branch fetches per cycle",
-             branches / cpu->baseStats.numCycles),
-    ADD_STAT(rate, statistics::units::Rate<
-                    statistics::units::Count, statistics::units::Cycle>::get(),
-             "Number of inst fetches per cycle",
-             insts / cpu->baseStats.numCycles)
+             idleCycles / cpu->baseStats.numCycles)
 {
-        icacheStallCycles
-            .prereq(icacheStallCycles);
-        insts
-            .prereq(insts);
-        branches
-            .prereq(branches);
         predictedBranches
             .prereq(predictedBranches);
         cycles
@@ -252,10 +233,6 @@ Fetch::FetchStatGroup::FetchStatGroup(CPU *cpu, Fetch *fetch)
             .flags(statistics::pdf);
         idleRate
             .prereq(idleRate);
-        branchRate
-            .flags(statistics::total);
-        rate
-            .flags(statistics::total);
 }
 void
 Fetch::setTimeBuffer(TimeBuffer<TimeStruct> *time_buffer)
@@ -540,9 +517,7 @@ Fetch::lookupAndUpdateNextPC(const DynInstPtr &inst, PCStateBase &next_pc)
     inst->setPredTarg(next_pc);
     inst->setPredTaken(predict_taken);
 
-    // update both old and new stats
     cpu->fetchStats[tid]->numBranches++;
-    ++fetchStats.branches;
 
     if (predict_taken) {
         ++fetchStats.predictedBranches;
@@ -1148,8 +1123,6 @@ Fetch::fetch(bool &status_change)
             fetchCacheLine(fetchAddr, tid, this_pc.instAddr());
 
             if (fetchStatus[tid] == IcacheWaitResponse) {
-                // update both old and new stats
-                ++fetchStats.icacheStallCycles;
                 cpu->fetchStats[tid]->icacheStallCycles++;
             }
             else if (fetchStatus[tid] == ItlbWait)
@@ -1247,8 +1220,6 @@ Fetch::fetch(bool &status_change)
                     staticInst = dec_ptr->decode(this_pc);
 
                     // Increment stat of fetched instructions.
-                    // Update both old and new stats
-                    ++fetchStats.insts;
                     cpu->fetchStats[tid]->numInsts++;
 
                     if (staticInst->isMacroop()) {
@@ -1579,9 +1550,7 @@ Fetch::profileStall(ThreadID tid)
         ++fetchStats.squashCycles;
         DPRINTF(Fetch, "[tid:%i] Fetch is squashing!\n", tid);
     } else if (fetchStatus[tid] == IcacheWaitResponse) {
-        // update both old and new stats
         cpu->fetchStats[tid]->icacheStallCycles++;
-        ++fetchStats.icacheStallCycles;
         DPRINTF(Fetch, "[tid:%i] Fetch is waiting cache response!\n",
                 tid);
     } else if (fetchStatus[tid] == ItlbWait) {
diff --git a/src/cpu/o3/fetch.hh b/src/cpu/o3/fetch.hh
index cd311913f5..6add31444d 100644
--- a/src/cpu/o3/fetch.hh
+++ b/src/cpu/o3/fetch.hh
@@ -533,12 +533,6 @@ class Fetch
         FetchStatGroup(CPU *cpu, Fetch *fetch);
         // @todo: Consider making these
         // vectors and tracking on a per thread basis.
-        /** Stat for total number of cycles stalled due to an icache miss. */
-        statistics::Scalar icacheStallCycles;
-        /** Stat for total number of fetched instructions. */
-        statistics::Scalar insts;
-        /** Total number of fetched branches. */
-        statistics::Scalar branches;
         /** Stat for total number of predicted branches. */
         statistics::Scalar predictedBranches;
         /** Stat for total number of cycles spent fetching. */
@@ -581,10 +575,6 @@ class Fetch
         statistics::Distribution nisnDist;
         /** Rate of how often fetch was idle. */
         statistics::Formula idleRate;
-        /** Number of branch fetches per cycle. */
-        statistics::Formula branchRate;
-        /** Number of instruction fetched per cycle. */
-        statistics::Formula rate;
     } fetchStats;
 };
 
diff --git a/src/cpu/o3/iew.cc b/src/cpu/o3/iew.cc
index 1b3598cea7..a8acb4c762 100644
--- a/src/cpu/o3/iew.cc
+++ b/src/cpu/o3/iew.cc
@@ -217,52 +217,14 @@ IEW::IEWStats::IEWStats(CPU *cpu)
 
 IEW::IEWStats::ExecutedInstStats::ExecutedInstStats(CPU *cpu)
     : statistics::Group(cpu),
-    ADD_STAT(numInsts, statistics::units::Count::get(),
-             "Number of executed instructions"),
-    ADD_STAT(numLoadInsts, statistics::units::Count::get(),
-             "Number of load instructions executed"),
     ADD_STAT(numSquashedInsts, statistics::units::Count::get(),
              "Number of squashed instructions skipped in execute"),
     ADD_STAT(numSwp, statistics::units::Count::get(),
-             "Number of swp insts executed"),
-    ADD_STAT(numNop, statistics::units::Count::get(),
-             "Number of nop insts executed"),
-    ADD_STAT(numRefs, statistics::units::Count::get(),
-             "Number of memory reference insts executed"),
-    ADD_STAT(numBranches, statistics::units::Count::get(),
-             "Number of branches executed"),
-    ADD_STAT(numStoreInsts, statistics::units::Count::get(),
-             "Number of stores executed"),
-    ADD_STAT(numRate, statistics::units::Rate<
-                statistics::units::Count, statistics::units::Cycle>::get(),
-             "Inst execution rate", numInsts / cpu->baseStats.numCycles)
+             "Number of swp insts executed")
 {
-    numLoadInsts
-        .init(cpu->numThreads)
-        .flags(statistics::total);
-
     numSwp
         .init(cpu->numThreads)
         .flags(statistics::total);
-
-    numNop
-        .init(cpu->numThreads)
-        .flags(statistics::total);
-
-    numRefs
-        .init(cpu->numThreads)
-        .flags(statistics::total);
-
-    numBranches
-        .init(cpu->numThreads)
-        .flags(statistics::total);
-
-    numStoreInsts
-        .flags(statistics::total);
-    numStoreInsts = numRefs - numLoadInsts;
-
-    numRate
-        .flags(statistics::total);
 }
 
 void
@@ -1053,8 +1015,6 @@ IEW::dispatchInsts(ThreadID tid)
 
             instQueue.recordProducer(inst);
 
-            // update both old and new stats
-            iewStats.executedInstStats.numNop[tid]++;
             cpu->executeStats[tid]->numNop++;
 
             add_to_iq = false;
@@ -1563,8 +1523,6 @@ IEW::updateExeInstStats(const DynInstPtr& inst)
 {
     ThreadID tid = inst->threadNumber;
 
-    // update both old and new stats
-    iewStats.executedInstStats.numInsts++;
     cpu->executeStats[tid]->numInsts++;
 
 #if TRACING_ON
@@ -1577,8 +1535,6 @@ IEW::updateExeInstStats(const DynInstPtr& inst)
     //  Control operations
     //
     if (inst->isControl()) {
-        // update both old and new stats
-        iewStats.executedInstStats.numBranches[tid]++;
         cpu->executeStats[tid]->numBranches++;
     }
 
@@ -1586,13 +1542,9 @@ IEW::updateExeInstStats(const DynInstPtr& inst)
     //  Memory operations
     //
     if (inst->isMemRef()) {
-        // update both old and new stats
-        iewStats.executedInstStats.numRefs[tid]++;
         cpu->executeStats[tid]->numMemRefs++;
 
         if (inst->isLoad()) {
-            // update both old and new stats
-            iewStats.executedInstStats.numLoadInsts[tid]++;
             cpu->executeStats[tid]->numLoadInsts++;
         }
     }
diff --git a/src/cpu/o3/iew.hh b/src/cpu/o3/iew.hh
index 80fed295df..4fe8227dcc 100644
--- a/src/cpu/o3/iew.hh
+++ b/src/cpu/o3/iew.hh
@@ -455,25 +455,11 @@ class IEW
         {
             ExecutedInstStats(CPU *cpu);
 
-            /** Stat for total number of executed instructions. */
-            statistics::Scalar numInsts;
-            /** Stat for total number of executed load instructions. */
-            statistics::Vector numLoadInsts;
             /** Stat for total number of squashed instructions skipped at
              *  execute. */
             statistics::Scalar numSquashedInsts;
             /** Number of executed software prefetches. */
             statistics::Vector numSwp;
-            /** Number of executed nops. */
-            statistics::Vector numNop;
-            /** Number of executed meomory references. */
-            statistics::Vector numRefs;
-            /** Number of executed branches. */
-            statistics::Vector numBranches;
-            /** Number of executed store instructions. */
-            statistics::Formula numStoreInsts;
-            /** Number of instructions executed per cycle. */
-            statistics::Formula numRate;
         } executedInstStats;
 
         /** Number of instructions sent to commit. */
diff --git a/src/cpu/simple/base.cc b/src/cpu/simple/base.cc
index eeb927ffed..ca86b0b412 100644
--- a/src/cpu/simple/base.cc
+++ b/src/cpu/simple/base.cc
@@ -421,8 +421,6 @@ BaseSimpleCPU::postExecute()
     Addr instAddr = threadContexts[curThread]->pcState().instAddr();
 
     if (curStaticInst->isMemRef()) {
-        // update both old and new stats
-        t_info.execContextStats.numMemRefs++;
         executeStats[t_info.thread->threadId()]->numMemRefs++;
     }
 
@@ -439,21 +437,18 @@ BaseSimpleCPU::postExecute()
     if (curStaticInst->isInteger()){
         executeStats[t_info.thread->threadId()]->numIntAluAccesses++;
         commitStats[t_info.thread->threadId()]->numIntInsts++;
-        t_info.execContextStats.numIntAluAccesses++;
     }
 
     //float alu accesses
     if (curStaticInst->isFloating()){
         executeStats[t_info.thread->threadId()]->numFpAluAccesses++;
         commitStats[t_info.thread->threadId()]->numFpInsts++;
-        t_info.execContextStats.numFpAluAccesses++;
     }
 
     //vector alu accesses
     if (curStaticInst->isVector()){
         executeStats[t_info.thread->threadId()]->numVecAluAccesses++;
         commitStats[t_info.thread->threadId()]->numVecInsts++;
-        t_info.execContextStats.numVecAluAccesses++;
     }
 
     //Matrix alu accesses
diff --git a/src/cpu/simple/exec_context.hh b/src/cpu/simple/exec_context.hh
index 78952cbe75..9639f43058 100644
--- a/src/cpu/simple/exec_context.hh
+++ b/src/cpu/simple/exec_context.hh
@@ -90,44 +90,12 @@ class SimpleExecContext : public ExecContext
                        "Number of instructions committed"),
               ADD_STAT(numOps, statistics::units::Count::get(),
                        "Number of ops (including micro ops) committed"),
-              ADD_STAT(numIntAluAccesses, statistics::units::Count::get(),
-                       "Number of integer alu accesses"),
-              ADD_STAT(numFpAluAccesses, statistics::units::Count::get(),
-                       "Number of float alu accesses"),
-              ADD_STAT(numVecAluAccesses, statistics::units::Count::get(),
-                       "Number of vector alu accesses"),
               ADD_STAT(numMatAluAccesses, statistics::units::Count::get(),
                        "Number of matrix alu accesses"),
               ADD_STAT(numCallsReturns, statistics::units::Count::get(),
                        "Number of times a function call or return occured"),
               ADD_STAT(numMatInsts, statistics::units::Count::get(),
                        "Number of matrix instructions"),
-              ADD_STAT(numIntRegReads, statistics::units::Count::get(),
-                       "Number of times the integer registers were read"),
-              ADD_STAT(numIntRegWrites, statistics::units::Count::get(),
-                       "Number of times the integer registers were written"),
-              ADD_STAT(numFpRegReads, statistics::units::Count::get(),
-                       "Number of times the floating registers were read"),
-              ADD_STAT(numFpRegWrites, statistics::units::Count::get(),
-                       "Number of times the floating registers were written"),
-              ADD_STAT(numVecRegReads, statistics::units::Count::get(),
-                       "Number of times the vector registers were read"),
-              ADD_STAT(numVecRegWrites, statistics::units::Count::get(),
-                       "Number of times the vector registers were written"),
-              ADD_STAT(numVecPredRegReads, statistics::units::Count::get(),
-                       "Number of times the predicate registers were read"),
-              ADD_STAT(numVecPredRegWrites, statistics::units::Count::get(),
-                       "Number of times the predicate registers were written"),
-              ADD_STAT(numCCRegReads, statistics::units::Count::get(),
-                       "Number of times the CC registers were read"),
-              ADD_STAT(numCCRegWrites, statistics::units::Count::get(),
-                       "Number of times the CC registers were written"),
-              ADD_STAT(numMiscRegReads, statistics::units::Count::get(),
-                       "Number of times the Misc registers were read"),
-              ADD_STAT(numMiscRegWrites, statistics::units::Count::get(),
-                       "Number of times the Misc registers were written"),
-              ADD_STAT(numMemRefs, statistics::units::Count::get(),
-                       "Number of memory refs"),
               ADD_STAT(numIdleCycles, statistics::units::Cycle::get(),
                        "Number of idle cycles"),
               ADD_STAT(numBusyCycles, statistics::units::Cycle::get(),
@@ -136,10 +104,6 @@ class SimpleExecContext : public ExecContext
                        "Percentage of non-idle cycles"),
               ADD_STAT(idleFraction, statistics::units::Ratio::get(),
                        "Percentage of idle cycles"),
-              ADD_STAT(icacheStallCycles, statistics::units::Cycle::get(),
-                       "ICache total stall cycles"),
-              ADD_STAT(dcacheStallCycles, statistics::units::Cycle::get(),
-                       "DCache total stall cycles"),
               ADD_STAT(numPredictedBranches, statistics::units::Count::get(),
                        "Number of branches predicted as taken"),
               ADD_STAT(numBranchMispred, statistics::units::Count::get(),
@@ -164,18 +128,6 @@ class SimpleExecContext : public ExecContext
                   &numMatRegWrites
               }
         {
-            numCCRegReads
-                .flags(statistics::nozero);
-
-            numCCRegWrites
-                .flags(statistics::nozero);
-
-            icacheStallCycles
-                .prereq(icacheStallCycles);
-
-            dcacheStallCycles
-                .prereq(dcacheStallCycles);
-
             idleFraction = statistics::constant(1.0) - notIdleFraction;
             numIdleCycles = idleFraction * cpu->baseStats.numCycles;
             numBusyCycles = notIdleFraction * cpu->baseStats.numCycles;
@@ -191,15 +143,6 @@ class SimpleExecContext : public ExecContext
         statistics::Scalar numInsts;
         statistics::Scalar numOps;
 
-        // Number of integer alu accesses
-        statistics::Scalar numIntAluAccesses;
-
-        // Number of float alu accesses
-        statistics::Scalar numFpAluAccesses;
-
-        // Number of vector alu accesses
-        statistics::Scalar numVecAluAccesses;
-
         // Number of matrix alu accesses
         statistics::Scalar numMatAluAccesses;
 
@@ -209,37 +152,10 @@ class SimpleExecContext : public ExecContext
         // Number of matrix instructions
         statistics::Scalar numMatInsts;
 
-        // Number of integer register file accesses
-        statistics::Scalar numIntRegReads;
-        statistics::Scalar numIntRegWrites;
-
-        // Number of float register file accesses
-        statistics::Scalar numFpRegReads;
-        statistics::Scalar numFpRegWrites;
-
-        // Number of vector register file accesses
-        mutable statistics::Scalar numVecRegReads;
-        statistics::Scalar numVecRegWrites;
-
-        // Number of predicate register file accesses
-        mutable statistics::Scalar numVecPredRegReads;
-        statistics::Scalar numVecPredRegWrites;
-
         // Number of matrix register file accesses
         mutable statistics::Scalar numMatRegReads;
         statistics::Scalar numMatRegWrites;
 
-        // Number of condition code register file accesses
-        statistics::Scalar numCCRegReads;
-        statistics::Scalar numCCRegWrites;
-
-        // Number of misc register file accesses
-        statistics::Scalar numMiscRegReads;
-        statistics::Scalar numMiscRegWrites;
-
-        // Number of simulated memory references
-        statistics::Scalar numMemRefs;
-
         // Number of idle cycles
         statistics::Formula numIdleCycles;
 
@@ -250,12 +166,6 @@ class SimpleExecContext : public ExecContext
         statistics::Average notIdleFraction;
         statistics::Formula idleFraction;
 
-        // Number of cycles stalled for I-cache responses
-        statistics::Scalar icacheStallCycles;
-
-        // Number of cycles stalled for D-cache responses
-        statistics::Scalar dcacheStallCycles;
-
         /// @{
         /// Number of branches predicted as taken
         statistics::Scalar numPredictedBranches;
@@ -323,8 +233,6 @@ class SimpleExecContext : public ExecContext
     RegVal
     readMiscRegOperand(const StaticInst *si, int idx) override
     {
-        // update both old and new stats
-        execContextStats.numMiscRegReads++;
         cpu->executeStats[thread->threadId()]->numMiscRegReads++;
         const RegId& reg = si->srcRegIdx(idx);
         assert(reg.is(MiscRegClass));
@@ -334,8 +242,6 @@ class SimpleExecContext : public ExecContext
     void
     setMiscRegOperand(const StaticInst *si, int idx, RegVal val) override
     {
-        // update both old and new stats
-        execContextStats.numMiscRegWrites++;
         cpu->executeStats[thread->threadId()]->numMiscRegWrites++;
         const RegId& reg = si->destRegIdx(idx);
         assert(reg.is(MiscRegClass));
@@ -349,8 +255,6 @@ class SimpleExecContext : public ExecContext
     RegVal
     readMiscReg(int misc_reg) override
     {
-        // update both old and new stats
-        execContextStats.numMiscRegReads++;
         cpu->executeStats[thread->threadId()]->numMiscRegReads++;
         return thread->readMiscReg(misc_reg);
     }
@@ -362,8 +266,6 @@ class SimpleExecContext : public ExecContext
     void
     setMiscReg(int misc_reg, RegVal val) override
     {
-        // update both old and new stats
-        execContextStats.numMiscRegWrites++;
         cpu->executeStats[thread->threadId()]->numMiscRegWrites++;
         thread->setMiscReg(misc_reg, val);
     }

From f969c08ee2e69eed37f585404545b0058466cf29 Mon Sep 17 00:00:00 2001
From: Melissa Jost <melissakjost@gmail.com>
Date: Mon, 20 Mar 2023 02:13:57 -0700
Subject: [PATCH 377/492] cpu: Remove duplicate base inst and op stats

This change removes any duplicated numInsts, numOps, ipc, and
cpi stats, and makes sure that numInsts is only tracked per
thread.

Change-Id: I45d0f6cb5c523e53c0602b5152a5108108476936
Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/69109
Reviewed-by: Bobby Bruce <bbruce@ucdavis.edu>
Tested-by: kokoro <noreply+kokoro@google.com>
Maintainer: Bobby Bruce <bbruce@ucdavis.edu>
---
 src/cpu/base.cc                |  4 ----
 src/cpu/kvm/base.cc            |  4 ----
 src/cpu/kvm/base.hh            |  1 -
 src/cpu/minor/execute.cc       |  4 ----
 src/cpu/minor/stats.cc         | 19 +------------------
 src/cpu/minor/stats.hh         | 10 ----------
 src/cpu/o3/commit.cc           |  1 -
 src/cpu/simple/base.cc         |  5 +----
 src/cpu/simple/exec_context.hh |  8 --------
 9 files changed, 2 insertions(+), 54 deletions(-)

diff --git a/src/cpu/base.cc b/src/cpu/base.cc
index e5a661b6ed..a61c99796c 100644
--- a/src/cpu/base.cc
+++ b/src/cpu/base.cc
@@ -407,10 +407,6 @@ BaseCPU::probeInstCommit(const StaticInstPtr &inst, Addr pc)
 BaseCPU::
 BaseCPUStats::BaseCPUStats(statistics::Group *parent)
     : statistics::Group(parent),
-      ADD_STAT(numInsts, statistics::units::Count::get(),
-               "Number of instructions committed (core level)"),
-      ADD_STAT(numOps, statistics::units::Count::get(),
-               "Number of ops (including micro ops) committed (core level)"),
       ADD_STAT(numCycles, statistics::units::Cycle::get(),
                "Number of cpu cycles simulated"),
       ADD_STAT(cpi, statistics::units::Rate<
diff --git a/src/cpu/kvm/base.cc b/src/cpu/kvm/base.cc
index 5857f696dd..e22e1628d2 100644
--- a/src/cpu/kvm/base.cc
+++ b/src/cpu/kvm/base.cc
@@ -261,8 +261,6 @@ BaseKvmCPU::restartEqThread()
 
 BaseKvmCPU::StatGroup::StatGroup(statistics::Group *parent)
     : statistics::Group(parent),
-    ADD_STAT(committedInsts, statistics::units::Count::get(),
-             "Number of instructions committed"),
     ADD_STAT(numVMExits, statistics::units::Count::get(),
              "total number of KVM exits"),
     ADD_STAT(numVMHalfEntries, statistics::units::Count::get(),
@@ -778,8 +776,6 @@ BaseKvmCPU::kvmRun(Tick ticks)
 
         /* Update statistics */
         baseStats.numCycles += simCyclesExecuted;;
-        stats.committedInsts += instsExecuted;
-        // update both old and new stats
         commitStats[thread->threadId()]->numInsts += instsExecuted;
         baseStats.numInsts += instsExecuted;
         ctrInsts += instsExecuted;
diff --git a/src/cpu/kvm/base.hh b/src/cpu/kvm/base.hh
index 2d81c7c7eb..7bbf393f9b 100644
--- a/src/cpu/kvm/base.hh
+++ b/src/cpu/kvm/base.hh
@@ -804,7 +804,6 @@ class BaseKvmCPU : public BaseCPU
     struct StatGroup : public statistics::Group
     {
         StatGroup(statistics::Group *parent);
-        statistics::Scalar committedInsts;
         statistics::Scalar numVMExits;
         statistics::Scalar numVMHalfEntries;
         statistics::Scalar numExitSignal;
diff --git a/src/cpu/minor/execute.cc b/src/cpu/minor/execute.cc
index 5df00d3f39..4e0fa42087 100644
--- a/src/cpu/minor/execute.cc
+++ b/src/cpu/minor/execute.cc
@@ -871,8 +871,6 @@ Execute::doInstCommitAccounting(MinorDynInstPtr inst)
     {
         thread->numInst++;
         thread->threadStats.numInsts++;
-        cpu.stats.numInsts++;
-        // update both old and new stas
         cpu.commitStats[inst->id.threadId]->numInsts++;
         cpu.baseStats.numInsts++;
 
@@ -881,9 +879,7 @@ Execute::doInstCommitAccounting(MinorDynInstPtr inst)
     }
     thread->numOp++;
     thread->threadStats.numOps++;
-    cpu.stats.numOps++;
     cpu.commitStats[inst->id.threadId]->numOps++;
-    cpu.baseStats.numOps++;
     cpu.commitStats[inst->id.threadId]
         ->committedInstType[inst->staticInst->opClass()]++;
 
diff --git a/src/cpu/minor/stats.cc b/src/cpu/minor/stats.cc
index 818db8c360..e4eebd3355 100644
--- a/src/cpu/minor/stats.cc
+++ b/src/cpu/minor/stats.cc
@@ -45,28 +45,11 @@ namespace minor
 
 MinorStats::MinorStats(BaseCPU *base_cpu)
     : statistics::Group(base_cpu),
-    ADD_STAT(numInsts, statistics::units::Count::get(),
-             "Number of instructions committed"),
-    ADD_STAT(numOps, statistics::units::Count::get(),
-             "Number of ops (including micro ops) committed"),
     ADD_STAT(quiesceCycles, statistics::units::Cycle::get(),
              "Total number of cycles that CPU has spent quiesced or waiting "
-             "for an interrupt"),
-    ADD_STAT(cpi, statistics::units::Rate<
-                statistics::units::Cycle, statistics::units::Count>::get(),
-             "CPI: cycles per instruction"),
-    ADD_STAT(ipc, statistics::units::Rate<
-                statistics::units::Count, statistics::units::Cycle>::get(),
-             "IPC: instructions per cycle")
-
+             "for an interrupt")
 {
     quiesceCycles.prereq(quiesceCycles);
-
-    cpi.precision(6);
-    cpi = base_cpu->baseStats.numCycles / numInsts;
-
-    ipc.precision(6);
-    ipc = numInsts / base_cpu->baseStats.numCycles;
 }
 
 } // namespace minor
diff --git a/src/cpu/minor/stats.hh b/src/cpu/minor/stats.hh
index f7d5e71dfa..98ac80f15c 100644
--- a/src/cpu/minor/stats.hh
+++ b/src/cpu/minor/stats.hh
@@ -59,19 +59,9 @@ struct MinorStats : public statistics::Group
 {
     MinorStats(BaseCPU *parent);
 
-    /** Number of simulated instructions */
-    statistics::Scalar numInsts;
-
-    /** Number of simulated insts and microops */
-    statistics::Scalar numOps;
-
     /** Number of cycles in quiescent state */
     statistics::Scalar quiesceCycles;
 
-    /** CPI/IPC for total cycle counts and macro insts */
-    statistics::Formula cpi;
-    statistics::Formula ipc;
-
 };
 
 } // namespace minor
diff --git a/src/cpu/o3/commit.cc b/src/cpu/o3/commit.cc
index 266e59e2e3..538505bcf9 100644
--- a/src/cpu/o3/commit.cc
+++ b/src/cpu/o3/commit.cc
@@ -1343,7 +1343,6 @@ Commit::updateComInstStats(const DynInstPtr &inst)
         cpu->baseStats.numInsts++;
     }
     cpu->commitStats[tid]->numOps++;
-    cpu->baseStats.numOps++;
 
     // To match the old model, don't count nops and instruction
     // prefetches towards the total commit count.
diff --git a/src/cpu/simple/base.cc b/src/cpu/simple/base.cc
index ca86b0b412..b42b68992f 100644
--- a/src/cpu/simple/base.cc
+++ b/src/cpu/simple/base.cc
@@ -154,10 +154,8 @@ BaseSimpleCPU::countInst()
 
     if (!curStaticInst->isMicroop() || curStaticInst->isLastMicroop()) {
         t_info.numInst++;
-        t_info.execContextStats.numInsts++;
     }
     t_info.numOp++;
-    t_info.execContextStats.numOps++;
 }
 
 void
@@ -183,9 +181,8 @@ BaseSimpleCPU::countCommitInst()
         commitStats[t_info.thread->threadId()]->numInsts++;
         baseStats.numInsts++;
     }
-    // increment thread level and core level numOps count
+    // increment thread level numOps count
     commitStats[t_info.thread->threadId()]->numOps++;
-    baseStats.numOps++;
 }
 
 Counter
diff --git a/src/cpu/simple/exec_context.hh b/src/cpu/simple/exec_context.hh
index 9639f43058..c8c7076ec2 100644
--- a/src/cpu/simple/exec_context.hh
+++ b/src/cpu/simple/exec_context.hh
@@ -86,10 +86,6 @@ class SimpleExecContext : public ExecContext
             : statistics::Group(cpu,
                            csprintf("exec_context.thread_%i",
                                     thread->threadId()).c_str()),
-              ADD_STAT(numInsts, statistics::units::Count::get(),
-                       "Number of instructions committed"),
-              ADD_STAT(numOps, statistics::units::Count::get(),
-                       "Number of ops (including micro ops) committed"),
               ADD_STAT(numMatAluAccesses, statistics::units::Count::get(),
                        "Number of matrix alu accesses"),
               ADD_STAT(numCallsReturns, statistics::units::Count::get(),
@@ -139,10 +135,6 @@ class SimpleExecContext : public ExecContext
                 .prereq(numBranchMispred);
         }
 
-        // Number of simulated instructions
-        statistics::Scalar numInsts;
-        statistics::Scalar numOps;
-
         // Number of matrix alu accesses
         statistics::Scalar numMatAluAccesses;
 

From c0103aa2c67cc45e6306dae8235931b26b8e6a20 Mon Sep 17 00:00:00 2001
From: Melissa Jost <melissakjost@gmail.com>
Date: Mon, 20 Mar 2023 00:57:09 -0700
Subject: [PATCH 378/492] cpu-o3: Remove duplicated O3 stats

This removes instsCommitted, opsCommitted, icacheStallCycles,
insts, branches, branchRate, rate, cpi, totalCpi, ipc, totalIpc,
numInsts, numLoadInsts, numNop, numRefs, numBranches,
numStoreInsts, and numRate, so they only exist in the BaseCPU

Change-Id: Ic3910402a065712a07d59a41099edb336d605e96
Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/69108
Tested-by: kokoro <noreply+kokoro@google.com>
Reviewed-by: Bobby Bruce <bbruce@ucdavis.edu>
Maintainer: Bobby Bruce <bbruce@ucdavis.edu>

From fcb36458e21fc9b56ddcf23ff5fd257642e12ea1 Mon Sep 17 00:00:00 2001
From: "Bobby R. Bruce" <bbruce@ucdavis.edu>
Date: Thu, 4 May 2023 17:16:28 -0700
Subject: [PATCH 379/492] misc: Fix 'unused variable' clang errors with
 gem5.fast

Change-Id: I2bb8ac10e8db69fa82abe41577cd8e5db575e93d
Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/70297
Maintainer: Bobby Bruce <bbruce@ucdavis.edu>
Tested-by: kokoro <noreply+kokoro@google.com>
Reviewed-by: Jason Lowe-Power <power.jg@gmail.com>
Reviewed-by: Daniel Carvalho <odanrc@yahoo.com.br>
---
 src/arch/arm/self_debug.cc       | 2 --
 src/arch/sparc/faults.cc         | 3 ---
 src/cpu/activity.cc              | 2 ++
 src/cpu/minor/execute.cc         | 8 +-------
 src/cpu/simple/atomic.cc         | 2 +-
 src/dev/storage/ide_disk.cc      | 2 +-
 src/mem/ruby/system/Sequencer.cc | 9 +--------
 7 files changed, 6 insertions(+), 22 deletions(-)

diff --git a/src/arch/arm/self_debug.cc b/src/arch/arm/self_debug.cc
index a4e685fce8..a99cf0a81e 100644
--- a/src/arch/arm/self_debug.cc
+++ b/src/arch/arm/self_debug.cc
@@ -127,9 +127,7 @@ SelfDebug::testWatchPoints(ThreadContext *tc, Addr vaddr, bool write,
         return NoFault;
 
     ExceptionLevel el = (ExceptionLevel) currEL(tc);
-    int idxtmp = -1;
     for (auto &p: arWatchPoints){
-        idxtmp ++;
         if (p.enable) {
             if (p.test(tc, vaddr, el, write, atomic, size)) {
                 return triggerWatchpointException(tc, vaddr, write, cm);
diff --git a/src/arch/sparc/faults.cc b/src/arch/sparc/faults.cc
index ff80ec1baf..d3d5ae4856 100644
--- a/src/arch/sparc/faults.cc
+++ b/src/arch/sparc/faults.cc
@@ -302,7 +302,6 @@ enterREDState(ThreadContext *tc)
 void
 doREDFault(ThreadContext *tc, TrapType tt)
 {
-    RegVal TL = tc->readMiscRegNoEffect(MISCREG_TL);
     RegVal TSTATE = tc->readMiscRegNoEffect(MISCREG_TSTATE);
     PSTATE pstate = tc->readMiscRegNoEffect(MISCREG_PSTATE);
     HPSTATE hpstate = tc->readMiscRegNoEffect(MISCREG_HPSTATE);
@@ -313,8 +312,6 @@ doREDFault(ThreadContext *tc, TrapType tt)
     RegVal GL = tc->readMiscRegNoEffect(MISCREG_GL);
     auto &pc = tc->pcState().as<PCState>();
 
-    TL++;
-
     Addr pcMask = pstate.am ? mask(32) : mask(64);
 
     // set TSTATE.gl to gl
diff --git a/src/cpu/activity.cc b/src/cpu/activity.cc
index f10b1ced59..cae5932276 100644
--- a/src/cpu/activity.cc
+++ b/src/cpu/activity.cc
@@ -151,6 +151,7 @@ ActivityRecorder::dump()
 void
 ActivityRecorder::validate()
 {
+#ifdef DEBUG
     int count = 0;
     for (int i = 0; i <= longestLatency; ++i) {
         if (activityBuffer[-i]) {
@@ -165,6 +166,7 @@ ActivityRecorder::validate()
     }
 
     assert(count == activityCount);
+#endif
 }
 
 } // namespace gem5
diff --git a/src/cpu/minor/execute.cc b/src/cpu/minor/execute.cc
index 4e0fa42087..ba4032e39c 100644
--- a/src/cpu/minor/execute.cc
+++ b/src/cpu/minor/execute.cc
@@ -568,10 +568,6 @@ Execute::issue(ThreadID thread_id)
     /* Number of memory ops issues this cycle to check for memoryIssueLimit */
     unsigned num_mem_insts_issued = 0;
 
-    /* Number of instructions discarded this cycle in order to enforce a
-     *  discardLimit. @todo, add that parameter? */
-    unsigned num_insts_discarded = 0;
-
     do {
         MinorDynInstPtr inst = insts_in->insts[thread.inputIndex];
         Fault fault = inst->fault;
@@ -800,9 +796,7 @@ Execute::issue(ThreadID thread_id)
             if (issued_mem_ref)
                 num_mem_insts_issued++;
 
-            if (discarded) {
-                num_insts_discarded++;
-            } else if (!inst->isBubble()) {
+            if (!discarded && !inst->isBubble()) {
                 num_insts_issued++;
 
                 if (num_insts_issued == issueLimit)
diff --git a/src/cpu/simple/atomic.cc b/src/cpu/simple/atomic.cc
index 5c9fc29b64..2cbb62da0c 100644
--- a/src/cpu/simple/atomic.cc
+++ b/src/cpu/simple/atomic.cc
@@ -462,7 +462,7 @@ AtomicSimpleCPU::writeMem(uint8_t *data, unsigned size, Addr addr,
     Addr frag_addr = addr;
     int frag_size = 0;
     int size_left = size;
-    int curr_frag_id = 0;
+    [[maybe_unused]] int curr_frag_id = 0;
     bool predicate;
     Fault fault = NoFault;
 
diff --git a/src/dev/storage/ide_disk.cc b/src/dev/storage/ide_disk.cc
index e43437f1a4..cb3a58a1da 100644
--- a/src/dev/storage/ide_disk.cc
+++ b/src/dev/storage/ide_disk.cc
@@ -1072,7 +1072,7 @@ IdeDisk::serialize(CheckpointOut &cp) const
     Tick reschedule = 0;
     Events_t event = None;
 
-    int eventCount = 0;
+    [[maybe_unused]] int eventCount = 0;
 
     if (dmaTransferEvent.scheduled()) {
         reschedule = dmaTransferEvent.when();
diff --git a/src/mem/ruby/system/Sequencer.cc b/src/mem/ruby/system/Sequencer.cc
index 24439d6487..3b75619969 100644
--- a/src/mem/ruby/system/Sequencer.cc
+++ b/src/mem/ruby/system/Sequencer.cc
@@ -464,8 +464,6 @@ Sequencer::writeCallback(Addr address, DataBlock& data,
     // ruby request was outstanding. Since only 1 ruby request was made,
     // profile the ruby latency once.
     bool ruby_request = true;
-    int aliased_stores = 0;
-    int aliased_loads = 0;
     while (!seq_req_list.empty()) {
         SequencerRequest &seq_req = seq_req_list.front();
 
@@ -520,9 +518,8 @@ Sequencer::writeCallback(Addr address, DataBlock& data,
                 recordMissLatency(&seq_req, success, mach, externalHit,
                                   initialRequestTime, forwardRequestTime,
                                   firstResponseTime);
-            } else {
-                aliased_stores++;
             }
+
             markRemoved();
             hitCallback(&seq_req, data, success, mach, externalHit,
                         initialRequestTime, forwardRequestTime,
@@ -532,7 +529,6 @@ Sequencer::writeCallback(Addr address, DataBlock& data,
             // handle read request
             assert(!ruby_request);
             markRemoved();
-            aliased_loads++;
             hitCallback(&seq_req, data, true, mach, externalHit,
                         initialRequestTime, forwardRequestTime,
                         firstResponseTime, !ruby_request);
@@ -565,15 +561,12 @@ Sequencer::readCallback(Addr address, DataBlock& data,
     // ruby request was outstanding. Since only 1 ruby request was made,
     // profile the ruby latency once.
     bool ruby_request = true;
-    int aliased_loads = 0;
     while (!seq_req_list.empty()) {
         SequencerRequest &seq_req = seq_req_list.front();
         if (ruby_request) {
             assert((seq_req.m_type == RubyRequestType_LD) ||
                    (seq_req.m_type == RubyRequestType_Load_Linked) ||
                    (seq_req.m_type == RubyRequestType_IFETCH));
-        } else {
-            aliased_loads++;
         }
         if ((seq_req.m_type != RubyRequestType_LD) &&
             (seq_req.m_type != RubyRequestType_Load_Linked) &&

From 6dd60a6c1adb88f8f50f510ffa0520fc84a10dc6 Mon Sep 17 00:00:00 2001
From: "Bobby R. Bruce" <bbruce@ucdavis.edu>
Date: Mon, 8 May 2023 10:54:56 -0700
Subject: [PATCH 380/492] base,arch,mem: Remove {GE}M5_VAR_USED instances

`[[maybe_unused]]` is to be used to specify that a variable is used.

Change-Id: Ife2ac96111b3af13e182baba1f3456e48c3a9f9b
Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/70397
Reviewed-by: Jason Lowe-Power <power.jg@gmail.com>
Maintainer: Bobby Bruce <bbruce@ucdavis.edu>
Tested-by: kokoro <noreply+kokoro@google.com>
Reviewed-by: Daniel Carvalho <odanrc@yahoo.com.br>
---
 src/arch/arm/isa/insts/sme.isa          | 18 +++++++++---------
 src/base/gtest/serialization_fixture.hh |  4 ++--
 src/base/inet.cc                        |  6 +++---
 src/base/stats/group.cc                 |  3 ++-
 src/mem/ruby/system/Sequencer.cc        |  2 +-
 5 files changed, 17 insertions(+), 16 deletions(-)

diff --git a/src/arch/arm/isa/insts/sme.isa b/src/arch/arm/isa/insts/sme.isa
index b9f6115432..03a1b2c6e2 100644
--- a/src/arch/arm/isa/insts/sme.isa
+++ b/src/arch/arm/isa/insts/sme.isa
@@ -100,12 +100,12 @@ let {{
                             xc->tcBase());
 
             uint8_t offset = imm & (0xf >> (findMsbSet(sizeof(TPElem))));
-            M5_VAR_USED uint8_t tile_idx =
+            [[maybe_unused]] uint8_t tile_idx =
                 imm >> (4 - findMsbSet(sizeof(TPElem)));
-            M5_VAR_USED uint8_t vec_idx = (WOp2 + offset) % eCount;
+            [[maybe_unused]] uint8_t vec_idx = (WOp2 + offset) % eCount;
 
             // Calculate the address
-            M5_VAR_USED Addr EA = XOp1 + XOp3 * sizeof(TPElem);
+            [[maybe_unused]] Addr EA = XOp1 + XOp3 * sizeof(TPElem);
 
             // Calculate the read predicate. One boolean per byte,
             // initialised to all true.
@@ -164,10 +164,10 @@ let {{
             unsigned eCount = ArmStaticInst::getCurSmeVecLen<uint8_t>(
                             xc->tcBase());
 
-            M5_VAR_USED uint8_t vec_index = (WOp2 + imm) % eCount;
+            [[maybe_unused]] uint8_t vec_index = (WOp2 + imm) % eCount;
 
             // Calculate the address
-            M5_VAR_USED Addr EA = XOp1 + imm;
+            [[maybe_unused]] Addr EA = XOp1 + imm;
             '''
 
         iop = InstObjParams(name, "Sme" + Name, "SmeLdrStrOp",
@@ -378,12 +378,12 @@ let {{
                             xc->tcBase());
 
             uint8_t offset = imm & (0xf >> (findMsbSet(sizeof(TPElem))));
-            M5_VAR_USED uint8_t tile_idx =
+            [[maybe_unused]] uint8_t tile_idx =
                 imm >> (4 - findMsbSet(sizeof(TPElem)));
-            M5_VAR_USED uint8_t vec_idx = (WOp2 + offset) % eCount;
+            [[maybe_unused]] uint8_t vec_idx = (WOp2 + offset) % eCount;
 
             // Calculate the address
-            M5_VAR_USED Addr EA = XOp1 + XOp3 * sizeof(TPElem);
+            [[maybe_unused]] Addr EA = XOp1 + XOp3 * sizeof(TPElem);
 
             // Calculate the write predicate. One boolean per byte,
             // initialised to all true.
@@ -446,7 +446,7 @@ let {{
             auto row = getTileHSlice<uint8_t>(ZA, 0, vec_index);
 
             // Calculate the address
-            M5_VAR_USED Addr EA = XOp1 + imm;
+            [[maybe_unused]] Addr EA = XOp1 + imm;
 
             uint8_t data[MaxSmeVecLenInBytes];
 
diff --git a/src/base/gtest/serialization_fixture.hh b/src/base/gtest/serialization_fixture.hh
index 65269e094c..9184e7f572 100644
--- a/src/base/gtest/serialization_fixture.hh
+++ b/src/base/gtest/serialization_fixture.hh
@@ -99,7 +99,7 @@ class SerializationFixture : public ::testing::Test
     {
         // Create the directory
         dirName = generateTempDirName();
-        M5_VAR_USED int success = mkdir(dirName.c_str(), 0775);
+        [[maybe_unused]] int success = mkdir(dirName.c_str(), 0775);
         assert(!(success == -1 && errno != EEXIST));
     }
 
@@ -110,7 +110,7 @@ class SerializationFixture : public ::testing::Test
         // rmdir does not work
         std::remove(getCptPath().c_str());
         // Remove the directory we created on SetUp
-        M5_VAR_USED int success = rmdir(dirName.c_str());
+        [[maybe_unused]] int success = rmdir(dirName.c_str());
         assert(success == 0);
     }
 };
diff --git a/src/base/inet.cc b/src/base/inet.cc
index fc7505ecb7..24c8e7eb9e 100644
--- a/src/base/inet.cc
+++ b/src/base/inet.cc
@@ -301,7 +301,7 @@ Ip6Hdr::extensionLength() const
     const uint8_t *data = bytes() + IP6_HDR_LEN;
     uint8_t nxt = ip6_nxt;
     int len = 0;
-    GEM5_VAR_USED int all = plen();
+    [[maybe_unused]] int all = plen();
 
     while (ip6Extension(nxt)) {
         const Ip6Opt *ext = (const Ip6Opt *)data;
@@ -324,7 +324,7 @@ Ip6Hdr::getExt(uint8_t ext_type) const
     const uint8_t *data = bytes() + IP6_HDR_LEN;
     uint8_t nxt = ip6_nxt;
     Ip6Opt* opt = NULL;
-    GEM5_VAR_USED int all = plen();
+    [[maybe_unused]] int all = plen();
 
     while (ip6Extension(nxt)) {
         opt = (Ip6Opt *)data;
@@ -349,7 +349,7 @@ Ip6Hdr::proto() const
 {
     const uint8_t *data = bytes() + IP6_HDR_LEN;
     uint8_t nxt = ip6_nxt;
-    GEM5_VAR_USED int all = plen();
+    [[maybe_unused]] int all = plen();
 
     while (ip6Extension(nxt)) {
         const Ip6Opt *ext = (const Ip6Opt *)data;
diff --git a/src/base/stats/group.cc b/src/base/stats/group.cc
index 93e7183f0e..addfdf9c28 100644
--- a/src/base/stats/group.cc
+++ b/src/base/stats/group.cc
@@ -72,7 +72,8 @@ Group::regStats()
 
     for (auto &g : statGroups) {
         if (debug::Stats) {
-            M5_VAR_USED const Named *named = dynamic_cast<const Named *>(this);
+            [[maybe_unused]] const Named *named = \
+                dynamic_cast<const Named *>(this);
             DPRINTF(Stats, "%s: regStats in group %s\n",
                     named ? named->name() : "?",
                     g.first);
diff --git a/src/mem/ruby/system/Sequencer.cc b/src/mem/ruby/system/Sequencer.cc
index 3b75619969..82fc19b57c 100644
--- a/src/mem/ruby/system/Sequencer.cc
+++ b/src/mem/ruby/system/Sequencer.cc
@@ -230,7 +230,7 @@ Sequencer::wakeup()
     Cycles current_time = curCycle();
 
     // Check across all outstanding requests
-    GEM5_VAR_USED int total_outstanding = 0;
+    [[maybe_unused]] int total_outstanding = 0;
 
     for (const auto &table_entry : m_RequestTable) {
         for (const auto &seq_req : table_entry.second) {

From e90bd5feb9a7d6672b231190783433bf3f7d6706 Mon Sep 17 00:00:00 2001
From: Richard Cooper <richard.cooper@arm.com>
Date: Thu, 6 Apr 2023 11:41:06 +0100
Subject: [PATCH 381/492] configs: Add `--with-pmu` option to the simple Arm FS
 configs

Add an option to add a PMU to the CPUs in `starter_fs.py` and
`baremetal.py`. By default PMUs will not be added.

Also adds an `--arm-ppi-number` option. Each PMU will be connected to
its core using the specified PPI.

Change-Id: I9cfb5781f211338919550f2320a7133d88801f6a
Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/69957
Tested-by: kokoro <noreply+kokoro@google.com>
Reviewed-by: Jason Lowe-Power <power.jg@gmail.com>
Maintainer: Bobby Bruce <bbruce@ucdavis.edu>
---
 configs/example/arm/baremetal.py  | 26 ++++++++++++++++++++++++++
 configs/example/arm/starter_fs.py | 26 ++++++++++++++++++++++++++
 2 files changed, 52 insertions(+)

diff --git a/configs/example/arm/baremetal.py b/configs/example/arm/baremetal.py
index 8ffd2b48e0..be72ebec4c 100644
--- a/configs/example/arm/baremetal.py
+++ b/configs/example/arm/baremetal.py
@@ -157,6 +157,11 @@ def create(args):
     workload_class = workloads.workload_list.get(args.workload)
     system.workload = workload_class(object_file, system)
 
+    if args.with_pmu:
+        for cluster in system.cpu_cluster:
+            interrupt_numbers = [args.pmu_ppi_number] * len(cluster)
+            cluster.addPMUs(interrupt_numbers)
+
     if args.exit_on_uart_eot:
         for uart in system.realview.uart:
             uart.end_on_eot = True
@@ -182,6 +187,15 @@ def run(args):
             break
 
 
+def arm_ppi_arg(int_num: int) -> int:
+    """Argparse argument parser for valid Arm PPI numbers."""
+    # PPIs (1056 <= int_num <= 1119) are not yet supported by gem5
+    int_num = int(int_num)
+    if 16 <= int_num <= 31:
+        return int_num
+    raise ValueError(f"{int_num} is not a valid Arm PPI number")
+
+
 def main():
     parser = argparse.ArgumentParser(epilog=__doc__)
 
@@ -257,6 +271,18 @@ def main():
         default="stdoutput",
         help="Destination for the Tarmac trace output. [Default: stdoutput]",
     )
+    parser.add_argument(
+        "--with-pmu",
+        action="store_true",
+        help="Add a PMU to each core in the cluster.",
+    )
+    parser.add_argument(
+        "--pmu-ppi-number",
+        type=arm_ppi_arg,
+        default=23,
+        help="The number of the PPI to use to connect each PMU to its core. "
+        "Must be an integer and a valid PPI number (16 <= int_num <= 31).",
+    )
     parser.add_argument(
         "--exit-on-uart-eot",
         action="store_true",
diff --git a/configs/example/arm/starter_fs.py b/configs/example/arm/starter_fs.py
index ebed18864d..07280bd204 100644
--- a/configs/example/arm/starter_fs.py
+++ b/configs/example/arm/starter_fs.py
@@ -177,6 +177,11 @@ def create(args):
     ]
     system.workload.command_line = " ".join(kernel_cmd)
 
+    if args.with_pmu:
+        for cluster in system.cpu_cluster:
+            interrupt_numbers = [args.pmu_ppi_number] * len(cluster)
+            cluster.addPMUs(interrupt_numbers)
+
     return system
 
 
@@ -198,6 +203,15 @@ def run(args):
             break
 
 
+def arm_ppi_arg(int_num: int) -> int:
+    """Argparse argument parser for valid Arm PPI numbers."""
+    # PPIs (1056 <= int_num <= 1119) are not yet supported by gem5
+    int_num = int(int_num)
+    if 16 <= int_num <= 31:
+        return int_num
+    raise ValueError(f"{int_num} is not a valid Arm PPI number")
+
+
 def main():
     parser = argparse.ArgumentParser(epilog=__doc__)
 
@@ -272,6 +286,18 @@ def main():
         default="stdoutput",
         help="Destination for the Tarmac trace output. [Default: stdoutput]",
     )
+    parser.add_argument(
+        "--with-pmu",
+        action="store_true",
+        help="Add a PMU to each core in the cluster.",
+    )
+    parser.add_argument(
+        "--pmu-ppi-number",
+        type=arm_ppi_arg,
+        default=23,
+        help="The number of the PPI to use to connect each PMU to its core. "
+        "Must be an integer and a valid PPI number (16 <= int_num <= 31).",
+    )
     parser.add_argument("--checkpoint", action="store_true")
     parser.add_argument("--restore", type=str, default=None)
 

From 5fc8188ab32542bca485bb80f8519c27d20637e3 Mon Sep 17 00:00:00 2001
From: Nicholas Lindsay <Nicholas.Lindsey@arm.com>
Date: Thu, 7 Jun 2018 14:48:03 +0100
Subject: [PATCH 382/492] arch-arm: Add support to exit the simloop on PMU
 control

PMU enables/disables/resets are often used to identify and demark
regions of interest in a workload intended for sampled
simulation (e.g. fast-forward, warm-up, detailed simulation).

This patch adds the option to exit the simulation loop when these
events occur so additional simulation control can be effected (e.g.
stats dump/reset, CPU switch, etc).

Original patch by Nicholas Lindsay <Nicholas.Lindsey@arm.com>.
Updated by Richard Cooper <richard.cooper@arm.com>.

Change-Id: I19be0def8d52fa036a3eee6bafeb63cc1f41694a
Signed-off-by: Richard Cooper <richard.cooper@arm.com>
Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/70417
Reviewed-by: Jason Lowe-Power <power.jg@gmail.com>
Maintainer: Jason Lowe-Power <power.jg@gmail.com>
Reviewed-by: Bobby Bruce <bbruce@ucdavis.edu>
Tested-by: kokoro <noreply+kokoro@google.com>
---
 src/arch/arm/ArmPMU.py                 |  7 +++++--
 src/arch/arm/pmu.cc                    | 21 +++++++++++++++++++--
 src/arch/arm/pmu.hh                    |  8 +++++++-
 src/python/gem5/simulate/exit_event.py |  9 +++++++++
 4 files changed, 40 insertions(+), 5 deletions(-)

diff --git a/src/arch/arm/ArmPMU.py b/src/arch/arm/ArmPMU.py
index 3eaed077eb..a07239bb29 100644
--- a/src/arch/arm/ArmPMU.py
+++ b/src/arch/arm/ArmPMU.py
@@ -1,5 +1,5 @@
 # -*- mode:python -*-
-# Copyright (c) 2009-2014, 2017, 2020, 2022 Arm Limited
+# Copyright (c) 2009-2014, 2017-2018, 2020, 2022-2023 Arm Limited
 # All rights reserved.
 #
 # The license below extends only to copyright in the software and shall
@@ -215,11 +215,14 @@ class ArmPMU(SimObject):
     platform = Param.Platform(Parent.any, "Platform this device is part of.")
     eventCounters = Param.Int(31, "Number of supported PMU counters")
     interrupt = Param.ArmInterruptPin("PMU interrupt")
+    exitOnPMUControl = Param.Bool(
+        False, "Exit on PMU enable, disable, or reset"
+    )
 
     # 64-bit PMU event counters are officially supported when
     # Armv8.5-A FEAT_PMUv3p5 is implemented. This parameter is not a
     # full implementation of FEAT_PMUv3p5.
     use64bitCounters = Param.Bool(
         False,
-        "Choose whether to use 64-bit or " "32-bit PMEVCNTR<n>_EL0 registers.",
+        "Choose whether to use 64-bit or 32-bit PMEVCNTR<n>_EL0 registers.",
     )
diff --git a/src/arch/arm/pmu.cc b/src/arch/arm/pmu.cc
index 89dc2c8412..efd641797f 100644
--- a/src/arch/arm/pmu.cc
+++ b/src/arch/arm/pmu.cc
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2011-2014, 2017-2019, 2022 Arm Limited
+ * Copyright (c) 2011-2014, 2017-2019, 2022-2023 Arm Limited
  * All rights reserved
  *
  * The license below extends only to copyright in the software and shall
@@ -46,6 +46,7 @@
 #include "dev/arm/base_gic.hh"
 #include "dev/arm/generic_timer.hh"
 #include "params/ArmPMU.hh"
+#include "sim/sim_exit.hh"
 
 namespace gem5
 {
@@ -66,7 +67,8 @@ PMU::PMU(const ArmPMUParams &p)
       cycleCounterEventId(p.cycleEventId),
       swIncrementEvent(nullptr),
       reg_pmcr_conf(0),
-      interrupt(nullptr)
+      interrupt(nullptr),
+      exitOnPMUControl(p.exitOnPMUControl)
 {
     DPRINTF(PMUVerbose, "Initializing the PMU.\n");
 
@@ -412,6 +414,21 @@ PMU::setControlReg(PMCR_t val)
     if (reg_pmcr.d != val.d)
         clock_remainder = 0;
 
+    // Optionally exit the simulation on various PMU control events.
+    // Exit on enable/disable takes precedence over exit on reset.
+    if (exitOnPMUControl) {
+        if (!reg_pmcr.e && val.e) {
+            inform("Exiting simulation: PMU enable detected");
+            exitSimLoop("performance counter enabled", 0);
+        } else if (reg_pmcr.e && !val.e) {
+            inform("Exiting simulation: PMU disable detected");
+            exitSimLoop("performance counter disabled", 0);
+        } else if (val.p) {
+            inform("Exiting simulation: PMU reset detected");
+            exitSimLoop("performance counter reset", 0);
+        }
+    }
+
     reg_pmcr = val & reg_pmcr_wr_mask;
     updateAllCounters();
 }
diff --git a/src/arch/arm/pmu.hh b/src/arch/arm/pmu.hh
index ec60c6b7f5..49990311c0 100644
--- a/src/arch/arm/pmu.hh
+++ b/src/arch/arm/pmu.hh
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2011-2014, 2017-2018, 2022 Arm Limited
+ * Copyright (c) 2011-2014, 2017-2018, 2022-2023 Arm Limited
  * All rights reserved
  *
  * The license below extends only to copyright in the software and shall
@@ -632,6 +632,12 @@ class PMU : public SimObject, public ArmISA::BaseISADevice
      * List of event types supported by this PMU.
      */
     std::map<EventTypeId, std::shared_ptr<PMUEvent>> eventMap;
+
+    /**
+     * Exit simloop on PMU reset or disable
+     */
+    const bool exitOnPMUControl;
+
 };
 
 } // namespace ArmISA
diff --git a/src/python/gem5/simulate/exit_event.py b/src/python/gem5/simulate/exit_event.py
index 605fb6e556..61462e47aa 100644
--- a/src/python/gem5/simulate/exit_event.py
+++ b/src/python/gem5/simulate/exit_event.py
@@ -49,6 +49,9 @@ class ExitEvent(Enum):
     )
     SIMPOINT_BEGIN = "simpoint begins"
     MAX_INSTS = "number of instructions reached"
+    PERF_COUNTER_ENABLE = "performance counter enabled"
+    PERF_COUNTER_DISABLE = "performance counter disabled"
+    PERF_COUNTER_RESET = "performance counter reset"
 
     @classmethod
     def translate_exit_status(cls, exit_string: str) -> "ExitEvent":
@@ -90,6 +93,12 @@ class ExitEvent(Enum):
             return ExitEvent.SIMPOINT_BEGIN
         elif exit_string == "a thread reached the max instruction count":
             return ExitEvent.MAX_INSTS
+        elif exit_string == "performance counter enabled":
+            return ExitEvent.PERF_COUNTER_ENABLE
+        elif exit_string == "performance counter disabled":
+            return ExitEvent.PERF_COUNTER_DISABLE
+        elif exit_string == "performance counter reset":
+            return ExitEvent.PERF_COUNTER_RESET
         elif exit_string.endswith("will terminate the simulation.\n"):
             # This is for the traffic generator exit event
             return ExitEvent.EXIT

From add5e51e498e27aabee0f41b6c2a1573d24b1aa4 Mon Sep 17 00:00:00 2001
From: Richard Cooper <richard.cooper@arm.com>
Date: Wed, 3 May 2023 18:36:00 +0100
Subject: [PATCH 383/492] arch-arm: Add support to exit the simloop on PMU
 interrupt

Add an option `exitOnPMUInterrupt` to ArmPMU.

The PMU is often used to identify and demark regions of interest in a
workload intended for sampled simulation (e.g. fast-forward, warm-up,
detailed simulation). Often the PMU is enabled and disabled to demark
these regions, but for some workloads PMU interrupts are used to count
committed instructions directly.

This patch adds the option to exit the simulation loop when a PMU
interrupt is triggered so additional simulation control can be
effected (e.g. stats dump/reset, CPU switch, etc).

Change-Id: Ife02fe8e467dec91a2d4fda3f7dc9540a092f1ec
Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/69958
Maintainer: Jason Lowe-Power <power.jg@gmail.com>
Tested-by: kokoro <noreply+kokoro@google.com>
Reviewed-by: Jason Lowe-Power <power.jg@gmail.com>
---
 src/arch/arm/ArmPMU.py                 | 1 +
 src/arch/arm/pmu.cc                    | 7 ++++++-
 src/arch/arm/pmu.hh                    | 4 ++++
 src/python/gem5/simulate/exit_event.py | 3 +++
 4 files changed, 14 insertions(+), 1 deletion(-)

diff --git a/src/arch/arm/ArmPMU.py b/src/arch/arm/ArmPMU.py
index a07239bb29..a4a2ebe843 100644
--- a/src/arch/arm/ArmPMU.py
+++ b/src/arch/arm/ArmPMU.py
@@ -218,6 +218,7 @@ class ArmPMU(SimObject):
     exitOnPMUControl = Param.Bool(
         False, "Exit on PMU enable, disable, or reset"
     )
+    exitOnPMUInterrupt = Param.Bool(False, "Exit on PMU interrupt")
 
     # 64-bit PMU event counters are officially supported when
     # Armv8.5-A FEAT_PMUv3p5 is implemented. This parameter is not a
diff --git a/src/arch/arm/pmu.cc b/src/arch/arm/pmu.cc
index efd641797f..2cb7085ca2 100644
--- a/src/arch/arm/pmu.cc
+++ b/src/arch/arm/pmu.cc
@@ -68,7 +68,8 @@ PMU::PMU(const ArmPMUParams &p)
       swIncrementEvent(nullptr),
       reg_pmcr_conf(0),
       interrupt(nullptr),
-      exitOnPMUControl(p.exitOnPMUControl)
+      exitOnPMUControl(p.exitOnPMUControl),
+      exitOnPMUInterrupt(p.exitOnPMUInterrupt)
 {
     DPRINTF(PMUVerbose, "Initializing the PMU.\n");
 
@@ -677,6 +678,10 @@ PMU::setOverflowStatus(RegVal new_val)
 void
 PMU::raiseInterrupt()
 {
+    if (exitOnPMUInterrupt) {
+        inform("Exiting simulation: PMU interrupt detected");
+        exitSimLoop("performance counter interrupt", 0);
+    }
     if (interrupt) {
         DPRINTF(PMUVerbose, "Delivering PMU interrupt.\n");
         interrupt->raise();
diff --git a/src/arch/arm/pmu.hh b/src/arch/arm/pmu.hh
index 49990311c0..64f8f133f8 100644
--- a/src/arch/arm/pmu.hh
+++ b/src/arch/arm/pmu.hh
@@ -638,6 +638,10 @@ class PMU : public SimObject, public ArmISA::BaseISADevice
      */
     const bool exitOnPMUControl;
 
+    /**
+     * Exit simloop on PMU interrupt
+     */
+    bool exitOnPMUInterrupt;
 };
 
 } // namespace ArmISA
diff --git a/src/python/gem5/simulate/exit_event.py b/src/python/gem5/simulate/exit_event.py
index 61462e47aa..cffe864f06 100644
--- a/src/python/gem5/simulate/exit_event.py
+++ b/src/python/gem5/simulate/exit_event.py
@@ -52,6 +52,7 @@ class ExitEvent(Enum):
     PERF_COUNTER_ENABLE = "performance counter enabled"
     PERF_COUNTER_DISABLE = "performance counter disabled"
     PERF_COUNTER_RESET = "performance counter reset"
+    PERF_COUNTER_INTERRUPT = "performance counter interrupt"
 
     @classmethod
     def translate_exit_status(cls, exit_string: str) -> "ExitEvent":
@@ -99,6 +100,8 @@ class ExitEvent(Enum):
             return ExitEvent.PERF_COUNTER_DISABLE
         elif exit_string == "performance counter reset":
             return ExitEvent.PERF_COUNTER_RESET
+        elif exit_string == "performance counter interrupt":
+            return ExitEvent.PERF_COUNTER_INTERRUPT
         elif exit_string.endswith("will terminate the simulation.\n"):
             # This is for the traffic generator exit event
             return ExitEvent.EXIT

From bc4322ede9b606cd3bce674921f6f208455c5f20 Mon Sep 17 00:00:00 2001
From: Richard Cooper <richard.cooper@arm.com>
Date: Wed, 14 Sep 2022 14:59:15 +0100
Subject: [PATCH 384/492] configs: Add --pmu-{dump,reset}-stats-on to Arm
 baremetal.py.

Add `--pmu-dump-stats-on <event>` and `--pmu-reset-stats-on <event>`
options to the Arm `baremetal.py` config to optionally dump and/or
reset stats on various PMU events.

These options allow the user to specify which PMU events should cause
the dumping or resetting of gem5 stats. The available `<event>`s are
PMU `enable`, `disable`, `reset`, and `interrupt`. Both these CLI
options may be specified multiple times to enable more than one event
to cause a stats dump/reset if desired. Stats are dumped before they
are reset.

These options are useful for sampled simulation workloads (e.g.
SimPoints) which are controlled by the PMU.

Change-Id: Ie2ffe11c6aa1f3a57a58425ccec3681c780065c8
Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/69959
Maintainer: Jason Lowe-Power <power.jg@gmail.com>
Tested-by: kokoro <noreply+kokoro@google.com>
Reviewed-by: Jason Lowe-Power <power.jg@gmail.com>
---
 configs/example/arm/baremetal.py | 73 ++++++++++++++++++++++++++++++--
 configs/example/arm/devices.py   | 19 ++++++++-
 2 files changed, 87 insertions(+), 5 deletions(-)

diff --git a/configs/example/arm/baremetal.py b/configs/example/arm/baremetal.py
index be72ebec4c..08af3ef435 100644
--- a/configs/example/arm/baremetal.py
+++ b/configs/example/arm/baremetal.py
@@ -44,6 +44,7 @@ import m5
 from m5.util import addToPath
 from m5.objects import *
 from m5.options import *
+from gem5.simulate.exit_event import ExitEvent
 import argparse
 
 m5.util.addToPath("../..")
@@ -72,6 +73,18 @@ cpu_types = {
     ),
 }
 
+pmu_control_events = {
+    "enable": ExitEvent.PERF_COUNTER_ENABLE,
+    "disable": ExitEvent.PERF_COUNTER_DISABLE,
+    "reset": ExitEvent.PERF_COUNTER_RESET,
+}
+
+pmu_interrupt_events = {
+    "interrupt": ExitEvent.PERF_COUNTER_INTERRUPT,
+}
+
+pmu_stats_events = dict(**pmu_control_events, **pmu_interrupt_events)
+
 
 def create_cow_image(name):
     """Helper function to create a Copy-on-Write disk image"""
@@ -158,9 +171,22 @@ def create(args):
     system.workload = workload_class(object_file, system)
 
     if args.with_pmu:
+        enabled_pmu_events = set(
+            (*args.pmu_dump_stats_on, *args.pmu_reset_stats_on)
+        )
+        exit_sim_on_control = bool(
+            enabled_pmu_events & set(pmu_control_events.keys())
+        )
+        exit_sim_on_interrupt = bool(
+            enabled_pmu_events & set(pmu_interrupt_events.keys())
+        )
         for cluster in system.cpu_cluster:
             interrupt_numbers = [args.pmu_ppi_number] * len(cluster)
-            cluster.addPMUs(interrupt_numbers)
+            cluster.addPMUs(
+                interrupt_numbers,
+                exit_sim_on_control=exit_sim_on_control,
+                exit_sim_on_interrupt=exit_sim_on_interrupt,
+            )
 
     if args.exit_on_uart_eot:
         for uart in system.realview.uart:
@@ -174,14 +200,35 @@ def run(args):
     if args.checkpoint:
         print(f"Checkpoint directory: {cptdir}")
 
+    pmu_exit_msgs = tuple(evt.value for evt in pmu_stats_events.values())
+    pmu_stats_dump_msgs = tuple(
+        pmu_stats_events[evt].value for evt in set(args.pmu_dump_stats_on)
+    )
+    pmu_stats_reset_msgs = tuple(
+        pmu_stats_events[evt].value for evt in set(args.pmu_reset_stats_on)
+    )
+
     while True:
         event = m5.simulate()
         exit_msg = event.getCause()
-        if exit_msg == "checkpoint":
-            print("Dropping checkpoint at tick %d" % m5.curTick())
+        if exit_msg == ExitEvent.CHECKPOINT.value:
+            print(f"Dropping checkpoint at tick {m5.curTick():d}")
             cpt_dir = os.path.join(m5.options.outdir, "cpt.%d" % m5.curTick())
             m5.checkpoint(os.path.join(cpt_dir))
             print("Checkpoint done.")
+        elif exit_msg in pmu_exit_msgs:
+            if exit_msg in pmu_stats_dump_msgs:
+                print(
+                    f"Dumping stats at tick {m5.curTick():d}, "
+                    f"due to {exit_msg}"
+                )
+                m5.stats.dump()
+            if exit_msg in pmu_stats_reset_msgs:
+                print(
+                    f"Resetting stats at tick {m5.curTick():d}, "
+                    f"due to {exit_msg}"
+                )
+                m5.stats.reset()
         else:
             print(f"{exit_msg} ({event.getCode()}) @ {m5.curTick()}")
             break
@@ -283,6 +330,26 @@ def main():
         help="The number of the PPI to use to connect each PMU to its core. "
         "Must be an integer and a valid PPI number (16 <= int_num <= 31).",
     )
+    parser.add_argument(
+        "--pmu-dump-stats-on",
+        type=str,
+        default=[],
+        action="append",
+        choices=pmu_stats_events.keys(),
+        help="Specify the PMU events on which to dump the gem5 stats. "
+        "This option may be specified multiple times to enable multiple "
+        "PMU events.",
+    )
+    parser.add_argument(
+        "--pmu-reset-stats-on",
+        type=str,
+        default=[],
+        action="append",
+        choices=pmu_stats_events.keys(),
+        help="Specify the PMU events on which to reset the gem5 stats. "
+        "This option may be specified multiple times to enable multiple "
+        "PMU events.",
+    )
     parser.add_argument(
         "--exit-on-uart-eot",
         action="store_true",
diff --git a/configs/example/arm/devices.py b/configs/example/arm/devices.py
index 02574d2802..6c6474ca2b 100644
--- a/configs/example/arm/devices.py
+++ b/configs/example/arm/devices.py
@@ -1,4 +1,4 @@
-# Copyright (c) 2016-2017, 2019, 2021-2022 Arm Limited
+# Copyright (c) 2016-2017, 2019, 2021-2023 Arm Limited
 # All rights reserved.
 #
 # The license below extends only to copyright in the software and shall
@@ -147,7 +147,13 @@ class ArmCpuCluster(CpuCluster):
             cpu.connectCachedPorts(self.toL2Bus.cpu_side_ports)
         self.toL2Bus.mem_side_ports = self.l2.cpu_side
 
-    def addPMUs(self, ints, events=[]):
+    def addPMUs(
+        self,
+        ints,
+        events=[],
+        exit_sim_on_control=False,
+        exit_sim_on_interrupt=False,
+    ):
         """
         Instantiates 1 ArmPMU per PE. The method is accepting a list of
         interrupt numbers (ints) used by the PMU and a list of events to
@@ -159,12 +165,21 @@ class ArmCpuCluster(CpuCluster):
         :type ints: List[int]
         :param events: Additional events to be measured by the PMUs
         :type events: List[Union[ProbeEvent, SoftwareIncrement]]
+        :param exit_sim_on_control: If true, exit the sim loop when the PMU is
+            enabled, disabled, or reset.
+        :type exit_on_control: bool
+        :param exit_sim_on_interrupt: If true, exit the sim loop when the PMU
+            triggers an interrupt.
+        :type exit_on_control: bool
+
         """
         assert len(ints) == len(self.cpus)
         for cpu, pint in zip(self.cpus, ints):
             int_cls = ArmPPI if pint < 32 else ArmSPI
             for isa in cpu.isa:
                 isa.pmu = ArmPMU(interrupt=int_cls(num=pint))
+                isa.pmu.exitOnPMUControl = exit_sim_on_control
+                isa.pmu.exitOnPMUInterrupt = exit_sim_on_interrupt
                 isa.pmu.addArchEvents(
                     cpu=cpu,
                     itb=cpu.mmu.itb,

From cb15939bdf9ded57434bf880a1e0e303eb01df78 Mon Sep 17 00:00:00 2001
From: Giacomo Travaglini <giacomo.travaglini@arm.com>
Date: Fri, 3 Feb 2023 15:03:06 +0100
Subject: [PATCH 385/492] arch-arm: Replace 0ing of miscRegs with assignment of
 reset value

The reset variable in the MiscRegLUTEntry class defines the per-register
reset value. Rather than simply zeroing the misc registers we should
assign them their reset value when clearing them.

As of now the reset variable is unused so using it is functionally
equivalent of calling memset. This will however change once we start
using the reset field

Signed-off-by: Giacomo Travaglini <giacomo.travaglini@arm.com>
Change-Id: If352501738729927c1c9b300e5b0b8c27ce41b79
Reviewed-by: Richard Cooper <richard.cooper@arm.com>
Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/70457
Tested-by: kokoro <noreply+kokoro@google.com>
Reviewed-by: Jason Lowe-Power <power.jg@gmail.com>
Maintainer: Jason Lowe-Power <power.jg@gmail.com>
---
 src/arch/arm/isa.cc | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/src/arch/arm/isa.cc b/src/arch/arm/isa.cc
index aec824387b..5a0dec58f4 100644
--- a/src/arch/arm/isa.cc
+++ b/src/arch/arm/isa.cc
@@ -141,7 +141,9 @@ ISA::clear()
     }
 
     SCTLR sctlr_rst = miscRegs[MISCREG_SCTLR_RST];
-    memset(miscRegs, 0, sizeof(miscRegs));
+    for (auto idx = 0; idx < NUM_MISCREGS; idx++) {
+        miscRegs[idx] = lookUpMiscReg[idx].reset();
+    }
 
     initID32(p);
 

From 72f08cca89f0abeae70618521925568483960b05 Mon Sep 17 00:00:00 2001
From: Giacomo Travaglini <giacomo.travaglini@arm.com>
Date: Fri, 3 Feb 2023 15:08:56 +0100
Subject: [PATCH 386/492] arch-arm: Make MISCREGs reset value configurable

Signed-off-by: Giacomo Travaglini <giacomo.travaglini@arm.com>
Change-Id: I536065a2de5faeb8ab64391f8ca2aa83fb2cc82f
Reviewed-by: Richard Cooper <richard.cooper@arm.com>
Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/70458
Reviewed-by: Jason Lowe-Power <power.jg@gmail.com>
Maintainer: Jason Lowe-Power <power.jg@gmail.com>
Tested-by: kokoro <noreply+kokoro@google.com>
---
 src/arch/arm/regs/misc.hh | 8 +++++++-
 1 file changed, 7 insertions(+), 1 deletion(-)

diff --git a/src/arch/arm/regs/misc.hh b/src/arch/arm/regs/misc.hh
index bf25ea3144..69d1461ee6 100644
--- a/src/arch/arm/regs/misc.hh
+++ b/src/arch/arm/regs/misc.hh
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2010-2022 Arm Limited
+ * Copyright (c) 2010-2023 Arm Limited
  * All rights reserved
  *
  * The license below extends only to copyright in the software and shall
@@ -1231,6 +1231,12 @@ namespace ArmISA
             return *this;
         }
         chain
+        reset(uint64_t res_val) const
+        {
+            entry._reset = res_val;
+            return *this;
+        }
+        chain
         res0(uint64_t mask) const
         {
             entry._res0 = mask;

From acdf3a63de84cc31025af4ae3976347995e49fd5 Mon Sep 17 00:00:00 2001
From: Giacomo Travaglini <giacomo.travaglini@arm.com>
Date: Fri, 3 Feb 2023 15:00:42 +0100
Subject: [PATCH 387/492] arch-arm: Generalize SCTLR_RST behaviour

This is supposed to be employed as a reset value for SCTLR.
Rather than implementing this misc reg specific feature, we
provide a more general logic for changing the reset value
of any register.

Signed-off-by: Giacomo Travaglini <giacomo.travaglini@arm.com>
Change-Id: Ib61019ec499b35382289fe18740c90eee5de4907
Reviewed-by: Richard Cooper <richard.cooper@arm.com>
Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/70459
Tested-by: kokoro <noreply+kokoro@google.com>
Reviewed-by: Jason Lowe-Power <power.jg@gmail.com>
Maintainer: Jason Lowe-Power <power.jg@gmail.com>
---
 src/arch/arm/isa.cc       | 27 +++++++++++++++++----------
 src/arch/arm/isa.hh       |  7 +++++--
 src/arch/arm/regs/misc.cc |  2 --
 src/arch/arm/regs/misc.hh |  2 --
 4 files changed, 22 insertions(+), 16 deletions(-)

diff --git a/src/arch/arm/isa.cc b/src/arch/arm/isa.cc
index 5a0dec58f4..b0a856e4b0 100644
--- a/src/arch/arm/isa.cc
+++ b/src/arch/arm/isa.cc
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2010-2022 Arm Limited
+ * Copyright (c) 2010-2023 Arm Limited
  * All rights reserved
  *
  * The license below extends only to copyright in the software and shall
@@ -91,8 +91,6 @@ ISA::ISA(const Params &p) : BaseISA(p), system(NULL),
     _regClasses.push_back(&ccRegClass);
     _regClasses.push_back(&miscRegClass);
 
-    miscRegs[MISCREG_SCTLR_RST] = 0;
-
     // Hook up a dummy device if we haven't been configured with a
     // real PMU. By using a dummy device, we don't need to check that
     // the PMU exist every time we try to access a PMU register.
@@ -140,7 +138,6 @@ ISA::clear()
         getMMUPtr(tc)->invalidateMiscReg();
     }
 
-    SCTLR sctlr_rst = miscRegs[MISCREG_SCTLR_RST];
     for (auto idx = 0; idx < NUM_MISCREGS; idx++) {
         miscRegs[idx] = lookUpMiscReg[idx].reset();
     }
@@ -221,11 +218,11 @@ ISA::clear()
     }
 
     // Initialize AArch32 state...
-    clear32(p, sctlr_rst);
+    clear32(p);
 }
 
 void
-ISA::clear32(const ArmISAParams &p, const SCTLR &sctlr_rst)
+ISA::clear32(const ArmISAParams &p)
 {
     CPSR cpsr = 0;
     cpsr.mode = MODE_USER;
@@ -238,9 +235,6 @@ ISA::clear32(const ArmISAParams &p, const SCTLR &sctlr_rst)
     updateRegMap(cpsr);
 
     SCTLR sctlr = 0;
-    sctlr.te = (bool) sctlr_rst.te;
-    sctlr.nmfi = (bool) sctlr_rst.nmfi;
-    sctlr.v = (bool) sctlr_rst.v;
     sctlr.u = 1;
     sctlr.xp = 1;
     sctlr.rao2 = 1;
@@ -249,7 +243,6 @@ ISA::clear32(const ArmISAParams &p, const SCTLR &sctlr_rst)
     sctlr.uci = 1;
     sctlr.dze = 1;
     miscRegs[MISCREG_SCTLR_NS] = sctlr;
-    miscRegs[MISCREG_SCTLR_RST] = sctlr_rst;
     miscRegs[MISCREG_HCPTR] = 0;
 
     miscRegs[MISCREG_CPACR] = 0;
@@ -2084,6 +2077,20 @@ ISA::setMiscReg(RegIndex idx, RegVal val)
     }
 }
 
+RegVal
+ISA::readMiscRegReset(RegIndex idx) const
+{
+    int flat_idx = flattenMiscIndex(idx);
+    return lookUpMiscReg[flat_idx].reset();
+}
+
+void
+ISA::setMiscRegReset(RegIndex idx, RegVal val)
+{
+    int flat_idx = flattenMiscIndex(idx);
+    InitReg(flat_idx).reset(val);
+}
+
 BaseISADevice &
 ISA::getGenericTimer()
 {
diff --git a/src/arch/arm/isa.hh b/src/arch/arm/isa.hh
index 512799feed..5dd1b38b28 100644
--- a/src/arch/arm/isa.hh
+++ b/src/arch/arm/isa.hh
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2010, 2012-2022 ARM Limited
+ * Copyright (c) 2010, 2012-2023 Arm Limited
  * All rights reserved
  *
  * The license below extends only to copyright in the software and shall
@@ -170,7 +170,7 @@ namespace ArmISA
         void clear() override;
 
       protected:
-        void clear32(const ArmISAParams &p, const SCTLR &sctlr_rst);
+        void clear32(const ArmISAParams &p);
         void clear64(const ArmISAParams &p);
         void initID32(const ArmISAParams &p);
         void initID64(const ArmISAParams &p);
@@ -201,6 +201,9 @@ namespace ArmISA
         void setMiscRegNoEffect(RegIndex idx, RegVal val) override;
         void setMiscReg(RegIndex, RegVal val) override;
 
+        RegVal readMiscRegReset(RegIndex) const;
+        void setMiscRegReset(RegIndex, RegVal val);
+
         int
         flattenMiscIndex(int reg) const
         {
diff --git a/src/arch/arm/regs/misc.cc b/src/arch/arm/regs/misc.cc
index 382b63e8bd..e984164fe8 100644
--- a/src/arch/arm/regs/misc.cc
+++ b/src/arch/arm/regs/misc.cc
@@ -2196,8 +2196,6 @@ ISA::initializeMiscRegMetadata()
       .bankedChild();
     InitReg(MISCREG_PMXEVTYPER_PMCCFILTR)
       .mutex();
-    InitReg(MISCREG_SCTLR_RST)
-      .allPrivileges();
     InitReg(MISCREG_SEV_MAILBOX)
       .allPrivileges();
     InitReg(MISCREG_TLBINEEDSYNC)
diff --git a/src/arch/arm/regs/misc.hh b/src/arch/arm/regs/misc.hh
index 69d1461ee6..265a697781 100644
--- a/src/arch/arm/regs/misc.hh
+++ b/src/arch/arm/regs/misc.hh
@@ -93,7 +93,6 @@ namespace ArmISA
         MISCREG_NMRR_MAIR1_NS,
         MISCREG_NMRR_MAIR1_S,
         MISCREG_PMXEVTYPER_PMCCFILTR,
-        MISCREG_SCTLR_RST,
         MISCREG_SEV_MAILBOX,
         MISCREG_TLBINEEDSYNC,
 
@@ -1752,7 +1751,6 @@ namespace ArmISA
         "nmrr_mair1_ns",
         "nmrr_mair1_s",
         "pmxevtyper_pmccfiltr",
-        "sctlr_rst",
         "sev_mailbox",
         "tlbi_needsync",
 

From 0d1161c56ec6907eb1f0267cab4ae5488fa289c8 Mon Sep 17 00:00:00 2001
From: Giacomo Travaglini <giacomo.travaglini@arm.com>
Date: Fri, 3 Feb 2023 16:02:45 +0100
Subject: [PATCH 388/492] arch-arm: Map MVFR0_EL1/MVFR1_EL1 to AArch32 version

Signed-off-by: Giacomo Travaglini <giacomo.travaglini@arm.com>
Change-Id: I28753de7b437be58e5ac891ac2e549bbab6b53b0
Reviewed-by: Richard Cooper <richard.cooper@arm.com>
Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/70460
Tested-by: kokoro <noreply+kokoro@google.com>
Maintainer: Jason Lowe-Power <power.jg@gmail.com>
Reviewed-by: Jason Lowe-Power <power.jg@gmail.com>
---
 src/arch/arm/regs/misc.cc | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/src/arch/arm/regs/misc.cc b/src/arch/arm/regs/misc.cc
index e984164fe8..4221a15aa6 100644
--- a/src/arch/arm/regs/misc.cc
+++ b/src/arch/arm/regs/misc.cc
@@ -3579,10 +3579,12 @@ ISA::initializeMiscRegMetadata()
       .mapsTo(MISCREG_ID_ISAR6);
     InitReg(MISCREG_MVFR0_EL1)
       .faultRead(EL1, HCR_TRAP(tid3))
-      .allPrivileges().exceptUserMode().writes(0);
+      .allPrivileges().exceptUserMode().writes(0)
+      .mapsTo(MISCREG_MVFR0);
     InitReg(MISCREG_MVFR1_EL1)
       .faultRead(EL1, HCR_TRAP(tid3))
-      .allPrivileges().exceptUserMode().writes(0);
+      .allPrivileges().exceptUserMode().writes(0)
+      .mapsTo(MISCREG_MVFR1);
     InitReg(MISCREG_MVFR2_EL1)
       .faultRead(EL1, HCR_TRAP(tid3))
       .allPrivileges().exceptUserMode().writes(0);

From 9dcafac2e7ce98408ee68d2317decd7d64f35989 Mon Sep 17 00:00:00 2001
From: Giacomo Travaglini <giacomo.travaglini@arm.com>
Date: Fri, 3 Feb 2023 19:18:55 +0100
Subject: [PATCH 389/492] arch-arm: Map MIDR_EL1 to AArch32 version

Signed-off-by: Giacomo Travaglini <giacomo.travaglini@arm.com>
Change-Id: Id3ddc18ebfc296389bed6dc7615899bef83178ea
Reviewed-by: Richard Cooper <richard.cooper@arm.com>
Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/70461
Reviewed-by: Jason Lowe-Power <power.jg@gmail.com>
Tested-by: kokoro <noreply+kokoro@google.com>
Maintainer: Jason Lowe-Power <power.jg@gmail.com>
---
 src/arch/arm/isa.cc       | 1 -
 src/arch/arm/regs/misc.cc | 3 ++-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/src/arch/arm/isa.cc b/src/arch/arm/isa.cc
index b0a856e4b0..65d8b97404 100644
--- a/src/arch/arm/isa.cc
+++ b/src/arch/arm/isa.cc
@@ -334,7 +334,6 @@ ISA::initID32(const ArmISAParams &p)
         midr = 0x410fc0f0;
 
     miscRegs[MISCREG_MIDR] = midr;
-    miscRegs[MISCREG_MIDR_EL1] = midr;
     miscRegs[MISCREG_VPIDR] = midr;
 
     miscRegs[MISCREG_ID_ISAR0] = p.id_isar0;
diff --git a/src/arch/arm/regs/misc.cc b/src/arch/arm/regs/misc.cc
index 4221a15aa6..000124c7ad 100644
--- a/src/arch/arm/regs/misc.cc
+++ b/src/arch/arm/regs/misc.cc
@@ -3507,7 +3507,8 @@ ISA::initializeMiscRegMetadata()
 
     // AArch64 registers (Op0=1,3);
     InitReg(MISCREG_MIDR_EL1)
-      .allPrivileges().exceptUserMode().writes(0);
+      .allPrivileges().exceptUserMode().writes(0)
+      .mapsTo(MISCREG_MIDR);
     InitReg(MISCREG_MPIDR_EL1)
       .allPrivileges().exceptUserMode().writes(0);
     InitReg(MISCREG_REVIDR_EL1)

From 1aa8f14f2cef8c52bb8ca529593d013713d7886a Mon Sep 17 00:00:00 2001
From: Giacomo Travaglini <giacomo.travaglini@arm.com>
Date: Tue, 7 Feb 2023 09:25:23 +0000
Subject: [PATCH 390/492] arch-arm: Map MPIDR_EL1 to AArch32 version

As of now the mapping is not actually needed:
the MPIDR and MPIDR_EL1 registes are both read using the same helper
(readMPIDR). In the future we could store the getMPIDR result in the
AArch32 version without the need to re-calculate the fix affinity
numbers

Change-Id: Id42d1994cdd1722f07874ffa7364154cf011e00a
Signed-off-by: Giacomo Travaglini <giacomo.travaglini@arm.com>
Reviewed-by: Richard Cooper <richard.cooper@arm.com>
Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/70462
Reviewed-by: Jason Lowe-Power <power.jg@gmail.com>
Maintainer: Jason Lowe-Power <power.jg@gmail.com>
Tested-by: kokoro <noreply+kokoro@google.com>
---
 src/arch/arm/regs/misc.cc | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/src/arch/arm/regs/misc.cc b/src/arch/arm/regs/misc.cc
index 000124c7ad..706716eb1c 100644
--- a/src/arch/arm/regs/misc.cc
+++ b/src/arch/arm/regs/misc.cc
@@ -3510,7 +3510,8 @@ ISA::initializeMiscRegMetadata()
       .allPrivileges().exceptUserMode().writes(0)
       .mapsTo(MISCREG_MIDR);
     InitReg(MISCREG_MPIDR_EL1)
-      .allPrivileges().exceptUserMode().writes(0);
+      .allPrivileges().exceptUserMode().writes(0)
+      .mapsTo(MISCREG_MPIDR);
     InitReg(MISCREG_REVIDR_EL1)
       .faultRead(EL1, HCR_TRAP(tid1))
       .allPrivileges().exceptUserMode().writes(0);

From 89483caebd1c055bbbcc1f4411e12602483b52be Mon Sep 17 00:00:00 2001
From: Giacomo Travaglini <giacomo.travaglini@arm.com>
Date: Tue, 7 Feb 2023 10:05:41 +0000
Subject: [PATCH 391/492] arch-arm: Map CTR_EL0 to AArch32 version

Change-Id: Ia3e0cafa1bd2a3054b286e79ac378c895d6910e8
Signed-off-by: Giacomo Travaglini <giacomo.travaglini@arm.com>
Reviewed-by: Richard Cooper <richard.cooper@arm.com>
Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/70463
Maintainer: Jason Lowe-Power <power.jg@gmail.com>
Tested-by: kokoro <noreply+kokoro@google.com>
Reviewed-by: Jason Lowe-Power <power.jg@gmail.com>
---
 src/arch/arm/regs/misc.cc | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/src/arch/arm/regs/misc.cc b/src/arch/arm/regs/misc.cc
index 706716eb1c..a31b6dec57 100644
--- a/src/arch/arm/regs/misc.cc
+++ b/src/arch/arm/regs/misc.cc
@@ -3681,7 +3681,8 @@ ISA::initializeMiscRegMetadata()
     InitReg(MISCREG_CTR_EL0)
       .faultRead(EL0, faultCtrEL0)
       .faultRead(EL1, HCR_TRAP(tid2))
-      .reads(1);
+      .reads(1)
+      .mapsTo(MISCREG_CTR);
     InitReg(MISCREG_DCZID_EL0)
       .reads(1);
     InitReg(MISCREG_VPIDR_EL2)

From ec491446f36c5a445243c79abecbb3605fc4f745 Mon Sep 17 00:00:00 2001
From: Giacomo Travaglini <giacomo.travaglini@arm.com>
Date: Fri, 3 Feb 2023 19:22:44 +0100
Subject: [PATCH 392/492] arch-arm: Fix read redirection for MIDR register

This patch is fixing read redirection for the MIDR register
in the following ways:

1) Is allowing a virtualization of the register (via VPIDR)
even in secure mode (available with FEAT_SEL2)
2) Is extending this logic to the AArch64 version (MIDR_EL1)

It is also rewriting the base logic using Armv8 terminology
(checking the EL rather than the mode as an example).

Signed-off-by: Giacomo Travaglini <giacomo.travaglini@arm.com>
Change-Id: I5cf09240206287cab877ea7ff6e46cf823aa8c35
Reviewed-by: Richard Cooper <richard.cooper@arm.com>
Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/70464
Maintainer: Jason Lowe-Power <power.jg@gmail.com>
Reviewed-by: Jason Lowe-Power <power.jg@gmail.com>
Tested-by: kokoro <noreply+kokoro@google.com>
---
 src/arch/arm/isa.cc | 9 ++++-----
 1 file changed, 4 insertions(+), 5 deletions(-)

diff --git a/src/arch/arm/isa.cc b/src/arch/arm/isa.cc
index 65d8b97404..da23e0b8ce 100644
--- a/src/arch/arm/isa.cc
+++ b/src/arch/arm/isa.cc
@@ -864,12 +864,11 @@ ISA::readMiscReg(RegIndex idx)
       case MISCREG_ID_AFR0: // not implemented, so alias MIDR
       case MISCREG_REVIDR:  // not implemented, so alias MIDR
       case MISCREG_MIDR:
-        cpsr = readMiscRegNoEffect(MISCREG_CPSR);
-        scr  = readMiscRegNoEffect(MISCREG_SCR_EL3);
-        if ((cpsr.mode == MODE_HYP) || isSecure(tc)) {
-            return readMiscRegNoEffect(idx);
-        } else {
+      case MISCREG_MIDR_EL1:
+        if (currEL() == EL1 && EL2Enabled(tc)) {
             return readMiscRegNoEffect(MISCREG_VPIDR);
+        } else {
+            return readMiscRegNoEffect(idx);
         }
         break;
       case MISCREG_JOSCR: // Jazelle trivial implementation, RAZ/WI

From c3769affa4ab8b3e16ada6105daefe3d22716d54 Mon Sep 17 00:00:00 2001
From: Giacomo Travaglini <giacomo.travaglini@arm.com>
Date: Fri, 3 Feb 2023 15:38:59 +0100
Subject: [PATCH 393/492] arch-arm: Move MISCREG init logic from ISA to reset
 field

Rather than recomputing the reset value every time a system
reset happens (and the ISA::clear method gets called), we
calculate it once and construction time.

We when simply apply the pre-computed reset value to the miscReg
storage, as implemented by a previous patch [1]

[1]: Change-Id: If352501738729927c1c9b300e5b0b8c27ce41b79

Signed-off-by: Giacomo Travaglini <giacomo.travaglini@arm.com>
Change-Id: Iecffff4878217c38707be4ce7d4746ff95a208b4
Reviewed-by: Richard Cooper <richard.cooper@arm.com>
Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/70465
Tested-by: kokoro <noreply+kokoro@google.com>
Reviewed-by: Jason Lowe-Power <power.jg@gmail.com>
Maintainer: Jason Lowe-Power <power.jg@gmail.com>
---
 src/arch/arm/isa.cc       | 61 ---------------------------------------
 src/arch/arm/regs/misc.cc | 56 ++++++++++++++++++++++++++++++++++-
 2 files changed, 55 insertions(+), 62 deletions(-)

diff --git a/src/arch/arm/isa.cc b/src/arch/arm/isa.cc
index da23e0b8ce..a66a938606 100644
--- a/src/arch/arm/isa.cc
+++ b/src/arch/arm/isa.cc
@@ -150,67 +150,6 @@ ISA::clear()
     // AArch32 or AArch64
     initID64(p);
 
-    // Start with an event in the mailbox
-    miscRegs[MISCREG_SEV_MAILBOX] = 1;
-
-    // Separate Instruction and Data TLBs
-    miscRegs[MISCREG_TLBTR] = 1;
-
-    MVFR0 mvfr0 = 0;
-    mvfr0.advSimdRegisters = 2;
-    mvfr0.singlePrecision = 2;
-    mvfr0.doublePrecision = 2;
-    mvfr0.vfpExceptionTrapping = 0;
-    mvfr0.divide = 1;
-    mvfr0.squareRoot = 1;
-    mvfr0.shortVectors = 1;
-    mvfr0.roundingModes = 1;
-    miscRegs[MISCREG_MVFR0] = mvfr0;
-
-    MVFR1 mvfr1 = 0;
-    mvfr1.flushToZero = 1;
-    mvfr1.defaultNaN = 1;
-    mvfr1.advSimdLoadStore = 1;
-    mvfr1.advSimdInteger = 1;
-    mvfr1.advSimdSinglePrecision = 1;
-    mvfr1.advSimdHalfPrecision = 1;
-    mvfr1.vfpHalfPrecision = 1;
-    miscRegs[MISCREG_MVFR1] = mvfr1;
-
-    // Reset values of PRRR and NMRR are implementation dependent
-
-    // @todo: PRRR and NMRR in secure state?
-    miscRegs[MISCREG_PRRR_NS] =
-        (1 << 19) | // 19
-        (0 << 18) | // 18
-        (0 << 17) | // 17
-        (1 << 16) | // 16
-        (2 << 14) | // 15:14
-        (0 << 12) | // 13:12
-        (2 << 10) | // 11:10
-        (2 << 8)  | // 9:8
-        (2 << 6)  | // 7:6
-        (2 << 4)  | // 5:4
-        (1 << 2)  | // 3:2
-        0;          // 1:0
-
-    miscRegs[MISCREG_NMRR_NS] =
-        (1 << 30) | // 31:30
-        (0 << 26) | // 27:26
-        (0 << 24) | // 25:24
-        (3 << 22) | // 23:22
-        (2 << 20) | // 21:20
-        (0 << 18) | // 19:18
-        (0 << 16) | // 17:16
-        (1 << 14) | // 15:14
-        (0 << 12) | // 13:12
-        (2 << 10) | // 11:10
-        (0 << 8)  | // 9:8
-        (3 << 6)  | // 7:6
-        (2 << 4)  | // 5:4
-        (0 << 2)  | // 3:2
-        0;          // 1:0
-
     if (FullSystem && system->highestELIs64()) {
         // Initialize AArch64 state
         clear64(p);
diff --git a/src/arch/arm/regs/misc.cc b/src/arch/arm/regs/misc.cc
index a31b6dec57..3b23a204c6 100644
--- a/src/arch/arm/regs/misc.cc
+++ b/src/arch/arm/regs/misc.cc
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2010-2013, 2015-2022 Arm Limited
+ * Copyright (c) 2010-2013, 2015-2023 Arm Limited
  * All rights reserved
  *
  * The license below extends only to copyright in the software and shall
@@ -2155,8 +2155,31 @@ ISA::initializeMiscRegMetadata()
     InitReg(MISCREG_FPSCR)
       .allPrivileges();
     InitReg(MISCREG_MVFR1)
+      .reset([] () {
+        MVFR1 mvfr1 = 0;
+        mvfr1.flushToZero = 1;
+        mvfr1.defaultNaN = 1;
+        mvfr1.advSimdLoadStore = 1;
+        mvfr1.advSimdInteger = 1;
+        mvfr1.advSimdSinglePrecision = 1;
+        mvfr1.advSimdHalfPrecision = 1;
+        mvfr1.vfpHalfPrecision = 1;
+        return mvfr1;
+      }())
       .allPrivileges();
     InitReg(MISCREG_MVFR0)
+      .reset([] () {
+        MVFR0 mvfr0 = 0;
+        mvfr0.advSimdRegisters = 2;
+        mvfr0.singlePrecision = 2;
+        mvfr0.doublePrecision = 2;
+        mvfr0.vfpExceptionTrapping = 0;
+        mvfr0.divide = 1;
+        mvfr0.squareRoot = 1;
+        mvfr0.shortVectors = 1;
+        mvfr0.roundingModes = 1;
+        return mvfr0;
+      }())
       .allPrivileges();
     InitReg(MISCREG_FPEXC)
       .allPrivileges();
@@ -2197,6 +2220,7 @@ ISA::initializeMiscRegMetadata()
     InitReg(MISCREG_PMXEVTYPER_PMCCFILTR)
       .mutex();
     InitReg(MISCREG_SEV_MAILBOX)
+      .reset(1) // Start with an event in the mailbox
       .allPrivileges();
     InitReg(MISCREG_TLBINEEDSYNC)
       .allPrivileges().exceptUserMode();
@@ -2447,6 +2471,7 @@ ISA::initializeMiscRegMetadata()
     InitReg(MISCREG_TCMTR)
       .allPrivileges().exceptUserMode().writes(0);
     InitReg(MISCREG_TLBTR)
+      .reset(1) // Separate Instruction and Data TLBs
       .allPrivileges().exceptUserMode().writes(0);
     InitReg(MISCREG_MPIDR)
       .allPrivileges().exceptUserMode().writes(0);
@@ -2870,6 +2895,19 @@ ISA::initializeMiscRegMetadata()
       .banked();
     InitReg(MISCREG_PRRR_NS)
       .bankedChild()
+      .reset(
+        (1 << 19) | // 19
+        (0 << 18) | // 18
+        (0 << 17) | // 17
+        (1 << 16) | // 16
+        (2 << 14) | // 15:14
+        (0 << 12) | // 13:12
+        (2 << 10) | // 11:10
+        (2 << 8)  | // 9:8
+        (2 << 6)  | // 7:6
+        (2 << 4)  | // 5:4
+        (1 << 2)  | // 3:2
+        0)
       .privSecure(!aarch32EL3)
       .nonSecure().exceptUserMode();
     InitReg(MISCREG_PRRR_S)
@@ -2888,6 +2926,22 @@ ISA::initializeMiscRegMetadata()
       .banked();
     InitReg(MISCREG_NMRR_NS)
       .bankedChild()
+      .reset(
+        (1 << 30) | // 31:30
+        (0 << 26) | // 27:26
+        (0 << 24) | // 25:24
+        (3 << 22) | // 23:22
+        (2 << 20) | // 21:20
+        (0 << 18) | // 19:18
+        (0 << 16) | // 17:16
+        (1 << 14) | // 15:14
+        (0 << 12) | // 13:12
+        (2 << 10) | // 11:10
+        (0 << 8)  | // 9:8
+        (3 << 6)  | // 7:6
+        (2 << 4)  | // 5:4
+        (0 << 2)  | // 3:2
+        0)
       .privSecure(!aarch32EL3)
       .nonSecure().exceptUserMode();
     InitReg(MISCREG_NMRR_S)

From 7abece9d0f5c2e1479d2dbb033cbeacaa060da84 Mon Sep 17 00:00:00 2001
From: Giacomo Travaglini <giacomo.travaglini@arm.com>
Date: Mon, 6 Feb 2023 08:49:02 +0000
Subject: [PATCH 394/492] arch-arm: Rewrite ISA::initID32 using BitUnions

Signed-off-by: Giacomo Travaglini <giacomo.travaglini@arm.com>
Change-Id: I38460766bb5ed363b176bc6faca8e770a8a5e4c6
Reviewed-by: Richard Cooper <richard.cooper@arm.com>
Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/70466
Maintainer: Jason Lowe-Power <power.jg@gmail.com>
Tested-by: kokoro <noreply+kokoro@google.com>
Reviewed-by: Jason Lowe-Power <power.jg@gmail.com>
---
 src/arch/arm/isa.cc             | 37 ++++++++++++++++-----------------
 src/arch/arm/regs/misc_types.hh | 21 +++++++++++++++++++
 2 files changed, 39 insertions(+), 19 deletions(-)

diff --git a/src/arch/arm/isa.cc b/src/arch/arm/isa.cc
index a66a938606..4033d0ff4f 100644
--- a/src/arch/arm/isa.cc
+++ b/src/arch/arm/isa.cc
@@ -280,8 +280,6 @@ ISA::initID32(const ArmISAParams &p)
     miscRegs[MISCREG_ID_ISAR2] = p.id_isar2;
     miscRegs[MISCREG_ID_ISAR3] = p.id_isar3;
     miscRegs[MISCREG_ID_ISAR4] = p.id_isar4;
-    miscRegs[MISCREG_ID_ISAR5] = p.id_isar5;
-    miscRegs[MISCREG_ID_ISAR6] = p.id_isar6;
 
     miscRegs[MISCREG_ID_MMFR0] = p.id_mmfr0;
     miscRegs[MISCREG_ID_MMFR1] = p.id_mmfr1;
@@ -289,24 +287,25 @@ ISA::initID32(const ArmISAParams &p)
     miscRegs[MISCREG_ID_MMFR3] = p.id_mmfr3;
     miscRegs[MISCREG_ID_MMFR4] = p.id_mmfr4;
 
-    /** MISCREG_ID_ISAR5 */
-    // Crypto
-    miscRegs[MISCREG_ID_ISAR5] = insertBits(
-        miscRegs[MISCREG_ID_ISAR5], 19, 4,
-        release->has(ArmExtension::CRYPTO) ? 0x1112 : 0x0);
-    // RDM
-    miscRegs[MISCREG_ID_ISAR5] = insertBits(
-        miscRegs[MISCREG_ID_ISAR5], 27, 24,
-        release->has(ArmExtension::FEAT_RDM) ? 0x1 : 0x0);
-    // FCMA
-    miscRegs[MISCREG_ID_ISAR5] = insertBits(
-        miscRegs[MISCREG_ID_ISAR5], 31, 28,
-        release->has(ArmExtension::FEAT_FCMA) ? 0x1 : 0x0);
+    ISAR5 isar5 = p.id_isar5;
+    if (release->has(ArmExtension::CRYPTO)) {
+        isar5.crc32 = 1;
+        isar5.sha2 = 1;
+        isar5.sha1 = 1;
+        isar5.aes = 2;
+    } else {
+        isar5.crc32 = 0;
+        isar5.sha2 = 0;
+        isar5.sha1 = 0;
+        isar5.aes = 0;
+    }
+    isar5.rdm = release->has(ArmExtension::FEAT_RDM) ? 0x1 : 0x0;
+    isar5.vcma = release->has(ArmExtension::FEAT_FCMA) ? 0x1 : 0x0;
+    miscRegs[MISCREG_ID_ISAR5] = isar5;
 
-    /** ID_ISAR6 */
-    miscRegs[MISCREG_ID_ISAR6] = insertBits(
-        miscRegs[MISCREG_ID_ISAR6], 3, 0,
-        release->has(ArmExtension::FEAT_JSCVT) ? 0x1 : 0x0);
+    ISAR6 isar6 = p.id_isar6;
+    isar6.jscvt = release->has(ArmExtension::FEAT_JSCVT) ? 0x1 : 0x0;
+    miscRegs[MISCREG_ID_ISAR6] = isar6;
 }
 
 void
diff --git a/src/arch/arm/regs/misc_types.hh b/src/arch/arm/regs/misc_types.hh
index 0fe5a00ae8..9af5337a33 100644
--- a/src/arch/arm/regs/misc_types.hh
+++ b/src/arch/arm/regs/misc_types.hh
@@ -75,6 +75,27 @@ namespace ArmISA
         Bitfield<0> sp;         // AArch64
     EndBitUnion(CPSR)
 
+    BitUnion32(ISAR5)
+        Bitfield<31, 28> vcma;
+        Bitfield<27, 24> rdm;
+        Bitfield<19, 16> crc32;
+        Bitfield<15, 12> sha2;
+        Bitfield<11, 8> sha1;
+        Bitfield<7, 4> aes;
+        Bitfield<3, 0> sevl;
+    EndBitUnion(ISAR5)
+
+    BitUnion32(ISAR6)
+        Bitfield<31, 28> clrbhb;
+        Bitfield<27, 24> i8mm;
+        Bitfield<23, 20> bf16;
+        Bitfield<19, 16> specres;
+        Bitfield<15, 12> sb;
+        Bitfield<11, 8> fhm;
+        Bitfield<7, 4> dp;
+        Bitfield<3, 0> jscvt;
+    EndBitUnion(ISAR6)
+
     BitUnion64(AA64DFR0)
         Bitfield<43, 40> tracefilt;
         Bitfield<39, 36> doublelock;

From b7c16f0dad2e9e36af1f435c74095a92177a9dc0 Mon Sep 17 00:00:00 2001
From: Giacomo Travaglini <giacomo.travaglini@arm.com>
Date: Fri, 3 Feb 2023 19:38:24 +0100
Subject: [PATCH 395/492] arch-arm: Remove ISA::initID32

Signed-off-by: Giacomo Travaglini <giacomo.travaglini@arm.com>
Change-Id: I70cce0b9d99ed5fe146e64c6ee55fa8cedf98ac6
Reviewed-by: Richard Cooper <richard.cooper@arm.com>
Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/70467
Tested-by: kokoro <noreply+kokoro@google.com>
Maintainer: Jason Lowe-Power <power.jg@gmail.com>
Reviewed-by: Jason Lowe-Power <power.jg@gmail.com>
---
 src/arch/arm/isa.cc       | 53 ---------------------------------------
 src/arch/arm/isa.hh       |  1 -
 src/arch/arm/regs/misc.cc | 46 +++++++++++++++++++++++++++++++++
 3 files changed, 46 insertions(+), 54 deletions(-)

diff --git a/src/arch/arm/isa.cc b/src/arch/arm/isa.cc
index 4033d0ff4f..8424db582a 100644
--- a/src/arch/arm/isa.cc
+++ b/src/arch/arm/isa.cc
@@ -142,8 +142,6 @@ ISA::clear()
         miscRegs[idx] = lookUpMiscReg[idx].reset();
     }
 
-    initID32(p);
-
     // We always initialize AArch64 ID registers even
     // if we are in AArch32. This is done since if we
     // are in SE mode we don't know if our ArmProcess is
@@ -257,57 +255,6 @@ ISA::clear64(const ArmISAParams &p)
     }
 }
 
-void
-ISA::initID32(const ArmISAParams &p)
-{
-    // Initialize configurable default values
-
-    uint32_t midr;
-    if (p.midr != 0x0)
-        midr = p.midr;
-    else if (highestELIs64)
-        // Cortex-A57 TRM r0p0 MIDR
-        midr = 0x410fd070;
-    else
-        // Cortex-A15 TRM r0p0 MIDR
-        midr = 0x410fc0f0;
-
-    miscRegs[MISCREG_MIDR] = midr;
-    miscRegs[MISCREG_VPIDR] = midr;
-
-    miscRegs[MISCREG_ID_ISAR0] = p.id_isar0;
-    miscRegs[MISCREG_ID_ISAR1] = p.id_isar1;
-    miscRegs[MISCREG_ID_ISAR2] = p.id_isar2;
-    miscRegs[MISCREG_ID_ISAR3] = p.id_isar3;
-    miscRegs[MISCREG_ID_ISAR4] = p.id_isar4;
-
-    miscRegs[MISCREG_ID_MMFR0] = p.id_mmfr0;
-    miscRegs[MISCREG_ID_MMFR1] = p.id_mmfr1;
-    miscRegs[MISCREG_ID_MMFR2] = p.id_mmfr2;
-    miscRegs[MISCREG_ID_MMFR3] = p.id_mmfr3;
-    miscRegs[MISCREG_ID_MMFR4] = p.id_mmfr4;
-
-    ISAR5 isar5 = p.id_isar5;
-    if (release->has(ArmExtension::CRYPTO)) {
-        isar5.crc32 = 1;
-        isar5.sha2 = 1;
-        isar5.sha1 = 1;
-        isar5.aes = 2;
-    } else {
-        isar5.crc32 = 0;
-        isar5.sha2 = 0;
-        isar5.sha1 = 0;
-        isar5.aes = 0;
-    }
-    isar5.rdm = release->has(ArmExtension::FEAT_RDM) ? 0x1 : 0x0;
-    isar5.vcma = release->has(ArmExtension::FEAT_FCMA) ? 0x1 : 0x0;
-    miscRegs[MISCREG_ID_ISAR5] = isar5;
-
-    ISAR6 isar6 = p.id_isar6;
-    isar6.jscvt = release->has(ArmExtension::FEAT_JSCVT) ? 0x1 : 0x0;
-    miscRegs[MISCREG_ID_ISAR6] = isar6;
-}
-
 void
 ISA::initID64(const ArmISAParams &p)
 {
diff --git a/src/arch/arm/isa.hh b/src/arch/arm/isa.hh
index 5dd1b38b28..841964113b 100644
--- a/src/arch/arm/isa.hh
+++ b/src/arch/arm/isa.hh
@@ -172,7 +172,6 @@ namespace ArmISA
       protected:
         void clear32(const ArmISAParams &p);
         void clear64(const ArmISAParams &p);
-        void initID32(const ArmISAParams &p);
         void initID64(const ArmISAParams &p);
 
         void addressTranslation(MMU::ArmTranslationType tran_type,
diff --git a/src/arch/arm/regs/misc.cc b/src/arch/arm/regs/misc.cc
index 3b23a204c6..f5e2502338 100644
--- a/src/arch/arm/regs/misc.cc
+++ b/src/arch/arm/regs/misc.cc
@@ -45,6 +45,7 @@
 #include "cpu/thread_context.hh"
 #include "dev/arm/gic_v3_cpu_interface.hh"
 #include "sim/full_system.hh"
+#include "params/ArmISA.hh"
 
 namespace gem5
 {
@@ -2116,6 +2117,17 @@ ISA::initializeMiscRegMetadata()
     const bool vhe_implemented = release->has(ArmExtension::FEAT_VHE);
     const bool sel2_implemented = release->has(ArmExtension::FEAT_SEL2);
 
+    const Params &p(params());
+
+    uint32_t midr;
+    if (p.midr != 0x0)
+        midr = p.midr;
+    else if (highestELIs64)
+        // Cortex-A57 TRM r0p0 MIDR
+        midr = 0x410fd070;
+    else
+        // Cortex-A15 TRM r0p0 MIDR
+        midr = 0x410fc0f0;
 
     /**
      * Some registers alias with others, and therefore need to be translated.
@@ -2465,6 +2477,7 @@ ISA::initializeMiscRegMetadata()
 
     // AArch32 CP15 registers
     InitReg(MISCREG_MIDR)
+      .reset(midr)
       .allPrivileges().exceptUserMode().writes(0);
     InitReg(MISCREG_CTR)
       .allPrivileges().exceptUserMode().writes(0);
@@ -2488,28 +2501,60 @@ ISA::initializeMiscRegMetadata()
     InitReg(MISCREG_ID_AFR0)
       .allPrivileges().exceptUserMode().writes(0);
     InitReg(MISCREG_ID_MMFR0)
+      .reset(p.id_mmfr0)
       .allPrivileges().exceptUserMode().writes(0);
     InitReg(MISCREG_ID_MMFR1)
+      .reset(p.id_mmfr1)
       .allPrivileges().exceptUserMode().writes(0);
     InitReg(MISCREG_ID_MMFR2)
+      .reset(p.id_mmfr2)
       .allPrivileges().exceptUserMode().writes(0);
     InitReg(MISCREG_ID_MMFR3)
+      .reset(p.id_mmfr3)
       .allPrivileges().exceptUserMode().writes(0);
     InitReg(MISCREG_ID_MMFR4)
+      .reset(p.id_mmfr4)
       .allPrivileges().exceptUserMode().writes(0);
     InitReg(MISCREG_ID_ISAR0)
+      .reset(p.id_isar0)
       .allPrivileges().exceptUserMode().writes(0);
     InitReg(MISCREG_ID_ISAR1)
+      .reset(p.id_isar1)
       .allPrivileges().exceptUserMode().writes(0);
     InitReg(MISCREG_ID_ISAR2)
+      .reset(p.id_isar2)
       .allPrivileges().exceptUserMode().writes(0);
     InitReg(MISCREG_ID_ISAR3)
+      .reset(p.id_isar3)
       .allPrivileges().exceptUserMode().writes(0);
     InitReg(MISCREG_ID_ISAR4)
+      .reset(p.id_isar4)
       .allPrivileges().exceptUserMode().writes(0);
     InitReg(MISCREG_ID_ISAR5)
+      .reset([p,release=release] () {
+        ISAR5 isar5 = p.id_isar5;
+        if (release->has(ArmExtension::CRYPTO)) {
+            isar5.crc32 = 1;
+            isar5.sha2 = 1;
+            isar5.sha1 = 1;
+            isar5.aes = 2;
+        } else {
+            isar5.crc32 = 0;
+            isar5.sha2 = 0;
+            isar5.sha1 = 0;
+            isar5.aes = 0;
+        }
+        isar5.rdm = release->has(ArmExtension::FEAT_RDM) ? 0x1 : 0x0;
+        isar5.vcma = release->has(ArmExtension::FEAT_FCMA) ? 0x1 : 0x0;
+        return isar5;
+      }())
       .allPrivileges().exceptUserMode().writes(0);
     InitReg(MISCREG_ID_ISAR6)
+      .reset([p,release=release] () {
+        ISAR6 isar6 = p.id_isar6;
+        isar6.jscvt = release->has(ArmExtension::FEAT_JSCVT) ? 0x1 : 0x0;
+        return isar6;
+      }())
       .allPrivileges().exceptUserMode().writes(0);
     InitReg(MISCREG_CCSIDR)
       .allPrivileges().exceptUserMode().writes(0);
@@ -2527,6 +2572,7 @@ ISA::initializeMiscRegMetadata()
       .bankedChild()
       .secure().exceptUserMode();
     InitReg(MISCREG_VPIDR)
+      .reset(midr)
       .hyp().monNonSecure();
     InitReg(MISCREG_VMPIDR)
       .hyp().monNonSecure();

From e3db30912e471cb9e460984ae433332d05ea2ba3 Mon Sep 17 00:00:00 2001
From: Giacomo Travaglini <giacomo.travaglini@arm.com>
Date: Mon, 6 Feb 2023 09:58:28 +0000
Subject: [PATCH 396/492] arch-arm: Rewrite ISA::initID64 using BitUnions

Signed-off-by: Giacomo Travaglini <giacomo.travaglini@arm.com>
Change-Id: I3e8c7bdcf86c01eccbd90fccaa2d4306a501ed13
Reviewed-by: Richard Cooper <richard.cooper@arm.com>
Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/70468
Tested-by: kokoro <noreply+kokoro@google.com>
---
 src/arch/arm/isa.cc             | 178 +++++++++++---------------------
 src/arch/arm/regs/misc.cc       |   1 +
 src/arch/arm/regs/misc_types.hh |  12 +++
 3 files changed, 74 insertions(+), 117 deletions(-)

diff --git a/src/arch/arm/isa.cc b/src/arch/arm/isa.cc
index 8424db582a..b1e463a86c 100644
--- a/src/arch/arm/isa.cc
+++ b/src/arch/arm/isa.cc
@@ -261,21 +261,12 @@ ISA::initID64(const ArmISAParams &p)
     // Initialize configurable id registers
     miscRegs[MISCREG_ID_AA64AFR0_EL1] = p.id_aa64afr0_el1;
     miscRegs[MISCREG_ID_AA64AFR1_EL1] = p.id_aa64afr1_el1;
-    miscRegs[MISCREG_ID_AA64DFR0_EL1] =
-        (p.id_aa64dfr0_el1 & 0xfffffffffffff0ffULL) |
-        (p.pmu ?             0x0000000000000100ULL : 0); // Enable PMUv3
+
+    AA64DFR0 dfr0_el1 = p.id_aa64dfr0_el1;
+    dfr0_el1.pmuver = p.pmu ? 1 : 0; // Enable PMUv3
+    miscRegs[MISCREG_ID_AA64DFR0_EL1] = dfr0_el1;
 
     miscRegs[MISCREG_ID_AA64DFR1_EL1] = p.id_aa64dfr1_el1;
-    miscRegs[MISCREG_ID_AA64ISAR0_EL1] = p.id_aa64isar0_el1;
-    miscRegs[MISCREG_ID_AA64ISAR1_EL1] = p.id_aa64isar1_el1;
-    miscRegs[MISCREG_ID_AA64MMFR0_EL1] = p.id_aa64mmfr0_el1;
-    miscRegs[MISCREG_ID_AA64MMFR1_EL1] = p.id_aa64mmfr1_el1;
-    miscRegs[MISCREG_ID_AA64MMFR2_EL1] = p.id_aa64mmfr2_el1;
-
-    miscRegs[MISCREG_ID_DFR0_EL1] =
-        (p.pmu ? 0x03000000ULL : 0); // Enable PMUv3
-
-    miscRegs[MISCREG_ID_DFR0] = miscRegs[MISCREG_ID_DFR0_EL1];
 
     // SVE
     miscRegs[MISCREG_ID_AA64ZFR0_EL1] = 0;  // SVEver 0
@@ -296,22 +287,25 @@ ISA::initID64(const ArmISAParams &p)
     // [15]    SMPS - We don't do priorities in gem5, so disable
     // [14:12] RES0
     // [11:0]  Affinity - we implement per-CPU SME, so set to 0 (no SMCU)
-    miscRegs[MISCREG_SMIDR_EL1] = 0 | // Affinity
-        0 << 15 |                     // SMPS
-        0x41 << 24;                   // Implementer
+    SMIDR smidr_el1 = 0;
+    smidr_el1.affinity = 0;
+    smidr_el1.smps = 0;
+    smidr_el1.implementer = 0x41;
+    miscRegs[MISCREG_SMIDR_EL1] = smidr_el1;
 
-    miscRegs[MISCREG_ID_AA64SMFR0_EL1] = 0;
-    miscRegs[MISCREG_ID_AA64SMFR0_EL1] |= 0x1UL << 32; // F32F32
+    AA64SMFR0 smfr0_el1 = 0;
+    smfr0_el1.f32f32 = 0x1;
     // The following BF16F32 is actually not implemented due to a lack
     // of BF16 support in gem5's fplib. However, as per the SME spec the
     // _only_ allowed value is 0x1.
-    miscRegs[MISCREG_ID_AA64SMFR0_EL1] |= 0x1UL << 34; // BF16F32
-    miscRegs[MISCREG_ID_AA64SMFR0_EL1] |= 0x1UL << 35; // F16F32
-    miscRegs[MISCREG_ID_AA64SMFR0_EL1] |= 0xFUL << 36; // I8I32
-    miscRegs[MISCREG_ID_AA64SMFR0_EL1] |= 0x1UL << 48; // F64F64
-    miscRegs[MISCREG_ID_AA64SMFR0_EL1] |= 0xFUL << 52; // I16I64
-    miscRegs[MISCREG_ID_AA64SMFR0_EL1] |= 0x0UL << 56; // SMEver
-    miscRegs[MISCREG_ID_AA64SMFR0_EL1] |= 0x1UL << 32; // FA64
+    smfr0_el1.b16f32 = 0x1;
+    smfr0_el1.f16f32 = 0x1;
+    smfr0_el1.i8i32 = 0xF;
+    smfr0_el1.f64f64 = 0x1;
+    smfr0_el1.i16i64 = 0xF;
+    smfr0_el1.smEver = 0;
+    smfr0_el1.fa64 = 0x1;
+    miscRegs[MISCREG_ID_AA64SMFR0_EL1] = smfr0_el1;
 
     // We want to support FEAT_SME_FA64. Therefore, we enable it in all
     // SMCR_ELx registers by default. Runtime software might change this
@@ -330,103 +324,53 @@ ISA::initID64(const ArmISAParams &p)
         miscRegs[MISCREG_SMCR_EL1] |= ((smeVL - 1) & 0xF);
     }
 
-    // Enforce consistency with system-level settings...
+    AA64PFR0 pfr0_el1 = 0;
+    pfr0_el1.el3 = release->has(ArmExtension::SECURITY) ? 0x2 : 0x0;
+    pfr0_el1.el2 = release->has(ArmExtension::VIRTUALIZATION) ? 0x2 : 0x0;
+    pfr0_el1.sve = release->has(ArmExtension::FEAT_SVE) ? 0x1 : 0x0;
+    pfr0_el1.sel2 = release->has(ArmExtension::FEAT_SEL2) ? 0x1 : 0x0;
+    miscRegs[MISCREG_ID_AA64PFR0_EL1] = pfr0_el1;
 
-    // EL3
-    miscRegs[MISCREG_ID_AA64PFR0_EL1] = insertBits(
-        miscRegs[MISCREG_ID_AA64PFR0_EL1], 15, 12,
-        release->has(ArmExtension::SECURITY) ? 0x2 : 0x0);
-    // EL2
-    miscRegs[MISCREG_ID_AA64PFR0_EL1] = insertBits(
-        miscRegs[MISCREG_ID_AA64PFR0_EL1], 11, 8,
-        release->has(ArmExtension::VIRTUALIZATION) ? 0x2 : 0x0);
-    // SVE
-    miscRegs[MISCREG_ID_AA64PFR0_EL1] = insertBits(
-        miscRegs[MISCREG_ID_AA64PFR0_EL1], 35, 32,
-        release->has(ArmExtension::FEAT_SVE) ? 0x1 : 0x0);
-    // SME
-    miscRegs[MISCREG_ID_AA64PFR1_EL1] = insertBits(
-        miscRegs[MISCREG_ID_AA64PFR1_EL1], 27, 24,
-        release->has(ArmExtension::FEAT_SME) ? 0x1 : 0x0);
-    // SecEL2
-    miscRegs[MISCREG_ID_AA64PFR0_EL1] = insertBits(
-        miscRegs[MISCREG_ID_AA64PFR0_EL1], 39, 36,
-        release->has(ArmExtension::FEAT_SEL2) ? 0x1 : 0x0);
+    AA64MMFR0 mmfr0_el1 = p.id_aa64mmfr0_el1;
+    mmfr0_el1.asidbits = haveLargeAsid64 ? 0x2 : 0x0;
+    mmfr0_el1.parange = encodePhysAddrRange64(physAddrRange);
+    miscRegs[MISCREG_ID_AA64MMFR0_EL1] = mmfr0_el1;
 
-    // Large ASID support
-    miscRegs[MISCREG_ID_AA64MMFR0_EL1] = insertBits(
-        miscRegs[MISCREG_ID_AA64MMFR0_EL1], 7, 4,
-        haveLargeAsid64 ? 0x2 : 0x0);
-    // Physical address size
-    miscRegs[MISCREG_ID_AA64MMFR0_EL1] = insertBits(
-        miscRegs[MISCREG_ID_AA64MMFR0_EL1], 3, 0,
-        encodePhysAddrRange64(physAddrRange));
+    AA64ISAR0 isar0_el1 = p.id_aa64isar0_el1;
+    if (release->has(ArmExtension::CRYPTO)) {
+        isar0_el1.crc32 = 1;
+        isar0_el1.sha2 = 1;
+        isar0_el1.sha1 = 1;
+        isar0_el1.aes = 2;
+    } else {
+        isar0_el1.crc32 = 0;
+        isar0_el1.sha2 = 0;
+        isar0_el1.sha1 = 0;
+        isar0_el1.aes = 0;
+    }
+    isar0_el1.atomic = release->has(ArmExtension::FEAT_LSE) ? 0x2 : 0x0;
+    isar0_el1.rdm = release->has(ArmExtension::FEAT_RDM) ? 0x1 : 0x0;
+    isar0_el1.tme = release->has(ArmExtension::TME) ? 0x1 : 0x0;
+    miscRegs[MISCREG_ID_AA64ISAR0_EL1] = isar0_el1;
 
-    /** MISCREG_ID_AA64ISAR0_EL1 */
-    // Crypto
-    miscRegs[MISCREG_ID_AA64ISAR0_EL1] = insertBits(
-        miscRegs[MISCREG_ID_AA64ISAR0_EL1], 19, 4,
-        release->has(ArmExtension::CRYPTO) ? 0x1112 : 0x0);
-    // LSE
-    miscRegs[MISCREG_ID_AA64ISAR0_EL1] = insertBits(
-        miscRegs[MISCREG_ID_AA64ISAR0_EL1], 23, 20,
-        release->has(ArmExtension::FEAT_LSE) ? 0x2 : 0x0);
-    // RDM
-    miscRegs[MISCREG_ID_AA64ISAR0_EL1] = insertBits(
-        miscRegs[MISCREG_ID_AA64ISAR0_EL1], 31, 28,
-        release->has(ArmExtension::FEAT_RDM) ? 0x1 : 0x0);
+    AA64ISAR1 isar1_el1 = p.id_aa64isar1_el1;
+    isar1_el1.apa = release->has(ArmExtension::FEAT_PAuth) ? 0x1 : 0x0;
+    isar1_el1.jscvt = release->has(ArmExtension::FEAT_JSCVT) ? 0x1 : 0x0;
+    isar1_el1.fcma = release->has(ArmExtension::FEAT_FCMA) ? 0x1 : 0x0;
+    isar1_el1.gpa = release->has(ArmExtension::FEAT_PAuth) ? 0x1 : 0x0;
+    miscRegs[MISCREG_ID_AA64ISAR1_EL1] = isar1_el1;
 
-    /** MISCREG_ID_AA64ISAR1_EL1 */
-    // PAuth, APA
-    miscRegs[MISCREG_ID_AA64ISAR1_EL1] = insertBits(
-        miscRegs[MISCREG_ID_AA64ISAR1_EL1], 7, 4,
-        release->has(ArmExtension::FEAT_PAuth) ? 0x1 : 0x0);
-    // JSCVT
-    miscRegs[MISCREG_ID_AA64ISAR1_EL1] = insertBits(
-        miscRegs[MISCREG_ID_AA64ISAR1_EL1], 15, 12,
-        release->has(ArmExtension::FEAT_JSCVT) ? 0x1 : 0x0);
-    // FCMA
-    miscRegs[MISCREG_ID_AA64ISAR1_EL1] = insertBits(
-        miscRegs[MISCREG_ID_AA64ISAR1_EL1], 19, 16,
-        release->has(ArmExtension::FEAT_FCMA) ? 0x1 : 0x0);
-    // PAuth, GPA
-    miscRegs[MISCREG_ID_AA64ISAR1_EL1] = insertBits(
-        miscRegs[MISCREG_ID_AA64ISAR1_EL1], 27, 24,
-        release->has(ArmExtension::FEAT_PAuth) ? 0x1 : 0x0);
+    AA64MMFR1 mmfr1_el1 = p.id_aa64mmfr1_el1;
+    mmfr1_el1.vmidbits = release->has(ArmExtension::FEAT_VMID16) ? 0x2 : 0x0;
+    mmfr1_el1.vh = release->has(ArmExtension::FEAT_VHE) ? 0x1 : 0x0;
+    mmfr1_el1.hpds = release->has(ArmExtension::FEAT_HPDS) ? 0x1 : 0x0;
+    mmfr1_el1.pan = release->has(ArmExtension::FEAT_PAN) ? 0x1 : 0x0;
+    miscRegs[MISCREG_ID_AA64MMFR1_EL1] = mmfr1_el1;
 
-    /** MISCREG_ID_AA64MMFR1_EL1 */
-    // VMID16
-    miscRegs[MISCREG_ID_AA64MMFR1_EL1] = insertBits(
-        miscRegs[MISCREG_ID_AA64MMFR1_EL1], 7, 4,
-        release->has(ArmExtension::FEAT_VMID16) ? 0x2 : 0x0);
-    // VHE
-    miscRegs[MISCREG_ID_AA64MMFR1_EL1] = insertBits(
-        miscRegs[MISCREG_ID_AA64MMFR1_EL1], 11, 8,
-        release->has(ArmExtension::FEAT_VHE) ? 0x1 : 0x0);
-    // HPDS
-    miscRegs[MISCREG_ID_AA64MMFR1_EL1] = insertBits(
-        miscRegs[MISCREG_ID_AA64MMFR1_EL1], 15, 12,
-        release->has(ArmExtension::FEAT_HPDS) ? 0x1 : 0x0);
-    // PAN
-    miscRegs[MISCREG_ID_AA64MMFR1_EL1] = insertBits(
-        miscRegs[MISCREG_ID_AA64MMFR1_EL1], 23, 20,
-        release->has(ArmExtension::FEAT_PAN) ? 0x1 : 0x0);
-
-    /** MISCREG_ID_AA64MMFR2_EL1 */
-    // UAO
-    miscRegs[MISCREG_ID_AA64MMFR2_EL1] = insertBits(
-        miscRegs[MISCREG_ID_AA64MMFR2_EL1], 7, 4,
-        release->has(ArmExtension::FEAT_UAO) ? 0x1 : 0x0);
-    // LVA
-    miscRegs[MISCREG_ID_AA64MMFR2_EL1] = insertBits(
-        miscRegs[MISCREG_ID_AA64MMFR2_EL1], 19, 16,
-        release->has(ArmExtension::FEAT_LVA) ? 0x1 : 0x0);
-
-
-    // TME
-    miscRegs[MISCREG_ID_AA64ISAR0_EL1] = insertBits(
-        miscRegs[MISCREG_ID_AA64ISAR0_EL1], 27, 24,
-        release->has(ArmExtension::TME) ? 0x1 : 0x0);
+    AA64MMFR2 mmfr2_el1 = p.id_aa64mmfr2_el1;
+    mmfr2_el1.uao = release->has(ArmExtension::FEAT_UAO) ? 0x1 : 0x0;
+    mmfr2_el1.varange = release->has(ArmExtension::FEAT_LVA) ? 0x1 : 0x0;
+    miscRegs[MISCREG_ID_AA64MMFR2_EL1] = mmfr2_el1;
 }
 
 void
diff --git a/src/arch/arm/regs/misc.cc b/src/arch/arm/regs/misc.cc
index f5e2502338..7cff4caf1c 100644
--- a/src/arch/arm/regs/misc.cc
+++ b/src/arch/arm/regs/misc.cc
@@ -2497,6 +2497,7 @@ ISA::initializeMiscRegMetadata()
     InitReg(MISCREG_ID_PFR1)
       .allPrivileges().exceptUserMode().writes(0);
     InitReg(MISCREG_ID_DFR0)
+      .reset(p.pmu ? 0x03000000 : 0)
       .allPrivileges().exceptUserMode().writes(0);
     InitReg(MISCREG_ID_AFR0)
       .allPrivileges().exceptUserMode().writes(0);
diff --git a/src/arch/arm/regs/misc_types.hh b/src/arch/arm/regs/misc_types.hh
index 9af5337a33..e446ce5fd0 100644
--- a/src/arch/arm/regs/misc_types.hh
+++ b/src/arch/arm/regs/misc_types.hh
@@ -118,6 +118,7 @@ namespace ArmISA
         Bitfield<39, 36> sm3;
         Bitfield<35, 32> sha3;
         Bitfield<31, 28> rdm;
+        Bitfield<27, 24> tme;
         Bitfield<23, 20> atomic;
         Bitfield<19, 16> crc32;
         Bitfield<15, 12> sha2;
@@ -202,6 +203,17 @@ namespace ArmISA
         Bitfield<3, 0> el0;
     EndBitUnion(AA64PFR0)
 
+    BitUnion64(AA64SMFR0)
+        Bitfield<63> fa64;
+        Bitfield<59, 56> smEver;
+        Bitfield<55, 52> i16i64;
+        Bitfield<48> f64f64;
+        Bitfield<39, 36> i8i32;
+        Bitfield<35> f16f32;
+        Bitfield<34> b16f32;
+        Bitfield<32> f32f32;
+    EndBitUnion(AA64SMFR0)
+
     BitUnion32(HDCR)
         Bitfield<27>   tdcc;
         Bitfield<11>   tdra;

From db47e9f9a12c172fb30c03e22439e1d8f3155446 Mon Sep 17 00:00:00 2001
From: Giacomo Travaglini <giacomo.travaglini@arm.com>
Date: Mon, 6 Feb 2023 10:53:14 +0000
Subject: [PATCH 397/492] arch-arm: Remove ISA::initID64

Signed-off-by: Giacomo Travaglini <giacomo.travaglini@arm.com>
Change-Id: I3d03ee15df46fa7d9a9ec439b26e99baf33cbb5e
Reviewed-by: Richard Cooper <richard.cooper@arm.com>
Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/70469
Tested-by: kokoro <noreply+kokoro@google.com>
---
 src/arch/arm/isa.cc       | 124 --------------------------------------
 src/arch/arm/isa.hh       |   1 -
 src/arch/arm/regs/misc.cc | 123 +++++++++++++++++++++++++++++++++++++
 3 files changed, 123 insertions(+), 125 deletions(-)

diff --git a/src/arch/arm/isa.cc b/src/arch/arm/isa.cc
index b1e463a86c..d87e9c5188 100644
--- a/src/arch/arm/isa.cc
+++ b/src/arch/arm/isa.cc
@@ -142,12 +142,6 @@ ISA::clear()
         miscRegs[idx] = lookUpMiscReg[idx].reset();
     }
 
-    // We always initialize AArch64 ID registers even
-    // if we are in AArch32. This is done since if we
-    // are in SE mode we don't know if our ArmProcess is
-    // AArch32 or AArch64
-    initID64(p);
-
     if (FullSystem && system->highestELIs64()) {
         // Initialize AArch64 state
         clear64(p);
@@ -255,124 +249,6 @@ ISA::clear64(const ArmISAParams &p)
     }
 }
 
-void
-ISA::initID64(const ArmISAParams &p)
-{
-    // Initialize configurable id registers
-    miscRegs[MISCREG_ID_AA64AFR0_EL1] = p.id_aa64afr0_el1;
-    miscRegs[MISCREG_ID_AA64AFR1_EL1] = p.id_aa64afr1_el1;
-
-    AA64DFR0 dfr0_el1 = p.id_aa64dfr0_el1;
-    dfr0_el1.pmuver = p.pmu ? 1 : 0; // Enable PMUv3
-    miscRegs[MISCREG_ID_AA64DFR0_EL1] = dfr0_el1;
-
-    miscRegs[MISCREG_ID_AA64DFR1_EL1] = p.id_aa64dfr1_el1;
-
-    // SVE
-    miscRegs[MISCREG_ID_AA64ZFR0_EL1] = 0;  // SVEver 0
-    if (release->has(ArmExtension::SECURITY)) {
-        miscRegs[MISCREG_ZCR_EL3] = sveVL - 1;
-    } else if (release->has(ArmExtension::VIRTUALIZATION)) {
-        miscRegs[MISCREG_ZCR_EL2] = sveVL - 1;
-    } else {
-        miscRegs[MISCREG_ZCR_EL1] = sveVL - 1;
-    }
-
-    // SME
-
-    // Set up the SME SMIDR
-    // [63:32] RES0
-    // [31:24] Implementer - default this to Arm Limited
-    // [23:16] SMCU Revision - set to 0 as we don't model an SMCU
-    // [15]    SMPS - We don't do priorities in gem5, so disable
-    // [14:12] RES0
-    // [11:0]  Affinity - we implement per-CPU SME, so set to 0 (no SMCU)
-    SMIDR smidr_el1 = 0;
-    smidr_el1.affinity = 0;
-    smidr_el1.smps = 0;
-    smidr_el1.implementer = 0x41;
-    miscRegs[MISCREG_SMIDR_EL1] = smidr_el1;
-
-    AA64SMFR0 smfr0_el1 = 0;
-    smfr0_el1.f32f32 = 0x1;
-    // The following BF16F32 is actually not implemented due to a lack
-    // of BF16 support in gem5's fplib. However, as per the SME spec the
-    // _only_ allowed value is 0x1.
-    smfr0_el1.b16f32 = 0x1;
-    smfr0_el1.f16f32 = 0x1;
-    smfr0_el1.i8i32 = 0xF;
-    smfr0_el1.f64f64 = 0x1;
-    smfr0_el1.i16i64 = 0xF;
-    smfr0_el1.smEver = 0;
-    smfr0_el1.fa64 = 0x1;
-    miscRegs[MISCREG_ID_AA64SMFR0_EL1] = smfr0_el1;
-
-    // We want to support FEAT_SME_FA64. Therefore, we enable it in all
-    // SMCR_ELx registers by default. Runtime software might change this
-    // later, but given that gem5 doesn't disable instructions based on
-    // this flag we default to the most representative value.
-    miscRegs[MISCREG_SMCR_EL3] = 0x1 << 31;
-    miscRegs[MISCREG_SMCR_EL2] = 0x1 << 31;
-    miscRegs[MISCREG_SMCR_EL1] = 0x1 << 31;
-
-    // Set the vector default vector length
-    if (release->has(ArmExtension::SECURITY)) {
-        miscRegs[MISCREG_SMCR_EL3] |= ((smeVL - 1) & 0xF);
-    } else if (release->has(ArmExtension::VIRTUALIZATION)) {
-        miscRegs[MISCREG_SMCR_EL2] |= ((smeVL - 1) & 0xF);
-    } else {
-        miscRegs[MISCREG_SMCR_EL1] |= ((smeVL - 1) & 0xF);
-    }
-
-    AA64PFR0 pfr0_el1 = 0;
-    pfr0_el1.el3 = release->has(ArmExtension::SECURITY) ? 0x2 : 0x0;
-    pfr0_el1.el2 = release->has(ArmExtension::VIRTUALIZATION) ? 0x2 : 0x0;
-    pfr0_el1.sve = release->has(ArmExtension::FEAT_SVE) ? 0x1 : 0x0;
-    pfr0_el1.sel2 = release->has(ArmExtension::FEAT_SEL2) ? 0x1 : 0x0;
-    miscRegs[MISCREG_ID_AA64PFR0_EL1] = pfr0_el1;
-
-    AA64MMFR0 mmfr0_el1 = p.id_aa64mmfr0_el1;
-    mmfr0_el1.asidbits = haveLargeAsid64 ? 0x2 : 0x0;
-    mmfr0_el1.parange = encodePhysAddrRange64(physAddrRange);
-    miscRegs[MISCREG_ID_AA64MMFR0_EL1] = mmfr0_el1;
-
-    AA64ISAR0 isar0_el1 = p.id_aa64isar0_el1;
-    if (release->has(ArmExtension::CRYPTO)) {
-        isar0_el1.crc32 = 1;
-        isar0_el1.sha2 = 1;
-        isar0_el1.sha1 = 1;
-        isar0_el1.aes = 2;
-    } else {
-        isar0_el1.crc32 = 0;
-        isar0_el1.sha2 = 0;
-        isar0_el1.sha1 = 0;
-        isar0_el1.aes = 0;
-    }
-    isar0_el1.atomic = release->has(ArmExtension::FEAT_LSE) ? 0x2 : 0x0;
-    isar0_el1.rdm = release->has(ArmExtension::FEAT_RDM) ? 0x1 : 0x0;
-    isar0_el1.tme = release->has(ArmExtension::TME) ? 0x1 : 0x0;
-    miscRegs[MISCREG_ID_AA64ISAR0_EL1] = isar0_el1;
-
-    AA64ISAR1 isar1_el1 = p.id_aa64isar1_el1;
-    isar1_el1.apa = release->has(ArmExtension::FEAT_PAuth) ? 0x1 : 0x0;
-    isar1_el1.jscvt = release->has(ArmExtension::FEAT_JSCVT) ? 0x1 : 0x0;
-    isar1_el1.fcma = release->has(ArmExtension::FEAT_FCMA) ? 0x1 : 0x0;
-    isar1_el1.gpa = release->has(ArmExtension::FEAT_PAuth) ? 0x1 : 0x0;
-    miscRegs[MISCREG_ID_AA64ISAR1_EL1] = isar1_el1;
-
-    AA64MMFR1 mmfr1_el1 = p.id_aa64mmfr1_el1;
-    mmfr1_el1.vmidbits = release->has(ArmExtension::FEAT_VMID16) ? 0x2 : 0x0;
-    mmfr1_el1.vh = release->has(ArmExtension::FEAT_VHE) ? 0x1 : 0x0;
-    mmfr1_el1.hpds = release->has(ArmExtension::FEAT_HPDS) ? 0x1 : 0x0;
-    mmfr1_el1.pan = release->has(ArmExtension::FEAT_PAN) ? 0x1 : 0x0;
-    miscRegs[MISCREG_ID_AA64MMFR1_EL1] = mmfr1_el1;
-
-    AA64MMFR2 mmfr2_el1 = p.id_aa64mmfr2_el1;
-    mmfr2_el1.uao = release->has(ArmExtension::FEAT_UAO) ? 0x1 : 0x0;
-    mmfr2_el1.varange = release->has(ArmExtension::FEAT_LVA) ? 0x1 : 0x0;
-    miscRegs[MISCREG_ID_AA64MMFR2_EL1] = mmfr2_el1;
-}
-
 void
 ISA::startup()
 {
diff --git a/src/arch/arm/isa.hh b/src/arch/arm/isa.hh
index 841964113b..55fbd030ee 100644
--- a/src/arch/arm/isa.hh
+++ b/src/arch/arm/isa.hh
@@ -172,7 +172,6 @@ namespace ArmISA
       protected:
         void clear32(const ArmISAParams &p);
         void clear64(const ArmISAParams &p);
-        void initID64(const ArmISAParams &p);
 
         void addressTranslation(MMU::ArmTranslationType tran_type,
             BaseMMU::Mode mode, Request::Flags flags, RegVal val);
diff --git a/src/arch/arm/regs/misc.cc b/src/arch/arm/regs/misc.cc
index 7cff4caf1c..ef5d74100f 100644
--- a/src/arch/arm/regs/misc.cc
+++ b/src/arch/arm/regs/misc.cc
@@ -3692,36 +3692,101 @@ ISA::initializeMiscRegMetadata()
       .faultRead(EL1, HCR_TRAP(tid3))
       .allPrivileges().exceptUserMode().writes(0);
     InitReg(MISCREG_ID_AA64PFR0_EL1)
+      .reset([this,release=release,tc=tc](){
+          AA64PFR0 pfr0_el1 = 0;
+          pfr0_el1.el0 = 0x2;
+          pfr0_el1.el1 = 0x2;
+          pfr0_el1.el2 = release->has(ArmExtension::VIRTUALIZATION) ? 0x2 : 0x0;
+          pfr0_el1.el3 = release->has(ArmExtension::SECURITY) ? 0x2 : 0x0;
+          pfr0_el1.sve = release->has(ArmExtension::FEAT_SVE) ? 0x1 : 0x0;
+          pfr0_el1.sel2 = release->has(ArmExtension::FEAT_SEL2) ? 0x1 : 0x0;
+          pfr0_el1.gic = FullSystem && getGICv3CPUInterface(tc) ? 0x1 : 0;
+          return pfr0_el1;
+      }())
       .faultRead(EL1, HCR_TRAP(tid3))
       .allPrivileges().exceptUserMode().writes(0);
     InitReg(MISCREG_ID_AA64PFR1_EL1)
       .faultRead(EL1, HCR_TRAP(tid3))
       .allPrivileges().exceptUserMode().writes(0);
     InitReg(MISCREG_ID_AA64DFR0_EL1)
+      .reset([p](){
+          AA64DFR0 dfr0_el1 = p.id_aa64dfr0_el1;
+          dfr0_el1.pmuver = p.pmu ? 1 : 0; // Enable PMUv3
+          return dfr0_el1;
+      }())
       .faultRead(EL1, HCR_TRAP(tid3))
       .allPrivileges().exceptUserMode().writes(0);
     InitReg(MISCREG_ID_AA64DFR1_EL1)
+      .reset(p.id_aa64dfr1_el1)
       .faultRead(EL1, HCR_TRAP(tid3))
       .allPrivileges().exceptUserMode().writes(0);
     InitReg(MISCREG_ID_AA64AFR0_EL1)
+      .reset(p.id_aa64afr0_el1)
       .faultRead(EL1, HCR_TRAP(tid3))
       .allPrivileges().exceptUserMode().writes(0);
     InitReg(MISCREG_ID_AA64AFR1_EL1)
+      .reset(p.id_aa64afr1_el1)
       .faultRead(EL1, HCR_TRAP(tid3))
       .allPrivileges().exceptUserMode().writes(0);
     InitReg(MISCREG_ID_AA64ISAR0_EL1)
+      .reset([p,release=release](){
+          AA64ISAR0 isar0_el1 = p.id_aa64isar0_el1;
+          if (release->has(ArmExtension::CRYPTO)) {
+              isar0_el1.crc32 = 1;
+              isar0_el1.sha2 = 1;
+              isar0_el1.sha1 = 1;
+              isar0_el1.aes = 2;
+          } else {
+              isar0_el1.crc32 = 0;
+              isar0_el1.sha2 = 0;
+              isar0_el1.sha1 = 0;
+              isar0_el1.aes = 0;
+          }
+          isar0_el1.atomic = release->has(ArmExtension::FEAT_LSE) ? 0x2 : 0x0;
+          isar0_el1.rdm = release->has(ArmExtension::FEAT_RDM) ? 0x1 : 0x0;
+          isar0_el1.tme = release->has(ArmExtension::TME) ? 0x1 : 0x0;
+          return isar0_el1;
+      }())
       .faultRead(EL1, HCR_TRAP(tid3))
       .allPrivileges().exceptUserMode().writes(0);
     InitReg(MISCREG_ID_AA64ISAR1_EL1)
+      .reset([p,release=release](){
+          AA64ISAR1 isar1_el1 = p.id_aa64isar1_el1;
+          isar1_el1.apa = release->has(ArmExtension::FEAT_PAuth) ? 0x1 : 0x0;
+          isar1_el1.jscvt = release->has(ArmExtension::FEAT_JSCVT) ? 0x1 : 0x0;
+          isar1_el1.fcma = release->has(ArmExtension::FEAT_FCMA) ? 0x1 : 0x0;
+          isar1_el1.gpa = release->has(ArmExtension::FEAT_PAuth) ? 0x1 : 0x0;
+          return isar1_el1;
+      }())
       .faultRead(EL1, HCR_TRAP(tid3))
       .allPrivileges().exceptUserMode().writes(0);
     InitReg(MISCREG_ID_AA64MMFR0_EL1)
+      .reset([p,asidbits=haveLargeAsid64,parange=physAddrRange](){
+          AA64MMFR0 mmfr0_el1 = p.id_aa64mmfr0_el1;
+          mmfr0_el1.asidbits = asidbits ? 0x2 : 0x0;
+          mmfr0_el1.parange = encodePhysAddrRange64(parange);
+          return mmfr0_el1;
+      }())
       .faultRead(EL1, HCR_TRAP(tid3))
       .allPrivileges().exceptUserMode().writes(0);
     InitReg(MISCREG_ID_AA64MMFR1_EL1)
+      .reset([p,release=release](){
+          AA64MMFR1 mmfr1_el1 = p.id_aa64mmfr1_el1;
+          mmfr1_el1.vmidbits = release->has(ArmExtension::FEAT_VMID16) ? 0x2 : 0x0;
+          mmfr1_el1.vh = release->has(ArmExtension::FEAT_VHE) ? 0x1 : 0x0;
+          mmfr1_el1.hpds = release->has(ArmExtension::FEAT_HPDS) ? 0x1 : 0x0;
+          mmfr1_el1.pan = release->has(ArmExtension::FEAT_PAN) ? 0x1 : 0x0;
+          return mmfr1_el1;
+      }())
       .faultRead(EL1, HCR_TRAP(tid3))
       .allPrivileges().exceptUserMode().writes(0);
     InitReg(MISCREG_ID_AA64MMFR2_EL1)
+      .reset([p,release=release](){
+          AA64MMFR2 mmfr2_el1 = p.id_aa64mmfr2_el1;
+          mmfr2_el1.uao = release->has(ArmExtension::FEAT_UAO) ? 0x1 : 0x0;
+          mmfr2_el1.varange = release->has(ArmExtension::FEAT_LVA) ? 0x1 : 0x0;
+          return mmfr2_el1;
+      }())
       .faultRead(EL1, HCR_TRAP(tid3))
       .allPrivileges().exceptUserMode().writes(0);
 
@@ -5033,9 +5098,11 @@ ISA::initializeMiscRegMetadata()
         .faultRead(EL1, HCR_TRAP(tid3))
         .allPrivileges().exceptUserMode().writes(0);
     InitReg(MISCREG_ZCR_EL3)
+        .reset(sveVL - 1)
         .fault(EL3, faultZcrEL3)
         .mon();
     InitReg(MISCREG_ZCR_EL2)
+        .reset(sveVL - 1)
         .fault(EL2, faultZcrEL2)
         .fault(EL3, faultZcrEL3)
         .hyp().mon();
@@ -5044,6 +5111,7 @@ ISA::initializeMiscRegMetadata()
         .fault(EL3, defaultFaultE2H_EL3)
         .mapsTo(MISCREG_ZCR_EL1);
     InitReg(MISCREG_ZCR_EL1)
+        .reset(sveVL - 1)
         .fault(EL1, faultZcrEL1)
         .fault(EL2, faultZcrEL2)
         .fault(EL3, faultZcrEL3)
@@ -5051,22 +5119,77 @@ ISA::initializeMiscRegMetadata()
 
     // SME
     InitReg(MISCREG_ID_AA64SMFR0_EL1)
+        .reset([](){
+            AA64SMFR0 smfr0_el1 = 0;
+            smfr0_el1.f32f32 = 0x1;
+            // The following BF16F32 is actually not implemented due to a
+            // lack of BF16 support in gem5's fplib. However, as per the
+            // SME spec the _only_ allowed value is 0x1.
+            smfr0_el1.b16f32 = 0x1;
+            smfr0_el1.f16f32 = 0x1;
+            smfr0_el1.i8i32 = 0xF;
+            smfr0_el1.f64f64 = 0x1;
+            smfr0_el1.i16i64 = 0xF;
+            smfr0_el1.smEver = 0;
+            smfr0_el1.fa64 = 0x1;
+            return smfr0_el1;
+        }())
         .allPrivileges().exceptUserMode().writes(0);
     InitReg(MISCREG_SVCR)
         .allPrivileges();
     InitReg(MISCREG_SMIDR_EL1)
+        .reset([](){
+            SMIDR smidr_el1 = 0;
+            smidr_el1.affinity = 0;
+            smidr_el1.smps = 0;
+            smidr_el1.implementer = 0x41;
+            return smidr_el1;
+        }())
         .allPrivileges().exceptUserMode().writes(0);
     InitReg(MISCREG_SMPRI_EL1)
         .allPrivileges().exceptUserMode().reads(1);
     InitReg(MISCREG_SMPRIMAP_EL2)
         .hyp().mon();
     InitReg(MISCREG_SMCR_EL3)
+        .reset([this](){
+            // We want to support FEAT_SME_FA64. Therefore, we enable it in
+            // all SMCR_ELx registers by default. Runtime software might
+            // change this later, but given that gem5 doesn't disable
+            // instructions based on this flag we default to the most
+            // representative value.
+            SMCR smcr_el3 = 0;
+            smcr_el3.fa64 = 1;
+            smcr_el3.len = smeVL - 1;
+            return smcr_el3;
+        }())
         .mon();
     InitReg(MISCREG_SMCR_EL2)
+        .reset([this](){
+            // We want to support FEAT_SME_FA64. Therefore, we enable it in
+            // all SMCR_ELx registers by default. Runtime software might
+            // change this later, but given that gem5 doesn't disable
+            // instructions based on this flag we default to the most
+            // representative value.
+            SMCR smcr_el2 = 0;
+            smcr_el2.fa64 = 1;
+            smcr_el2.len = smeVL - 1;
+            return smcr_el2;
+        }())
         .hyp().mon();
     InitReg(MISCREG_SMCR_EL12)
         .allPrivileges().exceptUserMode();
     InitReg(MISCREG_SMCR_EL1)
+        .reset([this](){
+            // We want to support FEAT_SME_FA64. Therefore, we enable it in
+            // all SMCR_ELx registers by default. Runtime software might
+            // change this later, but given that gem5 doesn't disable
+            // instructions based on this flag we default to the most
+            // representative value.
+            SMCR smcr_el1 = 0;
+            smcr_el1.fa64 = 1;
+            smcr_el1.len = smeVL - 1;
+            return smcr_el1;
+        }())
         .allPrivileges().exceptUserMode();
     InitReg(MISCREG_TPIDR2_EL0)
         .allPrivileges();

From fbca328487a9d792c069f392626e64f198f4d283 Mon Sep 17 00:00:00 2001
From: Giacomo Travaglini <giacomo.travaglini@arm.com>
Date: Mon, 6 Feb 2023 16:20:33 +0000
Subject: [PATCH 398/492] arch-arm: Remove clear32/64 methods

Change-Id: I62d2dc0612298fdb4cdc3bf368e080c8ebebe23a
Signed-off-by: Giacomo Travaglini <giacomo.travaglini@arm.com>
Reviewed-by: Richard Cooper <richard.cooper@arm.com>
Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/70470
Tested-by: kokoro <noreply+kokoro@google.com>
---
 src/arch/arm/isa.cc       | 108 +-------------------------------------
 src/arch/arm/isa.hh       |   3 --
 src/arch/arm/regs/misc.cc |  78 ++++++++++++++++++++++++++-
 3 files changed, 78 insertions(+), 111 deletions(-)

diff --git a/src/arch/arm/isa.cc b/src/arch/arm/isa.cc
index d87e9c5188..ffd9cfc6b8 100644
--- a/src/arch/arm/isa.cc
+++ b/src/arch/arm/isa.cc
@@ -131,8 +131,6 @@ ISA::ISA(const Params &p) : BaseISA(p), system(NULL),
 void
 ISA::clear()
 {
-    const Params &p(params());
-
     // Invalidate cached copies of miscregs in the TLBs
     if (tc) {
         getMMUPtr(tc)->invalidateMiscReg();
@@ -142,111 +140,7 @@ ISA::clear()
         miscRegs[idx] = lookUpMiscReg[idx].reset();
     }
 
-    if (FullSystem && system->highestELIs64()) {
-        // Initialize AArch64 state
-        clear64(p);
-        return;
-    }
-
-    // Initialize AArch32 state...
-    clear32(p);
-}
-
-void
-ISA::clear32(const ArmISAParams &p)
-{
-    CPSR cpsr = 0;
-    cpsr.mode = MODE_USER;
-
-    if (FullSystem) {
-        miscRegs[MISCREG_MVBAR] = system->resetAddr();
-    }
-
-    miscRegs[MISCREG_CPSR] = cpsr;
-    updateRegMap(cpsr);
-
-    SCTLR sctlr = 0;
-    sctlr.u = 1;
-    sctlr.xp = 1;
-    sctlr.rao2 = 1;
-    sctlr.rao3 = 1;
-    sctlr.rao4 = 0xf;  // SCTLR[6:3]
-    sctlr.uci = 1;
-    sctlr.dze = 1;
-    miscRegs[MISCREG_SCTLR_NS] = sctlr;
-    miscRegs[MISCREG_HCPTR] = 0;
-
-    miscRegs[MISCREG_CPACR] = 0;
-
-    miscRegs[MISCREG_FPSID] = p.fpsid;
-
-    if (release->has(ArmExtension::LPAE)) {
-        TTBCR ttbcr = miscRegs[MISCREG_TTBCR_NS];
-        ttbcr.eae = 0;
-        miscRegs[MISCREG_TTBCR_NS] = ttbcr;
-        // Enforce consistency with system-level settings
-        miscRegs[MISCREG_ID_MMFR0] = (miscRegs[MISCREG_ID_MMFR0] & ~0xf) | 0x5;
-    }
-
-    if (release->has(ArmExtension::SECURITY)) {
-        miscRegs[MISCREG_SCTLR_S] = sctlr;
-        miscRegs[MISCREG_SCR] = 0;
-        miscRegs[MISCREG_VBAR_S] = 0;
-    } else {
-        // we're always non-secure
-        miscRegs[MISCREG_SCR] = 1;
-    }
-
-    //XXX We need to initialize the rest of the state.
-}
-
-void
-ISA::clear64(const ArmISAParams &p)
-{
-    CPSR cpsr = 0;
-    Addr rvbar = system->resetAddr();
-    switch (system->highestEL()) {
-        // Set initial EL to highest implemented EL using associated stack
-        // pointer (SP_ELx); set RVBAR_ELx to implementation defined reset
-        // value
-      case EL3:
-        cpsr.mode = MODE_EL3H;
-        miscRegs[MISCREG_RVBAR_EL3] = rvbar;
-        break;
-      case EL2:
-        cpsr.mode = MODE_EL2H;
-        miscRegs[MISCREG_RVBAR_EL2] = rvbar;
-        break;
-      case EL1:
-        cpsr.mode = MODE_EL1H;
-        miscRegs[MISCREG_RVBAR_EL1] = rvbar;
-        break;
-      default:
-        panic("Invalid highest implemented exception level");
-        break;
-    }
-
-    // Initialize rest of CPSR
-    cpsr.daif = 0xf;  // Mask all interrupts
-    cpsr.ss = 0;
-    cpsr.il = 0;
-    miscRegs[MISCREG_CPSR] = cpsr;
-    updateRegMap(cpsr);
-
-    // Initialize other control registers
-    miscRegs[MISCREG_MPIDR_EL1] = 0x80000000;
-    if (release->has(ArmExtension::SECURITY)) {
-        miscRegs[MISCREG_SCTLR_EL3] = 0x30c50830;
-        miscRegs[MISCREG_SCR_EL3]   = 0x00000030;  // RES1 fields
-    } else if (release->has(ArmExtension::VIRTUALIZATION)) {
-        // also  MISCREG_SCTLR_EL2 (by mapping)
-        miscRegs[MISCREG_HSCTLR] = 0x30c50830;
-    } else {
-        // also  MISCREG_SCTLR_EL1 (by mapping)
-        miscRegs[MISCREG_SCTLR_NS] = 0x30d00800 | 0x00050030; // RES1 | init
-        // Always non-secure
-        miscRegs[MISCREG_SCR_EL3] = 1;
-    }
+    updateRegMap(miscRegs[MISCREG_CPSR]);
 }
 
 void
diff --git a/src/arch/arm/isa.hh b/src/arch/arm/isa.hh
index 55fbd030ee..8ed37ba861 100644
--- a/src/arch/arm/isa.hh
+++ b/src/arch/arm/isa.hh
@@ -170,9 +170,6 @@ namespace ArmISA
         void clear() override;
 
       protected:
-        void clear32(const ArmISAParams &p);
-        void clear64(const ArmISAParams &p);
-
         void addressTranslation(MMU::ArmTranslationType tran_type,
             BaseMMU::Mode mode, Request::Flags flags, RegVal val);
         void addressTranslation64(MMU::ArmTranslationType tran_type,
diff --git a/src/arch/arm/regs/misc.cc b/src/arch/arm/regs/misc.cc
index ef5d74100f..7a06da1aeb 100644
--- a/src/arch/arm/regs/misc.cc
+++ b/src/arch/arm/regs/misc.cc
@@ -2079,6 +2079,38 @@ MiscRegLUTEntryInitializer::highest(ArmSystem *const sys) const
     return *this;
 }
 
+static CPSR
+resetCPSR(ArmSystem *system)
+{
+    CPSR cpsr = 0;
+    if (!FullSystem) {
+        cpsr.mode = MODE_USER;
+    } else {
+        switch (system->highestEL()) {
+            // Set initial EL to highest implemented EL using associated stack
+            // pointer (SP_ELx); set RVBAR_ELx to implementation defined reset
+            // value
+          case EL3:
+            cpsr.mode = MODE_EL3H;
+            break;
+          case EL2:
+            cpsr.mode = MODE_EL2H;
+            break;
+          case EL1:
+            cpsr.mode = MODE_EL1H;
+            break;
+          default:
+            panic("Invalid highest implemented exception level");
+            break;
+        }
+
+        // Initialize rest of CPSR
+        cpsr.daif = 0xf;  // Mask all interrupts
+        cpsr.ss = 0;
+        cpsr.il = 0;
+    }
+    return cpsr;
+}
 
 void
 ISA::initializeMiscRegMetadata()
@@ -2143,6 +2175,7 @@ ISA::initializeMiscRegMetadata()
      */
 
     InitReg(MISCREG_CPSR)
+      .reset(resetCPSR(system))
       .allPrivileges();
     InitReg(MISCREG_SPSR)
       .allPrivileges();
@@ -2163,6 +2196,7 @@ ISA::initializeMiscRegMetadata()
     InitReg(MISCREG_ELR_HYP)
       .allPrivileges();
     InitReg(MISCREG_FPSID)
+      .reset(p.fpsid)
       .allPrivileges();
     InitReg(MISCREG_FPSCR)
       .allPrivileges();
@@ -2487,6 +2521,7 @@ ISA::initializeMiscRegMetadata()
       .reset(1) // Separate Instruction and Data TLBs
       .allPrivileges().exceptUserMode().writes(0);
     InitReg(MISCREG_MPIDR)
+      .reset(0x80000000)
       .allPrivileges().exceptUserMode().writes(0);
     InitReg(MISCREG_REVIDR)
       .unimplemented()
@@ -2502,7 +2537,12 @@ ISA::initializeMiscRegMetadata()
     InitReg(MISCREG_ID_AFR0)
       .allPrivileges().exceptUserMode().writes(0);
     InitReg(MISCREG_ID_MMFR0)
-      .reset(p.id_mmfr0)
+      .reset([p,release=release](){
+          RegVal mmfr0 = p.id_mmfr0;
+          if (release->has(ArmExtension::LPAE))
+              mmfr0 = (mmfr0 & ~0xf) | 0x5;
+          return mmfr0;
+      }())
       .allPrivileges().exceptUserMode().writes(0);
     InitReg(MISCREG_ID_MMFR1)
       .reset(p.id_mmfr1)
@@ -2585,11 +2625,37 @@ ISA::initializeMiscRegMetadata()
       .res1(0x00400800 | (SPAN   ? 0 : 0x800000)
                        | (LSMAOE ? 0 :     0x10)
                        | (nTLSMD ? 0 :      0x8));
+
+    auto sctlr_reset = [aarch64=highestELIs64] ()
+    {
+        SCTLR sctlr = 0;
+        if (aarch64) {
+            sctlr.afe = 1;
+            sctlr.tre = 1;
+            sctlr.span = 1;
+            sctlr.uwxn = 1;
+            sctlr.ntwe = 1;
+            sctlr.ntwi = 1;
+            sctlr.cp15ben = 1;
+            sctlr.sa0 = 1;
+        } else {
+            sctlr.u = 1;
+            sctlr.xp = 1;
+            sctlr.uci = 1;
+            sctlr.dze = 1;
+            sctlr.rao2 = 1;
+            sctlr.rao3 = 1;
+            sctlr.rao4 = 0xf;
+        }
+        return sctlr;
+    }();
     InitReg(MISCREG_SCTLR_NS)
+      .reset(sctlr_reset)
       .bankedChild()
       .privSecure(!aarch32EL3)
       .nonSecure().exceptUserMode();
     InitReg(MISCREG_SCTLR_S)
+      .reset(sctlr_reset)
       .bankedChild()
       .secure().exceptUserMode();
     InitReg(MISCREG_ACTLR)
@@ -2606,6 +2672,7 @@ ISA::initializeMiscRegMetadata()
     InitReg(MISCREG_SDCR)
       .mon();
     InitReg(MISCREG_SCR)
+      .reset(release->has(ArmExtension::SECURITY) ? 0 : 1)
       .mon().secure().exceptUserMode()
       .res0(0xff40)  // [31:16], [6]
       .res1(0x0030); // [5:4]
@@ -2614,6 +2681,7 @@ ISA::initializeMiscRegMetadata()
     InitReg(MISCREG_NSACR)
       .allPrivileges().hypWrite(0).privNonSecureWrite(0).exceptUserMode();
     InitReg(MISCREG_HSCTLR)
+      .reset(0x30c50830)
       .hyp().monNonSecure()
       .res0(0x0512c7c0 | (EnDB   ? 0 :     0x2000)
                        | (IESB   ? 0 :   0x200000)
@@ -3043,6 +3111,7 @@ ISA::initializeMiscRegMetadata()
       .bankedChild()
       .secure().exceptUserMode();
     InitReg(MISCREG_MVBAR)
+      .reset(FullSystem ? system->resetAddr() : 0)
       .mon().secure()
       .hypRead(FullSystem && system->highestEL() == EL2)
       .privRead(FullSystem && system->highestEL() == EL1)
@@ -3925,6 +3994,7 @@ ISA::initializeMiscRegMetadata()
       .hyp().mon()
       .mapsTo(MISCREG_HACR);
     InitReg(MISCREG_SCTLR_EL3)
+      .reset(0x30c50830)
       .mon()
       .res0(0x0512c7c0 | (EnDB   ? 0 :     0x2000)
                        | (IESB   ? 0 :   0x200000)
@@ -4369,6 +4439,8 @@ ISA::initializeMiscRegMetadata()
       .fault(EL3, defaultFaultE2H_EL3)
       .mapsTo(MISCREG_VBAR_NS);
     InitReg(MISCREG_RVBAR_EL1)
+      .reset(FullSystem && system->highestEL() == EL1 ?
+          system->resetAddr() : 0)
       .privRead(FullSystem && system->highestEL() == EL1);
     InitReg(MISCREG_ISR_EL1)
       .allPrivileges().exceptUserMode().writes(0);
@@ -4377,10 +4449,14 @@ ISA::initializeMiscRegMetadata()
       .res0(0x7ff)
       .mapsTo(MISCREG_HVBAR);
     InitReg(MISCREG_RVBAR_EL2)
+      .reset(FullSystem && system->highestEL() == EL2 ?
+          system->resetAddr() : 0)
       .hypRead(FullSystem && system->highestEL() == EL2);
     InitReg(MISCREG_VBAR_EL3)
       .mon();
     InitReg(MISCREG_RVBAR_EL3)
+      .reset(FullSystem && system->highestEL() == EL3 ?
+          system->resetAddr() : 0)
       .mon().writes(0);
     InitReg(MISCREG_RMR_EL3)
       .mon();

From 0fa843c9cbeaca05e9b5cdc409b296c669156156 Mon Sep 17 00:00:00 2001
From: Giacomo Travaglini <giacomo.travaglini@arm.com>
Date: Mon, 6 Feb 2023 17:04:47 +0000
Subject: [PATCH 399/492] arch-arm: VMPIDR_EL2 can be used in secure mode as
 well

This was some old code still assuming EL2 is not implemented in secure
mode. This is wrong since the introduction of FEAT_SEL2 in gem5

Change-Id: Ie7e112a83e64f33a98885e88504c2d6bc5070218
Signed-off-by: Giacomo Travaglini <giacomo.travaglini@arm.com>
Reviewed-by: Richard Cooper <richard.cooper@arm.com>
Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/70471
Tested-by: kokoro <noreply+kokoro@google.com>
Reviewed-by: Jason Lowe-Power <power.jg@gmail.com>
Maintainer: Jason Lowe-Power <power.jg@gmail.com>
---
 src/arch/arm/utility.cc | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

diff --git a/src/arch/arm/utility.cc b/src/arch/arm/utility.cc
index 67645691df..05d1cab06c 100644
--- a/src/arch/arm/utility.cc
+++ b/src/arch/arm/utility.cc
@@ -139,8 +139,6 @@ readMPIDR(ArmSystem *arm_sys, ThreadContext *tc)
 {
     const ExceptionLevel current_el = currEL(tc);
 
-    const bool is_secure = isSecureBelowEL3(tc);
-
     switch (current_el) {
       case EL0:
         // Note: in MsrMrs instruction we read the register value before
@@ -150,7 +148,7 @@ readMPIDR(ArmSystem *arm_sys, ThreadContext *tc)
         warn_once("Trying to read MPIDR at EL0\n");
         [[fallthrough]];
       case EL1:
-        if (ArmSystem::haveEL(tc, EL2) && !is_secure)
+        if (EL2Enabled(tc))
             return tc->readMiscReg(MISCREG_VMPIDR_EL2);
         else
             return getMPIDR(arm_sys, tc);

From 331ef9e82bf0a0c73854b3e29725a26ac8e0a068 Mon Sep 17 00:00:00 2001
From: Roger Chang <rogerycchang@google.com>
Date: Thu, 11 May 2023 14:26:56 +0800
Subject: [PATCH 400/492] arch-riscv: Add missing zbkb instructions

Add the following instructions:
pack
packh
packw

Change-Id: I74730c8132187d9a6eb8fc2389c2c28a8a9516df
Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/70537
Tested-by: kokoro <noreply+kokoro@google.com>
Reviewed-by: Jason Lowe-Power <power.jg@gmail.com>
Maintainer: Jason Lowe-Power <power.jg@gmail.com>
---
 src/arch/riscv/isa/decoder.isa | 20 +++++++++++++-------
 1 file changed, 13 insertions(+), 7 deletions(-)

diff --git a/src/arch/riscv/isa/decoder.isa b/src/arch/riscv/isa/decoder.isa
index 6f66c98df0..755be3db39 100644
--- a/src/arch/riscv/isa/decoder.isa
+++ b/src/arch/riscv/isa/decoder.isa
@@ -1282,11 +1282,13 @@ decode QUADRANT default Unknown::unknown() {
                             }
                         }}, IntDivOp);
                     }
-                    0x4: decode RVTYPE {
-                        0x0: rv32_zext_h({{
-                            Rd = Rs1_uh;
-                        }});
-                    }
+                    0x4: pack({{
+                        int xlen = rvSelect(32, 64);
+                        Rd = rvSext(
+                            (bits(Rs2, xlen/2-1, 0) << (xlen / 2)) | \
+                            bits(Rs1, xlen/2-1, 0)
+                        );
+                    }});
                     0x5: min({{
                         Rd_sd = std::min(rvSext(Rs1_sd), rvSext(Rs2_sd));
                     }});
@@ -1402,6 +1404,10 @@ decode QUADRANT default Unknown::unknown() {
                             }
                         }}, IntDivOp);
                     }
+                    0x4: packh({{
+                        // It doesn't need to sign ext as MSB is always 0
+                        Rd = (Rs2_ub << 8) | Rs1_ub;
+                    }});
                     0x5: maxu({{
                         Rd = rvSext(std::max(rvZext(Rs1), rvZext(Rs2)));
                     }});
@@ -1459,8 +1465,8 @@ decode QUADRANT default Unknown::unknown() {
                                 Rd_sd = Rs1_sw/Rs2_sw;
                             }
                         }}, IntDivOp);
-                        0x4: zext_h({{
-                            Rd = Rs1_uh;
+                        0x4: packw({{
+                            Rd_sd = sext<32>((Rs2_uh << 16) | Rs1_uh);
                         }});
                         0x10: sh2add_uw({{
                             Rd = (((uint64_t)Rs1_uw) << 2) + Rs2;

From 27967a40de5df9ecfa36e9784da9ae4b5f6322b0 Mon Sep 17 00:00:00 2001
From: Roger Chang <rogerycchang@google.com>
Date: Mon, 8 May 2023 16:18:48 +0800
Subject: [PATCH 401/492] arch-riscv: Fix the fflags issue for fcvt_d_w,
 fcvt_d_wu, fcvt_d_l fcvt_d_lu

These instructions use type casting methods to convert integer to
float, so the fflags couldn't trace the event of these. It should
use the function xx_to_f64 to convert from integer to float

Change-Id: Idd87306f0ca47b65d3faf17f249568330f374b72
Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/70377
Reviewed-by: Jason Lowe-Power <power.jg@gmail.com>
Tested-by: kokoro <noreply+kokoro@google.com>
Maintainer: Jason Lowe-Power <power.jg@gmail.com>
---
 src/arch/riscv/isa/decoder.isa | 16 ++++++++++++----
 1 file changed, 12 insertions(+), 4 deletions(-)

diff --git a/src/arch/riscv/isa/decoder.isa b/src/arch/riscv/isa/decoder.isa
index 755be3db39..69b3055f1d 100644
--- a/src/arch/riscv/isa/decoder.isa
+++ b/src/arch/riscv/isa/decoder.isa
@@ -2069,22 +2069,30 @@ decode QUADRANT default Unknown::unknown() {
                 0x69: decode CONV_SGN {
                     0x0: fcvt_d_w({{
                         RM_REQUIRED;
-                        Fd = (double)Rs1_sw;
+                        freg_t fd;
+                        fd = freg(i32_to_f64(Rs1_sw));
+                        Fd_bits = fd.v;
                     }}, FloatCvtOp);
                     0x1: fcvt_d_wu({{
                         RM_REQUIRED;
-                        Fd = (double)Rs1_uw;
+                        freg_t fd;
+                        fd = freg(ui32_to_f64(Rs1_uw));
+                        Fd_bits = fd.v;
                     }}, FloatCvtOp);
                     0x2: decode RVTYPE {
                         0x1: fcvt_d_l({{
                             RM_REQUIRED;
-                            Fd = (double)Rs1_sd;
+                            freg_t fd;
+                            fd = freg(i64_to_f64(Rs1_sd));
+                            Fd_bits = fd.v;
                         }}, FloatCvtOp);
                     }
                     0x3: decode RVTYPE {
                         0x1: fcvt_d_lu({{
                             RM_REQUIRED;
-                            Fd = (double)Rs1;
+                            freg_t fd;
+                            fd = freg(ui64_to_f64(Rs1));
+                            Fd_bits = fd.v;
                         }}, FloatCvtOp);
                     }
                 }

From d2aed4f5c51a1556c824e921095c9e57f950c48c Mon Sep 17 00:00:00 2001
From: Roger Chang <rogerycchang@google.com>
Date: Mon, 8 May 2023 14:06:21 +0800
Subject: [PATCH 402/492] arch-riscv: Treat RVC HINT as nops rather than trap

The RVC HINT can be implemented as no-op to ignore them. See the
section 18.7 of RISC-V spec Volume I for more details

Change-Id: I88a62fd5722ac542ecfef5fcb80fef2ce04f010f
Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/70357
Tested-by: kokoro <noreply+kokoro@google.com>
Maintainer: Jason Lowe-Power <power.jg@gmail.com>
Reviewed-by: Jason Lowe-Power <power.jg@gmail.com>
---
 src/arch/riscv/isa/decoder.isa | 37 +++++++++-------------------------
 1 file changed, 10 insertions(+), 27 deletions(-)

diff --git a/src/arch/riscv/isa/decoder.isa b/src/arch/riscv/isa/decoder.isa
index 69b3055f1d..d89a0c9d5e 100644
--- a/src/arch/riscv/isa/decoder.isa
+++ b/src/arch/riscv/isa/decoder.isa
@@ -152,11 +152,9 @@ decode QUADRANT default Unknown::unknown() {
         }}, {{
             if ((RC1 == 0) != (imm == 0)) {
                 if (RC1 == 0) {
-                    return std::make_shared<IllegalInstFault>(
-                            "source reg x0", machInst);
-                } else { // imm == 0
-                    return std::make_shared<IllegalInstFault>(
-                            "immediate = 0", machInst);
+                    // imm != 0 is HINT
+                } else {
+                    // imm == 0 is HINT
                 }
             }
             Rc1_sd = rvSext(Rc1_sd + imm);
@@ -179,10 +177,7 @@ decode QUADRANT default Unknown::unknown() {
         0x2: CIOp::c_li({{
             imm = sext<6>(CIMM5 | (CIMM1 << 5));
         }}, {{
-            if (RC1 == 0) {
-                return std::make_shared<IllegalInstFault>(
-                        "source reg x0", machInst);
-            }
+            // RC1 == 0 is HINT
             Rc1_sd = imm;
         }});
         0x3: decode RC1 {
@@ -202,10 +197,7 @@ decode QUADRANT default Unknown::unknown() {
             default: CIOp::c_lui({{
                 imm = sext<6>(CIMM5 | (CIMM1 << 5)) << 12;
             }}, {{
-                if (RC1 == 0 || RC1 == 2) {
-                    return std::make_shared<IllegalInstFault>(
-                            "source reg x0", machInst);
-                }
+                // RC1 == 0 is HINT
                 if (imm == 0) {
                     return std::make_shared<IllegalInstFault>(
                             "immediate = 0", machInst);
@@ -223,8 +215,7 @@ decode QUADRANT default Unknown::unknown() {
                                 "shmat[5] != 0", machInst);
                     }
                     if (imm == 0) {
-                        return std::make_shared<IllegalInstFault>(
-                                "immediate = 0", machInst);
+                        // C.SRLI64, HINT for RV32/RV64
                     }
                     // The MSB can never be 1, hence no need to sign ext.
                     Rp1 = rvZext(Rp1) >> imm;
@@ -237,8 +228,7 @@ decode QUADRANT default Unknown::unknown() {
                                 "shmat[5] != 0", machInst);
                     }
                     if (imm == 0) {
-                        return std::make_shared<IllegalInstFault>(
-                                "immediate = 0", machInst);
+                        // C.SRAI64, HINT for RV32/RV64
                     }
                     Rp1_sd = rvSext(Rp1_sd) >> imm;
                 }}, uint64_t);
@@ -306,13 +296,9 @@ decode QUADRANT default Unknown::unknown() {
                         "shmat[5] != 0", machInst);
             }
             if (imm == 0) {
-                return std::make_shared<IllegalInstFault>(
-                        "immediate = 0", machInst);
-            }
-            if (RC1 == 0) {
-                return std::make_shared<IllegalInstFault>(
-                        "source reg x0", machInst);
+                // C.SLLI64, HINT for RV32/RV64
             }
+            // RC1 == 0 is HINT
             Rc1 = rvSext(Rc1 << imm);
         }}, uint64_t);
         format CompressedLoad {
@@ -375,10 +361,7 @@ decode QUADRANT default Unknown::unknown() {
                     NPC = rvZext(Rc1);
                 }}, IsIndirectControl, IsUncondControl);
                 default: CROp::c_mv({{
-                    if (RC1 == 0) {
-                        return std::make_shared<IllegalInstFault>(
-                                "source reg x0", machInst);
-                    }
+                    // RC1 == 0 is HINT
                     Rc1 = rvSext(Rc2);
                 }});
             }

From 8dac7f572b2f490e16674cc7ed333dac5c93d280 Mon Sep 17 00:00:00 2001
From: Roger Chang <rogerycchang@google.com>
Date: Mon, 8 May 2023 14:30:04 +0800
Subject: [PATCH 403/492] arch-riscv: Refactor RVC decode flow when
 funct4==0b1001 and op==C2

the compressed instruction 0x901e (c.add zero, t2) should be decoded as
"c_add zero, t2" not c_ebreak

Change-Id: Ib2bd4b4d9739aa27ad290ead313e95b11b1727d1
Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/70358
Reviewed-by: Jason Lowe-Power <power.jg@gmail.com>
Maintainer: Jason Lowe-Power <power.jg@gmail.com>
Tested-by: kokoro <noreply+kokoro@google.com>
---
 src/arch/riscv/isa/decoder.isa | 28 +++++++++++-----------------
 1 file changed, 11 insertions(+), 17 deletions(-)

diff --git a/src/arch/riscv/isa/decoder.isa b/src/arch/riscv/isa/decoder.isa
index d89a0c9d5e..f22efb0bf0 100644
--- a/src/arch/riscv/isa/decoder.isa
+++ b/src/arch/riscv/isa/decoder.isa
@@ -365,27 +365,21 @@ decode QUADRANT default Unknown::unknown() {
                     Rc1 = rvSext(Rc2);
                 }});
             }
-            0x1: decode RC1 {
-                0x0: SystemOp::c_ebreak({{
-                    if (RC2 != 0) {
-                        return std::make_shared<IllegalInstFault>(
-                                "source reg x1", machInst);
-                    }
-                    return std::make_shared<BreakpointFault>(xc->pcState());
-                }}, IsSerializeAfter, IsNonSpeculative, No_OpClass);
-                default: decode RC2 {
-                    0x0: Jump::c_jalr({{
-                        if (RC1 == 0) {
-                            return std::make_shared<IllegalInstFault>(
-                                    "source reg x0", machInst);
-                        }
+            0x1: decode RC2 {
+                0x0: decode RC1 {
+                    0x0: SystemOp::c_ebreak({{
+                        return std::make_shared<BreakpointFault>(
+                            xc->pcState());
+                    }}, IsSerializeAfter, IsNonSpeculative, No_OpClass);
+                    default: Jump::c_jalr({{
                         ra = rvSext(NPC);
                         NPC = rvZext(Rc1);
                     }}, IsIndirectControl, IsUncondControl, IsCall);
-                    default: CompressedROp::c_add({{
-                        Rc1_sd = rvSext(Rc1_sd + Rc2_sd);
-                    }});
                 }
+                default: CompressedROp::c_add({{
+                    // RC1 == 0 is HINT
+                    Rc1_sd = rvSext(Rc1_sd + Rc2_sd);
+                }});
             }
         }
         format CompressedStore {

From ae7476bcdca9c44fc45a057b1204b5be0ccfea4a Mon Sep 17 00:00:00 2001
From: Matthew Poremba <matthew.poremba@amd.com>
Date: Fri, 12 May 2023 18:28:00 -0500
Subject: [PATCH 404/492] arch-gcn3,arch-vega: Fix ds_read2st64_b32

This instruction has two issues. The first is that it should write two
consecutive registers, starting with vdst because it is writing two
dwords. The second is that the data assignment to the lanes from the
dynamic instruction should cast to a U32 type otherwise the array index
goes out of bounds and returns the wrong data.

The first issue was fixed in GCN3 a few years ago in this review:
https://gem5-review.googlesource.com/c/public/gem5/+/32236. This
changeset makes the same change for Vega and applies the U32 cast in
both ISAs.

Tested with rocPRIM unit test. The test was failing before this
changeset and now passes.

Change-Id: Ifb110fc9a36ad198da7eaf86b1e3e37eccd3bb10
Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/70577
Maintainer: Matt Sinclair <mattdsinclair@gmail.com>
Reviewed-by: Matt Sinclair <mattdsinclair@gmail.com>
Tested-by: kokoro <noreply+kokoro@google.com>
---
 src/arch/amdgpu/gcn3/insts/instructions.cc | 4 ++--
 src/arch/amdgpu/vega/insts/instructions.cc | 6 +++---
 2 files changed, 5 insertions(+), 5 deletions(-)

diff --git a/src/arch/amdgpu/gcn3/insts/instructions.cc b/src/arch/amdgpu/gcn3/insts/instructions.cc
index 8c51af5187..478b1d38d0 100644
--- a/src/arch/amdgpu/gcn3/insts/instructions.cc
+++ b/src/arch/amdgpu/gcn3/insts/instructions.cc
@@ -32123,9 +32123,9 @@ namespace Gcn3ISA
 
         for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
             if (gpuDynInst->exec_mask[lane]) {
-                vdst0[lane] = (reinterpret_cast<VecElemU64*>(
+                vdst0[lane] = (reinterpret_cast<VecElemU32*>(
                     gpuDynInst->d_data))[lane * 2];
-                vdst1[lane] = (reinterpret_cast<VecElemU64*>(
+                vdst1[lane] = (reinterpret_cast<VecElemU32*>(
                     gpuDynInst->d_data))[lane * 2 + 1];
             }
         }
diff --git a/src/arch/amdgpu/vega/insts/instructions.cc b/src/arch/amdgpu/vega/insts/instructions.cc
index 45c84910f2..6c014bc107 100644
--- a/src/arch/amdgpu/vega/insts/instructions.cc
+++ b/src/arch/amdgpu/vega/insts/instructions.cc
@@ -35665,13 +35665,13 @@ namespace VegaISA
     Inst_DS__DS_READ2ST64_B32::completeAcc(GPUDynInstPtr gpuDynInst)
     {
         VecOperandU32 vdst0(gpuDynInst, extData.VDST);
-        VecOperandU32 vdst1(gpuDynInst, extData.VDST + 2);
+        VecOperandU32 vdst1(gpuDynInst, extData.VDST + 1);
 
         for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
             if (gpuDynInst->exec_mask[lane]) {
-                vdst0[lane] = (reinterpret_cast<VecElemU64*>(
+                vdst0[lane] = (reinterpret_cast<VecElemU32*>(
                     gpuDynInst->d_data))[lane * 2];
-                vdst1[lane] = (reinterpret_cast<VecElemU64*>(
+                vdst1[lane] = (reinterpret_cast<VecElemU32*>(
                     gpuDynInst->d_data))[lane * 2 + 1];
             }
         }

From b923cbe840c15f8d9e4cd0950c4e0df5772aab6d Mon Sep 17 00:00:00 2001
From: Yan Lee <yanlee@google.com>
Date: Mon, 15 May 2023 00:27:22 -0700
Subject: [PATCH 405/492] base: add Activate to enable log of particular
 targets

When the activate list is not empty, only the target name in the list
will be enabled on logging. Even if the target name is in the activate
list, it will need to enable the flag with --debug-flags to enable the
log correctly.

Change-Id: Ic41cb97c8a2530fdc01e954d6cab76ae475d8722
Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/70617
Reviewed-by: Bobby Bruce <bbruce@ucdavis.edu>
Reviewed-by: Daniel Carvalho <odanrc@yahoo.com.br>
Reviewed-by: Jason Lowe-Power <power.jg@gmail.com>
Tested-by: kokoro <noreply+kokoro@google.com>
Maintainer: Jason Lowe-Power <power.jg@gmail.com>
---
 src/base/match.hh      |  4 +++
 src/base/trace.cc      |  4 +--
 src/base/trace.hh      | 26 +++++++++++++-
 src/base/trace.test.cc | 77 ++++++++++++++++++++++++++++++++++++++++++
 4 files changed, 108 insertions(+), 3 deletions(-)

diff --git a/src/base/match.hh b/src/base/match.hh
index 36c5ad2b6f..8bbe2b88bc 100644
--- a/src/base/match.hh
+++ b/src/base/match.hh
@@ -66,6 +66,10 @@ class ObjectMatch
     void setExpression(const std::string &expression);
     void setExpression(const std::vector<std::string> &expression);
     std::vector<std::vector<std::string> > getExpressions();
+    bool empty() const
+    {
+        return tokens.empty();
+    }
     bool match(const std::string &name) const
     {
         return tokens.empty() ? false : domatch(name);
diff --git a/src/base/trace.cc b/src/base/trace.cc
index 272b035e0f..9aa5852ac9 100644
--- a/src/base/trace.cc
+++ b/src/base/trace.cc
@@ -108,7 +108,7 @@ void
 Logger::dump(Tick when, const std::string &name,
          const void *d, int len, const std::string &flag)
 {
-    if (!name.empty() && ignore.match(name))
+    if (!isEnabled(name))
         return;
 
     const char *data = static_cast<const char *>(d);
@@ -148,7 +148,7 @@ void
 OstreamLogger::logMessage(Tick when, const std::string &name,
         const std::string &flag, const std::string &message)
 {
-    if (!name.empty() && ignore.match(name))
+    if (!isEnabled(name))
         return;
 
     if (!debug::FmtTicksOff && (when != MaxTick))
diff --git a/src/base/trace.hh b/src/base/trace.hh
index a7f52ebe23..7989279025 100644
--- a/src/base/trace.hh
+++ b/src/base/trace.hh
@@ -39,6 +39,7 @@
 #include "base/compiler.hh"
 #include "base/cprintf.hh"
 #include "base/debug.hh"
+#include "base/logging.hh"
 #include "base/match.hh"
 #include "base/types.hh"
 #include "sim/cur_tick.hh"
@@ -60,6 +61,23 @@ class Logger
   protected:
     /** Name match for objects to ignore */
     ObjectMatch ignore;
+    /** Name match for objects to activate log */
+    ObjectMatch activate;
+
+    bool isEnabled(const std::string &name) const
+    {
+        if (name.empty()) // Enable the logger with a empty name.
+            return true;
+        bool ignore_match = ignore.match(name);
+        bool activate_match = activate.match(name);
+        if (ignore_match && activate_match)
+            panic("%s in both ignore and activate.\n", name);
+        if (ignore_match)
+            return false;
+        if (!activate.empty() && !activate_match)
+            return false;
+        return true;
+    }
 
   public:
     /** Log a single message */
@@ -76,7 +94,7 @@ class Logger
             const std::string &flag,
             const char *fmt, const Args &...args)
     {
-        if (!name.empty() && ignore.match(name))
+        if (!isEnabled(name))
             return;
         std::ostringstream line;
         ccprintf(line, fmt, args...);
@@ -104,6 +122,12 @@ class Logger
     /** Add objects to ignore */
     void addIgnore(const ObjectMatch &ignore_) { ignore.add(ignore_); }
 
+    /** Set objects to activate */
+    void setActivate(ObjectMatch &activate_) { activate = activate_; }
+
+    /** Add objects to activate */
+    void addActivate(const ObjectMatch &activate_) { activate.add(activate_); }
+
     virtual ~Logger() { }
 };
 
diff --git a/src/base/trace.test.cc b/src/base/trace.test.cc
index c53dcd7324..a72ef54578 100644
--- a/src/base/trace.test.cc
+++ b/src/base/trace.test.cc
@@ -166,6 +166,61 @@ TEST(TraceTest, LogMessageFlagEnabled)
     trace::disable();
 }
 
+/** Test that log messages are displayed for activated objects (single). */
+TEST(TraceTest, LogMessageActivateOne)
+{
+    std::stringstream ss;
+    trace::OstreamLogger logger(ss);
+
+    ObjectMatch activate_foo("Foo");
+    ObjectMatch activate_bar("Bar");
+
+    // Activate foo.
+    logger.setActivate(activate_foo);
+    logger.logMessage(Tick(100), "Foo", "", "Test message");
+    ASSERT_EQ(getString(&logger), "    100: Foo: Test message");
+    logger.logMessage(Tick(100), "Bar", "", "Test message");
+    ASSERT_EQ(getString(&logger), "");
+
+    // When setting a new activate, the old activates are not kept.
+    logger.setActivate(activate_bar);
+    logger.logMessage(Tick(100), "Foo", "", "Test message");
+    ASSERT_EQ(getString(&logger), "");
+    logger.logMessage(Tick(100), "Bar", "", "Test message");
+    ASSERT_EQ(getString(&logger), "    100: Bar: Test message");
+}
+
+/** Test that log messages are displayed for activated objects (multiple). */
+TEST(TraceTest, LogMessageActivateMultiple)
+{
+    std::stringstream ss;
+    trace::OstreamLogger logger(ss);
+
+    ObjectMatch activate_foo("Foo");
+    ObjectMatch activate_bar("Bar");
+    ObjectMatch activate_thy("Thy");
+
+    // Activate foo and bar
+    logger.setActivate(activate_foo);
+    logger.addActivate(activate_bar);
+    logger.logMessage(Tick(100), "Foo", "", "Test message");
+    ASSERT_EQ(getString(&logger), "    100: Foo: Test message");
+    logger.logMessage(Tick(100), "Bar", "", "Test message");
+    ASSERT_EQ(getString(&logger), "    100: Bar: Test message");
+    logger.logMessage(Tick(100), "Thy", "", "Test message");
+    ASSERT_EQ(getString(&logger), "");
+
+    // Make sure that when setting a new activate, the old activates
+    // are not kept
+    logger.setActivate(activate_thy);
+    logger.logMessage(Tick(100), "Foo", "", "Test message");
+    ASSERT_EQ(getString(&logger), "");
+    logger.logMessage(Tick(100), "Bar", "", "Test message");
+    ASSERT_EQ(getString(&logger), "");
+    logger.logMessage(Tick(100), "Thy", "", "Test message");
+    ASSERT_EQ(getString(&logger), "    100: Thy: Test message");
+}
+
 /** Test that log messages are not displayed for ignored objects (single). */
 TEST(TraceTest, LogMessageIgnoreOne)
 {
@@ -221,6 +276,28 @@ TEST(TraceTest, LogMessageIgnoreMultiple)
     ASSERT_EQ(getString(&logger), "");
 }
 
+/** Test that log messages are displayed properly within ignore and activate */
+TEST(TraceTest, LogMessageActivateAndIgnore)
+{
+    std::stringstream ss;
+    trace::OstreamLogger logger(ss);
+
+    ObjectMatch foo("Foo");
+    ObjectMatch bar("Bar");
+
+    // Activate foo and ignore bar
+    logger.setActivate(foo);
+    logger.setIgnore(bar);
+    logger.logMessage(Tick(100), "Foo", "", "Test message");
+    ASSERT_EQ(getString(&logger), "    100: Foo: Test message");
+    logger.logMessage(Tick(100), "Bar", "", "Test message");
+    ASSERT_EQ(getString(&logger), "");
+    logger.logMessage(Tick(100), "Thy", "", "Test message");
+    // When the Activate list is not empty and thy is not in the list,
+    // log of Thy will not be displayed.
+    ASSERT_EQ(getString(&logger), "");
+}
+
 /** Test that dumping for an ignored name does not log anything. */
 TEST(TraceTest, DumpIgnored)
 {

From 48ae255762126d96bc1918d6ffaaa0199eb24051 Mon Sep 17 00:00:00 2001
From: Yan Lee <yanlee@google.com>
Date: Mon, 15 May 2023 00:32:32 -0700
Subject: [PATCH 406/492] sim,python: add activate option and method

With --debug-activate option, user can add the target names into
activate list of debug log.

For example, with "--debug-activate=system.AAA.bus --debug-flags=IOXBar"
We can enable the logs of a specified bus.

Change-Id: I89ce87d784ae9736708bbc976a6bad58732bd5da
Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/70618
Reviewed-by: Bobby Bruce <bbruce@ucdavis.edu>
Reviewed-by: Jason Lowe-Power <power.jg@gmail.com>
Tested-by: kokoro <noreply+kokoro@google.com>
Maintainer: Jason Lowe-Power <power.jg@gmail.com>
---
 src/python/m5/main.py        | 11 +++++++++++
 src/python/m5/trace.py       |  2 +-
 src/python/pybind11/debug.cc |  9 +++++++++
 3 files changed, 21 insertions(+), 1 deletion(-)

diff --git a/src/python/m5/main.py b/src/python/m5/main.py
index b4a3472187..a68279b633 100644
--- a/src/python/m5/main.py
+++ b/src/python/m5/main.py
@@ -275,6 +275,13 @@ def parse_options():
         help="Sets the output file for debug. Append '.gz' to the name for it"
         " to be compressed automatically [Default: %default]",
     )
+    option(
+        "--debug-activate",
+        metavar="EXPR[,EXPR]",
+        action="append",
+        split=",",
+        help="Activate EXPR sim objects",
+    )
     option(
         "--debug-ignore",
         metavar="EXPR",
@@ -557,6 +564,10 @@ def main():
 
     trace.output(options.debug_file)
 
+    for activate in options.debug_activate:
+        _check_tracing()
+        trace.activate(activate)
+
     for ignore in options.debug_ignore:
         _check_tracing()
         trace.ignore(ignore)
diff --git a/src/python/m5/trace.py b/src/python/m5/trace.py
index 9603914ca8..759f96e5bf 100644
--- a/src/python/m5/trace.py
+++ b/src/python/m5/trace.py
@@ -25,4 +25,4 @@
 # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 
 # Export native methods to Python
-from _m5.trace import output, ignore, disable, enable
+from _m5.trace import output, activate, ignore, disable, enable
diff --git a/src/python/pybind11/debug.cc b/src/python/pybind11/debug.cc
index 313ca81e6e..0087ffadd4 100644
--- a/src/python/pybind11/debug.cc
+++ b/src/python/pybind11/debug.cc
@@ -67,6 +67,14 @@ output(const char *filename)
     trace::setDebugLogger(new trace::OstreamLogger(*file_stream->stream()));
 }
 
+static void
+activate(const char *expr)
+{
+    ObjectMatch activate(expr);
+
+    trace::getDebugLogger()->addActivate(activate);
+}
+
 static void
 ignore(const char *expr)
 {
@@ -121,6 +129,7 @@ pybind_init_debug(py::module_ &m_native)
     py::module_ m_trace = m_native.def_submodule("trace");
     m_trace
         .def("output", &output)
+        .def("activate", &activate)
         .def("ignore", &ignore)
         .def("enable", &trace::enable)
         .def("disable", &trace::disable)

From 5c60160f3efcd2c53241fe2fa1f1c77f77920489 Mon Sep 17 00:00:00 2001
From: Giacomo Travaglini <giacomo.travaglini@arm.com>
Date: Mon, 15 May 2023 11:42:56 +0100
Subject: [PATCH 407/492] arch-arm: Fix position of AA64ISAR0.AES bitfield

The bitfield was wrongly [1] placed in the LSBs of the register

[1]: https://developer.arm.com/documentation/ddi0601/2022-03/\
    AArch64-Registers/\
    ID-AA64ISAR0-EL1--AArch64-Instruction-Set-Attribute-Register-0

Change-Id: I577a79e16931a0e1334a9b24459553e2899341f0
Signed-off-by: Giacomo Travaglini <giacomo.travaglini@arm.com>
Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/70637
Reviewed-by: Richard Cooper <richard.cooper@arm.com>
Tested-by: kokoro <noreply+kokoro@google.com>
---
 src/arch/arm/regs/misc_types.hh | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/arch/arm/regs/misc_types.hh b/src/arch/arm/regs/misc_types.hh
index e446ce5fd0..e6f7e406f2 100644
--- a/src/arch/arm/regs/misc_types.hh
+++ b/src/arch/arm/regs/misc_types.hh
@@ -123,7 +123,7 @@ namespace ArmISA
         Bitfield<19, 16> crc32;
         Bitfield<15, 12> sha2;
         Bitfield<11, 8> sha1;
-        Bitfield<3, 0> aes;
+        Bitfield<7, 4> aes;
     EndBitUnion(AA64ISAR0)
 
     BitUnion64(AA64ISAR1)

From 9ef7be902bc68beef49b698195af3c0b6a037f69 Mon Sep 17 00:00:00 2001
From: Giacomo Travaglini <giacomo.travaglini@arm.com>
Date: Wed, 10 May 2023 15:51:12 +0100
Subject: [PATCH 408/492] arch-arm: Add UNSERIALIZE flag to address cpt
 compatibility

This patch is adding the MISCREG_UNSERIALIZE flag to expose
the user to the following checkpoint compatibility problem:

What happens when a checkpoint is restored with a different
architectural configuration?

The current behaviour is to silently restore the checkpoint
and to populate the ISA registers accordingly. However some of
these restored values will be used and some of them will
be actually discarded.

For example the value of the MISCREG_ID_AA64ISAR0_EL1 register
(initially configured at construction time [1]) will be overwritten by
the checkpointed value in ISA::unserialize (checkpointed params win over
current params). On the other hand we "discard" the checkpointed value
for registers handled in the ISA::readMiscReg method (not accessing the
storage) like MISCREG_ID_AA64PFR0_EL1 [2] (current params win over
checkpointed params).

In other words some registers will be unserialized while some others
will discard the checkpointed value in favour of the current
configuration setup. This categorization is currently implicit and it
ultimately depends on whether or not a register read access its storage
(see MISCREG_ID_AA64PFR0_EL1 above).

With this patch we formalize this distinction. We allow the developer to
be explict on which register should not be unserialized and should
instead use the new simulation parameters.

If there is a mismatch between the reset value of such register and
the checkpointed one, we warn the user and we undo the unserialization
for such register.

[1]: https://github.com/gem5/gem5/blob/v22.1.0.0/src/arch/arm/isa.cc#L437
[2]: https://github.com/gem5/gem5/blob/v22.1.0.0/src/arch/arm/isa.cc#L1019

Change-Id: Icea6563ee5816b14a097926b5734f2fce10530c7
Signed-off-by: Giacomo Travaglini <giacomo.travaglini@arm.com>
Reviewed-by: Richard Cooper <richard.cooper@arm.com>
Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/70557
Maintainer: Jason Lowe-Power <power.jg@gmail.com>
Tested-by: kokoro <noreply+kokoro@google.com>
---
 src/arch/arm/isa.cc       | 12 ++++++++++++
 src/arch/arm/regs/misc.hh |  9 ++++++++-
 2 files changed, 20 insertions(+), 1 deletion(-)

diff --git a/src/arch/arm/isa.cc b/src/arch/arm/isa.cc
index ffd9cfc6b8..f55235da11 100644
--- a/src/arch/arm/isa.cc
+++ b/src/arch/arm/isa.cc
@@ -1879,6 +1879,18 @@ ISA::unserialize(CheckpointIn &cp)
 {
     DPRINTF(Checkpoint, "Unserializing Arm Misc Registers\n");
     UNSERIALIZE_MAPPING(miscRegs, miscRegName, NUM_PHYS_MISCREGS);
+
+    for (auto idx = 0; idx < NUM_MISCREGS; idx++) {
+        if (!lookUpMiscReg[idx].info[MISCREG_UNSERIALIZE] &&
+            miscRegs[idx] != lookUpMiscReg[idx].reset()) {
+            warn("Checkpoint value for register %s does not match "
+                 "current configuration (checkpointed: %#x, current: %#x)",
+                 miscRegName[idx], miscRegs[idx],
+                 lookUpMiscReg[idx].reset());
+            miscRegs[idx] = lookUpMiscReg[idx].reset();
+        }
+    }
+
     CPSR tmp_cpsr = miscRegs[MISCREG_CPSR];
     updateRegMap(tmp_cpsr);
 }
diff --git a/src/arch/arm/regs/misc.hh b/src/arch/arm/regs/misc.hh
index 265a697781..3a32623292 100644
--- a/src/arch/arm/regs/misc.hh
+++ b/src/arch/arm/regs/misc.hh
@@ -1125,6 +1125,7 @@ namespace ArmISA
         MISCREG_IMPLEMENTED,
         MISCREG_UNVERIFIABLE,   // Does the value change on every read (e.g. a
                                 // arch generic counter)
+        MISCREG_UNSERIALIZE,    // Should the checkpointed value be restored?
         MISCREG_WARN_NOT_FAIL,  // If MISCREG_IMPLEMENTED is deasserted, it
                                 // tells whether the instruction should raise a
                                 // warning or fail
@@ -1277,6 +1278,12 @@ namespace ArmISA
             return *this;
         }
         chain
+        unserialize(bool v = true) const
+        {
+            entry.info[MISCREG_UNSERIALIZE] = v;
+            return *this;
+        }
+        chain
         warnNotFail(bool v = true) const
         {
             entry.info[MISCREG_WARN_NOT_FAIL] = v;
@@ -1595,7 +1602,7 @@ namespace ArmISA
           : entry(e)
         {
             // force unimplemented registers to be thusly declared
-            implemented(1);
+            implemented(1).unserialize(1);
         }
     };
 

From de2503f7ceae4832892d64a4440de4a97aa3e211 Mon Sep 17 00:00:00 2001
From: Giacomo Travaglini <giacomo.travaglini@arm.com>
Date: Tue, 7 Feb 2023 09:49:53 +0000
Subject: [PATCH 409/492] arch-arm: Move RO values from ISA::read to the reset
 field

This is simplyfying the ISA::readMiscReg, and it is stopping
us from recomputing values that won't change throughout the
simulation

Change-Id: I62270cdb59f39b8a143e9554c8beaa8cd15824aa
Signed-off-by: Giacomo Travaglini <giacomo.travaglini@arm.com>
Reviewed-by: Richard Cooper <richard.cooper@arm.com>
Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/70558
Tested-by: kokoro <noreply+kokoro@google.com>
Maintainer: Jason Lowe-Power <power.jg@gmail.com>
---
 src/arch/arm/isa.cc       | 66 ---------------------------------------
 src/arch/arm/regs/misc.cc | 49 +++++++++++++++++++++++++++++
 2 files changed, 49 insertions(+), 66 deletions(-)

diff --git a/src/arch/arm/isa.cc b/src/arch/arm/isa.cc
index f55235da11..14349b1440 100644
--- a/src/arch/arm/isa.cc
+++ b/src/arch/arm/isa.cc
@@ -486,35 +486,6 @@ ISA::readMiscReg(RegIndex idx)
         warn_once("The ccsidr register isn't implemented and "
                 "always reads as 0.\n");
         break;
-      case MISCREG_CTR:                 // AArch32, ARMv7, top bit set
-      case MISCREG_CTR_EL0:             // AArch64
-        {
-            //all caches have the same line size in gem5
-            //4 byte words in ARM
-            unsigned lineSizeWords =
-                tc->getSystemPtr()->cacheLineSize() / 4;
-            unsigned log2LineSizeWords = 0;
-
-            while (lineSizeWords >>= 1) {
-                ++log2LineSizeWords;
-            }
-
-            CTR ctr = 0;
-            //log2 of minimun i-cache line size (words)
-            ctr.iCacheLineSize = log2LineSizeWords;
-            //b11 - gem5 uses pipt
-            ctr.l1IndexPolicy = 0x3;
-            //log2 of minimum d-cache line size (words)
-            ctr.dCacheLineSize = log2LineSizeWords;
-            //log2 of max reservation size (words)
-            ctr.erg = log2LineSizeWords;
-            //log2 of max writeback size (words)
-            ctr.cwg = log2LineSizeWords;
-            //b100 - gem5 format is ARMv7
-            ctr.format = 0x4;
-
-            return ctr;
-        }
       case MISCREG_ACTLR:
         warn("Not doing anything for miscreg ACTLR\n");
         break;
@@ -615,11 +586,6 @@ ISA::readMiscReg(RegIndex idx)
             l2ctlr.numCPUs = tc->getSystemPtr()->threads.size() - 1;
             return l2ctlr;
         }
-      case MISCREG_DBGDIDR:
-        /* For now just implement the version number.
-         * ARMv7, v7.1 Debug architecture (0b0101 --> 0x5)
-         */
-        return 0x5 << 16;
       case MISCREG_DBGDSCRint:
         return readMiscRegNoEffect(MISCREG_DBGDSCRint);
       case MISCREG_ISR:
@@ -632,8 +598,6 @@ ISA::readMiscReg(RegIndex idx)
                 readMiscRegNoEffect(MISCREG_CPSR),
                 readMiscRegNoEffect(MISCREG_SCR_EL3));
         }
-      case MISCREG_DCZID_EL0:
-        return 0x04;  // DC ZVA clear 64-byte chunks
       case MISCREG_HCPTR:
         {
             RegVal val = readMiscRegNoEffect(idx);
@@ -656,36 +620,6 @@ ISA::readMiscReg(RegIndex idx)
       case MISCREG_HIFAR: // alias for secure IFAR
         return readMiscRegNoEffect(MISCREG_IFAR_S);
 
-      case MISCREG_ID_PFR0:
-        // !ThumbEE | !Jazelle | Thumb | ARM
-        return 0x00000031;
-      case MISCREG_ID_PFR1:
-        {   // Timer | Virti | !M Profile | TrustZone | ARMv4
-            bool have_timer = (system->getGenericTimer() != nullptr);
-            return 0x00000001 |
-                (release->has(ArmExtension::SECURITY) ?
-                    0x00000010 : 0x0) |
-                (release->has(ArmExtension::VIRTUALIZATION) ?
-                    0x00001000 : 0x0) |
-                (have_timer ? 0x00010000 : 0x0);
-        }
-      case MISCREG_ID_AA64PFR0_EL1:
-        return 0x0000000000000002 | // AArch{64,32} supported at EL0
-               0x0000000000000020 | // EL1
-               (release->has(ArmExtension::VIRTUALIZATION) ?
-                    0x0000000000000200 : 0) | // EL2
-               (release->has(ArmExtension::SECURITY) ?
-                    0x0000000000002000 : 0) | // EL3
-               (release->has(ArmExtension::FEAT_SVE) ?
-                    0x0000000100000000 : 0) | // SVE
-               (release->has(ArmExtension::FEAT_SEL2) ?
-                    0x0000001000000000 : 0) | // SecEL2
-               (gicv3CpuInterface     ? 0x0000000001000000 : 0);
-      case MISCREG_ID_AA64PFR1_EL1:
-        return 0x0 |
-               (release->has(ArmExtension::FEAT_SME) ?
-                    0x1 << 24 : 0); // SME
-
       // Generic Timer registers
       case MISCREG_CNTFRQ ... MISCREG_CNTVOFF:
       case MISCREG_CNTFRQ_EL0 ... MISCREG_CNTVOFF_EL2:
diff --git a/src/arch/arm/regs/misc.cc b/src/arch/arm/regs/misc.cc
index 7a06da1aeb..960c2befc3 100644
--- a/src/arch/arm/regs/misc.cc
+++ b/src/arch/arm/regs/misc.cc
@@ -2273,6 +2273,10 @@ ISA::initializeMiscRegMetadata()
 
     // AArch32 CP14 registers
     InitReg(MISCREG_DBGDIDR)
+      /* For now just implement the version number.
+       * ARMv7, v7.1 Debug architecture (0b0101 --> 0x5)
+       */
+      .reset(0x5 << 16)
       .allPrivileges().monSecureWrite(0).monNonSecureWrite(0);
     InitReg(MISCREG_DBGDSCRint)
       .allPrivileges().monSecureWrite(0).monNonSecureWrite(0);
@@ -2514,6 +2518,34 @@ ISA::initializeMiscRegMetadata()
       .reset(midr)
       .allPrivileges().exceptUserMode().writes(0);
     InitReg(MISCREG_CTR)
+      .reset([system=p.system](){
+          //all caches have the same line size in gem5
+          //4 byte words in ARM
+          unsigned line_size_words =
+              system->cacheLineSize() / 4;
+          unsigned log2_line_size_words = 0;
+
+          while (line_size_words >>= 1) {
+              ++log2_line_size_words;
+          }
+
+          CTR ctr = 0;
+          //log2 of minimun i-cache line size (words)
+          ctr.iCacheLineSize = log2_line_size_words;
+          //b11 - gem5 uses pipt
+          ctr.l1IndexPolicy = 0x3;
+          //log2 of minimum d-cache line size (words)
+          ctr.dCacheLineSize = log2_line_size_words;
+          //log2 of max reservation size (words)
+          ctr.erg = log2_line_size_words;
+          //log2 of max writeback size (words)
+          ctr.cwg = log2_line_size_words;
+          //b100 - gem5 format is ARMv7
+          ctr.format = 0x4;
+
+          return ctr;
+      }())
+      .unserialize(0)
       .allPrivileges().exceptUserMode().writes(0);
     InitReg(MISCREG_TCMTR)
       .allPrivileges().exceptUserMode().writes(0);
@@ -2528,8 +2560,20 @@ ISA::initializeMiscRegMetadata()
       .warnNotFail()
       .allPrivileges().exceptUserMode().writes(0);
     InitReg(MISCREG_ID_PFR0)
+      .reset(0x00000031) // !ThumbEE | !Jazelle | Thumb | ARM
       .allPrivileges().exceptUserMode().writes(0);
     InitReg(MISCREG_ID_PFR1)
+      .reset([release=release,system=system](){
+          // Timer | Virti | !M Profile | TrustZone | ARMv4
+          bool have_timer = (system && system->getGenericTimer() != nullptr);
+          return 0x00000001 |
+              (release->has(ArmExtension::SECURITY) ?
+                  0x00000010 : 0x0) |
+              (release->has(ArmExtension::VIRTUALIZATION) ?
+                  0x00001000 : 0x0) |
+              (have_timer ? 0x00010000 : 0x0);
+      }())
+      .unserialize(0)
       .allPrivileges().exceptUserMode().writes(0);
     InitReg(MISCREG_ID_DFR0)
       .reset(p.pmu ? 0x03000000 : 0)
@@ -3772,9 +3816,13 @@ ISA::initializeMiscRegMetadata()
           pfr0_el1.gic = FullSystem && getGICv3CPUInterface(tc) ? 0x1 : 0;
           return pfr0_el1;
       }())
+      .unserialize(0)
       .faultRead(EL1, HCR_TRAP(tid3))
       .allPrivileges().exceptUserMode().writes(0);
     InitReg(MISCREG_ID_AA64PFR1_EL1)
+      .reset(release->has(ArmExtension::FEAT_SME) ?
+          0x1 << 24 : 0)
+      .unserialize(0)
       .faultRead(EL1, HCR_TRAP(tid3))
       .allPrivileges().exceptUserMode().writes(0);
     InitReg(MISCREG_ID_AA64DFR0_EL1)
@@ -3919,6 +3967,7 @@ ISA::initializeMiscRegMetadata()
       .reads(1)
       .mapsTo(MISCREG_CTR);
     InitReg(MISCREG_DCZID_EL0)
+      .reset(0x04) // DC ZVA clear 64-byte chunks
       .reads(1);
     InitReg(MISCREG_VPIDR_EL2)
       .hyp().mon()

From c85aa11ad061d7ed62fd2a1c8e5245d9b4a4680c Mon Sep 17 00:00:00 2001
From: Giacomo Travaglini <giacomo.travaglini@arm.com>
Date: Tue, 7 Feb 2023 10:29:42 +0000
Subject: [PATCH 410/492] arch-arm: Provide default mask for raz/rao helpers

Rather than forcing raz registers to write something like:

.raz(uint64_t(-1))

we provide a shorter version where if
no bitmask is specified we assume the entire register is
raz/rao. This won't be probably used by rao but I
am striving for symmetry and providing a default won't
probably hurt

Change-Id: I309e345fc8336df3a74474f8f9202bf7e2095b41
Signed-off-by: Giacomo Travaglini <giacomo.travaglini@arm.com>
Reviewed-by: Richard Cooper <richard.cooper@arm.com>
Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/70559
Maintainer: Jason Lowe-Power <power.jg@gmail.com>
Tested-by: kokoro <noreply+kokoro@google.com>
---
 src/arch/arm/regs/misc.hh | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/src/arch/arm/regs/misc.hh b/src/arch/arm/regs/misc.hh
index 3a32623292..abbd1c6057 100644
--- a/src/arch/arm/regs/misc.hh
+++ b/src/arch/arm/regs/misc.hh
@@ -1249,13 +1249,13 @@ namespace ArmISA
             return *this;
         }
         chain
-        raz(uint64_t mask) const
+        raz(uint64_t mask = (uint64_t)-1) const
         {
             entry._raz  = mask;
             return *this;
         }
         chain
-        rao(uint64_t mask) const
+        rao(uint64_t mask = (uint64_t)-1) const
         {
             entry._rao  = mask;
             return *this;

From 7c735d131d2525afbff26ee34ec9db7d4e27ba12 Mon Sep 17 00:00:00 2001
From: Giacomo Travaglini <giacomo.travaglini@arm.com>
Date: Tue, 7 Feb 2023 10:03:04 +0000
Subject: [PATCH 411/492] arch-arm: Implement RAZ/WI with raz specifier

Change-Id: I195f042fbeb10c0ca1f9095a0d26e6c213496ee5
Signed-off-by: Giacomo Travaglini <giacomo.travaglini@arm.com>
Reviewed-by: Richard Cooper <richard.cooper@arm.com>
Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/70560
Maintainer: Jason Lowe-Power <power.jg@gmail.com>
Tested-by: kokoro <noreply+kokoro@google.com>
---
 src/arch/arm/isa.cc       | 6 ------
 src/arch/arm/regs/misc.cc | 5 +++++
 2 files changed, 5 insertions(+), 6 deletions(-)

diff --git a/src/arch/arm/isa.cc b/src/arch/arm/isa.cc
index 14349b1440..7df8978b00 100644
--- a/src/arch/arm/isa.cc
+++ b/src/arch/arm/isa.cc
@@ -470,12 +470,6 @@ ISA::readMiscReg(RegIndex idx)
             return readMiscRegNoEffect(idx);
         }
         break;
-      case MISCREG_JOSCR: // Jazelle trivial implementation, RAZ/WI
-      case MISCREG_JMCR:  // Jazelle trivial implementation, RAZ/WI
-      case MISCREG_JIDR:  // Jazelle trivial implementation, RAZ/WI
-      case MISCREG_AIDR:  // AUX ID set to 0
-      case MISCREG_TCMTR: // No TCM's
-        return 0;
 
       case MISCREG_CLIDR:
         warn_once("The clidr register always reports 0 caches.\n");
diff --git a/src/arch/arm/regs/misc.cc b/src/arch/arm/regs/misc.cc
index 960c2befc3..6c5a9ddac9 100644
--- a/src/arch/arm/regs/misc.cc
+++ b/src/arch/arm/regs/misc.cc
@@ -2505,12 +2505,15 @@ ISA::initializeMiscRegMetadata()
       .unimplemented()
       .allPrivileges();
     InitReg(MISCREG_JIDR)
+      .raz() // Jazelle trivial implementation, RAZ/WI
       .allPrivileges();
     InitReg(MISCREG_TEEHBR)
       .allPrivileges();
     InitReg(MISCREG_JOSCR)
+      .raz() // Jazelle trivial implementation, RAZ/WI
       .allPrivileges();
     InitReg(MISCREG_JMCR)
+      .raz() // Jazelle trivial implementation, RAZ/WI
       .allPrivileges();
 
     // AArch32 CP15 registers
@@ -2548,6 +2551,7 @@ ISA::initializeMiscRegMetadata()
       .unserialize(0)
       .allPrivileges().exceptUserMode().writes(0);
     InitReg(MISCREG_TCMTR)
+      .raz() // No TCM's
       .allPrivileges().exceptUserMode().writes(0);
     InitReg(MISCREG_TLBTR)
       .reset(1) // Separate Instruction and Data TLBs
@@ -2646,6 +2650,7 @@ ISA::initializeMiscRegMetadata()
     InitReg(MISCREG_CLIDR)
       .allPrivileges().exceptUserMode().writes(0);
     InitReg(MISCREG_AIDR)
+      .raz() // AUX ID set to 0
       .allPrivileges().exceptUserMode().writes(0);
     InitReg(MISCREG_CSSELR)
       .banked();

From 32b4ab376c08b48cf973dbafc3a7119a76c642cb Mon Sep 17 00:00:00 2001
From: Giacomo Travaglini <giacomo.travaglini@arm.com>
Date: Tue, 7 Feb 2023 10:56:30 +0000
Subject: [PATCH 412/492] arch-arm: Update MISCREG_DBGDIDR to point to Armv8
 debug arch

Change-Id: I20691ecdaedde6740c706782635b1f9a4491dc51
Signed-off-by: Giacomo Travaglini <giacomo.travaglini@arm.com>
Reviewed-by: Richard Cooper <richard.cooper@arm.com>
Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/70561
Maintainer: Jason Lowe-Power <power.jg@gmail.com>
Tested-by: kokoro <noreply+kokoro@google.com>
---
 src/arch/arm/regs/misc.cc | 5 +----
 1 file changed, 1 insertion(+), 4 deletions(-)

diff --git a/src/arch/arm/regs/misc.cc b/src/arch/arm/regs/misc.cc
index 6c5a9ddac9..6f918b2a6e 100644
--- a/src/arch/arm/regs/misc.cc
+++ b/src/arch/arm/regs/misc.cc
@@ -2273,10 +2273,7 @@ ISA::initializeMiscRegMetadata()
 
     // AArch32 CP14 registers
     InitReg(MISCREG_DBGDIDR)
-      /* For now just implement the version number.
-       * ARMv7, v7.1 Debug architecture (0b0101 --> 0x5)
-       */
-      .reset(0x5 << 16)
+      .reset(0x6 << 16) // Armv8 Debug architecture
       .allPrivileges().monSecureWrite(0).monNonSecureWrite(0);
     InitReg(MISCREG_DBGDSCRint)
       .allPrivileges().monSecureWrite(0).monNonSecureWrite(0);

From bc5b00cd2b5dea850acf63f6a1055ff260c8fba7 Mon Sep 17 00:00:00 2001
From: Giacomo Travaglini <giacomo.travaglini@arm.com>
Date: Tue, 7 Feb 2023 11:12:23 +0000
Subject: [PATCH 413/492] arch-arm: Group self hosted debug writes in ISA
 switch

Change-Id: If9c0675743856b603e7b5ec1898f5cdd650f3ce6
Signed-off-by: Giacomo Travaglini <giacomo.travaglini@arm.com>
Reviewed-by: Richard Cooper <richard.cooper@arm.com>
Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/70562
Tested-by: kokoro <noreply+kokoro@google.com>
Maintainer: Jason Lowe-Power <power.jg@gmail.com>
---
 src/arch/arm/isa.cc | 196 ++------------------------------------------
 1 file changed, 8 insertions(+), 188 deletions(-)

diff --git a/src/arch/arm/isa.cc b/src/arch/arm/isa.cc
index 7df8978b00..bef2db5707 100644
--- a/src/arch/arm/isa.cc
+++ b/src/arch/arm/isa.cc
@@ -962,101 +962,11 @@ ISA::setMiscReg(RegIndex idx, RegVal val)
                 tc->setMiscReg(MISCREG_DBGOSLSR, r);
             }
             break;
-          case MISCREG_DBGBCR0:
-            selfDebug->updateDBGBCR(0, val);
+          case MISCREG_DBGBCR0 ... MISCREG_DBGBCR15:
+            selfDebug->updateDBGBCR(idx - MISCREG_DBGBCR0, val);
             break;
-          case MISCREG_DBGBCR1:
-            selfDebug->updateDBGBCR(1, val);
-            break;
-          case MISCREG_DBGBCR2:
-            selfDebug->updateDBGBCR(2, val);
-            break;
-          case MISCREG_DBGBCR3:
-            selfDebug->updateDBGBCR(3, val);
-            break;
-          case MISCREG_DBGBCR4:
-            selfDebug->updateDBGBCR(4, val);
-            break;
-          case MISCREG_DBGBCR5:
-            selfDebug->updateDBGBCR(5, val);
-            break;
-          case MISCREG_DBGBCR6:
-            selfDebug->updateDBGBCR(6, val);
-            break;
-          case MISCREG_DBGBCR7:
-            selfDebug->updateDBGBCR(7, val);
-            break;
-          case MISCREG_DBGBCR8:
-            selfDebug->updateDBGBCR(8, val);
-            break;
-          case MISCREG_DBGBCR9:
-            selfDebug->updateDBGBCR(9, val);
-            break;
-          case MISCREG_DBGBCR10:
-            selfDebug->updateDBGBCR(10, val);
-            break;
-          case MISCREG_DBGBCR11:
-            selfDebug->updateDBGBCR(11, val);
-            break;
-          case MISCREG_DBGBCR12:
-            selfDebug->updateDBGBCR(12, val);
-            break;
-          case MISCREG_DBGBCR13:
-            selfDebug->updateDBGBCR(13, val);
-            break;
-          case MISCREG_DBGBCR14:
-            selfDebug->updateDBGBCR(14, val);
-            break;
-          case MISCREG_DBGBCR15:
-            selfDebug->updateDBGBCR(15, val);
-            break;
-          case MISCREG_DBGWCR0:
-            selfDebug->updateDBGWCR(0, val);
-            break;
-          case MISCREG_DBGWCR1:
-            selfDebug->updateDBGWCR(1, val);
-            break;
-          case MISCREG_DBGWCR2:
-            selfDebug->updateDBGWCR(2, val);
-            break;
-          case MISCREG_DBGWCR3:
-            selfDebug->updateDBGWCR(3, val);
-            break;
-          case MISCREG_DBGWCR4:
-            selfDebug->updateDBGWCR(4, val);
-            break;
-          case MISCREG_DBGWCR5:
-            selfDebug->updateDBGWCR(5, val);
-            break;
-          case MISCREG_DBGWCR6:
-            selfDebug->updateDBGWCR(6, val);
-            break;
-          case MISCREG_DBGWCR7:
-            selfDebug->updateDBGWCR(7, val);
-            break;
-          case MISCREG_DBGWCR8:
-            selfDebug->updateDBGWCR(8, val);
-            break;
-          case MISCREG_DBGWCR9:
-            selfDebug->updateDBGWCR(9, val);
-            break;
-          case MISCREG_DBGWCR10:
-            selfDebug->updateDBGWCR(10, val);
-            break;
-          case MISCREG_DBGWCR11:
-            selfDebug->updateDBGWCR(11, val);
-            break;
-          case MISCREG_DBGWCR12:
-            selfDebug->updateDBGWCR(12, val);
-            break;
-          case MISCREG_DBGWCR13:
-            selfDebug->updateDBGWCR(13, val);
-            break;
-          case MISCREG_DBGWCR14:
-            selfDebug->updateDBGWCR(14, val);
-            break;
-          case MISCREG_DBGWCR15:
-            selfDebug->updateDBGWCR(15, val);
+          case MISCREG_DBGWCR0 ... MISCREG_DBGWCR15:
+            selfDebug->updateDBGWCR(idx - MISCREG_DBGWCR0, val);
             break;
 
           case MISCREG_MDCR_EL2:
@@ -1100,101 +1010,11 @@ ISA::setMiscReg(RegIndex idx, RegVal val)
             }
             break;
 
-          case MISCREG_DBGBCR0_EL1:
-            selfDebug->updateDBGBCR(0, val);
+          case MISCREG_DBGBCR0_EL1 ... MISCREG_DBGBCR15_EL1:
+            selfDebug->updateDBGBCR(idx - MISCREG_DBGBCR0_EL1, val);
             break;
-          case MISCREG_DBGBCR1_EL1:
-            selfDebug->updateDBGBCR(1, val);
-            break;
-          case MISCREG_DBGBCR2_EL1:
-            selfDebug->updateDBGBCR(2, val);
-            break;
-          case MISCREG_DBGBCR3_EL1:
-            selfDebug->updateDBGBCR(3, val);
-            break;
-          case MISCREG_DBGBCR4_EL1:
-            selfDebug->updateDBGBCR(4, val);
-            break;
-          case MISCREG_DBGBCR5_EL1:
-            selfDebug->updateDBGBCR(5, val);
-            break;
-          case MISCREG_DBGBCR6_EL1:
-            selfDebug->updateDBGBCR(6, val);
-            break;
-          case MISCREG_DBGBCR7_EL1:
-            selfDebug->updateDBGBCR(7, val);
-            break;
-          case MISCREG_DBGBCR8_EL1:
-            selfDebug->updateDBGBCR(8, val);
-            break;
-          case MISCREG_DBGBCR9_EL1:
-            selfDebug->updateDBGBCR(9, val);
-            break;
-          case MISCREG_DBGBCR10_EL1:
-            selfDebug->updateDBGBCR(10, val);
-            break;
-          case MISCREG_DBGBCR11_EL1:
-            selfDebug->updateDBGBCR(11, val);
-            break;
-          case MISCREG_DBGBCR12_EL1:
-            selfDebug->updateDBGBCR(12, val);
-            break;
-          case MISCREG_DBGBCR13_EL1:
-            selfDebug->updateDBGBCR(13, val);
-            break;
-          case MISCREG_DBGBCR14_EL1:
-            selfDebug->updateDBGBCR(14, val);
-            break;
-          case MISCREG_DBGBCR15_EL1:
-            selfDebug->updateDBGBCR(15, val);
-            break;
-          case MISCREG_DBGWCR0_EL1:
-            selfDebug->updateDBGWCR(0, val);
-            break;
-          case MISCREG_DBGWCR1_EL1:
-            selfDebug->updateDBGWCR(1, val);
-            break;
-          case MISCREG_DBGWCR2_EL1:
-            selfDebug->updateDBGWCR(2, val);
-            break;
-          case MISCREG_DBGWCR3_EL1:
-            selfDebug->updateDBGWCR(3, val);
-            break;
-          case MISCREG_DBGWCR4_EL1:
-            selfDebug->updateDBGWCR(4, val);
-            break;
-          case MISCREG_DBGWCR5_EL1:
-            selfDebug->updateDBGWCR(5, val);
-            break;
-          case MISCREG_DBGWCR6_EL1:
-            selfDebug->updateDBGWCR(6, val);
-            break;
-          case MISCREG_DBGWCR7_EL1:
-            selfDebug->updateDBGWCR(7, val);
-            break;
-          case MISCREG_DBGWCR8_EL1:
-            selfDebug->updateDBGWCR(8, val);
-            break;
-          case MISCREG_DBGWCR9_EL1:
-            selfDebug->updateDBGWCR(9, val);
-            break;
-          case MISCREG_DBGWCR10_EL1:
-            selfDebug->updateDBGWCR(10, val);
-            break;
-          case MISCREG_DBGWCR11_EL1:
-            selfDebug->updateDBGWCR(11, val);
-            break;
-          case MISCREG_DBGWCR12_EL1:
-            selfDebug->updateDBGWCR(12, val);
-            break;
-          case MISCREG_DBGWCR13_EL1:
-            selfDebug->updateDBGWCR(13, val);
-            break;
-          case MISCREG_DBGWCR14_EL1:
-            selfDebug->updateDBGWCR(14, val);
-            break;
-          case MISCREG_DBGWCR15_EL1:
-            selfDebug->updateDBGWCR(15, val);
+          case MISCREG_DBGWCR0_EL1 ... MISCREG_DBGWCR15_EL1:
+            selfDebug->updateDBGWCR(idx - MISCREG_DBGWCR0_EL1, val);
             break;
           case MISCREG_IFSR:
             {

From f72d22cc3830099ef5c99f0312e84b54d7296315 Mon Sep 17 00:00:00 2001
From: Giacomo Travaglini <giacomo.travaglini@arm.com>
Date: Tue, 7 Feb 2023 14:47:40 +0000
Subject: [PATCH 414/492] arch-arm: Implement RES0/RES1 with miscreg specifiers

Change-Id: Ic2caea121e02f63f069f1576760c849bcbdac894
Signed-off-by: Giacomo Travaglini <giacomo.travaglini@arm.com>
Reviewed-by: Richard Cooper <richard.cooper@arm.com>
Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/70563
Maintainer: Jason Lowe-Power <power.jg@gmail.com>
Tested-by: kokoro <noreply+kokoro@google.com>
---
 src/arch/arm/isa.cc       | 76 ++++-----------------------------------
 src/arch/arm/regs/misc.cc | 26 +++++++++++---
 2 files changed, 29 insertions(+), 73 deletions(-)

diff --git a/src/arch/arm/isa.cc b/src/arch/arm/isa.cc
index bef2db5707..ab6e3f7273 100644
--- a/src/arch/arm/isa.cc
+++ b/src/arch/arm/isa.cc
@@ -421,11 +421,6 @@ ISA::readMiscReg(RegIndex idx)
     idx = redirectRegVHE(idx);
 
     switch (unflattenMiscReg(idx)) {
-      case MISCREG_HCR:
-      case MISCREG_HCR2:
-            if (!release->has(ArmExtension::VIRTUALIZATION))
-                return 0;
-            break;
       case MISCREG_CPACR:
         {
             const uint32_t ones = (uint32_t)(-1);
@@ -456,10 +451,6 @@ ISA::readMiscReg(RegIndex idx)
       case MISCREG_MPIDR:
       case MISCREG_MPIDR_EL1:
         return readMPIDR(system, tc);
-      case MISCREG_VMPIDR:
-      case MISCREG_VMPIDR_EL2:
-        // top bit defined as RES1
-        return readMiscRegNoEffect(idx) | 0x80000000;
       case MISCREG_ID_AFR0: // not implemented, so alias MIDR
       case MISCREG_REVIDR:  // not implemented, so alias MIDR
       case MISCREG_MIDR:
@@ -568,10 +559,6 @@ ISA::readMiscReg(RegIndex idx)
         {
             return miscRegs[MISCREG_CPSR] & 0x800000;
         }
-      case MISCREG_SVCR:
-        {
-            return miscRegs[MISCREG_SVCR];
-        }
       case MISCREG_L2CTLR:
         {
             // mostly unimplemented, just set NumCPUs field from sim and return
@@ -594,20 +581,17 @@ ISA::readMiscReg(RegIndex idx)
         }
       case MISCREG_HCPTR:
         {
-            RegVal val = readMiscRegNoEffect(idx);
-            // The trap bit associated with CP14 is defined as RAZ
-            val &= ~(1 << 14);
-            // If a CP bit in NSACR is 0 then the corresponding bit in
-            // HCPTR is RAO/WI
+            HCPTR val = readMiscRegNoEffect(idx);
             bool secure_lookup = release->has(ArmExtension::SECURITY) &&
                 isSecure(tc);
             if (!secure_lookup) {
-                RegVal mask = readMiscRegNoEffect(MISCREG_NSACR);
-                val |= (mask ^ 0x7FFF) & 0xBFFF;
+                NSACR nsacr = readMiscRegNoEffect(MISCREG_NSACR);
+                if (!nsacr.cp10) {
+                    val.tcp10 = 1;
+                    val.tcp11 = 1;
+                }
             }
-            // Set the bits for unimplemented coprocessors to RAO/WI
-            val |= 0x33FF;
-            return (val);
+            return val;
         }
       case MISCREG_HDFAR: // alias for secure DFAR
         return readMiscRegNoEffect(MISCREG_DFAR_S);
@@ -934,16 +918,10 @@ ISA::setMiscReg(RegIndex idx, RegVal val)
                          (readMiscRegNoEffect(MISCREG_FPEXC) & ~fpexcMask);
             }
             break;
-          case MISCREG_HCR2:
-                if (!release->has(ArmExtension::VIRTUALIZATION))
-                    return;
-                break;
           case MISCREG_HCR:
             {
                 const HDCR mdcr  = tc->readMiscRegNoEffect(MISCREG_MDCR_EL2);
                 selfDebug->setenableTDETGE((HCR)val, mdcr);
-                if (!release->has(ArmExtension::VIRTUALIZATION))
-                    return;
             }
             break;
 
@@ -1016,31 +994,6 @@ ISA::setMiscReg(RegIndex idx, RegVal val)
           case MISCREG_DBGWCR0_EL1 ... MISCREG_DBGWCR15_EL1:
             selfDebug->updateDBGWCR(idx - MISCREG_DBGWCR0_EL1, val);
             break;
-          case MISCREG_IFSR:
-            {
-                // ARM ARM (ARM DDI 0406C.b) B4.1.96
-                const uint32_t ifsrMask =
-                    mask(31, 13) | mask(11, 11) | mask(8, 6);
-                newVal = newVal & ~ifsrMask;
-            }
-            break;
-          case MISCREG_DFSR:
-            {
-                // ARM ARM (ARM DDI 0406C.b) B4.1.52
-                const uint32_t dfsrMask = mask(31, 14) | mask(8, 8);
-                newVal = newVal & ~dfsrMask;
-            }
-            break;
-          case MISCREG_AMAIR0:
-          case MISCREG_AMAIR1:
-            {
-                // ARM ARM (ARM DDI 0406C.b) B4.1.5
-                // Valid only with LPAE
-                if (!release->has(ArmExtension::LPAE))
-                    return;
-                DPRINTF(MiscRegs, "Writing AMAIR: %#x\n", newVal);
-            }
-            break;
           case MISCREG_SCR:
             getMMUPtr(tc)->invalidateMiscReg();
             break;
@@ -1327,21 +1280,6 @@ ISA::setMiscReg(RegIndex idx, RegVal val)
                 idx = MISCREG_CPSR;
             }
             break;
-          case MISCREG_SVCR:
-            {
-                SVCR svcr = miscRegs[MISCREG_SVCR];
-                SVCR newSvcr = newVal;
-
-                // Don't allow other bits to be set
-                svcr.sm = newSvcr.sm;
-                svcr.za = newSvcr.za;
-                newVal = svcr;
-            }
-            break;
-          case MISCREG_SMPRI_EL1:
-            // Only the bottom 4 bits are settable
-            newVal = newVal & 0xF;
-            break;
           case MISCREG_AT_S1E1R_Xt:
             addressTranslation64(MMU::S1E1Tran, BaseMMU::Read, 0, val);
             return;
diff --git a/src/arch/arm/regs/misc.cc b/src/arch/arm/regs/misc.cc
index 6f918b2a6e..9203810306 100644
--- a/src/arch/arm/regs/misc.cc
+++ b/src/arch/arm/regs/misc.cc
@@ -2662,6 +2662,7 @@ ISA::initializeMiscRegMetadata()
       .reset(midr)
       .hyp().monNonSecure();
     InitReg(MISCREG_VMPIDR)
+      .res1(mask(31, 31))
       .hyp().monNonSecure();
     InitReg(MISCREG_SCTLR)
       .banked()
@@ -2739,13 +2740,17 @@ ISA::initializeMiscRegMetadata()
       .hyp().monNonSecure();
     InitReg(MISCREG_HCR)
       .hyp().monNonSecure()
-      .res0(0x90000000);
+      .res0(release->has(ArmExtension::VIRTUALIZATION) ?
+          0x90000000 : mask(31, 0));
     InitReg(MISCREG_HCR2)
       .hyp().monNonSecure()
-      .res0(0xffa9ff8c);
+      .res0(release->has(ArmExtension::VIRTUALIZATION) ?
+          0xffa9ff8c : mask(31, 0));
     InitReg(MISCREG_HDCR)
       .hyp().monNonSecure();
     InitReg(MISCREG_HCPTR)
+      .res0(mask(29, 21) | mask(19, 16) | mask(14, 14))
+      .res1(mask(13, 12) | mask(9, 0))
       .hyp().monNonSecure();
     InitReg(MISCREG_HSTR)
       .hyp().monNonSecure();
@@ -2794,7 +2799,8 @@ ISA::initializeMiscRegMetadata()
       .bankedChild()
       .secure().exceptUserMode();
     InitReg(MISCREG_DFSR)
-      .banked();
+      .banked()
+      .res0(mask(31, 14) | mask(8, 8));
     InitReg(MISCREG_DFSR_NS)
       .bankedChild()
       .privSecure(!aarch32EL3)
@@ -2803,7 +2809,8 @@ ISA::initializeMiscRegMetadata()
       .bankedChild()
       .secure().exceptUserMode();
     InitReg(MISCREG_IFSR)
-      .banked();
+      .banked()
+      .res0(mask(31, 13) | mask(11, 11) | mask(8, 6));
     InitReg(MISCREG_IFSR_NS)
       .bankedChild()
       .privSecure(!aarch32EL3)
@@ -3118,6 +3125,7 @@ ISA::initializeMiscRegMetadata()
       .bankedChild()
       .secure().exceptUserMode();
     InitReg(MISCREG_AMAIR0)
+      .res0(release->has(ArmExtension::LPAE) ? 0 : mask(31, 0))
       .banked();
     InitReg(MISCREG_AMAIR0_NS)
       .bankedChild()
@@ -3127,6 +3135,7 @@ ISA::initializeMiscRegMetadata()
       .bankedChild()
       .secure().exceptUserMode();
     InitReg(MISCREG_AMAIR1)
+      .res0(release->has(ArmExtension::LPAE) ? 0 : mask(31, 0))
       .banked();
     InitReg(MISCREG_AMAIR1_NS)
       .bankedChild()
@@ -3976,6 +3985,8 @@ ISA::initializeMiscRegMetadata()
       .mapsTo(MISCREG_VPIDR);
     InitReg(MISCREG_VMPIDR_EL2)
       .hyp().mon()
+      .res0(mask(63, 40) | mask(29, 25))
+      .res1(mask(31, 31))
       .mapsTo(MISCREG_VMPIDR);
     InitReg(MISCREG_SCTLR_EL1)
       .allPrivileges().exceptUserMode()
@@ -5263,6 +5274,12 @@ ISA::initializeMiscRegMetadata()
         }())
         .allPrivileges().exceptUserMode().writes(0);
     InitReg(MISCREG_SVCR)
+        .res0([](){
+            SVCR svcr_mask = 0;
+            svcr_mask.sm = 1;
+            svcr_mask.za = 1;
+            return ~svcr_mask;
+        }())
         .allPrivileges();
     InitReg(MISCREG_SMIDR_EL1)
         .reset([](){
@@ -5274,6 +5291,7 @@ ISA::initializeMiscRegMetadata()
         }())
         .allPrivileges().exceptUserMode().writes(0);
     InitReg(MISCREG_SMPRI_EL1)
+        .res0(mask(63, 4))
         .allPrivileges().exceptUserMode().reads(1);
     InitReg(MISCREG_SMPRIMAP_EL2)
         .hyp().mon();

From 3e1b9dfc0f8ca9bd0e869a6f1bf04dd31f7acb18 Mon Sep 17 00:00:00 2001
From: Giacomo Travaglini <giacomo.travaglini@arm.com>
Date: Tue, 7 Feb 2023 15:26:47 +0000
Subject: [PATCH 415/492] arch-arm: Remove unnecessary case in ISA::readMiscReg

Change-Id: I8b95a75fbfec2626fbe8b455ae9b3f30acda538f
Signed-off-by: Giacomo Travaglini <giacomo.travaglini@arm.com>
Reviewed-by: Richard Cooper <richard.cooper@arm.com>
Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/70564
Maintainer: Jason Lowe-Power <power.jg@gmail.com>
Tested-by: kokoro <noreply+kokoro@google.com>
---
 src/arch/arm/isa.cc | 2 --
 1 file changed, 2 deletions(-)

diff --git a/src/arch/arm/isa.cc b/src/arch/arm/isa.cc
index ab6e3f7273..83df61fb40 100644
--- a/src/arch/arm/isa.cc
+++ b/src/arch/arm/isa.cc
@@ -567,8 +567,6 @@ ISA::readMiscReg(RegIndex idx)
             l2ctlr.numCPUs = tc->getSystemPtr()->threads.size() - 1;
             return l2ctlr;
         }
-      case MISCREG_DBGDSCRint:
-        return readMiscRegNoEffect(MISCREG_DBGDSCRint);
       case MISCREG_ISR:
       case MISCREG_ISR_EL1:
         {

From 60dd3c7d05c41988abfa9833556e2247466f4b26 Mon Sep 17 00:00:00 2001
From: Giacomo Travaglini <giacomo.travaglini@arm.com>
Date: Wed, 8 Feb 2023 10:48:29 +0000
Subject: [PATCH 416/492] arch-arm: Simplify FPSCR writes

The old logic was setting up a mask which was covering pretty much
the entire register, except for the FPSCR[14:13] and FPSCR[6:5]
register fields. Those RES0 fields were treated as WI.
We simplify this by explicitly marking them as RES0 at construction
time

Change-Id: I59942bd98c074349307d27e3a99351ee25f4db95
Signed-off-by: Giacomo Travaglini <giacomo.travaglini@arm.com>
Reviewed-by: Richard Cooper <richard.cooper@arm.com>
Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/70565
Maintainer: Jason Lowe-Power <power.jg@gmail.com>
Tested-by: kokoro <noreply+kokoro@google.com>
---
 src/arch/arm/isa.cc       | 33 +--------------------------------
 src/arch/arm/regs/misc.cc |  1 +
 2 files changed, 2 insertions(+), 32 deletions(-)

diff --git a/src/arch/arm/isa.cc b/src/arch/arm/isa.cc
index 83df61fb40..9c8e282e20 100644
--- a/src/arch/arm/isa.cc
+++ b/src/arch/arm/isa.cc
@@ -815,38 +815,7 @@ ISA::setMiscReg(RegIndex idx, RegVal val)
             return;
 
           case MISCREG_FPSCR:
-            {
-                const uint32_t ones = (uint32_t)(-1);
-                FPSCR fpscrMask = 0;
-                fpscrMask.ioc = ones;
-                fpscrMask.dzc = ones;
-                fpscrMask.ofc = ones;
-                fpscrMask.ufc = ones;
-                fpscrMask.ixc = ones;
-                fpscrMask.idc = ones;
-                fpscrMask.ioe = ones;
-                fpscrMask.dze = ones;
-                fpscrMask.ofe = ones;
-                fpscrMask.ufe = ones;
-                fpscrMask.ixe = ones;
-                fpscrMask.ide = ones;
-                fpscrMask.len = ones;
-                fpscrMask.fz16 = ones;
-                fpscrMask.stride = ones;
-                fpscrMask.rMode = ones;
-                fpscrMask.fz = ones;
-                fpscrMask.dn = ones;
-                fpscrMask.ahp = ones;
-                fpscrMask.qc = ones;
-                fpscrMask.v = ones;
-                fpscrMask.c = ones;
-                fpscrMask.z = ones;
-                fpscrMask.n = ones;
-                newVal = (newVal & (uint32_t)fpscrMask) |
-                         (readMiscRegNoEffect(MISCREG_FPSCR) &
-                          ~(uint32_t)fpscrMask);
-                tc->getDecoderPtr()->as<Decoder>().setContext(newVal);
-            }
+            tc->getDecoderPtr()->as<Decoder>().setContext(newVal);
             break;
           case MISCREG_FPSR:
             {
diff --git a/src/arch/arm/regs/misc.cc b/src/arch/arm/regs/misc.cc
index 9203810306..2d76143e08 100644
--- a/src/arch/arm/regs/misc.cc
+++ b/src/arch/arm/regs/misc.cc
@@ -2199,6 +2199,7 @@ ISA::initializeMiscRegMetadata()
       .reset(p.fpsid)
       .allPrivileges();
     InitReg(MISCREG_FPSCR)
+      .res0(mask(14, 13) | mask(6, 5))
       .allPrivileges();
     InitReg(MISCREG_MVFR1)
       .reset([] () {

From 20bf5e17e3b69065dda95609bd0070da4f123cec Mon Sep 17 00:00:00 2001
From: Giacomo Travaglini <giacomo.travaglini@arm.com>
Date: Tue, 21 Feb 2023 13:25:02 +0000
Subject: [PATCH 417/492] arch-arm: Extend SCTLR to be 64-bit wide

In AArch64 SCTLR_EL1/_EL2/_EL3 is 64-bit wide

Change-Id: I80931f9dd1a57f3132229b84d32a8ab08eee3371
Signed-off-by: Giacomo Travaglini <giacomo.travaglini@arm.com>
Reviewed-by: Richard Cooper <richard.cooper@arm.com>
Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/70566
Tested-by: kokoro <noreply+kokoro@google.com>
Maintainer: Jason Lowe-Power <power.jg@gmail.com>
---
 src/arch/arm/regs/misc_types.hh | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/arch/arm/regs/misc_types.hh b/src/arch/arm/regs/misc_types.hh
index e6f7e406f2..c139f1a38e 100644
--- a/src/arch/arm/regs/misc_types.hh
+++ b/src/arch/arm/regs/misc_types.hh
@@ -371,7 +371,7 @@ namespace ArmISA
         Bitfield<0> ns;
     EndBitUnion(SCR)
 
-    BitUnion32(SCTLR)
+    BitUnion64(SCTLR)
         Bitfield<31>   enia;    // ARMv8.3 PAuth
         Bitfield<30>   enib;    // ARMv8.3 PAuth
         Bitfield<30>   te;      // Thumb Exception Enable (AArch32 only)

From aff1ddb196ab08ec3d60437e8d85c20df5de5b26 Mon Sep 17 00:00:00 2001
From: Giacomo Travaglini <giacomo.travaglini@arm.com>
Date: Thu, 21 Jul 2022 16:14:18 +0100
Subject: [PATCH 418/492] arch-arm: Implement FEAT_TLBIOS

This feature is mandatory in Armv8.4
We are currently not distinguishing Inner and Outer domains.
We therefore implement TLBIOS instructions as TLBIIS

Change-Id: I2198e6155f1eea7c5f8083c6ffb178d3a3d163d3
Signed-off-by: Giacomo Travaglini <giacomo.travaglini@arm.com>
Reviewed-by: Richard Cooper <richard.cooper@arm.com>
Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/70567
Tested-by: kokoro <noreply+kokoro@google.com>
Maintainer: Jason Lowe-Power <power.jg@gmail.com>
---
 src/arch/arm/ArmSystem.py    |  4 ++-
 src/arch/arm/insts/misc64.cc | 61 ++++++++++++++++++++++++++++++++++++
 src/arch/arm/regs/misc.cc    | 55 ++++++++++++++++++++++++++++++++
 src/arch/arm/regs/misc.hh    | 32 +++++++++++++++++++
 4 files changed, 151 insertions(+), 1 deletion(-)

diff --git a/src/arch/arm/ArmSystem.py b/src/arch/arm/ArmSystem.py
index 7367d80eec..9e2da8e255 100644
--- a/src/arch/arm/ArmSystem.py
+++ b/src/arch/arm/ArmSystem.py
@@ -84,6 +84,7 @@ class ArmExtension(ScopedEnum):
         "FEAT_PAuth",
         # Armv8.4
         "FEAT_SEL2",
+        "FEAT_TLBIOS",
         # Armv9.2
         "FEAT_SME",  # Optional in Armv9.2
         # Others
@@ -162,6 +163,7 @@ class ArmDefaultRelease(Armv8):
         "FEAT_PAuth",
         # Armv8.4
         "FEAT_SEL2",
+        "FEAT_TLBIOS",
         # Armv9.2
         "FEAT_SME",
     ]
@@ -192,7 +194,7 @@ class Armv83(Armv82):
 
 
 class Armv84(Armv83):
-    extensions = Armv83.extensions + ["FEAT_SEL2"]
+    extensions = Armv83.extensions + ["FEAT_SEL2", "FEAT_TLBIOS"]
 
 
 class Armv92(Armv84):
diff --git a/src/arch/arm/insts/misc64.cc b/src/arch/arm/insts/misc64.cc
index 40a6ca4ce5..c7423d9e72 100644
--- a/src/arch/arm/insts/misc64.cc
+++ b/src/arch/arm/insts/misc64.cc
@@ -241,6 +241,10 @@ TlbiOp64::performTlbi(ExecContext *xc, MiscRegIndex dest_idx, RegVal value) cons
         }
       // AArch64 TLB Invalidate All, EL3, Inner Shareable
       case MISCREG_TLBI_ALLE3IS:
+      // AArch64 TLB Invalidate All, EL3, Outer Shareable
+      // We are currently not distinguishing Inner and Outer domains.
+      // We therefore implement TLBIOS instructions as TLBIIS
+      case MISCREG_TLBI_ALLE3OS:
         {
             TLBIALLEL tlbiOp(EL3, true);
             tlbiOp.broadcast(tc);
@@ -258,6 +262,10 @@ TlbiOp64::performTlbi(ExecContext *xc, MiscRegIndex dest_idx, RegVal value) cons
         }
       // AArch64 TLB Invalidate All, EL2, Inner Shareable
       case MISCREG_TLBI_ALLE2IS:
+      // AArch64 TLB Invalidate All, EL2, Outer Shareable
+      // We are currently not distinguishing Inner and Outer domains.
+      // We therefore implement TLBIOS instructions as TLBIIS
+      case MISCREG_TLBI_ALLE2OS:
         {
             SCR scr = tc->readMiscReg(MISCREG_SCR_EL3);
 
@@ -278,6 +286,10 @@ TlbiOp64::performTlbi(ExecContext *xc, MiscRegIndex dest_idx, RegVal value) cons
         }
       // AArch64 TLB Invalidate All, EL1, Inner Shareable
       case MISCREG_TLBI_ALLE1IS:
+      // AArch64 TLB Invalidate All, EL1, Outer Shareable
+      // We are currently not distinguishing Inner and Outer domains.
+      // We therefore implement TLBIOS instructions as TLBIIS
+      case MISCREG_TLBI_ALLE1OS:
         {
             SCR scr = tc->readMiscReg(MISCREG_SCR_EL3);
 
@@ -313,6 +325,9 @@ TlbiOp64::performTlbi(ExecContext *xc, MiscRegIndex dest_idx, RegVal value) cons
             return;
         }
       case MISCREG_TLBI_VMALLS12E1IS:
+      // We are currently not distinguishing Inner and Outer domains.
+      // We therefore implement TLBIOS instructions as TLBIIS
+      case MISCREG_TLBI_VMALLS12E1OS:
         {
             SCR scr = tc->readMiscReg(MISCREG_SCR_EL3);
 
@@ -322,6 +337,9 @@ TlbiOp64::performTlbi(ExecContext *xc, MiscRegIndex dest_idx, RegVal value) cons
             return;
         }
       case MISCREG_TLBI_VMALLE1IS:
+      // We are currently not distinguishing Inner and Outer domains.
+      // We therefore implement TLBIOS instructions as TLBIIS
+      case MISCREG_TLBI_VMALLE1OS:
         {
             SCR scr = tc->readMiscReg(MISCREG_SCR_EL3);
 
@@ -360,6 +378,10 @@ TlbiOp64::performTlbi(ExecContext *xc, MiscRegIndex dest_idx, RegVal value) cons
         }
       // AArch64 TLB Invalidate by VA, EL3, Inner Shareable
       case MISCREG_TLBI_VAE3IS_Xt:
+      // AArch64 TLB Invalidate by VA, EL3, Outer Shareable
+      // We are currently not distinguishing Inner and Outer domains.
+      // We therefore implement TLBIOS instructions as TLBIIS
+      case MISCREG_TLBI_VAE3OS_Xt:
         {
             TLBIMVAA tlbiOp(EL3, true,
                             static_cast<Addr>(bits(value, 43, 0)) << 12,
@@ -370,6 +392,10 @@ TlbiOp64::performTlbi(ExecContext *xc, MiscRegIndex dest_idx, RegVal value) cons
         }
       // AArch64 TLB Invalidate by VA, Last Level, EL3, Inner Shareable
       case MISCREG_TLBI_VALE3IS_Xt:
+      // AArch64 TLB Invalidate by VA, Last Level, EL3, Outer Shareable
+      // We are currently not distinguishing Inner and Outer domains.
+      // We therefore implement TLBIOS instructions as TLBIIS
+      case MISCREG_TLBI_VALE3OS_Xt:
         {
             TLBIMVAA tlbiOp(EL3, true,
                             static_cast<Addr>(bits(value, 43, 0)) << 12,
@@ -430,6 +456,10 @@ TlbiOp64::performTlbi(ExecContext *xc, MiscRegIndex dest_idx, RegVal value) cons
         }
       // AArch64 TLB Invalidate by VA, EL2, Inner Shareable
       case MISCREG_TLBI_VAE2IS_Xt:
+      // AArch64 TLB Invalidate by VA, EL2, Outer Shareable
+      // We are currently not distinguishing Inner and Outer domains.
+      // We therefore implement TLBIOS instructions as TLBIIS
+      case MISCREG_TLBI_VAE2OS_Xt:
         {
             SCR scr = tc->readMiscReg(MISCREG_SCR_EL3);
             HCR hcr = tc->readMiscReg(MISCREG_HCR_EL2);
@@ -455,6 +485,10 @@ TlbiOp64::performTlbi(ExecContext *xc, MiscRegIndex dest_idx, RegVal value) cons
         }
       // AArch64 TLB Invalidate by VA, Last Level, EL2, Inner Shareable
       case MISCREG_TLBI_VALE2IS_Xt:
+      // AArch64 TLB Invalidate by VA, Last Level, EL2, Outer Shareable
+      // We are currently not distinguishing Inner and Outer domains.
+      // We therefore implement TLBIOS instructions as TLBIIS
+      case MISCREG_TLBI_VALE2OS_Xt:
         {
             SCR scr = tc->readMiscReg(MISCREG_SCR_EL3);
             HCR hcr = tc->readMiscReg(MISCREG_HCR_EL2);
@@ -526,6 +560,10 @@ TlbiOp64::performTlbi(ExecContext *xc, MiscRegIndex dest_idx, RegVal value) cons
         }
       // AArch64 TLB Invalidate by VA, EL1, Inner Shareable
       case MISCREG_TLBI_VAE1IS_Xt:
+      // AArch64 TLB Invalidate by VA, EL1, Outer Shareable
+      // We are currently not distinguishing Inner and Outer domains.
+      // We therefore implement TLBIOS instructions as TLBIIS
+      case MISCREG_TLBI_VAE1OS_Xt:
         {
             SCR scr = tc->readMiscReg(MISCREG_SCR_EL3);
             auto asid = asid_16bits ? bits(value, 63, 48) :
@@ -591,6 +629,10 @@ TlbiOp64::performTlbi(ExecContext *xc, MiscRegIndex dest_idx, RegVal value) cons
         }
       // AArch64 TLB Invalidate by ASID, EL1, Inner Shareable
       case MISCREG_TLBI_ASIDE1IS_Xt:
+      // AArch64 TLB Invalidate by ASID, EL1, Outer Shareable
+      // We are currently not distinguishing Inner and Outer domains.
+      // We therefore implement TLBIOS instructions as TLBIIS
+      case MISCREG_TLBI_ASIDE1OS_Xt:
         {
             SCR scr = tc->readMiscReg(MISCREG_SCR_EL3);
             auto asid = asid_16bits ? bits(value, 63, 48) :
@@ -653,6 +695,10 @@ TlbiOp64::performTlbi(ExecContext *xc, MiscRegIndex dest_idx, RegVal value) cons
         }
       // AArch64 TLB Invalidate by VA, All ASID, EL1, Inner Shareable
       case MISCREG_TLBI_VAAE1IS_Xt:
+      // AArch64 TLB Invalidate by VA, All ASID, EL1, Outer Shareable
+      // We are currently not distinguishing Inner and Outer domains.
+      // We therefore implement TLBIOS instructions as TLBIIS
+      case MISCREG_TLBI_VAAE1OS_Xt:
         {
             SCR scr = tc->readMiscReg(MISCREG_SCR_EL3);
 
@@ -675,6 +721,11 @@ TlbiOp64::performTlbi(ExecContext *xc, MiscRegIndex dest_idx, RegVal value) cons
       // AArch64 TLB Invalidate by VA, All ASID,
       // Last Level, EL1, Inner Shareable
       case MISCREG_TLBI_VAALE1IS_Xt:
+      // AArch64 TLB Invalidate by VA, All ASID,
+      // Last Level, EL1, Outer Shareable
+      // We are currently not distinguishing Inner and Outer domains.
+      // We therefore implement TLBIOS instructions as TLBIIS
+      case MISCREG_TLBI_VAALE1OS_Xt:
         {
             SCR scr = tc->readMiscReg(MISCREG_SCR_EL3);
 
@@ -735,6 +786,11 @@ TlbiOp64::performTlbi(ExecContext *xc, MiscRegIndex dest_idx, RegVal value) cons
       // AArch64 TLB Invalidate by Intermediate Physical Address,
       // Stage 2, EL1, Inner Shareable
       case MISCREG_TLBI_IPAS2E1IS_Xt:
+      // AArch64 TLB Invalidate by Intermediate Physical Address,
+      // Stage 2, EL1, Outer Shareable
+      // We are currently not distinguishing Inner and Outer domains.
+      // We therefore implement TLBIOS instructions as TLBIIS
+      case MISCREG_TLBI_IPAS2E1OS_Xt:
         {
             if (EL2Enabled(tc)) {
                 SCR scr = tc->readMiscReg(MISCREG_SCR_EL3);
@@ -755,6 +811,11 @@ TlbiOp64::performTlbi(ExecContext *xc, MiscRegIndex dest_idx, RegVal value) cons
       // AArch64 TLB Invalidate by Intermediate Physical Address,
       // Stage 2, Last Level, EL1, Inner Shareable
       case MISCREG_TLBI_IPAS2LE1IS_Xt:
+      // AArch64 TLB Invalidate by Intermediate Physical Address,
+      // Stage 2, Last Level, EL1, Outer Shareable
+      // We are currently not distinguishing Inner and Outer domains.
+      // We therefore implement TLBIOS instructions as TLBIIS
+      case MISCREG_TLBI_IPAS2LE1OS_Xt:
         {
             if (EL2Enabled(tc)) {
                 SCR scr = tc->readMiscReg(MISCREG_SCR_EL3);
diff --git a/src/arch/arm/regs/misc.cc b/src/arch/arm/regs/misc.cc
index 2d76143e08..ec5670e647 100644
--- a/src/arch/arm/regs/misc.cc
+++ b/src/arch/arm/regs/misc.cc
@@ -753,6 +753,12 @@ std::unordered_map<MiscRegNum64, MiscRegIndex> miscRegNumToIdx{
     { MiscRegNum64(1, 0, 7, 8, 3), MISCREG_AT_S1E0W_Xt },
     { MiscRegNum64(1, 0, 7, 10, 2), MISCREG_DC_CSW_Xt },
     { MiscRegNum64(1, 0, 7, 14, 2), MISCREG_DC_CISW_Xt },
+    { MiscRegNum64(1, 0, 8, 1, 0), MISCREG_TLBI_VMALLE1OS },
+    { MiscRegNum64(1, 0, 8, 1, 1), MISCREG_TLBI_VAE1OS_Xt },
+    { MiscRegNum64(1, 0, 8, 1, 2), MISCREG_TLBI_ASIDE1OS_Xt },
+    { MiscRegNum64(1, 0, 8, 1, 3), MISCREG_TLBI_VAAE1OS_Xt },
+    { MiscRegNum64(1, 0, 8, 1, 5), MISCREG_TLBI_VALE1OS_Xt },
+    { MiscRegNum64(1, 0, 8, 1, 7), MISCREG_TLBI_VAALE1OS_Xt },
     { MiscRegNum64(1, 0, 8, 3, 0), MISCREG_TLBI_VMALLE1IS },
     { MiscRegNum64(1, 0, 8, 3, 1), MISCREG_TLBI_VAE1IS_Xt },
     { MiscRegNum64(1, 0, 8, 3, 2), MISCREG_TLBI_ASIDE1IS_Xt },
@@ -778,12 +784,19 @@ std::unordered_map<MiscRegNum64, MiscRegIndex> miscRegNumToIdx{
     { MiscRegNum64(1, 4, 7, 8, 7), MISCREG_AT_S12E0W_Xt },
     { MiscRegNum64(1, 4, 8, 0, 1), MISCREG_TLBI_IPAS2E1IS_Xt },
     { MiscRegNum64(1, 4, 8, 0, 5), MISCREG_TLBI_IPAS2LE1IS_Xt },
+    { MiscRegNum64(1, 4, 8, 1, 0), MISCREG_TLBI_ALLE2OS },
+    { MiscRegNum64(1, 4, 8, 1, 1), MISCREG_TLBI_VAE2OS_Xt },
+    { MiscRegNum64(1, 4, 8, 1, 4), MISCREG_TLBI_ALLE1OS },
+    { MiscRegNum64(1, 4, 8, 1, 5), MISCREG_TLBI_VALE2OS_Xt },
+    { MiscRegNum64(1, 4, 8, 1, 6), MISCREG_TLBI_VMALLS12E1OS },
     { MiscRegNum64(1, 4, 8, 3, 0), MISCREG_TLBI_ALLE2IS },
     { MiscRegNum64(1, 4, 8, 3, 1), MISCREG_TLBI_VAE2IS_Xt },
     { MiscRegNum64(1, 4, 8, 3, 4), MISCREG_TLBI_ALLE1IS },
     { MiscRegNum64(1, 4, 8, 3, 5), MISCREG_TLBI_VALE2IS_Xt },
     { MiscRegNum64(1, 4, 8, 3, 6), MISCREG_TLBI_VMALLS12E1IS },
+    { MiscRegNum64(1, 4, 8, 4, 0), MISCREG_TLBI_IPAS2E1OS_Xt },
     { MiscRegNum64(1, 4, 8, 4, 1), MISCREG_TLBI_IPAS2E1_Xt },
+    { MiscRegNum64(1, 4, 8, 4, 4), MISCREG_TLBI_IPAS2LE1OS_Xt },
     { MiscRegNum64(1, 4, 8, 4, 5), MISCREG_TLBI_IPAS2LE1_Xt },
     { MiscRegNum64(1, 4, 8, 7, 0), MISCREG_TLBI_ALLE2 },
     { MiscRegNum64(1, 4, 8, 7, 1), MISCREG_TLBI_VAE2_Xt },
@@ -792,6 +805,9 @@ std::unordered_map<MiscRegNum64, MiscRegIndex> miscRegNumToIdx{
     { MiscRegNum64(1, 4, 8, 7, 6), MISCREG_TLBI_VMALLS12E1 },
     { MiscRegNum64(1, 6, 7, 8, 0), MISCREG_AT_S1E3R_Xt },
     { MiscRegNum64(1, 6, 7, 8, 1), MISCREG_AT_S1E3W_Xt },
+    { MiscRegNum64(1, 6, 8, 1, 0), MISCREG_TLBI_ALLE3OS },
+    { MiscRegNum64(1, 6, 8, 1, 1), MISCREG_TLBI_VAE3OS_Xt },
+    { MiscRegNum64(1, 6, 8, 1, 5), MISCREG_TLBI_VALE3OS_Xt },
     { MiscRegNum64(1, 6, 8, 3, 0), MISCREG_TLBI_ALLE3IS },
     { MiscRegNum64(1, 6, 8, 3, 1), MISCREG_TLBI_VAE3IS_Xt },
     { MiscRegNum64(1, 6, 8, 3, 5), MISCREG_TLBI_VALE3IS_Xt },
@@ -3874,6 +3890,7 @@ ISA::initializeMiscRegMetadata()
           isar0_el1.atomic = release->has(ArmExtension::FEAT_LSE) ? 0x2 : 0x0;
           isar0_el1.rdm = release->has(ArmExtension::FEAT_RDM) ? 0x1 : 0x0;
           isar0_el1.tme = release->has(ArmExtension::TME) ? 0x1 : 0x0;
+          isar0_el1.tlb = release->has(ArmExtension::FEAT_TLBIOS) ? 0x1 : 0x0;
           return isar0_el1;
       }())
       .faultRead(EL1, HCR_TRAP(tid3))
@@ -4339,6 +4356,24 @@ ISA::initializeMiscRegMetadata()
       .monSecureWrite().monNonSecureWrite();
     InitReg(MISCREG_AT_S1E3W_Xt)
       .monSecureWrite().monNonSecureWrite();
+    InitReg(MISCREG_TLBI_VMALLE1OS)
+      .faultWrite(EL1, HCR_TRAP(ttlb))
+      .writes(1).exceptUserMode();
+    InitReg(MISCREG_TLBI_VAE1OS_Xt)
+      .faultWrite(EL1, HCR_TRAP(ttlb))
+      .writes(1).exceptUserMode();
+    InitReg(MISCREG_TLBI_ASIDE1OS_Xt)
+      .faultWrite(EL1, HCR_TRAP(ttlb))
+      .writes(1).exceptUserMode();
+    InitReg(MISCREG_TLBI_VAAE1OS_Xt)
+      .faultWrite(EL1, HCR_TRAP(ttlb))
+      .writes(1).exceptUserMode();
+    InitReg(MISCREG_TLBI_VALE1OS_Xt)
+      .faultWrite(EL1, HCR_TRAP(ttlb))
+      .writes(1).exceptUserMode();
+    InitReg(MISCREG_TLBI_VAALE1OS_Xt)
+      .faultWrite(EL1, HCR_TRAP(ttlb))
+      .writes(1).exceptUserMode();
     InitReg(MISCREG_TLBI_VMALLE1IS)
       .faultWrite(EL1, HCR_TRAP(ttlb))
       .writes(1).exceptUserMode();
@@ -4375,6 +4410,20 @@ ISA::initializeMiscRegMetadata()
     InitReg(MISCREG_TLBI_VAALE1_Xt)
       .faultWrite(EL1, HCR_TRAP(ttlb))
       .writes(1).exceptUserMode();
+    InitReg(MISCREG_TLBI_IPAS2E1OS_Xt)
+      .hypWrite().monSecureWrite().monNonSecureWrite();
+    InitReg(MISCREG_TLBI_IPAS2LE1OS_Xt)
+      .hypWrite().monSecureWrite().monNonSecureWrite();
+    InitReg(MISCREG_TLBI_ALLE2OS)
+      .monNonSecureWrite().hypWrite();
+    InitReg(MISCREG_TLBI_VAE2OS_Xt)
+      .monNonSecureWrite().hypWrite();
+    InitReg(MISCREG_TLBI_ALLE1OS)
+      .hypWrite().monSecureWrite().monNonSecureWrite();
+    InitReg(MISCREG_TLBI_VALE2OS_Xt)
+      .monNonSecureWrite().hypWrite();
+    InitReg(MISCREG_TLBI_VMALLS12E1OS)
+      .hypWrite().monSecureWrite().monNonSecureWrite();
     InitReg(MISCREG_TLBI_IPAS2E1IS_Xt)
       .hypWrite().monSecureWrite().monNonSecureWrite();
     InitReg(MISCREG_TLBI_IPAS2LE1IS_Xt)
@@ -4403,6 +4452,12 @@ ISA::initializeMiscRegMetadata()
       .monNonSecureWrite().hypWrite();
     InitReg(MISCREG_TLBI_VMALLS12E1)
       .hypWrite().monSecureWrite().monNonSecureWrite();
+    InitReg(MISCREG_TLBI_ALLE3OS)
+      .monSecureWrite().monNonSecureWrite();
+    InitReg(MISCREG_TLBI_VAE3OS_Xt)
+      .monSecureWrite().monNonSecureWrite();
+    InitReg(MISCREG_TLBI_VALE3OS_Xt)
+      .monSecureWrite().monNonSecureWrite();
     InitReg(MISCREG_TLBI_ALLE3IS)
       .monSecureWrite().monNonSecureWrite();
     InitReg(MISCREG_TLBI_VAE3IS_Xt)
diff --git a/src/arch/arm/regs/misc.hh b/src/arch/arm/regs/misc.hh
index abbd1c6057..c43cf74489 100644
--- a/src/arch/arm/regs/misc.hh
+++ b/src/arch/arm/regs/misc.hh
@@ -681,11 +681,17 @@ namespace ArmISA
         MISCREG_AT_S1E3R_Xt,
         MISCREG_AT_S1E3W_Xt,
         MISCREG_TLBI_VMALLE1IS,
+        MISCREG_TLBI_VMALLE1OS,
         MISCREG_TLBI_VAE1IS_Xt,
+        MISCREG_TLBI_VAE1OS_Xt,
         MISCREG_TLBI_ASIDE1IS_Xt,
+        MISCREG_TLBI_ASIDE1OS_Xt,
         MISCREG_TLBI_VAAE1IS_Xt,
+        MISCREG_TLBI_VAAE1OS_Xt,
         MISCREG_TLBI_VALE1IS_Xt,
+        MISCREG_TLBI_VALE1OS_Xt,
         MISCREG_TLBI_VAALE1IS_Xt,
+        MISCREG_TLBI_VAALE1OS_Xt,
         MISCREG_TLBI_VMALLE1,
         MISCREG_TLBI_VAE1_Xt,
         MISCREG_TLBI_ASIDE1_Xt,
@@ -693,12 +699,19 @@ namespace ArmISA
         MISCREG_TLBI_VALE1_Xt,
         MISCREG_TLBI_VAALE1_Xt,
         MISCREG_TLBI_IPAS2E1IS_Xt,
+        MISCREG_TLBI_IPAS2E1OS_Xt,
         MISCREG_TLBI_IPAS2LE1IS_Xt,
+        MISCREG_TLBI_IPAS2LE1OS_Xt,
         MISCREG_TLBI_ALLE2IS,
+        MISCREG_TLBI_ALLE2OS,
         MISCREG_TLBI_VAE2IS_Xt,
+        MISCREG_TLBI_VAE2OS_Xt,
         MISCREG_TLBI_ALLE1IS,
+        MISCREG_TLBI_ALLE1OS,
         MISCREG_TLBI_VALE2IS_Xt,
+        MISCREG_TLBI_VALE2OS_Xt,
         MISCREG_TLBI_VMALLS12E1IS,
+        MISCREG_TLBI_VMALLS12E1OS,
         MISCREG_TLBI_IPAS2E1_Xt,
         MISCREG_TLBI_IPAS2LE1_Xt,
         MISCREG_TLBI_ALLE2,
@@ -707,8 +720,11 @@ namespace ArmISA
         MISCREG_TLBI_VALE2_Xt,
         MISCREG_TLBI_VMALLS12E1,
         MISCREG_TLBI_ALLE3IS,
+        MISCREG_TLBI_ALLE3OS,
         MISCREG_TLBI_VAE3IS_Xt,
+        MISCREG_TLBI_VAE3OS_Xt,
         MISCREG_TLBI_VALE3IS_Xt,
+        MISCREG_TLBI_VALE3OS_Xt,
         MISCREG_TLBI_ALLE3,
         MISCREG_TLBI_VAE3_Xt,
         MISCREG_TLBI_VALE3_Xt,
@@ -2344,11 +2360,17 @@ namespace ArmISA
         "at_s1e3r_xt",
         "at_s1e3w_xt",
         "tlbi_vmalle1is",
+        "tlbi_vmalle1os",
         "tlbi_vae1is_xt",
+        "tlbi_vae1os_xt",
         "tlbi_aside1is_xt",
+        "tlbi_aside1os_xt",
         "tlbi_vaae1is_xt",
+        "tlbi_vaae1os_xt",
         "tlbi_vale1is_xt",
+        "tlbi_vale1os_xt",
         "tlbi_vaale1is_xt",
+        "tlbi_vaale1os_xt",
         "tlbi_vmalle1",
         "tlbi_vae1_xt",
         "tlbi_aside1_xt",
@@ -2356,12 +2378,19 @@ namespace ArmISA
         "tlbi_vale1_xt",
         "tlbi_vaale1_xt",
         "tlbi_ipas2e1is_xt",
+        "tlbi_ipas2e1os_xt",
         "tlbi_ipas2le1is_xt",
+        "tlbi_ipas2le1os_xt",
         "tlbi_alle2is",
+        "tlbi_alle2os",
         "tlbi_vae2is_xt",
+        "tlbi_vae2os_xt",
         "tlbi_alle1is",
+        "tlbi_alle1os",
         "tlbi_vale2is_xt",
+        "tlbi_vale2os_xt",
         "tlbi_vmalls12e1is",
+        "tlbi_vmalls12e1os",
         "tlbi_ipas2e1_xt",
         "tlbi_ipas2le1_xt",
         "tlbi_alle2",
@@ -2370,8 +2399,11 @@ namespace ArmISA
         "tlbi_vale2_xt",
         "tlbi_vmalls12e1",
         "tlbi_alle3is",
+        "tlbi_alle3os",
         "tlbi_vae3is_xt",
+        "tlbi_vae3os_xt",
         "tlbi_vale3is_xt",
+        "tlbi_vale3os_xt",
         "tlbi_alle3",
         "tlbi_vae3_xt",
         "tlbi_vale3_xt",

From bc63da39dc3c92e9a2a7c55145645cfcb8c96a5c Mon Sep 17 00:00:00 2001
From: "Bobby R. Bruce" <bbruce@ucdavis.edu>
Date: Mon, 15 May 2023 17:25:59 -0700
Subject: [PATCH 419/492] arch-riscv: Fix WFI for O3 CPU

This commit:
https://gem5-review.googlesource.com/c/public/gem5/+/61511
introduced a bug where the O3 CPU hangs. This is because WFI must be
tagged as `IsNonSpeculative`, `IsQuiesce`, and `IsSerializeAfter` to
function correctly with O3 CPUs.

Change-Id: I8b6cb049710d05f37f89a9ce22acc604112bc445
Issue-on: https://gem5.atlassian.net/browse/GEM5-1323
Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/70657
Maintainer: Jason Lowe-Power <power.jg@gmail.com>
Reviewed-by: Roger Chang <rogerycchang@google.com>
Reviewed-by: Jui-min Lee <fcrh@google.com>
Reviewed-by: Jason Lowe-Power <power.jg@gmail.com>
Tested-by: kokoro <noreply+kokoro@google.com>
---
 src/arch/riscv/isa/decoder.isa | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/src/arch/riscv/isa/decoder.isa b/src/arch/riscv/isa/decoder.isa
index f22efb0bf0..3acd80ebf0 100644
--- a/src/arch/riscv/isa/decoder.isa
+++ b/src/arch/riscv/isa/decoder.isa
@@ -2278,7 +2278,8 @@ decode QUADRANT default Unknown::unknown() {
                                 && xc->readMiscReg(MISCREG_NMIP) == 0) {
                                 tc->quiesce();
                             }
-                        }}, No_OpClass);
+                        }}, IsNonSpeculative, IsQuiesce,
+                            IsSerializeAfter, No_OpClass);
                     }
                     0x9: sfence_vma({{
                         STATUS status = xc->readMiscReg(MISCREG_STATUS);

From fccd13ac85b7b11409e0f92b81cb64345e51b079 Mon Sep 17 00:00:00 2001
From: Alex Richardson <alexrichardson@google.com>
Date: Wed, 8 Mar 2023 09:37:34 +0000
Subject: [PATCH 420/492] scons: default to not duplicating sources in the
 build directory

We now default to --no-duplicate-sources, but keep --duplicate-sources to
opt-out of this new build behaviour in case it introduces regressions.

Change-Id: I2f01ceaef7b6b9bff80f4402081f007110f7e6f3
Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/68758
Maintainer: Bobby Bruce <bbruce@ucdavis.edu>
Reviewed-by: Bobby Bruce <bbruce@ucdavis.edu>
Tested-by: kokoro <noreply+kokoro@google.com>
---
 SConstruct | 8 +++++++-
 1 file changed, 7 insertions(+), 1 deletion(-)

diff --git a/SConstruct b/SConstruct
index e09f0d5a5b..9b25b33783 100755
--- a/SConstruct
+++ b/SConstruct
@@ -145,7 +145,13 @@ AddOption('--gprof', action='store_true',
           help='Enable support for the gprof profiler')
 AddOption('--pprof', action='store_true',
           help='Enable support for the pprof profiler')
-AddOption('--no-duplicate-sources', action='store_false', default=True,
+# Default to --no-duplicate-sources, but keep --duplicate-sources to opt-out
+# of this new build behaviour in case it introduces regressions. We could use
+# action=argparse.BooleanOptionalAction here once Python 3.9 is required.
+AddOption('--duplicate-sources', action='store_true', default=False,
+          dest='duplicate_sources',
+          help='Create symlinks to sources in the build directory')
+AddOption('--no-duplicate-sources', action='store_false',
           dest='duplicate_sources',
           help='Do not create symlinks to sources in the build directory')
 

From c4d61ca1439c677c7c9f0dfd06b7093d9779b611 Mon Sep 17 00:00:00 2001
From: "Bobby R. Bruce" <bbruce@ucdavis.edu>
Date: Wed, 10 May 2023 12:19:38 -0700
Subject: [PATCH 421/492] tests: Add '--duplicate-sources' to libgem5 SST build

Unfortunately the building of SST (in "ext/sst") depends on generated
header files. Therefore it adds the build directory to the include path.
For this to work the libgem5.so must be build with
"--duplicate-sources".

Change-Id: I5ed26a89c81402d421f5dfa110de7cf758f28694
Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/70497
Reviewed-by: Jason Lowe-Power <power.jg@gmail.com>
Maintainer: Jason Lowe-Power <power.jg@gmail.com>
Tested-by: kokoro <noreply+kokoro@google.com>
---
 ext/sst/INSTALL.md | 5 ++++-
 tests/nightly.sh   | 2 +-
 2 files changed, 5 insertions(+), 2 deletions(-)

diff --git a/ext/sst/INSTALL.md b/ext/sst/INSTALL.md
index 1034d7dd3f..91f92eb7ff 100644
--- a/ext/sst/INSTALL.md
+++ b/ext/sst/INSTALL.md
@@ -61,9 +61,12 @@ echo "export PKG_CONFIG_PATH=$PKG_CONFIG_PATH:$SST_CORE_HOME/lib/pkgconfig/" >>
 At the root of gem5 folder,
 
 ```sh
-scons build/RISCV/libgem5_opt.so -j $(nproc) --without-tcmalloc
+scons build/RISCV/libgem5_opt.so -j $(nproc) --without-tcmalloc --duplicate-sources
 ```
 
+**Note:** `--without-tcmalloc` is required to avoid a conflict with SST's malloc.
+`--duplicate-sources` is required as the compilation of SST depends on sources to be present in the "build" directory.
+
 ### Compiling the SST integration
 
 At the root of gem5 folder,
diff --git a/tests/nightly.sh b/tests/nightly.sh
index f5f9295f3b..9286c545cd 100755
--- a/tests/nightly.sh
+++ b/tests/nightly.sh
@@ -159,7 +159,7 @@ build_and_run_SST () {
         "${gem5_root}" --rm  --memory="${docker_mem_limit}" \
         gcr.io/gem5-test/sst-env:${tag} bash -c "\
 scons build/${isa}/libgem5_${variant}.so -j${compile_threads} \
---without-tcmalloc --ignore-style && \
+--without-tcmalloc --duplicate-sources --ignore-style && \
 cd ext/sst && \
 make clean; make -j ${compile_threads} && \
 sst --add-lib-path=./ sst/example.py && \

From 44919c1c4d02d980316d84a3749279810acd1362 Mon Sep 17 00:00:00 2001
From: Ayaz Akram <yazakram@ucdavis.edu>
Date: Sun, 30 Apr 2023 16:25:04 -0700
Subject: [PATCH 422/492] configs: Update riscv/fs_linux.py script

This change fixes a couple of small issues with
the configs/example/riscv/fs_linux.py script to ensure
that it works with the latest version of gem5.

Change-Id: I9753ca4c8dd0b87d05681b167cf51e3c097e9152
Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/70177
Reviewed-by: Jason Lowe-Power <power.jg@gmail.com>
Maintainer: Jason Lowe-Power <power.jg@gmail.com>
Reviewed-by: Alex Richardson <alexrichardson@google.com>
Tested-by: kokoro <noreply+kokoro@google.com>
Reviewed-by: Roger Chang <rogerycchang@google.com>
---
 configs/example/riscv/fs_linux.py | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/configs/example/riscv/fs_linux.py b/configs/example/riscv/fs_linux.py
index aec126ab0d..949c7e2623 100644
--- a/configs/example/riscv/fs_linux.py
+++ b/configs/example/riscv/fs_linux.py
@@ -187,6 +187,7 @@ system.platform = HiFive()
 # RTCCLK (Set to 100MHz for faster simulation)
 system.platform.rtc = RiscvRTC(frequency=Frequency("100MHz"))
 system.platform.clint.int_pin = system.platform.rtc.int_pin
+system.platform.pci_host.pio = system.iobus.mem_side_ports
 
 # VirtIOMMIO
 if args.disk_image:
@@ -236,8 +237,6 @@ system.cpu_clk_domain = SrcClockDomain(
     clock=args.cpu_clock, voltage_domain=system.cpu_voltage_domain
 )
 
-system.workload.object_file = args.kernel
-
 # NOTE: Not yet tested
 if args.script is not None:
     system.readfile = args.script

From 08644a76707ac8ee14f9ff0d52af7c3e324209f0 Mon Sep 17 00:00:00 2001
From: Matthew Poremba <matthew.poremba@amd.com>
Date: Tue, 16 May 2023 19:46:50 -0500
Subject: [PATCH 423/492] dev-amdgpu: Fix nbio psp ring assert

The size of the packet changes between ROCm 4.x and ROCm 5.x. Change how
the address is set based on the incoming packet size so that both
versions continue to work for now.

Change-Id: I91694e4760198fd9129e60140df4e863666be2e2
Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/70677
Tested-by: kokoro <noreply+kokoro@google.com>
Reviewed-by: Matt Sinclair <mattdsinclair@gmail.com>
Maintainer: Matt Sinclair <mattdsinclair@gmail.com>
---
 src/dev/amdgpu/amdgpu_nbio.cc | 20 +++++++++++++++++---
 1 file changed, 17 insertions(+), 3 deletions(-)

diff --git a/src/dev/amdgpu/amdgpu_nbio.cc b/src/dev/amdgpu/amdgpu_nbio.cc
index 8064fd2a0e..69e4373e64 100644
--- a/src/dev/amdgpu/amdgpu_nbio.cc
+++ b/src/dev/amdgpu/amdgpu_nbio.cc
@@ -162,9 +162,23 @@ void
 AMDGPUNbio::writeFrame(PacketPtr pkt, Addr offset)
 {
     if (offset == psp_ring_listen_addr) {
-        assert(pkt->getSize() == 8);
-        psp_ring_dev_addr = pkt->getLE<uint64_t>()
-                          - gpuDevice->getVM().getSysAddrRangeLow();
+        DPRINTF(AMDGPUDevice, "Saw psp_ring_listen_addr with size %ld value "
+                "%ld\n", pkt->getSize(), pkt->getUintX(ByteOrder::little));
+
+        /*
+         * In ROCm versions 4.x this packet is a 4 byte value. In ROCm 5.x
+         * the packet is 8 bytes and mapped as a system address which needs
+         * to be subtracted out to get the framebuffer address.
+         */
+        if (pkt->getSize() == 4) {
+            psp_ring_dev_addr = pkt->getLE<uint32_t>();
+        } else if (pkt->getSize() == 8) {
+            psp_ring_dev_addr = pkt->getUintX(ByteOrder::little)
+                              - gpuDevice->getVM().getSysAddrRangeLow();
+        } else {
+            panic("Invalid write size to psp_ring_listen_addr\n");
+        }
+
         DPRINTF(AMDGPUDevice, "Setting PSP ring device address to %#lx\n",
                 psp_ring_dev_addr);
     }

From 9c0f337d7813589207d0c3f48a0c785ba5378a79 Mon Sep 17 00:00:00 2001
From: Roger Chang <rogerycchang@google.com>
Date: Fri, 12 May 2023 23:22:14 +0800
Subject: [PATCH 424/492] arch-riscv: Simplify amd merge RV32/RV64 the RVM
 instructions

The change move the details implementation to utility.hh and merge
the RV32 and RV64 versions into one.

Change-Id: I438bfb0fc511f0f27e83f247d386c58493db65b4
Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/70597
Tested-by: kokoro <noreply+kokoro@google.com>
Reviewed-by: Yu-hsin Wang <yuhsingw@google.com>
Maintainer: Bobby Bruce <bbruce@ucdavis.edu>
---
 src/arch/riscv/isa/decoder.isa | 236 ++++++++-------------------------
 src/arch/riscv/utility.hh      |  96 ++++++++++++++
 2 files changed, 149 insertions(+), 183 deletions(-)

diff --git a/src/arch/riscv/isa/decoder.isa b/src/arch/riscv/isa/decoder.isa
index 3acd80ebf0..47519eeeb4 100644
--- a/src/arch/riscv/isa/decoder.isa
+++ b/src/arch/riscv/isa/decoder.isa
@@ -1084,34 +1084,13 @@ decode QUADRANT default Unknown::unknown() {
                     0x0: sll({{
                         Rd = rvSext(Rs1 << rvSelect(Rs2<4:0>, Rs2<5:0>));
                     }});
-                    0x1: decode RVTYPE {
-                        0x0: rv32_mulh({{
-                            Rd_sw = ((int64_t)Rs1_sw * Rs2_sw) >> 32;
-                        }}, IntMultOp);
-                        0x1: mulh({{
-                            bool negate = (Rs1_sd < 0) != (Rs2_sd < 0);
-
-                            uint64_t Rs1_lo = (uint32_t)std::abs(Rs1_sd);
-                            uint64_t Rs1_hi = (uint64_t)std::abs(Rs1_sd) >> 32;
-                            uint64_t Rs2_lo = (uint32_t)std::abs(Rs2_sd);
-                            uint64_t Rs2_hi = (uint64_t)std::abs(Rs2_sd) >> 32;
-
-                            uint64_t hi = Rs1_hi*Rs2_hi;
-                            uint64_t mid1 = Rs1_hi*Rs2_lo;
-                            uint64_t mid2 = Rs1_lo*Rs2_hi;
-                            uint64_t lo = Rs2_lo*Rs1_lo;
-                            uint64_t carry = ((uint64_t)(uint32_t)mid1
-                                    + (uint64_t)(uint32_t)mid2
-                                    + (lo >> 32)) >> 32;
-
-                            uint64_t res = hi +
-                                          (mid1 >> 32) +
-                                          (mid2 >> 32) +
-                                          carry;
-                            Rd = negate ? ~res + (Rs1_sd*Rs2_sd == 0 ? 1 : 0)
-                                        : res;
-                        }}, IntMultOp);
-                    }
+                    0x1: mulh({{
+                        if (machInst.rv_type == RV32) {
+                            Rd_sd = mulh_32(Rs1_sd, Rs2_sd);
+                        } else {
+                            Rd_sd = mulh_64(Rs1_sd, Rs2_sd);
+                        }
+                    }}, IntMultOp);
                     0x5: clmul({{
                         uint64_t result = 0;
                         for (int i = 0; i < rvSelect(32, 64); i++) {
@@ -1144,32 +1123,13 @@ decode QUADRANT default Unknown::unknown() {
                     0x0: slt({{
                         Rd = (rvSext(Rs1_sd) < rvSext(Rs2_sd)) ? 1 : 0;
                     }});
-                    0x1: decode RVTYPE {
-                        0x0: rv32_mulhsu({{
-                            Rd_sw = ((int64_t)Rs1_sw * Rs2_uw) >> 32;
-                        }}, IntMultOp);
-                        0x1: mulhsu({{
-                            bool negate = Rs1_sd < 0;
-                            uint64_t Rs1_lo = (uint32_t)std::abs(Rs1_sd);
-                            uint64_t Rs1_hi = (uint64_t)std::abs(Rs1_sd) >> 32;
-                            uint64_t Rs2_lo = (uint32_t)Rs2;
-                            uint64_t Rs2_hi = Rs2 >> 32;
-
-                            uint64_t hi = Rs1_hi*Rs2_hi;
-                            uint64_t mid1 = Rs1_hi*Rs2_lo;
-                            uint64_t mid2 = Rs1_lo*Rs2_hi;
-                            uint64_t lo = Rs1_lo*Rs2_lo;
-                            uint64_t carry = ((uint64_t)(uint32_t)mid1
-                                    + (uint64_t)(uint32_t)mid2
-                                    + (lo >> 32)) >> 32;
-
-                            uint64_t res = hi +
-                                          (mid1 >> 32) +
-                                          (mid2 >> 32) +
-                                          carry;
-                            Rd = negate ? ~res + (Rs1_sd*Rs2 == 0 ? 1 : 0) : res;
-                        }}, IntMultOp);
-                    }
+                    0x1: mulhsu({{
+                        if (machInst.rv_type == RV32) {
+                            Rd_sd = mulhsu_32(Rs1_sd, Rs2);
+                        } else {
+                            Rd_sd = mulhsu_64(Rs1_sd, Rs2);
+                        }
+                    }}, IntMultOp);
                     0x5: clmulr({{
                         uint64_t result = 0;
                         uint64_t xlen = rvSelect(32, 64);
@@ -1197,27 +1157,13 @@ decode QUADRANT default Unknown::unknown() {
                     0x0: sltu({{
                         Rd = (rvZext(Rs1) < rvZext(Rs2)) ? 1 : 0;
                     }});
-                    0x1: decode RVTYPE {
-                        0x0: rv32_mulhu({{
-                            Rd_sw = ((uint64_t)Rs1_uw * Rs2_uw) >> 32;
-                        }}, IntMultOp);
-                        0x1: mulhu({{
-                            uint64_t Rs1_lo = (uint32_t)Rs1;
-                            uint64_t Rs1_hi = Rs1 >> 32;
-                            uint64_t Rs2_lo = (uint32_t)Rs2;
-                            uint64_t Rs2_hi = Rs2 >> 32;
-
-                            uint64_t hi = Rs1_hi*Rs2_hi;
-                            uint64_t mid1 = Rs1_hi*Rs2_lo;
-                            uint64_t mid2 = Rs1_lo*Rs2_hi;
-                            uint64_t lo = Rs1_lo*Rs2_lo;
-                            uint64_t carry = ((uint64_t)(uint32_t)mid1
-                                    + (uint64_t)(uint32_t)mid2
-                                    + (lo >> 32)) >> 32;
-
-                            Rd = hi + (mid1 >> 32) + (mid2 >> 32) + carry;
-                        }}, IntMultOp);
-                    }
+                    0x1: mulhu({{
+                        if (machInst.rv_type == RV32) {
+                            Rd = (int32_t)mulhu_32(Rs1, Rs2);
+                        } else {
+                            Rd = mulhu_64(Rs1, Rs2);
+                        }
+                    }}, IntMultOp);
                     0x5: clmulh({{
                         uint64_t result = 0;
                         uint64_t xlen = rvSelect(32, 64);
@@ -1235,30 +1181,13 @@ decode QUADRANT default Unknown::unknown() {
                     0x0: xor({{
                         Rd = rvSext(Rs1 ^ Rs2);
                     }});
-                    0x1: decode RVTYPE {
-                        0x0: rv32_div({{
-                            constexpr int32_t kRsMin = \
-                                std::numeric_limits<int32_t>::min();
-                            if (Rs2_sw == 0) {
-                                Rd_sw = -1;
-                            } else if (Rs1_sw == kRsMin && Rs2_sw == -1) {
-                                Rd_sw = kRsMin;
-                            } else {
-                                Rd_sw = Rs1_sw/Rs2_sw;
-                            }
-                        }}, IntDivOp);
-                        0x1: div({{
-                            constexpr int64_t kRsMin = \
-                                std::numeric_limits<int64_t>::min();
-                            if (Rs2_sd == 0) {
-                                Rd_sd = -1;
-                            } else if (Rs1_sd == kRsMin && Rs2_sd == -1) {
-                                Rd_sd = kRsMin;
-                            } else {
-                                Rd_sd = Rs1_sd/Rs2_sd;
-                            }
-                        }}, IntDivOp);
-                    }
+                    0x1: div({{
+                        if (machInst.rv_type == RV32) {
+                            Rd_sd = div<int32_t>(Rs1, Rs2);
+                        } else {
+                            Rd_sd = div<int64_t>(Rs1, Rs2);
+                        }
+                    }}, IntDivOp);
                     0x4: pack({{
                         int xlen = rvSelect(32, 64);
                         Rd = rvSext(
@@ -1289,22 +1218,13 @@ decode QUADRANT default Unknown::unknown() {
                         Rd = rvSext(rvZext(Rs1) >>
                                     rvSelect(Rs2<4:0>, Rs2<5:0>));
                     }});
-                    0x1: decode RVTYPE {
-                        0x0: rv32_divu({{
-                            if (Rs2_uw == 0) {
-                                Rd_sw = std::numeric_limits<uint32_t>::max();
-                            } else {
-                                Rd_sw = Rs1_uw/Rs2_uw;
-                            }
-                        }}, IntDivOp);
-                        0x1: divu({{
-                            if (Rs2 == 0) {
-                                Rd = std::numeric_limits<uint64_t>::max();
-                            } else {
-                                Rd = Rs1/Rs2;
-                            }
-                        }}, IntDivOp);
-                    }
+                    0x1: divu({{
+                        if (machInst.rv_type == RV32) {
+                            Rd = (int32_t)divu<uint32_t>(Rs1, Rs2);
+                        } else {
+                            Rd = divu<uint64_t>(Rs1, Rs2);
+                        }
+                    }}, IntDivOp);
                     0x20: sra({{
                         Rd = rvSext(Rs1_sd) >> rvSelect(Rs2<4:0>, Rs2<5:0>);
                     }});
@@ -1327,30 +1247,13 @@ decode QUADRANT default Unknown::unknown() {
                     0x0: or({{
                         Rd = rvSext(Rs1 | Rs2);
                     }});
-                    0x1: decode RVTYPE {
-                        0x0: rv32_rem({{
-                            constexpr int32_t kRsMin = \
-                                std::numeric_limits<int32_t>::min();
-                            if (Rs2_sw == 0) {
-                                Rd_sw = Rs1_sw;
-                            } else if (Rs1_sw == kRsMin && Rs2_sw == -1) {
-                                Rd_sw = 0;
-                            } else {
-                                Rd_sw = Rs1_sw%Rs2_sw;
-                            }
-                        }}, IntDivOp);
-                        0x1: rem({{
-                            constexpr int64_t kRsMin = \
-                                std::numeric_limits<int64_t>::min();
-                            if (Rs2_sd == 0) {
-                                Rd = Rs1_sd;
-                            } else if (Rs1_sd == kRsMin && Rs2_sd == -1) {
-                                Rd = 0;
-                            } else {
-                                Rd = Rs1_sd%Rs2_sd;
-                            }
-                        }}, IntDivOp);
-                    }
+                    0x1: rem({{
+                        if (machInst.rv_type == RV32) {
+                            Rd_sd = rem<int32_t>(Rs1, Rs2);
+                        } else {
+                            Rd_sd = rem<int64_t>(Rs1, Rs2);
+                        }
+                    }}, IntDivOp);
                     0x5: max({{
                         Rd_sd = std::max(rvSext(Rs1_sd), rvSext(Rs2_sd));
                     }});
@@ -1365,22 +1268,13 @@ decode QUADRANT default Unknown::unknown() {
                     0x0: and({{
                         Rd = rvSext(Rs1 & Rs2);
                     }});
-                    0x1: decode RVTYPE {
-                        0x0: rv32_remu({{
-                            if (Rs2_uw == 0) {
-                                Rd_sw = Rs1_uw;
-                            } else {
-                                Rd_sw = Rs1_uw%Rs2_uw;
-                            }
-                        }}, IntDivOp);
-                        0x1: remu({{
-                            if (Rs2 == 0) {
-                                Rd = Rs1;
-                            } else {
-                                Rd = Rs1%Rs2;
-                            }
-                        }}, IntDivOp);
-                    }
+                    0x1: remu({{
+                        if (machInst.rv_type == RV32) {
+                            Rd = (int32_t)remu<uint32_t>(Rs1, Rs2);
+                        } else {
+                            Rd = remu<uint64_t>(Rs1, Rs2);
+                        }
+                    }}, IntDivOp);
                     0x4: packh({{
                         // It doesn't need to sign ext as MSB is always 0
                         Rd = (Rs2_ub << 8) | Rs1_ub;
@@ -1432,15 +1326,7 @@ decode QUADRANT default Unknown::unknown() {
                     }
                     0x4: decode FUNCT7 {
                         0x1: divw({{
-                            constexpr int32_t kRsMin = \
-                                std::numeric_limits<int32_t>::min();
-                            if (Rs2_sw == 0) {
-                                Rd_sd = -1;
-                            } else if (Rs1_sw == kRsMin && Rs2_sw == -1) {
-                                Rd_sd = kRsMin;
-                            } else {
-                                Rd_sd = Rs1_sw/Rs2_sw;
-                            }
+                            Rd_sd = div<int32_t>(Rs1, Rs2);
                         }}, IntDivOp);
                         0x4: packw({{
                             Rd_sd = sext<32>((Rs2_uh << 16) | Rs1_uh);
@@ -1454,11 +1340,7 @@ decode QUADRANT default Unknown::unknown() {
                             Rd_sd = (int32_t)(Rs1_uw >> Rs2<4:0>);
                         }});
                         0x1: divuw({{
-                            if (Rs2_uw == 0) {
-                                Rd_sd = std::numeric_limits<uint64_t>::max();
-                            } else {
-                                Rd_sd = (int32_t)(Rs1_uw/Rs2_uw);
-                            }
+                            Rd = sext<32>(divu<uint32_t>(Rs1, Rs2));
                         }}, IntDivOp);
                         0x20: sraw({{
                             Rd_sd = Rs1_sw >> Rs2<4:0>;
@@ -1470,26 +1352,14 @@ decode QUADRANT default Unknown::unknown() {
                     }
                     0x6:  decode FUNCT7 {
                         0x1: remw({{
-                            constexpr int32_t kRsMin = \
-                                std::numeric_limits<int32_t>::min();
-                            if (Rs2_sw == 0) {
-                                Rd_sd = Rs1_sw;
-                            } else if (Rs1_sw == kRsMin && Rs2_sw == -1) {
-                                Rd_sd = 0;
-                            } else {
-                                Rd_sd = Rs1_sw%Rs2_sw;
-                            }
+                            Rd_sd = rem<int32_t>(Rs1, Rs2);
                         }}, IntDivOp);
                         0x10: sh3add_uw({{
                             Rd = (((uint64_t)Rs1_uw) << 3) + Rs2;
                         }});
                     }
                     0x7: remuw({{
-                        if (Rs2_uw == 0) {
-                            Rd_sd = (int32_t)Rs1_uw;
-                        } else {
-                            Rd_sd = (int32_t)(Rs1_uw%Rs2_uw);
-                        }
+                        Rd = sext<32>(remu<uint32_t>(Rs1, Rs2));
                     }}, IntDivOp);
                 }
             }
diff --git a/src/arch/riscv/utility.hh b/src/arch/riscv/utility.hh
index 3bd34c4801..5fccc84c79 100644
--- a/src/arch/riscv/utility.hh
+++ b/src/arch/riscv/utility.hh
@@ -55,6 +55,7 @@
 #include "cpu/reg_class.hh"
 #include "cpu/static_inst.hh"
 #include "cpu/thread_context.hh"
+#include "enums/RiscvType.hh"
 #include "rvk.hh"
 
 namespace gem5
@@ -137,6 +138,101 @@ registerName(RegId reg)
     }
 }
 
+inline uint32_t
+mulhu_32(uint32_t rs1, uint32_t rs2)
+{
+    return ((uint64_t)rs1 * rs2) >> 32;
+}
+
+inline uint64_t
+mulhu_64(uint64_t rs1, uint64_t rs2)
+{
+    uint64_t rs1_lo = (uint32_t)rs1;
+    uint64_t rs1_hi = rs1 >> 32;
+    uint64_t rs2_lo = (uint32_t)rs2;
+    uint64_t rs2_hi = rs2 >> 32;
+
+    uint64_t hi = rs1_hi * rs2_hi;
+    uint64_t mid1 = rs1_hi * rs2_lo;
+    uint64_t mid2 = rs1_lo * rs2_hi;
+    uint64_t lo = rs1_lo * rs2_lo;
+    uint64_t carry = ((uint64_t)(uint32_t)mid1
+            + (uint64_t)(uint32_t)mid2
+            + (lo >> 32)) >> 32;
+
+    return hi + (mid1 >> 32) + (mid2 >> 32) + carry;
+}
+
+inline int32_t
+mulh_32(int32_t rs1, int32_t rs2)
+{
+    return ((int64_t)rs1 * rs2) >> 32;
+}
+
+inline int64_t
+mulh_64(int64_t rs1, int64_t rs2)
+{
+    bool negate = (rs1 < 0) != (rs2 < 0);
+    uint64_t res = mulhu_64(std::abs(rs1), std::abs(rs2));
+    return negate ? ~res + (rs1 * rs2 == 0 ? 1 : 0) : res;
+}
+
+inline int32_t
+mulhsu_32(int32_t rs1, uint32_t rs2)
+{
+    return ((int64_t)rs1 * rs2) >> 32;
+}
+
+inline int64_t
+mulhsu_64(int64_t rs1, uint64_t rs2)
+{
+    bool negate = rs1 < 0;
+    uint64_t res = mulhu_64(std::abs(rs1), rs2);
+    return negate ? ~res + (rs1 * rs2 == 0 ? 1 : 0) : res;
+}
+
+template<typename T> inline T
+div(T rs1, T rs2)
+{
+    constexpr T kRsMin = std::numeric_limits<T>::min();
+    if (rs2 == 0) {
+        return -1;
+    } else if (rs1 == kRsMin && rs2 == -1) {
+        return kRsMin;
+    } else {
+        return rs1 / rs2;
+    }
+}
+
+template<typename T> inline T
+divu(T rs1, T rs2)
+{
+    if (rs2 == 0) {
+        return std::numeric_limits<T>::max();
+    } else {
+        return rs1 / rs2;
+    }
+}
+
+template<typename T> inline T
+rem(T rs1, T rs2)
+{
+    constexpr T kRsMin = std::numeric_limits<T>::min();
+    if (rs2 == 0) {
+        return rs1;
+    } else if (rs1 == kRsMin && rs2 == -1) {
+        return 0;
+    } else {
+        return rs1 % rs2;
+    }
+}
+
+template<typename T> inline T
+remu(T rs1, T rs2)
+{
+    return (rs2 == 0) ? rs1 : rs1 % rs2;
+}
+
 } // namespace RiscvISA
 } // namespace gem5
 

From 4198d027ac2673457953eecfbec9db681991f78d Mon Sep 17 00:00:00 2001
From: "Bobby R. Bruce" <bbruce@ucdavis.edu>
Date: Mon, 22 May 2023 13:59:24 -0700
Subject: [PATCH 425/492] tests,systemc: Fix nightly systemc test

This fixes these nightly failing tests:
https://jenkins.gem5.org/job/nightly/609/

Due to this commit:
https://gem5-review.googlesource.com/c/public/gem5/+/68758
The source files are not copied to the "build" directory by default.
This caused the systemc tests to fail as the
"util/systemc/gem5_within_systemc/Makefile" depends on generated source
files in the "build" directory.

This patch adds the "--duplicate-sources" flag to the building of the
ARM binaries necessisary for running systemc. The README has been
updated to reflect this.

Change-Id: I3006005e43276097be98f7d4685f3d98c180d3f9
Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/70860
Tested-by: kokoro <noreply+kokoro@google.com>
Maintainer: Bobby Bruce <bbruce@ucdavis.edu>
Reviewed-by: Jason Lowe-Power <power.jg@gmail.com>
Maintainer: Jason Lowe-Power <power.jg@gmail.com>
---
 tests/nightly.sh                        |  8 +++++---
 util/systemc/gem5_within_systemc/README | 10 +++++++---
 2 files changed, 12 insertions(+), 6 deletions(-)

diff --git a/tests/nightly.sh b/tests/nightly.sh
index 9286c545cd..cea1ad0be9 100755
--- a/tests/nightly.sh
+++ b/tests/nightly.sh
@@ -173,9 +173,11 @@ build_and_run_systemc () {
     docker run -u $UID:$GID --volume "${gem5_root}":"${gem5_root}" -w \
         "${gem5_root}" --memory="${docker_mem_limit}" --rm \
         gcr.io/gem5-test/ubuntu-22.04_min-dependencies:${tag} bash -c "\
-scons -j${compile_threads} --ignore-style build/ARM/gem5.opt && \
-scons --with-cxx-config --without-python --without-tcmalloc USE_SYSTEMC=0 \
-    -j${compile_threads} build/ARM/libgem5_opt.so \
+scons -j${compile_threads} --ignore-style --duplicate-sources \
+build/ARM/gem5.opt && \
+scons --with-cxx-config --without-python --without-tcmalloc \
+--duplicate-sources USE_SYSTEMC=0  \
+-j${compile_threads} build/ARM/libgem5_opt.so \
 "
 
     docker run -u $UID:$GID --volume "${gem5_root}":"${gem5_root}" -w \
diff --git a/util/systemc/gem5_within_systemc/README b/util/systemc/gem5_within_systemc/README
index b50ed71003..bcaaceeb77 100644
--- a/util/systemc/gem5_within_systemc/README
+++ b/util/systemc/gem5_within_systemc/README
@@ -28,17 +28,21 @@ To build:
 First build gem5 as a library with cxx-config support and (optionally)
 without python.  When building the library, disable gem5's native SystemC
 API support, as that will conflict with the external version.  Also build a
-normal gem5 (cxx-config not needed, Python needed):
+normal gem5 (cxx-config not needed, Python needed)
+
+Note: The `--duplicate-source` option is also needed as
+"util/systemc/gem5_within_systemc" depends on generated source files to be
+present in the "build" directory.
 
 > cd ../../..
 > scons build/ARM/gem5.opt
 > scons --with-cxx-config --without-python --without-tcmalloc USE_SYSTEMC=0 \
->       build/ARM/libgem5_opt.so
+>       --duplicate-source build/ARM/libgem5_opt.so
 > cd util/systemc
 
 Note: For MAC / OSX this command should be used:
 > scons --with-cxx-config --without-python --without-tcmalloc USE_SYSTEMC=0 \
->       build/ARM/libgem5_opt.dylib
+>       --duplicate-sources build/ARM/libgem5_opt.dylib
 
 Set a proper LD_LIBRARY_PATH e.g. for bash:
 > export LD_LIBRARY_PATH="$LD_LIBRARY_PATH:/path/to/gem5/build/ARM/"

From 00426eea99b6f71ac48630cd81bf4145213bc738 Mon Sep 17 00:00:00 2001
From: Giacomo Travaglini <giacomo.travaglini@arm.com>
Date: Wed, 17 May 2023 15:33:34 +0100
Subject: [PATCH 426/492] arch-arm: Define remaining fields of the arm64
 AT_HWCAP entry

Change-Id: I4db4884d677f6d25417ae6edceb7f1e8dfad36cb
Signed-off-by: Giacomo Travaglini <giacomo.travaglini@arm.com>
Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/70758
Reviewed-by: Richard Cooper <richard.cooper@arm.com>
Maintainer: Jason Lowe-Power <power.jg@gmail.com>
Tested-by: kokoro <noreply+kokoro@google.com>
---
 src/arch/arm/process.cc | 8 ++++++--
 1 file changed, 6 insertions(+), 2 deletions(-)

diff --git a/src/arch/arm/process.cc b/src/arch/arm/process.cc
index 9770ea68ab..6b5f69e4e1 100644
--- a/src/arch/arm/process.cc
+++ b/src/arch/arm/process.cc
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2010, 2012, 2017-2018 ARM Limited
+ * Copyright (c) 2010, 2012, 2017-2018, 2023 Arm Limited
  * All rights reserved
  *
  * The license below extends only to copyright in the software and shall
@@ -207,7 +207,11 @@ ArmProcess64::armHwcapImpl() const
         Arm_Dit = 1 << 24,
         Arm_Uscat = 1 << 25,
         Arm_Ilrcpc = 1 << 26,
-        Arm_Flagm = 1 << 27
+        Arm_Flagm = 1 << 27,
+        Arm_Sbss = 1 << 28,
+        Arm_Sb = 1 << 29,
+        Arm_Paca = 1 << 30,
+        Arm_Pacg = 1 << 31
     };
 
     uint32_t hwcap = 0;

From a3cae504019bad10f8e5fc6fd661ef12848c9cbe Mon Sep 17 00:00:00 2001
From: Giacomo Travaglini <giacomo.travaglini@arm.com>
Date: Wed, 17 May 2023 15:34:50 +0100
Subject: [PATCH 427/492] arch-arm: Enable FEAT_PAuth in SE mode

It was in theory already possible to use FEAT_PAuth instructions in
SE mode, however its presence was hidden to userspace code as
the cpu feature was not listed in the auxiliary vectors

Change-Id: I6da5da0878dde56c22ffdba25eff15e36f5022fe
Signed-off-by: Giacomo Travaglini <giacomo.travaglini@arm.com>
Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/70759
Reviewed-by: Richard Cooper <richard.cooper@arm.com>
Maintainer: Jason Lowe-Power <power.jg@gmail.com>
Tested-by: kokoro <noreply+kokoro@google.com>
---
 src/arch/arm/process.cc | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/src/arch/arm/process.cc b/src/arch/arm/process.cc
index 6b5f69e4e1..02771aed00 100644
--- a/src/arch/arm/process.cc
+++ b/src/arch/arm/process.cc
@@ -251,6 +251,8 @@ ArmProcess64::armHwcapImpl() const
     hwcap |= (isa_r1.fcma >= 1) ? Arm_Fcma : 0;
     hwcap |= (isa_r1.lrcpc >= 1) ? Arm_Lrcpc : 0;
     hwcap |= (isa_r1.lrcpc >= 2) ? Arm_Ilrcpc : 0;
+    hwcap |= (isa_r1.apa >= 1 || isa_r1.api >= 1) ? Arm_Paca : 0;
+    hwcap |= (isa_r1.gpa >= 1 || isa_r1.gpi >= 1) ? Arm_Pacg : 0;
 
     const AA64MMFR2 mm_fr2 = tc->readMiscReg(MISCREG_ID_AA64MMFR2_EL1);
 

From 98821e365cfe04b1e1463dba69a9e0788d9ae10c Mon Sep 17 00:00:00 2001
From: Giacomo Travaglini <giacomo.travaglini@arm.com>
Date: Thu, 18 May 2023 10:47:55 +0100
Subject: [PATCH 428/492] arch-arm: Extend auxiliary vector with AT_HWCAP2
 entry

The presence of some of the new extensions is reported via
the AT_HWCAP2 entry

Change-Id: I7a2d813ea84bf528b1f9df09121f9e97456a11c0
Signed-off-by: Giacomo Travaglini <giacomo.travaglini@arm.com>
Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/70760
Maintainer: Jason Lowe-Power <power.jg@gmail.com>
Tested-by: kokoro <noreply+kokoro@google.com>
Reviewed-by: Richard Cooper <richard.cooper@arm.com>
---
 src/arch/arm/process.cc | 61 +++++++++++++++++++++++++++++++++++++++--
 src/arch/arm/process.hh | 12 +++++++-
 2 files changed, 69 insertions(+), 4 deletions(-)

diff --git a/src/arch/arm/process.cc b/src/arch/arm/process.cc
index 02771aed00..9b0f3b269f 100644
--- a/src/arch/arm/process.cc
+++ b/src/arch/arm/process.cc
@@ -261,6 +261,62 @@ ArmProcess64::armHwcapImpl() const
     return hwcap;
 }
 
+uint64_t
+ArmProcess64::armHwcapImpl2() const
+{
+    enum ArmCpuFeature : uint64_t
+    {
+        Arm_None = 0,
+        Arm_Dcpodp = 1ULL << 0,
+        Arm_Sve2 = 1ULL<< 1,
+        Arm_Sveaes = 1ULL << 2,
+        Arm_Svepmull = 1ULL << 3,
+        Arm_Svebitperm = 1ULL << 4,
+        Arm_Svesha3 = 1ULL << 5,
+        Arm_Svesm4 = 1ULL << 6,
+        Arm_Flagm2 = 1ULL << 7,
+        Arm_Frint = 1ULL << 8,
+        Arm_Svei8mm = 1ULL << 9,
+        Arm_Svef32mm = 1ULL << 10,
+        Arm_Svef64mm = 1ULL << 11,
+        Arm_Svebf16 = 1ULL << 12,
+        Arm_I8mm = 1ULL << 13,
+        Arm_Bf16 = 1ULL << 14,
+        Arm_Dgh = 1ULL << 15,
+        Arm_Rng = 1ULL << 16,
+        Arm_Bti = 1ULL << 17,
+        Arm_Mte = 1ULL << 18,
+        Arm_Ecv = 1ULL << 19,
+        Arm_Afp = 1ULL << 20,
+        Arm_Rpres = 1ULL << 21,
+        Arm_Mte3 = 1ULL << 22,
+        Arm_Sme = 1ULL << 23,
+        Arm_Sme_I16i64 = 1ULL << 24,
+        Arm_Sme_F64f64 = 1ULL << 25,
+        Arm_Sme_I8i32 = 1ULL << 26,
+        Arm_Sme_F16f32 = 1ULL << 27,
+        Arm_Sme_B16f32 = 1ULL << 28,
+        Arm_Sme_F32f32 = 1ULL << 29,
+        Arm_Sme_Fa64 = 1ULL << 30,
+        Arm_Wfxt = 1ULL << 31,
+        Arm_Ebf16 = 1ULL << 32,
+        Arm_Sve_Ebf16 = 1ULL << 33,
+        Arm_Cssc = 1ULL << 34,
+        Arm_Rprfm = 1ULL << 35,
+        Arm_Sve2p1 = 1ULL << 36,
+        Arm_Sme2 = 1ULL << 37,
+        Arm_Sme2p1 = 1ULL << 38,
+        Arm_Sme_I16i32 = 1ULL << 39,
+        Arm_Sme_Bi32i32 = 1ULL << 40,
+        Arm_Sme_B16b16 = 1ULL << 41,
+        Arm_Sme_F16f16 = 1ULL << 42
+    };
+
+    uint64_t hwcap = 0;
+
+    return hwcap;
+}
+
 template <class IntType>
 void
 ArmProcess::argsInit(int pageSize, const RegId &spId)
@@ -284,11 +340,10 @@ ArmProcess::argsInit(int pageSize, const RegId &spId)
     if (elfObject) {
 
         if (objFile->getOpSys() == loader::Linux) {
-            IntType features = armHwcap<IntType>();
-
             //Bits which describe the system hardware capabilities
             //XXX Figure out what these should be
-            auxv.emplace_back(gem5::auxv::Hwcap, features);
+            auxv.emplace_back(gem5::auxv::Hwcap, armHwcap<IntType>());
+            auxv.emplace_back(gem5::auxv::Hwcap2, armHwcap2<IntType>());
             //Frequency at which times() increments
             auxv.emplace_back(gem5::auxv::Clktck, 0x64);
             //Whether to enable "secure mode" in the executable
diff --git a/src/arch/arm/process.hh b/src/arch/arm/process.hh
index 6bdabefb45..0aee6dc9d9 100644
--- a/src/arch/arm/process.hh
+++ b/src/arch/arm/process.hh
@@ -1,5 +1,5 @@
 /*
-* Copyright (c) 2012, 2018 ARM Limited
+* Copyright (c) 2012, 2018, 2023 Arm Limited
  * All rights reserved
  *
  * The license below extends only to copyright in the software and shall
@@ -69,10 +69,18 @@ class ArmProcess : public Process
         return static_cast<IntType>(armHwcapImpl());
     }
 
+    template<class IntType>
+    IntType
+    armHwcap2() const
+    {
+        return static_cast<IntType>(armHwcapImpl2());
+    }
+
     /**
      * AT_HWCAP is 32-bit wide on AArch64 as well so we can
      * safely return an uint32_t */
     virtual uint32_t armHwcapImpl() const = 0;
+    virtual uint64_t armHwcapImpl2() const = 0;
 };
 
 class ArmProcess32 : public ArmProcess
@@ -86,6 +94,7 @@ class ArmProcess32 : public ArmProcess
 
     /** AArch32 AT_HWCAP */
     uint32_t armHwcapImpl() const override;
+    uint64_t armHwcapImpl2() const override { return 0; }
 };
 
 class ArmProcess64 : public ArmProcess
@@ -99,6 +108,7 @@ class ArmProcess64 : public ArmProcess
 
     /** AArch64 AT_HWCAP */
     uint32_t armHwcapImpl() const override;
+    uint64_t armHwcapImpl2() const override;
 };
 
 } // namespace gem5

From 3b3911f521a83ec0c2d5e2c30e06cd8f3432bf49 Mon Sep 17 00:00:00 2001
From: Giacomo Travaglini <giacomo.travaglini@arm.com>
Date: Tue, 7 Mar 2023 12:40:24 +0000
Subject: [PATCH 429/492] arch-arm: Split decodeDataProcReg into subfunctions

This will increase readibility, it will make it easier
for devs to add new instructions, and it removes some
duplication (some register indexes were read more than
once)

Change-Id: Ifa03a93cb73de0b2dc93d7784f9011e0e55dfc1e
Signed-off-by: Giacomo Travaglini <giacomo.travaglini@arm.com>
Reviewed-by: Richard Cooper <richard.cooper@arm.com>
Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/70717
Maintainer: Jason Lowe-Power <power.jg@gmail.com>
Tested-by: kokoro <noreply+kokoro@google.com>
---
 src/arch/arm/isa/formats/aarch64.isa | 670 +++++++++++++++------------
 1 file changed, 361 insertions(+), 309 deletions(-)

diff --git a/src/arch/arm/isa/formats/aarch64.isa b/src/arch/arm/isa/formats/aarch64.isa
index 2fd28f8209..0aafa9e465 100644
--- a/src/arch/arm/isa/formats/aarch64.isa
+++ b/src/arch/arm/isa/formats/aarch64.isa
@@ -1958,6 +1958,359 @@ namespace Aarch64
 output decoder {{
 namespace Aarch64
 {
+
+    StaticInstPtr
+    decodeLogical(ExtMachInst machInst)
+    {
+        uint8_t imm6 = bits(machInst, 15, 10);
+        bool sf = bits(machInst, 31);
+        if (!sf && (imm6 & 0x20))
+            return new Unknown64(machInst);
+
+        RegIndex rd = (RegIndex)(uint8_t)bits(machInst, 4, 0);
+        RegIndex rdzr = makeZero(rd);
+        RegIndex rn = (RegIndex)(uint8_t)bits(machInst, 9, 5);
+        RegIndex rm = (RegIndex)(uint8_t)bits(machInst, 20, 16);
+        ArmShiftType type = (ArmShiftType)(uint8_t)bits(machInst, 23, 22);
+
+        uint8_t switch_val = (bits(machInst, 21) << 0) |
+                            (bits(machInst, 30, 29) << 1);
+
+        switch (switch_val) {
+          case 0x0:
+            return new AndXSReg(machInst, rdzr, rn, rm, imm6, type);
+          case 0x1:
+            return new BicXSReg(machInst, rdzr, rn, rm, imm6, type);
+          case 0x2:
+            return new OrrXSReg(machInst, rdzr, rn, rm, imm6, type);
+          case 0x3:
+            return new OrnXSReg(machInst, rdzr, rn, rm, imm6, type);
+          case 0x4:
+            return new EorXSReg(machInst, rdzr, rn, rm, imm6, type);
+          case 0x5:
+            return new EonXSReg(machInst, rdzr, rn, rm, imm6, type);
+          case 0x6:
+            return new AndXSRegCc(machInst, rdzr, rn, rm, imm6, type);
+          case 0x7:
+            return new BicXSRegCc(machInst, rdzr, rn, rm, imm6, type);
+          default:
+            GEM5_UNREACHABLE;
+        }
+    }
+
+    StaticInstPtr
+    decodeAddSub(ExtMachInst machInst)
+    {
+        uint8_t switch_val = bits(machInst, 30, 29);
+        if (bits(machInst, 21) == 0) {
+            ArmShiftType type =
+                (ArmShiftType)(uint8_t)bits(machInst, 23, 22);
+            if (type == ROR)
+                return new Unknown64(machInst);
+            uint8_t imm6 = bits(machInst, 15, 10);
+            if (!bits(machInst, 31) && bits(imm6, 5))
+                return new Unknown64(machInst);
+            RegIndex rd = (RegIndex)(uint8_t)bits(machInst, 4, 0);
+            RegIndex rdzr = makeZero(rd);
+            RegIndex rn = (RegIndex)(uint8_t)bits(machInst, 9, 5);
+            RegIndex rm = (RegIndex)(uint8_t)bits(machInst, 20, 16);
+            switch (switch_val) {
+              case 0x0:
+                return new AddXSReg(machInst, rdzr, rn, rm, imm6, type);
+              case 0x1:
+                return new AddXSRegCc(machInst, rdzr, rn, rm, imm6, type);
+              case 0x2:
+                return new SubXSReg(machInst, rdzr, rn, rm, imm6, type);
+              case 0x3:
+                return new SubXSRegCc(machInst, rdzr, rn, rm, imm6, type);
+              default:
+                GEM5_UNREACHABLE;
+            }
+        } else {
+            if (bits(machInst, 23, 22) != 0 || bits(machInst, 12, 10) > 0x4)
+               return new Unknown64(machInst);
+            ArmExtendType type =
+                (ArmExtendType)(uint8_t)bits(machInst, 15, 13);
+            uint8_t imm3 = bits(machInst, 12, 10);
+            RegIndex rd = (RegIndex)(uint8_t)bits(machInst, 4, 0);
+            RegIndex rdsp = makeSP(rd);
+            RegIndex rdzr = makeZero(rd);
+            RegIndex rn = (RegIndex)(uint8_t)bits(machInst, 9, 5);
+            RegIndex rnsp = makeSP(rn);
+            RegIndex rm = (RegIndex)(uint8_t)bits(machInst, 20, 16);
+
+            switch (switch_val) {
+              case 0x0:
+                return new AddXEReg(machInst, rdsp, rnsp, rm, type, imm3);
+              case 0x1:
+                return new AddXERegCc(machInst, rdzr, rnsp, rm, type, imm3);
+              case 0x2:
+                return new SubXEReg(machInst, rdsp, rnsp, rm, type, imm3);
+              case 0x3:
+                return new SubXERegCc(machInst, rdzr, rnsp, rm, type, imm3);
+              default:
+                GEM5_UNREACHABLE;
+            }
+        }
+    }
+
+    StaticInstPtr
+    decodeAddSubWithCarry(ExtMachInst machInst)
+    {
+        if (bits(machInst, 15, 10))
+            return new Unknown64(machInst);
+
+        RegIndex rd = (RegIndex)(uint8_t)bits(machInst, 4, 0);
+        RegIndex rdzr = makeZero(rd);
+        RegIndex rn = (RegIndex)(uint8_t)bits(machInst, 9, 5);
+        RegIndex rm = (RegIndex)(uint8_t)bits(machInst, 20, 16);
+
+        uint8_t switch_val = bits(machInst, 30, 29);
+        switch (switch_val) {
+          case 0x0:
+            return new AdcXSReg(machInst, rdzr, rn, rm, 0, LSL);
+          case 0x1:
+            return new AdcXSRegCc(machInst, rdzr, rn, rm, 0, LSL);
+          case 0x2:
+            return new SbcXSReg(machInst, rdzr, rn, rm, 0, LSL);
+          case 0x3:
+            return new SbcXSRegCc(machInst, rdzr, rn, rm, 0, LSL);
+          default:
+            GEM5_UNREACHABLE;
+        }
+    }
+
+    StaticInstPtr
+    decodeCondCompare(ExtMachInst machInst)
+    {
+        if ((bits(machInst, 4) == 1) ||
+            (bits(machInst, 10) == 1) ||
+            (bits(machInst, 29) == 0)) {
+            return new Unknown64(machInst);
+        }
+
+        RegIndex rn = (RegIndex)(uint8_t)bits(machInst, 9, 5);
+        RegIndex rm = (RegIndex)(uint8_t)bits(machInst, 20, 16);
+
+        ConditionCode cond =
+            (ConditionCode)(uint8_t)bits(machInst, 15, 12);
+        uint8_t flags = bits(machInst, 3, 0);
+        if (bits(machInst, 11) == 0) {
+            if (bits(machInst, 30) == 0) {
+                return new CcmnReg64(machInst, rn, rm, cond, flags);
+            } else {
+                return new CcmpReg64(machInst, rn, rm, cond, flags);
+            }
+        } else {
+            uint8_t imm5 = bits(machInst, 20, 16);
+            if (bits(machInst, 30) == 0) {
+                return new CcmnImm64(machInst, rn, imm5, cond, flags);
+            } else {
+                return new CcmpImm64(machInst, rn, imm5, cond, flags);
+            }
+        }
+    }
+
+    StaticInstPtr
+    decodeCondSelect(ExtMachInst machInst)
+    {
+        if (bits(machInst, 29) == 1 ||
+                bits(machInst, 11) == 1) {
+            return new Unknown64(machInst);
+        }
+
+        RegIndex rd = (RegIndex)(uint8_t)bits(machInst, 4, 0);
+        RegIndex rdzr = makeZero(rd);
+        RegIndex rn = (RegIndex)(uint8_t)bits(machInst, 9, 5);
+        RegIndex rm = (RegIndex)(uint8_t)bits(machInst, 20, 16);
+
+        ConditionCode cond =
+            (ConditionCode)(uint8_t)bits(machInst, 15, 12);
+
+        uint8_t switch_val = (bits(machInst, 10) << 0) |
+                             (bits(machInst, 30) << 1);
+        switch (switch_val) {
+          case 0x0:
+            return new Csel64(machInst, rdzr, rn, rm, cond);
+          case 0x1:
+            return new Csinc64(machInst, rdzr, rn, rm, cond);
+          case 0x2:
+            return new Csinv64(machInst, rdzr, rn, rm, cond);
+          case 0x3:
+            return new Csneg64(machInst, rdzr, rn, rm, cond);
+          default:
+            GEM5_UNREACHABLE;
+        }
+    }
+
+    StaticInstPtr
+    decodeDataProcTwoS(ExtMachInst machInst)
+    {
+        if (bits(machInst, 29) != 0)
+            return new Unknown64(machInst);
+
+        RegIndex rd = (RegIndex)(uint8_t)bits(machInst, 4, 0);
+        RegIndex rdzr = makeZero(rd);
+        RegIndex rn = (RegIndex)(uint8_t)bits(machInst, 9, 5);
+        RegIndex rm = (RegIndex)(uint8_t)bits(machInst, 20, 16);
+
+        uint8_t switch_val = bits(machInst, 15, 10);
+        switch (switch_val) {
+          case 0x2:
+            return new Udiv64(machInst, rdzr, rn, rm);
+          case 0x3:
+            return new Sdiv64(machInst, rdzr, rn, rm);
+          case 0x8:
+            return new Lslv64(machInst, rdzr, rn, rm);
+          case 0x9:
+            return new Lsrv64(machInst, rdzr, rn, rm);
+          case 0xa:
+            return new Asrv64(machInst, rdzr, rn, rm);
+          case 0xb:
+            return new Rorv64(machInst, rdzr, rn, rm);
+         case 0xc:
+            return new Pacga(machInst, rd, rn, makeSP(rm));
+          case 0x10:
+            return new Crc32b64(machInst, rdzr, rn, rm);
+          case 0x11:
+            return new Crc32h64(machInst, rdzr, rn, rm);
+          case 0x12:
+            return new Crc32w64(machInst, rdzr, rn, rm);
+          case 0x13:
+            return new Crc32x64(machInst, rdzr, rn, rm);
+          case 0x14:
+            return new Crc32cb64(machInst, rdzr, rn, rm);
+          case 0x15:
+            return new Crc32ch64(machInst, rdzr, rn, rm);
+          case 0x16:
+            return new Crc32cw64(machInst, rdzr, rn, rm);
+          case 0x17:
+            return new Crc32cx64(machInst, rdzr, rn, rm);
+          default:
+            return new Unknown64(machInst);
+        }
+    }
+
+    StaticInstPtr
+    decodeDataProcOneS(ExtMachInst machInst)
+    {
+        RegIndex rd = (RegIndex)(uint8_t)bits(machInst, 4, 0);
+        RegIndex rdzr = makeZero(rd);
+        RegIndex rn = (RegIndex)(uint8_t)bits(machInst, 9, 5);
+
+        uint8_t dm = bits(machInst, 20, 14);
+        switch(dm){
+            case 0x4:
+            {
+              uint8_t zflags = bits(machInst, 13, 10);
+              switch (zflags) {
+                case 0x0:
+                    return new Pacia(machInst, rd, makeSP(rn));
+                case 0x1:
+                    return new Pacib(machInst, rd, makeSP(rn));
+                case 0x2:
+                    return new Pacda(machInst, rd, makeSP(rn));
+                case 0x3:
+                    return new Pacdb(machInst, rd, makeSP(rn));
+                case 0x4:
+                    return new Autia(machInst, rd, makeSP(rn));
+                case 0x5:
+                    return new Autib(machInst, rd, makeSP(rn));
+                case 0x6:
+                    return new Autda(machInst, rd, makeSP(rn));
+                case 0x7:
+                    return new Autdb(machInst, rd, makeSP(rn));
+                case 0x8:
+                    if (rn == 0x1f)
+                        return new Paciza(machInst, rd,
+                                          int_reg::Zero);
+                    else
+                        return new Unknown64(machInst);
+                case 0x9:
+                    if (rn == 0x1f)
+                        return new Pacizb(machInst, rd,
+                                          int_reg::Zero);
+                    else
+                        return new Unknown64(machInst);
+                case 0xa:
+                    if (rn == 0x1f)
+                        return new Pacdza(machInst, rd,
+                                          int_reg::Zero);
+                    else
+                        return new Unknown64(machInst);
+                case 0xb:
+                    if (rn == 0x1f)
+                        return new Pacdzb(machInst, rd,
+                                          int_reg::Zero);
+                    else
+                        return new Unknown64(machInst);
+                case 0xc:
+                    if (rn == 0x1f)
+                        return new Autiza(machInst, rd,
+                                          int_reg::Zero);
+                    else
+                        return new Unknown64(machInst);
+                case 0xd:
+                    if (rn == 0x1f)
+                        return new Autizb(machInst, rd,
+                                          int_reg::Zero);
+                    else
+                        return new Unknown64(machInst);
+                case 0xe:
+                    if (rn == 0x1f)
+                        return new Autdza(machInst, rd,
+                                          int_reg::Zero);
+                    else
+                        return new Unknown64(machInst);
+                case 0xf:
+                    if (rn == 0x1f)
+                        return new Autdzb(machInst, rd,
+                                          int_reg::Zero);
+                    else
+                        return new Unknown64(machInst);
+                default:
+                    return new Unknown64(machInst);
+              }
+            }
+            case 0x5:
+              {
+                if (rn != 0x1f)
+                    return new Unknown64(machInst);
+                bool d = bits(machInst,10);
+                if (d)
+                    return new Xpacd(machInst, rd);
+                else
+                    return new Xpaci(machInst, rd);
+              }
+        }
+        if (dm != 0 || bits(machInst, 29) != 0) {
+            // dm !=0 and dm != 0x1
+            return new Unknown64(machInst);
+        }
+        uint8_t switchVal = bits(machInst, 15, 10);
+        switch (switchVal) {
+          case 0x0:
+            return new Rbit64(machInst, rdzr, rn);
+          case 0x1:
+            return new Rev1664(machInst, rdzr, rn);
+          case 0x2:
+            if (bits(machInst, 31) == 0)
+                return new Rev64(machInst, rdzr, rn);
+            else
+                return new Rev3264(machInst, rdzr, rn);
+          case 0x3:
+            if (bits(machInst, 31) != 1)
+                return new Unknown64(machInst);
+            return new Rev64(machInst, rdzr, rn);
+          case 0x4:
+            return new Clz64(machInst, rdzr, rn);
+          case 0x5:
+            return new Cls64(machInst, rdzr, rn);
+          default:
+            return new Unknown64(machInst);
+        }
+    }
+
     StaticInstPtr
     decodeDataProcReg(ExtMachInst machInst)
     {
@@ -1965,327 +2318,26 @@ namespace Aarch64
                             (bits(machInst, 24) << 0);
         switch (switchVal) {
           case 0x0:
-          {
-            uint8_t switchVal = (bits(machInst, 21) << 0) |
-                                (bits(machInst, 30, 29) << 1);
-            ArmShiftType type = (ArmShiftType)(uint8_t)bits(machInst, 23, 22);
-            uint8_t imm6 = bits(machInst, 15, 10);
-            bool sf = bits(machInst, 31);
-            if (!sf && (imm6 & 0x20))
-                return new Unknown64(machInst);
-            RegIndex rd = (RegIndex)(uint8_t)bits(machInst, 4, 0);
-            RegIndex rdzr = makeZero(rd);
-            RegIndex rn = (RegIndex)(uint8_t)bits(machInst, 9, 5);
-            RegIndex rm = (RegIndex)(uint8_t)bits(machInst, 20, 16);
-
-            switch (switchVal) {
-              case 0x0:
-                return new AndXSReg(machInst, rdzr, rn, rm, imm6, type);
-              case 0x1:
-                return new BicXSReg(machInst, rdzr, rn, rm, imm6, type);
-              case 0x2:
-                return new OrrXSReg(machInst, rdzr, rn, rm, imm6, type);
-              case 0x3:
-                return new OrnXSReg(machInst, rdzr, rn, rm, imm6, type);
-              case 0x4:
-                return new EorXSReg(machInst, rdzr, rn, rm, imm6, type);
-              case 0x5:
-                return new EonXSReg(machInst, rdzr, rn, rm, imm6, type);
-              case 0x6:
-                return new AndXSRegCc(machInst, rdzr, rn, rm, imm6, type);
-              case 0x7:
-                return new BicXSRegCc(machInst, rdzr, rn, rm, imm6, type);
-              default:
-                GEM5_UNREACHABLE;
-            }
-          }
+            return decodeLogical(machInst);
           case 0x1:
-          {
-            uint8_t switchVal = bits(machInst, 30, 29);
-            if (bits(machInst, 21) == 0) {
-                ArmShiftType type =
-                    (ArmShiftType)(uint8_t)bits(machInst, 23, 22);
-                if (type == ROR)
-                    return new Unknown64(machInst);
-                uint8_t imm6 = bits(machInst, 15, 10);
-                if (!bits(machInst, 31) && bits(imm6, 5))
-                    return new Unknown64(machInst);
-                RegIndex rd = (RegIndex)(uint8_t)bits(machInst, 4, 0);
-                RegIndex rdzr = makeZero(rd);
-                RegIndex rn = (RegIndex)(uint8_t)bits(machInst, 9, 5);
-                RegIndex rm = (RegIndex)(uint8_t)bits(machInst, 20, 16);
-                switch (switchVal) {
-                  case 0x0:
-                    return new AddXSReg(machInst, rdzr, rn, rm, imm6, type);
-                  case 0x1:
-                    return new AddXSRegCc(machInst, rdzr, rn, rm, imm6, type);
-                  case 0x2:
-                    return new SubXSReg(machInst, rdzr, rn, rm, imm6, type);
-                  case 0x3:
-                    return new SubXSRegCc(machInst, rdzr, rn, rm, imm6, type);
-                  default:
-                    GEM5_UNREACHABLE;
-                }
-            } else {
-                if (bits(machInst, 23, 22) != 0 || bits(machInst, 12, 10) > 0x4)
-                   return new Unknown64(machInst);
-                ArmExtendType type =
-                    (ArmExtendType)(uint8_t)bits(machInst, 15, 13);
-                uint8_t imm3 = bits(machInst, 12, 10);
-                RegIndex rd = (RegIndex)(uint8_t)bits(machInst, 4, 0);
-                RegIndex rdsp = makeSP(rd);
-                RegIndex rdzr = makeZero(rd);
-                RegIndex rn = (RegIndex)(uint8_t)bits(machInst, 9, 5);
-                RegIndex rnsp = makeSP(rn);
-                RegIndex rm = (RegIndex)(uint8_t)bits(machInst, 20, 16);
-
-                switch (switchVal) {
-                  case 0x0:
-                    return new AddXEReg(machInst, rdsp, rnsp, rm, type, imm3);
-                  case 0x1:
-                    return new AddXERegCc(machInst, rdzr, rnsp, rm, type, imm3);
-                  case 0x2:
-                    return new SubXEReg(machInst, rdsp, rnsp, rm, type, imm3);
-                  case 0x3:
-                    return new SubXERegCc(machInst, rdzr, rnsp, rm, type, imm3);
-                  default:
-                    GEM5_UNREACHABLE;
-                }
-            }
-          }
+            return decodeAddSub(machInst);
           case 0x2:
           {
             if (bits(machInst, 21) == 1)
                 return new Unknown64(machInst);
-            RegIndex rd = (RegIndex)(uint8_t)bits(machInst, 4, 0);
-            RegIndex rdzr = makeZero(rd);
-            RegIndex rn = (RegIndex)(uint8_t)bits(machInst, 9, 5);
-            RegIndex rm = (RegIndex)(uint8_t)bits(machInst, 20, 16);
+
             switch (bits(machInst, 23, 22)) {
               case 0x0:
-              {
-                if (bits(machInst, 15, 10))
-                    return new Unknown64(machInst);
-                uint8_t switchVal = bits(machInst, 30, 29);
-                switch (switchVal) {
-                  case 0x0:
-                    return new AdcXSReg(machInst, rdzr, rn, rm, 0, LSL);
-                  case 0x1:
-                    return new AdcXSRegCc(machInst, rdzr, rn, rm, 0, LSL);
-                  case 0x2:
-                    return new SbcXSReg(machInst, rdzr, rn, rm, 0, LSL);
-                  case 0x3:
-                    return new SbcXSRegCc(machInst, rdzr, rn, rm, 0, LSL);
-                  default:
-                    GEM5_UNREACHABLE;
-                }
-              }
+                return decodeAddSubWithCarry(machInst);
               case 0x1:
-              {
-                if ((bits(machInst, 4) == 1) ||
-                        (bits(machInst, 10) == 1) ||
-                        (bits(machInst, 29) == 0)) {
-                    return new Unknown64(machInst);
-                }
-                ConditionCode cond =
-                    (ConditionCode)(uint8_t)bits(machInst, 15, 12);
-                uint8_t flags = bits(machInst, 3, 0);
-                RegIndex rn = (RegIndex)(uint8_t)bits(machInst, 9, 5);
-                if (bits(machInst, 11) == 0) {
-                    RegIndex rm =
-                        (RegIndex)(uint8_t)bits(machInst, 20, 16);
-                    if (bits(machInst, 30) == 0) {
-                        return new CcmnReg64(machInst, rn, rm, cond, flags);
-                    } else {
-                        return new CcmpReg64(machInst, rn, rm, cond, flags);
-                    }
-                } else {
-                    uint8_t imm5 = bits(machInst, 20, 16);
-                    if (bits(machInst, 30) == 0) {
-                        return new CcmnImm64(machInst, rn, imm5, cond, flags);
-                    } else {
-                        return new CcmpImm64(machInst, rn, imm5, cond, flags);
-                    }
-                }
-              }
+                return decodeCondCompare(machInst);
               case 0x2:
-              {
-                if (bits(machInst, 29) == 1 ||
-                        bits(machInst, 11) == 1) {
-                    return new Unknown64(machInst);
-                }
-                uint8_t switchVal = (bits(machInst, 10) << 0) |
-                                    (bits(machInst, 30) << 1);
-                RegIndex rd = (RegIndex)(uint8_t)bits(machInst, 4, 0);
-                RegIndex rdzr = makeZero(rd);
-                RegIndex rn = (RegIndex)(uint8_t)bits(machInst, 9, 5);
-                RegIndex rm = (RegIndex)(uint8_t)bits(machInst, 20, 16);
-                ConditionCode cond =
-                    (ConditionCode)(uint8_t)bits(machInst, 15, 12);
-                switch (switchVal) {
-                  case 0x0:
-                    return new Csel64(machInst, rdzr, rn, rm, cond);
-                  case 0x1:
-                    return new Csinc64(machInst, rdzr, rn, rm, cond);
-                  case 0x2:
-                    return new Csinv64(machInst, rdzr, rn, rm, cond);
-                  case 0x3:
-                    return new Csneg64(machInst, rdzr, rn, rm, cond);
-                  default:
-                    GEM5_UNREACHABLE;
-                }
-              }
+                return decodeCondSelect(machInst);
               case 0x3:
                 if (bits(machInst, 30) == 0) {
-                    if (bits(machInst, 29) != 0)
-                        return new Unknown64(machInst);
-                    uint8_t switchVal = bits(machInst, 15, 10);
-                    switch (switchVal) {
-                      case 0x2:
-                        return new Udiv64(machInst, rdzr, rn, rm);
-                      case 0x3:
-                        return new Sdiv64(machInst, rdzr, rn, rm);
-                      case 0x8:
-                        return new Lslv64(machInst, rdzr, rn, rm);
-                      case 0x9:
-                        return new Lsrv64(machInst, rdzr, rn, rm);
-                      case 0xa:
-                        return new Asrv64(machInst, rdzr, rn, rm);
-                      case 0xb:
-                        return new Rorv64(machInst, rdzr, rn, rm);
-                     case 0xc:
-                        return new Pacga(machInst, rd, rn, makeSP(rm));
-                      case 0x10:
-                        return new Crc32b64(machInst, rdzr, rn, rm);
-                      case 0x11:
-                        return new Crc32h64(machInst, rdzr, rn, rm);
-                      case 0x12:
-                        return new Crc32w64(machInst, rdzr, rn, rm);
-                      case 0x13:
-                        return new Crc32x64(machInst, rdzr, rn, rm);
-                      case 0x14:
-                        return new Crc32cb64(machInst, rdzr, rn, rm);
-                      case 0x15:
-                        return new Crc32ch64(machInst, rdzr, rn, rm);
-                      case 0x16:
-                        return new Crc32cw64(machInst, rdzr, rn, rm);
-                      case 0x17:
-                        return new Crc32cx64(machInst, rdzr, rn, rm);
-                      default:
-                        return new Unknown64(machInst);
-                    }
+                    return decodeDataProcTwoS(machInst);
                 } else {
-                    uint8_t dm = bits(machInst, 20, 14);
-                    switch(dm){
-                        case 0x4:
-                        {
-                          uint8_t zflags = bits(machInst, 13, 10);
-                          switch (zflags) {
-                            case 0x0:
-                                return new Pacia(machInst, rd, makeSP(rn));
-                            case 0x1:
-                                return new Pacib(machInst, rd, makeSP(rn));
-                            case 0x2:
-                                return new Pacda(machInst, rd, makeSP(rn));
-                            case 0x3:
-                                return new Pacdb(machInst, rd, makeSP(rn));
-                            case 0x4:
-                                return new Autia(machInst, rd, makeSP(rn));
-                            case 0x5:
-                                return new Autib(machInst, rd, makeSP(rn));
-                            case 0x6:
-                                return new Autda(machInst, rd, makeSP(rn));
-                            case 0x7:
-                                return new Autdb(machInst, rd, makeSP(rn));
-                            case 0x8:
-                                if (rn == 0x1f)
-                                    return new Paciza(machInst, rd,
-                                                      int_reg::Zero);
-                                else
-                                    return new Unknown64(machInst);
-                            case 0x9:
-                                if (rn == 0x1f)
-                                    return new Pacizb(machInst, rd,
-                                                      int_reg::Zero);
-                                else
-                                    return new Unknown64(machInst);
-                            case 0xa:
-                                if (rn == 0x1f)
-                                    return new Pacdza(machInst, rd,
-                                                      int_reg::Zero);
-                                else
-                                    return new Unknown64(machInst);
-                            case 0xb:
-                                if (rn == 0x1f)
-                                    return new Pacdzb(machInst, rd,
-                                                      int_reg::Zero);
-                                else
-                                    return new Unknown64(machInst);
-                            case 0xc:
-                                if (rn == 0x1f)
-                                    return new Autiza(machInst, rd,
-                                                      int_reg::Zero);
-                                else
-                                    return new Unknown64(machInst);
-                            case 0xd:
-                                if (rn == 0x1f)
-                                    return new Autizb(machInst, rd,
-                                                      int_reg::Zero);
-                                else
-                                    return new Unknown64(machInst);
-                            case 0xe:
-                                if (rn == 0x1f)
-                                    return new Autdza(machInst, rd,
-                                                      int_reg::Zero);
-                                else
-                                    return new Unknown64(machInst);
-                            case 0xf:
-                                if (rn == 0x1f)
-                                    return new Autdzb(machInst, rd,
-                                                      int_reg::Zero);
-                                else
-                                    return new Unknown64(machInst);
-                            default:
-                                return new Unknown64(machInst);
-                          }
-                        }
-                        case 0x5:
-                          {
-                            if (rn != 0x1f)
-                                return new Unknown64(machInst);
-                            bool d = bits(machInst,10);
-                            if (d)
-                                return new Xpacd(machInst, rd);
-                            else
-                                return new Xpaci(machInst, rd);
-                          }
-                    }
-                    if (dm != 0 || bits(machInst, 29) != 0) {
-                        // dm !=0 and dm != 0x1
-                        return new Unknown64(machInst);
-                    }
-                    uint8_t switchVal = bits(machInst, 15, 10);
-                    switch (switchVal) {
-                      case 0x0:
-                        return new Rbit64(machInst, rdzr, rn);
-                      case 0x1:
-                        return new Rev1664(machInst, rdzr, rn);
-                      case 0x2:
-                        if (bits(machInst, 31) == 0)
-                            return new Rev64(machInst, rdzr, rn);
-                        else
-                            return new Rev3264(machInst, rdzr, rn);
-                      case 0x3:
-                        if (bits(machInst, 31) != 1)
-                            return new Unknown64(machInst);
-                        return new Rev64(machInst, rdzr, rn);
-                      case 0x4:
-                        return new Clz64(machInst, rdzr, rn);
-                      case 0x5:
-                        return new Cls64(machInst, rdzr, rn);
-                      default:
-                        return new Unknown64(machInst);
-                    }
+                    return decodeDataProcOneS(machInst);
                 }
               default:
                 GEM5_UNREACHABLE;

From 223a07031f0a0c53e72c445675d52d6921eb6e8e Mon Sep 17 00:00:00 2001
From: Giacomo Travaglini <giacomo.travaglini@arm.com>
Date: Tue, 7 Mar 2023 14:58:15 +0000
Subject: [PATCH 430/492] arch-arm: Improve debugging of CC regs accesses

As of now we are simply printing the CC reg index which is
not particularly helpful. With this patch we actually print
the (NZ|C|V) reg name.

Change-Id: Ib4b56a372b25e5bc2b6b762d2ef3ff2084097cce
Signed-off-by: Giacomo Travaglini <giacomo.travaglini@arm.com>
Reviewed-by: Richard Cooper <richard.cooper@arm.com>
Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/70718
Maintainer: Jason Lowe-Power <power.jg@gmail.com>
Tested-by: kokoro <noreply+kokoro@google.com>
---
 src/arch/arm/regs/cc.hh | 34 +++++++++++++++++++++++-----------
 1 file changed, 23 insertions(+), 11 deletions(-)

diff --git a/src/arch/arm/regs/cc.hh b/src/arch/arm/regs/cc.hh
index ba7552799a..474e48e9e9 100644
--- a/src/arch/arm/regs/cc.hh
+++ b/src/arch/arm/regs/cc.hh
@@ -61,10 +61,31 @@ enum : RegIndex
     NumRegs
 };
 
+const char * const RegName[NumRegs] = {
+    "nz",
+    "c",
+    "v",
+    "ge",
+    "fp",
+    "zero"
+};
+
 } // namespace cc_reg
 
-inline constexpr RegClass ccRegClass(CCRegClass, CCRegClassName,
-        cc_reg::NumRegs, debug::CCRegs);
+class CCRegClassOps : public RegClassOps
+{
+  public:
+    std::string
+    regName(const RegId &id) const override
+    {
+        return cc_reg::RegName[id.index()];
+    }
+};
+
+static inline CCRegClassOps ccRegClassOps;
+
+inline constexpr RegClass ccRegClass = RegClass(CCRegClass, CCRegClassName,
+        cc_reg::NumRegs, debug::CCRegs).ops(ccRegClassOps);
 
 namespace cc_reg
 {
@@ -77,15 +98,6 @@ inline constexpr RegId
     Fp = ccRegClass[_FpIdx],
     Zero = ccRegClass[_ZeroIdx];
 
-const char * const RegName[NumRegs] = {
-    "nz",
-    "c",
-    "v",
-    "ge",
-    "fp",
-    "zero"
-};
-
 } // namespace cc_reg
 
 enum ConditionCode

From e3d2191b73581cf2bcae88ba4fa8c2f46c35efd9 Mon Sep 17 00:00:00 2001
From: Giacomo Travaglini <giacomo.travaglini@arm.com>
Date: Mon, 6 Mar 2023 19:37:39 +0000
Subject: [PATCH 431/492] arch-arm: Implement FEAT_FLAGM(2)

Change-Id: I21f1eb91ad9acb019a776a7d5edd38754571a62e
Signed-off-by: Giacomo Travaglini <giacomo.travaglini@arm.com>
Reviewed-by: Richard Cooper <richard.cooper@arm.com>
Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/70719
Maintainer: Jason Lowe-Power <power.jg@gmail.com>
Reviewed-by: Jason Lowe-Power <power.jg@gmail.com>
Tested-by: kokoro <noreply+kokoro@google.com>
---
 src/arch/arm/ArmISA.py                |  4 ++
 src/arch/arm/ArmSystem.py             | 16 ++++--
 src/arch/arm/insts/misc64.cc          | 21 ++++++++
 src/arch/arm/insts/misc64.hh          | 32 +++++++++++
 src/arch/arm/isa/formats/aarch64.isa  | 40 +++++++++++++-
 src/arch/arm/isa/insts/misc64.isa     | 77 +++++++++++++++++++++++++++
 src/arch/arm/isa/templates/misc64.isa | 50 +++++++++++++++++
 src/arch/arm/process.cc               |  5 ++
 src/arch/arm/regs/misc.cc             |  3 ++
 9 files changed, 244 insertions(+), 4 deletions(-)

diff --git a/src/arch/arm/ArmISA.py b/src/arch/arm/ArmISA.py
index e73046d08b..37970dce83 100644
--- a/src/arch/arm/ArmISA.py
+++ b/src/arch/arm/ArmISA.py
@@ -58,6 +58,10 @@ class ArmDefaultSERelease(ArmRelease):
         "FEAT_FCMA",
         "FEAT_JSCVT",
         "FEAT_PAuth",
+        # Armv8.4
+        "FEAT_FLAGM",
+        # Armv8.5
+        "FEAT_FLAGM2",
         # Armv9.2
         "FEAT_SME",
         # Other
diff --git a/src/arch/arm/ArmSystem.py b/src/arch/arm/ArmSystem.py
index 9e2da8e255..e08108fa07 100644
--- a/src/arch/arm/ArmSystem.py
+++ b/src/arch/arm/ArmSystem.py
@@ -85,6 +85,9 @@ class ArmExtension(ScopedEnum):
         # Armv8.4
         "FEAT_SEL2",
         "FEAT_TLBIOS",
+        "FEAT_FLAGM",
+        # Armv8.5
+        "FEAT_FLAGM2",
         # Armv9.2
         "FEAT_SME",  # Optional in Armv9.2
         # Others
@@ -164,6 +167,9 @@ class ArmDefaultRelease(Armv8):
         # Armv8.4
         "FEAT_SEL2",
         "FEAT_TLBIOS",
+        "FEAT_FLAGM",
+        # Armv8.5
+        "FEAT_FLAGM2",
         # Armv9.2
         "FEAT_SME",
     ]
@@ -194,11 +200,15 @@ class Armv83(Armv82):
 
 
 class Armv84(Armv83):
-    extensions = Armv83.extensions + ["FEAT_SEL2", "FEAT_TLBIOS"]
+    extensions = Armv83.extensions + ["FEAT_SEL2", "FEAT_TLBIOS", "FEAT_FLAGM"]
 
 
-class Armv92(Armv84):
-    extensions = Armv84.extensions + ["FEAT_SME"]
+class Armv85(Armv84):
+    extensions = Armv84.extensions + ["FEAT_FLAGM2"]
+
+
+class Armv92(Armv85):
+    extensions = Armv85.extensions + ["FEAT_SME"]
 
 
 class ArmSystem(System):
diff --git a/src/arch/arm/insts/misc64.cc b/src/arch/arm/insts/misc64.cc
index c7423d9e72..4f573fca83 100644
--- a/src/arch/arm/insts/misc64.cc
+++ b/src/arch/arm/insts/misc64.cc
@@ -54,6 +54,27 @@ ImmOp64::generateDisassembly(Addr pc, const loader::SymbolTable *symtab) const
     return ss.str();
 }
 
+std::string
+RegOp64::generateDisassembly(Addr pc, const loader::SymbolTable *symtab) const
+{
+    std::stringstream ss;
+    printMnemonic(ss, "", false);
+    printIntReg(ss, op1);
+    return ss.str();
+}
+
+std::string
+RegImmImmOp64::generateDisassembly(Addr pc, const loader::SymbolTable *symtab) const
+{
+    std::stringstream ss;
+    printMnemonic(ss, "", false);
+    printIntReg(ss, op1);
+    ccprintf(ss, "#0x%x", imm1);
+    ss << ", ";
+    ccprintf(ss, "#0x%x", imm2);
+    return ss.str();
+}
+
 std::string
 RegRegImmImmOp64::generateDisassembly(
         Addr pc, const loader::SymbolTable *symtab) const
diff --git a/src/arch/arm/insts/misc64.hh b/src/arch/arm/insts/misc64.hh
index b7b66c2674..3a67210b92 100644
--- a/src/arch/arm/insts/misc64.hh
+++ b/src/arch/arm/insts/misc64.hh
@@ -57,6 +57,38 @@ class ImmOp64 : public ArmISA::ArmStaticInst
             Addr pc, const loader::SymbolTable *symtab) const override;
 };
 
+class RegOp64 : public ArmISA::ArmStaticInst
+{
+  protected:
+    RegIndex op1;
+
+    RegOp64(const char *mnem, ArmISA::ExtMachInst _machInst,
+            OpClass __opClass, RegIndex _op1) :
+        ArmISA::ArmStaticInst(mnem, _machInst, __opClass), op1(_op1)
+    {}
+
+    std::string generateDisassembly(
+            Addr pc, const loader::SymbolTable *symtab) const override;
+};
+
+class RegImmImmOp64 : public ArmISA::ArmStaticInst
+{
+  protected:
+    RegIndex op1;
+    uint64_t imm1;
+    uint64_t imm2;
+
+    RegImmImmOp64(const char *mnem, ArmISA::ExtMachInst _machInst,
+                  OpClass __opClass, RegIndex _op1,
+                  uint64_t _imm1, uint64_t _imm2) :
+        ArmISA::ArmStaticInst(mnem, _machInst, __opClass),
+        op1(_op1), imm1(_imm1), imm2(_imm2)
+    {}
+
+    std::string generateDisassembly(
+            Addr pc, const loader::SymbolTable *symtab) const override;
+};
+
 class RegRegImmImmOp64 : public ArmISA::ArmStaticInst
 {
   protected:
diff --git a/src/arch/arm/isa/formats/aarch64.isa b/src/arch/arm/isa/formats/aarch64.isa
index 0aafa9e465..9ad2de2c72 100644
--- a/src/arch/arm/isa/formats/aarch64.isa
+++ b/src/arch/arm/isa/formats/aarch64.isa
@@ -424,6 +424,15 @@ namespace Aarch64
                         // MSR immediate: moving immediate value to selected
                         // bits of the PSTATE
                         switch (op1 << 3 | op2) {
+                          case 0x0:
+                            // CFINV
+                            return new Cfinv(machInst);
+                          case 0x1:
+                            // XAFLAG
+                            return new Xaflag(machInst);
+                          case 0x2:
+                            // AXFLAG
+                            return new Axflag(machInst);
                           case 0x3:
                             // UAO
                             return new MsrImm64(
@@ -2080,6 +2089,26 @@ namespace Aarch64
         }
     }
 
+    StaticInstPtr
+    decodeRotIntoFlags(ExtMachInst machInst)
+    {
+        RegIndex rn = (RegIndex)(uint8_t)bits(machInst, 9, 5);
+        uint8_t imm6 = bits(machInst, 20, 15);
+        uint8_t mask = bits(machInst, 3, 0);
+        return new Rmif(machInst, rn, imm6, mask);
+    }
+
+    StaticInstPtr
+    decodeEvalIntoFlags(ExtMachInst machInst)
+    {
+        RegIndex rn = (RegIndex)(uint8_t)bits(machInst, 9, 5);
+        int sz = bits(machInst, 14);
+        if (sz)
+            return new Setf16(machInst, rn);
+        else
+            return new Setf8(machInst, rn);
+    }
+
     StaticInstPtr
     decodeCondCompare(ExtMachInst machInst)
     {
@@ -2328,7 +2357,16 @@ namespace Aarch64
 
             switch (bits(machInst, 23, 22)) {
               case 0x0:
-                return decodeAddSubWithCarry(machInst);
+                switch (bits(machInst, 11, 10)) {
+                  case 0b00:
+                    return decodeAddSubWithCarry(machInst);
+                  case 0b01:
+                    return decodeRotIntoFlags(machInst);
+                  case 0b10:
+                    return decodeEvalIntoFlags(machInst);
+                  default: // 0b11
+                    return new Unknown64(machInst);
+                }
               case 0x1:
                 return decodeCondCompare(machInst);
               case 0x2:
diff --git a/src/arch/arm/isa/insts/misc64.isa b/src/arch/arm/isa/insts/misc64.isa
index abe30fce63..46d72d21c3 100644
--- a/src/arch/arm/isa/insts/misc64.isa
+++ b/src/arch/arm/isa/insts/misc64.isa
@@ -248,4 +248,81 @@ let {{
     header_output += ImmOp64Declare.subst(hltIop)
     decoder_output += SemihostConstructor64.subst(hltIop)
     exec_output += BasicExecute.subst(hltIop)
+
+    flagmCheckCode = '''
+        if (!HaveExt(xc->tcBase(), ArmExtension::FEAT_FLAGM)) {
+            return std::make_shared<UndefinedInstruction>(
+                machInst, true);
+        }
+    '''
+    cfinvCode = 'CondCodesC = ~CondCodesC'
+    cfinvIop = ArmInstObjParams("cfinv", "Cfinv", "ArmStaticInst",
+                                flagmCheckCode + cfinvCode)
+    header_output += BasicDeclare.subst(cfinvIop)
+    decoder_output += BasicConstructor64.subst(cfinvIop)
+    exec_output += BasicExecute.subst(cfinvIop)
+
+    axflagCode = '''
+        bool z = CondCodesNZ || CondCodesV;
+        bool c = CondCodesC && !CondCodesV;
+        CondCodesNZ = z; // This implies zeroing PSTATE.N
+        CondCodesC = c;
+        CondCodesV = 0;
+    '''
+    axflagIop = ArmInstObjParams("axflag", "Axflag", "ArmStaticInst",
+                                 flagmCheckCode + axflagCode)
+    header_output += BasicDeclare.subst(axflagIop)
+    decoder_output += BasicConstructor64.subst(axflagIop)
+    exec_output += BasicExecute.subst(axflagIop)
+
+    xaflagCode = '''
+        const RegVal nz = CondCodesNZ;
+        const RegVal n = !CondCodesC && !bits(nz, 0);
+        const RegVal z = CondCodesC && bits(nz, 0);
+        const RegVal c = CondCodesC || bits(nz, 0);
+        const RegVal v = !CondCodesC && bits(nz, 0);
+
+        CondCodesNZ = (n << 1) | z;
+        CondCodesC = c;
+        CondCodesV = v;
+    '''
+    xaflagIop = ArmInstObjParams("xaflag", "Xaflag", "ArmStaticInst",
+                                 flagmCheckCode + xaflagCode)
+    header_output += BasicDeclare.subst(xaflagIop)
+    decoder_output += BasicConstructor64.subst(xaflagIop)
+    exec_output += BasicExecute.subst(xaflagIop)
+
+    rmifCode = '''
+        RegVal tmp = XOp1 << imm1;
+        int nz = CondCodesNZ;
+        if (bits(imm2, 0)) CondCodesV = bits(tmp, 0);
+        if (bits(imm2, 1)) CondCodesC = bits(tmp, 1);
+        if (bits(imm2, 2)) nz = insertBits(nz, 0, bits(tmp, 2));
+        if (bits(imm2, 3)) nz = insertBits(nz, 1, bits(tmp, 3));
+
+        CondCodesNZ = nz;
+    '''
+    rmifIop = ArmInstObjParams("rmif", "Rmif", "RegImmImmOp64",
+                               flagmCheckCode + rmifCode)
+    header_output += RegImmImmOp64Declare.subst(rmifIop)
+    decoder_output += RegImmImmOp64Constructor.subst(rmifIop)
+    exec_output += BasicExecute.subst(rmifIop)
+
+    setfCode = '''
+        const int msb = %d;
+        RegVal tmp = Op1;
+        CondCodesNZ = (bits(tmp, msb) << 1) | (bits(tmp, msb, 0) ? 0 : 1);
+        CondCodesV = bits(tmp, msb) ^ bits(tmp, msb + 1);
+    '''
+    setf8Iop = ArmInstObjParams("setf8", "Setf8", "RegOp64",
+                                flagmCheckCode + setfCode % 7)
+    header_output += RegOp64Declare.subst(setf8Iop)
+    decoder_output += RegOp64Constructor.subst(setf8Iop)
+    exec_output += BasicExecute.subst(setf8Iop)
+
+    setf16Iop = ArmInstObjParams("setf16", "Setf16", "RegOp64",
+                                 flagmCheckCode + setfCode % 15)
+    header_output += RegOp64Declare.subst(setf16Iop)
+    decoder_output += RegOp64Constructor.subst(setf16Iop)
+    exec_output += BasicExecute.subst(setf16Iop)
 }};
diff --git a/src/arch/arm/isa/templates/misc64.isa b/src/arch/arm/isa/templates/misc64.isa
index af6b4c6888..a2024f713c 100644
--- a/src/arch/arm/isa/templates/misc64.isa
+++ b/src/arch/arm/isa/templates/misc64.isa
@@ -58,6 +58,56 @@ def template ImmOp64Constructor {{
     }
 }};
 
+def template RegOp64Declare {{
+class %(class_name)s : public %(base_class)s
+{
+  private:
+    %(reg_idx_arr_decl)s;
+
+  public:
+    // Constructor
+    %(class_name)s(ExtMachInst machInst, RegIndex _op1);
+
+    Fault execute(ExecContext *, trace::InstRecord *) const override;
+};
+}};
+
+def template RegOp64Constructor {{
+    %(class_name)s::%(class_name)s(ExtMachInst machInst, RegIndex _op1) :
+        %(base_class)s("%(mnemonic)s", machInst, %(op_class)s, _op1)
+    {
+        %(set_reg_idx_arr)s;
+        %(constructor)s;
+    }
+}};
+
+def template RegImmImmOp64Declare {{
+class %(class_name)s : public %(base_class)s
+{
+  private:
+    %(reg_idx_arr_decl)s;
+
+  public:
+    // Constructor
+    %(class_name)s(ExtMachInst machInst,
+                   RegIndex _op1,
+                   uint64_t _imm1, uint64_t _imm2);
+    Fault execute(ExecContext *, trace::InstRecord *) const override;
+};
+}};
+
+def template RegImmImmOp64Constructor {{
+    %(class_name)s::%(class_name)s(ExtMachInst machInst,
+                                   RegIndex _op1,
+                                   uint64_t _imm1, uint64_t _imm2) :
+        %(base_class)s("%(mnemonic)s", machInst, %(op_class)s,
+                       _op1, _imm1, _imm2)
+    {
+        %(set_reg_idx_arr)s;
+        %(constructor)s;
+    }
+}};
+
 def template RegRegImmImmOp64Declare {{
 class %(class_name)s : public %(base_class)s
 {
diff --git a/src/arch/arm/process.cc b/src/arch/arm/process.cc
index 9b0f3b269f..b2378cc505 100644
--- a/src/arch/arm/process.cc
+++ b/src/arch/arm/process.cc
@@ -314,6 +314,11 @@ ArmProcess64::armHwcapImpl2() const
 
     uint64_t hwcap = 0;
 
+    ThreadContext *tc = system->threads[contextIds[0]];
+
+    const AA64ISAR0 isa_r0 = tc->readMiscReg(MISCREG_ID_AA64ISAR0_EL1);
+    hwcap |= (isa_r0.ts >= 2) ? Arm_Flagm2 : Arm_None;
+
     return hwcap;
 }
 
diff --git a/src/arch/arm/regs/misc.cc b/src/arch/arm/regs/misc.cc
index ec5670e647..9e633c0c84 100644
--- a/src/arch/arm/regs/misc.cc
+++ b/src/arch/arm/regs/misc.cc
@@ -3891,6 +3891,9 @@ ISA::initializeMiscRegMetadata()
           isar0_el1.rdm = release->has(ArmExtension::FEAT_RDM) ? 0x1 : 0x0;
           isar0_el1.tme = release->has(ArmExtension::TME) ? 0x1 : 0x0;
           isar0_el1.tlb = release->has(ArmExtension::FEAT_TLBIOS) ? 0x1 : 0x0;
+          isar0_el1.ts = release->has(ArmExtension::FEAT_FLAGM2) ?
+              0x2 : release->has(ArmExtension::FEAT_FLAGM) ?
+                  0x1 : 0x0;
           return isar0_el1;
       }())
       .faultRead(EL1, HCR_TRAP(tid3))

From 2a5c427c5c406aca25bfaaa40af7216f25623ffa Mon Sep 17 00:00:00 2001
From: Giacomo Travaglini <giacomo.travaglini@arm.com>
Date: Tue, 7 Mar 2023 16:59:16 +0000
Subject: [PATCH 432/492] arch-arm: Extend SCR to be 64-bit wide

Change-Id: I9928de3db61957404269d189a15a951fd6707c8a
Signed-off-by: Giacomo Travaglini <giacomo.travaglini@arm.com>
Reviewed-by: Richard Cooper <richard.cooper@arm.com>
Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/70720
Tested-by: kokoro <noreply+kokoro@google.com>
Maintainer: Jason Lowe-Power <power.jg@gmail.com>
---
 src/arch/arm/regs/misc_types.hh | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/arch/arm/regs/misc_types.hh b/src/arch/arm/regs/misc_types.hh
index c139f1a38e..71fdd605ce 100644
--- a/src/arch/arm/regs/misc_types.hh
+++ b/src/arch/arm/regs/misc_types.hh
@@ -345,7 +345,7 @@ namespace ArmISA
         Bitfield<0>  cp0;
     EndBitUnion(NSACR)
 
-    BitUnion32(SCR)
+    BitUnion64(SCR)
         Bitfield<21> fien;
         Bitfield<20> nmea;
         Bitfield<19> ease;

From 1629ee71c7794f97de4d9e24fd0cf339576fadea Mon Sep 17 00:00:00 2001
From: Giacomo Travaglini <giacomo.travaglini@arm.com>
Date: Tue, 7 Mar 2023 16:13:45 +0000
Subject: [PATCH 433/492] arch-arm: Implement FEAT_RNG

Change-Id: I9d60d249172ef4bbaf5d9b38ef279eff344b80d8
Signed-off-by: Giacomo Travaglini <giacomo.travaglini@arm.com>
Reviewed-by: Richard Cooper <richard.cooper@arm.com>
Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/70721
Tested-by: kokoro <noreply+kokoro@google.com>
Maintainer: Jason Lowe-Power <power.jg@gmail.com>
Reviewed-by: Jason Lowe-Power <power.jg@gmail.com>
---
 src/arch/arm/ArmSystem.py       |  8 +++++++-
 src/arch/arm/isa.cc             | 16 ++++++++++++++++
 src/arch/arm/process.cc         |  1 +
 src/arch/arm/regs/misc.cc       | 32 ++++++++++++++++++++++++++++++++
 src/arch/arm/regs/misc.hh       |  7 +++++++
 src/arch/arm/regs/misc_types.hh |  1 +
 6 files changed, 64 insertions(+), 1 deletion(-)

diff --git a/src/arch/arm/ArmSystem.py b/src/arch/arm/ArmSystem.py
index e08108fa07..c3b3cf6354 100644
--- a/src/arch/arm/ArmSystem.py
+++ b/src/arch/arm/ArmSystem.py
@@ -88,6 +88,8 @@ class ArmExtension(ScopedEnum):
         "FEAT_FLAGM",
         # Armv8.5
         "FEAT_FLAGM2",
+        "FEAT_RNG",
+        "FEAT_RNG_TRAP",
         # Armv9.2
         "FEAT_SME",  # Optional in Armv9.2
         # Others
@@ -204,7 +206,11 @@ class Armv84(Armv83):
 
 
 class Armv85(Armv84):
-    extensions = Armv84.extensions + ["FEAT_FLAGM2"]
+    extensions = Armv84.extensions + [
+        "FEAT_FLAGM2",
+        "FEAT_RNG",
+        "FEAT_RNG_TRAP",
+    ]
 
 
 class Armv92(Armv85):
diff --git a/src/arch/arm/isa.cc b/src/arch/arm/isa.cc
index 9c8e282e20..02129266cf 100644
--- a/src/arch/arm/isa.cc
+++ b/src/arch/arm/isa.cc
@@ -49,6 +49,7 @@
 #include "arch/arm/utility.hh"
 #include "arch/generic/decoder.hh"
 #include "base/cprintf.hh"
+#include "base/random.hh"
 #include "cpu/base.hh"
 #include "cpu/checker/cpu.hh"
 #include "cpu/reg_class.hh"
@@ -596,6 +597,21 @@ ISA::readMiscReg(RegIndex idx)
       case MISCREG_HIFAR: // alias for secure IFAR
         return readMiscRegNoEffect(MISCREG_IFAR_S);
 
+      case MISCREG_RNDR:
+        tc->setReg(cc_reg::Nz, (RegVal)0);
+        tc->setReg(cc_reg::C, (RegVal)0);
+        tc->setReg(cc_reg::V, (RegVal)0);
+        return random_mt.random<RegVal>();
+      case MISCREG_RNDRRS:
+        tc->setReg(cc_reg::Nz, (RegVal)0);
+        tc->setReg(cc_reg::C, (RegVal)0);
+        tc->setReg(cc_reg::V, (RegVal)0);
+        // Note: we are not reseeding
+        // The random number generator already has an hardcoded
+        // seed for the sake of determinism. There is no point
+        // in simulating non-determinism here
+        return random_mt.random<RegVal>();
+
       // Generic Timer registers
       case MISCREG_CNTFRQ ... MISCREG_CNTVOFF:
       case MISCREG_CNTFRQ_EL0 ... MISCREG_CNTVOFF_EL2:
diff --git a/src/arch/arm/process.cc b/src/arch/arm/process.cc
index b2378cc505..fda9415356 100644
--- a/src/arch/arm/process.cc
+++ b/src/arch/arm/process.cc
@@ -318,6 +318,7 @@ ArmProcess64::armHwcapImpl2() const
 
     const AA64ISAR0 isa_r0 = tc->readMiscReg(MISCREG_ID_AA64ISAR0_EL1);
     hwcap |= (isa_r0.ts >= 2) ? Arm_Flagm2 : Arm_None;
+    hwcap |= (isa_r0.rndr >= 1) ? Arm_Rng : Arm_None;
 
     return hwcap;
 }
diff --git a/src/arch/arm/regs/misc.cc b/src/arch/arm/regs/misc.cc
index 9e633c0c84..0e92e3d2e1 100644
--- a/src/arch/arm/regs/misc.cc
+++ b/src/arch/arm/regs/misc.cc
@@ -1057,6 +1057,8 @@ std::unordered_map<MiscRegNum64, MiscRegIndex> miscRegNumToIdx{
     { MiscRegNum64(3, 2, 0, 0, 0), MISCREG_CSSELR_EL1 },
     { MiscRegNum64(3, 3, 0, 0, 1), MISCREG_CTR_EL0 },
     { MiscRegNum64(3, 3, 0, 0, 7), MISCREG_DCZID_EL0 },
+    { MiscRegNum64(3, 3, 2, 4, 0), MISCREG_RNDR },
+    { MiscRegNum64(3, 3, 2, 4, 1), MISCREG_RNDRRS },
     { MiscRegNum64(3, 3, 4, 2, 0), MISCREG_NZCV },
     { MiscRegNum64(3, 3, 4, 2, 1), MISCREG_DAIF },
     { MiscRegNum64(3, 3, 4, 2, 2), MISCREG_SVCR },
@@ -1999,6 +2001,20 @@ faultImpdefUnimplEL1(const MiscRegLUTEntry &entry,
     }
 }
 
+Fault
+faultRng(const MiscRegLUTEntry &entry,
+    ThreadContext *tc, const MiscRegOp64 &inst)
+{
+    const SCR scr = tc->readMiscReg(MISCREG_SCR_EL3);
+    if (HaveExt(tc, ArmExtension::FEAT_RNG_TRAP) && scr.trndr) {
+        return inst.generateTrap(EL3);
+    } else if (!HaveExt(tc, ArmExtension::FEAT_RNG)) {
+        return inst.undefined();
+    } else {
+        return NoFault;
+    }
+}
+
 }
 
 MiscRegIndex
@@ -3894,6 +3910,7 @@ ISA::initializeMiscRegMetadata()
           isar0_el1.ts = release->has(ArmExtension::FEAT_FLAGM2) ?
               0x2 : release->has(ArmExtension::FEAT_FLAGM) ?
                   0x1 : 0x0;
+          isar0_el1.rndr = release->has(ArmExtension::FEAT_RNG) ? 0x1 : 0x0;
           return isar0_el1;
       }())
       .faultRead(EL1, HCR_TRAP(tid3))
@@ -5400,6 +5417,21 @@ ISA::initializeMiscRegMetadata()
     InitReg(MISCREG_MPAMSM_EL1)
         .allPrivileges().exceptUserMode();
 
+    InitReg(MISCREG_RNDR)
+        .faultRead(EL0, faultRng)
+        .faultRead(EL1, faultRng)
+        .faultRead(EL2, faultRng)
+        .faultRead(EL3, faultRng)
+        .unverifiable()
+        .allPrivileges().writes(0);
+    InitReg(MISCREG_RNDRRS)
+        .faultRead(EL0, faultRng)
+        .faultRead(EL1, faultRng)
+        .faultRead(EL2, faultRng)
+        .faultRead(EL3, faultRng)
+        .unverifiable()
+        .allPrivileges().writes(0);
+
     // Dummy registers
     InitReg(MISCREG_NOP)
       .allPrivileges();
diff --git a/src/arch/arm/regs/misc.hh b/src/arch/arm/regs/misc.hh
index c43cf74489..429fcb59cc 100644
--- a/src/arch/arm/regs/misc.hh
+++ b/src/arch/arm/regs/misc.hh
@@ -1091,6 +1091,10 @@ namespace ArmISA
         MISCREG_TPIDR2_EL0,
         MISCREG_MPAMSM_EL1,
 
+        // FEAT_RNG
+        MISCREG_RNDR,
+        MISCREG_RNDRRS,
+
         // NUM_PHYS_MISCREGS specifies the number of actual physical
         // registers, not considering the following pseudo-registers
         // (dummy registers), like MISCREG_UNKNOWN, MISCREG_IMPDEF_UNIMPL.
@@ -2760,6 +2764,9 @@ namespace ArmISA
         "tpidr2_el0",
         "mpamsm_el1",
 
+        "rndr",
+        "rndrrs",
+
         "num_phys_regs",
 
         // Dummy registers
diff --git a/src/arch/arm/regs/misc_types.hh b/src/arch/arm/regs/misc_types.hh
index 71fdd605ce..214d4180d3 100644
--- a/src/arch/arm/regs/misc_types.hh
+++ b/src/arch/arm/regs/misc_types.hh
@@ -346,6 +346,7 @@ namespace ArmISA
     EndBitUnion(NSACR)
 
     BitUnion64(SCR)
+        Bitfield<40> trndr;
         Bitfield<21> fien;
         Bitfield<20> nmea;
         Bitfield<19> ease;

From e005e6f250536c751fc2d4718e0d3af0bc547583 Mon Sep 17 00:00:00 2001
From: Giacomo Travaglini <giacomo.travaglini@arm.com>
Date: Wed, 8 Mar 2023 07:59:35 +0000
Subject: [PATCH 434/492] arch-arm: Implement trapping of SME registers

Change-Id: Ic5bcc79a535c928265fbc1db1cd0c85ba1a1b152
Signed-off-by: Giacomo Travaglini <giacomo.travaglini@arm.com>
Reviewed-by: Richard Cooper <richard.cooper@arm.com>
Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/70722
Tested-by: kokoro <noreply+kokoro@google.com>
Maintainer: Jason Lowe-Power <power.jg@gmail.com>
---
 src/arch/arm/regs/misc.cc | 81 ++++++++++++++++++++++++++++++++++++++-
 1 file changed, 80 insertions(+), 1 deletion(-)

diff --git a/src/arch/arm/regs/misc.cc b/src/arch/arm/regs/misc.cc
index 0e92e3d2e1..56644e9170 100644
--- a/src/arch/arm/regs/misc.cc
+++ b/src/arch/arm/regs/misc.cc
@@ -2001,6 +2001,68 @@ faultImpdefUnimplEL1(const MiscRegLUTEntry &entry,
     }
 }
 
+Fault
+faultEsm(const MiscRegLUTEntry &entry,
+    ThreadContext *tc, const MiscRegOp64 &inst)
+{
+    const CPTR cptr_el3 = tc->readMiscReg(MISCREG_CPTR_EL3);
+    if (ArmSystem::haveEL(tc, EL3) && !cptr_el3.esm) {
+        return inst.generateTrap(EL3, ExceptionClass::TRAPPED_SME, 0);
+    } else {
+        return NoFault;
+    }
+}
+
+Fault
+faultTsmSmen(const MiscRegLUTEntry &entry,
+    ThreadContext *tc, const MiscRegOp64 &inst)
+{
+    const HCR hcr_el2 = tc->readMiscReg(MISCREG_HCR_EL2);
+    const CPTR cptr_el2 = tc->readMiscReg(MISCREG_CPTR_EL2);
+    const bool el2_enabled = EL2Enabled(tc);
+    if (el2_enabled && !hcr_el2.e2h && cptr_el2.tsm) {
+        return inst.generateTrap(EL2, ExceptionClass::TRAPPED_SME, 0);
+    } else if (el2_enabled && hcr_el2.e2h && !(cptr_el2.smen & 0b1)) {
+        return inst.generateTrap(EL2, ExceptionClass::TRAPPED_SME, 0);
+    } else {
+        return faultEsm(entry, tc, inst);
+    }
+}
+
+Fault
+faultSmenEL1(const MiscRegLUTEntry &entry,
+    ThreadContext *tc, const MiscRegOp64 &inst)
+{
+    const CPACR cpacr = tc->readMiscReg(MISCREG_CPACR_EL1);
+    if (!(cpacr.smen & 0b1)) {
+        return inst.generateTrap(EL1, ExceptionClass::TRAPPED_SME, 0);
+    } else {
+        return faultTsmSmen(entry, tc, inst);
+    }
+}
+
+Fault
+faultSmenEL0(const MiscRegLUTEntry &entry,
+    ThreadContext *tc, const MiscRegOp64 &inst)
+{
+    const bool el2_enabled = EL2Enabled(tc);
+    const HCR hcr = tc->readMiscRegNoEffect(MISCREG_HCR_EL2);
+    const bool in_host = hcr.e2h && hcr.tge;
+
+    const CPACR cpacr = tc->readMiscReg(MISCREG_CPACR_EL1);
+    const CPTR cptr_el2 = tc->readMiscReg(MISCREG_CPTR_EL2);
+    if (!(el2_enabled && in_host) && cpacr.smen != 0b11) {
+        if (el2_enabled && hcr.tge)
+            return inst.generateTrap(EL2, ExceptionClass::TRAPPED_SME, 0);
+        else
+            return inst.generateTrap(EL1, ExceptionClass::TRAPPED_SME, 0);
+    } else if (el2_enabled && in_host && cptr_el2.smen != 0b11) {
+        return inst.generateTrap(EL2, ExceptionClass::TRAPPED_SME, 0);
+    } else {
+        return faultTsmSmen(entry, tc, inst);
+    }
+}
+
 Fault
 faultRng(const MiscRegLUTEntry &entry,
     ThreadContext *tc, const MiscRegOp64 &inst)
@@ -5348,6 +5410,7 @@ ISA::initializeMiscRegMetadata()
             smfr0_el1.fa64 = 0x1;
             return smfr0_el1;
         }())
+        .faultRead(EL1, HCR_TRAP(tid3))
         .allPrivileges().exceptUserMode().writes(0);
     InitReg(MISCREG_SVCR)
         .res0([](){
@@ -5356,6 +5419,10 @@ ISA::initializeMiscRegMetadata()
             svcr_mask.za = 1;
             return ~svcr_mask;
         }())
+        .fault(EL0, faultSmenEL0)
+        .fault(EL1, faultSmenEL1)
+        .fault(EL2, faultTsmSmen)
+        .fault(EL3, faultEsm)
         .allPrivileges();
     InitReg(MISCREG_SMIDR_EL1)
         .reset([](){
@@ -5365,11 +5432,17 @@ ISA::initializeMiscRegMetadata()
             smidr_el1.implementer = 0x41;
             return smidr_el1;
         }())
+        .faultRead(EL1, HCR_TRAP(tid1))
         .allPrivileges().exceptUserMode().writes(0);
     InitReg(MISCREG_SMPRI_EL1)
         .res0(mask(63, 4))
-        .allPrivileges().exceptUserMode().reads(1);
+        .fault(EL1, faultEsm)
+        .fault(EL2, faultEsm)
+        .fault(EL3, faultEsm)
+        .allPrivileges().exceptUserMode();
     InitReg(MISCREG_SMPRIMAP_EL2)
+        .fault(EL2, faultEsm)
+        .fault(EL3, faultEsm)
         .hyp().mon();
     InitReg(MISCREG_SMCR_EL3)
         .reset([this](){
@@ -5383,6 +5456,7 @@ ISA::initializeMiscRegMetadata()
             smcr_el3.len = smeVL - 1;
             return smcr_el3;
         }())
+        .fault(EL3, faultEsm)
         .mon();
     InitReg(MISCREG_SMCR_EL2)
         .reset([this](){
@@ -5396,6 +5470,8 @@ ISA::initializeMiscRegMetadata()
             smcr_el2.len = smeVL - 1;
             return smcr_el2;
         }())
+        .fault(EL2, faultTsmSmen)
+        .fault(EL3, faultEsm)
         .hyp().mon();
     InitReg(MISCREG_SMCR_EL12)
         .allPrivileges().exceptUserMode();
@@ -5411,6 +5487,9 @@ ISA::initializeMiscRegMetadata()
             smcr_el1.len = smeVL - 1;
             return smcr_el1;
         }())
+        .fault(EL1, faultSmenEL1)
+        .fault(EL2, faultTsmSmen)
+        .fault(EL3, faultEsm)
         .allPrivileges().exceptUserMode();
     InitReg(MISCREG_TPIDR2_EL0)
         .allPrivileges();

From ae115fcfd5b4180fe9bb0273f47e457f64e036b5 Mon Sep 17 00:00:00 2001
From: Giacomo Travaglini <giacomo.travaglini@arm.com>
Date: Tue, 7 Mar 2023 22:33:31 +0000
Subject: [PATCH 435/492] arch-arm: Implement FEAT_IDST

Change-Id: I3cabcfdb10f4eefaf2ab039376d840cc4c54609a
Signed-off-by: Giacomo Travaglini <giacomo.travaglini@arm.com>
Reviewed-by: Richard Cooper <richard.cooper@arm.com>
Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/70723
Tested-by: kokoro <noreply+kokoro@google.com>
Maintainer: Jason Lowe-Power <power.jg@gmail.com>
---
 src/arch/arm/ArmSystem.py |  9 +++++-
 src/arch/arm/regs/misc.cc | 67 +++++++++++++++++++++++++++++----------
 2 files changed, 59 insertions(+), 17 deletions(-)

diff --git a/src/arch/arm/ArmSystem.py b/src/arch/arm/ArmSystem.py
index c3b3cf6354..b826f0dec7 100644
--- a/src/arch/arm/ArmSystem.py
+++ b/src/arch/arm/ArmSystem.py
@@ -86,6 +86,7 @@ class ArmExtension(ScopedEnum):
         "FEAT_SEL2",
         "FEAT_TLBIOS",
         "FEAT_FLAGM",
+        "FEAT_IDST",
         # Armv8.5
         "FEAT_FLAGM2",
         "FEAT_RNG",
@@ -170,6 +171,7 @@ class ArmDefaultRelease(Armv8):
         "FEAT_SEL2",
         "FEAT_TLBIOS",
         "FEAT_FLAGM",
+        "FEAT_IDST",
         # Armv8.5
         "FEAT_FLAGM2",
         # Armv9.2
@@ -202,7 +204,12 @@ class Armv83(Armv82):
 
 
 class Armv84(Armv83):
-    extensions = Armv83.extensions + ["FEAT_SEL2", "FEAT_TLBIOS", "FEAT_FLAGM"]
+    extensions = Armv83.extensions + [
+        "FEAT_SEL2",
+        "FEAT_TLBIOS",
+        "FEAT_FLAGM",
+        "FEAT_IDST",
+    ]
 
 
 class Armv85(Armv84):
diff --git a/src/arch/arm/regs/misc.cc b/src/arch/arm/regs/misc.cc
index 56644e9170..53e92687b9 100644
--- a/src/arch/arm/regs/misc.cc
+++ b/src/arch/arm/regs/misc.cc
@@ -2077,6 +2077,22 @@ faultRng(const MiscRegLUTEntry &entry,
     }
 }
 
+Fault
+faultIdst(const MiscRegLUTEntry &entry,
+    ThreadContext *tc, const MiscRegOp64 &inst)
+{
+    if (HaveExt(tc, ArmExtension::FEAT_IDST)) {
+        const HCR hcr = tc->readMiscReg(MISCREG_HCR_EL2);
+        if (EL2Enabled(tc) && hcr.tge) {
+            return inst.generateTrap(EL2);
+        } else {
+            return inst.generateTrap(EL1);
+        }
+    } else {
+        return inst.undefined();
+    }
+}
+
 }
 
 MiscRegIndex
@@ -3828,6 +3844,7 @@ ISA::initializeMiscRegMetadata()
     // AArch64 registers (Op0=1,3);
     InitReg(MISCREG_MIDR_EL1)
       .allPrivileges().exceptUserMode().writes(0)
+      .faultRead(EL0, faultIdst)
       .mapsTo(MISCREG_MIDR);
     InitReg(MISCREG_MPIDR_EL1)
       .allPrivileges().exceptUserMode().writes(0)
@@ -3923,34 +3940,40 @@ ISA::initializeMiscRegMetadata()
           return pfr0_el1;
       }())
       .unserialize(0)
+      .faultRead(EL0, faultIdst)
       .faultRead(EL1, HCR_TRAP(tid3))
-      .allPrivileges().exceptUserMode().writes(0);
+      .allPrivileges().writes(0);
     InitReg(MISCREG_ID_AA64PFR1_EL1)
       .reset(release->has(ArmExtension::FEAT_SME) ?
           0x1 << 24 : 0)
       .unserialize(0)
+      .faultRead(EL0, faultIdst)
       .faultRead(EL1, HCR_TRAP(tid3))
-      .allPrivileges().exceptUserMode().writes(0);
+      .allPrivileges().writes(0);
     InitReg(MISCREG_ID_AA64DFR0_EL1)
       .reset([p](){
           AA64DFR0 dfr0_el1 = p.id_aa64dfr0_el1;
           dfr0_el1.pmuver = p.pmu ? 1 : 0; // Enable PMUv3
           return dfr0_el1;
       }())
+      .faultRead(EL0, faultIdst)
       .faultRead(EL1, HCR_TRAP(tid3))
-      .allPrivileges().exceptUserMode().writes(0);
+      .allPrivileges().writes(0);
     InitReg(MISCREG_ID_AA64DFR1_EL1)
       .reset(p.id_aa64dfr1_el1)
+      .faultRead(EL0, faultIdst)
       .faultRead(EL1, HCR_TRAP(tid3))
-      .allPrivileges().exceptUserMode().writes(0);
+      .allPrivileges().writes(0);
     InitReg(MISCREG_ID_AA64AFR0_EL1)
       .reset(p.id_aa64afr0_el1)
+      .faultRead(EL0, faultIdst)
       .faultRead(EL1, HCR_TRAP(tid3))
-      .allPrivileges().exceptUserMode().writes(0);
+      .allPrivileges().writes(0);
     InitReg(MISCREG_ID_AA64AFR1_EL1)
       .reset(p.id_aa64afr1_el1)
+      .faultRead(EL0, faultIdst)
       .faultRead(EL1, HCR_TRAP(tid3))
-      .allPrivileges().exceptUserMode().writes(0);
+      .allPrivileges().writes(0);
     InitReg(MISCREG_ID_AA64ISAR0_EL1)
       .reset([p,release=release](){
           AA64ISAR0 isar0_el1 = p.id_aa64isar0_el1;
@@ -3975,8 +3998,9 @@ ISA::initializeMiscRegMetadata()
           isar0_el1.rndr = release->has(ArmExtension::FEAT_RNG) ? 0x1 : 0x0;
           return isar0_el1;
       }())
+      .faultRead(EL0, faultIdst)
       .faultRead(EL1, HCR_TRAP(tid3))
-      .allPrivileges().exceptUserMode().writes(0);
+      .allPrivileges().writes(0);
     InitReg(MISCREG_ID_AA64ISAR1_EL1)
       .reset([p,release=release](){
           AA64ISAR1 isar1_el1 = p.id_aa64isar1_el1;
@@ -3986,8 +4010,9 @@ ISA::initializeMiscRegMetadata()
           isar1_el1.gpa = release->has(ArmExtension::FEAT_PAuth) ? 0x1 : 0x0;
           return isar1_el1;
       }())
+      .faultRead(EL0, faultIdst)
       .faultRead(EL1, HCR_TRAP(tid3))
-      .allPrivileges().exceptUserMode().writes(0);
+      .allPrivileges().writes(0);
     InitReg(MISCREG_ID_AA64MMFR0_EL1)
       .reset([p,asidbits=haveLargeAsid64,parange=physAddrRange](){
           AA64MMFR0 mmfr0_el1 = p.id_aa64mmfr0_el1;
@@ -3995,8 +4020,9 @@ ISA::initializeMiscRegMetadata()
           mmfr0_el1.parange = encodePhysAddrRange64(parange);
           return mmfr0_el1;
       }())
+      .faultRead(EL0, faultIdst)
       .faultRead(EL1, HCR_TRAP(tid3))
-      .allPrivileges().exceptUserMode().writes(0);
+      .allPrivileges().writes(0);
     InitReg(MISCREG_ID_AA64MMFR1_EL1)
       .reset([p,release=release](){
           AA64MMFR1 mmfr1_el1 = p.id_aa64mmfr1_el1;
@@ -4006,17 +4032,20 @@ ISA::initializeMiscRegMetadata()
           mmfr1_el1.pan = release->has(ArmExtension::FEAT_PAN) ? 0x1 : 0x0;
           return mmfr1_el1;
       }())
+      .faultRead(EL0, faultIdst)
       .faultRead(EL1, HCR_TRAP(tid3))
-      .allPrivileges().exceptUserMode().writes(0);
+      .allPrivileges().writes(0);
     InitReg(MISCREG_ID_AA64MMFR2_EL1)
       .reset([p,release=release](){
           AA64MMFR2 mmfr2_el1 = p.id_aa64mmfr2_el1;
           mmfr2_el1.uao = release->has(ArmExtension::FEAT_UAO) ? 0x1 : 0x0;
           mmfr2_el1.varange = release->has(ArmExtension::FEAT_LVA) ? 0x1 : 0x0;
+          mmfr2_el1.ids = release->has(ArmExtension::FEAT_IDST) ? 0x1 : 0x0;
           return mmfr2_el1;
       }())
+      .faultRead(EL0, faultIdst)
       .faultRead(EL1, HCR_TRAP(tid3))
-      .allPrivileges().exceptUserMode().writes(0);
+      .allPrivileges().writes(0);
 
     InitReg(MISCREG_APDAKeyHi_EL1)
       .fault(EL1, faultPauthEL1)
@@ -4060,14 +4089,17 @@ ISA::initializeMiscRegMetadata()
       .allPrivileges().exceptUserMode();
 
     InitReg(MISCREG_CCSIDR_EL1)
+      .faultRead(EL0, faultIdst)
       .faultRead(EL1, HCR_TRAP(tid2))
-      .allPrivileges().exceptUserMode().writes(0);
+      .allPrivileges().writes(0);
     InitReg(MISCREG_CLIDR_EL1)
+      .faultRead(EL0, faultIdst)
       .faultRead(EL1, HCR_TRAP(tid2))
-      .allPrivileges().exceptUserMode().writes(0);
+      .allPrivileges().writes(0);
     InitReg(MISCREG_AIDR_EL1)
+      .faultRead(EL0, faultIdst)
       .faultRead(EL1, HCR_TRAP(tid1))
-      .allPrivileges().exceptUserMode().writes(0);
+      .allPrivileges().writes(0);
     InitReg(MISCREG_CSSELR_EL1)
       .allPrivileges().exceptUserMode()
       .fault(EL1, HCR_TRAP(tid2))
@@ -5371,6 +5403,7 @@ ISA::initializeMiscRegMetadata()
 
     // SVE
     InitReg(MISCREG_ID_AA64ZFR0_EL1)
+        .faultRead(EL0, faultIdst)
         .faultRead(EL1, HCR_TRAP(tid3))
         .allPrivileges().exceptUserMode().writes(0);
     InitReg(MISCREG_ZCR_EL3)
@@ -5410,8 +5443,9 @@ ISA::initializeMiscRegMetadata()
             smfr0_el1.fa64 = 0x1;
             return smfr0_el1;
         }())
+        .faultRead(EL0, faultIdst)
         .faultRead(EL1, HCR_TRAP(tid3))
-        .allPrivileges().exceptUserMode().writes(0);
+        .allPrivileges().writes(0);
     InitReg(MISCREG_SVCR)
         .res0([](){
             SVCR svcr_mask = 0;
@@ -5432,8 +5466,9 @@ ISA::initializeMiscRegMetadata()
             smidr_el1.implementer = 0x41;
             return smidr_el1;
         }())
+        .faultRead(EL0, faultIdst)
         .faultRead(EL1, HCR_TRAP(tid1))
-        .allPrivileges().exceptUserMode().writes(0);
+        .allPrivileges().writes(0);
     InitReg(MISCREG_SMPRI_EL1)
         .res0(mask(63, 4))
         .fault(EL1, faultEsm)

From 3787ab5b200d4a60459e6f9fa8b7688672a70591 Mon Sep 17 00:00:00 2001
From: Giacomo Travaglini <giacomo.travaglini@arm.com>
Date: Thu, 13 Apr 2023 10:52:06 +0100
Subject: [PATCH 436/492] arch-arm: Rename AdvSIMD instruction pool

The decoding function was wrongly named decodeNeon3SameExtra,
referring to the "AdvSIMD three same Extra" instruction pool

This might be an old name as I can only find the
"AdvSIMD *scalar* three same Extra" in the Arm arm. The
encoding space reserved to the pool bears the
"Advanced SIMD three-register extension" name; we
therefore rename the function to decodeNeon3RegExtension

Change-Id: I056da8f0c7808935d12a4b05490d30654178071f
Signed-off-by: Giacomo Travaglini <giacomo.travaglini@arm.com>
Reviewed-by: Richard Cooper <richard.cooper@arm.com>
Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/70724
Tested-by: kokoro <noreply+kokoro@google.com>
Maintainer: Jason Lowe-Power <power.jg@gmail.com>
---
 src/arch/arm/isa/formats/aarch64.isa | 2 +-
 src/arch/arm/isa/formats/neon64.isa  | 6 +++---
 2 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/src/arch/arm/isa/formats/aarch64.isa b/src/arch/arm/isa/formats/aarch64.isa
index 9ad2de2c72..47d509e808 100644
--- a/src/arch/arm/isa/formats/aarch64.isa
+++ b/src/arch/arm/isa/formats/aarch64.isa
@@ -2461,7 +2461,7 @@ namespace Aarch64
                 return new Unknown64(machInst);
             }
         } else if (bits(machInst, 15) == 1) {
-            return decodeNeon3SameExtra<DecoderFeatures>(machInst);
+            return decodeNeon3RegExtension<DecoderFeatures>(machInst);
         } else if (bits(machInst, 10) == 1) {
             if (bits(machInst, 23, 22))
                 return new Unknown64(machInst);
diff --git a/src/arch/arm/isa/formats/neon64.isa b/src/arch/arm/isa/formats/neon64.isa
index 72b7e28d42..c200da74a8 100644
--- a/src/arch/arm/isa/formats/neon64.isa
+++ b/src/arch/arm/isa/formats/neon64.isa
@@ -39,9 +39,9 @@ namespace Aarch64
     // AdvSIMD three same
     template <typename DecoderFeatures>
     StaticInstPtr decodeNeon3Same(ExtMachInst machInst);
-    // AdvSIMD three same Extra
+    // AdvSIMD three register extension
     template <typename DecoderFeatures>
-    StaticInstPtr decodeNeon3SameExtra(ExtMachInst machInst);
+    StaticInstPtr decodeNeon3RegExtension(ExtMachInst machInst);
     // AdvSIMD three different
     inline StaticInstPtr decodeNeon3Diff(ExtMachInst machInst);
     // AdvSIMD two-reg misc
@@ -507,7 +507,7 @@ namespace Aarch64
 
     template <typename DecoderFeatures>
     StaticInstPtr
-    decodeNeon3SameExtra(ExtMachInst machInst)
+    decodeNeon3RegExtension(ExtMachInst machInst)
     {
         uint8_t q      = bits(machInst, 30);
         uint8_t size   = bits(machInst, 23, 22);

From 7b91521c605be23ea512d4e59b6cd1ac95a087a0 Mon Sep 17 00:00:00 2001
From: Giacomo Travaglini <giacomo.travaglini@arm.com>
Date: Fri, 14 Apr 2023 11:02:07 +0100
Subject: [PATCH 437/492] arch-arm: Define a AA64ZFR0 data type

Change-Id: I6b0dcf0c1882f356783934f625c2bc3a25fbb885
Signed-off-by: Giacomo Travaglini <giacomo.travaglini@arm.com>
Reviewed-by: Richard Cooper <richard.cooper@arm.com>
Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/70725
Tested-by: kokoro <noreply+kokoro@google.com>
Maintainer: Jason Lowe-Power <power.jg@gmail.com>
---
 src/arch/arm/regs/misc_types.hh | 13 +++++++++++++
 1 file changed, 13 insertions(+)

diff --git a/src/arch/arm/regs/misc_types.hh b/src/arch/arm/regs/misc_types.hh
index 214d4180d3..b7a1207cf5 100644
--- a/src/arch/arm/regs/misc_types.hh
+++ b/src/arch/arm/regs/misc_types.hh
@@ -203,6 +203,19 @@ namespace ArmISA
         Bitfield<3, 0> el0;
     EndBitUnion(AA64PFR0)
 
+    BitUnion64(AA64ZFR0)
+        Bitfield<59, 56> f64mm;
+        Bitfield<55, 52> f32mm;
+        Bitfield<47, 44> i8mm;
+        Bitfield<43, 40> sm4;
+        Bitfield<35, 32> sha3;
+        Bitfield<27, 24> b16b16;
+        Bitfield<23, 20> bf16;
+        Bitfield<19, 16> bitPerm;
+        Bitfield<7, 4> aes;
+        Bitfield<3, 0> sveVer;
+    EndBitUnion(AA64ZFR0)
+
     BitUnion64(AA64SMFR0)
         Bitfield<63> fa64;
         Bitfield<59, 56> smEver;

From 332ef131dc0d8c71dc76bf9d0726390d0ec3abbd Mon Sep 17 00:00:00 2001
From: Luming Wang <wlm199558@126.com>
Date: Sat, 20 May 2023 17:29:59 +0800
Subject: [PATCH 438/492] scons: fix build failed caused by Non-ASCII directory
 path

This patch addresses the issue of gem5 failing to build when
the build directory path contains non-ASCII characters.
The previous patches[1] that attempted to fix this issue
became ineffective after the upgrade of Python and pybind11
to new versions. This new patch manually sets the locale in
marshal.py based on the `LC_CTYPE` environment variable,
providing a comprehensive solution that works with Non-ASCII
build directory paths.

[1] https://gem5-review.googlesource.com/c/public/gem5/+/58369

Change-Id: I3ad28b6ee52fd347d2fe71f279baab629e88d12c
Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/70818
Tested-by: kokoro <noreply+kokoro@google.com>
Reviewed-by: Bobby Bruce <bbruce@ucdavis.edu>
Maintainer: Bobby Bruce <bbruce@ucdavis.edu>
---
 build_tools/marshal.py            | 6 ++++++
 site_scons/gem5_scons/defaults.py | 1 +
 2 files changed, 7 insertions(+)

diff --git a/build_tools/marshal.py b/build_tools/marshal.py
index 18afe2ca52..4a1522f0b8 100644
--- a/build_tools/marshal.py
+++ b/build_tools/marshal.py
@@ -48,6 +48,7 @@ interpretters, and so the exact same interpretter should be used both to run
 this script, and to read in and execute the marshalled code later.
 """
 
+import locale
 import marshal
 import sys
 import zlib
@@ -65,6 +66,11 @@ if len(sys.argv) < 4:
     print(f"Usage: {sys.argv[0]} CPP PY MODPATH ABSPATH", file=sys.stderr)
     sys.exit(1)
 
+# Set the Python's locale settings manually based on the `LC_CTYPE`
+# environment variable
+if "LC_CTYPE" in os.environ:
+    locale.setlocale(locale.LC_CTYPE, os.environ["LC_CTYPE"])
+
 _, cpp, python, modpath, abspath = sys.argv
 
 with open(python, "r") as f:
diff --git a/site_scons/gem5_scons/defaults.py b/site_scons/gem5_scons/defaults.py
index a07b7ffa4b..996cfd495f 100644
--- a/site_scons/gem5_scons/defaults.py
+++ b/site_scons/gem5_scons/defaults.py
@@ -66,6 +66,7 @@ def EnvDefaults(env):
             "GEM5PY_LINKFLAGS_EXTRA",
             "LINKFLAGS_EXTRA",
             "LANG",
+            "LC_CTYPE",
         ]
     )
 

From 4d18546bfb24570ada5d524260c68d2eb3b92a63 Mon Sep 17 00:00:00 2001
From: Matthew Poremba <matthew.poremba@amd.com>
Date: Mon, 22 May 2023 16:03:58 -0500
Subject: [PATCH 439/492] dev-amdgpu: Update SDMA checkpointing

Patch https://gem5-review.googlesource.com/c/public/gem5/+/70040 added
support for a variable number of SDMA engines to support newer GPU
models. As part of this an SDMA IDs map was added to map from SDMA ID
number to the SDMA SimObject pointer. In order to get the correct
pointer in unserialize now, we need to store the ID in the checkpoint
and use that to index the new map. We can't simply assign using the loop
variable as the SDMAs might not be in order in the checkpoint and
additionally the checkpoint contains both the gfx and page offset for
the SDMA engines, so each SDMA is inserted into the SDMA offset map
(sdmaEngs) twice.

Change-Id: I08e9a8d785f467b6eebff8ab0a9336851c87258d
Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/70878
Maintainer: Matt Sinclair <mattdsinclair@gmail.com>
Tested-by: kokoro <noreply+kokoro@google.com>
Reviewed-by: Matt Sinclair <mattdsinclair@gmail.com>
---
 src/dev/amdgpu/amdgpu_device.cc | 7 ++++---
 src/dev/amdgpu/sdma_engine.hh   | 1 +
 2 files changed, 5 insertions(+), 3 deletions(-)

diff --git a/src/dev/amdgpu/amdgpu_device.cc b/src/dev/amdgpu/amdgpu_device.cc
index f58d1f7242..7037e6fb1c 100644
--- a/src/dev/amdgpu/amdgpu_device.cc
+++ b/src/dev/amdgpu/amdgpu_device.cc
@@ -604,7 +604,7 @@ AMDGPUDevice::serialize(CheckpointOut &cp) const
     idx = 0;
     for (auto & it : sdmaEngs) {
         sdma_engs_offset[idx] = it.first;
-        sdma_engs[idx] = idx;
+        sdma_engs[idx] = it.second->getId();
         ++idx;
     }
 
@@ -675,8 +675,9 @@ AMDGPUDevice::unserialize(CheckpointIn &cp)
         UNSERIALIZE_ARRAY(sdma_engs, sizeof(sdma_engs)/sizeof(sdma_engs[0]));
 
         for (int idx = 0; idx < sdma_engs_size; ++idx) {
-            assert(sdmaIds.count(idx));
-            SDMAEngine *sdma = sdmaIds[idx];
+            int sdma_id = sdma_engs[idx];
+            assert(sdmaIds.count(sdma_id));
+            SDMAEngine *sdma = sdmaIds[sdma_id];
             sdmaEngs.insert(std::make_pair(sdma_engs_offset[idx], sdma));
         }
     }
diff --git a/src/dev/amdgpu/sdma_engine.hh b/src/dev/amdgpu/sdma_engine.hh
index 1e4f965920..bcbd497e8a 100644
--- a/src/dev/amdgpu/sdma_engine.hh
+++ b/src/dev/amdgpu/sdma_engine.hh
@@ -165,6 +165,7 @@ class SDMAEngine : public DmaVirtDevice
     void setGPUDevice(AMDGPUDevice *gpu_device);
 
     void setId(int _id) { id = _id; }
+    int getId() const { return id; }
     /**
      * Returns the client id for the Interrupt Handler.
      */

From d537ded9d225f04ebdcb2d64585f30c8e7f7756d Mon Sep 17 00:00:00 2001
From: Giacomo Travaglini <giacomo.travaglini@arm.com>
Date: Tue, 16 May 2023 15:42:00 +0100
Subject: [PATCH 440/492] arch-arm: Fix printing of VecElemClass registers

At the moment it is not possible to trace the value of VecElemClass
registers. If a AArch32 SIMD binary is run with tracing on,
simulation will fail the following assertion [1].

std::string
valString(const void *val, size_t size) const override
{
    assert(size == sizeof(ValueType));

The problem is that Arm VecElems are stored in RegVal (uint64_t),
but the VecElem data type (ValueType above) per se is a uint32_t.

So valString is getting called with size = 8 (coming from RegVal)
but ValueType has size = 4. We fix this problem by using RegVal as
a VecElemRegClassOps template parameter to make them match.
This is not changing anything from a functionality perspective.
The result will be that we will be able to print VecElems as 64bit
values.

This solution is the most simple one but a bit dirty. I believe
in the long term we should make the VecElemClass use the void* interface
rather than the RegVal one. In this way we will be able to correctly
print the VecElem size as 32bit value.

[1]: https://github.com/gem5/gem5/blob/v22.1.0.0/src/cpu/reg_class.hh#L362

Change-Id: Ic3fc252d41449f828b77f938fefc0cd4274b1c57
Signed-off-by: Giacomo Travaglini <giacomo.travaglini@arm.com>
Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/70697
Tested-by: kokoro <noreply+kokoro@google.com>
Reviewed-by: Richard Cooper <richard.cooper@arm.com>
Maintainer: Jason Lowe-Power <power.jg@gmail.com>
---
 src/arch/arm/regs/vec.hh | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/arch/arm/regs/vec.hh b/src/arch/arm/regs/vec.hh
index 00ab87fbcb..19f37c916d 100644
--- a/src/arch/arm/regs/vec.hh
+++ b/src/arch/arm/regs/vec.hh
@@ -93,7 +93,7 @@ const int VECREG_UREG0 = 32;
 const int PREDREG_FFR = 16;
 const int PREDREG_UREG0 = 17;
 
-static inline VecElemRegClassOps<ArmISA::VecElem>
+static inline VecElemRegClassOps<RegVal>
     vecRegElemClassOps(NumVecElemPerVecReg);
 static inline TypedRegClassOps<ArmISA::VecRegContainer> vecRegClassOps;
 static inline TypedRegClassOps<ArmISA::VecPredRegContainer> vecPredRegClassOps;

From 1a2904e021671374d20527b46a10d2de2ae310ce Mon Sep 17 00:00:00 2001
From: Jason Lowe-Power <jason@lowepower.com>
Date: Tue, 23 May 2023 08:41:31 -0700
Subject: [PATCH 441/492] scons: Add os import to marshall

This file was missing the `import os` after
I3ad28b6ee52fd347d2fe71f279baab629e88d12c

Change-Id: I7fde59e92f03fd240f48a304488d77628bfdb852
Signed-off-by: Jason Lowe-Power <jason@lowepower.com>
Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/70918
Maintainer: Jason Lowe-Power <power.jg@gmail.com>
Maintainer: Bobby Bruce <bbruce@ucdavis.edu>
Tested-by: kokoro <noreply+kokoro@google.com>
Reviewed-by: Bobby Bruce <bbruce@ucdavis.edu>
---
 build_tools/marshal.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/build_tools/marshal.py b/build_tools/marshal.py
index 4a1522f0b8..58c78e1632 100644
--- a/build_tools/marshal.py
+++ b/build_tools/marshal.py
@@ -50,6 +50,7 @@ this script, and to read in and execute the marshalled code later.
 
 import locale
 import marshal
+import os
 import sys
 import zlib
 

From 4dccd7dd6c69971541be2cbb0de4a2fe2b52f3ac Mon Sep 17 00:00:00 2001
From: Roger Chang <rogerycchang@google.com>
Date: Sat, 13 May 2023 21:42:39 +0800
Subject: [PATCH 442/492] arch-riscv: Add BS format isa

This format is helper for aes32dsi, aes32dsmi, aes32esi, aes32esmi,
sm4ed, sm4ks disassembly

Change-Id: Ieff1932e267efc0a8c5fd8e557fc467dc376da4e
Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/70598
Reviewed-by: Yu-hsin Wang <yuhsingw@google.com>
Tested-by: kokoro <noreply+kokoro@google.com>
Maintainer: Jason Lowe-Power <power.jg@gmail.com>
---
 src/arch/riscv/insts/SConscript        |  1 +
 src/arch/riscv/insts/bs.cc             | 53 ++++++++++++++++++++++++
 src/arch/riscv/insts/bs.hh             | 56 ++++++++++++++++++++++++++
 src/arch/riscv/isa/decoder.isa         | 24 +++++------
 src/arch/riscv/isa/formats/bs.isa      | 48 ++++++++++++++++++++++
 src/arch/riscv/isa/formats/formats.isa |  1 +
 src/arch/riscv/isa/includes.isa        |  1 +
 7 files changed, 172 insertions(+), 12 deletions(-)
 create mode 100644 src/arch/riscv/insts/bs.cc
 create mode 100644 src/arch/riscv/insts/bs.hh
 create mode 100644 src/arch/riscv/isa/formats/bs.isa

diff --git a/src/arch/riscv/insts/SConscript b/src/arch/riscv/insts/SConscript
index 80592a34ed..704152c040 100644
--- a/src/arch/riscv/insts/SConscript
+++ b/src/arch/riscv/insts/SConscript
@@ -28,6 +28,7 @@
 Import('*')
 
 Source('amo.cc', tags='riscv isa')
+Source('bs.cc', tags='riscv isa')
 Source('compressed.cc', tags='riscv isa')
 Source('mem.cc', tags='riscv isa')
 Source('standard.cc', tags='riscv isa')
diff --git a/src/arch/riscv/insts/bs.cc b/src/arch/riscv/insts/bs.cc
new file mode 100644
index 0000000000..7a9e6e7f3a
--- /dev/null
+++ b/src/arch/riscv/insts/bs.cc
@@ -0,0 +1,53 @@
+/*
+ * Copyright (c) 2023 Google LLC
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are
+ * met: redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer;
+ * redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution;
+ * neither the name of the copyright holders nor the names of its
+ * contributors may be used to endorse or promote products derived from
+ * this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include "arch/riscv/insts/bs.hh"
+
+#include <sstream>
+#include <string>
+
+#include "arch/riscv/utility.hh"
+
+namespace gem5
+{
+
+namespace RiscvISA
+{
+
+std::string
+BSOp::generateDisassembly(Addr pc, const loader::SymbolTable *symtab) const
+{
+    std::stringstream ss;
+    ss << mnemonic << ' ' << registerName(destRegIdx(0)) << ", " <<
+        registerName(srcRegIdx(0)) << ", " << registerName(srcRegIdx(1)) <<
+        ", " << (uint32_t)bs;
+    return ss.str();
+}
+
+} // namespace RiscvISA
+} // namespace gem5
diff --git a/src/arch/riscv/insts/bs.hh b/src/arch/riscv/insts/bs.hh
new file mode 100644
index 0000000000..d4db5c9dc1
--- /dev/null
+++ b/src/arch/riscv/insts/bs.hh
@@ -0,0 +1,56 @@
+/*
+ * Copyright (c) 2023 Google LLC
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are
+ * met: redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer;
+ * redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution;
+ * neither the name of the copyright holders nor the names of its
+ * contributors may be used to endorse or promote products derived from
+ * this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef __ARCH_RISCV_BS_INST_HH__
+#define __ARCH_RISCV_BS_INST_HH__
+
+#include "arch/riscv/insts/static_inst.hh"
+
+namespace gem5
+{
+
+namespace RiscvISA
+{
+
+class BSOp : public RiscvStaticInst
+{
+  protected:
+    uint8_t bs;
+
+    BSOp(const char *mnem, ExtMachInst _machInst, OpClass __opClass)
+        : RiscvStaticInst(mnem, _machInst, __opClass), bs(0)
+    {}
+
+    std::string generateDisassembly(
+        Addr pc, const loader::SymbolTable *symtab) const override;
+};
+
+} // namespace RiscvISA
+} // namespace gem5
+
+#endif // __ARCH_RISCV_BS_INST_HH__
diff --git a/src/arch/riscv/isa/decoder.isa b/src/arch/riscv/isa/decoder.isa
index 47519eeeb4..45e294649c 100644
--- a/src/arch/riscv/isa/decoder.isa
+++ b/src/arch/riscv/isa/decoder.isa
@@ -1021,27 +1021,27 @@ decode QUADRANT default Unknown::unknown() {
                         }
                     }
                     0x11: decode RVTYPE {
-                        0x0: aes32esi({{
-                            Rd_sw = _rvk_emu_aes32esi(Rs1_sw, Rs2_sw, (uint8_t)BS);
+                        0x0: BSOp::aes32esi({{
+                            Rd_sw = _rvk_emu_aes32esi(Rs1_sw, Rs2_sw, bs);
                         }});
                     }
                     0x13: decode RVTYPE {
-                        0x0: aes32esmi({{
-                            Rd_sw = _rvk_emu_aes32esmi(Rs1_sw, Rs2_sw, (uint8_t)BS);
+                        0x0: BSOp::aes32esmi({{
+                            Rd_sw = _rvk_emu_aes32esmi(Rs1_sw, Rs2_sw, bs);
                         }});
                     }
                     0x15: decode RVTYPE {
-                        0x0: aes32dsi({{
-                            Rd_sw = _rvk_emu_aes32dsi(Rs1_sw, Rs2_sw, (uint8_t)BS);
+                        0x0: BSOp::aes32dsi({{
+                            Rd_sw = _rvk_emu_aes32dsi(Rs1_sw, Rs2_sw, bs);
                         }});
                     }
                     0x17: decode RVTYPE {
-                        0x0: aes32dsmi({{
-                            Rd_sw = _rvk_emu_aes32dsmi(Rs1_sw, Rs2_sw, (uint8_t)BS);
+                        0x0: BSOp::aes32dsmi({{
+                            Rd_sw = _rvk_emu_aes32dsmi(Rs1_sw, Rs2_sw, bs);
                         }});
                     }
-                    0x18: sm4ed({{
-                        Rd_sw = _rvk_emu_sm4ed(Rs1_sw, Rs2_sw, (uint8_t)BS);
+                    0x18: BSOp::sm4ed({{
+                        Rd_sw = _rvk_emu_sm4ed(Rs1_sw, Rs2_sw, bs);
                     }});
                     0x19: decode BS {
                         0x0: decode RVTYPE {
@@ -1050,8 +1050,8 @@ decode QUADRANT default Unknown::unknown() {
                             }});
                         }
                     }
-                    0x1a: sm4ks({{
-                        Rd_sw = _rvk_emu_sm4ks(Rs1_sw, Rs2_sw, (uint8_t)BS);
+                    0x1a: BSOp::sm4ks({{
+                        Rd_sw = _rvk_emu_sm4ks(Rs1_sw, Rs2_sw, bs);
                     }});
                     0x1b: decode BS {
                         0x0: decode RVTYPE {
diff --git a/src/arch/riscv/isa/formats/bs.isa b/src/arch/riscv/isa/formats/bs.isa
new file mode 100644
index 0000000000..d413502dba
--- /dev/null
+++ b/src/arch/riscv/isa/formats/bs.isa
@@ -0,0 +1,48 @@
+// -*- mode:c++ -*-
+
+// Copyright (c) 2023 Google LLC
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met: redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer;
+// redistributions in binary form must reproduce the above copyright
+// notice, this list of conditions and the following disclaimer in the
+// documentation and/or other materials provided with the distribution;
+// neither the name of the copyright holders nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+// Declaration templates.
+
+def template BSConstructor {{
+    %(class_name)s::%(class_name)s(ExtMachInst machInst)
+        : %(base_class)s("%(mnemonic)s", machInst, %(op_class)s)
+    {
+        %(set_reg_idx_arr)s;
+        %(constructor)s;
+        %(bs_code)s;
+    }
+}};
+
+def format BSOp(code, bs_code='bs = (uint8_t)BS;', *opt_flags) {{
+    iop = InstObjParams(name, Name, 'BSOp',
+        {'bs_code': bs_code, 'code': code}, opt_flags)
+    header_output = BasicDeclare.subst(iop)
+    decoder_output = BSConstructor.subst(iop)
+    decode_block = BasicDecode.subst(iop)
+    exec_output = BasicExecute.subst(iop)
+}};
diff --git a/src/arch/riscv/isa/formats/formats.isa b/src/arch/riscv/isa/formats/formats.isa
index 2a6b91024d..19749438a8 100644
--- a/src/arch/riscv/isa/formats/formats.isa
+++ b/src/arch/riscv/isa/formats/formats.isa
@@ -36,6 +36,7 @@
 ##include "mem.isa"
 ##include "fp.isa"
 ##include "amo.isa"
+##include "bs.isa"
 
 // Include formats for nonstandard extensions
 ##include "compressed.isa"
diff --git a/src/arch/riscv/isa/includes.isa b/src/arch/riscv/isa/includes.isa
index a5cc5e85cc..8dddc2fb59 100644
--- a/src/arch/riscv/isa/includes.isa
+++ b/src/arch/riscv/isa/includes.isa
@@ -46,6 +46,7 @@ output header {{
 #include <specialize.h>
 
 #include "arch/riscv/insts/amo.hh"
+#include "arch/riscv/insts/bs.hh"
 #include "arch/riscv/insts/compressed.hh"
 #include "arch/riscv/insts/mem.hh"
 #include "arch/riscv/insts/pseudo.hh"

From 5fa81af8c65416e51a99fff1e9aef7dd10fe347f Mon Sep 17 00:00:00 2001
From: Roger Chang <rogerycchang@google.com>
Date: Sat, 13 May 2023 23:04:46 +0800
Subject: [PATCH 443/492] arch-riscv: Simplify the rev8 and brev8 instructions

These mnemonic of instructions should not have 'rv32_' prefix

Change-Id: Ic072ba8b84e5a51be060e5d7ca16dd913c318957
Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/70599
Reviewed-by: Yu-hsin Wang <yuhsingw@google.com>
Maintainer: Jason Lowe-Power <power.jg@gmail.com>
Tested-by: kokoro <noreply+kokoro@google.com>
---
 src/arch/riscv/isa/decoder.isa | 56 +++++++++++-----------------------
 1 file changed, 18 insertions(+), 38 deletions(-)

diff --git a/src/arch/riscv/isa/decoder.isa b/src/arch/riscv/isa/decoder.isa
index 45e294649c..ae2f0a4f55 100644
--- a/src/arch/riscv/isa/decoder.isa
+++ b/src/arch/riscv/isa/decoder.isa
@@ -667,44 +667,24 @@ decode QUADRANT default Unknown::unknown() {
                             | (Rs1 << ((xlen - imm) & (xlen - 1))));
                     }}, imm_type = uint64_t, imm_code = {{ imm = SHAMT6; }});
                     0xd: decode RS2 {
-                        0x18: decode BIT25 {
-                            0x0: rv32_rev8({{
-                                uint32_t result = 0;
-                                result |=
-                                    ((Rs1_uw & 0xffUL) << 24)
-                                    | (((Rs1_uw >> 24) & 0xffUL));
-                                result |=
-                                    (((Rs1_uw >> 8) & 0xffUL) << 16)
-                                    | (((Rs1_uw >> 16) & 0xffUL) << 8);
-                                Rd = rvSext(result);
-                            }},
-                            imm_type = uint64_t, imm_code = {{ imm = SHAMT5; }});
-                            0x1: rev8({{
-                                uint64_t result = 0;
-                                result |=
-                                    ((Rs1 & 0xffULL) << 56)
-                                    | (((Rs1 >> 56) & 0xffULL));
-                                result |=
-                                    (((Rs1 >> 8) & 0xffULL) << 48)
-                                    | (((Rs1 >> 48) & 0xffULL) << 8);
-                                result |=
-                                    (((Rs1 >> 16) & 0xffULL) << 40)
-                                    | (((Rs1 >> 40) & 0xffULL) << 16);
-                                result |=
-                                    (((Rs1 >> 24) & 0xffULL) << 32)
-                                    | (((Rs1 >> 32) & 0xffULL) << 24);
-                                Rd = result;
-                            }},
-                            imm_type = uint64_t, imm_code = {{ imm = SHAMT6; }});
-                        }
-                        0x07: decode RVTYPE {
-                            0x0: rv32_brev8({{
-                                Rd_sw = _rvk_emu_brev8_32(Rs1_sw);
-                            }}, imm_code = {{ imm = SHAMT5; }});
-                            0x1: brev8({{
-                                Rd = _rvk_emu_brev8_64(Rs1);
-                            }}, imm_code = {{ imm = SHAMT6; }});
-                        }
+                        0x18: ROp::rev8({{
+                            if (rvSelect((bool)SHAMT6BIT5, false)) {
+                                return std::make_shared<IllegalInstFault>(
+                                        "shmat[5] != 0", machInst);
+                            }
+                            if (machInst.rv_type == RV32) {
+                                Rd_sd = _rvk_emu_grev_32(Rs1_sd, 0x18);
+                            } else {
+                                Rd_sd = _rvk_emu_grev_64(Rs1_sd, 0x38);
+                            }
+                        }});
+                        0x07: ROp::brev8({{
+                            if (machInst.rv_type == RV32) {
+                                Rd_sd = _rvk_emu_brev8_32(Rs1_sd);
+                            } else {
+                                Rd_sd = _rvk_emu_brev8_64(Rs1_sd);
+                            }
+                        }});
                     }
                 }
                 0x6: ori({{

From 2579bacf06eb57b75b7fae1c9b1b133fa64f9841 Mon Sep 17 00:00:00 2001
From: Roger Chang <rogerycchang@google.com>
Date: Sat, 13 May 2023 23:09:48 +0800
Subject: [PATCH 444/492] arch-riscv: Merge rv32 and rv64 version of xperm4 and
 xperm8

Remove unessential postfix like '_32' and '_64' from mnemonic

Change-Id: I83d47eeccd04fe61ac8ee0addd7221abbdcefbd1
Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/70600
Reviewed-by: Yu-hsin Wang <yuhsingw@google.com>
Tested-by: kokoro <noreply+kokoro@google.com>
Maintainer: Jason Lowe-Power <power.jg@gmail.com>
---
 src/arch/riscv/isa/decoder.isa | 26 ++++++++++++--------------
 1 file changed, 12 insertions(+), 14 deletions(-)

diff --git a/src/arch/riscv/isa/decoder.isa b/src/arch/riscv/isa/decoder.isa
index ae2f0a4f55..c7eefbc79c 100644
--- a/src/arch/riscv/isa/decoder.isa
+++ b/src/arch/riscv/isa/decoder.isa
@@ -1124,14 +1124,13 @@ decode QUADRANT default Unknown::unknown() {
                     0x10: sh1add({{
                         Rd = rvSext((Rs1 << 1) + Rs2);
                     }});
-                    0x14: decode RVTYPE {
-                        0x0: xperm4_32({{
-                            Rd_sw = _rvk_emu_xperm4_32(Rs1_sw, Rs2_sw);
-                        }});
-                        0x1: xperm4_64({{
+                    0x14: xperm4({{
+                        if (machInst.rv_type == RV32) {
+                            Rd_sd = _rvk_emu_xperm4_32(Rs1_sd, Rs2_sd);
+                        } else {
                             Rd_sd = _rvk_emu_xperm4_64(Rs1_sd, Rs2_sd);
-                        }});
-                    }
+                        }
+                    }});
                 }
                 0x3: decode FUNCT7 {
                     0x0: sltu({{
@@ -1181,14 +1180,13 @@ decode QUADRANT default Unknown::unknown() {
                     0x10: sh2add({{
                         Rd = rvSext((Rs1 << 2) + Rs2);
                     }});
-                    0x14: decode RVTYPE {
-                        0x0: xperm8_32({{
-                            Rd_sw = _rvk_emu_xperm8_32(Rs1_sw, Rs2_sw);
-                        }});
-                        0x1: xperm8_64({{
+                    0x14: xperm8({{
+                        if (machInst.rv_type == RV32) {
+                            Rd_sd = _rvk_emu_xperm8_32(Rs1_sd, Rs2_sd);
+                        } else {
                             Rd_sd = _rvk_emu_xperm8_64(Rs1_sd, Rs2_sd);
-                        }});
-                    }
+                        }
+                    }});
                     0x20: xnor({{
                         Rd = rvSext(~(Rs1 ^ Rs2));
                     }});

From dfa3c073cfd0112ab834fbf7d1056e0aa2a090e3 Mon Sep 17 00:00:00 2001
From: Prajwal Hegde <prhegde@wisc.edu>
Date: Thu, 27 Apr 2023 14:38:36 -0500
Subject: [PATCH 445/492] arch-arm,cpu: Add four Arm SVE2 int instructions

This changeset adds ARM SVE2 integer instructions
- ADCLB, ADCLT, SBCLB, SBCLT
- Decoding logic as per sve encoding of Version: 2023-03

Change-Id: I1bd3fe24b33677baa0b6da3c1dd7423f2b13b2c6
Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/70137
Tested-by: kokoro <noreply+kokoro@google.com>
Reviewed-by: Giacomo Travaglini <giacomo.travaglini@arm.com>
Maintainer: Giacomo Travaglini <giacomo.travaglini@arm.com>
---
 src/arch/arm/insts/sve.cc                  | 14 ++++++
 src/arch/arm/insts/sve.hh                  | 17 +++++++
 src/arch/arm/isa/formats/sve_2nd_level.isa | 46 +++++++++++++++++
 src/arch/arm/isa/formats/sve_top_level.isa | 22 ++++++--
 src/arch/arm/isa/insts/sve.isa             | 58 ++++++++++++++++++++++
 src/arch/arm/isa/templates/sve.isa         | 26 ++++++++++
 6 files changed, 179 insertions(+), 4 deletions(-)

diff --git a/src/arch/arm/insts/sve.cc b/src/arch/arm/insts/sve.cc
index 546074c8fd..b0512817a8 100644
--- a/src/arch/arm/insts/sve.cc
+++ b/src/arch/arm/insts/sve.cc
@@ -435,6 +435,20 @@ SveTerPredOp::generateDisassembly(
     return ss.str();
 }
 
+std::string
+SveTerUnpredOp::generateDisassembly(
+        Addr pc, const loader::SymbolTable *symtab) const
+{
+    std::stringstream ss;
+    printMnemonic(ss, "", false);
+    printVecReg(ss, dest, true);
+    ccprintf(ss, ", ");
+    printVecReg(ss, op1, true);
+    ccprintf(ss, ", ");
+    printVecReg(ss, op2, true);
+    return ss.str();
+}
+
 std::string
 SveTerImmUnpredOp::generateDisassembly(
         Addr pc, const loader::SymbolTable *symtab) const
diff --git a/src/arch/arm/insts/sve.hh b/src/arch/arm/insts/sve.hh
index 66d82f0a3f..de1163ee81 100644
--- a/src/arch/arm/insts/sve.hh
+++ b/src/arch/arm/insts/sve.hh
@@ -498,6 +498,23 @@ class SveTerPredOp : public ArmStaticInst
             Addr pc, const loader::SymbolTable *symtab) const override;
 };
 
+///SVE2 Accumulate instructions
+class SveTerUnpredOp : public ArmStaticInst
+{
+  protected:
+    RegIndex dest, op1, op2;
+
+    SveTerUnpredOp(const char* mnem, ExtMachInst _machInst,
+                    OpClass __opClass, RegIndex _dest,
+                    RegIndex _op1, RegIndex _op2) :
+        ArmStaticInst(mnem, _machInst, __opClass),
+        dest(_dest), op1(_op1), op2(_op2)
+    {}
+
+    std::string generateDisassembly(
+            Addr pc, const loader::SymbolTable *symtab) const override;
+};
+
 /// Ternary with immediate, destructive, unpredicated SVE instruction.
 class SveTerImmUnpredOp : public ArmStaticInst
 {
diff --git a/src/arch/arm/isa/formats/sve_2nd_level.isa b/src/arch/arm/isa/formats/sve_2nd_level.isa
index 2ee3817445..4281eeb632 100644
--- a/src/arch/arm/isa/formats/sve_2nd_level.isa
+++ b/src/arch/arm/isa/formats/sve_2nd_level.isa
@@ -2283,6 +2283,52 @@ namespace Aarch64
         return new Unknown64(machInst);
     } // decodeSveMultiplyIndexed
 
+    StaticInstPtr
+    decodeSve2ArithCarry(ExtMachInst machInst)
+    {
+        RegIndex zn = (RegIndex) (uint8_t) bits(machInst, 9, 5);
+        RegIndex zm = (RegIndex) (uint8_t) bits(machInst, 20, 16);
+        RegIndex zda = (RegIndex) (uint8_t) bits(machInst, 4, 0);
+
+        uint8_t size = bits(machInst, 23, 22);
+        if (size & 0x2) {
+            if (bits(machInst, 10)){
+                return decodeSveTerUnpred<Sve2Sbclt>(
+                        size, machInst, zda, zn, zm);
+            } else {
+                return decodeSveTerUnpred<Sve2Sbclb>(
+                        size, machInst, zda, zn, zm);
+            }
+        } else {
+            if (bits(machInst, 10)){
+                return decodeSveTerUnpred<Sve2Adclt>(
+                        size, machInst, zda, zn, zm);
+            } else {
+                return decodeSveTerUnpred<Sve2Adclb>(
+                    size, machInst, zda, zn, zm);
+            }
+        }
+        return new Unknown64(machInst);
+    } //decodeSve2ArithCarry
+
+    StaticInstPtr
+    decodeSve2Accum(ExtMachInst machInst)
+    {
+        uint8_t op0 = bits(machInst, 20, 17);
+        uint8_t op1 = bits(machInst, 13, 11);
+        if (op0 != 0 && op1 == 3) {
+            return new Unknown64(machInst);
+        }
+        switch (op1) {
+            case 2:
+                return decodeSve2ArithCarry(machInst);
+            default:
+                break;
+        }
+
+        return new Unknown64(machInst);
+    } //decodeSve2Accum
+
     StaticInstPtr
     decodeSveFpFastReduc(ExtMachInst machInst)
     {
diff --git a/src/arch/arm/isa/formats/sve_top_level.isa b/src/arch/arm/isa/formats/sve_top_level.isa
index 155ec1c42f..41861a87bc 100644
--- a/src/arch/arm/isa/formats/sve_top_level.isa
+++ b/src/arch/arm/isa/formats/sve_top_level.isa
@@ -69,6 +69,7 @@ namespace Aarch64
     StaticInstPtr decodeSvePsel(ExtMachInst machInst);
     StaticInstPtr decodeSveIntWideImmUnpred(ExtMachInst machInst);
     StaticInstPtr decodeSveClamp(ExtMachInst machInst);
+    StaticInstPtr decodeSve2Accum(ExtMachInst machInst);
 
     StaticInstPtr decodeSveMultiplyAddUnpred(ExtMachInst machInst);
     StaticInstPtr decodeSveMultiplyIndexed(ExtMachInst machInst);
@@ -173,10 +174,23 @@ namespace Aarch64
                 break;
             }
           case 0x2:
-            if (bits(machInst, 20)) {
-                return decodeSveIntWideImmPred(machInst);
-            } else {
-                return decodeSveLogMaskImm(machInst);
+            {
+              if (bits(machInst, 30) == 0) {
+                  if (bits(machInst, 20)) {
+                      return decodeSveIntWideImmPred(machInst);
+                } else {
+                    return decodeSveLogMaskImm(machInst);
+                }
+              } else {
+                uint8_t b_15_14 = bits(machInst, 15, 14);
+                switch (b_15_14) {
+                  case 3:
+                    return decodeSve2Accum(machInst);
+                  default :
+                    break;
+                }
+              }
+              break;
             }
           case 0x3:
             {
diff --git a/src/arch/arm/isa/insts/sve.isa b/src/arch/arm/isa/insts/sve.isa
index 97d4ec7e56..4c10cd6443 100644
--- a/src/arch/arm/isa/insts/sve.isa
+++ b/src/arch/arm/isa/insts/sve.isa
@@ -926,6 +926,19 @@ output header {{
         }
     }
 
+    // Decodes ternary, unpredicated SVE2 instructions
+    template <template <typename T> class Base>
+    StaticInstPtr
+    decodeSveTerUnpred(unsigned size, ExtMachInst machInst,
+                      RegIndex dest, RegIndex op1, RegIndex op2)
+    {
+        if (bits(size, 0) == 0) {
+            return new Base<uint32_t>(machInst, dest, op1, op2);
+        } else {
+            return new Base<uint64_t>(machInst, dest, op1, op2);
+        }
+    }
+
     // Decodes ternary with immediate operand, destructive, unpredicated SVE
     // instructions handling floating-point variants only.
     template <template <typename T> class Base>
@@ -2085,6 +2098,35 @@ let {{
                          'class_name' : 'Sve' + Name}
             exec_output += SveOpExecDeclare.subst(substDict)
 
+    # Generates definitions for ternary SVE2 intructions (unpredicated)
+    def sveTerInstUnpred(name, Name, opClass, types, op,
+                         isTop=False, isAdd=True, decoder='Generic'):
+        global header_output, exec_output, decoders
+        code = sveEnabledCheckCode + '''
+        unsigned const pairs = ArmStaticInst::getCurSveVecLen<Element>(
+                xc->tcBase()) / 2;
+        for (unsigned p = 0; p < pairs; p++) {
+            const Element& srcElem1 = AA64FpDestMerge_x[2 * p + 0];
+            const Element& srcElem2 = AA64FpOp1_x[%(src_top_elem)s];
+            int carryIn = bits(AA64FpOp2_x[2 * p + 1], 0);
+            Element %(op)s
+            int carryOut = findCarry(
+                                    sizeof(Element) * 8, res, srcElem1,
+                                    %(src_elem2)s) & 1;
+            AA64FpDest_x[2 * p + 0] = res;
+            AA64FpDest_x[2 * p + 1] = (Element)0x0 + carryOut;
+        }''' % {'op': op,
+                'src_top_elem' : '2 * p + 1' if isTop else '2 * p + 0',
+                'src_elem2' : 'srcElem2' if isAdd else '~(srcElem2)'}
+        iop = ArmInstObjParams(name, 'Sve2' + Name, 'SveTerUnpredOp',
+                               {'code': code, 'op_class': opClass}, [])
+        header_output += sveTerUnpredOpDeclare.subst(iop)
+        exec_output += SveOpExecute.subst(iop)
+        for type in types:
+            substDict = {'targs' : type,
+                         'class_name' : 'Sve2' + Name}
+            exec_output += SveOpExecDeclare.subst(substDict)
+
     # Generates definitions for ternary SVE instructions with indexed operand
     def sveTerIdxInst(name, Name, opClass, types, op, decoder='Generic'):
         global header_output, exec_output, decoders
@@ -4052,6 +4094,22 @@ let {{
     # MLS
     mlsCode = 'destElem -= srcElem1 * srcElem2;'
     sveTerInst('mls', 'Mls', 'SimdMultAccOp', signedTypes, mlsCode)
+    # ADCLT
+    adcltCode = 'res = srcElem1 + srcElem2 + carryIn;'
+    sveTerInstUnpred('adclt', 'Adclt', 'VectorIntegerArithOp', unsignedTypes,
+                       adcltCode, isTop=True, isAdd=True)
+    # ADCLB
+    adclbCode = 'res = srcElem1 + srcElem2 + carryIn;'
+    sveTerInstUnpred('adclb', 'Adclb', 'VectorIntegerArithOp', unsignedTypes,
+                      adclbCode, isTop=False, isAdd=True)
+    # SBCLT
+    sbcltCode = 'res = srcElem1 + ~(srcElem2) + carryIn;'
+    sveTerInstUnpred('sbclt', 'Sbclt', 'VectorIntegerArithOp', unsignedTypes,
+                      sbcltCode, isTop=True, isAdd=False)
+    # SBCLB
+    sbclbCode = 'res = srcElem1 + ~(srcElem2) + carryIn;'
+    sveTerInstUnpred('sbclb', 'Sbclb', 'VectorIntegerArithOp', unsignedTypes,
+                      sbclbCode, isTop=False, isAdd=False)
     # MOVPRFX (predicated)
     movCode = 'destElem = srcElem1;'
     sveUnaryInst('movprfx', 'MovprfxPredM', 'SimdMiscOp', unsignedTypes,
diff --git a/src/arch/arm/isa/templates/sve.isa b/src/arch/arm/isa/templates/sve.isa
index 9260441c2c..886fd7a0b2 100644
--- a/src/arch/arm/isa/templates/sve.isa
+++ b/src/arch/arm/isa/templates/sve.isa
@@ -489,6 +489,32 @@ class %(class_name)s : public %(base_class)s
 };
 }};
 
+def template sveTerUnpredOpDeclare {{
+template <class _Element>
+class %(class_name)s : public %(base_class)s
+{
+  private:
+    %(reg_idx_arr_decl)s;
+
+  protected:
+    typedef _Element Element;
+    typedef _Element TPElem;
+
+  public:
+    // Constructor
+    %(class_name)s(ExtMachInst machInst, RegIndex _dest,
+                   RegIndex _op1, RegIndex _op2) :
+        %(base_class)s("%(mnemonic)s", machInst, %(op_class)s,
+                       _dest, _op1, _op2)
+    {
+        %(set_reg_idx_arr)s;
+        %(constructor)s;
+    }
+
+    Fault execute(ExecContext *, trace::InstRecord *) const override;
+};
+}};
+
 def template SveTerImmUnpredOpDeclare {{
 template <class _Element>
 class %(class_name)s : public %(base_class)s

From dc76c00c9bb69ebb43899b0fb07b4f57c9add715 Mon Sep 17 00:00:00 2001
From: Giacomo Travaglini <giacomo.travaglini@arm.com>
Date: Wed, 24 May 2023 13:10:16 +0100
Subject: [PATCH 446/492] arch-arm: Add an ArmAllRelease containing every
 defined extension

This is probably the easiest way to instantiate a release containing
any implemented extension. It is alternatively possible to use the
latest release (e.g. Armv92 as of now).
This could be preferrable for consistency across simulations.
However if users want to always be up to date with development,
using ArmAllRelease will allow them to do so without the need
to change their configuration script

Change-Id: Ibca629e99da9b571f233de9d05a5a9186d02aa99
Signed-off-by: Giacomo Travaglini <giacomo.travaglini@arm.com>
Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/70958
Tested-by: kokoro <noreply+kokoro@google.com>
Reviewed-by: Richard Cooper <richard.cooper@arm.com>
Maintainer: Jason Lowe-Power <power.jg@gmail.com>
---
 src/arch/arm/ArmSystem.py | 12 ++++++++++++
 1 file changed, 12 insertions(+)

diff --git a/src/arch/arm/ArmSystem.py b/src/arch/arm/ArmSystem.py
index b826f0dec7..c1f5e9fd10 100644
--- a/src/arch/arm/ArmSystem.py
+++ b/src/arch/arm/ArmSystem.py
@@ -224,6 +224,18 @@ class Armv92(Armv85):
     extensions = Armv85.extensions + ["FEAT_SME"]
 
 
+class ArmAllRelease(ArmRelease):
+    """
+    A release containing any implemented extension.  It is alternatively
+    possible to use the latest release (e.g. Armv92 as of now).  This could be
+    preferrable for consistency across simulations.  However if users want to
+    always be up to date with development, using ArmAllRelease will allow them
+    to do so without the need to change their configuration script
+    """
+
+    extensions = ArmExtension.vals
+
+
 class ArmSystem(System):
     type = "ArmSystem"
     cxx_header = "arch/arm/system.hh"

From 2aa95ccc7d4695975c3ee686fc68c32bc616b36b Mon Sep 17 00:00:00 2001
From: Matthew Poremba <matthew.poremba@amd.com>
Date: Fri, 19 May 2023 13:39:43 -0500
Subject: [PATCH 447/492] arch-x86: Fix CPUID function 0

This should return the number of standard features, not the number of
extended features.

Change-Id: Ieb3a36d832cee603f1efd39b4f430b5ac0478561
Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/70778
Maintainer: Matt Sinclair <mattdsinclair@gmail.com>
Tested-by: kokoro <noreply+kokoro@google.com>
Reviewed-by: Matt Sinclair <mattdsinclair@gmail.com>
---
 src/arch/x86/cpuid.cc | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/arch/x86/cpuid.cc b/src/arch/x86/cpuid.cc
index 4ce66df777..ac4709ce0e 100644
--- a/src/arch/x86/cpuid.cc
+++ b/src/arch/x86/cpuid.cc
@@ -162,7 +162,7 @@ namespace X86ISA {
                   ISA *isa = dynamic_cast<ISA *>(tc->getIsaPtr());
                   auto vendor_string = isa->getVendorString();
                   result = CpuidResult(
-                          NumExtendedCpuidFuncs - 1,
+                          NumStandardCpuidFuncs - 1,
                           stringToRegister(vendor_string.c_str()),
                           stringToRegister(vendor_string.c_str() + 4),
                           stringToRegister(vendor_string.c_str() + 8));

From 6b4a1020bed8e2251adbc3f4cb065865e9006b12 Mon Sep 17 00:00:00 2001
From: Matthew Poremba <matthew.poremba@amd.com>
Date: Fri, 5 May 2023 10:50:48 -0500
Subject: [PATCH 448/492] configs,dev-amdgpu: GPUFS MI200/gfx90a support

Add support for MI200-like device. This includes adding PCI IDs and new
MMIOs for the device, a different MAP_PROCESS packet, and a different
calculation for the number of VGPRs.

Change-Id: I0fb7b3ad928826beaa5386d52a94ba504369cb0d
Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/70317
Reviewed-by: Jason Lowe-Power <power.jg@gmail.com>
Maintainer: Jason Lowe-Power <power.jg@gmail.com>
Tested-by: kokoro <noreply+kokoro@google.com>
---
 configs/example/gpufs/runfs.py           |  5 +-
 configs/example/gpufs/system/amdgpu.py   |  4 ++
 configs/example/gpufs/system/system.py   | 10 ++++
 src/dev/amdgpu/amdgpu_device.cc          | 11 +++-
 src/dev/amdgpu/amdgpu_device.hh          |  7 +++
 src/dev/amdgpu/amdgpu_nbio.cc            |  2 +
 src/dev/amdgpu/amdgpu_nbio.hh            |  5 ++
 src/dev/amdgpu/amdgpu_vm.hh              |  4 ++
 src/dev/amdgpu/pm4_defines.hh            | 58 +++++++++++++++++++++
 src/dev/amdgpu/pm4_packet_processor.cc   | 66 ++++++++++++++++++------
 src/dev/amdgpu/pm4_packet_processor.hh   |  4 +-
 src/gpu-compute/GPU.py                   |  2 +-
 src/gpu-compute/gpu_command_processor.cc |  3 +-
 src/gpu-compute/hsa_queue_entry.hh       | 19 +++++--
 14 files changed, 173 insertions(+), 27 deletions(-)

diff --git a/configs/example/gpufs/runfs.py b/configs/example/gpufs/runfs.py
index 4c906019c1..f8ef70d5a2 100644
--- a/configs/example/gpufs/runfs.py
+++ b/configs/example/gpufs/runfs.py
@@ -132,8 +132,9 @@ def addRunFSOptions(parser):
     parser.add_argument(
         "--gpu-device",
         default="Vega10",
-        choices=["Vega10", "MI100"],
-        help="GPU model to run: Vega10 (gfx900) or MI100 (gfx908)",
+        choices=["Vega10", "MI100", "MI200"],
+        help="GPU model to run: Vega10 (gfx900), MI100 (gfx908), or "
+        "MI200 (gfx90a)",
     )
 
 
diff --git a/configs/example/gpufs/system/amdgpu.py b/configs/example/gpufs/system/amdgpu.py
index 5f98b55c32..9697e50a04 100644
--- a/configs/example/gpufs/system/amdgpu.py
+++ b/configs/example/gpufs/system/amdgpu.py
@@ -177,6 +177,10 @@ def connectGPU(system, args):
         system.pc.south_bridge.gpu.DeviceID = 0x738C
         system.pc.south_bridge.gpu.SubsystemVendorID = 0x1002
         system.pc.south_bridge.gpu.SubsystemID = 0x0C34
+    elif args.gpu_device == "MI200":
+        system.pc.south_bridge.gpu.DeviceID = 0x740F
+        system.pc.south_bridge.gpu.SubsystemVendorID = 0x1002
+        system.pc.south_bridge.gpu.SubsystemID = 0x0C34
     elif args.gpu_device == "Vega10":
         system.pc.south_bridge.gpu.DeviceID = 0x6863
     else:
diff --git a/configs/example/gpufs/system/system.py b/configs/example/gpufs/system/system.py
index 90c5c01091..263ffc0a43 100644
--- a/configs/example/gpufs/system/system.py
+++ b/configs/example/gpufs/system/system.py
@@ -152,6 +152,16 @@ def makeGpuFSSystem(args):
             0x7D000,
         ]
         sdma_sizes = [0x1000] * 8
+    elif args.gpu_device == "MI200":
+        num_sdmas = 5
+        sdma_bases = [
+            0x4980,
+            0x6180,
+            0x78000,
+            0x79000,
+            0x7A000,
+        ]
+        sdma_sizes = [0x1000] * 5
     else:
         m5.util.panic(f"Unknown GPU device {args.gpu_device}")
 
diff --git a/src/dev/amdgpu/amdgpu_device.cc b/src/dev/amdgpu/amdgpu_device.cc
index 7037e6fb1c..3260d058b0 100644
--- a/src/dev/amdgpu/amdgpu_device.cc
+++ b/src/dev/amdgpu/amdgpu_device.cc
@@ -115,7 +115,7 @@ AMDGPUDevice::AMDGPUDevice(const AMDGPUDeviceParams &p)
         sdmaFunc.insert({0x10b, &SDMAEngine::setPageDoorbellOffsetLo});
         sdmaFunc.insert({0xe0, &SDMAEngine::setPageSize});
         sdmaFunc.insert({0x113, &SDMAEngine::setPageWptrLo});
-    } else if (p.device_name == "MI100") {
+    } else if (p.device_name == "MI100" || p.device_name == "MI200") {
         sdmaFunc.insert({0xd9, &SDMAEngine::setPageBaseLo});
         sdmaFunc.insert({0xe1, &SDMAEngine::setPageRptrLo});
         sdmaFunc.insert({0xe0, &SDMAEngine::setPageRptrHi});
@@ -144,10 +144,19 @@ AMDGPUDevice::AMDGPUDevice(const AMDGPUDeviceParams &p)
     if (p.device_name == "Vega10") {
         setRegVal(VEGA10_FB_LOCATION_BASE, mmhubBase >> 24);
         setRegVal(VEGA10_FB_LOCATION_TOP, mmhubTop >> 24);
+        gfx_version = GfxVersion::gfx900;
     } else if (p.device_name == "MI100") {
         setRegVal(MI100_FB_LOCATION_BASE, mmhubBase >> 24);
         setRegVal(MI100_FB_LOCATION_TOP, mmhubTop >> 24);
         setRegVal(MI100_MEM_SIZE_REG, 0x3ff0); // 16GB of memory
+        gfx_version = GfxVersion::gfx908;
+    } else if (p.device_name == "MI200") {
+        // This device can have either 64GB or 128GB of device memory.
+        // This limits to 16GB for simulation.
+        setRegVal(MI200_FB_LOCATION_BASE, mmhubBase >> 24);
+        setRegVal(MI200_FB_LOCATION_TOP, mmhubTop >> 24);
+        setRegVal(MI200_MEM_SIZE_REG, 0x3ff0);
+        gfx_version = GfxVersion::gfx90a;
     } else {
         panic("Unknown GPU device %s\n", p.device_name);
     }
diff --git a/src/dev/amdgpu/amdgpu_device.hh b/src/dev/amdgpu/amdgpu_device.hh
index cab799147e..56ed2f4fa8 100644
--- a/src/dev/amdgpu/amdgpu_device.hh
+++ b/src/dev/amdgpu/amdgpu_device.hh
@@ -42,6 +42,7 @@
 #include "dev/amdgpu/mmio_reader.hh"
 #include "dev/io_device.hh"
 #include "dev/pci/device.hh"
+#include "enums/GfxVersion.hh"
 #include "params/AMDGPUDevice.hh"
 
 namespace gem5
@@ -145,6 +146,9 @@ class AMDGPUDevice : public PciDevice
      */
     memory::PhysicalMemory deviceMem;
 
+    /* Device information */
+    GfxVersion gfx_version = GfxVersion::gfx900;
+
   public:
     AMDGPUDevice(const AMDGPUDeviceParams &p);
 
@@ -206,6 +210,9 @@ class AMDGPUDevice : public PciDevice
     uint16_t getVMID(Addr doorbell) { return doorbellVMIDMap[doorbell]; }
     std::unordered_map<uint16_t, std::set<int>>& getUsedVMIDs();
     void insertQId(uint16_t vmid, int id);
+
+    /* Device information */
+    GfxVersion getGfxVersion() const { return gfx_version; }
 };
 
 } // namespace gem5
diff --git a/src/dev/amdgpu/amdgpu_nbio.cc b/src/dev/amdgpu/amdgpu_nbio.cc
index 69e4373e64..07027c3765 100644
--- a/src/dev/amdgpu/amdgpu_nbio.cc
+++ b/src/dev/amdgpu/amdgpu_nbio.cc
@@ -75,12 +75,14 @@ AMDGPUNbio::readMMIO(PacketPtr pkt, Addr offset)
       case VEGA10_INV_ENG17_ACK2:
       case MI100_INV_ENG17_ACK2:
       case MI100_INV_ENG17_ACK3:
+      case MI200_INV_ENG17_ACK2:
         pkt->setLE<uint32_t>(0x10001);
         break;
       case VEGA10_INV_ENG17_SEM1:
       case VEGA10_INV_ENG17_SEM2:
       case MI100_INV_ENG17_SEM2:
       case MI100_INV_ENG17_SEM3:
+      case MI200_INV_ENG17_SEM2:
         pkt->setLE<uint32_t>(0x1);
         break;
       // PSP responds with bit 31 set when ready
diff --git a/src/dev/amdgpu/amdgpu_nbio.hh b/src/dev/amdgpu/amdgpu_nbio.hh
index d1e5391ec4..dc95443916 100644
--- a/src/dev/amdgpu/amdgpu_nbio.hh
+++ b/src/dev/amdgpu/amdgpu_nbio.hh
@@ -80,6 +80,11 @@ class AMDGPUDevice;
 #define MI100_INV_ENG17_SEM2                              0x6a888
 #define MI100_INV_ENG17_SEM3                              0x76888
 
+#define MI200_INV_ENG17_ACK1                              0x0a318
+#define MI200_INV_ENG17_ACK2                              0x6b018
+#define MI200_INV_ENG17_SEM1                              0x0a288
+#define MI200_INV_ENG17_SEM2                              0x6af88
+
 class AMDGPUNbio
 {
   public:
diff --git a/src/dev/amdgpu/amdgpu_vm.hh b/src/dev/amdgpu/amdgpu_vm.hh
index ac35a11968..f35a735111 100644
--- a/src/dev/amdgpu/amdgpu_vm.hh
+++ b/src/dev/amdgpu/amdgpu_vm.hh
@@ -81,6 +81,10 @@
 #define MI100_FB_LOCATION_BASE                                       0x6ac00
 #define MI100_FB_LOCATION_TOP                                        0x6ac04
 
+#define MI200_MEM_SIZE_REG                                           0x0378c
+#define MI200_FB_LOCATION_BASE                                       0x6b300
+#define MI200_FB_LOCATION_TOP                                        0x6b304
+
 // AMD GPUs support 16 different virtual address spaces
 static constexpr int AMDGPU_VM_COUNT = 16;
 
diff --git a/src/dev/amdgpu/pm4_defines.hh b/src/dev/amdgpu/pm4_defines.hh
index 42832d50bf..a303f8ef84 100644
--- a/src/dev/amdgpu/pm4_defines.hh
+++ b/src/dev/amdgpu/pm4_defines.hh
@@ -273,6 +273,64 @@ typedef struct GEM5_PACKED
 }  PM4MapProcess;
 static_assert(sizeof(PM4MapProcess) == 60);
 
+typedef struct GEM5_PACKED
+{
+    uint32_t pasid : 16;
+    uint32_t reserved0 : 8;
+    uint32_t diq : 1;
+    uint32_t processQuantum : 7;
+    union
+    {
+        struct
+        {
+            uint32_t ptBaseLo;
+            uint32_t ptBaseHi;
+        };
+        uint64_t ptBase;
+    };
+    uint32_t shMemBases;
+    uint32_t shMemConfig;
+    uint32_t sqShaderTbaLo;
+    uint32_t sqShaderTbaHi;
+    uint32_t sqShaderTmaLo;
+    uint32_t sqShaderTmaHi;
+    uint32_t reserved1;
+    union
+    {
+        struct
+        {
+            uint32_t gdsAddrLo;
+            uint32_t gdsAddrHi;
+        };
+        uint64_t gdsAddr;
+    };
+    union
+    {
+        struct
+        {
+            uint32_t numGws : 7;
+            uint32_t sdma_enable : 1;
+            uint32_t numOac : 4;
+            uint32_t reserved3 : 4;
+            uint32_t gdsSize : 6;
+            uint32_t numQueues : 10;
+        };
+        uint32_t ordinal14;
+    };
+    uint32_t spiGdbgPerVmidCntl;
+    uint32_t tcpWatchCntl[4];
+    union
+    {
+        struct
+        {
+            uint32_t completionSignalLo;
+            uint32_t completionSignalHi;
+        };
+        uint64_t completionSignal;
+    };
+}  PM4MapProcessMI200;
+static_assert(sizeof(PM4MapProcessMI200) == 80);
+
 typedef struct GEM5_PACKED
 {
     uint32_t function : 4;
diff --git a/src/dev/amdgpu/pm4_packet_processor.cc b/src/dev/amdgpu/pm4_packet_processor.cc
index 3690113ac4..e7b846529e 100644
--- a/src/dev/amdgpu/pm4_packet_processor.cc
+++ b/src/dev/amdgpu/pm4_packet_processor.cc
@@ -271,12 +271,21 @@ PM4PacketProcessor::decodeHeader(PM4Queue *q, PM4Header header)
                     dmaBuffer);
         } break;
       case IT_MAP_PROCESS: {
-        dmaBuffer = new PM4MapProcess();
-        cb = new DmaVirtCallback<uint64_t>(
-            [ = ] (const uint64_t &)
-                { mapProcess(q, (PM4MapProcess *)dmaBuffer); });
-        dmaReadVirt(getGARTAddr(q->rptr()), sizeof(PM4MapProcess), cb,
-                    dmaBuffer);
+        if (gpuDevice->getGfxVersion() == GfxVersion::gfx90a) {
+            dmaBuffer = new PM4MapProcessMI200();
+            cb = new DmaVirtCallback<uint64_t>(
+                [ = ] (const uint64_t &)
+                    { mapProcessGfx90a(q, (PM4MapProcessMI200 *)dmaBuffer); });
+            dmaReadVirt(getGARTAddr(q->rptr()), sizeof(PM4MapProcessMI200),
+                        cb, dmaBuffer);
+        } else {
+            dmaBuffer = new PM4MapProcess();
+            cb = new DmaVirtCallback<uint64_t>(
+                [ = ] (const uint64_t &)
+                    { mapProcessGfx9(q, (PM4MapProcess *)dmaBuffer); });
+            dmaReadVirt(getGARTAddr(q->rptr()), sizeof(PM4MapProcess), cb,
+                        dmaBuffer);
+        }
         } break;
 
       case IT_UNMAP_QUEUES: {
@@ -613,27 +622,50 @@ PM4PacketProcessor::doneMQDWrite(Addr mqdAddr, Addr addr) {
 }
 
 void
-PM4PacketProcessor::mapProcess(PM4Queue *q, PM4MapProcess *pkt)
+PM4PacketProcessor::mapProcess(uint32_t pasid, uint64_t ptBase,
+                               uint32_t shMemBases)
 {
-    q->incRptr(sizeof(PM4MapProcess));
-    uint16_t vmid = gpuDevice->allocateVMID(pkt->pasid);
+    uint16_t vmid = gpuDevice->allocateVMID(pasid);
 
-    DPRINTF(PM4PacketProcessor, "PM4 map_process pasid: %p vmid: %d quantum: "
-            "%d pt: %p signal: %p\n", pkt->pasid, vmid, pkt->processQuantum,
-            pkt->ptBase, pkt->completionSignal);
-
-    gpuDevice->getVM().setPageTableBase(vmid, pkt->ptBase);
-    gpuDevice->CP()->shader()->setHwReg(HW_REG_SH_MEM_BASES, pkt->shMemBases);
+    gpuDevice->getVM().setPageTableBase(vmid, ptBase);
+    gpuDevice->CP()->shader()->setHwReg(HW_REG_SH_MEM_BASES, shMemBases);
 
     // Setup the apertures that gem5 uses. These values are bits [63:48].
-    Addr lds_base = (Addr)bits(pkt->shMemBases, 31, 16) << 48;
-    Addr scratch_base = (Addr)bits(pkt->shMemBases, 15, 0) << 48;
+    Addr lds_base = (Addr)bits(shMemBases, 31, 16) << 48;
+    Addr scratch_base = (Addr)bits(shMemBases, 15, 0) << 48;
 
     // There does not seem to be any register for the limit, but the driver
     // assumes scratch and LDS have a 4GB aperture, so use that.
     gpuDevice->CP()->shader()->setLdsApe(lds_base, lds_base + 0xFFFFFFFF);
     gpuDevice->CP()->shader()->setScratchApe(scratch_base,
                                              scratch_base + 0xFFFFFFFF);
+}
+
+void
+PM4PacketProcessor::mapProcessGfx9(PM4Queue *q, PM4MapProcess *pkt)
+{
+    q->incRptr(sizeof(PM4MapProcess));
+
+    DPRINTF(PM4PacketProcessor, "PM4 map_process pasid: %p quantum: "
+            "%d pt: %p signal: %p\n", pkt->pasid, pkt->processQuantum,
+            pkt->ptBase, pkt->completionSignal);
+
+    mapProcess(pkt->pasid, pkt->ptBase, pkt->shMemBases);
+
+    delete pkt;
+    decodeNext(q);
+}
+
+void
+PM4PacketProcessor::mapProcessGfx90a(PM4Queue *q, PM4MapProcessMI200 *pkt)
+{
+    q->incRptr(sizeof(PM4MapProcessMI200));
+
+    DPRINTF(PM4PacketProcessor, "PM4 map_process pasid: %p quantum: "
+            "%d pt: %p signal: %p\n", pkt->pasid, pkt->processQuantum,
+            pkt->ptBase, pkt->completionSignal);
+
+    mapProcess(pkt->pasid, pkt->ptBase, pkt->shMemBases);
 
     delete pkt;
     decodeNext(q);
diff --git a/src/dev/amdgpu/pm4_packet_processor.hh b/src/dev/amdgpu/pm4_packet_processor.hh
index 4617a21a06..3fb055148c 100644
--- a/src/dev/amdgpu/pm4_packet_processor.hh
+++ b/src/dev/amdgpu/pm4_packet_processor.hh
@@ -141,7 +141,9 @@ class PM4PacketProcessor : public DmaVirtDevice
     void mapQueues(PM4Queue *q, PM4MapQueues *pkt);
     void unmapQueues(PM4Queue *q, PM4UnmapQueues *pkt);
     void doneMQDWrite(Addr mqdAddr, Addr addr);
-    void mapProcess(PM4Queue *q, PM4MapProcess *pkt);
+    void mapProcess(uint32_t pasid, uint64_t ptBase, uint32_t shMemBases);
+    void mapProcessGfx9(PM4Queue *q, PM4MapProcess *pkt);
+    void mapProcessGfx90a(PM4Queue *q, PM4MapProcessMI200 *pkt);
     void processMQD(PM4MapQueues *pkt, PM4Queue *q, Addr addr, QueueDesc *mqd,
                     uint16_t vmid);
     void processSDMAMQD(PM4MapQueues *pkt, PM4Queue *q, Addr addr,
diff --git a/src/gpu-compute/GPU.py b/src/gpu-compute/GPU.py
index 3a87186a30..c5449cc398 100644
--- a/src/gpu-compute/GPU.py
+++ b/src/gpu-compute/GPU.py
@@ -45,7 +45,7 @@ class PrefetchType(Enum):
 
 
 class GfxVersion(ScopedEnum):
-    vals = ["gfx801", "gfx803", "gfx900", "gfx902"]
+    vals = ["gfx801", "gfx803", "gfx900", "gfx902", "gfx908", "gfx90a"]
 
 
 class PoolManager(SimObject):
diff --git a/src/gpu-compute/gpu_command_processor.cc b/src/gpu-compute/gpu_command_processor.cc
index af59b7822b..9755180204 100644
--- a/src/gpu-compute/gpu_command_processor.cc
+++ b/src/gpu-compute/gpu_command_processor.cc
@@ -228,7 +228,8 @@ GPUCommandProcessor::submitDispatchPkt(void *raw_pkt, uint32_t queue_id,
     DPRINTF(GPUKernelInfo, "Kernel name: %s\n", kernel_name.c_str());
 
     HSAQueueEntry *task = new HSAQueueEntry(kernel_name, queue_id,
-        dynamic_task_id, raw_pkt, &akc, host_pkt_addr, machine_code_addr);
+        dynamic_task_id, raw_pkt, &akc, host_pkt_addr, machine_code_addr,
+        gpuDevice->getGfxVersion());
 
     DPRINTF(GPUCommandProc, "Task ID: %i Got AQL: wg size (%dx%dx%d), "
         "grid size (%dx%dx%d) kernarg addr: %#x, completion "
diff --git a/src/gpu-compute/hsa_queue_entry.hh b/src/gpu-compute/hsa_queue_entry.hh
index fbe0efef21..4083c1c85a 100644
--- a/src/gpu-compute/hsa_queue_entry.hh
+++ b/src/gpu-compute/hsa_queue_entry.hh
@@ -51,6 +51,7 @@
 #include "base/types.hh"
 #include "dev/hsa/hsa_packet.hh"
 #include "dev/hsa/hsa_queue.hh"
+#include "enums/GfxVersion.hh"
 #include "gpu-compute/kernel_code.hh"
 
 namespace gem5
@@ -61,7 +62,7 @@ class HSAQueueEntry
   public:
     HSAQueueEntry(std::string kernel_name, uint32_t queue_id,
                   int dispatch_id, void *disp_pkt, AMDKernelCode *akc,
-                  Addr host_pkt_addr, Addr code_addr)
+                  Addr host_pkt_addr, Addr code_addr, GfxVersion gfx_version)
         : kernName(kernel_name),
           _wgSize{{(int)((_hsa_dispatch_packet_t*)disp_pkt)->workgroup_size_x,
                   (int)((_hsa_dispatch_packet_t*)disp_pkt)->workgroup_size_y,
@@ -92,9 +93,19 @@ class HSAQueueEntry
         // we need to rip register usage from the resource registers.
         //
         // We can't get an exact number of registers from the resource
-        // registers because they round, but we can get an upper bound on it
-        if (!numVgprs)
-            numVgprs = (akc->granulated_workitem_vgpr_count + 1) * 4;
+        // registers because they round, but we can get an upper bound on it.
+        // We determine the number of registers by solving for "vgprs_used"
+        // in the LLVM docs: https://www.llvm.org/docs/AMDGPUUsage.html
+        //     #code-object-v3-kernel-descriptor
+        // Currently, the only supported gfx version in gem5 that computes
+        // this differently is gfx90a.
+        if (!numVgprs) {
+            if (gfx_version == GfxVersion::gfx90a) {
+                numVgprs = (akc->granulated_workitem_vgpr_count + 1) * 8;
+            } else {
+                numVgprs = (akc->granulated_workitem_vgpr_count + 1) * 4;
+            }
+        }
 
         if (!numSgprs || numSgprs ==
             std::numeric_limits<decltype(akc->wavefront_sgpr_count)>::max()) {

From 87ec6919a39083c9e54eb686c235316c2b2062d3 Mon Sep 17 00:00:00 2001
From: Matthew Poremba <matthew.poremba@amd.com>
Date: Wed, 19 Apr 2023 15:57:47 -0500
Subject: [PATCH 449/492] mem: Handle DRAM write queue drain and disabled power
 down

Write queue drain logic seems off currently. An event is scheduled if
the write queue is empty instead of non-empty. There is no check to see
if draining is complete when bus is in write mode. Finally the power
down check on drain always fails if DRAM powerdown is disabled.

This changeset reverses the drain conditional for the write queue to
schedule an event if the write queue is *not* empty and checks in the
event processing method that the queues are all empty so that
signalDrainDone can be called. Lastly the powerdown state is ignored if
DRAM powerdown is disabled. Powerdown is disabled in the GPU_VIPER
protocol by default. This changeset successfully drains and checkpoints
a GPUFS simulation using GPU_VIPER protocol.

Change-Id: I5459856a694c9054b28677049a06b99b9ad91bbb
Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/69917
Tested-by: kokoro <noreply+kokoro@google.com>
Maintainer: Jason Lowe-Power <power.jg@gmail.com>
Reviewed-by: Jason Lowe-Power <power.jg@gmail.com>
---
 src/mem/dram_interface.hh | 13 ++++++++++++-
 src/mem/mem_ctrl.cc       | 14 +++++++++++---
 2 files changed, 23 insertions(+), 4 deletions(-)

diff --git a/src/mem/dram_interface.hh b/src/mem/dram_interface.hh
index fa9d319a0c..e20e33faf9 100644
--- a/src/mem/dram_interface.hh
+++ b/src/mem/dram_interface.hh
@@ -380,7 +380,18 @@ class DRAMInterface : public MemInterface
          * @param Return true if the rank is idle from a bank
          *        and power point of view
          */
-        bool inPwrIdleState() const { return pwrState == PWR_IDLE; }
+        bool
+        inPwrIdleState() const
+        {
+            // If powerdown is not enabled, then the ranks never go to idle
+            // states. In that case return true here to prevent checkpointing
+            // from getting stuck waiting for DRAM to be idle.
+            if (!dram.enableDRAMPowerdown) {
+                return true;
+            }
+
+            return pwrState == PWR_IDLE;
+        }
 
         /**
          * Trigger a self-refresh exit if there are entries enqueued
diff --git a/src/mem/mem_ctrl.cc b/src/mem/mem_ctrl.cc
index 543d6373d9..290db3ebe7 100644
--- a/src/mem/mem_ctrl.cc
+++ b/src/mem/mem_ctrl.cc
@@ -908,6 +908,13 @@ MemCtrl::processNextReqEvent(MemInterface* mem_intr,
         }
     }
 
+    if (drainState() == DrainState::Draining && !totalWriteQueueSize &&
+        !totalReadQueueSize && respQEmpty() && allIntfDrained()) {
+
+        DPRINTF(Drain, "MemCtrl controller done draining\n");
+        signalDrainDone();
+    }
+
     // updates current state
     busState = busStateNext;
 
@@ -1411,8 +1418,8 @@ MemCtrl::drain()
 {
     // if there is anything in any of our internal queues, keep track
     // of that as well
-    if (!(!totalWriteQueueSize && !totalReadQueueSize && respQueue.empty() &&
-          allIntfDrained())) {
+    if (totalWriteQueueSize || totalReadQueueSize || !respQueue.empty() ||
+          !allIntfDrained()) {
 
         DPRINTF(Drain, "Memory controller not drained, write: %d, read: %d,"
                 " resp: %d\n", totalWriteQueueSize, totalReadQueueSize,
@@ -1420,7 +1427,8 @@ MemCtrl::drain()
 
         // the only queue that is not drained automatically over time
         // is the write queue, thus kick things into action if needed
-        if (!totalWriteQueueSize && !nextReqEvent.scheduled()) {
+        if (totalWriteQueueSize && !nextReqEvent.scheduled()) {
+            DPRINTF(Drain,"Scheduling nextReqEvent from drain\n");
             schedule(nextReqEvent, curTick());
         }
 

From 94a629b527174676fba1b04af5df0f51ed27ad15 Mon Sep 17 00:00:00 2001
From: Richard Cooper <richard.cooper@arm.com>
Date: Fri, 21 Aug 2020 20:11:31 +0100
Subject: [PATCH 450/492] arch-arm: Add support for Arm SVE fmmla instruction.

Add support for the Arm SVE Floating Point Matrix Multiply-Accumulate
(FMMLA) instruction. Both 32-bit element (single precision) and 64-bit
element (double precision) encodings are implemented, but because the
associated required instructions (LD1RO*, etc) have not yet been
implemented, the SVE Feature ID register 0 (ID_AA64ZFR0_EL1) has only
been updated to indicate 32-bit element support at this time.

For more information please refer to the "ARM Architecture Reference
Manual Supplement - The Scalable Vector Extension (SVE), for ARMv8-A"
(https://developer.arm.com/architectures/cpu-architecture/a-profile/
docs/arm-architecture-reference-manual-supplement-armv8-a)

Additional Contributors: Giacomo Travaglini

Change-Id: If3547378ffa48527fe540767399bcc37a5dab524
Reviewed-by: Richard Cooper <richard.cooper@arm.com>
Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/70726
Reviewed-by: Andreas Sandberg <andreas.sandberg@arm.com>
Maintainer: Andreas Sandberg <andreas.sandberg@arm.com>
Maintainer: Giacomo Travaglini <giacomo.travaglini@arm.com>
Tested-by: kokoro <noreply+kokoro@google.com>
Reviewed-by: Giacomo Travaglini <giacomo.travaglini@arm.com>
---
 src/arch/arm/ArmISA.py                      |   1 +
 src/arch/arm/ArmSystem.py                   |   3 +
 src/arch/arm/insts/sve.hh                   |   2 +-
 src/arch/arm/insts/vector_element_traits.hh |  73 ++++++++++++
 src/arch/arm/isa/formats/sve_2nd_level.isa  |  25 ++++-
 src/arch/arm/isa/formats/sve_top_level.isa  |   8 +-
 src/arch/arm/isa/includes.isa               |   1 +
 src/arch/arm/isa/insts/sve.isa              | 117 +++++++++++++++++++-
 src/arch/arm/isa/operands.isa               |   2 +
 src/arch/arm/isa/templates/sve.isa          |  58 +++++++++-
 src/arch/arm/process.cc                     |   3 +
 src/arch/arm/regs/misc.cc                   |   5 +
 12 files changed, 291 insertions(+), 7 deletions(-)
 create mode 100644 src/arch/arm/insts/vector_element_traits.hh

diff --git a/src/arch/arm/ArmISA.py b/src/arch/arm/ArmISA.py
index 37970dce83..31ecbcbd15 100644
--- a/src/arch/arm/ArmISA.py
+++ b/src/arch/arm/ArmISA.py
@@ -53,6 +53,7 @@ class ArmDefaultSERelease(ArmRelease):
         "FEAT_LSE",
         "FEAT_RDM",
         # Armv8.2
+        "FEAT_F32MM",
         "FEAT_SVE",
         # Armv8.3
         "FEAT_FCMA",
diff --git a/src/arch/arm/ArmSystem.py b/src/arch/arm/ArmSystem.py
index c1f5e9fd10..5a7ae799b7 100644
--- a/src/arch/arm/ArmSystem.py
+++ b/src/arch/arm/ArmSystem.py
@@ -78,6 +78,7 @@ class ArmExtension(ScopedEnum):
         "FEAT_UAO",
         "FEAT_LVA",  # Optional in Armv8.2
         "FEAT_LPA",  # Optional in Armv8.2
+        "FEAT_F32MM",  # Optional in Armv8.2
         # Armv8.3
         "FEAT_FCMA",
         "FEAT_JSCVT",
@@ -163,6 +164,7 @@ class ArmDefaultRelease(Armv8):
         "FEAT_LVA",
         "FEAT_LPA",
         "FEAT_SVE",
+        "FEAT_F32MM",
         # Armv8.3
         "FEAT_FCMA",
         "FEAT_JSCVT",
@@ -196,6 +198,7 @@ class Armv82(Armv81):
         "FEAT_LVA",
         "FEAT_LPA",
         "FEAT_SVE",
+        "FEAT_F32MM",
     ]
 
 
diff --git a/src/arch/arm/insts/sve.hh b/src/arch/arm/insts/sve.hh
index de1163ee81..dc18ff30a7 100644
--- a/src/arch/arm/insts/sve.hh
+++ b/src/arch/arm/insts/sve.hh
@@ -498,7 +498,7 @@ class SveTerPredOp : public ArmStaticInst
             Addr pc, const loader::SymbolTable *symtab) const override;
 };
 
-///SVE2 Accumulate instructions
+/// Ternary, destructive, unpredicated SVE instruction.
 class SveTerUnpredOp : public ArmStaticInst
 {
   protected:
diff --git a/src/arch/arm/insts/vector_element_traits.hh b/src/arch/arm/insts/vector_element_traits.hh
new file mode 100644
index 0000000000..3495bef2f9
--- /dev/null
+++ b/src/arch/arm/insts/vector_element_traits.hh
@@ -0,0 +1,73 @@
+/*
+ * Copyright (c) 2020 ARM Limited
+ * All rights reserved
+ *
+ * The license below extends only to copyright in the software and shall
+ * not be construed as granting a license to any other intellectual
+ * property including but not limited to intellectual property relating
+ * to a hardware implementation of the functionality of the software
+ * licensed hereunder.  You may use the software subject to the license
+ * terms below provided that you ensure that this notice is replicated
+ * unmodified and in its entirety in all distributions of the software,
+ * modified or unmodified, in source code or in binary form.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are
+ * met: redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer;
+ * redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution;
+ * neither the name of the copyright holders nor the names of its
+ * contributors may be used to endorse or promote products derived from
+ * this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef __ARCH_ARM_VECTOR_ELEMENT_TRAITS_HH__
+#define __ARCH_ARM_VECTOR_ELEMENT_TRAITS_HH__
+
+#include <type_traits>
+
+namespace gem5 {
+namespace ArmISA {
+namespace vector_element_traits {
+
+
+// Make an integral type with the size of IntDestElemType but the
+// signed-ness of IntSrcElemType. The size of IntDestElemType must be
+// greater than or equal to the size of IntSrcElemType.
+template<typename IntDestElemType,
+         typename IntSrcElemType>
+class extend_element
+{
+  public:
+    static_assert(std::is_integral<IntDestElemType>::value
+                  && std::is_integral<IntSrcElemType>::value
+                  && sizeof(IntDestElemType) >= sizeof(IntSrcElemType),
+                  "Extended Element Dest and Src types must both be "
+                  "integer types, and Dest must be at least as large "
+                  "as Src.");
+    using type = typename std::conditional<
+        std::is_signed<IntSrcElemType>::value,
+        typename std::make_signed<IntDestElemType>::type,
+        typename std::make_unsigned<IntDestElemType>::type>::type;
+};
+
+
+} // namespace vector_element_traits
+} // namespace ArmISA
+} // namespace gem5
+
+#endif // __ARCH_ARM_VECTOR_ELEMENT_TRAITS_HH__
diff --git a/src/arch/arm/isa/formats/sve_2nd_level.isa b/src/arch/arm/isa/formats/sve_2nd_level.isa
index 4281eeb632..440722ac72 100644
--- a/src/arch/arm/isa/formats/sve_2nd_level.isa
+++ b/src/arch/arm/isa/formats/sve_2nd_level.isa
@@ -1,4 +1,4 @@
-// Copyright (c) 2017-2019 ARM Limited
+// Copyright (c) 2017-2020 ARM Limited
 // All rights reserved
 //
 // The license below extends only to copyright in the software and shall
@@ -2882,6 +2882,29 @@ namespace Aarch64
         return new Unknown64(machInst);
     }  // decodeSveFpFusedMulAdd
 
+    StaticInstPtr
+    decodeSveFpFusedMatMulAdd(ExtMachInst machInst)
+    {
+        RegIndex zda = (RegIndex) (uint8_t) bits(machInst, 4, 0);
+        RegIndex zn = (RegIndex) (uint8_t) bits(machInst, 9, 5);
+        RegIndex zm = (RegIndex) (uint8_t) bits(machInst, 20, 16);
+
+        uint8_t size = bits(machInst, 23, 22);
+        switch (size) {
+          case 0x1:
+            // BFMMLA goes here when implemented.
+            return new Unknown64(machInst);
+          case 0x2:
+            return new SveFmmla<uint32_t,uint32_t,uint32_t>(
+                machInst, zda, zn, zm);
+          case 0x3:
+            return new SveFmmla<uint64_t,uint64_t,uint64_t>(
+                machInst, zda, zn, zm);
+          default:
+            return new Unknown64(machInst);
+        }
+    }  // decodeSveFpFusedMatMulAdd
+
     StaticInstPtr
     decodeSveFpCplxAdd(ExtMachInst machInst)
     {
diff --git a/src/arch/arm/isa/formats/sve_top_level.isa b/src/arch/arm/isa/formats/sve_top_level.isa
index 41861a87bc..b0579fba04 100644
--- a/src/arch/arm/isa/formats/sve_top_level.isa
+++ b/src/arch/arm/isa/formats/sve_top_level.isa
@@ -1,4 +1,4 @@
-// Copyright (c) 2017-2019 ARM Limited
+// Copyright (c) 2017-2020 ARM Limited
 // All rights reserved
 //
 // The license below extends only to copyright in the software and shall
@@ -83,6 +83,7 @@ namespace Aarch64
     StaticInstPtr decodeSveFpUnaryPred(ExtMachInst machInst);
     StaticInstPtr decodeSveFpCmpVec(ExtMachInst machInst);
     StaticInstPtr decodeSveFpFusedMulAdd(ExtMachInst machInst);
+    StaticInstPtr decodeSveFpFusedMatMulAdd(ExtMachInst machInst);
     StaticInstPtr decodeSveFpCplxAdd(ExtMachInst machInst);
     StaticInstPtr decodeSveFpCplxMulAddVec(ExtMachInst machInst);
     StaticInstPtr decodeSveFpMulAddIndexed(ExtMachInst machInst);
@@ -269,9 +270,10 @@ namespace Aarch64
               case 0:
                 return decodeSveFpMulAddIndexed(machInst);
               case 4:
-                if (!bits(machInst, 10))
+                if (bits(machInst, 10))
+                   return decodeSveFpFusedMatMulAdd(machInst);
+                else
                     return decodeSveFpMulIndexed(machInst);
-                [[fallthrough]];
               default:
                 return new Unknown64(machInst);
             }
diff --git a/src/arch/arm/isa/includes.isa b/src/arch/arm/isa/includes.isa
index e2534a6728..cde035a563 100644
--- a/src/arch/arm/isa/includes.isa
+++ b/src/arch/arm/isa/includes.isa
@@ -66,6 +66,7 @@ output header {{
 #include "arch/arm/insts/sve.hh"
 #include "arch/arm/insts/sve_mem.hh"
 #include "arch/arm/insts/tme64.hh"
+#include "arch/arm/insts/vector_element_traits.hh"
 #include "arch/arm/insts/vfp.hh"
 #include "enums/DecoderFlavor.hh"
 #include "mem/packet.hh"
diff --git a/src/arch/arm/isa/insts/sve.isa b/src/arch/arm/isa/insts/sve.isa
index 4c10cd6443..74eacb8db7 100644
--- a/src/arch/arm/isa/insts/sve.isa
+++ b/src/arch/arm/isa/insts/sve.isa
@@ -1,4 +1,4 @@
-// Copyright (c) 2017-2019 ARM Limited
+// Copyright (c) 2017-2020 ARM Limited
 // All rights reserved
 //
 // The license below extends only to copyright in the software and shall
@@ -2188,6 +2188,111 @@ let {{
                          'class_name' : 'Sve' + Name}
             exec_output += SveOpExecDeclare.subst(substDict)
 
+    # Generates definitions for ternary destructive SVE Matrix
+    # Multiplication instructions (not predicated)
+    #
+    # `type_specs` can either be a sequence of types for cases where
+    # the dest and source matrices have the same element types, or a
+    # sequence of 3-tuples for case where the dest and source matrices
+    # have differnet element types.
+    #
+    # The calculation Z = Z + A x B is performed for full matrices
+    # Z (numDestRows x numDestCols), A (numDestRows x K), and
+    # B(K x numDestCols), and remaining elemnts of Z are set to zero.
+    # The vector length must be large enough for one full matrix or
+    # an UndefinedInstruction Fault is generated.
+    #
+    def sveMatMulInst(name, Name, opClass, type_specs,
+                      numDestRows, numDestCols, K,
+                      elt_mul_op):
+        global header_output, exec_output
+        code = sveEnabledCheckCode + '''
+        // Types of the extended versions of the source elements.
+        // Required to make sure the itermediate calculations don't overflow.
+        using ExtendedElementA = typename vector_element_traits::
+                                   extend_element<DestElement,
+                                                  SrcElementA>::type;
+        using ExtendedElementB = typename vector_element_traits::
+                                   extend_element<DestElement,
+                                                  SrcElementB>::type;
+
+        // Element count of destination vector
+        unsigned eCount = ArmStaticInst::getCurSveVecLen<DestElement>(
+                xc->tcBase());
+
+        // SVE Matrix operations require that there are at least 4
+        // elements (one full matrix). Further matrices may be partial,
+        // in which case the trailing dest elements are filled with zeros.
+        if (eCount < 4) {
+            return std::make_shared<UndefinedInstruction>(machInst, false,
+                                                          "%(mnemonic)s");
+        }
+
+        // Some properties of the source and dest matrix dimensions
+        //   ( numDestRows x numDestCols ) <- (numDestRows x K) .
+        //                                        (K x numDestCols)
+        constexpr unsigned numDestRows = %(numDestRows)d;
+        constexpr unsigned numDestCols = %(numDestCols)d;
+        constexpr unsigned K = %(K)d;
+
+        constexpr unsigned eltsPerDestMatrix = numDestRows * numDestCols;
+        constexpr unsigned eltsPerSrcAMatrix = numDestRows * K;
+        constexpr unsigned eltsPerSrcBMatrix = K * numDestCols;
+
+        // Number of full matrices - there may be some elements left over
+        const unsigned mCount = eCount / eltsPerDestMatrix;
+
+        // Calculate z_ij = Sum[k=1..K](a_ik * b_kj)
+
+        unsigned zEltIdx = 0; // Index of the result element being produced
+        unsigned aMatIdx = 0; // Index of the first element of the A matrix
+        unsigned bMatIdx = 0; // Index of the first element of the B matrix
+        for (unsigned matIdx = 0; matIdx < mCount; ++matIdx) {
+            for (unsigned rowIdx = 0; rowIdx < numDestRows; ++rowIdx) {
+                for (unsigned colIdx = 0; colIdx < numDestCols; ++colIdx) {
+                    DestElement destElem =
+                        static_cast<DestElement>(AA64FpDestMerge_x[zEltIdx]);
+                    for (unsigned k = 0; k < K; ++k) {
+                        const ExtendedElementA srcElemA =
+                            static_cast<ExtendedElementA>
+                                                (AA64FpOp1_srcA[aMatIdx + K * rowIdx + k]);
+                        const ExtendedElementB srcElemB =
+                            static_cast<ExtendedElementB>
+                                                (AA64FpOp2_srcB[bMatIdx + K * colIdx + k]);
+
+                        // Do the math operation. Should be of form:
+                        //   destElem += f(destElem, srcElemA, srcElemB);
+                        %(elt_mul_op)s;
+                    }
+                    AA64FpDest_x[zEltIdx++] = destElem;
+                }
+            }
+            aMatIdx += eltsPerSrcAMatrix;
+            bMatIdx += eltsPerSrcBMatrix;
+        }
+
+        // Zero-fill any trailing elements
+        for (unsigned i = mCount * eltsPerDestMatrix; i < eCount; ++i) {
+            AA64FpDest_x[i] = static_cast<DestElement>(0);
+        }
+        ''' % {'elt_mul_op': elt_mul_op, 'mnemonic': name,
+               'numDestRows': numDestRows, 'numDestCols': numDestCols,
+               'K': K}
+        iop = InstObjParams(name, 'Sve' + Name, 'SveTerUnpredOp',
+                            {'code': code, 'op_class': opClass}, [])
+        header_output += SveMatMulOpDeclare.subst(iop)
+        exec_output += SveMatMulOpExecute.subst(iop)
+        for type_spec in type_specs:
+            try:
+                destEltType, srcEltAType, srcEltBType = type_spec
+            except ValueError:
+                destEltType, srcEltAType, srcEltBType = (type_spec,) * 3
+            substDict = {'destEltType': destEltType,
+                         'srcEltAType': srcEltAType,
+                         'srcEltBType': srcEltBType,
+                         'class_name': 'Sve' + Name}
+            exec_output += SveMatMulOpExecDeclare.subst(substDict)
+
     # Generates definitions for PTRUE and PTRUES instructions.
     def svePtrueInst(name, Name, opClass, types, isFlagSetting=False,
                      decoder='Generic'):
@@ -3822,6 +3927,16 @@ let {{
     # FMLS (indexed)
     sveTerIdxInst('fmls', 'FmlsIdx', 'SimdFloatMultAccOp', floatTypes,
                   fmlsCode, PredType.MERGE)
+
+    fmmlaCode = fpOp % '''
+        fplibAdd<DestElement>(destElem,
+            fplibMul<DestElement>(srcElemA, srcElemB, fpscr), fpscr);
+    '''
+    # FMMLA (vectors)
+    sveMatMulInst('fmmla', 'Fmmla', 'SimdFloatMultAccOp', floatTypes,
+                  numDestRows=2, numDestCols=2, K=2,
+                  elt_mul_op=fmmlaCode)
+
     # FMLS (vectors)
     sveTerInst('fmls', 'Fmls', 'SimdFloatMultAccOp', floatTypes, fmlsCode,
                PredType.MERGE)
diff --git a/src/arch/arm/isa/operands.isa b/src/arch/arm/isa/operands.isa
index 24a0af9155..5bba00f138 100644
--- a/src/arch/arm/isa/operands.isa
+++ b/src/arch/arm/isa/operands.isa
@@ -60,6 +60,8 @@ def operand_types {{
     'xs1' : 'TPS1Elem',
     'xs2' : 'TPS2Elem',
     'xd' : 'TPDElem',
+    'srcA' : 'TPSrcAElem',
+    'srcB' : 'TPSrcBElem',
     'pc' : 'ArmISA::VecPredRegContainer',
     'pb' : 'uint8_t'
 }};
diff --git a/src/arch/arm/isa/templates/sve.isa b/src/arch/arm/isa/templates/sve.isa
index 886fd7a0b2..65abb1bcbb 100644
--- a/src/arch/arm/isa/templates/sve.isa
+++ b/src/arch/arm/isa/templates/sve.isa
@@ -1,4 +1,4 @@
-// Copyright (c) 2018-2019 ARM Limited
+// Copyright (c) 2018-2020 ARM Limited
 // All rights reserved
 //
 // The license below extends only to copyright in the software and shall
@@ -515,6 +515,33 @@ class %(class_name)s : public %(base_class)s
 };
 }};
 
+def template SveMatMulOpDeclare {{
+template <typename DestElement,
+          typename SrcElementA,
+          typename SrcElementB>
+class %(class_name)s : public %(base_class)s
+{
+  private:
+    %(reg_idx_arr_decl)s;
+  protected:
+    typedef DestElement TPElem;
+    typedef SrcElementA TPSrcAElem;
+    typedef SrcElementB TPSrcBElem;
+  public:
+    // Constructor
+    %(class_name)s(ExtMachInst machInst,
+                   RegIndex _dest, RegIndex _op1, RegIndex _op2)
+        : %(base_class)s("%(mnemonic)s", machInst, %(op_class)s,
+                         _dest, _op1, _op2)
+    {
+        %(set_reg_idx_arr)s;
+        %(constructor)s;
+    }
+
+    Fault execute(ExecContext *, trace::InstRecord *) const override;
+};
+}};
+
 def template SveTerImmUnpredOpDeclare {{
 template <class _Element>
 class %(class_name)s : public %(base_class)s
@@ -1310,3 +1337,32 @@ def template SveOpExecDeclare {{
     Fault %(class_name)s<%(targs)s>::execute(
             ExecContext *, trace::InstRecord *) const;
 }};
+
+def template SveMatMulOpExecute {{
+    template <typename DestElement,
+              typename SrcElementA,
+              typename SrcElementB>
+    Fault %(class_name)s<DestElement,SrcElementA,SrcElementB>::execute(
+            ExecContext *xc,
+            trace::InstRecord *traceData) const
+    {
+        Fault fault = NoFault;
+        %(op_decl)s;
+        %(op_rd)s;
+
+        %(code)s;
+        if (fault == NoFault)
+        {
+            %(op_wb)s;
+        }
+
+        return fault;
+    }
+}};
+
+def template SveMatMulOpExecDeclare {{
+    template
+    Fault
+    %(class_name)s<%(destEltType)s,%(srcEltAType)s,%(srcEltBType)s>
+    ::execute(ExecContext *, trace::InstRecord *) const;
+}};
diff --git a/src/arch/arm/process.cc b/src/arch/arm/process.cc
index fda9415356..24e1250da9 100644
--- a/src/arch/arm/process.cc
+++ b/src/arch/arm/process.cc
@@ -320,6 +320,9 @@ ArmProcess64::armHwcapImpl2() const
     hwcap |= (isa_r0.ts >= 2) ? Arm_Flagm2 : Arm_None;
     hwcap |= (isa_r0.rndr >= 1) ? Arm_Rng : Arm_None;
 
+    const AA64ZFR0 zf_r0 = tc->readMiscReg(MISCREG_ID_AA64ZFR0_EL1);
+    hwcap |= (zf_r0.f32mm >= 1) ? Arm_Svef32mm : Arm_None;
+
     return hwcap;
 }
 
diff --git a/src/arch/arm/regs/misc.cc b/src/arch/arm/regs/misc.cc
index 53e92687b9..8925bc00d6 100644
--- a/src/arch/arm/regs/misc.cc
+++ b/src/arch/arm/regs/misc.cc
@@ -5403,6 +5403,11 @@ ISA::initializeMiscRegMetadata()
 
     // SVE
     InitReg(MISCREG_ID_AA64ZFR0_EL1)
+        .reset([this](){
+            AA64ZFR0 zfr0_el1 = 0;
+            zfr0_el1.f32mm = release->has(ArmExtension::FEAT_F32MM) ? 1 : 0;
+            return zfr0_el1;
+        }())
         .faultRead(EL0, faultIdst)
         .faultRead(EL1, HCR_TRAP(tid3))
         .allPrivileges().exceptUserMode().writes(0);

From 19e802304335d78956a2cdaa965ccf010c9152fb Mon Sep 17 00:00:00 2001
From: Richard Cooper <richard.cooper@arm.com>
Date: Mon, 14 Sep 2020 18:55:09 +0100
Subject: [PATCH 451/492] arch-arm: Support Arm SVE Load-Broadcast Octaword
 instructions.

Add support for the Arm SVE Load-Broadcast Octaword (LD1RO{B,H,W,D})
instructions. These are similar to the Load-Broadcast
Quadword (LD1RQ{B,H,W,D}) instructions, but work on a 32-byte memory
segment rather than a 16-byte memory segment. Consequently, the LD1ROx
implementations build on the code for the LD1RQx implementations.

For more information please refer to the "ARM Architecture Reference
Manual Supplement - The Scalable Vector Extension (SVE), for ARMv8-A"
(https://developer.arm.com/architectures/cpu-architecture/a-profile/
docs/arm-architecture-reference-manual-supplement-armv8-a)

Change-Id: I98ee4f56c8099bf40c9034baa488d318ae57d3aa
Reviewed-by: Richard Cooper <richard.cooper@arm.com>
Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/70727
Reviewed-by: Andreas Sandberg <andreas.sandberg@arm.com>
Maintainer: Andreas Sandberg <andreas.sandberg@arm.com>
Tested-by: kokoro <noreply+kokoro@google.com>
---
 src/arch/arm/isa/formats/sve_2nd_level.isa | 88 +++++++++++++++-------
 src/arch/arm/isa/insts/sve_mem.isa         | 75 ++++++++++++------
 2 files changed, 112 insertions(+), 51 deletions(-)

diff --git a/src/arch/arm/isa/formats/sve_2nd_level.isa b/src/arch/arm/isa/formats/sve_2nd_level.isa
index 440722ac72..f74181a062 100644
--- a/src/arch/arm/isa/formats/sve_2nd_level.isa
+++ b/src/arch/arm/isa/formats/sve_2nd_level.isa
@@ -3219,66 +3219,96 @@ namespace Aarch64
     }  // decodeSveMemGather32
 
     StaticInstPtr
-    decodeSveLoadBcastQuadSS(ExtMachInst machInst)
+    decodeSveLoadBcastMultiSS(ExtMachInst machInst)
     {
-        uint8_t num = bits(machInst, 22, 21);
-        if (num != 0x00) {
-            return new Unknown64(machInst);
-        }
-
         RegIndex zt = (RegIndex)(uint8_t) bits(machInst, 4, 0);
         RegIndex rn = makeSP((RegIndex)(uint8_t) bits(machInst, 9, 5));
         RegIndex pg = (RegIndex)(uint8_t) bits(machInst, 12, 10);
         RegIndex rm = (RegIndex)(uint8_t) bits(machInst, 20, 16);
-        uint8_t msz = bits(machInst, 24, 23);
-        switch (msz) {
-            case 0:
+
+        uint8_t msz_esz = bits(machInst, 24, 21);
+
+        switch (msz_esz) {
+            // Load-Broadcast Quad-word Variants
+            case 0b0000: // 0x0:
                 return new SveLd1RqSS<uint8_t, uint8_t>("ld1rqb",
                         machInst, zt, pg, rn, rm);
-            case 1:
-                return new SveLd1RqSS<uint16_t, uint16_t>("ld1rqh",
+            case 0b0100: // 0x4:
+                 return new SveLd1RqSS<uint16_t, uint16_t>("ld1rqh",
                         machInst, zt, pg, rn, rm);
-            case 2:
+            case 0b1000: // 0x8:
                 return new SveLd1RqSS<uint32_t, uint32_t>("ld1rqw",
                         machInst, zt, pg, rn, rm);
-            case 3:
+            case 0b1100: // 0xc:
                 return new SveLd1RqSS<uint64_t, uint64_t>("ld1rqd",
                         machInst, zt, pg, rn, rm);
+
+            // Load-Broadcast Octa-word Variants
+            case 0b0001: // 0x1:
+                return new SveLd1RoSS<uint8_t, uint8_t>("ld1rob",
+                        machInst, zt, pg, rn, rm);
+            case 0b0101: // 0x5:
+                return new SveLd1RoSS<uint16_t, uint16_t>("ld1roh",
+                        machInst, zt, pg, rn, rm);
+            case 0b1001: // 0x9:
+                return new SveLd1RoSS<uint32_t, uint32_t>("ld1row",
+                        machInst, zt, pg, rn, rm);
+            case 0b1101: // 0xd:
+                return new SveLd1RoSS<uint64_t, uint64_t>("ld1rod",
+                        machInst, zt, pg, rn, rm);
+
+            default:
+              return new Unknown64(machInst);
         }
 
         return new Unknown64(machInst);
-    }  // decodeSveLoadBcastQuadSS
+    }  // decodeSveLoadBcastMultiSS
 
     StaticInstPtr
-    decodeSveLoadBcastQuadSI(ExtMachInst machInst)
+    decodeSveLoadBcastMultiSI(ExtMachInst machInst)
     {
-        uint8_t num = bits(machInst, 22, 21);
-        if (num != 0x00) {
-            return new Unknown64(machInst);
-        }
-
         RegIndex zt = (RegIndex)(uint8_t) bits(machInst, 4, 0);
         RegIndex rn = makeSP((RegIndex)(uint8_t) bits(machInst, 9, 5));
         RegIndex pg = (RegIndex)(uint8_t) bits(machInst, 12, 10);
         uint64_t imm = sext<4>(bits(machInst, 19, 16));
-        uint8_t msz = bits(machInst, 24, 23);
-        switch (msz) {
-            case 0:
+
+        uint8_t msz_esz = bits(machInst, 24, 21);
+
+        switch (msz_esz) {
+            // Load-Broadcast Quad-word Variants
+            case 0b0000: // 0x0:
                 return new SveLd1RqSI<uint8_t, uint8_t>("ld1rqb",
                         machInst, zt, pg, rn, imm);
-            case 1:
+            case 0b0100: // 0x4:
                 return new SveLd1RqSI<uint16_t, uint16_t>("ld1rqh",
                         machInst, zt, pg, rn, imm);
-            case 2:
+            case 0b1000: // 0x8:
                 return new SveLd1RqSI<uint32_t, uint32_t>("ld1rqw",
                         machInst, zt, pg, rn, imm);
-            case 3:
+            case 0b1100: // 0xc:
                 return new SveLd1RqSI<uint64_t, uint64_t>("ld1rqd",
                         machInst, zt, pg, rn, imm);
+
+            // Load-Broadcast Octa-word Variants
+            case 0b0001: // 0x1:
+                return new SveLd1RoSI<uint8_t, uint8_t>("ld1rob",
+                        machInst, zt, pg, rn, imm);
+            case 0b0101: // 0x5:
+                return new SveLd1RoSI<uint16_t, uint16_t>("ld1roh",
+                        machInst, zt, pg, rn, imm);
+            case 0b1001: // 0x9:
+                return new SveLd1RoSI<uint32_t, uint32_t>("ld1row",
+                        machInst, zt, pg, rn, imm);
+            case 0b1101: // 0xd:
+                return new SveLd1RoSI<uint64_t, uint64_t>("ld1rod",
+                        machInst, zt, pg, rn, imm);
+
+            default:
+              return new Unknown64(machInst);
         }
 
         return new Unknown64(machInst);
-    }  // decodeSveLoadBcastQuadSI
+    }  // decodeSveLoadBcastMultiSI
 
     StaticInstPtr
     decodeSveContigLoadSS(ExtMachInst machInst)
@@ -3388,10 +3418,10 @@ namespace Aarch64
     {
         switch (bits(machInst, 15, 13)) {
           case 0x0:
-            return decodeSveLoadBcastQuadSS(machInst);
+            return decodeSveLoadBcastMultiSS(machInst);
           case 0x1:
             if (bits(machInst, 20) == 0x0) {
-                return decodeSveLoadBcastQuadSI(machInst);
+                return decodeSveLoadBcastMultiSI(machInst);
             }
             break;
           case 0x2:
diff --git a/src/arch/arm/isa/insts/sve_mem.isa b/src/arch/arm/isa/insts/sve_mem.isa
index 8a73d131ce..bece3689b4 100644
--- a/src/arch/arm/isa/insts/sve_mem.isa
+++ b/src/arch/arm/isa/insts/sve_mem.isa
@@ -1,4 +1,4 @@
-// Copyright (c) 2017-2019 ARM Limited
+// Copyright (c) 2017-2020 ARM Limited
 // All rights reserved
 //
 // The license below extends only to copyright in the software and shall
@@ -1480,20 +1480,33 @@ let {{
             exec_output += SveStructMemExecDeclare.subst(substDict)
 
     # Generates definitions for SVE load-and-replicate quadword instructions
-    def emitSveLoadAndReplQuad(offsetIsImm):
+    def emitSveLoadAndReplMulti(offsetIsImm, numQwordSegments):
         global header_output, exec_output, decoders
+        assert(numQwordSegments in (1, 2)) # Quadword or Octaword
+        from collections import namedtuple
+        InstConfig = namedtuple("_InstConfig", "mnemonic classname baseclass")
+        INST_CONFIGURATIONS = {
+            # (offsetIsImm, numQwordSegments) -> InstConfig Recors
+            (True, 1): InstConfig("ld1rq", "SveLd1RqSI", "SveContigMemSI"),
+            (False, 1): InstConfig("ld1rq", "SveLd1RqSS", "SveContigMemSS"),
+            (True, 2): InstConfig("ld1ro", "SveLd1RoSI", "SveContigMemSI"),
+            (False, 2): InstConfig("ld1ro", "SveLd1RoSS", "SveContigMemSS"),
+        }
+        inst_config = INST_CONFIGURATIONS[(offsetIsImm, numQwordSegments)]
+        memAccessSize = numQwordSegments * 16;
         tplHeader = 'template <class RegElemType, class MemElemType>'
         tplArgs = '<RegElemType, MemElemType>'
         eaCode = SPAlignmentCheckCode + '''
-        int memAccessSize = 16;
-        EA = XBase + '''
+        int memAccessSize = %(memAccessSize)d;
+        EA = XBase + ''' % dict(memAccessSize=memAccessSize)
         if offsetIsImm:
-            eaCode += '(((int64_t) this->imm) * 16);'
+            eaCode += ('(((int64_t) this->imm) * %(memAccessSize)d);'
+                       % dict(memAccessSize=memAccessSize))
         else:
             eaCode += '(XOffset * sizeof(MemElemType));'
         loadRdEnableCode = '''
-        eCount = 16/sizeof(RegElemType);
-        auto rdEn = std::vector<bool>(16, true);
+        eCount = %(memAccessSize)d/sizeof(RegElemType);
+        auto rdEn = std::vector<bool>(%(memAccessSize)d, true);
         for (int i = 0; i < eCount; ++i) {
             if (!GpOp_x[i]) {
                 for (int j = 0; j < sizeof(RegElemType); ++j) {
@@ -1501,26 +1514,40 @@ let {{
                 }
             }
         }
-        '''
+        ''' % dict(memAccessSize=memAccessSize)
         memAccCode = '''
-        __uint128_t qword;
-        RegElemType* qp = reinterpret_cast<RegElemType*>(&qword);
-        for (int i = 0; i < 16/sizeof(RegElemType); ++i) {
+        // Copy active elements of the data from memory into a temporary
+        // quadword/octaword
+        __uint128_t qwords[%(numQwordSegments)d];
+        eCount = %(memAccessSize)d/sizeof(RegElemType);
+        RegElemType* qp = reinterpret_cast<RegElemType*>(&qwords);
+        for (int i = 0; i < eCount; ++i) {
             if (GpOp_x[i]) {
                 qp[i] = memDataView[i];
             } else {
                 qp[i] = 0;
             }
         }
-        eCount = ArmStaticInst::getCurSveVecLen<__uint128_t>(
+        // Repeat the temporary quadword/octaword segment into the
+        // vector register. Zero fill the remainder for non-full
+        // octawords.
+        unsigned numQwords = ArmStaticInst::getCurSveVecLen<__uint128_t>(
                 xc->tcBase());
-        for (int i = 0; i < eCount; ++i) {
-            AA64FpDest_uq[i] = qword;
+        unsigned numFullQwords = numQwords -
+                                 (numQwords %% %(numQwordSegments)d);
+        for (int i = 0; i < numQwords; ++i) {
+            if (i < numFullQwords) {
+                AA64FpDest_uq[i] = qwords[i %% %(numQwordSegments)d];
+            } else {
+                AA64FpDest_uq[i] = 0;
+            }
         }
-        '''
-        iop = ArmInstObjParams('ld1rq',
-                'SveLd1RqSI' if offsetIsImm else 'SveLd1RqSS',
-                'SveContigMemSI' if offsetIsImm else 'SveContigMemSS',
+        ''' % dict(memAccessSize=memAccessSize,
+                   numQwordSegments=numQwordSegments)
+        iop = ArmInstObjParams(
+                inst_config.mnemonic,
+                inst_config.classname,
+                inst_config.baseclass,
                 {'tpl_header': tplHeader,
                  'tpl_args': tplArgs,
                  'rden_code': loadRdEnableCode,
@@ -1539,8 +1566,7 @@ let {{
                 SveContigLoadCompleteAcc.subst(iop))
         for ttype in ('uint8_t', 'uint16_t', 'uint32_t', 'uint64_t'):
             substDict = {'tpl_args': '<%s, %s>' % (ttype, ttype),
-                    'class_name': 'SveLd1RqSI' if offsetIsImm
-                                  else 'SveLd1RqSS'}
+                         'class_name': inst_config.classname}
             exec_output += SveContigMemExecDeclare.subst(substDict)
 
     # LD1[S]{B,H,W,D} (scalar plus immediate)
@@ -1556,9 +1582,14 @@ let {{
     emitSveLoadAndRepl()
 
     # LD1RQ{B,H,W,D} (scalar plus immediate)
-    emitSveLoadAndReplQuad(offsetIsImm = True)
+    emitSveLoadAndReplMulti(offsetIsImm=True, numQwordSegments=1)
     # LD1RQ{B,H,W,D} (scalar plus scalar)
-    emitSveLoadAndReplQuad(offsetIsImm = False)
+    emitSveLoadAndReplMulti(offsetIsImm=False, numQwordSegments=1)
+
+    # LD1RO{B,H,W,D} (scalar plus immediate)
+    emitSveLoadAndReplMulti(offsetIsImm=True, numQwordSegments=2)
+    # LD1RO{B,H,W,D} (scalar plus scalar)
+    emitSveLoadAndReplMulti(offsetIsImm=False, numQwordSegments=2)
 
     # LD{2,3,4}{B,H,W,D} (scalar plus immediate)
     # ST{2,3,4}{B,H,W,D} (scalar plus immediate)

From 8bf89d696737c9d270c5e2d7adde429fa39dd58f Mon Sep 17 00:00:00 2001
From: Richard Cooper <richard.cooper@arm.com>
Date: Fri, 18 Sep 2020 16:40:45 +0100
Subject: [PATCH 452/492] arch-arm: Added 128-bit encodings of SVE TRN, UZP,
 and ZIP insts.

Add support for the 128-bit element encodings of the TRN1, TRN2, UZP1,
UZP2, ZIP1, and ZIP2 instructions, required by the Armv8.2 SVE
Double-precision floating-point Matrix Multiplication
instructions (ARMv8.2-F64MM).

For more information please refer to the "ARM Architecture Reference
Manual Supplement - The Scalable Vector Extension (SVE), for ARMv8-A"
(https://developer.arm.com/architectures/cpu-architecture/a-profile/
docs/arm-architecture-reference-manual-supplement-armv8-a)

Change-Id: I496576340c48410fedb2cf6fc7d1a02e219b3bd4
Reviewed-by: Richard Cooper <richard.cooper@arm.com>
Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/70728
Maintainer: Jason Lowe-Power <power.jg@gmail.com>
Tested-by: kokoro <noreply+kokoro@google.com>
Reviewed-by: Jason Lowe-Power <power.jg@gmail.com>
Reviewed-by: Andreas Sandberg <andreas.sandberg@arm.com>
Reviewed-by: Giacomo Travaglini <giacomo.travaglini@arm.com>
Maintainer: Andreas Sandberg <andreas.sandberg@arm.com>
---
 src/arch/arm/isa/formats/sve_2nd_level.isa |  18 ++--
 src/arch/arm/isa/formats/sve_top_level.isa |  13 ++-
 src/arch/arm/isa/insts/sve.isa             | 112 ++++++++++++++++-----
 3 files changed, 108 insertions(+), 35 deletions(-)

diff --git a/src/arch/arm/isa/formats/sve_2nd_level.isa b/src/arch/arm/isa/formats/sve_2nd_level.isa
index f74181a062..3d211bc19b 100644
--- a/src/arch/arm/isa/formats/sve_2nd_level.isa
+++ b/src/arch/arm/isa/formats/sve_2nd_level.isa
@@ -1145,29 +1145,31 @@ namespace Aarch64
     }  // decodeSvePermPredicates
 
     StaticInstPtr
-    decodeSvePermIntlv(ExtMachInst machInst)
+    decodeSvePermIntlv(ExtMachInst machInst, bool f64mm)
     {
         RegIndex zd = (RegIndex) (uint8_t) bits(machInst, 4, 0);
         RegIndex zn = (RegIndex) (uint8_t) bits(machInst, 9, 5);
         RegIndex zm = (RegIndex) (uint8_t) bits(machInst, 20, 16);
 
-        uint8_t size = bits(machInst, 23, 22);
+        uint8_t size = f64mm ? 4 : (uint8_t)bits(machInst, 23, 22);
 
         uint8_t opc = bits(machInst, 12, 10);
 
         switch (opc) {
           case 0x0:
-            return decodeSveBinUnpredU<SveZip1>(size, machInst, zd, zn, zm);
+            return decodeSveBinUnpredUQ<SveZip1>(size, machInst, zd, zn, zm);
           case 0x1:
-            return decodeSveBinUnpredU<SveZip2>(size, machInst, zd, zn, zm);
+            return decodeSveBinUnpredUQ<SveZip2>(size, machInst, zd, zn, zm);
           case 0x2:
-            return decodeSveBinUnpredU<SveUzp1>(size, machInst, zd, zn, zm);
+            return decodeSveBinUnpredUQ<SveUzp1>(size, machInst, zd, zn, zm);
           case 0x3:
-            return decodeSveBinUnpredU<SveUzp2>(size, machInst, zd, zn, zm);
+            return decodeSveBinUnpredUQ<SveUzp2>(size, machInst, zd, zn, zm);
           case 0x4:
-            return decodeSveBinUnpredU<SveTrn1>(size, machInst, zd, zn, zm);
+          case 0x6:
+            return decodeSveBinUnpredUQ<SveTrn1>(size, machInst, zd, zn, zm);
           case 0x5:
-            return decodeSveBinUnpredU<SveTrn2>(size, machInst, zd, zn, zm);
+          case 0x7:
+            return decodeSveBinUnpredUQ<SveTrn2>(size, machInst, zd, zn, zm);
         }
         return new Unknown64(machInst);
     }  // decodeSvePermIntlv
diff --git a/src/arch/arm/isa/formats/sve_top_level.isa b/src/arch/arm/isa/formats/sve_top_level.isa
index b0579fba04..61f2f5ca6c 100644
--- a/src/arch/arm/isa/formats/sve_top_level.isa
+++ b/src/arch/arm/isa/formats/sve_top_level.isa
@@ -57,7 +57,7 @@ namespace Aarch64
     StaticInstPtr decodeSvePermExtract(ExtMachInst machInst);
     StaticInstPtr decodeSvePermUnpred(ExtMachInst machInst);
     StaticInstPtr decodeSvePermPredicates(ExtMachInst machInst);
-    StaticInstPtr decodeSvePermIntlv(ExtMachInst machInst);
+    StaticInstPtr decodeSvePermIntlv(ExtMachInst machInst, bool f64mm);
     StaticInstPtr decodeSvePermPred(ExtMachInst machInst);
     StaticInstPtr decodeSveSelVec(ExtMachInst machInst);
     StaticInstPtr decodeSveIntCmpVec(ExtMachInst machInst);
@@ -202,11 +202,18 @@ namespace Aarch64
                     if (b_13) {
                         return decodeSvePermUnpred(machInst);
                     } else {
-                        return decodeSvePermExtract(machInst);
+                        uint8_t b_23 = bits(machInst, 23);
+                        if (b_23) {
+                            // 128-bit element encodings for Armv8.6 F64MM
+                            return decodeSvePermIntlv(machInst, true);
+                        } else {
+                            return decodeSvePermExtract(machInst);
+                        }
                     }
                   case 0x1:
                     if (b_13) {
-                        return decodeSvePermIntlv(machInst);
+                        // 8,16,32,64-bit element encodings
+                        return decodeSvePermIntlv(machInst, false);
                     } else {
                         return decodeSvePermPredicates(machInst);
                     }
diff --git a/src/arch/arm/isa/insts/sve.isa b/src/arch/arm/isa/insts/sve.isa
index 74eacb8db7..cbaa2b528a 100644
--- a/src/arch/arm/isa/insts/sve.isa
+++ b/src/arch/arm/isa/insts/sve.isa
@@ -632,6 +632,29 @@ output header {{
         }
     }
 
+    // Decodes binary, constructive, unpredicated SVE instructions.
+    // Unsigned instructions only, including Quadword variants.
+    template <template <typename T> class Base>
+    StaticInstPtr
+    decodeSveBinUnpredUQ(unsigned size, ExtMachInst machInst, RegIndex dest,
+            RegIndex op1, RegIndex op2)
+    {
+        switch (size) {
+          case 0:
+            return new Base<uint8_t>(machInst, dest, op1, op2);
+          case 1:
+            return new Base<uint16_t>(machInst, dest, op1, op2);
+          case 2:
+            return new Base<uint32_t>(machInst, dest, op1, op2);
+          case 3:
+            return new Base<uint64_t>(machInst, dest, op1, op2);
+          case 4:
+            return new Base<__uint128_t>(machInst, dest, op1, op2);
+          default:
+            return new Unknown64(machInst);
+        }
+    }
+
     // Decodes binary, constructive, unpredicated SVE instructions.
     // Signed instructions only.
     template <template <typename T> class Base>
@@ -3299,6 +3322,8 @@ let {{
     fpTypes = ('uint16_t', 'uint32_t', 'uint64_t')
     signedTypes = ('int8_t', 'int16_t', 'int32_t', 'int64_t')
     unsignedTypes = ('uint8_t', 'uint16_t', 'uint32_t', 'uint64_t')
+    extendedUnsignedTypes = ('uint8_t', 'uint16_t', 'uint32_t', 'uint64_t',
+                             '__uint128_t')
 
     smallSignedTypes = ('int8_t', 'int16_t', 'int32_t')
     bigSignedTypes = ('int16_t', 'int32_t', 'int64_t')
@@ -4754,23 +4779,36 @@ let {{
                        trnPredIterCode % 1)
     # TRN1, TRN2 (vectors)
     trnIterCode = '''
+        // SVE F64MM support requires that there are at least two elements
+        // in the vector.
+        if (eCount < 2) {
+            return std::make_shared<UndefinedInstruction>(machInst, false,
+                                                          "%(mnemonic)s");
+        }
         int s;
-        int part = %d;
+        int part = %(part)d;
         ArmISA::VecRegContainer tmpVecC;
         auto auxDest = tmpVecC.as<Element>();
-        for (unsigned i = 0; i < eCount / 2; i++) {
+        const unsigned eltPairsCount = eCount / 2;
+        const unsigned eltsInPairsCount = eltPairsCount * 2;
+        for (unsigned i = 0; i < eltPairsCount; i++) {
             s = 2 * i + part;
             auxDest[2 * i] = AA64FpOp1_x[s];
             auxDest[2 * i + 1] = AA64FpOp2_x[s];
         }
-        for (unsigned i = 0; i < eCount; i++) {
+        // Fill output vector with pairs of elements
+        for (unsigned i = 0; i < eltsInPairsCount; i++) {
             AA64FpDest_x[i] = auxDest[i];
         }
+        // Fill any trailing non-full pairs with zeros
+        for (unsigned i = eltsInPairsCount; i < eCount; i++) {
+            AA64FpDest_x[i] = 0;
+        }
     '''
-    sveBinInst('trn1', 'Trn1', 'SimdAluOp', unsignedTypes, '',
-               customIterCode=trnIterCode % 0)
-    sveBinInst('trn2', 'Trn2', 'SimdAluOp', unsignedTypes, '',
-               customIterCode=trnIterCode % 1)
+    sveBinInst('trn1', 'Trn1', 'SimdAluOp', extendedUnsignedTypes, '',
+               customIterCode=trnIterCode % dict(mnemonic='trn1', part=0))
+    sveBinInst('trn2', 'Trn2', 'SimdAluOp', extendedUnsignedTypes, '',
+               customIterCode=trnIterCode % dict(mnemonic='trn2', part=1))
     # UABD
     sveBinInst('uabd', 'Uabd', 'SimdAddOp', unsignedTypes, abdCode,
                PredType.MERGE, True)
@@ -4976,26 +5014,39 @@ let {{
                        uzpPredIterCode % 1)
     # UZP1, UZP2 (vectors)
     uzpIterCode = '''
+        // SVE F64MM support requires that there are at least two elements
+        // in the vector.
+        if (eCount < 2) {
+            return std::make_shared<UndefinedInstruction>(machInst, false,
+                                                          "%(mnemonic)s");
+        }
         int s;
-        int part = %d;
+        int part = %(part)d;
         ArmISA::VecRegContainer tmpVecC;
         auto auxDest = tmpVecC.as<Element>();
-        for (unsigned i = 0; i < eCount; i++) {
+        const unsigned eltPairsCount = eCount / 2;
+        const unsigned eltsInPairsCount = eltPairsCount * 2;
+        for (unsigned i = 0; i < eltsInPairsCount; i++) {
             s = 2 * i + part;
-            if (s < eCount) {
+            if (s < eltsInPairsCount) {
                 auxDest[i] = AA64FpOp1_x[s];
             } else {
-                auxDest[i] = AA64FpOp2_x[s - eCount];
+                auxDest[i] = AA64FpOp2_x[s - eltsInPairsCount];
             }
         }
-        for (unsigned i = 0; i < eCount; i++) {
+        // Fill output vector with pairs of elements
+        for (unsigned i = 0; i < eltsInPairsCount; i++) {
             AA64FpDest_x[i] = auxDest[i];
         }
+        // Fill any trailing non-full pairs with zeros
+        for (unsigned i = eltsInPairsCount; i < eCount; i++) {
+            AA64FpDest_x[i] = 0;
+        }
     '''
-    sveBinInst('uzp1', 'Uzp1', 'SimdAluOp', unsignedTypes, '',
-               customIterCode=uzpIterCode % 0)
-    sveBinInst('uzp2', 'Uzp2', 'SimdAluOp', unsignedTypes, '',
-               customIterCode=uzpIterCode % 1)
+    sveBinInst('uzp1', 'Uzp1', 'SimdAluOp', extendedUnsignedTypes, '',
+               customIterCode=uzpIterCode % dict(mnemonic='uzp1', part=0))
+    sveBinInst('uzp2', 'Uzp2', 'SimdAluOp', extendedUnsignedTypes, '',
+               customIterCode=uzpIterCode % dict(mnemonic='uzp2', part=1))
     # WHILELE (32-bit)
     whileLECode = '''
             cond = srcElem1 <= srcElem2;
@@ -5058,22 +5109,35 @@ let {{
                        zipPredIterCode % 1)
     # ZIP1, ZIP2 (vectors)
     zipIterCode = '''
+        // SVE F64MM support requires that there are at least two elements
+        // in the vector.
+        if (eCount < 2) {
+            return std::make_shared<UndefinedInstruction>(machInst, false,
+                                                          "%(mnemonic)s");
+        }
         int s;
-        int part = %d;
+        int part = %(part)d;
         ArmISA::VecRegContainer tmpVecC;
         auto auxDest = tmpVecC.as<Element>();
-        for (unsigned i = 0; i < eCount / 2; i++) {
-            s = i + (part * (eCount / 2));
+        const unsigned eltPairsCount = eCount / 2;
+        const unsigned eltsInPairsCount = eltPairsCount * 2;
+        for (unsigned i = 0; i < eltPairsCount; i++) {
+            s = i + (part * (eltsInPairsCount / 2));
             auxDest[2 * i] = AA64FpOp1_x[s];
             auxDest[2 * i + 1] = AA64FpOp2_x[s];
         }
-        for (unsigned i = 0; i < eCount; i++) {
+        // Fill output vector with pairs of elements
+        for (unsigned i = 0; i < eltsInPairsCount; i++) {
             AA64FpDest_x[i] = auxDest[i];
         }
+        // Fill any trailing non-full pairs with zeros
+        for (unsigned i = eltsInPairsCount; i < eCount; i++) {
+            AA64FpDest_x[i] = 0;
+        }
     '''
-    sveBinInst('zip1', 'Zip1', 'SimdAluOp', unsignedTypes, '',
-               customIterCode=zipIterCode % 0)
-    sveBinInst('zip2', 'Zip2', 'SimdAluOp', unsignedTypes, '',
-               customIterCode=zipIterCode % 1)
+    sveBinInst('zip1', 'Zip1', 'SimdAluOp', extendedUnsignedTypes, '',
+               customIterCode=zipIterCode % dict(mnemonic='zip1', part=0))
+    sveBinInst('zip2', 'Zip2', 'SimdAluOp', extendedUnsignedTypes, '',
+               customIterCode=zipIterCode % dict(mnemonic='zip2', part=1))
 
 }};

From 0f857873f95eac5e99f84c89df7bd1c2f4461a96 Mon Sep 17 00:00:00 2001
From: Richard Cooper <richard.cooper@arm.com>
Date: Thu, 8 Oct 2020 19:35:47 +0100
Subject: [PATCH 453/492] arch-arm: Declare support for Armv8.2-F64MM.

Sets the appropriate bit in the ID_AA64ZFR0_EL1 sysreg that declares
support for ARMv8.2-F64MM.

This indicates that all pre-requisites for Armv8.2 SVE FP64
double-precision floating-point matrix multiplication instructions
have been met.

FMMLA, and LD1RO* instructions have been implemented, as well as the
128-bit element variants of TRN1, TRN2, UZP1, UZP2, ZIP1, and ZIP2.

For more information please refer to the "ARM Architecture Reference
Manual Supplement - The Scalable Vector Extension (SVE), for ARMv8-A"
(https://developer.arm.com/architectures/cpu-architecture/a-profile/
docs/arm-architecture-reference-manual-supplement-armv8-a)

Additional Contributors: Giacomo Travaglini

Change-Id: Idac3a3ca590e6eb2beb217a40a8c10af1e917440
Reviewed-by: Richard Cooper <richard.cooper@arm.com>
Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/70729
Tested-by: kokoro <noreply+kokoro@google.com>
Reviewed-by: Giacomo Travaglini <giacomo.travaglini@arm.com>
Reviewed-by: Andreas Sandberg <andreas.sandberg@arm.com>
Maintainer: Andreas Sandberg <andreas.sandberg@arm.com>
Maintainer: Giacomo Travaglini <giacomo.travaglini@arm.com>
---
 src/arch/arm/ArmISA.py    | 1 +
 src/arch/arm/ArmSystem.py | 3 +++
 src/arch/arm/process.cc   | 1 +
 src/arch/arm/regs/misc.cc | 1 +
 4 files changed, 6 insertions(+)

diff --git a/src/arch/arm/ArmISA.py b/src/arch/arm/ArmISA.py
index 31ecbcbd15..fbd93b6bf6 100644
--- a/src/arch/arm/ArmISA.py
+++ b/src/arch/arm/ArmISA.py
@@ -54,6 +54,7 @@ class ArmDefaultSERelease(ArmRelease):
         "FEAT_RDM",
         # Armv8.2
         "FEAT_F32MM",
+        "FEAT_F64MM",
         "FEAT_SVE",
         # Armv8.3
         "FEAT_FCMA",
diff --git a/src/arch/arm/ArmSystem.py b/src/arch/arm/ArmSystem.py
index 5a7ae799b7..49dab3e0e2 100644
--- a/src/arch/arm/ArmSystem.py
+++ b/src/arch/arm/ArmSystem.py
@@ -79,6 +79,7 @@ class ArmExtension(ScopedEnum):
         "FEAT_LVA",  # Optional in Armv8.2
         "FEAT_LPA",  # Optional in Armv8.2
         "FEAT_F32MM",  # Optional in Armv8.2
+        "FEAT_F64MM",  # Optional in Armv8.2
         # Armv8.3
         "FEAT_FCMA",
         "FEAT_JSCVT",
@@ -165,6 +166,7 @@ class ArmDefaultRelease(Armv8):
         "FEAT_LPA",
         "FEAT_SVE",
         "FEAT_F32MM",
+        "FEAT_F64MM",
         # Armv8.3
         "FEAT_FCMA",
         "FEAT_JSCVT",
@@ -199,6 +201,7 @@ class Armv82(Armv81):
         "FEAT_LPA",
         "FEAT_SVE",
         "FEAT_F32MM",
+        "FEAT_F64MM",
     ]
 
 
diff --git a/src/arch/arm/process.cc b/src/arch/arm/process.cc
index 24e1250da9..be8dfff330 100644
--- a/src/arch/arm/process.cc
+++ b/src/arch/arm/process.cc
@@ -322,6 +322,7 @@ ArmProcess64::armHwcapImpl2() const
 
     const AA64ZFR0 zf_r0 = tc->readMiscReg(MISCREG_ID_AA64ZFR0_EL1);
     hwcap |= (zf_r0.f32mm >= 1) ? Arm_Svef32mm : Arm_None;
+    hwcap |= (zf_r0.f64mm >= 1) ? Arm_Svef64mm : Arm_None;
 
     return hwcap;
 }
diff --git a/src/arch/arm/regs/misc.cc b/src/arch/arm/regs/misc.cc
index 8925bc00d6..7e53e0dc24 100644
--- a/src/arch/arm/regs/misc.cc
+++ b/src/arch/arm/regs/misc.cc
@@ -5406,6 +5406,7 @@ ISA::initializeMiscRegMetadata()
         .reset([this](){
             AA64ZFR0 zfr0_el1 = 0;
             zfr0_el1.f32mm = release->has(ArmExtension::FEAT_F32MM) ? 1 : 0;
+            zfr0_el1.f64mm = release->has(ArmExtension::FEAT_F64MM) ? 1 : 0;
             return zfr0_el1;
         }())
         .faultRead(EL0, faultIdst)

From 98e67c8610fd016bc4150a6ab12f1497a2524a50 Mon Sep 17 00:00:00 2001
From: Richard Cooper <richard.cooper@arm.com>
Date: Mon, 28 Sep 2020 17:50:52 +0100
Subject: [PATCH 454/492] arch-arm: Add support for Arm SVE Integer Matrix
 instructions.

Add support for the Arm SVE Integer Matrix Multiply-Accumulate
(SMMLA, USMMLA, UMMLA) instructions. Because the associated SUDOT and
USDOT instructions have not yet been implemented, the SVE Feature ID
register 0 (ID_AA64ZFR0_EL1) has not yet been updated to indicate
support for SVE Int8 matrix multiplication instructions at this time.

For more information please refer to the "ARM Architecture Reference
Manual Supplement - The Scalable Vector Extension (SVE), for ARMv8-A"
(https://developer.arm.com/architectures/cpu-architecture/a-profile/
docs/arm-architecture-reference-manual-supplement-armv8-a)

Additional Contributors: Giacomo Travaglini

Change-Id: Ia50e28fae03634cbe04b42a9900bab65a604817f
Reviewed-by: Richard Cooper <richard.cooper@arm.com>
Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/70730
Maintainer: Andreas Sandberg <andreas.sandberg@arm.com>
Tested-by: kokoro <noreply+kokoro@google.com>
Reviewed-by: Andreas Sandberg <andreas.sandberg@arm.com>
---
 src/arch/arm/isa/formats/sve_2nd_level.isa | 43 ++++++++++++++++++++++
 src/arch/arm/isa/formats/sve_top_level.isa | 11 ++++++
 src/arch/arm/isa/insts/sve.isa             | 16 ++++++++
 3 files changed, 70 insertions(+)

diff --git a/src/arch/arm/isa/formats/sve_2nd_level.isa b/src/arch/arm/isa/formats/sve_2nd_level.isa
index 3d211bc19b..4a44bab9b2 100644
--- a/src/arch/arm/isa/formats/sve_2nd_level.isa
+++ b/src/arch/arm/isa/formats/sve_2nd_level.isa
@@ -245,6 +245,33 @@ namespace Aarch64
         return new Unknown64(machInst);
     }  // decodeSveIntMulAdd
 
+    StaticInstPtr
+    decodeSveIntMatMulAdd(ExtMachInst machInst)
+    {
+        RegIndex zda = (RegIndex) (uint8_t) bits(machInst, 4, 0);
+        RegIndex zn = (RegIndex) (uint8_t) bits(machInst, 9, 5);
+        RegIndex zm = (RegIndex) (uint8_t) bits(machInst, 20, 16);
+
+        uint8_t uns = bits(machInst, 23, 22);
+
+        switch (uns) {
+          case 0x0:
+            return new SveSmmla<int32_t, int8_t, int8_t>(
+                         machInst, zda, zn, zm);
+          case 0x2:
+            return new SveUsmmla<int32_t, uint8_t, int8_t>(
+                         machInst, zda, zn, zm);
+          case 0x3:
+            return new SveUmmla<uint32_t, uint8_t, uint8_t>(
+                         machInst, zda, zn, zm);
+          case 0x1:
+          default:
+            return new Unknown64(machInst);
+        }
+
+        return new Unknown64(machInst);
+    }  // decodeSveIntMatMulAdd
+
     StaticInstPtr
     decodeSveShiftByImmPred0(ExtMachInst machInst)
     {
@@ -3809,5 +3836,21 @@ namespace Aarch64
         return new Unknown64(machInst);
     }  // decodeSveMemStore
 
+    StaticInstPtr
+    decodeSveMisc(ExtMachInst machInst) {
+        switch(bits(machInst, 13, 10)) {
+          case 0b0110: {
+              return decodeSveIntMatMulAdd(machInst);
+              break;
+          }
+          default: {
+              return new Unknown64(machInst);
+              break;
+          }
+        }
+        return new Unknown64(machInst);
+    }  // decodeSveMisc
+
+
 }  // namespace Aarch64
 }};
diff --git a/src/arch/arm/isa/formats/sve_top_level.isa b/src/arch/arm/isa/formats/sve_top_level.isa
index 61f2f5ca6c..20a15a2971 100644
--- a/src/arch/arm/isa/formats/sve_top_level.isa
+++ b/src/arch/arm/isa/formats/sve_top_level.isa
@@ -44,6 +44,7 @@ namespace Aarch64
     StaticInstPtr decodeSveShiftByImmPred(ExtMachInst machInst);
     StaticInstPtr decodeSveIntArithUnaryPred(ExtMachInst machInst);
     StaticInstPtr decodeSveIntMulAdd(ExtMachInst machInst);
+    StaticInstPtr decodeSveIntMatMulAdd(ExtMachInst machInst);
     StaticInstPtr decodeSveIntArithUnpred(ExtMachInst machInst);
     StaticInstPtr decodeSveIntLogUnpred(ExtMachInst machInst);
     StaticInstPtr decodeSveIndexGen(ExtMachInst machInst);
@@ -94,6 +95,8 @@ namespace Aarch64
     StaticInstPtr decodeSveMemContigLoad(ExtMachInst machInst);
     StaticInstPtr decodeSveMemGather64(ExtMachInst machInst);
     StaticInstPtr decodeSveMemStore(ExtMachInst machInst);
+
+    StaticInstPtr decodeSveMisc(ExtMachInst machInst);
 }
 }};
 
@@ -104,6 +107,14 @@ namespace Aarch64
     StaticInstPtr
     decodeSveInt(ExtMachInst machInst)
     {
+        if (bits(machInst, 31, 29) == 0b010) {
+            if (bits(machInst, 24) == 0b1 &&
+                bits(machInst, 21) == 0b0 &&
+                bits(machInst, 15, 14)==0b10) {
+                return decodeSveMisc(machInst);
+            }
+        }
+
         uint8_t b_29_24_21 = (bits(machInst, 29) << 2) |
                              (bits(machInst, 24) << 1) |
                              bits(machInst, 21);
diff --git a/src/arch/arm/isa/insts/sve.isa b/src/arch/arm/isa/insts/sve.isa
index cbaa2b528a..6e8313bda5 100644
--- a/src/arch/arm/isa/insts/sve.isa
+++ b/src/arch/arm/isa/insts/sve.isa
@@ -4250,6 +4250,22 @@ let {{
     sbclbCode = 'res = srcElem1 + ~(srcElem2) + carryIn;'
     sveTerInstUnpred('sbclb', 'Sbclb', 'VectorIntegerArithOp', unsignedTypes,
                       sbclbCode, isTop=False, isAdd=False)
+    mmlaCode = ('destElem += srcElemA * srcElemB')
+    # SMMLA (vectors)
+    sveMatMulInst('smmla', 'Smmla', 'SimdMultAccOp',
+                  (('int32_t', 'int8_t', 'int8_t'),),
+                  numDestRows=2, numDestCols=2, K=8,
+                  elt_mul_op=mmlaCode)
+    # USMMLA (vectors)
+    sveMatMulInst('usmmla', 'Usmmla', 'SimdMultAccOp',
+                  (('int32_t', 'uint8_t', 'int8_t'),),
+                  numDestRows=2, numDestCols=2, K=8,
+                  elt_mul_op=mmlaCode)
+    # UMMLA (vectors)
+    sveMatMulInst('ummla', 'Ummla', 'SimdMultAccOp',
+                  (('uint32_t', 'uint8_t', 'uint8_t'),),
+                  numDestRows=2, numDestCols=2, K=8,
+                  elt_mul_op=mmlaCode)
     # MOVPRFX (predicated)
     movCode = 'destElem = srcElem1;'
     sveUnaryInst('movprfx', 'MovprfxPredM', 'SimdMiscOp', unsignedTypes,

From 9421a46d71016378e7991c26cf5c73494d62668e Mon Sep 17 00:00:00 2001
From: Richard Cooper <richard.cooper@arm.com>
Date: Tue, 29 Sep 2020 16:15:23 +0100
Subject: [PATCH 455/492] arch-arm: Re-factor Arm decoder for SVE mixed-sign
 DOT insts.

Re-factored the Arm instruction decoder to add placeholders for the
SVE Integer mixed-sign DOT product instructions. This has involved
moving some existing decode helper functions.

Change-Id: I42b280d4bd1b4ab9d8c633bdc523bd08c281d218
Reviewed-by: Richard Cooper <richard.cooper@arm.com>
Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/70731
Reviewed-by: Andreas Sandberg <andreas.sandberg@arm.com>
Maintainer: Andreas Sandberg <andreas.sandberg@arm.com>
Tested-by: kokoro <noreply+kokoro@google.com>
---
 src/arch/arm/isa/formats/sve_2nd_level.isa | 97 +++++++++++++++++++++-
 src/arch/arm/isa/formats/sve_top_level.isa | 65 +++++++++------
 2 files changed, 133 insertions(+), 29 deletions(-)

diff --git a/src/arch/arm/isa/formats/sve_2nd_level.isa b/src/arch/arm/isa/formats/sve_2nd_level.isa
index 4a44bab9b2..0d12a226e5 100644
--- a/src/arch/arm/isa/formats/sve_2nd_level.isa
+++ b/src/arch/arm/isa/formats/sve_2nd_level.isa
@@ -2241,7 +2241,7 @@ namespace Aarch64
     }
 
     StaticInstPtr
-    decodeSveMultiplyAddUnpred(ExtMachInst machInst)
+    decodeSveIntegerDotProductUnpred(ExtMachInst machInst)
     {
         RegIndex zda = (RegIndex) (uint8_t) bits(machInst, 4, 0);
         RegIndex zn = (RegIndex) (uint8_t) bits(machInst, 9, 5);
@@ -2273,10 +2273,10 @@ namespace Aarch64
         }
 
         return new Unknown64(machInst);
-    } // decodeSveMultiplyAddUnpred
+    } // decodeSveIntegerDotProductUnpred
 
     StaticInstPtr
-    decodeSveMultiplyIndexed(ExtMachInst machInst)
+    decodeSveIntegerDotProductIndexed(ExtMachInst machInst)
     {
         RegIndex zda = (RegIndex) (uint8_t) bits(machInst, 4, 0);
         RegIndex zn = (RegIndex) (uint8_t) bits(machInst, 9, 5);
@@ -2310,7 +2310,59 @@ namespace Aarch64
             }
         }
         return new Unknown64(machInst);
-    } // decodeSveMultiplyIndexed
+    } // decodeSveIntegerDotProductIndexed
+
+    StaticInstPtr
+    decodeSveMixedSignDotProduct(ExtMachInst machInst)
+    {
+        uint8_t size = (uint8_t) bits(machInst, 23, 22);
+        if (size != 0b10) {
+            return new Unknown64(machInst);
+        }
+
+        RegIndex zda M5_VAR_USED = (RegIndex)
+                                          (uint8_t) bits(machInst, 4, 0);
+        RegIndex zn M5_VAR_USED = (RegIndex)
+                                          (uint8_t) bits(machInst, 9, 5);
+        RegIndex zm M5_VAR_USED = (RegIndex)
+                                          (uint8_t) bits(machInst, 20, 16);
+
+        // Placeholder for SveUsdotv
+        //return SveUsdotv<int32_t, uint8_t, int8_t>(machInst, zda, zn, zm);
+        return new Unknown64(machInst);
+    } // decodeSveMixedSignDotProduct
+
+    StaticInstPtr
+    decodeSveMixedSignDotProductIndexed(ExtMachInst machInst)
+    {
+        uint8_t size = (uint8_t) bits(machInst, 23, 22);
+        if (size != 0b10) {
+            return new Unknown64(machInst);
+        }
+
+        RegIndex zda M5_VAR_USED = (RegIndex)
+                                          (uint8_t) bits(machInst, 4, 0);
+        RegIndex zn M5_VAR_USED = (RegIndex)
+                                          (uint8_t) bits(machInst, 9, 5);
+        RegIndex zm M5_VAR_USED = (RegIndex)
+                                          (uint8_t) bits(machInst, 18, 16);
+        uint8_t i2 M5_VAR_USED = (uint8_t) bits(machInst, 20, 19);
+
+        uint8_t usig = (uint8_t) bits(machInst, 10);
+
+        if (usig) {
+            // Placeholder for SveSudoti
+            //return SveSudoti<int32_t, int8_t, uint8_t>
+            //                         (machInst, zda, zn, zm, i2);
+            return new Unknown64(machInst);
+        } else {
+            // Placeholder for SveUsdoti
+            //return SveUsdoti<int32_t, uint8_t, int8_t>
+            //                         (machInst, zda, zn, zm, i2);
+            return new Unknown64(machInst);
+        }
+
+    } // decodeSveMixedSignDotProductIndexed
 
     StaticInstPtr
     decodeSve2ArithCarry(ExtMachInst machInst)
@@ -3851,6 +3903,43 @@ namespace Aarch64
         return new Unknown64(machInst);
     }  // decodeSveMisc
 
+    StaticInstPtr
+    decodeSveIntegerMulAddUnpred(ExtMachInst machInst)
+    {
+        uint8_t op1 = (uint8_t) bits(machInst, 13, 11);
+        if (bits(machInst, 14) == 0b0) {
+            if (op1 == 0b000) {
+                return decodeSveIntegerDotProductUnpred(machInst);
+            } else {
+                return new Unknown64(machInst);
+            }
+        } else {
+            if (op1 == 0b111 &&
+                bits(machInst, 10) == 0b0) {
+                return decodeSveMixedSignDotProduct(machInst);
+            } else {
+                return new Unknown64(machInst);
+            }
+        }
+    }
+
+    StaticInstPtr
+    decodeSveMultiplyIndexed(ExtMachInst machInst)
+    {
+        if (bits(machInst, 15, 13) == 0b000) {
+            switch (bits(machInst, 12, 11)) {
+              case 0b00:
+                return decodeSveIntegerDotProductIndexed(machInst);
+              case 0b11:
+                return decodeSveMixedSignDotProductIndexed(machInst);
+              default:
+                return new Unknown64(machInst);
+            }
+        } else {
+            return new Unknown64(machInst);
+        }
+        return new Unknown64(machInst);
+    }
 
 }  // namespace Aarch64
 }};
diff --git a/src/arch/arm/isa/formats/sve_top_level.isa b/src/arch/arm/isa/formats/sve_top_level.isa
index 20a15a2971..cb390eb972 100644
--- a/src/arch/arm/isa/formats/sve_top_level.isa
+++ b/src/arch/arm/isa/formats/sve_top_level.isa
@@ -72,8 +72,10 @@ namespace Aarch64
     StaticInstPtr decodeSveClamp(ExtMachInst machInst);
     StaticInstPtr decodeSve2Accum(ExtMachInst machInst);
 
-    StaticInstPtr decodeSveMultiplyAddUnpred(ExtMachInst machInst);
-    StaticInstPtr decodeSveMultiplyIndexed(ExtMachInst machInst);
+    StaticInstPtr decodeSveIntegerDotProductUnpred(ExtMachInst machInst);
+    StaticInstPtr decodeSveIntegerDotProductIndexed(ExtMachInst machInst);
+    StaticInstPtr decodeSveMixedSignDotProduct(ExtMachInst machInst);
+    StaticInstPtr decodeSveMixedSignDotProductIndexed(ExtMachInst machInst);
 
     StaticInstPtr decodeSveFpFastReduc(ExtMachInst machInst);
     StaticInstPtr decodeSveFpUnaryUnpred(ExtMachInst machInst);
@@ -97,6 +99,8 @@ namespace Aarch64
     StaticInstPtr decodeSveMemStore(ExtMachInst machInst);
 
     StaticInstPtr decodeSveMisc(ExtMachInst machInst);
+    StaticInstPtr decodeSveIntegerMulAddUnpred(ExtMachInst machInst);
+    StaticInstPtr decodeSveMultiplyIndexed(ExtMachInst machInst);
 }
 }};
 
@@ -108,10 +112,33 @@ namespace Aarch64
     decodeSveInt(ExtMachInst machInst)
     {
         if (bits(machInst, 31, 29) == 0b010) {
-            if (bits(machInst, 24) == 0b1 &&
-                bits(machInst, 21) == 0b0 &&
-                bits(machInst, 15, 14)==0b10) {
-                return decodeSveMisc(machInst);
+            uint8_t op1 = bits(machInst, 24, 23);
+            uint8_t op2 = bits(machInst, 15, 14);
+            switch (op1) {
+              case 0b00:
+              case 0b01:
+                  if (bits(machInst, 21) == 0b0) {
+                      if (bits(machInst, 15) == 0b0) {
+                          return decodeSveIntegerMulAddUnpred(machInst);
+                      } else {
+                          return new Unknown64(machInst);
+                      }
+                  } else {
+                      return decodeSveMultiplyIndexed(machInst);
+                  }
+                  break;
+              case 0b10:
+              case 0b11:
+                  if (bits(machInst, 21) == 0b0 && op2 == 0b10) {
+                      return decodeSveMisc(machInst);
+                  } else if (bits(machInst, 21) == 0b0 && op2 == 0b11) {
+                      return decodeSve2Accum(machInst);
+                  } else {
+                      return new Unknown64(machInst);
+                  }
+                  break;
+              default:
+                  return new Unknown64(machInst);
             }
         }
 
@@ -131,11 +158,10 @@ namespace Aarch64
                                       bits(machInst, 13);
                     switch (b_15_13) {
                       case 0x0:
-                        if (bits(machInst, 30)) {
-                            return decodeSveMultiplyAddUnpred(machInst);
-                        } else {
+                        if (!bits(machInst, 30)) {
                             return decodeSveIntArithBinPred(machInst);
                         }
+                        break;
                       case 0x1:
                         return decodeSveIntReduc(machInst);
                       case 0x2:
@@ -156,12 +182,11 @@ namespace Aarch64
                     if (b_13) {
                         return decodeSveIntLogUnpred(machInst);
                     } else {
-                        if (bits(machInst, 30)) {
-                            return decodeSveMultiplyIndexed(machInst);
-                        } else {
+                        if (!bits(machInst, 30)) {
                             return decodeSveIntArithUnpred(machInst);
                         }
                     }
+                    break;
                   case 0x1:
                     if (b_13) {
                         return new Unknown64(machInst);
@@ -187,22 +212,12 @@ namespace Aarch64
             }
           case 0x2:
             {
-              if (bits(machInst, 30) == 0) {
-                  if (bits(machInst, 20)) {
-                      return decodeSveIntWideImmPred(machInst);
+                if (bits(machInst, 20)) {
+                    return decodeSveIntWideImmPred(machInst);
                 } else {
                     return decodeSveLogMaskImm(machInst);
                 }
-              } else {
-                uint8_t b_15_14 = bits(machInst, 15, 14);
-                switch (b_15_14) {
-                  case 3:
-                    return decodeSve2Accum(machInst);
-                  default :
-                    break;
-                }
-              }
-              break;
+                break;
             }
           case 0x3:
             {

From f8b60b7a1d2b8f7112801c1b97c2ca4959e111d0 Mon Sep 17 00:00:00 2001
From: Richard Cooper <richard.cooper@arm.com>
Date: Thu, 1 Oct 2020 18:31:49 +0100
Subject: [PATCH 456/492] arch-arm: Added Armv8.2-I8MM SVE mixed-sign dot
 product instrs.

Add support for the SVE mixed sign dot product instructions (USDOT,
SUDOT) required by the Armv8.2 SVE Int8 matrix multiplication
extension (ARMv8.2-I8MM).

For more information please refer to the "ARM Architecture Reference
Manual Supplement - The Scalable Vector Extension (SVE), for ARMv8-A"
(https://developer.arm.com/architectures/cpu-architecture/a-profile/
docs/arm-architecture-reference-manual-supplement-armv8-a)

Change-Id: I83841654cee74b940f967b3a37b99d87c01bd92c
Reviewed-by: Richard Cooper <richard.cooper@arm.com>
Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/70732
Maintainer: Jason Lowe-Power <power.jg@gmail.com>
Reviewed-by: Jason Lowe-Power <power.jg@gmail.com>
Reviewed-by: Andreas Sandberg <andreas.sandberg@arm.com>
Tested-by: kokoro <noreply+kokoro@google.com>
---
 src/arch/arm/isa/formats/sve_2nd_level.isa | 70 +++++++++-------------
 src/arch/arm/isa/insts/sve.isa             | 50 +++++++++++-----
 src/arch/arm/isa/templates/sve.isa         | 39 ++++++++++--
 3 files changed, 98 insertions(+), 61 deletions(-)

diff --git a/src/arch/arm/isa/formats/sve_2nd_level.isa b/src/arch/arm/isa/formats/sve_2nd_level.isa
index 0d12a226e5..86c174d7c4 100644
--- a/src/arch/arm/isa/formats/sve_2nd_level.isa
+++ b/src/arch/arm/isa/formats/sve_2nd_level.isa
@@ -2256,19 +2256,19 @@ namespace Aarch64
         uint8_t usig = (uint8_t) bits(machInst, 10);
         if (size & 0x1) {
             if (usig) {
-                return new SveUdotv<uint16_t, uint64_t>(machInst,
-                                                        zda, zn, zm);
+                return new SveUdotv<uint16_t, uint16_t, uint64_t>
+                                        (machInst, zda, zn, zm);
             } else {
-                return new SveSdotv<int16_t, int64_t>(machInst,
-                                                        zda, zn, zm);
+                return new SveSdotv<int16_t, int16_t, int64_t>
+                                        (machInst, zda, zn, zm);
             }
         } else {
             if (usig) {
-                return new SveUdotv<uint8_t, uint32_t>(machInst,
-                                                        zda, zn, zm);
+                return new SveUdotv<uint8_t, uint8_t, uint32_t>
+                                        (machInst, zda, zn, zm);
             } else {
-                return new SveSdotv<int8_t, int32_t>(machInst,
-                                                        zda, zn, zm);
+                return new SveSdotv<int8_t, int8_t, int32_t>
+                                        (machInst, zda, zn, zm);
             }
         }
 
@@ -2292,21 +2292,21 @@ namespace Aarch64
             RegIndex zm = (RegIndex) (uint8_t) bits(machInst, 19, 16);
             uint8_t i1 = (uint8_t) bits(machInst, 20);
             if (usig) {
-                return new SveUdoti<uint16_t, uint64_t>(machInst,
-                                                        zda, zn, zm, i1);
+                return new SveUdoti<uint16_t, uint16_t, uint64_t>
+                                       (machInst, zda, zn, zm, i1);
             } else {
-                return new SveSdoti<int16_t, int64_t>(machInst,
-                                                        zda, zn, zm, i1);
+                return new SveSdoti<int16_t, int16_t, int64_t>
+                                       (machInst, zda, zn, zm, i1);
             }
         } else {
             RegIndex zm = (RegIndex) (uint8_t) bits(machInst, 18, 16);
             uint8_t i2 = (uint8_t) bits(machInst, 20, 19);
             if (usig) {
-                return new SveUdoti<uint8_t, uint32_t>(machInst,
-                                                        zda, zn, zm, i2);
+                return new SveUdoti<uint8_t, uint8_t, uint32_t>
+                                        (machInst, zda, zn, zm, i2);
             } else {
-                return new SveSdoti<int8_t, int32_t>(machInst,
-                                                        zda, zn, zm, i2);
+                return new SveSdoti<int8_t, int8_t, int32_t>
+                                        (machInst, zda, zn, zm, i2);
             }
         }
         return new Unknown64(machInst);
@@ -2320,16 +2320,12 @@ namespace Aarch64
             return new Unknown64(machInst);
         }
 
-        RegIndex zda M5_VAR_USED = (RegIndex)
-                                          (uint8_t) bits(machInst, 4, 0);
-        RegIndex zn M5_VAR_USED = (RegIndex)
-                                          (uint8_t) bits(machInst, 9, 5);
-        RegIndex zm M5_VAR_USED = (RegIndex)
-                                          (uint8_t) bits(machInst, 20, 16);
+        RegIndex zda = (RegIndex) (uint8_t) bits(machInst, 4, 0);
+        RegIndex zn = (RegIndex) (uint8_t) bits(machInst, 9, 5);
+        RegIndex zm = (RegIndex) (uint8_t) bits(machInst, 20, 16);
 
-        // Placeholder for SveUsdotv
-        //return SveUsdotv<int32_t, uint8_t, int8_t>(machInst, zda, zn, zm);
-        return new Unknown64(machInst);
+        return new SveUsdotv<uint8_t, int8_t, int32_t>
+                                 (machInst, zda, zn, zm);
     } // decodeSveMixedSignDotProduct
 
     StaticInstPtr
@@ -2340,26 +2336,18 @@ namespace Aarch64
             return new Unknown64(machInst);
         }
 
-        RegIndex zda M5_VAR_USED = (RegIndex)
-                                          (uint8_t) bits(machInst, 4, 0);
-        RegIndex zn M5_VAR_USED = (RegIndex)
-                                          (uint8_t) bits(machInst, 9, 5);
-        RegIndex zm M5_VAR_USED = (RegIndex)
-                                          (uint8_t) bits(machInst, 18, 16);
-        uint8_t i2 M5_VAR_USED = (uint8_t) bits(machInst, 20, 19);
-
+        RegIndex zda = (RegIndex) (uint8_t) bits(machInst, 4, 0);
+        RegIndex zn = (RegIndex) (uint8_t) bits(machInst, 9, 5);
+        RegIndex zm = (RegIndex) (uint8_t) bits(machInst, 18, 16);
+        uint8_t i2 = (uint8_t) bits(machInst, 20, 19);
         uint8_t usig = (uint8_t) bits(machInst, 10);
 
         if (usig) {
-            // Placeholder for SveSudoti
-            //return SveSudoti<int32_t, int8_t, uint8_t>
-            //                         (machInst, zda, zn, zm, i2);
-            return new Unknown64(machInst);
+            return new SveSudoti<int8_t, uint8_t, int32_t>
+                                     (machInst, zda, zn, zm, i2);
         } else {
-            // Placeholder for SveUsdoti
-            //return SveUsdoti<int32_t, uint8_t, int8_t>
-            //                         (machInst, zda, zn, zm, i2);
-            return new Unknown64(machInst);
+            return new SveUsdoti<uint8_t, int8_t, int32_t>
+                                     (machInst, zda, zn, zm, i2);
         }
 
     } // decodeSveMixedSignDotProductIndexed
diff --git a/src/arch/arm/isa/insts/sve.isa b/src/arch/arm/isa/insts/sve.isa
index 6e8313bda5..e7a773e3df 100644
--- a/src/arch/arm/isa/insts/sve.isa
+++ b/src/arch/arm/isa/insts/sve.isa
@@ -3098,6 +3098,15 @@ let {{
     def sveDotInst(name, Name, opClass, types, isIndexed = True):
         global header_output, exec_output, decoders
         code = sveEnabledCheckCode + '''
+        // Types of the extended versions of the source elements.
+        // Required to make sure the intermediate calculations don't overflow.
+        using ExtendedElementA = typename vector_element_traits::
+                                   extend_element<DElement,
+                                                  SElementA>::type;
+        using ExtendedElementB = typename vector_element_traits::
+                                   extend_element<DElement,
+                                                  SElementB>::type;
+
         unsigned eCount = ArmStaticInst::getCurSveVecLen<Element>(
                 xc->tcBase());
         for (int i = 0; i < eCount; ++i) {'''
@@ -3107,17 +3116,21 @@ let {{
             int s = segbase + imm;'''
         code += '''
             DElement res = AA64FpDest_xd[i];
-            DElement srcElem1, srcElem2;
+            ExtendedElementA srcElemA;
+            ExtendedElementB srcElemB;
             for (int j = 0; j <= 3; ++j) {
-                srcElem1 = static_cast<DElement>(AA64FpOp1_xs[4 * i + j]);'''
+                srcElemA = static_cast<ExtendedElementA>
+                                          (AA64FpOp1_srcA[4 * i + j]);'''
         if isIndexed:
             code += '''
-                srcElem2 = static_cast<DElement>(AA64FpOp2_xs[4 * s + j]);'''
+                srcElemB = static_cast<ExtendedElementB>
+                                          (AA64FpOp2_srcB[4 * s + j]);'''
         else:
             code += '''
-                srcElem2 = static_cast<DElement>(AA64FpOp2_xs[4 * i + j]);'''
+                srcElemB = static_cast<ExtendedElementB>
+                                          (AA64FpOp2_srcB[4 * i + j]);'''
         code += '''
-                res += srcElem1 * srcElem2;
+                res += srcElemA * srcElemB;
             }
             AA64FpDestMerge_xd[i] = res;
         }'''
@@ -3129,7 +3142,7 @@ let {{
             header_output += SveWideningTerImmOpDeclare.subst(iop)
         else:
             header_output += SveWideningTerOpDeclare.subst(iop)
-        exec_output += SveWideningOpExecute.subst(iop)
+        exec_output += SveWideningTerOpExecute.subst(iop)
         for type in types:
             substDict = {'targs': type, 'class_name': 'Sve' + Name}
             exec_output += SveOpExecDeclare.subst(substDict)
@@ -4468,11 +4481,14 @@ let {{
     sveBinInst('sdivr', 'Sdivr', 'SimdDivOp', signedTypes, sdivrCode,
                PredType.MERGE, True)
     # SDOT (indexed)
-    sveDotInst('sdot', 'Sdoti', 'SimdAluOp', ['int8_t, int32_t',
-        'int16_t, int64_t'], isIndexed = True)
+    sveDotInst('sdot', 'Sdoti', 'SimdAluOp', ['int8_t, int8_t, int32_t',
+        'int16_t, int16_t, int64_t'], isIndexed = True)
     # SDOT (vectors)
-    sveDotInst('sdot', 'Sdotv', 'SimdAluOp', ['int8_t, int32_t',
-        'int16_t, int64_t'], isIndexed = False)
+    sveDotInst('sdot', 'Sdotv', 'SimdAluOp', ['int8_t, int8_t, int32_t',
+        'int16_t, int16_t, int64_t'], isIndexed = False)
+    # SUDOT (indexed)
+    sveDotInst('sudot', 'Sudoti', 'SimdAluOp', ['int8_t, uint8_t, int32_t'],
+               isIndexed = True)
     # SEL (predicates)
     selCode = 'destElem = srcElem1;'
     svePredLogicalInst('sel', 'PredSel', 'SimdPredAluOp', ('uint8_t',),
@@ -4857,11 +4873,17 @@ let {{
     sveBinInst('udivr', 'Udivr', 'SimdDivOp', unsignedTypes, udivrCode,
                PredType.MERGE, True)
     # UDOT (indexed)
-    sveDotInst('udot', 'Udoti', 'SimdAluOp', ['uint8_t, uint32_t',
-        'uint16_t, uint64_t'], isIndexed = True)
+    sveDotInst('udot', 'Udoti', 'SimdAluOp', ['uint8_t, uint8_t, uint32_t',
+        'uint16_t, uint16_t, uint64_t'], isIndexed = True)
     # UDOT (vectors)
-    sveDotInst('udot', 'Udotv', 'SimdAluOp', ['uint8_t, uint32_t',
-        'uint16_t, uint64_t'], isIndexed = False)
+    sveDotInst('udot', 'Udotv', 'SimdAluOp', ['uint8_t, uint8_t, uint32_t',
+        'uint16_t, uint16_t, uint64_t'], isIndexed = False)
+    # USDOT (indexed)
+    sveDotInst('usdot', 'Usdoti', 'SimdAluOp', ['uint8_t, int8_t, int32_t'],
+               isIndexed = True)
+    # USDOT (vectors)
+    sveDotInst('usdot', 'Usdotv', 'SimdAluOp', ['uint8_t, int8_t, int32_t'],
+               isIndexed = False)
     # UMAX (immediate)
     sveWideImmInst('umax', 'UmaxImm', 'SimdCmpOp', unsignedTypes, maxCode)
     # UMAX (vectors)
diff --git a/src/arch/arm/isa/templates/sve.isa b/src/arch/arm/isa/templates/sve.isa
index 65abb1bcbb..813bda029d 100644
--- a/src/arch/arm/isa/templates/sve.isa
+++ b/src/arch/arm/isa/templates/sve.isa
@@ -1139,17 +1139,22 @@ class %(class_name)s : public %(base_class)s
 }};
 
 def template SveWideningTerImmOpDeclare {{
-template <class _SElement, class _DElement>
+template <class _SElementA, class _SElementB, class _DElement>
 class %(class_name)s : public %(base_class)s
 {
+  static_assert(sizeof(_SElementA) == sizeof(_SElementB),
+                "Source elements must have the same size.");
+
   private:
     %(reg_idx_arr_decl)s;
 
   protected:
     typedef _DElement Element;
-    typedef _SElement SElement;
+    typedef _SElementA SElementA;
+    typedef _SElementB SElementB;
     typedef _DElement DElement;
-    typedef _SElement TPSElem;
+    typedef _SElementA TPSrcAElem;
+    typedef _SElementB TPSrcBElem;
     typedef _DElement TPDElem;
 
   public:
@@ -1168,7 +1173,7 @@ class %(class_name)s : public %(base_class)s
 }};
 
 def template SveWideningTerOpDeclare {{
-template <class _SElement, class _DElement>
+template <class _SElementA, class _SElementB, class _DElement>
 class %(class_name)s : public %(base_class)s
 {
   private:
@@ -1176,9 +1181,11 @@ class %(class_name)s : public %(base_class)s
 
   protected:
     typedef _DElement Element;
-    typedef _SElement SElement;
+    typedef _SElementA SElementA;
+    typedef _SElementB SElementB;
     typedef _DElement DElement;
-    typedef _SElement TPSElem;
+    typedef _SElementA TPSrcAElem;
+    typedef _SElementB TPSrcBElem;
     typedef _DElement TPDElem;
 
   public:
@@ -1295,6 +1302,26 @@ def template SveWideningOpExecute {{
     }
 }};
 
+def template SveWideningTerOpExecute {{
+    template <class SElementA, class SElementB, class DElement>
+    Fault %(class_name)s<SElementA, SElementB, DElement>::execute
+           (ExecContext *xc,
+            trace::InstRecord *traceData) const
+    {
+        Fault fault = NoFault;
+        %(op_decl)s;
+        %(op_rd)s;
+
+        %(code)s;
+        if (fault == NoFault)
+        {
+            %(op_wb)s;
+        }
+
+        return fault;
+    }
+}};
+
 def template SveNonTemplatedOpExecute {{
     Fault
     %(class_name)s::execute(ExecContext *xc,

From 560df49c28352765445a39250a1cc3354b861957 Mon Sep 17 00:00:00 2001
From: Richard Cooper <richard.cooper@arm.com>
Date: Thu, 8 Oct 2020 19:40:15 +0100
Subject: [PATCH 457/492] arch-arm: Declare support for Armv8.2-I8MM.

Sets the appropriate bit in the ID_AA64ZFR0_EL1 sysreg that declares
support for ARMv8.2-I8MM.

This indicates that all pre-requisites for Armv8.2 SVE Int8 matrix
multiplication instructions have been met.

SMMLA, SUDOT, UMMLA, USMMLA, and USDOT instructions are implemented.

For more information please refer to the "ARM Architecture Reference
Manual Supplement - The Scalable Vector Extension (SVE), for ARMv8-A"
(https://developer.arm.com/architectures/cpu-architecture/a-profile/
docs/arm-architecture-reference-manual-supplement-armv8-a)

Additional Contributors: Giacomo Travaglini

Change-Id: Id97e1c5de8c23a25336a6b323034e9eca8e598e4
Reviewed-by: Richard Cooper <richard.cooper@arm.com>
Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/70733
Maintainer: Giacomo Travaglini <giacomo.travaglini@arm.com>
Reviewed-by: Andreas Sandberg <andreas.sandberg@arm.com>
Tested-by: kokoro <noreply+kokoro@google.com>
Reviewed-by: Giacomo Travaglini <giacomo.travaglini@arm.com>
Maintainer: Andreas Sandberg <andreas.sandberg@arm.com>
---
 src/arch/arm/ArmISA.py    | 1 +
 src/arch/arm/ArmSystem.py | 3 +++
 src/arch/arm/process.cc   | 1 +
 src/arch/arm/regs/misc.cc | 1 +
 4 files changed, 6 insertions(+)

diff --git a/src/arch/arm/ArmISA.py b/src/arch/arm/ArmISA.py
index fbd93b6bf6..ffe63ebb0a 100644
--- a/src/arch/arm/ArmISA.py
+++ b/src/arch/arm/ArmISA.py
@@ -56,6 +56,7 @@ class ArmDefaultSERelease(ArmRelease):
         "FEAT_F32MM",
         "FEAT_F64MM",
         "FEAT_SVE",
+        "FEAT_I8MM",
         # Armv8.3
         "FEAT_FCMA",
         "FEAT_JSCVT",
diff --git a/src/arch/arm/ArmSystem.py b/src/arch/arm/ArmSystem.py
index 49dab3e0e2..c5c0f436a3 100644
--- a/src/arch/arm/ArmSystem.py
+++ b/src/arch/arm/ArmSystem.py
@@ -80,6 +80,7 @@ class ArmExtension(ScopedEnum):
         "FEAT_LPA",  # Optional in Armv8.2
         "FEAT_F32MM",  # Optional in Armv8.2
         "FEAT_F64MM",  # Optional in Armv8.2
+        "FEAT_I8MM",  # Optional in Armv8.2
         # Armv8.3
         "FEAT_FCMA",
         "FEAT_JSCVT",
@@ -167,6 +168,7 @@ class ArmDefaultRelease(Armv8):
         "FEAT_SVE",
         "FEAT_F32MM",
         "FEAT_F64MM",
+        "FEAT_I8MM",
         # Armv8.3
         "FEAT_FCMA",
         "FEAT_JSCVT",
@@ -202,6 +204,7 @@ class Armv82(Armv81):
         "FEAT_SVE",
         "FEAT_F32MM",
         "FEAT_F64MM",
+        "FEAT_I8MM",
     ]
 
 
diff --git a/src/arch/arm/process.cc b/src/arch/arm/process.cc
index be8dfff330..b63567b6c3 100644
--- a/src/arch/arm/process.cc
+++ b/src/arch/arm/process.cc
@@ -323,6 +323,7 @@ ArmProcess64::armHwcapImpl2() const
     const AA64ZFR0 zf_r0 = tc->readMiscReg(MISCREG_ID_AA64ZFR0_EL1);
     hwcap |= (zf_r0.f32mm >= 1) ? Arm_Svef32mm : Arm_None;
     hwcap |= (zf_r0.f64mm >= 1) ? Arm_Svef64mm : Arm_None;
+    hwcap |= (zf_r0.i8mm >= 1) ? Arm_Svei8mm : Arm_None;
 
     return hwcap;
 }
diff --git a/src/arch/arm/regs/misc.cc b/src/arch/arm/regs/misc.cc
index 7e53e0dc24..b978044855 100644
--- a/src/arch/arm/regs/misc.cc
+++ b/src/arch/arm/regs/misc.cc
@@ -5407,6 +5407,7 @@ ISA::initializeMiscRegMetadata()
             AA64ZFR0 zfr0_el1 = 0;
             zfr0_el1.f32mm = release->has(ArmExtension::FEAT_F32MM) ? 1 : 0;
             zfr0_el1.f64mm = release->has(ArmExtension::FEAT_F64MM) ? 1 : 0;
+            zfr0_el1.i8mm = release->has(ArmExtension::FEAT_I8MM) ? 1 : 0;
             return zfr0_el1;
         }())
         .faultRead(EL0, faultIdst)

From d02ea0dfbbb2fe0edf85ad832ca28c0cfcae0ee5 Mon Sep 17 00:00:00 2001
From: Richard Cooper <richard.cooper@arm.com>
Date: Wed, 28 Oct 2020 22:10:01 +0000
Subject: [PATCH 458/492] arch-arm, cpu, configs: Add new Op Classes for Matrix
 Multiply insts

Add SimdMatMultAcc and SimdFloatMatMultAcc Op Classes for the SVE
Matrix Multiply Accumulate instructions in the SVE F32MM, F64MM and
I8MM extensions.

Initial latencies have been set to be the same as SimdMultAcc and
SimdFloatMultAcc respectively.

Change-Id: Ifab63a0efbb0ccfbd272245e0b0b055279f66e3a
Reviewed-by: Richard Cooper <richard.cooper@arm.com>
Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/70734
Maintainer: Andreas Sandberg <andreas.sandberg@arm.com>
Reviewed-by: Andreas Sandberg <andreas.sandberg@arm.com>
Maintainer: Giacomo Travaglini <giacomo.travaglini@arm.com>
Tested-by: kokoro <noreply+kokoro@google.com>
Reviewed-by: Giacomo Travaglini <giacomo.travaglini@arm.com>
---
 configs/common/cores/arm/HPI.py        | 2 ++
 configs/common/cores/arm/O3_ARM_v7a.py | 2 ++
 configs/common/cores/arm/ex5_LITTLE.py | 2 ++
 configs/common/cores/arm/ex5_big.py    | 2 ++
 src/arch/arm/isa/insts/sve.isa         | 8 ++++----
 src/cpu/FuncUnit.py                    | 4 +++-
 src/cpu/minor/BaseMinorCPU.py          | 2 ++
 src/cpu/o3/FuncUnitConfig.py           | 4 +++-
 src/cpu/op_class.hh                    | 4 +++-
 9 files changed, 23 insertions(+), 7 deletions(-)

diff --git a/configs/common/cores/arm/HPI.py b/configs/common/cores/arm/HPI.py
index c7a8127555..d3d46054f1 100644
--- a/configs/common/cores/arm/HPI.py
+++ b/configs/common/cores/arm/HPI.py
@@ -1420,6 +1420,7 @@ class HPI_FloatSimdFU(MinorFU):
             "SimdMisc",
             "SimdMult",
             "SimdMultAcc",
+            "SimdMatMultAcc",
             "SimdShift",
             "SimdShiftAcc",
             "SimdSqrt",
@@ -1431,6 +1432,7 @@ class HPI_FloatSimdFU(MinorFU):
             "SimdFloatMisc",
             "SimdFloatMult",
             "SimdFloatMultAcc",
+            "SimdFloatMatMultAcc",
             "SimdFloatSqrt",
         ]
     )
diff --git a/configs/common/cores/arm/O3_ARM_v7a.py b/configs/common/cores/arm/O3_ARM_v7a.py
index 77dc4e42a4..6a1734235a 100644
--- a/configs/common/cores/arm/O3_ARM_v7a.py
+++ b/configs/common/cores/arm/O3_ARM_v7a.py
@@ -53,6 +53,7 @@ class O3_ARM_v7a_FP(FUDesc):
         OpDesc(opClass="SimdMisc", opLat=3),
         OpDesc(opClass="SimdMult", opLat=5),
         OpDesc(opClass="SimdMultAcc", opLat=5),
+        OpDesc(opClass="SimdMatMultAcc", opLat=5),
         OpDesc(opClass="SimdShift", opLat=3),
         OpDesc(opClass="SimdShiftAcc", opLat=3),
         OpDesc(opClass="SimdSqrt", opLat=9),
@@ -64,6 +65,7 @@ class O3_ARM_v7a_FP(FUDesc):
         OpDesc(opClass="SimdFloatMisc", opLat=3),
         OpDesc(opClass="SimdFloatMult", opLat=3),
         OpDesc(opClass="SimdFloatMultAcc", opLat=5),
+        OpDesc(opClass="SimdFloatMatMultAcc", opLat=5),
         OpDesc(opClass="SimdFloatSqrt", opLat=9),
         OpDesc(opClass="FloatAdd", opLat=5),
         OpDesc(opClass="FloatCmp", opLat=5),
diff --git a/configs/common/cores/arm/ex5_LITTLE.py b/configs/common/cores/arm/ex5_LITTLE.py
index 6974837dc5..982792d2d2 100644
--- a/configs/common/cores/arm/ex5_LITTLE.py
+++ b/configs/common/cores/arm/ex5_LITTLE.py
@@ -56,6 +56,7 @@ class ex5_LITTLE_FP(MinorDefaultFloatSimdFU):
         OpDesc(opClass="SimdMisc", opLat=3),
         OpDesc(opClass="SimdMult", opLat=4),
         OpDesc(opClass="SimdMultAcc", opLat=5),
+        OpDesc(opClass="SimdMatMultAcc", opLat=5),
         OpDesc(opClass="SimdShift", opLat=3),
         OpDesc(opClass="SimdShiftAcc", opLat=3),
         OpDesc(opClass="SimdSqrt", opLat=9),
@@ -67,6 +68,7 @@ class ex5_LITTLE_FP(MinorDefaultFloatSimdFU):
         OpDesc(opClass="SimdFloatMisc", opLat=6),
         OpDesc(opClass="SimdFloatMult", opLat=15),
         OpDesc(opClass="SimdFloatMultAcc", opLat=6),
+        OpDesc(opClass="SimdFloatMatMultAcc", opLat=6),
         OpDesc(opClass="SimdFloatSqrt", opLat=17),
         OpDesc(opClass="FloatAdd", opLat=8),
         OpDesc(opClass="FloatCmp", opLat=6),
diff --git a/configs/common/cores/arm/ex5_big.py b/configs/common/cores/arm/ex5_big.py
index 70af6b8414..0d4d4903cf 100644
--- a/configs/common/cores/arm/ex5_big.py
+++ b/configs/common/cores/arm/ex5_big.py
@@ -58,6 +58,7 @@ class ex5_big_FP(FUDesc):
         OpDesc(opClass="SimdMisc", opLat=3),
         OpDesc(opClass="SimdMult", opLat=6),
         OpDesc(opClass="SimdMultAcc", opLat=5),
+        OpDesc(opClass="SimdMatMultAcc", opLat=5),
         OpDesc(opClass="SimdShift", opLat=3),
         OpDesc(opClass="SimdShiftAcc", opLat=3),
         OpDesc(opClass="SimdSqrt", opLat=9),
@@ -69,6 +70,7 @@ class ex5_big_FP(FUDesc):
         OpDesc(opClass="SimdFloatMisc", opLat=3),
         OpDesc(opClass="SimdFloatMult", opLat=6),
         OpDesc(opClass="SimdFloatMultAcc", opLat=1),
+        OpDesc(opClass="SimdFloatMatMultAcc", opLat=1),
         OpDesc(opClass="SimdFloatSqrt", opLat=9),
         OpDesc(opClass="FloatAdd", opLat=6),
         OpDesc(opClass="FloatCmp", opLat=5),
diff --git a/src/arch/arm/isa/insts/sve.isa b/src/arch/arm/isa/insts/sve.isa
index e7a773e3df..9999843b59 100644
--- a/src/arch/arm/isa/insts/sve.isa
+++ b/src/arch/arm/isa/insts/sve.isa
@@ -3971,7 +3971,7 @@ let {{
             fplibMul<DestElement>(srcElemA, srcElemB, fpscr), fpscr);
     '''
     # FMMLA (vectors)
-    sveMatMulInst('fmmla', 'Fmmla', 'SimdFloatMultAccOp', floatTypes,
+    sveMatMulInst('fmmla', 'Fmmla', 'SimdFloatMatMultAccOp', floatTypes,
                   numDestRows=2, numDestCols=2, K=2,
                   elt_mul_op=fmmlaCode)
 
@@ -4265,17 +4265,17 @@ let {{
                       sbclbCode, isTop=False, isAdd=False)
     mmlaCode = ('destElem += srcElemA * srcElemB')
     # SMMLA (vectors)
-    sveMatMulInst('smmla', 'Smmla', 'SimdMultAccOp',
+    sveMatMulInst('smmla', 'Smmla', 'SimdMatMultAccOp',
                   (('int32_t', 'int8_t', 'int8_t'),),
                   numDestRows=2, numDestCols=2, K=8,
                   elt_mul_op=mmlaCode)
     # USMMLA (vectors)
-    sveMatMulInst('usmmla', 'Usmmla', 'SimdMultAccOp',
+    sveMatMulInst('usmmla', 'Usmmla', 'SimdMatMultAccOp',
                   (('int32_t', 'uint8_t', 'int8_t'),),
                   numDestRows=2, numDestCols=2, K=8,
                   elt_mul_op=mmlaCode)
     # UMMLA (vectors)
-    sveMatMulInst('ummla', 'Ummla', 'SimdMultAccOp',
+    sveMatMulInst('ummla', 'Ummla', 'SimdMatMultAccOp',
                   (('uint32_t', 'uint8_t', 'uint8_t'),),
                   numDestRows=2, numDestCols=2, K=8,
                   elt_mul_op=mmlaCode)
diff --git a/src/cpu/FuncUnit.py b/src/cpu/FuncUnit.py
index a1050de242..012dfd0ee4 100644
--- a/src/cpu/FuncUnit.py
+++ b/src/cpu/FuncUnit.py
@@ -1,4 +1,4 @@
-# Copyright (c) 2010, 2017-2018, 2022 ARM Limited
+# Copyright (c) 2010, 2017-2018, 2020, 2022 ARM Limited
 # All rights reserved.
 #
 # The license below extends only to copyright in the software and shall
@@ -62,6 +62,7 @@ class OpClass(Enum):
         "SimdMisc",
         "SimdMult",
         "SimdMultAcc",
+        "SimdMatMultAcc",
         "SimdShift",
         "SimdShiftAcc",
         "SimdDiv",
@@ -74,6 +75,7 @@ class OpClass(Enum):
         "SimdFloatMisc",
         "SimdFloatMult",
         "SimdFloatMultAcc",
+        "SimdFloatMatMultAcc",
         "SimdFloatSqrt",
         "SimdReduceAdd",
         "SimdReduceAlu",
diff --git a/src/cpu/minor/BaseMinorCPU.py b/src/cpu/minor/BaseMinorCPU.py
index 6641a39b4e..bd27b92540 100644
--- a/src/cpu/minor/BaseMinorCPU.py
+++ b/src/cpu/minor/BaseMinorCPU.py
@@ -189,6 +189,7 @@ class MinorDefaultFloatSimdFU(MinorFU):
             "SimdMisc",
             "SimdMult",
             "SimdMultAcc",
+            "SimdMatMultAcc",
             "SimdShift",
             "SimdShiftAcc",
             "SimdDiv",
@@ -201,6 +202,7 @@ class MinorDefaultFloatSimdFU(MinorFU):
             "SimdFloatMisc",
             "SimdFloatMult",
             "SimdFloatMultAcc",
+            "SimdFloatMatMultAcc",
             "SimdFloatSqrt",
             "SimdReduceAdd",
             "SimdReduceAlu",
diff --git a/src/cpu/o3/FuncUnitConfig.py b/src/cpu/o3/FuncUnitConfig.py
index 3d626c2275..7ba49c93bf 100644
--- a/src/cpu/o3/FuncUnitConfig.py
+++ b/src/cpu/o3/FuncUnitConfig.py
@@ -1,4 +1,4 @@
-# Copyright (c) 2010, 2017 ARM Limited
+# Copyright (c) 2010, 2017, 2020 ARM Limited
 # All rights reserved.
 #
 # The license below extends only to copyright in the software and shall
@@ -87,6 +87,7 @@ class SIMD_Unit(FUDesc):
         OpDesc(opClass="SimdMisc"),
         OpDesc(opClass="SimdMult"),
         OpDesc(opClass="SimdMultAcc"),
+        OpDesc(opClass="SimdMatMultAcc"),
         OpDesc(opClass="SimdShift"),
         OpDesc(opClass="SimdShiftAcc"),
         OpDesc(opClass="SimdDiv"),
@@ -99,6 +100,7 @@ class SIMD_Unit(FUDesc):
         OpDesc(opClass="SimdFloatMisc"),
         OpDesc(opClass="SimdFloatMult"),
         OpDesc(opClass="SimdFloatMultAcc"),
+        OpDesc(opClass="SimdFloatMatMultAcc"),
         OpDesc(opClass="SimdFloatSqrt"),
         OpDesc(opClass="SimdReduceAdd"),
         OpDesc(opClass="SimdReduceAlu"),
diff --git a/src/cpu/op_class.hh b/src/cpu/op_class.hh
index 94d2794c76..0151df06a9 100644
--- a/src/cpu/op_class.hh
+++ b/src/cpu/op_class.hh
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2010, 2017-2018, 2022 ARM Limited
+ * Copyright (c) 2010, 2017-2018, 2020, 2022 ARM Limited
  * All rights reserved
  *
  * The license below extends only to copyright in the software and shall
@@ -72,6 +72,7 @@ static const OpClass SimdCvtOp = enums::SimdCvt;
 static const OpClass SimdMiscOp = enums::SimdMisc;
 static const OpClass SimdMultOp = enums::SimdMult;
 static const OpClass SimdMultAccOp = enums::SimdMultAcc;
+static const OpClass SimdMatMultAccOp = enums::SimdMatMultAcc;
 static const OpClass SimdShiftOp = enums::SimdShift;
 static const OpClass SimdShiftAccOp = enums::SimdShiftAcc;
 static const OpClass SimdDivOp = enums::SimdDiv;
@@ -87,6 +88,7 @@ static const OpClass SimdFloatDivOp = enums::SimdFloatDiv;
 static const OpClass SimdFloatMiscOp = enums::SimdFloatMisc;
 static const OpClass SimdFloatMultOp = enums::SimdFloatMult;
 static const OpClass SimdFloatMultAccOp = enums::SimdFloatMultAcc;
+static const OpClass SimdFloatMatMultAccOp = enums::SimdFloatMatMultAcc;
 static const OpClass SimdFloatSqrtOp = enums::SimdFloatSqrt;
 static const OpClass SimdFloatReduceCmpOp = enums::SimdFloatReduceCmp;
 static const OpClass SimdFloatReduceAddOp = enums::SimdFloatReduceAdd;

From fab3d8a1c1de836bb79b4f5151a0eabe79aefca3 Mon Sep 17 00:00:00 2001
From: Richard Cooper <richard.cooper@arm.com>
Date: Mon, 9 Nov 2020 18:50:16 +0000
Subject: [PATCH 459/492] arch-arm: Fix too long lines in existing Arm NEON
 instructons.

These lines break the current gem5 coding guidelines.

Change-Id: I587fcb2d75c4ab9de47fa53b4ae96526a20afe3f
Reviewed-by: Richard Cooper <richard.cooper@arm.com>
Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/70735
Reviewed-by: Jason Lowe-Power <power.jg@gmail.com>
Maintainer: Jason Lowe-Power <power.jg@gmail.com>
Maintainer: Andreas Sandberg <andreas.sandberg@arm.com>
Reviewed-by: Andreas Sandberg <andreas.sandberg@arm.com>
Tested-by: kokoro <noreply+kokoro@google.com>
---
 src/arch/arm/isa/formats/neon64.isa | 33 ++++++++++++++++++-----------
 src/arch/arm/isa/insts/neon64.isa   | 32 ++++++++++++++++------------
 2 files changed, 39 insertions(+), 26 deletions(-)

diff --git a/src/arch/arm/isa/formats/neon64.isa b/src/arch/arm/isa/formats/neon64.isa
index c200da74a8..5cce0d7c23 100644
--- a/src/arch/arm/isa/formats/neon64.isa
+++ b/src/arch/arm/isa/formats/neon64.isa
@@ -1,4 +1,4 @@
-// Copyright (c) 2012-2013 ARM Limited
+// Copyright (c) 2012-2013, 2020 ARM Limited
 // All rights reserved
 //
 // The license below extends only to copyright in the software and shall
@@ -1213,13 +1213,17 @@ namespace Aarch64
 
             switch (imm5_pos) {
               case 0:
-                return new InsElemX<uint8_t>(machInst, vd, vn, index1, index2);
+                return new InsElemX<uint8_t>(
+                        machInst, vd, vn, index1, index2);
               case 1:
-                return new InsElemX<uint16_t>(machInst, vd, vn, index1, index2);
+                return new InsElemX<uint16_t>(
+                        machInst, vd, vn, index1, index2);
               case 2:
-                return new InsElemX<uint32_t>(machInst, vd, vn, index1, index2);
+                return new InsElemX<uint32_t>(
+                        machInst, vd, vn, index1, index2);
               case 3:
-                return new InsElemX<uint64_t>(machInst, vd, vn, index1, index2);
+                return new InsElemX<uint64_t>(
+                        machInst, vd, vn, index1, index2);
               default:
                 return new Unknown64(machInst);
             }
@@ -1547,14 +1551,16 @@ namespace Aarch64
             if (u || (size == 0x0 || size == 0x3))
                 return new Unknown64(machInst);
             else
-                return decodeNeonSThreeImmHAndWReg<SqdmullElemX, SqdmullElem2X>(
-                    q, size, machInst, vd, vn, vm, index);
+                return decodeNeonSThreeImmHAndWReg
+                    <SqdmullElemX, SqdmullElem2X>(
+                        q, size, machInst, vd, vn, vm, index);
           case 0xc:
             if (u || (size == 0x0 || size == 0x3))
                 return new Unknown64(machInst);
             else
-                return decodeNeonSThreeImmHAndWReg<SqdmulhElemDX, SqdmulhElemQX>(
-                    q, size, machInst, vd, vn, vm, index);
+                return decodeNeonSThreeImmHAndWReg
+                    <SqdmulhElemDX, SqdmulhElemQX>(
+                        q, size, machInst, vd, vn, vm, index);
           case 0xd:
             if (u)
                 return decodeNeonSThreeImmHAndWReg<SqrdmlahElemDX,
@@ -2176,11 +2182,14 @@ namespace Aarch64
 
         switch (opcode) {
           case 0x9:
-            return decodeNeonSThreeHAndWReg<SqdmlalScX>(size, machInst, vd, vn, vm);
+            return decodeNeonSThreeHAndWReg<SqdmlalScX>(
+                    size, machInst, vd, vn, vm);
           case 0xb:
-            return decodeNeonSThreeHAndWReg<SqdmlslScX>(size, machInst, vd, vn, vm);
+            return decodeNeonSThreeHAndWReg<SqdmlslScX>(
+                    size, machInst, vd, vn, vm);
           case 0xd:
-            return decodeNeonSThreeHAndWReg<SqdmullScX>(size, machInst, vd, vn, vm);
+            return decodeNeonSThreeHAndWReg<SqdmullScX>(
+                    size, machInst, vd, vn, vm);
           default:
             return new Unknown64(machInst);
         }
diff --git a/src/arch/arm/isa/insts/neon64.isa b/src/arch/arm/isa/insts/neon64.isa
index e0083c9fcf..0da7f06ec3 100644
--- a/src/arch/arm/isa/insts/neon64.isa
+++ b/src/arch/arm/isa/insts/neon64.isa
@@ -1,6 +1,6 @@
 // -*- mode: c++ -*-
 
-// Copyright (c) 2012-2013, 2015-2018 ARM Limited
+// Copyright (c) 2012-2013, 2015-2018, 2020 ARM Limited
 // All rights reserved
 //
 // The license below extends only to copyright in the software and shall
@@ -1993,9 +1993,9 @@ let {{
             Element carryBit =
                 (((unsigned)srcElem1 & 0x1) +
                  ((unsigned)srcElem2 & 0x1)) >> 1;
-            // Use division instead of a shift to ensure the sign extension works
-            // right. The compiler will figure out if it can be a shift. Mask the
-            // inputs so they get truncated correctly.
+            // Use division instead of a shift to ensure the sign extension
+            // works right. The compiler will figure out if it can be a shift.
+            // Mask the inputs so they get truncated correctly.
             destElem = (((srcElem1 & ~(Element)1) / 2) +
                         ((srcElem2 & ~(Element)1) / 2)) + carryBit;
     '''
@@ -2035,9 +2035,9 @@ let {{
     hsubCode = '''
             Element borrowBit =
                 (((srcElem1 & 0x1) - (srcElem2 & 0x1)) >> 1) & 0x1;
-            // Use division instead of a shift to ensure the sign extension works
-            // right. The compiler will figure out if it can be a shift. Mask the
-            // inputs so they get truncated correctly.
+            // Use division instead of a shift to ensure the sign extension
+            // works right. The compiler will figure out if it can be a shift.
+            // Mask the inputs so they get truncated correctly.
             destElem = (((srcElem1 & ~(Element)1) / 2) -
                         ((srcElem2 & ~(Element)1) / 2)) - borrowBit;
     '''
@@ -2802,7 +2802,8 @@ let {{
             FPSCR fpscr = (FPSCR) FpscrQc;
             destElem = srcElem1;
             if (srcElem1 < 0 ||
-                    ((BigElement)destElem & mask(sizeof(Element) * 8)) != srcElem1) {
+                    ((BigElement)destElem & mask(sizeof(Element) * 8))
+                     != srcElem1) {
                 fpscr.qc = 1;
                 destElem = mask(sizeof(Element) * 8);
                 if (srcElem1 < 0)
@@ -2821,9 +2822,9 @@ let {{
             Element carryBit =
                 (((unsigned)srcElem1 & 0x1) +
                  ((unsigned)srcElem2 & 0x1) + 1) >> 1;
-            // Use division instead of a shift to ensure the sign extension works
-            // right. The compiler will figure out if it can be a shift. Mask the
-            // inputs so they get truncated correctly.
+            // Use division instead of a shift to ensure the sign extension
+            // works right. The compiler will figure out if it can be a shift.
+            // Mask the inputs so they get truncated correctly.
             destElem = (((srcElem1 & ~(Element)1) / 2) +
                         ((srcElem2 & ~(Element)1) / 2)) + carryBit;
     '''
@@ -3013,7 +3014,8 @@ let {{
             if (bits(destElem, sizeof(Element) * 8 - 1) == 0) {
                 if (bits(tmp, sizeof(Element) * 8 - 1) == 1 ||
                         tmp < srcElem1 || tmp < destElem) {
-                    destElem = (((Element) 1) << (sizeof(Element) * 8 - 1)) - 1;
+                    destElem = (((Element) 1) << (sizeof(Element) * 8 - 1))
+                               - 1;
                     fpscr.qc = 1;
                 } else {
                     destElem = tmp;
@@ -3021,9 +3023,11 @@ let {{
             } else {
                 Element absDestElem = (~destElem) + 1;
                 if (absDestElem < srcElem1) {
-                    // Still check for positive sat., no need to check for negative sat.
+                    // Still check for positive sat., no need to check for
+                    // negative sat.
                     if (bits(tmp, sizeof(Element) * 8 - 1) == 1) {
-                        destElem = (((Element) 1) << (sizeof(Element) * 8 - 1)) - 1;
+                        destElem = (((Element) 1) << (sizeof(Element) * 8 - 1))
+                                   - 1;
                         fpscr.qc = 1;
                     } else {
                         destElem = tmp;

From eb4f83b178605a90ac714f1b2f779847aecd84b2 Mon Sep 17 00:00:00 2001
From: Richard Cooper <richard.cooper@arm.com>
Date: Mon, 9 Nov 2020 18:56:31 +0000
Subject: [PATCH 460/492] arch-arm: Add support for Armv8.2-DotProd NEON
 extension.

Add support for the Armv8.2-DotProd NEON extension. This provides the
SDOT and UDOT SIMD Dot Product instructions.

For more information please refer to the Arm Architecture Reference
Manual (https://developer.arm.com/documentation/ddi0487/latest/).

Change-Id: I4caa3b97a74c65f32421487c55c3e36427194e61
Reviewed-by: Richard Cooper <richard.cooper@arm.com>
Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/70736
Maintainer: Jason Lowe-Power <power.jg@gmail.com>
Reviewed-by: Andreas Sandberg <andreas.sandberg@arm.com>
Maintainer: Andreas Sandberg <andreas.sandberg@arm.com>
Reviewed-by: Jason Lowe-Power <power.jg@gmail.com>
Tested-by: kokoro <noreply+kokoro@google.com>
---
 src/arch/arm/ArmISA.py              |  1 +
 src/arch/arm/ArmSystem.py           |  3 ++
 src/arch/arm/isa/formats/neon64.isa | 37 ++++++++++++++++
 src/arch/arm/isa/insts/neon64.isa   | 65 +++++++++++++++++++++++++++++
 src/arch/arm/regs/misc.cc           |  1 +
 5 files changed, 107 insertions(+)

diff --git a/src/arch/arm/ArmISA.py b/src/arch/arm/ArmISA.py
index ffe63ebb0a..8c1ee5ae42 100644
--- a/src/arch/arm/ArmISA.py
+++ b/src/arch/arm/ArmISA.py
@@ -57,6 +57,7 @@ class ArmDefaultSERelease(ArmRelease):
         "FEAT_F64MM",
         "FEAT_SVE",
         "FEAT_I8MM",
+        "FEAT_DOTPROD",
         # Armv8.3
         "FEAT_FCMA",
         "FEAT_JSCVT",
diff --git a/src/arch/arm/ArmSystem.py b/src/arch/arm/ArmSystem.py
index c5c0f436a3..eaaf4b1cb3 100644
--- a/src/arch/arm/ArmSystem.py
+++ b/src/arch/arm/ArmSystem.py
@@ -81,6 +81,7 @@ class ArmExtension(ScopedEnum):
         "FEAT_F32MM",  # Optional in Armv8.2
         "FEAT_F64MM",  # Optional in Armv8.2
         "FEAT_I8MM",  # Optional in Armv8.2
+        "FEAT_DOTPROD",  # Optional in Armv8.2
         # Armv8.3
         "FEAT_FCMA",
         "FEAT_JSCVT",
@@ -169,6 +170,7 @@ class ArmDefaultRelease(Armv8):
         "FEAT_F32MM",
         "FEAT_F64MM",
         "FEAT_I8MM",
+        "FEAT_DOTPROD",
         # Armv8.3
         "FEAT_FCMA",
         "FEAT_JSCVT",
@@ -205,6 +207,7 @@ class Armv82(Armv81):
         "FEAT_F32MM",
         "FEAT_F64MM",
         "FEAT_I8MM",
+        "FEAT_DOTPROD",
     ]
 
 
diff --git a/src/arch/arm/isa/formats/neon64.isa b/src/arch/arm/isa/formats/neon64.isa
index 5cce0d7c23..e083f6f25c 100644
--- a/src/arch/arm/isa/formats/neon64.isa
+++ b/src/arch/arm/isa/formats/neon64.isa
@@ -510,6 +510,7 @@ namespace Aarch64
     decodeNeon3RegExtension(ExtMachInst machInst)
     {
         uint8_t q      = bits(machInst, 30);
+        uint8_t qu     = bits(machInst, 30, 29);
         uint8_t size   = bits(machInst, 23, 22);
         uint8_t opcode = bits(machInst, 15, 11);
 
@@ -532,6 +533,19 @@ namespace Aarch64
             else
                 return decodeNeonSThreeHAndWReg<SqrdmlshDX>(
                     size, machInst, vd, vn, vm);
+          case 0x12:
+              switch (qu) {
+                case 0b00:
+                  return new SdotDX<int32_t>(machInst, vd, vn, vm);
+                case 0b01:
+                  return new UdotDX<uint32_t>(machInst, vd, vn, vm);
+                case 0b10:
+                  return new SdotQX<int32_t>(machInst, vd, vn, vm);
+                case 0b11:
+                  return new UdotQX<uint32_t>(machInst, vd, vn, vm);
+                default:
+                  return new Unknown64(machInst);
+              }
           case 0x18:
           case 0x19:
           case 0x1a:
@@ -1351,6 +1365,7 @@ namespace Aarch64
     {
         uint8_t q = bits(machInst, 30);
         uint8_t u = bits(machInst, 29);
+        uint8_t qu = bits(machInst, 30, 29);
         uint8_t size = bits(machInst, 23, 22);
         uint8_t L = bits(machInst, 21);
         uint8_t M = bits(machInst, 20);
@@ -1387,6 +1402,11 @@ namespace Aarch64
         }
         RegIndex vm_fp = (RegIndex) (uint8_t) (vmh << 4 | vm_bf);
 
+        // Index and 2nd register operand for FEAT_DOTPROD and
+        // FEAT_I8MM instructions
+        uint8_t index_dp = (H << 1) | L;
+        RegIndex vm_dp = (RegIndex) (uint8_t) (M << 4 | vm_bf);
+
         switch (opcode) {
           case 0x0:
             if (!u || (size == 0x0 || size == 0x3))
@@ -1573,6 +1593,23 @@ namespace Aarch64
           case 0xf:
             return decodeNeonSThreeImmHAndWReg<SqrdmlshElemDX, SqrdmlshElemQX>(
                     q, size, machInst, vd, vn, vm, index);
+          case 0xe:
+              switch (qu) {
+                case 0b00:
+                  return new SdotElemDX<int32_t>(machInst,
+                                                 vd, vn, vm_dp, index_dp);
+                case 0b01:
+                  return new UdotElemDX<uint32_t>(machInst,
+                                                  vd, vn, vm_dp, index_dp);
+                case 0b10:
+                  return new SdotElemQX<int32_t>(machInst,
+                                                 vd, vn, vm_dp, index_dp);
+                case 0b11:
+                  return new UdotElemQX<uint32_t>(machInst,
+                                                  vd, vn, vm_dp, index_dp);
+                default:
+                  return new Unknown64(machInst);
+              }
           default:
             return new Unknown64(machInst);
         }
diff --git a/src/arch/arm/isa/insts/neon64.isa b/src/arch/arm/isa/insts/neon64.isa
index 0da7f06ec3..53c0f112bf 100644
--- a/src/arch/arm/isa/insts/neon64.isa
+++ b/src/arch/arm/isa/insts/neon64.isa
@@ -1082,6 +1082,71 @@ let {{
                        complex=True)
     threeEqualRegInstX("fcmla", "FcmlaQX", "SimdFloatMultAccOp",
                        floatTypes, 4, fcmla_vec, True, complex=True)
+
+    def intDotInst(name, Name, opClass,
+                   destIsSigned, src1IsSigned, src2IsSigned,
+                   rCount, byElem):
+        destType = "int32_t" if destIsSigned else "uint32_t"
+        src1Type = "int8_t" if src1IsSigned else "uint8_t"
+        src2Type = "int8_t" if src2IsSigned else "uint8_t"
+        dotCode = '''
+        using Src1Element = %(src1Type)s;
+        using Src2Element = %(src2Type)s;
+
+        // Neon dot instructions always generate one output element
+        // from 4 pairs of source elements.
+        static_assert(sizeof(Element) == 4 * sizeof(Src1Element));
+        static_assert(sizeof(Element) == 4 * sizeof(Src2Element));
+
+        // Extended source element types to avoid overflow of intermediate
+        // calculations.
+        using ExtendedSrc1Element =
+                typename vector_element_traits::
+                    extend_element<Element, Src1Element>::type;
+        using ExtendedSrc2Element =
+                typename vector_element_traits::
+                    extend_element<Element, Src2Element>::type;
+
+        for (unsigned i = 0; i < eCount; ++i) {
+            Element src1ElemsPacked = letoh(srcReg1.elements[i]);
+            Element src2ElemsPacked = letoh(srcReg2.elements[%(src2Index)s]);
+
+            Src1Element *src1Elems =
+                reinterpret_cast<Src1Element*>(&src1ElemsPacked);
+            Src2Element *src2Elems =
+                reinterpret_cast<Src2Element*>(&src2ElemsPacked);
+
+            // Dot instructions accumulate into the dest reg
+            Element destElem = letoh(destReg.elements[i]);
+
+            for (unsigned j = 0; j < 4; ++j) {
+                ExtendedSrc1Element src1Elem =
+                    static_cast<ExtendedSrc1Element>(src1Elems[j]);
+                ExtendedSrc2Element src2Elem =
+                    static_cast<ExtendedSrc2Element>(src2Elems[j]);
+                destElem += src1Elem * src2Elem;
+            }
+            destReg.elements[i] = htole(destElem);
+        }
+        ''' % dict(src1Type=src1Type, src2Type=src2Type,
+                   src2Index="imm" if byElem else "i")
+        threeEqualRegInstX(name, Name, opClass, (destType,), rCount,
+                           dotCode, readDest=True, byElem=byElem,
+                           complex=True)
+
+    # SDOT (vector)
+    intDotInst('sdot', 'SdotDX', 'SimdAluOp', True, True, True, 2, False)
+    intDotInst('sdot', 'SdotQX', 'SimdAluOp', True, True, True, 4, False)
+    # SDOT (element)
+    intDotInst('sdot', 'SdotElemDX', 'SimdAluOp', True, True, True, 2, True)
+    intDotInst('sdot', 'SdotElemQX', 'SimdAluOp', True, True, True, 4, True)
+    # UDOT (vector)
+    intDotInst('udot', 'UdotDX', 'SimdAluOp', False, False, False, 2, False)
+    intDotInst('udot', 'UdotQX', 'SimdAluOp', False, False, False, 4, False)
+    # UDOT (element)
+    intDotInst('udot', 'UdotElemDX', 'SimdAluOp', False, False, False, 2, True)
+    intDotInst('udot', 'UdotElemQX', 'SimdAluOp', False, False, False, 4, True)
+
     # CLS
     clsCode = '''
             unsigned count = 0;
diff --git a/src/arch/arm/regs/misc.cc b/src/arch/arm/regs/misc.cc
index b978044855..ed15f25e69 100644
--- a/src/arch/arm/regs/misc.cc
+++ b/src/arch/arm/regs/misc.cc
@@ -3988,6 +3988,7 @@ ISA::initializeMiscRegMetadata()
               isar0_el1.sha1 = 0;
               isar0_el1.aes = 0;
           }
+          isar0_el1.dp = release->has(ArmExtension::FEAT_DOTPROD) ? 0x1 : 0x0;
           isar0_el1.atomic = release->has(ArmExtension::FEAT_LSE) ? 0x2 : 0x0;
           isar0_el1.rdm = release->has(ArmExtension::FEAT_RDM) ? 0x1 : 0x0;
           isar0_el1.tme = release->has(ArmExtension::TME) ? 0x1 : 0x0;

From 9de1443ebb8196950bcfdfc481ee0550feab0d7b Mon Sep 17 00:00:00 2001
From: Richard Cooper <richard.cooper@arm.com>
Date: Mon, 9 Nov 2020 19:05:09 +0000
Subject: [PATCH 461/492] arch-arm: Add support for Armv8.2-I8MM NEON
 extension.

Add support for the Armv8.2-I8MM NEON extension. This provides the
SUDOT and USDOT mixed-sign SIMD Dot Product instructions, as well as
the SMMLA, UMMLA, and USMMLA SIMD Matrix Multiply-Accumulate
instructions.

For more information please refer to the Arm Architecture Reference
Manual (https://developer.arm.com/documentation/ddi0487/latest/).

Additional Contributors: Giacomo Travaglini

Change-Id: I6fb9318f67cc9d2737079283e1a095630c4d2ad9
Reviewed-by: Richard Cooper <richard.cooper@arm.com>
Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/70737
Maintainer: Jason Lowe-Power <power.jg@gmail.com>
Reviewed-by: Andreas Sandberg <andreas.sandberg@arm.com>
Reviewed-by: Jason Lowe-Power <power.jg@gmail.com>
Maintainer: Andreas Sandberg <andreas.sandberg@arm.com>
Tested-by: kokoro <noreply+kokoro@google.com>
---
 src/arch/arm/isa/formats/neon64.isa | 51 ++++++++++++++++-
 src/arch/arm/isa/insts/neon64.isa   | 88 +++++++++++++++++++++++++++++
 src/arch/arm/process.cc             |  3 +
 src/arch/arm/regs/misc.cc           |  1 +
 src/arch/arm/regs/misc_types.hh     |  1 +
 5 files changed, 141 insertions(+), 3 deletions(-)

diff --git a/src/arch/arm/isa/formats/neon64.isa b/src/arch/arm/isa/formats/neon64.isa
index e083f6f25c..8d9b74dfa3 100644
--- a/src/arch/arm/isa/formats/neon64.isa
+++ b/src/arch/arm/isa/formats/neon64.isa
@@ -510,6 +510,7 @@ namespace Aarch64
     decodeNeon3RegExtension(ExtMachInst machInst)
     {
         uint8_t q      = bits(machInst, 30);
+        uint8_t u      = bits(machInst, 29);
         uint8_t qu     = bits(machInst, 30, 29);
         uint8_t size   = bits(machInst, 23, 22);
         uint8_t opcode = bits(machInst, 15, 11);
@@ -546,6 +547,20 @@ namespace Aarch64
                 default:
                   return new Unknown64(machInst);
               }
+          case 0x13:
+              if (q) {
+                  return new UsdotQX<int32_t>(machInst, vd, vn, vm);
+              } else {
+                  return new UsdotDX<int32_t>(machInst, vd, vn, vm);
+              }
+          case 0x14:
+              if (u) {
+                  return new UmmlaQX<uint32_t>(machInst, vd, vn, vm);
+              } else {
+                  return new SmmlaQX<int32_t>(machInst, vd, vn, vm);
+              }
+          case 0x15:
+              return new UsmmlaQX<int32_t>(machInst, vd, vn, vm);
           case 0x18:
           case 0x19:
           case 0x1a:
@@ -1590,9 +1605,6 @@ namespace Aarch64
                 return decodeNeonSThreeImmHAndWReg<SqrdmulhElemDX,
                                                    SqrdmulhElemQX>(
                     q, size, machInst, vd, vn, vm, index);
-          case 0xf:
-            return decodeNeonSThreeImmHAndWReg<SqrdmlshElemDX, SqrdmlshElemQX>(
-                    q, size, machInst, vd, vn, vm, index);
           case 0xe:
               switch (qu) {
                 case 0b00:
@@ -1610,6 +1622,39 @@ namespace Aarch64
                 default:
                   return new Unknown64(machInst);
               }
+          case 0xf:
+              if (u) {
+                   // Armv8.2-RDMA SQRDMLSH(elem)
+                   return decodeNeonSThreeImmHAndWReg<SqrdmlshElemDX,
+                                                      SqrdmlshElemQX>(
+                       q, size, machInst, vd, vn, vm, index);
+              } else {
+                  switch (size) {
+                  case 0x0:
+                      if (q) {
+                          return new SudotElemQX<int32_t>(
+                                  machInst, vd, vn, vm_dp, index_dp);
+                      } else {
+                          return new SudotElemDX<int32_t>(
+                                  machInst, vd, vn, vm_dp, index_dp);
+                      }
+                  case 0x1:
+                       // Armv8.2-BF16 BFDOT(elem)
+                       return new Unknown64(machInst);
+                  case 0x2:
+                      if (q) {
+                          return new UsdotElemQX<int32_t>(
+                                  machInst, vd, vn, vm_dp, index_dp);
+                      } else {
+                          return new UsdotElemDX<int32_t>(
+                                  machInst, vd, vn, vm_dp, index_dp);
+                      }
+                  case 0x3:
+                  default:
+                       // Armv8.2-BF16 BFMLALB(elem), BFMLALT(elem)
+                       return new Unknown64(machInst);
+                  }
+              }
           default:
             return new Unknown64(machInst);
         }
diff --git a/src/arch/arm/isa/insts/neon64.isa b/src/arch/arm/isa/insts/neon64.isa
index 53c0f112bf..6608f61688 100644
--- a/src/arch/arm/isa/insts/neon64.isa
+++ b/src/arch/arm/isa/insts/neon64.isa
@@ -1146,6 +1146,94 @@ let {{
     # UDOT (element)
     intDotInst('udot', 'UdotElemDX', 'SimdAluOp', False, False, False, 2, True)
     intDotInst('udot', 'UdotElemQX', 'SimdAluOp', False, False, False, 4, True)
+    # SUDOT (element)
+    intDotInst('sudot', 'SudotElemDX', 'SimdAluOp', True, True, False, 2, True)
+    intDotInst('sudot', 'SudotElemQX', 'SimdAluOp', True, True, False, 4, True)
+    # USDOT (vector)
+    intDotInst('usdot', 'UsdotDX', 'SimdAluOp', True, False, True, 2, False)
+    intDotInst('usdot', 'UsdotQX', 'SimdAluOp', True, False, True, 4, False)
+    # USDOT (element)
+    intDotInst('usdot', 'UsdotElemDX', 'SimdAluOp', True, False, True, 2, True)
+    intDotInst('usdot', 'UsdotElemQX', 'SimdAluOp', True, False, True, 4, True)
+
+    def intMatMulInst(name, Name, opClass,
+                      destIsSigned, src1IsSigned, src2IsSigned):
+        destType = "int32_t" if destIsSigned else "uint32_t"
+        src1Type = "int8_t" if src1IsSigned else "uint8_t"
+        src2Type = "int8_t" if src2IsSigned else "uint8_t"
+        matMulCode = '''
+        using Src1Element = %(src1Type)s;
+        using Src2Element = %(src2Type)s;
+
+        // Neon MM instructions always generate four output elements
+        // from 16 pairs of source elements.
+        static_assert(sizeof(Element) == 4 * sizeof(Src1Element));
+        static_assert(sizeof(Element) == 4 * sizeof(Src2Element));
+
+        // Extended source element types to avoid overflow of intermediate
+        // calculations.
+        using ExtendedSrc1Element =
+                typename vector_element_traits::
+                    extend_element<Element, Src1Element>::type;
+        using ExtendedSrc2Element =
+                typename vector_element_traits::
+                    extend_element<Element, Src2Element>::type;
+
+        // Properties of the matrices
+        constexpr unsigned destMatSize = 2; // Dest Matrices are dim 2x2
+        constexpr unsigned K = 8;           // Src matrices are dim 2x8 & 8x2
+
+        constexpr unsigned eltsPerMatrix = destMatSize * destMatSize;
+
+        Element destMat[eltsPerMatrix] = {0};
+        for (unsigned j = 0; j < eltsPerMatrix; ++j) {
+            destMat[j] = letoh(destReg.elements[j]);
+        }
+
+        Element src1MatPacked[eltsPerMatrix] = {0};
+        Element src2MatPacked[eltsPerMatrix] = {0};
+        for (unsigned j = 0; j < eltsPerMatrix; ++j) {
+            src1MatPacked[j] = letoh(srcReg1.elements[j]);
+            src2MatPacked[j] = letoh(srcReg2.elements[j]);
+        }
+
+        Src1Element *src1Mat =
+            reinterpret_cast<Src1Element*>(&src1MatPacked);
+        Src2Element *src2Mat =
+            reinterpret_cast<Src2Element*>(&src2MatPacked);
+
+        unsigned destEltIdx = 0;
+        for (unsigned rowIdx = 0; rowIdx < destMatSize; ++rowIdx) {
+            for (unsigned colIdx = 0; colIdx < destMatSize; ++colIdx) {
+                Element destElem = destMat[destEltIdx];
+                for (unsigned k = 0; k < K; ++k) {
+                    const ExtendedSrc1Element src1Elem =
+                        static_cast<ExtendedSrc1Element>
+                                            (src1Mat[K * rowIdx + k]);
+                    const ExtendedSrc2Element src2Elem =
+                        static_cast<ExtendedSrc2Element>
+                                            (src2Mat[K * colIdx + k]);
+
+                    destElem += src1Elem * src2Elem;
+                }
+                destMat[destEltIdx++] = destElem;
+            }
+        }
+
+        for (unsigned j = 0; j < eltsPerMatrix; ++j) {
+            destReg.elements[j] = htole(destMat[j]);
+        }
+        ''' % dict(src1Type=src1Type, src2Type=src2Type)
+        threeEqualRegInstX(name, Name, opClass, (destType,), 4,
+                           matMulCode, readDest=True, byElem=False,
+                           complex=True)
+
+    # SMMLA
+    intMatMulInst('smmla', 'SmmlaQX', 'SimdMatMultAccOp', True, True, True)
+    # USMMLA
+    intMatMulInst('usmmla', 'UsmmlaQX', 'SimdMatMultAccOp', True, False, True)
+    # UMMLA
+    intMatMulInst('ummla', 'UmmlaQX', 'SimdMatMultAccOp', False, False, False)
 
     # CLS
     clsCode = '''
diff --git a/src/arch/arm/process.cc b/src/arch/arm/process.cc
index b63567b6c3..9aa519fe36 100644
--- a/src/arch/arm/process.cc
+++ b/src/arch/arm/process.cc
@@ -320,6 +320,9 @@ ArmProcess64::armHwcapImpl2() const
     hwcap |= (isa_r0.ts >= 2) ? Arm_Flagm2 : Arm_None;
     hwcap |= (isa_r0.rndr >= 1) ? Arm_Rng : Arm_None;
 
+    const AA64ISAR1 isa_r1 = tc->readMiscReg(MISCREG_ID_AA64ISAR1_EL1);
+    hwcap |= (isa_r1.i8mm >= 1) ? Arm_I8mm : Arm_None;
+
     const AA64ZFR0 zf_r0 = tc->readMiscReg(MISCREG_ID_AA64ZFR0_EL1);
     hwcap |= (zf_r0.f32mm >= 1) ? Arm_Svef32mm : Arm_None;
     hwcap |= (zf_r0.f64mm >= 1) ? Arm_Svef64mm : Arm_None;
diff --git a/src/arch/arm/regs/misc.cc b/src/arch/arm/regs/misc.cc
index ed15f25e69..dcb6e2b048 100644
--- a/src/arch/arm/regs/misc.cc
+++ b/src/arch/arm/regs/misc.cc
@@ -4005,6 +4005,7 @@ ISA::initializeMiscRegMetadata()
     InitReg(MISCREG_ID_AA64ISAR1_EL1)
       .reset([p,release=release](){
           AA64ISAR1 isar1_el1 = p.id_aa64isar1_el1;
+          isar1_el1.i8mm = release->has(ArmExtension::FEAT_I8MM) ? 0x1 : 0x0;
           isar1_el1.apa = release->has(ArmExtension::FEAT_PAuth) ? 0x1 : 0x0;
           isar1_el1.jscvt = release->has(ArmExtension::FEAT_JSCVT) ? 0x1 : 0x0;
           isar1_el1.fcma = release->has(ArmExtension::FEAT_FCMA) ? 0x1 : 0x0;
diff --git a/src/arch/arm/regs/misc_types.hh b/src/arch/arm/regs/misc_types.hh
index b7a1207cf5..4bb234fd10 100644
--- a/src/arch/arm/regs/misc_types.hh
+++ b/src/arch/arm/regs/misc_types.hh
@@ -127,6 +127,7 @@ namespace ArmISA
     EndBitUnion(AA64ISAR0)
 
     BitUnion64(AA64ISAR1)
+        Bitfield<55, 52> i8mm;
         Bitfield<43, 40> specres;
         Bitfield<39, 36> sb;
         Bitfield<35, 32> frintts;

From 0fae6e8163ab50d24c74098397d1f65c7a1664fd Mon Sep 17 00:00:00 2001
From: Giacomo Travaglini <giacomo.travaglini@arm.com>
Date: Sat, 15 Apr 2023 09:19:31 +0100
Subject: [PATCH 462/492] arch-arm: Implement FEAT_EVT

This extension is optional in Armv8.2 but mandatory since Armv8.5
We only implement this for AArch64

Change-Id: I063642ac24d27f0a81ba79b1d38f72468bb130eb
Signed-off-by: Giacomo Travaglini <giacomo.travaglini@arm.com>
Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/70938
Reviewed-by: Richard Cooper <richard.cooper@arm.com>
Tested-by: kokoro <noreply+kokoro@google.com>
Maintainer: Jason Lowe-Power <power.jg@gmail.com>
---
 src/arch/arm/ArmSystem.py |  3 ++
 src/arch/arm/regs/misc.cc | 88 +++++++++++++++++++++++++++++++--------
 2 files changed, 73 insertions(+), 18 deletions(-)

diff --git a/src/arch/arm/ArmSystem.py b/src/arch/arm/ArmSystem.py
index eaaf4b1cb3..4de35633d3 100644
--- a/src/arch/arm/ArmSystem.py
+++ b/src/arch/arm/ArmSystem.py
@@ -95,6 +95,7 @@ class ArmExtension(ScopedEnum):
         "FEAT_FLAGM2",
         "FEAT_RNG",
         "FEAT_RNG_TRAP",
+        "FEAT_EVT",
         # Armv9.2
         "FEAT_SME",  # Optional in Armv9.2
         # Others
@@ -182,6 +183,7 @@ class ArmDefaultRelease(Armv8):
         "FEAT_IDST",
         # Armv8.5
         "FEAT_FLAGM2",
+        "FEAT_EVT",
         # Armv9.2
         "FEAT_SME",
     ]
@@ -229,6 +231,7 @@ class Armv85(Armv84):
         "FEAT_FLAGM2",
         "FEAT_RNG",
         "FEAT_RNG_TRAP",
+        "FEAT_EVT",
     ]
 
 
diff --git a/src/arch/arm/regs/misc.cc b/src/arch/arm/regs/misc.cc
index dcb6e2b048..69944c57df 100644
--- a/src/arch/arm/regs/misc.cc
+++ b/src/arch/arm/regs/misc.cc
@@ -1445,7 +1445,8 @@ faultPouEL0(const MiscRegLUTEntry &entry,
         }
     } else if (el2_enabled && !in_host && hcr.tpu) {
         return inst.generateTrap(EL2);
-    } else if (el2_enabled && !in_host && hcr.tocu) {
+    } else if (el2_enabled && !in_host &&
+               HaveExt(tc, ArmExtension::FEAT_EVT) && hcr.tocu) {
         return inst.generateTrap(EL2);
     } else if (el2_enabled && in_host && !sctlr2.uci) {
         return inst.generateTrap(EL2);
@@ -1462,7 +1463,8 @@ faultPouEL1(const MiscRegLUTEntry &entry,
     const bool el2_enabled = EL2Enabled(tc);
     if (el2_enabled && hcr.tpu) {
         return inst.generateTrap(EL2);
-    } else if (el2_enabled && hcr.tocu) {
+    } else if (el2_enabled && HaveExt(tc, ArmExtension::FEAT_EVT) &&
+               hcr.tocu) {
         return inst.generateTrap(EL2);
     } else {
         return NoFault;
@@ -1477,7 +1479,8 @@ faultPouIsEL1(const MiscRegLUTEntry &entry,
     const bool el2_enabled = EL2Enabled(tc);
     if (el2_enabled && hcr.tpu) {
         return inst.generateTrap(EL2);
-    } else if (el2_enabled && hcr.ticab) {
+    } else if (el2_enabled && HaveExt(tc, ArmExtension::FEAT_EVT) &&
+               hcr.ticab) {
         return inst.generateTrap(EL2);
     } else {
         return NoFault;
@@ -1749,6 +1752,54 @@ faultCpacrVheEL2(const MiscRegLUTEntry &entry,
     }                                                        \
 }
 
+Fault
+faultTlbiOsEL1(const MiscRegLUTEntry &entry,
+    ThreadContext *tc, const MiscRegOp64 &inst)
+{
+    const HCR hcr = tc->readMiscRegNoEffect(MISCREG_HCR_EL2);
+    const bool el2_enabled = EL2Enabled(tc);
+    if (el2_enabled && hcr.ttlb) {
+        return inst.generateTrap(EL2);
+    } else if (el2_enabled && HaveExt(tc, ArmExtension::FEAT_EVT) &&
+               hcr.ttlbos) {
+        return inst.generateTrap(EL2);
+    } else {
+        return NoFault;
+    }
+}
+
+Fault
+faultTlbiIsEL1(const MiscRegLUTEntry &entry,
+    ThreadContext *tc, const MiscRegOp64 &inst)
+{
+    const HCR hcr = tc->readMiscRegNoEffect(MISCREG_HCR_EL2);
+    const bool el2_enabled = EL2Enabled(tc);
+    if (el2_enabled && hcr.ttlb) {
+        return inst.generateTrap(EL2);
+    } else if (el2_enabled && HaveExt(tc, ArmExtension::FEAT_EVT) &&
+               hcr.ttlbis) {
+        return inst.generateTrap(EL2);
+    } else {
+        return NoFault;
+    }
+}
+
+Fault
+faultCacheEL1(const MiscRegLUTEntry &entry,
+    ThreadContext *tc, const MiscRegOp64 &inst)
+{
+    const HCR hcr = tc->readMiscRegNoEffect(MISCREG_HCR_EL2);
+    const bool el2_enabled = EL2Enabled(tc);
+    if (el2_enabled && hcr.tid2) {
+        return inst.generateTrap(EL2);
+    } else if (el2_enabled && HaveExt(tc, ArmExtension::FEAT_EVT) &&
+               hcr.tid4) {
+        return inst.generateTrap(EL2);
+    } else {
+        return NoFault;
+    }
+}
+
 Fault
 faultPauthEL1(const MiscRegLUTEntry &entry,
     ThreadContext *tc, const MiscRegOp64 &inst)
@@ -4043,6 +4094,7 @@ ISA::initializeMiscRegMetadata()
           mmfr2_el1.uao = release->has(ArmExtension::FEAT_UAO) ? 0x1 : 0x0;
           mmfr2_el1.varange = release->has(ArmExtension::FEAT_LVA) ? 0x1 : 0x0;
           mmfr2_el1.ids = release->has(ArmExtension::FEAT_IDST) ? 0x1 : 0x0;
+          mmfr2_el1.evt = release->has(ArmExtension::FEAT_EVT) ? 0x2 : 0x0;
           return mmfr2_el1;
       }())
       .faultRead(EL0, faultIdst)
@@ -4092,11 +4144,11 @@ ISA::initializeMiscRegMetadata()
 
     InitReg(MISCREG_CCSIDR_EL1)
       .faultRead(EL0, faultIdst)
-      .faultRead(EL1, HCR_TRAP(tid2))
+      .faultRead(EL1, faultCacheEL1)
       .allPrivileges().writes(0);
     InitReg(MISCREG_CLIDR_EL1)
       .faultRead(EL0, faultIdst)
-      .faultRead(EL1, HCR_TRAP(tid2))
+      .faultRead(EL1, faultCacheEL1)
       .allPrivileges().writes(0);
     InitReg(MISCREG_AIDR_EL1)
       .faultRead(EL0, faultIdst)
@@ -4104,7 +4156,7 @@ ISA::initializeMiscRegMetadata()
       .allPrivileges().writes(0);
     InitReg(MISCREG_CSSELR_EL1)
       .allPrivileges().exceptUserMode()
-      .fault(EL1, HCR_TRAP(tid2))
+      .fault(EL1, faultCacheEL1)
       .mapsTo(MISCREG_CSSELR_NS);
     InitReg(MISCREG_CTR_EL0)
       .faultRead(EL0, faultCtrEL0)
@@ -4473,40 +4525,40 @@ ISA::initializeMiscRegMetadata()
     InitReg(MISCREG_AT_S1E3W_Xt)
       .monSecureWrite().monNonSecureWrite();
     InitReg(MISCREG_TLBI_VMALLE1OS)
-      .faultWrite(EL1, HCR_TRAP(ttlb))
+      .faultWrite(EL1, faultTlbiOsEL1)
       .writes(1).exceptUserMode();
     InitReg(MISCREG_TLBI_VAE1OS_Xt)
-      .faultWrite(EL1, HCR_TRAP(ttlb))
+      .faultWrite(EL1, faultTlbiOsEL1)
       .writes(1).exceptUserMode();
     InitReg(MISCREG_TLBI_ASIDE1OS_Xt)
-      .faultWrite(EL1, HCR_TRAP(ttlb))
+      .faultWrite(EL1, faultTlbiOsEL1)
       .writes(1).exceptUserMode();
     InitReg(MISCREG_TLBI_VAAE1OS_Xt)
-      .faultWrite(EL1, HCR_TRAP(ttlb))
+      .faultWrite(EL1, faultTlbiOsEL1)
       .writes(1).exceptUserMode();
     InitReg(MISCREG_TLBI_VALE1OS_Xt)
-      .faultWrite(EL1, HCR_TRAP(ttlb))
+      .faultWrite(EL1, faultTlbiOsEL1)
       .writes(1).exceptUserMode();
     InitReg(MISCREG_TLBI_VAALE1OS_Xt)
-      .faultWrite(EL1, HCR_TRAP(ttlb))
+      .faultWrite(EL1, faultTlbiOsEL1)
       .writes(1).exceptUserMode();
     InitReg(MISCREG_TLBI_VMALLE1IS)
-      .faultWrite(EL1, HCR_TRAP(ttlb))
+      .faultWrite(EL1, faultTlbiIsEL1)
       .writes(1).exceptUserMode();
     InitReg(MISCREG_TLBI_VAE1IS_Xt)
-      .faultWrite(EL1, HCR_TRAP(ttlb))
+      .faultWrite(EL1, faultTlbiIsEL1)
       .writes(1).exceptUserMode();
     InitReg(MISCREG_TLBI_ASIDE1IS_Xt)
-      .faultWrite(EL1, HCR_TRAP(ttlb))
+      .faultWrite(EL1, faultTlbiIsEL1)
       .writes(1).exceptUserMode();
     InitReg(MISCREG_TLBI_VAAE1IS_Xt)
-      .faultWrite(EL1, HCR_TRAP(ttlb))
+      .faultWrite(EL1, faultTlbiIsEL1)
       .writes(1).exceptUserMode();
     InitReg(MISCREG_TLBI_VALE1IS_Xt)
-      .faultWrite(EL1, HCR_TRAP(ttlb))
+      .faultWrite(EL1, faultTlbiIsEL1)
       .writes(1).exceptUserMode();
     InitReg(MISCREG_TLBI_VAALE1IS_Xt)
-      .faultWrite(EL1, HCR_TRAP(ttlb))
+      .faultWrite(EL1, faultTlbiIsEL1)
       .writes(1).exceptUserMode();
     InitReg(MISCREG_TLBI_VMALLE1)
       .faultWrite(EL1, HCR_TRAP(ttlb))

From 5095e29c8e1cc8b0ec08826e60b0c9622629d3a1 Mon Sep 17 00:00:00 2001
From: Giacomo Travaglini <giacomo.travaglini@arm.com>
Date: Mon, 17 Apr 2023 13:09:49 +0100
Subject: [PATCH 463/492] arch-arm: Implement FEAT_HCX

This is just making the HCRX_EL2 register read/writable;
trapping behaviour will be implemented with further extensions

Change-Id: Id1ec42a754b7d999782edde3a8ec6c6099e3331e
Signed-off-by: Giacomo Travaglini <giacomo.travaglini@arm.com>
Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/70939
Maintainer: Jason Lowe-Power <power.jg@gmail.com>
Reviewed-by: Richard Cooper <richard.cooper@arm.com>
Tested-by: kokoro <noreply+kokoro@google.com>
---
 src/arch/arm/ArmSystem.py       | 14 ++++++++++++--
 src/arch/arm/regs/misc.cc       | 21 ++++++++++++++++-----
 src/arch/arm/regs/misc.hh       |  6 ++----
 src/arch/arm/regs/misc_types.hh |  2 ++
 4 files changed, 32 insertions(+), 11 deletions(-)

diff --git a/src/arch/arm/ArmSystem.py b/src/arch/arm/ArmSystem.py
index 4de35633d3..40a3a04b90 100644
--- a/src/arch/arm/ArmSystem.py
+++ b/src/arch/arm/ArmSystem.py
@@ -96,6 +96,8 @@ class ArmExtension(ScopedEnum):
         "FEAT_RNG",
         "FEAT_RNG_TRAP",
         "FEAT_EVT",
+        # Armv8.7
+        "FEAT_HCX",
         # Armv9.2
         "FEAT_SME",  # Optional in Armv9.2
         # Others
@@ -184,6 +186,8 @@ class ArmDefaultRelease(Armv8):
         # Armv8.5
         "FEAT_FLAGM2",
         "FEAT_EVT",
+        # Armv8.7
+        "FEAT_HCX",
         # Armv9.2
         "FEAT_SME",
     ]
@@ -235,8 +239,14 @@ class Armv85(Armv84):
     ]
 
 
-class Armv92(Armv85):
-    extensions = Armv85.extensions + ["FEAT_SME"]
+class Armv87(Armv85):
+    extensions = Armv85.extensions + [
+        "FEAT_HCX",
+    ]
+
+
+class Armv92(Armv87):
+    extensions = Armv87.extensions + ["FEAT_SME"]
 
 
 class ArmAllRelease(ArmRelease):
diff --git a/src/arch/arm/regs/misc.cc b/src/arch/arm/regs/misc.cc
index 69944c57df..f32aa7230c 100644
--- a/src/arch/arm/regs/misc.cc
+++ b/src/arch/arm/regs/misc.cc
@@ -1599,6 +1599,18 @@ faultDebugEL2(const MiscRegLUTEntry &entry,
     }
 }
 
+Fault
+faultHcrxEL2(const MiscRegLUTEntry &entry,
+    ThreadContext *tc, const MiscRegOp64 &inst)
+{
+    const SCR scr = tc->readMiscReg(MISCREG_SCR_EL3);
+    if (ArmSystem::haveEL(tc, EL3) && !scr.hxen) {
+        return inst.generateTrap(EL3);
+    } else {
+        return NoFault;
+    }
+}
+
 Fault
 faultZcrEL1(const MiscRegLUTEntry &entry,
     ThreadContext *tc, const MiscRegOp64 &inst)
@@ -4083,6 +4095,7 @@ ISA::initializeMiscRegMetadata()
           mmfr1_el1.vh = release->has(ArmExtension::FEAT_VHE) ? 0x1 : 0x0;
           mmfr1_el1.hpds = release->has(ArmExtension::FEAT_HPDS) ? 0x1 : 0x0;
           mmfr1_el1.pan = release->has(ArmExtension::FEAT_PAN) ? 0x1 : 0x0;
+          mmfr1_el1.hcx = release->has(ArmExtension::FEAT_HCX) ? 0x1 : 0x0;
           return mmfr1_el1;
       }())
       .faultRead(EL0, faultIdst)
@@ -4227,6 +4240,9 @@ ISA::initializeMiscRegMetadata()
     InitReg(MISCREG_HCR_EL2)
       .hyp().mon()
       .mapsTo(MISCREG_HCR, MISCREG_HCR2);
+    InitReg(MISCREG_HCRX_EL2)
+      .hyp().mon()
+      .fault(EL2, faultHcrxEL2);
     InitReg(MISCREG_MDCR_EL2)
       .hyp().mon()
       .fault(EL2, faultDebugEL2)
@@ -5654,11 +5670,6 @@ ISA::initializeMiscRegMetadata()
       .warnNotFail()
       .fault(faultUnimplemented);
 
-    // HCX extension (unimplemented)
-    InitReg(MISCREG_HCRX_EL2)
-      .unimplemented()
-      .warnNotFail();
-
     // FGT extension (unimplemented)
     InitReg(MISCREG_HFGRTR_EL2)
       .unimplemented()
diff --git a/src/arch/arm/regs/misc.hh b/src/arch/arm/regs/misc.hh
index 429fcb59cc..cb03841848 100644
--- a/src/arch/arm/regs/misc.hh
+++ b/src/arch/arm/regs/misc.hh
@@ -589,6 +589,7 @@ namespace ArmISA
         MISCREG_SCTLR_EL2,
         MISCREG_ACTLR_EL2,
         MISCREG_HCR_EL2,
+        MISCREG_HCRX_EL2,
         MISCREG_MDCR_EL2,
         MISCREG_CPTR_EL2,
         MISCREG_HSTR_EL2,
@@ -1125,9 +1126,6 @@ namespace ArmISA
         MISCREG_VSESR_EL2,
         MISCREG_VDISR_EL2,
 
-        // HCX extension (unimplemented)
-        MISCREG_HCRX_EL2,
-
         // FGT extension (unimplemented)
         MISCREG_HFGRTR_EL2,
         MISCREG_HFGWTR_EL2,
@@ -2272,6 +2270,7 @@ namespace ArmISA
         "sctlr_el2",
         "actlr_el2",
         "hcr_el2",
+        "hcrx_el2",
         "mdcr_el2",
         "cptr_el2",
         "hstr_el2",
@@ -2785,7 +2784,6 @@ namespace ArmISA
         "disr_el1",
         "vsesr_el2",
         "vdisr_el2",
-        "hcrx_el2",
         "hfgrtr_el2",
         "hfgwtr_el2",
 
diff --git a/src/arch/arm/regs/misc_types.hh b/src/arch/arm/regs/misc_types.hh
index 4bb234fd10..00640dd339 100644
--- a/src/arch/arm/regs/misc_types.hh
+++ b/src/arch/arm/regs/misc_types.hh
@@ -158,6 +158,7 @@ namespace ArmISA
     EndBitUnion(AA64MMFR0)
 
     BitUnion64(AA64MMFR1)
+        Bitfield<43, 40> hcx;
         Bitfield<31, 28> xnx;
         Bitfield<27, 24> specsei;
         Bitfield<23, 20> pan;
@@ -361,6 +362,7 @@ namespace ArmISA
 
     BitUnion64(SCR)
         Bitfield<40> trndr;
+        Bitfield<38> hxen;
         Bitfield<21> fien;
         Bitfield<20> nmea;
         Bitfield<19> ease;

From e0a28b1a27a5cd79eca7649ad7cb5ec8cce85b08 Mon Sep 17 00:00:00 2001
From: KUNAL PAI <kunpai@ucdavis.edu>
Date: Fri, 19 May 2023 14:38:48 -0700
Subject: [PATCH 464/492] stdlib: Edit RISCVMatched Configuration

This patch changes the RISCVMatched Cache Hierarchy to
private L1 shared L2.
It also changes the RISCVMatched Core's parameters to
better match hardware performance.
Also, sizes are changed to MiB or KiB instead of MB
or KB, to match the datasheet.
All the changes that deviate from the datasheet and the
ARM HPI CPU (reference for pipeline parameters)
are documented.

The core parameters that are changed are:
    - threadPolicy:
        This is initialized to "SingleThreaded".
    - decodeToExecuteForwardDelay:
        This is changed from 1 to 2 to avoid a PMC address fault.
    - fetch1ToFetch2BackwardDelay:
        This is changed from 1 to 0 to better match hardware performance.
    - fetch2InputBufferSize:
        This is changed from 2 to 1 to better match hardware performance.
    - decodeInputBufferSize:
        This is changed from 3 to 2 to better match hardware performance.
    - decodeToExecuteForwardDelay:
        This is changed from 2 to 1 to better match hardware performance.
    - executeInputBufferSize:
        This is changed from 7 to 4 to better match hardware performance.
    - executeMaxAccessesInMemory:
        This is changed from 2 to 1 to better match hardware performance.
    - executeLSQStoreBufferSize:
        This is changed from 5 to 3 to better match hardware performance.
    - executeBranchDelay:
        This is changed from 1 to 2 to better match hardware performance.
    - enableIdling:
        This is changed to False to better match hardware performance.
    - MemReadFU: changed to 2 cycles from 3 cycles.

The changes in the branch predictor are:

      - BTBEntries:
        This is changed from 16 entries to 32 entries.
      - RASSize:
        This is changed from 6 entries to 12 entries.
      - IndirectSets:
        This is changed from 8 sets to 16 sets.
      - localPredictorSize:
        This is changed from 8192 to 16384.
      - globalPredictorSize:
        This is changed from 8192 to 16384.
      - choicePredictorSize:
        This is changed from 8192 to 16384.
      - localCtrBits:
        This is changed from 2 to 4.
      - globalCtrBits:
        This is changed from 2 to 4.
      - choiceCtrBits:
        This is changed from 2 to 4.

Change-Id: I4235140f33be6a3b529a819ae6a7223cb88bb7ab
Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/70798
Maintainer: Bobby Bruce <bbruce@ucdavis.edu>
Tested-by: kokoro <noreply+kokoro@google.com>
Reviewed-by: Jason Lowe-Power <power.jg@gmail.com>
Maintainer: Jason Lowe-Power <power.jg@gmail.com>
---
 .../riscvmatched/riscvmatched_board.py        |  2 +-
 .../riscvmatched/riscvmatched_cache.py        | 41 ++++-----
 .../riscvmatched/riscvmatched_core.py         | 89 ++++++++++++++-----
 3 files changed, 88 insertions(+), 44 deletions(-)

diff --git a/src/python/gem5/prebuilt/riscvmatched/riscvmatched_board.py b/src/python/gem5/prebuilt/riscvmatched/riscvmatched_board.py
index ae483cc401..9ca95839f8 100644
--- a/src/python/gem5/prebuilt/riscvmatched/riscvmatched_board.py
+++ b/src/python/gem5/prebuilt/riscvmatched/riscvmatched_board.py
@@ -109,7 +109,7 @@ class RISCVMatchedBoard(
     def __init__(
         self,
         clk_freq: str = "1.2GHz",
-        l2_size: str = "2MB",
+        l2_size: str = "2MiB",
         is_fs: bool = False,
     ) -> None:
         """
diff --git a/src/python/gem5/prebuilt/riscvmatched/riscvmatched_cache.py b/src/python/gem5/prebuilt/riscvmatched/riscvmatched_cache.py
index dc66af354b..25e55ef310 100644
--- a/src/python/gem5/prebuilt/riscvmatched/riscvmatched_cache.py
+++ b/src/python/gem5/prebuilt/riscvmatched/riscvmatched_cache.py
@@ -1,4 +1,4 @@
-# Copyright (c) 2022 The Regents of the University of California
+# Copyright (c) 2023 The Regents of the University of California
 # All rights reserved.
 #
 # Redistribution and use in source and binary forms, with or without
@@ -42,6 +42,7 @@ from gem5.isas import ISA
 from m5.objects import Cache, L2XBar, BaseXBar, SystemXBar, BadAddr, Port
 
 from gem5.utils.override import *
+from typing import Type
 
 
 class RISCVMatchedCacheHierarchy(
@@ -50,7 +51,7 @@ class RISCVMatchedCacheHierarchy(
     """
 
     A cache setup where each core has a private L1 Data and Instruction Cache,
-    and a private L2 cache.
+    and a shared L2 cache.
     The HiFive board has a partially inclusive cache hierarchy, hence this hierarchy is chosen.
     The details of the cache hierarchy are in Table 7, page 36 of the datasheet.
 
@@ -74,9 +75,9 @@ class RISCVMatchedCacheHierarchy(
         AbstractClassicCacheHierarchy.__init__(self=self)
         AbstractTwoLevelCacheHierarchy.__init__(
             self,
-            l1i_size="32kB",
+            l1i_size="32KiB",
             l1i_assoc=4,
-            l1d_size="32kB",
+            l1d_size="32KiB",
             l1d_assoc=8,
             l2_size=l2_size,
             l2_assoc=16,
@@ -108,16 +109,17 @@ class RISCVMatchedCacheHierarchy(
             for i in range(board.get_processor().get_num_cores())
         ]
         self.l1dcaches = [
-            L1DCache(size=self._l1d_size, assoc=self._l1d_assoc)
-            for i in range(board.get_processor().get_num_cores())
-        ]
-        self.l2buses = [
-            L2XBar() for i in range(board.get_processor().get_num_cores())
-        ]
-        self.l2caches = [
-            L2Cache(size=self._l2_size, assoc=self._l2_assoc)
+            L1DCache(
+                size=self._l1d_size, assoc=self._l1d_assoc, response_latency=10
+            )
             for i in range(board.get_processor().get_num_cores())
         ]
+        self.l2bus = L2XBar()
+
+        self.l2cache = L2Cache(
+            size=self._l2_size, assoc=self._l2_assoc, data_latency=20
+        )
+
         # ITLB Page walk caches
         self.iptw_caches = [
             MMUCache(size="4KiB")
@@ -137,14 +139,10 @@ class RISCVMatchedCacheHierarchy(
             cpu.connect_icache(self.l1icaches[i].cpu_side)
             cpu.connect_dcache(self.l1dcaches[i].cpu_side)
 
-            self.l1icaches[i].mem_side = self.l2buses[i].cpu_side_ports
-            self.l1dcaches[i].mem_side = self.l2buses[i].cpu_side_ports
-            self.iptw_caches[i].mem_side = self.l2buses[i].cpu_side_ports
-            self.dptw_caches[i].mem_side = self.l2buses[i].cpu_side_ports
-
-            self.l2buses[i].mem_side_ports = self.l2caches[i].cpu_side
-
-            self.membus.cpu_side_ports = self.l2caches[i].mem_side
+            self.l1icaches[i].mem_side = self.l2bus.cpu_side_ports
+            self.l1dcaches[i].mem_side = self.l2bus.cpu_side_ports
+            self.iptw_caches[i].mem_side = self.l2bus.cpu_side_ports
+            self.dptw_caches[i].mem_side = self.l2bus.cpu_side_ports
 
             cpu.connect_walker_ports(
                 self.iptw_caches[i].cpu_side, self.dptw_caches[i].cpu_side
@@ -157,6 +155,9 @@ class RISCVMatchedCacheHierarchy(
             else:
                 cpu.connect_interrupt()
 
+        self.l2bus.mem_side_ports = self.l2cache.cpu_side
+        self.membus.cpu_side_ports = self.l2cache.mem_side
+
     def _setup_io_cache(self, board: AbstractBoard) -> None:
         """Create a cache for coherent I/O connections"""
         self.iocache = Cache(
diff --git a/src/python/gem5/prebuilt/riscvmatched/riscvmatched_core.py b/src/python/gem5/prebuilt/riscvmatched/riscvmatched_core.py
index 0b4375ce8d..48291bf670 100644
--- a/src/python/gem5/prebuilt/riscvmatched/riscvmatched_core.py
+++ b/src/python/gem5/prebuilt/riscvmatched/riscvmatched_core.py
@@ -61,8 +61,14 @@ class U74PredFU(MinorDefaultPredFU):
     pass
 
 
-class U74MemFU(MinorDefaultMemFU):
-    opLat = 3
+class U74MemReadFU(MinorDefaultMemFU):
+    opClasses = minorMakeOpClassSet(["MemRead", "FloatMemRead"])
+    opLat = 2
+
+
+class U74MemWriteFU(MinorDefaultMemFU):
+    opClasses = minorMakeOpClassSet(["MemWrite", "FloatMemWrite"])
+    opLat = 2
 
 
 class U74MiscFU(MinorDefaultMiscFU):
@@ -77,18 +83,24 @@ class U74FUPool(MinorFUPool):
         U74IntDivFU(),
         U74FloatSimdFU(),
         U74PredFU(),
-        U74MemFU(),
+        U74MemReadFU(),
+        U74MemWriteFU(),
         U74MiscFU(),
     ]
 
 
 class U74BP(TournamentBP):
-    BTBEntries = 16
-    RASSize = 6
+    BTBEntries = 32
+    RASSize = 12
     localHistoryTableSize = 4096  # is 3.6 KiB but gem5 requires power of 2
-
+    localPredictorSize = 16384
+    globalPredictorSize = 16384
+    choicePredictorSize = 16384
+    localCtrBits = 4
+    globalCtrBits = 4
+    choiceCtrBits = 4
     indirectBranchPred = SimpleIndirectPredictor()
-    indirectBranchPred.indirectSets = 8
+    indirectBranchPred.indirectSets = 16
 
 
 class U74CPU(RiscvMinorCPU):
@@ -97,26 +109,49 @@ class U74CPU(RiscvMinorCPU):
     This information about the CPU can be found on page 15 of
     gem5_rsk_gem5-21.2.pdf at https://github.com/arm-university/arm-gem5-rsk
 
-    The only parameter that is changed is the decodeToExecuteForwardDelay.
-    This is changed from 1 to 2 to avoid a PMC address fault.
+    The parameters that are changed are:
+    - threadPolicy:
+        This is initialized to "SingleThreaded".
+    - decodeToExecuteForwardDelay:
+        This is changed from 1 to 2 to avoid a PMC address fault.
+    - fetch1ToFetch2BackwardDelay:
+        This is changed from 1 to 0 to better match hardware performance.
+    - fetch2InputBufferSize:
+        This is changed from 2 to 1 to better match hardware performance.
+    - decodeInputBufferSize:
+        This is changed from 3 to 2 to better match hardware performance.
+    - decodeToExecuteForwardDelay:
+        This is changed from 2 to 1 to better match hardware performance.
+    - executeInputBufferSize:
+        This is changed from 7 to 4 to better match hardware performance.
+    - executeMaxAccessesInMemory:
+        This is changed from 2 to 1 to better match hardware performance.
+    - executeLSQStoreBufferSize:
+        This is changed from 5 to 3 to better match hardware performance.
+    - executeBranchDelay:
+        This is changed from 1 to 2 to better match hardware performance.
+    - enableIdling:
+        This is changed to False to better match hardware performance.
 
     """
 
+    threadPolicy = "SingleThreaded"
+
     # Fetch1 stage
     fetch1LineSnapWidth = 0
     fetch1LineWidth = 0
     fetch1FetchLimit = 1
     fetch1ToFetch2ForwardDelay = 1
-    fetch1ToFetch2BackwardDelay = 1
+    fetch1ToFetch2BackwardDelay = 0
 
     # Fetch2 stage
-    fetch2InputBufferSize = 2
+    fetch2InputBufferSize = 1
     fetch2ToDecodeForwardDelay = 1
     fetch2CycleInput = True
 
     # Decode stage
-    decodeInputBufferSize = 3
-    decodeToExecuteForwardDelay = 2
+    decodeInputBufferSize = 2
+    decodeToExecuteForwardDelay = 1
     decodeInputWidth = 2
     decodeCycleInput = True
 
@@ -127,17 +162,17 @@ class U74CPU(RiscvMinorCPU):
     executeMemoryIssueLimit = 1
     executeCommitLimit = 2
     executeMemoryCommitLimit = 1
-    executeInputBufferSize = 7
-    executeMaxAccessesInMemory = 2
+    executeInputBufferSize = 4
+    executeMaxAccessesInMemory = 1
     executeLSQMaxStoreBufferStoresPerCycle = 2
     executeLSQRequestsQueueSize = 1
     executeLSQTransfersQueueSize = 2
-    executeLSQStoreBufferSize = 5
-    executeBranchDelay = 1
+    executeLSQStoreBufferSize = 3
+    executeBranchDelay = 2
     executeSetTraceTimeOnCommit = True
     executeSetTraceTimeOnIssue = False
     executeAllowEarlyMemoryIssue = True
-    enableIdling = True
+    enableIdling = False
 
     # Functional Units and Branch Prediction
     executeFuncUnits = U74FUPool()
@@ -152,13 +187,21 @@ class U74Core(BaseCPUCore):
       - IntFU: 1 cycle
       - IntMulFU: 3 cycles
       - IntDivFU: 6 cycles (NOTE: latency is variable, but is set to 6 cycles)
-      - MemFU: 3 cycles
+      - MemReadFU: 2 cycles
+      - MemWriteFU: 2 cycles
     The branch predictor is a TournamentBP, based on Section 4.2.5 on page 38.
-      - BTBEntries: 16 entries
-      - RASSize: 6 entries
-      - IndirectSets: 8 sets
+      - BTBEntries: 32 entries
+      - RASSize: 12 entries
+      - IndirectSets: 16 sets
+      - localPredictorSize: 16384
+      - globalPredictorSize: 16384
+      - choicePredictorSize: 16384
+      - localCtrBits: 4
+      - globalCtrBits: 4
+      - choiceCtrBits: 4
       - localHistoryTableSize: 4096 B
-    NOTE: The BHT of the HiFive Board is 3.6KiB but gem5 requires a power of 2, so the BHT is 4096B.
+    NOTE: The TournamentBP deviates from the actual BP.
+    This configuration performs the best in relation to the hardware.
     """
 
     def __init__(

From 741af7ddaed08be6eda0949471e5fec4f09b81d4 Mon Sep 17 00:00:00 2001
From: Ayaz Akram <yazakram@ucdavis.edu>
Date: Tue, 21 Feb 2023 15:16:38 -0800
Subject: [PATCH 465/492] mem: Add a DDR5 memory interface

This change adds a single DDR5 memory inteface.
A DDR5 DIMM contains two physical channels. Therefore,
two instances of this interface should be used to model
a DDR5 DIMM. The configuration includes 3 different speed
bins models. The configuration is tested with different
types of memory traffic using the traffic generator and shows
performance similar to what is observed in existing
literature [1]. One of the key features of DDR5
"same bank refresh" is yet not supported in gem5, but is
expected to improve the performance of the DDR5 model.

[1] Exploration of DDR5 with the Open-Source Simulator DRAMSys.

Change-Id: I5856a10c8dcd92dbecc7fd4dcea0f674b2412dd7
Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/68257
Maintainer: Jason Lowe-Power <power.jg@gmail.com>
Reviewed-by: Jason Lowe-Power <power.jg@gmail.com>
Tested-by: kokoro <noreply+kokoro@google.com>
---
 src/mem/DRAMInterface.py                      | 270 +++++++++++++++-
 src/python/SConscript                         |   2 +
 src/python/gem5/components/memory/__init__.py |   3 +
 .../components/memory/dram_interfaces/ddr5.py | 292 ++++++++++++++++++
 .../gem5/components/memory/single_channel.py  |  28 ++
 5 files changed, 588 insertions(+), 7 deletions(-)
 create mode 100644 src/python/gem5/components/memory/dram_interfaces/ddr5.py

diff --git a/src/mem/DRAMInterface.py b/src/mem/DRAMInterface.py
index 9c041e5838..dea62a6be1 100644
--- a/src/mem/DRAMInterface.py
+++ b/src/mem/DRAMInterface.py
@@ -41,6 +41,7 @@
 from m5.objects.MemCtrl import MemCtrl
 from m5.objects.MemInterface import *
 
+
 # Enum for the page policy, either open, open_adaptive, close, or
 # close_adaptive.
 class PageManage(Enum):
@@ -1179,7 +1180,6 @@ class HBM_1000_4H_1x64(HBM_1000_4H_1x128):
 # on HBM gen2 specifications. 4H stack, 8Gb per die and total capacity
 # of 4GiB.
 class HBM_2000_4H_1x64(DRAMInterface):
-
     # 64-bit interface for a single pseudo channel
     device_bus_width = 64
 
@@ -1256,6 +1256,268 @@ class HBM_2000_4H_1x64(DRAMInterface):
     two_cycle_activate = True
 
 
+# A single DDR5-4400 32bit channel (4x8 configuration)
+# A DDR5 DIMM is made up of two (32 bit) channels.
+# Following configuration is modeling only a single 32bit channel.
+# Timings are based on Micron data sheet:
+# https://media-www.micron.com/-/media/client/global/
+# documents/products/data-sheet/dram/ddr5/ddr5_sdram_core.pdf
+# Configuration: 4Gbx8 devices (32Gb addressing)
+# Maximum bandwidth of DDR5_4400_4x8 (4400 MT/s) can be 17.6GB/s
+class DDR5_4400_4x8(DRAMInterface):
+    # size of a single device
+    device_size = "512MiB"
+
+    # single channel of 32bit width
+    # requires 8-bit wide 4 devices
+    device_bus_width = 8
+
+    # DDR5 is a BL16 device
+    burst_length = 16
+
+    # Each device has a page (row buffer) size of 256B
+    # Four devices lead to a page size of 1KB
+    device_rowbuffer_size = "256B"
+
+    # 4Gbx8 configuration
+    devices_per_rank = 4
+
+    ranks_per_channel = 1
+
+    # 4Gbx8 configuration of DDR5 has 8 bank groups,
+    # 4 banks per bank group and 32 banks in total
+    bank_groups_per_rank = 8
+    banks_per_rank = 32
+
+    write_buffer_size = 64
+    read_buffer_size = 64
+
+    # For 4400 MT/s
+    tCK = "0.454ns"
+
+    # 16 beats across an x32 interface translates to 8 clocks @ 2200 MHz
+    tBURST = "3.632ns"
+
+    # For 4400 MT/s, the number is max(8nCK, 5ns)
+    tCCD_L = "5ns"
+
+    # page 389 of the data sheet
+    tRCD = "14.545ns"
+    tCL = "14.545ns"
+    # tCWL = tCL - 2(nCK)
+    tCWL = "13.637ns"
+    tRP = "14.545ns"
+    tRAS = "32ns"
+
+    # RRD_S (different bank group) : 8nCK
+    tRRD = "3.632ns"
+
+    # RRD_L (same bank group) is MAX(8nCK, 5ns) for 1KB page
+    tRRD_L = "5ns"
+
+    # tFAW for 1KB page is MAX(32nCK, 14.545ns)
+    tXAW = "14.545ns"
+    activation_limit = 4
+
+    # Note: could not find the rank to rank delay
+    # from the datasheet, but, since we are modeling
+    # a single rank device, it should not matter.
+    # Using the DDR4 number as default (i.e. ~2tCK)
+    tCS = "1ns"
+
+    # tRFC (Normal) for 16Gb device is 295ns
+    tRFC = "295ns"
+
+    tPPD = "0.908ns"  # 2nCK
+    tWR = "30ns"
+
+    # Rd/Wr turnaround timings
+    ###################################################################
+    # Note: gem5 adds tBURST separately while calculting rd--rd/wr or
+    # wr--wr/rd delays so we can ignore tBURST factor from the following
+    # equations while calculating the final value of the timing parameter
+    ####################################################################
+    # From the datasheet
+    # (1) tCCD_L_RTW =
+    # CL - CWL + RBL/2 + 2 tCK - (Read DQS offset) + (tRPST - 0.5 tCK) + tWPRE
+    # where CWL = CL-2, RBL/2 = tBURST, Read DQS offset = 1ck, tRPST = 1.5tCK
+    # Therefore, tCCD_L_RTW =
+    # (14.545 - 13.637) + (2*0.454) - 0.454 +
+    # ((1.5*0.454)-(0.5*0.454) + (1.5*0.454) = 2.497ns
+
+    # (2) tCCD_S_RTW =
+    # CL - CWL + RBL/2 + 2 tCK - (Read DQS offset) + (tRPST - 0.5 tCK) + tWPRE
+    # Therefore, tCCD_S_RTW = tCCD_L_RTW = 2.497ns
+
+    # (3) tCCD_L_WTR =
+    # CWL + WBL/2 + max(16nCK,10ns)
+    # where WBL/2 = tBURST
+    # Therefore,
+    # tCCD_L_WTR = 13.637+10 = 23.637ns
+
+    # (4) tCCD_S_WTR =
+    # CWL + WBL/2 + max(4nCK,2.5ns)
+    # where WBL/2 = tBURST
+    # Therefore,
+    # tCCD_S_WTR = 13.637+2.5 = 16.137ns
+
+    tRTW = "2.497ns"
+    tWTR_L = "23.637ns"
+    tWTR = "16.137ns"
+
+    # tRTP : max(12nCK, 7.5ns)
+    tRTP = "7.5ns"
+
+    # <=85C, half for >85C
+    tREFI = "3.9us"
+
+    # active powerdown and precharge powerdown exit time max(7.5ns, 8nCK)
+    tXP = "7.5ns"
+
+    # self refresh exit time
+    # According to the datasheet tXS = tRFC = 295ns (normal Refresh mode)
+    tXS = "295ns"
+
+    page_policy = "close_adaptive"
+
+    # Power related parameters
+    # Reference: https://media-www.micron.com/-/media/client/global/
+    # documents/products/data-sheet/dram/ddr5/16gb_ddr5_sdram_diereva.pdf
+    # Using the values for DDR5-4800
+    # DDR5 has one voltage domain, so all the
+    # CurrentVariable2 params should not be used or set to 0
+    IDD0 = "122mA"
+    # Using the value of IDD2P
+    IDD2P0 = "88mA"
+    IDD2N = "92mA"
+    # Using the value of IDD3P
+    IDD3P0 = "140mA"
+    IDD3N = "142mA"
+    IDD4W = "479mA"
+    IDD4R = "530mA"
+    # IDD5B - 277, IDD5C -- 135mA, IDD5F -- 262mA in the datasheet
+    IDD5 = "277mA"
+    # IDD6N
+    IDD6 = "102mA"
+
+    VDD = "1.1V"
+
+
+# Maximum bandwidth of DDR5_6400_4x8 (6400 MT/s) can be 25.6GB/s
+class DDR5_6400_4x8(DDR5_4400_4x8):
+    # For 6400 MT/s
+    tCK = "0.312ns"
+
+    # 16 beats across an x32 interface translates to 8 clocks @ 3200 MHz
+    tBURST = "2.496ns"
+
+    tRCD = "14.375ns"
+    tCL = "14.375ns"
+    # tCWL = tCL - 2(nCK)
+    tCWL = "13.751ns"
+    tRP = "14.375ns"
+
+    # RRD_S (different bank group) : 8nCK
+    tRRD = "2.496ns"
+
+    # RRD_L (same bank group) is MAX(8nCK, 5ns) for 1KB page
+    tRRD_L = "5ns"
+
+    # tFAW for 1KB page is MAX(32 CK, 10.00ns)
+    tXAW = "10ns"
+
+    # Rd/Wr turnaround timings
+    ###################################################################
+    # Note: gem5 adds tBURST separately while calculting rd--rd/wr or
+    # wr--wr/rd delays so we can ignore tBURST factor from the following
+    # equations while calculating the final value of the timing parameter
+    ####################################################################
+    # From the datasheet
+    # (1) tCCD_L_RTW =
+    # CL - CWL + RBL/2 + 2 tCK - (Read DQS offset) + (tRPST - 0.5 tCK) + tWPRE
+    # where CWL = CL-2, RBL/2 = tBURST, Read DQS offset = 1ck, tRPST = 1.5tCK
+    # Therefore, tCCD_L_RTW =
+    # (14.375 - 13.751) + (2*0.312) - 0.312 + ((1.5*0.312)-(0.5*0.312)
+    # + (1.5*0.312) = 1.716ns
+
+    # (2) tCCD_S_RTW =
+    # CL - CWL + RBL/2 + 2 tCK - (Read DQS offset) + (tRPST - 0.5 tCK) + tWPRE
+    # Therefore, tCCD_S_RTW = tCCD_L_RTW = 1.716ns
+
+    # (3) tCCD_L_WTR =
+    # CWL + WBL/2 + max(16nCK,10ns)
+    # where WBL/2 = tBURST
+    # Therefore,
+    # tCCD_L_WTR = 13.751+10 = 23.751ns
+
+    # (4) tCCD_S_WTR =
+    # CWL + WBL/2 + max(4nCK,2.5ns)
+    # where WBL/2 = tBURST
+    # Therefore,
+    # tCCD_S_WTR = 13.751+2.5 = 16.251ns
+
+    tRTW = "1.716ns"
+    tWTR_L = "23.751ns"
+    tWTR = "16.251ns"
+
+
+# Maximum bandwidth of DDR5_8400_4x8 (8400 MT/s) can be 33.6GB/s
+# Most of the timing parameters for DDR5_8400_4x8 are TBD in
+# the datasheet referred above.
+# The TBD parameters are extrapolated from the speed bins mentioned above.
+class DDR5_8400_4x8(DDR5_4400_4x8):
+    # For 8400 MT/s
+    tCK = "0.238ns"
+
+    # 16 beats across an x32 interface translates to 8 clocks @ 4200 MHz
+    tBURST = "1.904ns"
+
+    tRCD = "14.285ns"
+    tCL = "14.285ns"
+    tCWL = "13.809ns"
+    tRP = "14.285ns"
+
+    # RRD_S (different bank group) : 8nCK
+    tRRD = "1.904ns"
+
+    # tFAW for 1KB page is MAX(32 CK, 10.00ns)
+    tXAW = "10ns"
+
+    # Rd/Wr turnaround timings
+    ###################################################################
+    # Note: gem5 adds tBURST separately while calculting rd--rd/wr or
+    # wr--wr/rd delays so we can ignore tBURST factor from the following
+    # equations while calculating the final value of the timing parameter
+    ####################################################################
+    # From the datasheet
+    # (1) tCCD_L_RTW =
+    # CL - CWL + RBL/2 + 2 tCK - (Read DQS offset) + (tRPST - 0.5 tCK) + tWPRE
+    # where CWL = CL-2, RBL/2 = tBURST, Read DQS offset = 1ck, tRPST = 1.5tCK
+    # Therefore, tCCD_L_RTW =
+    # (14.285 - 13.809) + (2*0.238) - 0.238 + ((1.5*0.238)-(0.5*0.238)
+    # + (1.5*0.238) = 1.309ns
+
+    # (2) tCCD_S_RTW =
+    # CL - CWL + RBL/2 + 2 tCK - (Read DQS offset) + (tRPST - 0.5 tCK) + tWPRE
+    # Therefore, tCCD_S_RTW = tCCD_L_RTW = 1.309ns
+
+    # (3) tCCD_L_WTR =
+    # CWL + WBL/2 + max(16nCK,10ns)
+    # where WBL/2 = tBURST
+    # Therefore,
+    # tCCD_L_WTR =13.809+10 = 23.809ns
+
+    # (4) tCCD_S_WTR =
+    # CWL + WBL/2 + max(4nCK,2.5ns)
+    # where WBL/2 = tBURST
+    # Therefore,
+    # tCCD_S_WTR = 13.809+2.5 = 16.309ns
+
+    tRTW = "1.309ns"
+    tWTR_L = "23.809ns"
+    tWTR = "16.309ns"
+
+
 # A single LPDDR5 x16 interface (one command/address bus)
 # for a single x16 channel with default timings based on
 # initial JEDEC specification
@@ -1263,7 +1525,6 @@ class HBM_2000_4H_1x64(DRAMInterface):
 # Configuring for 16-bank mode with bank-group architecture
 # burst of 32, which means bursts can be interleaved
 class LPDDR5_5500_1x16_BG_BL32(DRAMInterface):
-
     # Increase buffer size to account for more bank resources
     read_buffer_size = 64
 
@@ -1390,7 +1651,6 @@ class LPDDR5_5500_1x16_BG_BL32(DRAMInterface):
 # Starting with 5.5Gbps data rates and 8Gbit die
 # Configuring for 16-bank mode with bank-group architecture, burst of 16
 class LPDDR5_5500_1x16_BG_BL16(LPDDR5_5500_1x16_BG_BL32):
-
     # LPDDR5 is a BL16 or BL32 device
     # With BG mode, BL16 and BL32 are supported
     # Use BL16 for smaller access granularity
@@ -1412,7 +1672,6 @@ class LPDDR5_5500_1x16_BG_BL16(LPDDR5_5500_1x16_BG_BL32):
 # Starting with 5.5Gbps data rates and 8Gbit die
 # Configuring for 8-bank mode, burst of 32
 class LPDDR5_5500_1x16_8B_BL32(LPDDR5_5500_1x16_BG_BL32):
-
     # 4KiB page with 8B mode
     device_rowbuffer_size = "4KiB"
 
@@ -1452,7 +1711,6 @@ class LPDDR5_5500_1x16_8B_BL32(LPDDR5_5500_1x16_BG_BL32):
 # Configuring for 16-bank mode with bank-group architecture
 # burst of 32, which means bursts can be interleaved
 class LPDDR5_6400_1x16_BG_BL32(LPDDR5_5500_1x16_BG_BL32):
-
     # 5.5Gb/s DDR with 4:1 WCK:CK ratio for 687.5 MHz CK
     tCK = "1.25ns"
 
@@ -1497,7 +1755,6 @@ class LPDDR5_6400_1x16_BG_BL32(LPDDR5_5500_1x16_BG_BL32):
 # 6.4Gbps data rates and 8Gbit die
 # Configuring for 16-bank mode with bank-group architecture, burst of 16
 class LPDDR5_6400_1x16_BG_BL16(LPDDR5_6400_1x16_BG_BL32):
-
     # LPDDR5 is a BL16 or BL32 device
     # With BG mode, BL16 and BL32 are supported
     # Use BL16 for smaller access granularity
@@ -1519,7 +1776,6 @@ class LPDDR5_6400_1x16_BG_BL16(LPDDR5_6400_1x16_BG_BL32):
 # 6.4Gbps data rates and 8Gbit die
 # Configuring for 8-bank mode, burst of 32
 class LPDDR5_6400_1x16_8B_BL32(LPDDR5_6400_1x16_BG_BL32):
-
     # 4KiB page with 8B mode
     device_rowbuffer_size = "4KiB"
 
diff --git a/src/python/SConscript b/src/python/SConscript
index 3b00b34245..e261bfa7f2 100644
--- a/src/python/SConscript
+++ b/src/python/SConscript
@@ -185,6 +185,8 @@ PySource('gem5.components.memory.dram_interfaces',
     'gem5/components/memory/dram_interfaces/ddr3.py')
 PySource('gem5.components.memory.dram_interfaces',
     'gem5/components/memory/dram_interfaces/ddr4.py')
+PySource('gem5.components.memory.dram_interfaces',
+    'gem5/components/memory/dram_interfaces/ddr5.py')
 PySource('gem5.components.memory.dram_interfaces',
     'gem5/components/memory/dram_interfaces/gddr.py')
 PySource('gem5.components.memory.dram_interfaces',
diff --git a/src/python/gem5/components/memory/__init__.py b/src/python/gem5/components/memory/__init__.py
index 8a7b5ef10a..546d5d98ed 100644
--- a/src/python/gem5/components/memory/__init__.py
+++ b/src/python/gem5/components/memory/__init__.py
@@ -29,6 +29,9 @@ from .single_channel import SingleChannelDDR3_2133
 from .single_channel import SingleChannelDDR4_2400
 from .single_channel import SingleChannelHBM
 from .single_channel import SingleChannelLPDDR3_1600
+from .single_channel import DIMM_DDR5_4400
+from .single_channel import DIMM_DDR5_6400
+from .single_channel import DIMM_DDR5_8400
 from .multi_channel import DualChannelDDR3_1600
 from .multi_channel import DualChannelDDR3_2133
 from .multi_channel import DualChannelDDR4_2400
diff --git a/src/python/gem5/components/memory/dram_interfaces/ddr5.py b/src/python/gem5/components/memory/dram_interfaces/ddr5.py
new file mode 100644
index 0000000000..1ffd7f8cfd
--- /dev/null
+++ b/src/python/gem5/components/memory/dram_interfaces/ddr5.py
@@ -0,0 +1,292 @@
+# Copyright (c) 2023 The Regents of the University of California
+# All Rights Reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are
+# met: redistributions of source code must retain the above copyright
+# notice, this list of conditions and the following disclaimer;
+# redistributions in binary form must reproduce the above copyright
+# notice, this list of conditions and the following disclaimer in the
+# documentation and/or other materials provided with the distribution;
+# neither the name of the copyright holders nor the names of its
+# contributors may be used to endorse or promote products derived from
+# this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+"""Interfaces for DDR5 memories
+"""
+
+from m5.objects import DRAMInterface
+
+
+# A single DDR5-4400 32bit channel (4x8 configuration)
+# A DDR5 DIMM is made up of two (32 bit) channels.
+# Following configuration is modeling only a single 32bit channel.
+# Timings are based on Micron data sheet:
+# https://media-www.micron.com/-/media/client/global/
+# documents/products/data-sheet/dram/ddr5/ddr5_sdram_core.pdf
+# Configuration: 4Gbx8 devices (32Gb addressing)
+# Maximum bandwidth of DDR5_4400_4x8 (4400 MT/s) can be 17.6GB/s
+class DDR5_4400_4x8(DRAMInterface):
+    # size of a single device
+    device_size = "512MiB"
+
+    # single channel of 32bit width
+    # requires 8-bit wide 4 devices
+    device_bus_width = 8
+
+    # DDR5 is a BL16 device
+    burst_length = 16
+
+    # Each device has a page (row buffer) size of 256B
+    # Four devices lead to a page size of 1KB
+    device_rowbuffer_size = "256B"
+
+    # 4Gbx8 configuration
+    devices_per_rank = 4
+
+    ranks_per_channel = 1
+
+    # 4Gbx8 configuration of DDR5 has 8 bank groups,
+    # 4 banks per bank group and 32 banks in total
+    bank_groups_per_rank = 8
+    banks_per_rank = 32
+
+    write_buffer_size = 64
+    read_buffer_size = 64
+
+    # For 4400 MT/s
+    tCK = "0.454ns"
+
+    # 16 beats across an x32 interface translates to 8 clocks @ 2200 MHz
+    tBURST = "3.632ns"
+
+    # For 4400 MT/s, the number is max(8nCK, 5ns)
+    tCCD_L = "5ns"
+
+    # page 389 of the data sheet
+    tRCD = "14.545ns"
+    tCL = "14.545ns"
+    # tCWL = tCL - 2(nCK)
+    tCWL = "13.637ns"
+    tRP = "14.545ns"
+    tRAS = "32ns"
+
+    # RRD_S (different bank group) : 8nCK
+    tRRD = "3.632ns"
+
+    # RRD_L (same bank group) is MAX(8nCK, 5ns) for 1KB page
+    tRRD_L = "5ns"
+
+    # tFAW for 1KB page is MAX(32nCK, 14.545ns)
+    tXAW = "14.545ns"
+    activation_limit = 4
+
+    # Note: could not find the rank to rank delay
+    # from the datasheet, but, since we are modeling
+    # a single rank device, it should not matter.
+    # Using the DDR4 number as default (i.e. ~2tCK)
+    tCS = "1ns"
+
+    # tRFC (Normal) for 16Gb device is 295ns
+    tRFC = "295ns"
+
+    tPPD = "0.908ns"  # 2nCK
+    tWR = "30ns"
+
+    # Rd/Wr turnaround timings
+    ###################################################################
+    # Note: gem5 adds tBURST separately while calculting rd--rd/wr or
+    # wr--wr/rd delays so we can ignore tBURST factor from the following
+    # equations while calculating the final value of the timing parameter
+    ####################################################################
+    # From the datasheet
+    # (1) tCCD_L_RTW =
+    # CL - CWL + RBL/2 + 2 tCK - (Read DQS offset) + (tRPST - 0.5 tCK) + tWPRE
+    # where CWL = CL-2, RBL/2 = tBURST, Read DQS offset = 1ck, tRPST = 1.5tCK
+    # Therefore, tCCD_L_RTW =
+    # (14.545 - 13.637) + (2*0.454) - 0.454 +
+    # ((1.5*0.454)-(0.5*0.454) + (1.5*0.454) = 2.497ns
+
+    # (2) tCCD_S_RTW =
+    # CL - CWL + RBL/2 + 2 tCK - (Read DQS offset) + (tRPST - 0.5 tCK) + tWPRE
+    # Therefore, tCCD_S_RTW = tCCD_L_RTW = 2.497ns
+
+    # (3) tCCD_L_WTR =
+    # CWL + WBL/2 + max(16nCK,10ns)
+    # where WBL/2 = tBURST
+    # Therefore,
+    # tCCD_L_WTR = 13.637+10 = 23.637ns
+
+    # (4) tCCD_S_WTR =
+    # CWL + WBL/2 + max(4nCK,2.5ns)
+    # where WBL/2 = tBURST
+    # Therefore,
+    # tCCD_S_WTR = 13.637+2.5 = 16.137ns
+
+    tRTW = "2.497ns"
+    tWTR_L = "23.637ns"
+    tWTR = "16.137ns"
+
+    # tRTP : max(12nCK, 7.5ns)
+    tRTP = "7.5ns"
+
+    # <=85C, half for >85C
+    tREFI = "3.9us"
+
+    # active powerdown and precharge powerdown exit time max(7.5ns, 8nCK)
+    tXP = "7.5ns"
+
+    # self refresh exit time
+    # According to the datasheet tXS = tRFC = 295ns (normal Refresh mode)
+    tXS = "295ns"
+
+    page_policy = "close_adaptive"
+
+    # Power related parameters
+    # Reference: https://media-www.micron.com/-/media/client/global/
+    # documents/products/data-sheet/dram/ddr5/16gb_ddr5_sdram_diereva.pdf
+    # Using the values for DDR5-4800
+    # DDR5 has one voltage domain, so all the
+    # CurrentVariable2 params should not be used or set to 0
+    IDD0 = "122mA"
+    # Using the value of IDD2P
+    IDD2P0 = "88mA"
+    IDD2N = "92mA"
+    # Using the value of IDD3P
+    IDD3P0 = "140mA"
+    IDD3N = "142mA"
+    IDD4W = "479mA"
+    IDD4R = "530mA"
+    # IDD5B - 277, IDD5C -- 135mA, IDD5F -- 262mA in the datasheet
+    IDD5 = "277mA"
+    # IDD6N
+    IDD6 = "102mA"
+
+    VDD = "1.1V"
+
+
+# Maximum bandwidth of DDR5_6400_4x8 (6400 MT/s) can be 25.6GB/s
+class DDR5_6400_4x8(DDR5_4400_4x8):
+    # For 6400 MT/s
+    tCK = "0.312ns"
+
+    # 16 beats across an x32 interface translates to 8 clocks @ 3200 MHz
+    tBURST = "2.496ns"
+
+    tRCD = "14.375ns"
+    tCL = "14.375ns"
+    # tCWL = tCL - 2(nCK)
+    tCWL = "13.751ns"
+    tRP = "14.375ns"
+
+    # RRD_S (different bank group) : 8nCK
+    tRRD = "2.496ns"
+
+    # RRD_L (same bank group) is MAX(8nCK, 5ns) for 1KB page
+    tRRD_L = "5ns"
+
+    # tFAW for 1KB page is MAX(32 CK, 10.00ns)
+    tXAW = "10ns"
+
+    # Rd/Wr turnaround timings
+    ###################################################################
+    # Note: gem5 adds tBURST separately while calculting rd--rd/wr or
+    # wr--wr/rd delays so we can ignore tBURST factor from the following
+    # equations while calculating the final value of the timing parameter
+    ####################################################################
+    # From the datasheet
+    # (1) tCCD_L_RTW =
+    # CL - CWL + RBL/2 + 2 tCK - (Read DQS offset) + (tRPST - 0.5 tCK) + tWPRE
+    # where CWL = CL-2, RBL/2 = tBURST, Read DQS offset = 1ck, tRPST = 1.5tCK
+    # Therefore, tCCD_L_RTW =
+    # (14.375 - 13.751) + (2*0.312) - 0.312 + ((1.5*0.312)-(0.5*0.312)
+    # + (1.5*0.312) = 1.716ns
+
+    # (2) tCCD_S_RTW =
+    # CL - CWL + RBL/2 + 2 tCK - (Read DQS offset) + (tRPST - 0.5 tCK) + tWPRE
+    # Therefore, tCCD_S_RTW = tCCD_L_RTW = 1.716ns
+
+    # (3) tCCD_L_WTR =
+    # CWL + WBL/2 + max(16nCK,10ns)
+    # where WBL/2 = tBURST
+    # Therefore,
+    # tCCD_L_WTR = 13.751+10 = 23.751ns
+
+    # (4) tCCD_S_WTR =
+    # CWL + WBL/2 + max(4nCK,2.5ns)
+    # where WBL/2 = tBURST
+    # Therefore,
+    # tCCD_S_WTR = 13.751+2.5 = 16.251ns
+
+    tRTW = "1.716ns"
+    tWTR_L = "23.751ns"
+    tWTR = "16.251ns"
+
+
+# Maximum bandwidth of DDR5_8400_4x8 (8400 MT/s) can be 33.6GB/s
+# Most of the timing parameters for DDR5_8400_4x8 are TBD in
+# the datasheet referred above.
+# The TBD parameters are extrapolated from the speed bins mentioned above.
+class DDR5_8400_4x8(DDR5_4400_4x8):
+    # For 8400 MT/s
+    tCK = "0.238ns"
+
+    # 16 beats across an x32 interface translates to 8 clocks @ 4200 MHz
+    tBURST = "1.904ns"
+
+    tRCD = "14.285ns"
+    tCL = "14.285ns"
+    tCWL = "13.809ns"
+    tRP = "14.285ns"
+
+    # RRD_S (different bank group) : 8nCK
+    tRRD = "1.904ns"
+
+    # tFAW for 1KB page is MAX(32 CK, 10.00ns)
+    tXAW = "10ns"
+
+    # Rd/Wr turnaround timings
+    ###################################################################
+    # Note: gem5 adds tBURST separately while calculting rd--rd/wr or
+    # wr--wr/rd delays so we can ignore tBURST factor from the following
+    # equations while calculating the final value of the timing parameter
+    ####################################################################
+    # From the datasheet
+    # (1) tCCD_L_RTW =
+    # CL - CWL + RBL/2 + 2 tCK - (Read DQS offset) + (tRPST - 0.5 tCK) + tWPRE
+    # where CWL = CL-2, RBL/2 = tBURST, Read DQS offset = 1ck, tRPST = 1.5tCK
+    # Therefore, tCCD_L_RTW =
+    # (14.285 - 13.809) + (2*0.238) - 0.238 + ((1.5*0.238)-(0.5*0.238)
+    # + (1.5*0.238) = 1.309ns
+
+    # (2) tCCD_S_RTW =
+    # CL - CWL + RBL/2 + 2 tCK - (Read DQS offset) + (tRPST - 0.5 tCK) + tWPRE
+    # Therefore, tCCD_S_RTW = tCCD_L_RTW = 1.309ns
+
+    # (3) tCCD_L_WTR =
+    # CWL + WBL/2 + max(16nCK,10ns)
+    # where WBL/2 = tBURST
+    # Therefore,
+    # tCCD_L_WTR =13.809+10 = 23.809ns
+
+    # (4) tCCD_S_WTR =
+    # CWL + WBL/2 + max(4nCK,2.5ns)
+    # where WBL/2 = tBURST
+    # Therefore,
+    # tCCD_S_WTR = 13.809+2.5 = 16.309ns
+
+    tRTW = "1.309ns"
+    tWTR_L = "23.809ns"
+    tWTR = "16.309ns"
diff --git a/src/python/gem5/components/memory/single_channel.py b/src/python/gem5/components/memory/single_channel.py
index 43aab45d76..9235bbd52d 100644
--- a/src/python/gem5/components/memory/single_channel.py
+++ b/src/python/gem5/components/memory/single_channel.py
@@ -29,6 +29,7 @@ from .abstract_memory_system import AbstractMemorySystem
 
 from typing import Optional
 
+from .dram_interfaces.ddr5 import DDR5_4400_4x8, DDR5_6400_4x8, DDR5_8400_4x8
 from .dram_interfaces.ddr4 import DDR4_2400_8x8
 from .dram_interfaces.hbm import HBM_1000_4H_1x128
 from .dram_interfaces.lpddr3 import LPDDR3_1600_1x32
@@ -74,3 +75,30 @@ def SingleChannelHBM(
     if not size:
         size = "256MiB"
     return ChanneledMemory(HBM_1000_4H_1x128, 1, 64, size=size)
+
+
+def DIMM_DDR5_4400(
+    size: Optional[str] = None,
+) -> AbstractMemorySystem:
+    """
+    A single DIMM of DDR5 has two channels
+    """
+    return ChanneledMemory(DDR5_4400_4x8, 2, 64, size=size)
+
+
+def DIMM_DDR5_6400(
+    size: Optional[str] = None,
+) -> AbstractMemorySystem:
+    """
+    A single DIMM of DDR5 has two channels
+    """
+    return ChanneledMemory(DDR5_6400_4x8, 2, 64, size=size)
+
+
+def DIMM_DDR5_8400(
+    size: Optional[str] = None,
+) -> AbstractMemorySystem:
+    """
+    A single DIMM of DDR5 has two channels
+    """
+    return ChanneledMemory(DDR5_8400_4x8, 2, 64, size=size)

From 65d077d79546a5b15b53162821d33c5cab9df3ef Mon Sep 17 00:00:00 2001
From: Melissa Jost <melissakjost@gmail.com>
Date: Thu, 25 May 2023 15:42:20 -0700
Subject: [PATCH 466/492] base: Output link to common errors page

This change adds a link to the common errors page that outputs
whenever a fatal signal is raised, showing when the program
aborts or segfaults.

Change-Id: I88420a594423570af670b460cfce7c03c4208120
Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/70978
Reviewed-by: Jason Lowe-Power <power.jg@gmail.com>
Maintainer: Daniel Carvalho <odanrc@yahoo.com.br>
Tested-by: kokoro <noreply+kokoro@google.com>
Reviewed-by: Daniel Carvalho <odanrc@yahoo.com.br>
---
 src/sim/init_signals.cc | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/src/sim/init_signals.cc b/src/sim/init_signals.cc
index b6db6214aa..b9aa6f6e4d 100644
--- a/src/sim/init_signals.cc
+++ b/src/sim/init_signals.cc
@@ -103,6 +103,8 @@ raiseFatalSignal(int signo)
     // The signal handler should have been reset and unmasked (it was
     // registered with SA_RESETHAND | SA_NODEFER), just raise the
     // signal again to invoke the default handler.
+    STATIC_ERR("For more info on how to address this issue, please visit "
+        "https://www.gem5.org/documentation/general_docs/common-errors/ \n\n");
     pthread_kill(pthread_self(), signo);
 
     // Something is really wrong if the process is alive at this

From 32df25e426d125ea4e00cb8d130f4ee3d3662b73 Mon Sep 17 00:00:00 2001
From: Ayaz Akram <yazakram@ucdavis.edu>
Date: Thu, 10 Nov 2022 13:26:44 -0800
Subject: [PATCH 467/492] mem: HBMCtrl changes to allow PC data buses to be in
 different states

This change updates the HBMCtrl such that both pseudo channels
can be in separate states (read or write) at the same time. In
addition, the controller queues are now always split in two
halves for both pseudo channels.

Change-Id: Ifb599e611ad99f6c511baaf245bad2b5c9210a86
Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/65491
Reviewed-by: Jason Lowe-Power <power.jg@gmail.com>
Maintainer: Jason Lowe-Power <power.jg@gmail.com>
Tested-by: kokoro <noreply+kokoro@google.com>
---
 src/mem/HBMCtrl.py                       |  2 -
 src/mem/dram_interface.cc                | 20 +++---
 src/mem/hbm_ctrl.cc                      | 86 +++++++++---------------
 src/mem/hbm_ctrl.hh                      |  1 -
 src/mem/mem_ctrl.cc                      | 86 +++++++++++++-----------
 src/mem/mem_ctrl.hh                      |  6 +-
 src/mem/mem_interface.hh                 | 22 ++++++
 src/mem/nvm_interface.cc                 |  8 +--
 src/mem/qos/mem_ctrl.cc                  |  2 +-
 src/mem/qos/mem_ctrl.hh                  |  2 +-
 src/mem/qos/mem_sink.cc                  |  2 +-
 src/python/gem5/components/memory/hbm.py |  1 -
 12 files changed, 122 insertions(+), 116 deletions(-)

diff --git a/src/mem/HBMCtrl.py b/src/mem/HBMCtrl.py
index 0c7c1ea919..45d89a76c9 100644
--- a/src/mem/HBMCtrl.py
+++ b/src/mem/HBMCtrl.py
@@ -46,5 +46,3 @@ class HBMCtrl(MemCtrl):
     # gives the best results with following min_r/w_per_switch
     min_reads_per_switch = 64
     min_writes_per_switch = 64
-
-    partitioned_q = Param.Bool(False, "split queues for pseudo channels")
diff --git a/src/mem/dram_interface.cc b/src/mem/dram_interface.cc
index d745fe5a29..65e06db4d3 100644
--- a/src/mem/dram_interface.cc
+++ b/src/mem/dram_interface.cc
@@ -1068,13 +1068,14 @@ DRAMInterface::minBankPrep(const MemPacketQueue& queue,
 
                 // latest Tick for which ACT can occur without
                 // incurring additoinal delay on the data bus
-                const Tick tRCD = ctrl->inReadBusState(false) ?
-                                                 tRCD_RD : tRCD_WR;
+                const Tick tRCD = ctrl->inReadBusState(false, this) ?
+                                                tRCD_RD : tRCD_WR;
                 const Tick hidden_act_max =
                             std::max(min_col_at - tRCD, curTick());
 
                 // When is the earliest the R/W burst can issue?
-                const Tick col_allowed_at = ctrl->inReadBusState(false) ?
+                const Tick col_allowed_at = ctrl->inReadBusState(false,
+                                              this) ?
                                               ranks[i]->banks[j].rdAllowedAt :
                                               ranks[i]->banks[j].wrAllowedAt;
                 Tick col_at = std::max(col_allowed_at, act_at + tRCD);
@@ -1180,10 +1181,10 @@ bool
 DRAMInterface::Rank::isQueueEmpty() const
 {
     // check commmands in Q based on current bus direction
-    bool no_queued_cmds = (dram.ctrl->inReadBusState(true) &&
-                          (readEntries == 0))
-                       || (dram.ctrl->inWriteBusState(true) &&
-                          (writeEntries == 0));
+    bool no_queued_cmds = (dram.ctrl->inReadBusState(true, &(this->dram))
+                          && (readEntries == 0)) ||
+                          (dram.ctrl->inWriteBusState(true, &(this->dram))
+                          && (writeEntries == 0));
     return no_queued_cmds;
 }
 
@@ -1669,7 +1670,7 @@ DRAMInterface::Rank::processPowerEvent()
         // completed refresh event, ensure next request is scheduled
         if (!(dram.ctrl->requestEventScheduled(dram.pseudoChannel))) {
             DPRINTF(DRAM, "Scheduling next request after refreshing"
-                           " rank %d\n", rank);
+                           " rank %d, PC %d \n", rank, dram.pseudoChannel);
             dram.ctrl->restartScheduler(curTick(), dram.pseudoChannel);
         }
     }
@@ -1831,7 +1832,8 @@ DRAMInterface::Rank::resetStats() {
 bool
 DRAMInterface::Rank::forceSelfRefreshExit() const {
     return (readEntries != 0) ||
-           (dram.ctrl->inWriteBusState(true) && (writeEntries != 0));
+           (dram.ctrl->inWriteBusState(true, &(this->dram))
+            && (writeEntries != 0));
 }
 
 void
diff --git a/src/mem/hbm_ctrl.cc b/src/mem/hbm_ctrl.cc
index 747e714f57..f87fa2dcbb 100644
--- a/src/mem/hbm_ctrl.cc
+++ b/src/mem/hbm_ctrl.cc
@@ -51,8 +51,7 @@ HBMCtrl::HBMCtrl(const HBMCtrlParams &p) :
                          name()),
     respondEventPC1([this] {processRespondEvent(pc1Int, respQueuePC1,
                          respondEventPC1, retryRdReqPC1); }, name()),
-    pc1Int(p.dram_2),
-    partitionedQ(p.partitioned_q)
+    pc1Int(p.dram_2)
 {
     DPRINTF(MemCtrl, "Setting up HBM controller\n");
 
@@ -69,17 +68,8 @@ HBMCtrl::HBMCtrl(const HBMCtrlParams &p) :
     pc0Int->setCtrl(this, commandWindow, 0);
     pc1Int->setCtrl(this, commandWindow, 1);
 
-    if (partitionedQ) {
-        writeHighThreshold = (writeBufferSize * (p.write_high_thresh_perc/2)
-                             / 100.0);
-        writeLowThreshold = (writeBufferSize * (p.write_low_thresh_perc/2)
-                            / 100.0);
-    } else {
-        writeHighThreshold = (writeBufferSize * p.write_high_thresh_perc
-                            / 100.0);
-        writeLowThreshold = (writeBufferSize * p.write_low_thresh_perc
-                            / 100.0);
-    }
+    writeHighThreshold = (writeBufferSize/2 * p.write_high_thresh_perc)/100.0;
+    writeLowThreshold = (writeBufferSize/2 * p.write_low_thresh_perc)/100.0;
 }
 
 void
@@ -109,9 +99,9 @@ HBMCtrl::recvAtomic(PacketPtr pkt)
     Tick latency = 0;
 
     if (pc0Int->getAddrRange().contains(pkt->getAddr())) {
-        latency = MemCtrl::recvAtomicLogic(pkt, pc0Int);
+        latency = recvAtomicLogic(pkt, pc0Int);
     } else if (pc1Int->getAddrRange().contains(pkt->getAddr())) {
-        latency = MemCtrl::recvAtomicLogic(pkt, pc1Int);
+        latency = recvAtomicLogic(pkt, pc1Int);
     } else {
         panic("Can't handle address range for packet %s\n", pkt->print());
     }
@@ -122,10 +112,10 @@ HBMCtrl::recvAtomic(PacketPtr pkt)
 void
 HBMCtrl::recvFunctional(PacketPtr pkt)
 {
-    bool found = MemCtrl::recvFunctionalLogic(pkt, pc0Int);
+    bool found = recvFunctionalLogic(pkt, pc0Int);
 
     if (!found) {
-        found = MemCtrl::recvFunctionalLogic(pkt, pc1Int);
+        found = recvFunctionalLogic(pkt, pc1Int);
     }
 
     if (!found) {
@@ -170,9 +160,9 @@ HBMCtrl::writeQueueFullPC0(unsigned int neededEntries) const
 {
     DPRINTF(MemCtrl,
             "Write queue limit %d, PC0 size %d, entries needed %d\n",
-            writeBufferSize, writeQueueSizePC0, neededEntries);
+            writeBufferSize/2, pc0Int->writeQueueSize, neededEntries);
 
-    unsigned int wrsize_new = (writeQueueSizePC0 + neededEntries);
+    unsigned int wrsize_new = (pc0Int->writeQueueSize + neededEntries);
     return wrsize_new > (writeBufferSize/2);
 }
 
@@ -181,9 +171,9 @@ HBMCtrl::writeQueueFullPC1(unsigned int neededEntries) const
 {
     DPRINTF(MemCtrl,
             "Write queue limit %d, PC1 size %d, entries needed %d\n",
-            writeBufferSize, writeQueueSizePC1, neededEntries);
+            writeBufferSize/2, pc1Int->writeQueueSize, neededEntries);
 
-    unsigned int wrsize_new = (writeQueueSizePC1 + neededEntries);
+    unsigned int wrsize_new = (pc1Int->writeQueueSize + neededEntries);
     return wrsize_new > (writeBufferSize/2);
 }
 
@@ -192,10 +182,10 @@ HBMCtrl::readQueueFullPC0(unsigned int neededEntries) const
 {
     DPRINTF(MemCtrl,
             "Read queue limit %d, PC0 size %d, entries needed %d\n",
-            readBufferSize, readQueueSizePC0 + respQueue.size(),
+            readBufferSize/2, pc0Int->readQueueSize + respQueue.size(),
             neededEntries);
 
-    unsigned int rdsize_new = readQueueSizePC0 + respQueue.size()
+    unsigned int rdsize_new = pc0Int->readQueueSize + respQueue.size()
                                                + neededEntries;
     return rdsize_new > (readBufferSize/2);
 }
@@ -205,26 +195,14 @@ HBMCtrl::readQueueFullPC1(unsigned int neededEntries) const
 {
     DPRINTF(MemCtrl,
             "Read queue limit %d, PC1 size %d, entries needed %d\n",
-            readBufferSize, readQueueSizePC1 + respQueuePC1.size(),
+            readBufferSize/2, pc1Int->readQueueSize + respQueuePC1.size(),
             neededEntries);
 
-    unsigned int rdsize_new = readQueueSizePC1 + respQueuePC1.size()
+    unsigned int rdsize_new = pc1Int->readQueueSize + respQueuePC1.size()
                                                + neededEntries;
     return rdsize_new > (readBufferSize/2);
 }
 
-bool
-HBMCtrl::readQueueFull(unsigned int neededEntries) const
-{
-    DPRINTF(MemCtrl,
-            "HBMCtrl: Read queue limit %d, entries needed %d\n",
-            readBufferSize, neededEntries);
-
-    unsigned int rdsize_new = totalReadQueueSize + respQueue.size() +
-                                respQueuePC1.size() + neededEntries;
-    return rdsize_new > readBufferSize;
-}
-
 bool
 HBMCtrl::recvTimingReq(PacketPtr pkt)
 {
@@ -269,23 +247,23 @@ HBMCtrl::recvTimingReq(PacketPtr pkt)
     // check local buffers and do not accept if full
     if (pkt->isWrite()) {
         if (is_pc0) {
-            if (partitionedQ ? writeQueueFullPC0(pkt_count) :
-                                        writeQueueFull(pkt_count))
-            {
+            if (writeQueueFullPC0(pkt_count)) {
                 DPRINTF(MemCtrl, "Write queue full, not accepting\n");
                 // remember that we have to retry this port
-                MemCtrl::retryWrReq = true;
+                retryWrReq = true;
                 stats.numWrRetry++;
                 return false;
             } else {
                 addToWriteQueue(pkt, pkt_count, pc0Int);
+                if (!nextReqEvent.scheduled()) {
+                    DPRINTF(MemCtrl, "Request scheduled immediately\n");
+                    schedule(nextReqEvent, curTick());
+                }
                 stats.writeReqs++;
                 stats.bytesWrittenSys += size;
             }
         } else {
-            if (partitionedQ ? writeQueueFullPC1(pkt_count) :
-                                        writeQueueFull(pkt_count))
-            {
+            if (writeQueueFullPC1(pkt_count)) {
                 DPRINTF(MemCtrl, "Write queue full, not accepting\n");
                 // remember that we have to retry this port
                 retryWrReqPC1 = true;
@@ -293,6 +271,10 @@ HBMCtrl::recvTimingReq(PacketPtr pkt)
                 return false;
             } else {
                 addToWriteQueue(pkt, pkt_count, pc1Int);
+                if (!nextReqEventPC1.scheduled()) {
+                    DPRINTF(MemCtrl, "Request scheduled immediately\n");
+                    schedule(nextReqEventPC1, curTick());
+                }
                 stats.writeReqs++;
                 stats.bytesWrittenSys += size;
             }
@@ -303,11 +285,10 @@ HBMCtrl::recvTimingReq(PacketPtr pkt)
         assert(size != 0);
 
         if (is_pc0) {
-            if (partitionedQ ? readQueueFullPC0(pkt_count) :
-                                        HBMCtrl::readQueueFull(pkt_count)) {
+            if (readQueueFullPC0(pkt_count)) {
                 DPRINTF(MemCtrl, "Read queue full, not accepting\n");
                 // remember that we have to retry this port
-                retryRdReqPC1 = true;
+                retryRdReq = true;
                 stats.numRdRetry++;
                 return false;
             } else {
@@ -322,8 +303,7 @@ HBMCtrl::recvTimingReq(PacketPtr pkt)
                 stats.bytesReadSys += size;
             }
         } else {
-            if (partitionedQ ? readQueueFullPC1(pkt_count) :
-                                        HBMCtrl::readQueueFull(pkt_count)) {
+            if (readQueueFullPC1(pkt_count)) {
                 DPRINTF(MemCtrl, "Read queue full, not accepting\n");
                 // remember that we have to retry this port
                 retryRdReqPC1 = true;
@@ -351,7 +331,7 @@ HBMCtrl::pruneRowBurstTick()
     auto it = rowBurstTicks.begin();
     while (it != rowBurstTicks.end()) {
         auto current_it = it++;
-        if (MemCtrl::getBurstWindow(curTick()) > *current_it) {
+        if (getBurstWindow(curTick()) > *current_it) {
             DPRINTF(MemCtrl, "Removing burstTick for %d\n", *current_it);
             rowBurstTicks.erase(current_it);
         }
@@ -364,7 +344,7 @@ HBMCtrl::pruneColBurstTick()
     auto it = colBurstTicks.begin();
     while (it != colBurstTicks.end()) {
         auto current_it = it++;
-        if (MemCtrl::getBurstWindow(curTick()) > *current_it) {
+        if (getBurstWindow(curTick()) > *current_it) {
             DPRINTF(MemCtrl, "Removing burstTick for %d\n", *current_it);
             colBurstTicks.erase(current_it);
         }
@@ -385,7 +365,7 @@ HBMCtrl::verifySingleCmd(Tick cmd_tick, Tick max_cmds_per_burst, bool row_cmd)
     Tick cmd_at = cmd_tick;
 
     // get tick aligned to burst window
-    Tick burst_tick = MemCtrl::getBurstWindow(cmd_tick);
+    Tick burst_tick = getBurstWindow(cmd_tick);
 
     // verify that we have command bandwidth to issue the command
     // if not, iterate over next window(s) until slot found
@@ -424,7 +404,7 @@ HBMCtrl::verifyMultiCmd(Tick cmd_tick, Tick max_cmds_per_burst,
     Tick cmd_at = cmd_tick;
 
     // get tick aligned to burst window
-    Tick burst_tick = MemCtrl::getBurstWindow(cmd_tick);
+    Tick burst_tick = getBurstWindow(cmd_tick);
 
     // Command timing requirements are from 2nd command
     // Start with assumption that 2nd command will issue at cmd_at and
diff --git a/src/mem/hbm_ctrl.hh b/src/mem/hbm_ctrl.hh
index a6ecf6c589..b17caa6b49 100644
--- a/src/mem/hbm_ctrl.hh
+++ b/src/mem/hbm_ctrl.hh
@@ -144,7 +144,6 @@ class HBMCtrl : public MemCtrl
      */
     bool readQueueFullPC0(unsigned int pkt_count) const;
     bool readQueueFullPC1(unsigned int pkt_count) const;
-    bool readQueueFull(unsigned int pkt_count) const;
 
     /**
      * Check if the write queue partition of both pseudo
diff --git a/src/mem/mem_ctrl.cc b/src/mem/mem_ctrl.cc
index 290db3ebe7..9a3600f331 100644
--- a/src/mem/mem_ctrl.cc
+++ b/src/mem/mem_ctrl.cc
@@ -72,7 +72,6 @@ MemCtrl::MemCtrl(const MemCtrlParams &p) :
     writeLowThreshold(writeBufferSize * p.write_low_thresh_perc / 100.0),
     minWritesPerSwitch(p.min_writes_per_switch),
     minReadsPerSwitch(p.min_reads_per_switch),
-    writesThisTime(0), readsThisTime(0),
     memSchedPolicy(p.mem_sched_policy),
     frontendLatency(p.static_frontend_latency),
     backendLatency(p.static_backend_latency),
@@ -277,6 +276,8 @@ MemCtrl::addToReadQueue(PacketPtr pkt,
             logRequest(MemCtrl::READ, pkt->requestorId(),
                        pkt->qosValue(), mem_pkt->addr, 1);
 
+            mem_intr->readQueueSize++;
+
             // Update stats
             stats.avgRdQLen = totalReadQueueSize + respQueue.size();
         }
@@ -349,6 +350,8 @@ MemCtrl::addToWriteQueue(PacketPtr pkt, unsigned int pkt_count,
             logRequest(MemCtrl::WRITE, pkt->requestorId(),
                        pkt->qosValue(), mem_pkt->addr, 1);
 
+            mem_intr->writeQueueSize++;
+
             assert(totalWriteQueueSize == isInWriteQueue.size());
 
             // Update stats
@@ -575,6 +578,9 @@ MemCtrl::chooseNext(MemPacketQueue& queue, Tick extra_col_delay,
             // check if there is a packet going to a free rank
             for (auto i = queue.begin(); i != queue.end(); ++i) {
                 MemPacket* mem_pkt = *i;
+                if (mem_pkt->pseudoChannel != mem_intr->pseudoChannel) {
+                    continue;
+                }
                 if (packetReady(mem_pkt, mem_intr)) {
                     ret = i;
                     break;
@@ -761,28 +767,28 @@ MemCtrl::verifyMultiCmd(Tick cmd_tick, Tick max_cmds_per_burst,
 }
 
 bool
-MemCtrl::inReadBusState(bool next_state) const
+MemCtrl::inReadBusState(bool next_state, const MemInterface* mem_intr) const
 {
     // check the bus state
     if (next_state) {
         // use busStateNext to get the state that will be used
         // for the next burst
-        return (busStateNext == MemCtrl::READ);
+        return (mem_intr->busStateNext == MemCtrl::READ);
     } else {
-        return (busState == MemCtrl::READ);
+        return (mem_intr->busState == MemCtrl::READ);
     }
 }
 
 bool
-MemCtrl::inWriteBusState(bool next_state) const
+MemCtrl::inWriteBusState(bool next_state, const MemInterface* mem_intr) const
 {
     // check the bus state
     if (next_state) {
         // use busStateNext to get the state that will be used
         // for the next burst
-        return (busStateNext == MemCtrl::WRITE);
+        return (mem_intr->busStateNext == MemCtrl::WRITE);
     } else {
-        return (busState == MemCtrl::WRITE);
+        return (mem_intr->busState == MemCtrl::WRITE);
     }
 }
 
@@ -813,13 +819,13 @@ MemCtrl::doBurstAccess(MemPacket* mem_pkt, MemInterface* mem_intr)
 
     // Update the common bus stats
     if (mem_pkt->isRead()) {
-        ++readsThisTime;
+        ++(mem_intr->readsThisTime);
         // Update latency stats
         stats.requestorReadTotalLat[mem_pkt->requestorId()] +=
             mem_pkt->readyTime - mem_pkt->entryTime;
         stats.requestorReadBytes[mem_pkt->requestorId()] += mem_pkt->size;
     } else {
-        ++writesThisTime;
+        ++(mem_intr->writesThisTime);
         stats.requestorWriteBytes[mem_pkt->requestorId()] += mem_pkt->size;
         stats.requestorWriteTotalLat[mem_pkt->requestorId()] +=
             mem_pkt->readyTime - mem_pkt->entryTime;
@@ -836,8 +842,8 @@ MemCtrl::memBusy(MemInterface* mem_intr) {
     // Default to busy status and update based on interface specifics
     // Default state of unused interface is 'true'
     bool mem_busy = true;
-    bool all_writes_nvm = mem_intr->numWritesQueued == totalWriteQueueSize;
-    bool read_queue_empty = totalReadQueueSize == 0;
+    bool all_writes_nvm = mem_intr->numWritesQueued == mem_intr->writeQueueSize;
+    bool read_queue_empty = mem_intr->readQueueSize == 0;
     mem_busy = mem_intr->isBusy(read_queue_empty, all_writes_nvm);
     if (mem_busy) {
         // if all ranks are refreshing wait for them to finish
@@ -884,27 +890,27 @@ MemCtrl::processNextReqEvent(MemInterface* mem_intr,
     }
 
     // detect bus state change
-    bool switched_cmd_type = (busState != busStateNext);
+    bool switched_cmd_type = (mem_intr->busState != mem_intr->busStateNext);
     // record stats
-    recordTurnaroundStats();
+    recordTurnaroundStats(mem_intr->busState, mem_intr->busStateNext);
 
     DPRINTF(MemCtrl, "QoS Turnarounds selected state %s %s\n",
-            (busState==MemCtrl::READ)?"READ":"WRITE",
+            (mem_intr->busState==MemCtrl::READ)?"READ":"WRITE",
             switched_cmd_type?"[turnaround triggered]":"");
 
     if (switched_cmd_type) {
-        if (busState == MemCtrl::READ) {
+        if (mem_intr->busState == MemCtrl::READ) {
             DPRINTF(MemCtrl,
-                    "Switching to writes after %d reads with %d reads "
-                    "waiting\n", readsThisTime, totalReadQueueSize);
-            stats.rdPerTurnAround.sample(readsThisTime);
-            readsThisTime = 0;
+            "Switching to writes after %d reads with %d reads "
+            "waiting\n", mem_intr->readsThisTime, mem_intr->readQueueSize);
+            stats.rdPerTurnAround.sample(mem_intr->readsThisTime);
+            mem_intr->readsThisTime = 0;
         } else {
             DPRINTF(MemCtrl,
-                    "Switching to reads after %d writes with %d writes "
-                    "waiting\n", writesThisTime, totalWriteQueueSize);
-            stats.wrPerTurnAround.sample(writesThisTime);
-            writesThisTime = 0;
+            "Switching to reads after %d writes with %d writes "
+            "waiting\n", mem_intr->writesThisTime, mem_intr->writeQueueSize);
+            stats.wrPerTurnAround.sample(mem_intr->writesThisTime);
+            mem_intr->writesThisTime = 0;
         }
     }
 
@@ -916,7 +922,7 @@ MemCtrl::processNextReqEvent(MemInterface* mem_intr,
     }
 
     // updates current state
-    busState = busStateNext;
+    mem_intr->busState = mem_intr->busStateNext;
 
     nonDetermReads(mem_intr);
 
@@ -925,18 +931,18 @@ MemCtrl::processNextReqEvent(MemInterface* mem_intr,
     }
 
     // when we get here it is either a read or a write
-    if (busState == READ) {
+    if (mem_intr->busState == READ) {
 
         // track if we should switch or not
         bool switch_to_writes = false;
 
-        if (totalReadQueueSize == 0) {
+        if (mem_intr->readQueueSize == 0) {
             // In the case there is no read request to go next,
             // trigger writes if we have passed the low threshold (or
             // if we are draining)
-            if (!(totalWriteQueueSize == 0) &&
+            if (!(mem_intr->writeQueueSize == 0) &&
                 (drainState() == DrainState::Draining ||
-                 totalWriteQueueSize > writeLowThreshold)) {
+                 mem_intr->writeQueueSize > writeLowThreshold)) {
 
                 DPRINTF(MemCtrl,
                         "Switching to writes due to read queue empty\n");
@@ -1011,6 +1017,7 @@ MemCtrl::processNextReqEvent(MemInterface* mem_intr,
                         mem_pkt->qosValue(), mem_pkt->getAddr(), 1,
                         mem_pkt->readyTime - mem_pkt->entryTime);
 
+            mem_intr->readQueueSize--;
 
             // Insert into response queue. It will be sent back to the
             // requestor at its readyTime
@@ -1029,8 +1036,9 @@ MemCtrl::processNextReqEvent(MemInterface* mem_intr,
             // there are no other writes that can issue
             // Also ensure that we've issued a minimum defined number
             // of reads before switching, or have emptied the readQ
-            if ((totalWriteQueueSize > writeHighThreshold) &&
-               (readsThisTime >= minReadsPerSwitch || totalReadQueueSize == 0)
+            if ((mem_intr->writeQueueSize > writeHighThreshold) &&
+               (mem_intr->readsThisTime >= minReadsPerSwitch ||
+               mem_intr->readQueueSize == 0)
                && !(nvmWriteBlock(mem_intr))) {
                 switch_to_writes = true;
             }
@@ -1045,7 +1053,7 @@ MemCtrl::processNextReqEvent(MemInterface* mem_intr,
         // draining), or because the writes hit the hight threshold
         if (switch_to_writes) {
             // transition to writing
-            busStateNext = WRITE;
+            mem_intr->busStateNext = WRITE;
         }
     } else {
 
@@ -1099,6 +1107,7 @@ MemCtrl::processNextReqEvent(MemInterface* mem_intr,
                     mem_pkt->qosValue(), mem_pkt->getAddr(), 1,
                     mem_pkt->readyTime - mem_pkt->entryTime);
 
+        mem_intr->writeQueueSize--;
 
         // remove the request from the queue - the iterator is no longer valid
         writeQueue[mem_pkt->qosValue()].erase(to_write);
@@ -1112,15 +1121,15 @@ MemCtrl::processNextReqEvent(MemInterface* mem_intr,
         // If we are interfacing to NVM and have filled the writeRespQueue,
         // with only NVM writes in Q, then switch to reads
         bool below_threshold =
-            totalWriteQueueSize + minWritesPerSwitch < writeLowThreshold;
+            mem_intr->writeQueueSize + minWritesPerSwitch < writeLowThreshold;
 
-        if (totalWriteQueueSize == 0 ||
+        if (mem_intr->writeQueueSize == 0 ||
             (below_threshold && drainState() != DrainState::Draining) ||
-            (totalReadQueueSize && writesThisTime >= minWritesPerSwitch) ||
-            (totalReadQueueSize && (nvmWriteBlock(mem_intr)))) {
+            (mem_intr->readQueueSize && mem_intr->writesThisTime >= minWritesPerSwitch) ||
+            (mem_intr->readQueueSize && (nvmWriteBlock(mem_intr)))) {
 
             // turn the bus back around for reads again
-            busStateNext = MemCtrl::READ;
+            mem_intr->busStateNext = MemCtrl::READ;
 
             // note that the we switch back to reads also in the idle
             // case, which eventually will check for any draining and
@@ -1133,7 +1142,7 @@ MemCtrl::processNextReqEvent(MemInterface* mem_intr,
     if (!next_req_event.scheduled())
         schedule(next_req_event, std::max(mem_intr->nextReqTime, curTick()));
 
-    if (retry_wr_req && totalWriteQueueSize < writeBufferSize) {
+    if (retry_wr_req && mem_intr->writeQueueSize < writeBufferSize) {
         retry_wr_req = false;
         port.sendRetryReq();
     }
@@ -1418,9 +1427,8 @@ MemCtrl::drain()
 {
     // if there is anything in any of our internal queues, keep track
     // of that as well
-    if (totalWriteQueueSize || totalReadQueueSize || !respQueue.empty() ||
+    if (totalWriteQueueSize || totalReadQueueSize || !respQEmpty() ||
           !allIntfDrained()) {
-
         DPRINTF(Drain, "Memory controller not drained, write: %d, read: %d,"
                 " resp: %d\n", totalWriteQueueSize, totalReadQueueSize,
                 respQueue.size());
diff --git a/src/mem/mem_ctrl.hh b/src/mem/mem_ctrl.hh
index 2819fb4caa..917798ffa7 100644
--- a/src/mem/mem_ctrl.hh
+++ b/src/mem/mem_ctrl.hh
@@ -517,8 +517,6 @@ class MemCtrl : public qos::MemCtrl
     uint32_t writeLowThreshold;
     const uint32_t minWritesPerSwitch;
     const uint32_t minReadsPerSwitch;
-    uint32_t writesThisTime;
-    uint32_t readsThisTime;
 
     /**
      * Memory controller configuration initialized based on parameter
@@ -764,7 +762,7 @@ class MemCtrl : public qos::MemCtrl
      * @param next_state Check either the current or next bus state
      * @return True when bus is currently in a read state
      */
-    bool inReadBusState(bool next_state) const;
+    bool inReadBusState(bool next_state, const MemInterface* mem_intr) const;
 
     /**
      * Check the current direction of the memory channel
@@ -772,7 +770,7 @@ class MemCtrl : public qos::MemCtrl
      * @param next_state Check either the current or next bus state
      * @return True when bus is currently in a write state
      */
-    bool inWriteBusState(bool next_state) const;
+    bool inWriteBusState(bool next_state, const MemInterface* mem_intr) const;
 
     Port &getPort(const std::string &if_name,
                   PortID idx=InvalidPortID) override;
diff --git a/src/mem/mem_interface.hh b/src/mem/mem_interface.hh
index 8d6f4fe52b..b0f762fc80 100644
--- a/src/mem/mem_interface.hh
+++ b/src/mem/mem_interface.hh
@@ -189,6 +189,28 @@ class MemInterface : public AbstractMemory
     Tick nextBurstAt = 0;
     Tick nextReqTime = 0;
 
+    /**
+     * Reads/writes performed by the controller for this interface before
+     * bus direction is switched
+     */
+    uint32_t readsThisTime = 0;
+    uint32_t writesThisTime = 0;
+
+    /**
+     * Read/write packets in the read/write queue for this interface
+     * qos/mem_ctrl.hh has similar counters, but they track all packets
+     * in the controller for all memory interfaces connected to the
+     * controller.
+     */
+    uint32_t readQueueSize = 0;
+    uint32_t writeQueueSize = 0;
+
+
+    MemCtrl::BusState busState = MemCtrl::READ;
+
+    /** bus state for next request event triggered */
+    MemCtrl::BusState busStateNext = MemCtrl::READ;
+
     /**
      * pseudo channel number used for HBM modeling
      */
diff --git a/src/mem/nvm_interface.cc b/src/mem/nvm_interface.cc
index b2c4073cd9..366f71d56a 100644
--- a/src/mem/nvm_interface.cc
+++ b/src/mem/nvm_interface.cc
@@ -402,9 +402,9 @@ NVMInterface::processReadReadyEvent()
 
 bool
 NVMInterface::burstReady(MemPacket* pkt) const {
-    bool read_rdy =  pkt->isRead() && (ctrl->inReadBusState(true)) &&
-               (pkt->readyTime <= curTick()) && (numReadDataReady > 0);
-    bool write_rdy =  !pkt->isRead() && !ctrl->inReadBusState(true) &&
+    bool read_rdy =  pkt->isRead() && (ctrl->inReadBusState(true, this)) &&
+                (pkt->readyTime <= curTick()) && (numReadDataReady > 0);
+    bool write_rdy =  !pkt->isRead() && !ctrl->inReadBusState(true, this) &&
                 !writeRespQueueFull();
     return (read_rdy || write_rdy);
 }
@@ -613,7 +613,7 @@ NVMInterface::isBusy(bool read_queue_empty, bool all_writes_nvm)
      // Only assert busy for the write case when there are also
      // no reads in Q and the write queue only contains NVM commands
      // This allows the bus state to switch and service reads
-     return (ctrl->inReadBusState(true) ?
+     return (ctrl->inReadBusState(true, this) ?
                  (numReadDataReady == 0) && !read_queue_empty :
                  writeRespQueueFull() && read_queue_empty &&
                                          all_writes_nvm);
diff --git a/src/mem/qos/mem_ctrl.cc b/src/mem/qos/mem_ctrl.cc
index 9bf13280da..b102ccfc76 100644
--- a/src/mem/qos/mem_ctrl.cc
+++ b/src/mem/qos/mem_ctrl.cc
@@ -355,7 +355,7 @@ MemCtrl::MemCtrlStats::regStats()
 }
 
 void
-MemCtrl::recordTurnaroundStats()
+MemCtrl::recordTurnaroundStats(BusState busState, BusState busStateNext)
 {
     if (busStateNext != busState) {
         if (busState == READ) {
diff --git a/src/mem/qos/mem_ctrl.hh b/src/mem/qos/mem_ctrl.hh
index 359e2858be..2e295d0790 100644
--- a/src/mem/qos/mem_ctrl.hh
+++ b/src/mem/qos/mem_ctrl.hh
@@ -242,7 +242,7 @@ class MemCtrl : public ClockedObject
      * Record statistics on turnarounds based on
      * busStateNext and busState values
      */
-    void recordTurnaroundStats();
+    void recordTurnaroundStats(BusState busState, BusState busStateNext);
 
     /**
      * Escalates/demotes priority of all packets
diff --git a/src/mem/qos/mem_sink.cc b/src/mem/qos/mem_sink.cc
index 66b945153b..9ade691477 100644
--- a/src/mem/qos/mem_sink.cc
+++ b/src/mem/qos/mem_sink.cc
@@ -217,7 +217,7 @@ MemSinkCtrl::processNextReqEvent()
     busStateNext = selectNextBusState();
 
     // Record turnaround stats and update current state direction
-    recordTurnaroundStats();
+    recordTurnaroundStats(busState, busStateNext);
 
     // Set current bus state
     setCurrentBusState();
diff --git a/src/python/gem5/components/memory/hbm.py b/src/python/gem5/components/memory/hbm.py
index 35497c2f89..75db1f9fde 100644
--- a/src/python/gem5/components/memory/hbm.py
+++ b/src/python/gem5/components/memory/hbm.py
@@ -122,7 +122,6 @@ class HighBandwidthMemory(ChanneledMemory):
         # for interleaving across pseudo channels (at 64B currently)
         mask_list.insert(0, 1 << 6)
         for i, ctrl in enumerate(self.mem_ctrl):
-            ctrl.partitioned_q = False
             ctrl.dram.range = AddrRange(
                 start=self._mem_range.start,
                 size=self._mem_range.size(),

From a74695f5bcc3dd6c424e4d708732cc686a98794d Mon Sep 17 00:00:00 2001
From: "Bobby R. Bruce" <bbruce@ucdavis.edu>
Date: Tue, 30 May 2023 10:43:52 -0700
Subject: [PATCH 468/492] stdlib: Fix incorrect path and checks for DRAMsim3

There are three bugs fixed in this patch:

1. The `dram_3_dir` was missing the "dramsim3" directory.
2. Missing `not` when checking if configs is a directory.
3. Missing `not` when checking if input file is a file.

Change-Id: I185f4832c1c2f1ecc4e138c148ad7969ef9b6fd4
Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/71058
Maintainer: Jason Lowe-Power <power.jg@gmail.com>
Reviewed-by: Jason Lowe-Power <power.jg@gmail.com>
Tested-by: kokoro <noreply+kokoro@google.com>
---
 src/python/gem5/components/memory/dramsim_3.py | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/src/python/gem5/components/memory/dramsim_3.py b/src/python/gem5/components/memory/dramsim_3.py
index e5c1877fb5..f154ba354f 100644
--- a/src/python/gem5/components/memory/dramsim_3.py
+++ b/src/python/gem5/components/memory/dramsim_3.py
@@ -32,6 +32,7 @@ def config_ds3(mem_type: str, num_chnls: int) -> Tuple[str, str]:
         os.pardir,
         os.pardir,
         "ext",
+        "dramsim3",
         "DRAMsim3",
     )
 
@@ -47,11 +48,11 @@ def config_ds3(mem_type: str, num_chnls: int) -> Tuple[str, str]:
             "Please navigate to `ext` and run:\n"
             "git clone git@github.com:umd-memsys/DRAMsim3.git"
         )
-    elif os.path.isdir(dramsim_3_mem_configs):
+    elif not os.path.isdir(dramsim_3_mem_configs):
         raise Exception(
             "The `ext/DRAMsim3/configs` directory cannot be found."
         )
-    elif os.path.isfile(input_file):
+    elif not os.path.isfile(input_file):
         raise Exception(
             "The configuration file '" + input_file + "' cannot  be found."
         )

From cf2d5b68a91aafb46efd7e59b789b35681dd6ff3 Mon Sep 17 00:00:00 2001
From: Matthew Poremba <matthew.poremba@amd.com>
Date: Tue, 30 May 2023 13:57:09 -0500
Subject: [PATCH 469/492] gpu-compute: Gfx version check for FS and SE mode

There is no GPU device in SE mode to get version from and no GPU driver
in FS mode to get version from, so a conditional needs to be added
depending on the mode to get the gfx version.

Change-Id: I33fdafb60d351ebc5148e2248244537fb5bebd31
Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/71078
Tested-by: kokoro <noreply+kokoro@google.com>
Maintainer: Matt Sinclair <mattdsinclair@gmail.com>
Reviewed-by: Matt Sinclair <mattdsinclair@gmail.com>
(cherry picked from commit ebd5b3e4ae49b24dec4efc299d0b0198be9f4df3)
Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/71158
Reviewed-by: Matthew Poremba <matthew.poremba@amd.com>
Reviewed-by: Bobby Bruce <bbruce@ucdavis.edu>
Maintainer: Bobby Bruce <bbruce@ucdavis.edu>
---
 src/gpu-compute/gpu_command_processor.cc | 4 +++-
 src/gpu-compute/gpu_compute_driver.hh    | 2 ++
 2 files changed, 5 insertions(+), 1 deletion(-)

diff --git a/src/gpu-compute/gpu_command_processor.cc b/src/gpu-compute/gpu_command_processor.cc
index 9755180204..8f748bdc31 100644
--- a/src/gpu-compute/gpu_command_processor.cc
+++ b/src/gpu-compute/gpu_command_processor.cc
@@ -227,9 +227,11 @@ GPUCommandProcessor::submitDispatchPkt(void *raw_pkt, uint32_t queue_id,
 
     DPRINTF(GPUKernelInfo, "Kernel name: %s\n", kernel_name.c_str());
 
+    GfxVersion gfxVersion = FullSystem ? gpuDevice->getGfxVersion()
+                          : driver()->getGfxVersion();
     HSAQueueEntry *task = new HSAQueueEntry(kernel_name, queue_id,
         dynamic_task_id, raw_pkt, &akc, host_pkt_addr, machine_code_addr,
-        gpuDevice->getGfxVersion());
+        gfxVersion);
 
     DPRINTF(GPUCommandProc, "Task ID: %i Got AQL: wg size (%dx%dx%d), "
         "grid size (%dx%dx%d) kernarg addr: %#x, completion "
diff --git a/src/gpu-compute/gpu_compute_driver.hh b/src/gpu-compute/gpu_compute_driver.hh
index def40f4557..9a3c6479c3 100644
--- a/src/gpu-compute/gpu_compute_driver.hh
+++ b/src/gpu-compute/gpu_compute_driver.hh
@@ -142,6 +142,8 @@ class GPUComputeDriver final : public EmulatedDriver
     };
     typedef class EventTableEntry ETEntry;
 
+    GfxVersion getGfxVersion() const { return gfxVersion; }
+
   private:
     /**
      * GPU that is controlled by this driver.

From 70ec55ce2a4ee221fbb15bc2fafd38cee6ff8c65 Mon Sep 17 00:00:00 2001
From: Kunal Pai <62979320+kunpai@users.noreply.github.com>
Date: Sun, 21 May 2023 23:35:40 -0700
Subject: [PATCH 470/492] stdlib, tests, configs: Introduce gem5 Vision to
 resources

This patch makes changes to the stdlib based on the gem5 Vision project.
Firstly, a MongoDB database is supported.
A JSON database's support is continued.
The JSON can either be a local path or a raw GitHub link.

The data for these databases is stored in src/python
under "gem5-config.json".
This will be used by default.
However, the configuration can be overridden:
- by providing a path using the GEM5_CONFIG env variable.
- by placing a gem5-config.json file in the current working directory.

An AbstractClient is an abstract class that implements
searching and sorting relevant to the databases.

Clients is an optional list that can be passed
while defining any Resource class and obtain_resource.
These databases can be defined in the config JSON.

Resources now have versions. This allows for a
single version, e.g., 'x86-ubuntu-boot', to have
multiple versions. As such, the key of a resource is
its ID and Version (e.g., 'x86-ubuntu-boot/v2.1.0').
Different versions of a resource might be compatible
with different versions of gem5.

By default, it picks the latest version compatible with the gem5 Version
of the user.

A gem5 resource schema now has additional fields.
These are:
- source_url: Stores URL of GitHub Source of the resource.
- license: License information of the resource.
- tags: Words to identify a resource better, like hello for hello-world
- example_usage: How to use the resource in a simulation.
- gem5_versions: List of gem5 versions that resource is compatible with.
- resource_version: The version of the resource itself.
- size: The download size of the resource, if it exists.
- code_examples: List of objects.
These objects contain the path to where a resource is
used in gem5 example config scripts,
and if the resource itself is used in tests or not.
- category: Category of the resource, as defined by classes in
src/python/gem5/resources/resource.py.

Some fields have been renamed:
- "name" is changed to "id"
- "documentation" is changed to "description"

Besides these, the schema also supports resource specialization.
It adds fields relevant to a specific resource as specified in
src/python/gem5/resources/resource.py
These changes have been made to better present
information on the new gem5 Resources website.

But, they do not affect the way resources are used by a gem5 user.
This patch is also backwards compatible.
Existing code doesn't break with this new infrastructure.

Also, refs in the tests have been changed to match this new schema.
Tests have been changed to work with the two clients.

Change-Id: Ia9bf47f7900763827fd5e873bcd663cc3ecdba40
Co-authored-by: Kunal Pai <kunpai@ucdavis.edu>
Co-authored-by: Parth Shah <helloparthshah@gmail.com>
Co-authored-by: Harshil Patel <harshilp2107@gmail.com>
Co-authored-by: aarsli <arsli@ucdavis.edu>
Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/71278
Maintainer: Bobby Bruce <bbruce@ucdavis.edu>
Tested-by: kokoro <noreply+kokoro@google.com>
Reviewed-by: Bobby Bruce <bbruce@ucdavis.edu>
---
 src/python/SConscript                         |  12 +
 src/python/gem5/resources/client.py           |  84 +++
 .../gem5/resources/client_api/__init__.py     |   0
 .../resources/client_api/abstract_client.py   |  71 +++
 .../gem5/resources/client_api/atlasclient.py  |  91 ++++
 .../resources/client_api/client_wrapper.py    | 228 ++++++++
 .../gem5/resources/client_api/jsonclient.py   |  70 +++
 src/python/gem5/resources/downloader.py       |  23 +-
 src/python/gem5/resources/resource.py         | 155 ++++--
 src/python/gem5/resources/workload.py         |  27 +-
 src/python/gem5/utils/simpoint.py             |  12 +-
 src/python/gem5_default_config.py             |  39 ++
 tests/gem5/verifier.py                        |   1 +
 .../resources/pyunit_client_wrapper_checks.py | 511 ++++++++++++++++++
 .../resources/pyunit_downloader_checks.py     | 155 ++++--
 .../pyunit_obtain_resources_check.py          | 192 +++++++
 .../pyunit_resource_download_checks.py        |  72 +++
 .../pyunit_resource_specialization.py         |  68 ++-
 .../resources/pyunit_workload_checks.py       |  42 +-
 .../stdlib/resources/refs/mongo_dup_mock.json |  30 +
 .../stdlib/resources/refs/mongo_mock.json     |  56 ++
 .../resources/refs/obtain-resource.json       |  59 ++
 .../refs/resource-specialization.json         | 311 ++++++-----
 .../stdlib/resources/refs/resources.json      | 329 +++++++++++
 .../refs/workload-checks-custom-workload.json |  33 +-
 .../resources/refs/workload-checks.json       |  84 +--
 26 files changed, 2413 insertions(+), 342 deletions(-)
 create mode 100644 src/python/gem5/resources/client.py
 create mode 100644 src/python/gem5/resources/client_api/__init__.py
 create mode 100644 src/python/gem5/resources/client_api/abstract_client.py
 create mode 100644 src/python/gem5/resources/client_api/atlasclient.py
 create mode 100644 src/python/gem5/resources/client_api/client_wrapper.py
 create mode 100644 src/python/gem5/resources/client_api/jsonclient.py
 create mode 100644 src/python/gem5_default_config.py
 create mode 100644 tests/pyunit/stdlib/resources/pyunit_client_wrapper_checks.py
 create mode 100644 tests/pyunit/stdlib/resources/pyunit_obtain_resources_check.py
 create mode 100644 tests/pyunit/stdlib/resources/pyunit_resource_download_checks.py
 create mode 100644 tests/pyunit/stdlib/resources/refs/mongo_dup_mock.json
 create mode 100644 tests/pyunit/stdlib/resources/refs/mongo_mock.json
 create mode 100644 tests/pyunit/stdlib/resources/refs/obtain-resource.json
 create mode 100644 tests/pyunit/stdlib/resources/refs/resources.json

diff --git a/src/python/SConscript b/src/python/SConscript
index e261bfa7f2..f98b5700c7 100644
--- a/src/python/SConscript
+++ b/src/python/SConscript
@@ -264,12 +264,24 @@ PySource('gem5.prebuilt.riscvmatched',
 PySource('gem5.prebuilt.riscvmatched',
     'gem5/prebuilt/riscvmatched/riscvmatched_core.py')
 PySource('gem5.resources', 'gem5/resources/__init__.py')
+PySource('gem5.resources', 'gem5/resources/client.py')
 PySource('gem5.resources', 'gem5/resources/downloader.py')
 PySource('gem5.resources', 'gem5/resources/md5_utils.py')
 PySource('gem5.resources', 'gem5/resources/resource.py')
 PySource('gem5.resources', 'gem5/resources/workload.py')
 PySource('gem5.resources', 'gem5/resources/looppoint.py')
 PySource('gem5.resources', 'gem5/resources/elfie.py')
+PySource('gem5.resources.client_api',
+         'gem5/resources/client_api/__init__.py')
+PySource('gem5.resources.client_api',
+         'gem5/resources/client_api/jsonclient.py')
+PySource('gem5.resources.client_api',
+         'gem5/resources/client_api/atlasclient.py')
+PySource('gem5.resources.client_api',
+         'gem5/resources/client_api/client_wrapper.py')
+PySource('gem5.resources.client_api',
+         'gem5/resources/client_api/abstract_client.py')
+PySource('gem5', 'gem5_default_config.py')
 PySource('gem5.utils', 'gem5/utils/__init__.py')
 PySource('gem5.utils', 'gem5/utils/filelock.py')
 PySource('gem5.utils', 'gem5/utils/override.py')
diff --git a/src/python/gem5/resources/client.py b/src/python/gem5/resources/client.py
new file mode 100644
index 0000000000..bd473eb038
--- /dev/null
+++ b/src/python/gem5/resources/client.py
@@ -0,0 +1,84 @@
+# Copyright (c) 2023 The Regents of the University of California
+# All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are
+# met: redistributions of source code must retain the above copyright
+# notice, this list of conditions and the following disclaimer;
+# redistributions in binary form must reproduce the above copyright
+# notice, this list of conditions and the following disclaimer in the
+# documentation and/or other materials provided with the distribution;
+# neither the name of the copyright holders nor the names of its
+# contributors may be used to endorse or promote products derived from
+# this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+import json
+from pathlib import Path
+import os
+from typing import Optional, Dict, List
+from .client_api.client_wrapper import ClientWrapper
+from gem5.gem5_default_config import config
+from m5.util import inform
+
+
+def getFileContent(file_path: Path) -> Dict:
+    """
+    Get the content of the file at the given path
+    :param file_path: The path of the file
+    :return: The content of the file
+    """
+    if file_path.exists():
+        with open(file_path, "r") as file:
+            return json.load(file)
+    else:
+        raise Exception(f"File not found at {file_path}")
+
+
+clientwrapper = None
+
+
+def get_resource_json_obj(
+    resource_id,
+    resource_version: Optional[str] = None,
+    clients: Optional[List[str]] = None,
+) -> Dict:
+    """
+    Get the resource json object from the clients wrapper
+    :param resource_id: The resource id
+    :param resource_version: The resource version
+    :param clients: The list of clients to query
+    """
+    global clientwrapper
+    if clientwrapper is None:
+        # First check if the config file path is provided in the environment variable
+        if "GEM5_CONFIG" in os.environ:
+            config_file_path = Path(os.environ["GEM5_CONFIG"])
+            gem5_config = getFileContent(config_file_path)
+            inform("Using config file specified by $GEM5_CONFIG")
+            inform(f"Using config file at {os.environ['GEM5_CONFIG']}")
+        # If not, check if the config file is present in the current directory
+        elif (Path().cwd().resolve() / "gem5-config.json").exists():
+            config_file_path = Path().resolve() / "gem5-config.json"
+            gem5_config = getFileContent(config_file_path)
+            inform(f"Using config file at {config_file_path}")
+        # If not, use the default config in the build directory
+        else:
+            gem5_config = config
+            inform("Using default config")
+        clientwrapper = ClientWrapper(gem5_config)
+
+    return clientwrapper.get_resource_json_obj_from_client(
+        resource_id, resource_version, clients
+    )
diff --git a/src/python/gem5/resources/client_api/__init__.py b/src/python/gem5/resources/client_api/__init__.py
new file mode 100644
index 0000000000..e69de29bb2
diff --git a/src/python/gem5/resources/client_api/abstract_client.py b/src/python/gem5/resources/client_api/abstract_client.py
new file mode 100644
index 0000000000..74a513fc56
--- /dev/null
+++ b/src/python/gem5/resources/client_api/abstract_client.py
@@ -0,0 +1,71 @@
+# Copyright (c) 2023 The Regents of the University of California
+# All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are
+# met: redistributions of source code must retain the above copyright
+# notice, this list of conditions and the following disclaimer;
+# redistributions in binary form must reproduce the above copyright
+# notice, this list of conditions and the following disclaimer in the
+# documentation and/or other materials provided with the distribution;
+# neither the name of the copyright holders nor the names of its
+# contributors may be used to endorse or promote products derived from
+# this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+from abc import ABC, abstractmethod
+from typing import Any, Dict, List
+import urllib.parse
+
+
+class AbstractClient(ABC):
+    def verify_status_code(self, status_code: int) -> None:
+        """
+        Verifies that the status code is 200.
+        :param status_code: The status code to verify.
+        """
+        if status_code == 200:
+            return
+        if status_code == 429:
+            raise Exception("Panic: Too many requests")
+        if status_code == 401:
+            raise Exception("Panic: Unauthorized")
+        if status_code == 404:
+            raise Exception("Panic: Not found")
+        if status_code == 400:
+            raise Exception("Panic: Bad request")
+        if status_code == 500:
+            raise Exception("Panic: Internal server error")
+
+        raise Exception(f"Panic: Unknown status code {status_code}")
+
+    def _url_validator(self, url: str) -> bool:
+        """
+        Validates the provided URL.
+        :param url: The URL to be validated.
+        :return: True if the URL is valid, False otherwise.
+        """
+        try:
+            result = urllib.parse.urlparse(url)
+            return all([result.scheme, result.netloc, result.path])
+        except:
+            return False
+
+    @abstractmethod
+    def get_resources_by_id(self, resource_id: str) -> List[Dict[str, Any]]:
+        """
+        :param resource_id: The ID of the Resource.
+        :return: A list of all the Resources with the given ID.
+        """
+        raise NotImplementedError
diff --git a/src/python/gem5/resources/client_api/atlasclient.py b/src/python/gem5/resources/client_api/atlasclient.py
new file mode 100644
index 0000000000..4a6e5cf691
--- /dev/null
+++ b/src/python/gem5/resources/client_api/atlasclient.py
@@ -0,0 +1,91 @@
+# Copyright (c) 2023 The Regents of the University of California
+# All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are
+# met: redistributions of source code must retain the above copyright
+# notice, this list of conditions and the following disclaimer;
+# redistributions in binary form must reproduce the above copyright
+# notice, this list of conditions and the following disclaimer in the
+# documentation and/or other materials provided with the distribution;
+# neither the name of the copyright holders nor the names of its
+# contributors may be used to endorse or promote products derived from
+# this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+from urllib import request, parse
+from urllib.error import HTTPError, URLError
+from typing import Optional, Dict, Union, Type, Tuple, List, Any
+import json
+from .abstract_client import AbstractClient
+
+
+class AtlasClient(AbstractClient):
+    def __init__(self, config: Dict[str, str]):
+        """
+        Initializes a connection to a MongoDB Atlas database.
+        :param uri: The URI for connecting to the MongoDB server.
+        :param db: The name of the database to connect to.
+        :param collection: The name of the collection within the database.
+        """
+        self.apiKey = config["apiKey"]
+        self.url = config["url"]
+        self.collection = config["collection"]
+        self.database = config["database"]
+        self.dataSource = config["dataSource"]
+        self.authUrl = config["authUrl"]
+
+    def get_token(self):
+        data = {"key": self.apiKey}
+        data = json.dumps(data).encode("utf-8")
+
+        req = request.Request(
+            self.authUrl,
+            data=data,
+            headers={"Content-Type": "application/json"},
+        )
+        try:
+            response = request.urlopen(req)
+        except HTTPError as e:
+            self.verify_status_code(e.status)
+            return None
+        result = json.loads(response.read().decode("utf-8"))
+        token = result["access_token"]
+        return token
+
+    def get_resources_by_id(self, resource_id: str) -> List[Dict[str, Any]]:
+        url = f"{self.url}/action/find"
+        data = {
+            "dataSource": self.dataSource,
+            "collection": self.collection,
+            "database": self.database,
+            "filter": {"id": resource_id},
+        }
+        data = json.dumps(data).encode("utf-8")
+
+        headers = {
+            "Authorization": f"Bearer {self.get_token()}",
+            "Content-Type": "application/json",
+        }
+
+        req = request.Request(url, data=data, headers=headers)
+        try:
+            response = request.urlopen(req)
+        except HTTPError as e:
+            self.verify_status_code(e.status)
+            return None
+        result = json.loads(response.read().decode("utf-8"))
+        resources = result["documents"]
+
+        return resources
diff --git a/src/python/gem5/resources/client_api/client_wrapper.py b/src/python/gem5/resources/client_api/client_wrapper.py
new file mode 100644
index 0000000000..74ee831c1f
--- /dev/null
+++ b/src/python/gem5/resources/client_api/client_wrapper.py
@@ -0,0 +1,228 @@
+# Copyright (c) 2023 The Regents of the University of California
+# All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are
+# met: redistributions of source code must retain the above copyright
+# notice, this list of conditions and the following disclaimer;
+# redistributions in binary form must reproduce the above copyright
+# notice, this list of conditions and the following disclaimer in the
+# documentation and/or other materials provided with the distribution;
+# neither the name of the copyright holders nor the names of its
+# contributors may be used to endorse or promote products derived from
+# this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+from .jsonclient import JSONClient
+from .atlasclient import AtlasClient
+from _m5 import core
+from typing import Optional, Dict, List
+from distutils.version import StrictVersion
+import itertools
+from m5.util import warn
+
+
+class ClientWrapper:
+    def __init__(self, config):
+        self.clients = self.create_clients(config)
+
+    def create_clients(
+        self,
+        config: Dict,
+    ) -> Dict:
+        """
+        This function creates respective client object for each source in the
+        config file according to the type of source.
+        Params: config: config file containing the source information
+        Returns: clients: dictionary of clients for each source
+        """
+        clients = {}
+        for client in config["sources"]:
+            client_source = config["sources"][client]
+            try:
+                if client_source["isMongo"]:
+                    clients[client] = AtlasClient(client_source)
+                else:
+                    clients[client] = JSONClient(client_source["url"])
+            except Exception as e:
+                warn(f"Error creating client {client}: {str(e)}")
+        return clients
+
+    def get_all_resources_by_id(
+        self,
+        resource_id: str,
+        clients: Optional[List[str]] = None,
+    ) -> List[Dict]:
+        """
+        This function returns all the resources with the given id from all the
+        sources.
+        :param resource_id: The id of the resource to search for.
+        :param clients: A list of clients to search through. If None, all
+        clients are searched.
+        :return: A list of resources as Python dictionaries.
+        """
+        resources = []
+        if not clients:
+            clients = list(self.clients.keys())
+        for client in clients:
+            if client not in self.clients:
+                raise Exception(f"Client: {client} does not exist")
+            try:
+                resources.extend(
+                    self.clients[client].get_resources_by_id(resource_id)
+                )
+            except Exception as e:
+                warn(f"Error getting resources from client {client}: {str(e)}")
+        # check if no 2 resources have the same id and version
+        for res1, res2 in itertools.combinations(resources, 2):
+            if res1["resource_version"] == res2["resource_version"]:
+                raise Exception(
+                    f"Resource {resource_id} has multiple resources with "
+                    f"the same version: {res1['resource_version']}"
+                )
+        return resources
+
+    def get_resource_json_obj_from_client(
+        self,
+        resource_id: str,
+        resource_version: Optional[str] = None,
+        clients: Optional[List[str]] = None,
+    ) -> Dict:
+        """
+        This function returns the resource object from the client with the
+        given id and version.
+        :param resource_id: The id of the resource to search for.
+        :param resource_version: The version of the resource to search for.
+        :param clients: A list of clients to search through. If None, all
+        clients are searched.
+        :return: The resource object as a Python dictionary if found.
+        If not found, exception is thrown.
+        """
+        # getting all the resources with the given id from the dictionary
+        resources = self.get_all_resources_by_id(resource_id, clients)
+        # if no resource with the given id is found, return None
+        if len(resources) == 0:
+            raise Exception(f"Resource with ID '{resource_id}' not found.")
+
+        resource_to_return = None
+
+        if resource_version:
+            resource_to_return = self._search_version_in_resources(
+                resources, resource_id, resource_version
+            )
+
+        else:
+            compatible_resources = (
+                self._get_resources_compatible_with_gem5_version(resources)
+            )
+            if len(compatible_resources) == 0:
+                resource_to_return = self._sort_resources(resources)[0]
+            else:
+                resource_to_return = self._sort_resources(
+                    compatible_resources
+                )[0]
+
+        self._check_resource_version_compatibility(resource_to_return)
+
+        return resource_to_return
+
+    def _search_version_in_resources(
+        self, resources: List, resource_id: str, resource_version: str
+    ) -> Dict:
+        """
+        Searches for the resource with the given version. If the resource is
+        not found, an exception is thrown.
+        :param resources: A list of resources to search through.
+        :param resource_version: The version of the resource to search for.
+        :return: The resource object as a Python dictionary if found.
+        If not found, None is returned.
+        """
+        return_resource = next(
+            iter(
+                [
+                    resource
+                    for resource in resources
+                    if resource["resource_version"] == resource_version
+                ]
+            ),
+            None,
+        )
+        if not return_resource:
+            raise Exception(
+                f"Resource {resource_id} with version '{resource_version}'"
+                " not found.\nResource versions can be found at: "
+                "https://resources.gem5.org/"
+                f"resources/{resource_id}/versions"
+            )
+        return return_resource
+
+    def _get_resources_compatible_with_gem5_version(
+        self, resources: List, gem5_version: str = core.gem5Version
+    ) -> List:
+        """
+        Returns a list of compatible resources with the current gem5 version.
+        :param resources: A list of resources to filter.
+        :return: A list of compatible resources as Python dictionaries.
+        If no compatible resources are found, the original list of resources
+        is returned.
+        """
+        compatible_resources = [
+            resource
+            for resource in resources
+            if gem5_version in resource["gem5_versions"]
+        ]
+        return compatible_resources
+
+    def _sort_resources(self, resources: List) -> List:
+        """
+        Sorts the resources by ID.
+        If the IDs are the same, the resources are sorted by version.
+        :param resources: A list of resources to sort.
+        :return: A list of sorted resources.
+        """
+        return sorted(
+            resources,
+            key=lambda resource: (
+                resource["id"].lower(),
+                StrictVersion(resource["resource_version"]),
+            ),
+            reverse=True,
+        )
+
+    def _check_resource_version_compatibility(
+        self, resource: dict, gem5_version: Optional[str] = core.gem5Version
+    ) -> bool:
+        """
+        Checks if the resource is compatible with the gem5 version.
+        Prints a warning if the resource is not compatible.
+        :param resource: The resource to check.
+        :optional param gem5_version: The gem5 version to check
+        compatibility with.
+        :return: True if the resource is compatible, False otherwise.
+        """
+        if not resource:
+            return False
+        if gem5_version not in resource["gem5_versions"]:
+            warn(
+                f"Resource {resource['id']} with version "
+                f"{resource['resource_version']} is not known to be compatible"
+                f" with gem5 version {gem5_version}. "
+                "This may cause problems with your simulation. "
+                "This resource's compatibility "
+                "with different gem5 versions can be found here: "
+                "https://resources.gem5.org"
+                f"/resources/{resource['id']}/versions"
+            )
+            return False
+        return True
diff --git a/src/python/gem5/resources/client_api/jsonclient.py b/src/python/gem5/resources/client_api/jsonclient.py
new file mode 100644
index 0000000000..225126e2a8
--- /dev/null
+++ b/src/python/gem5/resources/client_api/jsonclient.py
@@ -0,0 +1,70 @@
+# Copyright (c) 2023 The Regents of the University of California
+# All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are
+# met: redistributions of source code must retain the above copyright
+# notice, this list of conditions and the following disclaimer;
+# redistributions in binary form must reproduce the above copyright
+# notice, this list of conditions and the following disclaimer in the
+# documentation and/or other materials provided with the distribution;
+# neither the name of the copyright holders nor the names of its
+# contributors may be used to endorse or promote products derived from
+# this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+import json
+from pathlib import Path
+from urllib import request
+from typing import Optional, Dict, Union, Type, Tuple, List, Any
+from .abstract_client import AbstractClient
+from urllib.error import URLError
+from m5.util import warn
+
+
+class JSONClient(AbstractClient):
+    def __init__(self, path: str):
+        """
+        Initializes a JSON client.
+        :param path: The path to the Resource, either URL or local.
+        """
+        self.path = path
+        self.resources = []
+
+        if Path(self.path).is_file():
+            self.resources = json.load(open(self.path))
+        elif not self._url_validator(self.path):
+            raise Exception(
+                f"Resources location '{self.path}' is not a valid path or URL."
+            )
+        else:
+            req = request.Request(self.path)
+            try:
+                response = request.urlopen(req)
+            except URLError as e:
+                raise Exception(
+                    f"Unable to open Resources location '{self.path}': {e}"
+                )
+            self.resources = json.loads(response.read().decode("utf-8"))
+
+    def get_resources_by_id(self, resource_id: str) -> List[Dict[str, Any]]:
+        """
+        :param resource_id: The ID of the Resource.
+        :return: A list of all the Resources with the given ID.
+        """
+        return [
+            resource
+            for resource in self.resources
+            if resource["id"] == resource_id
+        ]
diff --git a/src/python/gem5/resources/downloader.py b/src/python/gem5/resources/downloader.py
index 16b0147b74..0781d9b15a 100644
--- a/src/python/gem5/resources/downloader.py
+++ b/src/python/gem5/resources/downloader.py
@@ -41,6 +41,7 @@ from tempfile import gettempdir
 from urllib.error import HTTPError
 from typing import List, Dict, Set, Optional
 
+from .client import get_resource_json_obj
 from .md5_utils import md5_file, md5_dir
 from ..utils.progress_bar import tqdm, progress_hook
 
@@ -398,6 +399,8 @@ def get_resource(
     unzip: bool = True,
     untar: bool = True,
     download_md5_mismatch: bool = True,
+    resource_version: Optional[str] = None,
+    clients: Optional[List] = None,
 ) -> None:
     """
     Obtains a gem5 resource and stored it to a specified location. If the
@@ -419,6 +422,13 @@ def get_resource(
     will delete this local resource and re-download it if this parameter is
     True. True by default.
 
+    :param resource_version: The version of the resource to be obtained. If
+    None, the latest version of the resource compatible with the working
+    directory's gem5 version will be obtained. None by default.
+
+    :param clients: A list of clients to use when obtaining the resource. If
+    None, all clients will be used. None by default.
+
     :raises Exception: An exception is thrown if a file is already present at
     `to_path` but it does not have the correct md5 sum. An exception will also
     be thrown is a directory is present at `to_path`
@@ -430,11 +440,13 @@ def get_resource(
     # minutes.Most resources should be downloaded and decompressed in this
     # timeframe, even on the most constrained of systems.
     with FileLock(f"{to_path}.lock", timeout=900):
-
-        resource_json = get_resources_json_obj(resource_name)
+        resource_json = get_resource_json_obj(
+            resource_name,
+            resource_version=resource_version,
+            clients=clients,
+        )
 
         if os.path.exists(to_path):
-
             if os.path.isfile(to_path):
                 md5 = md5_file(Path(to_path))
             else:
@@ -495,9 +507,8 @@ def get_resource(
             )
         )
 
-        # Get the URL. The URL may contain '{url_base}' which needs replaced
-        # with the correct value.
-        url = resource_json["url"].format(url_base=_get_url_base())
+        # Get the URL.
+        url = resource_json["url"]
 
         _download(url=url, download_to=download_dest)
         print(f"Finished downloading resource '{resource_name}'.")
diff --git a/src/python/gem5/resources/resource.py b/src/python/gem5/resources/resource.py
index 9cba9cf88f..22adf15670 100644
--- a/src/python/gem5/resources/resource.py
+++ b/src/python/gem5/resources/resource.py
@@ -29,13 +29,15 @@ import os
 from pathlib import Path
 from m5.util import warn, fatal
 
-from .downloader import get_resource, get_resources_json_obj
+from .downloader import get_resource
 
 from .looppoint import LooppointCsvLoader, LooppointJsonLoader
 from ..isas import ISA, get_isa_from_str
 
 from typing import Optional, Dict, Union, Type, Tuple, List
 
+from .client import get_resource_json_obj
+
 """
 Resources are items needed to run a simulation, such as a disk image, kernel,
 or binary. The gem5 project provides pre-built resources, with sources, at
@@ -67,18 +69,20 @@ class AbstractResource:
 
     def __init__(
         self,
+        resource_version: Optional[str] = None,
         local_path: Optional[str] = None,
-        documentation: Optional[str] = None,
+        description: Optional[str] = None,
         source: Optional[str] = None,
     ):
         """
         :param local_path: The path on the host system where this resource is
         located.
-        :param documentation: Documentation describing this resource. Not a
+        :param description: Description describing this resource. Not a
         required parameter. By default is None.
         :param source: The source (as in "source code") for this resource. This
         string should navigate users to where the source for this resource
         may be found. Not a required parameter. By default is None.
+        :param resource_version: Version of the resource itself.
         """
 
         if local_path and not os.path.exists(local_path):
@@ -88,16 +92,21 @@ class AbstractResource:
             )
 
         self._local_path = local_path
-        self._documentation = documentation
+        self._description = description
         self._source = source
+        self._version = resource_version
+
+    def get_resource_version(self) -> str:
+        """Returns the version of the resource."""
+        return self._version
 
     def get_local_path(self) -> Optional[str]:
         """Returns the local path of the resource."""
         return self._local_path
 
-    def get_documentation(self) -> Optional[str]:
-        """Returns documentation associated with this resource."""
-        return self._documentation
+    def get_description(self) -> Optional[str]:
+        """Returns description associated with this resource."""
+        return self._description
 
     def get_source(self) -> Optional[str]:
         """Returns information as to where the source for this resource may be
@@ -112,7 +121,8 @@ class FileResource(AbstractResource):
     def __init__(
         self,
         local_path: str,
-        documentation: Optional[str] = None,
+        resource_version: Optional[str] = None,
+        description: Optional[str] = None,
         source: Optional[str] = None,
         **kwargs,
     ):
@@ -123,8 +133,9 @@ class FileResource(AbstractResource):
 
         super().__init__(
             local_path=local_path,
-            documentation=documentation,
+            description=description,
             source=source,
+            resource_version=resource_version,
         )
 
 
@@ -134,11 +145,11 @@ class DirectoryResource(AbstractResource):
     def __init__(
         self,
         local_path: str,
-        documentation: Optional[str] = None,
+        resource_version: Optional[str] = None,
+        description: Optional[str] = None,
         source: Optional[str] = None,
         **kwargs,
     ):
-
         if not os.path.isdir(local_path):
             raise Exception(
                 f"DirectoryResource path specified, {local_path}, is not a "
@@ -147,8 +158,9 @@ class DirectoryResource(AbstractResource):
 
         super().__init__(
             local_path=local_path,
-            documentation=documentation,
+            description=description,
             source=source,
+            resource_version=resource_version,
         )
 
 
@@ -158,15 +170,17 @@ class DiskImageResource(FileResource):
     def __init__(
         self,
         local_path: str,
-        documentation: Optional[str] = None,
+        resource_version: Optional[str] = None,
+        description: Optional[str] = None,
         source: Optional[str] = None,
         root_partition: Optional[str] = None,
         **kwargs,
     ):
         super().__init__(
             local_path=local_path,
-            documentation=documentation,
+            description=description,
             source=source,
+            resource_version=resource_version,
         )
         self._root_partition = root_partition
 
@@ -181,15 +195,17 @@ class BinaryResource(FileResource):
     def __init__(
         self,
         local_path: str,
-        documentation: Optional[str] = None,
+        resource_version: Optional[str] = None,
+        description: Optional[str] = None,
         source: Optional[str] = None,
         architecture: Optional[Union[ISA, str]] = None,
         **kwargs,
     ):
         super().__init__(
             local_path=local_path,
-            documentation=documentation,
+            description=description,
             source=source,
+            resource_version=resource_version,
         )
 
         self._architecture = None
@@ -210,16 +226,18 @@ class BootloaderResource(BinaryResource):
     def __init__(
         self,
         local_path: str,
-        documentation: Optional[str] = None,
+        resource_version: Optional[str] = None,
+        description: Optional[str] = None,
         source: Optional[str] = None,
         architecture: Optional[Union[ISA, str]] = None,
         **kwargs,
     ):
         super().__init__(
             local_path=local_path,
-            documentation=documentation,
+            description=description,
             architecture=architecture,
             source=source,
+            resource_version=resource_version,
         )
 
 
@@ -229,14 +247,16 @@ class GitResource(DirectoryResource):
     def __init__(
         self,
         local_path: str,
-        documentation: Optional[str] = None,
+        resource_version: Optional[str] = None,
+        description: Optional[str] = None,
         source: Optional[str] = None,
         **kwargs,
     ):
         super().__init__(
             local_path=local_path,
-            documentation=documentation,
+            description=description,
             source=source,
+            resource_version=resource_version,
         )
 
 
@@ -246,16 +266,18 @@ class KernelResource(BinaryResource):
     def __init__(
         self,
         local_path: str,
-        documentation: Optional[str] = None,
+        resource_version: Optional[str] = None,
+        description: Optional[str] = None,
         source: Optional[str] = None,
         architecture: Optional[Union[ISA, str]] = None,
         **kwargs,
     ):
         super().__init__(
             local_path=local_path,
-            documentation=documentation,
+            description=description,
             source=source,
             architecture=architecture,
+            resource_version=resource_version,
         )
 
 
@@ -270,14 +292,16 @@ class CheckpointResource(DirectoryResource):
     def __init__(
         self,
         local_path: str,
-        documentation: Optional[str] = None,
+        resource_version: Optional[str] = None,
+        description: Optional[str] = None,
         source: Optional[str] = None,
         **kwargs,
     ):
         super().__init__(
             local_path=local_path,
-            documentation=documentation,
+            description=description,
             source=source,
+            resource_version=resource_version,
         )
 
 
@@ -290,12 +314,13 @@ class SimpointResource(AbstractResource):
 
     def __init__(
         self,
+        resource_version: Optional[str] = None,
         simpoint_interval: int = None,
         simpoint_list: List[int] = None,
         weight_list: List[float] = None,
         warmup_interval: int = 0,
         workload_name: Optional[str] = None,
-        documentation: Optional[str] = None,
+        description: Optional[str] = None,
         source: Optional[str] = None,
         local_path: Optional[str] = None,
         **kwargs,
@@ -314,8 +339,9 @@ class SimpointResource(AbstractResource):
 
         super().__init__(
             local_path=local_path,
-            documentation=documentation,
+            description=description,
             source=source,
+            resource_version=resource_version,
         )
 
         self._weight_list = weight_list
@@ -402,15 +428,17 @@ class LooppointCsvResource(FileResource, LooppointCsvLoader):
     def __init__(
         self,
         local_path: str,
-        documentation: Optional[str] = None,
+        resource_version: Optional[str] = None,
+        description: Optional[str] = None,
         source: Optional[str] = None,
         **kwargs,
     ):
         FileResource.__init__(
             self,
             local_path=local_path,
-            documentation=documentation,
+            description=description,
             source=source,
+            resource_version=resource_version,
         )
         LooppointCsvLoader.__init__(self, pinpoints_file=Path(local_path))
 
@@ -419,16 +447,18 @@ class LooppointJsonResource(FileResource, LooppointJsonLoader):
     def __init__(
         self,
         local_path: str,
+        resource_version: Optional[str] = None,
         region_id: Optional[Union[str, int]] = None,
-        documentation: Optional[str] = None,
+        description: Optional[str] = None,
         source: Optional[str] = None,
         **kwargs,
     ):
         FileResource.__init__(
             self,
             local_path=local_path,
-            documentation=documentation,
+            description=description,
             source=source,
+            resource_version=resource_version,
         )
         LooppointJsonLoader.__init__(
             self, looppoint_file=local_path, region_id=region_id
@@ -446,8 +476,9 @@ class SimpointDirectoryResource(SimpointResource):
         weight_file: str,
         simpoint_interval: int,
         warmup_interval: int,
+        resource_version: Optional[str] = None,
         workload_name: Optional[str] = None,
-        documentation: Optional[str] = None,
+        description: Optional[str] = None,
         source: Optional[str] = None,
         **kwargs,
     ):
@@ -478,8 +509,9 @@ class SimpointDirectoryResource(SimpointResource):
             warmup_interval=warmup_interval,
             workload_name=workload_name,
             local_path=local_path,
-            documentation=documentation,
+            description=description,
             source=source,
+            resource_version=resource_version,
         )
 
     def get_simpoint_file(self) -> Path:
@@ -522,9 +554,11 @@ class SimpointDirectoryResource(SimpointResource):
 
 
 def obtain_resource(
-    resource_name: str,
+    resource_id: str,
     resource_directory: Optional[str] = None,
     download_md5_mismatch: bool = True,
+    resource_version: Optional[str] = None,
+    clients: Optional[List] = None,
 ) -> AbstractResource:
     """
     This function primarily serves as a factory for resources. It will return
@@ -544,10 +578,16 @@ def obtain_resource(
     have the correct md5 value, the resoruce will be deleted and
     re-downloaded if this value is True. Otherwise an exception will be
     thrown. True by default.
+    :param resource_version: Version of the resource itself.
+    Not a required parameter. None by default.
+    :param clients: A list of clients to search for the resource. If this
+    parameter is not set, it will default search all clients.
     """
 
-    # Obtain the JSON resource entry for this resource
-    resource_json = get_resources_json_obj(resource_name)
+    # Obtain the resource object entry for this resource
+    resource_json = get_resource_json_obj(
+        resource_id, resource_version=resource_version, clients=clients
+    )
 
     to_path = None
     # If the "url" field is specified, the resoruce must be downloaded.
@@ -580,38 +620,37 @@ def obtain_resource(
             os.makedirs(resource_directory, exist_ok=True)
 
         # This is the path to which the resource is to be stored.
-        to_path = os.path.join(resource_directory, resource_name)
+        to_path = os.path.join(resource_directory, resource_id)
 
         # Download the resource if it does not already exist.
         get_resource(
-            resource_name=resource_name,
-            to_path=os.path.join(resource_directory, resource_name),
+            resource_name=resource_id,
+            to_path=os.path.join(resource_directory, resource_id),
             download_md5_mismatch=download_md5_mismatch,
+            resource_version=resource_version,
+            clients=clients,
         )
 
     # Obtain the type from the JSON. From this we will determine what subclass
     # of `AbstractResource` we are to create and return.
-    resources_type = resource_json["type"]
+    resources_category = resource_json["category"]
 
-    if resources_type == "resource":
+    if resources_category == "resource":
         # This is a stop-gap measure to ensure to work with older versions of
         # the "resource.json" file. These should be replaced with their
         # respective specializations ASAP and this case removed.
-        if (
-            "additional_metadata" in resource_json
-            and "root_partition" in resource_json["additional_metadata"]
-        ):
+        if "root_partition" in resource_json:
             # In this case we should return a DiskImageResource.
-            root_partition = resource_json["additional_metadata"][
-                "root_partition"
-            ]
+            root_partition = resource_json["root_partition"]
             return DiskImageResource(
-                local_path=to_path, root_partition=root_partition
+                local_path=to_path,
+                root_partition=root_partition,
+                **resource_json,
             )
         return CustomResource(local_path=to_path)
 
-    assert resources_type in _get_resource_json_type_map
-    resource_class = _get_resource_json_type_map[resources_type]
+    assert resources_category in _get_resource_json_type_map
+    resource_class = _get_resource_json_type_map[resources_category]
 
     # Once we know what AbstractResource subclass we are using, we create it.
     # The fields in the JSON object are assumed to map like-for-like to the
@@ -694,6 +733,7 @@ class CustomDiskImageResource(DiskImageResource):
     def __init__(
         self,
         local_path: str,
+        resource_version: Optional[str] = None,
         root_partition: Optional[str] = None,
         metadata: Dict = {},
     ):
@@ -702,6 +742,7 @@ class CustomDiskImageResource(DiskImageResource):
         :param root_partition: The root disk partition to use.
         :param metadata: Metadata for the resource. **Warning:** As of "
         "v22.1.1, this parameter is not used.
+        :param resource_version: Version of the resource itself.
         """
         warn(
             "The `CustomDiskImageResource` class is deprecated. Please use "
@@ -713,13 +754,19 @@ class CustomDiskImageResource(DiskImageResource):
                 "`CustomDiskImageResource` constructor. This parameter is not "
                 "used."
             )
-        super().__init__(local_path=local_path, root_partition=root_partition)
+        super().__init__(
+            local_path=local_path,
+            root_partition=root_partition,
+            resource_version=resource_version,
+        )
 
 
 def Resource(
-    resource_name: str,
+    resource_id: str,
     resource_directory: Optional[str] = None,
     download_md5_mismatch: bool = True,
+    resource_version: Optional[str] = None,
+    clients: Optional[List[str]] = None,
 ) -> AbstractResource:
     """
     This function was created to maintain backwards compability for v21.1.0
@@ -737,9 +784,11 @@ def Resource(
     )
 
     return obtain_resource(
-        resource_name=resource_name,
+        resource_id=resource_id,
         resource_directory=resource_directory,
         download_md5_mismatch=download_md5_mismatch,
+        resource_version=resource_version,
+        clients=clients,
     )
 
 
diff --git a/src/python/gem5/resources/workload.py b/src/python/gem5/resources/workload.py
index e0a19d0792..148ab3f35a 100644
--- a/src/python/gem5/resources/workload.py
+++ b/src/python/gem5/resources/workload.py
@@ -24,10 +24,10 @@
 # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 
-from .downloader import get_workload_json_obj
 from .resource import obtain_resource
+from .client import get_resource_json_obj
 
-from typing import Dict, Any, Optional
+from typing import Dict, Any, List, Optional
 
 
 class AbstractWorkload:
@@ -155,7 +155,11 @@ class Workload(AbstractWorkload):
     """
 
     def __init__(
-        self, workload_name: str, resource_directory: Optional[str] = None
+        self,
+        workload_name: str,
+        resource_directory: Optional[str] = None,
+        resource_version: Optional[str] = None,
+        clients: Optional[List] = None,
     ) -> None:
         """
         This constructor will load the workload details from the workload with
@@ -167,13 +171,13 @@ class Workload(AbstractWorkload):
 
         ```json
         {
-            "type" : "workload",
-            "name" : "x86-ubuntu-18.04-echo-hello",
-            "documentation" : "Description of workload here",
+            "category" : "workload",
+            "id" : "x86-ubuntu-18.04-echo-hello",
+            "description" : "Description of workload here",
             "function" : "set_kernel_disk_workload",
             "resources" : {
                 "kernel" : "x86-linux-kernel-5.4.49",
-                "disk_image" : "x86-ubuntu-18.04-img"
+                "disk-image" : "x86-ubuntu-18.04-img"
             },
             "additional_params" : {
                 "readfile_contents" : "m5_exit; echo 'hello'; m5_exit"
@@ -187,7 +191,7 @@ class Workload(AbstractWorkload):
         ```python
         board.set_kernel_disk_workload(
             kernel = Resource("x86-linux-kernel-5.4.49"),
-            disk_image = Resource("x86-ubuntu-18.04-img"),
+            disk-image = Resource("x86-ubuntu-18.04-img"),
             readfile_contents = "m5_exit; echo 'hello'; m5_exit",
         )
         ```
@@ -198,7 +202,12 @@ class Workload(AbstractWorkload):
         any resources should be download and accessed from. If None, a default
         location will be used. None by default.
         """
-        workload_json = get_workload_json_obj(workload_name=workload_name)
+
+        workload_json = get_resource_json_obj(
+            workload_name,
+            resource_version=resource_version,
+            clients=clients,
+        )
 
         func = workload_json["function"]
         assert isinstance(func, str)
diff --git a/src/python/gem5/utils/simpoint.py b/src/python/gem5/utils/simpoint.py
index eab92e2291..0d1af4b1cf 100644
--- a/src/python/gem5/utils/simpoint.py
+++ b/src/python/gem5/utils/simpoint.py
@@ -83,15 +83,11 @@ class SimPoint:
 
             simpoint_file_path = simpoint_directory.get_simpoint_file()
             weight_file_path = simpoint_resource.get_weight_file()
-            simpoint_interval = (
-                simpoint_resource.get_metadata()
-                .get("additional_metadata")
-                .get("simpoint_interval")
+            simpoint_interval = simpoint_resource.get_metadata().get(
+                "simpoint_interval"
             )
-            warmup_interval = (
-                simpoint_resource.get_metadata()
-                .get("additional_metadata")
-                .get("warmup_interval")
+            warmup_interval = simpoint_resource.get_metadata().get(
+                "warmup_interval"
             )
 
         self._simpoint_interval = simpoint_interval
diff --git a/src/python/gem5_default_config.py b/src/python/gem5_default_config.py
new file mode 100644
index 0000000000..980c883109
--- /dev/null
+++ b/src/python/gem5_default_config.py
@@ -0,0 +1,39 @@
+# Copyright (c) 2023 The Regents of the University of California
+# All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are
+# met: redistributions of source code must retain the above copyright
+# notice, this list of conditions and the following disclaimer;
+# redistributions in binary form must reproduce the above copyright
+# notice, this list of conditions and the following disclaimer in the
+# documentation and/or other materials provided with the distribution;
+# neither the name of the copyright holders nor the names of its
+# contributors may be used to endorse or promote products derived from
+# this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+config = {
+    "sources": {
+        "gem5-resources": {
+            "dataSource": "gem5-vision",
+            "database": "gem5-vision",
+            "collection": "versions_test",
+            "url": "https://data.mongodb-api.com/app/data-ejhjf/endpoint/data/v1",
+            "authUrl": "https://realm.mongodb.com/api/client/v2.0/app/data-ejhjf/auth/providers/api-key/login",
+            "apiKey": "OIi5bAP7xxIGK782t8ZoiD2BkBGEzMdX3upChf9zdCxHSnMoiTnjI22Yw5kOSgy9",
+            "isMongo": True,
+        }
+    }
+}
diff --git a/tests/gem5/verifier.py b/tests/gem5/verifier.py
index 93d47c8d10..c725fc68b9 100644
--- a/tests/gem5/verifier.py
+++ b/tests/gem5/verifier.py
@@ -172,6 +172,7 @@ class MatchStdout(DerivedGoldStandard):
         re.compile("^Using GPU kernel code file\(s\) "),
         re.compile("^.* not found locally\. Downloading"),
         re.compile("^Finished downloading"),
+        re.compile("^info: Using default config"),
     ]
 
 
diff --git a/tests/pyunit/stdlib/resources/pyunit_client_wrapper_checks.py b/tests/pyunit/stdlib/resources/pyunit_client_wrapper_checks.py
new file mode 100644
index 0000000000..96aadf6879
--- /dev/null
+++ b/tests/pyunit/stdlib/resources/pyunit_client_wrapper_checks.py
@@ -0,0 +1,511 @@
+# Copyright (c) 2023 The Regents of the University of California
+# All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are
+# met: redistributions of source code must retain the above copyright
+# notice, this list of conditions and the following disclaimer;
+# redistributions in binary form must reproduce the above copyright
+# notice, this list of conditions and the following disclaimer in the
+# documentation and/or other materials provided with the distribution;
+# neither the name of the copyright holders nor the names of its
+# contributors may be used to endorse or promote products derived from
+# this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+import unittest
+from gem5.isas import ISA
+from gem5.resources.client import get_resource_json_obj
+import gem5.resources.client
+from gem5.resources.client_api.client_wrapper import ClientWrapper
+from typing import Dict
+from unittest.mock import patch
+from unittest import mock
+import json
+from urllib.error import HTTPError
+import io
+import contextlib
+from pathlib import Path
+
+mock_json_path = Path(__file__).parent / "refs/resources.json"
+mock_config_json = {
+    "sources": {
+        "baba": {
+            "url": mock_json_path,
+            "isMongo": False,
+        }
+    },
+}
+
+mock_config_mongo = {
+    "sources": {
+        "gem5-resources": {
+            "dataSource": "gem5-vision",
+            "database": "gem5-vision",
+            "collection": "versions_test",
+            "url": "https://data.mongodb-api.com/app/data-ejhjf/endpoint/data/v1",
+            "authUrl": "https://realm.mongodb.com/api/client/v2.0/app/data-ejhjf/auth/providers/api-key/login",
+            "apiKey": "OIi5bAP7xxIGK782t8ZoiD2BkBGEzMdX3upChf9zdCxHSnMoiTnjI22Yw5kOSgy9",
+            "isMongo": True,
+        }
+    },
+}
+
+mock_config_combined = {
+    "sources": {
+        "gem5-resources": {
+            "dataSource": "gem5-vision",
+            "database": "gem5-vision",
+            "collection": "versions_test",
+            "url": "https://data.mongodb-api.com/app/data-ejhjf/endpoint/data/v1",
+            "authUrl": "https://realm.mongodb.com/api/client/v2.0/app/data-ejhjf/auth/providers/api-key/login",
+            "apiKey": "OIi5bAP7xxIGK782t8ZoiD2BkBGEzMdX3upChf9zdCxHSnMoiTnjI22Yw5kOSgy9",
+            "isMongo": True,
+        },
+        "baba": {
+            "url": mock_json_path,
+            "isMongo": False,
+        },
+    },
+}
+
+mock_json = {}
+
+with open(Path(__file__).parent / "refs/mongo_mock.json", "r") as f:
+    mock_json = json.load(f)
+
+duplicate_mock_json = {}
+
+with open(Path(__file__).parent / "refs/mongo_dup_mock.json", "r") as f:
+    duplicate_mock_json = json.load(f)
+
+
+def mocked_requests_post(*args):
+    # mokcing urllib.request.urlopen
+    class MockResponse:
+        def __init__(self, json_data, status_code):
+            self.json_data = json_data
+            self.status = status_code
+
+        def read(self):
+            return json.dumps(self.json_data).encode("utf-8")
+
+    data = json.loads(args[0].data)
+    if "/api-key/login" in args[0].full_url:
+        return MockResponse({"access_token": "test-token"}, 200)
+    if "/endpoint/data/v1/action/find" in args[0].full_url:
+        if data:
+            if data["filter"]["id"] == "x86-ubuntu-18.04-img":
+                return MockResponse(
+                    {
+                        "documents": mock_json,
+                    },
+                    200,
+                )
+            if data["filter"]["id"] == "test-duplicate":
+                return MockResponse(
+                    {
+                        "documents": duplicate_mock_json,
+                    },
+                    200,
+                )
+            if data["filter"]["id"] == "test-too-many":
+                error_file = io.BytesIO()
+                error_file.status = 429
+                raise HTTPError(
+                    args[0].full_url, 429, "Too Many Requests", {}, error_file
+                )
+        return MockResponse(
+            {
+                "documents": [],
+            },
+            200,
+        )
+    error_file = io.BytesIO()
+    error_file.status = 404
+    raise HTTPError(args[0].full_url, 404, "Not Found", {}, error_file)
+
+
+class ClientWrapperTestSuite(unittest.TestCase):
+    @patch(
+        "gem5.resources.client.clientwrapper",
+        ClientWrapper(mock_config_json),
+    )
+    def test_get_resource_json_obj(self):
+        # Test that the resource object is correctly returned
+        resource = "this-is-a-test-resource"
+        resource = get_resource_json_obj(resource)
+        self.assertEqual(resource["id"], "this-is-a-test-resource")
+        self.assertEqual(resource["resource_version"], "2.0.0")
+        self.assertEqual(resource["category"], "binary")
+        self.assertEqual(
+            resource["description"], "This is a test resource but double newer"
+        )
+        self.assertEqual(
+            resource["source_url"],
+            "https://github.com/gem5/gem5-resources/tree/develop/src/asmtest",
+        )
+        self.assertEqual(resource["architecture"], "X86")
+
+    @patch(
+        "gem5.resources.client.clientwrapper",
+        ClientWrapper(mock_config_json),
+    )
+    def test_get_resource_json_obj_invalid_client(self):
+        # Test that an exception is raised when an invalid client is passed
+        resource_id = "test-id"
+        client = "invalid"
+        with self.assertRaises(Exception) as context:
+            get_resource_json_obj(resource_id, clients=[client])
+        self.assertTrue(
+            f"Client: {client} does not exist" in str(context.exception)
+        )
+
+    @patch(
+        "gem5.resources.client.clientwrapper",
+        ClientWrapper(mock_config_json),
+    )
+    def test_get_resource_json_obj_with_version(self):
+        # Test that the resource object is correctly returned
+        resource_id = "this-is-a-test-resource"
+        resource_version = "1.0.0"
+        resource = get_resource_json_obj(
+            resource_id, resource_version=resource_version
+        )
+        self.assertEqual(resource["id"], "this-is-a-test-resource")
+        self.assertEqual(resource["resource_version"], "1.0.0")
+        self.assertEqual(resource["category"], "binary")
+        self.assertEqual(resource["description"], "This is a test resource")
+        self.assertEqual(
+            resource["source_url"],
+            "https://github.com/gem5/gem5-resources/tree/develop/src/asmtest",
+        )
+        self.assertEqual(resource["architecture"], "X86")
+
+    @patch(
+        "gem5.resources.client.clientwrapper",
+        ClientWrapper(mock_config_mongo),
+    )
+    @patch("urllib.request.urlopen", side_effect=mocked_requests_post)
+    def test_get_resource_json_obj_1(self, mock_get):
+        resource = "x86-ubuntu-18.04-img"
+        resource = get_resource_json_obj(resource)
+        self.assertEqual(resource["id"], "x86-ubuntu-18.04-img")
+        self.assertEqual(resource["resource_version"], "1.1.0")
+        self.assertEqual(resource["category"], "disk-image")
+        self.assertEqual(
+            resource["description"],
+            "A disk image containing Ubuntu 18.04 for x86. This image will run an `m5 readfile` instruction after booting. If no script file is specified an `m5 exit` instruction will be executed.",
+        )
+        self.assertEqual(
+            resource["source_url"],
+            "https://github.com/gem5/gem5-resources/tree/develop/src/x86-ubuntu",
+        )
+        self.assertEqual(resource["architecture"], "X86")
+
+    @patch(
+        "gem5.resources.client.clientwrapper",
+        ClientWrapper(mock_config_mongo),
+    )
+    @patch("urllib.request.urlopen", side_effect=mocked_requests_post)
+    def test_get_resource_json_obj_with_version_mongodb(self, mock_get):
+        # Test that the resource object is correctly returned
+        resource_id = "x86-ubuntu-18.04-img"
+        resource_version = "1.0.0"
+        resource = get_resource_json_obj(
+            resource_id,
+            resource_version=resource_version,
+            clients=["gem5-resources"],
+        )
+        self.assertEqual(resource["id"], "x86-ubuntu-18.04-img")
+        self.assertEqual(resource["resource_version"], "1.0.0")
+        self.assertEqual(resource["category"], "disk-image")
+        self.assertEqual(resource["description"], "This is a test resource")
+        self.assertEqual(
+            resource["source_url"],
+            "https://github.com/gem5/gem5-resources/tree/develop/src/x86-ubuntu",
+        )
+        self.assertEqual(resource["architecture"], "X86")
+
+    @patch(
+        "gem5.resources.client.clientwrapper",
+        ClientWrapper(mock_config_mongo),
+    )
+    @patch("urllib.request.urlopen", side_effect=mocked_requests_post)
+    def test_get_resource_json_obj_with_id_invalid_mongodb(self, mock_get):
+        resource_id = "invalid-id"
+        with self.assertRaises(Exception) as context:
+            get_resource_json_obj(resource_id, clients=["gem5-resources"])
+        self.assertTrue(
+            "Resource with ID 'invalid-id' not found."
+            in str(context.exception)
+        )
+
+    @patch(
+        "gem5.resources.client.clientwrapper",
+        ClientWrapper(mock_config_mongo),
+    )
+    @patch("urllib.request.urlopen", side_effect=mocked_requests_post)
+    def test_get_resource_json_obj_with_version_invalid_mongodb(
+        self, mock_get
+    ):
+        resource_id = "x86-ubuntu-18.04-img"
+        resource_version = "2.5.0"
+        with self.assertRaises(Exception) as context:
+            get_resource_json_obj(
+                resource_id,
+                resource_version=resource_version,
+                clients=["gem5-resources"],
+            )
+        self.assertTrue(
+            f"Resource x86-ubuntu-18.04-img with version '2.5.0'"
+            " not found.\nResource versions can be found at: "
+            f"https://resources.gem5.org/resources/x86-ubuntu-18.04-img/versions"
+            in str(context.exception)
+        )
+
+    @patch(
+        "gem5.resources.client.clientwrapper",
+        ClientWrapper(mock_config_json),
+    )
+    def test_get_resource_json_obj_with_version_invalid_json(self):
+        resource_id = "this-is-a-test-resource"
+        resource_version = "2.5.0"
+        with self.assertRaises(Exception) as context:
+            get_resource_json_obj(
+                resource_id,
+                resource_version=resource_version,
+            )
+        self.assertTrue(
+            f"Resource this-is-a-test-resource with version '2.5.0'"
+            " not found.\nResource versions can be found at: "
+            f"https://resources.gem5.org/resources/this-is-a-test-resource/versions"
+            in str(context.exception)
+        )
+
+    @patch(
+        "gem5.resources.client.clientwrapper",
+        ClientWrapper(mock_config_combined),
+    )
+    @patch("urllib.request.urlopen", side_effect=mocked_requests_post)
+    def test_get_resource_json_obj_combine(self, mock_get):
+        resource_id_mongo = "x86-ubuntu-18.04-img"
+        resource_version_mongo = "1.0.0"
+        resource_id_json = "this-is-a-test-resource"
+        resource_version_json = "1.0.0"
+        resource_mongo = get_resource_json_obj(
+            resource_id_mongo,
+            resource_version=resource_version_mongo,
+            clients=["gem5-resources"],
+        )
+        resource_json = get_resource_json_obj(
+            resource_id_json,
+            resource_version=resource_version_json,
+            clients=["baba"],
+        )
+        self.assertEqual(resource_mongo["id"], "x86-ubuntu-18.04-img")
+        self.assertEqual(resource_mongo["resource_version"], "1.0.0")
+        self.assertEqual(resource_mongo["category"], "disk-image")
+        self.assertEqual(
+            resource_mongo["description"], "This is a test resource"
+        )
+        self.assertEqual(
+            resource_mongo["source_url"],
+            "https://github.com/gem5/gem5-resources/tree/develop/src/x86-ubuntu",
+        )
+        self.assertEqual(resource_mongo["architecture"], "X86")
+
+        self.assertEqual(resource_json["id"], "this-is-a-test-resource")
+        self.assertEqual(resource_json["resource_version"], "1.0.0")
+        self.assertEqual(resource_json["category"], "binary")
+        self.assertEqual(
+            resource_json["description"], "This is a test resource"
+        )
+        self.assertEqual(
+            resource_json["source_url"],
+            "https://github.com/gem5/gem5-resources/tree/develop/src/asmtest",
+        )
+        self.assertEqual(resource_json["architecture"], "X86")
+
+    @patch(
+        "gem5.resources.client.clientwrapper",
+        ClientWrapper(mock_config_combined),
+    )
+    @patch("urllib.request.urlopen", side_effect=mocked_requests_post)
+    def test_get_resource_json_obj_multi_database_second_only(self, mock_get):
+        resource_id = "simpoint-resource"
+        resource = get_resource_json_obj(
+            resource_id,
+        )
+        self.assertEqual(resource["id"], resource_id)
+        self.assertEqual(resource["resource_version"], "0.2.0")
+        self.assertEqual(resource["category"], "file")
+        self.assertEqual(
+            resource["description"],
+            (
+                "Simpoints for running the 'x86-print-this' resource with"
+                ' the parameters `"print this" 15000`. This is encapsulated'
+                " in the 'x86-print-this-15000-with-simpoints' workload."
+            ),
+        )
+
+    @patch(
+        "gem5.resources.client.clientwrapper",
+        ClientWrapper(mock_config_combined),
+    )
+    @patch("urllib.request.urlopen", side_effect=mocked_requests_post)
+    def test_get_resource_json_same_resource_different_versions(
+        self, mock_get
+    ):
+        resource_id = "x86-ubuntu-18.04-img"
+        resource_json = get_resource_json_obj(
+            resource_id,
+        )
+
+        self.assertEqual(resource_json["id"], "x86-ubuntu-18.04-img")
+        self.assertEqual(resource_json["resource_version"], "2.0.0")
+        self.assertEqual(resource_json["category"], "disk-image")
+
+        resource_json = get_resource_json_obj(
+            resource_id,
+            resource_version="1.0.0",
+        )
+
+        self.assertEqual(resource_json["id"], "x86-ubuntu-18.04-img")
+        self.assertEqual(resource_json["resource_version"], "1.0.0")
+        self.assertEqual(resource_json["category"], "disk-image")
+
+    @patch(
+        "gem5.resources.client.clientwrapper",
+        ClientWrapper(mock_config_combined),
+    )
+    @patch("urllib.request.urlopen", side_effect=mocked_requests_post)
+    def test_get_resource_same_resource_same_version(self, mock_get):
+        resource_id = "test-duplicate"
+        with self.assertRaises(Exception) as context:
+            get_resource_json_obj(
+                resource_id,
+            )
+        self.assertTrue(
+            f"Resource {resource_id} has multiple resources with"
+            f" the same version: 0.2.0" in str(context.exception)
+        )
+
+    @patch(
+        "gem5.resources.client.clientwrapper",
+        ClientWrapper(
+            {
+                "sources": {
+                    "gem5-resources": {
+                        "dataSource": "gem5-vision",
+                        "database": "gem5-vision",
+                        "collection": "versions_test",
+                        "url": "https://data.mongodb-api.com/app/data-ejhjf/endpoint/data/v1",
+                        "authUrl": "https://realm.mongodb.com/api/client/v2.0/app/data-ejhjf/auth/providers/api-key/logi",
+                        "apiKey": "OIi5bAP7xxIGK782t8ZoiD2BkBGEzMdX3upChf9zdCxHSnMoiTnjI22Yw5kOSgy9",
+                        "isMongo": True,
+                    }
+                },
+            }
+        ),
+    )
+    @patch("urllib.request.urlopen", side_effect=mocked_requests_post)
+    def test_invalid_auth_url(self, mock_get):
+        resource_id = "test-resource"
+        f = io.StringIO()
+        with self.assertRaises(Exception) as context:
+            with contextlib.redirect_stderr(f):
+                get_resource_json_obj(
+                    resource_id,
+                )
+        self.assertTrue(
+            "Error getting resources from client gem5-resources:"
+            " Panic: Not found" in str(f.getvalue())
+        )
+        self.assertTrue(
+            "Resource with ID 'test-resource' not found."
+            in str(context.exception)
+        )
+
+    @patch(
+        "gem5.resources.client.clientwrapper",
+        ClientWrapper(
+            {
+                "sources": {
+                    "gem5-resources": {
+                        "dataSource": "gem5-vision",
+                        "database": "gem5-vision",
+                        "collection": "versions_test",
+                        "url": "https://data.mongodb-api.com/app/data-ejhjf/endpoint/data/v",
+                        "authUrl": "https://realm.mongodb.com/api/client/v2.0/app/data-ejhjf/auth/providers/api-key/login",
+                        "apiKey": "OIi5bAP7xxIGK782t8ZoiD2BkBGEzMdX3upChf9zdCxHSnMoiTnjI22Yw5kOSgy9",
+                        "isMongo": True,
+                    }
+                },
+            }
+        ),
+    )
+    @patch("urllib.request.urlopen", side_effect=mocked_requests_post)
+    def test_invalid_url(self, mock_get):
+        resource_id = "test-resource"
+        f = io.StringIO()
+        with self.assertRaises(Exception) as context:
+            with contextlib.redirect_stderr(f):
+                get_resource_json_obj(
+                    resource_id,
+                )
+        self.assertTrue(
+            "Error getting resources from client gem5-resources:"
+            " Panic: Not found" in str(f.getvalue())
+        )
+        self.assertTrue(
+            "Resource with ID 'test-resource' not found."
+            in str(context.exception)
+        )
+
+    @patch(
+        "gem5.resources.client.clientwrapper",
+        ClientWrapper(
+            {
+                "sources": {
+                    "gem5-resources": {
+                        "dataSource": "gem5-vision",
+                        "database": "gem5-vision",
+                        "collection": "versions_test",
+                        "url": "https://data.mongodb-api.com/app/data-ejhjf/endpoint/data/v1",
+                        "authUrl": "https://realm.mongodb.com/api/client/v2.0/app/data-ejhjf/auth/providers/api-key/login",
+                        "apiKey": "OIi5bAP7xxIGK782t8ZoiD2BkBGEzMdX3upChf9zdCxHSnMoiTnjI22Yw5kOSgy9",
+                        "isMongo": True,
+                    }
+                },
+            }
+        ),
+    )
+    @patch("urllib.request.urlopen", side_effect=mocked_requests_post)
+    def test_invalid_url(self, mock_get):
+        resource_id = "test-too-many"
+        f = io.StringIO()
+        with self.assertRaises(Exception) as context:
+            with contextlib.redirect_stderr(f):
+                get_resource_json_obj(
+                    resource_id,
+                )
+        self.assertTrue(
+            "Error getting resources from client gem5-resources:"
+            " Panic: Too many requests" in str(f.getvalue())
+        )
+        self.assertTrue(
+            "Resource with ID 'test-too-many' not found."
+            in str(context.exception)
+        )
diff --git a/tests/pyunit/stdlib/resources/pyunit_downloader_checks.py b/tests/pyunit/stdlib/resources/pyunit_downloader_checks.py
index 08736bbc32..19169e480e 100644
--- a/tests/pyunit/stdlib/resources/pyunit_downloader_checks.py
+++ b/tests/pyunit/stdlib/resources/pyunit_downloader_checks.py
@@ -1,4 +1,4 @@
-# Copyright (c) 2022 The Regents of the University of California
+# Copyright (c) 2023 The Regents of the University of California
 # All rights reserved.
 #
 # Redistribution and use in source and binary forms, with or without
@@ -28,6 +28,7 @@ import unittest
 import tempfile
 import os
 from typing import Dict
+import json
 
 from gem5.resources.downloader import (
     _get_resources_json_at_path,
@@ -42,48 +43,102 @@ class ResourceDownloaderTestSuite(unittest.TestCase):
     @classmethod
     def setUpClass(cls) -> str:
         """
-        This creates a simple resource.json temp file for testing purposes.
+        This creates a simple resources collection for testing
         """
-
-        file_contents = (
-            "{"
-            + f'"version" : "{_resources_json_version_required()}",'
-            + """
-    "url_base" : "http://dist.gem5.org/dist/v21-2",
-    "previous-versions" : {},
-    "resources": [
-        {
-            "type": "resource",
-            "name" : "riscv-disk-img",
-            "documentation" : "A simple RISCV disk image based on busybox.",
-            "architecture": "RISCV",
-            "is_zipped" : true,
-            "md5sum" : "d6126db9f6bed7774518ae25aa35f153",
-            "url": "{url_base}/images/riscv/busybox/riscv-disk.img.gz",
-            "source" : "src/riscv-fs",
-            "additional_metadata" : {
-                "root_partition": null
-            }
-        },
-        {
-            "type": "resource",
-            "name" : "riscv-lupio-busybox-img",
-            "documentation" : "A RISCV disk image, based on busybox, to ...",
-            "architecture": "RISCV",
-            "is_zipped" : true,
-            "md5sum" : "e5bee8a31f45f4803f87c0d781553ccc",
-            "url": "{url_base}/images/riscv/busybox/riscv-lupio-busybox.img",
-            "source" : "src/lupv",
-            "additional_metadata" : {
-                "root_partition": "1"
-            }
-        }
-    ]
-}
-        """
-        )
+        file_contents = [
+            {
+                "category": "binary",
+                "id": "this-is-a-test-resource",
+                "description": "This is a test resource",
+                "architecture": "X86",
+                "size": 13816,
+                "tags": ["asmtest", "testing", "riscv", "testing"],
+                "is_zipped": False,
+                "md5sum": "4e70a98b6976969deffff91eed17fba1",
+                "source": "src/asmtest",
+                "url": "http://dist.gem5.org/dist/develop/test-progs/asmtest/bin/rv64mi-p-sbreak",
+                "code_examples": [],
+                "license": " BSD-3-Clause",
+                "author": [],
+                "source_url": "https://github.com/gem5/gem5-resources/tree/develop/src/asmtest",
+                "resource_version": "1.0.0",
+                "gem5_versions": ["23.0"],
+                "example_usage": 'get_resource(resource_name="rv64mi-p-sbreak")',
+            },
+            {
+                "category": "binary",
+                "id": "this-is-a-test-resource",
+                "description": "This is a test resource but double newer",
+                "architecture": "X86",
+                "size": 13816,
+                "tags": ["asmtest"],
+                "is_zipped": False,
+                "md5sum": "4e70a98b6976969deffff91eed17fba1",
+                "source": "src/asmtest",
+                "url": "http://dist.gem5.org/dist/develop/test-progs/asmtest/bin/rv64mi-p-sbreak",
+                "code_examples": [],
+                "license": " BSD-3-Clause",
+                "author": [],
+                "source_url": "https://github.com/gem5/gem5-resources/tree/develop/src/asmtest",
+                "resource_version": "2.0.0",
+                "gem5_versions": ["23.1"],
+                "example_usage": 'get_resource(resource_name="rv64mi-p-sbreak")',
+            },
+            {
+                "category": "simpoint",
+                "id": "test-version",
+                "description": "Simpoints for running the 'x86-print-this' resource with the parameters `\"print this\" 15000`. This is encapsulated in the 'x86-print-this-15000-with-simpoints' workload.",
+                "architecture": "X86",
+                "size": 10240,
+                "tags": [],
+                "is_zipped": False,
+                "md5sum": "3fcffe3956c8a95e3fb82e232e2b41fb",
+                "is_tar_archive": True,
+                "url": "http://dist.gem5.org/dist/develop/simpoints/x86-print-this-15000-simpoints-20221013.tar",
+                "simpoint_interval": 1000000,
+                "warmup_interval": 1000000,
+                "code_examples": [],
+                "license": "",
+                "author": [],
+                "source_url": "",
+                "resource_version": "1.0.0",
+                "gem5_versions": ["23.0"],
+                "workload_name": "x86-print-this-15000-with-simpoints",
+                "example_usage": 'get_resource(resource_name="x86-print-this-1500-simpoints")',
+                "workloads": [
+                    "x86-print-this-15000-with-simpoints",
+                    "x86-print-this-15000-with-simpoints-and-checkpoint",
+                ],
+            },
+            {
+                "category": "file",
+                "id": "test-version",
+                "description": "Simpoints for running the 'x86-print-this' resource with the parameters `\"print this\" 15000`. This is encapsulated in the 'x86-print-this-15000-with-simpoints' workload.",
+                "architecture": "X86",
+                "size": 10240,
+                "tags": [],
+                "is_zipped": False,
+                "md5sum": "3fcffe3956c8a95e3fb82e232e2b41fb",
+                "is_tar_archive": True,
+                "url": "http://dist.gem5.org/dist/develop/simpoints/x86-print-this-15000-simpoints-20221013.tar",
+                "simpoint_interval": 1000000,
+                "warmup_interval": 1000000,
+                "code_examples": [],
+                "license": "",
+                "author": [],
+                "source_url": "",
+                "resource_version": "0.2.0",
+                "gem5_versions": ["23.0"],
+                "workload_name": "x86-print-this-15000-with-simpoints",
+                "example_usage": 'get_resource(resource_name="x86-print-this-1500-simpoints")',
+                "workloads": [
+                    "x86-print-this-15000-with-simpoints",
+                    "x86-print-this-15000-with-simpoints-and-checkpoint",
+                ],
+            },
+        ]
         file = tempfile.NamedTemporaryFile(mode="w", delete=False)
-        file.write(file_contents)
+        file.write(json.dumps(file_contents))
         file.close()
         cls.file_path = file.name
 
@@ -100,14 +155,16 @@ class ResourceDownloaderTestSuite(unittest.TestCase):
         "create_temp_resources_json" has been loaded correctly into a Python
         dictionary.
         """
-        self.assertTrue("resources" in json)
-        self.assertEquals(2, len(json["resources"]))
-        self.assertTrue("name" in json["resources"][0])
-        self.assertEquals("riscv-disk-img", json["resources"][0]["name"])
-        self.assertTrue("name" in json["resources"][1])
-        self.assertEquals(
-            "riscv-lupio-busybox-img", json["resources"][1]["name"]
-        )
+        self.assertEquals(4, len(json))
+        self.assertTrue("id" in json[0])
+        self.assertEquals("this-is-a-test-resource", json[0]["id"])
+        self.assertEquals("binary", json[0]["category"])
+        self.assertTrue("id" in json[1])
+        self.assertEquals("this-is-a-test-resource", json[1]["id"])
+        self.assertTrue("id" in json[2])
+        self.assertEquals("test-version", json[2]["id"])
+        self.assertTrue("id" in json[3])
+        self.assertEquals("test-version", json[3]["id"])
 
     def test_get_resources_json_at_path(self) -> None:
         # Tests the gem5.resources.downloader._get_resources_json_at_path()
diff --git a/tests/pyunit/stdlib/resources/pyunit_obtain_resources_check.py b/tests/pyunit/stdlib/resources/pyunit_obtain_resources_check.py
new file mode 100644
index 0000000000..791d96c1f1
--- /dev/null
+++ b/tests/pyunit/stdlib/resources/pyunit_obtain_resources_check.py
@@ -0,0 +1,192 @@
+# Copyright (c) 2023 The Regents of the University of California
+# All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are
+# met: redistributions of source code must retain the above copyright
+# notice, this list of conditions and the following disclaimer;
+# redistributions in binary form must reproduce the above copyright
+# notice, this list of conditions and the following disclaimer in the
+# documentation and/or other materials provided with the distribution;
+# neither the name of the copyright holders nor the names of its
+# contributors may be used to endorse or promote products derived from
+# this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+import unittest
+import os
+import io
+import contextlib
+from pathlib import Path
+
+from gem5.resources.resource import *
+
+from gem5.resources.looppoint import (
+    LooppointCsvLoader,
+    LooppointJsonLoader,
+)
+
+from gem5.isas import ISA
+
+from _m5 import core
+
+from gem5.resources.client_api.client_wrapper import ClientWrapper
+from unittest.mock import patch
+
+mock_json_path = Path(__file__).parent / "refs/obtain-resource.json"
+
+mock_config_json = {
+    "sources": {
+        "baba": {
+            "url": mock_json_path,
+            "isMongo": False,
+        }
+    },
+}
+
+
+@patch(
+    "gem5.resources.client.clientwrapper",
+    new=ClientWrapper(mock_config_json),
+)
+class TestObtainResourcesCheck(unittest.TestCase):
+    @classmethod
+    def setUpClass(cls):
+        """Prior to running the suite we set the resource directory to
+        "ref/resource-specialization.json"
+        """
+        os.environ["GEM5_RESOURCE_JSON"] = os.path.join(
+            os.path.realpath(os.path.dirname(__file__)),
+            "refs",
+            "obtain-resource.json",
+        )
+
+    @classmethod
+    def tearDownClass(cls) -> None:
+        """After running the suite we unset the gem5-resource JSON file, as to
+        not interfere with others tests.
+        """
+        del os.environ["GEM5_RESOURCE_JSON"]
+
+    def get_resource_dir(cls) -> str:
+        """To ensure the resources are cached to the same directory as all
+        other tests, this function returns the location of the testing
+        directories "resources" directory.
+        """
+        return os.path.join(
+            os.path.realpath(os.path.dirname(__file__)),
+            os.pardir,
+            os.pardir,
+            os.pardir,
+            "gem5",
+            "resources",
+        )
+
+    def test_obtain_resources_no_version(self):
+        """Test that the resource loader returns latest version compatible with that version of gem5 when no version is specified."""
+        gem5Version = core.gem5Version
+        resource = obtain_resource(
+            resource_id="test-binary-resource",
+            resource_directory=self.get_resource_dir(),
+        )
+        self.assertEquals("2.5.0", resource.get_resource_version())
+        self.assertIsInstance(resource, BinaryResource)
+        # self.assertIn(gem5Version, resource.get_gem5_versions())
+        self.assertEquals("test description", resource.get_description())
+        self.assertEquals("src/test-source", resource.get_source())
+        self.assertEquals(ISA.ARM, resource.get_architecture())
+
+    def test_obtain_resources_with_version_compatible(self):
+        gem5Version = core.gem5Version
+        resource = obtain_resource(
+            resource_id="test-binary-resource",
+            resource_directory=self.get_resource_dir(),
+            resource_version="1.7.0",
+        )
+        self.assertEquals("1.7.0", resource.get_resource_version())
+        self.assertIsInstance(resource, BinaryResource)
+        # self.assertIn(gem5Version, resource.get_gem5_versions())
+        self.assertEquals(
+            "test description v1.7.0", resource.get_description()
+        )
+        self.assertEquals("src/test-source", resource.get_source())
+        self.assertEquals(ISA.ARM, resource.get_architecture())
+
+    def test_obtain_resources_with_version_incompatible(self):
+        resource = None
+        f = io.StringIO()
+        with contextlib.redirect_stderr(f):
+            resource = obtain_resource(
+                resource_id="test-binary-resource",
+                resource_directory=self.get_resource_dir(),
+                resource_version="1.5.0",
+            )
+        self.assertTrue(
+            f"warn: Resource test-binary-resource with version 1.5.0 is not known to be compatible with gem5 version {core.gem5Version}. "
+            "This may cause problems with your simulation. This resource's compatibility with different gem5 versions can be found here: "
+            f"https://resources.gem5.org/resources/test-binary-resource/versions"
+            in f.getvalue()
+        )
+
+        resource = obtain_resource(
+            resource_id="test-binary-resource",
+            resource_directory=self.get_resource_dir(),
+            resource_version="1.5.0",
+        )
+        self.assertEquals("1.5.0", resource.get_resource_version())
+        self.assertIsInstance(resource, BinaryResource)
+        self.assertEquals(
+            "test description for 1.5.0", resource.get_description()
+        )
+        self.assertEquals("src/test-source", resource.get_source())
+        self.assertEquals(ISA.ARM, resource.get_architecture())
+
+    def test_obtain_resources_no_version_invalid_id(self):
+        with self.assertRaises(Exception) as context:
+            obtain_resource(
+                resource_id="invalid-id",
+                resource_directory=self.get_resource_dir(),
+            )
+        self.assertTrue(
+            "Resource with ID 'invalid-id' not found."
+            in str(context.exception)
+        )
+
+    def test_obtain_resources_with_version_invalid_id(self):
+        with self.assertRaises(Exception) as context:
+            obtain_resource(
+                resource_id="invalid-id",
+                resource_directory=self.get_resource_dir(),
+                resource_version="1.7.0",
+            )
+        self.assertTrue(
+            "Resource with ID 'invalid-id' not found."
+            in str(context.exception)
+        )
+
+    def test_obtain_resources_with_version_invalid_version(self):
+        with self.assertRaises(Exception) as context:
+            obtain_resource(
+                resource_id="test-binary-resource",
+                resource_directory=self.get_resource_dir(),
+                resource_version="3.0.0",
+            )
+        print("context.exception: ", context.exception)
+        print(str(context.exception))
+        self.assertTrue(
+            f"Resource test-binary-resource with version '3.0.0'"
+            " not found.\nResource versions can be found at: "
+            f"https://resources.gem5.org/resources/test-binary-resource/versions"
+            in str(context.exception)
+        )
diff --git a/tests/pyunit/stdlib/resources/pyunit_resource_download_checks.py b/tests/pyunit/stdlib/resources/pyunit_resource_download_checks.py
new file mode 100644
index 0000000000..8f6674ff0d
--- /dev/null
+++ b/tests/pyunit/stdlib/resources/pyunit_resource_download_checks.py
@@ -0,0 +1,72 @@
+# Copyright (c) 2023 The Regents of the University of California
+# All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are
+# met: redistributions of source code must retain the above copyright
+# notice, this list of conditions and the following disclaimer;
+# redistributions in binary form must reproduce the above copyright
+# notice, this list of conditions and the following disclaimer in the
+# documentation and/or other materials provided with the distribution;
+# neither the name of the copyright holders nor the names of its
+# contributors may be used to endorse or promote products derived from
+# this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+import unittest
+import tempfile
+import os
+from typing import Dict
+
+from gem5.resources.downloader import (
+    get_resources_json_obj,
+)
+
+
+class ResourceDownloadTestSuite(unittest.TestCase):
+    """Test cases for gem5.resources.downloader"""
+
+    @classmethod
+    def setUpClass(cls) -> str:
+        pass
+
+    def get_resource_json_by_id(self) -> None:
+        """Get a resource by its id"""
+        resources = get_resources_json_obj("test-version")
+        self.assertEqual(resources["id"], "test-version")
+        self.assertEqual(resources["resource_version"], "2.0.0")
+
+    def get_resource_json_invalid_id(self) -> None:
+        """Should throw an exception when trying to get a resource that doesn't exist"""
+        with self.assertRaises(Exception) as context:
+            get_resources_json_obj("this-resource-doesnt-exist")
+        self.assertTrue(
+            f"Error: Resource with name 'this-resource-doesnt-exist' does not exist"
+            in str(context.exception)
+        )
+
+    def get_resource_json_by_id_and_version(self) -> None:
+        """Get a resource by its id and version"""
+        resources = get_resources_json_obj("test-version", "1.0.0")
+        self.assertEqual(resources["id"], "test-version")
+        self.assertEqual(resources["resource_version"], "1.0.0")
+
+    def get_resource_json_by_id_and_invalid_version(self) -> None:
+        """Get a resource by its id and an invalid version (does not exist)"""
+        with self.assertRaises(Exception) as context:
+            get_resources_json_obj("test-version", "3.0.0")
+        self.assertTrue(
+            f"Specified Version 3.0.0 does not exist for the resource 'test-version'."
+            in str(context.exception)
+        )
diff --git a/tests/pyunit/stdlib/resources/pyunit_resource_specialization.py b/tests/pyunit/stdlib/resources/pyunit_resource_specialization.py
index 660bf5f38f..5c22a7341e 100644
--- a/tests/pyunit/stdlib/resources/pyunit_resource_specialization.py
+++ b/tests/pyunit/stdlib/resources/pyunit_resource_specialization.py
@@ -37,7 +37,25 @@ from gem5.resources.looppoint import (
 
 from gem5.isas import ISA
 
+from gem5.resources.client_api.client_wrapper import ClientWrapper
+from unittest.mock import patch
 
+mock_json_path = Path(__file__).parent / "refs/resource-specialization.json"
+
+mock_config_json = {
+    "sources": {
+        "baba": {
+            "url": mock_json_path,
+            "isMongo": False,
+        }
+    },
+}
+
+
+@patch(
+    "gem5.resources.client.clientwrapper",
+    ClientWrapper(mock_config_json),
+)
 class ResourceSpecializationSuite(unittest.TestCase):
     """This suite tests that `gem5.resource.resource` casts to the correct
     `AbstractResource` specialization when using the `obtain_resource`
@@ -79,14 +97,14 @@ class ResourceSpecializationSuite(unittest.TestCase):
     def test_binary_resource(self) -> None:
         """Tests the loading of of a BinaryResource"""
         resource = obtain_resource(
-            resource_name="binary-example",
+            resource_id="binary-example",
             resource_directory=self.get_resource_dir(),
         )
 
         self.assertIsInstance(resource, BinaryResource)
 
         self.assertEquals(
-            "binary-example documentation.", resource.get_documentation()
+            "binary-example documentation.", resource.get_description()
         )
         self.assertEquals("src/simple", resource.get_source())
         self.assertEquals(ISA.ARM, resource.get_architecture())
@@ -94,14 +112,14 @@ class ResourceSpecializationSuite(unittest.TestCase):
     def test_kernel_resource(self) -> None:
         """Tests the loading of a KernelResource."""
         resource = obtain_resource(
-            resource_name="kernel-example",
+            resource_id="kernel-example",
             resource_directory=self.get_resource_dir(),
         )
 
         self.assertIsInstance(resource, KernelResource)
 
         self.assertEquals(
-            "kernel-example documentation.", resource.get_documentation()
+            "kernel-example documentation.", resource.get_description()
         )
         self.assertEquals("src/linux-kernel", resource.get_source())
         self.assertEquals(ISA.RISCV, resource.get_architecture())
@@ -109,14 +127,14 @@ class ResourceSpecializationSuite(unittest.TestCase):
     def test_bootloader_resource(self) -> None:
         """Tests the loading of a BootloaderResource."""
         resource = obtain_resource(
-            resource_name="bootloader-example",
+            resource_id="bootloader-example",
             resource_directory=self.get_resource_dir(),
         )
 
         self.assertIsInstance(resource, BootloaderResource)
 
         self.assertEquals(
-            "bootloader documentation.", resource.get_documentation()
+            "bootloader documentation.", resource.get_description()
         )
         self.assertIsNone(resource.get_source())
         self.assertIsNone(resource.get_architecture())
@@ -124,14 +142,14 @@ class ResourceSpecializationSuite(unittest.TestCase):
     def test_disk_image_resource(self) -> None:
         """Tests the loading of a DiskImageResource."""
         resource = obtain_resource(
-            resource_name="disk-image-example",
+            resource_id="disk-image-example",
             resource_directory=self.get_resource_dir(),
         )
 
         self.assertIsInstance(resource, DiskImageResource)
 
         self.assertEquals(
-            "disk-image documentation.", resource.get_documentation()
+            "disk-image documentation.", resource.get_description()
         )
         self.assertEquals("src/x86-ubuntu", resource.get_source())
         self.assertEquals("1", resource.get_root_partition())
@@ -139,40 +157,40 @@ class ResourceSpecializationSuite(unittest.TestCase):
     def test_checkpoint_resource(self) -> None:
         """Tests the loading of a CheckpointResource."""
         resource = obtain_resource(
-            resource_name="checkpoint-example",
+            resource_id="checkpoint-example",
             resource_directory=self.get_resource_dir(),
         )
 
         self.assertIsInstance(resource, CheckpointResource)
 
         self.assertEquals(
-            "checkpoint-example documentation.", resource.get_documentation()
+            "checkpoint-example documentation.", resource.get_description()
         )
         self.assertIsNone(resource.get_source())
 
     def test_git_resource(self) -> None:
         """Tests the loading of a GitResource."""
         resource = obtain_resource(
-            resource_name="git-example",
+            resource_id="git-example",
             resource_directory=self.get_resource_dir(),
         )
 
         self.assertIsInstance(resource, GitResource)
 
-        self.assertIsNone(resource.get_documentation())
+        self.assertIsNone(resource.get_description())
         self.assertIsNone(resource.get_source())
 
     def test_simpoint_directory_resource(self) -> None:
         """Tests the loading of a Simpoint directory resource."""
         resource = obtain_resource(
-            resource_name="simpoint-directory-example",
+            resource_id="simpoint-directory-example",
             resource_directory=self.get_resource_dir(),
         )
 
         self.assertIsInstance(resource, SimpointDirectoryResource)
 
         self.assertEquals(
-            "simpoint directory documentation.", resource.get_documentation()
+            "simpoint directory documentation.", resource.get_description()
         )
         self.assertIsNone(resource.get_source())
 
@@ -199,14 +217,14 @@ class ResourceSpecializationSuite(unittest.TestCase):
     def test_simpoint_resource(self) -> None:
         """Tests the loading of a Simpoint resource."""
         resource = obtain_resource(
-            resource_name="simpoint-example",
+            resource_id="simpoint-example",
             resource_directory=self.get_resource_dir(),
         )
 
         self.assertIsInstance(resource, SimpointResource)
 
         self.assertEquals(
-            "simpoint documentation.", resource.get_documentation()
+            "simpoint documentation.", resource.get_description()
         )
         self.assertIsNone(resource.get_source())
         self.assertIsNone(resource.get_local_path())
@@ -219,26 +237,27 @@ class ResourceSpecializationSuite(unittest.TestCase):
     def test_file_resource(self) -> None:
         """Tests the loading of a FileResource."""
         resource = obtain_resource(
-            resource_name="file-example",
+            resource_id="file-example",
             resource_directory=self.get_resource_dir(),
+            resource_version="1.0.0",
         )
 
         self.assertIsInstance(resource, FileResource)
 
-        self.assertIsNone(resource.get_documentation())
+        self.assertIsNone(resource.get_description())
         self.assertIsNone(resource.get_source())
 
     def test_directory_resource(self) -> None:
         """Tests the loading of a DirectoryResource."""
         resource = obtain_resource(
-            resource_name="directory-example",
+            resource_id="directory-example",
             resource_directory=self.get_resource_dir(),
         )
 
         self.assertIsInstance(resource, DirectoryResource)
 
         self.assertEquals(
-            "directory-example documentation.", resource.get_documentation()
+            "directory-example documentation.", resource.get_description()
         )
         self.assertIsNone(resource.get_source())
 
@@ -247,7 +266,7 @@ class ResourceSpecializationSuite(unittest.TestCase):
         pinpoints csv file."""
 
         resource = obtain_resource(
-            resource_name="looppoint-pinpoint-csv-resource",
+            resource_id="looppoint-pinpoint-csv-resource",
             resource_directory=self.get_resource_dir(),
         )
 
@@ -258,7 +277,7 @@ class ResourceSpecializationSuite(unittest.TestCase):
         self.assertIsInstance(resource, LooppointCsvLoader)
 
         self.assertEquals(
-            "A looppoint pinpoints csv file.", resource.get_documentation()
+            "A looppoint pinpoints csv file.", resource.get_description()
         )
         self.assertIsNone(resource.get_source())
 
@@ -267,8 +286,9 @@ class ResourceSpecializationSuite(unittest.TestCase):
         Looppoint JSON file."""
 
         resource = obtain_resource(
-            resource_name="looppoint-json-restore-resource-region-1",
+            resource_id="looppoint-json-restore-resource-region-1",
             resource_directory=self.get_resource_dir(),
+            resource_version="1.0.0",
         )
 
         self.assertIsInstance(resource, LooppointJsonResource)
@@ -278,6 +298,6 @@ class ResourceSpecializationSuite(unittest.TestCase):
         self.assertTrue("1" in resource.get_regions())
 
         self.assertEquals(
-            "A looppoint json file resource.", resource.get_documentation()
+            "A looppoint json file resource.", resource.get_description()
         )
         self.assertIsNone(resource.get_source())
diff --git a/tests/pyunit/stdlib/resources/pyunit_workload_checks.py b/tests/pyunit/stdlib/resources/pyunit_workload_checks.py
index 2bc31f5a3f..b898faeb79 100644
--- a/tests/pyunit/stdlib/resources/pyunit_workload_checks.py
+++ b/tests/pyunit/stdlib/resources/pyunit_workload_checks.py
@@ -1,4 +1,4 @@
-# Copyright (c) 2022 The Regents of the University of California
+# Copyright (c) 2023 The Regents of the University of California
 # All rights reserved.
 #
 # Redistribution and use in source and binary forms, with or without
@@ -36,6 +36,29 @@ from gem5.resources.resource import (
 
 from typing import Dict
 
+from gem5.resources.client_api.client_wrapper import ClientWrapper
+from unittest.mock import patch
+from pathlib import Path
+
+mock_config_json1 = {
+    "sources": {
+        "baba": {
+            "url": Path(__file__).parent
+            / "refs/workload-checks-custom-workload.json",
+            "isMongo": False,
+        }
+    },
+}
+
+mock_config_json2 = {
+    "sources": {
+        "baba": {
+            "url": Path(__file__).parent / "refs/workload-checks.json",
+            "isMongo": False,
+        }
+    },
+}
+
 
 class CustomWorkloadTestSuite(unittest.TestCase):
     """
@@ -43,8 +66,11 @@ class CustomWorkloadTestSuite(unittest.TestCase):
     """
 
     @classmethod
+    @patch(
+        "gem5.resources.client.clientwrapper",
+        new=ClientWrapper(mock_config_json1),
+    )
     def setUpClass(cls) -> None:
-
         os.environ["GEM5_RESOURCE_JSON"] = os.path.join(
             os.path.realpath(os.path.dirname(__file__)),
             "refs",
@@ -114,8 +140,7 @@ class CustomWorkloadTestSuite(unittest.TestCase):
             "test", self.custom_workload.get_parameters()["binary"]
         )
 
-        # We set the overridden parameter back to it's old value.
-        self.custom_workload.set_parameter("binary", old_value)
+        # We set the overridden parameter back to it's old valu        self.custom_workload.set_parameter("binary", old_value)
 
 
 class WorkloadTestSuite(unittest.TestCase):
@@ -124,8 +149,11 @@ class WorkloadTestSuite(unittest.TestCase):
     """
 
     @classmethod
+    @patch(
+        "gem5.resources.client.clientwrapper",
+        ClientWrapper(mock_config_json2),
+    )
     def setUpClass(cls):
-
         os.environ["GEM5_RESOURCE_JSON"] = os.path.join(
             os.path.realpath(os.path.dirname(__file__)),
             "refs",
@@ -157,9 +185,9 @@ class WorkloadTestSuite(unittest.TestCase):
         self.assertTrue("kernel" in parameters)
         self.assertTrue(isinstance(parameters["kernel"], BinaryResource))
 
-        self.assertTrue("disk_image" in parameters)
+        self.assertTrue("disk-image" in parameters)
         self.assertTrue(
-            isinstance(parameters["disk_image"], DiskImageResource)
+            isinstance(parameters["disk-image"], DiskImageResource)
         )
 
         self.assertTrue("readfile_contents" in parameters)
diff --git a/tests/pyunit/stdlib/resources/refs/mongo_dup_mock.json b/tests/pyunit/stdlib/resources/refs/mongo_dup_mock.json
new file mode 100644
index 0000000000..fe87cc8c28
--- /dev/null
+++ b/tests/pyunit/stdlib/resources/refs/mongo_dup_mock.json
@@ -0,0 +1,30 @@
+[
+    {
+        "category": "file",
+        "id": "test-duplicate",
+        "description": "test",
+        "architecture": "X86",
+        "size": 10240,
+        "tags": [],
+        "is_zipped": false,
+        "md5sum": "3fcffe3956c8a95e3fb82e232e2b41fb",
+        "is_tar_archive": true,
+        "url": "http://dist.gem5.org/dist/develop/simpoints/x86-print-this-15000-simpoints-20221013.tar",
+        "simpoint_interval": 1000000,
+        "warmup_interval": 1000000,
+        "code_examples": [],
+        "license": "",
+        "author": [],
+        "source_url": "",
+        "resource_version": "0.2.0",
+        "gem5_versions": [
+            "23.0"
+        ],
+        "workload_name": "x86-print-this-15000-with-simpoints",
+        "example_usage": "get_resource(resource_name=\"x86-print-this-1500-simpoints\")",
+        "workloads": [
+            "x86-print-this-15000-with-simpoints",
+            "x86-print-this-15000-with-simpoints-and-checkpoint"
+        ]
+    }
+]
diff --git a/tests/pyunit/stdlib/resources/refs/mongo_mock.json b/tests/pyunit/stdlib/resources/refs/mongo_mock.json
new file mode 100644
index 0000000000..b6376cc5e4
--- /dev/null
+++ b/tests/pyunit/stdlib/resources/refs/mongo_mock.json
@@ -0,0 +1,56 @@
+[
+    {
+        "category": "disk-image",
+        "id": "x86-ubuntu-18.04-img",
+        "description": "This is a test resource",
+        "architecture": "X86",
+        "size": 688119691,
+        "tags": [
+            "x86",
+            "fullsystem"
+        ],
+        "is_zipped": true,
+        "md5sum": "90e363abf0ddf22eefa2c7c5c9391c49",
+        "source": "src/x86-ubuntu",
+        "url": "http://dist.gem5.org/dist/develop/images/x86/ubuntu-18-04/x86-ubuntu.img.gz",
+        "root_partition": "1",
+        "code_examples": [],
+        "license": "",
+        "author": [
+            "Ayaz Akram"
+        ],
+        "source_url": "https://github.com/gem5/gem5-resources/tree/develop/src/x86-ubuntu",
+        "resource_version": "1.0.0",
+        "gem5_versions": [
+            "23.0"
+        ],
+        "example_usage": "get_resource(resource_name=\"x86-ubuntu-18.04-img\")"
+    },
+    {
+        "category": "disk-image",
+        "id": "x86-ubuntu-18.04-img",
+        "description": "A disk image containing Ubuntu 18.04 for x86. This image will run an `m5 readfile` instruction after booting. If no script file is specified an `m5 exit` instruction will be executed.",
+        "architecture": "X86",
+        "size": 688119691,
+        "tags": [
+            "x86",
+            "fullsystem"
+        ],
+        "is_zipped": true,
+        "md5sum": "90e363abf0ddf22eefa2c7c5c9391c49",
+        "source": "src/x86-ubuntu",
+        "url": "http://dist.gem5.org/dist/develop/images/x86/ubuntu-18-04/x86-ubuntu.img.gz",
+        "root_partition": "1",
+        "code_examples": [],
+        "license": "",
+        "author": [
+            "Ayaz Akram"
+        ],
+        "source_url": "https://github.com/gem5/gem5-resources/tree/develop/src/x86-ubuntu",
+        "resource_version": "1.1.0",
+        "gem5_versions": [
+            "23.0"
+        ],
+        "example_usage": "get_resource(resource_name=\"x86-ubuntu-18.04-img\")"
+    }
+]
diff --git a/tests/pyunit/stdlib/resources/refs/obtain-resource.json b/tests/pyunit/stdlib/resources/refs/obtain-resource.json
new file mode 100644
index 0000000000..fac95e106a
--- /dev/null
+++ b/tests/pyunit/stdlib/resources/refs/obtain-resource.json
@@ -0,0 +1,59 @@
+[
+    {
+        "category": "binary",
+        "id": "test-binary-resource",
+        "description": "test description",
+        "architecture": "ARM",
+        "is_zipped": false,
+        "md5sum": "71b2cb004fe2cda4556f0b1a38638af6",
+        "url": "http://dist.gem5.org/dist/develop/test-progs/hello/bin/arm/linux/hello64-static",
+        "source": "src/test-source",
+        "resource_version": "2.5.0",
+        "gem5_versions": [
+            "25.0"
+        ]
+    },
+    {
+        "category": "binary",
+        "id": "test-binary-resource",
+        "description": "test description",
+        "architecture": "ARM",
+        "is_zipped": false,
+        "md5sum": "71b2cb004fe2cda4556f0b1a38638af6",
+        "url": "http://dist.gem5.org/dist/develop/test-progs/hello/bin/arm/linux/hello64-static",
+        "source": "src/test-source",
+        "resource_version": "2.0.0",
+        "gem5_versions": [
+            "develop"
+        ]
+    },
+    {
+        "category": "binary",
+        "id": "test-binary-resource",
+        "description": "test description v1.7.0",
+        "architecture": "ARM",
+        "is_zipped": false,
+        "md5sum": "71b2cb004fe2cda4556f0b1a38638af6",
+        "url": "http://dist.gem5.org/dist/develop/test-progs/hello/bin/arm/linux/hello64-static",
+        "source": "src/test-source",
+        "resource_version": "1.7.0",
+        "gem5_versions": [
+            "develop"
+        ]
+    },
+    {
+        "category": "binary",
+        "id": "test-binary-resource",
+        "description": "test description for 1.5.0",
+        "architecture": "ARM",
+        "is_zipped": false,
+        "md5sum": "71b2cb004fe2cda4556f0b1a38638af6",
+        "url": "http://dist.gem5.org/dist/develop/test-progs/hello/bin/arm/linux/hello64-static",
+        "source": "src/test-source",
+        "resource_version": "1.5.0",
+        "gem5_versions": [
+            "21.1",
+            "22.1"
+        ]
+    }
+]
diff --git a/tests/pyunit/stdlib/resources/refs/resource-specialization.json b/tests/pyunit/stdlib/resources/refs/resource-specialization.json
index c4d5eb4714..1129f1bd05 100644
--- a/tests/pyunit/stdlib/resources/refs/resource-specialization.json
+++ b/tests/pyunit/stdlib/resources/refs/resource-specialization.json
@@ -1,132 +1,181 @@
-
-{
-    "version" : "develop",
-    "url_base" : "http://dist.gem5.org/dist/v22-1",
-    "previous-versions" : {
-        "develop" : "https://gem5.googlesource.com/public/gem5-resources/+/refs/heads/develop/resources.json?format=TEXT",
-        "21.2" : "http://resources.gem5.org/prev-resources-json/resources-21-2.json"
+[
+    {
+        "category": "kernel",
+        "id": "kernel-example",
+        "description": "kernel-example documentation.",
+        "architecture": "RISCV",
+        "is_zipped": false,
+        "md5sum": "60a53c7d47d7057436bf4b9df707a841",
+        "url": "http://dist.gem5.org/dist/develop/kernels/x86/static/vmlinux-5.4.49",
+        "source": "src/linux-kernel",
+        "resource_version": "1.0.0",
+        "gem5_versions": [
+            "23.0"
+        ]
     },
-    "resources": [
-        {
-            "type" : "kernel",
-            "name" : "kernel-example",
-            "documentation" : "kernel-example documentation.",
-            "architecture" : "RISCV",
-            "is_zipped" : false,
-            "md5sum" : "60a53c7d47d7057436bf4b9df707a841",
-            "url" : "{url_base}/kernels/x86/static/vmlinux-5.4.49",
-            "source" : "src/linux-kernel"
-        },
-        {
-            "type" : "disk-image",
-            "name" : "disk-image-example",
-            "documentation" : "disk-image documentation.",
-            "architecture" : "X86",
-            "is_zipped" : true,
-            "md5sum" : "90e363abf0ddf22eefa2c7c5c9391c49",
-            "url" : "{url_base}/images/x86/ubuntu-18-04/x86-ubuntu.img.gz",
-            "source" : "src/x86-ubuntu",
-            "root_partition": "1"
-        },
-        {
-            "type" : "binary",
-            "name" : "binary-example",
-            "documentation" : "binary-example documentation.",
-            "architecture" : "ARM",
-            "is_zipped" :  false,
-            "md5sum" : "71b2cb004fe2cda4556f0b1a38638af6",
-            "url" : "{url_base}/test-progs/hello/bin/arm/linux/hello64-static",
-            "source" : "src/simple"
-        },
-        {
-            "type" : "bootloader",
-            "name" : "bootloader-example",
-            "documentation" : "bootloader documentation.",
-            "is_zipped" :  false,
-            "md5sum" : "71b2cb004fe2cda4556f0b1a38638af6",
-            "url" : "{url_base}/test-progs/hello/bin/arm/linux/hello64-static"
-        },
-        {
-            "type" : "checkpoint",
-            "name" : "checkpoint-example",
-            "documentation" : "checkpoint-example documentation.",
-            "architecture": "RISCV",
-            "is_zipped" : false,
-            "md5sum" : "3a57c1bb1077176c4587b8a3bf4f8ace",
-            "source" : null,
-            "is_tar_archive" : true,
-            "url": "{url_base}/checkpoints/riscv-hello-example-checkpoint.tar"
-        },
-        {
-            "type" : "git",
-            "name" : "git-example",
-            "documentation" : null,
-            "is_zipped" :  false,
-            "is_tar_archive" : true,
-            "md5sum" : "71b2cb004fe2cda4556f0b1a38638af6",
-            "url": "{url_base}/checkpoints/riscv-hello-example-checkpoint.tar"
-        },
-        {
-            "type" : "file",
-            "name" : "file-example",
-            "documentation" : null,
-            "is_zipped" :  false,
-            "md5sum" : "71b2cb004fe2cda4556f0b1a38638af6",
-            "url": "{url_base}/checkpoints/riscv-hello-example-checkpoint.tar",
-            "source" : null
-        },
-        {
-            "type" : "directory",
-            "name" : "directory-example",
-            "documentation" : "directory-example documentation.",
-            "is_zipped" : false,
-            "md5sum" : "3a57c1bb1077176c4587b8a3bf4f8ace",
-            "source" : null,
-            "is_tar_archive" : true,
-            "url": "{url_base}/checkpoints/riscv-hello-example-checkpoint.tar"
-        },
-        {
-            "type": "simpoint-directory",
-            "name": "simpoint-directory-example",
-            "documentation": "simpoint directory documentation.",
-            "is_zipped" : false,
-            "md5sum" : "3fcffe3956c8a95e3fb82e232e2b41fb",
-            "source" : null,
-            "is_tar_archive" : true,
-            "url": "{url_base}/simpoints/x86-print-this-15000-simpoints-20221013.tar",
-            "simpoint_interval": 1000000,
-            "warmup_interval": 1000000,
-            "simpoint_file": "simpoint.simpt",
-            "weight_file": "simpoint.weight",
-            "workload_name": "Example Workload"
-        },
-        {
-            "type": "simpoint",
-            "name": "simpoint-example",
-            "documentation": "simpoint documentation.",
-            "simpoint_interval": 1000000,
-            "warmup_interval": 23445,
-            "simpoint_list" : [2,3,4,15],
-            "weight_list" : [0.1, 0.2, 0.4, 0.3]
-        },
-        {
-            "type": "looppoint-pinpoint-csv",
-            "name": "looppoint-pinpoint-csv-resource",
-            "documentation" : "A looppoint pinpoints csv file.",
-            "is_zipped" :  false,
-            "md5sum" : "199ab22dd463dc70ee2d034bfe045082",
-            "url": "http://dist.gem5.org/dist/develop/pinpoints/x86-matrix-multiply-omp-100-8-global-pinpoints-20230127",
-            "source" : null
-        },
-        {
-            "type": "looppoint-json",
-            "name": "looppoint-json-restore-resource-region-1",
-            "documentation" : "A looppoint json file resource.",
-            "is_zipped" :  false,
-            "region_id" : "1",
-            "md5sum" : "a71ed64908b082ea619b26b940a643c1",
-            "url": "http://dist.gem5.org/dist/develop/looppoints/x86-matrix-multiply-omp-100-8-looppoint-json-20230128",
-            "source" : null
-        }
-    ]
-}
+    {
+        "category": "disk-image",
+        "id": "disk-image-example",
+        "description": "disk-image documentation.",
+        "architecture": "X86",
+        "is_zipped": true,
+        "md5sum": "90e363abf0ddf22eefa2c7c5c9391c49",
+        "url": "http://dist.gem5.org/dist/develop/images/x86/ubuntu-18-04/x86-ubuntu.img.gz",
+        "source": "src/x86-ubuntu",
+        "root_partition": "1",
+        "resource_version": "1.0.0",
+        "gem5_versions": [
+            "23.0"
+        ]
+    },
+    {
+        "category": "binary",
+        "id": "binary-example",
+        "description": "binary-example documentation.",
+        "architecture": "ARM",
+        "is_zipped": false,
+        "md5sum": "71b2cb004fe2cda4556f0b1a38638af6",
+        "url": "http://dist.gem5.org/dist/develop/test-progs/hello/bin/arm/linux/hello64-static",
+        "source": "src/simple",
+        "resource_version": "1.0.0",
+        "gem5_versions": [
+            "23.0"
+        ]
+    },
+    {
+        "category": "bootloader",
+        "id": "bootloader-example",
+        "description": "bootloader documentation.",
+        "is_zipped": false,
+        "md5sum": "71b2cb004fe2cda4556f0b1a38638af6",
+        "url": "http://dist.gem5.org/dist/develop/test-progs/hello/bin/arm/linux/hello64-static",
+        "resource_version": "1.0.0",
+        "gem5_versions": [
+            "23.0"
+        ]
+    },
+    {
+        "category": "checkpoint",
+        "id": "checkpoint-example",
+        "description": "checkpoint-example documentation.",
+        "architecture": "RISCV",
+        "is_zipped": false,
+        "md5sum": "3a57c1bb1077176c4587b8a3bf4f8ace",
+        "source": null,
+        "is_tar_archive": true,
+        "url": "http://dist.gem5.org/dist/develop/checkpoints/riscv-hello-example-checkpoint.tar",
+        "resource_version": "1.0.0",
+        "gem5_versions": [
+            "23.0"
+        ]
+    },
+    {
+        "category": "git",
+        "id": "git-example",
+        "description": null,
+        "is_zipped": false,
+        "is_tar_archive": true,
+        "md5sum": "71b2cb004fe2cda4556f0b1a38638af6",
+        "url": "http://dist.gem5.org/dist/develop/checkpoints/riscv-hello-example-checkpoint.tar",
+        "resource_version": "1.0.0",
+        "gem5_versions": [
+            "23.0"
+        ]
+    },
+    {
+        "category": "file",
+        "id": "file-example",
+        "description": null,
+        "is_zipped": false,
+        "md5sum": "71b2cb004fe2cda4556f0b1a38638af6",
+        "url": "http://dist.gem5.org/dist/develop/checkpoints/riscv-hello-example-checkpoint.tar",
+        "source": null,
+        "resource_version": "1.0.0",
+        "gem5_versions": [
+            "23.0"
+        ]
+    },
+    {
+        "category": "directory",
+        "id": "directory-example",
+        "description": "directory-example documentation.",
+        "is_zipped": false,
+        "md5sum": "3a57c1bb1077176c4587b8a3bf4f8ace",
+        "source": null,
+        "is_tar_archive": true,
+        "url": "http://dist.gem5.org/dist/develop/checkpoints/riscv-hello-example-checkpoint.tar",
+        "resource_version": "1.0.0",
+        "gem5_versions": [
+            "23.0"
+        ]
+    },
+    {
+        "category": "simpoint-directory",
+        "id": "simpoint-directory-example",
+        "description": "simpoint directory documentation.",
+        "is_zipped": false,
+        "md5sum": "3fcffe3956c8a95e3fb82e232e2b41fb",
+        "source": null,
+        "is_tar_archive": true,
+        "url": "http://dist.gem5.org/dist/develop/simpoints/x86-print-this-15000-simpoints-20221013.tar",
+        "simpoint_interval": 1000000,
+        "warmup_interval": 1000000,
+        "simpoint_file": "simpoint.simpt",
+        "weight_file": "simpoint.weight",
+        "workload_name": "Example Workload",
+        "resource_version": "1.0.0",
+        "gem5_versions": [
+            "23.0"
+        ]
+    },
+    {
+        "category": "simpoint",
+        "id": "simpoint-example",
+        "description": "simpoint documentation.",
+        "simpoint_interval": 1000000,
+        "warmup_interval": 23445,
+        "simpoint_list": [
+            2,
+            3,
+            4,
+            15
+        ],
+        "weight_list": [
+            0.1,
+            0.2,
+            0.4,
+            0.3
+        ],
+        "resource_version": "1.0.0",
+        "gem5_versions": [
+            "23.0"
+        ]
+    },
+    {
+        "category": "looppoint-pinpoint-csv",
+        "id": "looppoint-pinpoint-csv-resource",
+        "description": "A looppoint pinpoints csv file.",
+        "is_zipped": false,
+        "md5sum": "199ab22dd463dc70ee2d034bfe045082",
+        "url": "http://dist.gem5.org/dist/develop/pinpoints/x86-matrix-multiply-omp-100-8-global-pinpoints-20230127",
+        "source": null,
+        "resource_version": "1.0.0",
+        "gem5_versions": [
+            "23.0"
+        ]
+    },
+    {
+        "category": "looppoint-json",
+        "id": "looppoint-json-restore-resource-region-1",
+        "description": "A looppoint json file resource.",
+        "is_zipped": false,
+        "region_id": "1",
+        "md5sum": "a71ed64908b082ea619b26b940a643c1",
+        "url": "http://dist.gem5.org/dist/develop/looppoints/x86-matrix-multiply-omp-100-8-looppoint-json-20230128",
+        "source": null,
+        "resource_version": "1.0.0",
+        "gem5_versions": [
+            "23.0"
+        ]
+    }
+]
diff --git a/tests/pyunit/stdlib/resources/refs/resources.json b/tests/pyunit/stdlib/resources/refs/resources.json
new file mode 100644
index 0000000000..812caeff43
--- /dev/null
+++ b/tests/pyunit/stdlib/resources/refs/resources.json
@@ -0,0 +1,329 @@
+[
+    {
+        "category": "binary",
+        "id": "this-is-a-test-resource",
+        "description": "This is a test resource",
+        "architecture": "X86",
+        "size": 13816,
+        "tags": [
+            "asmtest",
+            "testing",
+            "riscv",
+            "testing"
+        ],
+        "is_zipped": false,
+        "md5sum": "4e70a98b6976969deffff91eed17fba1",
+        "source": "src/asmtest",
+        "url": "http://dist.gem5.org/dist/develop/test-progs/asmtest/bin/rv64mi-p-sbreak",
+        "code_examples": [],
+        "license": " BSD-3-Clause",
+        "author": [],
+        "source_url": "https://github.com/gem5/gem5-resources/tree/develop/src/asmtest",
+        "resource_version": "1.0.0",
+        "gem5_versions": [
+            "23.0"
+        ],
+        "example_usage": "get_resource(resource_name=\"rv64mi-p-sbreak\")"
+    },
+    {
+        "category": "binary",
+        "id": "this-is-a-test-resource",
+        "description": "This is a test resource but newer",
+        "architecture": "X86",
+        "size": 13816,
+        "tags": [
+            "asmtest",
+            "testing",
+            "riscv",
+            "testing",
+            "new"
+        ],
+        "is_zipped": false,
+        "md5sum": "4e70a98b6976969deffff91eed17fba1",
+        "source": "src/asmtest",
+        "url": "http://dist.gem5.org/dist/develop/test-progs/asmtest/bin/rv64mi-p-sbreak",
+        "code_examples": [],
+        "license": " BSD-3-Clause",
+        "author": [],
+        "source_url": "https://github.com/gem5/gem5-resources/tree/develop/src/asmtest",
+        "resource_version": "1.1.0",
+        "gem5_versions": [
+            "23.0"
+        ],
+        "example_usage": "get_resource(resource_name=\"rv64mi-p-sbreak\")"
+    },
+    {
+        "category": "binary",
+        "id": "this-is-a-test-resource",
+        "description": "This is a test resource but double newer",
+        "architecture": "X86",
+        "size": 13816,
+        "tags": [
+            "asmtest"
+        ],
+        "is_zipped": false,
+        "md5sum": "4e70a98b6976969deffff91eed17fba1",
+        "source": "src/asmtest",
+        "url": "http://dist.gem5.org/dist/develop/test-progs/asmtest/bin/rv64mi-p-sbreak",
+        "code_examples": [],
+        "license": " BSD-3-Clause",
+        "author": [],
+        "source_url": "https://github.com/gem5/gem5-resources/tree/develop/src/asmtest",
+        "resource_version": "2.0.0",
+        "gem5_versions": [
+            "23.1"
+        ],
+        "example_usage": "get_resource(resource_name=\"rv64mi-p-sbreak\")"
+    },
+    {
+        "category": "simpoint",
+        "id": "test-version",
+        "description": "Simpoints for running the 'x86-print-this' resource with the parameters `\"print this\" 15000`. This is encapsulated in the 'x86-print-this-15000-with-simpoints' workload.",
+        "architecture": "X86",
+        "size": 10240,
+        "tags": [],
+        "is_zipped": false,
+        "md5sum": "3fcffe3956c8a95e3fb82e232e2b41fb",
+        "is_tar_archive": true,
+        "url": "http://dist.gem5.org/dist/develop/simpoints/x86-print-this-15000-simpoints-20221013.tar",
+        "simpoint_interval": 1000000,
+        "warmup_interval": 1000000,
+        "code_examples": [],
+        "license": "",
+        "author": [],
+        "source_url": "",
+        "resource_version": "1.0.0",
+        "gem5_versions": [
+            "23.0"
+        ],
+        "workload_name": "x86-print-this-15000-with-simpoints",
+        "example_usage": "get_resource(resource_name=\"x86-print-this-1500-simpoints\")",
+        "workloads": [
+            "x86-print-this-15000-with-simpoints",
+            "x86-print-this-15000-with-simpoints-and-checkpoint"
+        ]
+    },
+    {
+        "category": "file",
+        "id": "test-version",
+        "description": "Simpoints for running the 'x86-print-this' resource with the parameters `\"print this\" 15000`. This is encapsulated in the 'x86-print-this-15000-with-simpoints' workload.",
+        "architecture": "X86",
+        "size": 10240,
+        "tags": [],
+        "is_zipped": false,
+        "md5sum": "3fcffe3956c8a95e3fb82e232e2b41fb",
+        "is_tar_archive": true,
+        "url": "http://dist.gem5.org/dist/develop/simpoints/x86-print-this-15000-simpoints-20221013.tar",
+        "simpoint_interval": 1000000,
+        "warmup_interval": 1000000,
+        "code_examples": [],
+        "license": "",
+        "author": [],
+        "source_url": "",
+        "resource_version": "0.2.0",
+        "gem5_versions": [
+            "23.0"
+        ],
+        "workload_name": "x86-print-this-15000-with-simpoints",
+        "example_usage": "get_resource(resource_name=\"x86-print-this-1500-simpoints\")",
+        "workloads": [
+            "x86-print-this-15000-with-simpoints",
+            "x86-print-this-15000-with-simpoints-and-checkpoint"
+        ]
+    },
+    {
+        "category": "file",
+        "id": "test-duplicate",
+        "description": "Simpoints for running the 'x86-print-this' resource with the parameters `\"print this\" 15000`. This is encapsulated in the 'x86-print-this-15000-with-simpoints' workload.",
+        "architecture": "X86",
+        "size": 10240,
+        "tags": [],
+        "is_zipped": false,
+        "md5sum": "3fcffe3956c8a95e3fb82e232e2b41fb",
+        "is_tar_archive": true,
+        "url": "http://dist.gem5.org/dist/develop/simpoints/x86-print-this-15000-simpoints-20221013.tar",
+        "simpoint_interval": 1000000,
+        "warmup_interval": 1000000,
+        "code_examples": [],
+        "license": "",
+        "author": [],
+        "source_url": "",
+        "resource_version": "0.2.0",
+        "gem5_versions": [
+            "23.0"
+        ],
+        "workload_name": "x86-print-this-15000-with-simpoints",
+        "example_usage": "get_resource(resource_name=\"x86-print-this-1500-simpoints\")",
+        "workloads": [
+            "x86-print-this-15000-with-simpoints",
+            "x86-print-this-15000-with-simpoints-and-checkpoint"
+        ]
+    },
+    {
+        "category": "file",
+        "id": "aa",
+        "description": "Simpoints for running the 'x86-print-this' resource with the parameters `\"print this\" 15000`. This is encapsulated in the 'x86-print-this-15000-with-simpoints' workload.",
+        "architecture": "X86",
+        "size": 10240,
+        "tags": [],
+        "is_zipped": false,
+        "md5sum": "3fcffe3956c8a95e3fb82e232e2b41fb",
+        "is_tar_archive": true,
+        "url": "http://dist.gem5.org/dist/develop/simpoints/x86-print-this-15000-simpoints-20221013.tar",
+        "simpoint_interval": 1000000,
+        "warmup_interval": 1000000,
+        "code_examples": [],
+        "license": "",
+        "author": [],
+        "source_url": "",
+        "resource_version": "0.2.0",
+        "gem5_versions": [
+            "23.0"
+        ],
+        "workload_name": "x86-print-this-15000-with-simpoints",
+        "example_usage": "get_resource(resource_name=\"x86-print-this-1500-simpoints\")",
+        "workloads": [
+            "x86-print-this-15000-with-simpoints",
+            "x86-print-this-15000-with-simpoints-and-checkpoint"
+        ]
+    },
+    {
+        "category": "file",
+        "id": "asfsaf",
+        "description": "Simpoints for running the 'x86-print-this' resource with the parameters `\"print this\" 15000`. This is encapsulated in the 'x86-print-this-15000-with-simpoints' workload.",
+        "architecture": "X86",
+        "size": 10240,
+        "tags": [],
+        "is_zipped": false,
+        "md5sum": "3fcffe3956c8a95e3fb82e232e2b41fb",
+        "is_tar_archive": true,
+        "url": "http://dist.gem5.org/dist/develop/simpoints/x86-print-this-15000-simpoints-20221013.tar",
+        "simpoint_interval": 1000000,
+        "warmup_interval": 1000000,
+        "code_examples": [],
+        "license": "",
+        "author": [],
+        "source_url": "",
+        "resource_version": "0.2.0",
+        "gem5_versions": [
+            "23.0"
+        ],
+        "workload_name": "x86-print-this-15000-with-simpoints",
+        "example_usage": "get_resource(resource_name=\"x86-print-this-1500-simpoints\")",
+        "workloads": [
+            "x86-print-this-15000-with-simpoints",
+            "x86-print-this-15000-with-simpoints-and-checkpoint"
+        ]
+    },
+    {
+        "category": "file",
+        "id": "simpoint-resource",
+        "description": "Simpoints for running the 'x86-print-this' resource with the parameters `\"print this\" 15000`. This is encapsulated in the 'x86-print-this-15000-with-simpoints' workload.",
+        "architecture": "X86",
+        "size": 10240,
+        "tags": [],
+        "is_zipped": false,
+        "md5sum": "3fcffe3956c8a95e3fb82e232e2b41fb",
+        "is_tar_archive": true,
+        "url": "http://dist.gem5.org/dist/develop/simpoints/x86-print-this-15000-simpoints-20221013.tar",
+        "simpoint_interval": 1000000,
+        "warmup_interval": 1000000,
+        "code_examples": [],
+        "license": "",
+        "author": [],
+        "source_url": "",
+        "resource_version": "0.2.0",
+        "gem5_versions": [
+            "23.0"
+        ],
+        "workload_name": "x86-print-this-15000-with-simpoints",
+        "example_usage": "get_resource(resource_name=\"x86-print-this-1500-simpoints\")",
+        "workloads": [
+            "x86-print-this-15000-with-simpoints",
+            "x86-print-this-15000-with-simpoints-and-checkpoint"
+        ]
+    },
+    {
+        "category": "file",
+        "id": "bat43f34fman",
+        "description": "Simpoints for running the 'x86-print-this' resource with the parameters `\"print this\" 15000`. This is encapsulated in the 'x86-print-this-15000-with-simpoints' workload.",
+        "architecture": "X86",
+        "size": 10240,
+        "tags": [],
+        "is_zipped": false,
+        "md5sum": "3fcffe3956c8a95e3fb82e232e2b41fb",
+        "is_tar_archive": true,
+        "url": "http://dist.gem5.org/dist/develop/simpoints/x86-print-this-15000-simpoints-20221013.tar",
+        "simpoint_interval": 1000000,
+        "warmup_interval": 1000000,
+        "code_examples": [],
+        "license": "",
+        "author": [],
+        "source_url": "",
+        "resource_version": "0.2.0",
+        "gem5_versions": [
+            "23.0"
+        ],
+        "workload_name": "x86-print-this-15000-with-simpoints",
+        "example_usage": "get_resource(resource_name=\"x86-print-this-1500-simpoints\")",
+        "workloads": [
+            "x86-print-this-15000-with-simpoints",
+            "x86-print-this-15000-with-simpoints-and-checkpoint"
+        ]
+    },
+    {
+        "category": "file",
+        "id": "adadadas",
+        "description": "Simpoints for running the 'x86-print-this' resource with the parameters `\"print this\" 15000`. This is encapsulated in the 'x86-print-this-15000-with-simpoints' workload.",
+        "architecture": "X86",
+        "size": 10240,
+        "tags": [],
+        "is_zipped": false,
+        "md5sum": "3fcffe3956c8a95e3fb82e232e2b41fb",
+        "is_tar_archive": true,
+        "url": "http://dist.gem5.org/dist/develop/simpoints/x86-print-this-15000-simpoints-20221013.tar",
+        "simpoint_interval": 1000000,
+        "warmup_interval": 1000000,
+        "code_examples": [],
+        "license": "",
+        "author": [],
+        "source_url": "",
+        "resource_version": "0.2.0",
+        "gem5_versions": [
+            "23.0"
+        ],
+        "workload_name": "x86-print-this-15000-with-simpoints",
+        "example_usage": "get_resource(resource_name=\"x86-print-this-1500-simpoints\")",
+        "workloads": [
+            "x86-print-this-15000-with-simpoints",
+            "x86-print-this-15000-with-simpoints-and-checkpoint"
+        ]
+    },
+    {
+        "category": "disk-image",
+        "id": "x86-ubuntu-18.04-img",
+        "description": "This is a test resource",
+        "architecture": "X86",
+        "size": 688119691,
+        "tags": [
+            "x86",
+            "fullsystem"
+        ],
+        "is_zipped": true,
+        "md5sum": "90e363abf0ddf22eefa2c7c5c9391c49",
+        "source": "src/x86-ubuntu",
+        "url": "http://dist.gem5.org/dist/develop/images/x86/ubuntu-18-04/x86-ubuntu.img.gz",
+        "root_partition": "1",
+        "code_examples": [],
+        "license": "",
+        "author": [
+            "Ayaz Akram"
+        ],
+        "source_url": "https://github.com/gem5/gem5-resources/tree/develop/src/x86-ubuntu",
+        "resource_version": "2.0.0",
+        "gem5_versions": [
+            "23.0"
+        ],
+        "example_usage": "get_resource(resource_name=\"x86-ubuntu-18.04-img\")"
+    }
+]
diff --git a/tests/pyunit/stdlib/resources/refs/workload-checks-custom-workload.json b/tests/pyunit/stdlib/resources/refs/workload-checks-custom-workload.json
index a9dd2aaa46..8349b92658 100644
--- a/tests/pyunit/stdlib/resources/refs/workload-checks-custom-workload.json
+++ b/tests/pyunit/stdlib/resources/refs/workload-checks-custom-workload.json
@@ -1,17 +1,16 @@
-{
-        "version" : null,
-        "url_base" : "http://dist.gem5.org/dist/v22-0",
-        "previous-versions" : {},
-        "resources": [
-        {
-            "type" : "binary",
-            "name" : "x86-hello64-static",
-            "documentation" : "A 'Hello World!' binary.",
-            "architecture" : "X86",
-            "is_zipped" :  false,
-            "md5sum" : "dbf120338b37153e3334603970cebd8c",
-            "url" : "{url_base}/test-progs/hello/bin/x86/linux/hello64-static",
-            "source" : "src/simple"
-        }
-    ]
-}
+[
+    {
+        "category": "binary",
+        "id": "x86-hello64-static",
+        "description": "A 'Hello World!' binary.",
+        "architecture": "X86",
+        "is_zipped": false,
+        "md5sum": "dbf120338b37153e3334603970cebd8c",
+        "url": "{url_base}/test-progs/hello/bin/x86/linux/hello64-static",
+        "source": "src/simple",
+        "resource_version": "1.0.0",
+        "gem5_versions": [
+            "23.0"
+        ]
+    }
+]
diff --git a/tests/pyunit/stdlib/resources/refs/workload-checks.json b/tests/pyunit/stdlib/resources/refs/workload-checks.json
index 4f7e76bfb5..d19396bf8c 100644
--- a/tests/pyunit/stdlib/resources/refs/workload-checks.json
+++ b/tests/pyunit/stdlib/resources/refs/workload-checks.json
@@ -1,40 +1,48 @@
-{
-    "url_base" : "http://dist.gem5.org/dist/v22-0",
-    "previous-versions" : {},
-    "resources": [
-        {
-            "type" : "kernel",
-            "name" : "x86-linux-kernel-5.2.3",
-            "documentation" : "The linux kernel (v5.2.3), compiled to X86.",
-            "architecture" : "X86",
-            "is_zipped" : false,
-            "md5sum" : "4838c99b77d33c8307b939c16624e4ac",
-            "url" : "{url_base}/kernels/x86/static/vmlinux-5.2.3",
-            "source" : "src/linux-kernel"
+[
+    {
+        "category": "kernel",
+        "id": "x86-linux-kernel-5.2.3",
+        "description": "The linux kernel (v5.2.3), compiled to X86.",
+        "architecture": "X86",
+        "is_zipped": false,
+        "md5sum": "4838c99b77d33c8307b939c16624e4ac",
+        "url": "{url_base}/kernels/x86/static/vmlinux-5.2.3",
+        "source": "src/linux-kernel",
+        "resource_version": "1.0.0",
+        "gem5_versions": [
+            "23.0"
+        ]
+    },
+    {
+        "category": "disk-image",
+        "id": "x86-ubuntu-18.04-img",
+        "description": "A disk image containing Ubuntu 18.04 for x86..",
+        "architecture": "X86",
+        "is_zipped": true,
+        "md5sum": "90e363abf0ddf22eefa2c7c5c9391c49",
+        "url": "{url_base}/images/x86/ubuntu-18-04/x86-ubuntu.img.gz",
+        "source": "src/x86-ubuntu",
+        "root_partition": "1",
+        "resource_version": "1.0.0",
+        "gem5_versions": [
+            "23.0"
+        ]
+    },
+    {
+        "category": "workload",
+        "id": "simple-boot",
+        "description": "Description of workload here",
+        "function": "set_kernel_disk_workload",
+        "resources": {
+            "kernel": "x86-linux-kernel-5.2.3",
+            "disk-image": "x86-ubuntu-18.04-img"
         },
-        {
-            "type" : "disk-image",
-            "name" : "x86-ubuntu-18.04-img",
-            "documentation" : "A disk image containing Ubuntu 18.04 for x86..",
-            "architecture" : "X86",
-            "is_zipped" : true,
-            "md5sum" : "90e363abf0ddf22eefa2c7c5c9391c49",
-            "url" : "{url_base}/images/x86/ubuntu-18-04/x86-ubuntu.img.gz",
-            "source" : "src/x86-ubuntu",
-            "root_partition": "1"
+        "additional_params": {
+            "readfile_contents": "echo 'Boot successful'; m5 exit"
         },
-        {
-            "type" : "workload",
-            "name" : "simple-boot",
-            "documentation" : "Description of workload here",
-            "function" : "set_kernel_disk_workload",
-            "resources" : {
-                "kernel" : "x86-linux-kernel-5.2.3",
-                "disk_image" : "x86-ubuntu-18.04-img"
-            },
-            "additional_params" : {
-                "readfile_contents" : "echo 'Boot successful'; m5 exit"
-            }
-        }
-    ]
-}
+        "resource_version": "1.0.0",
+        "gem5_versions": [
+            "23.0"
+        ]
+    }
+]

From 4434d4897368b48e464ed70bbdd5a45d1c4a326f Mon Sep 17 00:00:00 2001
From: Giacomo Travaglini <giacomo.travaglini@arm.com>
Date: Fri, 2 Jun 2023 09:24:42 +0100
Subject: [PATCH 471/492] arch-arm: Apply FEAT_IDST to missing ID registers

When FEAT_IDST got implemented [1], we forgot to add the
logic for AArch64 ID registers tracking AArch32 state/capabilities

[1]: https://gem5-review.googlesource.com/c/public/gem5/+/70723

Signed-off-by: Giacomo Travaglini <giacomo.travaglini@arm.com>
Change-Id: I19bddf67ecc379a14f91cfede385692536982101
Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/71178
Reviewed-by: Jason Lowe-Power <power.jg@gmail.com>
Tested-by: kokoro <noreply+kokoro@google.com>
Reviewed-by: Richard Cooper <richard.cooper@arm.com>
Reviewed-by: Bobby Bruce <bbruce@ucdavis.edu>
Maintainer: Bobby Bruce <bbruce@ucdavis.edu>
Maintainer: Jason Lowe-Power <power.jg@gmail.com>
---
 src/arch/arm/regs/misc.cc | 21 +++++++++++++++++++++
 1 file changed, 21 insertions(+)

diff --git a/src/arch/arm/regs/misc.cc b/src/arch/arm/regs/misc.cc
index f32aa7230c..f1c69cc007 100644
--- a/src/arch/arm/regs/misc.cc
+++ b/src/arch/arm/regs/misc.cc
@@ -3911,83 +3911,104 @@ ISA::initializeMiscRegMetadata()
       .mapsTo(MISCREG_MIDR);
     InitReg(MISCREG_MPIDR_EL1)
       .allPrivileges().exceptUserMode().writes(0)
+      .faultRead(EL0, faultIdst)
       .mapsTo(MISCREG_MPIDR);
     InitReg(MISCREG_REVIDR_EL1)
+      .faultRead(EL0, faultIdst)
       .faultRead(EL1, HCR_TRAP(tid1))
       .allPrivileges().exceptUserMode().writes(0);
     InitReg(MISCREG_ID_PFR0_EL1)
       .allPrivileges().exceptUserMode().writes(0)
+      .faultRead(EL0, faultIdst)
       .faultRead(EL1, HCR_TRAP(tid3))
       .mapsTo(MISCREG_ID_PFR0);
     InitReg(MISCREG_ID_PFR1_EL1)
       .allPrivileges().exceptUserMode().writes(0)
+      .faultRead(EL0, faultIdst)
       .faultRead(EL1, HCR_TRAP(tid3))
       .mapsTo(MISCREG_ID_PFR1);
     InitReg(MISCREG_ID_DFR0_EL1)
       .allPrivileges().exceptUserMode().writes(0)
+      .faultRead(EL0, faultIdst)
       .faultRead(EL1, HCR_TRAP(tid3))
       .mapsTo(MISCREG_ID_DFR0);
     InitReg(MISCREG_ID_AFR0_EL1)
       .allPrivileges().exceptUserMode().writes(0)
+      .faultRead(EL0, faultIdst)
       .faultRead(EL1, HCR_TRAP(tid3))
       .mapsTo(MISCREG_ID_AFR0);
     InitReg(MISCREG_ID_MMFR0_EL1)
       .allPrivileges().exceptUserMode().writes(0)
+      .faultRead(EL0, faultIdst)
       .faultRead(EL1, HCR_TRAP(tid3))
       .mapsTo(MISCREG_ID_MMFR0);
     InitReg(MISCREG_ID_MMFR1_EL1)
       .allPrivileges().exceptUserMode().writes(0)
+      .faultRead(EL0, faultIdst)
       .faultRead(EL1, HCR_TRAP(tid3))
       .mapsTo(MISCREG_ID_MMFR1);
     InitReg(MISCREG_ID_MMFR2_EL1)
       .allPrivileges().exceptUserMode().writes(0)
+      .faultRead(EL0, faultIdst)
       .faultRead(EL1, HCR_TRAP(tid3))
       .mapsTo(MISCREG_ID_MMFR2);
     InitReg(MISCREG_ID_MMFR3_EL1)
       .allPrivileges().exceptUserMode().writes(0)
+      .faultRead(EL0, faultIdst)
       .faultRead(EL1, HCR_TRAP(tid3))
       .mapsTo(MISCREG_ID_MMFR3);
     InitReg(MISCREG_ID_MMFR4_EL1)
       .allPrivileges().exceptUserMode().writes(0)
+      .faultRead(EL0, faultIdst)
       .faultRead(EL1, HCR_TRAP(tid3))
       .mapsTo(MISCREG_ID_MMFR4);
     InitReg(MISCREG_ID_ISAR0_EL1)
       .allPrivileges().exceptUserMode().writes(0)
+      .faultRead(EL0, faultIdst)
       .faultRead(EL1, HCR_TRAP(tid3))
       .mapsTo(MISCREG_ID_ISAR0);
     InitReg(MISCREG_ID_ISAR1_EL1)
       .allPrivileges().exceptUserMode().writes(0)
+      .faultRead(EL0, faultIdst)
       .faultRead(EL1, HCR_TRAP(tid3))
       .mapsTo(MISCREG_ID_ISAR1);
     InitReg(MISCREG_ID_ISAR2_EL1)
       .allPrivileges().exceptUserMode().writes(0)
+      .faultRead(EL0, faultIdst)
       .faultRead(EL1, HCR_TRAP(tid3))
       .mapsTo(MISCREG_ID_ISAR2);
     InitReg(MISCREG_ID_ISAR3_EL1)
       .allPrivileges().exceptUserMode().writes(0)
+      .faultRead(EL0, faultIdst)
       .faultRead(EL1, HCR_TRAP(tid3))
       .mapsTo(MISCREG_ID_ISAR3);
     InitReg(MISCREG_ID_ISAR4_EL1)
       .allPrivileges().exceptUserMode().writes(0)
+      .faultRead(EL0, faultIdst)
       .faultRead(EL1, HCR_TRAP(tid3))
       .mapsTo(MISCREG_ID_ISAR4);
     InitReg(MISCREG_ID_ISAR5_EL1)
       .allPrivileges().exceptUserMode().writes(0)
+      .faultRead(EL0, faultIdst)
       .faultRead(EL1, HCR_TRAP(tid3))
       .mapsTo(MISCREG_ID_ISAR5);
     InitReg(MISCREG_ID_ISAR6_EL1)
       .allPrivileges().exceptUserMode().writes(0)
+      .faultRead(EL0, faultIdst)
       .faultRead(EL1, HCR_TRAP(tid3))
       .mapsTo(MISCREG_ID_ISAR6);
     InitReg(MISCREG_MVFR0_EL1)
+      .faultRead(EL0, faultIdst)
       .faultRead(EL1, HCR_TRAP(tid3))
       .allPrivileges().exceptUserMode().writes(0)
       .mapsTo(MISCREG_MVFR0);
     InitReg(MISCREG_MVFR1_EL1)
+      .faultRead(EL0, faultIdst)
       .faultRead(EL1, HCR_TRAP(tid3))
       .allPrivileges().exceptUserMode().writes(0)
       .mapsTo(MISCREG_MVFR1);
     InitReg(MISCREG_MVFR2_EL1)
+      .faultRead(EL0, faultIdst)
       .faultRead(EL1, HCR_TRAP(tid3))
       .allPrivileges().exceptUserMode().writes(0);
     InitReg(MISCREG_ID_AA64PFR0_EL1)

From 3322127793e1d7fbaa9f28df14a3f5f72fa772f6 Mon Sep 17 00:00:00 2001
From: KUNAL PAI <kunpai@ucdavis.edu>
Date: Wed, 7 Jun 2023 15:30:15 -0700
Subject: [PATCH 472/492] tests: Fix bugs related to gem5 Vision

This patch fixes refs under tests/pyunit/stdlib/resources.

Removes instances of {url_base} in refs.

Also, renames two refs: mongo_mock and mongo_dup_mock
to mongo-mock and mongo-dup-mock to follow naming
convention of other refs.

Change-Id: If115114bc7a89764e7c546b77a93d36d6a3b5f8a
Co-authored-by: Parth Shah <helloparthshah@gmail.com>
Co-authored-by: Harshil2107 <harshilp2107@gmail.com>
Co-authored-by: aarsli <arsli@ucdavis.edu>
Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/71378
Tested-by: kokoro <noreply+kokoro@google.com>
Reviewed-by: Bobby Bruce <bbruce@ucdavis.edu>
Maintainer: Bobby Bruce <bbruce@ucdavis.edu>
---
 tests/pyunit/stdlib/resources/pyunit_client_wrapper_checks.py | 4 ++--
 .../refs/{mongo_dup_mock.json => mongo-dup-mock.json}         | 0
 .../resources/refs/{mongo_mock.json => mongo-mock.json}       | 0
 .../resources/refs/workload-checks-custom-workload.json       | 2 +-
 tests/pyunit/stdlib/resources/refs/workload-checks.json       | 4 ++--
 5 files changed, 5 insertions(+), 5 deletions(-)
 rename tests/pyunit/stdlib/resources/refs/{mongo_dup_mock.json => mongo-dup-mock.json} (100%)
 rename tests/pyunit/stdlib/resources/refs/{mongo_mock.json => mongo-mock.json} (100%)

diff --git a/tests/pyunit/stdlib/resources/pyunit_client_wrapper_checks.py b/tests/pyunit/stdlib/resources/pyunit_client_wrapper_checks.py
index 96aadf6879..344f67b8b0 100644
--- a/tests/pyunit/stdlib/resources/pyunit_client_wrapper_checks.py
+++ b/tests/pyunit/stdlib/resources/pyunit_client_wrapper_checks.py
@@ -82,12 +82,12 @@ mock_config_combined = {
 
 mock_json = {}
 
-with open(Path(__file__).parent / "refs/mongo_mock.json", "r") as f:
+with open(Path(__file__).parent / "refs/mongo-mock.json", "r") as f:
     mock_json = json.load(f)
 
 duplicate_mock_json = {}
 
-with open(Path(__file__).parent / "refs/mongo_dup_mock.json", "r") as f:
+with open(Path(__file__).parent / "refs/mongo-dup-mock.json", "r") as f:
     duplicate_mock_json = json.load(f)
 
 
diff --git a/tests/pyunit/stdlib/resources/refs/mongo_dup_mock.json b/tests/pyunit/stdlib/resources/refs/mongo-dup-mock.json
similarity index 100%
rename from tests/pyunit/stdlib/resources/refs/mongo_dup_mock.json
rename to tests/pyunit/stdlib/resources/refs/mongo-dup-mock.json
diff --git a/tests/pyunit/stdlib/resources/refs/mongo_mock.json b/tests/pyunit/stdlib/resources/refs/mongo-mock.json
similarity index 100%
rename from tests/pyunit/stdlib/resources/refs/mongo_mock.json
rename to tests/pyunit/stdlib/resources/refs/mongo-mock.json
diff --git a/tests/pyunit/stdlib/resources/refs/workload-checks-custom-workload.json b/tests/pyunit/stdlib/resources/refs/workload-checks-custom-workload.json
index 8349b92658..a7e9c9d84f 100644
--- a/tests/pyunit/stdlib/resources/refs/workload-checks-custom-workload.json
+++ b/tests/pyunit/stdlib/resources/refs/workload-checks-custom-workload.json
@@ -6,7 +6,7 @@
         "architecture": "X86",
         "is_zipped": false,
         "md5sum": "dbf120338b37153e3334603970cebd8c",
-        "url": "{url_base}/test-progs/hello/bin/x86/linux/hello64-static",
+        "url": "http://dist.gem5.org/dist/develop/test-progs/hello/bin/x86/linux/hello64-static",
         "source": "src/simple",
         "resource_version": "1.0.0",
         "gem5_versions": [
diff --git a/tests/pyunit/stdlib/resources/refs/workload-checks.json b/tests/pyunit/stdlib/resources/refs/workload-checks.json
index d19396bf8c..d41001d26c 100644
--- a/tests/pyunit/stdlib/resources/refs/workload-checks.json
+++ b/tests/pyunit/stdlib/resources/refs/workload-checks.json
@@ -6,7 +6,7 @@
         "architecture": "X86",
         "is_zipped": false,
         "md5sum": "4838c99b77d33c8307b939c16624e4ac",
-        "url": "{url_base}/kernels/x86/static/vmlinux-5.2.3",
+        "url": "http://dist.gem5.org/dist/develop/kernels/x86/static/vmlinux-5.2.3",
         "source": "src/linux-kernel",
         "resource_version": "1.0.0",
         "gem5_versions": [
@@ -20,7 +20,7 @@
         "architecture": "X86",
         "is_zipped": true,
         "md5sum": "90e363abf0ddf22eefa2c7c5c9391c49",
-        "url": "{url_base}/images/x86/ubuntu-18-04/x86-ubuntu.img.gz",
+        "url": "http://dist.gem5.org/dist/develop/images/x86/ubuntu-18-04/x86-ubuntu.img.gz",
         "source": "src/x86-ubuntu",
         "root_partition": "1",
         "resource_version": "1.0.0",

From 8219b1961a66f786588860810135a7e405bd70c8 Mon Sep 17 00:00:00 2001
From: Roger Chang <rogerycchang@google.com>
Date: Mon, 5 Jun 2023 14:32:18 +0800
Subject: [PATCH 473/492] scons: Fix grpc protobuf actions

The change will fix the proto import issue and build issue with
--no-duplicate-sources options, more details please reference:
https://gem5-review.googlesource.com/c/public/gem5/+/64491.

Change-Id: I259413f7739f89598dcd42c3f2e1e865cec3de43
Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/71318
Reviewed-by: Jason Lowe-Power <power.jg@gmail.com>
Tested-by: kokoro <noreply+kokoro@google.com>
Maintainer: Jason Lowe-Power <power.jg@gmail.com>
Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/71338
Reviewed-by: Bobby Bruce <bbruce@ucdavis.edu>
Maintainer: Bobby Bruce <bbruce@ucdavis.edu>
---
 src/SConscript | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/src/SConscript b/src/SConscript
index 13f08d2f5a..d26bf49cce 100644
--- a/src/SConscript
+++ b/src/SConscript
@@ -280,9 +280,9 @@ def protoc_grpc_emitter(target, source, env):
     root, ext = os.path.splitext(source[0].get_abspath())
     return [root + '.grpc.pb.cc', root + '.grpc.pb.h'], source
 
-protoc_grpc_action=MakeAction('${PROTOC} --grpc_out ${BUILDDIR} '
-        '--plugin=protoc-gen-grpc=${PROTOC_GRPC} --proto_path ${BUILDDIR} '
-        '${SOURCE.get_abspath()}',
+protoc_grpc_action=MakeAction('${PROTOC} --grpc_out ${TARGET.dir.abspath} '
+        '--plugin=protoc-gen-grpc=${PROTOC_GRPC} '
+        '--proto_path ${SOURCE.dir.abspath} ${SOURCE.abspath}',
         Transform("PROTOC"))
 
 env.Append(BUILDERS={'GrpcProtoBufCC' : Builder(

From d33c41118b1636b55a5aadb34e5724c9be673896 Mon Sep 17 00:00:00 2001
From: Xuan Hu <huxuan@bosc.ac.cn>
Date: Tue, 17 Jan 2023 21:08:57 +0800
Subject: [PATCH 474/492] arch-riscv,cpu-minor: Add MinorDefaultVecFU for
 risc-v v-ext

Change-Id: Id5c5ae5fa1901154cadeb0a4958703f3f15d491f
Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/67295
Reviewed-by: Jason Lowe-Power <power.jg@gmail.com>
Maintainer: Jason Lowe-Power <power.jg@gmail.com>
Tested-by: kokoro <noreply+kokoro@google.com>
Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/71398
Maintainer: Bobby Bruce <bbruce@ucdavis.edu>
---
 src/cpu/minor/BaseMinorCPU.py | 28 ++++++++++++++++++++++++++++
 1 file changed, 28 insertions(+)

diff --git a/src/cpu/minor/BaseMinorCPU.py b/src/cpu/minor/BaseMinorCPU.py
index bd27b92540..c20a310447 100644
--- a/src/cpu/minor/BaseMinorCPU.py
+++ b/src/cpu/minor/BaseMinorCPU.py
@@ -250,6 +250,33 @@ class MinorDefaultMiscFU(MinorFU):
     opLat = 1
 
 
+class MinorDefaultVecFU(MinorFU):
+    opClasses = minorMakeOpClassSet(
+        [
+            "VectorUnitStrideLoad",
+            "VectorUnitStrideStore",
+            "VectorUnitStrideMaskLoad",
+            "VectorUnitStrideMaskStore",
+            "VectorStridedLoad",
+            "VectorStridedStore",
+            "VectorIndexedLoad",
+            "VectorIndexedStore",
+            "VectorUnitStrideFaultOnlyFirstLoad",
+            "VectorWholeRegisterLoad",
+            "VectorWholeRegisterStore",
+            "VectorIntegerArith",
+            "VectorFloatArith",
+            "VectorFloatConvert",
+            "VectorIntegerReduce",
+            "VectorFloatReduce",
+            "VectorMisc",
+            "VectorIntegerExtension",
+            "VectorConfig",
+        ]
+    )
+    opLat = 1
+
+
 class MinorDefaultFUPool(MinorFUPool):
     funcUnits = [
         MinorDefaultIntFU(),
@@ -260,6 +287,7 @@ class MinorDefaultFUPool(MinorFUPool):
         MinorDefaultPredFU(),
         MinorDefaultMemFU(),
         MinorDefaultMiscFU(),
+        MinorDefaultVecFU(),
     ]
 
 

From 1ae409d627f8dc1945bb28ece10747f4744b1b9c Mon Sep 17 00:00:00 2001
From: Roger Chang <rogerycchang@google.com>
Date: Sat, 3 Jun 2023 10:30:38 +0800
Subject: [PATCH 475/492] stdlib: Add U74VecFU to U74CPU

This change is to elimilate the warning message from U74CPU.

Change-Id: I7a5d0cd0b2955e54ed14fc1ac6f7127bd7f0604b
Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/71238
Reviewed-by: Jason Lowe-Power <power.jg@gmail.com>
Tested-by: kokoro <noreply+kokoro@google.com>
Maintainer: Jason Lowe-Power <power.jg@gmail.com>
Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/71399
Maintainer: Bobby Bruce <bbruce@ucdavis.edu>
---
 src/python/gem5/prebuilt/riscvmatched/riscvmatched_core.py | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/src/python/gem5/prebuilt/riscvmatched/riscvmatched_core.py b/src/python/gem5/prebuilt/riscvmatched/riscvmatched_core.py
index 48291bf670..4b8d2c1d32 100644
--- a/src/python/gem5/prebuilt/riscvmatched/riscvmatched_core.py
+++ b/src/python/gem5/prebuilt/riscvmatched/riscvmatched_core.py
@@ -75,6 +75,10 @@ class U74MiscFU(MinorDefaultMiscFU):
     pass
 
 
+class U74VecFU(MinorDefaultVecFU):
+    pass
+
+
 class U74FUPool(MinorFUPool):
     funcUnits = [
         U74IntFU(),
@@ -86,6 +90,7 @@ class U74FUPool(MinorFUPool):
         U74MemReadFU(),
         U74MemWriteFU(),
         U74MiscFU(),
+        U74VecFU(),
     ]
 
 

From f78471fb81db40c836b89a015a61b8311b4b98fd Mon Sep 17 00:00:00 2001
From: Mahyar Samani <msamani@ucdavis.edu>
Date: Thu, 8 Jun 2023 14:56:50 -0700
Subject: [PATCH 476/492] tests: Reducing json stat dump size.

This change reduces the number of stats dumped as json in
traffic_gen tests.

Change-Id: I94becb2e6d5da6096271cf7893ff2b380314da06
Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/71402
Maintainer: Bobby Bruce <bbruce@ucdavis.edu>
Reviewed-by: Bobby Bruce <bbruce@ucdavis.edu>
Tested-by: kokoro <noreply+kokoro@google.com>
---
 tests/gem5/traffic_gen/simple_traffic_run.py | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/tests/gem5/traffic_gen/simple_traffic_run.py b/tests/gem5/traffic_gen/simple_traffic_run.py
index 7c0f18865a..3766d7314f 100644
--- a/tests/gem5/traffic_gen/simple_traffic_run.py
+++ b/tests/gem5/traffic_gen/simple_traffic_run.py
@@ -209,7 +209,9 @@ print("Beginning simulation!")
 exit_event = m5.simulate()
 print(f"Exiting @ tick {m5.curTick()} because {exit_event.getCause()}.")
 
-simstats = get_simstat(root, prepare_stats=True)
+simstats = get_simstat(
+    [core.generator for core in generator.get_cores()], prepare_stats=True
+)
 json_output = Path(m5.options.outdir) / "output.json"
 with open(json_output, "w") as stats_file:
     simstats.dump(stats_file, indent=2)

From d89ba08eb031dff1704261441a4eb21104ca7694 Mon Sep 17 00:00:00 2001
From: Jason Lowe-Power <jason@lowepower.com>
Date: Mon, 12 Jun 2023 15:31:04 -0700
Subject: [PATCH 477/492] python: Ignore -s as gem5 option

This enables more compatibility with the normal python binary. This is
needed to get multiprocessing to work on some systems.

Change-Id: Ibb946136d153979bf54a773060010a0ae479a9d1
Signed-off-by: Jason Lowe-Power <jason@lowepower.com>
Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/71518
Maintainer: Bobby Bruce <bbruce@ucdavis.edu>
Reviewed-by: Bobby Bruce <bbruce@ucdavis.edu>
Tested-by: kokoro <noreply+kokoro@google.com>
---
 src/python/m5/main.py | 7 +++++++
 1 file changed, 7 insertions(+)

diff --git a/src/python/m5/main.py b/src/python/m5/main.py
index a68279b633..4701dfa5ea 100644
--- a/src/python/m5/main.py
+++ b/src/python/m5/main.py
@@ -193,6 +193,13 @@ def parse_options():
         callback=collect_args,
     )
 
+    option(
+        "-s",
+        action="store_true",
+        help="IGNORED, only for compatibility with python. don't"
+        "add user site directory to sys.path; also PYTHONNOUSERSITE",
+    )
+
     # Statistics options
     group("Statistics Options")
     option(

From f4559a703f16d98c9142bba6c51bc9d85ad684ec Mon Sep 17 00:00:00 2001
From: Ayaz Akram <yazakram@ucdavis.edu>
Date: Wed, 14 Jun 2023 11:03:33 -0700
Subject: [PATCH 478/492] configs: Fix SPEC benchmarks example scripts

This small change fixes the gem5_library example
scripts for SPEC benchmarks to make them compatible
with the latest version of the std library.

Change-Id: I3da9745f0ee6b253871e32082e135e0fa4040108
Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/71718
Maintainer: Jason Lowe-Power <power.jg@gmail.com>
Tested-by: kokoro <noreply+kokoro@google.com>
Reviewed-by: Jason Lowe-Power <power.jg@gmail.com>
---
 configs/example/gem5_library/x86-spec-cpu2006-benchmarks.py | 2 +-
 configs/example/gem5_library/x86-spec-cpu2017-benchmarks.py | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/configs/example/gem5_library/x86-spec-cpu2006-benchmarks.py b/configs/example/gem5_library/x86-spec-cpu2006-benchmarks.py
index 60d93ebe77..10d5da0adb 100644
--- a/configs/example/gem5_library/x86-spec-cpu2006-benchmarks.py
+++ b/configs/example/gem5_library/x86-spec-cpu2006-benchmarks.py
@@ -262,7 +262,7 @@ board.set_kernel_disk_workload(
     kernel=Resource("x86-linux-kernel-4.19.83"),
     # The location of the x86 SPEC CPU 2017 image
     disk_image=CustomDiskImageResource(
-        args.image, disk_root_partition=args.partition
+        args.image, root_partition=args.partition
     ),
     readfile_contents=command,
 )
diff --git a/configs/example/gem5_library/x86-spec-cpu2017-benchmarks.py b/configs/example/gem5_library/x86-spec-cpu2017-benchmarks.py
index c491c9bce9..cb5f5d19e3 100644
--- a/configs/example/gem5_library/x86-spec-cpu2017-benchmarks.py
+++ b/configs/example/gem5_library/x86-spec-cpu2017-benchmarks.py
@@ -278,7 +278,7 @@ board.set_kernel_disk_workload(
     kernel=Resource("x86-linux-kernel-4.19.83"),
     # The location of the x86 SPEC CPU 2017 image
     disk_image=CustomDiskImageResource(
-        args.image, disk_root_partition=args.partition
+        args.image, root_partition=args.partition
     ),
     readfile_contents=command,
 )

From 328aaa626fbbc3d664086b6dd4bde11bd71f9d5d Mon Sep 17 00:00:00 2001
From: Roger Chang <rogerycchang@google.com>
Date: Tue, 13 Jun 2023 17:25:47 +0800
Subject: [PATCH 479/492] arch-riscv: Fix unexpected behavior of float
 operations in Mac OS

The uint_fast16_t is the integer at least 16 bits size, it can be
32, 64 bits and more. Usually most of the simulations are in the
x86-64 linux host, the size of uint_fast16_t is 64 bits. Therefore,
there is no problem for double precision float operations and it can
pass FloatMM test. However, in the Mac OS, the size of uint_fast16_t
is 16 bits, it will lose the upper bits when converting float
register bits to freg_t and it will generate unexpected results for
FloatMM test.

The change can guarantee that the size of data in freg_t is at least
64 bits and it will not lose any data from floating point to freg_t.

Reference:
https://developer.apple.com/documentation/kernel/uint_fast16_t

https://codebrowser.dev/glibc/glibc/stdlib/stdint.h.html

Change-Id: I3df6610f0903cdee0f56584d6cbdb51ac26c86c8
Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/71519
Maintainer: Bobby Bruce <bbruce@ucdavis.edu>
Tested-by: kokoro <noreply+kokoro@google.com>
Reviewed-by: Bobby Bruce <bbruce@ucdavis.edu>
---
 src/arch/riscv/regs/float.hh | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/arch/riscv/regs/float.hh b/src/arch/riscv/regs/float.hh
index 1654bdb627..4809372070 100644
--- a/src/arch/riscv/regs/float.hh
+++ b/src/arch/riscv/regs/float.hh
@@ -105,7 +105,7 @@ static constexpr float64_t f64(freg_t r) { return r; }
 static constexpr freg_t freg(float16_t f) { return {boxF16(f.v)}; }
 static constexpr freg_t freg(float32_t f) { return {boxF32(f.v)}; }
 static constexpr freg_t freg(float64_t f) { return f; }
-static constexpr freg_t freg(uint_fast16_t f) { return {f}; }
+static constexpr freg_t freg(uint_fast64_t f) { return {f}; }
 
 namespace float_reg
 {

From 82587ce71bbbdc80d3ef6386e07c892f309697a3 Mon Sep 17 00:00:00 2001
From: "Bobby R. Bruce" <bbruce@ucdavis.edu>
Date: Mon, 12 Jun 2023 14:09:15 -0700
Subject: [PATCH 480/492] stdlib: Refactor gem5 Vision/gem5-resources code
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

This patch includes several changes to the gem5 tools interface to the
gem5-resources infrastructure. These are:

* The old download and JSON query functions have been removed from the
  downloader module. These functions were used for directly downloading
  and inspecting the resource JSON file, hosted at
  https://resources.gem5.org/resources. This information is now obtained
  via `gem5.client`. If a resources JSON file is specified as a client,
  it should conform to the new schema:
  https//resources.gem5.org/gem5-resources-schema.json. The old schema
  (pre-v23.0) is no longer valid. Tests have been updated to reflect
  this change. Those which tested these old functions have been removed.
* Unused imports have been removed.
* For the resource query functions, and those tasked with obtaining the
  resources, the parameter `gem5_version` has been added. In all cases
  it does the same thing:
    * It will filter results based on compatibility to the
      `gem5_version` specified. If no resources are compatible the
      latest version of that resource is chosen (though a warning is
      thrown).
    * By default it is set to the current gem5 version.
    * It is optional. If `None`, this filtering functionality is not
      carried out.
    * Tests have been updated to fix the version to “develop” so the
      they do not break between versions.
* The `gem5_version` parameters will filter using a logic which will
  base compatibility on the specificity of the gem5-version specified in
  a resource’s data. If a resource has a compatible gem5-version of
  “v18.4” it will be compatible with any minor/hotfix version within the
  v18.4 release (this can be seen as matching on “v18.4.*.*”.) Likewise,
  if a resource has a compatible gem5-version of “v18.4.1” then it’s
  only compatible with the v18.4.1 release but any of it’s hot fix
  releases (“v18.4.1.*”).
* The ‘list_resources’ function has been updated to use the
  “gem5.client” APIs to get resource information from the clients
  (MongoDB or a JSON file). This has been designed to remain backwards
  compatible to as much as is possible, though, due to schema changes,
  the function does search across all versions of gem5.
* `get_resources` function was added to the `AbstractClient`. This is a
   more general function than `get_resource_by_id`. It was
  primarily created to handle the `list_resources` update but is a
  useful update to the API. The `get_resource_by_id` function has been
  altered to function as a wrapped to the `get_resources` function.
* Removed “GEM5_RESOURCE_JSON” code has been removed. This is no longer
  used.
* Tests have been cleaned up a little bit to be easier to read.
* Some docstrings have been updated.

Things that are left TODO with this code:

* The client_wrapper/client/abstract_client abstractions are rather
  pointless. In particular the client_wrapper and client classes could
  be merged.
* The downloader module no longer does much and should have its
  functions merged into other modules.
* With the addition of the `get_resources` function, much of the code in
  the `AbstractClient` could be simplified.

Change-Id: I0ce48e88b93a2b9db53d4749861fa0b5f9472053
Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/71506
Reviewed-by: Kunal Pai <kunpai@ucdavis.edu>
Maintainer: Jason Lowe-Power <power.jg@gmail.com>
Tested-by: kokoro <noreply+kokoro@google.com>
---
 src/python/gem5/resources/client.py           |  52 +++-
 .../resources/client_api/abstract_client.py   |  56 +++-
 .../gem5/resources/client_api/atlasclient.py  |  22 +-
 .../resources/client_api/client_wrapper.py    |  81 +++++-
 .../gem5/resources/client_api/jsonclient.py   |  38 ++-
 src/python/gem5/resources/downloader.py       | 271 ++----------------
 src/python/gem5/resources/resource.py         |  18 +-
 src/python/gem5/resources/workload.py         |  12 +-
 tests/gem5/configs/download_check.py          |  66 +++--
 .../resources/pyunit_client_wrapper_checks.py | 109 +++----
 ...checks.py => pyunit_json_client_checks.py} |  37 +--
 .../pyunit_obtain_resources_check.py          |  47 +--
 .../pyunit_resource_download_checks.py        |  72 -----
 .../pyunit_resource_specialization.py         |  29 +-
 .../resources/pyunit_workload_checks.py       |  48 +---
 .../stdlib/resources/refs/mongo-mock.json     |   4 +-
 .../resources/refs/obtain-resource.json       |   8 +-
 .../refs/resource-specialization.json         |  18 +-
 .../stdlib/resources/refs/resources.json      |  24 +-
 .../refs/workload-checks-custom-workload.json |  16 --
 .../resources/refs/workload-checks.json       |  20 +-
 21 files changed, 445 insertions(+), 603 deletions(-)
 rename tests/pyunit/stdlib/resources/{pyunit_downloader_checks.py => pyunit_json_client_checks.py} (87%)
 delete mode 100644 tests/pyunit/stdlib/resources/pyunit_resource_download_checks.py
 delete mode 100644 tests/pyunit/stdlib/resources/refs/workload-checks-custom-workload.json

diff --git a/src/python/gem5/resources/client.py b/src/python/gem5/resources/client.py
index bd473eb038..ab8262bf92 100644
--- a/src/python/gem5/resources/client.py
+++ b/src/python/gem5/resources/client.py
@@ -31,6 +31,7 @@ from typing import Optional, Dict, List
 from .client_api.client_wrapper import ClientWrapper
 from gem5.gem5_default_config import config
 from m5.util import inform
+from _m5 import core
 
 
 def getFileContent(file_path: Path) -> Dict:
@@ -49,17 +50,7 @@ def getFileContent(file_path: Path) -> Dict:
 clientwrapper = None
 
 
-def get_resource_json_obj(
-    resource_id,
-    resource_version: Optional[str] = None,
-    clients: Optional[List[str]] = None,
-) -> Dict:
-    """
-    Get the resource json object from the clients wrapper
-    :param resource_id: The resource id
-    :param resource_version: The resource version
-    :param clients: The list of clients to query
-    """
+def _get_clientwrapper():
     global clientwrapper
     if clientwrapper is None:
         # First check if the config file path is provided in the environment variable
@@ -78,7 +69,42 @@ def get_resource_json_obj(
             gem5_config = config
             inform("Using default config")
         clientwrapper = ClientWrapper(gem5_config)
+    return clientwrapper
 
-    return clientwrapper.get_resource_json_obj_from_client(
-        resource_id, resource_version, clients
+
+def list_resources(
+    clients: Optional[List[str]] = None,
+    gem5_version: Optional[str] = core.gem5Version,
+) -> Dict[str, List[str]]:
+    """
+    List all the resources available
+
+    :param clients: The list of clients to query
+    :param gem5_version: The gem5 version of the resource to get. By default,
+    it is the gem5 version of the current build. If set to none, it will return
+    all gem5 versions of the resource.
+    :return: A Python Dict where the key is the resource id and the value is
+    a list of all the supported resource versions.
+    """
+    return _get_clientwrapper().list_resources(clients, gem5_version)
+
+
+def get_resource_json_obj(
+    resource_id,
+    resource_version: Optional[str] = None,
+    clients: Optional[List[str]] = None,
+    gem5_version: Optional[str] = core.gem5Version,
+) -> Dict:
+    """
+    Get the resource json object from the clients wrapper
+    :param resource_id: The resource id
+    :param resource_version: The resource version
+    :param clients: The list of clients to query
+    :param gem5_version: The gem5 versions to filter the resources based on
+    compatibility. By default, it is the gem5 version of the current build.
+    If None, filtering based on compatibility is not performed.
+    """
+
+    return _get_clientwrapper().get_resource_json_obj_from_client(
+        resource_id, resource_version, clients, gem5_version
     )
diff --git a/src/python/gem5/resources/client_api/abstract_client.py b/src/python/gem5/resources/client_api/abstract_client.py
index 74a513fc56..7f8ad6166e 100644
--- a/src/python/gem5/resources/client_api/abstract_client.py
+++ b/src/python/gem5/resources/client_api/abstract_client.py
@@ -25,7 +25,7 @@
 # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 
 from abc import ABC, abstractmethod
-from typing import Any, Dict, List
+from typing import Any, Dict, List, Optional
 import urllib.parse
 
 
@@ -63,9 +63,61 @@ class AbstractClient(ABC):
             return False
 
     @abstractmethod
+    def get_resources(
+        self,
+        resource_id: Optional[str] = None,
+        resource_version: Optional[str] = None,
+        gem5_version: Optional[str] = None,
+    ) -> List[Dict[str, Any]]:
+        """
+        :param resource_id: The ID of the Resource. Optional, if not set, all
+        resources will be returned.
+        :param resource_version: The version of the Resource. Optional, if
+        not set, all resource versions will be returned. Note: If `resource_id`
+        is not set, this parameter will be ignored.
+        :param gem5_version: The version of gem5. Optional, if not set, all
+        versions will be returned.
+        :return: A list of all the Resources with the given ID.
+        """
+        raise NotImplementedError
+
+    def filter_incompatible_resources(
+        self,
+        resources_to_filter: List[Dict[str, Any]],
+        gem5_version: Optional[str] = None,
+    ) -> List[Dict[str, Any]]:
+        """Returns a filtered list resources based on gem5 version
+        compatibility.
+
+        Note: This function assumes if the minor component of
+        a resource's gem5_version is not specified, the resource is compatible
+        with all minor versions of the same major version.
+        Likewise, if no hot-fix component is specified, it is assumed that
+        the resource is compatible with all hot-fix versions of the same
+        minor version.
+
+        * '20.1' would be compatible with gem5 '20.1.1.0' and '20.1.2.0'.
+        * '21.5.2' would be compatible with gem5 '21.5.2.0' and '21.5.2.0'.
+        * '22.3.2.4' would only be compatible with gem5 '22.3.2.4'.
+
+        :param resources_to_filter: The list of resources to filter.
+        :param gem5_version: The gem5 version in which the filtered resources
+        should be compatible. If None, no filtering will be done.
+        :
+        """
+        if not gem5_version:
+            return resources_to_filter
+
+        filtered_resources = []
+        for resource in resources_to_filter:
+            for version in resource["gem5_versions"]:
+                if gem5_version.startswith(version):
+                    filtered_resources.append(resource)
+        return filtered_resources
+
     def get_resources_by_id(self, resource_id: str) -> List[Dict[str, Any]]:
         """
         :param resource_id: The ID of the Resource.
         :return: A list of all the Resources with the given ID.
         """
-        raise NotImplementedError
+        return self.get_resources(resource_id=resource_id)
diff --git a/src/python/gem5/resources/client_api/atlasclient.py b/src/python/gem5/resources/client_api/atlasclient.py
index 4a6e5cf691..7d2a27c3f7 100644
--- a/src/python/gem5/resources/client_api/atlasclient.py
+++ b/src/python/gem5/resources/client_api/atlasclient.py
@@ -64,14 +64,26 @@ class AtlasClient(AbstractClient):
         token = result["access_token"]
         return token
 
-    def get_resources_by_id(self, resource_id: str) -> List[Dict[str, Any]]:
+    def get_resources(
+        self,
+        resource_id: Optional[str] = None,
+        resource_version: Optional[str] = None,
+        gem5_version: Optional[str] = None,
+    ) -> List[Dict[str, Any]]:
         url = f"{self.url}/action/find"
         data = {
             "dataSource": self.dataSource,
             "collection": self.collection,
             "database": self.database,
-            "filter": {"id": resource_id},
         }
+        filter = {}
+        if resource_id:
+            filter["id"] = resource_id
+            if resource_version is not None:
+                filter["resource_version"] = resource_version
+
+        if filter:
+            data["filter"] = filter
         data = json.dumps(data).encode("utf-8")
 
         headers = {
@@ -88,4 +100,8 @@ class AtlasClient(AbstractClient):
         result = json.loads(response.read().decode("utf-8"))
         resources = result["documents"]
 
-        return resources
+        # I do this as a lazy post-processing step because I can't figure out
+        # how to do this via an Atlas query, which may be more efficient.
+        return self.filter_incompatible_resources(
+            resources_to_filter=resources, gem5_version=gem5_version
+        )
diff --git a/src/python/gem5/resources/client_api/client_wrapper.py b/src/python/gem5/resources/client_api/client_wrapper.py
index 74ee831c1f..ebf1b8ec2b 100644
--- a/src/python/gem5/resources/client_api/client_wrapper.py
+++ b/src/python/gem5/resources/client_api/client_wrapper.py
@@ -59,6 +59,38 @@ class ClientWrapper:
                 warn(f"Error creating client {client}: {str(e)}")
         return clients
 
+    def list_resources(
+        self,
+        clients: Optional[List[str]] = None,
+        gem5_version: Optional[str] = core.gem5Version,
+    ) -> Dict[str, List[str]]:
+
+        clients_to_search = (
+            list(self.clients.keys()) if clients is None else clients
+        )
+        # There's some duplications of functionality here (similar code in
+        # `get_all_resources_by_id`. This code could be refactored to avoid
+        # this).
+        resources = []
+        for client in clients_to_search:
+            if client not in self.clients:
+                raise Exception(f"Client: {client} does not exist")
+            try:
+                resources.extend(
+                    self.clients[client].get_resources(
+                        gem5_version=gem5_version
+                    )
+                )
+            except Exception as e:
+                warn(f"Error getting resources from client {client}: {str(e)}")
+
+        to_return = {}
+        for resource in resources:
+            if resource["id"] not in to_return:
+                to_return[resource["id"]] = []
+            to_return[resource["id"]].append(resource["resource_version"])
+        return to_return
+
     def get_all_resources_by_id(
         self,
         resource_id: str,
@@ -98,6 +130,7 @@ class ClientWrapper:
         resource_id: str,
         resource_version: Optional[str] = None,
         clients: Optional[List[str]] = None,
+        gem5_version: Optional[str] = core.gem5Version,
     ) -> Dict:
         """
         This function returns the resource object from the client with the
@@ -106,6 +139,9 @@ class ClientWrapper:
         :param resource_version: The version of the resource to search for.
         :param clients: A list of clients to search through. If None, all
         clients are searched.
+        :param gem5_version: The gem5 version to check compatibility with. If
+        None, no compatibility check is performed. By default, is the current
+        version of gem5.
         :return: The resource object as a Python dictionary if found.
         If not found, exception is thrown.
         """
@@ -124,7 +160,9 @@ class ClientWrapper:
 
         else:
             compatible_resources = (
-                self._get_resources_compatible_with_gem5_version(resources)
+                self._get_resources_compatible_with_gem5_version(
+                    resources, gem5_version=gem5_version
+                )
             )
             if len(compatible_resources) == 0:
                 resource_to_return = self._sort_resources(resources)[0]
@@ -133,7 +171,10 @@ class ClientWrapper:
                     compatible_resources
                 )[0]
 
-        self._check_resource_version_compatibility(resource_to_return)
+        if gem5_version:
+            self._check_resource_version_compatibility(
+                resource_to_return, gem5_version=gem5_version
+            )
 
         return resource_to_return
 
@@ -172,16 +213,31 @@ class ClientWrapper:
     ) -> List:
         """
         Returns a list of compatible resources with the current gem5 version.
+
+        Note: This function assumes if the minor component of
+        a resource's gem5_version is not specified, it that the
+        resource is compatible all minor versions of the same major version.
+        Likewise, if no hot-fix component is specified, it is assumed that
+        the resource is compatible with all hot-fix versions of the same
+        minor version.
+
+        * '20.1' would be compatible with gem5 '20.1.1.0' and '20.1.2.0'.
+        * '21.5.2' would be compatible with gem5 '21.5.2.0' and '21.5.2.0'.
+        * '22.3.2.4' would only be compatible with gem5 '22.3.2.4'.
+
         :param resources: A list of resources to filter.
         :return: A list of compatible resources as Python dictionaries.
-        If no compatible resources are found, the original list of resources
-        is returned.
+
+        **Note**: This is a big duplication of code. This functionality already
+        exists in the `AbstractClient` class. This code should be refactored
+        to avoid this duplication.
         """
-        compatible_resources = [
-            resource
-            for resource in resources
-            if gem5_version in resource["gem5_versions"]
-        ]
+
+        compatible_resources = []
+        for resource in resources:
+            for version in resource["gem5_versions"]:
+                if gem5_version.startswith(version):
+                    compatible_resources.append(resource)
         return compatible_resources
 
     def _sort_resources(self, resources: List) -> List:
@@ -213,7 +269,12 @@ class ClientWrapper:
         """
         if not resource:
             return False
-        if gem5_version not in resource["gem5_versions"]:
+        if (
+            gem5_version
+            and not self._get_resources_compatible_with_gem5_version(
+                [resource], gem5_version=gem5_version
+            )
+        ):
             warn(
                 f"Resource {resource['id']} with version "
                 f"{resource['resource_version']} is not known to be compatible"
diff --git a/src/python/gem5/resources/client_api/jsonclient.py b/src/python/gem5/resources/client_api/jsonclient.py
index 225126e2a8..9e837131b0 100644
--- a/src/python/gem5/resources/client_api/jsonclient.py
+++ b/src/python/gem5/resources/client_api/jsonclient.py
@@ -58,13 +58,31 @@ class JSONClient(AbstractClient):
                 )
             self.resources = json.loads(response.read().decode("utf-8"))
 
-    def get_resources_by_id(self, resource_id: str) -> List[Dict[str, Any]]:
-        """
-        :param resource_id: The ID of the Resource.
-        :return: A list of all the Resources with the given ID.
-        """
-        return [
-            resource
-            for resource in self.resources
-            if resource["id"] == resource_id
-        ]
+    def get_resources_json(self) -> List[Dict[str, Any]]:
+        """Returns a JSON representation of the resources."""
+        return self.resources
+
+    def get_resources(
+        self,
+        resource_id: Optional[str] = None,
+        resource_version: Optional[str] = None,
+        gem5_version: Optional[str] = None,
+    ) -> List[Dict[str, Any]]:
+        filter = self.resources  # Unfiltered.
+        if resource_id:
+            filter = [  # Filter by resource_id.
+                resource
+                for resource in filter
+                if resource["id"] == resource_id
+            ]
+            if resource_version:
+                filter = [  # Filter by resource_version.
+                    resource
+                    for resource in filter
+                    if resource["resource_version"] == resource_version
+                ]
+
+        # Filter by gem5_version.
+        return self.filter_incompatible_resources(
+            resources_to_filter=filter, gem5_version=gem5_version
+        )
diff --git a/src/python/gem5/resources/downloader.py b/src/python/gem5/resources/downloader.py
index 0781d9b15a..bb5ca84cc0 100644
--- a/src/python/gem5/resources/downloader.py
+++ b/src/python/gem5/resources/downloader.py
@@ -24,24 +24,24 @@
 # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 
-import json
 import urllib.request
 import urllib.parse
-import hashlib
 import os
 import shutil
 import gzip
-import hashlib
-import base64
 import time
 import random
 from pathlib import Path
 import tarfile
-from tempfile import gettempdir
 from urllib.error import HTTPError
-from typing import List, Dict, Set, Optional
+from typing import List, Optional, Dict
 
-from .client import get_resource_json_obj
+from _m5 import core
+
+from .client import (
+    get_resource_json_obj,
+    list_resources as client_list_resources,
+)
 from .md5_utils import md5_file, md5_dir
 from ..utils.progress_bar import tqdm, progress_hook
 
@@ -53,188 +53,6 @@ information about resources from resources.gem5.org.
 """
 
 
-def _resources_json_version_required() -> str:
-    """
-    Specifies the version of resources.json to obtain.
-    """
-    return "develop"
-
-
-def _get_resources_json_uri() -> str:
-    return "https://resources.gem5.org/resources.json"
-
-
-def _url_validator(url):
-    try:
-        result = urllib.parse.urlparse(url)
-        return all([result.scheme, result.netloc, result.path])
-    except:
-        return False
-
-
-def _get_resources_json_at_path(path: str, use_caching: bool = True) -> Dict:
-    """
-    Returns a resource JSON, in the form of a Python Dict. The location
-    of the JSON must be specified.
-
-    If `use_caching` is True, and a URL is passed, a copy of the JSON will be
-    cached locally, and used for up to an hour after retrieval.
-
-    :param path: The URL or local path of the JSON file.
-    :param use_caching: True if a cached file is to be used (up to an hour),
-    otherwise the file will be retrieved from the URL regardless. True by
-    default. Only valid in cases where a URL is passed.
-    """
-
-    # If a local valid path is passed, just load it.
-    if Path(path).is_file():
-        return json.load(open(path))
-
-    # If it's not a local path, it should be a URL. We check this here and
-    # raise an Exception if it's not.
-    if not _url_validator(path):
-        raise Exception(
-            f"Resources location '{path}' is not a valid path or URL."
-        )
-
-    download_path = os.path.join(
-        gettempdir(),
-        f"gem5-resources-{hashlib.md5(path.encode()).hexdigest()}"
-        f"-{str(os.getuid())}.json",
-    )
-
-    # We apply a lock on the resources file for when it's downloaded, or
-    # re-downloaded, and read. This stops a corner-case from occuring where
-    # the file is re-downloaded while being read by another gem5 thread.
-    # Note the timeout is 120 so the `_download` function is given time to run
-    # its Truncated Exponential Backoff algorithm
-    # (maximum of roughly 1 minute). Typically this code will run quickly.
-    with FileLock(f"{download_path}.lock", timeout=120):
-
-        # The resources.json file can change at any time, but to avoid
-        # excessive retrieval we cache a version locally and use it for up to
-        # an hour before obtaining a fresh copy.
-        #
-        # `time.time()` and `os.path.getmtime(..)` both return an unix epoch
-        # time in seconds. Therefore, the value of "3600" here represents an
-        # hour difference between the two values. `time.time()` gets the
-        # current time, and `os.path.getmtime(<file>)` gets the modification
-        # time of the file. This is the most portable solution as other ideas,
-        # like "file creation time", are  not always the same concept between
-        # operating systems.
-        if (
-            not use_caching
-            or not os.path.exists(download_path)
-            or (time.time() - os.path.getmtime(download_path)) > 3600
-        ):
-            _download(path, download_path)
-
-    with open(download_path) as f:
-        file_contents = f.read()
-
-    try:
-        to_return = json.loads(file_contents)
-    except json.JSONDecodeError:
-        # This is a bit of a hack. If the URL specified exists in a Google
-        # Source repo (which is the case when on the gem5 develop branch) we
-        # retrieve the JSON in base64 format. This cannot be loaded directly as
-        # text. Conversion is therefore needed.
-        to_return = json.loads(base64.b64decode(file_contents).decode("utf-8"))
-
-    return to_return
-
-
-def _get_resources_json() -> Dict:
-    """
-    Gets the Resources JSON.
-
-    :returns: The Resources JSON (as a Python Dictionary).
-    """
-
-    path = os.getenv("GEM5_RESOURCE_JSON", _get_resources_json_uri())
-    to_return = _get_resources_json_at_path(path=path)
-
-    # If the current version pulled is not correct, look up the
-    # "previous-versions" field to find the correct one.
-    # If the resource JSON file does not have a "version" field or it's
-    # null/None, then we will use this resource JSON file (this is usefull for
-    # testing purposes).
-    version = _resources_json_version_required()
-    json_version = None if "version" not in to_return else to_return["version"]
-
-    if json_version and json_version != version:
-        if version in to_return["previous-versions"].keys():
-            to_return = _get_resources_json_at_path(
-                path=to_return["previous-versions"][version]
-            )
-        else:
-            # This should never happen, but we thrown an exception to explain
-            # that we can't find the version.
-            raise Exception(
-                f"Version '{version}' of resources.json cannot be found."
-            )
-
-    return to_return
-
-
-def _get_url_base() -> str:
-    """
-    Obtains the "url_base" string from the resources.json file.
-
-    :returns: The "url_base" string value from the resources.json file.
-    """
-    json = _get_resources_json()
-    if "url_base" in json.keys():
-        return json["url_base"]
-    return ""
-
-
-def _get_resources(
-    valid_types: Set[str], resources_group: Optional[Dict] = None
-) -> Dict[str, Dict]:
-    """
-    A recursive function to get all the workload/resource of the specified type
-    in the resources.json file.
-
-    :param valid_types: The type to return (i.e., "resource" or "workload).
-    :param resource_group: Used for recursion: The current resource group being
-    iterated through.
-
-    :returns: A dictionary of artifact names to the resource JSON objects.
-    """
-
-    if resources_group is None:
-        resources_group = _get_resources_json()["resources"]
-
-    to_return = {}
-    for resource in resources_group:
-        if resource["type"] in valid_types:
-            # If the type is valid then we add it directly to the map
-            # after a check that the name is unique.
-            if resource["name"] in to_return.keys():
-                raise Exception(
-                    f"Error: Duplicate resource with name '{resource['name']}'."
-                )
-            to_return[resource["name"]] = resource
-        elif resource["type"] == "group":
-            # If it's a group we get recursive. We then check to see if there
-            # are any duplication of keys.
-            new_map = _get_resources(
-                valid_types=valid_types, resources_group=resource["contents"]
-            )
-            intersection = set(new_map.keys()).intersection(to_return.keys())
-            if len(intersection) > 0:
-                # Note: if this error is received it's likely an error with
-                # the resources.json file. The resources names need to be
-                # unique keyes.
-                raise Exception(
-                    f"Error: Duplicate resources with names: {str(intersection)}."
-                )
-            to_return.update(new_map)
-
-    return to_return
-
-
 def _download(url: str, download_to: str, max_attempts: int = 6) -> None:
     """
     Downloads a file.
@@ -336,61 +154,26 @@ def _download(url: str, download_to: str, max_attempts: int = 6) -> None:
             )
 
 
-def list_resources() -> List[str]:
+def list_resources(
+    clients: Optional[List] = None, gem5_version: Optional[str] = None
+) -> Dict[str, List[str]]:
     """
-    Lists all available resources by name.
+    Lists all available resources. Returns a dictionary where the key is the
+    id of the resources and the value is a list of that resource's versions.
+
+    :param clients: A list of clients to use when listing resources. If None,
+    all clients will be used. None by default.
+
+    :param gem5_version: The gem5 version to which all resources should be
+    compatible with. If None, compatibility of resources is not considered and
+    all resources will be returned.
+
+    **Note**: This function is here for legacy reasons. The `list_resources`
+    function was originally stored here. In order to remain backwards
+    compatible, this function will call the `client_list_resources` function
 
-    :returns: A list of resources by name.
     """
-    from .resource import _get_resource_json_type_map
-
-    return _get_resources(
-        valid_types=_get_resource_json_type_map.keys()
-    ).keys()
-
-
-def get_workload_json_obj(workload_name: str) -> Dict:
-    """
-    Get a JSON object of a specified workload.
-
-    :param workload_name: The name of the workload.
-
-    :raises Exception: An exception is raised if the specified workload does
-    not exit.
-    """
-    workload_map = _get_resources(valid_types={"workload"})
-
-    if workload_name not in workload_map:
-        raise Exception(
-            f"Error: Workload with name {workload_name} does not exist"
-        )
-
-    return workload_map[workload_name]
-
-
-def get_resources_json_obj(resource_name: str) -> Dict:
-    """
-    Get a JSON object of a specified resource.
-
-    :param resource_name: The name of the resource.
-
-    :returns: The JSON object (in the form of a dictionary).
-
-    :raises Exception: An exception is raised if the specified resources does
-    not exist.
-    """
-    from .resource import _get_resource_json_type_map
-
-    resource_map = _get_resources(
-        valid_types=_get_resource_json_type_map.keys()
-    )
-
-    if resource_name not in resource_map:
-        raise Exception(
-            f"Error: Resource with name '{resource_name}' does not exist"
-        )
-
-    return resource_map[resource_name]
+    return client_list_resources(clients=clients, gem5_version=gem5_version)
 
 
 def get_resource(
@@ -401,6 +184,7 @@ def get_resource(
     download_md5_mismatch: bool = True,
     resource_version: Optional[str] = None,
     clients: Optional[List] = None,
+    gem5_version: Optional[str] = core.gem5Version,
 ) -> None:
     """
     Obtains a gem5 resource and stored it to a specified location. If the
@@ -429,6 +213,10 @@ def get_resource(
     :param clients: A list of clients to use when obtaining the resource. If
     None, all clients will be used. None by default.
 
+    :param gem5_version: The gem5 version to use when obtaining the resource.
+    By default, the version of gem5 being used is used. This is used primarily
+    for testing purposes.
+
     :raises Exception: An exception is thrown if a file is already present at
     `to_path` but it does not have the correct md5 sum. An exception will also
     be thrown is a directory is present at `to_path`
@@ -444,6 +232,7 @@ def get_resource(
             resource_name,
             resource_version=resource_version,
             clients=clients,
+            gem5_version=gem5_version,
         )
 
         if os.path.exists(to_path):
diff --git a/src/python/gem5/resources/resource.py b/src/python/gem5/resources/resource.py
index 22adf15670..bc9f4480ba 100644
--- a/src/python/gem5/resources/resource.py
+++ b/src/python/gem5/resources/resource.py
@@ -28,6 +28,7 @@ from abc import ABCMeta
 import os
 from pathlib import Path
 from m5.util import warn, fatal
+from _m5 import core
 
 from .downloader import get_resource
 
@@ -559,17 +560,15 @@ def obtain_resource(
     download_md5_mismatch: bool = True,
     resource_version: Optional[str] = None,
     clients: Optional[List] = None,
+    gem5_version=core.gem5Version,
 ) -> AbstractResource:
     """
     This function primarily serves as a factory for resources. It will return
     the correct `AbstractResource` implementation based on the resource
-    requested, by referencing the "resource.json" file (by default, that hosted
-    at https://resources.gem5.org/resources.json). In addition to this, this
-    function will download the resource if not detected in the
-    `resource_directory`.
+    requested.
 
     :param resource_name: The name of the gem5 resource as it appears under the
-    "name" field in the `resource.json` file.
+    "id" field in the `resource.json` file.
     :param resource_directory: The location of the directory in which the
     resource is to be stored. If this parameter is not set, it will set to
     the environment variable `GEM5_RESOURCE_DIR`. If the environment is not
@@ -582,11 +581,17 @@ def obtain_resource(
     Not a required parameter. None by default.
     :param clients: A list of clients to search for the resource. If this
     parameter is not set, it will default search all clients.
+    :param gem5_version: The gem5 version to use to filter incompatible
+    resource versions. By default set to the current gem5 version. If None,
+    this filtering is not performed.
     """
 
     # Obtain the resource object entry for this resource
     resource_json = get_resource_json_obj(
-        resource_id, resource_version=resource_version, clients=clients
+        resource_id,
+        resource_version=resource_version,
+        clients=clients,
+        gem5_version=gem5_version,
     )
 
     to_path = None
@@ -629,6 +634,7 @@ def obtain_resource(
             download_md5_mismatch=download_md5_mismatch,
             resource_version=resource_version,
             clients=clients,
+            gem5_version=gem5_version,
         )
 
     # Obtain the type from the JSON. From this we will determine what subclass
diff --git a/src/python/gem5/resources/workload.py b/src/python/gem5/resources/workload.py
index 148ab3f35a..0798b891ab 100644
--- a/src/python/gem5/resources/workload.py
+++ b/src/python/gem5/resources/workload.py
@@ -27,6 +27,8 @@
 from .resource import obtain_resource
 from .client import get_resource_json_obj
 
+from _m5 import core
+
 from typing import Dict, Any, List, Optional
 
 
@@ -160,6 +162,7 @@ class Workload(AbstractWorkload):
         resource_directory: Optional[str] = None,
         resource_version: Optional[str] = None,
         clients: Optional[List] = None,
+        gem5_version: Optional[str] = core.gem5Version,
     ) -> None:
         """
         This constructor will load the workload details from the workload with
@@ -201,12 +204,17 @@ class Workload(AbstractWorkload):
         :param resource_directory: An optional parameter that specifies where
         any resources should be download and accessed from. If None, a default
         location will be used. None by default.
+        :param gem5_version: The gem5 version for the Workload to be loaded.
+        By default, the current gem5 version is used. This will filter
+        resources which are incompatible with the current gem5 version. If
+        None, no filtering will be done.
         """
 
         workload_json = get_resource_json_obj(
             workload_name,
             resource_version=resource_version,
             clients=clients,
+            gem5_version=gem5_version,
         )
 
         func = workload_json["function"]
@@ -219,7 +227,9 @@ class Workload(AbstractWorkload):
                 value = workload_json["resources"][key]
                 assert isinstance(value, str)
                 params[key] = obtain_resource(
-                    value, resource_directory=resource_directory
+                    value,
+                    resource_directory=resource_directory,
+                    gem5_version=gem5_version,
                 )
 
         if "additional_params" in workload_json:
diff --git a/tests/gem5/configs/download_check.py b/tests/gem5/configs/download_check.py
index decc62c2d7..2180f4f26a 100644
--- a/tests/gem5/configs/download_check.py
+++ b/tests/gem5/configs/download_check.py
@@ -26,10 +26,11 @@
 
 from gem5.resources.downloader import (
     list_resources,
-    get_resources_json_obj,
     get_resource,
 )
 
+from gem5.resources.client import get_resource_json_obj
+
 from gem5.resources.md5_utils import md5
 
 import os
@@ -51,6 +52,15 @@ parser.add_argument(
     "checked",
 )
 
+parser.add_argument(
+    "--gem5-version",
+    type=str,
+    required=False,
+    help="The gem5 version to check the resources against. Resources not "
+    "compatible with this version will be ignored. If not set, no "
+    "compatibility tests are performed.",
+)
+
 parser.add_argument(
     "--download-directory",
     type=str,
@@ -67,39 +77,59 @@ if not Path(args.download_directory).exists():
 
 
 ids = args.ids
+resource_list = list_resources(gem5_version=args.gem5_version)
 if len(ids) == 0:
-    ids = list_resources()
+    ids = resource_list
 
 # We log all the errors as they occur then dump them at the end. This means we
 # can be aware of all download errors in a single failure.
 errors = str()
 
 for id in ids:
-    if id not in list_resources():
+    if id not in resource_list:
         errors += (
             f"Resource with ID '{id}' not found in "
             + f"`list_resources()`.{os.linesep}"
         )
         continue
 
-    resource_json = get_resources_json_obj(id)
-    download_path = os.path.join(args.download_directory, id)
-    try:
-        get_resource(resource_name=id, to_path=download_path)
-    except Exception as e:
-        errors += f"Failure to download resource '{id}'.{os.linesep}"
-        errors += f"Exception message:{os.linesep}{str(e)}"
-        errors += f"{os.linesep}{os.linesep}"
-        continue
+    for resource_version in ids[id]:
 
-    if md5(Path(download_path)) != resource_json["md5sum"]:
-        errors += (
-            f"Downloaded resource '{id}' md5 "
-            + f"({md5(Path(download_path))}) differs to that in the "
-            + f"JSON ({resource_json['md5sum']}).{os.linesep}"
+        resource_json = get_resource_json_obj(
+            resource_id=id,
+            resource_version=resource_version,
+            gem5_version=args.gem5_version,
         )
+        if resource_json["category"] == "workload":
+            # Workloads are not downloaded as part of this test.
+            continue
+        download_path = os.path.join(
+            args.download_directory, f"{id}-v{resource_version}"
+        )
+        try:
+            get_resource(
+                resource_name=id,
+                resource_version=resource_version,
+                gem5_version=args.gem5_version,
+                to_path=download_path,
+            )
+        except Exception as e:
+            errors += (
+                f"Failure to download resource '{id}', "
+                + f"v{resource_version}.{os.linesep}"
+            )
+            errors += f"Exception message:{os.linesep}{str(e)}"
+            errors += f"{os.linesep}{os.linesep}"
+            continue
 
-    # Remove the downloaded resource.
+        if md5(Path(download_path)) != resource_json["md5sum"]:
+            errors += (
+                f"Downloaded resource '{id}' md5 "
+                + f"({md5(Path(download_path))}) differs to that recorded in "
+                + f" gem5-resources ({resource_json['md5sum']}).{os.linesep}"
+            )
+
+        # Remove the downloaded resource.
     shutil.rmtree(download_path, ignore_errors=True)
 
 # If errors exist, raise an exception highlighting them.
diff --git a/tests/pyunit/stdlib/resources/pyunit_client_wrapper_checks.py b/tests/pyunit/stdlib/resources/pyunit_client_wrapper_checks.py
index 344f67b8b0..f190b1ed5f 100644
--- a/tests/pyunit/stdlib/resources/pyunit_client_wrapper_checks.py
+++ b/tests/pyunit/stdlib/resources/pyunit_client_wrapper_checks.py
@@ -25,13 +25,9 @@
 # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 
 import unittest
-from gem5.isas import ISA
 from gem5.resources.client import get_resource_json_obj
-import gem5.resources.client
 from gem5.resources.client_api.client_wrapper import ClientWrapper
-from typing import Dict
 from unittest.mock import patch
-from unittest import mock
 import json
 from urllib.error import HTTPError
 import io
@@ -62,23 +58,8 @@ mock_config_mongo = {
     },
 }
 
-mock_config_combined = {
-    "sources": {
-        "gem5-resources": {
-            "dataSource": "gem5-vision",
-            "database": "gem5-vision",
-            "collection": "versions_test",
-            "url": "https://data.mongodb-api.com/app/data-ejhjf/endpoint/data/v1",
-            "authUrl": "https://realm.mongodb.com/api/client/v2.0/app/data-ejhjf/auth/providers/api-key/login",
-            "apiKey": "OIi5bAP7xxIGK782t8ZoiD2BkBGEzMdX3upChf9zdCxHSnMoiTnjI22Yw5kOSgy9",
-            "isMongo": True,
-        },
-        "baba": {
-            "url": mock_json_path,
-            "isMongo": False,
-        },
-    },
-}
+mock_config_combined = mock_config_mongo
+mock_config_combined["sources"]["baba"] = mock_config_json["sources"]["baba"]
 
 mock_json = {}
 
@@ -145,12 +126,12 @@ class ClientWrapperTestSuite(unittest.TestCase):
     def test_get_resource_json_obj(self):
         # Test that the resource object is correctly returned
         resource = "this-is-a-test-resource"
-        resource = get_resource_json_obj(resource)
+        resource = get_resource_json_obj(resource, gem5_version="develop")
         self.assertEqual(resource["id"], "this-is-a-test-resource")
-        self.assertEqual(resource["resource_version"], "2.0.0")
+        self.assertEqual(resource["resource_version"], "1.1.0")
         self.assertEqual(resource["category"], "binary")
         self.assertEqual(
-            resource["description"], "This is a test resource but double newer"
+            resource["description"], "This is a test resource but newer"
         )
         self.assertEqual(
             resource["source_url"],
@@ -167,7 +148,9 @@ class ClientWrapperTestSuite(unittest.TestCase):
         resource_id = "test-id"
         client = "invalid"
         with self.assertRaises(Exception) as context:
-            get_resource_json_obj(resource_id, clients=[client])
+            get_resource_json_obj(
+                resource_id, clients=[client], gem5_version="develop"
+            )
         self.assertTrue(
             f"Client: {client} does not exist" in str(context.exception)
         )
@@ -181,7 +164,9 @@ class ClientWrapperTestSuite(unittest.TestCase):
         resource_id = "this-is-a-test-resource"
         resource_version = "1.0.0"
         resource = get_resource_json_obj(
-            resource_id, resource_version=resource_version
+            resource_id,
+            resource_version=resource_version,
+            gem5_version="develop",
         )
         self.assertEqual(resource["id"], "this-is-a-test-resource")
         self.assertEqual(resource["resource_version"], "1.0.0")
@@ -200,17 +185,18 @@ class ClientWrapperTestSuite(unittest.TestCase):
     @patch("urllib.request.urlopen", side_effect=mocked_requests_post)
     def test_get_resource_json_obj_1(self, mock_get):
         resource = "x86-ubuntu-18.04-img"
-        resource = get_resource_json_obj(resource)
+        resource = get_resource_json_obj(resource, gem5_version="develop")
         self.assertEqual(resource["id"], "x86-ubuntu-18.04-img")
-        self.assertEqual(resource["resource_version"], "1.1.0")
+        self.assertEqual(resource["resource_version"], "2.0.0")
         self.assertEqual(resource["category"], "disk-image")
         self.assertEqual(
             resource["description"],
-            "A disk image containing Ubuntu 18.04 for x86. This image will run an `m5 readfile` instruction after booting. If no script file is specified an `m5 exit` instruction will be executed.",
+            "This is a test resource",
         )
         self.assertEqual(
             resource["source_url"],
-            "https://github.com/gem5/gem5-resources/tree/develop/src/x86-ubuntu",
+            "https://github.com/gem5/gem5-resources/tree/develop/"
+            "src/x86-ubuntu",
         )
         self.assertEqual(resource["architecture"], "X86")
 
@@ -227,6 +213,7 @@ class ClientWrapperTestSuite(unittest.TestCase):
             resource_id,
             resource_version=resource_version,
             clients=["gem5-resources"],
+            gem5_version="develop",
         )
         self.assertEqual(resource["id"], "x86-ubuntu-18.04-img")
         self.assertEqual(resource["resource_version"], "1.0.0")
@@ -246,7 +233,9 @@ class ClientWrapperTestSuite(unittest.TestCase):
     def test_get_resource_json_obj_with_id_invalid_mongodb(self, mock_get):
         resource_id = "invalid-id"
         with self.assertRaises(Exception) as context:
-            get_resource_json_obj(resource_id, clients=["gem5-resources"])
+            get_resource_json_obj(
+                resource_id, clients=["gem5-resources"], gem5_version="develop"
+            )
         self.assertTrue(
             "Resource with ID 'invalid-id' not found."
             in str(context.exception)
@@ -267,12 +256,13 @@ class ClientWrapperTestSuite(unittest.TestCase):
                 resource_id,
                 resource_version=resource_version,
                 clients=["gem5-resources"],
+                gem5_version="develop",
             )
         self.assertTrue(
             f"Resource x86-ubuntu-18.04-img with version '2.5.0'"
             " not found.\nResource versions can be found at: "
-            f"https://resources.gem5.org/resources/x86-ubuntu-18.04-img/versions"
-            in str(context.exception)
+            "https://resources.gem5.org/resources/x86-ubuntu-18.04-img/"
+            "versions" in str(context.exception)
         )
 
     @patch(
@@ -286,12 +276,13 @@ class ClientWrapperTestSuite(unittest.TestCase):
             get_resource_json_obj(
                 resource_id,
                 resource_version=resource_version,
+                gem5_version="develop",
             )
         self.assertTrue(
-            f"Resource this-is-a-test-resource with version '2.5.0'"
+            "Resource this-is-a-test-resource with version '2.5.0'"
             " not found.\nResource versions can be found at: "
-            f"https://resources.gem5.org/resources/this-is-a-test-resource/versions"
-            in str(context.exception)
+            "https://resources.gem5.org/resources/this-is-a-test-resource/"
+            "versions" in str(context.exception)
         )
 
     @patch(
@@ -308,11 +299,13 @@ class ClientWrapperTestSuite(unittest.TestCase):
             resource_id_mongo,
             resource_version=resource_version_mongo,
             clients=["gem5-resources"],
+            gem5_version="develop",
         )
         resource_json = get_resource_json_obj(
             resource_id_json,
             resource_version=resource_version_json,
             clients=["baba"],
+            gem5_version="develop",
         )
         self.assertEqual(resource_mongo["id"], "x86-ubuntu-18.04-img")
         self.assertEqual(resource_mongo["resource_version"], "1.0.0")
@@ -322,7 +315,8 @@ class ClientWrapperTestSuite(unittest.TestCase):
         )
         self.assertEqual(
             resource_mongo["source_url"],
-            "https://github.com/gem5/gem5-resources/tree/develop/src/x86-ubuntu",
+            "https://github.com/gem5/gem5-resources/tree/develop/src/"
+            "x86-ubuntu",
         )
         self.assertEqual(resource_mongo["architecture"], "X86")
 
@@ -347,6 +341,7 @@ class ClientWrapperTestSuite(unittest.TestCase):
         resource_id = "simpoint-resource"
         resource = get_resource_json_obj(
             resource_id,
+            gem5_version="develop",
         )
         self.assertEqual(resource["id"], resource_id)
         self.assertEqual(resource["resource_version"], "0.2.0")
@@ -371,6 +366,7 @@ class ClientWrapperTestSuite(unittest.TestCase):
         resource_id = "x86-ubuntu-18.04-img"
         resource_json = get_resource_json_obj(
             resource_id,
+            gem5_version="develop",
         )
 
         self.assertEqual(resource_json["id"], "x86-ubuntu-18.04-img")
@@ -378,8 +374,7 @@ class ClientWrapperTestSuite(unittest.TestCase):
         self.assertEqual(resource_json["category"], "disk-image")
 
         resource_json = get_resource_json_obj(
-            resource_id,
-            resource_version="1.0.0",
+            resource_id, resource_version="1.0.0", gem5_version="develop"
         )
 
         self.assertEqual(resource_json["id"], "x86-ubuntu-18.04-img")
@@ -396,6 +391,7 @@ class ClientWrapperTestSuite(unittest.TestCase):
         with self.assertRaises(Exception) as context:
             get_resource_json_obj(
                 resource_id,
+                gem5_version="develop",
             )
         self.assertTrue(
             f"Resource {resource_id} has multiple resources with"
@@ -428,6 +424,7 @@ class ClientWrapperTestSuite(unittest.TestCase):
             with contextlib.redirect_stderr(f):
                 get_resource_json_obj(
                     resource_id,
+                    gem5_version="develop",
                 )
         self.assertTrue(
             "Error getting resources from client gem5-resources:"
@@ -440,21 +437,7 @@ class ClientWrapperTestSuite(unittest.TestCase):
 
     @patch(
         "gem5.resources.client.clientwrapper",
-        ClientWrapper(
-            {
-                "sources": {
-                    "gem5-resources": {
-                        "dataSource": "gem5-vision",
-                        "database": "gem5-vision",
-                        "collection": "versions_test",
-                        "url": "https://data.mongodb-api.com/app/data-ejhjf/endpoint/data/v",
-                        "authUrl": "https://realm.mongodb.com/api/client/v2.0/app/data-ejhjf/auth/providers/api-key/login",
-                        "apiKey": "OIi5bAP7xxIGK782t8ZoiD2BkBGEzMdX3upChf9zdCxHSnMoiTnjI22Yw5kOSgy9",
-                        "isMongo": True,
-                    }
-                },
-            }
-        ),
+        ClientWrapper(mock_config_mongo),
     )
     @patch("urllib.request.urlopen", side_effect=mocked_requests_post)
     def test_invalid_url(self, mock_get):
@@ -464,6 +447,7 @@ class ClientWrapperTestSuite(unittest.TestCase):
             with contextlib.redirect_stderr(f):
                 get_resource_json_obj(
                     resource_id,
+                    gem5_version="develop",
                 )
         self.assertTrue(
             "Error getting resources from client gem5-resources:"
@@ -476,21 +460,7 @@ class ClientWrapperTestSuite(unittest.TestCase):
 
     @patch(
         "gem5.resources.client.clientwrapper",
-        ClientWrapper(
-            {
-                "sources": {
-                    "gem5-resources": {
-                        "dataSource": "gem5-vision",
-                        "database": "gem5-vision",
-                        "collection": "versions_test",
-                        "url": "https://data.mongodb-api.com/app/data-ejhjf/endpoint/data/v1",
-                        "authUrl": "https://realm.mongodb.com/api/client/v2.0/app/data-ejhjf/auth/providers/api-key/login",
-                        "apiKey": "OIi5bAP7xxIGK782t8ZoiD2BkBGEzMdX3upChf9zdCxHSnMoiTnjI22Yw5kOSgy9",
-                        "isMongo": True,
-                    }
-                },
-            }
-        ),
+        ClientWrapper(mock_config_mongo),
     )
     @patch("urllib.request.urlopen", side_effect=mocked_requests_post)
     def test_invalid_url(self, mock_get):
@@ -500,6 +470,7 @@ class ClientWrapperTestSuite(unittest.TestCase):
             with contextlib.redirect_stderr(f):
                 get_resource_json_obj(
                     resource_id,
+                    gem5_version="develop",
                 )
         self.assertTrue(
             "Error getting resources from client gem5-resources:"
diff --git a/tests/pyunit/stdlib/resources/pyunit_downloader_checks.py b/tests/pyunit/stdlib/resources/pyunit_json_client_checks.py
similarity index 87%
rename from tests/pyunit/stdlib/resources/pyunit_downloader_checks.py
rename to tests/pyunit/stdlib/resources/pyunit_json_client_checks.py
index 19169e480e..88db3d4967 100644
--- a/tests/pyunit/stdlib/resources/pyunit_downloader_checks.py
+++ b/tests/pyunit/stdlib/resources/pyunit_json_client_checks.py
@@ -30,15 +30,11 @@ import os
 from typing import Dict
 import json
 
-from gem5.resources.downloader import (
-    _get_resources_json_at_path,
-    _get_resources_json,
-    _resources_json_version_required,
-)
+from gem5.resources.client_api.jsonclient import JSONClient
 
 
-class ResourceDownloaderTestSuite(unittest.TestCase):
-    """Test cases for gem5.resources.downloader"""
+class JSONClientTestSuite(unittest.TestCase):
+    """Test cases for gem5.resources.client_api.jsonclient"""
 
     @classmethod
     def setUpClass(cls) -> str:
@@ -142,12 +138,9 @@ class ResourceDownloaderTestSuite(unittest.TestCase):
         file.close()
         cls.file_path = file.name
 
-        os.environ["GEM5_RESOURCE_JSON"] = cls.file_path
-
     @classmethod
     def tearDownClass(cls) -> None:
         os.remove(cls.file_path)
-        del os.environ["GEM5_RESOURCE_JSON"]
 
     def verify_json(self, json: Dict) -> None:
         """
@@ -167,32 +160,22 @@ class ResourceDownloaderTestSuite(unittest.TestCase):
         self.assertEquals("test-version", json[3]["id"])
 
     def test_get_resources_json_at_path(self) -> None:
-        # Tests the gem5.resources.downloader._get_resources_json_at_path()
-        # function.
+        # Tests JSONClient.get_resources_json()
 
-        json = _get_resources_json_at_path(path=self.file_path)
-        self.verify_json(json=json)
-
-    def test_get_resources_json(self) -> None:
-        # Tests the gem5.resources.downloader._get_resources_json() function.
-
-        json = _get_resources_json()
+        client = JSONClient(path=self.file_path)
+        json = client.get_resources_json()
         self.verify_json(json=json)
 
     def test_get_resources_json_invalid_url(self) -> None:
-        # Tests the gem5.resources.downloader._get_resources_json() function in
-        # case where an invalid url is passed as the URL/PATH of the
-        # resources.json file.
+        # Tests the JSONClient.get_resources_json() function in case where an
+        # invalid url is passed as the URL/PATH of the resources JSON file.
 
         path = "NotAURLorFilePath"
-        os.environ["GEM5_RESOURCE_JSON"] = path
         with self.assertRaises(Exception) as context:
-            _get_resources_json()
+            client = JSONClient(path=path)
+            json = client.get_resources_json()
 
         self.assertTrue(
             f"Resources location '{path}' is not a valid path or URL."
             in str(context.exception)
         )
-
-        # Set back to the old path
-        os.environ["GEM5_RESOURCE_JSON"] = self.file_path
diff --git a/tests/pyunit/stdlib/resources/pyunit_obtain_resources_check.py b/tests/pyunit/stdlib/resources/pyunit_obtain_resources_check.py
index 791d96c1f1..b1eda4e6ed 100644
--- a/tests/pyunit/stdlib/resources/pyunit_obtain_resources_check.py
+++ b/tests/pyunit/stdlib/resources/pyunit_obtain_resources_check.py
@@ -30,12 +30,7 @@ import io
 import contextlib
 from pathlib import Path
 
-from gem5.resources.resource import *
-
-from gem5.resources.looppoint import (
-    LooppointCsvLoader,
-    LooppointJsonLoader,
-)
+from gem5.resources.resource import obtain_resource, BinaryResource
 
 from gem5.isas import ISA
 
@@ -61,24 +56,6 @@ mock_config_json = {
     new=ClientWrapper(mock_config_json),
 )
 class TestObtainResourcesCheck(unittest.TestCase):
-    @classmethod
-    def setUpClass(cls):
-        """Prior to running the suite we set the resource directory to
-        "ref/resource-specialization.json"
-        """
-        os.environ["GEM5_RESOURCE_JSON"] = os.path.join(
-            os.path.realpath(os.path.dirname(__file__)),
-            "refs",
-            "obtain-resource.json",
-        )
-
-    @classmethod
-    def tearDownClass(cls) -> None:
-        """After running the suite we unset the gem5-resource JSON file, as to
-        not interfere with others tests.
-        """
-        del os.environ["GEM5_RESOURCE_JSON"]
-
     def get_resource_dir(cls) -> str:
         """To ensure the resources are cached to the same directory as all
         other tests, this function returns the location of the testing
@@ -99,26 +76,27 @@ class TestObtainResourcesCheck(unittest.TestCase):
         resource = obtain_resource(
             resource_id="test-binary-resource",
             resource_directory=self.get_resource_dir(),
+            gem5_version="develop",
         )
-        self.assertEquals("2.5.0", resource.get_resource_version())
+        self.assertEquals("1.7.0", resource.get_resource_version())
         self.assertIsInstance(resource, BinaryResource)
-        # self.assertIn(gem5Version, resource.get_gem5_versions())
-        self.assertEquals("test description", resource.get_description())
+        self.assertEquals(
+            "test description v1.7.0", resource.get_description()
+        )
         self.assertEquals("src/test-source", resource.get_source())
         self.assertEquals(ISA.ARM, resource.get_architecture())
 
     def test_obtain_resources_with_version_compatible(self):
-        gem5Version = core.gem5Version
         resource = obtain_resource(
             resource_id="test-binary-resource",
             resource_directory=self.get_resource_dir(),
-            resource_version="1.7.0",
+            resource_version="1.5.0",
+            gem5_version="develop",
         )
-        self.assertEquals("1.7.0", resource.get_resource_version())
+        self.assertEquals("1.5.0", resource.get_resource_version())
         self.assertIsInstance(resource, BinaryResource)
-        # self.assertIn(gem5Version, resource.get_gem5_versions())
         self.assertEquals(
-            "test description v1.7.0", resource.get_description()
+            "test description for 1.5.0", resource.get_description()
         )
         self.assertEquals("src/test-source", resource.get_source())
         self.assertEquals(ISA.ARM, resource.get_architecture())
@@ -143,6 +121,7 @@ class TestObtainResourcesCheck(unittest.TestCase):
             resource_id="test-binary-resource",
             resource_directory=self.get_resource_dir(),
             resource_version="1.5.0",
+            gem5_version="develop",
         )
         self.assertEquals("1.5.0", resource.get_resource_version())
         self.assertIsInstance(resource, BinaryResource)
@@ -157,6 +136,7 @@ class TestObtainResourcesCheck(unittest.TestCase):
             obtain_resource(
                 resource_id="invalid-id",
                 resource_directory=self.get_resource_dir(),
+                gem5_version="develop",
             )
         self.assertTrue(
             "Resource with ID 'invalid-id' not found."
@@ -169,6 +149,7 @@ class TestObtainResourcesCheck(unittest.TestCase):
                 resource_id="invalid-id",
                 resource_directory=self.get_resource_dir(),
                 resource_version="1.7.0",
+                gem5_version="develop",
             )
         self.assertTrue(
             "Resource with ID 'invalid-id' not found."
@@ -182,8 +163,6 @@ class TestObtainResourcesCheck(unittest.TestCase):
                 resource_directory=self.get_resource_dir(),
                 resource_version="3.0.0",
             )
-        print("context.exception: ", context.exception)
-        print(str(context.exception))
         self.assertTrue(
             f"Resource test-binary-resource with version '3.0.0'"
             " not found.\nResource versions can be found at: "
diff --git a/tests/pyunit/stdlib/resources/pyunit_resource_download_checks.py b/tests/pyunit/stdlib/resources/pyunit_resource_download_checks.py
deleted file mode 100644
index 8f6674ff0d..0000000000
--- a/tests/pyunit/stdlib/resources/pyunit_resource_download_checks.py
+++ /dev/null
@@ -1,72 +0,0 @@
-# Copyright (c) 2023 The Regents of the University of California
-# All rights reserved.
-#
-# Redistribution and use in source and binary forms, with or without
-# modification, are permitted provided that the following conditions are
-# met: redistributions of source code must retain the above copyright
-# notice, this list of conditions and the following disclaimer;
-# redistributions in binary form must reproduce the above copyright
-# notice, this list of conditions and the following disclaimer in the
-# documentation and/or other materials provided with the distribution;
-# neither the name of the copyright holders nor the names of its
-# contributors may be used to endorse or promote products derived from
-# this software without specific prior written permission.
-#
-# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
-# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
-# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
-# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
-# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
-# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
-# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
-# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
-# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
-# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
-# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-
-import unittest
-import tempfile
-import os
-from typing import Dict
-
-from gem5.resources.downloader import (
-    get_resources_json_obj,
-)
-
-
-class ResourceDownloadTestSuite(unittest.TestCase):
-    """Test cases for gem5.resources.downloader"""
-
-    @classmethod
-    def setUpClass(cls) -> str:
-        pass
-
-    def get_resource_json_by_id(self) -> None:
-        """Get a resource by its id"""
-        resources = get_resources_json_obj("test-version")
-        self.assertEqual(resources["id"], "test-version")
-        self.assertEqual(resources["resource_version"], "2.0.0")
-
-    def get_resource_json_invalid_id(self) -> None:
-        """Should throw an exception when trying to get a resource that doesn't exist"""
-        with self.assertRaises(Exception) as context:
-            get_resources_json_obj("this-resource-doesnt-exist")
-        self.assertTrue(
-            f"Error: Resource with name 'this-resource-doesnt-exist' does not exist"
-            in str(context.exception)
-        )
-
-    def get_resource_json_by_id_and_version(self) -> None:
-        """Get a resource by its id and version"""
-        resources = get_resources_json_obj("test-version", "1.0.0")
-        self.assertEqual(resources["id"], "test-version")
-        self.assertEqual(resources["resource_version"], "1.0.0")
-
-    def get_resource_json_by_id_and_invalid_version(self) -> None:
-        """Get a resource by its id and an invalid version (does not exist)"""
-        with self.assertRaises(Exception) as context:
-            get_resources_json_obj("test-version", "3.0.0")
-        self.assertTrue(
-            f"Specified Version 3.0.0 does not exist for the resource 'test-version'."
-            in str(context.exception)
-        )
diff --git a/tests/pyunit/stdlib/resources/pyunit_resource_specialization.py b/tests/pyunit/stdlib/resources/pyunit_resource_specialization.py
index 5c22a7341e..f2088db8ef 100644
--- a/tests/pyunit/stdlib/resources/pyunit_resource_specialization.py
+++ b/tests/pyunit/stdlib/resources/pyunit_resource_specialization.py
@@ -62,24 +62,6 @@ class ResourceSpecializationSuite(unittest.TestCase):
     function.
     """
 
-    @classmethod
-    def setUpClass(cls):
-        """Prior to running the suite we set the resource directory to
-        "ref/resource-specialization.json"
-        """
-        os.environ["GEM5_RESOURCE_JSON"] = os.path.join(
-            os.path.realpath(os.path.dirname(__file__)),
-            "refs",
-            "resource-specialization.json",
-        )
-
-    @classmethod
-    def tearDownClass(cls) -> None:
-        """After running the suite we unset the gem5-resource JSON file, as to
-        not interfere with others tests.
-        """
-        del os.environ["GEM5_RESOURCE_JSON"]
-
     def get_resource_dir(cls) -> str:
         """To ensure the resources are cached to the same directory as all
         other tests, this function returns the location of the testing
@@ -99,6 +81,7 @@ class ResourceSpecializationSuite(unittest.TestCase):
         resource = obtain_resource(
             resource_id="binary-example",
             resource_directory=self.get_resource_dir(),
+            gem5_version="develop",
         )
 
         self.assertIsInstance(resource, BinaryResource)
@@ -114,6 +97,7 @@ class ResourceSpecializationSuite(unittest.TestCase):
         resource = obtain_resource(
             resource_id="kernel-example",
             resource_directory=self.get_resource_dir(),
+            gem5_version="develop",
         )
 
         self.assertIsInstance(resource, KernelResource)
@@ -129,6 +113,7 @@ class ResourceSpecializationSuite(unittest.TestCase):
         resource = obtain_resource(
             resource_id="bootloader-example",
             resource_directory=self.get_resource_dir(),
+            gem5_version="develop",
         )
 
         self.assertIsInstance(resource, BootloaderResource)
@@ -144,6 +129,7 @@ class ResourceSpecializationSuite(unittest.TestCase):
         resource = obtain_resource(
             resource_id="disk-image-example",
             resource_directory=self.get_resource_dir(),
+            gem5_version="develop",
         )
 
         self.assertIsInstance(resource, DiskImageResource)
@@ -159,6 +145,7 @@ class ResourceSpecializationSuite(unittest.TestCase):
         resource = obtain_resource(
             resource_id="checkpoint-example",
             resource_directory=self.get_resource_dir(),
+            gem5_version="develop",
         )
 
         self.assertIsInstance(resource, CheckpointResource)
@@ -173,6 +160,7 @@ class ResourceSpecializationSuite(unittest.TestCase):
         resource = obtain_resource(
             resource_id="git-example",
             resource_directory=self.get_resource_dir(),
+            gem5_version="develop",
         )
 
         self.assertIsInstance(resource, GitResource)
@@ -185,6 +173,7 @@ class ResourceSpecializationSuite(unittest.TestCase):
         resource = obtain_resource(
             resource_id="simpoint-directory-example",
             resource_directory=self.get_resource_dir(),
+            gem5_version="develop",
         )
 
         self.assertIsInstance(resource, SimpointDirectoryResource)
@@ -219,6 +208,7 @@ class ResourceSpecializationSuite(unittest.TestCase):
         resource = obtain_resource(
             resource_id="simpoint-example",
             resource_directory=self.get_resource_dir(),
+            gem5_version="develop",
         )
 
         self.assertIsInstance(resource, SimpointResource)
@@ -240,6 +230,7 @@ class ResourceSpecializationSuite(unittest.TestCase):
             resource_id="file-example",
             resource_directory=self.get_resource_dir(),
             resource_version="1.0.0",
+            gem5_version="develop",
         )
 
         self.assertIsInstance(resource, FileResource)
@@ -268,6 +259,7 @@ class ResourceSpecializationSuite(unittest.TestCase):
         resource = obtain_resource(
             resource_id="looppoint-pinpoint-csv-resource",
             resource_directory=self.get_resource_dir(),
+            gem5_version="develop",
         )
 
         self.assertIsInstance(resource, LooppointCsvResource)
@@ -289,6 +281,7 @@ class ResourceSpecializationSuite(unittest.TestCase):
             resource_id="looppoint-json-restore-resource-region-1",
             resource_directory=self.get_resource_dir(),
             resource_version="1.0.0",
+            gem5_version="develop",
         )
 
         self.assertIsInstance(resource, LooppointJsonResource)
diff --git a/tests/pyunit/stdlib/resources/pyunit_workload_checks.py b/tests/pyunit/stdlib/resources/pyunit_workload_checks.py
index b898faeb79..b59e09d4fe 100644
--- a/tests/pyunit/stdlib/resources/pyunit_workload_checks.py
+++ b/tests/pyunit/stdlib/resources/pyunit_workload_checks.py
@@ -40,17 +40,7 @@ from gem5.resources.client_api.client_wrapper import ClientWrapper
 from unittest.mock import patch
 from pathlib import Path
 
-mock_config_json1 = {
-    "sources": {
-        "baba": {
-            "url": Path(__file__).parent
-            / "refs/workload-checks-custom-workload.json",
-            "isMongo": False,
-        }
-    },
-}
-
-mock_config_json2 = {
+mock_config_json = {
     "sources": {
         "baba": {
             "url": Path(__file__).parent / "refs/workload-checks.json",
@@ -68,29 +58,19 @@ class CustomWorkloadTestSuite(unittest.TestCase):
     @classmethod
     @patch(
         "gem5.resources.client.clientwrapper",
-        new=ClientWrapper(mock_config_json1),
+        new=ClientWrapper(mock_config_json),
     )
     def setUpClass(cls) -> None:
-        os.environ["GEM5_RESOURCE_JSON"] = os.path.join(
-            os.path.realpath(os.path.dirname(__file__)),
-            "refs",
-            "workload-checks-custom-workload.json",
-        )
-
         cls.custom_workload = CustomWorkload(
             function="set_se_binary_workload",
             parameters={
-                "binary": obtain_resource("x86-hello64-static"),
+                "binary": obtain_resource(
+                    "x86-hello64-static", gem5_version="develop"
+                ),
                 "arguments": ["hello", 6],
             },
         )
 
-    @classmethod
-    def tearDownClass(cls):
-        # Unset the environment variable so this test does not interfere with
-        # others.
-        os.environ["GEM5_RESOURCE_JSON"]
-
     def test_get_function_str(self) -> None:
         # Tests `CustomResource.get_function_str`
 
@@ -140,7 +120,8 @@ class CustomWorkloadTestSuite(unittest.TestCase):
             "test", self.custom_workload.get_parameters()["binary"]
         )
 
-        # We set the overridden parameter back to it's old valu        self.custom_workload.set_parameter("binary", old_value)
+        # We set the overridden parameter back to it's old value
+        self.custom_workload.set_parameter("binary", old_value)
 
 
 class WorkloadTestSuite(unittest.TestCase):
@@ -151,21 +132,10 @@ class WorkloadTestSuite(unittest.TestCase):
     @classmethod
     @patch(
         "gem5.resources.client.clientwrapper",
-        ClientWrapper(mock_config_json2),
+        ClientWrapper(mock_config_json),
     )
     def setUpClass(cls):
-        os.environ["GEM5_RESOURCE_JSON"] = os.path.join(
-            os.path.realpath(os.path.dirname(__file__)),
-            "refs",
-            "workload-checks.json",
-        )
-        cls.workload = Workload("simple-boot")
-
-    @classmethod
-    def tearDownClass(cls):
-        # Unset the environment variable so this test does not interfere with
-        # others.
-        os.environ["GEM5_RESOURCE_JSON"]
+        cls.workload = Workload("simple-boot", gem5_version="develop")
 
     def test_get_function_str(self) -> None:
         # Tests `Resource.get_function_str`
diff --git a/tests/pyunit/stdlib/resources/refs/mongo-mock.json b/tests/pyunit/stdlib/resources/refs/mongo-mock.json
index b6376cc5e4..e2fb058ff7 100644
--- a/tests/pyunit/stdlib/resources/refs/mongo-mock.json
+++ b/tests/pyunit/stdlib/resources/refs/mongo-mock.json
@@ -22,7 +22,7 @@
         "source_url": "https://github.com/gem5/gem5-resources/tree/develop/src/x86-ubuntu",
         "resource_version": "1.0.0",
         "gem5_versions": [
-            "23.0"
+            "develop"
         ],
         "example_usage": "get_resource(resource_name=\"x86-ubuntu-18.04-img\")"
     },
@@ -49,7 +49,7 @@
         "source_url": "https://github.com/gem5/gem5-resources/tree/develop/src/x86-ubuntu",
         "resource_version": "1.1.0",
         "gem5_versions": [
-            "23.0"
+            "develop"
         ],
         "example_usage": "get_resource(resource_name=\"x86-ubuntu-18.04-img\")"
     }
diff --git a/tests/pyunit/stdlib/resources/refs/obtain-resource.json b/tests/pyunit/stdlib/resources/refs/obtain-resource.json
index fac95e106a..9125bf4ae6 100644
--- a/tests/pyunit/stdlib/resources/refs/obtain-resource.json
+++ b/tests/pyunit/stdlib/resources/refs/obtain-resource.json
@@ -24,7 +24,7 @@
         "source": "src/test-source",
         "resource_version": "2.0.0",
         "gem5_versions": [
-            "develop"
+            "23.0"
         ]
     },
     {
@@ -38,7 +38,8 @@
         "source": "src/test-source",
         "resource_version": "1.7.0",
         "gem5_versions": [
-            "develop"
+            "develop",
+            "develop-2"
         ]
     },
     {
@@ -52,8 +53,7 @@
         "source": "src/test-source",
         "resource_version": "1.5.0",
         "gem5_versions": [
-            "21.1",
-            "22.1"
+            "develop"
         ]
     }
 ]
diff --git a/tests/pyunit/stdlib/resources/refs/resource-specialization.json b/tests/pyunit/stdlib/resources/refs/resource-specialization.json
index 1129f1bd05..414bf73b11 100644
--- a/tests/pyunit/stdlib/resources/refs/resource-specialization.json
+++ b/tests/pyunit/stdlib/resources/refs/resource-specialization.json
@@ -10,6 +10,7 @@
         "source": "src/linux-kernel",
         "resource_version": "1.0.0",
         "gem5_versions": [
+            "develop",
             "23.0"
         ]
     },
@@ -25,6 +26,7 @@
         "root_partition": "1",
         "resource_version": "1.0.0",
         "gem5_versions": [
+            "develop",
             "23.0"
         ]
     },
@@ -39,6 +41,7 @@
         "source": "src/simple",
         "resource_version": "1.0.0",
         "gem5_versions": [
+            "develop",
             "23.0"
         ]
     },
@@ -51,6 +54,7 @@
         "url": "http://dist.gem5.org/dist/develop/test-progs/hello/bin/arm/linux/hello64-static",
         "resource_version": "1.0.0",
         "gem5_versions": [
+            "develop",
             "23.0"
         ]
     },
@@ -66,6 +70,7 @@
         "url": "http://dist.gem5.org/dist/develop/checkpoints/riscv-hello-example-checkpoint.tar",
         "resource_version": "1.0.0",
         "gem5_versions": [
+            "develop",
             "23.0"
         ]
     },
@@ -75,10 +80,11 @@
         "description": null,
         "is_zipped": false,
         "is_tar_archive": true,
-        "md5sum": "71b2cb004fe2cda4556f0b1a38638af6",
+        "md5sum": "3a57c1bb1077176c4587b8a3bf4f8ace",
         "url": "http://dist.gem5.org/dist/develop/checkpoints/riscv-hello-example-checkpoint.tar",
         "resource_version": "1.0.0",
         "gem5_versions": [
+            "develop",
             "23.0"
         ]
     },
@@ -87,11 +93,12 @@
         "id": "file-example",
         "description": null,
         "is_zipped": false,
-        "md5sum": "71b2cb004fe2cda4556f0b1a38638af6",
+        "md5sum": "2efd144c11829ab18d54eae6371e120a",
         "url": "http://dist.gem5.org/dist/develop/checkpoints/riscv-hello-example-checkpoint.tar",
         "source": null,
         "resource_version": "1.0.0",
         "gem5_versions": [
+            "develop",
             "23.0"
         ]
     },
@@ -106,6 +113,7 @@
         "url": "http://dist.gem5.org/dist/develop/checkpoints/riscv-hello-example-checkpoint.tar",
         "resource_version": "1.0.0",
         "gem5_versions": [
+            "develop",
             "23.0"
         ]
     },
@@ -125,6 +133,7 @@
         "workload_name": "Example Workload",
         "resource_version": "1.0.0",
         "gem5_versions": [
+            "develop",
             "23.0"
         ]
     },
@@ -148,6 +157,7 @@
         ],
         "resource_version": "1.0.0",
         "gem5_versions": [
+            "develop",
             "23.0"
         ]
     },
@@ -161,6 +171,7 @@
         "source": null,
         "resource_version": "1.0.0",
         "gem5_versions": [
+            "develop",
             "23.0"
         ]
     },
@@ -170,11 +181,12 @@
         "description": "A looppoint json file resource.",
         "is_zipped": false,
         "region_id": "1",
-        "md5sum": "a71ed64908b082ea619b26b940a643c1",
+        "md5sum": "efb85ebdf90c5cee655bf2e05ae7692a",
         "url": "http://dist.gem5.org/dist/develop/looppoints/x86-matrix-multiply-omp-100-8-looppoint-json-20230128",
         "source": null,
         "resource_version": "1.0.0",
         "gem5_versions": [
+            "develop",
             "23.0"
         ]
     }
diff --git a/tests/pyunit/stdlib/resources/refs/resources.json b/tests/pyunit/stdlib/resources/refs/resources.json
index 812caeff43..56930f37d5 100644
--- a/tests/pyunit/stdlib/resources/refs/resources.json
+++ b/tests/pyunit/stdlib/resources/refs/resources.json
@@ -21,7 +21,7 @@
         "source_url": "https://github.com/gem5/gem5-resources/tree/develop/src/asmtest",
         "resource_version": "1.0.0",
         "gem5_versions": [
-            "23.0"
+            "develop"
         ],
         "example_usage": "get_resource(resource_name=\"rv64mi-p-sbreak\")"
     },
@@ -48,7 +48,7 @@
         "source_url": "https://github.com/gem5/gem5-resources/tree/develop/src/asmtest",
         "resource_version": "1.1.0",
         "gem5_versions": [
-            "23.0"
+            "develop"
         ],
         "example_usage": "get_resource(resource_name=\"rv64mi-p-sbreak\")"
     },
@@ -71,7 +71,7 @@
         "source_url": "https://github.com/gem5/gem5-resources/tree/develop/src/asmtest",
         "resource_version": "2.0.0",
         "gem5_versions": [
-            "23.1"
+            "999.1"
         ],
         "example_usage": "get_resource(resource_name=\"rv64mi-p-sbreak\")"
     },
@@ -94,7 +94,7 @@
         "source_url": "",
         "resource_version": "1.0.0",
         "gem5_versions": [
-            "23.0"
+            "develop"
         ],
         "workload_name": "x86-print-this-15000-with-simpoints",
         "example_usage": "get_resource(resource_name=\"x86-print-this-1500-simpoints\")",
@@ -122,7 +122,7 @@
         "source_url": "",
         "resource_version": "0.2.0",
         "gem5_versions": [
-            "23.0"
+            "develop"
         ],
         "workload_name": "x86-print-this-15000-with-simpoints",
         "example_usage": "get_resource(resource_name=\"x86-print-this-1500-simpoints\")",
@@ -150,7 +150,7 @@
         "source_url": "",
         "resource_version": "0.2.0",
         "gem5_versions": [
-            "23.0"
+            "develop"
         ],
         "workload_name": "x86-print-this-15000-with-simpoints",
         "example_usage": "get_resource(resource_name=\"x86-print-this-1500-simpoints\")",
@@ -178,7 +178,7 @@
         "source_url": "",
         "resource_version": "0.2.0",
         "gem5_versions": [
-            "23.0"
+            "develop"
         ],
         "workload_name": "x86-print-this-15000-with-simpoints",
         "example_usage": "get_resource(resource_name=\"x86-print-this-1500-simpoints\")",
@@ -206,7 +206,7 @@
         "source_url": "",
         "resource_version": "0.2.0",
         "gem5_versions": [
-            "23.0"
+            "develop"
         ],
         "workload_name": "x86-print-this-15000-with-simpoints",
         "example_usage": "get_resource(resource_name=\"x86-print-this-1500-simpoints\")",
@@ -234,7 +234,7 @@
         "source_url": "",
         "resource_version": "0.2.0",
         "gem5_versions": [
-            "23.0"
+            "develop"
         ],
         "workload_name": "x86-print-this-15000-with-simpoints",
         "example_usage": "get_resource(resource_name=\"x86-print-this-1500-simpoints\")",
@@ -262,7 +262,7 @@
         "source_url": "",
         "resource_version": "0.2.0",
         "gem5_versions": [
-            "23.0"
+            "develop"
         ],
         "workload_name": "x86-print-this-15000-with-simpoints",
         "example_usage": "get_resource(resource_name=\"x86-print-this-1500-simpoints\")",
@@ -290,7 +290,7 @@
         "source_url": "",
         "resource_version": "0.2.0",
         "gem5_versions": [
-            "23.0"
+            "develop"
         ],
         "workload_name": "x86-print-this-15000-with-simpoints",
         "example_usage": "get_resource(resource_name=\"x86-print-this-1500-simpoints\")",
@@ -322,7 +322,7 @@
         "source_url": "https://github.com/gem5/gem5-resources/tree/develop/src/x86-ubuntu",
         "resource_version": "2.0.0",
         "gem5_versions": [
-            "23.0"
+            "develop"
         ],
         "example_usage": "get_resource(resource_name=\"x86-ubuntu-18.04-img\")"
     }
diff --git a/tests/pyunit/stdlib/resources/refs/workload-checks-custom-workload.json b/tests/pyunit/stdlib/resources/refs/workload-checks-custom-workload.json
deleted file mode 100644
index a7e9c9d84f..0000000000
--- a/tests/pyunit/stdlib/resources/refs/workload-checks-custom-workload.json
+++ /dev/null
@@ -1,16 +0,0 @@
-[
-    {
-        "category": "binary",
-        "id": "x86-hello64-static",
-        "description": "A 'Hello World!' binary.",
-        "architecture": "X86",
-        "is_zipped": false,
-        "md5sum": "dbf120338b37153e3334603970cebd8c",
-        "url": "http://dist.gem5.org/dist/develop/test-progs/hello/bin/x86/linux/hello64-static",
-        "source": "src/simple",
-        "resource_version": "1.0.0",
-        "gem5_versions": [
-            "23.0"
-        ]
-    }
-]
diff --git a/tests/pyunit/stdlib/resources/refs/workload-checks.json b/tests/pyunit/stdlib/resources/refs/workload-checks.json
index d41001d26c..dcb8577619 100644
--- a/tests/pyunit/stdlib/resources/refs/workload-checks.json
+++ b/tests/pyunit/stdlib/resources/refs/workload-checks.json
@@ -10,7 +10,7 @@
         "source": "src/linux-kernel",
         "resource_version": "1.0.0",
         "gem5_versions": [
-            "23.0"
+            "develop"
         ]
     },
     {
@@ -25,7 +25,7 @@
         "root_partition": "1",
         "resource_version": "1.0.0",
         "gem5_versions": [
-            "23.0"
+            "develop"
         ]
     },
     {
@@ -42,7 +42,21 @@
         },
         "resource_version": "1.0.0",
         "gem5_versions": [
-            "23.0"
+            "develop"
+        ]
+    },
+    {
+        "category": "binary",
+        "id": "x86-hello64-static",
+        "description": "A 'Hello World!' binary.",
+        "architecture": "X86",
+        "is_zipped": false,
+        "md5sum": "dbf120338b37153e3334603970cebd8c",
+        "url": "http://dist.gem5.org/dist/develop/test-progs/hello/bin/x86/linux/hello64-static",
+        "source": "src/simple",
+        "resource_version": "1.0.0",
+        "gem5_versions": [
+            "develop"
         ]
     }
 ]

From a63d376ecd4debd60f89fa2e0592dac6f9addae2 Mon Sep 17 00:00:00 2001
From: "Bobby R. Bruce" <bbruce@ucdavis.edu>
Date: Tue, 13 Jun 2023 17:42:23 -0700
Subject: [PATCH 481/492] python: Remove Python 'pipes' module

This is scheduled for removal from Python in 3.13:
https://docs.python.org/3/library/pipes.html.

The 'shlex.quote' function can replace the 'pipes.quote' function used
in "main.py". A special wrapper has been made to account for the Windows
case which 'shlex.quote' doesn't handle.

Change-Id: I9c84605f0ccd8468b9cab6cece6248ef8c2107f0
Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/71678
Maintainer: Jason Lowe-Power <power.jg@gmail.com>
Tested-by: kokoro <noreply+kokoro@google.com>
Reviewed-by: Jason Lowe-Power <power.jg@gmail.com>
---
 src/python/m5/main.py | 19 ++++++++++++++++---
 1 file changed, 16 insertions(+), 3 deletions(-)

diff --git a/src/python/m5/main.py b/src/python/m5/main.py
index 4701dfa5ea..ddcb024f8b 100644
--- a/src/python/m5/main.py
+++ b/src/python/m5/main.py
@@ -496,10 +496,23 @@ def main():
             % (socket.gethostname(), os.getpid())
         )
 
-        # in Python 3 pipes.quote() is moved to shlex.quote()
-        import pipes
+        def quote(arg: str) -> str:
+            """Quotes a string for printing in a shell. In addition to Unix,
+            this is designed to handle the problematic Windows cases where
+            'shlex.quote' doesn't work"""
 
-        print("command line:", " ".join(map(pipes.quote, sys.argv)))
+            if os.name == "nt" and os.sep == "\\":
+                # If a Windows machine, we manually quote the string.
+                arg = arg.replace('"', '\\"')
+                if re.search("\s", args):
+                    # We quote args which have whitespace.
+                    arg = '"' + arg + '"'
+                return arg
+            import shlex
+
+            return shlex.quote(arg)
+
+        print("command line:", " ".join(map(quote, sys.argv)))
         print()
 
     # check to make sure we can find the listed script

From b182b15f93621206c87c6c760cdfc1f5df1877cf Mon Sep 17 00:00:00 2001
From: "Bobby R. Bruce" <bbruce@ucdavis.edu>
Date: Tue, 13 Jun 2023 18:14:38 -0700
Subject: [PATCH 482/492] scons,stdlib: Remove deprecated 'distutils' module

The Python module 'distutils' will be removed in Python 3.12:
https://docs.python.org/3/library/distutils.html

This patch removed usage of 'distutils' in the gem5 code base.

Change-Id: I1e3a944446149f3cd6cbf4211a1565b5f74c85a0
Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/71679
Tested-by: kokoro <noreply+kokoro@google.com>
Maintainer: Jason Lowe-Power <power.jg@gmail.com>
Reviewed-by: Jason Lowe-Power <power.jg@gmail.com>
---
 src/SConscript                                |  4 +--
 .../resources/client_api/client_wrapper.py    | 26 ++++++++++++++-----
 2 files changed, 22 insertions(+), 8 deletions(-)

diff --git a/src/SConscript b/src/SConscript
index d26bf49cce..1b4430327c 100644
--- a/src/SConscript
+++ b/src/SConscript
@@ -39,7 +39,7 @@
 
 import collections
 import copy
-import distutils.spawn
+from shutil import which
 import itertools
 import os
 import os.path
@@ -269,7 +269,7 @@ def ProtoBuf(source, tags=None, add_tags=None):
     '''Add a Protocol Buffer to build'''
     Source(source, tags, add_tags, append={'CXXFLAGS': '-Wno-array-bounds'})
 
-env['PROTOC_GRPC'] = distutils.spawn.find_executable('grpc_cpp_plugin')
+env['PROTOC_GRPC'] = which('grpc_cpp_plugin')
 if env['PROTOC_GRPC']:
     with Configure(env) as conf:
         if (not env['HAVE_PKG_CONFIG'] or
diff --git a/src/python/gem5/resources/client_api/client_wrapper.py b/src/python/gem5/resources/client_api/client_wrapper.py
index ebf1b8ec2b..d2baabc52d 100644
--- a/src/python/gem5/resources/client_api/client_wrapper.py
+++ b/src/python/gem5/resources/client_api/client_wrapper.py
@@ -27,8 +27,7 @@
 from .jsonclient import JSONClient
 from .atlasclient import AtlasClient
 from _m5 import core
-from typing import Optional, Dict, List
-from distutils.version import StrictVersion
+from typing import Optional, Dict, List, Tuple
 import itertools
 from m5.util import warn
 
@@ -247,12 +246,27 @@ class ClientWrapper:
         :param resources: A list of resources to sort.
         :return: A list of sorted resources.
         """
+
+        def sort_tuple(resource: Dict) -> Tuple:
+            """This is used for sorting resources by ID and version. First
+            the ID is sorted, then the version. In cases where the version
+            contains periods, it's assumed this is to separate a
+            "major.minor.hotfix" style versioning system. In which case, the
+            value separated in the most-significant position is sorted before
+            those less significant. If the value is a digit it is cast as an
+            int, otherwise, it is cast as a string, to lower-case.
+            """
+            to_return = (resource["id"].lower(),)
+            for val in resource["resource_version"].split("."):
+                if val.isdigit():
+                    to_return += (int(val),)
+                else:
+                    to_return += (str(val).lower(),)
+            return to_return
+
         return sorted(
             resources,
-            key=lambda resource: (
-                resource["id"].lower(),
-                StrictVersion(resource["resource_version"]),
-            ),
+            key=lambda resource: sort_tuple(resource),
             reverse=True,
         )
 

From d54b8f8475495213f5c2e69aadd7886c69bcd53c Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Adri=C3=A0=20Armejach?= <adria.armejach@bsc.es>
Date: Wed, 7 Jun 2023 17:01:03 +0200
Subject: [PATCH 483/492] arch-riscv: fix load reserved store conditional

  * According to the manual, load reservations must be cleared on a
    failed or a successful SC attempt.
  * A load reservation can be arbitrarily large. The current
    implementation was reserving something different than cacheBlockSize
    which could lead to problems if snoop addresses are cache block
    aligned. This patch implementation assumes a cacheBlock granularity.
  * Load reservations should also be cleared on faults

Change-Id: I64513534710b5f269260fcb204f717801913e2f5
Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/71520
Maintainer: Bobby Bruce <bbruce@ucdavis.edu>
Tested-by: kokoro <noreply+kokoro@google.com>
Reviewed-by: Bobby Bruce <bbruce@ucdavis.edu>
---
 src/arch/generic/isa.hh  |  1 +
 src/arch/riscv/faults.cc |  3 +++
 src/arch/riscv/isa.cc    | 29 ++++++++++++++++++-----------
 src/arch/riscv/isa.hh    | 12 ++++++++++++
 4 files changed, 34 insertions(+), 11 deletions(-)

diff --git a/src/arch/generic/isa.hh b/src/arch/generic/isa.hh
index e9e4d95d7b..58f66fc99b 100644
--- a/src/arch/generic/isa.hh
+++ b/src/arch/generic/isa.hh
@@ -70,6 +70,7 @@ class BaseISA : public SimObject
   public:
     virtual PCStateBase *newPCState(Addr new_inst_addr=0) const = 0;
     virtual void clear() {}
+    virtual void clearLoadReservation(ContextID cid) {}
 
     virtual RegVal readMiscRegNoEffect(RegIndex idx) const = 0;
     virtual RegVal readMiscReg(RegIndex idx) = 0;
diff --git a/src/arch/riscv/faults.cc b/src/arch/riscv/faults.cc
index 940f7107ba..8fb8f81261 100644
--- a/src/arch/riscv/faults.cc
+++ b/src/arch/riscv/faults.cc
@@ -153,6 +153,9 @@ RiscvFault::invoke(ThreadContext *tc, const StaticInstPtr &inst)
             tc->setMiscReg(MISCREG_NMIE, 0);
         }
 
+        // Clear load reservation address
+        tc->getIsaPtr()->clearLoadReservation(tc->contextId());
+
         // Set PC to fault handler address
         Addr addr = mbits(tc->readMiscReg(tvec), 63, 2);
         if (isInterrupt() && bits(tc->readMiscReg(tvec), 1, 0) == 1)
diff --git a/src/arch/riscv/isa.cc b/src/arch/riscv/isa.cc
index d744fe369b..94a8239bac 100644
--- a/src/arch/riscv/isa.cc
+++ b/src/arch/riscv/isa.cc
@@ -672,11 +672,6 @@ ISA::unserialize(CheckpointIn &cp)
     UNSERIALIZE_CONTAINER(miscRegFile);
 }
 
-const int WARN_FAILURE = 10000;
-
-const Addr INVALID_RESERVATION_ADDR = (Addr) -1;
-std::unordered_map<int, Addr> load_reservation_addrs;
-
 void
 ISA::handleLockedSnoop(PacketPtr pkt, Addr cacheBlockMask)
 {
@@ -696,9 +691,9 @@ ISA::handleLockedRead(const RequestPtr &req)
 {
     Addr& load_reservation_addr = load_reservation_addrs[tc->contextId()];
 
-    load_reservation_addr = req->getPaddr() & ~0xF;
+    load_reservation_addr = req->getPaddr();
     DPRINTF(LLSC, "[cid:%d]: Reserved address %x.\n",
-            req->contextId(), req->getPaddr() & ~0xF);
+            req->contextId(), req->getPaddr());
 }
 
 bool
@@ -717,12 +712,13 @@ ISA::handleLockedWrite(const RequestPtr &req, Addr cacheBlockMask)
             lr_addr_empty ? "yes" : "no");
     if (!lr_addr_empty) {
         DPRINTF(LLSC, "[cid:%d]: addr = %x.\n", req->contextId(),
-                req->getPaddr() & ~0xF);
+                req->getPaddr() & cacheBlockMask);
         DPRINTF(LLSC, "[cid:%d]: last locked addr = %x.\n", req->contextId(),
-                load_reservation_addr);
+                load_reservation_addr & cacheBlockMask);
     }
-    if (lr_addr_empty
-            || load_reservation_addr != ((req->getPaddr() & ~0xF))) {
+    if (lr_addr_empty ||
+            (load_reservation_addr & cacheBlockMask)
+            != ((req->getPaddr() & cacheBlockMask))) {
         req->setExtraData(0);
         int stCondFailures = tc->readStCondFailures();
         tc->setStCondFailures(++stCondFailures);
@@ -730,12 +726,21 @@ ISA::handleLockedWrite(const RequestPtr &req, Addr cacheBlockMask)
             warn("%i: context %d: %d consecutive SC failures.\n",
                     curTick(), tc->contextId(), stCondFailures);
         }
+
+        // Must clear any reservations
+        load_reservation_addr = INVALID_RESERVATION_ADDR;
+
         return false;
     }
     if (req->isUncacheable()) {
         req->setExtraData(2);
     }
 
+    // Must clear any reservations
+    load_reservation_addr = INVALID_RESERVATION_ADDR;
+
+    DPRINTF(LLSC, "[cid:%d]: SC success! Current locked addr = %x.\n",
+            req->contextId(), load_reservation_addr & cacheBlockMask);
     return true;
 }
 
@@ -743,6 +748,8 @@ void
 ISA::globalClearExclusive()
 {
     tc->getCpuPtr()->wakeup(tc->threadId());
+    Addr& load_reservation_addr = load_reservation_addrs[tc->contextId()];
+    load_reservation_addr = INVALID_RESERVATION_ADDR;
 }
 
 void
diff --git a/src/arch/riscv/isa.hh b/src/arch/riscv/isa.hh
index 5a2a610479..7ef5c526f5 100644
--- a/src/arch/riscv/isa.hh
+++ b/src/arch/riscv/isa.hh
@@ -76,6 +76,11 @@ class ISA : public BaseISA
 
     bool hpmCounterEnabled(int counter) const;
 
+    // Load reserve - store conditional monitor
+    const int WARN_FAILURE = 10000;
+    const Addr INVALID_RESERVATION_ADDR = (Addr)-1;
+    std::unordered_map<int, Addr> load_reservation_addrs;
+
   public:
     using Params = RiscvISAParams;
 
@@ -87,6 +92,13 @@ class ISA : public BaseISA
         return new PCState(new_inst_addr, rv_type);
     }
 
+    void
+    clearLoadReservation(ContextID cid) override
+    {
+        Addr& load_reservation_addr = load_reservation_addrs[cid];
+        load_reservation_addr = INVALID_RESERVATION_ADDR;
+    }
+
   public:
     RegVal readMiscRegNoEffect(RegIndex idx) const override;
     RegVal readMiscReg(RegIndex idx) override;

From 04b4a4705a2c0bb937172169e3634e1b3352c779 Mon Sep 17 00:00:00 2001
From: "Bobby R. Bruce" <bbruce@ucdavis.edu>
Date: Mon, 3 Jul 2023 10:03:57 -0700
Subject: [PATCH 484/492] stdlib: Change default gem5-resources DB collection

This was set to "test_collection", which was used during development.
Changing to "resources".

Change-Id: I52c83c6b73f3a227fbb05dc321a4bc38210ad71c
Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/72158
Maintainer: Bobby Bruce <bbruce@ucdavis.edu>
Tested-by: kokoro <noreply+kokoro@google.com>
Reviewed-by: Bobby Bruce <bbruce@ucdavis.edu>
---
 src/python/gem5_default_config.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/python/gem5_default_config.py b/src/python/gem5_default_config.py
index 980c883109..6b689f58d5 100644
--- a/src/python/gem5_default_config.py
+++ b/src/python/gem5_default_config.py
@@ -29,7 +29,7 @@ config = {
         "gem5-resources": {
             "dataSource": "gem5-vision",
             "database": "gem5-vision",
-            "collection": "versions_test",
+            "collection": "resources",
             "url": "https://data.mongodb-api.com/app/data-ejhjf/endpoint/data/v1",
             "authUrl": "https://realm.mongodb.com/api/client/v2.0/app/data-ejhjf/auth/providers/api-key/login",
             "apiKey": "OIi5bAP7xxIGK782t8ZoiD2BkBGEzMdX3upChf9zdCxHSnMoiTnjI22Yw5kOSgy9",

From 4cf6695bc7bceb1505625380f0519b5a361378eb Mon Sep 17 00:00:00 2001
From: Jason Lowe-Power <jason@lowepower.com>
Date: Sun, 18 Jun 2023 14:58:24 -0400
Subject: [PATCH 485/492] misc: Add release notes for v23.0

Change-Id: I003f170339e69a445586fe0486a1db595a10683f
Signed-off-by: Jason Lowe-Power <jason@lowepower.com>
---
 RELEASE-NOTES.md | 118 +++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 118 insertions(+)

diff --git a/RELEASE-NOTES.md b/RELEASE-NOTES.md
index 931be695ba..0c158ee245 100644
--- a/RELEASE-NOTES.md
+++ b/RELEASE-NOTES.md
@@ -1,3 +1,121 @@
+# Version 23.0
+
+This release has approximately 500 contributions from 50 unique contributors.
+Below we highlight key gem5 features and improvements in this release.
+
+## Significant API and user-facing changes
+
+### Major renaming of CPU stats
+
+The CPU stats have been renamed.
+See <https://gem5.atlassian.net/browse/GEM5-1304> for details.
+
+Now, each stage (fetch, execute, commit) have their own stat group.
+Stats that are shared between the different CPU model (O3, Minor, Simple) now have the exact same names.
+
+**Important:** Some stat names were misleading before this change.
+With this change, stats with the same names between different CPU models have the same meaning.
+
+### `fs.py` and `se.py` deprecated
+
+These scripts have not been well supported for many gem5 releases.
+With gem5 23.0, we have officially deprecated these scripts.
+They have been moved into the `deprecated` directory, **but they will be removed in a future release.**
+As a replacement, we strongly suggest using the gem5 standard library.
+See <https://www.gem5.org/documentation/gem5-stdlib/overview> for more information.
+
+### Renaming of `DEBUG` guard into `GEM5_DEBUG`
+
+Scons no longer defines the `DEBUG` guard in debug builds, so code making using of it should use `GEM5_DEBUG` instead.
+
+### Other API changes
+
+Also, this release:
+
+- Removes deprecated namespaces. Namespace names were updated a couple of releases ago. This release removes the old names.
+- Uses `MemberEventWrapper` in favor of `EventWrapper` for instance member functions.
+- Adds an extension mechanism to `Packet` and `Request`.
+- Sets x86 CPU vendor string to "HygoneGenuine" to better support GLIBC.
+
+## New features and improvements
+
+### Large improvements to gem5 resources and gem5 resources website
+
+We now have a new web portal for the gem5 resources: <https://resources.gem5.org>
+
+This web portal will allow users to browse the resources available (e.g., disk images, kernels, workloads, binaries, simpoints, etc.) to use out-of-the-box with the gem5 standard library.
+You can filter based on architecture, resource type, and compatible gem5 versions.
+
+For each resource, there are examples of how to use the resource and pointers to examples using the resource in the gem5 codebase.
+
+More information can be found on gem5's website: <https://www.gem5.org/documentation/general_docs/gem5_resources/>
+
+We will be expanding gem5 resources with more workloads and resources over the course of the next release.
+If you would like to contribute to gem5 resources by uploading your own workloads, disk images, etc., please create an issue on GitHub.
+
+In addition to the new gem5 Resources web portal, the gem5 Resources API has been significantly updated and improved.
+There are now much simpler functions for getting resources such as `obtain_resource(<name>)` that will download the resource by name and return a reference that can be used (e.g., as a binary in `set_se_workload` function on the board).
+As such the generic `Resouce` class has been deprecated and will be removed in a future release.
+
+Resources are now specialized for their particular category.
+For example, there is now a `BinaryResource` class which will return if a user specifies a binary resource when using the `obtain_resource` function.
+This allow for resource typing and for greater resource specialization.
+
+### Arm ISA improvements
+
+Architectural support for Armv9 [Scalable Matrix extension](https://developer.arm.com/documentation/ddi0616/latest) (FEAT_SME).
+The implementation employs a simple renaming scheme for the Za array register in the O3 CPU, so that writes to difference tiles in the register are considered a dependency and are therefore serialized.
+
+The following SVE and SIMD & FP extensions have also been implemented:
+* FEAT_F64MM
+* FEAT_F32MM
+* FEAT_DOTPROD
+* FEAT_I8MM
+
+And more generally:
+
+* FEAT_TLBIOS
+* FEAT_FLAGM
+* FEAT_FLAGM2
+* FEAT_RNG
+* FEAT_RNG_TRAP
+* FEAT_EVT
+
+### Support for DRAMSys
+
+gem5 can now use DRAMSys <https://github.com/tukl-msd/DRAMSys> as a DRAM backend.
+
+### RISC-V improvements
+
+This release:
+
+- Fully implements RISC-V scalar cryptography extensions.
+- Fully implement RISC-V rv32.
+- Implements PMP lock features.
+- Adds general RISC-V improvements to provide better stability.
+
+### Standard library improvements and new components
+
+This release:
+
+- Adds MESI_Three_Level component.
+- Supports ELFies and LoopPoint analysis output from Sniper.
+- Supports DRAMSys in the stdlib.
+
+## Bugfixes and other small improvements
+
+This release also:
+
+- Removes deprecated python libraries.
+- Adds a DDR5 model.
+- Adds AMD GPU MI200/gfx90a support.
+- Changes building so it no longer "duplicates sources" in build/ which improves support for some IDEs and code analysis. If you still need to duplicate sources you can use the `--duplicate-sources` option to `scons`.
+- Enables `--debug-activate=<object name>` to use debug trace for only a single SimObject (the opposite of `--debug-ignore`). See `--debug-help` for more information.
+- Adds support to exit the simulation loop based on Arm-PMU events.
+- Supports Python 3.11.
+- Adds the idea of a CpuCluster to gem5.
+
+
 # Version 22.1.0.0
 
 This release has 500 contributions from 48 unique contributors and marks our second major release of 2022.

From 818c2d15a229ef87e310a1dd1180980787fff3cc Mon Sep 17 00:00:00 2001
From: Matthew Poremba <matthew.poremba@amd.com>
Date: Thu, 29 Jun 2023 16:48:48 -0700
Subject: [PATCH 486/492] configs: Create base GPUFS vega config and atomic
 config

Move the Vega KVM script code to a common base file and add scripts for
KVM and atomic. Since atomic is now possible in GPUFS this gives a way
to run it without editing the current scripts.

Change-Id: I094bc4d4df856563535c28c1f6d6cc045d6734cd
Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/71939
Tested-by: kokoro <noreply+kokoro@google.com>
Reviewed-by: Matt Sinclair <mattdsinclair@gmail.com>
Maintainer: Matt Sinclair <mattdsinclair@gmail.com>
(cherry picked from commit 05ffa354268301821518fd430d069cd23049527d)
Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/72078
Reviewed-by: Bobby Bruce <bbruce@ucdavis.edu>
Maintainer: Bobby Bruce <bbruce@ucdavis.edu>
---
 configs/example/gpufs/vega10.py        | 153 +++++++++++++++++++++++++
 configs/example/gpufs/vega10_atomic.py |  32 ++++++
 configs/example/gpufs/vega10_kvm.py    | 127 +-------------------
 3 files changed, 188 insertions(+), 124 deletions(-)
 create mode 100644 configs/example/gpufs/vega10.py
 create mode 100644 configs/example/gpufs/vega10_atomic.py

diff --git a/configs/example/gpufs/vega10.py b/configs/example/gpufs/vega10.py
new file mode 100644
index 0000000000..9eff5a2974
--- /dev/null
+++ b/configs/example/gpufs/vega10.py
@@ -0,0 +1,153 @@
+# Copyright (c) 2022-2023 Advanced Micro Devices, Inc.
+# All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are met:
+#
+# 1. Redistributions of source code must retain the above copyright notice,
+# this list of conditions and the following disclaimer.
+#
+# 2. Redistributions in binary form must reproduce the above copyright notice,
+# this list of conditions and the following disclaimer in the documentation
+# and/or other materials provided with the distribution.
+#
+# 3. Neither the name of the copyright holder nor the names of its
+# contributors may be used to endorse or promote products derived from this
+# software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+# ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
+# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+# POSSIBILITY OF SUCH DAMAGE.
+
+import m5
+import runfs
+import base64
+import tempfile
+import argparse
+import sys
+import os
+
+from amd import AmdGPUOptions
+from common import Options
+from common import GPUTLBOptions
+from ruby import Ruby
+
+
+demo_runscript_without_checkpoint = """\
+export LD_LIBRARY_PATH=/opt/rocm/lib:$LD_LIBRARY_PATH
+export HSA_ENABLE_INTERRUPT=0
+dmesg -n8
+dd if=/root/roms/vega10.rom of=/dev/mem bs=1k seek=768 count=128
+if [ ! -f /lib/modules/`uname -r`/updates/dkms/amdgpu.ko ]; then
+    echo "ERROR: Missing DKMS package for kernel `uname -r`. Exiting gem5."
+    /sbin/m5 exit
+fi
+modprobe -v amdgpu ip_block_mask=0xff ppfeaturemask=0 dpm=0 audio=0
+echo "Running {} {}"
+echo "{}" | base64 -d > myapp
+chmod +x myapp
+./myapp {}
+/sbin/m5 exit
+"""
+
+demo_runscript_with_checkpoint = """\
+export LD_LIBRARY_PATH=/opt/rocm/lib:$LD_LIBRARY_PATH
+export HSA_ENABLE_INTERRUPT=0
+dmesg -n8
+dd if=/root/roms/vega10.rom of=/dev/mem bs=1k seek=768 count=128
+if [ ! -f /lib/modules/`uname -r`/updates/dkms/amdgpu.ko ]; then
+    echo "ERROR: Missing DKMS package for kernel `uname -r`. Exiting gem5."
+    /sbin/m5 exit
+fi
+modprobe -v amdgpu ip_block_mask=0xff ppfeaturemask=0 dpm=0 audio=0
+echo "Running {} {}"
+echo "{}" | base64 -d > myapp
+chmod +x myapp
+/sbin/m5 checkpoint
+./myapp {}
+/sbin/m5 exit
+"""
+
+
+def addDemoOptions(parser):
+    parser.add_argument(
+        "-a", "--app", default=None, help="GPU application to run"
+    )
+    parser.add_argument(
+        "-o", "--opts", default="", help="GPU application arguments"
+    )
+
+
+def runVegaGPUFS(cpu_type):
+    parser = argparse.ArgumentParser()
+    runfs.addRunFSOptions(parser)
+    Options.addCommonOptions(parser)
+    AmdGPUOptions.addAmdGPUOptions(parser)
+    Ruby.define_options(parser)
+    GPUTLBOptions.tlb_options(parser)
+    addDemoOptions(parser)
+
+    # Parse now so we can override options
+    args = parser.parse_args()
+    demo_runscript = ""
+
+    # Create temp script to run application
+    if args.app is None:
+        print(f"No application given. Use {sys.argv[0]} -a <app>")
+        sys.exit(1)
+    elif args.kernel is None:
+        print(f"No kernel path given. Use {sys.argv[0]} --kernel <vmlinux>")
+        sys.exit(1)
+    elif args.disk_image is None:
+        print(f"No disk path given. Use {sys.argv[0]} --disk-image <linux>")
+        sys.exit(1)
+    elif args.gpu_mmio_trace is None:
+        print(f"No MMIO trace path. Use {sys.argv[0]} --gpu-mmio-trace <path>")
+        sys.exit(1)
+    elif not os.path.isfile(args.app):
+        print("Could not find applcation", args.app)
+        sys.exit(1)
+
+    # Choose runscript Based on whether any checkpointing args are set
+    if args.checkpoint_dir is not None:
+        demo_runscript = demo_runscript_with_checkpoint
+    else:
+        demo_runscript = demo_runscript_without_checkpoint
+
+    with open(os.path.abspath(args.app), "rb") as binfile:
+        encodedBin = base64.b64encode(binfile.read()).decode()
+
+    _, tempRunscript = tempfile.mkstemp()
+    with open(tempRunscript, "w") as b64file:
+        runscriptStr = demo_runscript.format(
+            args.app, args.opts, encodedBin, args.opts
+        )
+        b64file.write(runscriptStr)
+
+    if args.second_disk == None:
+        args.second_disk = args.disk_image
+
+    # Defaults for Vega10
+    args.ruby = True
+    args.cpu_type = cpu_type
+    args.num_cpus = 1
+    args.mem_size = "3GB"
+    args.dgpu = True
+    args.dgpu_mem_size = "16GB"
+    args.dgpu_start = "0GB"
+    args.checkpoint_restore = 0
+    args.disjoint = True
+    args.timing_gpu = True
+    args.script = tempRunscript
+    args.dgpu_xor_low_bit = 0
+
+    # Run gem5
+    runfs.runGpuFSSystem(args)
diff --git a/configs/example/gpufs/vega10_atomic.py b/configs/example/gpufs/vega10_atomic.py
new file mode 100644
index 0000000000..4ff2cc2e72
--- /dev/null
+++ b/configs/example/gpufs/vega10_atomic.py
@@ -0,0 +1,32 @@
+# Copyright (c) 2023 Advanced Micro Devices, Inc.
+# All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are met:
+#
+# 1. Redistributions of source code must retain the above copyright notice,
+# this list of conditions and the following disclaimer.
+#
+# 2. Redistributions in binary form must reproduce the above copyright notice,
+# this list of conditions and the following disclaimer in the documentation
+# and/or other materials provided with the distribution.
+#
+# 3. Neither the name of the copyright holder nor the names of its
+# contributors may be used to endorse or promote products derived from this
+# software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+# ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
+# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+# POSSIBILITY OF SUCH DAMAGE.
+
+import vega10
+
+vega10.runVegaGPUFS("AtomicSimpleCPU")
diff --git a/configs/example/gpufs/vega10_kvm.py b/configs/example/gpufs/vega10_kvm.py
index 11f9fe2f80..39dc5e0110 100644
--- a/configs/example/gpufs/vega10_kvm.py
+++ b/configs/example/gpufs/vega10_kvm.py
@@ -1,4 +1,4 @@
-# Copyright (c) 2022 Advanced Micro Devices, Inc.
+# Copyright (c) 2022-2023 Advanced Micro Devices, Inc.
 # All rights reserved.
 #
 # Redistribution and use in source and binary forms, with or without
@@ -27,127 +27,6 @@
 # ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 # POSSIBILITY OF SUCH DAMAGE.
 
-import m5
-import runfs
-import base64
-import tempfile
-import argparse
-import sys
-import os
+import vega10
 
-from amd import AmdGPUOptions
-from common import Options
-from common import GPUTLBOptions
-from ruby import Ruby
-
-
-demo_runscript_without_checkpoint = """\
-export LD_LIBRARY_PATH=/opt/rocm/lib:$LD_LIBRARY_PATH
-export HSA_ENABLE_INTERRUPT=0
-dmesg -n8
-dd if=/root/roms/vega10.rom of=/dev/mem bs=1k seek=768 count=128
-if [ ! -f /lib/modules/`uname -r`/updates/dkms/amdgpu.ko ]; then
-    echo "ERROR: Missing DKMS package for kernel `uname -r`. Exiting gem5."
-    /sbin/m5 exit
-fi
-modprobe -v amdgpu ip_block_mask=0xff ppfeaturemask=0 dpm=0 audio=0
-echo "Running {} {}"
-echo "{}" | base64 -d > myapp
-chmod +x myapp
-./myapp {}
-/sbin/m5 exit
-"""
-
-demo_runscript_with_checkpoint = """\
-export LD_LIBRARY_PATH=/opt/rocm/lib:$LD_LIBRARY_PATH
-export HSA_ENABLE_INTERRUPT=0
-dmesg -n8
-dd if=/root/roms/vega10.rom of=/dev/mem bs=1k seek=768 count=128
-if [ ! -f /lib/modules/`uname -r`/updates/dkms/amdgpu.ko ]; then
-    echo "ERROR: Missing DKMS package for kernel `uname -r`. Exiting gem5."
-    /sbin/m5 exit
-fi
-modprobe -v amdgpu ip_block_mask=0xff ppfeaturemask=0 dpm=0 audio=0
-echo "Running {} {}"
-echo "{}" | base64 -d > myapp
-chmod +x myapp
-/sbin/m5 checkpoint
-./myapp {}
-/sbin/m5 exit
-"""
-
-
-def addDemoOptions(parser):
-    parser.add_argument(
-        "-a", "--app", default=None, help="GPU application to run"
-    )
-    parser.add_argument(
-        "-o", "--opts", default="", help="GPU application arguments"
-    )
-
-
-if __name__ == "__m5_main__":
-    parser = argparse.ArgumentParser()
-    runfs.addRunFSOptions(parser)
-    Options.addCommonOptions(parser)
-    AmdGPUOptions.addAmdGPUOptions(parser)
-    Ruby.define_options(parser)
-    GPUTLBOptions.tlb_options(parser)
-    addDemoOptions(parser)
-
-    # Parse now so we can override options
-    args = parser.parse_args()
-    demo_runscript = ""
-
-    # Create temp script to run application
-    if args.app is None:
-        print(f"No application given. Use {sys.argv[0]} -a <app>")
-        sys.exit(1)
-    elif args.kernel is None:
-        print(f"No kernel path given. Use {sys.argv[0]} --kernel <vmlinux>")
-        sys.exit(1)
-    elif args.disk_image is None:
-        print(f"No disk path given. Use {sys.argv[0]} --disk-image <linux>")
-        sys.exit(1)
-    elif args.gpu_mmio_trace is None:
-        print(f"No MMIO trace path. Use {sys.argv[0]} --gpu-mmio-trace <path>")
-        sys.exit(1)
-    elif not os.path.isfile(args.app):
-        print("Could not find applcation", args.app)
-        sys.exit(1)
-
-    # Choose runscript Based on whether any checkpointing args are set
-    if args.checkpoint_dir is not None:
-        demo_runscript = demo_runscript_with_checkpoint
-    else:
-        demo_runscript = demo_runscript_without_checkpoint
-
-    with open(os.path.abspath(args.app), "rb") as binfile:
-        encodedBin = base64.b64encode(binfile.read()).decode()
-
-    _, tempRunscript = tempfile.mkstemp()
-    with open(tempRunscript, "w") as b64file:
-        runscriptStr = demo_runscript.format(
-            args.app, args.opts, encodedBin, args.opts
-        )
-        b64file.write(runscriptStr)
-
-    if args.second_disk == None:
-        args.second_disk = args.disk_image
-
-    # Defaults for Vega10
-    args.ruby = True
-    args.cpu_type = "X86KvmCPU"
-    args.num_cpus = 1
-    args.mem_size = "3GB"
-    args.dgpu = True
-    args.dgpu_mem_size = "16GB"
-    args.dgpu_start = "0GB"
-    args.checkpoint_restore = 0
-    args.disjoint = True
-    args.timing_gpu = True
-    args.script = tempRunscript
-    args.dgpu_xor_low_bit = 0
-
-    # Run gem5
-    runfs.runGpuFSSystem(args)
+vega10.runVegaGPUFS("X86KvmCPU")

From 8d1722478160edc64f5069efccfaf2aaa8213208 Mon Sep 17 00:00:00 2001
From: "Bobby R. Bruce" <bbruce@ucdavis.edu>
Date: Mon, 3 Jul 2023 13:03:05 -0700
Subject: [PATCH 487/492] stdlib,tests: Fix download_check.py

This was causing the Weekly tests to fail. The removing of the download
directory should only happen at the end. Prior to this patch it was
deleted and then referenced, which caused problems.

Change-Id: I134782e89a13f5c3cd5c1912ad53a701d0413d16
Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/72159
Reviewed-by: Bobby Bruce <bbruce@ucdavis.edu>
Maintainer: Bobby Bruce <bbruce@ucdavis.edu>
Tested-by: kokoro <noreply+kokoro@google.com>
---
 tests/gem5/configs/download_check.py | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/tests/gem5/configs/download_check.py b/tests/gem5/configs/download_check.py
index 2180f4f26a..e3b06a578d 100644
--- a/tests/gem5/configs/download_check.py
+++ b/tests/gem5/configs/download_check.py
@@ -94,7 +94,6 @@ for id in ids:
         continue
 
     for resource_version in ids[id]:
-
         resource_json = get_resource_json_obj(
             resource_id=id,
             resource_version=resource_version,
@@ -129,8 +128,8 @@ for id in ids:
                 + f" gem5-resources ({resource_json['md5sum']}).{os.linesep}"
             )
 
-        # Remove the downloaded resource.
-    shutil.rmtree(download_path, ignore_errors=True)
+# Remove the downloaded resource.
+shutil.rmtree(args.download_directory, ignore_errors=True)
 
 # If errors exist, raise an exception highlighting them.
 if errors:

From 6c997b633fb5759bd45ff947626c5228e2c67539 Mon Sep 17 00:00:00 2001
From: "Bobby R. Bruce" <bbruce@ucdavis.edu>
Date: Fri, 7 Jul 2023 08:46:16 -0700
Subject: [PATCH 488/492] scons: Remove -Werror for the gem5 v23.0 release

While gem5 compiles on all our supported compilers, removing the -Werror
flag on the stable branch ensures that, as new compilers are released
with stricter warnings, gem5 remains compilable.

Change-Id: Ie32a5c526a17cc584bce3b71d27b8f052caf612b
Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/72178
Maintainer: Bobby Bruce <bbruce@ucdavis.edu>
Reviewed-by: Bobby Bruce <bbruce@ucdavis.edu>
Tested-by: kokoro <noreply+kokoro@google.com>
---
 SConstruct | 6 ------
 1 file changed, 6 deletions(-)

diff --git a/SConstruct b/SConstruct
index 9b25b33783..4fe2f64366 100755
--- a/SConstruct
+++ b/SConstruct
@@ -447,12 +447,6 @@ for variant_path in variant_paths:
                     conf.CheckLinkFlag(
                             '-Wl,--thread-count=%d' % GetOption('num_jobs'))
 
-        # Treat warnings as errors but white list some warnings that we
-        # want to allow (e.g., deprecation warnings).
-        env.Append(CCFLAGS=['-Werror',
-                             '-Wno-error=deprecated-declarations',
-                             '-Wno-error=deprecated',
-                            ])
 
     else:
         error('\n'.join((

From 578eaead472f6bceea3876cbc85a0120a6055c9b Mon Sep 17 00:00:00 2001
From: Matthew Poremba <matthew.poremba@amd.com>
Date: Wed, 28 Jun 2023 14:49:48 -0500
Subject: [PATCH 489/492] dev-amdgpu: Perform frame writes atomically

The PCI read/write functions are atomic functions in gem5, meaning they
expect a response with a latency value on the same simulation Tick. For
reads to a PCI device, the response must also include a data value read
from the device.

The AMDGPU device has a PCI BAR which mirrors the frame buffer memory.
Currently reads are done atomically, but writes are sent to a DMA device
without waiting for a write completion ACK. As a result, it is possible
that writes can be queued in the DMA device long enough that another
read for a queued address arrives. This happens very deterministically
with the AtomicSimpleCPU and causes GPUFS to break with that CPU.

This change makes writes to the frame BAR atomic the same as reads. This
avoids that problem and as a result the AtomicSimpleCPU can now load the
driver for GPUFS simulations.

Change-Id: I9a8e8b172712c78b667ebcec81a0c5d0060234db
Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/71898
Maintainer: Matt Sinclair <mattdsinclair@gmail.com>
Tested-by: kokoro <noreply+kokoro@google.com>
Reviewed-by: Matt Sinclair <mattdsinclair@gmail.com>
Maintainer: Matthew Poremba <matthew.poremba@amd.com>
Reviewed-by: Matthew Poremba <matthew.poremba@amd.com>
(cherry picked from commit 079fc47dc202ffe7c77e1e94bb1d5e0ee38d1816)
Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/72079
Reviewed-by: Bobby Bruce <bbruce@ucdavis.edu>
Maintainer: Bobby Bruce <bbruce@ucdavis.edu>
---
 src/dev/amdgpu/amdgpu_device.cc | 18 ++++++++++++++++--
 1 file changed, 16 insertions(+), 2 deletions(-)

diff --git a/src/dev/amdgpu/amdgpu_device.cc b/src/dev/amdgpu/amdgpu_device.cc
index 3260d058b0..d1058f1606 100644
--- a/src/dev/amdgpu/amdgpu_device.cc
+++ b/src/dev/amdgpu/amdgpu_device.cc
@@ -349,6 +349,22 @@ AMDGPUDevice::writeFrame(PacketPtr pkt, Addr offset)
     }
 
     nbio.writeFrame(pkt, offset);
+
+    /*
+     * Write the value to device memory. This must be done functionally
+     * because this method is called by the PCIDevice::write method which
+     * is a non-timing write.
+     */
+    RequestPtr req = std::make_shared<Request>(offset, pkt->getSize(), 0,
+                                               vramRequestorId());
+    PacketPtr writePkt = Packet::createWrite(req);
+    uint8_t *dataPtr = new uint8_t[pkt->getSize()];
+    std::memcpy(dataPtr, pkt->getPtr<uint8_t>(),
+                pkt->getSize() * sizeof(uint8_t));
+    writePkt->dataDynamic(dataPtr);
+
+    auto system = cp->shader()->gpuCmdProc.system();
+    system->getDeviceMemory(writePkt)->access(writePkt);
 }
 
 void
@@ -489,8 +505,6 @@ AMDGPUDevice::write(PacketPtr pkt)
 
     switch (barnum) {
       case FRAMEBUFFER_BAR:
-          gpuMemMgr->writeRequest(offset, pkt->getPtr<uint8_t>(),
-                                  pkt->getSize(), 0, nullptr);
           writeFrame(pkt, offset);
           break;
       case DOORBELL_BAR:

From 387fc6964e27d085acaf15628045734875b7a0eb Mon Sep 17 00:00:00 2001
From: Matthew Poremba <matthew.poremba@amd.com>
Date: Thu, 6 Jul 2023 08:13:01 -0700
Subject: [PATCH 490/492] gpu-compute,configs: Make sim exits conditional

The unconditional exit event when a kernel completes that was added in
c644eae2ddd34cf449a9c4476730bd29703c4dd7 is causing scripts that do not
ignore unknown exit events to end simulation prematurely. One such
script is the apu_se.py script used in SE mode GPU simulation. Make this
exit conditional to the parameter being set to a valid value to avoid
this problem.

Change-Id: I1d2c082291fdbcf27390913ffdffb963ec8080dd
Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/72098
Reviewed-by: Jason Lowe-Power <power.jg@gmail.com>
Maintainer: Matt Sinclair <mattdsinclair@gmail.com>
Maintainer: Jason Lowe-Power <power.jg@gmail.com>
Reviewed-by: Matt Sinclair <mattdsinclair@gmail.com>
Tested-by: kokoro <noreply+kokoro@google.com>
(cherry picked from commit 3756af8ed91f0860dcfb0a7111478212980e37ae)
Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/72138
Maintainer: Bobby Bruce <bbruce@ucdavis.edu>
Reviewed-by: Bobby Bruce <bbruce@ucdavis.edu>
---
 configs/example/gpufs/system/system.py | 3 ++-
 src/gpu-compute/GPU.py                 | 4 ++++
 src/gpu-compute/dispatcher.cc          | 7 ++++++-
 src/gpu-compute/dispatcher.hh          | 2 ++
 4 files changed, 14 insertions(+), 2 deletions(-)

diff --git a/configs/example/gpufs/system/system.py b/configs/example/gpufs/system/system.py
index 263ffc0a43..471892945e 100644
--- a/configs/example/gpufs/system/system.py
+++ b/configs/example/gpufs/system/system.py
@@ -115,7 +115,8 @@ def makeGpuFSSystem(args):
         numHWQueues=args.num_hw_queues,
         walker=hsapp_pt_walker,
     )
-    dispatcher = GPUDispatcher()
+    dispatcher_exit_events = True if args.exit_at_gpu_kernel > -1 else False
+    dispatcher = GPUDispatcher(kernel_exit_events=dispatcher_exit_events)
     cp_pt_walker = VegaPagetableWalker()
     gpu_cmd_proc = GPUCommandProcessor(
         hsapp=gpu_hsapp, dispatcher=dispatcher, walker=cp_pt_walker
diff --git a/src/gpu-compute/GPU.py b/src/gpu-compute/GPU.py
index c5449cc398..c64a6b791d 100644
--- a/src/gpu-compute/GPU.py
+++ b/src/gpu-compute/GPU.py
@@ -328,6 +328,10 @@ class GPUDispatcher(SimObject):
     cxx_class = "gem5::GPUDispatcher"
     cxx_header = "gpu-compute/dispatcher.hh"
 
+    kernel_exit_events = Param.Bool(
+        False, "Enable exiting sim loop after a kernel"
+    )
+
 
 class GPUCommandProcessor(DmaVirtDevice):
     type = "GPUCommandProcessor"
diff --git a/src/gpu-compute/dispatcher.cc b/src/gpu-compute/dispatcher.cc
index a76ba7c0be..d63c875fe5 100644
--- a/src/gpu-compute/dispatcher.cc
+++ b/src/gpu-compute/dispatcher.cc
@@ -50,7 +50,8 @@ GPUDispatcher::GPUDispatcher(const Params &p)
     : SimObject(p), shader(nullptr), gpuCmdProc(nullptr),
       tickEvent([this]{ exec(); },
           "GPU Dispatcher tick", false, Event::CPU_Tick_Pri),
-      dispatchActive(false), stats(this)
+      dispatchActive(false), kernelExitEvents(p.kernel_exit_events),
+      stats(this)
 {
     schedule(&tickEvent, 0);
 }
@@ -330,6 +331,10 @@ GPUDispatcher::notifyWgCompl(Wavefront *wf)
         DPRINTF(GPUWgLatency, "Kernel Complete ticks:%d kernel:%d\n",
                 curTick(), kern_id);
         DPRINTF(GPUKernelInfo, "Completed kernel %d\n", kern_id);
+
+        if (kernelExitEvents) {
+            exitSimLoop("GPU Kernel Completed");
+        }
     }
 
     if (!tickEvent.scheduled()) {
diff --git a/src/gpu-compute/dispatcher.hh b/src/gpu-compute/dispatcher.hh
index 7699ceff7c..eafa0804b1 100644
--- a/src/gpu-compute/dispatcher.hh
+++ b/src/gpu-compute/dispatcher.hh
@@ -92,6 +92,8 @@ class GPUDispatcher : public SimObject
     std::queue<int> doneIds;
     // is there a kernel in execution?
     bool dispatchActive;
+    // Enable exiting sim loop after each kernel completion
+    bool kernelExitEvents;
 
   protected:
     struct GPUDispatcherStats : public statistics::Group

From e377e15c07e439b696a4a09b0179fd4553d11d0a Mon Sep 17 00:00:00 2001
From: "Bobby R. Bruce" <bbruce@ucdavis.edu>
Date: Thu, 15 Jun 2023 16:08:49 -0700
Subject: [PATCH 491/492] misc: Update gem5 version to v23.0.0.0

Change-Id: Ie14b35a62a152e3d78b16bcd4c92ec9a790f1396
Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/71724
Tested-by: kokoro <noreply+kokoro@google.com>
Reviewed-by: Bobby Bruce <bbruce@ucdavis.edu>
Maintainer: Bobby Bruce <bbruce@ucdavis.edu>
---
 src/Doxyfile        | 2 +-
 src/base/version.cc | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/src/Doxyfile b/src/Doxyfile
index 24d70bbc55..325040fee0 100644
--- a/src/Doxyfile
+++ b/src/Doxyfile
@@ -31,7 +31,7 @@ PROJECT_NAME           = gem5
 # This could be handy for archiving the generated documentation or
 # if some version control system is used.
 
-PROJECT_NUMBER         = [DEVELOP-FOR-23.0]
+PROJECT_NUMBER         = v23.0.0.0
 
 # The OUTPUT_DIRECTORY tag is used to specify the (relative or absolute)
 # base path where the generated documentation will be put.
diff --git a/src/base/version.cc b/src/base/version.cc
index 8131a3197e..38f415590a 100644
--- a/src/base/version.cc
+++ b/src/base/version.cc
@@ -32,6 +32,6 @@ namespace gem5
 /**
  * @ingroup api_base_utils
  */
-const char *gem5Version = "[DEVELOP-FOR-23.0]";
+const char *gem5Version = "23.0.0.0";
 
 } // namespace gem5

From 20ee3b97622be2cc448de31d84dd5f29109e0ada Mon Sep 17 00:00:00 2001
From: "Bobby R. Bruce" <bbruce@ucdavis.edu>
Date: Fri, 7 Jul 2023 18:21:44 -0700
Subject: [PATCH 492/492] stdlib: Remove simulator.py beta warnings

This component is no longer in a beta state

Change-Id: I8e309771aaa03197cf1738ad0af57c253ad58ecd
---
 src/python/gem5/simulate/simulator.py | 10 ----------
 1 file changed, 10 deletions(-)

diff --git a/src/python/gem5/simulate/simulator.py b/src/python/gem5/simulate/simulator.py
index e27679a996..0551745b36 100644
--- a/src/python/gem5/simulate/simulator.py
+++ b/src/python/gem5/simulate/simulator.py
@@ -53,10 +53,6 @@ class Simulator:
     """
     This Simulator class is used to manage the execution of a gem5 simulation.
 
-    **Warning:** The simulate package is still in a beta state. The gem5
-    project does not guarantee the APIs within this package will remain
-    consistent in future across upcoming releases.
-
     Example
     -------
     Examples using the Simulator class can be found under
@@ -165,12 +161,6 @@ class Simulator:
 
         """
 
-        warn(
-            "The simulate package is still in a beta state. The gem5 "
-            "project does not guarantee the APIs within this package will "
-            "remain consistent across upcoming releases."
-        )
-
         # We specify a dictionary here outlining the default behavior for each
         # exit event. Each exit event is mapped to a generator.
         self._default_on_exit_dict = {