cpu-o3: O3 LSQ Generalisation

This patch makes a large modification to the LSQ in the O3 model. The
main goal of the patch is to remove the assumption that an operation
can be served with at most two memory requests, which is present in
the LSQ and in the instruction via the req, reqLow, reqHigh triplet,
and to generalise it to operations that can be served with a single
request and operations that require many requests, embodied in the
SingleDataRequest and the SplitDataRequest respectively.

This modification has been done mimicking the minor model to an extent,
shifting the responsibilities of dealing with VtoP translation and
tracking the status and resources from the DynInst to the LSQ via the
LSQRequest. The LSQRequest models the information concerning the
operation, handles the creation of fragments for translation and request
as well as assembling/splitting the data accordingly.

With these modifications, the implementation of vector ISAs,
particularly on the memory side, becomes richer, as the new model
permits dissociating ISA characteristics, such as vector length, from
the microarchitectural characteristics that govern how contiguous
loads are executed, allowing exploration of different LSQ-to-DL1 bus
widths to understand the tradeoffs in complexity and performance.

Part of the complexity introduced stems from the fact that gem5 keeps
a large amount of metadata regarding, in particular, memory
operations. Thus, when an instruction is squashed while some operation
such as a TLB lookup or cache access is ongoing, and the relevant
structure later communicates to the LSQ that the operation is over, it
tries to access pieces of data that should have died when the
instruction was squashed, leading to asserts, panics, or memory
corruption. To ensure correct behaviour, LSQRequests rely on assessing
who their owner is, and self-destruct if they detect that their owner
is done with the request and there will be no subsequent action. For
example, in the case of an instruction squashed while the TLB is doing
a walk to serve the translation, when the translation is served by the
TLB, the LSQRequest detects that the instruction was squashed, and, as
the translation is done, no one else expects to access its
information, and therefore it self-destructs. Destroying the
LSQRequest earlier would lead to wrong behaviour, as the TLB walk may
access some of its fields.

Additional authors:
- Gabor Dozsa <gabor.dozsa@arm.com>

Change-Id: I9578a1a3f6b899c390cdd886856a24db68ff7d0c
Signed-off-by: Giacomo Gabrielli <giacomo.gabrielli@arm.com>
Reviewed-on: https://gem5-review.googlesource.com/c/13516
Reviewed-by: Anthony Gutierrez <anthony.gutierrez@amd.com>
Maintainer: Anthony Gutierrez <anthony.gutierrez@amd.com>
This commit is contained in:
Rekai Gonzalez-Alberquilla
2017-02-13 09:41:44 +00:00
committed by Giacomo Travaglini
parent 6379bebd41
commit 51becd2475
12 changed files with 1919 additions and 1281 deletions

View File

@@ -1,5 +1,5 @@
/*
* Copyright (c) 2017 ARM Limited
* Copyright (c) 2017-2018 ARM Limited
* All rights reserved.
*
* The license below extends only to copyright in the software and shall

View File

@@ -1,5 +1,5 @@
/*
* Copyright (c) 2011,2013,2016 ARM Limited
* Copyright (c) 2011, 2013, 2016-2018 ARM Limited
* Copyright (c) 2013 Advanced Micro Devices, Inc.
* All rights reserved.
*
@@ -84,6 +84,10 @@ class BaseDynInst : public ExecContext, public RefCounted
typedef typename ImplCPU::ImplState ImplState;
using VecRegContainer = TheISA::VecRegContainer;
using LSQRequestPtr = typename Impl::CPUPol::LSQ::LSQRequest*;
using LQIterator = typename Impl::CPUPol::LSQUnit::LQIterator;
using SQIterator = typename Impl::CPUPol::LSQUnit::SQIterator;
// The DynInstPtr type.
typedef typename Impl::DynInstPtr DynInstPtr;
typedef RefCountingPtr<BaseDynInst<Impl> > BaseDynInstPtr;
@@ -203,12 +207,7 @@ class BaseDynInst : public ExecContext, public RefCounted
Addr effAddr;
/** The effective physical address. */
Addr physEffAddrLow;
/** The effective physical address
* of the second request for a split request
*/
Addr physEffAddrHigh;
Addr physEffAddr;
/** The memory request flags (from translation). */
unsigned memReqFlags;
@@ -224,19 +223,19 @@ class BaseDynInst : public ExecContext, public RefCounted
/** Load queue index. */
int16_t lqIdx;
LQIterator lqIt;
/** Store queue index. */
int16_t sqIdx;
SQIterator sqIt;
/////////////////////// TLB Miss //////////////////////
/**
* Saved memory requests (needed when the DTB address translation is
* Saved memory request (needed when the DTB address translation is
* delayed due to a hw page table walk).
*/
RequestPtr savedReq;
RequestPtr savedSreqLow;
RequestPtr savedSreqHigh;
LSQRequestPtr savedReq;
/////////////////////// Checker //////////////////////
// Need a copy of main request pointer to verify on writes.
@@ -270,6 +269,7 @@ class BaseDynInst : public ExecContext, public RefCounted
/** Is the effective virtual address valid. */
bool effAddrValid() const { return instFlags[EffAddrValid]; }
void effAddrValid(bool b) { instFlags[EffAddrValid] = b; }
/** Whether or not the memory operation is done. */
bool memOpDone() const { return instFlags[MemOpDone]; }
@@ -303,18 +303,6 @@ class BaseDynInst : public ExecContext, public RefCounted
Fault writeMem(uint8_t *data, unsigned size, Addr addr,
Request::Flags flags, uint64_t *res);
/** Splits a request in two if it crosses a dcache block. */
void splitRequest(const RequestPtr &req, RequestPtr &sreqLow,
RequestPtr &sreqHigh);
/** Initiate a DTB address translation. */
void initiateTranslation(const RequestPtr &req, const RequestPtr &sreqLow,
const RequestPtr &sreqHigh, uint64_t *res,
BaseTLB::Mode mode);
/** Finish a DTB address translation. */
void finishTranslation(WholeTranslationState *state);
/** True if the DTB address translation has started. */
bool translationStarted() const { return instFlags[TranslationStarted]; }
void translationStarted(bool f) { instFlags[TranslationStarted] = f; }
@@ -454,6 +442,9 @@ class BaseDynInst : public ExecContext, public RefCounted
/** Returns the fault type. */
Fault getFault() const { return fault; }
/** TODO: This I added for the LSQRequest side to be able to modify the
* fault. There should be a better mechanism in place. */
Fault& getFault() { return fault; }
/** Checks whether or not this instruction has had its branch target
* calculated yet. For now it is not utilized and is hacked to be
@@ -589,7 +580,8 @@ class BaseDynInst : public ExecContext, public RefCounted
int8_t numIntDestRegs() const { return staticInst->numIntDestRegs(); }
int8_t numCCDestRegs() const { return staticInst->numCCDestRegs(); }
int8_t numVecDestRegs() const { return staticInst->numVecDestRegs(); }
int8_t numVecElemDestRegs() const {
int8_t numVecElemDestRegs() const
{
return staticInst->numVecElemDestRegs();
}
@@ -837,6 +829,7 @@ class BaseDynInst : public ExecContext, public RefCounted
/** Sets the ASID. */
void setASID(short addr_space_id) { asid = addr_space_id; }
short getASID() { return asid; }
/** Sets the thread id. */
void setTid(ThreadID tid) { threadNumber = tid; }
@@ -853,9 +846,12 @@ class BaseDynInst : public ExecContext, public RefCounted
/** Is this instruction's memory access strictly ordered? */
bool strictlyOrdered() const { return instFlags[IsStrictlyOrdered]; }
void strictlyOrdered(bool so) { instFlags[IsStrictlyOrdered] = so; }
/** Has this instruction generated a memory request. */
bool hasRequest() const { return instFlags[ReqMade]; }
/** Assert this instruction has generated a memory request. */
void setRequest() { instFlags[ReqMade] = true; }
/** Returns iterator to this instruction in the list of all insts. */
ListIt &getInstListIt() { return instListIt; }
@@ -887,50 +883,9 @@ Fault
BaseDynInst<Impl>::initiateMemRead(Addr addr, unsigned size,
Request::Flags flags)
{
instFlags[ReqMade] = true;
RequestPtr req = NULL;
RequestPtr sreqLow = NULL;
RequestPtr sreqHigh = NULL;
if (instFlags[ReqMade] && translationStarted()) {
req = savedReq;
sreqLow = savedSreqLow;
sreqHigh = savedSreqHigh;
} else {
req = std::make_shared<Request>(
asid, addr, size, flags, masterId(),
this->pc.instAddr(), thread->contextId());
req->taskId(cpu->taskId());
// Only split the request if the ISA supports unaligned accesses.
if (TheISA::HasUnalignedMemAcc) {
splitRequest(req, sreqLow, sreqHigh);
}
initiateTranslation(req, sreqLow, sreqHigh, NULL, BaseTLB::Read);
}
if (translationCompleted()) {
if (fault == NoFault) {
effAddr = req->getVaddr();
effSize = size;
instFlags[EffAddrValid] = true;
if (cpu->checker) {
reqToVerify = std::make_shared<Request>(*req);
}
fault = cpu->read(req, sreqLow, sreqHigh, lqIdx);
} else {
// Commit will have to clean up whatever happened. Set this
// instruction as executed.
this->setExecuted();
}
}
if (traceData)
traceData->setMem(addr, size, flags);
return fault;
return cpu->pushRequest(
dynamic_cast<typename DynInstPtr::PtrType>(this),
/* ld */ true, nullptr, size, addr, flags, nullptr);
}
template<class Impl>
@@ -938,154 +893,9 @@ Fault
BaseDynInst<Impl>::writeMem(uint8_t *data, unsigned size, Addr addr,
Request::Flags flags, uint64_t *res)
{
if (traceData)
traceData->setMem(addr, size, flags);
instFlags[ReqMade] = true;
RequestPtr req = NULL;
RequestPtr sreqLow = NULL;
RequestPtr sreqHigh = NULL;
if (instFlags[ReqMade] && translationStarted()) {
req = savedReq;
sreqLow = savedSreqLow;
sreqHigh = savedSreqHigh;
} else {
req = std::make_shared<Request>(
asid, addr, size, flags, masterId(),
this->pc.instAddr(), thread->contextId());
req->taskId(cpu->taskId());
// Only split the request if the ISA supports unaligned accesses.
if (TheISA::HasUnalignedMemAcc) {
splitRequest(req, sreqLow, sreqHigh);
}
initiateTranslation(req, sreqLow, sreqHigh, res, BaseTLB::Write);
}
if (fault == NoFault && translationCompleted()) {
effAddr = req->getVaddr();
effSize = size;
instFlags[EffAddrValid] = true;
if (cpu->checker) {
reqToVerify = std::make_shared<Request>(*req);
}
fault = cpu->write(req, sreqLow, sreqHigh, data, sqIdx);
}
return fault;
}
template<class Impl>
inline void
BaseDynInst<Impl>::splitRequest(const RequestPtr &req, RequestPtr &sreqLow,
RequestPtr &sreqHigh)
{
// Check to see if the request crosses the next level block boundary.
unsigned block_size = cpu->cacheLineSize();
Addr addr = req->getVaddr();
Addr split_addr = roundDown(addr + req->getSize() - 1, block_size);
assert(split_addr <= addr || split_addr - addr < block_size);
// Spans two blocks.
if (split_addr > addr) {
req->splitOnVaddr(split_addr, sreqLow, sreqHigh);
}
}
template<class Impl>
inline void
BaseDynInst<Impl>::initiateTranslation(const RequestPtr &req,
const RequestPtr &sreqLow,
const RequestPtr &sreqHigh,
uint64_t *res,
BaseTLB::Mode mode)
{
translationStarted(true);
if (!TheISA::HasUnalignedMemAcc || sreqLow == NULL) {
WholeTranslationState *state =
new WholeTranslationState(req, NULL, res, mode);
// One translation if the request isn't split.
DataTranslation<BaseDynInstPtr> *trans =
new DataTranslation<BaseDynInstPtr>(this, state);
cpu->dtb->translateTiming(req, thread->getTC(), trans, mode);
if (!translationCompleted()) {
// The translation isn't yet complete, so we can't possibly have a
// fault. Overwrite any existing fault we might have from a previous
// execution of this instruction (e.g. an uncachable load that
// couldn't execute because it wasn't at the head of the ROB).
fault = NoFault;
// Save memory requests.
savedReq = state->mainReq;
savedSreqLow = state->sreqLow;
savedSreqHigh = state->sreqHigh;
}
} else {
WholeTranslationState *state =
new WholeTranslationState(req, sreqLow, sreqHigh, NULL, res, mode);
// Two translations when the request is split.
DataTranslation<BaseDynInstPtr> *stransLow =
new DataTranslation<BaseDynInstPtr>(this, state, 0);
DataTranslation<BaseDynInstPtr> *stransHigh =
new DataTranslation<BaseDynInstPtr>(this, state, 1);
cpu->dtb->translateTiming(sreqLow, thread->getTC(), stransLow, mode);
cpu->dtb->translateTiming(sreqHigh, thread->getTC(), stransHigh, mode);
if (!translationCompleted()) {
// The translation isn't yet complete, so we can't possibly have a
// fault. Overwrite any existing fault we might have from a previous
// execution of this instruction (e.g. an uncachable load that
// couldn't execute because it wasn't at the head of the ROB).
fault = NoFault;
// Save memory requests.
savedReq = state->mainReq;
savedSreqLow = state->sreqLow;
savedSreqHigh = state->sreqHigh;
}
}
}
template<class Impl>
inline void
BaseDynInst<Impl>::finishTranslation(WholeTranslationState *state)
{
fault = state->getFault();
instFlags[IsStrictlyOrdered] = state->isStrictlyOrdered();
if (fault == NoFault) {
// save Paddr for a single req
physEffAddrLow = state->getPaddr();
// case for the request that has been split
if (state->isSplit) {
physEffAddrLow = state->sreqLow->getPaddr();
physEffAddrHigh = state->sreqHigh->getPaddr();
}
memReqFlags = state->getFlags();
if (state->mainReq->isCondSwap()) {
assert(state->res);
state->mainReq->setExtraData(*state->res);
}
} else {
state->deleteReqs();
}
delete state;
translationCompleted(true);
return cpu->pushRequest(
dynamic_cast<typename DynInstPtr::PtrType>(this),
/* st */ false, data, size, addr, flags, res);
}
#endif // __CPU_BASE_DYN_INST_HH__

View File

@@ -69,8 +69,6 @@ BaseDynInst<Impl>::BaseDynInst(const StaticInstPtr &_staticInst,
macroop(_macroop),
memData(nullptr),
savedReq(nullptr),
savedSreqLow(nullptr),
savedSreqHigh(nullptr),
reqToVerify(nullptr)
{
seqNum = seq_num;
@@ -96,8 +94,7 @@ BaseDynInst<Impl>::initVars()
{
memData = NULL;
effAddr = 0;
physEffAddrLow = 0;
physEffAddrHigh = 0;
physEffAddr = 0;
readyRegs = 0;
memReqFlags = 0;

View File

@@ -850,7 +850,6 @@ FullO3CPU<Impl>::insertThread(ThreadID tid)
//Reset ROB/IQ/LSQ Entries
commit.rob->resetEntries();
iew.resetEntries();
}
template <class Impl>

View File

@@ -1,5 +1,5 @@
/*
* Copyright (c) 2011-2013, 2016 ARM Limited
* Copyright (c) 2011-2013, 2016-2018 ARM Limited
* Copyright (c) 2013 Advanced Micro Devices, Inc.
* All rights reserved
*
@@ -125,6 +125,7 @@ class FullO3CPU : public BaseO3CPU
BaseTLB *itb;
BaseTLB *dtb;
using LSQRequest = typename LSQ<Impl>::LSQRequest;
/** Overall CPU status. */
Status _status;
@@ -733,21 +734,25 @@ class FullO3CPU : public BaseO3CPU
/** Available thread ids in the cpu*/
std::vector<ThreadID> tids;
/** CPU read function, forwards read to LSQ. */
Fault read(const RequestPtr &req,
RequestPtr &sreqLow, RequestPtr &sreqHigh,
int load_idx)
/** CPU pushRequest function, forwards request to LSQ. */
Fault pushRequest(const DynInstPtr& inst, bool isLoad, uint8_t *data,
unsigned int size, Addr addr, Request::Flags flags,
uint64_t *res)
{
return this->iew.ldstQueue.read(req, sreqLow, sreqHigh, load_idx);
return iew.ldstQueue.pushRequest(inst, isLoad, data, size, addr,
flags, res);
}
/** CPU read function, forwards read to LSQ. */
Fault read(LSQRequest* req, int load_idx)
{
return this->iew.ldstQueue.read(req, load_idx);
}
/** CPU write function, forwards write to LSQ. */
Fault write(const RequestPtr &req,
const RequestPtr &sreqLow, const RequestPtr &sreqHigh,
uint8_t *data, int store_idx)
Fault write(LSQRequest* req, uint8_t *data, int store_idx)
{
return this->iew.ldstQueue.write(req, sreqLow, sreqHigh,
data, store_idx);
return this->iew.ldstQueue.write(req, data, store_idx);
}
/** Used by the fetch unit to get a hold of the instruction port. */

View File

@@ -1,5 +1,5 @@
/*
* Copyright (c) 2010-2013 ARM Limited
* Copyright (c) 2010-2013, 2018 ARM Limited
* Copyright (c) 2013 Advanced Micro Devices, Inc.
* All rights reserved.
*
@@ -743,14 +743,6 @@ DefaultIEW<Impl>::updateStatus()
}
}
template <class Impl>
void
DefaultIEW<Impl>::resetEntries()
{
instQueue.resetEntries();
ldstQueue.resetEntries();
}
template <class Impl>
bool
DefaultIEW<Impl>::checkStall(ThreadID tid)
@@ -1353,7 +1345,7 @@ DefaultIEW<Impl>::executeInsts()
DPRINTF(IEW, "LDSTQ detected a violation. Violator PC: %s "
"[sn:%lli], inst PC: %s [sn:%lli]. Addr is: %#x.\n",
violator->pcState(), violator->seqNum,
inst->pcState(), inst->seqNum, inst->physEffAddrLow);
inst->pcState(), inst->seqNum, inst->physEffAddr);
fetchRedirect[tid] = true;
@@ -1376,7 +1368,7 @@ DefaultIEW<Impl>::executeInsts()
DPRINTF(IEW, "LDSTQ detected a violation. Violator PC: "
"%s, inst PC: %s. Addr is: %#x.\n",
violator->pcState(), inst->pcState(),
inst->physEffAddrLow);
inst->physEffAddr);
DPRINTF(IEW, "Violation will not be handled because "
"already squashing\n");
@@ -1460,6 +1452,8 @@ DefaultIEW<Impl>::tick()
wroteToTimeBuffer = false;
updatedQueues = false;
ldstQueue.tick();
sortInsts();
// Free function units marked as being freed this cycle.

View File

@@ -1,5 +1,5 @@
/*
* Copyright (c) 2011-2014 ARM Limited
* Copyright (c) 2011-2014, 2017-2018 ARM Limited
* Copyright (c) 2013 Advanced Micro Devices, Inc.
* All rights reserved.
*
@@ -1140,9 +1140,6 @@ template <class Impl>
void
InstructionQueue<Impl>::blockMemInst(const DynInstPtr &blocked_inst)
{
blocked_inst->translationStarted(false);
blocked_inst->translationCompleted(false);
blocked_inst->clearIssued();
blocked_inst->clearCanIssue();
blockedMemInsts.push_back(blocked_inst);
@@ -1285,9 +1282,9 @@ InstructionQueue<Impl>::doSquash(ThreadID tid)
squashed_inst);
}
++iqSquashedOperandsExamined;
}
} else if (!squashed_inst->isStoreConditional() ||
!squashed_inst->isCompleted()) {
NonSpecMapIt ns_inst_it =

View File

@@ -1,5 +1,5 @@
/*
* Copyright (c) 2011-2012, 2014 ARM Limited
* Copyright (c) 2011-2012, 2014, 2018 ARM Limited
* Copyright (c) 2013 Advanced Micro Devices, Inc.
* All rights reserved
*
@@ -47,8 +47,9 @@
#include <map>
#include <queue>
#include "cpu/o3/lsq_unit.hh"
#include "arch/generic/tlb.hh"
#include "cpu/inst_seq.hh"
#include "cpu/o3/lsq_unit.hh"
#include "enums/SMTQueuePolicy.hh"
#include "mem/port.hh"
#include "sim/sim_object.hh"
@@ -56,13 +57,659 @@
struct DerivO3CPUParams;
template <class Impl>
class LSQ {
class LSQ
{
public:
typedef typename Impl::O3CPU O3CPU;
typedef typename Impl::DynInstPtr DynInstPtr;
typedef typename Impl::CPUPol::IEW IEW;
typedef typename Impl::CPUPol::LSQUnit LSQUnit;
class LSQRequest;
/** Derived class to hold any sender state the LSQ needs. */
class LSQSenderState : public Packet::SenderState
{
protected:
/** The senderState needs to know the LSQRequest who owns it. */
LSQRequest* _request;
/** Default constructor. */
LSQSenderState(LSQRequest* request, bool isLoad_)
: _request(request), mainPkt(nullptr), pendingPacket(nullptr),
outstanding(0), isLoad(isLoad_), needWB(isLoad_), isSplit(false),
pktToSend(false), deleted(false)
{ }
public:
/** Instruction which initiated the access to memory. */
DynInstPtr inst;
/** The main packet from a split load, used during writeback. */
PacketPtr mainPkt;
/** A second packet from a split store that needs sending. */
PacketPtr pendingPacket;
/** Number of outstanding packets to complete. */
uint8_t outstanding;
/** Whether or not it is a load. */
bool isLoad;
/** Whether or not the instruction will need to writeback. */
bool needWB;
/** Whether or not this access is split in two. */
bool isSplit;
/** Whether or not there is a packet that needs sending. */
bool pktToSend;
/** Has the request been deleted?
* LSQ entries can be squashed before the response comes back. in that
* case the SenderState knows.
*/
bool deleted;
ContextID contextId() { return inst->contextId(); }
/** Completes a packet and returns whether the access is finished. */
inline bool isComplete() { return outstanding == 0; }
inline void deleteRequest() { deleted = true; }
inline bool alive() { return !deleted; }
LSQRequest* request() { return _request; }
virtual void complete() = 0;
void writebackDone() { _request->writebackDone(); }
};
/** Memory operation metadata.
* This class holds the information about a memory operation. It lives
* from initiateAcc to resource deallocation at commit or squash.
* LSQRequest objects are owned by the LQ/SQ Entry in the LSQUnit that
* holds the operation. It is also used by the LSQSenderState. In addition,
* the LSQRequest is a TranslationState, therefore, upon squash, there must
* be a defined ownership transferal in case the LSQ resources are
* deallocated before the TLB is done using the TranslationState. If that
* happens, the LSQRequest will be self-owned, and responsible to detect
* that its services are no longer required and self-destruct.
*
* Lifetime of a LSQRequest:
* +--------------------+
* |LSQ creates and owns|
* +--------------------+
* |
* +--------------------+
* | Initate translation|
* +--------------------+
* |
* ___^___
* ___/ \___
* ______/ Squashed? \
* | \___ ___/
* | \___ ___/
* | v
* | |
* | +--------------------+
* | | Translation done |
* | +--------------------+
* | |
* | +--------------------+
* | | Send packet |<------+
* | +--------------------+ |
* | | |
* | ___^___ |
* | ___/ \___ |
* | ____/ Squashed? \ |
* | | \___ ___/ |
* | | \___ ___/ |
* | | v |
* | | | |
* | | ___^___ |
* | | ___/ \___ |
* | | / Done? \__________|
* | | \___ ___/
* | | \___ ___/
* | | v
* | | |
* | | +--------------------+
* | | | Manage stuff |
* | | | Free resources |
* | | +--------------------+
* | |
* | | +--------------------+
* | | | senderState owns |
* | +->| onRecvTimingResp |
* | | free resources |
* | +--------------------+
* |
* | +----------------------+
* | | self owned (Trans) |
* +-->| on TranslationFinish |
* | free resources |
* +----------------------+
*
*
*/
class LSQRequest : public BaseTLB::Translation
{
protected:
typedef uint32_t FlagsStorage;
typedef ::Flags<FlagsStorage> FlagsType;
enum Flag : FlagsStorage
{
IsLoad = 0x00000001,
/** True if this is a store that writes registers (SC). */
WbStore = 0x00000002,
Delayed = 0x00000004,
IsSplit = 0x00000008,
/** True if any translation has been sent to TLB. */
TranslationStarted = 0x00000010,
/** True if there are un-replied outbound translations.. */
TranslationFinished = 0x00000020,
Sent = 0x00000040,
Retry = 0x00000080,
Complete = 0x00000100,
/** Ownership tracking flags. */
/** Translation squashed. */
TranslationSquashed = 0x00000200,
/** Request discarded */
Discarded = 0x00000400,
/** LSQ resources freed. */
LSQEntryFreed = 0x00000800,
/** Store written back. */
WritebackScheduled = 0x00001000,
WritebackDone = 0x00002000
};
FlagsType flags;
enum class State
{
NotIssued,
Translation,
Request,
Complete,
Squashed,
Fault,
};
State _state;
LSQSenderState* _senderState;
void setState(const State& newState) { _state = newState; }
uint32_t numTranslatedFragments;
uint32_t numInTranslationFragments;
/** LQ/SQ entry idx. */
uint32_t _entryIdx;
void markDelayed() { flags.set(Flag::Delayed); }
bool isDelayed() { return flags.isSet(Flag::Delayed); }
public:
LSQUnit& _port;
const DynInstPtr _inst;
uint32_t _taskId;
PacketDataPtr _data;
std::vector<PacketPtr> _packets;
std::vector<RequestPtr> _requests;
std::vector<Fault> _fault;
uint64_t* _res;
const Addr _addr;
const uint32_t _size;
const Request::Flags _flags;
uint32_t _numOutstandingPackets;
protected:
LSQUnit* lsqUnit() { return &_port; }
LSQRequest(LSQUnit* port, const DynInstPtr& inst, bool isLoad) :
_state(State::NotIssued), _senderState(nullptr),
_port(*port), _inst(inst), _data(nullptr),
_res(nullptr), _addr(0), _size(0), _flags(0),
_numOutstandingPackets(0)
{
flags.set(Flag::IsLoad, isLoad);
flags.set(Flag::WbStore, _inst->isStoreConditional());
install();
}
LSQRequest(LSQUnit* port, const DynInstPtr& inst, bool isLoad,
const Addr& addr, const uint32_t& size,
const Request::Flags& flags_,
PacketDataPtr data = nullptr, uint64_t* res = nullptr)
: _state(State::NotIssued), _senderState(nullptr),
numTranslatedFragments(0),
numInTranslationFragments(0),
_port(*port), _inst(inst), _data(data),
_res(res), _addr(addr), _size(size),
_flags(flags_),
_numOutstandingPackets(0)
{
flags.set(Flag::IsLoad, isLoad);
flags.set(Flag::WbStore, _inst->isStoreConditional());
install();
}
bool
isLoad() const
{
return flags.isSet(Flag::IsLoad);
}
/** Install the request in the LQ/SQ. */
void install()
{
if (isLoad()) {
_port.loadQueue[_inst->lqIdx].setRequest(this);
} else {
_port.storeQueue[_inst->sqIdx].setRequest(this);
}
}
virtual bool
squashed() const override
{
return _inst->isSquashed();
}
/**
* Test if the LSQRequest has been released, i.e. self-owned.
* An LSQRequest manages itself when the resources on the LSQ are freed
* but the translation is still going on and the LSQEntry was freed.
*/
bool
isReleased()
{
return flags.isSet(Flag::LSQEntryFreed) ||
flags.isSet(Flag::Discarded);
}
/** Release the LSQRequest.
* Notify the sender state that the request it points to is not valid
* anymore. Understand if the request is orphan (self-managed) and if
* so, mark it as freed, else destroy it, as this means
* the end of its life cycle.
* An LSQRequest is orphan when its resources are released
* but there is any in-flight translation request to the TLB or access
* request to the memory.
*/
void release(Flag reason)
{
assert(reason == Flag::LSQEntryFreed || reason == Flag::Discarded);
if (!isAnyOutstandingRequest()) {
delete this;
} else {
if (_senderState) {
_senderState->deleteRequest();
}
flags.set(reason);
}
}
/** Destructor.
* The LSQRequest owns the request. If the packet has already been
* sent, the sender state will be deleted upon receiving the reply.
*/
virtual ~LSQRequest()
{
assert(!isAnyOutstandingRequest());
_inst->savedReq = nullptr;
if (_senderState)
delete _senderState;
for (auto r: _packets)
delete r;
};
public:
/** Convenience getters/setters. */
/** @{ */
/** Set up Context numbers. */
void
setContext(const ContextID& context_id)
{
request()->setContext(context_id);
}
const DynInstPtr&
instruction()
{
return _inst;
}
/** Set up virtual request.
* For a previously allocated Request objects.
*/
void
setVirt(int asid, Addr vaddr, unsigned size, Request::Flags flags_,
MasterID mid, Addr pc)
{
request()->setVirt(asid, vaddr, size, flags_, mid, pc);
}
void
taskId(const uint32_t& v)
{
_taskId = v;
for (auto& r: _requests)
r->taskId(v);
}
uint32_t taskId() const { return _taskId; }
RequestPtr request(int idx = 0) { return _requests.at(idx); }
const RequestPtr
request(int idx = 0) const
{
return _requests.at(idx);
}
Addr getVaddr(int idx = 0) const { return request(idx)->getVaddr(); }
virtual void initiateTranslation() = 0;
PacketPtr packet(int idx = 0) { return _packets.at(idx); }
virtual PacketPtr
mainPacket()
{
assert (_packets.size() == 1);
return packet();
}
virtual RequestPtr
mainRequest()
{
assert (_requests.size() == 1);
return request();
}
void
senderState(LSQSenderState* st)
{
_senderState = st;
for (auto& pkt: _packets) {
if (pkt)
pkt->senderState = st;
}
}
const LSQSenderState*
senderState() const
{
return _senderState;
}
/**
* Mark senderState as discarded. This will cause to discard response
* packets from the cache.
*/
void
discardSenderState()
{
assert(_senderState);
_senderState->deleteRequest();
}
/**
* Test if there is any in-flight translation or mem access request
*/
bool
isAnyOutstandingRequest()
{
return numInTranslationFragments > 0 ||
_numOutstandingPackets > 0 ||
(flags.isSet(Flag::WritebackScheduled) &&
!flags.isSet(Flag::WritebackDone));
}
bool
isSplit() const
{
return flags.isSet(Flag::IsSplit);
}
/** @} */
virtual bool recvTimingResp(PacketPtr pkt) = 0;
virtual void sendPacketToCache() = 0;
virtual void buildPackets() = 0;
/**
* Memory mapped IPR accesses
*/
virtual void handleIprWrite(ThreadContext *thread, PacketPtr pkt) = 0;
virtual Cycles handleIprRead(ThreadContext *thread, PacketPtr pkt) = 0;
/**
* Test if the request accesses a particular cache line.
*/
virtual bool isCacheBlockHit(Addr blockAddr, Addr cacheBlockMask) = 0;
/** Update the status to reflect that a packet was sent. */
void
packetSent()
{
flags.set(Flag::Sent);
}
/** Update the status to reflect that a packet was not sent.
* When a packet fails to be sent, we mark the request as needing a
* retry. Note that Retry flag is sticky.
*/
void
packetNotSent()
{
flags.set(Flag::Retry);
flags.clear(Flag::Sent);
}
void sendFragmentToTranslation(int i);
bool
isComplete()
{
return flags.isSet(Flag::Complete);
}
bool
isInTranslation()
{
return _state == State::Translation;
}
bool
isTranslationComplete()
{
return flags.isSet(Flag::TranslationStarted) &&
!isInTranslation();
}
bool
isTranslationBlocked()
{
return _state == State::Translation &&
flags.isSet(Flag::TranslationStarted) &&
!flags.isSet(Flag::TranslationFinished);
}
bool
isSent()
{
return flags.isSet(Flag::Sent);
}
/**
* The LSQ entry is cleared
*/
void
freeLSQEntry()
{
release(Flag::LSQEntryFreed);
}
/**
* The request is discarded (e.g. partial store-load forwarding)
*/
void
discard()
{
release(Flag::Discarded);
}
void
packetReplied()
{
assert(_numOutstandingPackets > 0);
_numOutstandingPackets--;
if (_numOutstandingPackets == 0 && isReleased())
delete this;
}
void
writebackScheduled()
{
assert(!flags.isSet(Flag::WritebackScheduled));
flags.set(Flag::WritebackScheduled);
}
void
writebackDone()
{
flags.set(Flag::WritebackDone);
/* If the lsq resources are already free */
if (isReleased()) {
delete this;
}
}
void
squashTranslation()
{
assert(numInTranslationFragments == 0);
flags.set(Flag::TranslationSquashed);
/* If we are on our own, self-destruct. */
if (isReleased()) {
delete this;
}
}
void
complete()
{
flags.set(Flag::Complete);
}
};
class SingleDataRequest : public LSQRequest
{
protected:
/* Given that we are inside templates, children need explicit
* declaration of the names in the parent class. */
using Flag = typename LSQRequest::Flag;
using State = typename LSQRequest::State;
using LSQRequest::_fault;
using LSQRequest::_inst;
using LSQRequest::_packets;
using LSQRequest::_port;
using LSQRequest::_res;
using LSQRequest::_senderState;
using LSQRequest::_state;
using LSQRequest::flags;
using LSQRequest::isLoad;
using LSQRequest::isTranslationComplete;
using LSQRequest::lsqUnit;
using LSQRequest::request;
using LSQRequest::sendFragmentToTranslation;
using LSQRequest::setState;
using LSQRequest::numInTranslationFragments;
using LSQRequest::numTranslatedFragments;
using LSQRequest::_numOutstandingPackets;
public:
SingleDataRequest(LSQUnit* port, const DynInstPtr& inst, bool isLoad,
const Addr& addr, const uint32_t& size,
const Request::Flags& flags_,
PacketDataPtr data = nullptr,
uint64_t* res = nullptr) :
LSQRequest(port, inst, isLoad, addr, size, flags_, data, res)
{
LSQRequest::_requests.push_back(
std::make_shared<Request>(inst->getASID(), addr, size, flags_,
inst->masterId(), inst->instAddr(), inst->contextId()));
LSQRequest::_requests.back()->setReqInstSeqNum(inst->seqNum);
}
inline virtual ~SingleDataRequest() {}
virtual void initiateTranslation();
virtual void finish(const Fault &fault, const RequestPtr &req,
ThreadContext* tc, BaseTLB::Mode mode);
virtual bool recvTimingResp(PacketPtr pkt);
virtual void sendPacketToCache();
virtual void buildPackets();
virtual void handleIprWrite(ThreadContext *thread, PacketPtr pkt);
virtual Cycles handleIprRead(ThreadContext *thread, PacketPtr pkt);
virtual bool isCacheBlockHit(Addr blockAddr, Addr cacheBlockMask);
};
/** An LSQRequest for operations that cross a cache-line boundary and
 *  therefore need several memory requests (fragments). The fragments
 *  are built in initiateTranslation(), translated independently, and
 *  their responses are reassembled into a single "main" packet that is
 *  presented to the rest of the pipeline as if it were one access. */
class SplitDataRequest : public LSQRequest
{
  protected:
    /* Given that we are inside templates, children need explicit
     * declaration of the names in the parent class. */
    using Flag = typename LSQRequest::Flag;
    using State = typename LSQRequest::State;
    using LSQRequest::_addr;
    using LSQRequest::_data;
    using LSQRequest::_fault;
    using LSQRequest::_flags;
    using LSQRequest::_inst;
    using LSQRequest::_packets;
    using LSQRequest::_port;
    using LSQRequest::_requests;
    using LSQRequest::_res;
    using LSQRequest::_senderState;
    using LSQRequest::_size;
    using LSQRequest::_state;
    using LSQRequest::_taskId;
    using LSQRequest::flags;
    using LSQRequest::isLoad;
    using LSQRequest::isTranslationComplete;
    using LSQRequest::lsqUnit;
    using LSQRequest::numInTranslationFragments;
    using LSQRequest::numTranslatedFragments;
    using LSQRequest::request;
    using LSQRequest::sendFragmentToTranslation;
    using LSQRequest::setState;
    using LSQRequest::_numOutstandingPackets;

    /** Number of fragments the operation was split into. */
    uint32_t numFragments;
    /** Number of fragment responses received back from the cache. */
    uint32_t numReceivedPackets;
    /** Request spanning the whole virtual range; carries the merged
     *  metadata (flags) of the fragments rather than a real paddr. */
    RequestPtr mainReq;
    /** Raw packet presented to the pipeline once all fragments have
     *  replied; owned by this object. */
    PacketPtr _mainPacket;

  public:
    SplitDataRequest(LSQUnit* port, const DynInstPtr& inst, bool isLoad,
                     const Addr& addr, const uint32_t& size,
                     const Request::Flags & flags_,
                     PacketDataPtr data = nullptr,
                     uint64_t* res = nullptr) :
        LSQRequest(port, inst, isLoad, addr, size, flags_, data, res),
        numFragments(0),
        numReceivedPackets(0),
        mainReq(nullptr),
        _mainPacket(nullptr)
    {
        flags.set(Flag::IsSplit);
    }

    virtual ~SplitDataRequest()
    {
        // mainReq is a shared_ptr and releases itself; only the raw
        // main packet needs explicit deletion (delete on nullptr is a
        // well-defined no-op, so no guard is needed).
        delete _mainPacket;
    }

    virtual void finish(const Fault &fault, const RequestPtr &req,
            ThreadContext* tc, BaseTLB::Mode mode);
    virtual bool recvTimingResp(PacketPtr pkt);
    virtual void initiateTranslation();
    virtual void sendPacketToCache();
    virtual void buildPackets();

    virtual void handleIprWrite(ThreadContext *thread, PacketPtr pkt);
    virtual Cycles handleIprRead(ThreadContext *thread, PacketPtr pkt);
    virtual bool isCacheBlockHit(Addr blockAddr, Addr cacheBlockMask);

    virtual RequestPtr mainRequest();
    virtual PacketPtr mainPacket();
};
/** Constructs an LSQ with the given parameters. */
LSQ(O3CPU *cpu_ptr, IEW *iew_ptr, DerivO3CPUParams *params);
~LSQ() { }
@@ -85,17 +732,9 @@ class LSQ {
/** Number of entries needed for the given amount of threads.*/
int entryAmount(ThreadID num_threads);
void removeEntries(ThreadID tid);
/** Reset the max entries for each thread. */
void resetEntries();
/** Resize the max entries for a thread. */
void resizeEntries(unsigned size, ThreadID tid);
/** Ticks the LSQ. */
void tick();
/** Ticks a specific LSQ Unit. */
void tick(ThreadID tid)
{ thread[tid].tick(); }
void tick() { usedStorePorts = 0; }
/** Inserts a load into the LSQ. */
void insertLoad(const DynInstPtr &load_inst);
@@ -112,13 +751,13 @@ class LSQ {
* Commits loads up until the given sequence number for a specific thread.
*/
void commitLoads(InstSeqNum &youngest_inst, ThreadID tid)
{ thread[tid].commitLoads(youngest_inst); }
{ thread.at(tid).commitLoads(youngest_inst); }
/**
* Commits stores up until the given sequence number for a specific thread.
*/
void commitStores(InstSeqNum &youngest_inst, ThreadID tid)
{ thread[tid].commitStores(youngest_inst); }
{ thread.at(tid).commitStores(youngest_inst); }
/**
* Attempts to write back stores until all cache ports are used or the
@@ -131,8 +770,11 @@ class LSQ {
/**
* Squash instructions from a thread until the specified sequence number.
*/
void squash(const InstSeqNum &squashed_num, ThreadID tid)
{ thread[tid].squash(squashed_num); }
void
squash(const InstSeqNum &squashed_num, ThreadID tid)
{
thread.at(tid).squash(squashed_num);
}
/** Returns whether or not there was a memory ordering violation. */
bool violation();
@@ -140,50 +782,49 @@ class LSQ {
* Returns whether or not there was a memory ordering violation for a
* specific thread.
*/
bool violation(ThreadID tid)
{ return thread[tid].violation(); }
bool violation(ThreadID tid) { return thread.at(tid).violation(); }
/** Gets the instruction that caused the memory ordering violation. */
DynInstPtr getMemDepViolator(ThreadID tid)
{ return thread[tid].getMemDepViolator(); }
DynInstPtr
getMemDepViolator(ThreadID tid)
{
return thread.at(tid).getMemDepViolator();
}
/** Returns the head index of the load queue for a specific thread. */
int getLoadHead(ThreadID tid)
{ return thread[tid].getLoadHead(); }
int getLoadHead(ThreadID tid) { return thread.at(tid).getLoadHead(); }
/** Returns the sequence number of the head of the load queue. */
InstSeqNum getLoadHeadSeqNum(ThreadID tid)
InstSeqNum
getLoadHeadSeqNum(ThreadID tid)
{
return thread[tid].getLoadHeadSeqNum();
return thread.at(tid).getLoadHeadSeqNum();
}
/** Returns the head index of the store queue. */
int getStoreHead(ThreadID tid)
{ return thread[tid].getStoreHead(); }
int getStoreHead(ThreadID tid) { return thread.at(tid).getStoreHead(); }
/** Returns the sequence number of the head of the store queue. */
InstSeqNum getStoreHeadSeqNum(ThreadID tid)
InstSeqNum
getStoreHeadSeqNum(ThreadID tid)
{
return thread[tid].getStoreHeadSeqNum();
return thread.at(tid).getStoreHeadSeqNum();
}
/** Returns the number of instructions in all of the queues. */
int getCount();
/** Returns the number of instructions in the queues of one thread. */
int getCount(ThreadID tid)
{ return thread[tid].getCount(); }
int getCount(ThreadID tid) { return thread.at(tid).getCount(); }
/** Returns the total number of loads in the load queue. */
int numLoads();
/** Returns the total number of loads for a single thread. */
int numLoads(ThreadID tid)
{ return thread[tid].numLoads(); }
int numLoads(ThreadID tid) { return thread.at(tid).numLoads(); }
/** Returns the total number of stores in the store queue. */
int numStores();
/** Returns the total number of stores for a single thread. */
int numStores(ThreadID tid)
{ return thread[tid].numStores(); }
int numStores(ThreadID tid) { return thread.at(tid).numStores(); }
/** Returns the number of free load entries. */
unsigned numFreeLoadEntries();
@@ -242,46 +883,39 @@ class LSQ {
/** Returns whether or not a specific thread has any stores to write back
* to memory.
*/
bool hasStoresToWB(ThreadID tid)
{ return thread[tid].hasStoresToWB(); }
bool hasStoresToWB(ThreadID tid) { return thread.at(tid).hasStoresToWB(); }
/** Returns the number of stores a specific thread has to write back. */
int numStoresToWB(ThreadID tid)
{ return thread[tid].numStoresToWB(); }
int numStoresToWB(ThreadID tid) { return thread.at(tid).numStoresToWB(); }
/** Returns if the LSQ will write back to memory this cycle. */
bool willWB();
/** Returns if the LSQ of a specific thread will write back to memory this
* cycle.
*/
bool willWB(ThreadID tid)
{ return thread[tid].willWB(); }
bool willWB(ThreadID tid) { return thread.at(tid).willWB(); }
/** Debugging function to print out all instructions. */
void dumpInsts() const;
/** Debugging function to print out instructions from a specific thread. */
void dumpInsts(ThreadID tid) const
{ thread[tid].dumpInsts(); }
void dumpInsts(ThreadID tid) const { thread.at(tid).dumpInsts(); }
/** Executes a read operation, using the load specified at the load
* index.
*/
Fault read(const RequestPtr &req,
RequestPtr &sreqLow, RequestPtr &sreqHigh,
int load_idx);
Fault read(LSQRequest* req, int load_idx);
/** Executes a store operation, using the store specified at the store
* index.
*/
Fault write(const RequestPtr &req,
const RequestPtr &sreqLow, const RequestPtr &sreqHigh,
uint8_t *data, int store_idx);
Fault write(LSQRequest* req, uint8_t *data, int store_idx);
/**
* Retry the previous send that failed.
*/
void recvReqRetry();
void completeDataAccess(PacketPtr pkt);
/**
* Handles writing back and completing the load or store that has
* returned from memory.
@@ -292,13 +926,34 @@ class LSQ {
void recvTimingSnoopReq(PacketPtr pkt);
Fault pushRequest(const DynInstPtr& inst, bool isLoad, uint8_t *data,
unsigned int size, Addr addr, Request::Flags flags,
uint64_t *res);
/** The CPU pointer. */
O3CPU *cpu;
/** The IEW stage pointer. */
IEW *iewStage;
/** Is D-cache blocked? */
bool cacheBlocked() const;
/** Set D-cache blocked status */
void cacheBlocked(bool v);
/** Is any store port available to use? */
bool storePortAvailable() const;
/** Another store port is in use */
void storePortBusy();
protected:
/** D-cache is blocked */
bool _cacheBlocked;
/** The number of cache ports available each cycle (stores only). */
int cacheStorePorts;
/** The number of used cache ports in this cycle by stores. */
int usedStorePorts;
/** The LSQ policy for SMT mode. */
SMTQueuePolicy lsqPolicy;
@@ -307,8 +962,10 @@ class LSQ {
* and threshold, this function calculates how many resources each thread
* can occupy at most.
*/
static uint32_t maxLSQAllocation(SMTQueuePolicy pol, uint32_t entries,
uint32_t numThreads, uint32_t SMTThreshold) {
static uint32_t
maxLSQAllocation(SMTQueuePolicy pol, uint32_t entries,
uint32_t numThreads, uint32_t SMTThreshold)
{
if (pol == SMTQueuePolicy::Dynamic) {
return entries;
} else if (pol == SMTQueuePolicy::Partitioned) {
@@ -346,24 +1003,20 @@ class LSQ {
template <class Impl>
Fault
LSQ<Impl>::read(const RequestPtr &req,
RequestPtr &sreqLow, RequestPtr &sreqHigh,
int load_idx)
LSQ<Impl>::read(LSQRequest* req, int load_idx)
{
ThreadID tid = cpu->contextToThread(req->contextId());
ThreadID tid = cpu->contextToThread(req->request()->contextId());
return thread[tid].read(req, sreqLow, sreqHigh, load_idx);
return thread.at(tid).read(req, load_idx);
}
template <class Impl>
Fault
LSQ<Impl>::write(const RequestPtr &req,
const RequestPtr &sreqLow, const RequestPtr &sreqHigh,
uint8_t *data, int store_idx)
LSQ<Impl>::write(LSQRequest* req, uint8_t *data, int store_idx)
{
ThreadID tid = cpu->contextToThread(req->contextId());
ThreadID tid = cpu->contextToThread(req->request()->contextId());
return thread[tid].write(req, sreqLow, sreqHigh, data, store_idx);
return thread.at(tid).write(req, data, store_idx);
}
#endif // __CPU_O3_LSQ_HH__

View File

@@ -1,5 +1,5 @@
/*
* Copyright (c) 2011-2012, 2014 ARM Limited
* Copyright (c) 2011-2012, 2014, 2017-2018 ARM Limited
* Copyright (c) 2013 Advanced Micro Devices, Inc.
* All rights reserved
*
@@ -61,6 +61,8 @@ using namespace std;
template <class Impl>
LSQ<Impl>::LSQ(O3CPU *cpu_ptr, IEW *iew_ptr, DerivO3CPUParams *params)
: cpu(cpu_ptr), iewStage(iew_ptr),
_cacheBlocked(false),
cacheStorePorts(params->cacheStorePorts), usedStorePorts(0),
lsqPolicy(params->smtLSQPolicy),
LQEntries(params->LQEntries),
SQEntries(params->SQEntries),
@@ -76,8 +78,8 @@ LSQ<Impl>::LSQ(O3CPU *cpu_ptr, IEW *iew_ptr, DerivO3CPUParams *params)
//************ Handle SMT Parameters ***********/
//**********************************************/
//Figure out fetch policy
if (lsqPolicy == SMTQueuePolicy::Dynamic) {
/* Run SMT olicy checks. */
if (lsqPolicy == SMTQueuePolicy::Dynamic) {
DPRINTF(LSQ, "LSQ sharing policy set to Dynamic\n");
} else if (lsqPolicy == SMTQueuePolicy::Partitioned) {
DPRINTF(Fetch, "LSQ sharing policy set to Partitioned: "
@@ -85,8 +87,8 @@ LSQ<Impl>::LSQ(O3CPU *cpu_ptr, IEW *iew_ptr, DerivO3CPUParams *params)
maxLQEntries,maxSQEntries);
} else if (lsqPolicy == SMTQueuePolicy::Threshold) {
assert(params->smtLSQThreshold > LQEntries);
assert(params->smtLSQThreshold > SQEntries);
assert(params->smtLSQThreshold > params->LQEntries);
assert(params->smtLSQThreshold > params->SQEntries);
DPRINTF(LSQ, "LSQ sharing policy set to Threshold: "
"%i entries per LQ | %i entries per SQ\n",
@@ -163,79 +165,41 @@ template <class Impl>
void
LSQ<Impl>::takeOverFrom()
{
usedStorePorts = 0;
_cacheBlocked = false;
for (ThreadID tid = 0; tid < numThreads; tid++) {
thread[tid].takeOverFrom();
}
}
template <class Impl>
int
LSQ<Impl>::entryAmount(ThreadID num_threads)
template<class Impl>
bool
LSQ<Impl>::cacheBlocked() const
{
if (lsqPolicy == SMTQueuePolicy::Partitioned) {
return LQEntries / num_threads;
} else {
return 0;
}
}
template <class Impl>
void
LSQ<Impl>::resetEntries()
{
if (lsqPolicy != SMTQueuePolicy::Dynamic || numThreads > 1) {
int active_threads = activeThreads->size();
int maxEntries;
if (lsqPolicy == SMTQueuePolicy::Partitioned) {
maxEntries = LQEntries / active_threads;
} else if (lsqPolicy == SMTQueuePolicy::Threshold &&
active_threads == 1) {
maxEntries = LQEntries;
} else {
maxEntries = LQEntries;
}
list<ThreadID>::iterator threads = activeThreads->begin();
list<ThreadID>::iterator end = activeThreads->end();
while (threads != end) {
ThreadID tid = *threads++;
resizeEntries(maxEntries, tid);
}
}
return _cacheBlocked;
}
template<class Impl>
void
LSQ<Impl>::removeEntries(ThreadID tid)
LSQ<Impl>::cacheBlocked(bool v)
{
thread[tid].clearLQ();
thread[tid].clearSQ();
_cacheBlocked = v;
}
template<class Impl>
bool
LSQ<Impl>::storePortAvailable() const
{
return usedStorePorts < cacheStorePorts;
}
template<class Impl>
void
LSQ<Impl>::resizeEntries(unsigned size, ThreadID tid)
LSQ<Impl>::storePortBusy()
{
thread[tid].resizeLQ(size);
thread[tid].resizeSQ(size);
}
template<class Impl>
void
LSQ<Impl>::tick()
{
list<ThreadID>::iterator threads = activeThreads->begin();
list<ThreadID>::iterator end = activeThreads->end();
while (threads != end) {
ThreadID tid = *threads++;
thread[tid].tick();
}
usedStorePorts++;
assert(usedStorePorts <= cacheStorePorts);
}
template<class Impl>
@@ -316,12 +280,22 @@ void
LSQ<Impl>::recvReqRetry()
{
iewStage->cacheUnblocked();
cacheBlocked(false);
for (ThreadID tid : *activeThreads) {
thread[tid].recvRetry();
}
}
template <class Impl>
void
LSQ<Impl>::completeDataAccess(PacketPtr pkt)
{
auto senderState = dynamic_cast<LSQSenderState*>(pkt->senderState);
thread[cpu->contextToThread(senderState->contextId())]
.completeDataAccess(pkt);
}
template <class Impl>
bool
LSQ<Impl>::recvTimingResp(PacketPtr pkt)
@@ -330,8 +304,10 @@ LSQ<Impl>::recvTimingResp(PacketPtr pkt)
DPRINTF(LSQ, "Got error packet back for address: %#X\n",
pkt->getAddr());
thread[cpu->contextToThread(pkt->req->contextId())]
.completeDataAccess(pkt);
auto senderState = dynamic_cast<LSQSenderState*>(pkt->senderState);
panic_if(!senderState, "Got packet back with unknown sender state\n");
thread[cpu->contextToThread(senderState->contextId())].recvTimingResp(pkt);
if (pkt->isInvalidate()) {
// This response also contains an invalidate; e.g. this can be the case
@@ -352,8 +328,9 @@ LSQ<Impl>::recvTimingResp(PacketPtr pkt)
thread[tid].checkSnoop(pkt);
}
}
// Update the LSQRequest state (this may delete the request)
senderState->request()->packetReplied();
delete pkt;
return true;
}
@@ -681,4 +658,442 @@ LSQ<Impl>::dumpInsts() const
}
}
/**
 * Byte offset of addr within its containing block.
 * Uses mask arithmetic, so block_size must be a power of two.
 */
static Addr
addrBlockOffset(Addr addr, unsigned int block_size)
{
    const Addr offset_mask = block_size - 1;
    return addr & offset_mask;
}
/**
 * Round addr down to the start of its containing block.
 * Uses mask arithmetic, so block_size must be a power of two.
 */
static Addr
addrBlockAlign(Addr addr, uint64_t block_size)
{
    const uint64_t offset_mask = block_size - 1;
    return addr & ~offset_mask;
}
/**
 * True when a transfer of the given size starting at addr crosses a
 * block boundary and must therefore be split into several requests.
 */
static bool
transferNeedsBurst(Addr addr, uint64_t size, uint64_t block_size)
{
    const Addr offset = addrBlockOffset(addr, block_size);
    return offset + size > block_size;
}
/**
 * Entry point for a load/store coming from the execute stage. Creates
 * the appropriate LSQRequest (split when the access crosses a cache
 * line), starts translation, and — once translation has completed —
 * records the effective address on the instruction and forwards the
 * access to the per-thread LSQ unit via cpu->read()/cpu->write().
 * Returns the instruction's current fault state.
 */
template<class Impl>
Fault
LSQ<Impl>::pushRequest(const DynInstPtr& inst, bool isLoad, uint8_t *data,
                       unsigned int size, Addr addr, Request::Flags flags,
                       uint64_t *res)
{
    ThreadID tid = cpu->contextToThread(inst->contextId());
    auto cacheLineSize = cpu->cacheLineSize();
    bool needs_burst = transferNeedsBurst(addr, size, cacheLineSize);
    LSQRequest* req = nullptr;

    if (inst->translationStarted()) {
        // Re-execution of an instruction that already built its
        // request: reuse the saved one instead of starting over.
        req = inst->savedReq;
        assert(req);
    } else {
        if (needs_burst) {
            req = new SplitDataRequest(&thread[tid], inst, isLoad, addr,
                    size, flags, data, res);
        } else {
            req = new SingleDataRequest(&thread[tid], inst, isLoad, addr,
                    size, flags, data, res);
        }
        assert(req);
        inst->setRequest();
        req->taskId(cpu->taskId());
        req->initiateTranslation();
    }

    /* This is the place where instructions get the effAddr. */
    if (req->isTranslationComplete()) {
        if (inst->getFault() == NoFault) {
            inst->effAddr = req->getVaddr();
            inst->effSize = size;
            inst->effAddrValid(true);
            if (cpu->checker) {
                inst->reqToVerify = std::make_shared<Request>(*req->request());
            }
            if (isLoad)
                inst->getFault() = cpu->read(req, inst->lqIdx);
            else
                inst->getFault() = cpu->write(req, data, inst->sqIdx);
        } else if (isLoad) {
            // Commit will have to clean up whatever happened. Set this
            // instruction as executed.
            inst->setExecuted();
        }
    }

    if (inst->traceData)
        inst->traceData->setMem(addr, size, flags);

    return inst->getFault();
}
/**
 * TLB callback for the single (and only) fragment. Records the fault,
 * copies the translated paddr and flags onto the instruction, and moves
 * the request to the Request (ready to access memory) or Fault state.
 */
template<class Impl>
void
LSQ<Impl>::SingleDataRequest::finish(const Fault &fault, const RequestPtr &req,
        ThreadContext* tc, BaseTLB::Mode mode)
{
    _fault.push_back(fault);
    numInTranslationFragments = 0;
    numTranslatedFragments = 1;
    /* If the instruction has been squashed, let the request know
     * as it may have to self-destruct. */
    if (_inst->isSquashed()) {
        this->squashTranslation();
    } else {
        _inst->strictlyOrdered(req->isStrictlyOrdered());
        flags.set(Flag::TranslationFinished);
        if (fault == NoFault) {
            // Translation succeeded: propagate physical address and
            // the (possibly updated) request flags to the instruction.
            _inst->physEffAddr = req->getPaddr();
            _inst->memReqFlags = req->getFlags();
            if (req->isCondSwap()) {
                // Conditional swaps carry their compare value in the
                // result slot; it must have been provided.
                assert(_res);
                req->setExtraData(*_res);
            }
            setState(State::Request);
        } else {
            setState(State::Fault);
        }

        LSQRequest::_inst->fault = fault;
        LSQRequest::_inst->translationCompleted(true);
    }
}
/**
 * TLB callback for one fragment of a split access. Accumulates the
 * fragment's fault and flags into mainReq; once the last fragment has
 * been translated, the whole request transitions to Request state (if
 * every fragment translated cleanly) or Fault state (first fault wins).
 */
template<class Impl>
void
LSQ<Impl>::SplitDataRequest::finish(const Fault &fault, const RequestPtr &req,
        ThreadContext* tc, BaseTLB::Mode mode)
{
    _fault.push_back(fault);
    // Fragments normally complete in order; a delayed translation is
    // the only case where the finishing request may not be the next one.
    assert(req == _requests[numTranslatedFragments] || this->isDelayed());

    numInTranslationFragments--;
    numTranslatedFragments++;

    // Merge this fragment's flags into the umbrella request.
    mainReq->setFlags(req->getFlags());

    if (numTranslatedFragments == _requests.size()) {
        /* All fragments are back from the TLB. */
        if (_inst->isSquashed()) {
            this->squashTranslation();
        } else {
            _inst->strictlyOrdered(mainReq->isStrictlyOrdered());
            flags.set(Flag::TranslationFinished);
            auto fault_it = _fault.begin();
            /* Ffwd to the first NoFault. */
            while (fault_it != _fault.end() && *fault_it == NoFault)
                fault_it++;
            /* If none of the fragments faulted: */
            if (fault_it == _fault.end()) {
                // Instruction reports the paddr of the first fragment.
                _inst->physEffAddr = request(0)->getPaddr();
                _inst->memReqFlags = mainReq->getFlags();
                if (mainReq->isCondSwap()) {
                    assert(_res);
                    mainReq->setExtraData(*_res);
                }
                setState(State::Request);
                _inst->fault = NoFault;
            } else {
                // Report the oldest (first) faulting fragment.
                setState(State::Fault);
                _inst->fault = *fault_it;
            }
            _inst->translationCompleted(true);
        }
    }
}
/**
 * Kicks off timing translation of the single request. The request is
 * saved on the instruction so a replayed/re-executed instruction picks
 * up the same LSQRequest instead of building a new one.
 */
template<class Impl>
void
LSQ<Impl>::SingleDataRequest::initiateTranslation()
{
    _inst->translationStarted(true);
    setState(State::Translation);
    flags.set(Flag::TranslationStarted);

    _inst->savedReq = this;
    sendFragmentToTranslation(0);
    // Nothing more to do here: finish() handles both the immediate
    // (same-cycle) and the delayed translation completion paths.
}
/** Accessor for the assembled whole-operation packet (valid only after
 *  buildPackets() has run for a load). */
template<class Impl>
PacketPtr
LSQ<Impl>::SplitDataRequest::mainPacket()
{
    return _mainPacket;
}
/** Accessor for the umbrella request that spans all fragments (valid
 *  only after initiateTranslation() has run). */
template<class Impl>
RequestPtr
LSQ<Impl>::SplitDataRequest::mainRequest()
{
    return mainReq;
}
/**
 * Splits the access into cache-line-sized fragments and sends each one
 * to the TLB. Three kinds of fragment are built: a possibly-unaligned
 * prefix up to the next line boundary, zero or more full aligned lines,
 * and a tail covering any remaining bytes. A paddr-less mainReq is also
 * created to accumulate per-fragment flags (see finish()).
 */
template<class Impl>
void
LSQ<Impl>::SplitDataRequest::initiateTranslation()
{
    _inst->translationStarted(true);
    setState(State::Translation);
    flags.set(Flag::TranslationStarted);

    unsigned int cacheLineSize = _port.cacheLineSize();
    Addr base_addr = _addr;
    // First line boundary after the start of the access.
    Addr next_addr = addrBlockAlign(_addr + cacheLineSize, cacheLineSize);
    // Line boundary at (or before) the end of the access.
    Addr final_addr = addrBlockAlign(_addr + _size, cacheLineSize);
    uint32_t size_so_far = 0;

    mainReq = std::make_shared<Request>(_inst->getASID(), base_addr,
            _size, _flags, _inst->masterId(),
            _inst->instAddr(), _inst->contextId());

    // Paddr is not used in mainReq. However, we will accumulate the flags
    // from the sub requests into mainReq by calling setFlags() in finish().
    // setFlags() assumes that paddr is set so flip the paddr valid bit here to
    // avoid a potential assert in setFlags() when we call it from finish().
    mainReq->setPaddr(0);

    /* Get the pre-fix, possibly unaligned. */
    _requests.push_back(std::make_shared<Request>(_inst->getASID(), base_addr,
                next_addr - base_addr, _flags, _inst->masterId(),
                _inst->instAddr(), _inst->contextId()));
    size_so_far = next_addr - base_addr;

    /* We are block aligned now, reading whole blocks. */
    base_addr = next_addr;
    while (base_addr != final_addr) {
        _requests.push_back(std::make_shared<Request>(_inst->getASID(),
                    base_addr, cacheLineSize, _flags, _inst->masterId(),
                    _inst->instAddr(), _inst->contextId()));
        size_so_far += cacheLineSize;
        base_addr += cacheLineSize;
    }

    /* Deal with the tail. */
    if (size_so_far < _size) {
        _requests.push_back(std::make_shared<Request>(_inst->getASID(),
                    base_addr, _size - size_so_far, _flags, _inst->masterId(),
                    _inst->instAddr(), _inst->contextId()));
    }

    /* Setup the requests and send them to translation. */
    for (auto& r: _requests) {
        r->setReqInstSeqNum(_inst->seqNum);
        r->taskId(_taskId);
    }
    // Save the request on the instruction so re-execution reuses it.
    this->_inst->savedReq = this;
    numInTranslationFragments = 0;
    numTranslatedFragments = 0;

    for (uint32_t i = 0; i < _requests.size(); i++) {
        sendFragmentToTranslation(i);
    }
}
/**
 * Sends fragment i to the data TLB for timing translation; this object
 * is the callback (finish() is invoked on completion). The TLB mode is
 * derived from whether the operation is a load or a store.
 */
template<class Impl>
void
LSQ<Impl>::LSQRequest::sendFragmentToTranslation(int i)
{
    numInTranslationFragments++;
    _port.dTLB()->translateTiming(
            this->request(i),
            this->_inst->thread->getTC(), this,
            this->isLoad() ? BaseTLB::Read : BaseTLB::Write);
}
/**
 * Cache response for the single outstanding packet: mark the request
 * complete and hand the packet back to the LSQ unit for writeback.
 */
template<class Impl>
bool
LSQ<Impl>::SingleDataRequest::recvTimingResp(PacketPtr pkt)
{
    assert(_numOutstandingPackets == 1);
    auto state = dynamic_cast<LSQSenderState*>(pkt->senderState);
    setState(State::Complete);
    flags.set(Flag::Complete);
    state->outstanding--;
    // The single packet must be the one we sent.
    assert(pkt == _packets.front());
    _port.completeDataAccess(pkt);
    return true;
}
/**
 * Cache response for one fragment of a split access. Counts received
 * fragments; when the last one arrives, a response packet spanning the
 * whole operation is synthesized over mainReq and handed to the LSQ
 * unit, then deleted (the data buffers are owned elsewhere).
 */
template<class Impl>
bool
LSQ<Impl>::SplitDataRequest::recvTimingResp(PacketPtr pkt)
{
    auto state = dynamic_cast<LSQSenderState*>(pkt->senderState);
    // Locate which fragment this response belongs to.
    uint32_t pktIdx = 0;
    while (pktIdx < _packets.size() && pkt != _packets[pktIdx])
        pktIdx++;
    assert(pktIdx < _packets.size());
    assert(pkt->req == _requests[pktIdx]);
    assert(pkt == _packets[pktIdx]);
    numReceivedPackets++;
    state->outstanding--;
    if (numReceivedPackets == _packets.size()) {
        setState(State::Complete);
        flags.set(Flag::Complete);
        /* Assemble packets. */
        // Build a whole-operation response; for loads the fragments
        // already wrote into _inst->memData via their static buffers.
        PacketPtr resp = isLoad()
            ? Packet::createRead(mainReq)
            : Packet::createWrite(mainReq);
        if (isLoad())
            resp->dataStatic(_inst->memData);
        else
            resp->dataStatic(_data);
        resp->senderState = _senderState;
        _port.completeDataAccess(resp);
        delete resp;
    }
    return true;
}
/**
 * Builds the single packet for this access, pointing at the
 * instruction's data buffer. Idempotent: a retry after a rejected send
 * reuses the existing packet instead of creating a new one.
 */
template<class Impl>
void
LSQ<Impl>::SingleDataRequest::buildPackets()
{
    assert(_senderState);
    /* Retries do not create new packets. */
    if (_packets.size() == 0) {
        _packets.push_back(
                isLoad()
                ?  Packet::createRead(request())
                :  Packet::createWrite(request()));
        _packets.back()->dataStatic(_inst->memData);
        _packets.back()->senderState = _senderState;
    }
    assert(_packets.size() == 1);
}
/**
 * Builds one packet per fragment plus (for loads) the main packet the
 * pipeline sees. Load fragments point statically into successive
 * offsets of the instruction's data buffer; store fragments get their
 * own heap copies of the relevant slice (freed with the packet).
 * Idempotent: retries reuse the packets built on the first call.
 */
template<class Impl>
void
LSQ<Impl>::SplitDataRequest::buildPackets()
{
    /* Running offset of each fragment within the operation's data. */
    ptrdiff_t offset = 0;
    if (_packets.size() == 0) {
        /* New stuff */
        if (isLoad()) {
            _mainPacket = Packet::createRead(mainReq);
            _mainPacket->dataStatic(_inst->memData);
        }
        for (auto& r: _requests) {
            PacketPtr pkt = isLoad() ? Packet::createRead(r)
                                     : Packet::createWrite(r);
            if (isLoad()) {
                pkt->dataStatic(_inst->memData + offset);
            } else {
                // Stores: give each fragment its own copy so the
                // packet owns (and frees) its slice of the data.
                uint8_t* req_data = new uint8_t[r->getSize()];
                std::memcpy(req_data,
                        _inst->memData + offset,
                        r->getSize());
                pkt->dataDynamic(req_data);
            }
            offset += r->getSize();
            pkt->senderState = _senderState;
            _packets.push_back(pkt);
        }
    }
    assert(_packets.size() == _requests.size());
}
/**
 * Attempts to issue the single packet to the D-cache; on success there
 * is exactly one outstanding packet. If the cache refuses, the LSQ
 * unit's retry mechanism will call this again later.
 */
template<class Impl>
void
LSQ<Impl>::SingleDataRequest::sendPacketToCache()
{
    assert(_numOutstandingPackets == 0);
    if (lsqUnit()->trySendPacket(isLoad(), _packets.at(0)))
        _numOutstandingPackets = 1;
}
/**
 * Issues as many not-yet-sent fragment packets as the cache accepts,
 * in order. Fragments are sent starting after those already received
 * or outstanding, so a later retry resumes where this attempt stopped.
 */
template<class Impl>
void
LSQ<Impl>::SplitDataRequest::sendPacketToCache()
{
    /* Try to send the packets. */
    while (numReceivedPackets + _numOutstandingPackets < _packets.size() &&
            lsqUnit()->trySendPacket(isLoad(),
                _packets.at(numReceivedPackets + _numOutstandingPackets))) {
        _numOutstandingPackets++;
    }
}
/** IPR (internal processor register) write: a single access maps
 *  directly onto the ISA's IPR write handler. */
template<class Impl>
void
LSQ<Impl>::SingleDataRequest::handleIprWrite(ThreadContext *thread,
                                             PacketPtr pkt)
{
    TheISA::handleIprWrite(thread, pkt);
}
/**
 * IPR write for a split access: replays the write as one temporary
 * WriteReq packet per fragment, each aliasing the corresponding slice
 * of the main packet's data.
 */
template<class Impl>
void
LSQ<Impl>::SplitDataRequest::handleIprWrite(ThreadContext *thread,
                                            PacketPtr mainPkt)
{
    unsigned offset = 0;
    // const& avoids copying (and atomically ref-counting) the
    // shared_ptr request on every iteration.
    for (const auto& r : _requests) {
        PacketPtr pkt = new Packet(r, MemCmd::WriteReq);
        pkt->dataStatic(mainPkt->getPtr<uint8_t>() + offset);
        TheISA::handleIprWrite(thread, pkt);
        offset += r->getSize();
        delete pkt;
    }
}
/** IPR (internal processor register) read: a single access maps
 *  directly onto the ISA's IPR read handler; returns its latency. */
template<class Impl>
Cycles
LSQ<Impl>::SingleDataRequest::handleIprRead(ThreadContext *thread,
                                            PacketPtr pkt)
{
    return TheISA::handleIprRead(thread, pkt);
}
/**
 * IPR read for a split access: replays the read as one temporary
 * ReadReq packet per fragment, writing into the corresponding slice of
 * the main packet's data. Returns the largest per-fragment latency
 * (fragments are treated as if handled in parallel).
 */
template<class Impl>
Cycles
LSQ<Impl>::SplitDataRequest::handleIprRead(ThreadContext *thread,
                                           PacketPtr mainPkt)
{
    Cycles delay(0);
    unsigned offset = 0;
    // const& avoids copying (and atomically ref-counting) the
    // shared_ptr request on every iteration.
    for (const auto& r : _requests) {
        PacketPtr pkt = new Packet(r, MemCmd::ReadReq);
        pkt->dataStatic(mainPkt->getPtr<uint8_t>() + offset);
        Cycles d = TheISA::handleIprRead(thread, pkt);
        if (d > delay)
            delay = d;
        offset += r->getSize();
        delete pkt;
    }
    return delay;
}
/** True when the (translated) physical address of the single request
 *  falls in the given cache block; used for snoop checks. */
template<class Impl>
bool
LSQ<Impl>::SingleDataRequest::isCacheBlockHit(Addr blockAddr, Addr blockMask)
{
    return ( (LSQRequest::_requests[0]->getPaddr() & blockMask) == blockAddr);
}
/**
 * True when any fragment's (translated) physical address falls in the
 * given cache block; used for snoop checks against split accesses.
 */
template<class Impl>
bool
LSQ<Impl>::SplitDataRequest::isCacheBlockHit(Addr blockAddr, Addr blockMask)
{
    for (auto &fragment : _requests) {
        if ((fragment->getPaddr() & blockMask) == blockAddr)
            return true;
    }
    return false;
}
#endif//__CPU_O3_LSQ_IMPL_HH__

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

View File

@@ -409,7 +409,7 @@ ElasticTrace::addDepTraceRecord(const DynInstConstPtr& head_inst,
new_record->reqFlags = head_inst->memReqFlags;
new_record->virtAddr = head_inst->effAddr;
new_record->asid = head_inst->asid;
new_record->physAddr = head_inst->physEffAddrLow;
new_record->physAddr = head_inst->physEffAddr;
// Currently the tracing does not support split requests.
new_record->size = head_inst->effSize;
new_record->pc = head_inst->instAddr();