arch-vega: Add VEGA page tables and TLB

Add the page table walker, page table format, TLB, TLB coalescer, and
associated support in the AMDGPUDevice. This page table format uses the
hardware format for dGPU and is very different from APU/GCN3, which use
the X86 page table format.

In order to support either format for the GPU model, a common
TranslationState called GpuTranslationState is created which holds the
combined fields of both the APU and Vega translation state. Similarly
the TlbEntry is cast at runtime by the corresponding arch files as they
are the only files which touch the internals of the TlbEntry. The GPU
model only checks if a TlbEntry is non-null and thus does not need to
cast to peek inside the data structure.

Change-Id: I4484c66239b48df5224d61caa6e968e56eea38a5
Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/51848
Reviewed-by: Jason Lowe-Power <power.jg@gmail.com>
Maintainer: Jason Lowe-Power <power.jg@gmail.com>
Tested-by: kokoro <noreply+kokoro@google.com>
This commit is contained in:
Matthew Poremba
2021-09-01 18:25:19 -05:00
parent 7cfe88df74
commit 539a2e2bcd
22 changed files with 3599 additions and 69 deletions

View File

@@ -0,0 +1,106 @@
/*
* Copyright (c) 2022 Advanced Micro Devices, Inc.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* 1. Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
*
* 2. Redistributions in binary form must reproduce the above copyright notice,
* this list of conditions and the following disclaimer in the documentation
* and/or other materials provided with the distribution.
*
* 3. Neither the name of the copyright holder nor the names of its
* contributors may be used to endorse or promote products derived from this
* software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef __ARCH_AMDGPU_COMMON_GPU_TRANSLATION_STATE_HH__
#define __ARCH_AMDGPU_COMMON_GPU_TRANSLATION_STATE_HH__
#include "arch/generic/mmu.hh"
namespace gem5
{
class ResponsePort;
/**
 * GPU TranslationState: this is currently somewhat of a bastardization
 * of the usage of SenderState, whereby the receiver of a packet is not
 * usually supposed to need to look at the contents of the senderState;
 * you're really only supposed to look at what you pushed on, pop it
 * off, and send it back.
 *
 * However, since there is state that we want to pass to the TLBs using
 * the send/recv Timing/Functional/etc. APIs, which don't allow for new
 * arguments, we need a common TLB senderState to pass between TLBs,
 * both "forwards" and "backwards."
 *
 * So, basically, the rule is that any packet received by a TLB port
 * (cpuside OR memside) must be safely castable to a GpuTranslationState.
 */
struct GpuTranslationState : public Packet::SenderState
{
    /** TLB access mode for this request: read or write. */
    BaseMMU::Mode tlbMode;
    /** SE-mode thread context associated with this request (null in FS mode). */
    ThreadContext *tc = nullptr;
    /** FS-mode device ID of the requesting device. */
    int deviceId = 0;
    /** FS-mode Process Address Space ID. */
    int pasId = 0;
    /**
     * TLB entry to be populated and passed back and filled in
     * previous TLBs. Equivalent to the data cache concept of
     * "data return." Stored as Serializable*; the arch-specific
     * code casts it to its own TlbEntry type.
     */
    Serializable *tlbEntry = nullptr;
    /** True if this is a TLB prefetch request. */
    bool isPrefetch = false;
    /** Time at which the request for this translation was issued. */
    uint64_t issueTime = 0;
    /** Ports this request came through, so the response can be routed back. */
    std::vector<ResponsePort *> ports;
    /**
     * Number of uncoalesced requests per packet per TLB level;
     * reqCnt at a level is >= reqCnt at a higher level.
     */
    std::vector<int> reqCnt;
    /** TLB level this packet hit in; 0 if it hit in the page table. */
    int hitLevel = 0;
    /** SenderState pushed by the original sender, to be restored on return. */
    Packet::SenderState *saved = nullptr;

    /** Construct with an explicit (SE-mode) thread context. */
    GpuTranslationState(BaseMMU::Mode tlb_mode, ThreadContext *_tc,
                        bool _prefetch=false,
                        Packet::SenderState *_saved=nullptr)
        : tlbMode(tlb_mode), tc(_tc), isPrefetch(_prefetch), saved(_saved)
    { }

    /** Construct without a thread context (FS mode); delegates above. */
    GpuTranslationState(BaseMMU::Mode tlb_mode,
                        bool _prefetch=false,
                        Packet::SenderState *_saved=nullptr)
        : GpuTranslationState(tlb_mode, nullptr, _prefetch, _saved)
    { }
};
} // namespace gem5
#endif // __ARCH_AMDGPU_COMMON_GPU_TRANSLATION_STATE_HH__

View File

@@ -35,6 +35,7 @@
#include <cmath>
#include <cstring>
#include "arch/amdgpu/common/gpu_translation_state.hh"
#include "arch/x86/faults.hh"
#include "arch/x86/insts/microldstop.hh"
#include "arch/x86/page_size.hh"
@@ -664,8 +665,8 @@ namespace X86ISA
Addr virt_page_addr = roundDown(pkt->req->getVaddr(),
X86ISA::PageBytes);
TranslationState *sender_state =
safe_cast<TranslationState*>(pkt->senderState);
GpuTranslationState *sender_state =
safe_cast<GpuTranslationState*>(pkt->senderState);
bool update_stats = !sender_state->isPrefetch;
ThreadContext * tmp_tc = sender_state->tc;
@@ -788,8 +789,8 @@ namespace X86ISA
assert(pkt);
Addr vaddr = pkt->req->getVaddr();
TranslationState *sender_state =
safe_cast<TranslationState*>(pkt->senderState);
GpuTranslationState *sender_state =
safe_cast<GpuTranslationState*>(pkt->senderState);
ThreadContext *tc = sender_state->tc;
Mode mode = sender_state->tlbMode;
@@ -799,7 +800,7 @@ namespace X86ISA
if (tlb_outcome == TLB_HIT) {
DPRINTF(GPUTLB, "Translation Done - TLB Hit for addr %#x\n",
vaddr);
local_entry = sender_state->tlbEntry;
local_entry = safe_cast<TlbEntry *>(sender_state->tlbEntry);
} else {
DPRINTF(GPUTLB, "Translation Done - TLB Miss for addr %#x\n",
vaddr);
@@ -809,7 +810,7 @@ namespace X86ISA
* lower TLB level. The senderState should be "carrying" a pointer
* to the correct TLBEntry.
*/
new_entry = sender_state->tlbEntry;
new_entry = safe_cast<TlbEntry *>(sender_state->tlbEntry);
assert(new_entry);
local_entry = new_entry;
@@ -877,8 +878,8 @@ namespace X86ISA
assert(translationReturnEvent[virtPageAddr]);
assert(pkt);
TranslationState *tmp_sender_state =
safe_cast<TranslationState*>(pkt->senderState);
GpuTranslationState *tmp_sender_state =
safe_cast<GpuTranslationState*>(pkt->senderState);
int req_cnt = tmp_sender_state->reqCnt.back();
bool update_stats = !tmp_sender_state->isPrefetch;
@@ -945,8 +946,8 @@ namespace X86ISA
DPRINTF(GPUTLB, "Doing a page walk for address %#x\n",
virtPageAddr);
TranslationState *sender_state =
safe_cast<TranslationState*>(pkt->senderState);
GpuTranslationState *sender_state =
safe_cast<GpuTranslationState*>(pkt->senderState);
Process *p = sender_state->tc->getProcessPtr();
Addr vaddr = pkt->req->getVaddr();
@@ -1038,8 +1039,8 @@ namespace X86ISA
void
GpuTLB::handleFuncTranslationReturn(PacketPtr pkt, tlbOutcome tlb_outcome)
{
TranslationState *sender_state =
safe_cast<TranslationState*>(pkt->senderState);
GpuTranslationState *sender_state =
safe_cast<GpuTranslationState*>(pkt->senderState);
ThreadContext *tc = sender_state->tc;
Mode mode = sender_state->tlbMode;
@@ -1051,7 +1052,7 @@ namespace X86ISA
DPRINTF(GPUTLB, "Functional Translation Done - TLB hit for addr "
"%#x\n", vaddr);
local_entry = sender_state->tlbEntry;
local_entry = safe_cast<TlbEntry *>(sender_state->tlbEntry);
} else {
DPRINTF(GPUTLB, "Functional Translation Done - TLB miss for addr "
"%#x\n", vaddr);
@@ -1061,7 +1062,7 @@ namespace X86ISA
* lower TLB level. The senderState should be "carrying" a pointer
* to the correct TLBEntry.
*/
new_entry = sender_state->tlbEntry;
new_entry = safe_cast<TlbEntry *>(sender_state->tlbEntry);
assert(new_entry);
local_entry = new_entry;
@@ -1110,8 +1111,8 @@ namespace X86ISA
void
GpuTLB::CpuSidePort::recvFunctional(PacketPtr pkt)
{
TranslationState *sender_state =
safe_cast<TranslationState*>(pkt->senderState);
GpuTranslationState *sender_state =
safe_cast<GpuTranslationState*>(pkt->senderState);
ThreadContext *tc = sender_state->tc;
bool update_stats = !sender_state->isPrefetch;

View File

@@ -264,56 +264,6 @@ namespace X86ISA
Port &getPort(const std::string &if_name,
PortID idx=InvalidPortID) override;
/**
* TLB TranslationState: this currently is a somewhat bastardization of
* the usage of SenderState, whereby the receiver of a packet is not
* usually supposed to need to look at the contents of the senderState,
* you're really only supposed to look at what you pushed on, pop it
* off, and send it back.
*
* However, since there is state that we want to pass to the TLBs using
* the send/recv Timing/Functional/etc. APIs, which don't allow for new
* arguments, we need a common TLB senderState to pass between TLBs,
* both "forwards" and "backwards."
*
* So, basically, the rule is that any packet received by a TLB port
* (cpuside OR memside) must be safely castable to a TranslationState.
*/
struct TranslationState : public Packet::SenderState
{
// TLB mode, read or write
Mode tlbMode;
// Thread context associated with this req
ThreadContext *tc;
/*
* TLB entry to be populated and passed back and filled in
* previous TLBs. Equivalent to the data cache concept of
* "data return."
*/
TlbEntry *tlbEntry;
// Is this a TLB prefetch request?
bool isPrefetch;
// When was the req for this translation issued
uint64_t issueTime;
// Remember where this came from
std::vector<ResponsePort*>ports;
// keep track of #uncoalesced reqs per packet per TLB level;
// reqCnt per level >= reqCnt higher level
std::vector<int> reqCnt;
// TLB level this packet hit in; 0 if it hit in the page table
int hitLevel;
Packet::SenderState *saved;
TranslationState(Mode tlb_mode, ThreadContext *_tc,
bool is_prefetch=false,
Packet::SenderState *_saved=nullptr)
: tlbMode(tlb_mode), tc(_tc), tlbEntry(nullptr),
isPrefetch(is_prefetch), issueTime(0),
hitLevel(0),saved(_saved) { }
};
// maximum number of permitted coalesced requests per cycle
int maxCoalescedReqs;
@@ -436,8 +386,6 @@ namespace X86ISA
};
}
using GpuTranslationState = X86ISA::GpuTLB::TranslationState;
} // namespace gem5
#endif // __GPU_TLB_HH__

View File

@@ -33,6 +33,7 @@
#include <cstring>
#include "arch/amdgpu/common/gpu_translation_state.hh"
#include "arch/x86/page_size.hh"
#include "base/logging.hh"
#include "debug/GPUTLB.hh"
@@ -149,7 +150,8 @@ TLBCoalescer::updatePhysAddresses(PacketPtr pkt)
GpuTranslationState *sender_state =
safe_cast<GpuTranslationState*>(pkt->senderState);
TheISA::TlbEntry *tlb_entry = sender_state->tlbEntry;
X86ISA::TlbEntry *tlb_entry =
safe_cast<X86ISA::TlbEntry *>(sender_state->tlbEntry);
assert(tlb_entry);
Addr first_entry_vaddr = tlb_entry->vaddr;
Addr first_entry_paddr = tlb_entry->paddr;