From 6a4a12ebbdcc4591f02dca0989ff0353af2b9422 Mon Sep 17 00:00:00 2001
From: Matthew Poremba <matthew.poremba@amd.com>
Date: Tue, 18 Oct 2022 12:56:48 -0700
Subject: [PATCH] arch-vega: Improve non-native page size support

Vega allows for any integer multiple of 4kB pages. However, the current
implementation is designed primarily for 4kB pages. In order to support
variable page sizes, the physical address calculation needs to be
updated to add the virtual page offset to the base physical address
rather than bitwise-OR. Bitwise-OR assumes physical pages are
aligned to the page size, which is generally not the case for very
large pages (1GB+).

This changeset changes all of the physical address computations to add
the virtual offset to the physical page address. This fixes many GPUFS
applications which use larger pages. The support was tested by
hipMalloc'ing ~5GB to induce the creation of a large page. The test
application now passes verification with this change.

Change-Id: Ic8d1475e001def443f3e4ab609449bca0c40b638
Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/64751
Reviewed-by: Matt Sinclair <mattdsinclair@gmail.com>
Maintainer: Matt Sinclair <mattdsinclair@gmail.com>
Tested-by: kokoro <noreply+kokoro@google.com>
---
 src/arch/amdgpu/vega/tlb.cc           | 28 ++++++---------------------
 src/arch/amdgpu/vega/tlb_coalescer.cc | 11 ++---------
 2 files changed, 8 insertions(+), 31 deletions(-)

diff --git a/src/arch/amdgpu/vega/tlb.cc b/src/arch/amdgpu/vega/tlb.cc
index cee445ff30..c3dd576f0e 100644
--- a/src/arch/amdgpu/vega/tlb.cc
+++ b/src/arch/amdgpu/vega/tlb.cc
@@ -158,24 +158,7 @@ GpuTLB::insert(Addr vpn, VegaTlbEntry &entry)
 {
     VegaTlbEntry *newEntry = nullptr;
 
-    /**
-     * vpn holds the virtual page address assuming native page size.
-     * However, we need to check the entry size as Vega supports
-     * flexible page sizes of arbitrary size. The set will assume
-     * native page size but the vpn needs to be fixed up to consider
-     * the flexible page size.
-     */
-    Addr real_vpn = vpn & ~(entry.size() - 1);
-
-    /**
-     * Also fix up the ppn as this is used in the math later to compute paddr.
-     */
-    Addr real_ppn = entry.paddr & ~(entry.size() - 1);
-
-    int set = (real_vpn >> VegaISA::PageShift) & setMask;
-
-    DPRINTF(GPUTLB, "Inserted %#lx -> %#lx of size %#lx into set %d\n",
-            real_vpn, real_ppn, entry.size(), set);
+    int set = (entry.vaddr >> VegaISA::PageShift) & setMask;
 
     if (!freeList[set].empty()) {
         newEntry = freeList[set].front();
@@ -186,10 +169,11 @@ GpuTLB::insert(Addr vpn, VegaTlbEntry &entry)
     }
 
     *newEntry = entry;
-    newEntry->vaddr = real_vpn;
-    newEntry->paddr = real_ppn;
     entryList[set].push_front(newEntry);
 
+    DPRINTF(GPUTLB, "Inserted %#lx -> %#lx of size %#lx into set %d\n",
+            newEntry->vaddr, newEntry->paddr, entry.size(), set);
+
     return newEntry;
 }
 
@@ -524,7 +508,7 @@ GpuTLB::handleTranslationReturn(Addr virt_page_addr,
 
     pagingProtectionChecks(pkt, local_entry, mode);
     int page_size = local_entry->size();
-    Addr paddr = local_entry->paddr | (vaddr & (page_size - 1));
+    Addr paddr = local_entry->paddr + (vaddr & (page_size - 1));
     DPRINTF(GPUTLB, "Translated %#x -> %#x.\n", vaddr, paddr);
 
     // Since this packet will be sent through the cpu side port, it must be
@@ -767,7 +751,7 @@ GpuTLB::handleFuncTranslationReturn(PacketPtr pkt, tlbOutcome tlb_outcome)
         pagingProtectionChecks(pkt, local_entry, mode);
 
     int page_size = local_entry->size();
-    Addr paddr = local_entry->paddr | (vaddr & (page_size - 1));
+    Addr paddr = local_entry->paddr + (vaddr & (page_size - 1));
     DPRINTF(GPUTLB, "Translated %#x -> %#x.\n", vaddr, paddr);
 
     pkt->req->setPaddr(paddr);
diff --git a/src/arch/amdgpu/vega/tlb_coalescer.cc b/src/arch/amdgpu/vega/tlb_coalescer.cc
index d02c9bc155..414bb85bbb 100644
--- a/src/arch/amdgpu/vega/tlb_coalescer.cc
+++ b/src/arch/amdgpu/vega/tlb_coalescer.cc
@@ -162,13 +162,6 @@ VegaTLBCoalescer::updatePhysAddresses(PacketPtr pkt)
     int page_size = tlb_entry.size();
     bool uncacheable = tlb_entry.uncacheable();
     int first_hit_level = sender_state->hitLevel;
-
-    // Get the physical page address of the translated request
-    // Using the page_size specified in the TLBEntry allows us
-    // to support different page sizes.
-    Addr phys_page_paddr = pkt->req->getPaddr();
-    phys_page_paddr &= ~(page_size - 1);
-
     bool is_system = pkt->req->systemReq();
 
     for (int i = 0; i < issuedTranslationsTable[virt_page_addr].size(); ++i) {
@@ -190,8 +183,8 @@ VegaTLBCoalescer::updatePhysAddresses(PacketPtr pkt)
          * page offsets.
          */
         if (i) {
-            Addr paddr = phys_page_paddr;
-            paddr |= (local_pkt->req->getVaddr() & (page_size - 1));
+            Addr paddr = first_entry_paddr
+                       + (local_pkt->req->getVaddr() & (page_size - 1));
             local_pkt->req->setPaddr(paddr);
 
             if (uncacheable)