From 24504c9a3eebff840c7f97c654c4a0b1f0f884a8 Mon Sep 17 00:00:00 2001
From: Matthew Poremba <matthew.poremba@amd.com>
Date: Thu, 3 Oct 2024 08:47:08 -0700
Subject: [PATCH] dev-amdgpu: Use GPU specific cache line size (#1621)

Invalidate requests align to the system cache line size. This causes
problems if the GPU cache hierarchy's cache line size differs from the
system's, as the unaligned requests never return, leading to deadlock
on deferred dispatch.

This commit uses the cache line size from the GPU memory manager and
makes the cache line size there non-optional.

Tested with multiple RubySystems where the CPU side used 64B cache lines
and the GPU side used 128B cache lines.
---
 configs/example/gpufs/system/system.py | 2 +-
 src/dev/amdgpu/AMDGPU.py               | 2 ++
 src/dev/amdgpu/amdgpu_device.cc        | 3 +--
 src/dev/amdgpu/memory_manager.cc       | 2 +-
 src/dev/amdgpu/memory_manager.hh       | 2 ++
 5 files changed, 7 insertions(+), 4 deletions(-)

diff --git a/configs/example/gpufs/system/system.py b/configs/example/gpufs/system/system.py
index 622c2cbeb9..1ce261d764 100644
--- a/configs/example/gpufs/system/system.py
+++ b/configs/example/gpufs/system/system.py
@@ -199,7 +199,7 @@ def makeGpuFSSystem(args):
     system.pc.south_bridge.gpu.pm4_pkt_procs = pm4_procs
 
     # GPU data path
-    gpu_mem_mgr = AMDGPUMemoryManager()
+    gpu_mem_mgr = AMDGPUMemoryManager(cache_line_size=args.cacheline_size)
     system.pc.south_bridge.gpu.memory_manager = gpu_mem_mgr
 
     # CPU data path (SystemHub)
diff --git a/src/dev/amdgpu/AMDGPU.py b/src/dev/amdgpu/AMDGPU.py
index 0e0f597927..35ffcfe528 100644
--- a/src/dev/amdgpu/AMDGPU.py
+++ b/src/dev/amdgpu/AMDGPU.py
@@ -128,6 +128,8 @@ class AMDGPUMemoryManager(ClockedObject):
     cxx_header = "dev/amdgpu/memory_manager.hh"
     cxx_class = "gem5::AMDGPUMemoryManager"
 
+    cache_line_size = Param.UInt64("Cache line size in bytes")
+
     port = RequestPort("Memory Port to access VRAM (device memory)")
     system = Param.System(Parent.any, "System the dGPU belongs to")
 
diff --git a/src/dev/amdgpu/amdgpu_device.cc b/src/dev/amdgpu/amdgpu_device.cc
index f8ecad3805..c82d0de60c 100644
--- a/src/dev/amdgpu/amdgpu_device.cc
+++ b/src/dev/amdgpu/amdgpu_device.cc
@@ -428,8 +428,7 @@ AMDGPUDevice::writeFrame(PacketPtr pkt, Addr offset)
     DPRINTF(AMDGPUDevice, "Wrote framebuffer address %#lx\n", offset);
 
     for (auto& cu: CP()->shader()->cuList) {
-        auto system = CP()->shader()->gpuCmdProc.system();
-        Addr aligned_addr = offset & ~(system->cacheLineSize() - 1);
+        Addr aligned_addr = offset & ~(gpuMemMgr->getCacheLineSize() - 1);
         cu->sendInvL2(aligned_addr);
     }
 
diff --git a/src/dev/amdgpu/memory_manager.cc b/src/dev/amdgpu/memory_manager.cc
index 7b671b0611..77fc1576e4 100644
--- a/src/dev/amdgpu/memory_manager.cc
+++ b/src/dev/amdgpu/memory_manager.cc
@@ -44,7 +44,7 @@ namespace gem5
 
 AMDGPUMemoryManager::AMDGPUMemoryManager(const AMDGPUMemoryManagerParams &p)
     : ClockedObject(p), _gpuMemPort(csprintf("%s-port", name()), *this),
-      cacheLineSize(p.system->cacheLineSize()),
+      cacheLineSize(p.cache_line_size),
       _requestorId(p.system->getRequestorId(this))
 {
 }
diff --git a/src/dev/amdgpu/memory_manager.hh b/src/dev/amdgpu/memory_manager.hh
index 0bd08d6ff9..7fb734b8a2 100644
--- a/src/dev/amdgpu/memory_manager.hh
+++ b/src/dev/amdgpu/memory_manager.hh
@@ -125,6 +125,8 @@ class AMDGPUMemoryManager : public ClockedObject
      */
     RequestorID getRequestorID() const { return _requestorId; }
 
+    Addr getCacheLineSize() const { return cacheLineSize; }
+
     Port &
     getPort(const std::string &if_name, PortID idx) override
     {