dev-amdgpu: Use GPU specific cache line size (#1621)

Invalidate requests align to the system cache line size. This causes problems if the GPU cache hierarchy's cache line size differs from the system's, as the unaligned requests never return, leading to deadlock on deferred dispatch. This commit uses the cache line size from the GPU memory manager and makes the cache line size there non-optional. Tested with multiple RubySystems where the CPU side used 64B cache lines and the GPU side used 128B cache lines.
2024-10-03 08:47:08 -07:00
parent c8c75959ad
commit 24504c9a3e
5 changed files with 7 additions and 4 deletions
--- a/configs/example/gpufs/system/system.py
+++ b/configs/example/gpufs/system/system.py
@@ -199,7 +199,7 @@ def makeGpuFSSystem(args):
    system.pc.south_bridge.gpu.pm4_pkt_procs = pm4_procs

    # GPU data path
-    gpu_mem_mgr = AMDGPUMemoryManager()
+    gpu_mem_mgr = AMDGPUMemoryManager(cache_line_size=args.cacheline_size)
    system.pc.south_bridge.gpu.memory_manager = gpu_mem_mgr

    # CPU data path (SystemHub)
--- a/src/dev/amdgpu/AMDGPU.py
+++ b/src/dev/amdgpu/AMDGPU.py
@@ -128,6 +128,8 @@ class AMDGPUMemoryManager(ClockedObject):
    cxx_header = "dev/amdgpu/memory_manager.hh"
    cxx_class = "gem5::AMDGPUMemoryManager"

+    cache_line_size = Param.UInt64("Cache line size in bytes")
+
    port = RequestPort("Memory Port to access VRAM (device memory)")
    system = Param.System(Parent.any, "System the dGPU belongs to")

--- a/src/dev/amdgpu/amdgpu_device.cc
+++ b/src/dev/amdgpu/amdgpu_device.cc
@@ -428,8 +428,7 @@ AMDGPUDevice::writeFrame(PacketPtr pkt, Addr offset)
    DPRINTF(AMDGPUDevice, "Wrote framebuffer address %#lx\n", offset);

    for (auto& cu: CP()->shader()->cuList) {
-        auto system = CP()->shader()->gpuCmdProc.system();
-        Addr aligned_addr = offset & ~(system->cacheLineSize() - 1);
+        Addr aligned_addr = offset & ~(gpuMemMgr->getCacheLineSize() - 1);
        cu->sendInvL2(aligned_addr);
    }

--- a/src/dev/amdgpu/memory_manager.cc
+++ b/src/dev/amdgpu/memory_manager.cc
@@ -44,7 +44,7 @@ namespace gem5

 AMDGPUMemoryManager::AMDGPUMemoryManager(const AMDGPUMemoryManagerParams &p)
    : ClockedObject(p), _gpuMemPort(csprintf("%s-port", name()), *this),
-      cacheLineSize(p.system->cacheLineSize()),
+      cacheLineSize(p.cache_line_size),
      _requestorId(p.system->getRequestorId(this))
 {
 }
--- a/src/dev/amdgpu/memory_manager.hh
+++ b/src/dev/amdgpu/memory_manager.hh
@@ -125,6 +125,8 @@ class AMDGPUMemoryManager : public ClockedObject
     */
    RequestorID getRequestorID() const { return _requestorId; }

+    Addr getCacheLineSize() const { return cacheLineSize; }
+
    Port &
    getPort(const std::string &if_name, PortID idx) override
    {