dev-amdgpu: Use GPU specific cache line size (#1621)

Invalidate requests are aligned to the system cache line size. This causes
problems if the GPU cache hierarchy's cache line size differs from the
system's, as the unaligned requests never return, leading to deadlock
on deferred dispatch.

This commit uses the cache line size from the GPU memory manager and
makes the cache line size there non-optional.

Tested with multiple RubySystems where the CPU side used 64B cache lines
and the GPU side used 128B cache lines.
This commit is contained in:
Matthew Poremba
2024-10-03 08:47:08 -07:00
committed by GitHub
parent c8c75959ad
commit 24504c9a3e
5 changed files with 7 additions and 4 deletions

View File

@@ -199,7 +199,7 @@ def makeGpuFSSystem(args):
system.pc.south_bridge.gpu.pm4_pkt_procs = pm4_procs
# GPU data path
gpu_mem_mgr = AMDGPUMemoryManager()
gpu_mem_mgr = AMDGPUMemoryManager(cache_line_size=args.cacheline_size)
system.pc.south_bridge.gpu.memory_manager = gpu_mem_mgr
# CPU data path (SystemHub)

View File

@@ -128,6 +128,8 @@ class AMDGPUMemoryManager(ClockedObject):
cxx_header = "dev/amdgpu/memory_manager.hh"
cxx_class = "gem5::AMDGPUMemoryManager"
cache_line_size = Param.UInt64("Cache line size in bytes")
port = RequestPort("Memory Port to access VRAM (device memory)")
system = Param.System(Parent.any, "System the dGPU belongs to")

View File

@@ -428,8 +428,7 @@ AMDGPUDevice::writeFrame(PacketPtr pkt, Addr offset)
DPRINTF(AMDGPUDevice, "Wrote framebuffer address %#lx\n", offset);
for (auto& cu: CP()->shader()->cuList) {
auto system = CP()->shader()->gpuCmdProc.system();
Addr aligned_addr = offset & ~(system->cacheLineSize() - 1);
Addr aligned_addr = offset & ~(gpuMemMgr->getCacheLineSize() - 1);
cu->sendInvL2(aligned_addr);
}

View File

@@ -44,7 +44,7 @@ namespace gem5
AMDGPUMemoryManager::AMDGPUMemoryManager(const AMDGPUMemoryManagerParams &p)
: ClockedObject(p), _gpuMemPort(csprintf("%s-port", name()), *this),
cacheLineSize(p.system->cacheLineSize()),
cacheLineSize(p.cache_line_size),
_requestorId(p.system->getRequestorId(this))
{
}

View File

@@ -125,6 +125,8 @@ class AMDGPUMemoryManager : public ClockedObject
*/
RequestorID getRequestorID() const { return _requestorId; }
Addr getCacheLineSize() const { return cacheLineSize; }
Port &
getPort(const std::string &if_name, PortID idx) override
{