From 54d24380661f342bfed74634b6033de71c56c332 Mon Sep 17 00:00:00 2001 From: Matthew Poremba Date: Fri, 6 May 2022 09:42:44 -0500 Subject: [PATCH] dev-amdgpu: Removed hardcoded AQL queue size The AQL queue size is currently hardcoded to 64kB. For longer running applications this causes the circular queue to wrap before reaching the real end of the queue. Add the computation for queue size instead. Previously longer applications (e.g., bc in pannotia) were hanging around 4k kernels. With change the application launches 10k+ kernels. Change-Id: I6c31677c1799a3c9ce28cf4e7e79efcb987e3b7f Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/59449 Maintainer: Jason Lowe-Power Reviewed-by: Jason Lowe-Power Maintainer: Matt Sinclair Tested-by: kokoro Reviewed-by: Matt Sinclair --- src/dev/amdgpu/pm4_packet_processor.cc | 15 +++++++++++---- 1 file changed, 11 insertions(+), 4 deletions(-) diff --git a/src/dev/amdgpu/pm4_packet_processor.cc b/src/dev/amdgpu/pm4_packet_processor.cc index 0ee2034394..c70f2f26ad 100644 --- a/src/dev/amdgpu/pm4_packet_processor.cc +++ b/src/dev/amdgpu/pm4_packet_processor.cc @@ -415,12 +415,19 @@ PM4PacketProcessor::processMQD(PM4MapQueues *pkt, PM4Queue *q, Addr addr, gpuDevice->insertQId(vmid, new_q->id()); if (mqd->aql) { - // Note: The size of the AQL queue is currently hardcoded to 64k. This - // can cause issues if the AQL queue is larger since it will not wrap - // around at the right time in the HSAPacketProcessor. + // The queue size is encoded in the cp_hqd_pq_control field in the + // kernel driver in the 6 lowest bits as log2(queue_size / 4) - 1 + // number of dwords. + // + // https://github.com/RadeonOpenCompute/ROCK-Kernel-Driver/blob/ + // roc-4.3.x/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c#L3561 + // + // Queue size is then 2^(cp_hqd_pq_control[5:0] + 1) dword. Multiply + // by 4 to get the number of bytes as HSAPP expects. + int mqd_size = (1 << ((mqd->hqd_pq_control & 0x3f) + 1)) * 4; auto &hsa_pp = gpuDevice->CP()->hsaPacketProc(); hsa_pp.setDeviceQueueDesc(mqd->aqlRptr, mqd->base, new_q->id(), - 65536, 8, GfxVersion::gfx900, offset, + mqd_size, 8, GfxVersion::gfx900, offset, mqd->mqdReadIndex); }