From 489074fbfd9de86c9f5b38bcdaaf4ce2328c7d70 Mon Sep 17 00:00:00 2001 From: Matthew Poremba Date: Sun, 30 Oct 2022 12:41:47 -0700 Subject: [PATCH] dev-amdgpu: Fix issues with PM4 queue map, fences The PM4 release_mem packet is used as a DMA fence in the driver. It specifies which queue the interrupt came from by encoding the me, pipe, and queue fields from the map_queue packet into the interrupt ring ID. Currently these fields are incorrect because (1) the order in the bitfield is backwards, (2) the queue constructor assigns a pointer to the PM4MapQueue packet containing this data to the dmaBuffer which gets deleted in short order, and (3) the order of the encoding of ring ID is incorrect. This change fixes these issues by (1) placing the struct vales in correct order, (2) creating a const copy of the dmaBuffer on construction, and (3) using the ring ID encoding expected by the driver: https://github.com/RadeonOpenCompute/ROCK-Kernel-Driver/blob/roc-4.3.x/ drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c#L5989 Change-Id: I72c382980e57573f8a8a6879912c4139c7e2f505 Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/65095 Tested-by: kokoro Reviewed-by: Matt Sinclair Maintainer: Matt Sinclair --- src/dev/amdgpu/pm4_defines.hh | 4 ++-- src/dev/amdgpu/pm4_packet_processor.cc | 25 ++++++++++++++----------- src/dev/amdgpu/pm4_packet_processor.hh | 2 ++ src/dev/amdgpu/pm4_queues.hh | 16 ++++++++-------- 4 files changed, 26 insertions(+), 21 deletions(-) diff --git a/src/dev/amdgpu/pm4_defines.hh b/src/dev/amdgpu/pm4_defines.hh index b690e54906..42832d50bf 100644 --- a/src/dev/amdgpu/pm4_defines.hh +++ b/src/dev/amdgpu/pm4_defines.hh @@ -124,9 +124,9 @@ typedef struct GEM5_PACKED uint32_t reserved2 : 2; uint32_t vmid : 4; uint32_t reserved3 : 1; - uint32_t me : 1; - uint32_t pipe : 2; uint32_t queueSlot : 3; + uint32_t pipe : 2; + uint32_t me : 1; uint32_t reserved6 : 2; uint32_t queueType : 3; uint32_t allocFormat : 2; diff --git a/src/dev/amdgpu/pm4_packet_processor.cc b/src/dev/amdgpu/pm4_packet_processor.cc index 48496f6585..404beab16c 100644 --- a/src/dev/amdgpu/pm4_packet_processor.cc +++ b/src/dev/amdgpu/pm4_packet_processor.cc @@ -116,14 +116,14 @@ void PM4PacketProcessor::mapKiq(Addr offset) { DPRINTF(PM4PacketProcessor, "Mapping KIQ\n"); - newQueue((QueueDesc *)&kiq, offset); + newQueue((QueueDesc *)&kiq, offset, &kiq_pkt); } void PM4PacketProcessor::mapPq(Addr offset) { DPRINTF(PM4PacketProcessor, "Mapping PQ\n"); - newQueue((QueueDesc *)&pq, offset); + newQueue((QueueDesc *)&pq, offset, &pq_pkt); } void @@ -146,8 +146,9 @@ PM4PacketProcessor::newQueue(QueueDesc *mqd, Addr offset, : QueueType::Compute; gpuDevice->setDoorbellType(offset, qt); - DPRINTF(PM4PacketProcessor, "New PM4 queue %d, base: %p offset: %p\n", - id, q->base(), q->offset()); + DPRINTF(PM4PacketProcessor, "New PM4 queue %d, base: %p offset: %p, me: " + "%d, pipe %d queue: %d\n", id, q->base(), q->offset(), q->me(), + q->pipe(), q->queue()); } void @@ -490,14 +491,16 @@ PM4PacketProcessor::releaseMemDone(PM4Queue *q, PM4ReleaseMem *pkt, Addr addr) DPRINTF(PM4PacketProcessor, "PM4 release_mem wrote %d to %p\n", pkt->dataLo, addr); if (pkt->intSelect == 2) { - DPRINTF(PM4PacketProcessor, "PM4 interrupt, ctx: %d, me: %d, pipe: " - "%d, queueSlot:%d\n", pkt->intCtxId, q->me(), q->pipe(), - q->queue()); - // Rearranging the queue field of PM4MapQueues as the interrupt RingId - // format specified in PM4ReleaseMem pkt. - uint32_t ringId = (q->me() << 6) | (q->pipe() << 4) | q->queue(); + DPRINTF(PM4PacketProcessor, "PM4 interrupt, id: %d ctx: %d, me: %d, " + "pipe: %d, queueSlot:%d\n", q->id(), pkt->intCtxId, q->me(), + q->pipe(), q->queue()); + + uint8_t ringId = 0; + if (q->id() != 0) { + ringId = (q->queue() << 4) | (q->me() << 2) | q->pipe(); + } gpuDevice->getIH()->prepareInterruptCookie(pkt->intCtxId, ringId, - SOC15_IH_CLIENTID_GRBM_CP, CP_EOP); + SOC15_IH_CLIENTID_GRBM_CP, CP_EOP); gpuDevice->getIH()->submitInterruptCookie(); } diff --git a/src/dev/amdgpu/pm4_packet_processor.hh b/src/dev/amdgpu/pm4_packet_processor.hh index c77edd2651..48066713a5 100644 --- a/src/dev/amdgpu/pm4_packet_processor.hh +++ b/src/dev/amdgpu/pm4_packet_processor.hh @@ -54,8 +54,10 @@ class PM4PacketProcessor : public DmaVirtDevice AMDGPUDevice *gpuDevice; /* First graphics queue */ PrimaryQueue pq; + PM4MapQueues pq_pkt; /* First compute queue */ QueueDesc kiq; + PM4MapQueues kiq_pkt; /* All PM4 queues, indexed by VMID */ std::unordered_map queues; diff --git a/src/dev/amdgpu/pm4_queues.hh b/src/dev/amdgpu/pm4_queues.hh index 4e8638b39e..19973b113e 100644 --- a/src/dev/amdgpu/pm4_queues.hh +++ b/src/dev/amdgpu/pm4_queues.hh @@ -375,16 +375,16 @@ class PM4Queue Addr _offset; bool _processing; bool _ib; - PM4MapQueues *_pkt; + const PM4MapQueues _pkt; public: PM4Queue() : _id(0), q(nullptr), _wptr(0), _offset(0), _processing(false), - _ib(false), _pkt(nullptr) {} + _ib(false), _pkt() {} PM4Queue(int id, QueueDesc *queue, Addr offset) : _id(id), q(queue), _wptr(queue->rptr), _ibWptr(0), _offset(offset), - _processing(false), _ib(false), _pkt(nullptr) {} + _processing(false), _ib(false), _pkt() {} PM4Queue(int id, QueueDesc *queue, Addr offset, PM4MapQueues *pkt) : _id(id), q(queue), _wptr(queue->rptr), _ibWptr(0), _offset(offset), - _processing(false), _ib(false), _pkt(pkt) {} + _processing(false), _ib(false), _pkt(*pkt) {} QueueDesc *getMQD() { return q; } int id() { return _id; } @@ -466,10 +466,10 @@ class PM4Queue void offset(Addr value) { _offset = value; } void processing(bool value) { _processing = value; } void ib(bool value) { _ib = value; } - uint32_t me() { if (_pkt) return _pkt->me; else return 0; } - uint32_t pipe() { if (_pkt) return _pkt->pipe; else return 0; } - uint32_t queue() { if (_pkt) return _pkt->queueSlot; else return 0; } - bool privileged() { assert(_pkt); return _pkt->queueSel == 0 ? 1 : 0; } + uint32_t me() { return _pkt.me + 1; } + uint32_t pipe() { return _pkt.pipe; } + uint32_t queue() { return _pkt.queueSlot; } + bool privileged() { return _pkt.queueSel == 0 ? 1 : 0; } }; } // namespace gem5