From 07f6b7c59c7554778f4b17e3844bf7f9e128abd4 Mon Sep 17 00:00:00 2001 From: Matthew Poremba Date: Wed, 29 May 2024 07:15:46 -0700 Subject: [PATCH] dev-amdgpu: Fix pending PCI RLC doorbell (#1157) SDMA RLC queues do not currently remove their doorbell mapping. This can cause issues re-registering the queue and prevents the pending doorbells feature from working. In addition the data value of the doorbell (the ring buffer rptr) is not saved, leading to UB when this workaround is used. This commit removes the doorbell mapping from the gpu device when the SDMA engine unmaps an RLC queue and copies the next doorbell value to the pending packet as was originally intended. Change-Id: Ifd551450f439c065579afcf916f8ff192e7598ab --- src/dev/amdgpu/amdgpu_device.cc | 11 ++++++++++- src/dev/amdgpu/amdgpu_device.hh | 1 + src/dev/amdgpu/sdma_engine.cc | 1 + 3 files changed, 12 insertions(+), 1 deletion(-) diff --git a/src/dev/amdgpu/amdgpu_device.cc b/src/dev/amdgpu/amdgpu_device.cc index b3a91830fe..fc977a2de0 100644 --- a/src/dev/amdgpu/amdgpu_device.cc +++ b/src/dev/amdgpu/amdgpu_device.cc @@ -517,10 +517,13 @@ AMDGPUDevice::writeDoorbell(PacketPtr pkt, Addr offset) offset); // We have to ACK the PCI packet immediately, so create a copy of the - // packet here to send again. + // packet here to send again. The packet data contains the value of + // the doorbell to write so we need to copy that as the original + // packet gets deleted after the PCI write() method returns. RequestPtr pending_req(pkt->req); PacketPtr pending_pkt = Packet::createWrite(pending_req); uint8_t *pending_data = new uint8_t[pkt->getSize()]; + memcpy(pending_data, pkt->getPtr(), pkt->getSize()); pending_pkt->dataDynamic(pending_data); pendingDoorbellPkts.emplace(offset, pending_pkt); @@ -709,6 +712,12 @@ AMDGPUDevice::setDoorbellType(uint32_t offset, QueueType qt, int ip_id) doorbells[offset].ip_id = ip_id; } +void +AMDGPUDevice::unsetDoorbell(uint32_t offset) +{ + doorbells.erase(offset); +} + void AMDGPUDevice::setSDMAEngine(Addr offset, SDMAEngine *eng) { diff --git a/src/dev/amdgpu/amdgpu_device.hh b/src/dev/amdgpu/amdgpu_device.hh index 33b6a9f3e7..83b79a1d05 100644 --- a/src/dev/amdgpu/amdgpu_device.hh +++ b/src/dev/amdgpu/amdgpu_device.hh @@ -196,6 +196,7 @@ class AMDGPUDevice : public PciDevice * Set handles to GPU blocks. */ void setDoorbellType(uint32_t offset, QueueType qt, int ip_id = 0); + void unsetDoorbell(uint32_t offset); void processPendingDoorbells(uint32_t offset); void setSDMAEngine(Addr offset, SDMAEngine *eng); diff --git a/src/dev/amdgpu/sdma_engine.cc b/src/dev/amdgpu/sdma_engine.cc index 735be554b4..e1e123df48 100644 --- a/src/dev/amdgpu/sdma_engine.cc +++ b/src/dev/amdgpu/sdma_engine.cc @@ -269,6 +269,7 @@ SDMAEngine::deallocateRLCQueues() for (auto doorbell: rlcInfo) { if (doorbell) { unregisterRLCQueue(doorbell); + gpuDevice->unsetDoorbell(doorbell); } } }