From 079fc47dc202ffe7c77e1e94bb1d5e0ee38d1816 Mon Sep 17 00:00:00 2001
From: Matthew Poremba
Date: Wed, 28 Jun 2023 14:49:48 -0500
Subject: [PATCH] dev-amdgpu: Perform frame writes atomically

The PCI read/write functions are atomic functions in gem5, meaning they
expect a response with a latency value on the same simulation Tick. For
reads to a PCI device, the response must also include a data value read
from the device.

The AMDGPU device has a PCI BAR which mirrors the frame buffer memory.
Currently reads are done atomically, but writes are sent to a DMA device
without waiting for a write completion ACK. As a result, it is possible
that writes can be queued in the DMA device long enough that another
read for a queued address arrives. This happens very deterministically
with the AtomicSimpleCPU and causes GPUFS to break with that CPU.

This change makes writes to the frame BAR atomic the same as reads. This
avoids that problem and as a result the AtomicSimpleCPU can now load the
driver for GPUFS simulations.

Change-Id: I9a8e8b172712c78b667ebcec81a0c5d0060234db
Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/71898
Maintainer: Matt Sinclair
Tested-by: kokoro
Reviewed-by: Matt Sinclair
Maintainer: Matthew Poremba
Reviewed-by: Matthew Poremba
---
 src/dev/amdgpu/amdgpu_device.cc | 18 ++++++++++++++++--
 1 file changed, 16 insertions(+), 2 deletions(-)

diff --git a/src/dev/amdgpu/amdgpu_device.cc b/src/dev/amdgpu/amdgpu_device.cc
index 3260d058b0..d1058f1606 100644
--- a/src/dev/amdgpu/amdgpu_device.cc
+++ b/src/dev/amdgpu/amdgpu_device.cc
@@ -349,6 +349,22 @@ AMDGPUDevice::writeFrame(PacketPtr pkt, Addr offset)
     }
 
     nbio.writeFrame(pkt, offset);
+
+    /*
+     * Write the value to device memory. This must be done functionally
+     * because this method is called by the PCIDevice::write method which
+     * is a non-timing write.
+     */
+    RequestPtr req = std::make_shared<Request>(offset, pkt->getSize(), 0,
+                                               vramRequestorId());
+    PacketPtr writePkt = Packet::createWrite(req);
+    uint8_t *dataPtr = new uint8_t[pkt->getSize()];
+    std::memcpy(dataPtr, pkt->getPtr<uint8_t>(),
+                pkt->getSize() * sizeof(uint8_t));
+    writePkt->dataDynamic(dataPtr);
+
+    auto system = cp->shader()->gpuCmdProc.system();
+    system->getDeviceMemory(writePkt)->access(writePkt);
 }
 
 void
@@ -489,8 +505,6 @@ AMDGPUDevice::write(PacketPtr pkt)
 
     switch (barnum) {
       case FRAMEBUFFER_BAR:
-        gpuMemMgr->writeRequest(offset, pkt->getPtr<uint8_t>(),
-                                pkt->getSize(), 0, nullptr);
         writeFrame(pkt, offset);
         break;
       case DOORBELL_BAR: