From 009cec56e0e7a082ed684e98c5600babc2d2283e Mon Sep 17 00:00:00 2001 From: Matthew Poremba Date: Wed, 7 Feb 2024 13:29:44 -0600 Subject: [PATCH] dev-amdgpu: Check for SDMA copies to GART range The SDMA engine can potentially be used to write to the GART address range. Since gem5 has a shadow copy of the GART table to avoid sending functional reads to device memory, the GART table must be updated when copying to the GART range. This changeset adds a check in the VM for the GART range and implements the SDMA copy packet writing to the GART range. A fatal is added to the write and ptePde packets, which are the only other two ways to write to memory, because using these packets to update the GART table has not been observed. Change-Id: I1e62dfd9179cc9e987659e68414209fd77bba2bd --- src/dev/amdgpu/amdgpu_vm.hh | 6 ++++++ src/dev/amdgpu/sdma_engine.cc | 37 ++++++++++++++++++++++++++++------- 2 files changed, 36 insertions(+), 7 deletions(-) diff --git a/src/dev/amdgpu/amdgpu_vm.hh b/src/dev/amdgpu/amdgpu_vm.hh index f35a735111..5af666f379 100644 --- a/src/dev/amdgpu/amdgpu_vm.hh +++ b/src/dev/amdgpu/amdgpu_vm.hh @@ -172,6 +172,12 @@ class AMDGPUVM : public Serializable */ Addr gartSize(); + bool + inGARTRange(Addr paddr) + { + return ((paddr >= gartBase()) && (paddr <= (gartBase() + gartSize()))); + } + /** * Copy of GART table. Typically resides in device memory, however we use * a copy in gem5 to simplify the interface. 
diff --git a/src/dev/amdgpu/sdma_engine.cc b/src/dev/amdgpu/sdma_engine.cc index 34ad027234..94bcdf9cb9 100644 --- a/src/dev/amdgpu/sdma_engine.cc +++ b/src/dev/amdgpu/sdma_engine.cc @@ -627,10 +627,14 @@ SDMAEngine::writeReadData(SDMAQueue *q, sdmaWrite *pkt, uint32_t *dmaBuffer) // lastly we write read data to the destination address if (gpuDevice->getVM().inMMHUB(pkt->dest)) { - Addr mmhubAddr = pkt->dest - gpuDevice->getVM().getMMHUBBase(); + Addr mmhub_addr = pkt->dest - gpuDevice->getVM().getMMHUBBase(); + + fatal_if(gpuDevice->getVM().inGARTRange(mmhub_addr), + "SDMA write to GART not implemented"); + auto cb = new EventFunctionWrapper( [ = ]{ writeDone(q, pkt, dmaBuffer); }, name()); - gpuDevice->getMemMgr()->writeRequest(mmhubAddr, (uint8_t *)dmaBuffer, + gpuDevice->getMemMgr()->writeRequest(mmhub_addr, (uint8_t *)dmaBuffer, bufferSize, 0, cb); } else { if (q->priv()) { @@ -663,9 +667,11 @@ SDMAEngine::copy(SDMAQueue *q, sdmaCopy *pkt) // count represents the number of bytes - 1 to be copied pkt->count++; if (q->priv()) { - DPRINTF(SDMAEngine, "Getting GART addr for %lx\n", pkt->source); - pkt->source = getGARTAddr(pkt->source); - DPRINTF(SDMAEngine, "GART addr %lx\n", pkt->source); + if (!gpuDevice->getVM().inMMHUB(pkt->source)) { + DPRINTF(SDMAEngine, "Getting GART addr for %lx\n", pkt->source); + pkt->source = getGARTAddr(pkt->source); + DPRINTF(SDMAEngine, "GART addr %lx\n", pkt->source); + } } // Read data from the source first, then call the copyReadData method @@ -742,6 +748,19 @@ SDMAEngine::copyReadData(SDMAQueue *q, sdmaCopy *pkt, uint8_t *dmaBuffer) [ = ] (const uint64_t &) { copyDone(q, pkt, dmaBuffer); }); dmaWriteVirt(pkt->dest, pkt->count, cb, (void *)dmaBuffer); } + + // For destinations in the GART table, gem5 uses a mapping table instead + // of functionally going to device memory, so we need to update that copy. + if (gpuDevice->getVM().inGARTRange(device_addr)) { + // GART entries are always 8 bytes. 
+ assert((pkt->count % 8) == 0); + for (int i = 0; i < pkt->count/8; ++i) { + Addr gart_addr = device_addr + i*8 - gpuDevice->getVM().gartBase(); + DPRINTF(SDMAEngine, "Shadow copying to GART table %lx -> %lx\n", + gart_addr, dmaBuffer64[i]); + gpuDevice->getVM().gartTable[gart_addr] = dmaBuffer64[i]; + } + } } /* Completion of a copy packet. */ @@ -971,10 +990,14 @@ SDMAEngine::ptePde(SDMAQueue *q, sdmaPtePde *pkt) // Writing generated data to the destination address. if (gpuDevice->getVM().inMMHUB(pkt->dest)) { - Addr mmhubAddr = pkt->dest - gpuDevice->getVM().getMMHUBBase(); + Addr mmhub_addr = pkt->dest - gpuDevice->getVM().getMMHUBBase(); + + fatal_if(gpuDevice->getVM().inGARTRange(mmhub_addr), + "SDMA write to GART not implemented"); + auto cb = new EventFunctionWrapper( [ = ]{ ptePdeDone(q, pkt, dmaBuffer); }, name()); - gpuDevice->getMemMgr()->writeRequest(mmhubAddr, (uint8_t *)dmaBuffer, + gpuDevice->getMemMgr()->writeRequest(mmhub_addr, (uint8_t *)dmaBuffer, sizeof(uint64_t) * pkt->count, 0, cb); } else {