diff --git a/src/dev/amdgpu/amdgpu_vm.hh b/src/dev/amdgpu/amdgpu_vm.hh index f35a735111..5af666f379 100644 --- a/src/dev/amdgpu/amdgpu_vm.hh +++ b/src/dev/amdgpu/amdgpu_vm.hh @@ -172,6 +172,12 @@ class AMDGPUVM : public Serializable */ Addr gartSize(); + bool + inGARTRange(Addr paddr) + { + return ((paddr >= gartBase()) && (paddr <= (gartBase() + gartSize()))); + } + /** * Copy of GART table. Typically resides in device memory, however we use * a copy in gem5 to simplify the interface. diff --git a/src/dev/amdgpu/sdma_engine.cc b/src/dev/amdgpu/sdma_engine.cc index 34ad027234..94bcdf9cb9 100644 --- a/src/dev/amdgpu/sdma_engine.cc +++ b/src/dev/amdgpu/sdma_engine.cc @@ -627,10 +627,14 @@ SDMAEngine::writeReadData(SDMAQueue *q, sdmaWrite *pkt, uint32_t *dmaBuffer) // lastly we write read data to the destination address if (gpuDevice->getVM().inMMHUB(pkt->dest)) { - Addr mmhubAddr = pkt->dest - gpuDevice->getVM().getMMHUBBase(); + Addr mmhub_addr = pkt->dest - gpuDevice->getVM().getMMHUBBase(); + + fatal_if(gpuDevice->getVM().inGARTRange(mmhub_addr), + "SDMA write to GART not implemented"); + auto cb = new EventFunctionWrapper( [ = ]{ writeDone(q, pkt, dmaBuffer); }, name()); - gpuDevice->getMemMgr()->writeRequest(mmhubAddr, (uint8_t *)dmaBuffer, + gpuDevice->getMemMgr()->writeRequest(mmhub_addr, (uint8_t *)dmaBuffer, bufferSize, 0, cb); } else { if (q->priv()) { @@ -663,9 +667,11 @@ SDMAEngine::copy(SDMAQueue *q, sdmaCopy *pkt) // count represents the number of bytes - 1 to be copied pkt->count++; if (q->priv()) { - DPRINTF(SDMAEngine, "Getting GART addr for %lx\n", pkt->source); - pkt->source = getGARTAddr(pkt->source); - DPRINTF(SDMAEngine, "GART addr %lx\n", pkt->source); + if (!gpuDevice->getVM().inMMHUB(pkt->source)) { + DPRINTF(SDMAEngine, "Getting GART addr for %lx\n", pkt->source); + pkt->source = getGARTAddr(pkt->source); + DPRINTF(SDMAEngine, "GART addr %lx\n", pkt->source); + } } // Read data from the source first, then call the copyReadData method @@ -742,6 +748,19 @@ SDMAEngine::copyReadData(SDMAQueue *q, sdmaCopy *pkt, uint8_t *dmaBuffer) [ = ] (const uint64_t &) { copyDone(q, pkt, dmaBuffer); }); dmaWriteVirt(pkt->dest, pkt->count, cb, (void *)dmaBuffer); } + + // For destinations in the GART table, gem5 uses a mapping tables instead + // of functionally going to device memory, so we need to update that copy. + if (gpuDevice->getVM().inGARTRange(device_addr)) { + // GART entries are always 8 bytes. + assert((pkt->count % 8) == 0); + for (int i = 0; i < pkt->count/8; ++i) { + Addr gart_addr = device_addr + i*8 - gpuDevice->getVM().gartBase(); + DPRINTF(SDMAEngine, "Shadow copying to GART table %lx -> %lx\n", + gart_addr, dmaBuffer64[i]); + gpuDevice->getVM().gartTable[gart_addr] = dmaBuffer64[i]; + } + } } /* Completion of a copy packet. */ @@ -971,10 +990,14 @@ SDMAEngine::ptePde(SDMAQueue *q, sdmaPtePde *pkt) // Writing generated data to the destination address. if (gpuDevice->getVM().inMMHUB(pkt->dest)) { - Addr mmhubAddr = pkt->dest - gpuDevice->getVM().getMMHUBBase(); + Addr mmhub_addr = pkt->dest - gpuDevice->getVM().getMMHUBBase(); + + fatal_if(gpuDevice->getVM().inGARTRange(mmhub_addr), + "SDMA write to GART not implemented"); + auto cb = new EventFunctionWrapper( [ = ]{ ptePdeDone(q, pkt, dmaBuffer); }, name()); - gpuDevice->getMemMgr()->writeRequest(mmhubAddr, (uint8_t *)dmaBuffer, + gpuDevice->getMemMgr()->writeRequest(mmhub_addr, (uint8_t *)dmaBuffer, sizeof(uint64_t) * pkt->count, 0, cb); } else {