dev-amdgpu: Check for SDMA copies to GART range

The SDMA engine can potentially be used to write to the GART address
range. Since gem5 has a shadow copy of the GART table to avoid sending
functional reads to device memory, the GART table must be updated when
copying to the GART range.

This changeset adds a check in the VM for the GART range and implements
shadow-table updates for SDMA copy packets that write to the GART range.
A fatal error is added to the write and ptePde packet handlers, which are
the only other two ways to write to memory, because using these packets
to update the GART table has not been observed.

Change-Id: I1e62dfd9179cc9e987659e68414209fd77bba2bd
This commit is contained in:
Matthew Poremba
2024-02-07 13:29:44 -06:00
parent 998709d4fc
commit 009cec56e0
2 changed files with 36 additions and 7 deletions

View File

@@ -172,6 +172,12 @@ class AMDGPUVM : public Serializable
*/
Addr gartSize();
/**
 * Test whether an address falls within the GART range.
 *
 * The range starts at gartBase() and extends up to and including
 * gartBase() + gartSize(); both bounds are compared inclusively.
 *
 * @param paddr Address to test.
 * @return true if paddr lies within the bounds above, false otherwise.
 */
bool
inGARTRange(Addr paddr)
{
    // Anything below the start of the range is rejected up front.
    if (paddr < gartBase()) {
        return false;
    }

    // Inclusive upper limit of the range.
    const Addr gart_limit = gartBase() + gartSize();
    return paddr <= gart_limit;
}
/**
* Copy of GART table. Typically resides in device memory, however we use
* a copy in gem5 to simplify the interface.

View File

@@ -627,10 +627,14 @@ SDMAEngine::writeReadData(SDMAQueue *q, sdmaWrite *pkt, uint32_t *dmaBuffer)
// lastly we write read data to the destination address
if (gpuDevice->getVM().inMMHUB(pkt->dest)) {
Addr mmhubAddr = pkt->dest - gpuDevice->getVM().getMMHUBBase();
Addr mmhub_addr = pkt->dest - gpuDevice->getVM().getMMHUBBase();
fatal_if(gpuDevice->getVM().inGARTRange(mmhub_addr),
"SDMA write to GART not implemented");
auto cb = new EventFunctionWrapper(
[ = ]{ writeDone(q, pkt, dmaBuffer); }, name());
gpuDevice->getMemMgr()->writeRequest(mmhubAddr, (uint8_t *)dmaBuffer,
gpuDevice->getMemMgr()->writeRequest(mmhub_addr, (uint8_t *)dmaBuffer,
bufferSize, 0, cb);
} else {
if (q->priv()) {
@@ -663,9 +667,11 @@ SDMAEngine::copy(SDMAQueue *q, sdmaCopy *pkt)
// count represents the number of bytes - 1 to be copied
pkt->count++;
if (q->priv()) {
DPRINTF(SDMAEngine, "Getting GART addr for %lx\n", pkt->source);
pkt->source = getGARTAddr(pkt->source);
DPRINTF(SDMAEngine, "GART addr %lx\n", pkt->source);
if (!gpuDevice->getVM().inMMHUB(pkt->source)) {
DPRINTF(SDMAEngine, "Getting GART addr for %lx\n", pkt->source);
pkt->source = getGARTAddr(pkt->source);
DPRINTF(SDMAEngine, "GART addr %lx\n", pkt->source);
}
}
// Read data from the source first, then call the copyReadData method
@@ -742,6 +748,19 @@ SDMAEngine::copyReadData(SDMAQueue *q, sdmaCopy *pkt, uint8_t *dmaBuffer)
[ = ] (const uint64_t &) { copyDone(q, pkt, dmaBuffer); });
dmaWriteVirt(pkt->dest, pkt->count, cb, (void *)dmaBuffer);
}
// For destinations in the GART table, gem5 uses a mapping table instead
// of functionally going to device memory, so we need to update that copy.
if (gpuDevice->getVM().inGARTRange(device_addr)) {
// GART entries are always 8 bytes.
assert((pkt->count % 8) == 0);
for (int i = 0; i < pkt->count/8; ++i) {
Addr gart_addr = device_addr + i*8 - gpuDevice->getVM().gartBase();
DPRINTF(SDMAEngine, "Shadow copying to GART table %lx -> %lx\n",
gart_addr, dmaBuffer64[i]);
gpuDevice->getVM().gartTable[gart_addr] = dmaBuffer64[i];
}
}
}
/* Completion of a copy packet. */
@@ -971,10 +990,14 @@ SDMAEngine::ptePde(SDMAQueue *q, sdmaPtePde *pkt)
// Writing generated data to the destination address.
if (gpuDevice->getVM().inMMHUB(pkt->dest)) {
Addr mmhubAddr = pkt->dest - gpuDevice->getVM().getMMHUBBase();
Addr mmhub_addr = pkt->dest - gpuDevice->getVM().getMMHUBBase();
fatal_if(gpuDevice->getVM().inGARTRange(mmhub_addr),
"SDMA write to GART not implemented");
auto cb = new EventFunctionWrapper(
[ = ]{ ptePdeDone(q, pkt, dmaBuffer); }, name());
gpuDevice->getMemMgr()->writeRequest(mmhubAddr, (uint8_t *)dmaBuffer,
gpuDevice->getMemMgr()->writeRequest(mmhub_addr, (uint8_t *)dmaBuffer,
sizeof(uint64_t) * pkt->count, 0,
cb);
} else {