dev-amdgpu: Implement SDMA constant fill

This SDMA packet is much more common starting around ROCm 5.4.
Previously this was mostly used to clear page tables after an
application ended and was therefore left unimplemented. It is
now used for basic operations like device memsets.

This patch implements constant fill as it is now necessary.

Change-Id: I9b2cf076ec17f5ed07c20bb820e7db0c082bbfbc
This commit is contained in:
Matthew Poremba
2023-07-19 15:26:02 -05:00
parent 618b2a60de
commit 3b35e73eb8
3 changed files with 88 additions and 4 deletions

View File

@@ -510,9 +510,12 @@ SDMAEngine::decodeHeader(SDMAQueue *q, uint32_t header)
dmaReadVirt(q->rptr(), sizeof(sdmaAtomic), cb, dmaBuffer);
} break;
case SDMA_OP_CONST_FILL: {
q->incRptr(sizeof(sdmaConstFill));
warn("SDMA_OP_CONST_FILL not implemented");
decodeNext(q);
DPRINTF(SDMAEngine, "SDMA Constant fill packet\n");
dmaBuffer = new sdmaConstFill();
cb = new DmaVirtCallback<uint64_t>(
[ = ] (const uint64_t &)
{ constFill(q, (sdmaConstFill *)dmaBuffer, header); });
dmaReadVirt(q->rptr(), sizeof(sdmaConstFill), cb, dmaBuffer);
} break;
case SDMA_OP_PTEPDE: {
DPRINTF(SDMAEngine, "SDMA PTEPDE packet\n");
@@ -1026,6 +1029,68 @@ SDMAEngine::atomicDone(SDMAQueue *q, sdmaAtomicHeader *header, sdmaAtomic *pkt,
decodeNext(q);
}
/**
 * Execute an SDMA constant fill packet.
 *
 * Advances the queue read pointer past the packet, builds a host-side
 * buffer holding the fill pattern and writes it to the destination. If
 * getDeviceAddress() returns a non-zero address the write goes to device
 * memory one page-sized chunk at a time; otherwise it goes through
 * dmaWriteVirt(). In both cases constFillDone() is invoked on completion
 * and is responsible for releasing @p pkt and the fill buffer.
 *
 * @param q      Queue the packet was read from.
 * @param pkt    Packet body (heap allocated, released in constFillDone).
 * @param header Raw header dword of the packet; decoded through
 *               sdmaConstFillHeader to obtain the fill element size.
 */
void
SDMAEngine::constFill(SDMAQueue *q, sdmaConstFill *pkt, uint32_t header)
{
    q->incRptr(sizeof(sdmaConstFill));

    sdmaConstFillHeader fill_header;
    fill_header.ordinal = header;

    DPRINTF(SDMAEngine, "ConstFill %lx srcData %x count %d size %d sw %d\n",
            pkt->addr, pkt->srcData, pkt->count, fill_header.fillsize,
            fill_header.sw);

    // Count is number of <size> elements - 1. Size is log2 of byte size.
    const int elem_bytes = 1 << fill_header.fillsize;
    int fill_bytes = (pkt->count + 1) * elem_bytes;

    // This buffer is handed to constFillDone(), which frees it. Keep the
    // owning pointer untouched and use a separate cursor when chunking.
    uint8_t *fill_data = new uint8_t[fill_bytes];

    if (elem_bytes == 1) {
        // Byte fill: every byte is the low byte of srcData.
        memset(fill_data, pkt->srcData, fill_bytes);
    } else {
        // Multi-byte fill: memset() would only replicate the low byte of
        // srcData, so lay the value out element by element instead, least
        // significant byte first.
        // NOTE(review): srcData carries at most 32 bits of pattern; for
        // 8-byte elements the 32-bit value is assumed to repeat - confirm
        // against hardware documentation.
        const uint32_t pattern = pkt->srcData;
        const int pattern_bytes = elem_bytes < 4 ? elem_bytes : 4;
        for (int i = 0; i < fill_bytes; ++i) {
            fill_data[i] = (pattern >> (8 * (i % pattern_bytes))) & 0xff;
        }
    }

    Addr device_addr = getDeviceAddress(pkt->addr);
    if (device_addr) {
        DPRINTF(SDMAEngine, "ConstFill %d bytes of %x to device at %lx\n",
                fill_bytes, pkt->srcData, pkt->addr);

        auto cb = new EventFunctionWrapper(
            [ = ]{ constFillDone(q, pkt, fill_data); }, name());

        // Copy the minimum page size at a time in case the physical addresses
        // are not contiguous.
        uint8_t *chunk_data = fill_data;
        ChunkGenerator gen(pkt->addr, fill_bytes, AMDGPU_MMHUB_PAGE_SIZE);
        for (; !gen.done(); gen.next()) {
            Addr chunk_addr = getDeviceAddress(gen.addr());
            assert(chunk_addr);

            DPRINTF(SDMAEngine, "Copying chunk of %d bytes from %#lx (%#lx)\n",
                    gen.size(), gen.addr(), chunk_addr);

            // Only the final chunk carries the completion callback.
            gpuDevice->getMemMgr()->writeRequest(chunk_addr, chunk_data,
                                                 gen.size(), 0,
                                                 gen.last() ? cb : nullptr);
            chunk_data += gen.size();
        }
    } else {
        DPRINTF(SDMAEngine, "ConstFill %d bytes of %x to host at %lx\n",
                fill_bytes, pkt->srcData, pkt->addr);

        auto cb = new DmaVirtCallback<uint64_t>(
            [ = ] (const uint64_t &)
                { constFillDone(q, pkt, fill_data); });
        dmaWriteVirt(pkt->addr, fill_bytes, cb,
                     static_cast<void *>(fill_data));
    }
}
/**
 * Completion handler for a constant fill.
 *
 * Releases the packet and the fill buffer created by constFill() and then
 * resumes decoding of the queue.
 *
 * @param q         Queue to continue decoding.
 * @param pkt       Packet body to release.
 * @param fill_data Start of the fill buffer to release.
 */
void
SDMAEngine::constFillDone(SDMAQueue *q, sdmaConstFill *pkt, uint8_t *fill_data)
{
    DPRINTF(SDMAEngine, "ConstFill to %lx done\n", pkt->addr);

    // The buffer is allocated with new[] in constFill(), so it must be
    // released with the array form of delete.
    delete [] fill_data;
    delete pkt;

    decodeNext(q);
}
AddrRangeList
SDMAEngine::getAddrRanges() const
{

View File

@@ -245,6 +245,8 @@ class SDMAEngine : public DmaVirtDevice
uint64_t *dmaBuffer);
void atomicDone(SDMAQueue *q, sdmaAtomicHeader *header, sdmaAtomic *pkt,
uint64_t *dmaBuffer);
// Handle an SDMA constant fill packet: builds the fill buffer and issues
// the write to device or host memory. `header` is the raw packet header
// dword, from which the fill element size is decoded.
void constFill(SDMAQueue *q, sdmaConstFill *pkt, uint32_t header);
// Completion step of a constant fill: frees the fill buffer and the
// packet, then decodes the next packet in the queue.
void constFillDone(SDMAQueue *q, sdmaConstFill *pkt, uint8_t *fill_data);
/**
* Methods for getting SDMA MMIO base address and size. These are set by

View File

@@ -37,7 +37,7 @@ namespace gem5
{
/**
* SDMA packets
* SDMA packets - see src/core/inc/sdma_registers.h in ROCR-Runtime
*/
typedef struct GEM5_PACKED
{
@@ -80,6 +80,23 @@ typedef struct GEM5_PACKED
} sdmaConstFill;
static_assert(sizeof(sdmaConstFill) == 16);
/**
 * Header dword of the SDMA constant fill packet. The same 32 bits can be
 * accessed either through the named bitfields or as the raw `ordinal`.
 */
typedef struct GEM5_PACKED
{
union
{
struct
{
// Packet opcode and sub-opcode fields (8 bits each).
uint32_t op : 8;
uint32_t sub_op : 8;
// NOTE(review): presumably a swap control field - confirm against the
// ROCR-Runtime packet definition.
uint32_t sw : 2;
// Presumably reserved bits (not read in the visible code).
uint32_t res0 : 12;
// log2 of the fill element size in bytes, as used by
// SDMAEngine::constFill.
uint32_t fillsize : 2;
};
// Raw 32-bit view of the header; assigned from the decoded header dword.
uint32_t ordinal;
};
} sdmaConstFillHeader;
// The header must occupy exactly one dword.
static_assert(sizeof(sdmaConstFillHeader) == 4);
typedef struct GEM5_PACKED
{
uint32_t key0;