dev-amdgpu: Implement SDMA constant fill
This SDMA packet has become much more common starting around ROCm 5.4. Previously it was mostly used to clear page tables after an application ended and was therefore left unimplemented. It is now used for basic operations such as device memsets, so this patch implements constant fill. Change-Id: I9b2cf076ec17f5ed07c20bb820e7db0c082bbfbc
This commit is contained in:
@@ -510,9 +510,12 @@ SDMAEngine::decodeHeader(SDMAQueue *q, uint32_t header)
|
||||
dmaReadVirt(q->rptr(), sizeof(sdmaAtomic), cb, dmaBuffer);
|
||||
} break;
|
||||
case SDMA_OP_CONST_FILL: {
|
||||
q->incRptr(sizeof(sdmaConstFill));
|
||||
warn("SDMA_OP_CONST_FILL not implemented");
|
||||
decodeNext(q);
|
||||
DPRINTF(SDMAEngine, "SDMA Constant fill packet\n");
|
||||
dmaBuffer = new sdmaConstFill();
|
||||
cb = new DmaVirtCallback<uint64_t>(
|
||||
[ = ] (const uint64_t &)
|
||||
{ constFill(q, (sdmaConstFill *)dmaBuffer, header); });
|
||||
dmaReadVirt(q->rptr(), sizeof(sdmaConstFill), cb, dmaBuffer);
|
||||
} break;
|
||||
case SDMA_OP_PTEPDE: {
|
||||
DPRINTF(SDMAEngine, "SDMA PTEPDE packet\n");
|
||||
@@ -1026,6 +1029,68 @@ SDMAEngine::atomicDone(SDMAQueue *q, sdmaAtomicHeader *header, sdmaAtomic *pkt,
|
||||
decodeNext(q);
|
||||
}
|
||||
|
||||
void
|
||||
SDMAEngine::constFill(SDMAQueue *q, sdmaConstFill *pkt, uint32_t header)
|
||||
{
|
||||
q->incRptr(sizeof(sdmaConstFill));
|
||||
|
||||
sdmaConstFillHeader fill_header;
|
||||
fill_header.ordinal = header;
|
||||
|
||||
DPRINTF(SDMAEngine, "ConstFill %lx srcData %x count %d size %d sw %d\n",
|
||||
pkt->addr, pkt->srcData, pkt->count, fill_header.fillsize,
|
||||
fill_header.sw);
|
||||
|
||||
// Count is number of <size> elements - 1. Size is log2 of byte size.
|
||||
int fill_bytes = (pkt->count + 1) * (1 << fill_header.fillsize);
|
||||
uint8_t *fill_data = new uint8_t[fill_bytes];
|
||||
|
||||
memset(fill_data, pkt->srcData, fill_bytes);
|
||||
|
||||
Addr device_addr = getDeviceAddress(pkt->addr);
|
||||
if (device_addr) {
|
||||
DPRINTF(SDMAEngine, "ConstFill %d bytes of %x to device at %lx\n",
|
||||
fill_bytes, pkt->srcData, pkt->addr);
|
||||
|
||||
auto cb = new EventFunctionWrapper(
|
||||
[ = ]{ constFillDone(q, pkt, fill_data); }, name());
|
||||
|
||||
// Copy the minimum page size at a time in case the physical addresses
|
||||
// are not contiguous.
|
||||
ChunkGenerator gen(pkt->addr, fill_bytes, AMDGPU_MMHUB_PAGE_SIZE);
|
||||
for (; !gen.done(); gen.next()) {
|
||||
Addr chunk_addr = getDeviceAddress(gen.addr());
|
||||
assert(chunk_addr);
|
||||
|
||||
DPRINTF(SDMAEngine, "Copying chunk of %d bytes from %#lx (%#lx)\n",
|
||||
gen.size(), gen.addr(), chunk_addr);
|
||||
|
||||
gpuDevice->getMemMgr()->writeRequest(chunk_addr, fill_data,
|
||||
gen.size(), 0,
|
||||
gen.last() ? cb : nullptr);
|
||||
fill_data += gen.size();
|
||||
}
|
||||
} else {
|
||||
DPRINTF(SDMAEngine, "ConstFill %d bytes of %x to host at %lx\n",
|
||||
fill_bytes, pkt->srcData, pkt->addr);
|
||||
|
||||
auto cb = new DmaVirtCallback<uint64_t>(
|
||||
[ = ] (const uint64_t &)
|
||||
{ constFillDone(q, pkt, fill_data); });
|
||||
dmaWriteVirt(pkt->addr, fill_bytes, cb, (void *)fill_data);
|
||||
}
|
||||
}
|
||||
|
||||
void
|
||||
SDMAEngine::constFillDone(SDMAQueue *q, sdmaConstFill *pkt, uint8_t *fill_data)
|
||||
{
|
||||
DPRINTF(SDMAEngine, "ConstFill to %lx done\n", pkt->addr);
|
||||
|
||||
delete fill_data;
|
||||
delete pkt;
|
||||
decodeNext(q);
|
||||
}
|
||||
|
||||
AddrRangeList
|
||||
SDMAEngine::getAddrRanges() const
|
||||
{
|
||||
|
||||
@@ -245,6 +245,8 @@ class SDMAEngine : public DmaVirtDevice
|
||||
uint64_t *dmaBuffer);
|
||||
void atomicDone(SDMAQueue *q, sdmaAtomicHeader *header, sdmaAtomic *pkt,
|
||||
uint64_t *dmaBuffer);
|
||||
void constFill(SDMAQueue *q, sdmaConstFill *pkt, uint32_t header);
|
||||
void constFillDone(SDMAQueue *q, sdmaConstFill *pkt, uint8_t *fill_data);
|
||||
|
||||
/**
|
||||
* Methods for getting SDMA MMIO base address and size. These are set by
|
||||
|
||||
@@ -37,7 +37,7 @@ namespace gem5
|
||||
{
|
||||
|
||||
/**
|
||||
* SDMA packets
|
||||
* SDMA packets - see src/core/inc/sdma_registers.h in ROCR-Runtime
|
||||
*/
|
||||
typedef struct GEM5_PACKED
|
||||
{
|
||||
@@ -80,6 +80,23 @@ typedef struct GEM5_PACKED
|
||||
} sdmaConstFill;
|
||||
static_assert(sizeof(sdmaConstFill) == 16);
|
||||
|
||||
/**
 * Header dword of an SDMA constant fill packet.
 *
 * The raw 32-bit value is stored through `ordinal` and the individual
 * fields are read back through the overlaid bitfields.
 */
typedef struct GEM5_PACKED
{
    union
    {
        struct
        {
            uint32_t op : 8;        // presumably the SDMA opcode - confirm
            uint32_t sub_op : 8;    // presumably the sub-opcode - confirm
            uint32_t sw : 2;        // only logged by constFill()
            uint32_t res0 : 12;     // presumably reserved bits - confirm
            uint32_t fillsize : 2;  // log2 of the fill element size in bytes
        };

        // Whole header viewed as a single 32-bit value.
        uint32_t ordinal;
    };
} sdmaConstFillHeader;

// The header must occupy exactly one dword.
static_assert(sizeof(sdmaConstFillHeader) == 4);
|
||||
|
||||
typedef struct GEM5_PACKED
|
||||
{
|
||||
uint32_t key0;
|
||||
|
||||
Reference in New Issue
Block a user