diff --git a/src/dev/amdgpu/sdma_engine.cc b/src/dev/amdgpu/sdma_engine.cc
index e99d694634..0202f583e6 100644
--- a/src/dev/amdgpu/sdma_engine.cc
+++ b/src/dev/amdgpu/sdma_engine.cc
@@ -510,9 +510,14 @@ SDMAEngine::decodeHeader(SDMAQueue *q, uint32_t header)
         dmaReadVirt(q->rptr(), sizeof(sdmaAtomic), cb, dmaBuffer);
         } break;
       case SDMA_OP_CONST_FILL: {
-        q->incRptr(sizeof(sdmaConstFill));
-        warn("SDMA_OP_CONST_FILL not implemented");
-        decodeNext(q);
+        DPRINTF(SDMAEngine, "SDMA Constant fill packet\n");
+        // Fetch the packet body first; constFill() consumes it (and
+        // advances the read pointer) once the DMA read completes.
+        dmaBuffer = new sdmaConstFill();
+        cb = new DmaVirtCallback(
+            [ = ] (const uint64_t &)
+                { constFill(q, (sdmaConstFill *)dmaBuffer, header); });
+        dmaReadVirt(q->rptr(), sizeof(sdmaConstFill), cb, dmaBuffer);
         } break;
       case SDMA_OP_PTEPDE: {
         DPRINTF(SDMAEngine, "SDMA PTEPDE packet\n");
@@ -1026,6 +1031,94 @@ SDMAEngine::atomicDone(SDMAQueue *q, sdmaAtomicHeader *header, sdmaAtomic *pkt,
     decodeNext(q);
 }
 
+/**
+ * Handle an SDMA constant fill packet by writing a fill pattern to the
+ * destination named in the packet.
+ *
+ * @param q Queue the packet was read from; its read pointer is advanced
+ *          past the packet body here.
+ * @param pkt Packet body. Freed in constFillDone().
+ * @param header Raw first dword of the packet, decoded through
+ *               sdmaConstFillHeader for the fillsize and sw fields.
+ */
+void
+SDMAEngine::constFill(SDMAQueue *q, sdmaConstFill *pkt, uint32_t header)
+{
+    q->incRptr(sizeof(sdmaConstFill));
+
+    sdmaConstFillHeader fill_header;
+    fill_header.ordinal = header;
+
+    DPRINTF(SDMAEngine, "ConstFill %lx srcData %x count %d size %d sw %d\n",
+            pkt->addr, pkt->srcData, pkt->count, fill_header.fillsize,
+            fill_header.sw);
+
+    // Count is number of elements - 1. Size is log2 of byte size.
+    int fill_bytes = (pkt->count + 1) * (1 << fill_header.fillsize);
+    uint8_t *fill_data = new uint8_t[fill_bytes];
+
+    // NOTE(review): memset() only uses the low byte of srcData, so fills
+    // wider than one byte repeat that byte instead of the full pattern.
+    // Confirm whether multi-byte patterns need to be modeled.
+    memset(fill_data, pkt->srcData, fill_bytes);
+
+    Addr device_addr = getDeviceAddress(pkt->addr);
+    if (device_addr) {
+        DPRINTF(SDMAEngine, "ConstFill %d bytes of %x to device at %lx\n",
+                fill_bytes, pkt->srcData, pkt->addr);
+
+        // The callback captures the start of the buffer by value, so
+        // advancing the local fill_data pointer in the loop below does not
+        // change what constFillDone() frees.
+        auto cb = new EventFunctionWrapper(
+            [ = ]{ constFillDone(q, pkt, fill_data); }, name());
+
+        // Copy the minimum page size at a time in case the physical addresses
+        // are not contiguous.
+        ChunkGenerator gen(pkt->addr, fill_bytes, AMDGPU_MMHUB_PAGE_SIZE);
+        for (; !gen.done(); gen.next()) {
+            Addr chunk_addr = getDeviceAddress(gen.addr());
+            assert(chunk_addr);
+
+            DPRINTF(SDMAEngine, "Copying chunk of %d bytes from %#lx (%#lx)\n",
+                    gen.size(), gen.addr(), chunk_addr);
+
+            // Only the final chunk carries the completion callback.
+            gpuDevice->getMemMgr()->writeRequest(chunk_addr, fill_data,
+                                                 gen.size(), 0,
+                                                 gen.last() ? cb : nullptr);
+            fill_data += gen.size();
+        }
+    } else {
+        DPRINTF(SDMAEngine, "ConstFill %d bytes of %x to host at %lx\n",
+                fill_bytes, pkt->srcData, pkt->addr);
+
+        auto cb = new DmaVirtCallback(
+            [ = ] (const uint64_t &)
+                { constFillDone(q, pkt, fill_data); });
+        dmaWriteVirt(pkt->addr, fill_bytes, cb, (void *)fill_data);
+    }
+}
+
+/**
+ * Completion handler for constFill(): releases the fill buffer and the
+ * packet, then continues decoding the queue.
+ *
+ * @param q Queue to resume decoding on.
+ * @param pkt Packet body allocated in decodeHeader().
+ * @param fill_data Start of the buffer allocated with new[] in constFill().
+ */
+void
+SDMAEngine::constFillDone(SDMAQueue *q, sdmaConstFill *pkt, uint8_t *fill_data)
+{
+    DPRINTF(SDMAEngine, "ConstFill to %lx done\n", pkt->addr);
+
+    // fill_data came from new[], so it must be released with delete[].
+    delete [] fill_data;
+    delete pkt;
+    decodeNext(q);
+}
+
 AddrRangeList
 SDMAEngine::getAddrRanges() const
 {
diff --git a/src/dev/amdgpu/sdma_engine.hh b/src/dev/amdgpu/sdma_engine.hh
index bcbd497e8a..5abe63fcc6 100644
--- a/src/dev/amdgpu/sdma_engine.hh
+++ b/src/dev/amdgpu/sdma_engine.hh
@@ -245,6 +245,10 @@ class SDMAEngine : public DmaVirtDevice
                 uint64_t *dmaBuffer);
     void atomicDone(SDMAQueue *q, sdmaAtomicHeader *header, sdmaAtomic *pkt,
                     uint64_t *dmaBuffer);
+    // Process a constant fill packet; header is the packet's first dword.
+    void constFill(SDMAQueue *q, sdmaConstFill *pkt, uint32_t header);
+    // Completion of constFill(): frees fill_data and pkt, decodes next.
+    void constFillDone(SDMAQueue *q, sdmaConstFill *pkt, uint8_t *fill_data);
 
     /**
      * Methods for getting SDMA MMIO base address and size. These are set by
diff --git a/src/dev/amdgpu/sdma_packets.hh b/src/dev/amdgpu/sdma_packets.hh
index 52a47d3a2d..07d3f12600 100644
--- a/src/dev/amdgpu/sdma_packets.hh
+++ b/src/dev/amdgpu/sdma_packets.hh
@@ -37,7 +37,7 @@ namespace gem5
 {
 
 /**
- * SDMA packets
+ * SDMA packets - see src/core/inc/sdma_registers.h in ROCR-Runtime
  */
 typedef struct GEM5_PACKED
 {
@@ -80,6 +80,28 @@ typedef struct GEM5_PACKED
 } sdmaConstFill;
 static_assert(sizeof(sdmaConstFill) == 16);
 
+/**
+ * Bitfield view of the first dword of a constant fill packet. Assigning the
+ * raw header to ordinal exposes the individual fields.
+ */
+typedef struct GEM5_PACKED
+{
+    union
+    {
+        struct
+        {
+            uint32_t op : 8;
+            uint32_t sub_op : 8;
+            uint32_t sw : 2;
+            uint32_t res0 : 12;
+            // log2 of the fill element size in bytes
+            uint32_t fillsize : 2;
+        };
+        uint32_t ordinal;
+    };
+} sdmaConstFillHeader;
+static_assert(sizeof(sdmaConstFillHeader) == 4);
+
 typedef struct GEM5_PACKED
 {
     uint32_t key0;