dev-amdgpu: Implement SDMA atomic packet

SDMA atomic packets are used in conjunction with RLC queues in SDMA for
synchronization similar to how HSA signals are used with BLIT kernels
when SDMA is disabled. Implement a skeleton of the SDMA atomic packet
methods as well as the atomic add64 operation.

The atomic add operation appears to be the only operation used in ROCm,
so this implementation is fairly complete. See:

https://github.com/RadeonOpenCompute/ROCR-Runtime/blob/rocm-4.2.x/src/core/runtime/amd_blit_sdma.cpp#L880

Change-Id: I62cc337f2ffe590bdb947b48053760ee8b3a6f32
Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/63174
Reviewed-by: Matt Sinclair <mattdsinclair@gmail.com>
Maintainer: Matt Sinclair <mattdsinclair@gmail.com>
Tested-by: kokoro <noreply+kokoro@google.com>
This commit is contained in:
Matthew Poremba
2022-09-05 10:24:00 -07:00
parent 9ea28bd782
commit 6c935657fd
3 changed files with 80 additions and 3 deletions

View File

@@ -429,9 +429,14 @@ SDMAEngine::decodeHeader(SDMAQueue *q, uint32_t header)
decodeNext(q);
} break;
case SDMA_OP_ATOMIC: {
q->incRptr(sizeof(sdmaAtomic));
warn("SDMA_OP_ATOMIC not implemented");
decodeNext(q);
DPRINTF(SDMAEngine, "SDMA Atomic packet\n");
dmaBuffer = new sdmaAtomic();
sdmaAtomicHeader *h = new sdmaAtomicHeader();
*h = *(sdmaAtomicHeader *)&header;
cb = new DmaVirtCallback<uint64_t>(
[ = ] (const uint64_t &)
{ atomic(q, h, (sdmaAtomic *)dmaBuffer); });
dmaReadVirt(q->rptr(), sizeof(sdmaAtomic), cb, dmaBuffer);
} break;
case SDMA_OP_CONST_FILL: {
q->incRptr(sizeof(sdmaConstFill));
@@ -861,6 +866,62 @@ SDMAEngine::ptePdeDone(SDMAQueue *q, sdmaPtePde *pkt, uint64_t *dmaBuffer)
decodeNext(q);
}
/**
 * First stage of handling an SDMA atomic packet.
 *
 * Advances the queue's read pointer past the atomic packet, traces the
 * packet fields, and issues a 64-bit virtual DMA read of the value
 * currently stored at pkt->addr. The read's callback hands control to
 * atomicData() together with the freshly allocated 64-bit buffer.
 *
 * @param q      Queue the packet was taken from.
 * @param header Decoded atomic header; forwarded unchanged to atomicData().
 * @param pkt    Atomic packet body; forwarded unchanged to atomicData().
 */
void
SDMAEngine::atomic(SDMAQueue *q, sdmaAtomicHeader *header, sdmaAtomic *pkt)
{
    q->incRptr(sizeof(sdmaAtomic));

    DPRINTF(SDMAEngine, "Atomic op %d on addr %#lx, src: %ld, cmp: %ld, loop?"
            " %d loopInt: %d\n", header->opcode, pkt->addr, pkt->srcData,
            pkt->cmpData, header->loop, pkt->loopInt);

    // Heap buffer that receives the value read from pkt->addr. It is passed
    // down the callback chain and released in atomicDone().
    auto *dst_value = new uint64_t;

    auto *on_read = new DmaVirtCallback<uint64_t>(
        [this, q, header, pkt, dst_value](const uint64_t &)
        { atomicData(q, header, pkt, dst_value); });

    dmaReadVirt(pkt->addr, sizeof(uint64_t), on_read,
                static_cast<void *>(dst_value));
}
/**
 * Second stage of handling an SDMA atomic packet: apply the operation to
 * the value that was read from pkt->addr and write the result back.
 *
 * Only SDMA_ATOMIC_ADD64 is supported; any other opcode panics. The sum is
 * computed with unsigned 64-bit arithmetic so that it wraps modulo 2^64
 * instead of invoking undefined behaviour on signed overflow.
 *
 * @param q         Queue the packet was taken from.
 * @param header    Decoded atomic header; its opcode selects the operation.
 * @param pkt       Atomic packet body supplying addr and srcData.
 * @param dmaBuffer Holds the value read from pkt->addr on entry; it is
 *                  overwritten with the result and reused as the source of
 *                  the write. Released later in atomicDone().
 */
void
SDMAEngine::atomicData(SDMAQueue *q, sdmaAtomicHeader *header, sdmaAtomic *pkt,
                       uint64_t *dmaBuffer)
{
    DPRINTF(SDMAEngine, "Atomic op %d on addr %#lx got data %#lx\n",
            header->opcode, pkt->addr, *dmaBuffer);

    if (header->opcode == SDMA_ATOMIC_ADD64) {
        // Atomic add with return -- dst = dst + src
        const uint64_t dst_data = *dmaBuffer;
        const uint64_t src_data = pkt->srcData;
        // Unsigned addition is well defined on wrap-around; adding the same
        // bit patterns as int64_t would be undefined on overflow.
        const uint64_t sum = dst_data + src_data;

        // Keep the trace output signed, as the format string expects.
        DPRINTF(SDMAEngine, "Atomic ADD_RTN: %ld + %ld = %ld\n",
                static_cast<int64_t>(dst_data),
                static_cast<int64_t>(src_data),
                static_cast<int64_t>(sum));

        // Reuse the dmaBuffer allocated
        *dmaBuffer = sum;

        auto cb = new DmaVirtCallback<uint64_t>(
            [ = ] (const uint64_t &)
                { atomicDone(q, header, pkt, dmaBuffer); });
        dmaWriteVirt(pkt->addr, sizeof(uint64_t), cb,
                     static_cast<void *>(dmaBuffer));
    } else {
        panic("Unsupported SDMA atomic opcode: %d\n", header->opcode);
    }
}
/**
 * Final stage of handling an SDMA atomic packet.
 *
 * Traces the value that was written, releases the three heap objects that
 * were carried through the callback chain (the 64-bit data buffer, the
 * decoded header and the packet body), and resumes decoding on the queue.
 *
 * @param q         Queue to continue decoding from.
 * @param header    Decoded atomic header; deleted here.
 * @param pkt       Atomic packet body; deleted here.
 * @param dmaBuffer Buffer holding the value that was written; deleted here.
 */
void
SDMAEngine::atomicDone(SDMAQueue *q, sdmaAtomicHeader *header, sdmaAtomic *pkt,
                       uint64_t *dmaBuffer)
{
    // Trace before freeing: the message reads all three objects.
    DPRINTF(SDMAEngine, "Atomic op %d on addr %#lx complete (sent %lx)\n",
            header->opcode, pkt->addr, *dmaBuffer);

    delete dmaBuffer;
    delete header;
    delete pkt;

    decodeNext(q);
}
AddrRangeList
SDMAEngine::getAddrRanges() const
{

View File

@@ -218,6 +218,11 @@ class SDMAEngine : public DmaVirtDevice
bool pollRegMemFunc(uint32_t value, uint32_t reference, uint32_t func);
void ptePde(SDMAQueue *q, sdmaPtePde *pkt);
void ptePdeDone(SDMAQueue *q, sdmaPtePde *pkt, uint64_t *dmaBuffer);
// Handlers for an SDMA atomic packet. Each stage hands off to the next
// through a DMA callback: atomic -> atomicData -> atomicDone.
//
// atomic: advances the queue read pointer past the packet and starts a
// 64-bit read of the value at pkt->addr.
void atomic(SDMAQueue *q, sdmaAtomicHeader *header, sdmaAtomic *pkt);
// atomicData: applies the operation selected by header->opcode to the value
// in dmaBuffer (only SDMA_ATOMIC_ADD64 is handled; other opcodes panic) and
// writes the result back to pkt->addr.
void atomicData(SDMAQueue *q, sdmaAtomicHeader *header, sdmaAtomic *pkt,
uint64_t *dmaBuffer);
// atomicDone: deletes dmaBuffer, header and pkt, then decodes the next
// packet on the queue.
void atomicDone(SDMAQueue *q, sdmaAtomicHeader *header, sdmaAtomic *pkt,
uint64_t *dmaBuffer);
/**
* Methods for getting the values of SDMA MMIO registers.

View File

@@ -283,6 +283,17 @@ typedef struct GEM5_PACKED
} sdmaAtomic;
static_assert(sizeof(sdmaAtomic) == 28);
/**
 * Bit-field view of the 32-bit header dword of an SDMA atomic packet.
 *
 * The fields are unsigned so that a set loop bit reads as 1 (a signed
 * one-bit field can only hold 0 or -1) and opcode values of 64 and above
 * do not read back as negative numbers.
 *
 * NOTE(review): bit-field allocation order is implementation-defined; this
 * layout presumably relies on fields being allocated from the least
 * significant bit, which would place opcode in the top seven bits --
 * confirm against the supported compilers/ABIs.
 */
typedef struct GEM5_PACKED
{
    unsigned int unused2 : 16;
    unsigned int loop : 1;
    unsigned int unused1 : 8;
    unsigned int opcode : 7;
} sdmaAtomicHeader;
static_assert(sizeof(sdmaAtomicHeader) == 4);

// Value of sdmaAtomicHeader::opcode that selects the 64-bit atomic add.
constexpr unsigned int SDMA_ATOMIC_ADD64 = 47;
typedef struct GEM5_PACKED
{
uint64_t dest;