Summary of this patch (free text before the first "diff --git" header):

  * SDMAEngine::registerRLCQueue() now receives the MQD address and a pointer
    to the SDMAQueueDesc instead of pre-computed rb_base/size/rptr_wb_addr.
    The ring size and the rptr write-back address are derived from the MQD
    fields, and the queue's read/write pointers are seeded from mqd->rptr and
    mqd->wptr instead of being reset to zero.
  * SDMAEngine::unregisterRLCQueue() stores the queue's current rptr/wptr in
    the MQD and DMA-writes the MQD back to the address it was registered with.
  * SDMAEngine::deallocateRLCQueues() skips rlcInfo entries that are zero.
  * SDMAQueueDesc gains 64-bit `rptr` / `wptr` members overlaying the existing
    32-bit lo/hi register pairs.
  * SDMAQueue remembers the MQD pointer and MQD address (getMQD/getMQDAddr,
    setMQD/setMQDAddr).

diff --git a/src/dev/amdgpu/pm4_packet_processor.cc b/src/dev/amdgpu/pm4_packet_processor.cc
index f78f8333a6..152fd4da73 100644
--- a/src/dev/amdgpu/pm4_packet_processor.cc
+++ b/src/dev/amdgpu/pm4_packet_processor.cc
@@ -458,9 +458,7 @@ PM4PacketProcessor::processSDMAMQD(PM4MapQueues *pkt, PM4Queue *q, Addr addr,
     SDMAEngine *sdma_eng = gpuDevice->getSDMAById(pkt->engineSel - 2);
 
     // Register RLC queue with SDMA
-    sdma_eng->registerRLCQueue(pkt->doorbellOffset << 2,
-                               mqd->rb_base << 8, rlc_size,
-                               rptr_wb_addr);
+    sdma_eng->registerRLCQueue(pkt->doorbellOffset << 2, addr, mqd);
 
     // Register doorbell with GPU device
     gpuDevice->setSDMAEngine(pkt->doorbellOffset << 2, sdma_eng);
diff --git a/src/dev/amdgpu/pm4_queues.hh b/src/dev/amdgpu/pm4_queues.hh
index 8b6626d176..ddadd6543b 100644
--- a/src/dev/amdgpu/pm4_queues.hh
+++ b/src/dev/amdgpu/pm4_queues.hh
@@ -33,6 +33,8 @@
 #ifndef __DEV_AMDGPU_PM4_QUEUES_HH__
 #define __DEV_AMDGPU_PM4_QUEUES_HH__
 
+#include "dev/amdgpu/pm4_defines.hh"
+
 namespace gem5
 {
 
@@ -201,10 +203,24 @@ typedef struct GEM5_PACKED
         };
         uint64_t rb_base;
     };
-    uint32_t sdmax_rlcx_rb_rptr;
-    uint32_t sdmax_rlcx_rb_rptr_hi;
-    uint32_t sdmax_rlcx_rb_wptr;
-    uint32_t sdmax_rlcx_rb_wptr_hi;
+    union
+    {
+        struct
+        {
+            uint32_t sdmax_rlcx_rb_rptr;
+            uint32_t sdmax_rlcx_rb_rptr_hi;
+        };
+        uint64_t rptr;
+    };
+    union
+    {
+        struct
+        {
+            uint32_t sdmax_rlcx_rb_wptr;
+            uint32_t sdmax_rlcx_rb_wptr_hi;
+        };
+        uint64_t wptr;
+    };
     uint32_t sdmax_rlcx_rb_wptr_poll_cntl;
     uint32_t sdmax_rlcx_rb_rptr_addr_hi;
     uint32_t sdmax_rlcx_rb_rptr_addr_lo;
diff --git a/src/dev/amdgpu/sdma_engine.cc b/src/dev/amdgpu/sdma_engine.cc
index 02203c8178..4c03bf57b2 100644
--- a/src/dev/amdgpu/sdma_engine.cc
+++ b/src/dev/amdgpu/sdma_engine.cc
@@ -165,30 +165,40 @@ SDMAEngine::translate(Addr vaddr, Addr size)
 }
 
 void
-SDMAEngine::registerRLCQueue(Addr doorbell, Addr rb_base, uint32_t size,
-                             Addr rptr_wb_addr)
+SDMAEngine::registerRLCQueue(Addr doorbell, Addr mqdAddr, SDMAQueueDesc *mqd)
 {
+    uint32_t rlc_size = 4UL << bits(mqd->sdmax_rlcx_rb_cntl, 6, 1);
+    Addr rptr_wb_addr = mqd->sdmax_rlcx_rb_rptr_addr_hi;
+    rptr_wb_addr <<= 32;
+    rptr_wb_addr |= mqd->sdmax_rlcx_rb_rptr_addr_lo;
+
     // Get first free RLC
     if (!rlc0.valid()) {
         DPRINTF(SDMAEngine, "Doorbell %lx mapped to RLC0\n", doorbell);
         rlcInfo[0] = doorbell;
         rlc0.valid(true);
-        rlc0.base(rb_base);
+        rlc0.base(mqd->rb_base << 8);
+        rlc0.size(rlc_size);
         rlc0.rptr(0);
-        rlc0.wptr(0);
+        rlc0.incRptr(mqd->rptr);
+        rlc0.setWptr(mqd->wptr);
         rlc0.rptrWbAddr(rptr_wb_addr);
         rlc0.processing(false);
-        rlc0.size(size);
+        rlc0.setMQD(mqd);
+        rlc0.setMQDAddr(mqdAddr);
     } else if (!rlc1.valid()) {
         DPRINTF(SDMAEngine, "Doorbell %lx mapped to RLC1\n", doorbell);
         rlcInfo[1] = doorbell;
         rlc1.valid(true);
-        rlc1.base(rb_base);
+        rlc1.base(mqd->rb_base << 8);
+        rlc1.size(rlc_size);
         rlc1.rptr(0);
-        rlc1.wptr(0);
+        rlc1.incRptr(mqd->rptr);
+        rlc1.setWptr(mqd->wptr);
         rlc1.rptrWbAddr(rptr_wb_addr);
         rlc1.processing(false);
-        rlc1.size(size);
+        rlc1.setMQD(mqd);
+        rlc1.setMQDAddr(mqdAddr);
     } else {
         panic("No free RLCs. Check they are properly unmapped.");
     }
@@ -199,9 +209,37 @@ SDMAEngine::unregisterRLCQueue(Addr doorbell)
 {
     DPRINTF(SDMAEngine, "Unregistering RLC queue at %#lx\n", doorbell);
     if (rlcInfo[0] == doorbell) {
+        SDMAQueueDesc *mqd = rlc0.getMQD();
+        if (mqd) {
+            DPRINTF(SDMAEngine, "Writing RLC0 SDMAMQD back to %#lx\n",
+                    rlc0.getMQDAddr());
+
+            mqd->rptr = rlc0.globalRptr();
+            mqd->wptr = rlc0.getWptr();
+
+            auto cb = new DmaVirtCallback<uint32_t>(
+                [ = ] (const uint32_t &) { });
+            dmaWriteVirt(rlc0.getMQDAddr(), sizeof(SDMAQueueDesc), cb, mqd);
+        } else {
+            warn("RLC0 SDMAMQD address invalid\n");
+        }
         rlc0.valid(false);
         rlcInfo[0] = 0;
     } else if (rlcInfo[1] == doorbell) {
+        SDMAQueueDesc *mqd = rlc1.getMQD();
+        if (mqd) {
+            DPRINTF(SDMAEngine, "Writing RLC1 SDMAMQD back to %#lx\n",
+                    rlc1.getMQDAddr());
+
+            mqd->rptr = rlc1.globalRptr();
+            mqd->wptr = rlc1.getWptr();
+
+            auto cb = new DmaVirtCallback<uint32_t>(
+                [ = ] (const uint32_t &) { });
+            dmaWriteVirt(rlc1.getMQDAddr(), sizeof(SDMAQueueDesc), cb, mqd);
+        } else {
+            warn("RLC1 SDMAMQD address invalid\n");
+        }
         rlc1.valid(false);
         rlcInfo[1] = 0;
     } else {
@@ -213,7 +251,9 @@ void
 SDMAEngine::deallocateRLCQueues()
 {
     for (auto doorbell: rlcInfo) {
-        unregisterRLCQueue(doorbell);
+        if (doorbell) {
+            unregisterRLCQueue(doorbell);
+        }
     }
 }
 
diff --git a/src/dev/amdgpu/sdma_engine.hh b/src/dev/amdgpu/sdma_engine.hh
index 0bfee126c9..27c169193b 100644
--- a/src/dev/amdgpu/sdma_engine.hh
+++ b/src/dev/amdgpu/sdma_engine.hh
@@ -34,6 +34,7 @@
 
 #include "base/bitunion.hh"
 #include "dev/amdgpu/amdgpu_device.hh"
+#include "dev/amdgpu/pm4_queues.hh"
 #include "dev/amdgpu/sdma_packets.hh"
 #include "dev/dma_virt_device.hh"
 #include "params/SDMAEngine.hh"
@@ -65,9 +66,11 @@ class SDMAEngine : public DmaVirtDevice
         SDMAQueue *_parent;
         SDMAQueue *_ib;
         SDMAType _type;
+        SDMAQueueDesc *_mqd;
+        Addr _mqd_addr = 0;
       public:
         SDMAQueue() : _rptr(0), _wptr(0), _valid(false), _processing(false),
-            _parent(nullptr), _ib(nullptr), _type(SDMAGfx) {}
+            _parent(nullptr), _ib(nullptr), _type(SDMAGfx), _mqd(nullptr) {}
 
         Addr base() { return _base; }
         Addr rptr() { return _base + _rptr; }
@@ -82,6 +85,8 @@ class SDMAEngine : public DmaVirtDevice
         SDMAQueue* parent() { return _parent; }
         SDMAQueue* ib() { return _ib; }
         SDMAType queueType() { return _type; }
+        SDMAQueueDesc* getMQD() { return _mqd; }
+        Addr getMQDAddr() { return _mqd_addr; }
 
         void base(Addr value) { _base = value; }
 
@@ -114,6 +119,8 @@ class SDMAEngine : public DmaVirtDevice
         void parent(SDMAQueue* q) { _parent = q; }
         void ib(SDMAQueue* ib) { _ib = ib; }
         void queueType(SDMAType type) { _type = type; }
+        void setMQD(SDMAQueueDesc *mqd) { _mqd = mqd; }
+        void setMQDAddr(Addr mqdAddr) { _mqd_addr = mqdAddr; }
     };
 
     /* SDMA Engine ID */
@@ -280,8 +287,7 @@ class SDMAEngine : public DmaVirtDevice
     /**
      * Methods for RLC queues
      */
-    void registerRLCQueue(Addr doorbell, Addr rb_base, uint32_t size,
-                          Addr rptr_wb_addr);
+    void registerRLCQueue(Addr doorbell, Addr mqdAddr, SDMAQueueDesc *mqd);
     void unregisterRLCQueue(Addr doorbell);
     void deallocateRLCQueues();
 