dev-amdgpu: Writeback RLC queue MQD when unmapped
Currently, when RLC queues (user-mode queues) are mapped, the read/write pointers of the ring buffer are set to zero. However, these queues can be unmapped and then remapped later. In that situation, the read/write pointers should hold the values they had before the unmapping occurred. Because the read pointer is instead reset to zero, the queue begins reading from the start of the ring, which usually contains older packets. There is a 99% chance that those packets contain addresses that are no longer in the page tables, which will cause a page fault. To fix this, we update the MQD with the current read/write pointer values and then write the MQD back to memory when the queue is unmapped. This requires adding a pointer to the MQD and the host address of the MQD to which it should be written back. The interface for registering an RLC queue is also simplified: since we need to pass the MQD anyway, we can get the other values from it as well. Fixes b+tree and streamcluster from Rodinia (when using RLC queues). Change-Id: Ie5dad4d7d90ea240c3e9f0cddf3e844a3cd34c4f Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/65791 Tested-by: kokoro <noreply+kokoro@google.com> Maintainer: Matt Sinclair <mattdsinclair@gmail.com> Reviewed-by: Matt Sinclair <mattdsinclair@gmail.com>
This commit is contained in:
@@ -458,9 +458,7 @@ PM4PacketProcessor::processSDMAMQD(PM4MapQueues *pkt, PM4Queue *q, Addr addr,
|
||||
SDMAEngine *sdma_eng = gpuDevice->getSDMAById(pkt->engineSel - 2);
|
||||
|
||||
// Register RLC queue with SDMA
|
||||
sdma_eng->registerRLCQueue(pkt->doorbellOffset << 2,
|
||||
mqd->rb_base << 8, rlc_size,
|
||||
rptr_wb_addr);
|
||||
sdma_eng->registerRLCQueue(pkt->doorbellOffset << 2, addr, mqd);
|
||||
|
||||
// Register doorbell with GPU device
|
||||
gpuDevice->setSDMAEngine(pkt->doorbellOffset << 2, sdma_eng);
|
||||
|
||||
@@ -33,6 +33,8 @@
|
||||
#ifndef __DEV_AMDGPU_PM4_QUEUES_HH__
|
||||
#define __DEV_AMDGPU_PM4_QUEUES_HH__
|
||||
|
||||
#include "dev/amdgpu/pm4_defines.hh"
|
||||
|
||||
namespace gem5
|
||||
{
|
||||
|
||||
@@ -201,10 +203,24 @@ typedef struct GEM5_PACKED
|
||||
};
|
||||
uint64_t rb_base;
|
||||
};
|
||||
uint32_t sdmax_rlcx_rb_rptr;
|
||||
uint32_t sdmax_rlcx_rb_rptr_hi;
|
||||
uint32_t sdmax_rlcx_rb_wptr;
|
||||
uint32_t sdmax_rlcx_rb_wptr_hi;
|
||||
union
|
||||
{
|
||||
struct
|
||||
{
|
||||
uint32_t sdmax_rlcx_rb_rptr;
|
||||
uint32_t sdmax_rlcx_rb_rptr_hi;
|
||||
};
|
||||
uint64_t rptr;
|
||||
};
|
||||
union
|
||||
{
|
||||
struct
|
||||
{
|
||||
uint32_t sdmax_rlcx_rb_wptr;
|
||||
uint32_t sdmax_rlcx_rb_wptr_hi;
|
||||
};
|
||||
uint64_t wptr;
|
||||
};
|
||||
uint32_t sdmax_rlcx_rb_wptr_poll_cntl;
|
||||
uint32_t sdmax_rlcx_rb_rptr_addr_hi;
|
||||
uint32_t sdmax_rlcx_rb_rptr_addr_lo;
|
||||
|
||||
@@ -165,30 +165,40 @@ SDMAEngine::translate(Addr vaddr, Addr size)
|
||||
}
|
||||
|
||||
void
|
||||
SDMAEngine::registerRLCQueue(Addr doorbell, Addr rb_base, uint32_t size,
|
||||
Addr rptr_wb_addr)
|
||||
SDMAEngine::registerRLCQueue(Addr doorbell, Addr mqdAddr, SDMAQueueDesc *mqd)
|
||||
{
|
||||
uint32_t rlc_size = 4UL << bits(mqd->sdmax_rlcx_rb_cntl, 6, 1);
|
||||
Addr rptr_wb_addr = mqd->sdmax_rlcx_rb_rptr_addr_hi;
|
||||
rptr_wb_addr <<= 32;
|
||||
rptr_wb_addr |= mqd->sdmax_rlcx_rb_rptr_addr_lo;
|
||||
|
||||
// Get first free RLC
|
||||
if (!rlc0.valid()) {
|
||||
DPRINTF(SDMAEngine, "Doorbell %lx mapped to RLC0\n", doorbell);
|
||||
rlcInfo[0] = doorbell;
|
||||
rlc0.valid(true);
|
||||
rlc0.base(rb_base);
|
||||
rlc0.base(mqd->rb_base << 8);
|
||||
rlc0.size(rlc_size);
|
||||
rlc0.rptr(0);
|
||||
rlc0.wptr(0);
|
||||
rlc0.incRptr(mqd->rptr);
|
||||
rlc0.setWptr(mqd->wptr);
|
||||
rlc0.rptrWbAddr(rptr_wb_addr);
|
||||
rlc0.processing(false);
|
||||
rlc0.size(size);
|
||||
rlc0.setMQD(mqd);
|
||||
rlc0.setMQDAddr(mqdAddr);
|
||||
} else if (!rlc1.valid()) {
|
||||
DPRINTF(SDMAEngine, "Doorbell %lx mapped to RLC1\n", doorbell);
|
||||
rlcInfo[1] = doorbell;
|
||||
rlc1.valid(true);
|
||||
rlc1.base(rb_base);
|
||||
rlc1.base(mqd->rb_base << 8);
|
||||
rlc1.size(rlc_size);
|
||||
rlc1.rptr(0);
|
||||
rlc1.wptr(0);
|
||||
rlc1.incRptr(mqd->rptr);
|
||||
rlc1.setWptr(mqd->wptr);
|
||||
rlc1.rptrWbAddr(rptr_wb_addr);
|
||||
rlc1.processing(false);
|
||||
rlc1.size(size);
|
||||
rlc1.setMQD(mqd);
|
||||
rlc1.setMQDAddr(mqdAddr);
|
||||
} else {
|
||||
panic("No free RLCs. Check they are properly unmapped.");
|
||||
}
|
||||
@@ -199,9 +209,37 @@ SDMAEngine::unregisterRLCQueue(Addr doorbell)
|
||||
{
|
||||
DPRINTF(SDMAEngine, "Unregistering RLC queue at %#lx\n", doorbell);
|
||||
if (rlcInfo[0] == doorbell) {
|
||||
SDMAQueueDesc *mqd = rlc0.getMQD();
|
||||
if (mqd) {
|
||||
DPRINTF(SDMAEngine, "Writing RLC0 SDMAMQD back to %#lx\n",
|
||||
rlc0.getMQDAddr());
|
||||
|
||||
mqd->rptr = rlc0.globalRptr();
|
||||
mqd->wptr = rlc0.getWptr();
|
||||
|
||||
auto cb = new DmaVirtCallback<uint32_t>(
|
||||
[ = ] (const uint32_t &) { });
|
||||
dmaWriteVirt(rlc0.getMQDAddr(), sizeof(SDMAQueueDesc), cb, mqd);
|
||||
} else {
|
||||
warn("RLC0 SDMAMQD address invalid\n");
|
||||
}
|
||||
rlc0.valid(false);
|
||||
rlcInfo[0] = 0;
|
||||
} else if (rlcInfo[1] == doorbell) {
|
||||
SDMAQueueDesc *mqd = rlc1.getMQD();
|
||||
if (mqd) {
|
||||
DPRINTF(SDMAEngine, "Writing RLC1 SDMAMQD back to %#lx\n",
|
||||
rlc1.getMQDAddr());
|
||||
|
||||
mqd->rptr = rlc1.globalRptr();
|
||||
mqd->wptr = rlc1.getWptr();
|
||||
|
||||
auto cb = new DmaVirtCallback<uint32_t>(
|
||||
[ = ] (const uint32_t &) { });
|
||||
dmaWriteVirt(rlc1.getMQDAddr(), sizeof(SDMAQueueDesc), cb, mqd);
|
||||
} else {
|
||||
warn("RLC1 SDMAMQD address invalid\n");
|
||||
}
|
||||
rlc1.valid(false);
|
||||
rlcInfo[1] = 0;
|
||||
} else {
|
||||
@@ -213,7 +251,9 @@ void
|
||||
SDMAEngine::deallocateRLCQueues()
|
||||
{
|
||||
for (auto doorbell: rlcInfo) {
|
||||
unregisterRLCQueue(doorbell);
|
||||
if (doorbell) {
|
||||
unregisterRLCQueue(doorbell);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -34,6 +34,7 @@
|
||||
|
||||
#include "base/bitunion.hh"
|
||||
#include "dev/amdgpu/amdgpu_device.hh"
|
||||
#include "dev/amdgpu/pm4_queues.hh"
|
||||
#include "dev/amdgpu/sdma_packets.hh"
|
||||
#include "dev/dma_virt_device.hh"
|
||||
#include "params/SDMAEngine.hh"
|
||||
@@ -65,9 +66,11 @@ class SDMAEngine : public DmaVirtDevice
|
||||
SDMAQueue *_parent;
|
||||
SDMAQueue *_ib;
|
||||
SDMAType _type;
|
||||
SDMAQueueDesc *_mqd;
|
||||
Addr _mqd_addr = 0;
|
||||
public:
|
||||
SDMAQueue() : _rptr(0), _wptr(0), _valid(false), _processing(false),
|
||||
_parent(nullptr), _ib(nullptr), _type(SDMAGfx) {}
|
||||
_parent(nullptr), _ib(nullptr), _type(SDMAGfx), _mqd(nullptr) {}
|
||||
|
||||
Addr base() { return _base; }
|
||||
Addr rptr() { return _base + _rptr; }
|
||||
@@ -82,6 +85,8 @@ class SDMAEngine : public DmaVirtDevice
|
||||
SDMAQueue* parent() { return _parent; }
|
||||
SDMAQueue* ib() { return _ib; }
|
||||
SDMAType queueType() { return _type; }
|
||||
SDMAQueueDesc* getMQD() { return _mqd; }
|
||||
Addr getMQDAddr() { return _mqd_addr; }
|
||||
|
||||
void base(Addr value) { _base = value; }
|
||||
|
||||
@@ -114,6 +119,8 @@ class SDMAEngine : public DmaVirtDevice
|
||||
void parent(SDMAQueue* q) { _parent = q; }
|
||||
void ib(SDMAQueue* ib) { _ib = ib; }
|
||||
void queueType(SDMAType type) { _type = type; }
|
||||
void setMQD(SDMAQueueDesc *mqd) { _mqd = mqd; }
|
||||
void setMQDAddr(Addr mqdAddr) { _mqd_addr = mqdAddr; }
|
||||
};
|
||||
|
||||
/* SDMA Engine ID */
|
||||
@@ -280,8 +287,7 @@ class SDMAEngine : public DmaVirtDevice
|
||||
/**
|
||||
* Methods for RLC queues
|
||||
*/
|
||||
void registerRLCQueue(Addr doorbell, Addr rb_base, uint32_t size,
|
||||
Addr rptr_wb_addr);
|
||||
void registerRLCQueue(Addr doorbell, Addr mqdAddr, SDMAQueueDesc *mqd);
|
||||
void unregisterRLCQueue(Addr doorbell);
|
||||
void deallocateRLCQueues();
|
||||
|
||||
|
||||
Reference in New Issue
Block a user