dev-amdgpu: Writeback RLC queue MQD when unmapped

Currently when RLC queues (user mode queues) are mapped, the read/write
pointers of the ring buffer are set to zero. However, these queues could
be unmapped and then remapped later. In that situation the read/write
pointers should be restored to the values they held before unmapping
occurred. Since the read pointer gets reset to zero, the queue begins
reading from the start of the ring, which usually contains older packets.
There is a 99% chance those packets contain addresses that are no longer
in the page tables, which will cause a page fault.

To fix this we update the MQD with the current read/write pointer values
and then write the MQD back to memory when the queue is unmapped. This
requires each queue to store a pointer to the MQD and the host address
to which the MQD should be written back. The interface for registering
an RLC queue is also simplified: since we need to pass the MQD anyway,
we can read the ring base, ring size, and rptr writeback address from it.

Fixes the b+tree and streamcluster benchmarks from Rodinia (when using
RLC queues).

Change-Id: Ie5dad4d7d90ea240c3e9f0cddf3e844a3cd34c4f
Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/65791
Tested-by: kokoro <noreply+kokoro@google.com>
Maintainer: Matt Sinclair <mattdsinclair@gmail.com>
Reviewed-by: Matt Sinclair <mattdsinclair@gmail.com>
This commit is contained in:
Matthew Poremba
2022-11-18 16:47:50 -08:00
parent c0d67cba3a
commit eee42275ee
4 changed files with 79 additions and 19 deletions

View File

@@ -458,9 +458,7 @@ PM4PacketProcessor::processSDMAMQD(PM4MapQueues *pkt, PM4Queue *q, Addr addr,
SDMAEngine *sdma_eng = gpuDevice->getSDMAById(pkt->engineSel - 2);
// Register RLC queue with SDMA
sdma_eng->registerRLCQueue(pkt->doorbellOffset << 2,
mqd->rb_base << 8, rlc_size,
rptr_wb_addr);
sdma_eng->registerRLCQueue(pkt->doorbellOffset << 2, addr, mqd);
// Register doorbell with GPU device
gpuDevice->setSDMAEngine(pkt->doorbellOffset << 2, sdma_eng);

View File

@@ -33,6 +33,8 @@
#ifndef __DEV_AMDGPU_PM4_QUEUES_HH__
#define __DEV_AMDGPU_PM4_QUEUES_HH__
#include "dev/amdgpu/pm4_defines.hh"
namespace gem5
{
@@ -201,10 +203,24 @@ typedef struct GEM5_PACKED
};
uint64_t rb_base;
};
uint32_t sdmax_rlcx_rb_rptr;
uint32_t sdmax_rlcx_rb_rptr_hi;
uint32_t sdmax_rlcx_rb_wptr;
uint32_t sdmax_rlcx_rb_wptr_hi;
union
{
struct
{
uint32_t sdmax_rlcx_rb_rptr;
uint32_t sdmax_rlcx_rb_rptr_hi;
};
uint64_t rptr;
};
union
{
struct
{
uint32_t sdmax_rlcx_rb_wptr;
uint32_t sdmax_rlcx_rb_wptr_hi;
};
uint64_t wptr;
};
uint32_t sdmax_rlcx_rb_wptr_poll_cntl;
uint32_t sdmax_rlcx_rb_rptr_addr_hi;
uint32_t sdmax_rlcx_rb_rptr_addr_lo;

View File

@@ -165,30 +165,40 @@ SDMAEngine::translate(Addr vaddr, Addr size)
}
void
SDMAEngine::registerRLCQueue(Addr doorbell, Addr rb_base, uint32_t size,
Addr rptr_wb_addr)
SDMAEngine::registerRLCQueue(Addr doorbell, Addr mqdAddr, SDMAQueueDesc *mqd)
{
uint32_t rlc_size = 4UL << bits(mqd->sdmax_rlcx_rb_cntl, 6, 1);
Addr rptr_wb_addr = mqd->sdmax_rlcx_rb_rptr_addr_hi;
rptr_wb_addr <<= 32;
rptr_wb_addr |= mqd->sdmax_rlcx_rb_rptr_addr_lo;
// Get first free RLC
if (!rlc0.valid()) {
DPRINTF(SDMAEngine, "Doorbell %lx mapped to RLC0\n", doorbell);
rlcInfo[0] = doorbell;
rlc0.valid(true);
rlc0.base(rb_base);
rlc0.base(mqd->rb_base << 8);
rlc0.size(rlc_size);
rlc0.rptr(0);
rlc0.wptr(0);
rlc0.incRptr(mqd->rptr);
rlc0.setWptr(mqd->wptr);
rlc0.rptrWbAddr(rptr_wb_addr);
rlc0.processing(false);
rlc0.size(size);
rlc0.setMQD(mqd);
rlc0.setMQDAddr(mqdAddr);
} else if (!rlc1.valid()) {
DPRINTF(SDMAEngine, "Doorbell %lx mapped to RLC1\n", doorbell);
rlcInfo[1] = doorbell;
rlc1.valid(true);
rlc1.base(rb_base);
rlc1.base(mqd->rb_base << 8);
rlc1.size(rlc_size);
rlc1.rptr(0);
rlc1.wptr(0);
rlc1.incRptr(mqd->rptr);
rlc1.setWptr(mqd->wptr);
rlc1.rptrWbAddr(rptr_wb_addr);
rlc1.processing(false);
rlc1.size(size);
rlc1.setMQD(mqd);
rlc1.setMQDAddr(mqdAddr);
} else {
panic("No free RLCs. Check they are properly unmapped.");
}
@@ -199,9 +209,37 @@ SDMAEngine::unregisterRLCQueue(Addr doorbell)
{
DPRINTF(SDMAEngine, "Unregistering RLC queue at %#lx\n", doorbell);
if (rlcInfo[0] == doorbell) {
SDMAQueueDesc *mqd = rlc0.getMQD();
if (mqd) {
DPRINTF(SDMAEngine, "Writing RLC0 SDMAMQD back to %#lx\n",
rlc0.getMQDAddr());
mqd->rptr = rlc0.globalRptr();
mqd->wptr = rlc0.getWptr();
auto cb = new DmaVirtCallback<uint32_t>(
[ = ] (const uint32_t &) { });
dmaWriteVirt(rlc0.getMQDAddr(), sizeof(SDMAQueueDesc), cb, mqd);
} else {
warn("RLC0 SDMAMQD address invalid\n");
}
rlc0.valid(false);
rlcInfo[0] = 0;
} else if (rlcInfo[1] == doorbell) {
SDMAQueueDesc *mqd = rlc1.getMQD();
if (mqd) {
DPRINTF(SDMAEngine, "Writing RLC1 SDMAMQD back to %#lx\n",
rlc1.getMQDAddr());
mqd->rptr = rlc1.globalRptr();
mqd->wptr = rlc1.getWptr();
auto cb = new DmaVirtCallback<uint32_t>(
[ = ] (const uint32_t &) { });
dmaWriteVirt(rlc1.getMQDAddr(), sizeof(SDMAQueueDesc), cb, mqd);
} else {
warn("RLC1 SDMAMQD address invalid\n");
}
rlc1.valid(false);
rlcInfo[1] = 0;
} else {
@@ -213,7 +251,9 @@ void
SDMAEngine::deallocateRLCQueues()
{
for (auto doorbell: rlcInfo) {
unregisterRLCQueue(doorbell);
if (doorbell) {
unregisterRLCQueue(doorbell);
}
}
}

View File

@@ -34,6 +34,7 @@
#include "base/bitunion.hh"
#include "dev/amdgpu/amdgpu_device.hh"
#include "dev/amdgpu/pm4_queues.hh"
#include "dev/amdgpu/sdma_packets.hh"
#include "dev/dma_virt_device.hh"
#include "params/SDMAEngine.hh"
@@ -65,9 +66,11 @@ class SDMAEngine : public DmaVirtDevice
SDMAQueue *_parent;
SDMAQueue *_ib;
SDMAType _type;
SDMAQueueDesc *_mqd;
Addr _mqd_addr = 0;
public:
SDMAQueue() : _rptr(0), _wptr(0), _valid(false), _processing(false),
_parent(nullptr), _ib(nullptr), _type(SDMAGfx) {}
_parent(nullptr), _ib(nullptr), _type(SDMAGfx), _mqd(nullptr) {}
Addr base() { return _base; }
Addr rptr() { return _base + _rptr; }
@@ -82,6 +85,8 @@ class SDMAEngine : public DmaVirtDevice
SDMAQueue* parent() { return _parent; }
SDMAQueue* ib() { return _ib; }
SDMAType queueType() { return _type; }
SDMAQueueDesc* getMQD() { return _mqd; }
Addr getMQDAddr() { return _mqd_addr; }
void base(Addr value) { _base = value; }
@@ -114,6 +119,8 @@ class SDMAEngine : public DmaVirtDevice
void parent(SDMAQueue* q) { _parent = q; }
void ib(SDMAQueue* ib) { _ib = ib; }
void queueType(SDMAType type) { _type = type; }
void setMQD(SDMAQueueDesc *mqd) { _mqd = mqd; }
void setMQDAddr(Addr mqdAddr) { _mqd_addr = mqdAddr; }
};
/* SDMA Engine ID */
@@ -280,8 +287,7 @@ class SDMAEngine : public DmaVirtDevice
/**
* Methods for RLC queues
*/
void registerRLCQueue(Addr doorbell, Addr rb_base, uint32_t size,
Addr rptr_wb_addr);
void registerRLCQueue(Addr doorbell, Addr mqdAddr, SDMAQueueDesc *mqd);
void unregisterRLCQueue(Addr doorbell);
void deallocateRLCQueues();