dev-amdgpu: Implement UNMAP_QUEUES queue_sel==2
Unmap queues with queue_sel of 2 unmaps all queues while queue_sel of 3 unmaps all non-static queues. The implementation of 3 was actually correct for 2. Static queues are queues which were mapped using a map queues packet with a queue_type of 1 or 2. This commit adds ability to mark a queue as static. When unmap queues with queue_sel of 2 is sent, the existing code is now executed. With a value of 3, we now check if the queue was marked static and do not unmap it if marked. Change-Id: I87d7cf78a0600c7baa516c01f42c294d3c4e90c5
This commit is contained in:
@@ -943,13 +943,13 @@ AMDGPUDevice::deallocatePasid(uint16_t pasid)
|
||||
}
|
||||
|
||||
void
|
||||
AMDGPUDevice::deallocateAllQueues()
|
||||
AMDGPUDevice::deallocateAllQueues(bool unmap_static)
|
||||
{
|
||||
idMap.erase(idMap.begin(), idMap.end());
|
||||
usedVMIDs.erase(usedVMIDs.begin(), usedVMIDs.end());
|
||||
|
||||
for (auto& it : sdmaEngs) {
|
||||
it.second->deallocateRLCQueues();
|
||||
it.second->deallocateRLCQueues(unmap_static);
|
||||
}
|
||||
|
||||
// "All" queues implicitly refers to all user queues. User queues begin at
|
||||
|
||||
@@ -217,7 +217,7 @@ class AMDGPUDevice : public PciDevice
|
||||
uint16_t allocateVMID(uint16_t pasid);
|
||||
void deallocateVmid(uint16_t vmid);
|
||||
void deallocatePasid(uint16_t pasid);
|
||||
void deallocateAllQueues();
|
||||
void deallocateAllQueues(bool unmap_static);
|
||||
void mapDoorbellToVMID(Addr doorbell, uint16_t vmid);
|
||||
uint16_t getVMID(Addr doorbell) { return doorbellVMIDMap[doorbell]; }
|
||||
std::unordered_map<uint16_t, std::set<int>>& getUsedVMIDs();
|
||||
|
||||
@@ -518,8 +518,11 @@ PM4PacketProcessor::processSDMAMQD(PM4MapQueues *pkt, PM4Queue *q, Addr addr,
|
||||
assert(pkt->engineSel == 2 || pkt->engineSel == 3);
|
||||
SDMAEngine *sdma_eng = gpuDevice->getSDMAById(pkt->engineSel - 2);
|
||||
|
||||
// Queue type 1 and 2 are "static" queues
|
||||
// A queue is static when it was mapped with queue_type 1 or 2; the previous
// test used 2 and 3, so type-1 queues were never marked static.
// NOTE(review): confirm values against the PM4 MAP_QUEUES queue_type encoding.
bool is_static = (pkt->queueType == 1) || (pkt->queueType == 2);
|
||||
|
||||
// Register RLC queue with SDMA
|
||||
sdma_eng->registerRLCQueue(pkt->doorbellOffset << 2, addr, mqd);
|
||||
sdma_eng->registerRLCQueue(pkt->doorbellOffset << 2, addr, mqd, is_static);
|
||||
|
||||
// Register doorbell with GPU device
|
||||
gpuDevice->setSDMAEngine(pkt->doorbellOffset << 2, sdma_eng);
|
||||
@@ -586,6 +589,47 @@ PM4PacketProcessor::updateReadIndex(Addr offset, uint64_t rd_idx)
|
||||
queuesMap[offset]->getMQD()->mqdReadIndex = rd_idx;
|
||||
}
|
||||
|
||||
/**
 * Unmap every queue whose id appears in the GPU device's used-VMID map.
 *
 * Privileged (KMD) queues are always left mapped. For each remaining queue
 * the MQD is written back with a DMA write, the queue is erased from
 * `queues`, its descriptor is unset in the HSA packet processor, and the
 * MQD object is deleted.
 *
 * @param unmap_static When false, queues for which isStatic() returns true
 *                     are skipped; when true they are unmapped as well.
 *
 * NOTE(review): the MQD is deleted immediately after dmaWriteVirt() is
 * issued with it as the source buffer -- confirm the DMA layer does not
 * read from that pointer after this function returns.
 */
void
|
||||
PM4PacketProcessor::unmapAllQueues(bool unmap_static)
|
||||
{
|
||||
auto &hsa_pp = gpuDevice->CP()->hsaPacketProc();
|
||||
for (auto iter : gpuDevice->getUsedVMIDs()) {
|
||||
for (auto id : iter.second) {
|
||||
assert(queues.count(id));
|
||||
|
||||
// Do not unmap KMD queues.
|
||||
if (queues[id]->privileged()) {
|
||||
continue;
|
||||
}
|
||||
|
||||
// Leave static queues mapped unless the caller asked to unmap them too.
|
||||
if (!unmap_static && queues[id]->isStatic()) {
|
||||
continue;
|
||||
}
|
||||
|
||||
QueueDesc *mqd = queues[id]->getMQD();
|
||||
DPRINTF(PM4PacketProcessor, "Unmapping queue %d with read "
|
||||
"index %ld\n", id, mqd->mqdReadIndex);
|
||||
|
||||
// Partially writing the mqd with an offset of 96 dwords as gem5
|
||||
// does not use the full MQD and begins 96 dwords from the start
|
||||
// of the full MQD structure. See src/dev/amdgpu/pm4_queues.hh.
|
||||
Addr addr = getGARTAddr(queues[id]->mqdBase() +
|
||||
96 * sizeof(uint32_t));
|
||||
Addr mqd_base = queues[id]->mqdBase();
|
||||
auto cb = new DmaVirtCallback<uint32_t>(
|
||||
[ = ] (const uint32_t &) {
|
||||
doneMQDWrite(mqd_base, addr);
|
||||
});
|
||||
mqd->base >>= 8;
|
||||
dmaWriteVirt(addr, sizeof(QueueDesc), cb, mqd);
|
||||
queues.erase(id);
|
||||
hsa_pp.unsetDeviceQueueDesc(id, 8);
|
||||
delete mqd;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void
|
||||
PM4PacketProcessor::unmapQueues(PM4Queue *q, PM4UnmapQueues *pkt)
|
||||
{
|
||||
@@ -634,38 +678,13 @@ PM4PacketProcessor::unmapQueues(PM4Queue *q, PM4UnmapQueues *pkt)
|
||||
gpuDevice->deallocatePasid(pkt->pasid);
|
||||
break;
|
||||
case 2:
|
||||
panic("Unmapping queue selection 2 unimplemented\n");
|
||||
unmapAllQueues(true);
|
||||
gpuDevice->deallocateAllQueues(true);
|
||||
break;
|
||||
case 3: {
|
||||
auto &hsa_pp = gpuDevice->CP()->hsaPacketProc();
|
||||
for (auto iter : gpuDevice->getUsedVMIDs()) {
|
||||
for (auto id : iter.second) {
|
||||
assert(queues.count(id));
|
||||
|
||||
// Do not unmap KMD queues
|
||||
if (queues[id]->privileged()) {
|
||||
continue;
|
||||
}
|
||||
QueueDesc *mqd = queues[id]->getMQD();
|
||||
DPRINTF(PM4PacketProcessor, "Unmapping queue %d with read "
|
||||
"index %ld\n", id, mqd->mqdReadIndex);
|
||||
// Partially writing the mqd with an offset of 96 dwords
|
||||
Addr addr = getGARTAddr(queues[id]->mqdBase() +
|
||||
96 * sizeof(uint32_t));
|
||||
Addr mqd_base = queues[id]->mqdBase();
|
||||
auto cb = new DmaVirtCallback<uint32_t>(
|
||||
[ = ] (const uint32_t &) {
|
||||
doneMQDWrite(mqd_base, addr);
|
||||
});
|
||||
mqd->base >>= 8;
|
||||
dmaWriteVirt(addr, sizeof(QueueDesc), cb, mqd);
|
||||
queues.erase(id);
|
||||
hsa_pp.unsetDeviceQueueDesc(id, 8);
|
||||
delete mqd;
|
||||
}
|
||||
}
|
||||
gpuDevice->deallocateAllQueues();
|
||||
} break;
|
||||
case 3:
|
||||
unmapAllQueues(false);
|
||||
gpuDevice->deallocateAllQueues(false);
|
||||
break;
|
||||
default:
|
||||
panic("Unrecognized options\n");
|
||||
break;
|
||||
@@ -1127,6 +1146,7 @@ PM4PacketProcessor::serialize(CheckpointOut &cp) const
|
||||
uint32_t pipe[num_queues];
|
||||
uint32_t queue[num_queues];
|
||||
bool privileged[num_queues];
|
||||
uint32_t queue_type[num_queues];
|
||||
uint32_t hqd_active[num_queues];
|
||||
uint32_t hqd_vmid[num_queues];
|
||||
Addr aql_rptr[num_queues];
|
||||
@@ -1157,6 +1177,7 @@ PM4PacketProcessor::serialize(CheckpointOut &cp) const
|
||||
pipe[i] = q->pipe();
|
||||
queue[i] = q->queue();
|
||||
privileged[i] = q->privileged();
|
||||
queue_type[i] = q->queueType();
|
||||
hqd_active[i] = q->getMQD()->hqd_active;
|
||||
hqd_vmid[i] = q->getMQD()->hqd_vmid;
|
||||
aql_rptr[i] = q->getMQD()->aqlRptr;
|
||||
@@ -1183,6 +1204,7 @@ PM4PacketProcessor::serialize(CheckpointOut &cp) const
|
||||
SERIALIZE_ARRAY(pipe, num_queues);
|
||||
SERIALIZE_ARRAY(queue, num_queues);
|
||||
SERIALIZE_ARRAY(privileged, num_queues);
|
||||
SERIALIZE_ARRAY(queue_type, num_queues);
|
||||
SERIALIZE_ARRAY(hqd_active, num_queues);
|
||||
SERIALIZE_ARRAY(hqd_vmid, num_queues);
|
||||
SERIALIZE_ARRAY(aql_rptr, num_queues);
|
||||
@@ -1216,6 +1238,7 @@ PM4PacketProcessor::unserialize(CheckpointIn &cp)
|
||||
uint32_t pipe[num_queues];
|
||||
uint32_t queue[num_queues];
|
||||
bool privileged[num_queues];
|
||||
uint32_t queue_type[num_queues];
|
||||
uint32_t hqd_active[num_queues];
|
||||
uint32_t hqd_vmid[num_queues];
|
||||
Addr aql_rptr[num_queues];
|
||||
@@ -1239,6 +1262,7 @@ PM4PacketProcessor::unserialize(CheckpointIn &cp)
|
||||
UNSERIALIZE_ARRAY(pipe, num_queues);
|
||||
UNSERIALIZE_ARRAY(queue, num_queues);
|
||||
UNSERIALIZE_ARRAY(privileged, num_queues);
|
||||
UNSERIALIZE_ARRAY(queue_type, num_queues);
|
||||
UNSERIALIZE_ARRAY(hqd_active, num_queues);
|
||||
UNSERIALIZE_ARRAY(hqd_vmid, num_queues);
|
||||
UNSERIALIZE_ARRAY(aql_rptr, num_queues);
|
||||
@@ -1269,7 +1293,8 @@ PM4PacketProcessor::unserialize(CheckpointIn &cp)
|
||||
queues[id[i]]->ib(ib[i]);
|
||||
queues[id[i]]->offset(offset[i]);
|
||||
queues[id[i]]->processing(processing[i]);
|
||||
queues[id[i]]->setPkt(me[i], pipe[i], queue[i], privileged[i]);
|
||||
queues[id[i]]->setPkt(me[i], pipe[i], queue[i], privileged[i],
|
||||
queue_type[i]);
|
||||
queues[id[i]]->getMQD()->hqd_active = hqd_active[i];
|
||||
queues[id[i]]->getMQD()->hqd_vmid = hqd_vmid[i];
|
||||
queues[id[i]]->getMQD()->aqlRptr = aql_rptr[i];
|
||||
|
||||
@@ -67,6 +67,8 @@ class PM4PacketProcessor : public DmaVirtDevice
|
||||
int _ipId;
|
||||
AddrRange _mmioRange;
|
||||
|
||||
void unmapAllQueues(bool unmap_static);
|
||||
|
||||
public:
|
||||
PM4PacketProcessor(const PM4PacketProcessorParams &p);
|
||||
|
||||
|
||||
@@ -486,12 +486,16 @@ class PM4Queue
|
||||
uint32_t pipe() { return _pkt.pipe; }
|
||||
uint32_t queue() { return _pkt.queueSlot; }
|
||||
bool privileged() { return _pkt.queueSel == 0 ? 1 : 0; }
|
||||
uint32_t queueType() { return _pkt.queueType; }
|
||||
bool isStatic() { return (_pkt.queueType != 0); }
|
||||
PM4MapQueues* getPkt() { return &_pkt; }
|
||||
void setPkt(uint32_t me, uint32_t pipe, uint32_t queue, bool privileged) {
|
||||
void setPkt(uint32_t me, uint32_t pipe, uint32_t queue, bool privileged,
|
||||
uint32_t queueType) {
|
||||
_pkt.me = me - 1;
|
||||
_pkt.pipe = pipe;
|
||||
_pkt.queueSlot = queue;
|
||||
_pkt.queueSel = (privileged == 0) ? 1 : 0;
|
||||
_pkt.queueType = queueType;
|
||||
}
|
||||
|
||||
// Same computation as processMQD. See comment there for details.
|
||||
|
||||
@@ -179,7 +179,8 @@ SDMAEngine::translate(Addr vaddr, Addr size)
|
||||
}
|
||||
|
||||
void
|
||||
SDMAEngine::registerRLCQueue(Addr doorbell, Addr mqdAddr, SDMAQueueDesc *mqd)
|
||||
SDMAEngine::registerRLCQueue(Addr doorbell, Addr mqdAddr, SDMAQueueDesc *mqd,
|
||||
bool isStatic)
|
||||
{
|
||||
uint32_t rlc_size = 4UL << bits(mqd->sdmax_rlcx_rb_cntl, 6, 1);
|
||||
Addr rptr_wb_addr = mqd->sdmax_rlcx_rb_rptr_addr_hi;
|
||||
@@ -202,6 +203,7 @@ SDMAEngine::registerRLCQueue(Addr doorbell, Addr mqdAddr, SDMAQueueDesc *mqd)
|
||||
rlc0.setMQD(mqd);
|
||||
rlc0.setMQDAddr(mqdAddr);
|
||||
rlc0.setPriv(priv);
|
||||
rlc0.setStatic(isStatic);
|
||||
} else if (!rlc1.valid()) {
|
||||
DPRINTF(SDMAEngine, "Doorbell %lx mapped to RLC1\n", doorbell);
|
||||
rlcInfo[1] = doorbell;
|
||||
@@ -216,16 +218,22 @@ SDMAEngine::registerRLCQueue(Addr doorbell, Addr mqdAddr, SDMAQueueDesc *mqd)
|
||||
rlc1.setMQD(mqd);
|
||||
rlc1.setMQDAddr(mqdAddr);
|
||||
rlc1.setPriv(priv);
|
||||
rlc1.setStatic(isStatic);
|
||||
} else {
|
||||
panic("No free RLCs. Check they are properly unmapped.");
|
||||
}
|
||||
}
|
||||
|
||||
void
|
||||
SDMAEngine::unregisterRLCQueue(Addr doorbell)
|
||||
SDMAEngine::unregisterRLCQueue(Addr doorbell, bool unmap_static)
|
||||
{
|
||||
DPRINTF(SDMAEngine, "Unregistering RLC queue at %#lx\n", doorbell);
|
||||
if (rlcInfo[0] == doorbell) {
|
||||
if (!unmap_static && rlc0.isStatic()) {
|
||||
DPRINTF(SDMAEngine, "RLC0 is static. Will not unregister.\n");
|
||||
return;
|
||||
}
|
||||
|
||||
SDMAQueueDesc *mqd = rlc0.getMQD();
|
||||
if (mqd) {
|
||||
DPRINTF(SDMAEngine, "Writing RLC0 SDMAMQD back to %#lx\n",
|
||||
@@ -243,6 +251,11 @@ SDMAEngine::unregisterRLCQueue(Addr doorbell)
|
||||
rlc0.valid(false);
|
||||
rlcInfo[0] = 0;
|
||||
} else if (rlcInfo[1] == doorbell) {
|
||||
if (!unmap_static && rlc1.isStatic()) {
|
||||
DPRINTF(SDMAEngine, "RLC1 is static. Will not unregister.\n");
|
||||
return;
|
||||
}
|
||||
|
||||
SDMAQueueDesc *mqd = rlc1.getMQD();
|
||||
if (mqd) {
|
||||
DPRINTF(SDMAEngine, "Writing RLC1 SDMAMQD back to %#lx\n",
|
||||
@@ -262,15 +275,16 @@ SDMAEngine::unregisterRLCQueue(Addr doorbell)
|
||||
} else {
|
||||
panic("Cannot unregister: no RLC queue at %#lx\n", doorbell);
|
||||
}
|
||||
|
||||
gpuDevice->unsetDoorbell(doorbell);
|
||||
}
|
||||
|
||||
void
|
||||
SDMAEngine::deallocateRLCQueues()
|
||||
SDMAEngine::deallocateRLCQueues(bool unmap_static)
|
||||
{
|
||||
for (auto doorbell: rlcInfo) {
|
||||
if (doorbell) {
|
||||
unregisterRLCQueue(doorbell);
|
||||
gpuDevice->unsetDoorbell(doorbell);
|
||||
unregisterRLCQueue(doorbell, unmap_static);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -69,6 +69,7 @@ class SDMAEngine : public DmaVirtDevice
|
||||
SDMAQueueDesc *_mqd;
|
||||
Addr _mqd_addr = 0;
|
||||
bool _priv = true; // Only used for RLC queues. True otherwise.
|
||||
bool _static = false;
|
||||
public:
|
||||
SDMAQueue() : _rptr(0), _wptr(0), _valid(false), _processing(false),
|
||||
_parent(nullptr), _ib(nullptr), _type(SDMAGfx), _mqd(nullptr) {}
|
||||
@@ -89,6 +90,7 @@ class SDMAEngine : public DmaVirtDevice
|
||||
SDMAQueueDesc* getMQD() { return _mqd; }
|
||||
Addr getMQDAddr() { return _mqd_addr; }
|
||||
bool priv() { return _priv; }
|
||||
bool isStatic() { return _static; }
|
||||
|
||||
void base(Addr value) { _base = value; }
|
||||
|
||||
@@ -124,6 +126,7 @@ class SDMAEngine : public DmaVirtDevice
|
||||
void setMQD(SDMAQueueDesc *mqd) { _mqd = mqd; }
|
||||
void setMQDAddr(Addr mqdAddr) { _mqd_addr = mqdAddr; }
|
||||
void setPriv(bool priv) { _priv = priv; }
|
||||
void setStatic(bool isStatic) { _static = isStatic; }
|
||||
};
|
||||
|
||||
/* SDMA Engine ID */
|
||||
@@ -307,9 +310,10 @@ class SDMAEngine : public DmaVirtDevice
|
||||
/**
|
||||
* Methods for RLC queues
|
||||
*/
|
||||
void registerRLCQueue(Addr doorbell, Addr mqdAddr, SDMAQueueDesc *mqd);
|
||||
void unregisterRLCQueue(Addr doorbell);
|
||||
void deallocateRLCQueues();
|
||||
void registerRLCQueue(Addr doorbell, Addr mqdAddr, SDMAQueueDesc *mqd,
|
||||
bool isStatic);
|
||||
void unregisterRLCQueue(Addr doorbell, bool unmap_static);
|
||||
void deallocateRLCQueues(bool unmap_static);
|
||||
|
||||
int cur_vmid = 0;
|
||||
};
|
||||
|
||||
Reference in New Issue
Block a user