dev-amdgpu: Implement UNMAP_QUEUES queue_sel==2

Unmap queues with queue_sel of 2 unmaps all queues while queue_sel of 3
unmaps all non-static queues. The implementation of 3 was actually
correct for 2. Static queues are queues which were mapped using a map
queues packet with a queue_type of 1 or 2.

This commit adds the ability to mark a queue as static. When an unmap
queues packet with a queue_sel of 2 is sent, the existing code is now
executed. With a value of 3, we now check whether the queue was marked
static and do not unmap it if so.

Change-Id: I87d7cf78a0600c7baa516c01f42c294d3c4e90c5
This commit is contained in:
Matthew Poremba
2024-08-19 17:27:32 -07:00
parent d78a571660
commit 21f1e54ecd
7 changed files with 94 additions and 45 deletions

View File

@@ -943,13 +943,13 @@ AMDGPUDevice::deallocatePasid(uint16_t pasid)
} }
void void
AMDGPUDevice::deallocateAllQueues() AMDGPUDevice::deallocateAllQueues(bool unmap_static)
{ {
idMap.erase(idMap.begin(), idMap.end()); idMap.erase(idMap.begin(), idMap.end());
usedVMIDs.erase(usedVMIDs.begin(), usedVMIDs.end()); usedVMIDs.erase(usedVMIDs.begin(), usedVMIDs.end());
for (auto& it : sdmaEngs) { for (auto& it : sdmaEngs) {
it.second->deallocateRLCQueues(); it.second->deallocateRLCQueues(unmap_static);
} }
// "All" queues implicitly refers to all user queues. User queues begin at // "All" queues implicitly refers to all user queues. User queues begin at

View File

@@ -217,7 +217,7 @@ class AMDGPUDevice : public PciDevice
uint16_t allocateVMID(uint16_t pasid); uint16_t allocateVMID(uint16_t pasid);
void deallocateVmid(uint16_t vmid); void deallocateVmid(uint16_t vmid);
void deallocatePasid(uint16_t pasid); void deallocatePasid(uint16_t pasid);
void deallocateAllQueues(); void deallocateAllQueues(bool unmap_static);
void mapDoorbellToVMID(Addr doorbell, uint16_t vmid); void mapDoorbellToVMID(Addr doorbell, uint16_t vmid);
uint16_t getVMID(Addr doorbell) { return doorbellVMIDMap[doorbell]; } uint16_t getVMID(Addr doorbell) { return doorbellVMIDMap[doorbell]; }
std::unordered_map<uint16_t, std::set<int>>& getUsedVMIDs(); std::unordered_map<uint16_t, std::set<int>>& getUsedVMIDs();

View File

@@ -518,8 +518,11 @@ PM4PacketProcessor::processSDMAMQD(PM4MapQueues *pkt, PM4Queue *q, Addr addr,
assert(pkt->engineSel == 2 || pkt->engineSel == 3); assert(pkt->engineSel == 2 || pkt->engineSel == 3);
SDMAEngine *sdma_eng = gpuDevice->getSDMAById(pkt->engineSel - 2); SDMAEngine *sdma_eng = gpuDevice->getSDMAById(pkt->engineSel - 2);
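// Queue types 2 and 3 are treated as "static" queues here.
// NOTE(review): the commit message says queue_type 1 or 2, and
// PM4Queue::isStatic() treats any nonzero type as static -- confirm.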
bool is_static = (pkt->queueType == 2) || (pkt->queueType == 3);
// Register RLC queue with SDMA // Register RLC queue with SDMA
sdma_eng->registerRLCQueue(pkt->doorbellOffset << 2, addr, mqd); sdma_eng->registerRLCQueue(pkt->doorbellOffset << 2, addr, mqd, is_static);
// Register doorbell with GPU device // Register doorbell with GPU device
gpuDevice->setSDMAEngine(pkt->doorbellOffset << 2, sdma_eng); gpuDevice->setSDMAEngine(pkt->doorbellOffset << 2, sdma_eng);
@@ -586,6 +589,47 @@ PM4PacketProcessor::updateReadIndex(Addr offset, uint64_t rd_idx)
queuesMap[offset]->getMQD()->mqdReadIndex = rd_idx; queuesMap[offset]->getMQD()->mqdReadIndex = rd_idx;
} }
void
PM4PacketProcessor::unmapAllQueues(bool unmap_static)
{
auto &hsa_pp = gpuDevice->CP()->hsaPacketProc();
for (auto iter : gpuDevice->getUsedVMIDs()) {
for (auto id : iter.second) {
assert(queues.count(id));
// Do not unmap KMD queues.
if (queues[id]->privileged()) {
continue;
}
// Do not unmap static queues if requested.
if (!unmap_static && queues[id]->isStatic()) {
continue;
}
QueueDesc *mqd = queues[id]->getMQD();
DPRINTF(PM4PacketProcessor, "Unmapping queue %d with read "
"index %ld\n", id, mqd->mqdReadIndex);
// Partially writing the mqd with an offset of 96 dwords as gem5
// does not use the full MQD and begins 96 dwords from the start
// of the full MQD structure. See src/dev/amdgpu/pm4_queues.hh.
Addr addr = getGARTAddr(queues[id]->mqdBase() +
96 * sizeof(uint32_t));
Addr mqd_base = queues[id]->mqdBase();
auto cb = new DmaVirtCallback<uint32_t>(
[ = ] (const uint32_t &) {
doneMQDWrite(mqd_base, addr);
});
mqd->base >>= 8;
dmaWriteVirt(addr, sizeof(QueueDesc), cb, mqd);
queues.erase(id);
hsa_pp.unsetDeviceQueueDesc(id, 8);
delete mqd;
}
}
}
void void
PM4PacketProcessor::unmapQueues(PM4Queue *q, PM4UnmapQueues *pkt) PM4PacketProcessor::unmapQueues(PM4Queue *q, PM4UnmapQueues *pkt)
{ {
@@ -634,38 +678,13 @@ PM4PacketProcessor::unmapQueues(PM4Queue *q, PM4UnmapQueues *pkt)
gpuDevice->deallocatePasid(pkt->pasid); gpuDevice->deallocatePasid(pkt->pasid);
break; break;
case 2: case 2:
panic("Unmapping queue selection 2 unimplemented\n"); unmapAllQueues(true);
gpuDevice->deallocateAllQueues(true);
break; break;
case 3: { case 3:
auto &hsa_pp = gpuDevice->CP()->hsaPacketProc(); unmapAllQueues(false);
for (auto iter : gpuDevice->getUsedVMIDs()) { gpuDevice->deallocateAllQueues(false);
for (auto id : iter.second) { break;
assert(queues.count(id));
// Do not unmap KMD queues
if (queues[id]->privileged()) {
continue;
}
QueueDesc *mqd = queues[id]->getMQD();
DPRINTF(PM4PacketProcessor, "Unmapping queue %d with read "
"index %ld\n", id, mqd->mqdReadIndex);
// Partially writing the mqd with an offset of 96 dwords
Addr addr = getGARTAddr(queues[id]->mqdBase() +
96 * sizeof(uint32_t));
Addr mqd_base = queues[id]->mqdBase();
auto cb = new DmaVirtCallback<uint32_t>(
[ = ] (const uint32_t &) {
doneMQDWrite(mqd_base, addr);
});
mqd->base >>= 8;
dmaWriteVirt(addr, sizeof(QueueDesc), cb, mqd);
queues.erase(id);
hsa_pp.unsetDeviceQueueDesc(id, 8);
delete mqd;
}
}
gpuDevice->deallocateAllQueues();
} break;
default: default:
panic("Unrecognized options\n"); panic("Unrecognized options\n");
break; break;
@@ -1127,6 +1146,7 @@ PM4PacketProcessor::serialize(CheckpointOut &cp) const
uint32_t pipe[num_queues]; uint32_t pipe[num_queues];
uint32_t queue[num_queues]; uint32_t queue[num_queues];
bool privileged[num_queues]; bool privileged[num_queues];
uint32_t queue_type[num_queues];
uint32_t hqd_active[num_queues]; uint32_t hqd_active[num_queues];
uint32_t hqd_vmid[num_queues]; uint32_t hqd_vmid[num_queues];
Addr aql_rptr[num_queues]; Addr aql_rptr[num_queues];
@@ -1157,6 +1177,7 @@ PM4PacketProcessor::serialize(CheckpointOut &cp) const
pipe[i] = q->pipe(); pipe[i] = q->pipe();
queue[i] = q->queue(); queue[i] = q->queue();
privileged[i] = q->privileged(); privileged[i] = q->privileged();
queue_type[i] = q->queueType();
hqd_active[i] = q->getMQD()->hqd_active; hqd_active[i] = q->getMQD()->hqd_active;
hqd_vmid[i] = q->getMQD()->hqd_vmid; hqd_vmid[i] = q->getMQD()->hqd_vmid;
aql_rptr[i] = q->getMQD()->aqlRptr; aql_rptr[i] = q->getMQD()->aqlRptr;
@@ -1183,6 +1204,7 @@ PM4PacketProcessor::serialize(CheckpointOut &cp) const
SERIALIZE_ARRAY(pipe, num_queues); SERIALIZE_ARRAY(pipe, num_queues);
SERIALIZE_ARRAY(queue, num_queues); SERIALIZE_ARRAY(queue, num_queues);
SERIALIZE_ARRAY(privileged, num_queues); SERIALIZE_ARRAY(privileged, num_queues);
SERIALIZE_ARRAY(queue_type, num_queues);
SERIALIZE_ARRAY(hqd_active, num_queues); SERIALIZE_ARRAY(hqd_active, num_queues);
SERIALIZE_ARRAY(hqd_vmid, num_queues); SERIALIZE_ARRAY(hqd_vmid, num_queues);
SERIALIZE_ARRAY(aql_rptr, num_queues); SERIALIZE_ARRAY(aql_rptr, num_queues);
@@ -1216,6 +1238,7 @@ PM4PacketProcessor::unserialize(CheckpointIn &cp)
uint32_t pipe[num_queues]; uint32_t pipe[num_queues];
uint32_t queue[num_queues]; uint32_t queue[num_queues];
bool privileged[num_queues]; bool privileged[num_queues];
uint32_t queue_type[num_queues];
uint32_t hqd_active[num_queues]; uint32_t hqd_active[num_queues];
uint32_t hqd_vmid[num_queues]; uint32_t hqd_vmid[num_queues];
Addr aql_rptr[num_queues]; Addr aql_rptr[num_queues];
@@ -1239,6 +1262,7 @@ PM4PacketProcessor::unserialize(CheckpointIn &cp)
UNSERIALIZE_ARRAY(pipe, num_queues); UNSERIALIZE_ARRAY(pipe, num_queues);
UNSERIALIZE_ARRAY(queue, num_queues); UNSERIALIZE_ARRAY(queue, num_queues);
UNSERIALIZE_ARRAY(privileged, num_queues); UNSERIALIZE_ARRAY(privileged, num_queues);
UNSERIALIZE_ARRAY(queue_type, num_queues);
UNSERIALIZE_ARRAY(hqd_active, num_queues); UNSERIALIZE_ARRAY(hqd_active, num_queues);
UNSERIALIZE_ARRAY(hqd_vmid, num_queues); UNSERIALIZE_ARRAY(hqd_vmid, num_queues);
UNSERIALIZE_ARRAY(aql_rptr, num_queues); UNSERIALIZE_ARRAY(aql_rptr, num_queues);
@@ -1269,7 +1293,8 @@ PM4PacketProcessor::unserialize(CheckpointIn &cp)
queues[id[i]]->ib(ib[i]); queues[id[i]]->ib(ib[i]);
queues[id[i]]->offset(offset[i]); queues[id[i]]->offset(offset[i]);
queues[id[i]]->processing(processing[i]); queues[id[i]]->processing(processing[i]);
queues[id[i]]->setPkt(me[i], pipe[i], queue[i], privileged[i]); queues[id[i]]->setPkt(me[i], pipe[i], queue[i], privileged[i],
queue_type[i]);
queues[id[i]]->getMQD()->hqd_active = hqd_active[i]; queues[id[i]]->getMQD()->hqd_active = hqd_active[i];
queues[id[i]]->getMQD()->hqd_vmid = hqd_vmid[i]; queues[id[i]]->getMQD()->hqd_vmid = hqd_vmid[i];
queues[id[i]]->getMQD()->aqlRptr = aql_rptr[i]; queues[id[i]]->getMQD()->aqlRptr = aql_rptr[i];

View File

@@ -67,6 +67,8 @@ class PM4PacketProcessor : public DmaVirtDevice
int _ipId; int _ipId;
AddrRange _mmioRange; AddrRange _mmioRange;
void unmapAllQueues(bool unmap_static);
public: public:
PM4PacketProcessor(const PM4PacketProcessorParams &p); PM4PacketProcessor(const PM4PacketProcessorParams &p);

View File

@@ -486,12 +486,16 @@ class PM4Queue
uint32_t pipe() { return _pkt.pipe; } uint32_t pipe() { return _pkt.pipe; }
uint32_t queue() { return _pkt.queueSlot; } uint32_t queue() { return _pkt.queueSlot; }
bool privileged() { return _pkt.queueSel == 0 ? 1 : 0; } bool privileged() { return _pkt.queueSel == 0 ? 1 : 0; }
// Returns the queueType field of the map queues packet stored for this
// queue.
uint32_t queueType() { return _pkt.queueType; }
// A queue is reported as static whenever the queueType stored in its map
// queues packet is nonzero.
bool isStatic() { return (_pkt.queueType != 0); }
PM4MapQueues* getPkt() { return &_pkt; } PM4MapQueues* getPkt() { return &_pkt; }
void setPkt(uint32_t me, uint32_t pipe, uint32_t queue, bool privileged) { void setPkt(uint32_t me, uint32_t pipe, uint32_t queue, bool privileged,
uint32_t queueType) {
_pkt.me = me - 1; _pkt.me = me - 1;
_pkt.pipe = pipe; _pkt.pipe = pipe;
_pkt.queueSlot = queue; _pkt.queueSlot = queue;
_pkt.queueSel = (privileged == 0) ? 1 : 0; _pkt.queueSel = (privileged == 0) ? 1 : 0;
_pkt.queueType = queueType;
} }
// Same computation as processMQD. See comment there for details. // Same computation as processMQD. See comment there for details.

View File

@@ -179,7 +179,8 @@ SDMAEngine::translate(Addr vaddr, Addr size)
} }
void void
SDMAEngine::registerRLCQueue(Addr doorbell, Addr mqdAddr, SDMAQueueDesc *mqd) SDMAEngine::registerRLCQueue(Addr doorbell, Addr mqdAddr, SDMAQueueDesc *mqd,
bool isStatic)
{ {
uint32_t rlc_size = 4UL << bits(mqd->sdmax_rlcx_rb_cntl, 6, 1); uint32_t rlc_size = 4UL << bits(mqd->sdmax_rlcx_rb_cntl, 6, 1);
Addr rptr_wb_addr = mqd->sdmax_rlcx_rb_rptr_addr_hi; Addr rptr_wb_addr = mqd->sdmax_rlcx_rb_rptr_addr_hi;
@@ -202,6 +203,7 @@ SDMAEngine::registerRLCQueue(Addr doorbell, Addr mqdAddr, SDMAQueueDesc *mqd)
rlc0.setMQD(mqd); rlc0.setMQD(mqd);
rlc0.setMQDAddr(mqdAddr); rlc0.setMQDAddr(mqdAddr);
rlc0.setPriv(priv); rlc0.setPriv(priv);
rlc0.setStatic(isStatic);
} else if (!rlc1.valid()) { } else if (!rlc1.valid()) {
DPRINTF(SDMAEngine, "Doorbell %lx mapped to RLC1\n", doorbell); DPRINTF(SDMAEngine, "Doorbell %lx mapped to RLC1\n", doorbell);
rlcInfo[1] = doorbell; rlcInfo[1] = doorbell;
@@ -216,16 +218,22 @@ SDMAEngine::registerRLCQueue(Addr doorbell, Addr mqdAddr, SDMAQueueDesc *mqd)
rlc1.setMQD(mqd); rlc1.setMQD(mqd);
rlc1.setMQDAddr(mqdAddr); rlc1.setMQDAddr(mqdAddr);
rlc1.setPriv(priv); rlc1.setPriv(priv);
rlc1.setStatic(isStatic);
} else { } else {
panic("No free RLCs. Check they are properly unmapped."); panic("No free RLCs. Check they are properly unmapped.");
} }
} }
void void
SDMAEngine::unregisterRLCQueue(Addr doorbell) SDMAEngine::unregisterRLCQueue(Addr doorbell, bool unmap_static)
{ {
DPRINTF(SDMAEngine, "Unregistering RLC queue at %#lx\n", doorbell); DPRINTF(SDMAEngine, "Unregistering RLC queue at %#lx\n", doorbell);
if (rlcInfo[0] == doorbell) { if (rlcInfo[0] == doorbell) {
if (!unmap_static && rlc0.isStatic()) {
DPRINTF(SDMAEngine, "RLC0 is static. Will not unregister.\n");
return;
}
SDMAQueueDesc *mqd = rlc0.getMQD(); SDMAQueueDesc *mqd = rlc0.getMQD();
if (mqd) { if (mqd) {
DPRINTF(SDMAEngine, "Writing RLC0 SDMAMQD back to %#lx\n", DPRINTF(SDMAEngine, "Writing RLC0 SDMAMQD back to %#lx\n",
@@ -243,6 +251,11 @@ SDMAEngine::unregisterRLCQueue(Addr doorbell)
rlc0.valid(false); rlc0.valid(false);
rlcInfo[0] = 0; rlcInfo[0] = 0;
} else if (rlcInfo[1] == doorbell) { } else if (rlcInfo[1] == doorbell) {
if (!unmap_static && rlc1.isStatic()) {
DPRINTF(SDMAEngine, "RLC1 is static. Will not unregister.\n");
return;
}
SDMAQueueDesc *mqd = rlc1.getMQD(); SDMAQueueDesc *mqd = rlc1.getMQD();
if (mqd) { if (mqd) {
DPRINTF(SDMAEngine, "Writing RLC1 SDMAMQD back to %#lx\n", DPRINTF(SDMAEngine, "Writing RLC1 SDMAMQD back to %#lx\n",
@@ -262,15 +275,16 @@ SDMAEngine::unregisterRLCQueue(Addr doorbell)
} else { } else {
panic("Cannot unregister: no RLC queue at %#lx\n", doorbell); panic("Cannot unregister: no RLC queue at %#lx\n", doorbell);
} }
gpuDevice->unsetDoorbell(doorbell);
} }
void void
SDMAEngine::deallocateRLCQueues() SDMAEngine::deallocateRLCQueues(bool unmap_static)
{ {
for (auto doorbell: rlcInfo) { for (auto doorbell: rlcInfo) {
if (doorbell) { if (doorbell) {
unregisterRLCQueue(doorbell); unregisterRLCQueue(doorbell, unmap_static);
gpuDevice->unsetDoorbell(doorbell);
} }
} }
} }

View File

@@ -69,6 +69,7 @@ class SDMAEngine : public DmaVirtDevice
SDMAQueueDesc *_mqd; SDMAQueueDesc *_mqd;
Addr _mqd_addr = 0; Addr _mqd_addr = 0;
bool _priv = true; // Only used for RLC queues. True otherwise. bool _priv = true; // Only used for RLC queues. True otherwise.
bool _static = false;
public: public:
SDMAQueue() : _rptr(0), _wptr(0), _valid(false), _processing(false), SDMAQueue() : _rptr(0), _wptr(0), _valid(false), _processing(false),
_parent(nullptr), _ib(nullptr), _type(SDMAGfx), _mqd(nullptr) {} _parent(nullptr), _ib(nullptr), _type(SDMAGfx), _mqd(nullptr) {}
@@ -89,6 +90,7 @@ class SDMAEngine : public DmaVirtDevice
SDMAQueueDesc* getMQD() { return _mqd; } SDMAQueueDesc* getMQD() { return _mqd; }
Addr getMQDAddr() { return _mqd_addr; } Addr getMQDAddr() { return _mqd_addr; }
bool priv() { return _priv; } bool priv() { return _priv; }
bool isStatic() { return _static; }
void base(Addr value) { _base = value; } void base(Addr value) { _base = value; }
@@ -124,6 +126,7 @@ class SDMAEngine : public DmaVirtDevice
void setMQD(SDMAQueueDesc *mqd) { _mqd = mqd; } void setMQD(SDMAQueueDesc *mqd) { _mqd = mqd; }
void setMQDAddr(Addr mqdAddr) { _mqd_addr = mqdAddr; } void setMQDAddr(Addr mqdAddr) { _mqd_addr = mqdAddr; }
void setPriv(bool priv) { _priv = priv; } void setPriv(bool priv) { _priv = priv; }
void setStatic(bool isStatic) { _static = isStatic; }
}; };
/* SDMA Engine ID */ /* SDMA Engine ID */
@@ -307,9 +310,10 @@ class SDMAEngine : public DmaVirtDevice
/** /**
* Methods for RLC queues * Methods for RLC queues
*/ */
void registerRLCQueue(Addr doorbell, Addr mqdAddr, SDMAQueueDesc *mqd); void registerRLCQueue(Addr doorbell, Addr mqdAddr, SDMAQueueDesc *mqd,
void unregisterRLCQueue(Addr doorbell); bool isStatic);
void deallocateRLCQueues(); void unregisterRLCQueue(Addr doorbell, bool unmap_static);
void deallocateRLCQueues(bool unmap_static);
int cur_vmid = 0; int cur_vmid = 0;
}; };