diff --git a/src/dev/amdgpu/amdgpu_device.cc b/src/dev/amdgpu/amdgpu_device.cc
index 1b81c4d0b2..b25ffbf79f 100644
--- a/src/dev/amdgpu/amdgpu_device.cc
+++ b/src/dev/amdgpu/amdgpu_device.cc
@@ -466,7 +466,26 @@ AMDGPUDevice::writeDoorbell(PacketPtr pkt, Addr offset)
             panic("Write to unkown queue type!");
         }
     } else {
-        warn("Unknown doorbell offset: %lx\n", offset);
+        warn("Unknown doorbell offset: %lx. Saving to pending doorbells.\n",
+             offset);
+
+        // We have to ACK the PCI packet immediately, so create a copy of the
+        // packet here to send again. The payload is copied as well so the
+        // replayed doorbell does not carry uninitialized data.
+        RequestPtr pending_req(pkt->req);
+        PacketPtr pending_pkt = Packet::createWrite(pending_req);
+        uint8_t *pending_data = new uint8_t[pkt->getSize()];
+        pending_pkt->dataDynamic(pending_data);
+        pending_pkt->setData(pkt->getConstPtr<uint8_t>());
+
+        // Only the most recent write to a doorbell is kept. If one is
+        // already pending for this offset, free it so it is not leaked.
+        auto [pending_it, inserted] =
+            pendingDoorbellPkts.try_emplace(offset, pending_pkt);
+        if (!inserted) {
+            delete pending_it->second;
+            pending_it->second = pending_pkt;
+        }
     }
 }
 
@@ -589,6 +608,28 @@ AMDGPUDevice::write(PacketPtr pkt)
     return pioDelay;
 }
 
+/**
+ * Replay the doorbell write, if any, that writeDoorbell() saved for this
+ * offset because no doorbell was registered there at the time.
+ */
+void
+AMDGPUDevice::processPendingDoorbells(uint32_t offset)
+{
+    auto pending_it = pendingDoorbellPkts.find(offset);
+    if (pending_it == pendingDoorbellPkts.end()) {
+        return;
+    }
+
+    // Take the packet out of the map before replaying it, so that a
+    // re-queue inside writeDoorbell() cannot free the packet in flight.
+    PacketPtr pending_pkt = pending_it->second;
+    pendingDoorbellPkts.erase(pending_it);
+
+    DPRINTF(AMDGPUDevice, "Sending pending doorbell %x\n", offset);
+    writeDoorbell(pending_pkt, offset);
+    delete pending_pkt;
+}
+
 bool
 AMDGPUDevice::haveRegVal(uint32_t addr)
 {
@@ -812,6 +853,14 @@ AMDGPUDevice::deallocateAllQueues()
     for (auto& it : sdmaEngs) {
         it.second->deallocateRLCQueues();
     }
+
+    // "All" queues implicitly refers to all user queues. User queues begin at
+    // doorbell address 0x4000, so unmap any queue at or above that address.
+    for (auto [offset, vmid] : doorbellVMIDMap) {
+        if (offset >= 0x4000) {
+            doorbells.erase(offset);
+        }
+    }
 }
 
 void
diff --git a/src/dev/amdgpu/amdgpu_device.hh b/src/dev/amdgpu/amdgpu_device.hh
index 7f69ec19f6..b6b6e2a81a 100644
--- a/src/dev/amdgpu/amdgpu_device.hh
+++ b/src/dev/amdgpu/amdgpu_device.hh
@@ -90,6 +90,9 @@ class AMDGPUDevice : public PciDevice
     using GPURegMap = std::unordered_map<uint32_t, uint64_t>;
     GPURegMap regs;
     std::unordered_map<uint32_t, QueueType> doorbells;
+    // Copies of doorbell writes that hit an unregistered doorbell offset,
+    // keyed by that offset, until processPendingDoorbells() replays them.
+    std::unordered_map<uint32_t, PacketPtr> pendingDoorbellPkts;
 
     /**
      * VGA ROM methods
@@ -187,6 +190,7 @@ class AMDGPUDevice : public PciDevice
      * Set handles to GPU blocks.
      */
     void setDoorbellType(uint32_t offset, QueueType qt);
+    void processPendingDoorbells(uint32_t offset);
     void setSDMAEngine(Addr offset, SDMAEngine *eng);
 
     /**
diff --git a/src/dev/amdgpu/pm4_packet_processor.cc b/src/dev/amdgpu/pm4_packet_processor.cc
index fdb6f9d7ce..352af400b0 100644
--- a/src/dev/amdgpu/pm4_packet_processor.cc
+++ b/src/dev/amdgpu/pm4_packet_processor.cc
@@ -384,7 +384,10 @@ PM4PacketProcessor::mapQueues(PM4Queue *q, PM4MapQueues *pkt)
                 "Mapping mqd from %p %p (vmid %d - last vmid %d).\n",
                 addr, pkt->mqdAddr, pkt->vmid, gpuDevice->lastVMID());
 
-        gpuDevice->mapDoorbellToVMID(pkt->doorbellOffset,
+        // The doorbellOffset is a dword address. We shift by two / multiply
+        // by four to get the byte address to match doorbell addresses in
+        // the GPU device.
+        gpuDevice->mapDoorbellToVMID(pkt->doorbellOffset << 2,
                                      gpuDevice->lastVMID());
 
         QueueDesc *mqd = new QueueDesc();
@@ -444,6 +447,8 @@ PM4PacketProcessor::processMQD(PM4MapQueues *pkt, PM4Queue *q, Addr addr,
 
     DPRINTF(PM4PacketProcessor, "PM4 mqd read completed, base %p, mqd %p, "
             "hqdAQL %d.\n", mqd->base, mqd->mqdBase, mqd->aql);
+
+    gpuDevice->processPendingDoorbells(offset);
 }
 
 void
@@ -472,6 +477,8 @@ PM4PacketProcessor::processSDMAMQD(PM4MapQueues *pkt, PM4Queue *q, Addr addr,
     // Register doorbell with GPU device
     gpuDevice->setSDMAEngine(pkt->doorbellOffset << 2, sdma_eng);
     gpuDevice->setDoorbellType(pkt->doorbellOffset << 2, RLC);
+
+    gpuDevice->processPendingDoorbells(pkt->doorbellOffset << 2);
 }
 
 void
@@ -576,6 +583,7 @@ PM4PacketProcessor::unmapQueues(PM4Queue *q, PM4UnmapQueues *pkt)
         gpuDevice->deallocatePasid(pkt->pasid);
         break;
       case 2:
+        panic("Unmapping queue selection 2 unimplemented\n");
         break;
       case 3: {
         auto &hsa_pp = gpuDevice->CP()->hsaPacketProc();