Patch summary (text ahead of the first file header is ignored by patch tools):
  * Adds an AMDGPUSystemHub debug flag.
  * AMDGPUSystemHub::sendRequest now queues requests per address in
    outstandingReqs and issues them one at a time; the response event of a
    finished request triggers the next queued request for that address.
  * Atomic packets are handled as a DMA read of the old value, followed by
    applying the packet's atomic op to a copy and a DMA write of the result.

Reconstruction notes:
  * This patch had been collapsed onto a few long lines and its C++ template
    arguments had been stripped (std::make_shared<Request>, getPtr<uint8_t>,
    getLE<uint64_t>/getLE<uint32_t>, and the parameters of the DeferredReq,
    DeferredReqList and outstandingReqs declarations). They are restored here.
  * Line breaks and blank lines were rebuilt to agree with every hunk's line
    counts. Indentation follows gem5 style; the exact whitespace of removed
    and context lines could not be recovered, so verify against the tree
    (or apply with whitespace tolerance) before relying on it.

diff --git a/src/dev/amdgpu/SConscript b/src/dev/amdgpu/SConscript
index 9f8eeacd00..428d1c56bc 100644
--- a/src/dev/amdgpu/SConscript
+++ b/src/dev/amdgpu/SConscript
@@ -50,6 +50,7 @@ Source('system_hub.cc', tags='x86 isa')
 
 DebugFlag('AMDGPUDevice', tags='x86 isa')
 DebugFlag('AMDGPUMem', tags='x86 isa')
+DebugFlag('AMDGPUSystemHub', tags='x86 isa')
 DebugFlag('PM4PacketProcessor', tags='x86 isa')
 DebugFlag('SDMAEngine', tags='x86 isa')
 DebugFlag('SDMAData', tags='x86 isa')
diff --git a/src/dev/amdgpu/system_hub.cc b/src/dev/amdgpu/system_hub.cc
index 7a252ea0fa..892a7c4535 100644
--- a/src/dev/amdgpu/system_hub.cc
+++ b/src/dev/amdgpu/system_hub.cc
@@ -31,6 +31,8 @@
 
 #include "dev/amdgpu/system_hub.hh"
 
+#include "debug/AMDGPUSystemHub.hh"
+#include "mem/packet_access.hh"
 #include "mem/port.hh"
 
 namespace gem5
@@ -39,16 +41,92 @@ namespace gem5
 void
 AMDGPUSystemHub::sendRequest(PacketPtr pkt, Event *callback)
 {
-    ResponseEvent *dmaRespEvent = new ResponseEvent(callback);
-    Tick delay = 0;
+    // Some requests, in particular atomics, need to be sent in order
+    // to receive the correct values. If there is an atomic in progress
+    // we must block it until that request is complete. This is overly
+    // conservative and blocks reads/writes but this situation is rare
+    // so it should not impact simulated performance.
+    DeferredReq this_req(pkt, callback);
+    outstandingReqs[pkt->getAddr()].push_back(this_req);
+
+    if (outstandingReqs[pkt->getAddr()].size () > 1) {
+        // There is another request in progress, Delay this one.
+        DPRINTF(AMDGPUSystemHub, "SystemHub deferring request for %#lx\n",
+                pkt->getAddr());
+    } else {
+        // No other requests, we can send immediately.
+        sendDeferredRequest(this_req);
+    }
+}
+
+void
+AMDGPUSystemHub::sendDeferredRequest(DeferredReq& deferredReq)
+{
+    PacketPtr pkt = deferredReq.first;
+    Event *callback = deferredReq.second;
+    Tick delay = 0;
+    std::string req_type;
+
+    if (pkt->isAtomicOp()) {
+        AtomicResponseEvent *atomicRespEvent =
+            new AtomicResponseEvent(*this, callback, pkt);
+
+        // First read the value. The response event will do the atomic/write
+        // This places the current value in the packet, which is correct since
+        // atomics return the value prior to performing the atomic.
+        dmaRead(pkt->getAddr(), pkt->getSize(), atomicRespEvent,
+                pkt->getPtr<uint8_t>(), 0, 0, delay);
+
+        req_type = "Atomic";
+    } else if (pkt->isWrite()) {
+        ResponseEvent *dmaRespEvent =
+            new ResponseEvent(*this, callback, pkt);
 
-    if (pkt->isWrite()) {
         dmaWrite(pkt->getAddr(), pkt->getSize(), dmaRespEvent,
                  pkt->getPtr<uint8_t>(), 0, 0, delay);
+
+        req_type = "Write";
     } else {
+        ResponseEvent *dmaRespEvent =
+            new ResponseEvent(*this, callback, pkt);
+
         assert(pkt->isRead());
         dmaRead(pkt->getAddr(), pkt->getSize(), dmaRespEvent,
                 pkt->getPtr<uint8_t>(), 0, 0, delay);
+
+        req_type = "Read";
+    }
+
+    DPRINTF(AMDGPUSystemHub, "SystemHub %s request for %#lx size %d\n",
+            req_type.c_str(), pkt->getAddr(), pkt->getSize());
+}
+
+void
+AMDGPUSystemHub::sendNextRequest(Addr addr, const PacketPtr donePkt)
+{
+    // Remove our request
+    assert(outstandingReqs.count(addr));
+
+    [[maybe_unused]] DeferredReq& frontPkt = outstandingReqs[addr].front();
+    assert(frontPkt.first == donePkt);
+
+    outstandingReqs[addr].pop_front();
+
+    // If there are no more requests this can be removed from the map.
+    // Otherwise issue the next request in the list
+    if (outstandingReqs[addr].empty()) {
+        DPRINTF(AMDGPUSystemHub, "SystemHub done with packets for addr %#lx\n",
+                donePkt->getAddr());
+
+        outstandingReqs.erase(addr);
+    } else {
+        DeferredReq& nextPkt = outstandingReqs[addr].front();
+
+        DPRINTF(AMDGPUSystemHub, "SystemHub sending deferred request for addr"
+                " %#lx size %d\n", nextPkt.first->getAddr(),
+                nextPkt.first->getSize());
+
+        sendDeferredRequest(nextPkt);
     }
 }
 
@@ -57,8 +135,9 @@ AMDGPUSystemHub::dmaResponse(PacketPtr pkt)
 {
 }
 
-AMDGPUSystemHub::ResponseEvent::ResponseEvent(Event *_callback)
-    : callback(_callback)
+AMDGPUSystemHub::ResponseEvent::ResponseEvent(
+        AMDGPUSystemHub& _hub, Event *_callback, PacketPtr _pkt)
+    : systemHub(_hub), callback(_callback), pkt(_pkt)
 {
     // Delete this event after process is called
     setFlags(Event::AutoDelete);
@@ -67,9 +146,62 @@ AMDGPUSystemHub::ResponseEvent::ResponseEvent(Event *_callback)
 void
 AMDGPUSystemHub::ResponseEvent::process()
 {
+    DPRINTF(AMDGPUSystemHub, "SystemHub response for addr %#lx size %d\n",
+            pkt->getAddr(), pkt->getSize());
+
+    systemHub.sendNextRequest(pkt->getAddr(), pkt);
+
     callback->process();
 }
 
+AMDGPUSystemHub::AtomicResponseEvent::AtomicResponseEvent(
+        AMDGPUSystemHub& _hub, Event *_callback, PacketPtr _pkt)
+    : systemHub(_hub), callback(_callback), pkt(_pkt)
+{
+    // Delete this event after process is called
+    setFlags(Event::AutoDelete);
+}
+
+void
+AMDGPUSystemHub::AtomicResponseEvent::process()
+{
+    // Make a second response with the original sender's callback
+    ResponseEvent *dmaRespEvent = new ResponseEvent(systemHub, callback, pkt);
+    Tick delay = 0;
+
+    // Create a new write packet which will be modifed then written
+    RequestPtr write_req =
+        std::make_shared<Request>(pkt->getAddr(), pkt->getSize(), 0,
+                                  pkt->requestorId());
+
+    PacketPtr write_pkt = Packet::createWrite(write_req);
+    uint8_t *write_data = new uint8_t[pkt->getSize()];
+    std::memcpy(write_data, pkt->getPtr<uint8_t>(), pkt->getSize());
+    write_pkt->dataDynamic(write_data);
+
+    // Perform the atomic on the write packet data. The atomic op is not
+    // copied from the original packet, so use the original packet.
+    assert(pkt->isAtomicOp());
+    (*pkt->getAtomicOp())(write_pkt->getPtr<uint8_t>());
+
+    // Write back the new value. The atomic is not considered done until
+    // this packet's response event is triggered.
+    systemHub.dmaWrite(write_pkt->getAddr(), write_pkt->getSize(),
+        dmaRespEvent, write_pkt->getPtr<uint8_t>(), 0, 0, delay);
+
+    // Atomics from the GPU are at most 64-bit and usually 32-bit.
+    // We can take a peek at the data for debugging purposes.
+    [[maybe_unused]] uint64_t req_data = 0x12345678;
+    if (write_pkt->getSize() == 8) {
+        req_data = write_pkt->getLE<uint64_t>();
+    } else if (pkt->getSize() == 4) {
+        req_data = write_pkt->getLE<uint32_t>();
+    }
+
+    DPRINTF(AMDGPUSystemHub, "SystemHub atomic %#lx writing %lx size %d\n",
+            write_pkt->getAddr(), req_data, write_pkt->getSize());
+}
+
 AddrRangeList
 AMDGPUSystemHub::getAddrRanges() const
 {
diff --git a/src/dev/amdgpu/system_hub.hh b/src/dev/amdgpu/system_hub.hh
index 0b48c3bc01..7955f5e694 100644
--- a/src/dev/amdgpu/system_hub.hh
+++ b/src/dev/amdgpu/system_hub.hh
@@ -63,16 +63,37 @@ class AMDGPUSystemHub : public DmaDevice
     AddrRangeList getAddrRanges() const override;
 
   private:
+    typedef std::pair<PacketPtr, Event*> DeferredReq;
+    typedef std::list<DeferredReq> DeferredReqList;
+    std::unordered_map<Addr, DeferredReqList> outstandingReqs;
+
+    void sendNextRequest(Addr addr, const PacketPtr donePkt);
+    void sendDeferredRequest(DeferredReq& deferredReq);
 
     class ResponseEvent : public Event
     {
-       Event *callback;
+        AMDGPUSystemHub &systemHub;
+        Event *callback;
+        PacketPtr pkt;
 
-       public:
-        ResponseEvent(Event *_callback);
+      public:
+        ResponseEvent(AMDGPUSystemHub& _hub,
+                      Event *_callback, PacketPtr _pkt);
 
         void process();
+    };
 
+    class AtomicResponseEvent : public Event
+    {
+        AMDGPUSystemHub &systemHub;
+        Event *callback;
+        PacketPtr pkt;
+
+      public:
+        AtomicResponseEvent(AMDGPUSystemHub& _hub,
+                            Event *_callback, PacketPtr _pkt);
+
+        void process();
     };
 };
 