From d05433b3f65af251e0e4b98266f50a5ab1b8a503 Mon Sep 17 00:00:00 2001 From: Matthew Poremba Date: Sat, 21 Oct 2023 12:36:35 -0500 Subject: [PATCH] gpu-compute,dev-hsa: Send vendor packet completion signal gem5 does not currently implement any vendor-specific HSA packets. Starting in ROCm 5.5, vendor packets appear to end with a completion signal. Not sending this completion causes gem5 to hang. Since these packets are not documented anywhere and need to be reverse engineered we send the completion signal, if non-zero, and finish the packet as is the current behavior. Testing: HIP examples working on most recent ROCm release (5.7.1). Change-Id: Id0841407bec564c84f590c943f0609b17e01e14c --- src/dev/hsa/hsa_packet.hh | 8 ++++++++ src/gpu-compute/gpu_command_processor.cc | 23 ++++++++++++++++------- 2 files changed, 24 insertions(+), 7 deletions(-) diff --git a/src/dev/hsa/hsa_packet.hh b/src/dev/hsa/hsa_packet.hh index 8c7d694431..8eab8385a6 100644 --- a/src/dev/hsa/hsa_packet.hh +++ b/src/dev/hsa/hsa_packet.hh @@ -100,6 +100,14 @@ struct _hsa_barrier_or_packet_t uint64_t completion_signal; }; +struct _hsa_generic_vendor_pkt +{ + uint32_t padding[14]; + Addr completion_signal; +}; +// All HSA AQL packets are 64 bytes. Confirm that here. +static_assert(sizeof(_hsa_generic_vendor_pkt) == 64); + } // namespace gem5 #endif // __DEV_HSA_HSA_PACKET_HH__ diff --git a/src/gpu-compute/gpu_command_processor.cc b/src/gpu-compute/gpu_command_processor.cc index ecc5f1d98b..5bed04b9dd 100644 --- a/src/gpu-compute/gpu_command_processor.cc +++ b/src/gpu-compute/gpu_command_processor.cc @@ -473,18 +473,27 @@ GPUCommandProcessor::driver() */ /** - * TODO: For now we simply tell the HSAPP to finish the packet, - * however a future patch will update this method to provide - * the proper handling of any required vendor-specific packets. - * In the version of ROCm that is currently supported (1.6) - * the runtime will send packets that direct the CP to - * invalidate the GPUs caches. 
We do this automatically on - * each kernel launch in the CU, so this is safe for now. + * TODO: For now we simply tell the HSAPP to finish the packet and write a + * completion signal, if any. However, in the future proper handling may be + * required for vendor specific packets. + * + * In the version of ROCm that is currently supported the runtime will send + * packets that direct the CP to invalidate the GPU caches. We do this + * automatically on each kernel launch in the CU, so that situation is safe + * for now. */ void GPUCommandProcessor::submitVendorPkt(void *raw_pkt, uint32_t queue_id, Addr host_pkt_addr) { + auto vendor_pkt = (_hsa_generic_vendor_pkt *)raw_pkt; + + if (vendor_pkt->completion_signal) { + sendCompletionSignal(vendor_pkt->completion_signal); + } + + warn("Ignoring vendor packet\n"); + hsaPP->finishPkt(raw_pkt, queue_id); }