diff --git a/src/dev/hsa/hsa_signal.hh b/src/dev/hsa/hsa_signal.hh index 6acbcb7e1b..7d1f316f04 100644 --- a/src/dev/hsa/hsa_signal.hh +++ b/src/dev/hsa/hsa_signal.hh @@ -69,6 +69,12 @@ typedef struct amd_signal_s uint32_t reserved3[2]; } amd_signal_t; +typedef struct +{ + uint64_t start_ts; + uint64_t end_ts; +} amd_event_t; + } // namespace gem5 #endif // DEV_HSA_HSA_SIGNAL_H diff --git a/src/gpu-compute/gpu_command_processor.cc b/src/gpu-compute/gpu_command_processor.cc index db69ad5cbd..ecc5f1d98b 100644 --- a/src/gpu-compute/gpu_command_processor.cc +++ b/src/gpu-compute/gpu_command_processor.cc @@ -248,6 +248,10 @@ GPUCommandProcessor::submitDispatchPkt(void *raw_pkt, uint32_t queue_id, initABI(task); ++dynamic_task_id; + + // The driver expects the start time to be in ns + Tick start_ts = curTick() / sim_clock::as_int::ns; + dispatchStartTime.insert({disp_pkt->completion_signal, start_ts}); } void @@ -280,16 +284,6 @@ GPUCommandProcessor::sendCompletionSignal(Addr signal_handle) void GPUCommandProcessor::updateHsaSignalAsync(Addr signal_handle, int64_t diff) { - Addr value_addr = getHsaSignalValueAddr(signal_handle); - - uint64_t *signalValue = new uint64_t; - auto cb = new DmaVirtCallback( - [ = ] (const uint64_t &) - { updateHsaSignalData(value_addr, diff, signalValue); }); - dmaReadVirt(value_addr, sizeof(uint64_t), cb, (void *)signalValue); - DPRINTF(GPUCommandProc, "updateHsaSignalAsync reading value addr %lx\n", - value_addr); - Addr mailbox_addr = getHsaSignalMailboxAddr(signal_handle); uint64_t *mailboxValue = new uint64_t; auto cb2 = new DmaVirtCallback( @@ -300,20 +294,6 @@ GPUCommandProcessor::updateHsaSignalAsync(Addr signal_handle, int64_t diff) mailbox_addr); } -void -GPUCommandProcessor::updateHsaSignalData(Addr value_addr, int64_t diff, - uint64_t *prev_value) -{ - // Reuse the value allocated for the read - DPRINTF(GPUCommandProc, "updateHsaSignalData read %ld, writing %ld\n", - *prev_value, *prev_value + diff); - *prev_value += diff; - auto cb = new DmaVirtCallback( - [ = ] (const uint64_t &) - { updateHsaSignalDone(prev_value); }); - dmaWriteVirt(value_addr, sizeof(uint64_t), cb, (void *)prev_value); -} - void GPUCommandProcessor::updateHsaMailboxData(Addr signal_handle, uint64_t *mailbox_value) @@ -331,6 +311,20 @@ GPUCommandProcessor::updateHsaMailboxData(Addr signal_handle, dmaReadVirt(event_addr, sizeof(uint64_t), cb, (void *)mailbox_value); } else { delete mailbox_value; + + Addr ts_addr = signal_handle + offsetof(amd_signal_t, start_ts); + + amd_event_t *event_ts = new amd_event_t; + event_ts->start_ts = dispatchStartTime[signal_handle]; + event_ts->end_ts = curTick() / sim_clock::as_int::ns; + auto cb = new DmaVirtCallback( + [ = ] (const uint64_t &) + { updateHsaEventTs(signal_handle, event_ts); }); + dmaWriteVirt(ts_addr, sizeof(amd_event_t), cb, (void *)event_ts); + DPRINTF(GPUCommandProc, "updateHsaMailboxData reading timestamp addr " + "%lx\n", ts_addr); + + dispatchStartTime.erase(signal_handle); } } @@ -346,6 +340,52 @@ GPUCommandProcessor::updateHsaEventData(Addr signal_handle, [ = ] (const uint64_t &) { updateHsaSignalDone(event_value); }, *event_value); dmaWriteVirt(mailbox_addr, sizeof(uint64_t), cb, &cb->dmaBuffer, 0); + + Addr ts_addr = signal_handle + offsetof(amd_signal_t, start_ts); + + amd_event_t *event_ts = new amd_event_t; + event_ts->start_ts = dispatchStartTime[signal_handle]; + event_ts->end_ts = curTick() / sim_clock::as_int::ns; + auto cb2 = new DmaVirtCallback( + [ = ] (const uint64_t &) + { updateHsaEventTs(signal_handle, event_ts); }); + dmaWriteVirt(ts_addr, sizeof(amd_event_t), cb2, (void *)event_ts); + DPRINTF(GPUCommandProc, "updateHsaEventData reading timestamp addr %lx\n", + ts_addr); + + dispatchStartTime.erase(signal_handle); +} + +void +GPUCommandProcessor::updateHsaEventTs(Addr signal_handle, + amd_event_t *ts) +{ + delete ts; + + Addr value_addr = getHsaSignalValueAddr(signal_handle); + int64_t diff = -1; + + uint64_t *signalValue = new uint64_t; + auto cb = new DmaVirtCallback( + [ = ] (const uint64_t &) + { updateHsaSignalData(value_addr, diff, signalValue); }); + dmaReadVirt(value_addr, sizeof(uint64_t), cb, (void *)signalValue); + DPRINTF(GPUCommandProc, "updateHsaSignalAsync reading value addr %lx\n", + value_addr); +} + +void +GPUCommandProcessor::updateHsaSignalData(Addr value_addr, int64_t diff, + uint64_t *prev_value) +{ + // Reuse the value allocated for the read + DPRINTF(GPUCommandProc, "updateHsaSignalData read %ld, writing %ld\n", + *prev_value, *prev_value + diff); + *prev_value += diff; + auto cb = new DmaVirtCallback( + [ = ] (const uint64_t &) + { updateHsaSignalDone(prev_value); }); + dmaWriteVirt(value_addr, sizeof(uint64_t), cb, (void *)prev_value); } void diff --git a/src/gpu-compute/gpu_command_processor.hh b/src/gpu-compute/gpu_command_processor.hh index 10407b9f93..f6783834eb 100644 --- a/src/gpu-compute/gpu_command_processor.hh +++ b/src/gpu-compute/gpu_command_processor.hh @@ -117,6 +117,7 @@ class GPUCommandProcessor : public DmaVirtDevice void updateHsaSignalDone(uint64_t *signal_value); void updateHsaMailboxData(Addr signal_handle, uint64_t *mailbox_value); void updateHsaEventData(Addr signal_handle, uint64_t *event_value); + void updateHsaEventTs(Addr signal_handle, amd_event_t *event_value); uint64_t functionalReadHsaSignal(Addr signal_handle); @@ -148,6 +149,9 @@ class GPUCommandProcessor : public DmaVirtDevice HSAPacketProcessor *hsaPP; TranslationGenPtr translate(Addr vaddr, Addr size) override; + // Keep track of start times for task dispatches. + std::unordered_map dispatchStartTime; + /** * Perform a DMA read of the read_dispatch_id_field_base_byte_offset * field, which follows directly after the read_dispatch_id (the read