diff --git a/src/dev/amdgpu/SConscript b/src/dev/amdgpu/SConscript index 428d1c56bc..b8ba454d48 100644 --- a/src/dev/amdgpu/SConscript +++ b/src/dev/amdgpu/SConscript @@ -39,6 +39,7 @@ SimObject('AMDGPU.py', sim_objects=['AMDGPUDevice', 'AMDGPUInterruptHandler', tags='x86 isa') Source('amdgpu_device.cc', tags='x86 isa') +Source('amdgpu_gfx.cc', tags='x86 isa') Source('amdgpu_nbio.cc', tags='x86 isa') Source('amdgpu_vm.cc', tags='x86 isa') Source('interrupt_handler.cc', tags='x86 isa') diff --git a/src/dev/amdgpu/amdgpu_device.cc b/src/dev/amdgpu/amdgpu_device.cc index 5cc8df424f..1b81c4d0b2 100644 --- a/src/dev/amdgpu/amdgpu_device.cc +++ b/src/dev/amdgpu/amdgpu_device.cc @@ -379,6 +379,9 @@ AMDGPUDevice::readMMIO(PacketPtr pkt, Addr offset) case GRBM_BASE: gpuvm.readMMIO(pkt, aperture_offset >> GRBM_OFFSET_SHIFT); break; + case GFX_BASE: + gfx.readMMIO(pkt, aperture_offset); + break; case MMHUB_BASE: gpuvm.readMMIO(pkt, aperture_offset >> MMHUB_OFFSET_SHIFT); break; @@ -507,6 +510,9 @@ AMDGPUDevice::writeMMIO(PacketPtr pkt, Addr offset) case NBIO_BASE: nbio.writeMMIO(pkt, aperture_offset); break; + case GFX_BASE: + gfx.writeMMIO(pkt, aperture_offset); + break; default: DPRINTF(AMDGPUDevice, "Unknown MMIO aperture for %#x\n", offset); break; diff --git a/src/dev/amdgpu/amdgpu_device.hh b/src/dev/amdgpu/amdgpu_device.hh index 56ed2f4fa8..7f69ec19f6 100644 --- a/src/dev/amdgpu/amdgpu_device.hh +++ b/src/dev/amdgpu/amdgpu_device.hh @@ -36,6 +36,7 @@ #include "base/bitunion.hh" #include "dev/amdgpu/amdgpu_defines.hh" +#include "dev/amdgpu/amdgpu_gfx.hh" #include "dev/amdgpu/amdgpu_nbio.hh" #include "dev/amdgpu/amdgpu_vm.hh" #include "dev/amdgpu/memory_manager.hh" @@ -109,6 +110,7 @@ class AMDGPUDevice : public PciDevice * Blocks of the GPU */ AMDGPUNbio nbio; + AMDGPUGfx gfx; AMDGPUMemoryManager *gpuMemMgr; AMDGPUInterruptHandler *deviceIH; AMDGPUVM gpuvm; diff --git a/src/dev/amdgpu/amdgpu_gfx.cc b/src/dev/amdgpu/amdgpu_gfx.cc new file mode 100644 index 
0000000000..3d5b274b86
--- /dev/null
+++ b/src/dev/amdgpu/amdgpu_gfx.cc
@@ -0,0 +1,92 @@
+/*
+ * Copyright (c) 2023 Advanced Micro Devices, Inc.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above copyright notice,
+ * this list of conditions and the following disclaimer in the documentation
+ * and/or other materials provided with the distribution.
+ *
+ * 3. Neither the name of the copyright holder nor the names of its
+ * contributors may be used to endorse or promote products derived from this
+ * software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include "dev/amdgpu/amdgpu_gfx.hh"
+
+#include "mem/packet_access.hh"
+#include "sim/core.hh"
+
+namespace gem5
+{
+
+/**
+ * Service a read of a GFX aperture register. The LSB and MSB clock count
+ * registers return the low and high 32 bits of the last captured clock
+ * count; a read of any other offset leaves the packet data untouched.
+ *
+ * @param pkt Packet whose data is filled with the register value.
+ * @param offset Offset of the register within the GFX_BASE aperture.
+ */
+void
+AMDGPUGfx::readMMIO(PacketPtr pkt, Addr offset)
+{
+    switch (offset) {
+      case AMDGPU_MM_RLC_GPU_CLOCK_COUNT_LSB:
+        // The count is exposed as two 32-bit registers, so write exactly
+        // one 32-bit half of the 64-bit value into the packet.
+        pkt->setLE<uint32_t>(captured_clock_count);
+        break;
+      case AMDGPU_MM_RLC_GPU_CLOCK_COUNT_MSB:
+        pkt->setLE<uint32_t>(captured_clock_count >> 32);
+        break;
+      default:
+        break;
+    }
+}
+
+/**
+ * Service a write to a GFX aperture register. A write to the capture
+ * register latches a new clock count for later reads; a write to any
+ * other offset is ignored. The data carried by the packet is not used.
+ *
+ * @param pkt Packet for the write; its payload is not examined.
+ * @param offset Offset of the register within the GFX_BASE aperture.
+ */
+void
+AMDGPUGfx::writeMMIO(PacketPtr pkt, Addr offset)
+{
+    switch (offset) {
+      case AMDGPU_MM_RLC_CAPTURE_GPU_CLOCK_COUNT:
+        // Use gem5 Ticks in nanoseconds as the counter. The first capture
+        // is expected to return zero; the member's initial value of 1 marks
+        // that no capture has been taken yet.
+        if (captured_clock_count == 1) {
+            captured_clock_count = 0;
+        } else {
+            captured_clock_count = curTick() / sim_clock::as_int::ns;
+        }
+        break;
+      default:
+        break;
+    }
+}
+
+} // namespace gem5
diff --git a/src/dev/amdgpu/amdgpu_gfx.hh b/src/dev/amdgpu/amdgpu_gfx.hh
new file mode 100644
index 0000000000..c32b8624cf
--- /dev/null
+++ b/src/dev/amdgpu/amdgpu_gfx.hh
@@ -0,0 +1,75 @@
+/*
+ * Copyright (c) 2023 Advanced Micro Devices, Inc.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above copyright notice,
+ * this list of conditions and the following disclaimer in the documentation
+ * and/or other materials provided with the distribution.
+ *
+ * 3. Neither the name of the copyright holder nor the names of its
+ * contributors may be used to endorse or promote products derived from this
+ * software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef __DEV_AMDGPU_AMDGPU_GFX_HH__
+#define __DEV_AMDGPU_AMDGPU_GFX_HH__
+
+#include "base/types.hh"
+#include "mem/packet.hh"
+
+/**
+ * MMIO offsets for GFX. This class handles MMIO reads/writes to the GFX_BASE
+ * aperture which are generally read/written by the gfx driver source here:
+ *
+ * drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c
+ * https://github.com/RadeonOpenCompute/ROCK-Kernel-Driver/blob/master/
+ * drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
+ *
+ * The MMIO addresses in the file are dword addresses. Here they are converted
+ * to byte addresses so gem5 does not need to shift the values.
+ */
+
+// Registers used to read GPU clock count used in profiling
+#define AMDGPU_MM_RLC_GPU_CLOCK_COUNT_LSB 0x13090
+#define AMDGPU_MM_RLC_GPU_CLOCK_COUNT_MSB 0x13094
+#define AMDGPU_MM_RLC_CAPTURE_GPU_CLOCK_COUNT 0x13098
+
+namespace gem5
+{
+
+class AMDGPUGfx
+{
+  public:
+    AMDGPUGfx() { }
+
+    void readMMIO(PacketPtr pkt, Addr offset);
+    void writeMMIO(PacketPtr pkt, Addr offset);
+
+  private:
+    /*
+     * GPU clock count at the time capture MMIO is received.
+     */
+    uint64_t captured_clock_count = 1;
+};
+
+} // namespace gem5
+
+#endif // __DEV_AMDGPU_AMDGPU_GFX_HH__
diff --git a/src/dev/hsa/hsa_signal.hh b/src/dev/hsa/hsa_signal.hh
index 6acbcb7e1b..7d1f316f04 100644
--- a/src/dev/hsa/hsa_signal.hh
+++ b/src/dev/hsa/hsa_signal.hh
@@ -69,6 +69,16 @@ typedef struct amd_signal_s
     uint32_t reserved3[2];
 } amd_signal_t;
 
+/**
+ * Pair of timestamps that the GPU command processor DMA-writes as one unit
+ * starting at the start_ts field of an amd_signal_t.
+ */
+typedef struct
+{
+    uint64_t start_ts;
+    uint64_t end_ts;
+} amd_event_t;
+
 } // namespace gem5
 
 #endif // DEV_HSA_HSA_SIGNAL_H
diff --git a/src/gpu-compute/gpu_command_processor.cc b/src/gpu-compute/gpu_command_processor.cc
index db69ad5cbd..ecc5f1d98b 100644
--- a/src/gpu-compute/gpu_command_processor.cc
+++ b/src/gpu-compute/gpu_command_processor.cc
@@ -248,6 +248,10 @@ GPUCommandProcessor::submitDispatchPkt(void *raw_pkt, uint32_t queue_id,
 
     initABI(task);
     ++dynamic_task_id;
+
+    // The driver expects the start time to be in ns
+    Tick start_ts = curTick() / sim_clock::as_int::ns;
+    dispatchStartTime.insert({disp_pkt->completion_signal, start_ts});
 }
 
 void
@@ -280,16 +284,6 @@ GPUCommandProcessor::sendCompletionSignal(Addr signal_handle)
 void
 GPUCommandProcessor::updateHsaSignalAsync(Addr signal_handle, int64_t diff)
 {
-    Addr value_addr = getHsaSignalValueAddr(signal_handle);
-
-    uint64_t *signalValue = new uint64_t;
-    auto cb = new DmaVirtCallback<uint64_t>(
-        [ = ] (const uint64_t &)
-            { updateHsaSignalData(value_addr, diff, signalValue); });
-    dmaReadVirt(value_addr, sizeof(uint64_t), cb, (void *)signalValue);
-    DPRINTF(GPUCommandProc, "updateHsaSignalAsync reading value addr %lx\n",
-            value_addr);
-
     Addr mailbox_addr = getHsaSignalMailboxAddr(signal_handle);
     uint64_t *mailboxValue = new uint64_t;
     auto cb2 = new DmaVirtCallback<uint64_t>(
@@ -300,20 +294,6 @@ GPUCommandProcessor::updateHsaSignalAsync(Addr signal_handle, int64_t diff)
             mailbox_addr);
 }
 
-void
-GPUCommandProcessor::updateHsaSignalData(Addr value_addr, int64_t diff,
-                                         uint64_t *prev_value)
-{
-    // Reuse the value allocated for the read
-    DPRINTF(GPUCommandProc, "updateHsaSignalData read %ld, writing %ld\n",
-            *prev_value, *prev_value + diff);
-    *prev_value += diff;
-    auto cb = new DmaVirtCallback<uint64_t>(
-        [ = ] (const uint64_t &)
-            { updateHsaSignalDone(prev_value); });
-    dmaWriteVirt(value_addr, sizeof(uint64_t), cb, (void *)prev_value);
-}
-
 void
 GPUCommandProcessor::updateHsaMailboxData(Addr signal_handle,
                                           uint64_t *mailbox_value)
@@ -331,6 +311,20 @@ GPUCommandProcessor::updateHsaMailboxData(Addr signal_handle,
         dmaReadVirt(event_addr, sizeof(uint64_t), cb, (void *)mailbox_value);
     } else {
         delete mailbox_value;
+
+        Addr ts_addr = signal_handle + offsetof(amd_signal_t, start_ts);
+
+        amd_event_t *event_ts = new amd_event_t;
+        event_ts->start_ts = dispatchStartTime[signal_handle];
+        event_ts->end_ts = curTick() / sim_clock::as_int::ns;
+        auto cb = new DmaVirtCallback<uint64_t>(
+            [ = ] (const uint64_t &)
+                { updateHsaEventTs(signal_handle, event_ts); });
+        dmaWriteVirt(ts_addr, sizeof(amd_event_t), cb, (void *)event_ts);
+        DPRINTF(GPUCommandProc, "updateHsaMailboxData writing timestamp addr "
+                "%lx\n", ts_addr);
+
+        dispatchStartTime.erase(signal_handle);
     }
 }
 
@@ -346,6 +340,72 @@ GPUCommandProcessor::updateHsaEventData(Addr signal_handle,
             [ = ] (const uint64_t &)
                 { updateHsaSignalDone(event_value); }, *event_value);
     dmaWriteVirt(mailbox_addr, sizeof(uint64_t), cb, &cb->dmaBuffer, 0);
+
+    Addr ts_addr = signal_handle + offsetof(amd_signal_t, start_ts);
+
+    amd_event_t *event_ts = new amd_event_t;
+    event_ts->start_ts = dispatchStartTime[signal_handle];
+    event_ts->end_ts = curTick() / sim_clock::as_int::ns;
+    auto cb2 = new DmaVirtCallback<uint64_t>(
+        [ = ] (const uint64_t &)
+            { updateHsaEventTs(signal_handle, event_ts); });
+    dmaWriteVirt(ts_addr, sizeof(amd_event_t), cb2, (void *)event_ts);
+    DPRINTF(GPUCommandProc, "updateHsaEventData writing timestamp addr %lx\n",
+            ts_addr);
+
+    dispatchStartTime.erase(signal_handle);
+}
+
+/**
+ * Callback attached to the timestamp DMA write. Frees the timestamp buffer
+ * and starts a DMA read of the signal value, which updateHsaSignalData()
+ * then decrements and writes back.
+ *
+ * NOTE: the decrement is fixed at -1 here; updateHsaMailboxData() and
+ * updateHsaEventData() do not carry updateHsaSignalAsync()'s diff this far.
+ *
+ * @param signal_handle Address of the amd_signal_t being updated.
+ * @param ts Timestamp buffer allocated by the caller; deleted here.
+ */
+void
+GPUCommandProcessor::updateHsaEventTs(Addr signal_handle,
+                                      amd_event_t *ts)
+{
+    delete ts;
+
+    Addr value_addr = getHsaSignalValueAddr(signal_handle);
+    int64_t diff = -1;
+
+    uint64_t *signalValue = new uint64_t;
+    auto cb = new DmaVirtCallback<uint64_t>(
+        [ = ] (const uint64_t &)
+            { updateHsaSignalData(value_addr, diff, signalValue); });
+    dmaReadVirt(value_addr, sizeof(uint64_t), cb, (void *)signalValue);
+    DPRINTF(GPUCommandProc, "updateHsaEventTs reading value addr %lx\n",
+            value_addr);
+}
+
+/**
+ * Callback for the signal value read: adds diff to the value that was read
+ * and DMA-writes the result back to value_addr.
+ *
+ * @param value_addr Address of the signal value.
+ * @param diff Amount added to the value that was read.
+ * @param prev_value Buffer holding the value read; reused for the write and
+ *                   passed on to updateHsaSignalDone() by the write's callback.
+ */
+void
+GPUCommandProcessor::updateHsaSignalData(Addr value_addr, int64_t diff,
+                                         uint64_t *prev_value)
+{
+    // Reuse the value allocated for the read
+    DPRINTF(GPUCommandProc, "updateHsaSignalData read %ld, writing %ld\n",
+            *prev_value, *prev_value + diff);
+    *prev_value += diff;
+    auto cb = new DmaVirtCallback<uint64_t>(
+        [ = ] (const uint64_t &)
+            { updateHsaSignalDone(prev_value); });
+    dmaWriteVirt(value_addr, sizeof(uint64_t), cb, (void *)prev_value);
 }
 
 void
diff --git a/src/gpu-compute/gpu_command_processor.hh b/src/gpu-compute/gpu_command_processor.hh
index 10407b9f93..f6783834eb 100644
--- a/src/gpu-compute/gpu_command_processor.hh
+++ b/src/gpu-compute/gpu_command_processor.hh
@@ -117,6 +117,7 @@ class GPUCommandProcessor : public DmaVirtDevice
     void updateHsaSignalDone(uint64_t *signal_value);
     void updateHsaMailboxData(Addr signal_handle, uint64_t *mailbox_value);
     void updateHsaEventData(Addr signal_handle, uint64_t *event_value);
+    void updateHsaEventTs(Addr signal_handle, amd_event_t *event_value);
 
     uint64_t functionalReadHsaSignal(Addr signal_handle);
 
@@ -148,6 +149,9 @@ class GPUCommandProcessor : public DmaVirtDevice
     HSAPacketProcessor *hsaPP;
     TranslationGenPtr translate(Addr vaddr, Addr size) override;
 
+    // Dispatch start times in ns, keyed by completion signal handle.
+    std::unordered_map<Addr, Tick> dispatchStartTime;
+
     /**
      * Perform a DMA read of the read_dispatch_id_field_base_byte_offset
      * field, which follows directly after the read_dispatch_id (the read