diff --git a/src/dev/amdgpu/SConscript b/src/dev/amdgpu/SConscript index 713f0a6efe..9f8eeacd00 100644 --- a/src/dev/amdgpu/SConscript +++ b/src/dev/amdgpu/SConscript @@ -39,6 +39,7 @@ SimObject('AMDGPU.py', sim_objects=['AMDGPUDevice', 'AMDGPUInterruptHandler', tags='x86 isa') Source('amdgpu_device.cc', tags='x86 isa') +Source('amdgpu_nbio.cc', tags='x86 isa') Source('amdgpu_vm.cc', tags='x86 isa') Source('interrupt_handler.cc', tags='x86 isa') Source('memory_manager.cc', tags='x86 isa') diff --git a/src/dev/amdgpu/amdgpu_device.cc b/src/dev/amdgpu/amdgpu_device.cc index 2acf1f4af3..f58d1f7242 100644 --- a/src/dev/amdgpu/amdgpu_device.cc +++ b/src/dev/amdgpu/amdgpu_device.cc @@ -34,6 +34,7 @@ #include #include "debug/AMDGPUDevice.hh" +#include "dev/amdgpu/amdgpu_nbio.hh" #include "dev/amdgpu/amdgpu_vm.hh" #include "dev/amdgpu/interrupt_handler.hh" #include "dev/amdgpu/pm4_packet_processor.hh" @@ -129,6 +130,32 @@ AMDGPUDevice::AMDGPUDevice(const AMDGPUDeviceParams &p) pm4PktProc->setGPUDevice(this); cp->hsaPacketProc().setGPUDevice(this); cp->setGPUDevice(this); + + // Address aperture for device memory. We tell this to the driver and + // could possibly be anything, but these are the values used by hardware. + uint64_t mmhubBase = 0x8000ULL << 24; + uint64_t mmhubTop = 0x83ffULL << 24; + + // These are hardcoded register values to return what the driver expects + setRegVal(AMDGPU_MP0_SMN_C2PMSG_33, 0x80000000); + + // There are different registers for different GPUs, so we set the value + // based on the GPU type specified by the user. 
+ if (p.device_name == "Vega10") { + setRegVal(VEGA10_FB_LOCATION_BASE, mmhubBase >> 24); + setRegVal(VEGA10_FB_LOCATION_TOP, mmhubTop >> 24); + } else if (p.device_name == "MI100") { + setRegVal(MI100_FB_LOCATION_BASE, mmhubBase >> 24); + setRegVal(MI100_FB_LOCATION_TOP, mmhubTop >> 24); + setRegVal(MI100_MEM_SIZE_REG, 0x3ff0); // 16GB of memory + } else { + panic("Unknown GPU device %s\n", p.device_name); + } + + gpuvm.setMMHUBBase(mmhubBase); + gpuvm.setMMHUBTop(mmhubTop); + + nbio.setGPUDevice(this); } void @@ -236,35 +263,25 @@ AMDGPUDevice::readFrame(PacketPtr pkt, Addr offset) * first, ignoring any writes from driver. (2) Any other address from * device backing store / abstract memory class functionally. */ - if (offset == 0xa28000) { - /* - * Handle special counter addresses in framebuffer. These counter - * addresses expect the read to return previous value + 1. - */ - if (regs.find(pkt->getAddr()) == regs.end()) { - regs[pkt->getAddr()] = 1; - } else { - regs[pkt->getAddr()]++; - } - - pkt->setUintX(regs[pkt->getAddr()], ByteOrder::little); - } else { - /* - * Read the value from device memory. This must be done functionally - * because this method is called by the PCIDevice::read method which - * is a non-timing read. - */ - RequestPtr req = std::make_shared(offset, pkt->getSize(), 0, - vramRequestorId()); - PacketPtr readPkt = Packet::createRead(req); - uint8_t *dataPtr = new uint8_t[pkt->getSize()]; - readPkt->dataDynamic(dataPtr); - - auto system = cp->shader()->gpuCmdProc.system(); - system->getDeviceMemory(readPkt)->access(readPkt); - - pkt->setUintX(readPkt->getUintX(ByteOrder::little), ByteOrder::little); + if (nbio.readFrame(pkt, offset)) { + return; } + + /* + * Read the value from device memory. This must be done functionally + * because this method is called by the PCIDevice::read method which + * is a non-timing read. 
+ */ + RequestPtr req = std::make_shared(offset, pkt->getSize(), 0, + vramRequestorId()); + PacketPtr readPkt = Packet::createRead(req); + uint8_t *dataPtr = new uint8_t[pkt->getSize()]; + readPkt->dataDynamic(dataPtr); + + auto system = cp->shader()->gpuCmdProc.system(); + system->getDeviceMemory(readPkt)->access(readPkt); + + pkt->setUintX(readPkt->getUintX(ByteOrder::little), ByteOrder::little); } void @@ -285,8 +302,8 @@ AMDGPUDevice::readMMIO(PacketPtr pkt, Addr offset) DPRINTF(AMDGPUDevice, "Read MMIO %#lx\n", offset); mmioReader.readFromTrace(pkt, MMIO_BAR, offset); - if (regs.find(pkt->getAddr()) != regs.end()) { - uint64_t value = regs[pkt->getAddr()]; + if (regs.find(offset) != regs.end()) { + uint64_t value = regs[offset]; DPRINTF(AMDGPUDevice, "Reading what kernel wrote before: %#x\n", value); pkt->setUintX(value, ByteOrder::little); @@ -294,19 +311,8 @@ AMDGPUDevice::readMMIO(PacketPtr pkt, Addr offset) switch (aperture) { case NBIO_BASE: - switch (aperture_offset) { - // This is a PCIe status register. At some point during driver init - // the driver checks that interrupts are enabled. This is only - // checked once, so if the MMIO trace does not exactly line up with - // what the driver is doing in gem5, this may still have the first - // bit zero causing driver to fail. Therefore, we always set this - // bit to one as there is no harm to do so. 
- case 0x3c: // mmPCIE_DATA2 << 2 - uint32_t value = pkt->getLE() | 0x1; - DPRINTF(AMDGPUDevice, "Marking interrupts enabled: %#lx\n", value); - pkt->setLE(value); - break; - } break; + nbio.readMMIO(pkt, aperture_offset); + break; case GRBM_BASE: gpuvm.readMMIO(pkt, aperture_offset >> GRBM_OFFSET_SHIFT); break; @@ -332,6 +338,8 @@ AMDGPUDevice::writeFrame(PacketPtr pkt, Addr offset) DPRINTF(AMDGPUDevice, "GART translation %p -> %p\n", aperture_offset, gpuvm.gartTable[aperture_offset]); } + + nbio.writeFrame(pkt, offset); } void @@ -416,6 +424,10 @@ AMDGPUDevice::writeMMIO(PacketPtr pkt, Addr offset) case IH_BASE: deviceIH->writeMMIO(pkt, aperture_offset >> IH_OFFSET_SHIFT); break; + /* Write an IO space register */ + case NBIO_BASE: + nbio.writeMMIO(pkt, aperture_offset); + break; default: DPRINTF(AMDGPUDevice, "Unknown MMIO aperture for %#x\n", offset); break; @@ -489,19 +501,30 @@ AMDGPUDevice::write(PacketPtr pkt) DPRINTF(AMDGPUDevice, "PCI Write to %#lx data %#lx\n", pkt->getAddr(), data); - if (data || regs.find(pkt->getAddr()) != regs.end()) - regs[pkt->getAddr()] = data; - dispatchAccess(pkt, false); return pioDelay; } +bool +AMDGPUDevice::haveRegVal(uint32_t addr) +{ + return regs.count(addr); +} + uint32_t AMDGPUDevice::getRegVal(uint32_t addr) { + // Look the register up with find(): operator[] on a map default-inserts + // an entry for a register that was never written, after which + // haveRegVal() and the regs.find() test in readMMIO() see it as written. + auto reg_it = regs.find(addr); + uint32_t reg_val = reg_it != regs.end() ? reg_it->second : 0; + DPRINTF(AMDGPUDevice, "Getting register 0x%lx = %x\n", + addr, reg_val); - return regs[addr]; + return reg_val; } + void AMDGPUDevice::setRegVal(uint32_t addr, uint32_t value) { diff --git a/src/dev/amdgpu/amdgpu_device.hh b/src/dev/amdgpu/amdgpu_device.hh index 0e58f29038..cab799147e 100644 --- a/src/dev/amdgpu/amdgpu_device.hh +++ b/src/dev/amdgpu/amdgpu_device.hh @@ -36,6 +36,7 @@ #include "base/bitunion.hh" #include "dev/amdgpu/amdgpu_defines.hh" +#include "dev/amdgpu/amdgpu_nbio.hh" #include "dev/amdgpu/amdgpu_vm.hh" #include "dev/amdgpu/memory_manager.hh" #include "dev/amdgpu/mmio_reader.hh" @@ -106,6 +107,7 @@ class AMDGPUDevice : public PciDevice /** * Blocks of the GPU */ + AMDGPUNbio nbio; AMDGPUMemoryManager 
*gpuMemMgr; AMDGPUInterruptHandler *deviceIH; AMDGPUVM gpuvm; @@ -185,6 +187,7 @@ class AMDGPUDevice : public PciDevice * Register value getter/setter. Used by other GPU blocks to change * values from incoming driver/user packets. */ + bool haveRegVal(uint32_t addr); uint32_t getRegVal(uint32_t addr); void setRegVal(uint32_t addr, uint32_t value); diff --git a/src/dev/amdgpu/amdgpu_nbio.cc b/src/dev/amdgpu/amdgpu_nbio.cc new file mode 100644 index 0000000000..8064fd2a0e --- /dev/null +++ b/src/dev/amdgpu/amdgpu_nbio.cc @@ -0,0 +1,173 @@ +/* + * Copyright (c) 2023 Advanced Micro Devices, Inc. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * 1. Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright notice, + * this list of conditions and the following disclaimer in the documentation + * and/or other materials provided with the distribution. + * + * 3. Neither the name of the copyright holder nor the names of its + * contributors may be used to endorse or promote products derived from this + * software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +#include "dev/amdgpu/amdgpu_nbio.hh" + +#include "debug/AMDGPUDevice.hh" +#include "dev/amdgpu/amdgpu_device.hh" +#include "mem/packet_access.hh" + +namespace gem5 +{ + +AMDGPUNbio::AMDGPUNbio() +{ + // All read-before-write MMIOs go here + triggered_reads[AMDGPU_MP0_SMN_C2PMSG_64] = 0x80000000; +} + +void +AMDGPUNbio::setGPUDevice(AMDGPUDevice *gpu_device) +{ + gpuDevice = gpu_device; +} + +void +AMDGPUNbio::readMMIO(PacketPtr pkt, Addr offset) +{ + switch (offset) { + // This is a PCIe status register. At some point during driver init + // the driver checks that interrupts are enabled. This is only + // checked once, so if the MMIO trace does not exactly line up with + // what the driver is doing in gem5, this may still have the first + // bit zero causing driver to fail. Therefore, we always set this + // bit to one as there is no harm to do so. 
+ case AMDGPU_PCIE_DATA_REG: + { + uint32_t value = pkt->getLE() | 0x1; + DPRINTF(AMDGPUDevice, "Marking interrupts enabled: %#lx\n", value); + pkt->setLE(value); + } + break; + case AMDGPU_MM_DATA: + //pkt->setLE(regs[mm_index_reg]); + pkt->setLE(gpuDevice->getRegVal(mm_index_reg)); + break; + case VEGA10_INV_ENG17_ACK1: + case VEGA10_INV_ENG17_ACK2: + case MI100_INV_ENG17_ACK2: + case MI100_INV_ENG17_ACK3: + pkt->setLE(0x10001); + break; + case VEGA10_INV_ENG17_SEM1: + case VEGA10_INV_ENG17_SEM2: + case MI100_INV_ENG17_SEM2: + case MI100_INV_ENG17_SEM3: + pkt->setLE(0x1); + break; + // PSP responds with bit 31 set when ready + case AMDGPU_MP0_SMN_C2PMSG_35: + pkt->setLE(0x80000000); + break; + default: + if (triggered_reads.count(offset)) { + DPRINTF(AMDGPUDevice, "Found triggered read for %#x\n", offset); + pkt->setLE(triggered_reads[offset]); + } else if (gpuDevice->haveRegVal(offset)) { + uint32_t reg_val = gpuDevice->getRegVal(offset); + + DPRINTF(AMDGPUDevice, "Reading value of %#lx from regs: %#lx\n", + offset, reg_val); + + pkt->setLE(reg_val); + } else { + DPRINTF(AMDGPUDevice, "NBIO Unknown MMIO %#x (%#x)\n", offset, + pkt->getAddr()); + } + break; + } +} + +void +AMDGPUNbio::writeMMIO(PacketPtr pkt, Addr offset) +{ + if (offset == AMDGPU_MM_INDEX) { + assert(pkt->getSize() == 4); + mm_index_reg = insertBits(mm_index_reg, 31, 0, + pkt->getLE()); + } else if (offset == AMDGPU_MM_INDEX_HI) { + assert(pkt->getSize() == 4); + mm_index_reg = insertBits(mm_index_reg, 63, 32, + pkt->getLE()); + } else if (offset == AMDGPU_MM_DATA) { + DPRINTF(AMDGPUDevice, "MM write to reg %#lx data %#lx\n", + mm_index_reg, pkt->getLE()); + gpuDevice->setRegVal(AMDGPU_MM_DATA, pkt->getLE()); + } else if (offset == AMDGPU_MP0_SMN_C2PMSG_35) { + // See psp_v3_1_bootloader_load_sos in amdgpu driver code. 
+ if (pkt->getLE() == 0x10000) { + triggered_reads[AMDGPU_MP0_SMN_C2PMSG_81] = 0xdf40b31; + } + } else if (offset == AMDGPU_MP0_SMN_C2PMSG_64) { + triggered_reads[AMDGPU_MP0_SMN_C2PMSG_64] = + 0x80000000 + pkt->getLE(); + } else if (offset == AMDGPU_MP0_SMN_C2PMSG_69) { + // PSP ring low addr + psp_ring = insertBits(psp_ring, 31, 0, pkt->getLE()); + psp_ring_listen_addr = psp_ring + - gpuDevice->getVM().getSysAddrRangeLow() + 0xc; + } else if (offset == AMDGPU_MP0_SMN_C2PMSG_70) { + // PSP ring high addr + psp_ring = insertBits(psp_ring, 63, 32, pkt->getLE()); + psp_ring_listen_addr = psp_ring + - gpuDevice->getVM().getSysAddrRangeLow() + 0xc; + } else if (offset == AMDGPU_MP0_SMN_C2PMSG_71) { + // PSP ring size + psp_ring_size = pkt->getLE(); + } +} + +bool +AMDGPUNbio::readFrame(PacketPtr pkt, Addr offset) +{ + if (offset == psp_ring_dev_addr) { + psp_ring_value++; + pkt->setUintX(psp_ring_value, ByteOrder::little); + + return true; + } + + return false; +} + +void +AMDGPUNbio::writeFrame(PacketPtr pkt, Addr offset) +{ + if (offset == psp_ring_listen_addr) { + assert(pkt->getSize() == 8); + psp_ring_dev_addr = pkt->getLE() + - gpuDevice->getVM().getSysAddrRangeLow(); + DPRINTF(AMDGPUDevice, "Setting PSP ring device address to %#lx\n", + psp_ring_dev_addr); + } +} + +} // namespace gem5 diff --git a/src/dev/amdgpu/amdgpu_nbio.hh b/src/dev/amdgpu/amdgpu_nbio.hh new file mode 100644 index 0000000000..68d174e870 --- /dev/null +++ b/src/dev/amdgpu/amdgpu_nbio.hh @@ -0,0 +1,118 @@ +/* + * Copyright (c) 2023 Advanced Micro Devices, Inc. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * 1. Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * + * 2. 
Redistributions in binary form must reproduce the above copyright notice, + * this list of conditions and the following disclaimer in the documentation + * and/or other materials provided with the distribution. + * + * 3. Neither the name of the copyright holder nor the names of its + * contributors may be used to endorse or promote products derived from this + * software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + * + */ + +#ifndef __DEV_AMDGPU_AMDGPU_NBIO__ +#define __DEV_AMDGPU_AMDGPU_NBIO__ + +#include + +#include "base/types.hh" +#include "mem/packet.hh" + +namespace gem5 +{ + +class AMDGPUDevice; + +/** + * MMIO offsets for NBIO. NBIO handles initialization such as device + * discovery and psp functions. Values taken from: + * + * https://github.com/RadeonOpenCompute/ROCK-Kernel-Driver/blob/roc-4.3.x/ + * drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c + * + * The addresses in the file are dword addresses. Here they are converted + * to byte addresses so gem5 does not need to do any shifting. 
+ */ +#define AMDGPU_MM_INDEX 0x00000 +#define AMDGPU_MM_INDEX_HI 0x00018 +#define AMDGPU_MM_DATA 0x00004 +#define AMDGPU_PCIE_DATA_REG 0x0003c + +// Message bus related to psp +#define AMDGPU_MP0_SMN_C2PMSG_33 0x58184 +#define AMDGPU_MP0_SMN_C2PMSG_35 0x5818c +#define AMDGPU_MP0_SMN_C2PMSG_64 0x58200 +#define AMDGPU_MP0_SMN_C2PMSG_69 0x58214 +#define AMDGPU_MP0_SMN_C2PMSG_70 0x58218 +#define AMDGPU_MP0_SMN_C2PMSG_71 0x5821c +#define AMDGPU_MP0_SMN_C2PMSG_81 0x58244 + +// Device specific invalidation engines used during initialization +#define VEGA10_INV_ENG17_ACK1 0x0a318 +#define VEGA10_INV_ENG17_ACK2 0x69c18 +#define VEGA10_INV_ENG17_SEM1 0x0a288 +#define VEGA10_INV_ENG17_SEM2 0x69b88 + +#define MI100_INV_ENG17_ACK1 0x0a318 +#define MI100_INV_ENG17_ACK2 0x6a918 +#define MI100_INV_ENG17_ACK3 0x76918 +#define MI100_INV_ENG17_SEM1 0x0a288 +#define MI100_INV_ENG17_SEM2 0x6a888 +#define MI100_INV_ENG17_SEM3 0x76888 + +class AMDGPUNbio +{ + public: + AMDGPUNbio(); + + void setGPUDevice(AMDGPUDevice *gpu_device); + + void readMMIO(PacketPtr pkt, Addr offset); + void writeMMIO(PacketPtr pkt, Addr offset); + + bool readFrame(PacketPtr pkt, Addr offset); + void writeFrame(PacketPtr pkt, Addr offset); + + private: + AMDGPUDevice *gpuDevice; + + /* + * Driver initialization sequence helper variables. + */ + uint64_t mm_index_reg = 0; + std::unordered_map triggered_reads; + + /* + * PSP variables used in initialization. 
+ */ + Addr psp_ring = 0; + Addr psp_ring_dev_addr = 0; + Addr psp_ring_listen_addr = 0; + int psp_ring_size = 0; + int psp_ring_retval = 0; + int psp_ring_value = 0; +}; + +} // namespace gem5 + +#endif // __DEV_AMDGPU_AMDGPU_NBIO__ diff --git a/src/dev/amdgpu/amdgpu_vm.hh b/src/dev/amdgpu/amdgpu_vm.hh index 212a688716..ac35a11968 100644 --- a/src/dev/amdgpu/amdgpu_vm.hh +++ b/src/dev/amdgpu/amdgpu_vm.hh @@ -74,6 +74,13 @@ #define mmMMHUB_VM_FB_LOCATION_BASE 0x082c #define mmMMHUB_VM_FB_LOCATION_TOP 0x082d +#define VEGA10_FB_LOCATION_BASE 0x6a0b0 +#define VEGA10_FB_LOCATION_TOP 0x6a0b4 + +#define MI100_MEM_SIZE_REG 0x0378c +#define MI100_FB_LOCATION_BASE 0x6ac00 +#define MI100_FB_LOCATION_TOP 0x6ac04 + // AMD GPUs support 16 different virtual address spaces static constexpr int AMDGPU_VM_COUNT = 16; @@ -192,6 +199,9 @@ class AMDGPUVM : public Serializable Addr getMMHUBBase() { return mmhubBase; } Addr getMMHUBTop() { return mmhubTop; } + void setMMHUBBase(Addr base) { mmhubBase = base; } + void setMMHUBTop(Addr top) { mmhubTop = top; } + bool inFB(Addr vaddr) {