From 8b4c38302f09a95e0c42a1524bf9c7dede081a83 Mon Sep 17 00:00:00 2001 From: Matthew Poremba Date: Wed, 23 Aug 2023 13:20:42 -0500 Subject: [PATCH 1/2] dev: PCI: Fix PCI express capability union The PCI express capability is declared as a struct rather than a union, unlike the other capabilities, which are unions. A union is used here to provide access to the ordinal data values when reading/writing an offset while simultaneously providing human readable field values that can be set when writing the code. This commit changes it to a union, which is likely what it should be. Nothing appears to be using this union yet so it is likely an oversight. Change-Id: I85fe7cc62914525c70fd7a5946d725ed308f8775 --- src/dev/pci/pcireg.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/dev/pci/pcireg.h b/src/dev/pci/pcireg.h index ab5fea540c..e7794e4dc2 100644 --- a/src/dev/pci/pcireg.h +++ b/src/dev/pci/pcireg.h @@ -326,7 +326,7 @@ struct MSIXPbaEntry * Defines the PCI Express capability register and its associated bitfields * for a PCIe device. */ -struct PXCAP +union PXCAP { uint8_t data[48]; struct From addba01d294a5ea14b9b703ab830e3420db26915 Mon Sep 17 00:00:00 2001 From: Matthew Poremba Date: Wed, 23 Aug 2023 13:23:58 -0500 Subject: [PATCH 2/2] configs,dev-amdgpu: Add PCI express capability info The ROCm stack requires PCI express atomics. Currently the first PCI CapabilityPtr does not point to anything, which signals to the OS (Linux) that this is an early generation PCI device. As PCI express atomics were introduced later, the CapabilityPtr needs to point to at least a PCI express capability structure. This capability is defined as 0x10 in Linux. We additionally set the PCI atomic based bits and implement device-specific PCI configuration space reads and writes to the amdgpu device. With this commit, the simulation output when loading the amdgpu driver no longer contains "PCIE atomics not supported".
Additionally, an application which uses PCIe atomics (PyTorch with a reduce_sum kernel) now makes further progress. Change-Id: I5e3866979659a2657f558941106ef65c2f4d9988 --- configs/example/gpufs/system/amdgpu.py | 23 +++++++++ src/dev/amdgpu/amdgpu_device.cc | 67 +++++++++++++++++++++++--- 2 files changed, 83 insertions(+), 7 deletions(-) diff --git a/configs/example/gpufs/system/amdgpu.py b/configs/example/gpufs/system/amdgpu.py index 9697e50a04..450a00bf93 100644 --- a/configs/example/gpufs/system/amdgpu.py +++ b/configs/example/gpufs/system/amdgpu.py @@ -185,3 +185,26 @@ def connectGPU(system, args): system.pc.south_bridge.gpu.DeviceID = 0x6863 else: panic("Unknown GPU device: {}".format(args.gpu_device)) + + # Use the gem5 default of 0x280 OR'd with 0x10 which tells Linux there is + # a PCI capabilities list to traverse. + system.pc.south_bridge.gpu.Status = 0x0290 + + # The PCI capabilities are like a linked list. The list has a memory + # offset and a capability type ID read by the OS. Make the first + # capability at 0x80 and set the PXCAP (PCI express) capability to + # that address. Mark the type ID as PCI express. + # We leave the next ID of PXCAP blank to end the list. + system.pc.south_bridge.gpu.PXCAPBaseOffset = 0x80 + system.pc.south_bridge.gpu.CapabilityPtr = 0x80 + system.pc.south_bridge.gpu.PXCAPCapId = 0x10 + + # Set bits 7 and 8 in the second PCIe device capabilities register which + # reports support for PCIe atomics for 32 and 64 bits respectively. + # Bit 9 for 128-bit compare and swap is not set because the amdgpu driver + # does not check this. + system.pc.south_bridge.gpu.PXCAPDevCap2 = 0x00000180 + + # Set bit 6 to enable atomic requestor, meaning this device can request + # atomics from other PCI devices.
+ system.pc.south_bridge.gpu.PXCAPDevCtrl2 = 0x00000040 diff --git a/src/dev/amdgpu/amdgpu_device.cc b/src/dev/amdgpu/amdgpu_device.cc index 44a1c9d394..5cc8df424f 100644 --- a/src/dev/amdgpu/amdgpu_device.cc +++ b/src/dev/amdgpu/amdgpu_device.cc @@ -216,11 +216,47 @@ AMDGPUDevice::getAddrRanges() const Tick AMDGPUDevice::readConfig(PacketPtr pkt) { - [[maybe_unused]] int offset = pkt->getAddr() & PCI_CONFIG_SIZE; - DPRINTF(AMDGPUDevice, "Read Config: from offset: %#x size: %#x " - "data: %#x\n", offset, pkt->getSize(), config.data[offset]); + int offset = pkt->getAddr() & PCI_CONFIG_SIZE; - Tick delay = PciDevice::readConfig(pkt); + if (offset < PCI_DEVICE_SPECIFIC) { + PciDevice::readConfig(pkt); + } else { + if (offset >= PXCAP_BASE && offset < (PXCAP_BASE + sizeof(PXCAP))) { + int pxcap_offset = offset - PXCAP_BASE; + + switch (pkt->getSize()) { + case sizeof(uint8_t): + pkt->setLE(pxcap.data[pxcap_offset]); + DPRINTF(AMDGPUDevice, + "Read PXCAP: dev %#x func %#x reg %#x 1 bytes: data " + "= %#x\n", _busAddr.dev, _busAddr.func, pxcap_offset, + (uint32_t)pkt->getLE()); + break; + case sizeof(uint16_t): + pkt->setLE( + *(uint16_t*)&pxcap.data[pxcap_offset]); + DPRINTF(AMDGPUDevice, + "Read PXCAP: dev %#x func %#x reg %#x 2 bytes: data " + "= %#x\n", _busAddr.dev, _busAddr.func, pxcap_offset, + (uint32_t)pkt->getLE()); + break; + case sizeof(uint32_t): + pkt->setLE( + *(uint32_t*)&pxcap.data[pxcap_offset]); + DPRINTF(AMDGPUDevice, + "Read PXCAP: dev %#x func %#x reg %#x 4 bytes: data " + "= %#x\n",_busAddr.dev, _busAddr.func, pxcap_offset, + (uint32_t)pkt->getLE()); + break; + default: + panic("Invalid access size (%d) for amdgpu PXCAP %#x\n", + pkt->getSize(), pxcap_offset); + } + pkt->makeAtomicResponse(); + } else { + warn("Device specific offset %d not implemented!\n", offset); + } + } // Before sending MMIOs the driver sends three interrupts in a row. // Use this to trigger creating a checkpoint to restore in timing mode. 
@@ -231,14 +267,14 @@ AMDGPUDevice::readConfig(PacketPtr pkt) if (offset == PCI0_INTERRUPT_PIN) { if (++init_interrupt_count == 3) { DPRINTF(AMDGPUDevice, "Checkpointing before first MMIO\n"); - exitSimLoop("checkpoint", 0, curTick() + delay + 1); + exitSimLoop("checkpoint", 0, curTick() + configDelay + 1); } } else { init_interrupt_count = 0; } } - return delay; + return configDelay; } Tick @@ -249,7 +285,24 @@ AMDGPUDevice::writeConfig(PacketPtr pkt) "data: %#x\n", offset, pkt->getSize(), pkt->getUintX(ByteOrder::little)); - return PciDevice::writeConfig(pkt); + if (offset < PCI_DEVICE_SPECIFIC) + return PciDevice::writeConfig(pkt); + + + if (offset >= PXCAP_BASE && offset < (PXCAP_BASE + sizeof(PXCAP))) { + uint8_t *pxcap_data = &(pxcap.data[0]); + int pxcap_offset = offset - PXCAP_BASE; + + DPRINTF(AMDGPUDevice, "Writing PXCAP offset %d size %d\n", + pxcap_offset, pkt->getSize()); + + memcpy(pxcap_data + pxcap_offset, pkt->getConstPtr(), + pkt->getSize()); + } + + pkt->makeAtomicResponse(); + + return configDelay; } void