configs,dev-amdgpu: Add PCI express capability info

The ROCm stack requires PCI express atomics. Currently the first PCI
CapabilityPtr does not point to anything, which signals to the OS
(Linux) that this is an early generation PCI device. As PCI express
atomics were introduced later, the CapabilityPtr needs to point to at
least a PCI express capability structure. This capability is defined as
0x10 in Linux. We additionally set the PCIe atomics capability bits and
implement device-specific PCI configuration space reads and writes for
the amdgpu device.

With this commit, the simulation output no longer contains "PCIE atomics
not supported" when the amdgpu driver is loaded. Further, an application
which uses PCIe atomics (PyTorch with a reduce_sum kernel) now makes
further progress.

Change-Id: I5e3866979659a2657f558941106ef65c2f4d9988
This commit is contained in:
Matthew Poremba
2023-08-23 13:23:58 -05:00
parent 8b4c38302f
commit addba01d29
2 changed files with 83 additions and 7 deletions

View File

@@ -185,3 +185,26 @@ def connectGPU(system, args):
system.pc.south_bridge.gpu.DeviceID = 0x6863 system.pc.south_bridge.gpu.DeviceID = 0x6863
else: else:
panic("Unknown GPU device: {}".format(args.gpu_device)) panic("Unknown GPU device: {}".format(args.gpu_device))
# Use the gem5 default of 0x280 OR'd with 0x10 which tells Linux there is
# a PCI capabilities list to traverse.
system.pc.south_bridge.gpu.Status = 0x0290
# The PCI capabilities are like a linked list. The list has a memory
# offset and a capability type ID read by the OS. Make the first
# capability at 0x80 and set the PXCAP (PCI express) capability to
# that address. Mark the type ID as PCI express.
# We leave the next ID of PXCAP blank to end the list.
system.pc.south_bridge.gpu.PXCAPBaseOffset = 0x80
system.pc.south_bridge.gpu.CapabilityPtr = 0x80
system.pc.south_bridge.gpu.PXCAPCapId = 0x10
# Set bits 7 and 8 in the second PCIe device capabilities register which
# reports support for PCIe atomics for 32 and 64 bits respectively.
# Bit 9 for 128-bit compare and swap is not set because the amdgpu driver
# does not check this.
system.pc.south_bridge.gpu.PXCAPDevCap2 = 0x00000180
# Set bit 6 to enable atomic requestor, meaning this device can request
# atomics from other PCI devices.
system.pc.south_bridge.gpu.PXCAPDevCtrl2 = 0x00000040

View File

@@ -216,11 +216,47 @@ AMDGPUDevice::getAddrRanges() const
Tick Tick
AMDGPUDevice::readConfig(PacketPtr pkt) AMDGPUDevice::readConfig(PacketPtr pkt)
{ {
[[maybe_unused]] int offset = pkt->getAddr() & PCI_CONFIG_SIZE; int offset = pkt->getAddr() & PCI_CONFIG_SIZE;
DPRINTF(AMDGPUDevice, "Read Config: from offset: %#x size: %#x "
"data: %#x\n", offset, pkt->getSize(), config.data[offset]);
Tick delay = PciDevice::readConfig(pkt); if (offset < PCI_DEVICE_SPECIFIC) {
PciDevice::readConfig(pkt);
} else {
if (offset >= PXCAP_BASE && offset < (PXCAP_BASE + sizeof(PXCAP))) {
int pxcap_offset = offset - PXCAP_BASE;
switch (pkt->getSize()) {
case sizeof(uint8_t):
pkt->setLE<uint8_t>(pxcap.data[pxcap_offset]);
DPRINTF(AMDGPUDevice,
"Read PXCAP: dev %#x func %#x reg %#x 1 bytes: data "
"= %#x\n", _busAddr.dev, _busAddr.func, pxcap_offset,
(uint32_t)pkt->getLE<uint8_t>());
break;
case sizeof(uint16_t):
pkt->setLE<uint16_t>(
*(uint16_t*)&pxcap.data[pxcap_offset]);
DPRINTF(AMDGPUDevice,
"Read PXCAP: dev %#x func %#x reg %#x 2 bytes: data "
"= %#x\n", _busAddr.dev, _busAddr.func, pxcap_offset,
(uint32_t)pkt->getLE<uint16_t>());
break;
case sizeof(uint32_t):
pkt->setLE<uint32_t>(
*(uint32_t*)&pxcap.data[pxcap_offset]);
DPRINTF(AMDGPUDevice,
"Read PXCAP: dev %#x func %#x reg %#x 4 bytes: data "
"= %#x\n",_busAddr.dev, _busAddr.func, pxcap_offset,
(uint32_t)pkt->getLE<uint32_t>());
break;
default:
panic("Invalid access size (%d) for amdgpu PXCAP %#x\n",
pkt->getSize(), pxcap_offset);
}
pkt->makeAtomicResponse();
} else {
warn("Device specific offset %d not implemented!\n", offset);
}
}
// Before sending MMIOs the driver sends three interrupts in a row. // Before sending MMIOs the driver sends three interrupts in a row.
// Use this to trigger creating a checkpoint to restore in timing mode. // Use this to trigger creating a checkpoint to restore in timing mode.
@@ -231,14 +267,14 @@ AMDGPUDevice::readConfig(PacketPtr pkt)
if (offset == PCI0_INTERRUPT_PIN) { if (offset == PCI0_INTERRUPT_PIN) {
if (++init_interrupt_count == 3) { if (++init_interrupt_count == 3) {
DPRINTF(AMDGPUDevice, "Checkpointing before first MMIO\n"); DPRINTF(AMDGPUDevice, "Checkpointing before first MMIO\n");
exitSimLoop("checkpoint", 0, curTick() + delay + 1); exitSimLoop("checkpoint", 0, curTick() + configDelay + 1);
} }
} else { } else {
init_interrupt_count = 0; init_interrupt_count = 0;
} }
} }
return delay; return configDelay;
} }
Tick Tick
@@ -249,7 +285,24 @@ AMDGPUDevice::writeConfig(PacketPtr pkt)
"data: %#x\n", offset, pkt->getSize(), "data: %#x\n", offset, pkt->getSize(),
pkt->getUintX(ByteOrder::little)); pkt->getUintX(ByteOrder::little));
return PciDevice::writeConfig(pkt); if (offset < PCI_DEVICE_SPECIFIC)
return PciDevice::writeConfig(pkt);
if (offset >= PXCAP_BASE && offset < (PXCAP_BASE + sizeof(PXCAP))) {
uint8_t *pxcap_data = &(pxcap.data[0]);
int pxcap_offset = offset - PXCAP_BASE;
DPRINTF(AMDGPUDevice, "Writing PXCAP offset %d size %d\n",
pxcap_offset, pkt->getSize());
memcpy(pxcap_data + pxcap_offset, pkt->getConstPtr<void>(),
pkt->getSize());
}
pkt->makeAtomicResponse();
return configDelay;
} }
void void