dev-amdgpu: Enable more GPUs with device specific registers

Currently gem5 assumes the amdgpu device to be Vega10. In order to
support more devices we need to handle situations where different
registers and addresses have the same functionality but different
offsets on different devices.

This changeset adds an NBIO class to handle device discovery and driver
initialization related tasks, pulling them out of the AMDGPUDevice
class. The offsets used for MMIOs are reworked slightly to use offsets
rather than absolute addresses. This is because we cannot determine the
absolute address in the constructor since the BAR has not been assigned
by the OS yet.

Change-Id: I14b364374e086e185978334425a4e265cf2760d0
Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/70041
Reviewed-by: Matt Sinclair <mattdsinclair@gmail.com>
Maintainer: Matt Sinclair <mattdsinclair@gmail.com>
Tested-by: kokoro <noreply+kokoro@google.com>
This commit is contained in:
Matthew Poremba
2023-04-21 19:22:28 -05:00
parent 8b91ac6f8d
commit 316538bf8a
6 changed files with 369 additions and 46 deletions

View File

@@ -34,6 +34,7 @@
#include <fstream>
#include "debug/AMDGPUDevice.hh"
#include "dev/amdgpu/amdgpu_nbio.hh"
#include "dev/amdgpu/amdgpu_vm.hh"
#include "dev/amdgpu/interrupt_handler.hh"
#include "dev/amdgpu/pm4_packet_processor.hh"
@@ -129,6 +130,32 @@ AMDGPUDevice::AMDGPUDevice(const AMDGPUDeviceParams &p)
pm4PktProc->setGPUDevice(this);
cp->hsaPacketProc().setGPUDevice(this);
cp->setGPUDevice(this);
// Address aperture for device memory. We tell this to the driver and
// could possibly be anything, but these are the values used by hardware.
uint64_t mmhubBase = 0x8000ULL << 24;
uint64_t mmhubTop = 0x83ffULL << 24;
// These are hardcoded register values to return what the driver expects
setRegVal(AMDGPU_MP0_SMN_C2PMSG_33, 0x80000000);
// There are different registers for different GPUs, so we set the value
// based on the GPU type specified by the user.
if (p.device_name == "Vega10") {
setRegVal(VEGA10_FB_LOCATION_BASE, mmhubBase >> 24);
setRegVal(VEGA10_FB_LOCATION_TOP, mmhubTop >> 24);
} else if (p.device_name == "MI100") {
setRegVal(MI100_FB_LOCATION_BASE, mmhubBase >> 24);
setRegVal(MI100_FB_LOCATION_TOP, mmhubTop >> 24);
setRegVal(MI100_MEM_SIZE_REG, 0x3ff0); // 16GB of memory
} else {
panic("Unknown GPU device %s\n", p.device_name);
}
gpuvm.setMMHUBBase(mmhubBase);
gpuvm.setMMHUBTop(mmhubTop);
nbio.setGPUDevice(this);
}
void
@@ -236,35 +263,25 @@ AMDGPUDevice::readFrame(PacketPtr pkt, Addr offset)
* first, ignoring any writes from driver. (2) Any other address from
* device backing store / abstract memory class functionally.
*/
if (offset == 0xa28000) {
/*
* Handle special counter addresses in framebuffer. These counter
* addresses expect the read to return previous value + 1.
*/
if (regs.find(pkt->getAddr()) == regs.end()) {
regs[pkt->getAddr()] = 1;
} else {
regs[pkt->getAddr()]++;
}
pkt->setUintX(regs[pkt->getAddr()], ByteOrder::little);
} else {
/*
* Read the value from device memory. This must be done functionally
* because this method is called by the PCIDevice::read method which
* is a non-timing read.
*/
RequestPtr req = std::make_shared<Request>(offset, pkt->getSize(), 0,
vramRequestorId());
PacketPtr readPkt = Packet::createRead(req);
uint8_t *dataPtr = new uint8_t[pkt->getSize()];
readPkt->dataDynamic(dataPtr);
auto system = cp->shader()->gpuCmdProc.system();
system->getDeviceMemory(readPkt)->access(readPkt);
pkt->setUintX(readPkt->getUintX(ByteOrder::little), ByteOrder::little);
if (nbio.readFrame(pkt, offset)) {
return;
}
/*
* Read the value from device memory. This must be done functionally
* because this method is called by the PCIDevice::read method which
* is a non-timing read.
*/
RequestPtr req = std::make_shared<Request>(offset, pkt->getSize(), 0,
vramRequestorId());
PacketPtr readPkt = Packet::createRead(req);
uint8_t *dataPtr = new uint8_t[pkt->getSize()];
readPkt->dataDynamic(dataPtr);
auto system = cp->shader()->gpuCmdProc.system();
system->getDeviceMemory(readPkt)->access(readPkt);
pkt->setUintX(readPkt->getUintX(ByteOrder::little), ByteOrder::little);
}
void
@@ -285,8 +302,8 @@ AMDGPUDevice::readMMIO(PacketPtr pkt, Addr offset)
DPRINTF(AMDGPUDevice, "Read MMIO %#lx\n", offset);
mmioReader.readFromTrace(pkt, MMIO_BAR, offset);
if (regs.find(pkt->getAddr()) != regs.end()) {
uint64_t value = regs[pkt->getAddr()];
if (regs.find(offset) != regs.end()) {
uint64_t value = regs[offset];
DPRINTF(AMDGPUDevice, "Reading what kernel wrote before: %#x\n",
value);
pkt->setUintX(value, ByteOrder::little);
@@ -294,19 +311,8 @@ AMDGPUDevice::readMMIO(PacketPtr pkt, Addr offset)
switch (aperture) {
case NBIO_BASE:
switch (aperture_offset) {
// This is a PCIe status register. At some point during driver init
// the driver checks that interrupts are enabled. This is only
// checked once, so if the MMIO trace does not exactly line up with
// what the driver is doing in gem5, this may still have the first
// bit zero causing driver to fail. Therefore, we always set this
// bit to one as there is no harm to do so.
case 0x3c: // mmPCIE_DATA2 << 2
uint32_t value = pkt->getLE<uint32_t>() | 0x1;
DPRINTF(AMDGPUDevice, "Marking interrupts enabled: %#lx\n", value);
pkt->setLE<uint32_t>(value);
break;
} break;
nbio.readMMIO(pkt, aperture_offset);
break;
case GRBM_BASE:
gpuvm.readMMIO(pkt, aperture_offset >> GRBM_OFFSET_SHIFT);
break;
@@ -332,6 +338,8 @@ AMDGPUDevice::writeFrame(PacketPtr pkt, Addr offset)
DPRINTF(AMDGPUDevice, "GART translation %p -> %p\n", aperture_offset,
gpuvm.gartTable[aperture_offset]);
}
nbio.writeFrame(pkt, offset);
}
void
@@ -416,6 +424,10 @@ AMDGPUDevice::writeMMIO(PacketPtr pkt, Addr offset)
case IH_BASE:
deviceIH->writeMMIO(pkt, aperture_offset >> IH_OFFSET_SHIFT);
break;
/* Write an IO space register */
case NBIO_BASE:
nbio.writeMMIO(pkt, aperture_offset);
break;
default:
DPRINTF(AMDGPUDevice, "Unknown MMIO aperture for %#x\n", offset);
break;
@@ -489,19 +501,25 @@ AMDGPUDevice::write(PacketPtr pkt)
DPRINTF(AMDGPUDevice, "PCI Write to %#lx data %#lx\n",
pkt->getAddr(), data);
if (data || regs.find(pkt->getAddr()) != regs.end())
regs[pkt->getAddr()] = data;
dispatchAccess(pkt, false);
return pioDelay;
}
bool
AMDGPUDevice::haveRegVal(uint32_t addr)
{
return regs.count(addr);
}
uint32_t
AMDGPUDevice::getRegVal(uint32_t addr)
{
DPRINTF(AMDGPUDevice, "Getting register 0x%lx = %x\n",
addr, regs[addr]);
return regs[addr];
}
void
AMDGPUDevice::setRegVal(uint32_t addr, uint32_t value)
{