dev-amdgpu: Rework handling of unknown registers

The top level AMDGPUDevice currently reads/writes all unknown registers
to/from a map containing the previously written value. This is intended
as a way to handle registers that are not part of the model but the
driver requires for functionality. Since this is at the top level, it
can mask changes to register values which do not go through the same
interface. For example, reading an MMIO, changing via PM4 queue, and
reading again returns the stale cached value.

This commit removes the usage of the regs map in AMDGPUDevice,
implements some important MMIOs that were previously handled by it, and
moves the unknown register handling to the NBIO aperture only. To reduce
the number of additional MMIOs to implement, the display manager in
vega10 is now disabled.

Change-Id: Iff0a599dd82d663c7e710b79c6ef6d0ad1fc44a2
This commit is contained in:
Matthew Poremba
2024-02-08 12:26:27 -06:00
parent 009cec56e0
commit 6bbde8fbb8
7 changed files with 96 additions and 67 deletions

View File

@@ -130,6 +130,7 @@ AMDGPUDevice::AMDGPUDevice(const AMDGPUDeviceParams &p)
pm4PktProc->setGPUDevice(this);
cp->hsaPacketProc().setGPUDevice(this);
cp->setGPUDevice(this);
nbio.setGPUDevice(this);
// Address aperture for device memory. We tell this to the driver and
// could possibly be anything, but these are the values used by hardware.
@@ -163,8 +164,6 @@ AMDGPUDevice::AMDGPUDevice(const AMDGPUDeviceParams &p)
gpuvm.setMMHUBBase(mmhubBase);
gpuvm.setMMHUBTop(mmhubTop);
nbio.setGPUDevice(this);
}
void
@@ -365,13 +364,6 @@ AMDGPUDevice::readMMIO(PacketPtr pkt, Addr offset)
DPRINTF(AMDGPUDevice, "Read MMIO %#lx\n", offset);
mmioReader.readFromTrace(pkt, MMIO_BAR, offset);
if (regs.find(offset) != regs.end()) {
uint64_t value = regs[offset];
DPRINTF(AMDGPUDevice, "Reading what kernel wrote before: %#x\n",
value);
pkt->setUintX(value, ByteOrder::little);
}
switch (aperture) {
case NBIO_BASE:
nbio.readMMIO(pkt, aperture_offset);
@@ -610,26 +602,39 @@ AMDGPUDevice::processPendingDoorbells(uint32_t offset)
}
}
bool
AMDGPUDevice::haveRegVal(uint32_t addr)
{
return regs.count(addr);
}
uint32_t
AMDGPUDevice::getRegVal(uint32_t addr)
AMDGPUDevice::getRegVal(uint64_t addr)
{
// This is somewhat of a guess based on amdgpu_device_mm_access
// in amdgpu_device.c in the ROCk driver. If bit 32 is 1 then
// assume VRAM and use full address, otherwise assume register
// address and only user lower 31 bits.
Addr fixup_addr = bits(addr, 31, 31) ? addr : addr & 0x7fffffff;
uint32_t pkt_data = 0;
RequestPtr request = std::make_shared<Request>(fixup_addr,
sizeof(uint32_t), 0 /* flags */, vramRequestorId());
PacketPtr pkt = Packet::createRead(request);
pkt->dataStatic((uint8_t *)&pkt_data);
readMMIO(pkt, addr);
DPRINTF(AMDGPUDevice, "Getting register 0x%lx = %x\n",
addr, regs[addr]);
return regs[addr];
fixup_addr, pkt->getLE<uint32_t>());
return pkt->getLE<uint32_t>();
}
void
AMDGPUDevice::setRegVal(uint32_t addr, uint32_t value)
AMDGPUDevice::setRegVal(uint64_t addr, uint32_t value)
{
DPRINTF(AMDGPUDevice, "Setting register 0x%lx to %x\n",
addr, value);
regs[addr] = value;
uint32_t pkt_data = value;
RequestPtr request = std::make_shared<Request>(addr,
sizeof(uint32_t), 0 /* flags */, vramRequestorId());
PacketPtr pkt = Packet::createWrite(request);
pkt->dataStatic((uint8_t *)&pkt_data);
writeMMIO(pkt, addr);
}
void
@@ -675,20 +680,16 @@ AMDGPUDevice::serialize(CheckpointOut &cp) const
// Serialize the PciDevice base class
PciDevice::serialize(cp);
uint64_t regs_size = regs.size();
uint64_t doorbells_size = doorbells.size();
uint64_t sdma_engs_size = sdmaEngs.size();
uint64_t used_vmid_map_size = usedVMIDs.size();
SERIALIZE_SCALAR(regs_size);
SERIALIZE_SCALAR(doorbells_size);
SERIALIZE_SCALAR(sdma_engs_size);
// Save the number of vmids used
SERIALIZE_SCALAR(used_vmid_map_size);
// Make a c-style array of the regs to serialize
uint32_t reg_addrs[regs_size];
uint64_t reg_values[regs_size];
uint32_t doorbells_offset[doorbells_size];
QueueType doorbells_queues[doorbells_size];
uint32_t sdma_engs_offset[sdma_engs_size];
@@ -698,13 +699,6 @@ AMDGPUDevice::serialize(CheckpointOut &cp) const
std::vector<int> used_vmid_sets;
int idx = 0;
for (auto & it : regs) {
reg_addrs[idx] = it.first;
reg_values[idx] = it.second;
++idx;
}
idx = 0;
for (auto & it : doorbells) {
doorbells_offset[idx] = it.first;
doorbells_queues[idx] = it.second;
@@ -732,8 +726,6 @@ AMDGPUDevice::serialize(CheckpointOut &cp) const
int* vmid_array = new int[num_queue_id];
std::copy(used_vmid_sets.begin(), used_vmid_sets.end(), vmid_array);
SERIALIZE_ARRAY(reg_addrs, sizeof(reg_addrs)/sizeof(reg_addrs[0]));
SERIALIZE_ARRAY(reg_values, sizeof(reg_values)/sizeof(reg_values[0]));
SERIALIZE_ARRAY(doorbells_offset, sizeof(doorbells_offset)/
sizeof(doorbells_offset[0]));
SERIALIZE_ARRAY(doorbells_queues, sizeof(doorbells_queues)/
@@ -764,30 +756,15 @@ AMDGPUDevice::unserialize(CheckpointIn &cp)
// Unserialize the PciDevice base class
PciDevice::unserialize(cp);
uint64_t regs_size = 0;
uint64_t doorbells_size = 0;
uint64_t sdma_engs_size = 0;
uint64_t used_vmid_map_size = 0;
UNSERIALIZE_SCALAR(regs_size);
UNSERIALIZE_SCALAR(doorbells_size);
UNSERIALIZE_SCALAR(sdma_engs_size);
UNSERIALIZE_SCALAR(used_vmid_map_size);
if (regs_size > 0) {
uint32_t reg_addrs[regs_size];
uint64_t reg_values[regs_size];
UNSERIALIZE_ARRAY(reg_addrs, sizeof(reg_addrs)/sizeof(reg_addrs[0]));
UNSERIALIZE_ARRAY(reg_values,
sizeof(reg_values)/sizeof(reg_values[0]));
for (int idx = 0; idx < regs_size; ++idx) {
regs.insert(std::make_pair(reg_addrs[idx], reg_values[idx]));
}
}
if (doorbells_size > 0) {
uint32_t doorbells_offset[doorbells_size];
QueueType doorbells_queues[doorbells_size];
@@ -798,8 +775,6 @@ AMDGPUDevice::unserialize(CheckpointIn &cp)
sizeof(doorbells_queues[0]));
for (int idx = 0; idx < doorbells_size; ++idx) {
regs.insert(std::make_pair(doorbells_offset[idx],
doorbells_queues[idx]));
doorbells[doorbells_offset[idx]] = doorbells_queues[idx];
}
}