Files
gem5/src/dev/amdgpu/amdgpu_vm.cc
Matthew Poremba 1be246bbe3 dev-amdgpu: Add PM4PP, VMID, Linux definitions
The PM4 packet processor handles all non-HSA GPU packets, such
as packets for (un)mapping HSA queues. This commit pulls many
Linux structs and defines out into their own files for clarity.
Finally, it implements the VMID related functions in AMDGPU device.

Change-Id: I5f0057209305404df58aff2c4cd07762d1a31690
Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/53068
Reviewed-by: Matt Sinclair <mattdsinclair@gmail.com>
Maintainer: Matt Sinclair <mattdsinclair@gmail.com>
Tested-by: kokoro <noreply+kokoro@google.com>
2022-03-24 14:59:57 +00:00

337 lines
11 KiB
C++

/*
* Copyright (c) 2021 Advanced Micro Devices, Inc.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* 1. Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
*
* 2. Redistributions in binary form must reproduce the above copyright notice,
* this list of conditions and the following disclaimer in the documentation
* and/or other materials provided with the distribution.
*
* 3. Neither the name of the copyright holder nor the names of its
* contributors may be used to endorse or promote products derived from this
* software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*/
#include "dev/amdgpu/amdgpu_vm.hh"
#include "arch/amdgpu/vega/pagetable_walker.hh"
#include "arch/generic/mmu.hh"
#include "base/trace.hh"
#include "debug/AMDGPUDevice.hh"
#include "dev/amdgpu/amdgpu_defines.hh"
#include "mem/packet_access.hh"
namespace gem5
{
AMDGPUVM::AMDGPUVM()
{
    // Zero out the system (VMID 0) context.
    memset(&vmContext0, 0, sizeof(AMDGPUSysVMContext));

    // Zero out every user VM context. The previous code memset
    // &vmContexts[0] on each iteration, so contexts 1..AMDGPU_VM_COUNT-1
    // were never explicitly cleared by the loop.
    vmContexts.resize(AMDGPU_VM_COUNT);
    for (int i = 0; i < AMDGPU_VM_COUNT; ++i) {
        memset(&vmContexts[i], 0, sizeof(AMDGPUVMContext));
    }
}
Addr
AMDGPUVM::gartBase()
{
    // Base address of the GART (VMID 0 / system context) page table,
    // as programmed through the VM_CONTEXT0_PAGE_TABLE_BASE registers.
    return vmContext0.ptBase;
}
Addr
AMDGPUVM::gartSize()
{
    // Size of the GART aperture, from the VMID 0 page table start/end
    // registers. NOTE(review): gartBase() returns ptBase while the size
    // here is computed from ptStart/ptEnd — confirm the driver programs
    // these registers consistently.
    return vmContext0.ptEnd - vmContext0.ptStart;
}
void
AMDGPUVM::readMMIO(PacketPtr pkt, Addr offset)
{
    // The packet arrives carrying a 32-bit value (presumably the backing
    // store contents — TODO confirm against the caller); some cases below
    // consume it, others overwrite it with a synthesized response.
    uint32_t value = pkt->getLE<uint32_t>();

    switch (offset) {
      // MMHUB MMIOs
      case mmMMHUB_VM_INVALIDATE_ENG17_SEM:
        // Always report the invalidate semaphore as acquired.
        DPRINTF(AMDGPUDevice, "Marking invalidate ENG17 SEM acquired\n");
        pkt->setLE<uint32_t>(1);
        break;
      case mmMMHUB_VM_INVALIDATE_ENG17_ACK:
        // This is only used by driver initialization and only expects an ACK
        // for VMID 0 which is the first bit in the response.
        DPRINTF(AMDGPUDevice, "Telling driver invalidate ENG17 is complete\n");
        pkt->setLE<uint32_t>(1);
        break;
      case mmMMHUB_VM_FB_LOCATION_BASE:
        // 24-bit field placed at bits 47:24 of the MMHUB framebuffer base.
        mmhubBase = ((Addr)bits(value, 23, 0) << 24);
        DPRINTF(AMDGPUDevice, "MMHUB FB base set to %#x\n", mmhubBase);
        break;
      case mmMMHUB_VM_FB_LOCATION_TOP:
        // Same encoding as the base; low 24 bits saturated to cover the
        // full last 16 MiB granule.
        mmhubTop = ((Addr)bits(value, 23, 0) << 24) | 0xFFFFFFULL;
        DPRINTF(AMDGPUDevice, "MMHUB FB top set to %#x\n", mmhubTop);
        break;
      // GRBM MMIOs
      case mmVM_INVALIDATE_ENG17_ACK:
        // Fixed typo in the debug message ("Overwritting").
        DPRINTF(AMDGPUDevice, "Overwriting invalidation ENG17 ACK\n");
        pkt->setLE<uint32_t>(1);
        break;
      default:
        DPRINTF(AMDGPUDevice, "GPUVM read of unknown MMIO %#x\n", offset);
        break;
    }
}
void
AMDGPUVM::writeMMIO(PacketPtr pkt, Addr offset)
{
    // Every register handled here is a 32-bit MMIO write; pull the
    // payload out once rather than in each case.
    const uint32_t data = pkt->getLE<uint32_t>();

    switch (offset) {
      // VMID0 (system context) page table registers.
      case mmVM_CONTEXT0_PAGE_TABLE_BASE_ADDR_LO32:
        // Bit 0 is not part of the address; clear it.
        vmContext0.ptBaseL = insertBits(data, 0, 0, 0);
        break;
      case mmVM_CONTEXT0_PAGE_TABLE_BASE_ADDR_HI32:
        vmContext0.ptBaseH = data;
        break;
      case mmVM_CONTEXT0_PAGE_TABLE_START_ADDR_LO32:
        vmContext0.ptStartL = data;
        break;
      case mmVM_CONTEXT0_PAGE_TABLE_START_ADDR_HI32:
        vmContext0.ptStartH = data;
        break;
      case mmVM_CONTEXT0_PAGE_TABLE_END_ADDR_LO32:
        vmContext0.ptEndL = data;
        break;
      case mmVM_CONTEXT0_PAGE_TABLE_END_ADDR_HI32:
        vmContext0.ptEndH = data;
        break;
      // Aperture registers: a 24-bit field shifted into bits 47:24.
      // "TOP" registers fill the low 24 bits to span the last granule.
      case mmMC_VM_AGP_TOP:
        vmContext0.agpTop = (((Addr)bits(data, 23, 0)) << 24) | 0xffffff;
        break;
      case mmMC_VM_AGP_BOT:
        vmContext0.agpBot = ((Addr)bits(data, 23, 0)) << 24;
        break;
      case mmMC_VM_AGP_BASE:
        vmContext0.agpBase = ((Addr)bits(data, 23, 0)) << 24;
        break;
      case mmMC_VM_FB_LOCATION_TOP:
        vmContext0.fbTop = (((Addr)bits(data, 23, 0)) << 24) | 0xffffff;
        break;
      case mmMC_VM_FB_LOCATION_BASE:
        vmContext0.fbBase = ((Addr)bits(data, 23, 0)) << 24;
        break;
      case mmMC_VM_FB_OFFSET:
        vmContext0.fbOffset = ((Addr)bits(data, 23, 0)) << 24;
        break;
      // System aperture: a 30-bit field shifted into bits 47:18.
      case mmMC_VM_SYSTEM_APERTURE_LOW_ADDR:
        vmContext0.sysAddrL = ((Addr)bits(data, 29, 0)) << 18;
        break;
      case mmMC_VM_SYSTEM_APERTURE_HIGH_ADDR:
        vmContext0.sysAddrH = ((Addr)bits(data, 29, 0)) << 18;
        break;
      default:
        break;
    }
}
void
AMDGPUVM::serialize(CheckpointOut &cp) const
{
    // Copy the context fields into plain Addr locals before serializing
    // (the unserialize side notes the fields are packed, so they cannot
    // be passed by reference to the macros directly). SERIALIZE_SCALAR
    // stringifies the variable name, so these names are part of the
    // checkpoint format and must not be renamed.
    Addr vm0PTBase = vmContext0.ptBase;
    Addr vm0PTStart = vmContext0.ptStart;
    Addr vm0PTEnd = vmContext0.ptEnd;
    SERIALIZE_SCALAR(vm0PTBase);
    SERIALIZE_SCALAR(vm0PTStart);
    SERIALIZE_SCALAR(vm0PTEnd);

    // Aperture registers for the system (VMID 0) context.
    SERIALIZE_SCALAR(vmContext0.agpBase);
    SERIALIZE_SCALAR(vmContext0.agpTop);
    SERIALIZE_SCALAR(vmContext0.agpBot);
    SERIALIZE_SCALAR(vmContext0.fbBase);
    SERIALIZE_SCALAR(vmContext0.fbTop);
    SERIALIZE_SCALAR(vmContext0.fbOffset);
    SERIALIZE_SCALAR(vmContext0.sysAddrL);
    SERIALIZE_SCALAR(vmContext0.sysAddrH);
    SERIALIZE_SCALAR(mmhubBase);
    SERIALIZE_SCALAR(mmhubTop);

    // Flatten the per-VMID page table registers into parallel arrays so
    // they can be serialized with SERIALIZE_ARRAY.
    Addr ptBase[AMDGPU_VM_COUNT];
    Addr ptStart[AMDGPU_VM_COUNT];
    Addr ptEnd[AMDGPU_VM_COUNT];
    for (int i = 0; i < AMDGPU_VM_COUNT; i++) {
        ptBase[i] = vmContexts[i].ptBase;
        ptStart[i] = vmContexts[i].ptStart;
        ptEnd[i] = vmContexts[i].ptEnd;
    }
    SERIALIZE_ARRAY(ptBase, AMDGPU_VM_COUNT);
    SERIALIZE_ARRAY(ptStart, AMDGPU_VM_COUNT);
    SERIALIZE_ARRAY(ptEnd, AMDGPU_VM_COUNT);
}
void
AMDGPUVM::unserialize(CheckpointIn &cp)
{
    // Unserialize requires fields not be packed, so read into plain Addr
    // locals first and assign into the context afterwards. The
    // UNSERIALIZE_SCALAR macro stringifies the variable name, so these
    // names must match exactly what serialize() wrote.
    Addr vm0PTBase;
    Addr vm0PTStart;
    Addr vm0PTEnd;
    UNSERIALIZE_SCALAR(vm0PTBase);
    UNSERIALIZE_SCALAR(vm0PTStart);
    UNSERIALIZE_SCALAR(vm0PTEnd);
    vmContext0.ptBase = vm0PTBase;
    vmContext0.ptStart = vm0PTStart;
    vmContext0.ptEnd = vm0PTEnd;

    // Aperture registers for the system (VMID 0) context.
    UNSERIALIZE_SCALAR(vmContext0.agpBase);
    UNSERIALIZE_SCALAR(vmContext0.agpTop);
    UNSERIALIZE_SCALAR(vmContext0.agpBot);
    UNSERIALIZE_SCALAR(vmContext0.fbBase);
    UNSERIALIZE_SCALAR(vmContext0.fbTop);
    UNSERIALIZE_SCALAR(vmContext0.fbOffset);
    UNSERIALIZE_SCALAR(vmContext0.sysAddrL);
    UNSERIALIZE_SCALAR(vmContext0.sysAddrH);
    UNSERIALIZE_SCALAR(mmhubBase);
    UNSERIALIZE_SCALAR(mmhubTop);

    // Restore the per-VMID page table registers from parallel arrays
    // (mirrors the flattening done in serialize()).
    Addr ptBase[AMDGPU_VM_COUNT];
    Addr ptStart[AMDGPU_VM_COUNT];
    Addr ptEnd[AMDGPU_VM_COUNT];
    UNSERIALIZE_ARRAY(ptBase, AMDGPU_VM_COUNT);
    UNSERIALIZE_ARRAY(ptStart, AMDGPU_VM_COUNT);
    UNSERIALIZE_ARRAY(ptEnd, AMDGPU_VM_COUNT);
    for (int i = 0; i < AMDGPU_VM_COUNT; i++) {
        vmContexts[i].ptBase = ptBase[i];
        vmContexts[i].ptStart = ptStart[i];
        vmContexts[i].ptEnd = ptEnd[i];
    }
}
void
AMDGPUVM::AGPTranslationGen::translate(Range &range) const
{
    assert(vm->inAGP(range.vaddr));

    // Trim the request so it does not cross an AGP page boundary.
    Addr page_boundary = roundUp(range.vaddr, AMDGPU_AGP_PAGE_SIZE);
    if (page_boundary == range.vaddr) {
        page_boundary += AMDGPU_AGP_PAGE_SIZE;
    }
    const Addr bytes_left = page_boundary - range.vaddr;
    if (bytes_left < range.size) {
        range.size = bytes_left;
    }

    // AGP is a flat window: rebase the offset from the AGP bottom onto
    // the AGP base address.
    range.paddr = (range.vaddr - vm->getAGPBot()) + vm->getAGPBase();

    DPRINTF(AMDGPUDevice, "AMDGPUVM: AGP translation %#lx -> %#lx\n",
            range.vaddr, range.paddr);
}
void
AMDGPUVM::GARTTranslationGen::translate(Range &range) const
{
    // Clamp the requested range so it stays within one GART page.
    Addr next = roundUp(range.vaddr, AMDGPU_GART_PAGE_SIZE);
    if (next == range.vaddr)
        next += AMDGPU_GART_PAGE_SIZE;
    range.size = std::min(range.size, next - range.vaddr);

    // Index the GART table by virtual page number (vaddr >> 12).
    Addr gart_addr = bits(range.vaddr, 63, 12);

    // This table is a bit hard to iterate over. If we cross a page, the next
    // PTE is not necessarily the next entry but actually 7 entries away.
    // NOTE(review): the stride correction uses the low 3 bits of the page
    // number; confirm this matches how gartTable is populated elsewhere.
    Addr lsb = bits(gart_addr, 2, 0);
    gart_addr += lsb * 7;

    // GART is a single level translation, so the value at the "virtual" addr
    // is the PTE containing the physical address.
    auto result = vm->gartTable.find(gart_addr);
    if (result == vm->gartTable.end()) {
        // Missing entries are not treated as fatal: some PM4 packets carry
        // register addresses which we ignore. Warn and fall back to an
        // identity mapping rather than faulting.
        warn("GART translation for %p not found", range.vaddr);
        range.paddr = range.vaddr;
    } else {
        // PTE bits 47:12 hold the physical page number; keep the page
        // offset from the original virtual address.
        Addr pte = result->second;
        Addr lower_bits = bits(range.vaddr, 11, 0);
        range.paddr = (bits(pte, 47, 12) << 12) | lower_bits;
    }
    DPRINTF(AMDGPUDevice, "AMDGPUVM: GART translation %#lx -> %#lx\n",
            range.vaddr, range.paddr);
}
void
AMDGPUVM::MMHUBTranslationGen::translate(Range &range) const
{
    assert(vm->inMMHUB(range.vaddr));

    // Limit the translated chunk to the remainder of the current page.
    Addr boundary = roundUp(range.vaddr, AMDGPU_MMHUB_PAGE_SIZE);
    if (boundary == range.vaddr) {
        boundary += AMDGPU_MMHUB_PAGE_SIZE;
    }
    const Addr chunk = boundary - range.vaddr;
    if (chunk < range.size) {
        range.size = chunk;
    }

    // MMHUB physical addresses are plain offsets from the aperture base.
    range.paddr = range.vaddr - vm->getMMHUBBase();

    DPRINTF(AMDGPUDevice, "AMDGPUVM: MMHUB translation %#lx -> %#lx\n",
            range.vaddr, range.paddr);
}
void
AMDGPUVM::UserTranslationGen::translate(Range &range) const
{
    // Get base address of the page table for this vmid
    Addr base = vm->getPageTableBase(vmid);
    Addr start = vm->getPageTableStart(vmid);
    DPRINTF(AMDGPUDevice, "User tl base %#lx start %#lx walker %p\n",
            base, start, walker);

    // Walk the page table functionally; logBytes is an out-param filled
    // in by the walker with log2 of the translated page size.
    bool dummy;
    unsigned logBytes;
    Addr paddr = range.vaddr;
    Fault fault = walker->startFunctional(base, paddr, logBytes,
                                          BaseMMU::Mode::Read, dummy);
    if (fault != NoFault) {
        fatal("User translation fault");
    }

    // GPU page size is variable. Use logBytes to determine size.
    // Use an Addr-width shift: "1 << logBytes" is an int shift and
    // overflows (undefined behavior) for page sizes >= 2 GiB.
    const Addr page_size = static_cast<Addr>(1) << logBytes;
    Addr next = roundUp(range.vaddr, page_size);
    if (next == range.vaddr) {
        // We don't know the size of the next page, use default.
        next += AMDGPU_USER_PAGE_SIZE;
    }

    range.size = std::min(range.size, next - range.vaddr);
    range.paddr = paddr;
}
} // namespace gem5