The PM4 packet processor is handling all non-HSA GPU packets such as packets for (un)mapping HSA queues. This commit pulls many Linux structs and defines out into their own files for clarity. Finally, it implements the VMID related functions in AMDGPU device. Change-Id: I5f0057209305404df58aff2c4cd07762d1a31690 Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/53068 Reviewed-by: Matt Sinclair <mattdsinclair@gmail.com> Maintainer: Matt Sinclair <mattdsinclair@gmail.com> Tested-by: kokoro <noreply+kokoro@google.com>
337 lines · 11 KiB · C++
/*
|
|
* Copyright (c) 2021 Advanced Micro Devices, Inc.
|
|
* All rights reserved.
|
|
*
|
|
* Redistribution and use in source and binary forms, with or without
|
|
* modification, are permitted provided that the following conditions are met:
|
|
*
|
|
* 1. Redistributions of source code must retain the above copyright notice,
|
|
* this list of conditions and the following disclaimer.
|
|
*
|
|
* 2. Redistributions in binary form must reproduce the above copyright notice,
|
|
* this list of conditions and the following disclaimer in the documentation
|
|
* and/or other materials provided with the distribution.
|
|
*
|
|
* 3. Neither the name of the copyright holder nor the names of its
|
|
* contributors may be used to endorse or promote products derived from this
|
|
* software without specific prior written permission.
|
|
*
|
|
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
|
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
|
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
|
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
|
|
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
|
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
|
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
|
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
|
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
|
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
|
* POSSIBILITY OF SUCH DAMAGE.
|
|
*/
|
|
|
|
#include "dev/amdgpu/amdgpu_vm.hh"
|
|
|
|
#include "arch/amdgpu/vega/pagetable_walker.hh"
|
|
#include "arch/generic/mmu.hh"
|
|
#include "base/trace.hh"
|
|
#include "debug/AMDGPUDevice.hh"
|
|
#include "dev/amdgpu/amdgpu_defines.hh"
|
|
#include "mem/packet_access.hh"
|
|
|
|
namespace gem5
|
|
{
|
|
|
|
/**
 * Construct the GPU virtual memory manager. Zero-initializes the
 * privileged system VM context (VMID 0) and every user VM context so no
 * stale page table pointers exist before the driver programs them via
 * MMIO.
 */
AMDGPUVM::AMDGPUVM()
{
    // Zero out contexts
    memset(&vmContext0, 0, sizeof(AMDGPUSysVMContext));

    vmContexts.resize(AMDGPU_VM_COUNT);
    for (int i = 0; i < AMDGPU_VM_COUNT; ++i) {
        // Bug fix: this previously cleared &vmContexts[0] on every
        // iteration, leaving contexts 1..AMDGPU_VM_COUNT-1 with
        // indeterminate contents.
        memset(&vmContexts[i], 0, sizeof(AMDGPUVMContext));
    }
}
|
|
|
|
/**
 * @return Base address of the GART aperture, read from the page table
 *         base register of system VM context 0.
 */
Addr
AMDGPUVM::gartBase()
{
    const Addr gart_base = vmContext0.ptBase;
    return gart_base;
}
|
|
|
|
/**
 * @return Size in bytes of the GART aperture, computed from the start
 *         and end registers of system VM context 0.
 */
Addr
AMDGPUVM::gartSize()
{
    const Addr aperture_start = vmContext0.ptStart;
    const Addr aperture_end = vmContext0.ptEnd;
    return aperture_end - aperture_start;
}
|
|
|
|
/**
 * Handle an MMIO read of a GPUVM-related register.
 *
 * Several reads are overwritten with the value the driver expects so it
 * never spins (invalidation semaphore/ACKs report immediate completion).
 * The MMHUB FB location cases instead latch the value already carried in
 * the packet into mmhubBase/mmhubTop.
 *
 * @param pkt Packet holding the read data; its payload may be rewritten.
 * @param offset Register offset within the MMIO aperture.
 */
void
AMDGPUVM::readMMIO(PacketPtr pkt, Addr offset)
{
    uint32_t value = pkt->getLE<uint32_t>();

    switch (offset) {
      // MMHUB MMIOs
      case mmMMHUB_VM_INVALIDATE_ENG17_SEM:
        // Report the invalidation engine semaphore as already acquired.
        DPRINTF(AMDGPUDevice, "Marking invalidate ENG17 SEM acquired\n");
        pkt->setLE<uint32_t>(1);
        break;
      case mmMMHUB_VM_INVALIDATE_ENG17_ACK:
        // This is only used by driver initialization and only expects an ACK
        // for VMID 0 which is the first bit in the response.
        DPRINTF(AMDGPUDevice, "Telling driver invalidate ENG17 is complete\n");
        pkt->setLE<uint32_t>(1);
        break;
      case mmMMHUB_VM_FB_LOCATION_BASE:
        // Register holds bits [23:0] of a 16MiB-aligned address.
        // NOTE(review): this latches from the value already in the read
        // packet — presumably prefilled elsewhere (e.g. from a ROM/MMIO
        // trace); confirm against the device's read path.
        mmhubBase = ((Addr)bits(value, 23, 0) << 24);
        DPRINTF(AMDGPUDevice, "MMHUB FB base set to %#x\n", mmhubBase);
        break;
      case mmMMHUB_VM_FB_LOCATION_TOP:
        // "Top" is inclusive: the low 24 bits are filled with ones.
        mmhubTop = ((Addr)bits(value, 23, 0) << 24) | 0xFFFFFFULL;
        DPRINTF(AMDGPUDevice, "MMHUB FB top set to %#x\n", mmhubTop);
        break;
      // GRBM MMIOs
      case mmVM_INVALIDATE_ENG17_ACK:
        // Report the GRBM-side invalidation as complete as well.
        DPRINTF(AMDGPUDevice, "Overwritting invalidation ENG17 ACK\n");
        pkt->setLE<uint32_t>(1);
        break;
      default:
        DPRINTF(AMDGPUDevice, "GPUVM read of unknown MMIO %#x\n", offset);
        break;
    }
}
|
|
|
|
/**
 * Handle an MMIO write to a GPUVM-related register.
 *
 * Writes program the privileged system VM context (VMID 0): its page
 * table base/start/end registers and the AGP, framebuffer, and system
 * apertures. The 24-bit aperture registers are shifted into full 64-bit
 * addresses. Writes to unknown offsets are silently ignored.
 *
 * @param pkt Packet carrying the 32-bit register value being written.
 * @param offset Register offset within the MMIO aperture.
 */
void
AMDGPUVM::writeMMIO(PacketPtr pkt, Addr offset)
{
    switch (offset) {
      // VMID0 MMIOs: page table base/start/end, each split into a
      // low/high 32-bit register pair.
      case mmVM_CONTEXT0_PAGE_TABLE_BASE_ADDR_LO32:
        vmContext0.ptBaseL = pkt->getLE<uint32_t>();
        // Clear extra bits not part of address
        vmContext0.ptBaseL = insertBits(vmContext0.ptBaseL, 0, 0, 0);
        break;
      case mmVM_CONTEXT0_PAGE_TABLE_BASE_ADDR_HI32:
        vmContext0.ptBaseH = pkt->getLE<uint32_t>();
        break;
      case mmVM_CONTEXT0_PAGE_TABLE_START_ADDR_LO32:
        vmContext0.ptStartL = pkt->getLE<uint32_t>();
        break;
      case mmVM_CONTEXT0_PAGE_TABLE_START_ADDR_HI32:
        vmContext0.ptStartH = pkt->getLE<uint32_t>();
        break;
      case mmVM_CONTEXT0_PAGE_TABLE_END_ADDR_LO32:
        vmContext0.ptEndL = pkt->getLE<uint32_t>();
        break;
      case mmVM_CONTEXT0_PAGE_TABLE_END_ADDR_HI32:
        vmContext0.ptEndH = pkt->getLE<uint32_t>();
        break;
      // Aperture registers hold bits [23:0] of a 16MiB-aligned address.
      // "Top" registers are inclusive, so the low 24 bits of the result
      // are filled with ones.
      case mmMC_VM_AGP_TOP: {
        uint32_t val = pkt->getLE<uint32_t>();
        vmContext0.agpTop = (((Addr)bits(val, 23, 0)) << 24) | 0xffffff;
      } break;
      case mmMC_VM_AGP_BOT: {
        uint32_t val = pkt->getLE<uint32_t>();
        vmContext0.agpBot = ((Addr)bits(val, 23, 0)) << 24;
      } break;
      case mmMC_VM_AGP_BASE: {
        uint32_t val = pkt->getLE<uint32_t>();
        vmContext0.agpBase = ((Addr)bits(val, 23, 0)) << 24;
      } break;
      case mmMC_VM_FB_LOCATION_TOP: {
        uint32_t val = pkt->getLE<uint32_t>();
        vmContext0.fbTop = (((Addr)bits(val, 23, 0)) << 24) | 0xffffff;
      } break;
      case mmMC_VM_FB_LOCATION_BASE: {
        uint32_t val = pkt->getLE<uint32_t>();
        vmContext0.fbBase = ((Addr)bits(val, 23, 0)) << 24;
      } break;
      case mmMC_VM_FB_OFFSET: {
        uint32_t val = pkt->getLE<uint32_t>();
        vmContext0.fbOffset = ((Addr)bits(val, 23, 0)) << 24;
      } break;
      // System aperture registers hold bits [29:0] of a 256KiB-aligned
      // address (shifted by 18 rather than 24).
      case mmMC_VM_SYSTEM_APERTURE_LOW_ADDR: {
        uint32_t val = pkt->getLE<uint32_t>();
        vmContext0.sysAddrL = ((Addr)bits(val, 29, 0)) << 18;
      } break;
      case mmMC_VM_SYSTEM_APERTURE_HIGH_ADDR: {
        uint32_t val = pkt->getLE<uint32_t>();
        vmContext0.sysAddrH = ((Addr)bits(val, 29, 0)) << 18;
      } break;
      default:
        break;
    }
}
|
|
|
|
/**
 * Write the GPU VM state to a checkpoint.
 *
 * The SERIALIZE_* macros use the variable name as the checkpoint key, so
 * the local copies below fix the on-disk names; renaming them would
 * break checkpoint compatibility. The page table fields are copied into
 * plain locals first (presumably because the packed/bitfield members
 * cannot bind to the macro directly — see the note in unserialize).
 */
void
AMDGPUVM::serialize(CheckpointOut &cp) const
{
    // System VM context (VMID 0) page table registers.
    Addr vm0PTBase = vmContext0.ptBase;
    Addr vm0PTStart = vmContext0.ptStart;
    Addr vm0PTEnd = vmContext0.ptEnd;
    SERIALIZE_SCALAR(vm0PTBase);
    SERIALIZE_SCALAR(vm0PTStart);
    SERIALIZE_SCALAR(vm0PTEnd);

    // AGP, framebuffer, and system apertures of VMID 0.
    SERIALIZE_SCALAR(vmContext0.agpBase);
    SERIALIZE_SCALAR(vmContext0.agpTop);
    SERIALIZE_SCALAR(vmContext0.agpBot);
    SERIALIZE_SCALAR(vmContext0.fbBase);
    SERIALIZE_SCALAR(vmContext0.fbTop);
    SERIALIZE_SCALAR(vmContext0.fbOffset);
    SERIALIZE_SCALAR(vmContext0.sysAddrL);
    SERIALIZE_SCALAR(vmContext0.sysAddrH);

    // MMHUB aperture bounds.
    SERIALIZE_SCALAR(mmhubBase);
    SERIALIZE_SCALAR(mmhubTop);

    // User VM contexts: gather the per-VMID page table registers into
    // flat arrays so they serialize as three fixed-size arrays.
    Addr ptBase[AMDGPU_VM_COUNT];
    Addr ptStart[AMDGPU_VM_COUNT];
    Addr ptEnd[AMDGPU_VM_COUNT];
    for (int i = 0; i < AMDGPU_VM_COUNT; i++) {
        ptBase[i] = vmContexts[i].ptBase;
        ptStart[i] = vmContexts[i].ptStart;
        ptEnd[i] = vmContexts[i].ptEnd;
    }
    SERIALIZE_ARRAY(ptBase, AMDGPU_VM_COUNT);
    SERIALIZE_ARRAY(ptStart, AMDGPU_VM_COUNT);
    SERIALIZE_ARRAY(ptEnd, AMDGPU_VM_COUNT);
}
|
|
|
|
/**
 * Restore the GPU VM state from a checkpoint.
 *
 * Mirrors serialize(): the same local variable names are required since
 * the UNSERIALIZE_* macros use them as checkpoint keys. Values are read
 * into plain locals and then copied back into the (packed) context
 * structs.
 */
void
AMDGPUVM::unserialize(CheckpointIn &cp)
{
    // Unserialize requires fields not be packed
    Addr vm0PTBase;
    Addr vm0PTStart;
    Addr vm0PTEnd;
    UNSERIALIZE_SCALAR(vm0PTBase);
    UNSERIALIZE_SCALAR(vm0PTStart);
    UNSERIALIZE_SCALAR(vm0PTEnd);
    vmContext0.ptBase = vm0PTBase;
    vmContext0.ptStart = vm0PTStart;
    vmContext0.ptEnd = vm0PTEnd;

    // AGP, framebuffer, and system apertures of VMID 0.
    UNSERIALIZE_SCALAR(vmContext0.agpBase);
    UNSERIALIZE_SCALAR(vmContext0.agpTop);
    UNSERIALIZE_SCALAR(vmContext0.agpBot);
    UNSERIALIZE_SCALAR(vmContext0.fbBase);
    UNSERIALIZE_SCALAR(vmContext0.fbTop);
    UNSERIALIZE_SCALAR(vmContext0.fbOffset);
    UNSERIALIZE_SCALAR(vmContext0.sysAddrL);
    UNSERIALIZE_SCALAR(vmContext0.sysAddrH);

    // MMHUB aperture bounds.
    UNSERIALIZE_SCALAR(mmhubBase);
    UNSERIALIZE_SCALAR(mmhubTop);

    // User VM contexts: read the flat arrays back, then scatter into
    // the per-VMID context structs.
    Addr ptBase[AMDGPU_VM_COUNT];
    Addr ptStart[AMDGPU_VM_COUNT];
    Addr ptEnd[AMDGPU_VM_COUNT];
    UNSERIALIZE_ARRAY(ptBase, AMDGPU_VM_COUNT);
    UNSERIALIZE_ARRAY(ptStart, AMDGPU_VM_COUNT);
    UNSERIALIZE_ARRAY(ptEnd, AMDGPU_VM_COUNT);
    for (int i = 0; i < AMDGPU_VM_COUNT; i++) {
        vmContexts[i].ptBase = ptBase[i];
        vmContexts[i].ptStart = ptStart[i];
        vmContexts[i].ptEnd = ptEnd[i];
    }
}
|
|
|
|
void
|
|
AMDGPUVM::AGPTranslationGen::translate(Range &range) const
|
|
{
|
|
assert(vm->inAGP(range.vaddr));
|
|
|
|
Addr next = roundUp(range.vaddr, AMDGPU_AGP_PAGE_SIZE);
|
|
if (next == range.vaddr)
|
|
next += AMDGPU_AGP_PAGE_SIZE;
|
|
|
|
range.size = std::min(range.size, next - range.vaddr);
|
|
range.paddr = range.vaddr - vm->getAGPBot() + vm->getAGPBase();
|
|
|
|
DPRINTF(AMDGPUDevice, "AMDGPUVM: AGP translation %#lx -> %#lx\n",
|
|
range.vaddr, range.paddr);
|
|
}
|
|
|
|
/**
 * Translate one chunk of a GART range.
 *
 * GART is a single-level table: the entry looked up by page frame
 * number is the PTE holding the physical page address. A missing entry
 * warns and identity-maps instead of faulting, since some PM4 packets
 * carry register addresses that are deliberately ignored.
 */
void
AMDGPUVM::GARTTranslationGen::translate(Range &range) const
{
    // Clamp the chunk so it does not cross a GART page boundary.
    Addr next = roundUp(range.vaddr, AMDGPU_GART_PAGE_SIZE);
    if (next == range.vaddr)
        next += AMDGPU_GART_PAGE_SIZE;
    range.size = std::min(range.size, next - range.vaddr);

    // Page frame number: drop the 12-bit page offset.
    Addr gart_addr = bits(range.vaddr, 63, 12);

    // This table is a bit hard to iterate over. If we cross a page, the next
    // PTE is not necessarily the next entry but actually 7 entries away.
    // NOTE(review): the lsb * 7 skew assumes an 8-entry grouping in the
    // stored table — confirm against how gartTable is populated.
    Addr lsb = bits(gart_addr, 2, 0);
    gart_addr += lsb * 7;

    // GART is a single level translation, so the value at the "virtual" addr
    // is the PTE containing the physical address.
    auto result = vm->gartTable.find(gart_addr);
    if (result == vm->gartTable.end()) {
        // There is no reason to fault as there is no recovery mechanism for
        // invalid GART entries. Simply panic in this case
        warn("GART translation for %p not found", range.vaddr);

        // Some PM4 packets have register addresses which we ignore. In that
        // case just return the vaddr rather than faulting.
        range.paddr = range.vaddr;
    } else {
        Addr pte = result->second;
        Addr lower_bits = bits(range.vaddr, 11, 0);
        // PTE bits [47:12] hold the physical page number; re-attach the
        // in-page offset.
        range.paddr = (bits(pte, 47, 12) << 12) | lower_bits;
    }

    DPRINTF(AMDGPUDevice, "AMDGPUVM: GART translation %#lx -> %#lx\n",
            range.vaddr, range.paddr);
}
|
|
|
|
void
|
|
AMDGPUVM::MMHUBTranslationGen::translate(Range &range) const
|
|
{
|
|
assert(vm->inMMHUB(range.vaddr));
|
|
|
|
Addr next = roundUp(range.vaddr, AMDGPU_MMHUB_PAGE_SIZE);
|
|
if (next == range.vaddr)
|
|
next += AMDGPU_MMHUB_PAGE_SIZE;
|
|
|
|
range.size = std::min(range.size, next - range.vaddr);
|
|
range.paddr = range.vaddr - vm->getMMHUBBase();
|
|
|
|
DPRINTF(AMDGPUDevice, "AMDGPUVM: MMHUB translation %#lx -> %#lx\n",
|
|
range.vaddr, range.paddr);
|
|
}
|
|
|
|
void
|
|
AMDGPUVM::UserTranslationGen::translate(Range &range) const
|
|
{
|
|
// Get base address of the page table for this vmid
|
|
Addr base = vm->getPageTableBase(vmid);
|
|
Addr start = vm->getPageTableStart(vmid);
|
|
DPRINTF(AMDGPUDevice, "User tl base %#lx start %#lx walker %p\n",
|
|
base, start, walker);
|
|
|
|
bool dummy;
|
|
unsigned logBytes;
|
|
Addr paddr = range.vaddr;
|
|
Fault fault = walker->startFunctional(base, paddr, logBytes,
|
|
BaseMMU::Mode::Read, dummy);
|
|
if (fault != NoFault) {
|
|
fatal("User translation fault");
|
|
}
|
|
|
|
// GPU page size is variable. Use logBytes to determine size.
|
|
const Addr page_size = 1 << logBytes;
|
|
Addr next = roundUp(range.vaddr, page_size);
|
|
if (next == range.vaddr)
|
|
// We don't know the size of the next page, use default.
|
|
next += AMDGPU_USER_PAGE_SIZE;
|
|
|
|
range.size = std::min(range.size, next - range.vaddr);
|
|
range.paddr = paddr;
|
|
}
|
|
|
|
} // namespace gem5
|