/*
 * Copyright (c) 2021 Advanced Micro Devices, Inc.
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are met:
 *
 * 1. Redistributions of source code must retain the above copyright notice,
 * this list of conditions and the following disclaimer.
 *
 * 2. Redistributions in binary form must reproduce the above copyright notice,
 * this list of conditions and the following disclaimer in the documentation
 * and/or other materials provided with the distribution.
 *
 * 3. Neither the name of the copyright holder nor the names of its
 * contributors may be used to endorse or promote products derived from this
 * software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */

#include "dev/amdgpu/amdgpu_vm.hh"

#include "arch/amdgpu/vega/pagetable_walker.hh"
#include "arch/generic/mmu.hh"
#include "base/trace.hh"
#include "debug/AMDGPUDevice.hh"
#include "dev/amdgpu/amdgpu_defines.hh"
#include "mem/packet_access.hh"

namespace gem5
{

/**
 * Construct the GPU VM state with the system (VMID 0) context and all
 * AMDGPU_VM_COUNT per-VMID contexts cleared to zero.
 */
AMDGPUVM::AMDGPUVM()
{
    // Zero out contexts
    memset(&vmContext0, 0, sizeof(AMDGPUSysVMContext));

    vmContexts.resize(AMDGPU_VM_COUNT);
    for (int i = 0; i < AMDGPU_VM_COUNT; ++i) {
        // Clear the i-th context; indexing with a constant 0 here would
        // only ever clear the first entry.
        memset(&vmContexts[i], 0, sizeof(AMDGPUVMContext));
    }
}

/**
 * @return The page table base address stored in the VMID 0 context.
 */
Addr
AMDGPUVM::gartBase()
{
    return vmContext0.ptBase;
}

/**
 * @return The distance between the page table end and start addresses
 *         stored in the VMID 0 context.
 */
Addr
AMDGPUVM::gartSize()
{
    return vmContext0.ptEnd - vmContext0.ptStart;
}

/**
 * Handle a read of one of the MMIO registers tracked by this class.
 *
 * The 32-bit little-endian value already held in the packet is inspected
 * for the MMHUB framebuffer location registers, and the packet payload is
 * overwritten with 1 for the invalidation semaphore/ACK registers.
 * Offsets that are not handled only produce a debug message.
 *
 * @param pkt    Packet carrying the 32-bit register value.
 * @param offset MMIO register offset selecting the register.
 */
void
AMDGPUVM::readMMIO(PacketPtr pkt, Addr offset)
{
    // Packet::getLE is a template on the access type; the registers handled
    // here are accessed as 32-bit values.
    uint32_t value = pkt->getLE<uint32_t>();

    switch (offset) {
      // MMHUB MMIOs
      case mmMMHUB_VM_INVALIDATE_ENG17_SEM:
        DPRINTF(AMDGPUDevice, "Marking invalidate ENG17 SEM acquired\n");
        pkt->setLE<uint32_t>(1);
        break;
      case mmMMHUB_VM_INVALIDATE_ENG17_ACK:
        // This is only used by driver initialization and only expects an ACK
        // for VMID 0 which is the first bit in the response.
        DPRINTF(AMDGPUDevice, "Telling driver invalidate ENG17 is complete\n");
        pkt->setLE<uint32_t>(1);
        break;
      case mmMMHUB_VM_FB_LOCATION_BASE:
        // Bits 23:0 of the register hold address bits 47:24.
        mmhubBase = ((Addr)bits(value, 23, 0) << 24);
        DPRINTF(AMDGPUDevice, "MMHUB FB base set to %#x\n", mmhubBase);
        break;
      case mmMMHUB_VM_FB_LOCATION_TOP:
        // Same encoding as the base; the low 24 bits are filled with ones so
        // that the top address is inclusive.
        mmhubTop = ((Addr)bits(value, 23, 0) << 24) | 0xFFFFFFULL;
        DPRINTF(AMDGPUDevice, "MMHUB FB top set to %#x\n", mmhubTop);
        break;
      // GRBM MMIOs
      case mmVM_INVALIDATE_ENG17_ACK:
        DPRINTF(AMDGPUDevice, "Overwritting invalidation ENG17 ACK\n");
        pkt->setLE<uint32_t>(1);
        break;
      default:
        DPRINTF(AMDGPUDevice, "GPUVM read of unknown MMIO %#x\n", offset);
        break;
    }
}

/**
 * Handle a write to one of the MMIO registers tracked by this class by
 * latching the written 32-bit value into the VMID 0 context. Writes to
 * offsets that are not handled are silently ignored.
 *
 * @param pkt    Packet carrying the 32-bit value being written.
 * @param offset MMIO register offset selecting the register.
 */
void
AMDGPUVM::writeMMIO(PacketPtr pkt, Addr offset)
{
    switch (offset) {
      // VMID0 MMIOs
      case mmVM_CONTEXT0_PAGE_TABLE_BASE_ADDR_LO32:
        vmContext0.ptBaseL = pkt->getLE<uint32_t>();
        // Clear extra bits not part of address
        vmContext0.ptBaseL = insertBits(vmContext0.ptBaseL, 0, 0, 0);
        break;
      case mmVM_CONTEXT0_PAGE_TABLE_BASE_ADDR_HI32:
        vmContext0.ptBaseH = pkt->getLE<uint32_t>();
        break;
      case mmVM_CONTEXT0_PAGE_TABLE_START_ADDR_LO32:
        vmContext0.ptStartL = pkt->getLE<uint32_t>();
        break;
      case mmVM_CONTEXT0_PAGE_TABLE_START_ADDR_HI32:
        vmContext0.ptStartH = pkt->getLE<uint32_t>();
        break;
      case mmVM_CONTEXT0_PAGE_TABLE_END_ADDR_LO32:
        vmContext0.ptEndL = pkt->getLE<uint32_t>();
        break;
      case mmVM_CONTEXT0_PAGE_TABLE_END_ADDR_HI32:
        vmContext0.ptEndH = pkt->getLE<uint32_t>();
        break;
      // The AGP and FB registers below store address bits 47:24 in register
      // bits 23:0. "Top" values get the low 24 bits set to ones.
      case mmMC_VM_AGP_TOP:
        {
            uint32_t val = pkt->getLE<uint32_t>();
            vmContext0.agpTop = (((Addr)bits(val, 23, 0)) << 24) | 0xffffff;
        }
        break;
      case mmMC_VM_AGP_BOT:
        {
            uint32_t val = pkt->getLE<uint32_t>();
            vmContext0.agpBot = ((Addr)bits(val, 23, 0)) << 24;
        }
        break;
      case mmMC_VM_AGP_BASE:
        {
            uint32_t val = pkt->getLE<uint32_t>();
            vmContext0.agpBase = ((Addr)bits(val, 23, 0)) << 24;
        }
        break;
      case mmMC_VM_FB_LOCATION_TOP:
        {
            uint32_t val = pkt->getLE<uint32_t>();
            vmContext0.fbTop = (((Addr)bits(val, 23, 0)) << 24) | 0xffffff;
        }
        break;
      case mmMC_VM_FB_LOCATION_BASE:
        {
            uint32_t val = pkt->getLE<uint32_t>();
            vmContext0.fbBase = ((Addr)bits(val, 23, 0)) << 24;
        }
        break;
      case mmMC_VM_FB_OFFSET:
        {
            uint32_t val = pkt->getLE<uint32_t>();
            vmContext0.fbOffset = ((Addr)bits(val, 23, 0)) << 24;
        }
        break;
      // The system aperture registers store address bits 47:18 in register
      // bits 29:0.
      case mmMC_VM_SYSTEM_APERTURE_LOW_ADDR:
        {
            uint32_t val = pkt->getLE<uint32_t>();
            vmContext0.sysAddrL = ((Addr)bits(val, 29, 0)) << 18;
        }
        break;
      case mmMC_VM_SYSTEM_APERTURE_HIGH_ADDR:
        {
            uint32_t val = pkt->getLE<uint32_t>();
            vmContext0.sysAddrH = ((Addr)bits(val, 29, 0)) << 18;
        }
        break;
      default:
        break;
    }
}

/**
 * Write the VM state to a checkpoint: the VMID 0 page table base/start/end,
 * the AGP, framebuffer and system aperture values, the MMHUB range, and
 * the page table base/start/end of every per-VMID context.
 *
 * @param cp Checkpoint output stream.
 */
void
AMDGPUVM::serialize(CheckpointOut &cp) const
{
    // Copy the page table fields into plain locals so they are checkpointed
    // under the same names that unserialize() reads back.
    Addr vm0PTBase = vmContext0.ptBase;
    Addr vm0PTStart = vmContext0.ptStart;
    Addr vm0PTEnd = vmContext0.ptEnd;

    SERIALIZE_SCALAR(vm0PTBase);
    SERIALIZE_SCALAR(vm0PTStart);
    SERIALIZE_SCALAR(vm0PTEnd);

    SERIALIZE_SCALAR(vmContext0.agpBase);
    SERIALIZE_SCALAR(vmContext0.agpTop);
    SERIALIZE_SCALAR(vmContext0.agpBot);
    SERIALIZE_SCALAR(vmContext0.fbBase);
    SERIALIZE_SCALAR(vmContext0.fbTop);
    SERIALIZE_SCALAR(vmContext0.fbOffset);
    SERIALIZE_SCALAR(vmContext0.sysAddrL);
    SERIALIZE_SCALAR(vmContext0.sysAddrH);

    SERIALIZE_SCALAR(mmhubBase);
    SERIALIZE_SCALAR(mmhubTop);

    // Flatten the per-VMID contexts into three parallel arrays.
    Addr ptBase[AMDGPU_VM_COUNT];
    Addr ptStart[AMDGPU_VM_COUNT];
    Addr ptEnd[AMDGPU_VM_COUNT];
    for (int i = 0; i < AMDGPU_VM_COUNT; i++) {
        ptBase[i] = vmContexts[i].ptBase;
        ptStart[i] = vmContexts[i].ptStart;
        ptEnd[i] = vmContexts[i].ptEnd;
    }
    SERIALIZE_ARRAY(ptBase, AMDGPU_VM_COUNT);
    SERIALIZE_ARRAY(ptStart, AMDGPU_VM_COUNT);
    SERIALIZE_ARRAY(ptEnd, AMDGPU_VM_COUNT);
}

/**
 * Restore the VM state written by serialize().
 *
 * @param cp Checkpoint input to read from.
 */
void
AMDGPUVM::unserialize(CheckpointIn &cp)
{
    // Unserialize requires fields not be packed
    Addr vm0PTBase;
    Addr vm0PTStart;
    Addr vm0PTEnd;

    UNSERIALIZE_SCALAR(vm0PTBase);
    UNSERIALIZE_SCALAR(vm0PTStart);
    UNSERIALIZE_SCALAR(vm0PTEnd);

    vmContext0.ptBase = vm0PTBase;
    vmContext0.ptStart = vm0PTStart;
    vmContext0.ptEnd = vm0PTEnd;

    UNSERIALIZE_SCALAR(vmContext0.agpBase);
    UNSERIALIZE_SCALAR(vmContext0.agpTop);
    UNSERIALIZE_SCALAR(vmContext0.agpBot);
    UNSERIALIZE_SCALAR(vmContext0.fbBase);
    UNSERIALIZE_SCALAR(vmContext0.fbTop);
    UNSERIALIZE_SCALAR(vmContext0.fbOffset);
    UNSERIALIZE_SCALAR(vmContext0.sysAddrL);
    UNSERIALIZE_SCALAR(vmContext0.sysAddrH);

    UNSERIALIZE_SCALAR(mmhubBase);
    UNSERIALIZE_SCALAR(mmhubTop);

    // Read the three parallel arrays and scatter them back into the
    // per-VMID contexts.
    Addr ptBase[AMDGPU_VM_COUNT];
    Addr ptStart[AMDGPU_VM_COUNT];
    Addr ptEnd[AMDGPU_VM_COUNT];

    UNSERIALIZE_ARRAY(ptBase, AMDGPU_VM_COUNT);
    UNSERIALIZE_ARRAY(ptStart, AMDGPU_VM_COUNT);
    UNSERIALIZE_ARRAY(ptEnd, AMDGPU_VM_COUNT);

    for (int i = 0; i < AMDGPU_VM_COUNT; i++) {
        vmContexts[i].ptBase = ptBase[i];
        vmContexts[i].ptStart = ptStart[i];
        vmContexts[i].ptEnd = ptEnd[i];
    }
}

/**
 * Translate one chunk of an AGP-aperture access.
 *
 * The chunk is clamped so it does not cross an AMDGPU_AGP_PAGE_SIZE
 * boundary, and the physical address is the offset of the virtual address
 * from the AGP bottom, rebased onto the AGP base.
 *
 * @param range In: vaddr and remaining size. Out: clamped size and paddr.
 */
void
AMDGPUVM::AGPTranslationGen::translate(Range &range) const
{
    assert(vm->inAGP(range.vaddr));

    // Find the end of the page containing vaddr; an already aligned vaddr
    // covers one whole page.
    Addr next = roundUp(range.vaddr, AMDGPU_AGP_PAGE_SIZE);
    if (next == range.vaddr)
        next += AMDGPU_AGP_PAGE_SIZE;

    range.size = std::min(range.size, next - range.vaddr);
    range.paddr = range.vaddr - vm->getAGPBot() + vm->getAGPBase();

    DPRINTF(AMDGPUDevice, "AMDGPUVM: AGP translation %#lx -> %#lx\n",
            range.vaddr, range.paddr);
}

/**
 * Translate one chunk of a GART access using the GART table held by the VM.
 *
 * The chunk is clamped so it does not cross an AMDGPU_GART_PAGE_SIZE
 * boundary. If no GART entry exists for the address, a warning is emitted
 * and the virtual address is used as the physical address.
 *
 * @param range In: vaddr and remaining size. Out: clamped size and paddr.
 */
void
AMDGPUVM::GARTTranslationGen::translate(Range &range) const
{
    Addr next = roundUp(range.vaddr, AMDGPU_GART_PAGE_SIZE);
    if (next == range.vaddr)
        next += AMDGPU_GART_PAGE_SIZE;
    range.size = std::min(range.size, next - range.vaddr);

    // Page number of the access (4KiB granularity).
    Addr gart_addr = bits(range.vaddr, 63, 12);

    // This table is a bit hard to iterate over. If we cross a page, the next
    // PTE is not necessarily the next entry but actually 7 entries away.
    Addr lsb = bits(gart_addr, 2, 0);
    gart_addr += lsb * 7;

    // GART is a single level translation, so the value at the "virtual" addr
    // is the PTE containing the physical address.
    auto result = vm->gartTable.find(gart_addr);
    if (result == vm->gartTable.end()) {
        // There is no recovery mechanism for invalid GART entries, so do
        // not raise a fault. Warn and fall back to an identity mapping.
        warn("GART translation for %p not found", range.vaddr);

        // Some PM4 packets have register addresses which we ignore. In that
        // case just return the vaddr rather than faulting.
        range.paddr = range.vaddr;
    } else {
        // Combine PTE bits 47:12 (physical page) with the page offset.
        Addr pte = result->second;
        Addr lower_bits = bits(range.vaddr, 11, 0);
        range.paddr = (bits(pte, 47, 12) << 12) | lower_bits;
    }

    DPRINTF(AMDGPUDevice, "AMDGPUVM: GART translation %#lx -> %#lx\n",
            range.vaddr, range.paddr);
}

/**
 * Translate one chunk of an MMHUB-aperture access.
 *
 * The chunk is clamped so it does not cross an AMDGPU_MMHUB_PAGE_SIZE
 * boundary, and the physical address is the offset of the virtual address
 * from the MMHUB base.
 *
 * @param range In: vaddr and remaining size. Out: clamped size and paddr.
 */
void
AMDGPUVM::MMHUBTranslationGen::translate(Range &range) const
{
    assert(vm->inMMHUB(range.vaddr));

    Addr next = roundUp(range.vaddr, AMDGPU_MMHUB_PAGE_SIZE);
    if (next == range.vaddr)
        next += AMDGPU_MMHUB_PAGE_SIZE;

    range.size = std::min(range.size, next - range.vaddr);
    range.paddr = range.vaddr - vm->getMMHUBBase();

    DPRINTF(AMDGPUDevice, "AMDGPUVM: MMHUB translation %#lx -> %#lx\n",
            range.vaddr, range.paddr);
}

/**
 * Translate one chunk of a user-space (per-VMID) access by performing a
 * functional page table walk.
 *
 * The walker reports the page size as a log2 byte count; the chunk is
 * clamped so it does not cross that page's boundary. A walk that returns a
 * fault terminates the simulation via fatal().
 *
 * @param range In: vaddr and remaining size. Out: clamped size and paddr.
 */
void
AMDGPUVM::UserTranslationGen::translate(Range &range) const
{
    // Get base address of the page table for this vmid
    Addr base = vm->getPageTableBase(vmid);
    Addr start = vm->getPageTableStart(vmid);
    DPRINTF(AMDGPUDevice, "User tl base %#lx start %#lx walker %p\n",
            base, start, walker);

    bool dummy;
    unsigned logBytes;
    // The walker takes the address by reference: seeded with the virtual
    // address, and used as the translated physical address afterwards.
    Addr paddr = range.vaddr;
    Fault fault = walker->startFunctional(base, paddr, logBytes,
                                          BaseMMU::Mode::Read, dummy);
    if (fault != NoFault) {
        fatal("User translation fault");
    }

    // GPU page size is variable. Use logBytes to determine size. Shift an
    // Addr-wide one: a plain int literal would overflow for logBytes >= 31.
    const Addr page_size = static_cast<Addr>(1) << logBytes;
    Addr next = roundUp(range.vaddr, page_size);
    if (next == range.vaddr)
        // We don't know the size of the next page, use default.
        next += AMDGPU_USER_PAGE_SIZE;

    range.size = std::min(range.size, next - range.vaddr);
    range.paddr = paddr;
}

} // namespace gem5