dev-amdgpu: Add PM4PP, VMID, Linux definitions
The PM4 packet processor handles all non-HSA GPU packets, such as packets for (un)mapping HSA queues. This commit pulls many Linux structs and defines out into their own files for clarity. Finally, it implements the VMID-related functions in the AMDGPU device. Change-Id: I5f0057209305404df58aff2c4cd07762d1a31690 Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/53068 Reviewed-by: Matt Sinclair <mattdsinclair@gmail.com> Maintainer: Matt Sinclair <mattdsinclair@gmail.com> Tested-by: kokoro <noreply+kokoro@google.com>
This commit is contained in:
@@ -98,6 +98,8 @@ def makeGpuFSSystem(args):
|
||||
shader.dispatcher = dispatcher
|
||||
shader.gpu_cmd_proc = gpu_cmd_proc
|
||||
|
||||
system.pc.south_bridge.gpu.cp = gpu_cmd_proc
|
||||
|
||||
# GPU Interrupt Handler
|
||||
device_ih = AMDGPUInterruptHandler()
|
||||
system.pc.south_bridge.gpu.device_ih = device_ih
|
||||
@@ -112,6 +114,10 @@ def makeGpuFSSystem(args):
|
||||
system.pc.south_bridge.gpu.sdma0 = sdma0
|
||||
system.pc.south_bridge.gpu.sdma1 = sdma1
|
||||
|
||||
# Setup PM4 packet processor
|
||||
pm4_pkt_proc = PM4PacketProcessor()
|
||||
system.pc.south_bridge.gpu.pm4_pkt_proc = pm4_pkt_proc
|
||||
|
||||
# GPU data path
|
||||
gpu_mem_mgr = AMDGPUMemoryManager()
|
||||
system.pc.south_bridge.gpu.memory_manager = gpu_mem_mgr
|
||||
@@ -123,6 +129,7 @@ def makeGpuFSSystem(args):
|
||||
system._dma_ports.append(sdma0)
|
||||
system._dma_ports.append(sdma1)
|
||||
system._dma_ports.append(device_ih)
|
||||
system._dma_ports.append(pm4_pkt_proc)
|
||||
|
||||
gpu_hsapp.pio = system.iobus.mem_side_ports
|
||||
gpu_cmd_proc.pio = system.iobus.mem_side_ports
|
||||
@@ -130,6 +137,7 @@ def makeGpuFSSystem(args):
|
||||
sdma0.pio = system.iobus.mem_side_ports
|
||||
sdma1.pio = system.iobus.mem_side_ports
|
||||
device_ih.pio = system.iobus.mem_side_ports
|
||||
pm4_pkt_proc.pio = system.iobus.mem_side_ports
|
||||
|
||||
# Create Ruby system using Ruby.py for now
|
||||
Ruby.create_system(args, True, system, system.iobus,
|
||||
|
||||
@@ -85,6 +85,7 @@ class AMDGPUDevice(PciDevice):
|
||||
# The config script should not create a new cp here but rather assign the
|
||||
# same cp that is assigned to the Shader SimObject.
|
||||
cp = Param.GPUCommandProcessor(NULL, "Command Processor")
|
||||
pm4_pkt_proc = Param.PM4PacketProcessor("PM4 Packet Processor")
|
||||
memory_manager = Param.AMDGPUMemoryManager("GPU Memory Manager")
|
||||
memories = VectorParam.AbstractMemory([], "All memories in the device")
|
||||
device_ih = Param.AMDGPUInterruptHandler("GPU Interrupt handler")
|
||||
@@ -97,6 +98,11 @@ class SDMAEngine(DmaVirtDevice):
|
||||
gpu_device = Param.AMDGPUDevice(NULL, 'GPU Controller')
|
||||
walker = Param.VegaPagetableWalker("Page table walker")
|
||||
|
||||
class PM4PacketProcessor(DmaVirtDevice):
    # SimObject declaration for the PM4 packet processor. It derives from
    # DmaVirtDevice and is bound to the C++ class named in cxx_class, whose
    # declaration lives in the header named in cxx_header.
    type = "PM4PacketProcessor"
    cxx_class = "gem5::PM4PacketProcessor"
    cxx_header = 'dev/amdgpu/pm4_packet_processor.hh'
|
||||
|
||||
class AMDGPUMemoryManager(ClockedObject):
|
||||
type = 'AMDGPUMemoryManager'
|
||||
cxx_header = 'dev/amdgpu/memory_manager.hh'
|
||||
|
||||
@@ -35,16 +35,19 @@ if not env['BUILD_GPU']:
|
||||
# Controllers
|
||||
SimObject('AMDGPU.py', sim_objects=['AMDGPUDevice', 'AMDGPUInterruptHandler',
|
||||
'AMDGPUMemoryManager', 'AMDGPUSystemHub',
|
||||
'SDMAEngine'], tags='x86 isa')
|
||||
'SDMAEngine', 'PM4PacketProcessor'],
|
||||
tags='x86 isa')
|
||||
|
||||
Source('amdgpu_device.cc', tags='x86 isa')
|
||||
Source('amdgpu_vm.cc', tags='x86 isa')
|
||||
Source('interrupt_handler.cc', tags='x86 isa')
|
||||
Source('memory_manager.cc', tags='x86 isa')
|
||||
Source('mmio_reader.cc', tags='x86 isa')
|
||||
Source('pm4_packet_processor.cc', tags='x86 isa')
|
||||
Source('sdma_engine.cc', tags='x86 isa')
|
||||
Source('system_hub.cc', tags='x86 isa')
|
||||
|
||||
DebugFlag('AMDGPUDevice', tags='x86 isa')
|
||||
DebugFlag('AMDGPUMem', tags='x86 isa')
|
||||
DebugFlag('PM4PacketProcessor', tags='x86 isa')
|
||||
DebugFlag('SDMAEngine', tags='x86 isa')
|
||||
|
||||
@@ -36,7 +36,10 @@
|
||||
#include "debug/AMDGPUDevice.hh"
|
||||
#include "dev/amdgpu/amdgpu_vm.hh"
|
||||
#include "dev/amdgpu/interrupt_handler.hh"
|
||||
#include "dev/amdgpu/pm4_packet_processor.hh"
|
||||
#include "dev/amdgpu/sdma_engine.hh"
|
||||
#include "dev/hsa/hw_scheduler.hh"
|
||||
#include "gpu-compute/gpu_command_processor.hh"
|
||||
#include "mem/packet.hh"
|
||||
#include "mem/packet_access.hh"
|
||||
#include "params/AMDGPUDevice.hh"
|
||||
@@ -48,9 +51,9 @@ namespace gem5
|
||||
|
||||
AMDGPUDevice::AMDGPUDevice(const AMDGPUDeviceParams &p)
|
||||
: PciDevice(p), gpuMemMgr(p.memory_manager), deviceIH(p.device_ih),
|
||||
sdma0(p.sdma0), sdma1(p.sdma1),
|
||||
sdma0(p.sdma0), sdma1(p.sdma1), pm4PktProc(p.pm4_pkt_proc), cp(p.cp),
|
||||
checkpoint_before_mmios(p.checkpoint_before_mmios),
|
||||
init_interrupt_count(0)
|
||||
init_interrupt_count(0), _lastVMID(0)
|
||||
{
|
||||
// Loading the rom binary dumped from hardware.
|
||||
std::ifstream romBin;
|
||||
@@ -73,6 +76,7 @@ AMDGPUDevice::AMDGPUDevice(const AMDGPUDeviceParams &p)
|
||||
sdma1->setGPUDevice(this);
|
||||
sdma1->setId(1);
|
||||
deviceIH->setGPUDevice(this);
|
||||
pm4PktProc->setGPUDevice(this);
|
||||
}
|
||||
|
||||
void
|
||||
@@ -233,6 +237,14 @@ AMDGPUDevice::writeDoorbell(PacketPtr pkt, Addr offset)
|
||||
DPRINTF(AMDGPUDevice, "Doorbell offset %p queue: %d\n",
|
||||
offset, q_type);
|
||||
switch (q_type) {
|
||||
case Compute:
|
||||
pm4PktProc->process(pm4PktProc->getQueue(offset),
|
||||
pkt->getLE<uint64_t>());
|
||||
break;
|
||||
case Gfx:
|
||||
pm4PktProc->process(pm4PktProc->getQueue(offset, true),
|
||||
pkt->getLE<uint64_t>());
|
||||
break;
|
||||
case SDMAGfx: {
|
||||
SDMAEngine *sdmaEng = getSDMAEngine(offset);
|
||||
sdmaEng->processGfx(pkt->getLE<uint64_t>());
|
||||
@@ -241,9 +253,18 @@ AMDGPUDevice::writeDoorbell(PacketPtr pkt, Addr offset)
|
||||
SDMAEngine *sdmaEng = getSDMAEngine(offset);
|
||||
sdmaEng->processPage(pkt->getLE<uint64_t>());
|
||||
} break;
|
||||
case ComputeAQL: {
|
||||
cp->hsaPacketProc().hwScheduler()->write(offset,
|
||||
pkt->getLE<uint64_t>() + 1);
|
||||
pm4PktProc->updateReadIndex(offset, pkt->getLE<uint64_t>() + 1);
|
||||
} break;
|
||||
case InterruptHandler:
|
||||
deviceIH->updateRptr(pkt->getLE<uint32_t>());
|
||||
break;
|
||||
case RLC: {
|
||||
panic("RLC queues not yet supported. Run with the environment "
|
||||
"variable HSA_ENABLE_SDMA set to False");
|
||||
} break;
|
||||
default:
|
||||
panic("Write to unkown queue type!");
|
||||
}
|
||||
@@ -269,6 +290,11 @@ AMDGPUDevice::writeMMIO(PacketPtr pkt, Addr offset)
|
||||
case SDMA1_BASE:
|
||||
sdma1->writeMMIO(pkt, aperture_offset >> SDMA_OFFSET_SHIFT);
|
||||
break;
|
||||
/* Write a general register to the graphics register bus manager. */
|
||||
case GRBM_BASE:
|
||||
gpuvm.writeMMIO(pkt, aperture_offset >> GRBM_OFFSET_SHIFT);
|
||||
pm4PktProc->writeMMIO(pkt, aperture_offset >> GRBM_OFFSET_SHIFT);
|
||||
break;
|
||||
/* Write a register to the interrupt handler. */
|
||||
case IH_BASE:
|
||||
deviceIH->writeMMIO(pkt, aperture_offset >> IH_OFFSET_SHIFT);
|
||||
@@ -346,6 +372,19 @@ AMDGPUDevice::write(PacketPtr pkt)
|
||||
return pioDelay;
|
||||
}
|
||||
|
||||
uint32_t
|
||||
AMDGPUDevice::getRegVal(uint32_t addr)
|
||||
{
|
||||
return regs[addr];
|
||||
}
|
||||
void
|
||||
AMDGPUDevice::setRegVal(uint32_t addr, uint32_t value)
|
||||
{
|
||||
DPRINTF(AMDGPUDevice, "Setting register 0x%lx to %x\n",
|
||||
addr, value);
|
||||
regs[addr] = value;
|
||||
}
|
||||
|
||||
void
|
||||
AMDGPUDevice::setDoorbellType(uint32_t offset, QueueType qt)
|
||||
{
|
||||
@@ -359,6 +398,28 @@ AMDGPUDevice::setSDMAEngine(Addr offset, SDMAEngine *eng)
|
||||
sdmaEngs[offset] = eng;
|
||||
}
|
||||
|
||||
/**
 * Map an integer SDMA ID to the corresponding SDMA engine pointer.
 *
 * PM4 packets select SDMAs using an integer ID; this helper translates that
 * ID into the engine handle held by the device and rejects invalid IDs.
 *
 * @param id Integer SDMA ID (0 or 1).
 * @return sdma0 for id 0, sdma1 for id 1. Any other id triggers panic().
 */
SDMAEngine*
AMDGPUDevice::getSDMAById(int id)
{
    if (id == 0) {
        return sdma0;
    }

    if (id == 1) {
        return sdma1;
    }

    panic("No SDMA with id %d\n", id);

    // Kept after the panic so every control path has a return statement.
    return nullptr;
}
|
||||
|
||||
SDMAEngine*
|
||||
AMDGPUDevice::getSDMAEngine(Addr offset)
|
||||
{
|
||||
@@ -385,4 +446,62 @@ AMDGPUDevice::unserialize(CheckpointIn &cp)
|
||||
PciDevice::unserialize(cp);
|
||||
}
|
||||
|
||||
uint16_t
|
||||
AMDGPUDevice::allocateVMID(uint16_t pasid)
|
||||
{
|
||||
for (uint16_t vmid = 1; vmid < AMDGPU_VM_COUNT; vmid++) {
|
||||
auto result = usedVMIDs.find(vmid);
|
||||
if (result == usedVMIDs.end()) {
|
||||
idMap.insert(std::make_pair(pasid, vmid));
|
||||
usedVMIDs[vmid] = {};
|
||||
_lastVMID = vmid;
|
||||
return vmid;
|
||||
}
|
||||
}
|
||||
panic("All VMIDs have been assigned");
|
||||
}
|
||||
|
||||
void
|
||||
AMDGPUDevice::deallocateVmid(uint16_t vmid)
|
||||
{
|
||||
usedVMIDs.erase(vmid);
|
||||
}
|
||||
|
||||
void
|
||||
AMDGPUDevice::deallocatePasid(uint16_t pasid)
|
||||
{
|
||||
auto result = idMap.find(pasid);
|
||||
assert(result != idMap.end());
|
||||
if (result == idMap.end()) return;
|
||||
uint16_t vmid = result->second;
|
||||
|
||||
idMap.erase(result);
|
||||
usedVMIDs.erase(vmid);
|
||||
}
|
||||
|
||||
void
|
||||
AMDGPUDevice::deallocateAllQueues()
|
||||
{
|
||||
idMap.erase(idMap.begin(), idMap.end());
|
||||
usedVMIDs.erase(usedVMIDs.begin(), usedVMIDs.end());
|
||||
}
|
||||
|
||||
void
|
||||
AMDGPUDevice::mapDoorbellToVMID(Addr doorbell, uint16_t vmid)
|
||||
{
|
||||
doorbellVMIDMap[doorbell] = vmid;
|
||||
}
|
||||
|
||||
std::unordered_map<uint16_t, std::set<int>>&
|
||||
AMDGPUDevice::getUsedVMIDs()
|
||||
{
|
||||
return usedVMIDs;
|
||||
}
|
||||
|
||||
void
|
||||
AMDGPUDevice::insertQId(uint16_t vmid, int id)
|
||||
{
|
||||
usedVMIDs[vmid].insert(id);
|
||||
}
|
||||
|
||||
} // namespace gem5
|
||||
|
||||
@@ -112,6 +112,8 @@ class AMDGPUDevice : public PciDevice
|
||||
SDMAEngine *sdma0;
|
||||
SDMAEngine *sdma1;
|
||||
std::unordered_map<uint32_t, SDMAEngine *> sdmaEngs;
|
||||
PM4PacketProcessor *pm4PktProc;
|
||||
GPUCommandProcessor *cp;
|
||||
|
||||
/**
|
||||
* Initial checkpoint support variables.
|
||||
@@ -129,6 +131,16 @@ class AMDGPUDevice : public PciDevice
|
||||
uint64_t mmhubBase = 0x0;
|
||||
uint64_t mmhubTop = 0x0;
|
||||
|
||||
// VMIDs data structures
|
||||
// map of pasids to vmids
|
||||
std::unordered_map<uint16_t, uint16_t> idMap;
|
||||
// map of doorbell offsets to vmids
|
||||
std::unordered_map<Addr, uint16_t> doorbellVMIDMap;
|
||||
// map of vmid to all queue ids using that vmid
|
||||
std::unordered_map<uint16_t, std::set<int>> usedVMIDs;
|
||||
// last vmid allocated by map_process PM4 packet
|
||||
uint16_t _lastVMID;
|
||||
|
||||
public:
|
||||
AMDGPUDevice(const AMDGPUDeviceParams &p);
|
||||
|
||||
@@ -155,9 +167,11 @@ class AMDGPUDevice : public PciDevice
|
||||
* Get handles to GPU blocks.
|
||||
*/
|
||||
AMDGPUInterruptHandler* getIH() { return deviceIH; }
|
||||
SDMAEngine* getSDMAById(int id);
|
||||
SDMAEngine* getSDMAEngine(Addr offset);
|
||||
AMDGPUVM &getVM() { return gpuvm; }
|
||||
AMDGPUMemoryManager* getMemMgr() { return gpuMemMgr; }
|
||||
GPUCommandProcessor* CP() { return cp; }
|
||||
|
||||
/**
|
||||
* Set handles to GPU blocks.
|
||||
@@ -165,10 +179,28 @@ class AMDGPUDevice : public PciDevice
|
||||
void setDoorbellType(uint32_t offset, QueueType qt);
|
||||
void setSDMAEngine(Addr offset, SDMAEngine *eng);
|
||||
|
||||
/**
|
||||
* Register value getter/setter. Used by other GPU blocks to change
|
||||
* values from incoming driver/user packets.
|
||||
*/
|
||||
uint32_t getRegVal(uint32_t addr);
|
||||
void setRegVal(uint32_t addr, uint32_t value);
|
||||
|
||||
/**
|
||||
* Methods related to translations and system/device memory.
|
||||
*/
|
||||
RequestorID vramRequestorId() { return gpuMemMgr->getRequestorID(); }
|
||||
|
||||
/* HW context stuff */
|
||||
uint16_t lastVMID() { return _lastVMID; }
|
||||
uint16_t allocateVMID(uint16_t pasid);
|
||||
void deallocateVmid(uint16_t vmid);
|
||||
void deallocatePasid(uint16_t pasid);
|
||||
void deallocateAllQueues();
|
||||
void mapDoorbellToVMID(Addr doorbell, uint16_t vmid);
|
||||
uint16_t getVMID(Addr doorbell) { return doorbellVMIDMap[doorbell]; }
|
||||
std::unordered_map<uint16_t, std::set<int>>& getUsedVMIDs();
|
||||
void insertQId(uint16_t vmid, int id);
|
||||
};
|
||||
|
||||
} // namespace gem5
|
||||
|
||||
@@ -248,7 +248,7 @@ AMDGPUVM::AGPTranslationGen::translate(Range &range) const
|
||||
range.size = std::min(range.size, next - range.vaddr);
|
||||
range.paddr = range.vaddr - vm->getAGPBot() + vm->getAGPBase();
|
||||
|
||||
printf("AMDGPUVM: AGP translation %#lx -> %#lx\n",
|
||||
DPRINTF(AMDGPUDevice, "AMDGPUVM: AGP translation %#lx -> %#lx\n",
|
||||
range.vaddr, range.paddr);
|
||||
}
|
||||
|
||||
@@ -284,7 +284,7 @@ AMDGPUVM::GARTTranslationGen::translate(Range &range) const
|
||||
range.paddr = (bits(pte, 47, 12) << 12) | lower_bits;
|
||||
}
|
||||
|
||||
printf("AMDGPUVM: GART translation %#lx -> %#lx\n",
|
||||
DPRINTF(AMDGPUDevice, "AMDGPUVM: GART translation %#lx -> %#lx\n",
|
||||
range.vaddr, range.paddr);
|
||||
}
|
||||
|
||||
@@ -300,7 +300,7 @@ AMDGPUVM::MMHUBTranslationGen::translate(Range &range) const
|
||||
range.size = std::min(range.size, next - range.vaddr);
|
||||
range.paddr = range.vaddr - vm->getMMHUBBase();
|
||||
|
||||
printf("AMDGPUVM: MMHUB translation %#lx -> %#lx\n",
|
||||
DPRINTF(AMDGPUDevice, "AMDGPUVM: MMHUB translation %#lx -> %#lx\n",
|
||||
range.vaddr, range.paddr);
|
||||
}
|
||||
|
||||
@@ -310,7 +310,8 @@ AMDGPUVM::UserTranslationGen::translate(Range &range) const
|
||||
// Get base address of the page table for this vmid
|
||||
Addr base = vm->getPageTableBase(vmid);
|
||||
Addr start = vm->getPageTableStart(vmid);
|
||||
printf("User tl base %#lx start %#lx walker %p\n", base, start, walker);
|
||||
DPRINTF(AMDGPUDevice, "User tl base %#lx start %#lx walker %p\n",
|
||||
base, start, walker);
|
||||
|
||||
bool dummy;
|
||||
unsigned logBytes;
|
||||
|
||||
@@ -45,10 +45,10 @@
|
||||
* MMIO offsets for graphics register bus manager (GRBM). These values were
|
||||
* taken from linux header files. The header files can be found here:
|
||||
*
|
||||
* https://github.com/torvalds/linux/blob/master/drivers/gpu/drm/amd/include/
|
||||
* asic_reg/gc/gc_9_0_offset.h
|
||||
* https://github.com/torvalds/linux/blob/master/drivers/gpu/drm/amd/include/
|
||||
* asic_reg/mmhub/mmhub_1_0_offset.h
|
||||
* https://github.com/RadeonOpenCompute/ROCK-Kernel-Driver/blob/rocm-4.2.0/
|
||||
* drivers/gpu/drm/amd/include/asic_reg/gc/gc_9_0_offset.h
|
||||
* https://github.com/RadeonOpenCompute/ROCK-Kernel-Driver/blob/rocm-4.2.0/
|
||||
* drivers/gpu/drm/amd/include/asic_reg/mmhub/mmhub_1_0_offset.h
|
||||
*/
|
||||
|
||||
#define mmVM_INVALIDATE_ENG17_ACK 0x08c6
|
||||
@@ -256,6 +256,12 @@ class AMDGPUVM : public Serializable
|
||||
/**
|
||||
* Page table base/start accessors for user VMIDs.
|
||||
*/
|
||||
void
|
||||
setPageTableBase(uint16_t vmid, Addr ptBase)
|
||||
{
|
||||
vmContexts[vmid].ptBase = ptBase;
|
||||
}
|
||||
|
||||
Addr
|
||||
getPageTableBase(uint16_t vmid)
|
||||
{
|
||||
|
||||
@@ -48,11 +48,27 @@
|
||||
namespace gem5
|
||||
{
|
||||
|
||||
/*
|
||||
/**
|
||||
* Defines from driver code. Taken from
|
||||
* https://github.com/RadeonOpenCompute/ROCK-Kernel-Driver/blob/rocm-4.2.0/
|
||||
* drivers/gpu/drm/amd/include/soc15_ih_clientid.h
|
||||
*/
|
||||
// Interrupt handler client IDs for the RLC and the two SDMA engines.
enum soc15_ih_clientid
{
    SOC15_IH_CLIENTID_RLC   = 0x07,
    SOC15_IH_CLIENTID_SDMA0 = 0x08,
    SOC15_IH_CLIENTID_SDMA1 = 0x09
};
|
||||
|
||||
// Interrupt source IDs. Only the trap source is defined here.
enum ihSourceId
{
    TRAP_ID = 224
};
|
||||
|
||||
/**
|
||||
* MSI-style interrupts. Send a "cookie" response to clear interrupts.
|
||||
* From [1] we know the size of the struct is 8 dwords. Then we can look at
|
||||
* the register shift offsets in [2] to guess the rest. Or we can also look
|
||||
* at [3].
|
||||
* From [1] we know the size of the struct is 8 dwords. Then we can look at the register shift offsets in [2] to guess the rest.
|
||||
* Or we can also look at [3].
|
||||
*
|
||||
* [1] https://github.com/RadeonOpenCompute/ROCK-Kernel-Driver/blob/roc-4.3.x/
|
||||
* drivers/gpu/drm/amd/amdkfd/kfd_device.c#L316
|
||||
@@ -83,6 +99,9 @@ typedef struct
|
||||
uint32_t source_data_dw4;
|
||||
} AMDGPUInterruptCookie;
|
||||
|
||||
/**
|
||||
* Struct to contain all interrupt handler related registers.
|
||||
*/
|
||||
typedef struct
|
||||
{
|
||||
uint32_t IH_Cntl;
|
||||
|
||||
506
src/dev/amdgpu/pm4_defines.hh
Normal file
506
src/dev/amdgpu/pm4_defines.hh
Normal file
@@ -0,0 +1,506 @@
|
||||
/*
|
||||
* Copyright (c) 2021 Advanced Micro Devices, Inc.
|
||||
* All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
*
|
||||
* 1. Redistributions of source code must retain the above copyright notice,
|
||||
* this list of conditions and the following disclaimer.
|
||||
*
|
||||
* 2. Redistributions in binary form must reproduce the above copyright notice,
|
||||
* this list of conditions and the following disclaimer in the documentation
|
||||
* and/or other materials provided with the distribution.
|
||||
*
|
||||
* 3. Neither the name of the copyright holder nor the names of its
|
||||
* contributors may be used to endorse or promote products derived from this
|
||||
* software without specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
|
||||
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
* POSSIBILITY OF SUCH DAMAGE.
|
||||
*
|
||||
*/
|
||||
|
||||
#ifndef __DEV_AMDGPU_PM4_DEFINES_H__
|
||||
#define __DEV_AMDGPU_PM4_DEFINES_H__
|
||||
|
||||
#include <cstdlib>
|
||||
#include <iostream>
|
||||
#include <vector>
|
||||
|
||||
#include "base/types.hh"
|
||||
|
||||
namespace gem5
|
||||
{
|
||||
|
||||
/**
|
||||
* PM4 opcodes. Taken from linux tree at
|
||||
* https://github.com/RadeonOpenCompute/ROCK-Kernel-Driver/blob/rocm-4.3.x/
|
||||
* drivers/gpu/drm/amd/amdkfd/kfd_pm4_opcodes.h
|
||||
*/
|
||||
// Opcode values carried in the opcode field of a PM4 packet header.
enum it_opcode_type
{
    IT_NOP             = 0x10,
    IT_WRITE_DATA      = 0x37,
    IT_WAIT_REG_MEM    = 0x3C,
    IT_INDIRECT_BUFFER = 0x3F,
    IT_RELEASE_MEM     = 0x49,
    IT_SET_UCONFIG_REG = 0x79,
    IT_SWITCH_BUFFER   = 0x8B,
    IT_MAP_PROCESS     = 0xA1,
    IT_MAP_QUEUES      = 0xA2,
    IT_UNMAP_QUEUES    = 0xA3,
    IT_QUERY_STATUS    = 0xA4,
    IT_RUN_LIST        = 0xA5,
};
|
||||
|
||||
/**
|
||||
* Value from vega10/pm4_header.h.
|
||||
*/
|
||||
#define PACKET3_SET_UCONFIG_REG_START 0x0000c000
|
||||
|
||||
/**
|
||||
* PM4 packets
|
||||
*/
|
||||
/**
 * PM4 packet header: one 32-bit word, viewable either as individual
 * bit-fields or as the raw value through `ordinal`.
 */
typedef struct GEM5_PACKED
{
    union
    {
        struct
        {
            // First 16 bits.
            uint16_t predicated : 1;
            uint16_t shader : 1;
            uint16_t reserved : 6;
            uint16_t opcode : 8;
            // Second 16 bits.
            uint16_t count : 14;
            uint16_t type : 2;
        };
        uint32_t ordinal;
    };
} PM4Header;
static_assert(sizeof(PM4Header) == 4);
|
||||
|
||||
typedef struct GEM5_PACKED
|
||||
{
|
||||
uint32_t reserved1 : 8;
|
||||
uint32_t destSel : 4;
|
||||
uint32_t reserved2 : 4;
|
||||
uint32_t addrIncr : 1;
|
||||
uint32_t reserved3 : 2;
|
||||
uint32_t resume : 1;
|
||||
uint32_t writeConfirm : 1;
|
||||
uint32_t reserved4 : 4;
|
||||
uint32_t cachePolicy : 2;
|
||||
uint32_t reserved5 : 5;
|
||||
union
|
||||
{
|
||||
struct
|
||||
{
|
||||
uint32_t destAddrLo;
|
||||
uint32_t destAddrHi;
|
||||
};
|
||||
uint64_t destAddr;
|
||||
};
|
||||
uint32_t data;
|
||||
} PM4WriteData;
|
||||
static_assert(sizeof(PM4WriteData) == 16);
|
||||
|
||||
/**
 * Body of a map-queues packet: two dwords of bit-fields followed by the
 * 64-bit mqdAddr and wptrAddr values, each also accessible as lo/hi halves.
 */
typedef struct GEM5_PACKED
{
    // First dword.
    uint32_t reserved1 : 4;
    uint32_t queueSel : 2;
    uint32_t reserved2 : 2;
    uint32_t vmid : 4;
    uint32_t reserved3 : 1;
    uint32_t me : 1;
    uint32_t pipe : 2;
    uint32_t queueSlot : 3;
    uint32_t reserved6 : 2;
    uint32_t queueType : 3;
    uint32_t allocFormat : 2;
    uint32_t engineSel : 3;
    uint32_t numQueues : 3;

    // Second dword.
    uint32_t reserved4 : 1;
    uint32_t checkDisable : 1;
    uint32_t doorbellOffset : 26;
    uint32_t reserved5 : 4;

    union
    {
        struct
        {
            uint32_t mqdAddrLo : 32;
            uint32_t mqdAddrHi : 32;
        };
        uint64_t mqdAddr;
    };

    union
    {
        struct
        {
            uint32_t wptrAddrLo : 32;
            uint32_t wptrAddrHi : 32;
        };
        uint64_t wptrAddr;
    };
} PM4MapQueues;
static_assert(sizeof(PM4MapQueues) == 24);
|
||||
|
||||
/**
 * Body of an unmap-queues packet: one control dword followed by four
 * dwords. The first of those four is a union that holds either a pasid or
 * doorbellOffset0; the remaining three hold doorbellOffset1..3.
 */
typedef struct GEM5_PACKED
{
    // Control dword.
    uint32_t action : 2;
    uint32_t reserved : 2;
    uint32_t queueSel : 2;
    uint32_t reserved1 : 20;
    uint32_t engineSel : 3;
    uint32_t numQueues : 3;

    // Two overlapping views of the same dword.
    union
    {
        struct
        {
            uint32_t pasid : 16;
            uint32_t reserved2 : 16;
        };
        struct
        {
            uint32_t reserved3 : 2;
            uint32_t doorbellOffset0 : 26;
            uint32_t reserved4 : 4;
        };
    };

    uint32_t reserved5 : 2;
    uint32_t doorbellOffset1 : 26;
    uint32_t reserved6 : 4;

    uint32_t reserved7 : 2;
    uint32_t doorbellOffset2 : 26;
    uint32_t reserved8 : 4;

    uint32_t reserved9 : 2;
    uint32_t doorbellOffset3 : 26;
    uint32_t reserved10 : 4;
} PM4UnmapQueues;
static_assert(sizeof(PM4UnmapQueues) == 20);
|
||||
|
||||
typedef struct GEM5_PACKED
|
||||
{
|
||||
uint32_t vmidMask : 16;
|
||||
uint32_t unmapLatency : 8;
|
||||
uint32_t reserved : 5;
|
||||
uint32_t queueType : 3;
|
||||
union
|
||||
{
|
||||
struct
|
||||
{
|
||||
uint32_t queueMaskLo;
|
||||
uint32_t queueMaskHi;
|
||||
};
|
||||
uint64_t queueMask;
|
||||
};
|
||||
union
|
||||
{
|
||||
struct
|
||||
{
|
||||
uint32_t gwsMaskLo;
|
||||
uint32_t gwsMaskHi;
|
||||
};
|
||||
uint64_t gwsMask;
|
||||
};
|
||||
uint16_t oacMask;
|
||||
uint16_t reserved1;
|
||||
uint32_t gdsHeapBase : 6;
|
||||
uint32_t reserved2 : 5;
|
||||
uint32_t gdsHeapSize : 6;
|
||||
uint32_t reserved3 : 15;
|
||||
} PM4SetResources;
|
||||
static_assert(sizeof(PM4SetResources) == 28);
|
||||
|
||||
typedef struct GEM5_PACKED
|
||||
{
|
||||
uint32_t pasid : 16;
|
||||
uint32_t reserved0 : 8;
|
||||
uint32_t diq : 1;
|
||||
uint32_t processQuantum : 7;
|
||||
union
|
||||
{
|
||||
struct
|
||||
{
|
||||
uint32_t ptBaseLo;
|
||||
uint32_t ptBaseHi;
|
||||
};
|
||||
uint64_t ptBase;
|
||||
};
|
||||
uint32_t shMemBases;
|
||||
uint32_t shMemConfig;
|
||||
uint32_t reserved1;
|
||||
uint32_t reserved2;
|
||||
uint32_t reserved3;
|
||||
uint32_t reserved4;
|
||||
uint32_t reserved5;
|
||||
union
|
||||
{
|
||||
struct
|
||||
{
|
||||
uint32_t gdsAddrLo;
|
||||
uint32_t gdsAddrHi;
|
||||
};
|
||||
uint64_t gdsAddr;
|
||||
};
|
||||
uint32_t numGws : 6;
|
||||
uint32_t reserved7 : 2;
|
||||
uint32_t numOac : 4;
|
||||
uint32_t reserved8 : 4;
|
||||
uint32_t gdsSize : 6;
|
||||
uint32_t numQueues : 10;
|
||||
union
|
||||
{
|
||||
struct
|
||||
{
|
||||
uint32_t completionSignalLo;
|
||||
uint32_t completionSignalHi;
|
||||
};
|
||||
uint64_t completionSignal;
|
||||
};
|
||||
} PM4MapProcess;
|
||||
static_assert(sizeof(PM4MapProcess) == 60);
|
||||
|
||||
/**
 * Body of a wait-reg-mem packet: a control dword, two dwords that each
 * overlay an 18-bit register address with one half of a memory address,
 * then the reference, mask and pollInterval dwords.
 */
typedef struct GEM5_PACKED
{
    // Control dword.
    uint32_t function : 4;
    uint32_t memSpace : 2;
    uint32_t operation : 2;
    uint32_t reserved1 : 24;

    // Register address 1, or low half of the memory address.
    union
    {
        struct
        {
            uint32_t regAddr1 : 18;
            uint32_t reserved2 : 14;
        };
        uint32_t memAddrLo;
    };

    // Register address 2, or high half of the memory address.
    union
    {
        struct
        {
            uint32_t regAddr2 : 18;
            uint32_t reserved3 : 14;
        };
        uint32_t memAddrHi;
    };

    uint32_t reference;
    uint32_t mask;
    uint32_t pollInterval;
} PM4WaitRegMem;
static_assert(sizeof(PM4WaitRegMem) == 24);
|
||||
|
||||
/**
 * Two-dword register write: a 16-bit register offset (upper 16 bits
 * reserved) followed by the 32-bit register data.
 */
typedef struct GEM5_PACKED
{
    uint32_t regOffset : 16;
    uint32_t reserved : 16;

    uint32_t regData;
} PM4SetUConfig;
static_assert(sizeof(PM4SetUConfig) == 8);
|
||||
|
||||
typedef struct GEM5_PACKED
|
||||
{
|
||||
union
|
||||
{
|
||||
struct
|
||||
{
|
||||
uint32_t ibBaseLo;
|
||||
uint32_t ibBaseHi;
|
||||
};
|
||||
uint64_t ibBase;
|
||||
};
|
||||
uint32_t ibSize : 20;
|
||||
uint32_t chain : 1;
|
||||
uint32_t poll : 1;
|
||||
uint32_t reserved0 : 1;
|
||||
uint32_t valid: 1;
|
||||
uint32_t vmid : 4;
|
||||
uint32_t cachePolicy : 2;
|
||||
uint32_t reserved1 : 1;
|
||||
uint32_t priv : 1;
|
||||
} PM4IndirectBuf;
|
||||
static_assert(sizeof(PM4IndirectBuf) == 12);
|
||||
|
||||
/**
 * Body of a switch-buffer packet: a single dword, viewable as a 1-bit tmz
 * flag plus 31 reserved bits or as the raw value through `dummy`.
 */
typedef struct GEM5_PACKED
{
    union
    {
        struct
        {
            uint32_t tmz : 1;
            uint32_t reserved : 31;
        };
        uint32_t dummy;
    };
} PM4SwitchBuf;
static_assert(sizeof(PM4SwitchBuf) == 4);
|
||||
|
||||
typedef struct GEM5_PACKED
|
||||
{
|
||||
union
|
||||
{
|
||||
struct
|
||||
{
|
||||
uint32_t ibBaseLo;
|
||||
uint32_t ibBaseHi;
|
||||
};
|
||||
uint64_t ibBase;
|
||||
};
|
||||
uint32_t ibSize : 20;
|
||||
uint32_t chain : 1;
|
||||
uint32_t ena : 1;
|
||||
uint32_t reserved1 : 2;
|
||||
uint32_t vmid : 4;
|
||||
uint32_t cachePolicy : 2;
|
||||
uint32_t preResume : 1;
|
||||
uint32_t priv : 1;
|
||||
} PM4IndirectBufConst;
|
||||
static_assert(sizeof(PM4IndirectBufConst) == 12);
|
||||
|
||||
/**
 * Body of a frame-control packet: a single dword holding a 1-bit tmz flag,
 * 27 reserved bits and a 4-bit command.
 */
typedef struct GEM5_PACKED
{
    uint32_t tmz : 1;
    uint32_t reserved : 27;
    uint32_t command : 4;
} PM4FrameCtrl;
static_assert(sizeof(PM4FrameCtrl) == 4);
|
||||
|
||||
typedef struct GEM5_PACKED
|
||||
{
|
||||
uint32_t event : 6;
|
||||
uint32_t reserved0 : 2;
|
||||
uint32_t eventIdx : 4;
|
||||
uint32_t l1Volatile : 1;
|
||||
uint32_t l2Volatile : 1;
|
||||
uint32_t reserved1 : 1;
|
||||
uint32_t l2WB : 1;
|
||||
uint32_t l1Inv : 1;
|
||||
uint32_t l2Inv : 1;
|
||||
uint32_t reserved2 : 1;
|
||||
uint32_t l2NC : 1;
|
||||
uint32_t l2WC : 1;
|
||||
uint32_t l2Meta : 1;
|
||||
uint32_t reserved3 : 3;
|
||||
uint32_t cachePolicy : 2;
|
||||
uint32_t reserved4 : 1;
|
||||
uint32_t execute : 1;
|
||||
uint32_t reserved5 : 3;
|
||||
uint32_t reserved6 : 16;
|
||||
uint32_t destSelect : 2;
|
||||
uint32_t reserved7 : 6;
|
||||
uint32_t intSelect : 3;
|
||||
uint32_t reserved8 : 2;
|
||||
uint32_t dataSelect : 3;
|
||||
union
|
||||
{
|
||||
struct
|
||||
{
|
||||
uint32_t addrLo;
|
||||
uint32_t addrHi;
|
||||
};
|
||||
uint64_t addr;
|
||||
};
|
||||
union
|
||||
{
|
||||
struct
|
||||
{
|
||||
union
|
||||
{
|
||||
struct
|
||||
{
|
||||
uint32_t dwOffset : 16;
|
||||
uint32_t numDws : 16;
|
||||
};
|
||||
uint32_t dataLo : 32;
|
||||
};
|
||||
uint32_t dataHi;
|
||||
};
|
||||
uint64_t data;
|
||||
};
|
||||
uint32_t intCtxId;
|
||||
} PM4ReleaseMem;
|
||||
static_assert(sizeof(PM4ReleaseMem) == 28);
|
||||
|
||||
/**
 * Two-dword register write: a 16-bit offset (upper 16 bits reserved)
 * followed by the 32-bit data value.
 */
typedef struct GEM5_PACKED
{
    uint32_t offset : 16;
    uint32_t reserved : 16;

    uint32_t data;
} PM4SetUconfigReg;
static_assert(sizeof(PM4SetUconfigReg) == 8);
|
||||
|
||||
typedef struct GEM5_PACKED
|
||||
{
|
||||
union
|
||||
{
|
||||
struct
|
||||
{
|
||||
uint32_t ibBaseLo;
|
||||
uint32_t ibBaseHi;
|
||||
};
|
||||
uint64_t ibBase;
|
||||
};
|
||||
uint32_t ibSize : 20;
|
||||
uint32_t chain : 1;
|
||||
uint32_t offleadPolling : 1;
|
||||
uint32_t reserved1 : 1;
|
||||
uint32_t valid : 1;
|
||||
uint32_t processCnt : 4;
|
||||
uint32_t reserved2 : 4;
|
||||
} PM4RunList;
|
||||
static_assert(sizeof(PM4RunList) == 12);
|
||||
|
||||
/**
 * Body of a query-status packet (6 dwords): a control dword, a dword that
 * holds either a pasid or a doorbell offset plus engine select, a 64-bit
 * address and a 64-bit data value (both also accessible as lo/hi halves).
 */
typedef struct GEM5_PACKED
{
    // Control dword.
    uint32_t contextId : 28;
    uint32_t interruptSel : 2;
    uint32_t command : 2;

    // Two overlapping views of the same dword.
    union
    {
        struct
        {
            uint32_t pasid : 16;
            uint32_t reserved0 : 16;
        };
        struct
        {
            uint32_t reserved1 : 2;
            uint32_t doorbellOffset : 26;
            uint32_t engineSel : 3;
            uint32_t reserved2 : 1;
        };
    };

    union
    {
        struct
        {
            uint32_t addrLo;
            uint32_t addrHi;
        };
        uint64_t addr;
    };

    union
    {
        struct
        {
            uint32_t dataLo;
            uint32_t dataHi;
        };
        uint64_t data;
    };
} PM4QueryStatus;
static_assert(sizeof(PM4QueryStatus) == 24);
|
||||
|
||||
} // namespace gem5
|
||||
|
||||
#endif // __DEV_AMDGPU_PM4_DEFINES_H__
|
||||
69
src/dev/amdgpu/pm4_mmio.hh
Normal file
69
src/dev/amdgpu/pm4_mmio.hh
Normal file
@@ -0,0 +1,69 @@
|
||||
/*
|
||||
* Copyright (c) 2021 Advanced Micro Devices, Inc.
|
||||
* All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
*
|
||||
* 1. Redistributions of source code must retain the above copyright notice,
|
||||
* this list of conditions and the following disclaimer.
|
||||
*
|
||||
* 2. Redistributions in binary form must reproduce the above copyright notice,
|
||||
* this list of conditions and the following disclaimer in the documentation
|
||||
* and/or other materials provided with the distribution.
|
||||
*
|
||||
* 3. Neither the name of the copyright holder nor the names of its
|
||||
* contributors may be used to endorse or promote products derived from this
|
||||
* software without specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
|
||||
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
* POSSIBILITY OF SUCH DAMAGE.
|
||||
*
|
||||
*/
|
||||
|
||||
#ifndef __DEV_AMDGPU_PM4_MMIO_HH__
|
||||
#define __DEV_AMDGPU_PM4_MMIO_HH__
|
||||
|
||||
namespace gem5
|
||||
{
|
||||
|
||||
// CP ring buffer (CP_RB*) register offsets, in ascending order.
#define mmCP_RB0_BASE                                       0x1040
#define mmCP_RB0_CNTL                                       0x1041
#define mmCP_RB0_RPTR_ADDR                                  0x1043
#define mmCP_RB0_RPTR_ADDR_HI                               0x1044
#define mmCP_RB_WPTR_POLL_ADDR_LO                           0x1046
#define mmCP_RB_WPTR_POLL_ADDR_HI                           0x1047
#define mmCP_RB_VMID                                        0x1051
#define mmCP_RB0_WPTR                                       0x1054
#define mmCP_RB0_WPTR_HI                                    0x1055
#define mmCP_RB_DOORBELL_CONTROL                            0x1059
#define mmCP_RB_DOORBELL_RANGE_LOWER                        0x105a
#define mmCP_RB_DOORBELL_RANGE_UPPER                        0x105b
#define mmCP_RB0_BASE_HI                                    0x10b1

// CP_HQD* register offsets, in ascending order.
#define mmCP_HQD_ACTIVE                                     0x1247
#define mmCP_HQD_VMID                                       0x1248
#define mmCP_HQD_PQ_BASE                                    0x124d
#define mmCP_HQD_PQ_BASE_HI                                 0x124e
#define mmCP_HQD_PQ_RPTR                                    0x124f
#define mmCP_HQD_PQ_RPTR_REPORT_ADDR                        0x1250
#define mmCP_HQD_PQ_RPTR_REPORT_ADDR_HI                     0x1251
#define mmCP_HQD_PQ_WPTR_POLL_ADDR                          0x1252
#define mmCP_HQD_PQ_WPTR_POLL_ADDR_HI                       0x1253
#define mmCP_HQD_PQ_DOORBELL_CONTROL                        0x1254
#define mmCP_HQD_IB_CONTROL                                 0x125a
#define mmCP_HQD_PQ_WPTR_LO                                 0x127b
#define mmCP_HQD_PQ_WPTR_HI                                 0x127c
|
||||
|
||||
} // namespace gem5
|
||||
|
||||
#endif // __DEV_AMDGPU_PM4_MMIO_HH__
|
||||
1071
src/dev/amdgpu/pm4_packet_processor.cc
Normal file
1071
src/dev/amdgpu/pm4_packet_processor.cc
Normal file
File diff suppressed because it is too large
Load Diff
190
src/dev/amdgpu/pm4_packet_processor.hh
Normal file
190
src/dev/amdgpu/pm4_packet_processor.hh
Normal file
@@ -0,0 +1,190 @@
|
||||
/*
|
||||
* Copyright (c) 2021 Advanced Micro Devices, Inc.
|
||||
* All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
*
|
||||
* 1. Redistributions of source code must retain the above copyright notice,
|
||||
* this list of conditions and the following disclaimer.
|
||||
*
|
||||
* 2. Redistributions in binary form must reproduce the above copyright notice,
|
||||
* this list of conditions and the following disclaimer in the documentation
|
||||
* and/or other materials provided with the distribution.
|
||||
*
|
||||
* 3. Neither the name of the copyright holder nor the names of its
|
||||
* contributors may be used to endorse or promote products derived from this
|
||||
* software without specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
|
||||
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
* POSSIBILITY OF SUCH DAMAGE.
|
||||
*
|
||||
*/
|
||||
|
||||
#ifndef __DEV_AMDGPU_PM4_PACKET_PROCESSOR__
|
||||
#define __DEV_AMDGPU_PM4_PACKET_PROCESSOR__
|
||||
|
||||
#include <unordered_map>
|
||||
|
||||
#include "dev/amdgpu/amdgpu_device.hh"
|
||||
#include "dev/amdgpu/pm4_defines.hh"
|
||||
#include "dev/amdgpu/pm4_queues.hh"
|
||||
#include "dev/dma_virt_device.hh"
|
||||
#include "params/PM4PacketProcessor.hh"
|
||||
|
||||
namespace gem5
|
||||
{
|
||||
|
||||
class AMDGPUDevice;
|
||||
|
||||
|
||||
|
||||
|
||||
class PM4PacketProcessor : public DmaVirtDevice
|
||||
{
|
||||
AMDGPUDevice *gpuDevice;
|
||||
/* First graphics queue */
|
||||
PrimaryQueue pq;
|
||||
/* First compute queue */
|
||||
QueueDesc kiq;
|
||||
|
||||
/* All PM4 queues, indexed by VMID */
|
||||
std::unordered_map<uint16_t, PM4Queue *> queues;
|
||||
/* A map of PM4 queues based on doorbell offset */
|
||||
std::unordered_map<uint32_t, PM4Queue *> queuesMap;
|
||||
public:
|
||||
PM4PacketProcessor(const PM4PacketProcessorParams &p);
|
||||
|
||||
void setGPUDevice(AMDGPUDevice *gpu_device);
|
||||
|
||||
/**
|
||||
* Inherited methods.
|
||||
*/
|
||||
Tick write(PacketPtr pkt) override { return 0; }
|
||||
Tick read(PacketPtr pkt) override { return 0; }
|
||||
AddrRangeList getAddrRanges() const override;
|
||||
void serialize(CheckpointOut &cp) const override;
|
||||
void unserialize(CheckpointIn &cp) override;
|
||||
|
||||
/**
|
||||
* Method for functional translation.
|
||||
*/
|
||||
TranslationGenPtr translate(Addr vaddr, Addr size) override;
|
||||
|
||||
uint32_t getKiqDoorbellOffset() { return kiq.doorbell & 0x1ffffffc; }
|
||||
uint32_t getPqDoorbellOffset() { return pq.doorbellOffset; }
|
||||
|
||||
Addr getGARTAddr(Addr addr) const;
|
||||
|
||||
/**
|
||||
* Based on an offset communicated through doorbell write, the
|
||||
* PM4PacketProcessor identifies which queue needs processing.
|
||||
*/
|
||||
PM4Queue* getQueue(Addr offset, bool gfx = false);
|
||||
/**
|
||||
* The first graphics queue, the Primary Queueu a.k.a. RB0, needs to be
|
||||
* mapped since all queue details are communicated through MMIOs to
|
||||
* special registers.
|
||||
*/
|
||||
void mapPq(Addr offset);
|
||||
/**
|
||||
* The first compute queue, the Kernel Interface Queueu a.k.a. KIQ, needs
|
||||
* to be mapped since all queue details are communicated through MMIOs to
|
||||
* special registers.
|
||||
*/
|
||||
void mapKiq(Addr offset);
|
||||
/**
|
||||
* This method creates a new PM4Queue based on a queue descriptor and an
|
||||
* offset.
|
||||
*/
|
||||
void newQueue(QueueDesc *q, Addr offset, PM4MapQueues *pkt = nullptr,
|
||||
int id = -1);
|
||||
|
||||
/**
|
||||
* This method start processing a PM4Queue from the current read pointer
|
||||
* to the newly communicated write pointer (i.e., wptrOffset).
|
||||
*/
|
||||
void process(PM4Queue *q, Addr wptrOffset);
|
||||
|
||||
/**
|
||||
* Update read index on doorbell rings. We use write index, however read
|
||||
* index == write index when the queue is empty. This allows us to save
|
||||
* previous read index when a queue is remapped. The remapped queue will
|
||||
* read from the previous read index rather than reset to zero.
|
||||
*/
|
||||
void updateReadIndex(Addr offset, uint64_t rd_idx);
|
||||
|
||||
/**
|
||||
* This method decodes the next packet in a PM4Queue.
|
||||
*/
|
||||
void decodeNext(PM4Queue *q);
|
||||
/**
|
||||
* This method calls other PM4 packet processing methods based on the
|
||||
* header of a PM4 packet.
|
||||
*/
|
||||
void decodeHeader(PM4Queue *q, PM4Header header);
|
||||
|
||||
/* Methods that implement PM4 packets */
|
||||
void writeData(PM4Queue *q, PM4WriteData *pkt);
|
||||
void writeDataDone(PM4Queue *q, PM4WriteData *pkt, Addr addr);
|
||||
void mapQueues(PM4Queue *q, PM4MapQueues *pkt);
|
||||
void unmapQueues(PM4Queue *q, PM4UnmapQueues *pkt);
|
||||
void doneMQDWrite(Addr mqdAddr, Addr addr);
|
||||
void mapProcess(PM4Queue *q, PM4MapProcess *pkt);
|
||||
void processMQD(PM4MapQueues *pkt, PM4Queue *q, Addr addr, QueueDesc *mqd,
|
||||
uint16_t vmid);
|
||||
void processSDMAMQD(PM4MapQueues *pkt, PM4Queue *q, Addr addr,
|
||||
SDMAQueueDesc *mqd, uint16_t vmid);
|
||||
void releaseMem(PM4Queue *q, PM4ReleaseMem *pkt);
|
||||
void releaseMemDone(PM4Queue *q, PM4ReleaseMem *pkt, Addr addr);
|
||||
void runList(PM4Queue *q, PM4RunList *pkt);
|
||||
void indirectBuffer(PM4Queue *q, PM4IndirectBuf *pkt);
|
||||
void switchBuffer(PM4Queue *q, PM4SwitchBuf *pkt);
|
||||
void setUconfigReg(PM4Queue *q, PM4SetUconfigReg *pkt);
|
||||
void waitRegMem(PM4Queue *q, PM4WaitRegMem *pkt);
|
||||
void queryStatus(PM4Queue *q, PM4QueryStatus *pkt);
|
||||
void queryStatusDone(PM4Queue *q, PM4QueryStatus *pkt);
|
||||
|
||||
/* Methods that implement MMIO regs */
|
||||
void writeMMIO(PacketPtr pkt, Addr mmio_offset);
|
||||
|
||||
void setHqdVmid(uint32_t data);
|
||||
void setHqdActive(uint32_t data);
|
||||
void setHqdPqBase(uint32_t data);
|
||||
void setHqdPqBaseHi(uint32_t data);
|
||||
void setHqdPqDoorbellCtrl(uint32_t data);
|
||||
void setHqdPqPtr(uint32_t data);
|
||||
void setHqdPqWptrLo(uint32_t data);
|
||||
void setHqdPqWptrHi(uint32_t data);
|
||||
void setHqdPqRptrReportAddr(uint32_t data);
|
||||
void setHqdPqRptrReportAddrHi(uint32_t data);
|
||||
void setHqdPqWptrPollAddr(uint32_t data);
|
||||
void setHqdPqWptrPollAddrHi(uint32_t data);
|
||||
void setHqdIbCtrl(uint32_t data);
|
||||
void setRbVmid(uint32_t data);
|
||||
void setRbCntl(uint32_t data);
|
||||
void setRbWptrLo(uint32_t data);
|
||||
void setRbWptrHi(uint32_t data);
|
||||
void setRbRptrAddrLo(uint32_t data);
|
||||
void setRbRptrAddrHi(uint32_t data);
|
||||
void setRbWptrPollAddrLo(uint32_t data);
|
||||
void setRbWptrPollAddrHi(uint32_t data);
|
||||
void setRbBaseLo(uint32_t data);
|
||||
void setRbBaseHi(uint32_t data);
|
||||
void setRbDoorbellCntrl(uint32_t data);
|
||||
void setRbDoorbellRangeLo(uint32_t data);
|
||||
void setRbDoorbellRangeHi(uint32_t data);
|
||||
};
|
||||
|
||||
} // namespace gem5
|
||||
|
||||
#endif //__DEV_AMDGPU_PM4_PACKET_PROCESSOR__
|
||||
477
src/dev/amdgpu/pm4_queues.hh
Normal file
477
src/dev/amdgpu/pm4_queues.hh
Normal file
@@ -0,0 +1,477 @@
|
||||
/*
|
||||
* Copyright (c) 2021 Advanced Micro Devices, Inc.
|
||||
* All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
*
|
||||
* 1. Redistributions of source code must retain the above copyright notice,
|
||||
* this list of conditions and the following disclaimer.
|
||||
*
|
||||
* 2. Redistributions in binary form must reproduce the above copyright notice,
|
||||
* this list of conditions and the following disclaimer in the documentation
|
||||
* and/or other materials provided with the distribution.
|
||||
*
|
||||
* 3. Neither the name of the copyright holder nor the names of its
|
||||
* contributors may be used to endorse or promote products derived from this
|
||||
* software without specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
|
||||
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
* POSSIBILITY OF SUCH DAMAGE.
|
||||
*
|
||||
*/
|
||||
|
||||
#ifndef __DEV_AMDGPU_PM4_QUEUES_HH__
|
||||
#define __DEV_AMDGPU_PM4_QUEUES_HH__
|
||||
|
||||
namespace gem5
|
||||
{
|
||||
|
||||
/**
|
||||
* Queue descriptor with relevant MQD attributes. Taken from
|
||||
* https://github.com/RadeonOpenCompute/ROCK-Kernel-Driver/blob/rocm-4.3.x/
|
||||
* drivers/gpu/drm/amd/include/v9_structs.h
|
||||
*/
|
||||
typedef struct GEM5_PACKED
|
||||
{
|
||||
union
|
||||
{
|
||||
struct
|
||||
{
|
||||
uint32_t cp_mqd_readindex_lo;
|
||||
uint32_t cp_mqd_readindex_hi;
|
||||
};
|
||||
uint64_t mqdReadIndex;
|
||||
};
|
||||
uint32_t cp_mqd_save_start_time_lo;
|
||||
uint32_t cp_mqd_save_start_time_hi;
|
||||
uint32_t cp_mqd_save_end_time_lo;
|
||||
uint32_t cp_mqd_save_end_time_hi;
|
||||
uint32_t cp_mqd_restore_start_time_lo;
|
||||
uint32_t cp_mqd_restore_start_time_hi;
|
||||
uint32_t cp_mqd_restore_end_time_lo;
|
||||
uint32_t cp_mqd_restore_end_time_hi;
|
||||
uint32_t disable_queue;
|
||||
uint32_t reserved_107;
|
||||
uint32_t gds_cs_ctxsw_cnt0;
|
||||
uint32_t gds_cs_ctxsw_cnt1;
|
||||
uint32_t gds_cs_ctxsw_cnt2;
|
||||
uint32_t gds_cs_ctxsw_cnt3;
|
||||
uint32_t reserved_112;
|
||||
uint32_t reserved_113;
|
||||
uint32_t cp_pq_exe_status_lo;
|
||||
uint32_t cp_pq_exe_status_hi;
|
||||
uint32_t cp_packet_id_lo;
|
||||
uint32_t cp_packet_id_hi;
|
||||
uint32_t cp_packet_exe_status_lo;
|
||||
uint32_t cp_packet_exe_status_hi;
|
||||
uint32_t gds_save_base_addr_lo;
|
||||
uint32_t gds_save_base_addr_hi;
|
||||
uint32_t gds_save_mask_lo;
|
||||
uint32_t gds_save_mask_hi;
|
||||
uint32_t ctx_save_base_addr_lo;
|
||||
uint32_t ctx_save_base_addr_hi;
|
||||
uint32_t dynamic_cu_mask_addr_lo;
|
||||
uint32_t dynamic_cu_mask_addr_hi;
|
||||
union
|
||||
{
|
||||
struct
|
||||
{
|
||||
uint32_t mqd_base_addr_lo;
|
||||
uint32_t mqd_base_addr_hi;
|
||||
};
|
||||
uint64_t mqdBase;
|
||||
};
|
||||
uint32_t hqd_active;
|
||||
uint32_t hqd_vmid;
|
||||
uint32_t hqd_persistent_state;
|
||||
uint32_t hqd_pipe_priority;
|
||||
uint32_t hqd_queue_priority;
|
||||
uint32_t hqd_quantum;
|
||||
union
|
||||
{
|
||||
struct
|
||||
{
|
||||
uint32_t hqd_pq_base_lo;
|
||||
uint32_t hqd_pq_base_hi;
|
||||
};
|
||||
uint64_t base;
|
||||
};
|
||||
union
|
||||
{
|
||||
uint32_t hqd_pq_rptr;
|
||||
uint32_t rptr;
|
||||
};
|
||||
union
|
||||
{
|
||||
struct
|
||||
{
|
||||
uint32_t hqd_pq_rptr_report_addr_lo;
|
||||
uint32_t hqd_pq_rptr_report_addr_hi;
|
||||
};
|
||||
uint64_t aqlRptr;
|
||||
};
|
||||
uint32_t hqd_pq_wptr_poll_addr_lo;
|
||||
uint32_t hqd_pq_wptr_poll_addr_hi;
|
||||
union
|
||||
{
|
||||
uint32_t hqd_pq_doorbell_control;
|
||||
uint32_t doorbell;
|
||||
};
|
||||
uint32_t reserved_144;
|
||||
uint32_t hqd_pq_control;
|
||||
union
|
||||
{
|
||||
struct
|
||||
{
|
||||
uint32_t hqd_ib_base_addr_lo;
|
||||
uint32_t hqd_ib_base_addr_hi;
|
||||
};
|
||||
Addr ibBase;
|
||||
};
|
||||
union
|
||||
{
|
||||
uint32_t hqd_ib_rptr;
|
||||
uint32_t ibRptr;
|
||||
};
|
||||
uint32_t hqd_ib_control;
|
||||
uint32_t hqd_iq_timer;
|
||||
uint32_t hqd_iq_rptr;
|
||||
uint32_t cp_hqd_dequeue_request;
|
||||
uint32_t cp_hqd_dma_offload;
|
||||
uint32_t cp_hqd_sema_cmd;
|
||||
uint32_t cp_hqd_msg_type;
|
||||
uint32_t cp_hqd_atomic0_preop_lo;
|
||||
uint32_t cp_hqd_atomic0_preop_hi;
|
||||
uint32_t cp_hqd_atomic1_preop_lo;
|
||||
uint32_t cp_hqd_atomic1_preop_hi;
|
||||
uint32_t cp_hqd_hq_status0;
|
||||
uint32_t cp_hqd_hq_control0;
|
||||
uint32_t cp_mqd_control;
|
||||
uint32_t cp_hqd_hq_status1;
|
||||
uint32_t cp_hqd_hq_control1;
|
||||
uint32_t cp_hqd_eop_base_addr_lo;
|
||||
uint32_t cp_hqd_eop_base_addr_hi;
|
||||
uint32_t cp_hqd_eop_control;
|
||||
uint32_t cp_hqd_eop_rptr;
|
||||
uint32_t cp_hqd_eop_wptr;
|
||||
uint32_t cp_hqd_eop_done_events;
|
||||
uint32_t cp_hqd_ctx_save_base_addr_lo;
|
||||
uint32_t cp_hqd_ctx_save_base_addr_hi;
|
||||
uint32_t cp_hqd_ctx_save_control;
|
||||
uint32_t cp_hqd_cntl_stack_offset;
|
||||
uint32_t cp_hqd_cntl_stack_size;
|
||||
uint32_t cp_hqd_wg_state_offset;
|
||||
uint32_t cp_hqd_ctx_save_size;
|
||||
uint32_t cp_hqd_gds_resource_state;
|
||||
uint32_t cp_hqd_error;
|
||||
uint32_t cp_hqd_eop_wptr_mem;
|
||||
union
|
||||
{
|
||||
uint32_t cp_hqd_aql_control;
|
||||
uint32_t aql;
|
||||
};
|
||||
uint32_t cp_hqd_pq_wptr_lo;
|
||||
uint32_t cp_hqd_pq_wptr_hi;
|
||||
} QueueDesc;
|
||||
|
||||
/**
|
||||
* Queue descriptor for SDMA-based user queues (RLC queues). Taken from
|
||||
* https://github.com/RadeonOpenCompute/ROCK-Kernel-Driver/blob/rocm-4.2.0/
|
||||
* drivers/gpu/drm/amd/include/v9_structs.h
|
||||
*/
|
||||
typedef struct GEM5_PACKED
{
    /*
     * Field order defines the in-memory layout of the descriptor; do not
     * reorder. Every member is one 32-bit word except rb_base, which
     * overlays the two ring-buffer base words as a single 64-bit value.
     */
    uint32_t sdmax_rlcx_rb_cntl;
    union
    {
        struct
        {
            uint32_t sdmax_rlcx_rb_base;
            uint32_t sdmax_rlcx_rb_base_hi;
        };
        uint64_t rb_base;
    };
    uint32_t sdmax_rlcx_rb_rptr;
    uint32_t sdmax_rlcx_rb_rptr_hi;
    uint32_t sdmax_rlcx_rb_wptr;
    uint32_t sdmax_rlcx_rb_wptr_hi;
    uint32_t sdmax_rlcx_rb_wptr_poll_cntl;
    uint32_t sdmax_rlcx_rb_rptr_addr_hi;
    uint32_t sdmax_rlcx_rb_rptr_addr_lo;
    uint32_t sdmax_rlcx_ib_cntl;
    uint32_t sdmax_rlcx_ib_rptr;
    uint32_t sdmax_rlcx_ib_offset;
    uint32_t sdmax_rlcx_ib_base_lo;
    uint32_t sdmax_rlcx_ib_base_hi;
    uint32_t sdmax_rlcx_ib_size;
    uint32_t sdmax_rlcx_skip_cntl;
    uint32_t sdmax_rlcx_context_status;
    uint32_t sdmax_rlcx_doorbell;
    uint32_t sdmax_rlcx_status;
    uint32_t sdmax_rlcx_doorbell_log;
    uint32_t sdmax_rlcx_watermark;
    uint32_t sdmax_rlcx_doorbell_offset;
    uint32_t sdmax_rlcx_csa_addr_lo;
    uint32_t sdmax_rlcx_csa_addr_hi;
    uint32_t sdmax_rlcx_ib_sub_remain;
    uint32_t sdmax_rlcx_preempt;
    uint32_t sdmax_rlcx_dummy_reg;
    uint32_t sdmax_rlcx_rb_wptr_poll_addr_hi;
    uint32_t sdmax_rlcx_rb_wptr_poll_addr_lo;
    uint32_t sdmax_rlcx_rb_aql_cntl;
    uint32_t sdmax_rlcx_minor_ptr_update;
    uint32_t sdmax_rlcx_midcmd_data0;
    uint32_t sdmax_rlcx_midcmd_data1;
    uint32_t sdmax_rlcx_midcmd_data2;
    uint32_t sdmax_rlcx_midcmd_data3;
    uint32_t sdmax_rlcx_midcmd_data4;
    uint32_t sdmax_rlcx_midcmd_data5;
    uint32_t sdmax_rlcx_midcmd_data6;
    uint32_t sdmax_rlcx_midcmd_data7;
    uint32_t sdmax_rlcx_midcmd_data8;
    uint32_t sdmax_rlcx_midcmd_cntl;

    /* Words 42 through 125 are reserved. */
    uint32_t reserved_42;
    uint32_t reserved_43;
    uint32_t reserved_44;
    uint32_t reserved_45;
    uint32_t reserved_46;
    uint32_t reserved_47;
    uint32_t reserved_48;
    uint32_t reserved_49;
    uint32_t reserved_50;
    uint32_t reserved_51;
    uint32_t reserved_52;
    uint32_t reserved_53;
    uint32_t reserved_54;
    uint32_t reserved_55;
    uint32_t reserved_56;
    uint32_t reserved_57;
    uint32_t reserved_58;
    uint32_t reserved_59;
    uint32_t reserved_60;
    uint32_t reserved_61;
    uint32_t reserved_62;
    uint32_t reserved_63;
    uint32_t reserved_64;
    uint32_t reserved_65;
    uint32_t reserved_66;
    uint32_t reserved_67;
    uint32_t reserved_68;
    uint32_t reserved_69;
    uint32_t reserved_70;
    uint32_t reserved_71;
    uint32_t reserved_72;
    uint32_t reserved_73;
    uint32_t reserved_74;
    uint32_t reserved_75;
    uint32_t reserved_76;
    uint32_t reserved_77;
    uint32_t reserved_78;
    uint32_t reserved_79;
    uint32_t reserved_80;
    uint32_t reserved_81;
    uint32_t reserved_82;
    uint32_t reserved_83;
    uint32_t reserved_84;
    uint32_t reserved_85;
    uint32_t reserved_86;
    uint32_t reserved_87;
    uint32_t reserved_88;
    uint32_t reserved_89;
    uint32_t reserved_90;
    uint32_t reserved_91;
    uint32_t reserved_92;
    uint32_t reserved_93;
    uint32_t reserved_94;
    uint32_t reserved_95;
    uint32_t reserved_96;
    uint32_t reserved_97;
    uint32_t reserved_98;
    uint32_t reserved_99;
    uint32_t reserved_100;
    uint32_t reserved_101;
    uint32_t reserved_102;
    uint32_t reserved_103;
    uint32_t reserved_104;
    uint32_t reserved_105;
    uint32_t reserved_106;
    uint32_t reserved_107;
    uint32_t reserved_108;
    uint32_t reserved_109;
    uint32_t reserved_110;
    uint32_t reserved_111;
    uint32_t reserved_112;
    uint32_t reserved_113;
    uint32_t reserved_114;
    uint32_t reserved_115;
    uint32_t reserved_116;
    uint32_t reserved_117;
    uint32_t reserved_118;
    uint32_t reserved_119;
    uint32_t reserved_120;
    uint32_t reserved_121;
    uint32_t reserved_122;
    uint32_t reserved_123;
    uint32_t reserved_124;
    uint32_t reserved_125;

    /* reserved_126,127: repurposed for driver-internal use */
    uint32_t sdma_engine_id;
    uint32_t sdma_queue_id;
} SDMAQueueDesc;
|
||||
|
||||
/* The Primary Queue has extra attributes, which will be stored separately. */
|
||||
typedef struct PrimaryQueue : QueueDesc
|
||||
{
|
||||
union
|
||||
{
|
||||
struct
|
||||
{
|
||||
uint32_t queueRptrAddrLo;
|
||||
uint32_t queueRptrAddrHi;
|
||||
};
|
||||
Addr queueRptrAddr;
|
||||
};
|
||||
union
|
||||
{
|
||||
struct
|
||||
{
|
||||
uint32_t queueWptrLo;
|
||||
uint32_t queueWptrHi;
|
||||
};
|
||||
Addr queueWptr;
|
||||
};
|
||||
uint32_t doorbellOffset;
|
||||
uint32_t doorbellRangeLo;
|
||||
uint32_t doorbellRangeHi;
|
||||
} PrimaryQueue;
|
||||
|
||||
/**
|
||||
* Class defining a PM4 queue.
|
||||
*/
|
||||
class PM4Queue
|
||||
{
|
||||
int _id;
|
||||
|
||||
/* Queue descriptor read from the system memory of the simulated system. */
|
||||
QueueDesc *q;
|
||||
|
||||
/**
|
||||
* Most important fields of a PM4 queue are stored in the queue descriptor
|
||||
* (i.e., QueueDesc). However, since the write pointers are communicated
|
||||
* through the doorbell value, we will add separate atributes for them.
|
||||
*/
|
||||
Addr _wptr;
|
||||
Addr _ibWptr;
|
||||
Addr _offset;
|
||||
bool _processing;
|
||||
bool _ib;
|
||||
PM4MapQueues *_pkt;
|
||||
public:
|
||||
PM4Queue() : _id(0), q(nullptr), _wptr(0), _offset(0), _processing(false),
|
||||
_ib(false), _pkt(nullptr) {}
|
||||
PM4Queue(int id, QueueDesc *queue, Addr offset) :
|
||||
_id(id), q(queue), _wptr(queue->rptr), _ibWptr(0), _offset(offset),
|
||||
_processing(false), _ib(false), _pkt(nullptr) {}
|
||||
PM4Queue(int id, QueueDesc *queue, Addr offset, PM4MapQueues *pkt) :
|
||||
_id(id), q(queue), _wptr(queue->rptr), _ibWptr(0), _offset(offset),
|
||||
_processing(false), _ib(false), _pkt(pkt) {}
|
||||
|
||||
QueueDesc *getMQD() { return q; }
|
||||
int id() { return _id; }
|
||||
Addr mqdBase() { return q->mqdBase; }
|
||||
Addr base() { return q->base; }
|
||||
Addr ibBase() { return q->ibBase; }
|
||||
|
||||
Addr
|
||||
rptr()
|
||||
{
|
||||
if (ib()) return q->ibBase + q->ibRptr;
|
||||
else return q->base + q->rptr;
|
||||
}
|
||||
|
||||
Addr
|
||||
wptr()
|
||||
{
|
||||
if (ib()) return q->ibBase + _ibWptr;
|
||||
else return q->base + _wptr;
|
||||
}
|
||||
|
||||
Addr
|
||||
getRptr()
|
||||
{
|
||||
if (ib()) return q->ibRptr;
|
||||
else return q->rptr;
|
||||
}
|
||||
|
||||
Addr
|
||||
getWptr()
|
||||
{
|
||||
if (ib()) return _ibWptr;
|
||||
else return _wptr;
|
||||
}
|
||||
|
||||
Addr offset() { return _offset; }
|
||||
bool processing() { return _processing; }
|
||||
bool ib() { return _ib; }
|
||||
|
||||
void id(int value) { _id = value; }
|
||||
void base(Addr value) { q->base = value; }
|
||||
void ibBase(Addr value) { q->ibBase = value; }
|
||||
|
||||
/**
|
||||
* It seems that PM4 nop packets with count 0x3fff, not only do not
|
||||
* consider the count value, they also fast forward the read pointer.
|
||||
* Without proper sync packets this can potentially be dangerous, since
|
||||
* more useful packets can be enqueued in the time between nop enqueu and
|
||||
* nop processing.
|
||||
*/
|
||||
void
|
||||
fastforwardRptr()
|
||||
{
|
||||
if (ib()) q->ibRptr = _ibWptr;
|
||||
else q->rptr = _wptr;
|
||||
}
|
||||
|
||||
void
|
||||
incRptr(Addr value)
|
||||
{
|
||||
if (ib()) q->ibRptr += value;
|
||||
else q->rptr += value;
|
||||
}
|
||||
|
||||
void
|
||||
rptr(Addr value)
|
||||
{
|
||||
if (ib()) q->ibRptr = value;
|
||||
else q->rptr = value;
|
||||
}
|
||||
|
||||
void
|
||||
wptr(Addr value)
|
||||
{
|
||||
if (ib()) _ibWptr = value;
|
||||
else _wptr = value;
|
||||
}
|
||||
|
||||
void offset(Addr value) { _offset = value; }
|
||||
void processing(bool value) { _processing = value; }
|
||||
void ib(bool value) { _ib = value; }
|
||||
uint32_t me() { if (_pkt) return _pkt->me; else return 0; }
|
||||
uint32_t pipe() { if (_pkt) return _pkt->pipe; else return 0; }
|
||||
uint32_t queue() { if (_pkt) return _pkt->queueSlot; else return 0; }
|
||||
bool privileged() { assert(_pkt); return _pkt->queueSel == 0 ? 1 : 0; }
|
||||
};
|
||||
|
||||
} // namespace gem5
|
||||
|
||||
#endif // __DEV_AMDGPU_PM4_QUEUES_HH__
|
||||
@@ -36,7 +36,6 @@
|
||||
#include "dev/amdgpu/interrupt_handler.hh"
|
||||
#include "dev/amdgpu/sdma_commands.hh"
|
||||
#include "dev/amdgpu/sdma_mmio.hh"
|
||||
#include "dev/amdgpu/vega10/soc15_ih_clientid.h"
|
||||
#include "mem/packet.hh"
|
||||
#include "mem/packet_access.hh"
|
||||
#include "params/SDMAEngine.hh"
|
||||
|
||||
@@ -37,7 +37,7 @@
|
||||
* for SDMA. The header files can be found here:
|
||||
*
|
||||
* https://github.com/RadeonOpenCompute/ROCK-Kernel-Driver/blob/rocm-4.3.x/
|
||||
* drivers/gpu/drm/amd/include/asic_reg/sdma0/sdma0_4_0_offset.h
|
||||
* drivers/gpu/drm/amd/include/asic_reg/sdma0/sdma0_4_0_offset.h
|
||||
* https://github.com/RadeonOpenCompute/ROCK-Kernel-Driver/blob/rocm-4.3.x/
|
||||
* drivers/gpu/drm/amd/include/asic_reg/sdma1/sdma1_4_0_offset.h
|
||||
*/
|
||||
|
||||
@@ -1,81 +0,0 @@
|
||||
/*
|
||||
* Copyright 2018 Advanced Micro Devices, Inc.
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
* and/or sell copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included in
|
||||
* all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
* THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
|
||||
* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
|
||||
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
||||
* OTHER DEALINGS IN THE SOFTWARE.
|
||||
*
|
||||
*/
|
||||
|
||||
#ifndef __SOC15_IH_CLIENTID_H__
|
||||
#define __SOC15_IH_CLIENTID_H__
|
||||
|
||||
/*
|
||||
* src: https://github.com/RadeonOpenCompute/ROCK-Kernel-Driver/blob/
|
||||
* 89baa3f89c8cb0d76e999c01bf304301e35abc9b/drivers/gpu/drm/amd/include/
|
||||
* soc15_ih_clientid.h
|
||||
*/
|
||||
|
||||
/*
|
||||
* vega10+ IH clients
|
||||
*/
|
||||
enum soc15_ih_clientid
{
    SOC15_IH_CLIENTID_IH        = 0x00,
    SOC15_IH_CLIENTID_ACP       = 0x01,
    SOC15_IH_CLIENTID_ATHUB     = 0x02,
    SOC15_IH_CLIENTID_BIF       = 0x03,
    SOC15_IH_CLIENTID_DCE       = 0x04,
    SOC15_IH_CLIENTID_ISP       = 0x05,
    SOC15_IH_CLIENTID_PCIE0     = 0x06,
    SOC15_IH_CLIENTID_RLC       = 0x07,
    SOC15_IH_CLIENTID_SDMA0     = 0x08,
    SOC15_IH_CLIENTID_SDMA1     = 0x09,
    SOC15_IH_CLIENTID_SE0SH     = 0x0a,
    SOC15_IH_CLIENTID_SE1SH     = 0x0b,
    SOC15_IH_CLIENTID_SE2SH     = 0x0c,
    SOC15_IH_CLIENTID_SE3SH     = 0x0d,
    /* SYSHUB and UVD1 share the value 0x0e. */
    SOC15_IH_CLIENTID_SYSHUB    = 0x0e,
    SOC15_IH_CLIENTID_UVD1      = 0x0e,
    SOC15_IH_CLIENTID_THM       = 0x0f,
    SOC15_IH_CLIENTID_UVD       = 0x10,
    SOC15_IH_CLIENTID_VCE0      = 0x11,
    SOC15_IH_CLIENTID_VMC       = 0x12,
    SOC15_IH_CLIENTID_XDMA      = 0x13,
    SOC15_IH_CLIENTID_GRBM_CP   = 0x14,
    SOC15_IH_CLIENTID_ATS       = 0x15,
    SOC15_IH_CLIENTID_ROM_SMUIO = 0x16,
    SOC15_IH_CLIENTID_DF        = 0x17,
    SOC15_IH_CLIENTID_VCE1      = 0x18,
    SOC15_IH_CLIENTID_PWR       = 0x19,
    /* 0x1a is not assigned. */
    SOC15_IH_CLIENTID_UTCL2     = 0x1b,
    SOC15_IH_CLIENTID_EA        = 0x1c,
    SOC15_IH_CLIENTID_UTCL2LOG  = 0x1d,
    SOC15_IH_CLIENTID_MP0       = 0x1e,
    SOC15_IH_CLIENTID_MP1       = 0x1f,

    /* Takes the next value after MP1, i.e. 0x20. */
    SOC15_IH_CLIENTID_MAX,

    /* VCN is a second name for the UVD value. */
    SOC15_IH_CLIENTID_VCN       = SOC15_IH_CLIENTID_UVD
};
|
||||
|
||||
/* Interrupt source IDs; only the trap ID is defined here. */
enum ihSourceId
{
    TRAP_ID = 224
};
|
||||
|
||||
#endif
|
||||
|
||||
|
||||
Reference in New Issue
Block a user