dev-amdgpu: Support multiple CPs and MMIO AddrRanges

Currently gem5 assumes that there is only one command processor (CP),
which contains the PM4 packet processor. Some GPU devices have multiple
CPs, each of which the driver tests individually during POST whether or
not it is used. Therefore, these additional CPs need to be supported.

This commit allows for multiple PM4 packet processors which represent
multiple CPs. Each of these processors will have its own independent
MMIO address range. To more easily support ranges, the MMIO addresses
now use AddrRange to index a PM4 packet processor instead of the
hard-coded constexpr MMIO start and size pairs.

By default only one PM4 packet processor is created, meaning the
functionality of the simulation is unchanged for devices currently
supported in gem5.

Change-Id: I977f4fd3a169ef4a78671a4fb58c8ea0e19bf52c
This commit is contained in:
Matthew Poremba
2024-02-13 17:43:23 -06:00
parent 39153cd234
commit 823b5a6eb8
10 changed files with 245 additions and 151 deletions

View File

@@ -95,7 +95,7 @@ class AMDGPUDevice(PciDevice):
# The config script should not create a new cp here but rather assign the
# same cp that is assigned to the Shader SimObject.
cp = Param.GPUCommandProcessor(NULL, "Command Processor")
pm4_pkt_proc = Param.PM4PacketProcessor("PM4 Packet Processor")
pm4_pkt_procs = VectorParam.PM4PacketProcessor("PM4 Packet Processor")
memory_manager = Param.AMDGPUMemoryManager("GPU Memory Manager")
memories = VectorParam.AbstractMemory([], "All memories in the device")
device_ih = Param.AMDGPUInterruptHandler("GPU Interrupt handler")
@@ -118,6 +118,10 @@ class PM4PacketProcessor(DmaVirtDevice):
cxx_header = "dev/amdgpu/pm4_packet_processor.hh"
cxx_class = "gem5::PM4PacketProcessor"
# Default to 0 as the common case is one PM4 packet processor
ip_id = Param.Int(0, "Instance ID of this PM4 processor")
mmio_range = Param.AddrRange("Range of MMIO addresses")
class AMDGPUMemoryManager(ClockedObject):
type = "AMDGPUMemoryManager"

View File

@@ -49,6 +49,16 @@ enum QueueType
RLC
};
/**
 * Bookkeeping record kept for a doorbell.
 *
 * qtype is the type of queue associated with the doorbell and ip_id is
 * the ID of the IP block, which matters for IP blocks that can have
 * multiple instances.
 */
struct DoorbellInfo
{
    QueueType qtype;
    int ip_id;
};
// AMD GPUs support 16 different virtual address spaces
static constexpr int AMDGPU_VM_COUNT = 16;
@@ -61,36 +71,11 @@ constexpr int MMIO_BAR = 5;
constexpr uint32_t VGA_ROM_DEFAULT = 0xc0000;
constexpr uint32_t ROM_SIZE = 0x20000; // 128kB
/* SDMA base, size, mmio offset shift. */
static constexpr uint32_t SDMA0_BASE = 0x4980;
static constexpr uint32_t SDMA1_BASE = 0x5180;
static constexpr uint32_t SDMA_SIZE = 0x800;
static constexpr uint32_t SDMA_OFFSET_SHIFT = 2;
/* Interrupt handler base, size, mmio offset shift. */
static constexpr uint32_t IH_BASE = 0x4280;
static constexpr uint32_t IH_SIZE = 0x700;
/* Most MMIOs use DWORD addresses and thus need to be shifted. */
static constexpr uint32_t IH_OFFSET_SHIFT = 2;
/* Graphics register bus manager base, size, mmio offset shift. */
static constexpr uint32_t GRBM_BASE = 0x8000;
static constexpr uint32_t GRBM_SIZE = 0x5000;
static constexpr uint32_t GRBM_OFFSET_SHIFT = 2;
/* GFX base, size, mmio offset shift. */
static constexpr uint32_t GFX_BASE = 0x28000;
static constexpr uint32_t GFX_SIZE = 0x17000;
static constexpr uint32_t GFX_OFFSET_SHIFT = 2;
/* MMHUB base, size, mmio offset shift. */
static constexpr uint32_t MMHUB_BASE = 0x68000;
static constexpr uint32_t MMHUB_SIZE = 0x2120;
static constexpr uint32_t MMHUB_OFFSET_SHIFT = 2;
/* NBIO base and size. */
static constexpr uint32_t NBIO_BASE = 0x0;
static constexpr uint32_t NBIO_SIZE = 0x4280;
} // namespace gem5
#endif // __DEV_AMDGPU_AMDGPU_DEFINES_HH__

View File

@@ -54,8 +54,7 @@ namespace gem5
AMDGPUDevice::AMDGPUDevice(const AMDGPUDeviceParams &p)
: PciDevice(p), gpuMemMgr(p.memory_manager), deviceIH(p.device_ih),
pm4PktProc(p.pm4_pkt_proc), cp(p.cp),
checkpoint_before_mmios(p.checkpoint_before_mmios),
cp(p.cp), checkpoint_before_mmios(p.checkpoint_before_mmios),
init_interrupt_count(0), _lastVMID(0),
deviceMem(name() + ".deviceMem", p.memories, false, "", false)
{
@@ -81,6 +80,16 @@ AMDGPUDevice::AMDGPUDevice(const AMDGPUDeviceParams &p)
romRange = RangeSize(VGA_ROM_DEFAULT, ROM_SIZE);
}
if (p.device_name == "Vega10") {
gfx_version = GfxVersion::gfx900;
} else if (p.device_name == "MI100") {
gfx_version = GfxVersion::gfx908;
} else if (p.device_name == "MI200") {
gfx_version = GfxVersion::gfx90a;
} else {
panic("Unknown GPU device %s\n", p.device_name);
}
if (p.trace_file != "") {
mmioReader.readMMIOTrace(p.trace_file);
}
@@ -126,8 +135,22 @@ AMDGPUDevice::AMDGPUDevice(const AMDGPUDeviceParams &p)
panic("Unknown GPU device %s\n", p.device_name);
}
// Setup PM4 packet processors and sanity check IDs
std::set<int> pm4_ids;
for (auto& pm4 : p.pm4_pkt_procs) {
pm4->setGPUDevice(this);
fatal_if(pm4_ids.count(pm4->getIpId()),
"Two PM4s with same IP IDs is not allowed");
pm4_ids.insert(pm4->getIpId());
pm4PktProcs.insert({pm4->getIpId(), pm4});
pm4Ranges.insert({pm4->getMMIORange(), pm4});
}
// There should be at least one PM4 packet processor with ID 0
fatal_if(!pm4PktProcs.count(0), "No default PM4 processor found");
deviceIH->setGPUDevice(this);
pm4PktProc->setGPUDevice(this);
cp->hsaPacketProc().setGPUDevice(this);
cp->setGPUDevice(this);
nbio.setGPUDevice(this);
@@ -136,6 +159,23 @@ AMDGPUDevice::AMDGPUDevice(const AMDGPUDeviceParams &p)
// could possibly be anything, but these are the values used by hardware.
uint64_t mmhubBase = 0x8000ULL << 24;
uint64_t mmhubTop = 0x83ffULL << 24;
uint64_t mem_size = 0x3ff0; // 16 GB of memory
gpuvm.setMMHUBBase(mmhubBase);
gpuvm.setMMHUBTop(mmhubTop);
// Map other MMIO apertures based on gfx version. This must be done before
// any calls to get/setRegVal.
// NBIO 0x0 - 0x4280
// IH 0x4280 - 0x4980
// GRBM 0x8000 - 0xC000
// GFX 0x28000 - 0x3F000
// MMHUB 0x68000 - 0x6a120
gpuvm.setMMIOAperture(NBIO_MMIO_RANGE, AddrRange(0x0, 0x4280));
gpuvm.setMMIOAperture(IH_MMIO_RANGE, AddrRange(0x4280, 0x4980));
gpuvm.setMMIOAperture(GRBM_MMIO_RANGE, AddrRange(0x8000, 0xC000));
gpuvm.setMMIOAperture(GFX_MMIO_RANGE, AddrRange(0x28000, 0x3F000));
gpuvm.setMMIOAperture(MMHUB_MMIO_RANGE, AddrRange(0x68000, 0x6A120));
// These are hardcoded register values to return what the driver expects
setRegVal(AMDGPU_MP0_SMN_C2PMSG_33, 0x80000000);
@@ -145,25 +185,19 @@ AMDGPUDevice::AMDGPUDevice(const AMDGPUDeviceParams &p)
if (p.device_name == "Vega10") {
setRegVal(VEGA10_FB_LOCATION_BASE, mmhubBase >> 24);
setRegVal(VEGA10_FB_LOCATION_TOP, mmhubTop >> 24);
gfx_version = GfxVersion::gfx900;
} else if (p.device_name == "MI100") {
setRegVal(MI100_FB_LOCATION_BASE, mmhubBase >> 24);
setRegVal(MI100_FB_LOCATION_TOP, mmhubTop >> 24);
setRegVal(MI100_MEM_SIZE_REG, 0x3ff0); // 16GB of memory
gfx_version = GfxVersion::gfx908;
setRegVal(MI100_MEM_SIZE_REG, mem_size);
} else if (p.device_name == "MI200") {
// This device can have either 64GB or 128GB of device memory.
// This limits to 16GB for simulation.
setRegVal(MI200_FB_LOCATION_BASE, mmhubBase >> 24);
setRegVal(MI200_FB_LOCATION_TOP, mmhubTop >> 24);
setRegVal(MI200_MEM_SIZE_REG, 0x3ff0);
gfx_version = GfxVersion::gfx90a;
setRegVal(MI200_MEM_SIZE_REG, mem_size);
} else {
panic("Unknown GPU device %s\n", p.device_name);
}
gpuvm.setMMHUBBase(mmhubBase);
gpuvm.setMMHUBTop(mmhubTop);
}
void
@@ -356,29 +390,28 @@ AMDGPUDevice::readDoorbell(PacketPtr pkt, Addr offset)
void
AMDGPUDevice::readMMIO(PacketPtr pkt, Addr offset)
{
Addr aperture = gpuvm.getMmioAperture(offset);
Addr aperture_offset = offset - aperture;
AddrRange aperture = gpuvm.getMMIOAperture(offset);
Addr aperture_offset = offset - aperture.start();
// By default read from MMIO trace. Overwrite the packet for a select
// few more dynamic MMIOs.
DPRINTF(AMDGPUDevice, "Read MMIO %#lx\n", offset);
mmioReader.readFromTrace(pkt, MMIO_BAR, offset);
switch (aperture) {
case NBIO_BASE:
if (aperture == gpuvm.getMMIORange(NBIO_MMIO_RANGE)) {
DPRINTF(AMDGPUDevice, "NBIO base\n");
nbio.readMMIO(pkt, aperture_offset);
break;
case GRBM_BASE:
} else if (aperture == gpuvm.getMMIORange(GRBM_MMIO_RANGE)) {
DPRINTF(AMDGPUDevice, "GRBM base\n");
gpuvm.readMMIO(pkt, aperture_offset >> GRBM_OFFSET_SHIFT);
break;
case GFX_BASE:
} else if (aperture == gpuvm.getMMIORange(GFX_MMIO_RANGE)) {
DPRINTF(AMDGPUDevice, "GFX base\n");
gfx.readMMIO(pkt, aperture_offset);
break;
case MMHUB_BASE:
} else if (aperture == gpuvm.getMMIORange(MMHUB_MMIO_RANGE)) {
DPRINTF(AMDGPUDevice, "MMHUB base\n");
gpuvm.readMMIO(pkt, aperture_offset >> MMHUB_OFFSET_SHIFT);
break;
default:
break;
} else {
DPRINTF(AMDGPUDevice, "Unknown MMIO aperture for read %#x\n", offset);
}
}
@@ -422,17 +455,22 @@ AMDGPUDevice::writeDoorbell(PacketPtr pkt, Addr offset)
DPRINTF(AMDGPUDevice, "Wrote doorbell %#lx\n", offset);
if (doorbells.find(offset) != doorbells.end()) {
QueueType q_type = doorbells[offset];
QueueType q_type = doorbells[offset].qtype;
int ip_id = doorbells[offset].ip_id;
DPRINTF(AMDGPUDevice, "Doorbell offset %p queue: %d\n",
offset, q_type);
switch (q_type) {
case Compute:
pm4PktProc->process(pm4PktProc->getQueue(offset),
pkt->getLE<uint64_t>());
assert(pm4PktProcs.count(ip_id));
pm4PktProcs[ip_id]->process(
pm4PktProcs[ip_id]->getQueue(offset),
pkt->getLE<uint64_t>());
break;
case Gfx:
pm4PktProc->process(pm4PktProc->getQueue(offset, true),
pkt->getLE<uint64_t>());
assert(pm4PktProcs.count(ip_id));
pm4PktProcs[ip_id]->process(
pm4PktProcs[ip_id]->getQueue(offset, true),
pkt->getLE<uint64_t>());
break;
case SDMAGfx: {
SDMAEngine *sdmaEng = getSDMAEngine(offset);
@@ -443,9 +481,11 @@ AMDGPUDevice::writeDoorbell(PacketPtr pkt, Addr offset)
sdmaEng->processPage(pkt->getLE<uint64_t>());
} break;
case ComputeAQL: {
assert(pm4PktProcs.count(ip_id));
cp->hsaPacketProc().hwScheduler()->write(offset,
pkt->getLE<uint64_t>() + 1);
pm4PktProc->updateReadIndex(offset, pkt->getLE<uint64_t>() + 1);
pm4PktProcs[ip_id]->updateReadIndex(offset,
pkt->getLE<uint64_t>() + 1);
} break;
case InterruptHandler:
deviceIH->updateRptr(pkt->getLE<uint32_t>());
@@ -475,12 +515,12 @@ AMDGPUDevice::writeDoorbell(PacketPtr pkt, Addr offset)
void
AMDGPUDevice::writeMMIO(PacketPtr pkt, Addr offset)
{
Addr aperture = gpuvm.getMmioAperture(offset);
Addr aperture_offset = offset - aperture;
AddrRange aperture = gpuvm.getMMIOAperture(offset);
Addr aperture_offset = offset - aperture.start();
DPRINTF(AMDGPUDevice, "Wrote MMIO %#lx\n", offset);
// Check SDMA functions first, then fallback to switch statement
// Check SDMA functions first, then fallback to MMIO ranges.
for (int idx = 0; idx < sdmaIds.size(); ++idx) {
if (sdmaMmios[idx].contains(offset)) {
Addr sdma_offset = (offset - sdmaMmios[idx].start()) >> 2;
@@ -498,26 +538,31 @@ AMDGPUDevice::writeMMIO(PacketPtr pkt, Addr offset)
}
}
switch (aperture) {
/* Write a general register to the graphics register bus manager. */
case GRBM_BASE:
// Check PM4s next, returning to avoid duplicate writes.
for (auto& [range, pm4_proc] : pm4Ranges) {
if (range.contains(offset)) {
// PM4 MMIOs are offset based on the MMIO range start
Addr ip_offset = offset - range.start();
pm4_proc->writeMMIO(pkt, ip_offset >> GRBM_OFFSET_SHIFT);
return;
}
}
if (aperture == gpuvm.getMMIORange(GRBM_MMIO_RANGE)) {
DPRINTF(AMDGPUDevice, "GRBM base\n");
gpuvm.writeMMIO(pkt, aperture_offset >> GRBM_OFFSET_SHIFT);
pm4PktProc->writeMMIO(pkt, aperture_offset >> GRBM_OFFSET_SHIFT);
break;
/* Write a register to the interrupt handler. */
case IH_BASE:
} else if (aperture == gpuvm.getMMIORange(IH_MMIO_RANGE)) {
DPRINTF(AMDGPUDevice, "IH base\n");
deviceIH->writeMMIO(pkt, aperture_offset >> IH_OFFSET_SHIFT);
break;
/* Write an IO space register */
case NBIO_BASE:
} else if (aperture == gpuvm.getMMIORange(NBIO_MMIO_RANGE)) {
DPRINTF(AMDGPUDevice, "NBIO base\n");
nbio.writeMMIO(pkt, aperture_offset);
break;
case GFX_BASE:
} else if (aperture == gpuvm.getMMIORange(GFX_MMIO_RANGE)) {
DPRINTF(AMDGPUDevice, "GFX base\n");
gfx.writeMMIO(pkt, aperture_offset);
break;
default:
DPRINTF(AMDGPUDevice, "Unknown MMIO aperture for %#x\n", offset);
break;
} else {
DPRINTF(AMDGPUDevice, "Unknown MMIO aperture for write %#x\n", offset);
}
}
@@ -638,10 +683,11 @@ AMDGPUDevice::setRegVal(uint64_t addr, uint32_t value)
}
void
AMDGPUDevice::setDoorbellType(uint32_t offset, QueueType qt)
AMDGPUDevice::setDoorbellType(uint32_t offset, QueueType qt, int ip_id)
{
DPRINTF(AMDGPUDevice, "Setting doorbell type for %x\n", offset);
doorbells[offset] = qt;
doorbells[offset].qtype = qt;
doorbells[offset].ip_id = ip_id;
}
void
@@ -692,6 +738,7 @@ AMDGPUDevice::serialize(CheckpointOut &cp) const
// Make a c-style array of the regs to serialize
uint32_t doorbells_offset[doorbells_size];
QueueType doorbells_queues[doorbells_size];
int doorbells_ip_ids[doorbells_size];
uint32_t sdma_engs_offset[sdma_engs_size];
int sdma_engs[sdma_engs_size];
int used_vmids[used_vmid_map_size];
@@ -701,7 +748,8 @@ AMDGPUDevice::serialize(CheckpointOut &cp) const
int idx = 0;
for (auto & it : doorbells) {
doorbells_offset[idx] = it.first;
doorbells_queues[idx] = it.second;
doorbells_queues[idx] = it.second.qtype;
doorbells_ip_ids[idx] = it.second.ip_id;
++idx;
}
@@ -730,6 +778,8 @@ AMDGPUDevice::serialize(CheckpointOut &cp) const
sizeof(doorbells_offset[0]));
SERIALIZE_ARRAY(doorbells_queues, sizeof(doorbells_queues)/
sizeof(doorbells_queues[0]));
SERIALIZE_ARRAY(doorbells_ip_ids, sizeof(doorbells_ip_ids)/
sizeof(doorbells_ip_ids[0]));
SERIALIZE_ARRAY(sdma_engs_offset, sizeof(sdma_engs_offset)/
sizeof(sdma_engs_offset[0]));
SERIALIZE_ARRAY(sdma_engs, sizeof(sdma_engs)/sizeof(sdma_engs[0]));
@@ -768,14 +818,18 @@ AMDGPUDevice::unserialize(CheckpointIn &cp)
if (doorbells_size > 0) {
uint32_t doorbells_offset[doorbells_size];
QueueType doorbells_queues[doorbells_size];
int doorbells_ip_ids[doorbells_size];
UNSERIALIZE_ARRAY(doorbells_offset, sizeof(doorbells_offset)/
sizeof(doorbells_offset[0]));
UNSERIALIZE_ARRAY(doorbells_queues, sizeof(doorbells_queues)/
sizeof(doorbells_queues[0]));
UNSERIALIZE_ARRAY(doorbells_ip_ids, sizeof(doorbells_ip_ids)/
sizeof(doorbells_ip_ids[0]));
for (int idx = 0; idx < doorbells_size; ++idx) {
doorbells[doorbells_offset[idx]] = doorbells_queues[idx];
doorbells[doorbells_offset[idx]].qtype = doorbells_queues[idx];
doorbells[doorbells_offset[idx]].ip_id = doorbells_ip_ids[idx];
}
}

View File

@@ -87,7 +87,7 @@ class AMDGPUDevice : public PciDevice
/**
* Structures to hold registers, doorbells, and some frame memory
*/
std::unordered_map<uint32_t, QueueType> doorbells;
std::unordered_map<uint32_t, DoorbellInfo> doorbells;
std::unordered_map<uint32_t, PacketPtr> pendingDoorbellPkts;
/**
@@ -113,9 +113,19 @@ class AMDGPUDevice : public PciDevice
AMDGPUMemoryManager *gpuMemMgr;
AMDGPUInterruptHandler *deviceIH;
AMDGPUVM gpuvm;
PM4PacketProcessor *pm4PktProc;
GPUCommandProcessor *cp;
/**
 * Hash functor so that an AddrRange can be used as the key of an
 * unordered_map. Only the start address of the range contributes to the
 * hash value.
 */
struct AddrRangeHasher
{
    std::size_t
    operator()(const AddrRange& k) const
    {
        const std::size_t hash_value = k.start();
        return hash_value;
    }
};
std::unordered_map<int, PM4PacketProcessor *> pm4PktProcs;
std::unordered_map<AddrRange, PM4PacketProcessor *,
AddrRangeHasher> pm4Ranges;
// SDMAs mapped by doorbell offset
std::unordered_map<uint32_t, SDMAEngine *> sdmaEngs;
// SDMAs mapped by ID
@@ -185,7 +195,7 @@ class AMDGPUDevice : public PciDevice
/**
* Set handles to GPU blocks.
*/
void setDoorbellType(uint32_t offset, QueueType qt);
void setDoorbellType(uint32_t offset, QueueType qt, int ip_id = 0);
void processPendingDoorbells(uint32_t offset);
void setSDMAEngine(Addr offset, SDMAEngine *eng);

View File

@@ -37,6 +37,7 @@
#include "base/trace.hh"
#include "debug/AMDGPUDevice.hh"
#include "dev/amdgpu/amdgpu_defines.hh"
#include "dev/amdgpu/amdgpu_device.hh"
#include "mem/packet_access.hh"
namespace gem5
@@ -51,6 +52,35 @@ AMDGPUVM::AMDGPUVM()
// Zero each VM context in turn. The element must be selected with the
// loop index; indexing with a constant 0 would clear only the first
// context AMDGPU_VM_COUNT times and leave the others untouched.
// NOTE(review): assumes vmContexts holds at least AMDGPU_VM_COUNT
// entries at this point -- confirm against its declaration/resize.
for (int i = 0; i < AMDGPU_VM_COUNT; ++i) {
    memset(&vmContexts[i], 0, sizeof(AMDGPUVMContext));
}
for (int i = 0; i < NUM_MMIO_RANGES; ++i) {
mmioRanges[i] = AddrRange();
}
}
/**
 * Record the address range of one MMIO aperture.
 *
 * @param mmio_aperture Selects which entry of mmioRanges is overwritten.
 * @param range Address range stored for that aperture.
 */
void
AMDGPUVM::setMMIOAperture(mmio_range_t mmio_aperture, AddrRange range)
{
    AddrRange &slot = mmioRanges[mmio_aperture];
    slot = range;
}
/**
 * Look up the address range stored for a given MMIO aperture.
 *
 * @param mmio_aperture Selects which entry of mmioRanges is read.
 * @return A copy of the stored range.
 */
AddrRange
AMDGPUVM::getMMIORange(mmio_range_t mmio_aperture)
{
    const AddrRange &stored = mmioRanges[mmio_aperture];
    return stored;
}
/**
 * Find the MMIO aperture that contains an offset.
 *
 * The apertures are checked in the order they are stored in mmioRanges
 * and the first one containing the offset is returned.
 *
 * @param offset MMIO offset to look up.
 * @return Reference to the matching range, or to the NBIO range when no
 *         stored range contains the offset.
 */
const AddrRange&
AMDGPUVM::getMMIOAperture(Addr offset)
{
    for (const AddrRange &candidate : mmioRanges) {
        if (!candidate.contains(offset)) {
            continue;
        }

        return candidate;
    }

    // Default to NBIO
    return mmioRanges[NBIO_MMIO_RANGE];
}
Addr

View File

@@ -99,9 +99,23 @@ static constexpr int AMDGPU_USER_PAGE_SIZE = 4096;
namespace gem5
{
/**
 * Identifiers for the MMIO apertures. The values are used as indices
 * into the mmioRanges array, so NUM_MMIO_RANGES (the number of
 * apertures) must remain the last enumerator.
 */
enum mmio_range_t : int
{
    NBIO_MMIO_RANGE,
    MMHUB_MMIO_RANGE,
    GFX_MMIO_RANGE,
    GRBM_MMIO_RANGE,
    IH_MMIO_RANGE,
    NUM_MMIO_RANGES
};
class AMDGPUDevice;
class AMDGPUVM : public Serializable
{
private:
AMDGPUDevice *gpuDevice;
typedef struct GEM5_PACKED
{
// Page table addresses: from (Base + Start) to (End)
@@ -160,9 +174,13 @@ class AMDGPUVM : public Serializable
*/
std::vector<VegaISA::GpuTLB *> gpu_tlbs;
std::array<AddrRange, NUM_MMIO_RANGES> mmioRanges;
public:
AMDGPUVM();
/** Store the given AMDGPUDevice pointer in gpuDevice. */
void
setGPUDevice(AMDGPUDevice *gpu_device)
{
    gpuDevice = gpu_device;
}
/**
* Return base address of GART table in framebuffer.
*/
@@ -232,38 +250,11 @@ class AMDGPUVM : public Serializable
Addr getSysAddrRangeLow () { return vmContext0.sysAddrL; }
Addr getSysAddrRangeHigh () { return vmContext0.sysAddrH; }
Addr
getMmioAperture(Addr addr)
{
// Aperture ranges:
// NBIO 0x0 - 0x4280
// IH 0x4280 - 0x4980
// SDMA0 0x4980 - 0x5180
// SDMA1 0x5180 - 0x5980
// GRBM 0x8000 - 0xD000
// GFX 0x28000 - 0x3F000
// MMHUB 0x68000 - 0x6a120
void setMMIOAperture(mmio_range_t mmio_aperture, AddrRange range);
const AddrRange& getMMIOAperture(Addr addr);
AddrRange getMMIORange(mmio_range_t mmio_aperture);
if (IH_BASE <= addr && addr < IH_BASE + IH_SIZE)
return IH_BASE;
else if (SDMA0_BASE <= addr && addr < SDMA0_BASE + SDMA_SIZE)
return SDMA0_BASE;
else if (SDMA1_BASE <= addr && addr < SDMA1_BASE + SDMA_SIZE)
return SDMA1_BASE;
else if (GRBM_BASE <= addr && addr < GRBM_BASE + GRBM_SIZE)
return GRBM_BASE;
else if (GFX_BASE <= addr && addr < GFX_BASE + GFX_SIZE)
return GFX_BASE;
else if (MMHUB_BASE <= addr && addr < MMHUB_BASE + MMHUB_SIZE)
return MMHUB_BASE;
else {
warn_once("Accessing unsupported MMIO aperture! Assuming NBIO\n");
return NBIO_BASE;
}
}
// Gettig mapped aperture base addresses
// Getting mapped aperture base addresses
Addr
getFrameAperture(Addr addr)
{

View File

@@ -36,34 +36,34 @@
namespace gem5
{
#define mmCP_RB0_BASE 0x1040
#define mmCP_RB0_CNTL 0x1041
#define mmCP_RB_WPTR_POLL_ADDR_LO 0x1046
#define mmCP_RB_WPTR_POLL_ADDR_HI 0x1047
#define mmCP_RB_VMID 0x1051
#define mmCP_RB0_RPTR_ADDR 0x1043
#define mmCP_RB0_RPTR_ADDR_HI 0x1044
#define mmCP_RB0_WPTR 0x1054
#define mmCP_RB0_WPTR_HI 0x1055
#define mmCP_RB_DOORBELL_CONTROL 0x1059
#define mmCP_RB_DOORBELL_RANGE_LOWER 0x105a
#define mmCP_RB_DOORBELL_RANGE_UPPER 0x105b
#define mmCP_RB0_BASE_HI 0x10b1
#define mmCP_RB0_BASE 0x040
#define mmCP_RB0_CNTL 0x041
#define mmCP_RB_WPTR_POLL_ADDR_LO 0x046
#define mmCP_RB_WPTR_POLL_ADDR_HI 0x047
#define mmCP_RB_VMID 0x051
#define mmCP_RB0_RPTR_ADDR 0x043
#define mmCP_RB0_RPTR_ADDR_HI 0x044
#define mmCP_RB0_WPTR 0x054
#define mmCP_RB0_WPTR_HI 0x055
#define mmCP_RB_DOORBELL_CONTROL 0x059
#define mmCP_RB_DOORBELL_RANGE_LOWER 0x05a
#define mmCP_RB_DOORBELL_RANGE_UPPER 0x05b
#define mmCP_RB0_BASE_HI 0x0b1
#define mmCP_HQD_ACTIVE 0x1247
#define mmCP_HQD_VMID 0x1248
#define mmCP_HQD_PQ_BASE 0x124d
#define mmCP_HQD_PQ_BASE_HI 0x124e
#define mmCP_HQD_PQ_DOORBELL_CONTROL 0x1254
#define mmCP_HQD_PQ_RPTR 0x124f
#define mmCP_HQD_PQ_RPTR_REPORT_ADDR 0x1250
#define mmCP_HQD_PQ_RPTR_REPORT_ADDR_HI 0x1251
#define mmCP_HQD_PQ_WPTR_POLL_ADDR 0x1252
#define mmCP_HQD_PQ_WPTR_POLL_ADDR_HI 0x1253
#define mmCP_HQD_PQ_CONTROL 0x1256
#define mmCP_HQD_IB_CONTROL 0x125a
#define mmCP_HQD_PQ_WPTR_LO 0x127b
#define mmCP_HQD_PQ_WPTR_HI 0x127c
#define mmCP_HQD_ACTIVE 0x247
#define mmCP_HQD_VMID 0x248
#define mmCP_HQD_PQ_BASE 0x24d
#define mmCP_HQD_PQ_BASE_HI 0x24e
#define mmCP_HQD_PQ_DOORBELL_CONTROL 0x254
#define mmCP_HQD_PQ_RPTR 0x24f
#define mmCP_HQD_PQ_RPTR_REPORT_ADDR 0x250
#define mmCP_HQD_PQ_RPTR_REPORT_ADDR_HI 0x251
#define mmCP_HQD_PQ_WPTR_POLL_ADDR 0x252
#define mmCP_HQD_PQ_WPTR_POLL_ADDR_HI 0x253
#define mmCP_HQD_PQ_CONTROL 0x256
#define mmCP_HQD_IB_CONTROL 0x25a
#define mmCP_HQD_PQ_WPTR_LO 0x27b
#define mmCP_HQD_PQ_WPTR_HI 0x27c
} // namespace gem5

View File

@@ -49,7 +49,7 @@ namespace gem5
{
PM4PacketProcessor::PM4PacketProcessor(const PM4PacketProcessorParams &p)
: DmaVirtDevice(p)
: DmaVirtDevice(p), _ipId(p.ip_id), _mmioRange(p.mmio_range)
{
memset(&kiq, 0, sizeof(QueueDesc));
memset(&pq, 0, sizeof(QueueDesc));
@@ -144,7 +144,7 @@ PM4PacketProcessor::newQueue(QueueDesc *mqd, Addr offset,
QueueType qt;
qt = mqd->aql ? QueueType::ComputeAQL
: QueueType::Compute;
gpuDevice->setDoorbellType(offset, qt);
gpuDevice->setDoorbellType(offset, qt, getIpId());
DPRINTF(PM4PacketProcessor, "New PM4 queue %d, base: %p offset: %p, me: "
"%d, pipe %d queue: %d size: %d\n", id, q->base(), q->offset(),
@@ -521,7 +521,7 @@ PM4PacketProcessor::processSDMAMQD(PM4MapQueues *pkt, PM4Queue *q, Addr addr,
// Register doorbell with GPU device
gpuDevice->setSDMAEngine(pkt->doorbellOffset << 2, sdma_eng);
gpuDevice->setDoorbellType(pkt->doorbellOffset << 2, RLC);
gpuDevice->setDoorbellType(pkt->doorbellOffset << 2, RLC, getIpId());
gpuDevice->processPendingDoorbells(pkt->doorbellOffset << 2);
}
@@ -774,9 +774,14 @@ PM4PacketProcessor::setUconfigReg(PM4Queue *q, PM4SetUconfigReg *pkt)
{
q->incRptr(sizeof(PM4SetUconfigReg));
DPRINTF(PM4PacketProcessor, "SetUconfig offset %x data %x\n",
pkt->offset, pkt->data);
// SET_UCONFIG_REG_START and pkt->offset are dword addresses
uint32_t reg_addr = (PACKET3_SET_UCONFIG_REG_START + pkt->offset) * 4;
// Additional CPs respond to addresses 0x40000 apart.
reg_addr += 0x40000 * getIpId();
gpuDevice->setRegVal(reg_addr, pkt->data);
decodeNext(q);
@@ -851,7 +856,7 @@ PM4PacketProcessor::writeMMIO(PacketPtr pkt, Addr mmio_offset)
break;
case mmCP_HQD_PQ_DOORBELL_CONTROL:
setHqdPqDoorbellCtrl(pkt->getLE<uint32_t>());
gpuDevice->setDoorbellType(getKiqDoorbellOffset(), Compute);
gpuDevice->setDoorbellType(getKiqDoorbellOffset(), Compute, getIpId());
break;
case mmCP_HQD_PQ_RPTR:
setHqdPqPtr(pkt->getLE<uint32_t>());
@@ -913,7 +918,7 @@ PM4PacketProcessor::writeMMIO(PacketPtr pkt, Addr mmio_offset)
break;
case mmCP_RB_DOORBELL_CONTROL:
setRbDoorbellCntrl(pkt->getLE<uint32_t>());
gpuDevice->setDoorbellType(getPqDoorbellOffset(), Gfx);
gpuDevice->setDoorbellType(getPqDoorbellOffset(), Gfx, getIpId());
break;
case mmCP_RB_DOORBELL_RANGE_LOWER:
setRbDoorbellRangeLo(pkt->getLE<uint32_t>());

View File

@@ -63,6 +63,10 @@ class PM4PacketProcessor : public DmaVirtDevice
std::unordered_map<uint16_t, PM4Queue *> queues;
/* A map of PM4 queues based on doorbell offset */
std::unordered_map<uint32_t, PM4Queue *> queuesMap;
int _ipId;
AddrRange _mmioRange;
public:
PM4PacketProcessor(const PM4PacketProcessorParams &p);
@@ -188,6 +192,9 @@ class PM4PacketProcessor : public DmaVirtDevice
void setRbDoorbellCntrl(uint32_t data);
void setRbDoorbellRangeLo(uint32_t data);
void setRbDoorbellRangeHi(uint32_t data);
/** @return The instance ID this PM4 processor was constructed with. */
int
getIpId() const
{
    return _ipId;
}
/** @return A copy of the MMIO address range of this PM4 processor. */
AddrRange
getMMIORange() const
{
    return _mmioRange;
}
};
} // namespace gem5