diff --git a/configs/example/gpufs/system/system.py b/configs/example/gpufs/system/system.py index 671d4efdc9..1f89bd935b 100644 --- a/configs/example/gpufs/system/system.py +++ b/configs/example/gpufs/system/system.py @@ -188,9 +188,15 @@ def makeGpuFSSystem(args): system.pc.south_bridge.gpu.sdmas = sdma_engines - # Setup PM4 packet processor - pm4_pkt_proc = PM4PacketProcessor() - system.pc.south_bridge.gpu.pm4_pkt_proc = pm4_pkt_proc + # Setup PM4 packet processors + pm4_procs = [] + pm4_procs.append( + PM4PacketProcessor( + ip_id=0, mmio_range=AddrRange(start=0xC000, end=0xD000) + ) + ) + + system.pc.south_bridge.gpu.pm4_pkt_procs = pm4_procs # GPU data path gpu_mem_mgr = AMDGPUMemoryManager() @@ -207,7 +213,8 @@ def makeGpuFSSystem(args): for sdma in sdma_engines: system._dma_ports.append(sdma) system._dma_ports.append(device_ih) - system._dma_ports.append(pm4_pkt_proc) + for pm4_proc in pm4_procs: + system._dma_ports.append(pm4_proc) system._dma_ports.append(system_hub) system._dma_ports.append(gpu_mem_mgr) system._dma_ports.append(hsapp_pt_walker) @@ -221,7 +228,8 @@ def makeGpuFSSystem(args): for sdma in sdma_engines: sdma.pio = system.iobus.mem_side_ports device_ih.pio = system.iobus.mem_side_ports - pm4_pkt_proc.pio = system.iobus.mem_side_ports + for pm4_proc in pm4_procs: + pm4_proc.pio = system.iobus.mem_side_ports system_hub.pio = system.iobus.mem_side_ports # Full system needs special TLBs for SQC, Scalar, and vector data ports diff --git a/src/dev/amdgpu/AMDGPU.py b/src/dev/amdgpu/AMDGPU.py index 0370f09e01..0e0f597927 100644 --- a/src/dev/amdgpu/AMDGPU.py +++ b/src/dev/amdgpu/AMDGPU.py @@ -95,7 +95,7 @@ class AMDGPUDevice(PciDevice): # The config script should not create a new cp here but rather assign the # same cp that is assigned to the Shader SimObject. cp = Param.GPUCommandProcessor(NULL, "Command Processor") - pm4_pkt_proc = Param.PM4PacketProcessor("PM4 Packet Processor") + pm4_pkt_procs = VectorParam.PM4PacketProcessor("PM4 Packet Processor") memory_manager = Param.AMDGPUMemoryManager("GPU Memory Manager") memories = VectorParam.AbstractMemory([], "All memories in the device") device_ih = Param.AMDGPUInterruptHandler("GPU Interrupt handler") @@ -118,6 +118,10 @@ class PM4PacketProcessor(DmaVirtDevice): cxx_header = "dev/amdgpu/pm4_packet_processor.hh" cxx_class = "gem5::PM4PacketProcessor" + # Default to 0 as the common case is one PM4 packet processor + ip_id = Param.Int(0, "Instance ID of this PM4 processor") + mmio_range = Param.AddrRange("Range of MMIO addresses") + class AMDGPUMemoryManager(ClockedObject): type = "AMDGPUMemoryManager" diff --git a/src/dev/amdgpu/amdgpu_defines.hh b/src/dev/amdgpu/amdgpu_defines.hh index bc6377fbbc..883501b84d 100644 --- a/src/dev/amdgpu/amdgpu_defines.hh +++ b/src/dev/amdgpu/amdgpu_defines.hh @@ -49,6 +49,16 @@ enum QueueType RLC }; +/* + * Hold information about doorbells including queue type and the IP + * block ID if the IP can have multiple instances. + */ +typedef struct +{ + QueueType qtype; + int ip_id; +} DoorbellInfo; + // AMD GPUs support 16 different virtual address spaces static constexpr int AMDGPU_VM_COUNT = 16; @@ -61,36 +71,11 @@ constexpr int MMIO_BAR = 5; constexpr uint32_t VGA_ROM_DEFAULT = 0xc0000; constexpr uint32_t ROM_SIZE = 0x20000; // 128kB -/* SDMA base, size, mmio offset shift. */ -static constexpr uint32_t SDMA0_BASE = 0x4980; -static constexpr uint32_t SDMA1_BASE = 0x5180; -static constexpr uint32_t SDMA_SIZE = 0x800; -static constexpr uint32_t SDMA_OFFSET_SHIFT = 2; - -/* Interrupt handler base, size, mmio offset shift. */ -static constexpr uint32_t IH_BASE = 0x4280; -static constexpr uint32_t IH_SIZE = 0x700; +/* Most MMIOs use DWORD addresses and thus need to be shifted. */ static constexpr uint32_t IH_OFFSET_SHIFT = 2; - -/* Graphics register bus manager base, size, mmio offset shift. */ -static constexpr uint32_t GRBM_BASE = 0x8000; -static constexpr uint32_t GRBM_SIZE = 0x5000; static constexpr uint32_t GRBM_OFFSET_SHIFT = 2; - -/* GFX base, size, mmio offset shift. */ -static constexpr uint32_t GFX_BASE = 0x28000; -static constexpr uint32_t GFX_SIZE = 0x17000; -static constexpr uint32_t GFX_OFFSET_SHIFT = 2; - -/* MMHUB base, size, mmio offset shift. */ -static constexpr uint32_t MMHUB_BASE = 0x68000; -static constexpr uint32_t MMHUB_SIZE = 0x2120; static constexpr uint32_t MMHUB_OFFSET_SHIFT = 2; -/* NBIO base and size. */ -static constexpr uint32_t NBIO_BASE = 0x0; -static constexpr uint32_t NBIO_SIZE = 0x4280; - } // namespace gem5 #endif // __DEV_AMDGPU_AMDGPU_DEFINES_HH__ diff --git a/src/dev/amdgpu/amdgpu_device.cc b/src/dev/amdgpu/amdgpu_device.cc index 4b684aa221..5ddd7756ba 100644 --- a/src/dev/amdgpu/amdgpu_device.cc +++ b/src/dev/amdgpu/amdgpu_device.cc @@ -54,8 +54,7 @@ namespace gem5 AMDGPUDevice::AMDGPUDevice(const AMDGPUDeviceParams &p) : PciDevice(p), gpuMemMgr(p.memory_manager), deviceIH(p.device_ih), - pm4PktProc(p.pm4_pkt_proc), cp(p.cp), - checkpoint_before_mmios(p.checkpoint_before_mmios), + cp(p.cp), checkpoint_before_mmios(p.checkpoint_before_mmios), init_interrupt_count(0), _lastVMID(0), deviceMem(name() + ".deviceMem", p.memories, false, "", false) { @@ -81,6 +80,16 @@ AMDGPUDevice::AMDGPUDevice(const AMDGPUDeviceParams &p) romRange = RangeSize(VGA_ROM_DEFAULT, ROM_SIZE); } + if (p.device_name == "Vega10") { + gfx_version = GfxVersion::gfx900; + } else if (p.device_name == "MI100") { + gfx_version = GfxVersion::gfx908; + } else if (p.device_name == "MI200") { + gfx_version = GfxVersion::gfx90a; + } else { + panic("Unknown GPU device %s\n", p.device_name); + } + if (p.trace_file != "") { mmioReader.readMMIOTrace(p.trace_file); } @@ -126,8 +135,22 @@ AMDGPUDevice::AMDGPUDevice(const AMDGPUDeviceParams &p) panic("Unknown GPU device %s\n", p.device_name); } + // Setup PM4 packet processors and sanity check IDs + std::set pm4_ids; + for (auto& pm4 : p.pm4_pkt_procs) { + pm4->setGPUDevice(this); + fatal_if(pm4_ids.count(pm4->getIpId()), + "Two PM4s with same IP IDs is not allowed"); + pm4_ids.insert(pm4->getIpId()); + pm4PktProcs.insert({pm4->getIpId(), pm4}); + + pm4Ranges.insert({pm4->getMMIORange(), pm4}); + } + + // There should be at least one PM4 packet processor with ID 0 + fatal_if(!pm4PktProcs.count(0), "No default PM4 processor found"); + deviceIH->setGPUDevice(this); - pm4PktProc->setGPUDevice(this); cp->hsaPacketProc().setGPUDevice(this); cp->setGPUDevice(this); nbio.setGPUDevice(this); @@ -136,6 +159,23 @@ AMDGPUDevice::AMDGPUDevice(const AMDGPUDeviceParams &p) // could possibly be anything, but these are the values used by hardware. uint64_t mmhubBase = 0x8000ULL << 24; uint64_t mmhubTop = 0x83ffULL << 24; + uint64_t mem_size = 0x3ff0; // 16 GB of memory + + gpuvm.setMMHUBBase(mmhubBase); + gpuvm.setMMHUBTop(mmhubTop); + + // Map other MMIO apertures based on gfx version. This must be done before + // any calls to get/setRegVal. + // NBIO 0x0 - 0x4280 + // IH 0x4280 - 0x4980 + // GRBM 0x8000 - 0xC000 + // GFX 0x28000 - 0x3F000 + // MMHUB 0x68000 - 0x6a120 + gpuvm.setMMIOAperture(NBIO_MMIO_RANGE, AddrRange(0x0, 0x4280)); + gpuvm.setMMIOAperture(IH_MMIO_RANGE, AddrRange(0x4280, 0x4980)); + gpuvm.setMMIOAperture(GRBM_MMIO_RANGE, AddrRange(0x8000, 0xC000)); + gpuvm.setMMIOAperture(GFX_MMIO_RANGE, AddrRange(0x28000, 0x3F000)); + gpuvm.setMMIOAperture(MMHUB_MMIO_RANGE, AddrRange(0x68000, 0x6A120)); // These are hardcoded register values to return what the driver expects setRegVal(AMDGPU_MP0_SMN_C2PMSG_33, 0x80000000); @@ -145,25 +185,19 @@ AMDGPUDevice::AMDGPUDevice(const AMDGPUDeviceParams &p) if (p.device_name == "Vega10") { setRegVal(VEGA10_FB_LOCATION_BASE, mmhubBase >> 24); setRegVal(VEGA10_FB_LOCATION_TOP, mmhubTop >> 24); - gfx_version = GfxVersion::gfx900; } else if (p.device_name == "MI100") { setRegVal(MI100_FB_LOCATION_BASE, mmhubBase >> 24); setRegVal(MI100_FB_LOCATION_TOP, mmhubTop >> 24); - setRegVal(MI100_MEM_SIZE_REG, 0x3ff0); // 16GB of memory - gfx_version = GfxVersion::gfx908; + setRegVal(MI100_MEM_SIZE_REG, mem_size); } else if (p.device_name == "MI200") { // This device can have either 64GB or 128GB of device memory. // This limits to 16GB for simulation. setRegVal(MI200_FB_LOCATION_BASE, mmhubBase >> 24); setRegVal(MI200_FB_LOCATION_TOP, mmhubTop >> 24); - setRegVal(MI200_MEM_SIZE_REG, 0x3ff0); - gfx_version = GfxVersion::gfx90a; + setRegVal(MI200_MEM_SIZE_REG, mem_size); } else { panic("Unknown GPU device %s\n", p.device_name); } - - gpuvm.setMMHUBBase(mmhubBase); - gpuvm.setMMHUBTop(mmhubTop); } void @@ -356,29 +390,28 @@ AMDGPUDevice::readDoorbell(PacketPtr pkt, Addr offset) void AMDGPUDevice::readMMIO(PacketPtr pkt, Addr offset) { - Addr aperture = gpuvm.getMmioAperture(offset); - Addr aperture_offset = offset - aperture; + AddrRange aperture = gpuvm.getMMIOAperture(offset); + Addr aperture_offset = offset - aperture.start(); // By default read from MMIO trace. Overwrite the packet for a select // few more dynamic MMIOs. DPRINTF(AMDGPUDevice, "Read MMIO %#lx\n", offset); mmioReader.readFromTrace(pkt, MMIO_BAR, offset); - switch (aperture) { - case NBIO_BASE: + if (aperture == gpuvm.getMMIORange(NBIO_MMIO_RANGE)) { + DPRINTF(AMDGPUDevice, "NBIO base\n"); nbio.readMMIO(pkt, aperture_offset); - break; - case GRBM_BASE: + } else if (aperture == gpuvm.getMMIORange(GRBM_MMIO_RANGE)) { + DPRINTF(AMDGPUDevice, "GRBM base\n"); gpuvm.readMMIO(pkt, aperture_offset >> GRBM_OFFSET_SHIFT); - break; - case GFX_BASE: + } else if (aperture == gpuvm.getMMIORange(GFX_MMIO_RANGE)) { + DPRINTF(AMDGPUDevice, "GFX base\n"); gfx.readMMIO(pkt, aperture_offset); - break; - case MMHUB_BASE: + } else if (aperture == gpuvm.getMMIORange(MMHUB_MMIO_RANGE)) { + DPRINTF(AMDGPUDevice, "MMHUB base\n"); gpuvm.readMMIO(pkt, aperture_offset >> MMHUB_OFFSET_SHIFT); - break; - default: - break; + } else { + DPRINTF(AMDGPUDevice, "Unknown MMIO aperture for read %#x\n", offset); } } @@ -422,17 +455,22 @@ AMDGPUDevice::writeDoorbell(PacketPtr pkt, Addr offset) DPRINTF(AMDGPUDevice, "Wrote doorbell %#lx\n", offset); if (doorbells.find(offset) != doorbells.end()) { - QueueType q_type = doorbells[offset]; + QueueType q_type = doorbells[offset].qtype; + int ip_id = doorbells[offset].ip_id; DPRINTF(AMDGPUDevice, "Doorbell offset %p queue: %d\n", offset, q_type); switch (q_type) { case Compute: - pm4PktProc->process(pm4PktProc->getQueue(offset), - pkt->getLE()); + assert(pm4PktProcs.count(ip_id)); + pm4PktProcs[ip_id]->process( + pm4PktProcs[ip_id]->getQueue(offset), + pkt->getLE()); break; case Gfx: - pm4PktProc->process(pm4PktProc->getQueue(offset, true), - pkt->getLE()); + assert(pm4PktProcs.count(ip_id)); + pm4PktProcs[ip_id]->process( + pm4PktProcs[ip_id]->getQueue(offset, true), + pkt->getLE()); break; case SDMAGfx: { SDMAEngine *sdmaEng = getSDMAEngine(offset); @@ -443,9 +481,11 @@ AMDGPUDevice::writeDoorbell(PacketPtr pkt, Addr offset) sdmaEng->processPage(pkt->getLE()); } break; case ComputeAQL: { + assert(pm4PktProcs.count(ip_id)); cp->hsaPacketProc().hwScheduler()->write(offset, pkt->getLE() + 1); - pm4PktProc->updateReadIndex(offset, pkt->getLE() + 1); + pm4PktProcs[ip_id]->updateReadIndex(offset, + pkt->getLE() + 1); } break; case InterruptHandler: deviceIH->updateRptr(pkt->getLE()); @@ -475,12 +515,12 @@ AMDGPUDevice::writeDoorbell(PacketPtr pkt, Addr offset) void AMDGPUDevice::writeMMIO(PacketPtr pkt, Addr offset) { - Addr aperture = gpuvm.getMmioAperture(offset); - Addr aperture_offset = offset - aperture; + AddrRange aperture = gpuvm.getMMIOAperture(offset); + Addr aperture_offset = offset - aperture.start(); DPRINTF(AMDGPUDevice, "Wrote MMIO %#lx\n", offset); - // Check SDMA functions first, then fallback to switch statement + // Check SDMA functions first, then fallback to MMIO ranges. for (int idx = 0; idx < sdmaIds.size(); ++idx) { if (sdmaMmios[idx].contains(offset)) { Addr sdma_offset = (offset - sdmaMmios[idx].start()) >> 2; @@ -498,26 +538,31 @@ AMDGPUDevice::writeMMIO(PacketPtr pkt, Addr offset) } } - switch (aperture) { - /* Write a general register to the graphics register bus manager. */ - case GRBM_BASE: + // Check PM4s next, returning to avoid duplicate writes. + for (auto& [range, pm4_proc] : pm4Ranges) { + if (range.contains(offset)) { + // PM4 MMIOs are offset based on the MMIO range start + Addr ip_offset = offset - range.start(); + pm4_proc->writeMMIO(pkt, ip_offset >> GRBM_OFFSET_SHIFT); + + return; + } + } + + if (aperture == gpuvm.getMMIORange(GRBM_MMIO_RANGE)) { + DPRINTF(AMDGPUDevice, "GRBM base\n"); gpuvm.writeMMIO(pkt, aperture_offset >> GRBM_OFFSET_SHIFT); - pm4PktProc->writeMMIO(pkt, aperture_offset >> GRBM_OFFSET_SHIFT); - break; - /* Write a register to the interrupt handler. */ - case IH_BASE: + } else if (aperture == gpuvm.getMMIORange(IH_MMIO_RANGE)) { + DPRINTF(AMDGPUDevice, "IH base\n"); deviceIH->writeMMIO(pkt, aperture_offset >> IH_OFFSET_SHIFT); - break; - /* Write an IO space register */ - case NBIO_BASE: + } else if (aperture == gpuvm.getMMIORange(NBIO_MMIO_RANGE)) { + DPRINTF(AMDGPUDevice, "NBIO base\n"); nbio.writeMMIO(pkt, aperture_offset); - break; - case GFX_BASE: + } else if (aperture == gpuvm.getMMIORange(GFX_MMIO_RANGE)) { + DPRINTF(AMDGPUDevice, "GFX base\n"); gfx.writeMMIO(pkt, aperture_offset); - break; - default: - DPRINTF(AMDGPUDevice, "Unknown MMIO aperture for %#x\n", offset); - break; + } else { + DPRINTF(AMDGPUDevice, "Unknown MMIO aperture for write %#x\n", offset); } } @@ -638,10 +683,11 @@ AMDGPUDevice::setRegVal(uint64_t addr, uint32_t value) } void -AMDGPUDevice::setDoorbellType(uint32_t offset, QueueType qt) +AMDGPUDevice::setDoorbellType(uint32_t offset, QueueType qt, int ip_id) { DPRINTF(AMDGPUDevice, "Setting doorbell type for %x\n", offset); - doorbells[offset] = qt; + doorbells[offset].qtype = qt; + doorbells[offset].ip_id = ip_id; } void @@ -692,6 +738,7 @@ AMDGPUDevice::serialize(CheckpointOut &cp) const // Make a c-style array of the regs to serialize uint32_t doorbells_offset[doorbells_size]; QueueType doorbells_queues[doorbells_size]; + int doorbells_ip_ids[doorbells_size]; uint32_t sdma_engs_offset[sdma_engs_size]; int sdma_engs[sdma_engs_size]; int used_vmids[used_vmid_map_size]; @@ -701,7 +748,8 @@ AMDGPUDevice::serialize(CheckpointOut &cp) const int idx = 0; for (auto & it : doorbells) { doorbells_offset[idx] = it.first; - doorbells_queues[idx] = it.second; + doorbells_queues[idx] = it.second.qtype; + doorbells_ip_ids[idx] = it.second.ip_id; ++idx; } @@ -730,6 +778,8 @@ AMDGPUDevice::serialize(CheckpointOut &cp) const sizeof(doorbells_offset[0])); SERIALIZE_ARRAY(doorbells_queues, sizeof(doorbells_queues)/ sizeof(doorbells_queues[0])); + SERIALIZE_ARRAY(doorbells_ip_ids, sizeof(doorbells_ip_ids)/ + sizeof(doorbells_ip_ids[0])); SERIALIZE_ARRAY(sdma_engs_offset, sizeof(sdma_engs_offset)/ sizeof(sdma_engs_offset[0])); SERIALIZE_ARRAY(sdma_engs, sizeof(sdma_engs)/sizeof(sdma_engs[0])); @@ -768,14 +818,18 @@ AMDGPUDevice::unserialize(CheckpointIn &cp) if (doorbells_size > 0) { uint32_t doorbells_offset[doorbells_size]; QueueType doorbells_queues[doorbells_size]; + int doorbells_ip_ids[doorbells_size]; UNSERIALIZE_ARRAY(doorbells_offset, sizeof(doorbells_offset)/ sizeof(doorbells_offset[0])); UNSERIALIZE_ARRAY(doorbells_queues, sizeof(doorbells_queues)/ sizeof(doorbells_queues[0])); + UNSERIALIZE_ARRAY(doorbells_ip_ids, sizeof(doorbells_ip_ids)/ + sizeof(doorbells_ip_ids[0])); for (int idx = 0; idx < doorbells_size; ++idx) { - doorbells[doorbells_offset[idx]] = doorbells_queues[idx]; + doorbells[doorbells_offset[idx]].qtype = doorbells_queues[idx]; + doorbells[doorbells_offset[idx]].ip_id = doorbells_ip_ids[idx]; } } diff --git a/src/dev/amdgpu/amdgpu_device.hh b/src/dev/amdgpu/amdgpu_device.hh index fface5fb3e..33b6a9f3e7 100644 --- a/src/dev/amdgpu/amdgpu_device.hh +++ b/src/dev/amdgpu/amdgpu_device.hh @@ -87,7 +87,7 @@ class AMDGPUDevice : public PciDevice /** * Structures to hold registers, doorbells, and some frame memory */ - std::unordered_map doorbells; + std::unordered_map doorbells; std::unordered_map pendingDoorbellPkts; /** @@ -113,9 +113,19 @@ class AMDGPUDevice : public PciDevice AMDGPUMemoryManager *gpuMemMgr; AMDGPUInterruptHandler *deviceIH; AMDGPUVM gpuvm; - PM4PacketProcessor *pm4PktProc; GPUCommandProcessor *cp; + struct AddrRangeHasher + { + std::size_t operator()(const AddrRange& k) const + { + return k.start(); + } + }; + std::unordered_map pm4PktProcs; + std::unordered_map pm4Ranges; + // SDMAs mapped by doorbell offset std::unordered_map sdmaEngs; // SDMAs mapped by ID @@ -185,7 +195,7 @@ class AMDGPUDevice : public PciDevice /** * Set handles to GPU blocks. */ - void setDoorbellType(uint32_t offset, QueueType qt); + void setDoorbellType(uint32_t offset, QueueType qt, int ip_id = 0); void processPendingDoorbells(uint32_t offset); void setSDMAEngine(Addr offset, SDMAEngine *eng); diff --git a/src/dev/amdgpu/amdgpu_vm.cc b/src/dev/amdgpu/amdgpu_vm.cc index 5a13ac9ba0..0eea590c5a 100644 --- a/src/dev/amdgpu/amdgpu_vm.cc +++ b/src/dev/amdgpu/amdgpu_vm.cc @@ -37,6 +37,7 @@ #include "base/trace.hh" #include "debug/AMDGPUDevice.hh" #include "dev/amdgpu/amdgpu_defines.hh" +#include "dev/amdgpu/amdgpu_device.hh" #include "mem/packet_access.hh" namespace gem5 @@ -51,6 +52,35 @@ AMDGPUVM::AMDGPUVM() for (int i = 0; i < AMDGPU_VM_COUNT; ++i) { memset(&vmContexts[0], 0, sizeof(AMDGPUVMContext)); } + + for (int i = 0; i < NUM_MMIO_RANGES; ++i) { + mmioRanges[i] = AddrRange(); + } +} + +void +AMDGPUVM::setMMIOAperture(mmio_range_t mmio_aperture, AddrRange range) +{ + mmioRanges[mmio_aperture] = range; +} + +AddrRange +AMDGPUVM::getMMIORange(mmio_range_t mmio_aperture) +{ + return mmioRanges[mmio_aperture]; +} + +const AddrRange& +AMDGPUVM::getMMIOAperture(Addr offset) +{ + for (int i = 0; i < NUM_MMIO_RANGES; ++i) { + if (mmioRanges[i].contains(offset)) { + return mmioRanges[i]; + } + } + + // Default to NBIO + return mmioRanges[NBIO_MMIO_RANGE]; } Addr diff --git a/src/dev/amdgpu/amdgpu_vm.hh b/src/dev/amdgpu/amdgpu_vm.hh index 5af666f379..857ef724da 100644 --- a/src/dev/amdgpu/amdgpu_vm.hh +++ b/src/dev/amdgpu/amdgpu_vm.hh @@ -99,9 +99,23 @@ static constexpr int AMDGPU_USER_PAGE_SIZE = 4096; namespace gem5 { +typedef enum : int +{ + NBIO_MMIO_RANGE, + MMHUB_MMIO_RANGE, + GFX_MMIO_RANGE, + GRBM_MMIO_RANGE, + IH_MMIO_RANGE, + NUM_MMIO_RANGES +} mmio_range_t; + +class AMDGPUDevice; + class AMDGPUVM : public Serializable { private: + AMDGPUDevice *gpuDevice; + typedef struct GEM5_PACKED { // Page table addresses: from (Base + Start) to (End) @@ -160,9 +174,13 @@ class AMDGPUVM : public Serializable */ std::vector gpu_tlbs; + std::array mmioRanges; + public: AMDGPUVM(); + void setGPUDevice(AMDGPUDevice *gpu_device) { gpuDevice = gpu_device; } + /** * Return base address of GART table in framebuffer. */ @@ -232,38 +250,11 @@ class AMDGPUVM : public Serializable Addr getSysAddrRangeLow () { return vmContext0.sysAddrL; } Addr getSysAddrRangeHigh () { return vmContext0.sysAddrH; } - Addr - getMmioAperture(Addr addr) - { - // Aperture ranges: - // NBIO 0x0 - 0x4280 - // IH 0x4280 - 0x4980 - // SDMA0 0x4980 - 0x5180 - // SDMA1 0x5180 - 0x5980 - // GRBM 0x8000 - 0xD000 - // GFX 0x28000 - 0x3F000 - // MMHUB 0x68000 - 0x6a120 + void setMMIOAperture(mmio_range_t mmio_aperture, AddrRange range); + const AddrRange& getMMIOAperture(Addr addr); + AddrRange getMMIORange(mmio_range_t mmio_aperture); - if (IH_BASE <= addr && addr < IH_BASE + IH_SIZE) - return IH_BASE; - else if (SDMA0_BASE <= addr && addr < SDMA0_BASE + SDMA_SIZE) - return SDMA0_BASE; - else if (SDMA1_BASE <= addr && addr < SDMA1_BASE + SDMA_SIZE) - return SDMA1_BASE; - else if (GRBM_BASE <= addr && addr < GRBM_BASE + GRBM_SIZE) - return GRBM_BASE; - else if (GFX_BASE <= addr && addr < GFX_BASE + GFX_SIZE) - return GFX_BASE; - else if (MMHUB_BASE <= addr && addr < MMHUB_BASE + MMHUB_SIZE) - return MMHUB_BASE; - else { - warn_once("Accessing unsupported MMIO aperture! Assuming NBIO\n"); - return NBIO_BASE; - } - - } - - // Gettig mapped aperture base addresses + // Getting mapped aperture base addresses Addr getFrameAperture(Addr addr) { diff --git a/src/dev/amdgpu/pm4_mmio.hh b/src/dev/amdgpu/pm4_mmio.hh index 3801223175..e9e504c3cd 100644 --- a/src/dev/amdgpu/pm4_mmio.hh +++ b/src/dev/amdgpu/pm4_mmio.hh @@ -36,34 +36,34 @@ namespace gem5 { -#define mmCP_RB0_BASE 0x1040 -#define mmCP_RB0_CNTL 0x1041 -#define mmCP_RB_WPTR_POLL_ADDR_LO 0x1046 -#define mmCP_RB_WPTR_POLL_ADDR_HI 0x1047 -#define mmCP_RB_VMID 0x1051 -#define mmCP_RB0_RPTR_ADDR 0x1043 -#define mmCP_RB0_RPTR_ADDR_HI 0x1044 -#define mmCP_RB0_WPTR 0x1054 -#define mmCP_RB0_WPTR_HI 0x1055 -#define mmCP_RB_DOORBELL_CONTROL 0x1059 -#define mmCP_RB_DOORBELL_RANGE_LOWER 0x105a -#define mmCP_RB_DOORBELL_RANGE_UPPER 0x105b -#define mmCP_RB0_BASE_HI 0x10b1 +#define mmCP_RB0_BASE 0x040 +#define mmCP_RB0_CNTL 0x041 +#define mmCP_RB_WPTR_POLL_ADDR_LO 0x046 +#define mmCP_RB_WPTR_POLL_ADDR_HI 0x047 +#define mmCP_RB_VMID 0x051 +#define mmCP_RB0_RPTR_ADDR 0x043 +#define mmCP_RB0_RPTR_ADDR_HI 0x044 +#define mmCP_RB0_WPTR 0x054 +#define mmCP_RB0_WPTR_HI 0x055 +#define mmCP_RB_DOORBELL_CONTROL 0x059 +#define mmCP_RB_DOORBELL_RANGE_LOWER 0x05a +#define mmCP_RB_DOORBELL_RANGE_UPPER 0x05b +#define mmCP_RB0_BASE_HI 0x0b1 -#define mmCP_HQD_ACTIVE 0x1247 -#define mmCP_HQD_VMID 0x1248 -#define mmCP_HQD_PQ_BASE 0x124d -#define mmCP_HQD_PQ_BASE_HI 0x124e -#define mmCP_HQD_PQ_DOORBELL_CONTROL 0x1254 -#define mmCP_HQD_PQ_RPTR 0x124f -#define mmCP_HQD_PQ_RPTR_REPORT_ADDR 0x1250 -#define mmCP_HQD_PQ_RPTR_REPORT_ADDR_HI 0x1251 -#define mmCP_HQD_PQ_WPTR_POLL_ADDR 0x1252 -#define mmCP_HQD_PQ_WPTR_POLL_ADDR_HI 0x1253 -#define mmCP_HQD_PQ_CONTROL 0x1256 -#define mmCP_HQD_IB_CONTROL 0x125a -#define mmCP_HQD_PQ_WPTR_LO 0x127b -#define mmCP_HQD_PQ_WPTR_HI 0x127c +#define mmCP_HQD_ACTIVE 0x247 +#define mmCP_HQD_VMID 0x248 +#define mmCP_HQD_PQ_BASE 0x24d +#define mmCP_HQD_PQ_BASE_HI 0x24e +#define mmCP_HQD_PQ_DOORBELL_CONTROL 0x254 +#define mmCP_HQD_PQ_RPTR 0x24f +#define mmCP_HQD_PQ_RPTR_REPORT_ADDR 0x250 +#define mmCP_HQD_PQ_RPTR_REPORT_ADDR_HI 0x251 +#define mmCP_HQD_PQ_WPTR_POLL_ADDR 0x252 +#define mmCP_HQD_PQ_WPTR_POLL_ADDR_HI 0x253 +#define mmCP_HQD_PQ_CONTROL 0x256 +#define mmCP_HQD_IB_CONTROL 0x25a +#define mmCP_HQD_PQ_WPTR_LO 0x27b +#define mmCP_HQD_PQ_WPTR_HI 0x27c } // namespace gem5 diff --git a/src/dev/amdgpu/pm4_packet_processor.cc b/src/dev/amdgpu/pm4_packet_processor.cc index c8baa5eab4..62e817aa98 100644 --- a/src/dev/amdgpu/pm4_packet_processor.cc +++ b/src/dev/amdgpu/pm4_packet_processor.cc @@ -49,7 +49,7 @@ namespace gem5 { PM4PacketProcessor::PM4PacketProcessor(const PM4PacketProcessorParams &p) - : DmaVirtDevice(p) + : DmaVirtDevice(p), _ipId(p.ip_id), _mmioRange(p.mmio_range) { memset(&kiq, 0, sizeof(QueueDesc)); memset(&pq, 0, sizeof(QueueDesc)); @@ -144,7 +144,7 @@ PM4PacketProcessor::newQueue(QueueDesc *mqd, Addr offset, QueueType qt; qt = mqd->aql ? QueueType::ComputeAQL : QueueType::Compute; - gpuDevice->setDoorbellType(offset, qt); + gpuDevice->setDoorbellType(offset, qt, getIpId()); DPRINTF(PM4PacketProcessor, "New PM4 queue %d, base: %p offset: %p, me: " "%d, pipe %d queue: %d size: %d\n", id, q->base(), q->offset(), @@ -521,7 +521,7 @@ PM4PacketProcessor::processSDMAMQD(PM4MapQueues *pkt, PM4Queue *q, Addr addr, // Register doorbell with GPU device gpuDevice->setSDMAEngine(pkt->doorbellOffset << 2, sdma_eng); - gpuDevice->setDoorbellType(pkt->doorbellOffset << 2, RLC); + gpuDevice->setDoorbellType(pkt->doorbellOffset << 2, RLC, getIpId()); gpuDevice->processPendingDoorbells(pkt->doorbellOffset << 2); } @@ -774,9 +774,14 @@ PM4PacketProcessor::setUconfigReg(PM4Queue *q, PM4SetUconfigReg *pkt) { q->incRptr(sizeof(PM4SetUconfigReg)); + DPRINTF(PM4PacketProcessor, "SetUconfig offset %x data %x\n", + pkt->offset, pkt->data); + // SET_UCONFIG_REG_START and pkt->offset are dword addresses uint32_t reg_addr = (PACKET3_SET_UCONFIG_REG_START + pkt->offset) * 4; + // Additional CPs respond to addresses 0x40000 apart. + reg_addr += 0x40000 * getIpId(); gpuDevice->setRegVal(reg_addr, pkt->data); decodeNext(q); @@ -851,7 +856,7 @@ PM4PacketProcessor::writeMMIO(PacketPtr pkt, Addr mmio_offset) break; case mmCP_HQD_PQ_DOORBELL_CONTROL: setHqdPqDoorbellCtrl(pkt->getLE()); - gpuDevice->setDoorbellType(getKiqDoorbellOffset(), Compute); + gpuDevice->setDoorbellType(getKiqDoorbellOffset(), Compute, getIpId()); break; case mmCP_HQD_PQ_RPTR: setHqdPqPtr(pkt->getLE()); @@ -913,7 +918,7 @@ PM4PacketProcessor::writeMMIO(PacketPtr pkt, Addr mmio_offset) break; case mmCP_RB_DOORBELL_CONTROL: setRbDoorbellCntrl(pkt->getLE()); - gpuDevice->setDoorbellType(getPqDoorbellOffset(), Gfx); + gpuDevice->setDoorbellType(getPqDoorbellOffset(), Gfx, getIpId()); break; case mmCP_RB_DOORBELL_RANGE_LOWER: setRbDoorbellRangeLo(pkt->getLE()); diff --git a/src/dev/amdgpu/pm4_packet_processor.hh b/src/dev/amdgpu/pm4_packet_processor.hh index 4782e70829..82c3c2716f 100644 --- a/src/dev/amdgpu/pm4_packet_processor.hh +++ b/src/dev/amdgpu/pm4_packet_processor.hh @@ -63,6 +63,10 @@ class PM4PacketProcessor : public DmaVirtDevice std::unordered_map queues; /* A map of PM4 queues based on doorbell offset */ std::unordered_map queuesMap; + + int _ipId; + AddrRange _mmioRange; + public: PM4PacketProcessor(const PM4PacketProcessorParams &p); @@ -188,6 +192,9 @@ class PM4PacketProcessor : public DmaVirtDevice void setRbDoorbellCntrl(uint32_t data); void setRbDoorbellRangeLo(uint32_t data); void setRbDoorbellRangeHi(uint32_t data); + + int getIpId() const { return _ipId; } + AddrRange getMMIORange() const { return _mmioRange; } }; } // namespace gem5