diff --git a/configs/example/gpufs/system/system.py b/configs/example/gpufs/system/system.py
index 93f0194efb..90c5c01091 100644
--- a/configs/example/gpufs/system/system.py
+++ b/configs/example/gpufs/system/system.py
@@ -129,15 +129,45 @@ def makeGpuFSSystem(args):
     device_ih = AMDGPUInterruptHandler()
     system.pc.south_bridge.gpu.device_ih = device_ih
 
-    # Setup the SDMA engines
-    sdma0_pt_walker = VegaPagetableWalker()
-    sdma1_pt_walker = VegaPagetableWalker()
+    # Setup the SDMA engines depending on device. The MMIO base addresses
+    # can be found in the driver code under:
+    # include/asic_reg/sdmaX/sdmaX_Y_Z_offset.h
+    num_sdmas = 2
+    sdma_bases = []
+    sdma_sizes = []
+    if args.gpu_device == "Vega10":
+        num_sdmas = 2
+        sdma_bases = [0x4980, 0x5180]
+        sdma_sizes = [0x800] * 2
+    elif args.gpu_device == "MI100":
+        num_sdmas = 8
+        sdma_bases = [
+            0x4980,
+            0x6180,
+            0x78000,
+            0x79000,
+            0x7A000,
+            0x7B000,
+            0x7C000,
+            0x7D000,
+        ]
+        sdma_sizes = [0x1000] * 8
+    else:
+        m5.util.panic(f"Unknown GPU device {args.gpu_device}")
 
-    sdma0 = SDMAEngine(walker=sdma0_pt_walker)
-    sdma1 = SDMAEngine(walker=sdma1_pt_walker)
+    sdma_pt_walkers = []
+    sdma_engines = []
+    for sdma_idx in range(num_sdmas):
+        sdma_pt_walker = VegaPagetableWalker()
+        sdma_engine = SDMAEngine(
+            walker=sdma_pt_walker,
+            mmio_base=sdma_bases[sdma_idx],
+            mmio_size=sdma_sizes[sdma_idx],
+        )
+        sdma_pt_walkers.append(sdma_pt_walker)
+        sdma_engines.append(sdma_engine)
 
-    system.pc.south_bridge.gpu.sdma0 = sdma0
-    system.pc.south_bridge.gpu.sdma1 = sdma1
+    system.pc.south_bridge.gpu.sdmas = sdma_engines
 
     # Setup PM4 packet processor
     pm4_pkt_proc = PM4PacketProcessor()
@@ -155,22 +185,22 @@ def makeGpuFSSystem(args):
     system._dma_ports.append(gpu_hsapp)
     system._dma_ports.append(gpu_cmd_proc)
     system._dma_ports.append(system.pc.south_bridge.gpu)
-    system._dma_ports.append(sdma0)
-    system._dma_ports.append(sdma1)
+    for sdma in sdma_engines:
+        system._dma_ports.append(sdma)
     system._dma_ports.append(device_ih)
     system._dma_ports.append(pm4_pkt_proc)
     system._dma_ports.append(system_hub)
     system._dma_ports.append(gpu_mem_mgr)
     system._dma_ports.append(hsapp_pt_walker)
     system._dma_ports.append(cp_pt_walker)
-    system._dma_ports.append(sdma0_pt_walker)
-    system._dma_ports.append(sdma1_pt_walker)
+    for sdma_pt_walker in sdma_pt_walkers:
+        system._dma_ports.append(sdma_pt_walker)
 
     gpu_hsapp.pio = system.iobus.mem_side_ports
     gpu_cmd_proc.pio = system.iobus.mem_side_ports
     system.pc.south_bridge.gpu.pio = system.iobus.mem_side_ports
-    sdma0.pio = system.iobus.mem_side_ports
-    sdma1.pio = system.iobus.mem_side_ports
+    for sdma in sdma_engines:
+        sdma.pio = system.iobus.mem_side_ports
     device_ih.pio = system.iobus.mem_side_ports
     pm4_pkt_proc.pio = system.iobus.mem_side_ports
     system_hub.pio = system.iobus.mem_side_ports
diff --git a/src/dev/amdgpu/AMDGPU.py b/src/dev/amdgpu/AMDGPU.py
index 1e786726c9..616c501c63 100644
--- a/src/dev/amdgpu/AMDGPU.py
+++ b/src/dev/amdgpu/AMDGPU.py
@@ -79,11 +79,9 @@ class AMDGPUDevice(PciDevice):
         False, "Take a checkpoint before the device begins sending MMIOs"
     )
 
-    # Specific to Vega10: Vega10 has two SDMA engines these do not have any
-    # assigned function and are referenced by ID so they are given the generic
-    # names sdma0, sdma1, ... sdmaN.
-    sdma0 = Param.SDMAEngine("SDMA Engine 0")
-    sdma1 = Param.SDMAEngine("SDMA Engine 1")
+    # SDMA engines. There are a different number depending on device,
+    # therefore an array is used.
+    sdmas = VectorParam.SDMAEngine("All SDMA Engines")
 
     # The cp is needed here to handle certain packets the device may receive.
     # The config script should not create a new cp here but rather assign the
@@ -100,6 +98,9 @@ class SDMAEngine(DmaVirtDevice):
     cxx_header = "dev/amdgpu/sdma_engine.hh"
     cxx_class = "gem5::SDMAEngine"
 
+    mmio_base = Param.Addr(0x0, "Base MMIO Address")
+    mmio_size = Param.Addr(0x800, "Size of MMIO range")
+
     gpu_device = Param.AMDGPUDevice(NULL, "GPU Controller")
     walker = Param.VegaPagetableWalker("Page table walker")
 
diff --git a/src/dev/amdgpu/amdgpu_device.cc b/src/dev/amdgpu/amdgpu_device.cc
index 7e6304afa1..2acf1f4af3 100644
--- a/src/dev/amdgpu/amdgpu_device.cc
+++ b/src/dev/amdgpu/amdgpu_device.cc
@@ -53,7 +53,7 @@ namespace gem5
 
 AMDGPUDevice::AMDGPUDevice(const AMDGPUDeviceParams &p)
     : PciDevice(p), gpuMemMgr(p.memory_manager), deviceIH(p.device_ih),
-      sdma0(p.sdma0), sdma1(p.sdma1), pm4PktProc(p.pm4_pkt_proc), cp(p.cp),
+      pm4PktProc(p.pm4_pkt_proc), cp(p.cp),
       checkpoint_before_mmios(p.checkpoint_before_mmios),
       init_interrupt_count(0), _lastVMID(0),
       deviceMem(name() + ".deviceMem", p.memories, false, "", false)
@@ -84,10 +84,47 @@ AMDGPUDevice::AMDGPUDevice(const AMDGPUDeviceParams &p)
         mmioReader.readMMIOTrace(p.trace_file);
     }
 
-    sdma0->setGPUDevice(this);
-    sdma0->setId(0);
-    sdma1->setGPUDevice(this);
-    sdma1->setId(1);
+    int sdma_id = 0;
+    for (auto& s : p.sdmas) {
+        s->setGPUDevice(this);
+        s->setId(sdma_id);
+        sdmaIds.insert({sdma_id, s});
+        sdmaMmios.insert({sdma_id,
+                          RangeSize(s->getMmioBase(), s->getMmioSize())});
+        DPRINTF(AMDGPUDevice, "SDMA%d has MMIO range %s\n", sdma_id,
+                sdmaMmios[sdma_id].to_string().c_str());
+        sdma_id++;
+    }
+
+    // Map SDMA MMIO addresses to functions
+    sdmaFunc.insert({0x81, &SDMAEngine::setGfxBaseLo});
+    sdmaFunc.insert({0x82, &SDMAEngine::setGfxBaseHi});
+    sdmaFunc.insert({0x88, &SDMAEngine::setGfxRptrHi});
+    sdmaFunc.insert({0x89, &SDMAEngine::setGfxRptrLo});
+    sdmaFunc.insert({0x92, &SDMAEngine::setGfxDoorbellLo});
+    sdmaFunc.insert({0xab, &SDMAEngine::setGfxDoorbellOffsetLo});
+    sdmaFunc.insert({0x80, &SDMAEngine::setGfxSize});
+    sdmaFunc.insert({0xb2, &SDMAEngine::setGfxWptrLo});
+    sdmaFunc.insert({0xb3, &SDMAEngine::setGfxWptrHi});
+    if (p.device_name == "Vega10") {
+        sdmaFunc.insert({0xe1, &SDMAEngine::setPageBaseLo});
+        sdmaFunc.insert({0xe9, &SDMAEngine::setPageRptrLo});
+        sdmaFunc.insert({0xe8, &SDMAEngine::setPageRptrHi});
+        sdmaFunc.insert({0xf2, &SDMAEngine::setPageDoorbellLo});
+        sdmaFunc.insert({0x10b, &SDMAEngine::setPageDoorbellOffsetLo});
+        sdmaFunc.insert({0xe0, &SDMAEngine::setPageSize});
+        sdmaFunc.insert({0x113, &SDMAEngine::setPageWptrLo});
+    } else if (p.device_name == "MI100") {
+        sdmaFunc.insert({0xd9, &SDMAEngine::setPageBaseLo});
+        sdmaFunc.insert({0xe1, &SDMAEngine::setPageRptrLo});
+        sdmaFunc.insert({0xe0, &SDMAEngine::setPageRptrHi});
+        sdmaFunc.insert({0xea, &SDMAEngine::setPageDoorbellLo});
+        sdmaFunc.insert({0xd8, &SDMAEngine::setPageDoorbellOffsetLo});
+        sdmaFunc.insert({0x10b, &SDMAEngine::setPageWptrLo});
+    } else {
+        panic("Unknown GPU device %s\n", p.device_name);
+    }
+
     deviceIH->setGPUDevice(this);
     pm4PktProc->setGPUDevice(this);
     cp->hsaPacketProc().setGPUDevice(this);
@@ -351,15 +388,25 @@ AMDGPUDevice::writeMMIO(PacketPtr pkt, Addr offset)
 
     DPRINTF(AMDGPUDevice, "Wrote MMIO %#lx\n", offset);
 
+    // Check SDMA functions first, then fallback to switch statement
+    for (int idx = 0; idx < sdmaIds.size(); ++idx) {
+        if (sdmaMmios[idx].contains(offset)) {
+            Addr sdma_offset = (offset - sdmaMmios[idx].start()) >> 2;
+            if (sdmaFunc.count(sdma_offset)) {
+                DPRINTF(AMDGPUDevice, "Calling SDMA%d MMIO function %lx\n",
+                        idx, sdma_offset);
+                sdmaFuncPtr mptr = sdmaFunc[sdma_offset];
+                (getSDMAById(idx)->*mptr)(pkt->getLE<uint32_t>());
+            } else {
+                DPRINTF(AMDGPUDevice, "Unknown SDMA%d MMIO: %#lx\n", idx,
+                        sdma_offset);
+            }
+
+            return;
+        }
+    }
+
     switch (aperture) {
-      /* Write a register to the first System DMA. */
-      case SDMA0_BASE:
-        sdma0->writeMMIO(pkt, aperture_offset >> SDMA_OFFSET_SHIFT);
-        break;
-      /* Write a register to the second System DMA. */
-      case SDMA1_BASE:
-        sdma1->writeMMIO(pkt, aperture_offset >> SDMA_OFFSET_SHIFT);
-        break;
       /* Write a general register to the graphics register bus manager. */
       case GRBM_BASE:
         gpuvm.writeMMIO(pkt, aperture_offset >> GRBM_OFFSET_SHIFT);
@@ -483,19 +530,9 @@ AMDGPUDevice::getSDMAById(int id)
      * PM4 packets selected SDMAs using an integer ID. This method simply maps
      * the integer ID to a pointer to the SDMA and checks for invalid IDs.
      */
-    switch (id) {
-        case 0:
-            return sdma0;
-            break;
-        case 1:
-            return sdma1;
-            break;
-        default:
-            panic("No SDMA with id %d\n", id);
-            break;
-    }
+    assert(sdmaIds.count(id));
 
-    return nullptr;
+    return sdmaIds[id];
 }
 
 SDMAEngine*
@@ -549,7 +586,13 @@ AMDGPUDevice::serialize(CheckpointOut &cp) const
     idx = 0;
     for (auto & it : sdmaEngs) {
         sdma_engs_offset[idx] = it.first;
-        sdma_engs[idx] = it.second == sdma0 ? 0 : 1;
+        // Save the engine's ID rather than the iteration position so the
+        // doorbell-to-engine mapping can be rebuilt on restore.
+        for (auto & id_it : sdmaIds) {
+            if (id_it.second == it.second) {
+                sdma_engs[idx] = id_it.first;
+            }
+        }
         ++idx;
     }
 
@@ -620,7 +663,9 @@ AMDGPUDevice::unserialize(CheckpointIn &cp)
         UNSERIALIZE_ARRAY(sdma_engs, sizeof(sdma_engs)/sizeof(sdma_engs[0]));
 
         for (int idx = 0; idx < sdma_engs_size; ++idx) {
-            SDMAEngine *sdma = sdma_engs[idx] == 0 ? sdma0 : sdma1;
+            // sdma_engs[] holds the engine ID written by serialize().
+            assert(sdmaIds.count(sdma_engs[idx]));
+            SDMAEngine *sdma = sdmaIds[sdma_engs[idx]];
             sdmaEngs.insert(std::make_pair(sdma_engs_offset[idx], sdma));
         }
     }
@@ -669,8 +714,10 @@ AMDGPUDevice::deallocateAllQueues()
     idMap.erase(idMap.begin(), idMap.end());
     usedVMIDs.erase(usedVMIDs.begin(), usedVMIDs.end());
 
-    sdma0->deallocateRLCQueues();
-    sdma1->deallocateRLCQueues();
+    // Visit every engine exactly once; sdmaEngs is keyed by doorbell.
+    for (auto& it : sdmaIds) {
+        it.second->deallocateRLCQueues();
+    }
 }
 
 void
diff --git a/src/dev/amdgpu/amdgpu_device.hh b/src/dev/amdgpu/amdgpu_device.hh
index b64067a158..0e58f29038 100644
--- a/src/dev/amdgpu/amdgpu_device.hh
+++ b/src/dev/amdgpu/amdgpu_device.hh
@@ -109,12 +109,19 @@ class AMDGPUDevice : public PciDevice
     AMDGPUMemoryManager *gpuMemMgr;
     AMDGPUInterruptHandler *deviceIH;
     AMDGPUVM gpuvm;
-    SDMAEngine *sdma0;
-    SDMAEngine *sdma1;
-    std::unordered_map<uint32_t, SDMAEngine *> sdmaEngs;
     PM4PacketProcessor *pm4PktProc;
     GPUCommandProcessor *cp;
 
+    // SDMAs mapped by doorbell offset
+    std::unordered_map<uint32_t, SDMAEngine *> sdmaEngs;
+    // SDMAs mapped by ID
+    std::unordered_map<uint32_t, SDMAEngine *> sdmaIds;
+    // SDMA ID to MMIO range
+    std::unordered_map<uint32_t, AddrRange> sdmaMmios;
+    // SDMA MMIO register offset to handler function
+    typedef void (SDMAEngine::*sdmaFuncPtr)(uint32_t);
+    std::unordered_map<uint32_t, sdmaFuncPtr> sdmaFunc;
+
     /**
      * Initial checkpoint support variables.
      */
diff --git a/src/dev/amdgpu/interrupt_handler.cc b/src/dev/amdgpu/interrupt_handler.cc
index a771976d98..6f277a1618 100644
--- a/src/dev/amdgpu/interrupt_handler.cc
+++ b/src/dev/amdgpu/interrupt_handler.cc
@@ -80,6 +80,12 @@ AMDGPUInterruptHandler::prepareInterruptCookie(ContextID cntxt_id,
     assert(client_id == SOC15_IH_CLIENTID_RLC ||
            client_id == SOC15_IH_CLIENTID_SDMA0 ||
            client_id == SOC15_IH_CLIENTID_SDMA1 ||
+           client_id == SOC15_IH_CLIENTID_SDMA2 ||
+           client_id == SOC15_IH_CLIENTID_SDMA3 ||
+           client_id == SOC15_IH_CLIENTID_SDMA4 ||
+           client_id == SOC15_IH_CLIENTID_SDMA5 ||
+           client_id == SOC15_IH_CLIENTID_SDMA6 ||
+           client_id == SOC15_IH_CLIENTID_SDMA7 ||
            client_id == SOC15_IH_CLIENTID_GRBM_CP);
     assert(source_id == CP_EOP || source_id == TRAP_ID);
 
diff --git a/src/dev/amdgpu/interrupt_handler.hh b/src/dev/amdgpu/interrupt_handler.hh
index ab8a853074..9b80e081cc 100644
--- a/src/dev/amdgpu/interrupt_handler.hh
+++ b/src/dev/amdgpu/interrupt_handler.hh
@@ -58,6 +58,12 @@ enum soc15_ih_clientid
     SOC15_IH_CLIENTID_RLC = 0x07,
     SOC15_IH_CLIENTID_SDMA0 = 0x08,
     SOC15_IH_CLIENTID_SDMA1 = 0x09,
+    SOC15_IH_CLIENTID_SDMA2 = 0x01,
+    SOC15_IH_CLIENTID_SDMA3 = 0x04,
+    SOC15_IH_CLIENTID_SDMA4 = 0x05,
+    SOC15_IH_CLIENTID_SDMA5 = 0x11,
+    SOC15_IH_CLIENTID_SDMA6 = 0x13,
+    SOC15_IH_CLIENTID_SDMA7 = 0x18,
     SOC15_IH_CLIENTID_GRBM_CP = 0x14
 };
 
diff --git a/src/dev/amdgpu/sdma_engine.cc b/src/dev/amdgpu/sdma_engine.cc
index 736df45d9d..e99d694634 100644
--- a/src/dev/amdgpu/sdma_engine.cc
+++ b/src/dev/amdgpu/sdma_engine.cc
@@ -49,7 +49,8 @@ SDMAEngine::SDMAEngine(const SDMAEngineParams &p)
     : DmaVirtDevice(p), id(0), gfxBase(0), gfxRptr(0),
       gfxDoorbell(0), gfxDoorbellOffset(0), gfxWptr(0), pageBase(0),
       pageRptr(0), pageDoorbell(0), pageDoorbellOffset(0),
-      pageWptr(0), gpuDevice(nullptr), walker(p.walker)
+      pageWptr(0), gpuDevice(nullptr), walker(p.walker),
+      mmioBase(p.mmio_base), mmioSize(p.mmio_size)
 {
     gfx.ib(&gfxIb);
     gfxIb.parent(&gfx);
@@ -87,6 +88,18 @@ SDMAEngine::getIHClientId()
         return SOC15_IH_CLIENTID_SDMA0;
       case 1:
         return SOC15_IH_CLIENTID_SDMA1;
+      case 2:
+        return SOC15_IH_CLIENTID_SDMA2;
+      case 3:
+        return SOC15_IH_CLIENTID_SDMA3;
+      case 4:
+        return SOC15_IH_CLIENTID_SDMA4;
+      case 5:
+        return SOC15_IH_CLIENTID_SDMA5;
+      case 6:
+        return SOC15_IH_CLIENTID_SDMA6;
+      case 7:
+        return SOC15_IH_CLIENTID_SDMA7;
       default:
         panic("Unknown SDMA id");
     }
@@ -1240,6 +1253,10 @@ SDMAEngine::setGfxDoorbellOffsetLo(uint32_t data)
 {
     gfxDoorbellOffset = insertBits(gfxDoorbellOffset, 31, 0, 0);
     gfxDoorbellOffset |= data;
+    if (bits(gfxDoorbell, 28, 28)) {
+        gpuDevice->setDoorbellType(gfxDoorbellOffset, QueueType::SDMAGfx);
+        gpuDevice->setSDMAEngine(gfxDoorbellOffset, this);
+    }
 }
 
 void
@@ -1250,9 +1267,12 @@ SDMAEngine::setGfxDoorbellOffsetHi(uint32_t data)
 }
 
 void
-SDMAEngine::setGfxSize(uint64_t data)
+SDMAEngine::setGfxSize(uint32_t data)
 {
-    gfx.size(data);
+    uint32_t rb_size = bits(data, 6, 1);
+    assert(rb_size >= 6 && rb_size <= 61);
+    // 64-bit literal: rb_size + 2 can exceed the width of int.
+    gfx.size(1ULL << (rb_size + 2));
 }
 
 void
@@ -1320,6 +1340,10 @@ SDMAEngine::setPageDoorbellOffsetLo(uint32_t data)
 {
     pageDoorbellOffset = insertBits(pageDoorbellOffset, 31, 0, 0);
     pageDoorbellOffset |= data;
+    if (bits(pageDoorbell, 28, 28)) {
+        gpuDevice->setDoorbellType(pageDoorbellOffset, QueueType::SDMAPage);
+        gpuDevice->setSDMAEngine(pageDoorbellOffset, this);
+    }
 }
 
 void
@@ -1330,9 +1354,12 @@ SDMAEngine::setPageDoorbellOffsetHi(uint32_t data)
 }
 
 void
-SDMAEngine::setPageSize(uint64_t data)
+SDMAEngine::setPageSize(uint32_t data)
 {
-    page.size(data);
+    uint32_t rb_size = bits(data, 6, 1);
+    assert(rb_size >= 6 && rb_size <= 61);
+    // 64-bit literal: rb_size + 2 can exceed the width of int.
+    page.size(1ULL << (rb_size + 2));
 }
 
 void
diff --git a/src/dev/amdgpu/sdma_engine.hh b/src/dev/amdgpu/sdma_engine.hh
index 27c169193b..1e4f965920 100644
--- a/src/dev/amdgpu/sdma_engine.hh
+++ b/src/dev/amdgpu/sdma_engine.hh
@@ -156,6 +156,9 @@ class SDMAEngine : public DmaVirtDevice
     void processRLC0(Addr wptrOffset);
     void processRLC1(Addr wptrOffset);
 
+    Addr mmioBase = 0;
+    Addr mmioSize = 0;
+
   public:
     SDMAEngine(const SDMAEngineParams &p);
 
@@ -242,6 +245,14 @@ class SDMAEngine : public DmaVirtDevice
     void atomicDone(SDMAQueue *q, sdmaAtomicHeader *header, sdmaAtomic *pkt,
                     uint64_t *dmaBuffer);
 
+    /**
+     * Methods for getting SDMA MMIO base address and size. These are set by
+     * the python configuration depending on device to allow for flexible base
+     * addresses depending on what GPU is being simulated.
+     */
+    Addr getMmioBase() { return mmioBase; }
+    Addr getMmioSize() { return mmioSize; }
+
     /**
      * Methods for getting the values of SDMA MMIO registers.
      */
@@ -269,7 +280,7 @@ class SDMAEngine : public DmaVirtDevice
     void setGfxDoorbellHi(uint32_t data);
     void setGfxDoorbellOffsetLo(uint32_t data);
     void setGfxDoorbellOffsetHi(uint32_t data);
-    void setGfxSize(uint64_t data);
+    void setGfxSize(uint32_t data);
     void setGfxWptrLo(uint32_t data);
     void setGfxWptrHi(uint32_t data);
     void setPageBaseLo(uint32_t data);
@@ -280,7 +291,7 @@ class SDMAEngine : public DmaVirtDevice
     void setPageDoorbellHi(uint32_t data);
     void setPageDoorbellOffsetLo(uint32_t data);
     void setPageDoorbellOffsetHi(uint32_t data);
-    void setPageSize(uint64_t data);
+    void setPageSize(uint32_t data);
     void setPageWptrLo(uint32_t data);
     void setPageWptrHi(uint32_t data);
 