diff --git a/configs/example/gpufs/runfs.py b/configs/example/gpufs/runfs.py index fed155bc44..866fa89822 100644 --- a/configs/example/gpufs/runfs.py +++ b/configs/example/gpufs/runfs.py @@ -134,9 +134,9 @@ def addRunFSOptions(parser): parser.add_argument( "--gpu-device", default="Vega10", - choices=["Vega10", "MI100", "MI200"], - help="GPU model to run: Vega10 (gfx900), MI100 (gfx908), or " - "MI200 (gfx90a)", + choices=["Vega10", "MI100", "MI200", "MI300X"], + help="GPU model to run: Vega10 (gfx900), MI100 (gfx908), MI200 " + "(gfx90a), or MI300X (gfx942).", ) parser.add_argument( diff --git a/configs/example/gpufs/system/amdgpu.py b/configs/example/gpufs/system/amdgpu.py index 0813759e2a..55937cd255 100644 --- a/configs/example/gpufs/system/amdgpu.py +++ b/configs/example/gpufs/system/amdgpu.py @@ -191,10 +191,14 @@ def connectGPU(system, args): system.pc.south_bridge.gpu.DeviceID = 0x740F system.pc.south_bridge.gpu.SubsystemVendorID = 0x1002 system.pc.south_bridge.gpu.SubsystemID = 0x0C34 + elif args.gpu_device == "MI300X": + system.pc.south_bridge.gpu.DeviceID = 0x740F + system.pc.south_bridge.gpu.SubsystemVendorID = 0x1002 + system.pc.south_bridge.gpu.SubsystemID = 0x0C34 elif args.gpu_device == "Vega10": system.pc.south_bridge.gpu.DeviceID = 0x6863 else: - panic(f"Unknown GPU device: {args.gpu_device}") + m5.util.panic(f"Unknown GPU device: {args.gpu_device}") # Use the gem5 default of 0x280 OR'd with 0x10 which tells Linux there is # a PCI capabilities list to travse. diff --git a/configs/example/gpufs/system/system.py b/configs/example/gpufs/system/system.py index 7c596f0ccf..1322650964 100644 --- a/configs/example/gpufs/system/system.py +++ b/configs/example/gpufs/system/system.py @@ -161,7 +161,7 @@ def makeGpuFSSystem(args): 0x7D000, ] sdma_sizes = [0x1000] * 8 - elif args.gpu_device == "MI200": + elif args.gpu_device == "MI200" or args.gpu_device == "MI300X": num_sdmas = 5 sdma_bases = [ 0x4980, diff --git a/src/dev/amdgpu/amdgpu_device.cc b/src/dev/amdgpu/amdgpu_device.cc index 6bb5f9c2c5..b3a91830fe 100644 --- a/src/dev/amdgpu/amdgpu_device.cc +++ b/src/dev/amdgpu/amdgpu_device.cc @@ -86,6 +86,8 @@ AMDGPUDevice::AMDGPUDevice(const AMDGPUDeviceParams &p) gfx_version = GfxVersion::gfx908; } else if (p.device_name == "MI200") { gfx_version = GfxVersion::gfx90a; + } else if (p.device_name == "MI300X") { + gfx_version = GfxVersion::gfx942; } else { panic("Unknown GPU device %s\n", p.device_name); } @@ -124,7 +126,8 @@ AMDGPUDevice::AMDGPUDevice(const AMDGPUDeviceParams &p) sdmaFunc.insert({0x10b, &SDMAEngine::setPageDoorbellOffsetLo}); sdmaFunc.insert({0xe0, &SDMAEngine::setPageSize}); sdmaFunc.insert({0x113, &SDMAEngine::setPageWptrLo}); - } else if (p.device_name == "MI100" || p.device_name == "MI200") { + } else if (p.device_name == "MI100" || p.device_name == "MI200" + || p.device_name == "MI300X") { sdmaFunc.insert({0xd9, &SDMAEngine::setPageBaseLo}); sdmaFunc.insert({0xe1, &SDMAEngine::setPageRptrLo}); sdmaFunc.insert({0xe0, &SDMAEngine::setPageRptrHi}); @@ -195,6 +198,10 @@ AMDGPUDevice::AMDGPUDevice(const AMDGPUDeviceParams &p) setRegVal(MI200_FB_LOCATION_BASE, mmhubBase >> 24); setRegVal(MI200_FB_LOCATION_TOP, mmhubTop >> 24); setRegVal(MI200_MEM_SIZE_REG, mem_size); + } else if (p.device_name == "MI300X") { + setRegVal(MI200_FB_LOCATION_BASE, mmhubBase >> 24); + setRegVal(MI200_FB_LOCATION_TOP, mmhubTop >> 24); + setRegVal(MI200_MEM_SIZE_REG, mem_size); } else { panic("Unknown GPU device %s\n", p.device_name); } diff --git a/src/dev/amdgpu/pm4_defines.hh b/src/dev/amdgpu/pm4_defines.hh index a303f8ef84..d00dc3730d 100644 --- a/src/dev/amdgpu/pm4_defines.hh +++ b/src/dev/amdgpu/pm4_defines.hh @@ -328,8 +328,8 @@ typedef struct GEM5_PACKED }; uint64_t completionSignal; }; -} PM4MapProcessMI200; -static_assert(sizeof(PM4MapProcessMI200) == 80); +} PM4MapProcessV2; +static_assert(sizeof(PM4MapProcessV2) == 80); typedef struct GEM5_PACKED { diff --git a/src/dev/amdgpu/pm4_packet_processor.cc b/src/dev/amdgpu/pm4_packet_processor.cc index a921942678..9a8ba13914 100644 --- a/src/dev/amdgpu/pm4_packet_processor.cc +++ b/src/dev/amdgpu/pm4_packet_processor.cc @@ -290,18 +290,19 @@ PM4PacketProcessor::decodeHeader(PM4Queue *q, PM4Header header) dmaBuffer); } break; case IT_MAP_PROCESS: { - if (gpuDevice->getGfxVersion() == GfxVersion::gfx90a) { - dmaBuffer = new PM4MapProcessMI200(); + if (gpuDevice->getGfxVersion() == GfxVersion::gfx90a || + gpuDevice->getGfxVersion() == GfxVersion::gfx942) { + dmaBuffer = new PM4MapProcessV2(); cb = new DmaVirtCallback( [ = ] (const uint64_t &) - { mapProcessGfx90a(q, (PM4MapProcessMI200 *)dmaBuffer); }); - dmaReadVirt(getGARTAddr(q->rptr()), sizeof(PM4MapProcessMI200), + { mapProcessV2(q, (PM4MapProcessV2 *)dmaBuffer); }); + dmaReadVirt(getGARTAddr(q->rptr()), sizeof(PM4MapProcessV2), cb, dmaBuffer); } else { dmaBuffer = new PM4MapProcess(); cb = new DmaVirtCallback( [ = ] (const uint64_t &) - { mapProcessGfx9(q, (PM4MapProcess *)dmaBuffer); }); + { mapProcessV1(q, (PM4MapProcess *)dmaBuffer); }); dmaReadVirt(getGARTAddr(q->rptr()), sizeof(PM4MapProcess), cb, dmaBuffer); } @@ -701,7 +702,7 @@ PM4PacketProcessor::mapProcess(uint32_t pasid, uint64_t ptBase, } void -PM4PacketProcessor::mapProcessGfx9(PM4Queue *q, PM4MapProcess *pkt) +PM4PacketProcessor::mapProcessV1(PM4Queue *q, PM4MapProcess *pkt) { q->incRptr(sizeof(PM4MapProcess)); @@ -716,9 +717,9 @@ PM4PacketProcessor::mapProcessGfx9(PM4Queue *q, PM4MapProcess *pkt) } void -PM4PacketProcessor::mapProcessGfx90a(PM4Queue *q, PM4MapProcessMI200 *pkt) +PM4PacketProcessor::mapProcessV2(PM4Queue *q, PM4MapProcessV2 *pkt) { - q->incRptr(sizeof(PM4MapProcessMI200)); + q->incRptr(sizeof(PM4MapProcessV2)); DPRINTF(PM4PacketProcessor, "PM4 map_process pasid: %p quantum: " "%d pt: %p signal: %p\n", pkt->pasid, pkt->processQuantum, diff --git a/src/dev/amdgpu/pm4_packet_processor.hh b/src/dev/amdgpu/pm4_packet_processor.hh index 82c3c2716f..71271415fd 100644 --- a/src/dev/amdgpu/pm4_packet_processor.hh +++ b/src/dev/amdgpu/pm4_packet_processor.hh @@ -146,8 +146,8 @@ class PM4PacketProcessor : public DmaVirtDevice void unmapQueues(PM4Queue *q, PM4UnmapQueues *pkt); void doneMQDWrite(Addr mqdAddr, Addr addr); void mapProcess(uint32_t pasid, uint64_t ptBase, uint32_t shMemBases); - void mapProcessGfx9(PM4Queue *q, PM4MapProcess *pkt); - void mapProcessGfx90a(PM4Queue *q, PM4MapProcessMI200 *pkt); + void mapProcessV1(PM4Queue *q, PM4MapProcess *pkt); + void mapProcessV2(PM4Queue *q, PM4MapProcessV2 *pkt); void processMQD(PM4MapQueues *pkt, PM4Queue *q, Addr addr, QueueDesc *mqd, uint16_t vmid); void processSDMAMQD(PM4MapQueues *pkt, PM4Queue *q, Addr addr, diff --git a/src/gpu-compute/GPU.py b/src/gpu-compute/GPU.py index 41ff9e7893..8cb40f1c87 100644 --- a/src/gpu-compute/GPU.py +++ b/src/gpu-compute/GPU.py @@ -45,7 +45,7 @@ class PrefetchType(Enum): class GfxVersion(ScopedEnum): - vals = ["gfx900", "gfx902", "gfx908", "gfx90a"] + vals = ["gfx900", "gfx902", "gfx908", "gfx90a", "gfx942"] class PoolManager(SimObject): diff --git a/src/gpu-compute/hsa_queue_entry.hh b/src/gpu-compute/hsa_queue_entry.hh index f015b091fc..44de1a8d32 100644 --- a/src/gpu-compute/hsa_queue_entry.hh +++ b/src/gpu-compute/hsa_queue_entry.hh @@ -94,9 +94,10 @@ class HSAQueueEntry // LLVM docs: https://www.llvm.org/docs/AMDGPUUsage.html // #code-object-v3-kernel-descriptor // - // Currently, the only supported gfx version in gem5 that computes - // VGPR count differently is gfx90a. - if (gfx_version == GfxVersion::gfx90a) { + // Currently, the only supported gfx versions in gem5 that compute + // VGPR count differently are gfx90a and gfx942. + if (gfx_version == GfxVersion::gfx90a || + gfx_version == GfxVersion::gfx942) { numVgprs = (akc->granulated_workitem_vgpr_count + 1) * 8; } else { numVgprs = (akc->granulated_workitem_vgpr_count + 1) * 4; @@ -107,7 +108,8 @@ class HSAQueueEntry if (gfx_version == GfxVersion::gfx900 || gfx_version == GfxVersion::gfx902 || gfx_version == GfxVersion::gfx908 || - gfx_version == GfxVersion::gfx90a) { + gfx_version == GfxVersion::gfx90a || + gfx_version == GfxVersion::gfx942) { numSgprs = ((akc->granulated_wavefront_sgpr_count + 1) * 16)/2; } else { panic("Saw unknown gfx version setting up GPR counts\n"); diff --git a/src/gpu-compute/wavefront.cc b/src/gpu-compute/wavefront.cc index 98d882b20e..b5298bad4c 100644 --- a/src/gpu-compute/wavefront.cc +++ b/src/gpu-compute/wavefront.cc @@ -442,7 +442,8 @@ Wavefront::initRegState(HSAQueueEntry *task, int wgSizeInWorkItems) // Default to false and set to true for gem5 supported ISAs. bool packed_work_item_id = false; - if (task->gfxVersion() == GfxVersion::gfx90a) { + if (task->gfxVersion() == GfxVersion::gfx90a || + task->gfxVersion() == GfxVersion::gfx942) { packed_work_item_id = true; }