dev-amdgpu,configs,gpu-compute: Add gfx942 version

This is the version for MI300. For the most part, it is the same as
MI200, with the exception of architected flat scratch (not yet
implemented in gem5); a new version enum is therefore required.

Change-Id: Id18cd7b57c4eebd467c010a3f61e3117beb8d58a
This commit is contained in:
Matthew Poremba
2024-05-15 10:49:05 -07:00
parent 65976e4c6d
commit 8be5ce6fc9
10 changed files with 39 additions and 24 deletions

View File

@@ -134,9 +134,9 @@ def addRunFSOptions(parser):
parser.add_argument(
"--gpu-device",
default="Vega10",
choices=["Vega10", "MI100", "MI200"],
help="GPU model to run: Vega10 (gfx900), MI100 (gfx908), or "
"MI200 (gfx90a)",
choices=["Vega10", "MI100", "MI200", "MI300X"],
help="GPU model to run: Vega10 (gfx900), MI100 (gfx908), MI200 "
"(gfx90a), or MI300X (gfx942).",
)
parser.add_argument(

View File

@@ -191,10 +191,14 @@ def connectGPU(system, args):
system.pc.south_bridge.gpu.DeviceID = 0x740F
system.pc.south_bridge.gpu.SubsystemVendorID = 0x1002
system.pc.south_bridge.gpu.SubsystemID = 0x0C34
elif args.gpu_device == "MI300X":
system.pc.south_bridge.gpu.DeviceID = 0x740F
system.pc.south_bridge.gpu.SubsystemVendorID = 0x1002
system.pc.south_bridge.gpu.SubsystemID = 0x0C34
elif args.gpu_device == "Vega10":
system.pc.south_bridge.gpu.DeviceID = 0x6863
else:
panic(f"Unknown GPU device: {args.gpu_device}")
m5.util.panic(f"Unknown GPU device: {args.gpu_device}")
# Use the gem5 default of 0x280 OR'd with 0x10 which tells Linux there is
# a PCI capabilities list to traverse.

View File

@@ -161,7 +161,7 @@ def makeGpuFSSystem(args):
0x7D000,
]
sdma_sizes = [0x1000] * 8
elif args.gpu_device == "MI200":
elif args.gpu_device == "MI200" or args.gpu_device == "MI300X":
num_sdmas = 5
sdma_bases = [
0x4980,

View File

@@ -86,6 +86,8 @@ AMDGPUDevice::AMDGPUDevice(const AMDGPUDeviceParams &p)
gfx_version = GfxVersion::gfx908;
} else if (p.device_name == "MI200") {
gfx_version = GfxVersion::gfx90a;
} else if (p.device_name == "MI300X") {
gfx_version = GfxVersion::gfx942;
} else {
panic("Unknown GPU device %s\n", p.device_name);
}
@@ -124,7 +126,8 @@ AMDGPUDevice::AMDGPUDevice(const AMDGPUDeviceParams &p)
sdmaFunc.insert({0x10b, &SDMAEngine::setPageDoorbellOffsetLo});
sdmaFunc.insert({0xe0, &SDMAEngine::setPageSize});
sdmaFunc.insert({0x113, &SDMAEngine::setPageWptrLo});
} else if (p.device_name == "MI100" || p.device_name == "MI200") {
} else if (p.device_name == "MI100" || p.device_name == "MI200"
|| p.device_name == "MI300X") {
sdmaFunc.insert({0xd9, &SDMAEngine::setPageBaseLo});
sdmaFunc.insert({0xe1, &SDMAEngine::setPageRptrLo});
sdmaFunc.insert({0xe0, &SDMAEngine::setPageRptrHi});
@@ -195,6 +198,10 @@ AMDGPUDevice::AMDGPUDevice(const AMDGPUDeviceParams &p)
setRegVal(MI200_FB_LOCATION_BASE, mmhubBase >> 24);
setRegVal(MI200_FB_LOCATION_TOP, mmhubTop >> 24);
setRegVal(MI200_MEM_SIZE_REG, mem_size);
} else if (p.device_name == "MI300X") {
setRegVal(MI200_FB_LOCATION_BASE, mmhubBase >> 24);
setRegVal(MI200_FB_LOCATION_TOP, mmhubTop >> 24);
setRegVal(MI200_MEM_SIZE_REG, mem_size);
} else {
panic("Unknown GPU device %s\n", p.device_name);
}

View File

@@ -328,8 +328,8 @@ typedef struct GEM5_PACKED
};
uint64_t completionSignal;
};
} PM4MapProcessMI200;
static_assert(sizeof(PM4MapProcessMI200) == 80);
} PM4MapProcessV2;
static_assert(sizeof(PM4MapProcessV2) == 80);
typedef struct GEM5_PACKED
{

View File

@@ -290,18 +290,19 @@ PM4PacketProcessor::decodeHeader(PM4Queue *q, PM4Header header)
dmaBuffer);
} break;
case IT_MAP_PROCESS: {
if (gpuDevice->getGfxVersion() == GfxVersion::gfx90a) {
dmaBuffer = new PM4MapProcessMI200();
if (gpuDevice->getGfxVersion() == GfxVersion::gfx90a ||
gpuDevice->getGfxVersion() == GfxVersion::gfx942) {
dmaBuffer = new PM4MapProcessV2();
cb = new DmaVirtCallback<uint64_t>(
[ = ] (const uint64_t &)
{ mapProcessGfx90a(q, (PM4MapProcessMI200 *)dmaBuffer); });
dmaReadVirt(getGARTAddr(q->rptr()), sizeof(PM4MapProcessMI200),
{ mapProcessV2(q, (PM4MapProcessV2 *)dmaBuffer); });
dmaReadVirt(getGARTAddr(q->rptr()), sizeof(PM4MapProcessV2),
cb, dmaBuffer);
} else {
dmaBuffer = new PM4MapProcess();
cb = new DmaVirtCallback<uint64_t>(
[ = ] (const uint64_t &)
{ mapProcessGfx9(q, (PM4MapProcess *)dmaBuffer); });
{ mapProcessV1(q, (PM4MapProcess *)dmaBuffer); });
dmaReadVirt(getGARTAddr(q->rptr()), sizeof(PM4MapProcess), cb,
dmaBuffer);
}
@@ -701,7 +702,7 @@ PM4PacketProcessor::mapProcess(uint32_t pasid, uint64_t ptBase,
}
void
PM4PacketProcessor::mapProcessGfx9(PM4Queue *q, PM4MapProcess *pkt)
PM4PacketProcessor::mapProcessV1(PM4Queue *q, PM4MapProcess *pkt)
{
q->incRptr(sizeof(PM4MapProcess));
@@ -716,9 +717,9 @@ PM4PacketProcessor::mapProcessGfx9(PM4Queue *q, PM4MapProcess *pkt)
}
void
PM4PacketProcessor::mapProcessGfx90a(PM4Queue *q, PM4MapProcessMI200 *pkt)
PM4PacketProcessor::mapProcessV2(PM4Queue *q, PM4MapProcessV2 *pkt)
{
q->incRptr(sizeof(PM4MapProcessMI200));
q->incRptr(sizeof(PM4MapProcessV2));
DPRINTF(PM4PacketProcessor, "PM4 map_process pasid: %p quantum: "
"%d pt: %p signal: %p\n", pkt->pasid, pkt->processQuantum,

View File

@@ -146,8 +146,8 @@ class PM4PacketProcessor : public DmaVirtDevice
void unmapQueues(PM4Queue *q, PM4UnmapQueues *pkt);
void doneMQDWrite(Addr mqdAddr, Addr addr);
void mapProcess(uint32_t pasid, uint64_t ptBase, uint32_t shMemBases);
void mapProcessGfx9(PM4Queue *q, PM4MapProcess *pkt);
void mapProcessGfx90a(PM4Queue *q, PM4MapProcessMI200 *pkt);
void mapProcessV1(PM4Queue *q, PM4MapProcess *pkt);
void mapProcessV2(PM4Queue *q, PM4MapProcessV2 *pkt);
void processMQD(PM4MapQueues *pkt, PM4Queue *q, Addr addr, QueueDesc *mqd,
uint16_t vmid);
void processSDMAMQD(PM4MapQueues *pkt, PM4Queue *q, Addr addr,

View File

@@ -45,7 +45,7 @@ class PrefetchType(Enum):
class GfxVersion(ScopedEnum):
vals = ["gfx900", "gfx902", "gfx908", "gfx90a"]
vals = ["gfx900", "gfx902", "gfx908", "gfx90a", "gfx942"]
class PoolManager(SimObject):

View File

@@ -94,9 +94,10 @@ class HSAQueueEntry
// LLVM docs: https://www.llvm.org/docs/AMDGPUUsage.html
// #code-object-v3-kernel-descriptor
//
// Currently, the only supported gfx version in gem5 that computes
// VGPR count differently is gfx90a.
if (gfx_version == GfxVersion::gfx90a) {
// Currently, the only supported gfx versions in gem5 that compute
// VGPR count differently are gfx90a and gfx942.
if (gfx_version == GfxVersion::gfx90a ||
gfx_version == GfxVersion::gfx942) {
numVgprs = (akc->granulated_workitem_vgpr_count + 1) * 8;
} else {
numVgprs = (akc->granulated_workitem_vgpr_count + 1) * 4;
@@ -107,7 +108,8 @@ class HSAQueueEntry
if (gfx_version == GfxVersion::gfx900 ||
gfx_version == GfxVersion::gfx902 ||
gfx_version == GfxVersion::gfx908 ||
gfx_version == GfxVersion::gfx90a) {
gfx_version == GfxVersion::gfx90a ||
gfx_version == GfxVersion::gfx942) {
numSgprs = ((akc->granulated_wavefront_sgpr_count + 1) * 16)/2;
} else {
panic("Saw unknown gfx version setting up GPR counts\n");

View File

@@ -442,7 +442,8 @@ Wavefront::initRegState(HSAQueueEntry *task, int wgSizeInWorkItems)
// Default to false and set to true for gem5 supported ISAs.
bool packed_work_item_id = false;
if (task->gfxVersion() == GfxVersion::gfx90a) {
if (task->gfxVersion() == GfxVersion::gfx90a ||
task->gfxVersion() == GfxVersion::gfx942) {
packed_work_item_id = true;
}