dev-amdgpu,configs,gpu-compute: Add gfx942 version
This is the version for MI300X (gfx942). For the most part it is the same as MI200; the exception is architected flat scratch (not yet implemented in gem5), which is why a new version enum is required. Change-Id: Id18cd7b57c4eebd467c010a3f61e3117beb8d58a
This commit is contained in:
@@ -134,9 +134,9 @@ def addRunFSOptions(parser):
|
||||
parser.add_argument(
|
||||
"--gpu-device",
|
||||
default="Vega10",
|
||||
choices=["Vega10", "MI100", "MI200"],
|
||||
help="GPU model to run: Vega10 (gfx900), MI100 (gfx908), or "
|
||||
"MI200 (gfx90a)",
|
||||
choices=["Vega10", "MI100", "MI200", "MI300X"],
|
||||
help="GPU model to run: Vega10 (gfx900), MI100 (gfx908), MI200 "
|
||||
"(gfx90a), or MI300X (gfx942).",
|
||||
)
|
||||
|
||||
parser.add_argument(
|
||||
|
||||
@@ -191,10 +191,14 @@ def connectGPU(system, args):
|
||||
system.pc.south_bridge.gpu.DeviceID = 0x740F
|
||||
system.pc.south_bridge.gpu.SubsystemVendorID = 0x1002
|
||||
system.pc.south_bridge.gpu.SubsystemID = 0x0C34
|
||||
elif args.gpu_device == "MI300X":
|
||||
system.pc.south_bridge.gpu.DeviceID = 0x740F
|
||||
system.pc.south_bridge.gpu.SubsystemVendorID = 0x1002
|
||||
system.pc.south_bridge.gpu.SubsystemID = 0x0C34
|
||||
elif args.gpu_device == "Vega10":
|
||||
system.pc.south_bridge.gpu.DeviceID = 0x6863
|
||||
else:
|
||||
panic(f"Unknown GPU device: {args.gpu_device}")
|
||||
m5.util.panic(f"Unknown GPU device: {args.gpu_device}")
|
||||
|
||||
# Use the gem5 default of 0x280 OR'd with 0x10 which tells Linux there is
|
||||
# a PCI capabilities list to traverse.
|
||||
|
||||
@@ -161,7 +161,7 @@ def makeGpuFSSystem(args):
|
||||
0x7D000,
|
||||
]
|
||||
sdma_sizes = [0x1000] * 8
|
||||
elif args.gpu_device == "MI200":
|
||||
elif args.gpu_device == "MI200" or args.gpu_device == "MI300X":
|
||||
num_sdmas = 5
|
||||
sdma_bases = [
|
||||
0x4980,
|
||||
|
||||
@@ -86,6 +86,8 @@ AMDGPUDevice::AMDGPUDevice(const AMDGPUDeviceParams &p)
|
||||
gfx_version = GfxVersion::gfx908;
|
||||
} else if (p.device_name == "MI200") {
|
||||
gfx_version = GfxVersion::gfx90a;
|
||||
} else if (p.device_name == "MI300X") {
|
||||
gfx_version = GfxVersion::gfx942;
|
||||
} else {
|
||||
panic("Unknown GPU device %s\n", p.device_name);
|
||||
}
|
||||
@@ -124,7 +126,8 @@ AMDGPUDevice::AMDGPUDevice(const AMDGPUDeviceParams &p)
|
||||
sdmaFunc.insert({0x10b, &SDMAEngine::setPageDoorbellOffsetLo});
|
||||
sdmaFunc.insert({0xe0, &SDMAEngine::setPageSize});
|
||||
sdmaFunc.insert({0x113, &SDMAEngine::setPageWptrLo});
|
||||
} else if (p.device_name == "MI100" || p.device_name == "MI200") {
|
||||
} else if (p.device_name == "MI100" || p.device_name == "MI200"
|
||||
|| p.device_name == "MI300X") {
|
||||
sdmaFunc.insert({0xd9, &SDMAEngine::setPageBaseLo});
|
||||
sdmaFunc.insert({0xe1, &SDMAEngine::setPageRptrLo});
|
||||
sdmaFunc.insert({0xe0, &SDMAEngine::setPageRptrHi});
|
||||
@@ -195,6 +198,10 @@ AMDGPUDevice::AMDGPUDevice(const AMDGPUDeviceParams &p)
|
||||
setRegVal(MI200_FB_LOCATION_BASE, mmhubBase >> 24);
|
||||
setRegVal(MI200_FB_LOCATION_TOP, mmhubTop >> 24);
|
||||
setRegVal(MI200_MEM_SIZE_REG, mem_size);
|
||||
} else if (p.device_name == "MI300X") {
|
||||
setRegVal(MI200_FB_LOCATION_BASE, mmhubBase >> 24);
|
||||
setRegVal(MI200_FB_LOCATION_TOP, mmhubTop >> 24);
|
||||
setRegVal(MI200_MEM_SIZE_REG, mem_size);
|
||||
} else {
|
||||
panic("Unknown GPU device %s\n", p.device_name);
|
||||
}
|
||||
|
||||
@@ -328,8 +328,8 @@ typedef struct GEM5_PACKED
|
||||
};
|
||||
uint64_t completionSignal;
|
||||
};
|
||||
} PM4MapProcessMI200;
|
||||
static_assert(sizeof(PM4MapProcessMI200) == 80);
|
||||
} PM4MapProcessV2;
|
||||
static_assert(sizeof(PM4MapProcessV2) == 80);
|
||||
|
||||
typedef struct GEM5_PACKED
|
||||
{
|
||||
|
||||
@@ -290,18 +290,19 @@ PM4PacketProcessor::decodeHeader(PM4Queue *q, PM4Header header)
|
||||
dmaBuffer);
|
||||
} break;
|
||||
case IT_MAP_PROCESS: {
|
||||
if (gpuDevice->getGfxVersion() == GfxVersion::gfx90a) {
|
||||
dmaBuffer = new PM4MapProcessMI200();
|
||||
if (gpuDevice->getGfxVersion() == GfxVersion::gfx90a ||
|
||||
gpuDevice->getGfxVersion() == GfxVersion::gfx942) {
|
||||
dmaBuffer = new PM4MapProcessV2();
|
||||
cb = new DmaVirtCallback<uint64_t>(
|
||||
[ = ] (const uint64_t &)
|
||||
{ mapProcessGfx90a(q, (PM4MapProcessMI200 *)dmaBuffer); });
|
||||
dmaReadVirt(getGARTAddr(q->rptr()), sizeof(PM4MapProcessMI200),
|
||||
{ mapProcessV2(q, (PM4MapProcessV2 *)dmaBuffer); });
|
||||
dmaReadVirt(getGARTAddr(q->rptr()), sizeof(PM4MapProcessV2),
|
||||
cb, dmaBuffer);
|
||||
} else {
|
||||
dmaBuffer = new PM4MapProcess();
|
||||
cb = new DmaVirtCallback<uint64_t>(
|
||||
[ = ] (const uint64_t &)
|
||||
{ mapProcessGfx9(q, (PM4MapProcess *)dmaBuffer); });
|
||||
{ mapProcessV1(q, (PM4MapProcess *)dmaBuffer); });
|
||||
dmaReadVirt(getGARTAddr(q->rptr()), sizeof(PM4MapProcess), cb,
|
||||
dmaBuffer);
|
||||
}
|
||||
@@ -701,7 +702,7 @@ PM4PacketProcessor::mapProcess(uint32_t pasid, uint64_t ptBase,
|
||||
}
|
||||
|
||||
void
|
||||
PM4PacketProcessor::mapProcessGfx9(PM4Queue *q, PM4MapProcess *pkt)
|
||||
PM4PacketProcessor::mapProcessV1(PM4Queue *q, PM4MapProcess *pkt)
|
||||
{
|
||||
q->incRptr(sizeof(PM4MapProcess));
|
||||
|
||||
@@ -716,9 +717,9 @@ PM4PacketProcessor::mapProcessGfx9(PM4Queue *q, PM4MapProcess *pkt)
|
||||
}
|
||||
|
||||
void
|
||||
PM4PacketProcessor::mapProcessGfx90a(PM4Queue *q, PM4MapProcessMI200 *pkt)
|
||||
PM4PacketProcessor::mapProcessV2(PM4Queue *q, PM4MapProcessV2 *pkt)
|
||||
{
|
||||
q->incRptr(sizeof(PM4MapProcessMI200));
|
||||
q->incRptr(sizeof(PM4MapProcessV2));
|
||||
|
||||
DPRINTF(PM4PacketProcessor, "PM4 map_process pasid: %p quantum: "
|
||||
"%d pt: %p signal: %p\n", pkt->pasid, pkt->processQuantum,
|
||||
|
||||
@@ -146,8 +146,8 @@ class PM4PacketProcessor : public DmaVirtDevice
|
||||
void unmapQueues(PM4Queue *q, PM4UnmapQueues *pkt);
|
||||
void doneMQDWrite(Addr mqdAddr, Addr addr);
|
||||
void mapProcess(uint32_t pasid, uint64_t ptBase, uint32_t shMemBases);
|
||||
void mapProcessGfx9(PM4Queue *q, PM4MapProcess *pkt);
|
||||
void mapProcessGfx90a(PM4Queue *q, PM4MapProcessMI200 *pkt);
|
||||
void mapProcessV1(PM4Queue *q, PM4MapProcess *pkt);
|
||||
void mapProcessV2(PM4Queue *q, PM4MapProcessV2 *pkt);
|
||||
void processMQD(PM4MapQueues *pkt, PM4Queue *q, Addr addr, QueueDesc *mqd,
|
||||
uint16_t vmid);
|
||||
void processSDMAMQD(PM4MapQueues *pkt, PM4Queue *q, Addr addr,
|
||||
|
||||
@@ -45,7 +45,7 @@ class PrefetchType(Enum):
|
||||
|
||||
|
||||
class GfxVersion(ScopedEnum):
|
||||
vals = ["gfx900", "gfx902", "gfx908", "gfx90a"]
|
||||
vals = ["gfx900", "gfx902", "gfx908", "gfx90a", "gfx942"]
|
||||
|
||||
|
||||
class PoolManager(SimObject):
|
||||
|
||||
@@ -94,9 +94,10 @@ class HSAQueueEntry
|
||||
// LLVM docs: https://www.llvm.org/docs/AMDGPUUsage.html
|
||||
// #code-object-v3-kernel-descriptor
|
||||
//
|
||||
// Currently, the only supported gfx version in gem5 that computes
|
||||
// VGPR count differently is gfx90a.
|
||||
if (gfx_version == GfxVersion::gfx90a) {
|
||||
// Currently, the only supported gfx versions in gem5 that compute
|
||||
// VGPR count differently are gfx90a and gfx942.
|
||||
if (gfx_version == GfxVersion::gfx90a ||
|
||||
gfx_version == GfxVersion::gfx942) {
|
||||
numVgprs = (akc->granulated_workitem_vgpr_count + 1) * 8;
|
||||
} else {
|
||||
numVgprs = (akc->granulated_workitem_vgpr_count + 1) * 4;
|
||||
@@ -107,7 +108,8 @@ class HSAQueueEntry
|
||||
if (gfx_version == GfxVersion::gfx900 ||
|
||||
gfx_version == GfxVersion::gfx902 ||
|
||||
gfx_version == GfxVersion::gfx908 ||
|
||||
gfx_version == GfxVersion::gfx90a) {
|
||||
gfx_version == GfxVersion::gfx90a ||
|
||||
gfx_version == GfxVersion::gfx942) {
|
||||
numSgprs = ((akc->granulated_wavefront_sgpr_count + 1) * 16)/2;
|
||||
} else {
|
||||
panic("Saw unknown gfx version setting up GPR counts\n");
|
||||
|
||||
@@ -442,7 +442,8 @@ Wavefront::initRegState(HSAQueueEntry *task, int wgSizeInWorkItems)
|
||||
// Default to false and set to true for gem5 supported ISAs.
|
||||
bool packed_work_item_id = false;
|
||||
|
||||
if (task->gfxVersion() == GfxVersion::gfx90a) {
|
||||
if (task->gfxVersion() == GfxVersion::gfx90a ||
|
||||
task->gfxVersion() == GfxVersion::gfx942) {
|
||||
packed_work_item_id = true;
|
||||
}
|
||||
|
||||
|
||||
Reference in New Issue
Block a user