dev-amdgpu,configs,gpu-compute: Add gfx942 version

This is the version for MI300. For the most part, it is the same as
MI200, with the exception of architected flat scratch (not yet
implemented in gem5); a new version enum is therefore required.

Change-Id: Id18cd7b57c4eebd467c010a3f61e3117beb8d58a
This commit is contained in:
Matthew Poremba
2024-05-15 10:49:05 -07:00
parent 65976e4c6d
commit 8be5ce6fc9
10 changed files with 39 additions and 24 deletions

View File

@@ -86,6 +86,8 @@ AMDGPUDevice::AMDGPUDevice(const AMDGPUDeviceParams &p)
gfx_version = GfxVersion::gfx908;
} else if (p.device_name == "MI200") {
gfx_version = GfxVersion::gfx90a;
} else if (p.device_name == "MI300X") {
gfx_version = GfxVersion::gfx942;
} else {
panic("Unknown GPU device %s\n", p.device_name);
}
@@ -124,7 +126,8 @@ AMDGPUDevice::AMDGPUDevice(const AMDGPUDeviceParams &p)
sdmaFunc.insert({0x10b, &SDMAEngine::setPageDoorbellOffsetLo});
sdmaFunc.insert({0xe0, &SDMAEngine::setPageSize});
sdmaFunc.insert({0x113, &SDMAEngine::setPageWptrLo});
} else if (p.device_name == "MI100" || p.device_name == "MI200") {
} else if (p.device_name == "MI100" || p.device_name == "MI200"
|| p.device_name == "MI300X") {
sdmaFunc.insert({0xd9, &SDMAEngine::setPageBaseLo});
sdmaFunc.insert({0xe1, &SDMAEngine::setPageRptrLo});
sdmaFunc.insert({0xe0, &SDMAEngine::setPageRptrHi});
@@ -195,6 +198,10 @@ AMDGPUDevice::AMDGPUDevice(const AMDGPUDeviceParams &p)
setRegVal(MI200_FB_LOCATION_BASE, mmhubBase >> 24);
setRegVal(MI200_FB_LOCATION_TOP, mmhubTop >> 24);
setRegVal(MI200_MEM_SIZE_REG, mem_size);
} else if (p.device_name == "MI300X") {
setRegVal(MI200_FB_LOCATION_BASE, mmhubBase >> 24);
setRegVal(MI200_FB_LOCATION_TOP, mmhubTop >> 24);
setRegVal(MI200_MEM_SIZE_REG, mem_size);
} else {
panic("Unknown GPU device %s\n", p.device_name);
}

View File

@@ -328,8 +328,8 @@ typedef struct GEM5_PACKED
};
uint64_t completionSignal;
};
} PM4MapProcessMI200;
static_assert(sizeof(PM4MapProcessMI200) == 80);
} PM4MapProcessV2;
static_assert(sizeof(PM4MapProcessV2) == 80);
typedef struct GEM5_PACKED
{

View File

@@ -290,18 +290,19 @@ PM4PacketProcessor::decodeHeader(PM4Queue *q, PM4Header header)
dmaBuffer);
} break;
case IT_MAP_PROCESS: {
if (gpuDevice->getGfxVersion() == GfxVersion::gfx90a) {
dmaBuffer = new PM4MapProcessMI200();
if (gpuDevice->getGfxVersion() == GfxVersion::gfx90a ||
gpuDevice->getGfxVersion() == GfxVersion::gfx942) {
dmaBuffer = new PM4MapProcessV2();
cb = new DmaVirtCallback<uint64_t>(
[ = ] (const uint64_t &)
{ mapProcessGfx90a(q, (PM4MapProcessMI200 *)dmaBuffer); });
dmaReadVirt(getGARTAddr(q->rptr()), sizeof(PM4MapProcessMI200),
{ mapProcessV2(q, (PM4MapProcessV2 *)dmaBuffer); });
dmaReadVirt(getGARTAddr(q->rptr()), sizeof(PM4MapProcessV2),
cb, dmaBuffer);
} else {
dmaBuffer = new PM4MapProcess();
cb = new DmaVirtCallback<uint64_t>(
[ = ] (const uint64_t &)
{ mapProcessGfx9(q, (PM4MapProcess *)dmaBuffer); });
{ mapProcessV1(q, (PM4MapProcess *)dmaBuffer); });
dmaReadVirt(getGARTAddr(q->rptr()), sizeof(PM4MapProcess), cb,
dmaBuffer);
}
@@ -701,7 +702,7 @@ PM4PacketProcessor::mapProcess(uint32_t pasid, uint64_t ptBase,
}
void
PM4PacketProcessor::mapProcessGfx9(PM4Queue *q, PM4MapProcess *pkt)
PM4PacketProcessor::mapProcessV1(PM4Queue *q, PM4MapProcess *pkt)
{
q->incRptr(sizeof(PM4MapProcess));
@@ -716,9 +717,9 @@ PM4PacketProcessor::mapProcessGfx9(PM4Queue *q, PM4MapProcess *pkt)
}
void
PM4PacketProcessor::mapProcessGfx90a(PM4Queue *q, PM4MapProcessMI200 *pkt)
PM4PacketProcessor::mapProcessV2(PM4Queue *q, PM4MapProcessV2 *pkt)
{
q->incRptr(sizeof(PM4MapProcessMI200));
q->incRptr(sizeof(PM4MapProcessV2));
DPRINTF(PM4PacketProcessor, "PM4 map_process pasid: %p quantum: "
"%d pt: %p signal: %p\n", pkt->pasid, pkt->processQuantum,

View File

@@ -146,8 +146,8 @@ class PM4PacketProcessor : public DmaVirtDevice
void unmapQueues(PM4Queue *q, PM4UnmapQueues *pkt);
void doneMQDWrite(Addr mqdAddr, Addr addr);
void mapProcess(uint32_t pasid, uint64_t ptBase, uint32_t shMemBases);
void mapProcessGfx9(PM4Queue *q, PM4MapProcess *pkt);
void mapProcessGfx90a(PM4Queue *q, PM4MapProcessMI200 *pkt);
void mapProcessV1(PM4Queue *q, PM4MapProcess *pkt);
void mapProcessV2(PM4Queue *q, PM4MapProcessV2 *pkt);
void processMQD(PM4MapQueues *pkt, PM4Queue *q, Addr addr, QueueDesc *mqd,
uint16_t vmid);
void processSDMAMQD(PM4MapQueues *pkt, PM4Queue *q, Addr addr,