dev-amdgpu,configs,gpu-compute: Add gfx942 version
This is the version for MI300. For the most part, it is the same as MI200, with the exception of architected flat scratch (not yet implemented in gem5); therefore, a new version enum is required.

Change-Id: Id18cd7b57c4eebd467c010a3f61e3117beb8d58a
This commit is contained in:
@@ -86,6 +86,8 @@ AMDGPUDevice::AMDGPUDevice(const AMDGPUDeviceParams &p)
|
||||
gfx_version = GfxVersion::gfx908;
|
||||
} else if (p.device_name == "MI200") {
|
||||
gfx_version = GfxVersion::gfx90a;
|
||||
} else if (p.device_name == "MI300X") {
|
||||
gfx_version = GfxVersion::gfx942;
|
||||
} else {
|
||||
panic("Unknown GPU device %s\n", p.device_name);
|
||||
}
|
||||
@@ -124,7 +126,8 @@ AMDGPUDevice::AMDGPUDevice(const AMDGPUDeviceParams &p)
|
||||
sdmaFunc.insert({0x10b, &SDMAEngine::setPageDoorbellOffsetLo});
|
||||
sdmaFunc.insert({0xe0, &SDMAEngine::setPageSize});
|
||||
sdmaFunc.insert({0x113, &SDMAEngine::setPageWptrLo});
|
||||
} else if (p.device_name == "MI100" || p.device_name == "MI200") {
|
||||
} else if (p.device_name == "MI100" || p.device_name == "MI200"
|
||||
|| p.device_name == "MI300X") {
|
||||
sdmaFunc.insert({0xd9, &SDMAEngine::setPageBaseLo});
|
||||
sdmaFunc.insert({0xe1, &SDMAEngine::setPageRptrLo});
|
||||
sdmaFunc.insert({0xe0, &SDMAEngine::setPageRptrHi});
|
||||
@@ -195,6 +198,10 @@ AMDGPUDevice::AMDGPUDevice(const AMDGPUDeviceParams &p)
|
||||
setRegVal(MI200_FB_LOCATION_BASE, mmhubBase >> 24);
|
||||
setRegVal(MI200_FB_LOCATION_TOP, mmhubTop >> 24);
|
||||
setRegVal(MI200_MEM_SIZE_REG, mem_size);
|
||||
} else if (p.device_name == "MI300X") {
|
||||
setRegVal(MI200_FB_LOCATION_BASE, mmhubBase >> 24);
|
||||
setRegVal(MI200_FB_LOCATION_TOP, mmhubTop >> 24);
|
||||
setRegVal(MI200_MEM_SIZE_REG, mem_size);
|
||||
} else {
|
||||
panic("Unknown GPU device %s\n", p.device_name);
|
||||
}
|
||||
|
||||
@@ -328,8 +328,8 @@ typedef struct GEM5_PACKED
|
||||
};
|
||||
uint64_t completionSignal;
|
||||
};
|
||||
} PM4MapProcessMI200;
|
||||
static_assert(sizeof(PM4MapProcessMI200) == 80);
|
||||
} PM4MapProcessV2;
|
||||
static_assert(sizeof(PM4MapProcessV2) == 80);
|
||||
|
||||
typedef struct GEM5_PACKED
|
||||
{
|
||||
|
||||
@@ -290,18 +290,19 @@ PM4PacketProcessor::decodeHeader(PM4Queue *q, PM4Header header)
|
||||
dmaBuffer);
|
||||
} break;
|
||||
case IT_MAP_PROCESS: {
|
||||
if (gpuDevice->getGfxVersion() == GfxVersion::gfx90a) {
|
||||
dmaBuffer = new PM4MapProcessMI200();
|
||||
if (gpuDevice->getGfxVersion() == GfxVersion::gfx90a ||
|
||||
gpuDevice->getGfxVersion() == GfxVersion::gfx942) {
|
||||
dmaBuffer = new PM4MapProcessV2();
|
||||
cb = new DmaVirtCallback<uint64_t>(
|
||||
[ = ] (const uint64_t &)
|
||||
{ mapProcessGfx90a(q, (PM4MapProcessMI200 *)dmaBuffer); });
|
||||
dmaReadVirt(getGARTAddr(q->rptr()), sizeof(PM4MapProcessMI200),
|
||||
{ mapProcessV2(q, (PM4MapProcessV2 *)dmaBuffer); });
|
||||
dmaReadVirt(getGARTAddr(q->rptr()), sizeof(PM4MapProcessV2),
|
||||
cb, dmaBuffer);
|
||||
} else {
|
||||
dmaBuffer = new PM4MapProcess();
|
||||
cb = new DmaVirtCallback<uint64_t>(
|
||||
[ = ] (const uint64_t &)
|
||||
{ mapProcessGfx9(q, (PM4MapProcess *)dmaBuffer); });
|
||||
{ mapProcessV1(q, (PM4MapProcess *)dmaBuffer); });
|
||||
dmaReadVirt(getGARTAddr(q->rptr()), sizeof(PM4MapProcess), cb,
|
||||
dmaBuffer);
|
||||
}
|
||||
@@ -701,7 +702,7 @@ PM4PacketProcessor::mapProcess(uint32_t pasid, uint64_t ptBase,
|
||||
}
|
||||
|
||||
void
|
||||
PM4PacketProcessor::mapProcessGfx9(PM4Queue *q, PM4MapProcess *pkt)
|
||||
PM4PacketProcessor::mapProcessV1(PM4Queue *q, PM4MapProcess *pkt)
|
||||
{
|
||||
q->incRptr(sizeof(PM4MapProcess));
|
||||
|
||||
@@ -716,9 +717,9 @@ PM4PacketProcessor::mapProcessGfx9(PM4Queue *q, PM4MapProcess *pkt)
|
||||
}
|
||||
|
||||
void
|
||||
PM4PacketProcessor::mapProcessGfx90a(PM4Queue *q, PM4MapProcessMI200 *pkt)
|
||||
PM4PacketProcessor::mapProcessV2(PM4Queue *q, PM4MapProcessV2 *pkt)
|
||||
{
|
||||
q->incRptr(sizeof(PM4MapProcessMI200));
|
||||
q->incRptr(sizeof(PM4MapProcessV2));
|
||||
|
||||
DPRINTF(PM4PacketProcessor, "PM4 map_process pasid: %p quantum: "
|
||||
"%d pt: %p signal: %p\n", pkt->pasid, pkt->processQuantum,
|
||||
|
||||
@@ -146,8 +146,8 @@ class PM4PacketProcessor : public DmaVirtDevice
|
||||
void unmapQueues(PM4Queue *q, PM4UnmapQueues *pkt);
|
||||
void doneMQDWrite(Addr mqdAddr, Addr addr);
|
||||
void mapProcess(uint32_t pasid, uint64_t ptBase, uint32_t shMemBases);
|
||||
void mapProcessGfx9(PM4Queue *q, PM4MapProcess *pkt);
|
||||
void mapProcessGfx90a(PM4Queue *q, PM4MapProcessMI200 *pkt);
|
||||
void mapProcessV1(PM4Queue *q, PM4MapProcess *pkt);
|
||||
void mapProcessV2(PM4Queue *q, PM4MapProcessV2 *pkt);
|
||||
void processMQD(PM4MapQueues *pkt, PM4Queue *q, Addr addr, QueueDesc *mqd,
|
||||
uint16_t vmid);
|
||||
void processSDMAMQD(PM4MapQueues *pkt, PM4Queue *q, Addr addr,
|
||||
|
||||
Reference in New Issue
Block a user