gpu-compute: Implement packed workitem ABI init
This initialization method is used in gfx90a (MI200). Rather than using three VGPRs for the X, Y, and Z dimensions of the kernel, pack them into one register with 10 bits for each dimension. Change-Id: I8e5b681c8287779ff9f80451d6028e862322294a
This commit is contained in:
@@ -474,8 +474,48 @@ Wavefront::initRegState(HSAQueueEntry *task, int wgSizeInWorkItems)
|
||||
|
||||
regInitIdx = 0;
|
||||
|
||||
// iterate over all the init fields and check which
|
||||
// bits are enabled
|
||||
// VGPRs are initialized to the work item IDs for a given thread. There
|
||||
// are two ways to initialize the IDs based on number of dimensions. ISAs
|
||||
// will either have packed work-item IDs or not. LLVM lists them here:
|
||||
// https://llvm.org/docs/AMDGPUUsage.html#amdgpu-processor-table
|
||||
// Default to false and set to true for gem5 supported ISAs.
|
||||
bool packed_work_item_id = false;
|
||||
|
||||
if (task->gfxVersion() == GfxVersion::gfx90a) {
|
||||
packed_work_item_id = true;
|
||||
}
|
||||
|
||||
// For ISAs with packed work item IDs, only one VGPR is used and the
|
||||
// (X,Y,Z) dimensions are packed into a single 32-bit VGPR with 10 bits
|
||||
// for each dimension
|
||||
if (packed_work_item_id) {
|
||||
TheGpuISA::VecRegContainerU32 raw_vgpr;
|
||||
TheGpuISA::VecElemU32 *packed_vgpr
|
||||
= raw_vgpr.as<TheGpuISA::VecElemU32>();
|
||||
|
||||
uint32_t physVgprIdx = computeUnit->registerManager
|
||||
->mapVgpr(this, regInitIdx);
|
||||
for (int lane = 0; lane < workItemId[0].size(); ++lane) {
|
||||
packed_vgpr[lane] = workItemId[0][lane] & 0x3ff;
|
||||
}
|
||||
if (task->vgprBitEnabled(1)) {
|
||||
for (int lane = 0; lane < workItemId[1].size(); ++lane) {
|
||||
packed_vgpr[lane] |= ((workItemId[1][lane] & 0x3ff) << 10);
|
||||
}
|
||||
}
|
||||
if (task->vgprBitEnabled(2)) {
|
||||
for (int lane = 0; lane < workItemId[2].size(); ++lane) {
|
||||
packed_vgpr[lane] |= ((workItemId[2][lane] & 0x3ff) << 20);
|
||||
}
|
||||
}
|
||||
computeUnit->vrf[simdId]->write(physVgprIdx, raw_vgpr);
|
||||
|
||||
return;
|
||||
}
|
||||
|
||||
// For ISAs with non-packed work item IDs, map and initialize one VGPR
|
||||
// per dimension. Do this by iterating over all the init fields and
|
||||
// checking which bits are enabled.
|
||||
for (int en_bit = 0; en_bit < NumVectorInitFields; ++en_bit) {
|
||||
if (task->vgprBitEnabled(en_bit)) {
|
||||
uint32_t physVgprIdx = 0;
|
||||
|
||||
Reference in New Issue
Block a user