diff --git a/src/gpu-compute/hsa_queue_entry.hh b/src/gpu-compute/hsa_queue_entry.hh
index 4261f2c631..fbe0efef21 100644
--- a/src/gpu-compute/hsa_queue_entry.hh
+++ b/src/gpu-compute/hsa_queue_entry.hh
@@ -96,9 +96,22 @@ class HSAQueueEntry
         if (!numVgprs)
             numVgprs = (akc->granulated_workitem_vgpr_count + 1) * 4;
 
-        // TODO: Granularity changes for GFX9!
-        if (!numSgprs)
-            numSgprs = (akc->granulated_wavefront_sgpr_count + 1) * 8;
+        if (!numSgprs || numSgprs ==
+            std::numeric_limits<decltype(akc->wavefront_sgpr_count)>::max()) {
+            // Supported major generation numbers: 0 (BLIT kernels), 8, and 9
+            uint16_t version = akc->amd_machine_version_major;
+            assert((version == 0) || (version == 8) || (version == 9));
+            // SGPR allocation granularities:
+            // - GFX8: 8
+            // - GFX9: 16
+            // Source: https://llvm.org/docs/AMDGPUUsage.html
+            if ((version == 0) || (version == 8)) {
+                // We assume that BLIT kernels use the same granularity as GFX8
+                numSgprs = (akc->granulated_wavefront_sgpr_count + 1) * 8;
+            } else if (version == 9) {
+                numSgprs = ((akc->granulated_wavefront_sgpr_count + 1) * 16)/2;
+            }
+        }
 
         initialVgprState.reset();
         initialSgprState.reset();