gpu-compute: Add granulated SGPR computation for gfx9
The granulated SGPR size is used when the number of SGPRs is unknown. The computation for this has changed since gfx8, which was previously only noted in a TODO comment. This changeset implements the change and also checks for an invalid SGPR count. According to the LLVM code, an invalid count could happen "due to a compiler bug or when using inline asm.": https://github.com/llvm/llvm-project/blob/main/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp#L723 Change-Id: Ie487a53940b323a0002341075e0f81af4147a7d8 Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/65252 Maintainer: Matt Sinclair <mattdsinclair@gmail.com> Reviewed-by: Matt Sinclair <mattdsinclair@gmail.com> Tested-by: kokoro <noreply+kokoro@google.com>
This commit is contained in:
@@ -96,9 +96,22 @@ class HSAQueueEntry
|
||||
if (!numVgprs)
|
||||
numVgprs = (akc->granulated_workitem_vgpr_count + 1) * 4;
|
||||
|
||||
// TODO: Granularity changes for GFX9!
|
||||
if (!numSgprs)
|
||||
numSgprs = (akc->granulated_wavefront_sgpr_count + 1) * 8;
|
||||
if (!numSgprs || numSgprs ==
|
||||
std::numeric_limits<decltype(akc->wavefront_sgpr_count)>::max()) {
|
||||
// Supported major generation numbers: 0 (BLIT kernels), 8, and 9
|
||||
uint16_t version = akc->amd_machine_version_major;
|
||||
assert((version == 0) || (version == 8) || (version == 9));
|
||||
// SGPR allocation granularities:
|
||||
// - GFX8: 8
|
||||
// - GFX9: 16
|
||||
// Source: https://llvm.org/docs/AMDGPUUsage.html
|
||||
if ((version == 0) || (version == 8)) {
|
||||
// We assume that BLIT kernels use the same granularity as GFX8
|
||||
numSgprs = (akc->granulated_wavefront_sgpr_count + 1) * 8;
|
||||
} else if (version == 9) {
|
||||
numSgprs = ((akc->granulated_wavefront_sgpr_count + 1) * 16)/2;
|
||||
}
|
||||
}
|
||||
|
||||
initialVgprState.reset();
|
||||
initialSgprState.reset();
|
||||
|
||||
Reference in New Issue
Block a user