diff --git a/src/gpu-compute/hsa_queue_entry.hh b/src/gpu-compute/hsa_queue_entry.hh
index 4261f2c631..fbe0efef21 100644
--- a/src/gpu-compute/hsa_queue_entry.hh
+++ b/src/gpu-compute/hsa_queue_entry.hh
@@ -96,9 +96,22 @@ class HSAQueueEntry
         if (!numVgprs)
             numVgprs = (akc->granulated_workitem_vgpr_count + 1) * 4;
 
-        // TODO: Granularity changes for GFX9!
-        if (!numSgprs)
-            numSgprs = (akc->granulated_wavefront_sgpr_count + 1) * 8;
+        if (!numSgprs || numSgprs ==
+            std::numeric_limits<decltype(akc->wavefront_sgpr_count)>::max()) {
+            // Supported major generation numbers: 0 (BLIT kernels), 8, and 9
+            uint16_t version = akc->amd_machine_version_major;
+            assert((version == 0) || (version == 8) || (version == 9));
+            // SGPR allocation granularities:
+            // - GFX8: 8
+            // - GFX9: 16
+            // Source: https://llvm.org/docs/AMDGPUUsage.html
+            if ((version == 0) || (version == 8)) {
+                // We assume that BLIT kernels use the same granularity as GFX8
+                numSgprs = (akc->granulated_wavefront_sgpr_count + 1) * 8;
+            } else if (version == 9) {
+                numSgprs = ((akc->granulated_wavefront_sgpr_count + 1) * 16)/2;
+            }
+        }
 
         initialVgprState.reset();
         initialSgprState.reset();