gpu-compute: Update tokens for flat global/scratch

Memory instructions acquire coalescer tokens in the schedule stage.
Currently this is only done for buffer and flat instructions, but not
flat global or flat scratch. This change now acquires tokens for flat
global and flat scratch instructions. This provides back-pressure to the
CUs and helps to avoid deadlocks in Ruby.

The change also handles returning tokens for buffer, flat global, and
flat scratch instructions. This was previously only being done for
normal flat instructions leading to deadlocks in some applications when
the tokens were exhausted.

To simplify the logic, added a needsToken() method to GPUDynInst which
returns true if the instruction is a buffer or any flat-segment access.

The waitcnts were also incorrect for flat global and flat scratch. We
should always decrement vmem and exp count for stores and only normal
flat instructions should decrement lgkm. Currently vmem/exp are not
decremented for flat global and flat scratch which can lead to deadlock.
This change set fixes this by always decrementing vmem/exp and lgkm only
for normal flat instructions.

Change-Id: I673f4ac6121e4b5a5e8491bc9130c6d825d95fc5
This commit is contained in:
Matthew Poremba
2023-10-06 10:30:45 -05:00
parent ae104cc431
commit 9f4d334644
5 changed files with 77 additions and 36 deletions

View File

@@ -43894,9 +43894,11 @@ namespace VegaISA
{
Wavefront *wf = gpuDynInst->wavefront();
if (gpuDynInst->exec_mask.none() && isFlat()) {
if (gpuDynInst->exec_mask.none()) {
wf->decVMemInstsIssued();
wf->decLGKMInstsIssued();
if (isFlat()) {
wf->decLGKMInstsIssued();
}
return;
}
@@ -43978,9 +43980,11 @@ namespace VegaISA
{
Wavefront *wf = gpuDynInst->wavefront();
if (gpuDynInst->exec_mask.none() && isFlat()) {
if (gpuDynInst->exec_mask.none()) {
wf->decVMemInstsIssued();
wf->decLGKMInstsIssued();
if (isFlat()) {
wf->decLGKMInstsIssued();
}
return;
}
@@ -44063,9 +44067,11 @@ namespace VegaISA
{
Wavefront *wf = gpuDynInst->wavefront();
if (gpuDynInst->exec_mask.none() && isFlat()) {
if (gpuDynInst->exec_mask.none()) {
wf->decVMemInstsIssued();
wf->decLGKMInstsIssued();
if (isFlat()) {
wf->decLGKMInstsIssued();
}
return;
}
@@ -44118,9 +44124,11 @@ namespace VegaISA
{
Wavefront *wf = gpuDynInst->wavefront();
if (gpuDynInst->exec_mask.none() && isFlat()) {
if (gpuDynInst->exec_mask.none()) {
wf->decVMemInstsIssued();
wf->decLGKMInstsIssued();
if (isFlat()) {
wf->decLGKMInstsIssued();
}
return;
}
@@ -44173,9 +44181,11 @@ namespace VegaISA
{
Wavefront *wf = gpuDynInst->wavefront();
if (gpuDynInst->exec_mask.none() && isFlat()) {
if (gpuDynInst->exec_mask.none()) {
wf->decVMemInstsIssued();
wf->decLGKMInstsIssued();
if (isFlat()) {
wf->decLGKMInstsIssued();
}
return;
}
@@ -44237,9 +44247,11 @@ namespace VegaISA
{
Wavefront *wf = gpuDynInst->wavefront();
if (gpuDynInst->exec_mask.none() && isFlat()) {
if (gpuDynInst->exec_mask.none()) {
wf->decVMemInstsIssued();
wf->decLGKMInstsIssued();
if (isFlat()) {
wf->decLGKMInstsIssued();
}
return;
}
@@ -44304,9 +44316,11 @@ namespace VegaISA
{
Wavefront *wf = gpuDynInst->wavefront();
if (gpuDynInst->exec_mask.none() && isFlat()) {
if (gpuDynInst->exec_mask.none()) {
wf->decVMemInstsIssued();
wf->decLGKMInstsIssued();
if (isFlat()) {
wf->decLGKMInstsIssued();
}
wf->decExpInstsIssued();
return;
}
@@ -44361,9 +44375,11 @@ namespace VegaISA
{
Wavefront *wf = gpuDynInst->wavefront();
if (gpuDynInst->exec_mask.none() && isFlat()) {
if (gpuDynInst->exec_mask.none()) {
wf->decVMemInstsIssued();
wf->decLGKMInstsIssued();
if (isFlat()) {
wf->decLGKMInstsIssued();
}
wf->decExpInstsIssued();
return;
}
@@ -44418,9 +44434,11 @@ namespace VegaISA
{
Wavefront *wf = gpuDynInst->wavefront();
if (gpuDynInst->exec_mask.none() && isFlat()) {
if (gpuDynInst->exec_mask.none()) {
wf->decVMemInstsIssued();
wf->decLGKMInstsIssued();
if (isFlat()) {
wf->decLGKMInstsIssued();
}
wf->decExpInstsIssued();
return;
}
@@ -44476,9 +44494,11 @@ namespace VegaISA
{
Wavefront *wf = gpuDynInst->wavefront();
if (gpuDynInst->exec_mask.none() && isFlat()) {
if (gpuDynInst->exec_mask.none()) {
wf->decVMemInstsIssued();
wf->decLGKMInstsIssued();
if (isFlat()) {
wf->decLGKMInstsIssued();
}
wf->decExpInstsIssued();
return;
}
@@ -44534,9 +44554,11 @@ namespace VegaISA
{
Wavefront *wf = gpuDynInst->wavefront();
if (gpuDynInst->exec_mask.none() && isFlat()) {
if (gpuDynInst->exec_mask.none()) {
wf->decVMemInstsIssued();
wf->decLGKMInstsIssued();
if (isFlat()) {
wf->decLGKMInstsIssued();
}
wf->decExpInstsIssued();
return;
}
@@ -44600,9 +44622,11 @@ namespace VegaISA
{
Wavefront *wf = gpuDynInst->wavefront();
if (gpuDynInst->exec_mask.none() && isFlat()) {
if (gpuDynInst->exec_mask.none()) {
wf->decVMemInstsIssued();
wf->decLGKMInstsIssued();
if (isFlat()) {
wf->decLGKMInstsIssued();
}
wf->decExpInstsIssued();
return;
}
@@ -44677,9 +44701,11 @@ namespace VegaISA
{
Wavefront *wf = gpuDynInst->wavefront();
if (gpuDynInst->exec_mask.none() && isFlat()) {
if (gpuDynInst->exec_mask.none()) {
wf->decVMemInstsIssued();
wf->decLGKMInstsIssued();
if (isFlat()) {
wf->decLGKMInstsIssued();
}
return;
}
@@ -44757,9 +44783,11 @@ namespace VegaISA
{
Wavefront *wf = gpuDynInst->wavefront();
if (gpuDynInst->exec_mask.none() && isFlat()) {
if (gpuDynInst->exec_mask.none()) {
wf->decVMemInstsIssued();
wf->decLGKMInstsIssued();
if (isFlat()) {
wf->decLGKMInstsIssued();
}
return;
}
@@ -44837,9 +44865,11 @@ namespace VegaISA
{
Wavefront *wf = gpuDynInst->wavefront();
if (gpuDynInst->exec_mask.none() && isFlat()) {
if (gpuDynInst->exec_mask.none()) {
wf->decVMemInstsIssued();
wf->decLGKMInstsIssued();
if (isFlat()) {
wf->decLGKMInstsIssued();
}
return;
}
@@ -45370,9 +45400,11 @@ namespace VegaISA
{
Wavefront *wf = gpuDynInst->wavefront();
if (gpuDynInst->exec_mask.none() && isFlat()) {
if (gpuDynInst->exec_mask.none()) {
wf->decVMemInstsIssued();
wf->decLGKMInstsIssued();
if (isFlat()) {
wf->decLGKMInstsIssued();
}
return;
}
@@ -45451,9 +45483,11 @@ namespace VegaISA
{
Wavefront *wf = gpuDynInst->wavefront();
if (gpuDynInst->exec_mask.none() && isFlat()) {
if (gpuDynInst->exec_mask.none()) {
wf->decVMemInstsIssued();
wf->decLGKMInstsIssued();
if (isFlat()) {
wf->decLGKMInstsIssued();
}
return;
}

View File

@@ -582,6 +582,12 @@ GPUDynInst::readsFlatScratch() const
return false;
}
bool
GPUDynInst::needsToken() const
{
return isGlobalMem() || isFlat() || isFlatGlobal() || isFlatScratch();
}
bool
GPUDynInst::isAtomicAnd() const
{

View File

@@ -257,6 +257,7 @@ class GPUDynInst : public GPUExecContext
bool writesFlatScratch() const;
bool readsExecMask() const;
bool writesExecMask() const;
bool needsToken() const;
bool isAtomicAnd() const;
bool isAtomicOr() const;

View File

@@ -579,7 +579,7 @@ ScheduleStage::fillDispatchList()
// operation.
GPUDynInstPtr mp = schIter->first;
if (!mp->isMemSync() && !mp->isScalar() &&
(mp->isGlobalMem() || mp->isFlat())) {
mp->needsToken()) {
computeUnit.globalMemoryPipe.acqCoalescerToken(mp);
}

View File

@@ -1082,7 +1082,7 @@ Wavefront::exec()
* we return here to avoid spurious errors related to flat insts
* and their address segment resolution.
*/
if (execMask().none() && ii->isFlat()) {
if (execMask().none() && ii->needsToken()) {
computeUnit->getTokenManager()->recvTokens(1);
return;
}