gpu-compute: Change kernel-based exit location (#184)

The previous exit event occurred when the dispatcher sent a completion signal for a kernel, but gem5 performs some kernel-based stats updates after that signal is sent. Therefore, if these exit events are used as a way to dump per-kernel stats, some of the stats for the kernel that just ended will appear in the next kernel's stat dump, which is misleading. This patch moves the exit event to where the stats are updated, and only exits if the dispatcher has requested a kernel exit event, to prevent exiting when stats are updated mid-kernel.
Change-Id: I74dc1cad5fc90382a2a80564764b3e7c9fb65521
2023-08-16 07:38:12 -07:00
parent f6d44ac7b3 df4739929d
commit bc9bbc10f0
3 changed files with 18 additions and 2 deletions
--- a/src/gpu-compute/dispatcher.cc
+++ b/src/gpu-compute/dispatcher.cc
@@ -334,7 +334,7 @@ GPUDispatcher::notifyWgCompl(Wavefront *wf)
        DPRINTF(GPUKernelInfo, "Completed kernel %d\n", kern_id);

        if (kernelExitEvents) {
-            exitSimLoop("GPU Kernel Completed");
+            shader->requestKernelExitEvent();
        }
    }

--- a/src/gpu-compute/shader.cc
+++ b/src/gpu-compute/shader.cc
@@ -519,8 +519,14 @@ Shader::notifyCuSleep() {
    panic_if(_activeCus <= 0 || _activeCus > cuList.size(),
             "Invalid activeCu size\n");
    _activeCus--;
-    if (!_activeCus)
+    if (!_activeCus) {
        stats.shaderActiveTicks += curTick() - _lastInactiveTick;
+
+        if (kernelExitRequested) {
+            kernelExitRequested = false;
+            exitSimLoop("GPU Kernel Completed");
+        }
+    }
 }

 /**
--- a/src/gpu-compute/shader.hh
+++ b/src/gpu-compute/shader.hh
@@ -97,6 +97,10 @@ class Shader : public ClockedObject
    // Last tick that all CUs attached to this shader were inactive
    Tick _lastInactiveTick;

+    // If a kernel-based exit event was requested, wait for all CUs in the
+    // shader to complete before actually exiting so that stats are updated.
+    bool kernelExitRequested = false;
+
  public:
    typedef ShaderParams Params;
    enum hsail_mode_e {SIMT,VECTOR_SCALAR};
@@ -314,6 +318,12 @@ class Shader : public ClockedObject
        stats.vectorInstDstOperand[num_operands]++;
    }

+    void
+    requestKernelExitEvent()
+    {
+        kernelExitRequested = true;
+    }
+
  protected:
    struct ShaderStats : public statistics::Group
    {