Defer the "GPU Kernel Completed" simulation exit until the shader is idle.

GPUDispatcher::notifyWgCompl() no longer calls exitSimLoop() directly; it
sets a flag on the Shader, and Shader::notifyCuSleep() raises the exit when
the active-CU count reaches zero, after shaderActiveTicks has been updated.

diff --git a/src/gpu-compute/dispatcher.cc b/src/gpu-compute/dispatcher.cc
index 7b36bce591..babc938489 100644
--- a/src/gpu-compute/dispatcher.cc
+++ b/src/gpu-compute/dispatcher.cc
@@ -334,7 +334,7 @@ GPUDispatcher::notifyWgCompl(Wavefront *wf)
         DPRINTF(GPUKernelInfo, "Completed kernel %d\n", kern_id);
 
         if (kernelExitEvents) {
-            exitSimLoop("GPU Kernel Completed");
+            shader->requestKernelExitEvent(); // exits once all CUs sleep
         }
     }
 
diff --git a/src/gpu-compute/shader.cc b/src/gpu-compute/shader.cc
index 73d2366b74..620d0152c1 100644
--- a/src/gpu-compute/shader.cc
+++ b/src/gpu-compute/shader.cc
@@ -519,8 +519,14 @@ Shader::notifyCuSleep() {
     panic_if(_activeCus <= 0 || _activeCus > cuList.size(),
              "Invalid activeCu size\n");
     _activeCus--;
-    if (!_activeCus)
+    if (!_activeCus) {
         stats.shaderActiveTicks += curTick() - _lastInactiveTick;
+
+        if (kernelExitRequested) { // set by requestKernelExitEvent()
+            kernelExitRequested = false; // consume the request
+            exitSimLoop("GPU Kernel Completed");
+        }
+    }
 }
 
 /**
diff --git a/src/gpu-compute/shader.hh b/src/gpu-compute/shader.hh
index 08dfd24b76..32ddf3d15b 100644
--- a/src/gpu-compute/shader.hh
+++ b/src/gpu-compute/shader.hh
@@ -97,6 +97,10 @@ class Shader : public ClockedObject
     // Last tick that all CUs attached to this shader were inactive
     Tick _lastInactiveTick;
 
+    // Set by requestKernelExitEvent() when a kernel-based exit is wanted;
+    // notifyCuSleep() exits once all CUs sleep, after stats are updated.
+    bool kernelExitRequested = false;
+
   public:
     typedef ShaderParams Params;
     enum hsail_mode_e {SIMT,VECTOR_SCALAR};
@@ -314,6 +318,12 @@ class Shader : public ClockedObject
         stats.vectorInstDstOperand[num_operands]++;
     }
 
+    void
+    requestKernelExitEvent()
+    {
+        kernelExitRequested = true; // honored in notifyCuSleep()
+    }
+
   protected:
     struct ShaderStats : public statistics::Group
     {