gpu-compute: enable flexible control of kernel boundary syncs

Kernel end release was turned on for the VIPER protocol, which is in fact write-through based and thus does not need a release operation. This changeset splits the option 'impl_kern_boundary_sync' into 'impl_kern_launch_acq' and 'impl_kern_end_rel', and turns off release on VIPER. Change-Id: I5490019b6765a25bd801cc78fb7445b90eb02a3d Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/29917 Reviewed-by: Anthony Gutierrez <anthony.gutierrez@amd.com> Reviewed-by: Xianwei Zhang <xianwei.zhang@amd.com> Maintainer: Anthony Gutierrez <anthony.gutierrez@amd.com> Tested-by: kokoro <noreply+kokoro@google.com>
2018-06-18 13:50:11 -04:00
parent eb9efdaa44
commit 2c1e9c4e81
5 changed files with 20 additions and 11 deletions
--- a/src/arch/gcn3/insts/instructions.cc
+++ b/src/arch/gcn3/insts/instructions.cc
@@ -3759,9 +3759,13 @@ namespace Gcn3ISA
            // the last workgroup in the kernel).
            bool kernelEnd =
                wf->computeUnit->shader->dispatcher().isReachingKernelEnd(wf);
+            // further check whether 'release @ kernel end' is needed
+            bool relNeeded =
+                wf->computeUnit->shader->impl_kern_end_rel;

-            // if it is not a kernel end, then retire the workgroup directly
-            if (!kernelEnd) {
+            // if not a kernel end or no release needed, retire the workgroup
+            // directly
+            if (!kernelEnd || !relNeeded) {
                wf->computeUnit->shader->dispatcher().notifyWgCompl(wf);
                wf->setStatus(Wavefront::S_STOPPED);
                wf->computeUnit->completedWGs++;
@@ -3770,8 +3774,8 @@ namespace Gcn3ISA
            }

            /**
-             * If it is a kernel end, inject a memory sync and retire the
-             * workgroup after receving response.
+             * If a kernel end and release needed, inject a memory sync and
+             * retire the workgroup after receiving all acks.
             */
            setFlag(MemSync);
            setFlag(GlobalSegment);