From efa1d87addd9caff066a12cc0472164f1c5dd7f5 Mon Sep 17 00:00:00 2001 From: KaiBatley <68886332+KaiBatley@users.noreply.github.com> Date: Mon, 17 Jul 2023 12:42:40 -0500 Subject: [PATCH] configs: fix GPU's default number of HW barrier/CU (#92) AMD GCN3 and Vega GPUs assume a max of 16 WG/CU. Any GPU WG with more than 1 WF requires a hardware barrier to allow WFs in the WG to synchronize locally. However, currently the default gem5 GPU configuration assumes only 4 barriers per CU, which artificially prevents applications that could otherwise run more than 4 WG/CU simultaneously from doing so. This fix resolves this by updating the default number of hardware barriers per CU to 16, which mimics the support described in slide 39 here: https://www.olcf.ornl.gov/wp-content/uploads/2019/10/ORNL_Application_Readiness_Workshop-AMD_GPU_Basics.pdf Change-Id: Ib7636a13359d998e676c1790f436a83ce88cbfc0 --- src/gpu-compute/GPU.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/gpu-compute/GPU.py b/src/gpu-compute/GPU.py index d2b9e98362..1b6c6a7494 100644 --- a/src/gpu-compute/GPU.py +++ b/src/gpu-compute/GPU.py @@ -131,7 +131,7 @@ class ComputeUnit(ClockedObject): # Wavefront size is 64. This is configurable, however changing # this value to anything other than 64 will likely cause errors. wf_size = Param.Int(64, "Wavefront size (in work items)") - num_barrier_slots = Param.Int(4, "Number of barrier slots in a CU") + num_barrier_slots = Param.Int(16, "Number of barrier slots in a CU") num_SIMDs = Param.Int(4, "number of SIMD units per CU") num_scalar_cores = Param.Int(1, "number of Scalar cores per CU") num_scalar_mem_pipes = Param.Int(