From 84fedecafef8529bc48361c8a86445b5cf66d72c Mon Sep 17 00:00:00 2001
From: Matthew Poremba <matthew.poremba@amd.com>
Date: Wed, 7 Aug 2024 12:52:48 -0700
Subject: [PATCH] gpu-compute: Update Requests for invalidations

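The SQC and TCC invalidations issued by Shader::prepareInvalidate share
a single Request pointer, and both paths modify that Request. Sharing it
lets one invalidation's updates interfere with the other's, so allocate
a separate Request for each invalidation. The setContext calls in
ComputeUnit::doInvalidate and ComputeUnit::doSQCInvalidate are also
removed because the value passed to them (gpuDynInst->wfDynId) is
uninitialized.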

Change-Id: I82ea7aa44a4f4515c1560993caa26cc6a89355af
---
 src/gpu-compute/compute_unit.cc |  4 ----
 src/gpu-compute/shader.cc       | 14 +++++++++-----
 2 files changed, 9 insertions(+), 9 deletions(-)
diff --git a/src/gpu-compute/compute_unit.cc b/src/gpu-compute/compute_unit.cc
index 807fd21d4d..598864f9f2 100644
--- a/src/gpu-compute/compute_unit.cc
+++ b/src/gpu-compute/compute_unit.cc
@@ -409,8 +409,6 @@ ComputeUnit::doInvalidate(RequestPtr req, int kernId){
 
     // kern_id will be used in inv responses
     gpuDynInst->kern_id = kernId;
-    // update contextId field
-    req->setContext(gpuDynInst->wfDynId);
 
     injectGlobalMemFence(gpuDynInst, true, req);
 }
@@ -438,8 +436,6 @@ ComputeUnit::doSQCInvalidate(RequestPtr req, int kernId){
 
     // kern_id will be used in inv responses
     gpuDynInst->kern_id = kernId;
-    // update contextId field
-    req->setContext(gpuDynInst->wfDynId);
 
     gpuDynInst->staticInstruction()->setFlag(GPUStaticInst::Scalar);
     scalarMemoryPipe.injectScalarMemFence(gpuDynInst, true, req);
diff --git a/src/gpu-compute/shader.cc b/src/gpu-compute/shader.cc
index 13b03b0a34..b7108efdf9 100644
--- a/src/gpu-compute/shader.cc
+++ b/src/gpu-compute/shader.cc
@@ -214,19 +214,23 @@ Shader::prepareInvalidate(HSAQueueEntry *task) {
     for (int i_cu = 0; i_cu < n_cu; ++i_cu) {
         // create a request to hold INV info; the request's fields will
         // be updated in cu before use
-        auto req = std::make_shared<Request>(0, 0, 0,
-                                             cuList[i_cu]->requestorId(),
-                                             0, -1);
+        auto tcc_req = std::make_shared<Request>(0, 0, 0,
+                                                 cuList[i_cu]->requestorId(),
+                                                 0, -1);
 
         _dispatcher.updateInvCounter(kernId, +1);
         // all necessary INV flags are all set now, call cu to execute
-        cuList[i_cu]->doInvalidate(req, task->dispatchId());
+        cuList[i_cu]->doInvalidate(tcc_req, task->dispatchId());
 
 
         // A set of CUs share a single SQC cache. Send a single invalidate
         // request to each SQC
+        auto sqc_req = std::make_shared<Request>(0, 0, 0,
+                                                 cuList[i_cu]->requestorId(),
+                                                 0, -1);
+
         if ((i_cu % n_cu_per_sqc) == 0) {
-            cuList[i_cu]->doSQCInvalidate(req, task->dispatchId());
+            cuList[i_cu]->doSQCInvalidate(sqc_req, task->dispatchId());
         }
 
         // I don't like this. This is intrusive coding.