From 84fedecafef8529bc48361c8a86445b5cf66d72c Mon Sep 17 00:00:00 2001 From: Matthew Poremba Date: Wed, 7 Aug 2024 12:52:48 -0700 Subject: [PATCH] gpu-compute: Update Requests for invalidations The SQC and TCC invalidations share a Request pointer which they both modify. This can cause some problems, so use a different request pointer for each invalidate. The setContext call is also removed as the value being assigned to it is uninitialized. Change-Id: I82ea7aa44a4f4515c1560993caa26cc6a89355af --- src/gpu-compute/compute_unit.cc | 4 ---- src/gpu-compute/shader.cc | 14 +++++++++----- 2 files changed, 9 insertions(+), 9 deletions(-) diff --git a/src/gpu-compute/compute_unit.cc b/src/gpu-compute/compute_unit.cc index 807fd21d4d..598864f9f2 100644 --- a/src/gpu-compute/compute_unit.cc +++ b/src/gpu-compute/compute_unit.cc @@ -409,8 +409,6 @@ ComputeUnit::doInvalidate(RequestPtr req, int kernId){ // kern_id will be used in inv responses gpuDynInst->kern_id = kernId; - // update contextId field - req->setContext(gpuDynInst->wfDynId); injectGlobalMemFence(gpuDynInst, true, req); } @@ -438,8 +436,6 @@ ComputeUnit::doSQCInvalidate(RequestPtr req, int kernId){ // kern_id will be used in inv responses gpuDynInst->kern_id = kernId; - // update contextId field - req->setContext(gpuDynInst->wfDynId); gpuDynInst->staticInstruction()->setFlag(GPUStaticInst::Scalar); scalarMemoryPipe.injectScalarMemFence(gpuDynInst, true, req); diff --git a/src/gpu-compute/shader.cc b/src/gpu-compute/shader.cc index 13b03b0a34..b7108efdf9 100644 --- a/src/gpu-compute/shader.cc +++ b/src/gpu-compute/shader.cc @@ -214,19 +214,23 @@ Shader::prepareInvalidate(HSAQueueEntry *task) { for (int i_cu = 0; i_cu < n_cu; ++i_cu) { // create a request to hold INV info; the request's fields will // be updated in cu before use - auto req = std::make_shared<Request>(0, 0, 0, - cuList[i_cu]->requestorId(), - 0, -1); + auto tcc_req = std::make_shared<Request>(0, 0, 0, + cuList[i_cu]->requestorId(), + 0, -1); 
_dispatcher.updateInvCounter(kernId, +1); // all necessary INV flags are all set now, call cu to execute - cuList[i_cu]->doInvalidate(req, task->dispatchId()); + cuList[i_cu]->doInvalidate(tcc_req, task->dispatchId()); // A set of CUs share a single SQC cache. Send a single invalidate // request to each SQC + auto sqc_req = std::make_shared<Request>(0, 0, 0, + cuList[i_cu]->requestorId(), + 0, -1); + if ((i_cu % n_cu_per_sqc) == 0) { - cuList[i_cu]->doSQCInvalidate(req, task->dispatchId()); + cuList[i_cu]->doSQCInvalidate(sqc_req, task->dispatchId()); } // I don't like this. This is intrusive coding.