From 440409d80708fb7ace1b3749ce478fb6b5ee68d6 Mon Sep 17 00:00:00 2001
From: Vishnu Ramadas
Date: Tue, 30 Jan 2024 14:45:12 -0600
Subject: [PATCH] gpu-compute: Add Icache invalidation at kernel start

Previously, the data caches were invalidated at the start of each
kernel. This commit adds support for invalidating instruction cache at
kernel launch time

Change-Id: I32e50f63fa1442c2514d4dd8f9d7689759f503d3
---
 src/gpu-compute/compute_unit.cc | 33 ++++++++++++++++++++++++++++++---
 src/gpu-compute/compute_unit.hh |  1 +
 src/gpu-compute/shader.cc       |  5 +++++
 3 files changed, 36 insertions(+), 3 deletions(-)

diff --git a/src/gpu-compute/compute_unit.cc b/src/gpu-compute/compute_unit.cc
index f28a8e39c7..ba4c14c4f0 100644
--- a/src/gpu-compute/compute_unit.cc
+++ b/src/gpu-compute/compute_unit.cc
@@ -397,9 +397,9 @@ ComputeUnit::startWavefront(Wavefront *w, int waveId, LdsChunk *ldsChunk,
 }
 
 /**
- * trigger invalidate operation in the cu
+ * trigger invalidate operation in the CU
  *
- * req: request initialized in shader, carrying the invlidate flags
+ * req: request initialized in shader, carrying the invalidate flags
  */
 void
 ComputeUnit::doInvalidate(RequestPtr req, int kernId){
@@ -425,6 +425,26 @@ ComputeUnit::doFlush(GPUDynInstPtr gpuDynInst) {
     injectGlobalMemFence(gpuDynInst, true);
 }
 
+/**
+ * trigger SQCinvalidate operation in the CU
+ *
+ * req: request initialized in shader, carrying the invalidate flags
+ */
+void
+ComputeUnit::doSQCInvalidate(RequestPtr req, int kernId){
+    GPUDynInstPtr gpuDynInst
+        = std::make_shared<GPUDynInst>(this, nullptr,
+            new KernelLaunchStaticInst(), getAndIncSeqNum());
+
+    // kern_id will be used in inv responses
+    gpuDynInst->kern_id = kernId;
+    // update contextId field
+    req->setContext(gpuDynInst->wfDynId);
+
+    gpuDynInst->staticInstruction()->setFlag(GPUStaticInst::Scalar);
+    scalarMemoryPipe.injectScalarMemFence(gpuDynInst, true, req);
+}
+
 // reseting SIMD register pools
 // I couldn't think of any other place and
 // I think it is needed in my implementation
@@ -1012,7 +1032,14 @@ ComputeUnit::DataPort::recvReqRetry()
 bool
 ComputeUnit::SQCPort::recvTimingResp(PacketPtr pkt)
 {
-    computeUnit->handleSQCReturn(pkt);
+    SenderState *sender_state = safe_cast<SenderState*>(pkt->senderState);
+    /** Process the response only if there is a wavefront associated with it.
+     * Otherwise, it is from SQC invalidate that was issued at kernel start
+     * and doesn't have a wavefront or instruction associated with it.
+     */
+    if (sender_state->wavefront != nullptr) {
+        computeUnit->handleSQCReturn(pkt);
+    }
 
     return true;
 }
diff --git a/src/gpu-compute/compute_unit.hh b/src/gpu-compute/compute_unit.hh
index 24324bb515..7e3f05d070 100644
--- a/src/gpu-compute/compute_unit.hh
+++ b/src/gpu-compute/compute_unit.hh
@@ -412,6 +412,7 @@ class ComputeUnit : public ClockedObject
 
     void doInvalidate(RequestPtr req, int kernId);
     void doFlush(GPUDynInstPtr gpuDynInst);
+    void doSQCInvalidate(RequestPtr req, int kernId);
 
     void dispWorkgroup(HSAQueueEntry *task, int num_wfs_in_wg);
     bool hasDispResources(HSAQueueEntry *task, int &num_wfs_in_wg);
diff --git a/src/gpu-compute/shader.cc b/src/gpu-compute/shader.cc
index e13e7c9cf4..a83b413cf9 100644
--- a/src/gpu-compute/shader.cc
+++ b/src/gpu-compute/shader.cc
@@ -221,6 +221,11 @@ Shader::prepareInvalidate(HSAQueueEntry *task) {
 
         // all necessary INV flags are all set now, call cu to execute
         cuList[i_cu]->doInvalidate(req, task->dispatchId());
+
+        if ((i_cu % 4) == 0) {
+            cuList[i_cu]->doSQCInvalidate(req, task->dispatchId());
+        }
+
         // I don't like this. This is intrusive coding.
         cuList[i_cu]->resetRegisterPool();
     }