gpu-compute: Add Icache invalidation at kernel start

Previously, the data caches were invalidated at the start of each kernel. This commit adds support for invalidating instruction cache at kernel launch time Change-Id: I32e50f63fa1442c2514d4dd8f9d7689759f503d3
2024-01-30 14:45:12 -06:00
parent 03838afce0
commit 440409d807
3 changed files with 36 additions and 3 deletions
--- a/src/gpu-compute/compute_unit.cc
+++ b/src/gpu-compute/compute_unit.cc
@@ -397,9 +397,9 @@ ComputeUnit::startWavefront(Wavefront *w, int waveId, LdsChunk *ldsChunk,
 }

 /**
- * trigger invalidate operation in the cu
+ * trigger invalidate operation in the CU
 *
- * req: request initialized in shader, carrying the invlidate flags
+ * req: request initialized in shader, carrying the invalidate flags
 */
 void
 ComputeUnit::doInvalidate(RequestPtr req, int kernId){
@@ -425,6 +425,26 @@ ComputeUnit::doFlush(GPUDynInstPtr gpuDynInst) {
    injectGlobalMemFence(gpuDynInst, true);
 }

+/**
+ * trigger SQCinvalidate operation in the CU
+ *
+ * req: request initialized in shader, carrying the invalidate flags
+ */
+void
+ComputeUnit::doSQCInvalidate(RequestPtr req, int kernId){
+    GPUDynInstPtr gpuDynInst
+        = std::make_shared<GPUDynInst>(this, nullptr,
+            new KernelLaunchStaticInst(), getAndIncSeqNum());
+
+    // kern_id will be used in inv responses
+    gpuDynInst->kern_id = kernId;
+    // update contextId field
+    req->setContext(gpuDynInst->wfDynId);
+
+    gpuDynInst->staticInstruction()->setFlag(GPUStaticInst::Scalar);
+    scalarMemoryPipe.injectScalarMemFence(gpuDynInst, true, req);
+}
+
 // reseting SIMD register pools
 // I couldn't think of any other place and
 // I think it is needed in my implementation
@@ -1012,7 +1032,14 @@ ComputeUnit::DataPort::recvReqRetry()
 bool
 ComputeUnit::SQCPort::recvTimingResp(PacketPtr pkt)
 {
-    computeUnit->handleSQCReturn(pkt);
+    SenderState *sender_state = safe_cast<SenderState*>(pkt->senderState);
+    /** Process the response only if there is a wavefront associated with it.
+     * Otherwise, it is from SQC invalidate that was issued at kernel start
+     * and doesn't have a wavefront or instruction associated with it.
+     */
+    if (sender_state->wavefront != nullptr) {
+        computeUnit->handleSQCReturn(pkt);
+    }

    return true;
 }
--- a/src/gpu-compute/compute_unit.hh
+++ b/src/gpu-compute/compute_unit.hh
@@ -412,6 +412,7 @@ class ComputeUnit : public ClockedObject

    void doInvalidate(RequestPtr req, int kernId);
    void doFlush(GPUDynInstPtr gpuDynInst);
+    void doSQCInvalidate(RequestPtr req, int kernId);

    void dispWorkgroup(HSAQueueEntry *task, int num_wfs_in_wg);
    bool hasDispResources(HSAQueueEntry *task, int &num_wfs_in_wg);
--- a/src/gpu-compute/shader.cc
+++ b/src/gpu-compute/shader.cc
@@ -221,6 +221,11 @@ Shader::prepareInvalidate(HSAQueueEntry *task) {
        // all necessary INV flags are all set now, call cu to execute
        cuList[i_cu]->doInvalidate(req, task->dispatchId());

+
+        if ((i_cu % 4) == 0) {
+            cuList[i_cu]->doSQCInvalidate(req, task->dispatchId());
+        }
+
        // I don't like this. This is intrusive coding.
        cuList[i_cu]->resetRegisterPool();
    }