diff --git a/src/gpu-compute/fetch_unit.cc b/src/gpu-compute/fetch_unit.cc index 66abfe1fb7..3fe181b519 100644 --- a/src/gpu-compute/fetch_unit.cc +++ b/src/gpu-compute/fetch_unit.cc @@ -493,8 +493,13 @@ FetchUnit::FetchBufDesc::reserveBuf(Addr vaddr) void FetchUnit::FetchBufDesc::fetchDone(Addr vaddr) { + // If the return vaddr is 0, then it belongs to an SQC invalidation + // request. This request calls incLGKMInstsIssued() function in its + // execution path. Since there is no valid memory return response + // associated with this instruction, decLGKMInstsIssued() is not + // executed. Do this here to decrement the counter and invalidate + // all buffers if (vaddr == 0) { - // S_ICACHE_INV fetch done wavefront->decLGKMInstsIssued(); invBuf(); return; diff --git a/src/gpu-compute/scalar_memory_pipeline.cc b/src/gpu-compute/scalar_memory_pipeline.cc index 767e4e05a7..54819e7d3f 100644 --- a/src/gpu-compute/scalar_memory_pipeline.cc +++ b/src/gpu-compute/scalar_memory_pipeline.cc @@ -174,10 +174,17 @@ ScalarMemPipeline::injectScalarMemFence(GPUDynInstPtr gpuDynInst, req->requestorId(computeUnit.requestorId()); } + // When the SQC invalidate instruction is executed, it calls + // injectScalarMemFence. The instruction does not contain an address + // as one of its operands. 
Therefore, set the physical address of the + // invalidation request to 0 and handle it in the sequencer req->setPaddr(0); PacketPtr pkt = nullptr; + // If kernelMemSync is true, then the invalidation request is from + // kernel launch and is an implicit invalidation.If false, then it is + // due to an S_ICACHE_INV instruction if (kernelMemSync) { req->setCacheCoherenceFlags(Request::INV_L1); req->setReqInstSeqNum(gpuDynInst->seqNum()); @@ -186,12 +193,6 @@ ScalarMemPipeline::injectScalarMemFence(GPUDynInstPtr gpuDynInst, pkt->pushSenderState( new ComputeUnit::SQCPort::SenderState( gpuDynInst->wavefront(), nullptr)); - ComputeUnit::SQCPort::MemReqEvent *sqc_event = - new ComputeUnit::SQCPort::MemReqEvent - (computeUnit.sqcPort, pkt); - - computeUnit.schedule( - sqc_event, curTick() + computeUnit.scalar_req_tick_latency); } else { gpuDynInst->setRequestFlags(req); @@ -201,14 +202,13 @@ ScalarMemPipeline::injectScalarMemFence(GPUDynInstPtr gpuDynInst, pkt->pushSenderState( new ComputeUnit::SQCPort::SenderState( gpuDynInst->wavefront(), nullptr)); - - ComputeUnit::SQCPort::MemReqEvent *sqc_event = - new ComputeUnit::SQCPort::MemReqEvent - (computeUnit.sqcPort, pkt); - - computeUnit.schedule( - sqc_event, curTick() + computeUnit.scalar_req_tick_latency); } + + ComputeUnit::SQCPort::MemReqEvent *sqc_event = + new ComputeUnit::SQCPort::MemReqEvent + (computeUnit.sqcPort, pkt); + computeUnit.schedule( + sqc_event, curTick() + computeUnit.scalar_req_tick_latency); } } // namespace gem5 diff --git a/src/mem/ruby/system/Sequencer.cc b/src/mem/ruby/system/Sequencer.cc index 0a37c64adf..4fef7090b6 100644 --- a/src/mem/ruby/system/Sequencer.cc +++ b/src/mem/ruby/system/Sequencer.cc @@ -350,6 +350,11 @@ Sequencer::insertRequest(PacketPtr pkt, RubyRequestType primary_type, return RequestStatus_Ready; } + // If command is MemSyncReq, it is used to invalidate the cache. 
+ // As the cache invalidation requests are already issued in invL1(), + // there is no need to create a new request for it here. + // Instead, return RequestStatus_Aliased and make the sequencer skip + // an extra issueRequest call. if (pkt->cmd == MemCmd::MemSyncReq) { return RequestStatus_Aliased; }