gpu-compute, mem-ruby: Add comments and reformat code

Change-Id: Id2b3886dce347fdcfcad22009a42b92febc00a6c
2024-02-06 15:32:06 -06:00
parent 7dae25e881
commit 690b2b9462
3 changed files with 24 additions and 14 deletions
--- a/src/gpu-compute/fetch_unit.cc
+++ b/src/gpu-compute/fetch_unit.cc
@@ -493,8 +493,13 @@ FetchUnit::FetchBufDesc::reserveBuf(Addr vaddr)
 void
 FetchUnit::FetchBufDesc::fetchDone(Addr vaddr)
 {
+    // If the returned vaddr is 0, then it belongs to an SQC invalidation
+    // request. This request calls incLGKMInstsIssued() in its execution
+    // path. Since there is no valid memory return response associated
+    // with this instruction, decLGKMInstsIssued() is not executed.
+    // Call it here to decrement the counter, and then invalidate
+    // all buffers.
    if (vaddr == 0) {
-        // S_ICACHE_INV fetch done
        wavefront->decLGKMInstsIssued();
        invBuf();
        return;
--- a/src/gpu-compute/scalar_memory_pipeline.cc
+++ b/src/gpu-compute/scalar_memory_pipeline.cc
@@ -174,10 +174,17 @@ ScalarMemPipeline::injectScalarMemFence(GPUDynInstPtr gpuDynInst,
        req->requestorId(computeUnit.requestorId());
    }

+    // When the SQC invalidate instruction is executed, it calls
+    // injectScalarMemFence. The instruction does not contain an address
+    // as one of its operands. Therefore, set the physical address of the
+    // invalidation request to 0 and handle it in the sequencer
    req->setPaddr(0);

    PacketPtr pkt = nullptr;

+    // If kernelMemSync is true, then the invalidation request is from
+    // kernel launch and is an implicit invalidation. If false, then it is
+    // due to an S_ICACHE_INV instruction
    if (kernelMemSync) {
        req->setCacheCoherenceFlags(Request::INV_L1);
        req->setReqInstSeqNum(gpuDynInst->seqNum());
@@ -186,12 +193,6 @@ ScalarMemPipeline::injectScalarMemFence(GPUDynInstPtr gpuDynInst,
        pkt->pushSenderState(
                new ComputeUnit::SQCPort::SenderState(
                    gpuDynInst->wavefront(), nullptr));
-        ComputeUnit::SQCPort::MemReqEvent *sqc_event =
-                new ComputeUnit::SQCPort::MemReqEvent
-                (computeUnit.sqcPort, pkt);
-
-        computeUnit.schedule(
-                sqc_event, curTick() + computeUnit.scalar_req_tick_latency);
    } else {
        gpuDynInst->setRequestFlags(req);

@@ -201,14 +202,13 @@ ScalarMemPipeline::injectScalarMemFence(GPUDynInstPtr gpuDynInst,
        pkt->pushSenderState(
                new ComputeUnit::SQCPort::SenderState(
                    gpuDynInst->wavefront(), nullptr));
-
-        ComputeUnit::SQCPort::MemReqEvent *sqc_event =
-                new ComputeUnit::SQCPort::MemReqEvent
-                (computeUnit.sqcPort, pkt);
-
-        computeUnit.schedule(
-                sqc_event, curTick() + computeUnit.scalar_req_tick_latency);
    }
+
+    ComputeUnit::SQCPort::MemReqEvent *sqc_event =
+            new ComputeUnit::SQCPort::MemReqEvent
+            (computeUnit.sqcPort, pkt);
+    computeUnit.schedule(
+            sqc_event, curTick() + computeUnit.scalar_req_tick_latency);
 }

 } // namespace gem5
--- a/src/mem/ruby/system/Sequencer.cc
+++ b/src/mem/ruby/system/Sequencer.cc
@@ -350,6 +350,11 @@ Sequencer::insertRequest(PacketPtr pkt, RubyRequestType primary_type,
        return RequestStatus_Ready;
    }

+    // If the command is MemSyncReq, it is used to invalidate the cache.
+    // As the cache invalidation requests are already issued in invL1(),
+    // there is no need to create a new request for them here.
+    // Instead, return RequestStatus_Aliased so that the sequencer skips
+    // an extra issueRequest.
    if (pkt->cmd == MemCmd::MemSyncReq) {
        return RequestStatus_Aliased;
    }