diff --git a/src/gpu-compute/fetch_unit.cc b/src/gpu-compute/fetch_unit.cc index 66abfe1fb7..3fe181b519 100644 --- a/src/gpu-compute/fetch_unit.cc +++ b/src/gpu-compute/fetch_unit.cc @@ -493,8 +493,13 @@ FetchUnit::FetchBufDesc::reserveBuf(Addr vaddr) void FetchUnit::FetchBufDesc::fetchDone(Addr vaddr) { + // If the return vaddr is 0, then it belongs to an SQC invalidation + // request. This request calls incLGKMInstsIssued() function in its + // execution path. Since there is no valid memory return response + // associated with this instruction, decLGKMInstsIssued() is not + // executed. Do this here to decrement the counter and invalidate + // all buffers if (vaddr == 0) { - // S_ICACHE_INV fetch done wavefront->decLGKMInstsIssued(); invBuf(); return; diff --git a/src/gpu-compute/scalar_memory_pipeline.cc b/src/gpu-compute/scalar_memory_pipeline.cc index 767e4e05a7..54819e7d3f 100644 --- a/src/gpu-compute/scalar_memory_pipeline.cc +++ b/src/gpu-compute/scalar_memory_pipeline.cc @@ -174,10 +174,17 @@ ScalarMemPipeline::injectScalarMemFence(GPUDynInstPtr gpuDynInst, req->requestorId(computeUnit.requestorId()); } + // When the SQC invalidate instruction is executed, it calls + // injectScalarMemFence. The instruction does not contain an address + // as one of its operands. 
Therefore, set the physical address of the + // invalidation request to 0 and handle it in the sequencer req->setPaddr(0); PacketPtr pkt = nullptr; + // If kernelMemSync is true, then the invalidation request is from + // kernel launch and is an implicit invalidation.If false, then it is + // due to an S_ICACHE_INV instruction if (kernelMemSync) { req->setCacheCoherenceFlags(Request::INV_L1); req->setReqInstSeqNum(gpuDynInst->seqNum()); @@ -186,12 +193,6 @@ ScalarMemPipeline::injectScalarMemFence(GPUDynInstPtr gpuDynInst, pkt->pushSenderState( new ComputeUnit::SQCPort::SenderState( gpuDynInst->wavefront(), nullptr)); - ComputeUnit::SQCPort::MemReqEvent *sqc_event = - new ComputeUnit::SQCPort::MemReqEvent - (computeUnit.sqcPort, pkt); - - computeUnit.schedule( - sqc_event, curTick() + computeUnit.scalar_req_tick_latency); } else { gpuDynInst->setRequestFlags(req); @@ -201,14 +202,13 @@ ScalarMemPipeline::injectScalarMemFence(GPUDynInstPtr gpuDynInst, pkt->pushSenderState( new ComputeUnit::SQCPort::SenderState( gpuDynInst->wavefront(), nullptr)); - - ComputeUnit::SQCPort::MemReqEvent *sqc_event = - new ComputeUnit::SQCPort::MemReqEvent - (computeUnit.sqcPort, pkt); - - computeUnit.schedule( - sqc_event, curTick() + computeUnit.scalar_req_tick_latency); } + + ComputeUnit::SQCPort::MemReqEvent *sqc_event = + new ComputeUnit::SQCPort::MemReqEvent + (computeUnit.sqcPort, pkt); + computeUnit.schedule( + sqc_event, curTick() + computeUnit.scalar_req_tick_latency); } } // namespace gem5 diff --git a/src/mem/ruby/system/Sequencer.cc b/src/mem/ruby/system/Sequencer.cc index 0a37c64adf..4fef7090b6 100644 --- a/src/mem/ruby/system/Sequencer.cc +++ b/src/mem/ruby/system/Sequencer.cc @@ -350,6 +350,11 @@ Sequencer::insertRequest(PacketPtr pkt, RubyRequestType primary_type, return RequestStatus_Ready; } + // If command is MemSyncReq, it is used to invalidate the cache. 
+ // As the cache invalidation requests are already issued in invL1(), + // there is no need to create a new request for it here. + // Instead, return RequestStatus_Aliased and make the sequencer skip + // an extra issueRequest call. if (pkt->cmd == MemCmd::MemSyncReq) { return RequestStatus_Aliased; }