diff --git a/src/gpu-compute/global_memory_pipeline.cc b/src/gpu-compute/global_memory_pipeline.cc index bcd93f8862..a2b24e4b47 100644 --- a/src/gpu-compute/global_memory_pipeline.cc +++ b/src/gpu-compute/global_memory_pipeline.cc @@ -130,6 +130,9 @@ GlobalMemPipeline::exec() DPRINTF(GPUMem, "CU%d: WF[%d][%d]: Completing global mem instr %s\n", m->cu_id, m->simdId, m->wfSlotId, m->disassemble()); m->completeAcc(m); + if (m->isFlat() && m->isLoad()) { + w->decLGKMInstsIssued(); + } w->decVMemInstsIssued(); if (m->isLoad() || m->isAtomicRet()) { @@ -193,6 +196,10 @@ GlobalMemPipeline::exec() mp->disassemble(), mp->seqNum()); mp->initiateAcc(mp); + if (mp->isFlat() && mp->isStore()) { + mp->wavefront()->decLGKMInstsIssued(); + } + if (mp->isStore() && mp->isGlobalSeg()) { mp->wavefront()->decExpInstsIssued(); } diff --git a/src/gpu-compute/gpu_dyn_inst.cc b/src/gpu-compute/gpu_dyn_inst.cc index 03ed689519..38e4ecf492 100644 --- a/src/gpu-compute/gpu_dyn_inst.cc +++ b/src/gpu-compute/gpu_dyn_inst.cc @@ -819,7 +819,6 @@ GPUDynInst::resolveFlatSegment(const VectorMask &mask) if (executedAs() == Enums::SC_GLOBAL) { // no transormation for global segment wavefront()->execUnitId = wavefront()->flatGmUnitId; - wavefront()->decLGKMInstsIssued(); if (isLoad()) { wavefront()->rdLmReqsInPipe--; } else if (isStore()) {