diff --git a/src/arch/amdgpu/vega/insts/instructions.cc b/src/arch/amdgpu/vega/insts/instructions.cc index b0f8c908ed..bb6a2233cd 100644 --- a/src/arch/amdgpu/vega/insts/instructions.cc +++ b/src/arch/amdgpu/vega/insts/instructions.cc @@ -43894,9 +43894,11 @@ namespace VegaISA { Wavefront *wf = gpuDynInst->wavefront(); - if (gpuDynInst->exec_mask.none() && isFlat()) { + if (gpuDynInst->exec_mask.none()) { wf->decVMemInstsIssued(); - wf->decLGKMInstsIssued(); + if (isFlat()) { + wf->decLGKMInstsIssued(); + } return; } @@ -43978,9 +43980,11 @@ namespace VegaISA { Wavefront *wf = gpuDynInst->wavefront(); - if (gpuDynInst->exec_mask.none() && isFlat()) { + if (gpuDynInst->exec_mask.none()) { wf->decVMemInstsIssued(); - wf->decLGKMInstsIssued(); + if (isFlat()) { + wf->decLGKMInstsIssued(); + } return; } @@ -44063,9 +44067,11 @@ namespace VegaISA { Wavefront *wf = gpuDynInst->wavefront(); - if (gpuDynInst->exec_mask.none() && isFlat()) { + if (gpuDynInst->exec_mask.none()) { wf->decVMemInstsIssued(); - wf->decLGKMInstsIssued(); + if (isFlat()) { + wf->decLGKMInstsIssued(); + } return; } @@ -44118,9 +44124,11 @@ namespace VegaISA { Wavefront *wf = gpuDynInst->wavefront(); - if (gpuDynInst->exec_mask.none() && isFlat()) { + if (gpuDynInst->exec_mask.none()) { wf->decVMemInstsIssued(); - wf->decLGKMInstsIssued(); + if (isFlat()) { + wf->decLGKMInstsIssued(); + } return; } @@ -44173,9 +44181,11 @@ namespace VegaISA { Wavefront *wf = gpuDynInst->wavefront(); - if (gpuDynInst->exec_mask.none() && isFlat()) { + if (gpuDynInst->exec_mask.none()) { wf->decVMemInstsIssued(); - wf->decLGKMInstsIssued(); + if (isFlat()) { + wf->decLGKMInstsIssued(); + } return; } @@ -44237,9 +44247,11 @@ namespace VegaISA { Wavefront *wf = gpuDynInst->wavefront(); - if (gpuDynInst->exec_mask.none() && isFlat()) { + if (gpuDynInst->exec_mask.none()) { wf->decVMemInstsIssued(); - wf->decLGKMInstsIssued(); + if (isFlat()) { + wf->decLGKMInstsIssued(); + } return; } @@ -44304,9 +44316,11 @@ namespace VegaISA { Wavefront *wf = gpuDynInst->wavefront(); - if (gpuDynInst->exec_mask.none() && isFlat()) { + if (gpuDynInst->exec_mask.none()) { wf->decVMemInstsIssued(); - wf->decLGKMInstsIssued(); + if (isFlat()) { + wf->decLGKMInstsIssued(); + } wf->decExpInstsIssued(); return; } @@ -44361,9 +44375,11 @@ namespace VegaISA { Wavefront *wf = gpuDynInst->wavefront(); - if (gpuDynInst->exec_mask.none() && isFlat()) { + if (gpuDynInst->exec_mask.none()) { wf->decVMemInstsIssued(); - wf->decLGKMInstsIssued(); + if (isFlat()) { + wf->decLGKMInstsIssued(); + } wf->decExpInstsIssued(); return; } @@ -44418,9 +44434,11 @@ namespace VegaISA { Wavefront *wf = gpuDynInst->wavefront(); - if (gpuDynInst->exec_mask.none() && isFlat()) { + if (gpuDynInst->exec_mask.none()) { wf->decVMemInstsIssued(); - wf->decLGKMInstsIssued(); + if (isFlat()) { + wf->decLGKMInstsIssued(); + } wf->decExpInstsIssued(); return; } @@ -44476,9 +44494,11 @@ namespace VegaISA { Wavefront *wf = gpuDynInst->wavefront(); - if (gpuDynInst->exec_mask.none() && isFlat()) { + if (gpuDynInst->exec_mask.none()) { wf->decVMemInstsIssued(); - wf->decLGKMInstsIssued(); + if (isFlat()) { + wf->decLGKMInstsIssued(); + } wf->decExpInstsIssued(); return; } @@ -44534,9 +44554,11 @@ namespace VegaISA { Wavefront *wf = gpuDynInst->wavefront(); - if (gpuDynInst->exec_mask.none() && isFlat()) { + if (gpuDynInst->exec_mask.none()) { wf->decVMemInstsIssued(); - wf->decLGKMInstsIssued(); + if (isFlat()) { + wf->decLGKMInstsIssued(); + } wf->decExpInstsIssued(); return; } @@ -44600,9 +44622,11 @@ namespace VegaISA { Wavefront *wf = gpuDynInst->wavefront(); - if (gpuDynInst->exec_mask.none() && isFlat()) { + if (gpuDynInst->exec_mask.none()) { wf->decVMemInstsIssued(); - wf->decLGKMInstsIssued(); + if (isFlat()) { + wf->decLGKMInstsIssued(); + } wf->decExpInstsIssued(); return; } @@ -44677,9 +44701,11 @@ namespace VegaISA { Wavefront *wf = gpuDynInst->wavefront(); - if (gpuDynInst->exec_mask.none() && isFlat()) { + if (gpuDynInst->exec_mask.none()) { wf->decVMemInstsIssued(); - wf->decLGKMInstsIssued(); + if (isFlat()) { + wf->decLGKMInstsIssued(); + } return; } @@ -44757,9 +44783,11 @@ namespace VegaISA { Wavefront *wf = gpuDynInst->wavefront(); - if (gpuDynInst->exec_mask.none() && isFlat()) { + if (gpuDynInst->exec_mask.none()) { wf->decVMemInstsIssued(); - wf->decLGKMInstsIssued(); + if (isFlat()) { + wf->decLGKMInstsIssued(); + } return; } @@ -44837,9 +44865,11 @@ namespace VegaISA { Wavefront *wf = gpuDynInst->wavefront(); - if (gpuDynInst->exec_mask.none() && isFlat()) { + if (gpuDynInst->exec_mask.none()) { wf->decVMemInstsIssued(); - wf->decLGKMInstsIssued(); + if (isFlat()) { + wf->decLGKMInstsIssued(); + } return; } @@ -45370,9 +45400,11 @@ namespace VegaISA { Wavefront *wf = gpuDynInst->wavefront(); - if (gpuDynInst->exec_mask.none() && isFlat()) { + if (gpuDynInst->exec_mask.none()) { wf->decVMemInstsIssued(); - wf->decLGKMInstsIssued(); + if (isFlat()) { + wf->decLGKMInstsIssued(); + } return; } @@ -45451,9 +45483,11 @@ namespace VegaISA { Wavefront *wf = gpuDynInst->wavefront(); - if (gpuDynInst->exec_mask.none() && isFlat()) { + if (gpuDynInst->exec_mask.none()) { wf->decVMemInstsIssued(); - wf->decLGKMInstsIssued(); + if (isFlat()) { + wf->decLGKMInstsIssued(); + } return; } diff --git a/src/gpu-compute/gpu_dyn_inst.cc b/src/gpu-compute/gpu_dyn_inst.cc index 0b394e7e36..c59317d2c4 100644 --- a/src/gpu-compute/gpu_dyn_inst.cc +++ b/src/gpu-compute/gpu_dyn_inst.cc @@ -582,6 +582,12 @@ GPUDynInst::readsFlatScratch() const return false; } +bool +GPUDynInst::needsToken() const +{ + return isGlobalMem() || isFlat() || isFlatGlobal() || isFlatScratch(); +} + bool GPUDynInst::isAtomicAnd() const { diff --git a/src/gpu-compute/gpu_dyn_inst.hh b/src/gpu-compute/gpu_dyn_inst.hh index 558cce8431..6551fa417a 100644 --- a/src/gpu-compute/gpu_dyn_inst.hh +++ b/src/gpu-compute/gpu_dyn_inst.hh @@ -257,6 +257,7 @@ class GPUDynInst : public GPUExecContext bool writesFlatScratch() const; bool readsExecMask() const; bool writesExecMask() const; + bool needsToken() const; bool isAtomicAnd() const; bool isAtomicOr() const; diff --git a/src/gpu-compute/schedule_stage.cc b/src/gpu-compute/schedule_stage.cc index 4c4028b152..0d475c577e 100644 --- a/src/gpu-compute/schedule_stage.cc +++ b/src/gpu-compute/schedule_stage.cc @@ -579,7 +579,7 @@ ScheduleStage::fillDispatchList() // operation. GPUDynInstPtr mp = schIter->first; if (!mp->isMemSync() && !mp->isScalar() && - (mp->isGlobalMem() || mp->isFlat())) { + mp->needsToken()) { computeUnit.globalMemoryPipe.acqCoalescerToken(mp); } diff --git a/src/gpu-compute/wavefront.cc b/src/gpu-compute/wavefront.cc index 8a1adfe802..0bca152e08 100644 --- a/src/gpu-compute/wavefront.cc +++ b/src/gpu-compute/wavefront.cc @@ -1082,7 +1082,7 @@ Wavefront::exec() * we return here to avoid spurious errors related to flat insts * and their address segment resolution. */ - if (execMask().none() && ii->isFlat()) { + if (execMask().none() && ii->needsToken()) { computeUnit->getTokenManager()->recvTokens(1); return; }