gpu-compute: Update tokens for flat global/scratch
Memory instructions acquire coalescer tokens in the schedule stage. Currently this is only done for buffer and flat instructions, but not for flat-global or flat-scratch. This change now acquires tokens for flat-global and flat-scratch instructions as well. This provides back-pressure to the CUs and helps to avoid deadlocks in Ruby. The change also handles returning tokens for buffer, flat-global, and flat-scratch instructions; previously this was only done for normal flat instructions, leading to deadlocks in some applications when the tokens were exhausted. To simplify the logic, a needsToken() method was added to GPUDynInst which returns whether the instruction is a buffer access or any flat segment. The waitcnts were also incorrect for flat-global and flat-scratch: we should always decrement the vmem and exp counts for stores, and only normal flat instructions should decrement lgkm. Currently vmem/exp are not decremented for flat-global and flat-scratch, which can lead to deadlock. This change fixes that by always decrementing vmem/exp, and decrementing lgkm only for normal flat instructions. Change-Id: I673f4ac6121e4b5a5e8491bc9130c6d825d95fc5
This commit is contained in:
@@ -43894,9 +43894,11 @@ namespace VegaISA
|
||||
{
|
||||
Wavefront *wf = gpuDynInst->wavefront();
|
||||
|
||||
if (gpuDynInst->exec_mask.none() && isFlat()) {
|
||||
if (gpuDynInst->exec_mask.none()) {
|
||||
wf->decVMemInstsIssued();
|
||||
wf->decLGKMInstsIssued();
|
||||
if (isFlat()) {
|
||||
wf->decLGKMInstsIssued();
|
||||
}
|
||||
return;
|
||||
}
|
||||
|
||||
@@ -43978,9 +43980,11 @@ namespace VegaISA
|
||||
{
|
||||
Wavefront *wf = gpuDynInst->wavefront();
|
||||
|
||||
if (gpuDynInst->exec_mask.none() && isFlat()) {
|
||||
if (gpuDynInst->exec_mask.none()) {
|
||||
wf->decVMemInstsIssued();
|
||||
wf->decLGKMInstsIssued();
|
||||
if (isFlat()) {
|
||||
wf->decLGKMInstsIssued();
|
||||
}
|
||||
return;
|
||||
}
|
||||
|
||||
@@ -44063,9 +44067,11 @@ namespace VegaISA
|
||||
{
|
||||
Wavefront *wf = gpuDynInst->wavefront();
|
||||
|
||||
if (gpuDynInst->exec_mask.none() && isFlat()) {
|
||||
if (gpuDynInst->exec_mask.none()) {
|
||||
wf->decVMemInstsIssued();
|
||||
wf->decLGKMInstsIssued();
|
||||
if (isFlat()) {
|
||||
wf->decLGKMInstsIssued();
|
||||
}
|
||||
return;
|
||||
}
|
||||
|
||||
@@ -44118,9 +44124,11 @@ namespace VegaISA
|
||||
{
|
||||
Wavefront *wf = gpuDynInst->wavefront();
|
||||
|
||||
if (gpuDynInst->exec_mask.none() && isFlat()) {
|
||||
if (gpuDynInst->exec_mask.none()) {
|
||||
wf->decVMemInstsIssued();
|
||||
wf->decLGKMInstsIssued();
|
||||
if (isFlat()) {
|
||||
wf->decLGKMInstsIssued();
|
||||
}
|
||||
return;
|
||||
}
|
||||
|
||||
@@ -44173,9 +44181,11 @@ namespace VegaISA
|
||||
{
|
||||
Wavefront *wf = gpuDynInst->wavefront();
|
||||
|
||||
if (gpuDynInst->exec_mask.none() && isFlat()) {
|
||||
if (gpuDynInst->exec_mask.none()) {
|
||||
wf->decVMemInstsIssued();
|
||||
wf->decLGKMInstsIssued();
|
||||
if (isFlat()) {
|
||||
wf->decLGKMInstsIssued();
|
||||
}
|
||||
return;
|
||||
}
|
||||
|
||||
@@ -44237,9 +44247,11 @@ namespace VegaISA
|
||||
{
|
||||
Wavefront *wf = gpuDynInst->wavefront();
|
||||
|
||||
if (gpuDynInst->exec_mask.none() && isFlat()) {
|
||||
if (gpuDynInst->exec_mask.none()) {
|
||||
wf->decVMemInstsIssued();
|
||||
wf->decLGKMInstsIssued();
|
||||
if (isFlat()) {
|
||||
wf->decLGKMInstsIssued();
|
||||
}
|
||||
return;
|
||||
}
|
||||
|
||||
@@ -44304,9 +44316,11 @@ namespace VegaISA
|
||||
{
|
||||
Wavefront *wf = gpuDynInst->wavefront();
|
||||
|
||||
if (gpuDynInst->exec_mask.none() && isFlat()) {
|
||||
if (gpuDynInst->exec_mask.none()) {
|
||||
wf->decVMemInstsIssued();
|
||||
wf->decLGKMInstsIssued();
|
||||
if (isFlat()) {
|
||||
wf->decLGKMInstsIssued();
|
||||
}
|
||||
wf->decExpInstsIssued();
|
||||
return;
|
||||
}
|
||||
@@ -44361,9 +44375,11 @@ namespace VegaISA
|
||||
{
|
||||
Wavefront *wf = gpuDynInst->wavefront();
|
||||
|
||||
if (gpuDynInst->exec_mask.none() && isFlat()) {
|
||||
if (gpuDynInst->exec_mask.none()) {
|
||||
wf->decVMemInstsIssued();
|
||||
wf->decLGKMInstsIssued();
|
||||
if (isFlat()) {
|
||||
wf->decLGKMInstsIssued();
|
||||
}
|
||||
wf->decExpInstsIssued();
|
||||
return;
|
||||
}
|
||||
@@ -44418,9 +44434,11 @@ namespace VegaISA
|
||||
{
|
||||
Wavefront *wf = gpuDynInst->wavefront();
|
||||
|
||||
if (gpuDynInst->exec_mask.none() && isFlat()) {
|
||||
if (gpuDynInst->exec_mask.none()) {
|
||||
wf->decVMemInstsIssued();
|
||||
wf->decLGKMInstsIssued();
|
||||
if (isFlat()) {
|
||||
wf->decLGKMInstsIssued();
|
||||
}
|
||||
wf->decExpInstsIssued();
|
||||
return;
|
||||
}
|
||||
@@ -44476,9 +44494,11 @@ namespace VegaISA
|
||||
{
|
||||
Wavefront *wf = gpuDynInst->wavefront();
|
||||
|
||||
if (gpuDynInst->exec_mask.none() && isFlat()) {
|
||||
if (gpuDynInst->exec_mask.none()) {
|
||||
wf->decVMemInstsIssued();
|
||||
wf->decLGKMInstsIssued();
|
||||
if (isFlat()) {
|
||||
wf->decLGKMInstsIssued();
|
||||
}
|
||||
wf->decExpInstsIssued();
|
||||
return;
|
||||
}
|
||||
@@ -44534,9 +44554,11 @@ namespace VegaISA
|
||||
{
|
||||
Wavefront *wf = gpuDynInst->wavefront();
|
||||
|
||||
if (gpuDynInst->exec_mask.none() && isFlat()) {
|
||||
if (gpuDynInst->exec_mask.none()) {
|
||||
wf->decVMemInstsIssued();
|
||||
wf->decLGKMInstsIssued();
|
||||
if (isFlat()) {
|
||||
wf->decLGKMInstsIssued();
|
||||
}
|
||||
wf->decExpInstsIssued();
|
||||
return;
|
||||
}
|
||||
@@ -44600,9 +44622,11 @@ namespace VegaISA
|
||||
{
|
||||
Wavefront *wf = gpuDynInst->wavefront();
|
||||
|
||||
if (gpuDynInst->exec_mask.none() && isFlat()) {
|
||||
if (gpuDynInst->exec_mask.none()) {
|
||||
wf->decVMemInstsIssued();
|
||||
wf->decLGKMInstsIssued();
|
||||
if (isFlat()) {
|
||||
wf->decLGKMInstsIssued();
|
||||
}
|
||||
wf->decExpInstsIssued();
|
||||
return;
|
||||
}
|
||||
@@ -44677,9 +44701,11 @@ namespace VegaISA
|
||||
{
|
||||
Wavefront *wf = gpuDynInst->wavefront();
|
||||
|
||||
if (gpuDynInst->exec_mask.none() && isFlat()) {
|
||||
if (gpuDynInst->exec_mask.none()) {
|
||||
wf->decVMemInstsIssued();
|
||||
wf->decLGKMInstsIssued();
|
||||
if (isFlat()) {
|
||||
wf->decLGKMInstsIssued();
|
||||
}
|
||||
return;
|
||||
}
|
||||
|
||||
@@ -44757,9 +44783,11 @@ namespace VegaISA
|
||||
{
|
||||
Wavefront *wf = gpuDynInst->wavefront();
|
||||
|
||||
if (gpuDynInst->exec_mask.none() && isFlat()) {
|
||||
if (gpuDynInst->exec_mask.none()) {
|
||||
wf->decVMemInstsIssued();
|
||||
wf->decLGKMInstsIssued();
|
||||
if (isFlat()) {
|
||||
wf->decLGKMInstsIssued();
|
||||
}
|
||||
return;
|
||||
}
|
||||
|
||||
@@ -44837,9 +44865,11 @@ namespace VegaISA
|
||||
{
|
||||
Wavefront *wf = gpuDynInst->wavefront();
|
||||
|
||||
if (gpuDynInst->exec_mask.none() && isFlat()) {
|
||||
if (gpuDynInst->exec_mask.none()) {
|
||||
wf->decVMemInstsIssued();
|
||||
wf->decLGKMInstsIssued();
|
||||
if (isFlat()) {
|
||||
wf->decLGKMInstsIssued();
|
||||
}
|
||||
return;
|
||||
}
|
||||
|
||||
@@ -45370,9 +45400,11 @@ namespace VegaISA
|
||||
{
|
||||
Wavefront *wf = gpuDynInst->wavefront();
|
||||
|
||||
if (gpuDynInst->exec_mask.none() && isFlat()) {
|
||||
if (gpuDynInst->exec_mask.none()) {
|
||||
wf->decVMemInstsIssued();
|
||||
wf->decLGKMInstsIssued();
|
||||
if (isFlat()) {
|
||||
wf->decLGKMInstsIssued();
|
||||
}
|
||||
return;
|
||||
}
|
||||
|
||||
@@ -45451,9 +45483,11 @@ namespace VegaISA
|
||||
{
|
||||
Wavefront *wf = gpuDynInst->wavefront();
|
||||
|
||||
if (gpuDynInst->exec_mask.none() && isFlat()) {
|
||||
if (gpuDynInst->exec_mask.none()) {
|
||||
wf->decVMemInstsIssued();
|
||||
wf->decLGKMInstsIssued();
|
||||
if (isFlat()) {
|
||||
wf->decLGKMInstsIssued();
|
||||
}
|
||||
return;
|
||||
}
|
||||
|
||||
|
||||
@@ -582,6 +582,12 @@ GPUDynInst::readsFlatScratch() const
|
||||
return false;
|
||||
}
|
||||
|
||||
// Returns true if this memory instruction must acquire a coalescer token
// in the schedule stage (and return it later): buffer/global-memory, flat,
// flat-global, or flat-scratch accesses all need one.
bool
|
||||
GPUDynInst::needsToken() const
|
||||
{
|
||||
// Any global-memory or flat-segment access takes a token; this unifies
// the check that previously covered only buffer and normal flat insts.
return isGlobalMem() || isFlat() || isFlatGlobal() || isFlatScratch();
|
||||
}
|
||||
|
||||
bool
|
||||
GPUDynInst::isAtomicAnd() const
|
||||
{
|
||||
|
||||
@@ -257,6 +257,7 @@ class GPUDynInst : public GPUExecContext
|
||||
bool writesFlatScratch() const;
|
||||
bool readsExecMask() const;
|
||||
bool writesExecMask() const;
|
||||
bool needsToken() const;
|
||||
|
||||
bool isAtomicAnd() const;
|
||||
bool isAtomicOr() const;
|
||||
|
||||
@@ -579,7 +579,7 @@ ScheduleStage::fillDispatchList()
|
||||
// operation.
|
||||
GPUDynInstPtr mp = schIter->first;
|
||||
if (!mp->isMemSync() && !mp->isScalar() &&
|
||||
(mp->isGlobalMem() || mp->isFlat())) {
|
||||
mp->needsToken()) {
|
||||
computeUnit.globalMemoryPipe.acqCoalescerToken(mp);
|
||||
}
|
||||
|
||||
|
||||
@@ -1082,7 +1082,7 @@ Wavefront::exec()
|
||||
* we return here to avoid spurious errors related to flat insts
|
||||
* and their address segment resolution.
|
||||
*/
|
||||
if (execMask().none() && ii->isFlat()) {
|
||||
if (execMask().none() && ii->needsToken()) {
|
||||
computeUnit->getTokenManager()->recvTokens(1);
|
||||
return;
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user