gpu-compute, arch-gcn3: Change how waitcnts are implemented
Use single counters per memory operation type and increment them upon issue, not execute. Change-Id: I6afc0b66b21882538ef90a14a57a3ab3cc7bd6f3 Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/29973 Reviewed-by: Anthony Gutierrez <anthony.gutierrez@amd.com> Maintainer: Anthony Gutierrez <anthony.gutierrez@amd.com> Tested-by: kokoro <noreply+kokoro@google.com>
This commit is contained in:
committed by
Anthony Gutierrez
parent
63c76448eb
commit
4d737462c2
@@ -32565,6 +32565,7 @@ namespace Gcn3ISA
|
||||
|
||||
vdst.write();
|
||||
|
||||
wf->decLGKMInstsIssued();
|
||||
wf->rdLmReqsInPipe--;
|
||||
wf->validateRequestCounters();
|
||||
} // execute
|
||||
@@ -32635,6 +32636,7 @@ namespace Gcn3ISA
|
||||
|
||||
vdst.write();
|
||||
|
||||
wf->decLGKMInstsIssued();
|
||||
wf->rdLmReqsInPipe--;
|
||||
wf->validateRequestCounters();
|
||||
} // execute
|
||||
@@ -39400,6 +39402,8 @@ namespace Gcn3ISA
|
||||
Wavefront *wf = gpuDynInst->wavefront();
|
||||
|
||||
if (wf->execMask().none()) {
|
||||
wf->decVMemInstsIssued();
|
||||
wf->decLGKMInstsIssued();
|
||||
wf->rdGmReqsInPipe--;
|
||||
wf->rdLmReqsInPipe--;
|
||||
return;
|
||||
@@ -39496,6 +39500,8 @@ namespace Gcn3ISA
|
||||
Wavefront *wf = gpuDynInst->wavefront();
|
||||
|
||||
if (wf->execMask().none()) {
|
||||
wf->decVMemInstsIssued();
|
||||
wf->decLGKMInstsIssued();
|
||||
wf->rdGmReqsInPipe--;
|
||||
wf->rdLmReqsInPipe--;
|
||||
return;
|
||||
@@ -39592,6 +39598,8 @@ namespace Gcn3ISA
|
||||
Wavefront *wf = gpuDynInst->wavefront();
|
||||
|
||||
if (wf->execMask().none()) {
|
||||
wf->decVMemInstsIssued();
|
||||
wf->decLGKMInstsIssued();
|
||||
wf->rdGmReqsInPipe--;
|
||||
wf->rdLmReqsInPipe--;
|
||||
return;
|
||||
@@ -39660,6 +39668,8 @@ namespace Gcn3ISA
|
||||
Wavefront *wf = gpuDynInst->wavefront();
|
||||
|
||||
if (wf->execMask().none()) {
|
||||
wf->decVMemInstsIssued();
|
||||
wf->decLGKMInstsIssued();
|
||||
wf->rdGmReqsInPipe--;
|
||||
wf->rdLmReqsInPipe--;
|
||||
return;
|
||||
@@ -39728,6 +39738,8 @@ namespace Gcn3ISA
|
||||
Wavefront *wf = gpuDynInst->wavefront();
|
||||
|
||||
if (wf->execMask().none()) {
|
||||
wf->decVMemInstsIssued();
|
||||
wf->decLGKMInstsIssued();
|
||||
wf->rdGmReqsInPipe--;
|
||||
wf->rdLmReqsInPipe--;
|
||||
return;
|
||||
@@ -39805,6 +39817,8 @@ namespace Gcn3ISA
|
||||
Wavefront *wf = gpuDynInst->wavefront();
|
||||
|
||||
if (wf->execMask().none()) {
|
||||
wf->decVMemInstsIssued();
|
||||
wf->decLGKMInstsIssued();
|
||||
wf->rdGmReqsInPipe--;
|
||||
wf->rdLmReqsInPipe--;
|
||||
}
|
||||
@@ -39884,6 +39898,8 @@ namespace Gcn3ISA
|
||||
Wavefront *wf = gpuDynInst->wavefront();
|
||||
|
||||
if (wf->execMask().none()) {
|
||||
wf->decVMemInstsIssued();
|
||||
wf->decLGKMInstsIssued();
|
||||
wf->wrGmReqsInPipe--;
|
||||
wf->wrLmReqsInPipe--;
|
||||
return;
|
||||
@@ -39952,6 +39968,8 @@ namespace Gcn3ISA
|
||||
Wavefront *wf = gpuDynInst->wavefront();
|
||||
|
||||
if (wf->execMask().none()) {
|
||||
wf->decVMemInstsIssued();
|
||||
wf->decLGKMInstsIssued();
|
||||
wf->wrGmReqsInPipe--;
|
||||
wf->wrLmReqsInPipe--;
|
||||
return;
|
||||
@@ -40021,6 +40039,8 @@ namespace Gcn3ISA
|
||||
Wavefront *wf = gpuDynInst->wavefront();
|
||||
|
||||
if (wf->execMask().none()) {
|
||||
wf->decVMemInstsIssued();
|
||||
wf->decLGKMInstsIssued();
|
||||
wf->wrGmReqsInPipe--;
|
||||
wf->wrLmReqsInPipe--;
|
||||
return;
|
||||
@@ -40090,6 +40110,8 @@ namespace Gcn3ISA
|
||||
Wavefront *wf = gpuDynInst->wavefront();
|
||||
|
||||
if (wf->execMask().none()) {
|
||||
wf->decVMemInstsIssued();
|
||||
wf->decLGKMInstsIssued();
|
||||
wf->wrGmReqsInPipe--;
|
||||
wf->wrLmReqsInPipe--;
|
||||
return;
|
||||
@@ -40159,6 +40181,8 @@ namespace Gcn3ISA
|
||||
Wavefront *wf = gpuDynInst->wavefront();
|
||||
|
||||
if (wf->execMask().none()) {
|
||||
wf->decVMemInstsIssued();
|
||||
wf->decLGKMInstsIssued();
|
||||
wf->wrGmReqsInPipe--;
|
||||
wf->wrLmReqsInPipe--;
|
||||
return;
|
||||
@@ -40237,6 +40261,8 @@ namespace Gcn3ISA
|
||||
Wavefront *wf = gpuDynInst->wavefront();
|
||||
|
||||
if (wf->execMask().none()) {
|
||||
wf->decVMemInstsIssued();
|
||||
wf->decLGKMInstsIssued();
|
||||
wf->wrGmReqsInPipe--;
|
||||
wf->wrLmReqsInPipe--;
|
||||
return;
|
||||
@@ -40325,6 +40351,8 @@ namespace Gcn3ISA
|
||||
Wavefront *wf = gpuDynInst->wavefront();
|
||||
|
||||
if (wf->execMask().none()) {
|
||||
wf->decVMemInstsIssued();
|
||||
wf->decLGKMInstsIssued();
|
||||
wf->wrGmReqsInPipe--;
|
||||
wf->rdGmReqsInPipe--;
|
||||
return;
|
||||
@@ -40425,6 +40453,8 @@ namespace Gcn3ISA
|
||||
Wavefront *wf = gpuDynInst->wavefront();
|
||||
|
||||
if (wf->execMask().none()) {
|
||||
wf->decVMemInstsIssued();
|
||||
wf->decLGKMInstsIssued();
|
||||
wf->wrGmReqsInPipe--;
|
||||
wf->rdGmReqsInPipe--;
|
||||
return;
|
||||
@@ -40526,6 +40556,8 @@ namespace Gcn3ISA
|
||||
Wavefront *wf = gpuDynInst->wavefront();
|
||||
|
||||
if (wf->execMask().none()) {
|
||||
wf->decVMemInstsIssued();
|
||||
wf->decLGKMInstsIssued();
|
||||
wf->wrGmReqsInPipe--;
|
||||
wf->rdGmReqsInPipe--;
|
||||
return;
|
||||
@@ -40893,6 +40925,8 @@ namespace Gcn3ISA
|
||||
Wavefront *wf = gpuDynInst->wavefront();
|
||||
|
||||
if (wf->execMask().none()) {
|
||||
wf->decVMemInstsIssued();
|
||||
wf->decLGKMInstsIssued();
|
||||
wf->wrGmReqsInPipe--;
|
||||
wf->rdGmReqsInPipe--;
|
||||
return;
|
||||
@@ -40995,6 +41029,8 @@ namespace Gcn3ISA
|
||||
Wavefront *wf = gpuDynInst->wavefront();
|
||||
|
||||
if (wf->execMask().none()) {
|
||||
wf->decVMemInstsIssued();
|
||||
wf->decLGKMInstsIssued();
|
||||
wf->wrGmReqsInPipe--;
|
||||
wf->rdGmReqsInPipe--;
|
||||
return;
|
||||
|
||||
@@ -130,6 +130,7 @@ GlobalMemPipeline::exec()
|
||||
DPRINTF(GPUMem, "CU%d: WF[%d][%d]: Completing global mem instr %s\n",
|
||||
m->cu_id, m->simdId, m->wfSlotId, m->disassemble());
|
||||
m->completeAcc(m);
|
||||
w->decVMemInstsIssued();
|
||||
|
||||
if (m->isLoad() || m->isAtomicRet()) {
|
||||
w->computeUnit->vrf[w->simdId]->
|
||||
|
||||
@@ -819,6 +819,7 @@ GPUDynInst::resolveFlatSegment(const VectorMask &mask)
|
||||
if (executedAs() == Enums::SC_GLOBAL) {
|
||||
// no transformation for global segment
|
||||
wavefront()->execUnitId = wavefront()->flatGmUnitId;
|
||||
wavefront()->decLGKMInstsIssued();
|
||||
if (isLoad()) {
|
||||
wavefront()->rdLmReqsInPipe--;
|
||||
} else if (isStore()) {
|
||||
@@ -838,6 +839,7 @@ GPUDynInst::resolveFlatSegment(const VectorMask &mask)
|
||||
}
|
||||
}
|
||||
wavefront()->execUnitId = wavefront()->flatLmUnitId;
|
||||
wavefront()->decVMemInstsIssued();
|
||||
if (isLoad()) {
|
||||
wavefront()->rdGmReqsInPipe--;
|
||||
} else if (isStore()) {
|
||||
@@ -897,6 +899,7 @@ GPUDynInst::resolveFlatSegment(const VectorMask &mask)
|
||||
}
|
||||
}
|
||||
wavefront()->execUnitId = wavefront()->flatLmUnitId;
|
||||
wavefront()->decLGKMInstsIssued();
|
||||
if (isLoad()) {
|
||||
wavefront()->rdGmReqsInPipe--;
|
||||
} else if (isStore()) {
|
||||
|
||||
@@ -76,6 +76,7 @@ LocalMemPipeline::exec()
|
||||
DPRINTF(GPUMem, "CU%d: WF[%d][%d]: Completing local mem instr %s\n",
|
||||
m->cu_id, m->simdId, m->wfSlotId, m->disassemble());
|
||||
m->completeAcc(m);
|
||||
w->decLGKMInstsIssued();
|
||||
|
||||
if (m->isLoad() || m->isAtomicRet()) {
|
||||
w->computeUnit->vrf[w->simdId]->
|
||||
|
||||
@@ -85,6 +85,7 @@ ScalarMemPipeline::exec()
|
||||
}
|
||||
|
||||
m->completeAcc(m);
|
||||
w->decLGKMInstsIssued();
|
||||
|
||||
if (m->isLoad() || m->isAtomic()) {
|
||||
returnedLoads.pop();
|
||||
|
||||
@@ -135,6 +135,15 @@ ScheduleStage::exec()
|
||||
// this wave spends in SCH stage.
|
||||
wf->schCycles++;
|
||||
addToSchListStalls[j]++;
|
||||
} else {
|
||||
if (gpu_dyn_inst->isScalar() || gpu_dyn_inst->isGroupSeg()) {
|
||||
wf->incLGKMInstsIssued();
|
||||
} else {
|
||||
wf->incVMemInstsIssued();
|
||||
if (gpu_dyn_inst->isFlat()) {
|
||||
wf->incLGKMInstsIssued();
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -53,6 +53,7 @@ Wavefront::Wavefront(const Params *p)
|
||||
: SimObject(p), wfSlotId(p->wf_slot_id), simdId(p->simdId),
|
||||
maxIbSize(p->max_ib_size), _gpuISA(*this),
|
||||
vmWaitCnt(-1), expWaitCnt(-1), lgkmWaitCnt(-1),
|
||||
vmemInstsIssued(0), expInstsIssued(0), lgkmInstsIssued(0),
|
||||
barId(WFBarrier::InvalidID)
|
||||
{
|
||||
lastTrace = 0;
|
||||
@@ -1253,37 +1254,27 @@ Wavefront::waitCntsSatisfied()
|
||||
return false;
|
||||
}
|
||||
|
||||
// If we reach here, that means waitCnt instruction is executed and
|
||||
// the waitcnts are set by the execute method. Check if waitcnts are
|
||||
// satisfied.
|
||||
|
||||
// current number of vector memory ops in flight
|
||||
int vm_cnt = outstandingReqsWrGm + outstandingReqsRdGm;
|
||||
|
||||
// current number of export insts or vector memory writes in flight
|
||||
int exp_cnt = outstandingReqsWrGm;
|
||||
|
||||
// current number of scalar/LDS memory ops in flight
|
||||
// we do not consider GDS/message ops
|
||||
int lgkm_cnt = outstandingReqsWrLm + outstandingReqsRdLm +
|
||||
scalarOutstandingReqsRdGm + scalarOutstandingReqsWrGm;
|
||||
|
||||
/**
|
||||
* If we reach here, that means an s_waitcnt instruction was executed
|
||||
* and the waitcnts are set by the execute method. Check if waitcnts
|
||||
* are satisfied.
|
||||
*/
|
||||
if (vmWaitCnt != -1) {
|
||||
if (vm_cnt > vmWaitCnt) {
|
||||
if (vmemInstsIssued > vmWaitCnt) {
|
||||
// vmWaitCnt not satisfied
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
if (expWaitCnt != -1) {
|
||||
if (exp_cnt > expWaitCnt) {
|
||||
if (expInstsIssued > expWaitCnt) {
|
||||
// expWaitCnt not satisfied
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
if (lgkmWaitCnt != -1) {
|
||||
if (lgkm_cnt > lgkmWaitCnt) {
|
||||
if (lgkmInstsIssued > lgkmWaitCnt) {
|
||||
// lgkmWaitCnt not satisfied
|
||||
return false;
|
||||
}
|
||||
@@ -1355,6 +1346,42 @@ Wavefront::clearWaitCnts()
|
||||
status = S_RUNNING;
|
||||
}
|
||||
|
||||
void
|
||||
Wavefront::incVMemInstsIssued()
|
||||
{
|
||||
++vmemInstsIssued;
|
||||
}
|
||||
|
||||
void
|
||||
Wavefront::incExpInstsIssued()
|
||||
{
|
||||
++expInstsIssued;
|
||||
}
|
||||
|
||||
void
|
||||
Wavefront::incLGKMInstsIssued()
|
||||
{
|
||||
++lgkmInstsIssued;
|
||||
}
|
||||
|
||||
void
|
||||
Wavefront::decVMemInstsIssued()
|
||||
{
|
||||
--vmemInstsIssued;
|
||||
}
|
||||
|
||||
void
|
||||
Wavefront::decExpInstsIssued()
|
||||
{
|
||||
--expInstsIssued;
|
||||
}
|
||||
|
||||
void
|
||||
Wavefront::decLGKMInstsIssued()
|
||||
{
|
||||
--lgkmInstsIssued;
|
||||
}
|
||||
|
||||
Addr
|
||||
Wavefront::pc() const
|
||||
{
|
||||
|
||||
@@ -304,6 +304,13 @@ class Wavefront : public SimObject
|
||||
void setWaitCnts(int vm_wait_cnt, int exp_wait_cnt, int lgkm_wait_cnt);
|
||||
void clearWaitCnts();
|
||||
|
||||
void incVMemInstsIssued();
|
||||
void incExpInstsIssued();
|
||||
void incLGKMInstsIssued();
|
||||
void decVMemInstsIssued();
|
||||
void decExpInstsIssued();
|
||||
void decLGKMInstsIssued();
|
||||
|
||||
/** Freeing VRF space */
|
||||
void freeRegisterFile();
|
||||
|
||||
@@ -343,6 +350,9 @@ class Wavefront : public SimObject
|
||||
int vmWaitCnt;
|
||||
int expWaitCnt;
|
||||
int lgkmWaitCnt;
|
||||
int vmemInstsIssued;
|
||||
int expInstsIssued;
|
||||
int lgkmInstsIssued;
|
||||
status_e status;
|
||||
Addr _pc;
|
||||
VectorMask _execMask;
|
||||
|
||||
Reference in New Issue
Block a user