gpu: fix bugs with MemFence, Flat Instrs and Resource utilization
The "Both" memory fence (OT_BOTH_MEMFENCE) is now flagged as Global Memory only, to avoid oversubscribing resources. Flat instructions now also check whether the Shared Memory resource is busy, for the same reason. All WaitClass resources now use cycles (not ticks) to register the number of pipe stages between Scoreboard and Execute, which makes them consistent with the instruction scheduling logic, which has always used clock cycles.
This commit is contained in:
@@ -84,6 +84,7 @@
|
||||
||(a)==Enums::OT_PRIVATE_ATOMIC \
|
||||
||(a)==Enums::OT_SPILL_ATOMIC \
|
||||
||(a)==Enums::OT_READONLY_ATOMIC \
|
||||
||(a)==Enums::OT_BOTH_MEMFENCE \
|
||||
||(a)==Enums::OT_FLAT_ATOMIC)
|
||||
|
||||
#define IS_OT_ATOMIC_GM(a) ((a)==Enums::OT_GLOBAL_ATOMIC \
|
||||
@@ -93,8 +94,7 @@
|
||||
||(a)==Enums::OT_BOTH_MEMFENCE)
|
||||
|
||||
#define IS_OT_ATOMIC_LM(a) ((a)==Enums::OT_SHARED_ATOMIC \
|
||||
||(a)==Enums::OT_SHARED_MEMFENCE \
|
||||
||(a)==Enums::OT_BOTH_MEMFENCE)
|
||||
||(a)==Enums::OT_SHARED_MEMFENCE)
|
||||
|
||||
// Evaluates to true when operation type `a` equals Enums::OT_PRIVATE_ATOMIC.
#define IS_OT_ATOMIC_PM(a) ((a)==Enums::OT_PRIVATE_ATOMIC)
|
||||
|
||||
|
||||
@@ -587,8 +587,8 @@ void
|
||||
ComputeUnit::init()
|
||||
{
|
||||
// Initialize CU Bus models
|
||||
glbMemToVrfBus.init(&shader->tick_cnt, 1);
|
||||
locMemToVrfBus.init(&shader->tick_cnt, 1);
|
||||
glbMemToVrfBus.init(&shader->tick_cnt, shader->ticks(1));
|
||||
locMemToVrfBus.init(&shader->tick_cnt, shader->ticks(1));
|
||||
nextGlbMemBus = 0;
|
||||
nextLocMemBus = 0;
|
||||
fatal_if(numGlbMemUnits > 1,
|
||||
@@ -596,7 +596,7 @@ ComputeUnit::init()
|
||||
vrfToGlobalMemPipeBus.resize(numGlbMemUnits);
|
||||
for (int j = 0; j < numGlbMemUnits; ++j) {
|
||||
vrfToGlobalMemPipeBus[j] = WaitClass();
|
||||
vrfToGlobalMemPipeBus[j].init(&shader->tick_cnt, 1);
|
||||
vrfToGlobalMemPipeBus[j].init(&shader->tick_cnt, shader->ticks(1));
|
||||
}
|
||||
|
||||
fatal_if(numLocMemUnits > 1,
|
||||
@@ -604,7 +604,7 @@ ComputeUnit::init()
|
||||
vrfToLocalMemPipeBus.resize(numLocMemUnits);
|
||||
for (int j = 0; j < numLocMemUnits; ++j) {
|
||||
vrfToLocalMemPipeBus[j] = WaitClass();
|
||||
vrfToLocalMemPipeBus[j].init(&shader->tick_cnt, 1);
|
||||
vrfToLocalMemPipeBus[j].init(&shader->tick_cnt, shader->ticks(1));
|
||||
}
|
||||
vectorRegsReserved.resize(numSIMDs, 0);
|
||||
aluPipe.resize(numSIMDs);
|
||||
@@ -612,12 +612,12 @@ ComputeUnit::init()
|
||||
|
||||
for (int i = 0; i < numSIMDs + numLocMemUnits + numGlbMemUnits; ++i) {
|
||||
wfWait[i] = WaitClass();
|
||||
wfWait[i].init(&shader->tick_cnt, 1);
|
||||
wfWait[i].init(&shader->tick_cnt, shader->ticks(1));
|
||||
}
|
||||
|
||||
for (int i = 0; i < numSIMDs; ++i) {
|
||||
aluPipe[i] = WaitClass();
|
||||
aluPipe[i].init(&shader->tick_cnt, 1);
|
||||
aluPipe[i].init(&shader->tick_cnt, shader->ticks(1));
|
||||
}
|
||||
|
||||
// Setup space for call args
|
||||
|
||||
@@ -162,7 +162,6 @@ Wavefront::isGmInstruction(GPUDynInstPtr ii)
|
||||
|
||||
if (IS_OT_READ_GM(ii->opType()) || IS_OT_WRITE_GM(ii->opType()) ||
|
||||
IS_OT_ATOMIC_GM(ii->opType())) {
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
@@ -349,7 +348,7 @@ Wavefront::ready(itype_e type)
|
||||
}
|
||||
bool locMemBusRdy = false;
|
||||
bool locMemIssueRdy = false;
|
||||
if (type == I_SHARED) {
|
||||
if (type == I_SHARED || type == I_FLAT) {
|
||||
for (int j=0; j < computeUnit->numLocMemUnits; ++j) {
|
||||
if (computeUnit->vrfToLocalMemPipeBus[j].prerdy())
|
||||
locMemBusRdy = true;
|
||||
@@ -598,7 +597,6 @@ Wavefront::ready(itype_e type)
|
||||
|
||||
DPRINTF(GPUExec, "CU%d: WF[%d][%d]: Ready Inst : %s\n", computeUnit->cu_id,
|
||||
simdId, wfSlotId, ii->disassemble());
|
||||
|
||||
return 1;
|
||||
}
|
||||
|
||||
|
||||
Reference in New Issue
Block a user