gpu-compute: enable kernel-end WB functionality

Change-Id: Ib17e1d700586d1aa04d408e7b924270f0de82efe
Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/29938
Maintainer: Anthony Gutierrez <anthony.gutierrez@amd.com>
Tested-by: kokoro <noreply+kokoro@google.com>
Reviewed-by: Xianwei Zhang <xianwei.zhang@amd.com>
This commit is contained in:
Xianwei Zhang
2018-06-28 02:34:41 -04:00
committed by Anthony Gutierrez
parent 07fcbf16fc
commit 024f978cff
3 changed files with 27 additions and 18 deletions

View File

@@ -1218,23 +1218,25 @@ ComputeUnit::injectGlobalMemFence(GPUDynInstPtr gpuDynInst,
             schedule(mem_req_event, curTick() + req_tick_latency);
         } else {
-            assert(gpuDynInst->isEndOfKernel());
-            req->setCacheCoherenceFlags(Request::RELEASE);
+            // kernel end release must be enabled
+            assert(shader->impl_kern_end_rel);
+            assert(gpuDynInst->isEndOfKernel());
+            req->setCacheCoherenceFlags(Request::WB_L2);
             req->setReqInstSeqNum(gpuDynInst->seqNum());
             req->setFlags(Request::KERNEL);
             pkt = new Packet(req, MemCmd::MemSyncReq);
             pkt->pushSenderState(
                new ComputeUnit::DataPort::SenderState(gpuDynInst, 0, nullptr));
             EventFunctionWrapper *mem_req_event =
                 memPort[0]->createMemReqEvent(pkt);
             DPRINTF(GPUPort, "CU%d: WF[%d][%d]: index %d, addr %#x scheduling "
                     "a release\n", cu_id, gpuDynInst->simdId,
                     gpuDynInst->wfSlotId, 0, pkt->req->getPaddr());
             schedule(mem_req_event, curTick() + req_tick_latency);
         }
     } else {
         gpuDynInst->setRequestFlags(req);

View File

@@ -223,11 +223,11 @@ Shader::prepareFlush(GPUDynInstPtr gpuDynInst){
     // flush has never been started, performed only once at kernel end
     assert(_dispatcher.getOutstandingWbs(kernId) == 0);
-    // iterate all cus, managed by the shader, to perform flush.
-    for (int i_cu = 0; i_cu < n_cu; ++i_cu) {
-        _dispatcher.updateWbCounter(kernId, +1);
-        cuList[i_cu]->doFlush(gpuDynInst);
-    }
+    // the first cu, managed by the shader, performs flush operation,
+    // assuming that L2 cache is shared by all cus in the shader
+    int i_cu = 0;
+    _dispatcher.updateWbCounter(kernId, +1);
+    cuList[i_cu]->doFlush(gpuDynInst);
 }

 bool

View File

@@ -225,6 +225,9 @@ class Request
      * See the AMD GCN3 ISA Architecture Manual for more
      * details.
      *
+     * INV_L1: L1 cache invalidation
+     * WB_L2: L2 cache writeback
+     *
      * SLC: System Level Coherent. Accesses are forced to miss in
      * the L2 cache and are coherent with system memory.
      *
@@ -237,6 +240,10 @@ class Request
      * between atomic return/no-return operations.
      */
     enum : CacheCoherenceFlagsType {
+        /** mem_sync_op flags */
+        INV_L1 = 0x00000001,
+        WB_L2 = 0x00000020,
+
         /** user-policy flags */
         SLC_BIT = 0x00000080,
         GLC_BIT = 0x00000100,