gpu-compute: enable kernel-end WB functionality
Change-Id: Ib17e1d700586d1aa04d408e7b924270f0de82efe
Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/29938
Maintainer: Anthony Gutierrez <anthony.gutierrez@amd.com>
Tested-by: kokoro <noreply+kokoro@google.com>
Reviewed-by: Xianwei Zhang <xianwei.zhang@amd.com>
This commit is contained in:
committed by
Anthony Gutierrez
parent
07fcbf16fc
commit
024f978cff
@@ -1218,23 +1218,25 @@ ComputeUnit::injectGlobalMemFence(GPUDynInstPtr gpuDynInst,
|
||||
|
||||
schedule(mem_req_event, curTick() + req_tick_latency);
|
||||
} else {
|
||||
assert(gpuDynInst->isEndOfKernel());
|
||||
// kernel end release must be enabled
|
||||
assert(shader->impl_kern_end_rel);
|
||||
assert(gpuDynInst->isEndOfKernel());
|
||||
|
||||
req->setCacheCoherenceFlags(Request::RELEASE);
|
||||
req->setReqInstSeqNum(gpuDynInst->seqNum());
|
||||
req->setFlags(Request::KERNEL);
|
||||
pkt = new Packet(req, MemCmd::MemSyncReq);
|
||||
pkt->pushSenderState(
|
||||
new ComputeUnit::DataPort::SenderState(gpuDynInst, 0, nullptr));
|
||||
req->setCacheCoherenceFlags(Request::WB_L2);
|
||||
req->setReqInstSeqNum(gpuDynInst->seqNum());
|
||||
req->setFlags(Request::KERNEL);
|
||||
pkt = new Packet(req, MemCmd::MemSyncReq);
|
||||
pkt->pushSenderState(
|
||||
new ComputeUnit::DataPort::SenderState(gpuDynInst, 0, nullptr));
|
||||
|
||||
EventFunctionWrapper *mem_req_event =
|
||||
memPort[0]->createMemReqEvent(pkt);
|
||||
EventFunctionWrapper *mem_req_event =
|
||||
memPort[0]->createMemReqEvent(pkt);
|
||||
|
||||
DPRINTF(GPUPort, "CU%d: WF[%d][%d]: index %d, addr %#x scheduling "
|
||||
"a release\n", cu_id, gpuDynInst->simdId,
|
||||
gpuDynInst->wfSlotId, 0, pkt->req->getPaddr());
|
||||
DPRINTF(GPUPort, "CU%d: WF[%d][%d]: index %d, addr %#x scheduling "
|
||||
"a release\n", cu_id, gpuDynInst->simdId,
|
||||
gpuDynInst->wfSlotId, 0, pkt->req->getPaddr());
|
||||
|
||||
schedule(mem_req_event, curTick() + req_tick_latency);
|
||||
schedule(mem_req_event, curTick() + req_tick_latency);
|
||||
}
|
||||
} else {
|
||||
gpuDynInst->setRequestFlags(req);
|
||||
|
||||
@@ -223,11 +223,11 @@ Shader::prepareFlush(GPUDynInstPtr gpuDynInst){
|
||||
// flush has never been started, performed only once at kernel end
|
||||
assert(_dispatcher.getOutstandingWbs(kernId) == 0);
|
||||
|
||||
// iterate all cus, managed by the shader, to perform flush.
|
||||
for (int i_cu = 0; i_cu < n_cu; ++i_cu) {
|
||||
_dispatcher.updateWbCounter(kernId, +1);
|
||||
cuList[i_cu]->doFlush(gpuDynInst);
|
||||
}
|
||||
// the first cu, managed by the shader, performs flush operation,
|
||||
// assuming that L2 cache is shared by all cus in the shader
|
||||
int i_cu = 0;
|
||||
_dispatcher.updateWbCounter(kernId, +1);
|
||||
cuList[i_cu]->doFlush(gpuDynInst);
|
||||
}
|
||||
|
||||
bool
|
||||
|
||||
@@ -225,6 +225,9 @@ class Request
|
||||
* See the AMD GCN3 ISA Architecture Manual for more
|
||||
* details.
|
||||
*
|
||||
* INV_L1: L1 cache invalidation
|
||||
* WB_L2: L2 cache writeback
|
||||
*
|
||||
* SLC: System Level Coherent. Accesses are forced to miss in
|
||||
* the L2 cache and are coherent with system memory.
|
||||
*
|
||||
@@ -237,6 +240,10 @@ class Request
|
||||
* between atomic return/no-return operations.
|
||||
*/
|
||||
enum : CacheCoherenceFlagsType {
|
||||
/** mem_sync_op flags */
|
||||
INV_L1 = 0x00000001,
|
||||
WB_L2 = 0x00000020,
|
||||
/** user-policy flags */
|
||||
/** user-policy flags */
|
||||
SLC_BIT = 0x00000080,
|
||||
GLC_BIT = 0x00000100,
|
||||
|
||||
Reference in New Issue
Block a user