gpu-compute: Add Icache invalidation at kernel start

Previously, the data caches were invalidated at the start of each
kernel. This commit adds support for invalidating the instruction cache
at kernel launch time.

Change-Id: I32e50f63fa1442c2514d4dd8f9d7689759f503d3
This commit is contained in:
Vishnu Ramadas
2024-01-30 14:45:12 -06:00
parent 03838afce0
commit 440409d807
3 changed files with 36 additions and 3 deletions

View File

@@ -397,9 +397,9 @@ ComputeUnit::startWavefront(Wavefront *w, int waveId, LdsChunk *ldsChunk,
}
/**
* trigger invalidate operation in the cu
* trigger invalidate operation in the CU
*
* req: request initialized in shader, carrying the invlidate flags
* req: request initialized in shader, carrying the invalidate flags
*/
void
ComputeUnit::doInvalidate(RequestPtr req, int kernId){
@@ -425,6 +425,26 @@ ComputeUnit::doFlush(GPUDynInstPtr gpuDynInst) {
injectGlobalMemFence(gpuDynInst, true);
}
/**
 * Trigger an SQC invalidate operation in the CU.
 *
 * A fresh dynamic instruction wrapping a KernelLaunchStaticInst is built
 * to carry the invalidate, flagged as scalar, and handed to the scalar
 * memory pipeline as a memory fence together with the request.
 *
 * req: request initialized in shader, carrying the invalidate flags
 * kernId: kernel id stored on the injected instruction
 */
void
ComputeUnit::doSQCInvalidate(RequestPtr req, int kernId)
{
    auto *launchInst = new KernelLaunchStaticInst();
    GPUDynInstPtr sqcInvInst = std::make_shared<GPUDynInst>(
        this, nullptr, launchInst, getAndIncSeqNum());

    // the fence is injected through the scalar memory pipeline, so the
    // carrying instruction is flagged as scalar
    sqcInvInst->staticInstruction()->setFlag(GPUStaticInst::Scalar);

    // kern_id will be used in inv responses
    sqcInvInst->kern_id = kernId;

    // the request's context id is taken from the instruction's wfDynId
    req->setContext(sqcInvInst->wfDynId);

    scalarMemoryPipe.injectScalarMemFence(sqcInvInst, true, req);
}
// resetting SIMD register pools
// I couldn't think of any other place and
// I think it is needed in my implementation
@@ -1012,7 +1032,14 @@ ComputeUnit::DataPort::recvReqRetry()
/**
 * Receive a timing response on the SQC port.
 *
 * A response whose sender state carries a wavefront is forwarded to the
 * compute unit. A response without one comes from the SQC invalidate that
 * was issued at kernel start; it has no wavefront or instruction
 * associated with it, so it is released here instead of being forwarded.
 *
 * pkt: the response packet; its senderState must be a SenderState
 *
 * Always returns true, i.e. the response is always accepted.
 */
bool
ComputeUnit::SQCPort::recvTimingResp(PacketPtr pkt)
{
    SenderState *sender_state = safe_cast<SenderState*>(pkt->senderState);

    if (sender_state->wavefront != nullptr) {
        computeUnit->handleSQCReturn(pkt);
    } else {
        // Nobody downstream consumes an invalidate response, and the
        // packet has been accepted (we return true), so free it here to
        // avoid leaking it.
        // NOTE(review): assumes the sender state was heap-allocated for
        // this packet and is not referenced anywhere else -- confirm
        // against the code that injects the scalar memory fence.
        delete pkt->senderState;
        delete pkt;
    }

    return true;
}

View File

@@ -412,6 +412,7 @@ class ComputeUnit : public ClockedObject
void doInvalidate(RequestPtr req, int kernId);
void doFlush(GPUDynInstPtr gpuDynInst);
void doSQCInvalidate(RequestPtr req, int kernId);
void dispWorkgroup(HSAQueueEntry *task, int num_wfs_in_wg);
bool hasDispResources(HSAQueueEntry *task, int &num_wfs_in_wg);

View File

@@ -221,6 +221,11 @@ Shader::prepareInvalidate(HSAQueueEntry *task) {
// all necessary INV flags are all set now, call cu to execute
cuList[i_cu]->doInvalidate(req, task->dispatchId());
if ((i_cu % 4) == 0) {
cuList[i_cu]->doSQCInvalidate(req, task->dispatchId());
}
// I don't like this. This is intrusive coding.
cuList[i_cu]->resetRegisterPool();
}