gpu-compute: Add support for injecting scalar memory barrier

This commit adds support for injecting a scalar memory barrier in the
GPU. The barrier will primarily be used to invalidate the entire SQC
cache. The commit also invalidates all buffers and decrements related
counters upon completion of the invalidation request.

Change-Id: Ib8e270bbeb8229a4470d606c96876ba5c87335bf
This commit is contained in:
Vishnu Ramadas
2024-01-25 13:37:31 -06:00
parent 23dc98ea72
commit 03838afce0
6 changed files with 143 additions and 0 deletions

View File

@@ -1046,6 +1046,28 @@ ComputeUnit::SQCPort::recvReqRetry()
}
}
const char*
ComputeUnit::SQCPort::MemReqEvent::description() const
{
    // Human-readable event name used by the event queue for tracing/debug.
    return "ComputeUnit SQC memory request event";
}
/**
 * Fire the deferred SQC request: either hand the packet to the system
 * hub (for system-directed requests) or send it out the SQC port,
 * queueing a retry entry if the port is busy.
 */
void
ComputeUnit::SQCPort::MemReqEvent::process()
{
    SenderState *sender_state = safe_cast<SenderState*>(pkt->senderState);
    // NOTE: compute_unit is always used below (not just in the assert),
    // so no [[maybe_unused]] attribute is needed.
    ComputeUnit *compute_unit = sqcPort.computeUnit;

    if (pkt->req->systemReq()) {
        // System requests bypass the GPU cache hierarchy and go through
        // the system hub; resp_event completes the transaction when the
        // hub replies.
        assert(compute_unit->shader->systemHub);
        SystemHubEvent *resp_event = new SystemHubEvent(pkt, &sqcPort);
        compute_unit->shader->systemHub->sendRequest(pkt, resp_event);
    } else if (!(sqcPort.sendTimingReq(pkt))) {
        // Port rejected the packet: remember it (with its wavefront) so
        // recvReqRetry() can re-send it later.
        sqcPort.retries.push_back(std::pair<PacketPtr, Wavefront*>
                                  (pkt, sender_state->wavefront));
    }
}
void
ComputeUnit::sendRequest(GPUDynInstPtr gpuDynInst, PortID index, PacketPtr pkt)
{

View File

@@ -680,6 +680,41 @@ class ComputeUnit : public ClockedObject
kernId(_kernId){ }
};
/**
 * Deferred-send event for an SQC memory request.  Scheduled (e.g. by
 * ScalarMemPipeline::injectScalarMemFence) so the packet is sent after
 * the configured scalar request latency; process() performs the actual
 * send or queues a retry.
 */
class MemReqEvent : public Event
{
  private:
    SQCPort &sqcPort;   // port that will transmit the packet
    PacketPtr pkt;      // packet to send when the event fires

  public:
    MemReqEvent(SQCPort &_sqc_port, PacketPtr _pkt)
        : Event(), sqcPort(_sqc_port), pkt(_pkt)
    {
        // The event frees itself after process() runs.
        setFlags(Event::AutoDelete);
    }

    void process();
    const char *description() const;
};
/**
 * Completion event for SQC requests routed through the system hub.
 * NOTE(review): process() is currently a no-op — presumably the
 * response requires no local handling for these sync requests; confirm
 * against the system hub's callback contract.
 */
class SystemHubEvent : public Event
{
    SQCPort *sqcPort;   // originating port (not used by this event yet)
    PacketPtr reqPkt;   // request packet this completion corresponds to

  public:
    SystemHubEvent(PacketPtr pkt, SQCPort *_sqcPort)
        : sqcPort(_sqcPort), reqPkt(pkt)
    {
        // The event frees itself after process() runs.
        setFlags(Event::AutoDelete);
    }

    void
    process()
    {
    }
};
std::deque<std::pair<PacketPtr, Wavefront*>> retries;
protected:

View File

@@ -388,6 +388,29 @@ FetchUnit::FetchBufDesc::flushBuf()
wavefront->wfDynId);
}
/**
 * Invalidate the entire fetch buffer: drop all buffered and reserved
 * instruction lines and rebuild the free list so every slot is
 * available again.  Used when the SQC is invalidated (S_ICACHE_INV).
 */
void
FetchUnit::FetchBufDesc::invBuf()
{
    restartFromBranch = false;

    /**
     * free list may have some entries
     * so we clear it here to avoid duplicates
     */
    freeList.clear();
    bufferedPCs.clear();
    reservedPCs.clear();
    readPtr = bufStart;

    // Repopulate the free list with every cache-line-sized slot.
    for (int i = 0; i < fetchDepth; ++i) {
        freeList.push_back(bufStart + i * cacheLineSize);
    }

    // Bug fix: the message previously said "Fetch dropped, flushing fetch
    // buffer" (copy-pasted from flushBuf); this path invalidates.
    DPRINTF(GPUFetch, "WF[%d][%d]: Id%d invalidating fetch "
            "buffer\n", wavefront->simdId, wavefront->wfSlotId,
            wavefront->wfDynId);
}
Addr
FetchUnit::FetchBufDesc::nextFetchAddr()
{
@@ -471,6 +494,13 @@ FetchUnit::FetchBufDesc::reserveBuf(Addr vaddr)
void
FetchUnit::FetchBufDesc::fetchDone(Addr vaddr)
{
if (vaddr == 0) {
// S_ICACHE_INV fetch done
wavefront->decLGKMInstsIssued();
invBuf();
return;
}
assert(bufferedPCs.find(vaddr) == bufferedPCs.end());
DPRINTF(GPUFetch, "WF[%d][%d]: Id%d done fetching for addr %#x\n",
wavefront->simdId, wavefront->wfSlotId,

View File

@@ -104,6 +104,7 @@ class FetchUnit
// Number of buffer lines currently reserved for in-flight fetches.
int reservedLines() const { return reservedPCs.size(); }
// True if at least one fetch-buffer slot is free.
bool hasFreeSpace() const { return !freeList.empty(); }
// Discard buffered fetch data for this wavefront.
void flushBuf();
// Invalidate the whole fetch buffer and rebuild the free list
// (used when the SQC is invalidated).
void invBuf();
// Virtual address of the next instruction line to fetch.
Addr nextFetchAddr();
/**

View File

@@ -160,4 +160,55 @@ ScalarMemPipeline::issueRequest(GPUDynInstPtr gpuDynInst)
issuedRequests.push(gpuDynInst);
}
/**
 * Inject a scalar memory fence/sync packet on the SQC port, primarily
 * used to invalidate the SQC (scalar instruction cache).
 *
 * @param gpuDynInst   scalar instruction the fence is issued on behalf of
 * @param kernelMemSync true for kernel-level syncs (whole-L1 invalidate
 *                      with Request::KERNEL); false for instruction-
 *                      initiated fences whose flags come from the inst
 * @param req          optional pre-built request; a fresh empty request
 *                      is fabricated when null
 */
void
ScalarMemPipeline::injectScalarMemFence(GPUDynInstPtr gpuDynInst,
                                        bool kernelMemSync,
                                        RequestPtr req)
{
    assert(gpuDynInst->isScalar());

    if (!req) {
        req = std::make_shared<Request>(
            0, 0, 0, computeUnit.requestorId(), 0, gpuDynInst->wfDynId);
    } else {
        req->requestorId(computeUnit.requestorId());
    }

    // Sync requests carry no real address.
    req->setPaddr(0);

    // Only the request-flag setup differs between the two cases; the
    // packet construction and scheduling below are common (previously
    // duplicated verbatim in both branches).
    if (kernelMemSync) {
        req->setCacheCoherenceFlags(Request::INV_L1);
        req->setFlags(Request::KERNEL);
    } else {
        gpuDynInst->setRequestFlags(req);
    }
    req->setReqInstSeqNum(gpuDynInst->seqNum());

    PacketPtr pkt = new Packet(req, MemCmd::MemSyncReq);
    pkt->pushSenderState(
        new ComputeUnit::SQCPort::SenderState(
            gpuDynInst->wavefront(), nullptr));

    // Defer the actual send by the configured scalar request latency.
    ComputeUnit::SQCPort::MemReqEvent *sqc_event =
        new ComputeUnit::SQCPort::MemReqEvent
        (computeUnit.sqcPort, pkt);
    computeUnit.schedule(
        sqc_event, curTick() + computeUnit.scalar_req_tick_latency);
}
} // namespace gem5

View File

@@ -36,6 +36,7 @@
#include <string>
#include "gpu-compute/misc.hh"
#include "mem/request.hh"
#include "params/ComputeUnit.hh"
#include "sim/stats.hh"
@@ -67,6 +68,9 @@ class ScalarMemPipeline
void issueRequest(GPUDynInstPtr gpuDynInst);
void injectScalarMemFence(
GPUDynInstPtr gpuDynInst, bool kernelMemSync, RequestPtr req);
bool
isGMLdRespFIFOWrRdy() const
{