gpu-compute: Add support for injecting scalar memory barrier
This commit adds support for injecting a scalar memory barrier in the GPU. The barrier will primarily be used to invalidate the entire SQC cache. The commit also invalidates all buffers and decrements the related counters upon completion of the invalidation request. Change-Id: Ib8e270bbeb8229a4470d606c96876ba5c87335bf
This commit is contained in:
@@ -1046,6 +1046,28 @@ ComputeUnit::SQCPort::recvReqRetry()
|
||||
}
|
||||
}
|
||||
|
||||
const char*
|
||||
ComputeUnit::SQCPort::MemReqEvent::description() const
|
||||
{
|
||||
return "ComputeUnit SQC memory request event";
|
||||
}
|
||||
|
||||
void
|
||||
ComputeUnit::SQCPort::MemReqEvent::process()
|
||||
{
|
||||
SenderState *sender_state = safe_cast<SenderState*>(pkt->senderState);
|
||||
[[maybe_unused]] ComputeUnit *compute_unit = sqcPort.computeUnit;
|
||||
|
||||
if (pkt->req->systemReq()) {
|
||||
assert(compute_unit->shader->systemHub);
|
||||
SystemHubEvent *resp_event = new SystemHubEvent(pkt, &sqcPort);
|
||||
compute_unit->shader->systemHub->sendRequest(pkt, resp_event);
|
||||
} else if (!(sqcPort.sendTimingReq(pkt))) {
|
||||
sqcPort.retries.push_back(std::pair<PacketPtr, Wavefront*>
|
||||
(pkt, sender_state->wavefront));
|
||||
}
|
||||
}
|
||||
|
||||
void
|
||||
ComputeUnit::sendRequest(GPUDynInstPtr gpuDynInst, PortID index, PacketPtr pkt)
|
||||
{
|
||||
|
||||
@@ -680,6 +680,41 @@ class ComputeUnit : public ClockedObject
|
||||
kernId(_kernId){ }
|
||||
};
|
||||
|
||||
class MemReqEvent : public Event
|
||||
{
|
||||
private:
|
||||
SQCPort &sqcPort;
|
||||
PacketPtr pkt;
|
||||
|
||||
public:
|
||||
MemReqEvent(SQCPort &_sqc_port, PacketPtr _pkt)
|
||||
: Event(), sqcPort(_sqc_port), pkt(_pkt)
|
||||
{
|
||||
setFlags(Event::AutoDelete);
|
||||
}
|
||||
|
||||
void process();
|
||||
const char *description() const;
|
||||
};
|
||||
|
||||
class SystemHubEvent : public Event
|
||||
{
|
||||
SQCPort *sqcPort;
|
||||
PacketPtr reqPkt;
|
||||
|
||||
public:
|
||||
SystemHubEvent(PacketPtr pkt, SQCPort *_sqcPort)
|
||||
: sqcPort(_sqcPort), reqPkt(pkt)
|
||||
{
|
||||
setFlags(Event::AutoDelete);
|
||||
}
|
||||
|
||||
void
|
||||
process()
|
||||
{
|
||||
}
|
||||
};
|
||||
|
||||
// Packets waiting to be retried, each paired with its wavefront;
// MemReqEvent::process() appends here when sendTimingReq() fails.
std::deque<std::pair<PacketPtr, Wavefront*>> retries;
|
||||
|
||||
protected:
|
||||
|
||||
@@ -388,6 +388,29 @@ FetchUnit::FetchBufDesc::flushBuf()
|
||||
wavefront->wfDynId);
|
||||
}
|
||||
|
||||
void
|
||||
FetchUnit::FetchBufDesc::invBuf()
|
||||
{
|
||||
restartFromBranch = false;
|
||||
/**
|
||||
* free list may have some entries
|
||||
* so we clear it here to avoid duplicates
|
||||
*/
|
||||
freeList.clear();
|
||||
bufferedPCs.clear();
|
||||
reservedPCs.clear();
|
||||
readPtr = bufStart;
|
||||
|
||||
for (int i = 0; i < fetchDepth; ++i) {
|
||||
freeList.push_back(bufStart + i * cacheLineSize);
|
||||
}
|
||||
|
||||
DPRINTF(GPUFetch, "WF[%d][%d]: Id%d Fetch dropped, flushing fetch "
|
||||
"buffer\n", wavefront->simdId, wavefront->wfSlotId,
|
||||
wavefront->wfDynId);
|
||||
|
||||
}
|
||||
|
||||
Addr
|
||||
FetchUnit::FetchBufDesc::nextFetchAddr()
|
||||
{
|
||||
@@ -471,6 +494,13 @@ FetchUnit::FetchBufDesc::reserveBuf(Addr vaddr)
|
||||
void
|
||||
FetchUnit::FetchBufDesc::fetchDone(Addr vaddr)
|
||||
{
|
||||
if (vaddr == 0) {
|
||||
// S_ICACHE_INV fetch done
|
||||
wavefront->decLGKMInstsIssued();
|
||||
invBuf();
|
||||
return;
|
||||
}
|
||||
|
||||
assert(bufferedPCs.find(vaddr) == bufferedPCs.end());
|
||||
DPRINTF(GPUFetch, "WF[%d][%d]: Id%d done fetching for addr %#x\n",
|
||||
wavefront->simdId, wavefront->wfSlotId,
|
||||
|
||||
@@ -104,6 +104,7 @@ class FetchUnit
|
||||
int reservedLines() const { return reservedPCs.size(); }
|
||||
bool hasFreeSpace() const { return !freeList.empty(); }
|
||||
void flushBuf();
|
||||
// Empties the buffered/reserved PC sets, rewinds readPtr to bufStart and
// rebuilds the free list (see the definition in the .cc file).
void invBuf();
|
||||
Addr nextFetchAddr();
|
||||
|
||||
/**
|
||||
|
||||
@@ -160,4 +160,55 @@ ScalarMemPipeline::issueRequest(GPUDynInstPtr gpuDynInst)
|
||||
issuedRequests.push(gpuDynInst);
|
||||
}
|
||||
|
||||
void
|
||||
ScalarMemPipeline::injectScalarMemFence(GPUDynInstPtr gpuDynInst,
|
||||
bool kernelMemSync,
|
||||
RequestPtr req)
|
||||
{
|
||||
assert(gpuDynInst->isScalar());
|
||||
|
||||
if (!req) {
|
||||
req = std::make_shared<Request>(
|
||||
0, 0, 0, computeUnit.requestorId(), 0, gpuDynInst->wfDynId);
|
||||
} else {
|
||||
req->requestorId(computeUnit.requestorId());
|
||||
}
|
||||
|
||||
req->setPaddr(0);
|
||||
|
||||
PacketPtr pkt = nullptr;
|
||||
|
||||
if (kernelMemSync) {
|
||||
req->setCacheCoherenceFlags(Request::INV_L1);
|
||||
req->setReqInstSeqNum(gpuDynInst->seqNum());
|
||||
req->setFlags(Request::KERNEL);
|
||||
pkt = new Packet(req, MemCmd::MemSyncReq);
|
||||
pkt->pushSenderState(
|
||||
new ComputeUnit::SQCPort::SenderState(
|
||||
gpuDynInst->wavefront(), nullptr));
|
||||
ComputeUnit::SQCPort::MemReqEvent *sqc_event =
|
||||
new ComputeUnit::SQCPort::MemReqEvent
|
||||
(computeUnit.sqcPort, pkt);
|
||||
|
||||
computeUnit.schedule(
|
||||
sqc_event, curTick() + computeUnit.scalar_req_tick_latency);
|
||||
} else {
|
||||
gpuDynInst->setRequestFlags(req);
|
||||
|
||||
req->setReqInstSeqNum(gpuDynInst->seqNum());
|
||||
|
||||
pkt = new Packet(req, MemCmd::MemSyncReq);
|
||||
pkt->pushSenderState(
|
||||
new ComputeUnit::SQCPort::SenderState(
|
||||
gpuDynInst->wavefront(), nullptr));
|
||||
|
||||
ComputeUnit::SQCPort::MemReqEvent *sqc_event =
|
||||
new ComputeUnit::SQCPort::MemReqEvent
|
||||
(computeUnit.sqcPort, pkt);
|
||||
|
||||
computeUnit.schedule(
|
||||
sqc_event, curTick() + computeUnit.scalar_req_tick_latency);
|
||||
}
|
||||
}
|
||||
|
||||
} // namespace gem5
|
||||
|
||||
@@ -36,6 +36,7 @@
|
||||
#include <string>
|
||||
|
||||
#include "gpu-compute/misc.hh"
|
||||
#include "mem/request.hh"
|
||||
#include "params/ComputeUnit.hh"
|
||||
#include "sim/stats.hh"
|
||||
|
||||
@@ -67,6 +68,9 @@ class ScalarMemPipeline
|
||||
|
||||
void issueRequest(GPUDynInstPtr gpuDynInst);
|
||||
|
||||
/**
 * Create a memory-sync request for the given scalar instruction and
 * schedule it on the SQC port.
 *
 * @param gpuDynInst scalar instruction the request is created for.
 * @param kernelMemSync selects the kernel-level variant of the request.
 * @param req request to use (the .cc definition allocates one when null).
 */
void injectScalarMemFence(GPUDynInstPtr gpuDynInst,
                          bool kernelMemSync,
                          RequestPtr req);
|
||||
|
||||
bool
|
||||
isGMLdRespFIFOWrRdy() const
|
||||
{
|
||||
|
||||
Reference in New Issue
Block a user