diff --git a/src/gpu-compute/compute_unit.cc b/src/gpu-compute/compute_unit.cc
index 8259f0a950..f28a8e39c7 100644
--- a/src/gpu-compute/compute_unit.cc
+++ b/src/gpu-compute/compute_unit.cc
@@ -1046,6 +1046,37 @@ ComputeUnit::SQCPort::recvReqRetry()
     }
 }
 
+/**
+ * Return a human-readable name for this event.
+ */
+const char*
+ComputeUnit::SQCPort::MemReqEvent::description() const
+{
+    return "ComputeUnit SQC memory request event";
+}
+
+/**
+ * Issue the packet carried by this event. A system request is handed
+ * to the shader's system hub together with a newly allocated
+ * SystemHubEvent; any other request is sent through the SQC port and,
+ * if the port rejects it, appended to the port's retry queue.
+ */
+void
+ComputeUnit::SQCPort::MemReqEvent::process()
+{
+    SenderState *sender_state = safe_cast<SenderState*>(pkt->senderState);
+    ComputeUnit *compute_unit = sqcPort.computeUnit;
+
+    if (pkt->req->systemReq()) {
+        assert(compute_unit->shader->systemHub);
+        SystemHubEvent *resp_event = new SystemHubEvent(pkt, &sqcPort);
+        compute_unit->shader->systemHub->sendRequest(pkt, resp_event);
+    } else if (!(sqcPort.sendTimingReq(pkt))) {
+        // Keep the packet paired with its wavefront on the retry queue.
+        sqcPort.retries.emplace_back(pkt, sender_state->wavefront);
+    }
+}
+
 void
 ComputeUnit::sendRequest(GPUDynInstPtr gpuDynInst, PortID index, PacketPtr pkt)
 {
diff --git a/src/gpu-compute/compute_unit.hh b/src/gpu-compute/compute_unit.hh
index e6bc03da7d..24324bb515 100644
--- a/src/gpu-compute/compute_unit.hh
+++ b/src/gpu-compute/compute_unit.hh
@@ -680,6 +680,52 @@ class ComputeUnit : public ClockedObject
                 kernId(_kernId){ }
         };
 
+        /**
+         * Event flagged AutoDelete that holds one packet for the SQC
+         * port. process() and description() are defined in
+         * compute_unit.cc.
+         */
+        class MemReqEvent : public Event
+        {
+          private:
+            SQCPort &sqcPort;
+            PacketPtr pkt;
+
+          public:
+            MemReqEvent(SQCPort &_sqc_port, PacketPtr _pkt)
+                : Event(), sqcPort(_sqc_port), pkt(_pkt)
+            {
+                setFlags(Event::AutoDelete);
+            }
+
+            void process() override;
+            const char *description() const override;
+        };
+
+        /**
+         * Event flagged AutoDelete that MemReqEvent::process() passes
+         * to the system hub along with a system request.
+         */
+        class SystemHubEvent : public Event
+        {
+            SQCPort *sqcPort;
+            PacketPtr reqPkt;
+
+          public:
+            SystemHubEvent(PacketPtr pkt, SQCPort *_sqcPort)
+                : sqcPort(_sqcPort), reqPkt(pkt)
+            {
+                setFlags(Event::AutoDelete);
+            }
+
+            void
+            process() override
+            {
+                // NOTE(review): empty body -- sqcPort and reqPkt are
+                // stored but never read, so this event does nothing
+                // with the packet. Confirm that is intended.
+            }
+        };
+
         std::deque<std::pair<PacketPtr, Wavefront*>> retries;
 
       protected:
diff --git a/src/gpu-compute/fetch_unit.cc b/src/gpu-compute/fetch_unit.cc
index 4dadbd363d..19144d55e2 100644
--- a/src/gpu-compute/fetch_unit.cc
+++ b/src/gpu-compute/fetch_unit.cc
@@ -388,6 +388,33 @@ FetchUnit::FetchBufDesc::flushBuf()
             wavefront->wfDynId);
 }
 
+/**
+ * Reset the fetch buffer: clear restartFromBranch, drop all buffered
+ * and reserved PCs, rewind the read pointer to the start of the buffer
+ * and rebuild the free list with one entry per fetch-depth slot.
+ */
+void
+FetchUnit::FetchBufDesc::invBuf()
+{
+    restartFromBranch = false;
+    /**
+     * free list may have some entries
+     * so we clear it here to avoid duplicates
+     */
+    freeList.clear();
+    bufferedPCs.clear();
+    reservedPCs.clear();
+    readPtr = bufStart;
+
+    for (int i = 0; i < fetchDepth; ++i) {
+        freeList.push_back(bufStart + i * cacheLineSize);
+    }
+
+    DPRINTF(GPUFetch, "WF[%d][%d]: Id%d Fetch dropped, flushing fetch "
+            "buffer\n", wavefront->simdId, wavefront->wfSlotId,
+            wavefront->wfDynId);
+}
+
 Addr
 FetchUnit::FetchBufDesc::nextFetchAddr()
 {
@@ -471,6 +498,13 @@ FetchUnit::FetchBufDesc::reserveBuf(Addr vaddr)
 void
 FetchUnit::FetchBufDesc::fetchDone(Addr vaddr)
 {
+    if (vaddr == 0) {
+        // S_ICACHE_INV fetch done
+        wavefront->decLGKMInstsIssued();
+        invBuf();
+        return;
+    }
+
     assert(bufferedPCs.find(vaddr) == bufferedPCs.end());
     DPRINTF(GPUFetch, "WF[%d][%d]: Id%d done fetching for addr %#x\n",
             wavefront->simdId, wavefront->wfSlotId,
diff --git a/src/gpu-compute/fetch_unit.hh b/src/gpu-compute/fetch_unit.hh
index 0ba88c7d95..99c91b7299 100644
--- a/src/gpu-compute/fetch_unit.hh
+++ b/src/gpu-compute/fetch_unit.hh
@@ -104,6 +104,7 @@ class FetchUnit
         int reservedLines() const { return reservedPCs.size(); }
         bool hasFreeSpace() const { return !freeList.empty(); }
         void flushBuf();
+        void invBuf();
         Addr nextFetchAddr();
 
         /**
diff --git a/src/gpu-compute/scalar_memory_pipeline.cc b/src/gpu-compute/scalar_memory_pipeline.cc
index de24f9448b..767e4e05a7 100644
--- a/src/gpu-compute/scalar_memory_pipeline.cc
+++ b/src/gpu-compute/scalar_memory_pipeline.cc
@@ -160,4 +160,49 @@ ScalarMemPipeline::issueRequest(GPUDynInstPtr gpuDynInst)
     issuedRequests.push(gpuDynInst);
 }
 
+/**
+ * Build a MemSyncReq packet for a scalar instruction and schedule a
+ * MemReqEvent for the compute unit's SQC port at
+ * curTick() + scalar_req_tick_latency.
+ *
+ * @param gpuDynInst scalar instruction issuing the fence
+ * @param kernelMemSync if true, tag the request INV_L1 and KERNEL;
+ *        otherwise take the request flags from gpuDynInst
+ * @param req request to reuse; a new one is allocated when null
+ */
+void
+ScalarMemPipeline::injectScalarMemFence(GPUDynInstPtr gpuDynInst,
+                                        bool kernelMemSync,
+                                        RequestPtr req)
+{
+    assert(gpuDynInst->isScalar());
+
+    if (!req) {
+        req = std::make_shared<Request>(
+            0, 0, 0, computeUnit.requestorId(), 0, gpuDynInst->wfDynId);
+    } else {
+        req->requestorId(computeUnit.requestorId());
+    }
+
+    req->setPaddr(0);
+
+    if (kernelMemSync) {
+        req->setCacheCoherenceFlags(Request::INV_L1);
+        req->setReqInstSeqNum(gpuDynInst->seqNum());
+        req->setFlags(Request::KERNEL);
+    } else {
+        gpuDynInst->setRequestFlags(req);
+        req->setReqInstSeqNum(gpuDynInst->seqNum());
+    }
+
+    // Both kinds of fence share the same packet and event setup.
+    PacketPtr pkt = new Packet(req, MemCmd::MemSyncReq);
+    pkt->pushSenderState(
+        new ComputeUnit::SQCPort::SenderState(
+            gpuDynInst->wavefront(), nullptr));
+
+    computeUnit.schedule(
+        new ComputeUnit::SQCPort::MemReqEvent(computeUnit.sqcPort, pkt),
+        curTick() + computeUnit.scalar_req_tick_latency);
+}
+
 } // namespace gem5
diff --git a/src/gpu-compute/scalar_memory_pipeline.hh b/src/gpu-compute/scalar_memory_pipeline.hh
index 5512c7c01f..e5dc7b4292 100644
--- a/src/gpu-compute/scalar_memory_pipeline.hh
+++ b/src/gpu-compute/scalar_memory_pipeline.hh
@@ -36,6 +36,7 @@
 #include <string>
 
 #include "gpu-compute/misc.hh"
+#include "mem/request.hh"
 #include "params/ComputeUnit.hh"
 #include "sim/stats.hh"
 
@@ -67,6 +68,9 @@ class ScalarMemPipeline
 
     void issueRequest(GPUDynInstPtr gpuDynInst);
 
+    void injectScalarMemFence(
+        GPUDynInstPtr gpuDynInst, bool kernelMemSync, RequestPtr req);
+
     bool
     isGMLdRespFIFOWrRdy() const
     {