diff --git a/src/mem/ruby/protocol/GPU_VIPER-SQC.sm b/src/mem/ruby/protocol/GPU_VIPER-SQC.sm index bdc5d73f20..3086aab77a 100644 --- a/src/mem/ruby/protocol/GPU_VIPER-SQC.sm +++ b/src/mem/ruby/protocol/GPU_VIPER-SQC.sm @@ -60,6 +60,7 @@ machine(MachineType:SQC, "GPU SQC (L1 I Cache)") // Mem sys initiated Repl, desc="Replacing block from cache"; Data, desc="Received Data"; + Evict, desc="Evict cache line"; } enumeration(RequestType, desc="To communicate stats from transitions to recordStats") { @@ -67,6 +68,7 @@ machine(MachineType:SQC, "GPU SQC (L1 I Cache)") DataArrayWrite, desc="Write the data array"; TagArrayRead, desc="Read the data array"; TagArrayWrite, desc="Write the data array"; + TagArrayFlash, desc="Flash clear the data array"; } @@ -242,7 +244,12 @@ machine(MachineType:SQC, "GPU SQC (L1 I Cache)") peek(mandatoryQueue_in, RubyRequest, block_on="LineAddress") { Entry cache_entry := getCacheEntry(in_msg.LineAddress); TBE tbe := TBEs.lookup(in_msg.LineAddress); - trigger(Event:Fetch, in_msg.LineAddress, cache_entry, tbe); + DPRINTF(RubySlicc, "%s\n", in_msg); + if (in_msg.Type == RubyRequestType:REPLACEMENT) { + trigger(Event:Evict, in_msg.LineAddress, cache_entry, tbe); + } else { + trigger(Event:Fetch, in_msg.LineAddress, cache_entry, tbe); + } } } } @@ -313,6 +320,11 @@ machine(MachineType:SQC, "GPU SQC (L1 I Cache)") APPEND_TRANSITION_COMMENT(cache_entry.DataBlk); } + action(inv_invDone, "inv", desc="local inv done") { + sequencer.invL1Callback(); + } + + action(w_writeCache, "w", desc="write data to cache") { peek(responseToSQC_in, ResponseMsg) { assert(is_valid(cache_entry)); @@ -350,6 +362,13 @@ machine(MachineType:SQC, "GPU SQC (L1 I Cache)") ic_invCache; } + transition({I, IV, V}, Evict, I) {TagArrayRead, TagArrayWrite} { + // since we're evicting something, don't bother classifying as hit/miss + ic_invCache; + inv_invDone; + p_popMandatoryQueue; + } + // if we got a response for a load where the line is in I, then // another request must have 
come in that replaced the line in question in // the cache. Thus, complete this request without allocating the line, but diff --git a/src/mem/ruby/protocol/RubySlicc_Types.sm b/src/mem/ruby/protocol/RubySlicc_Types.sm index 2206effa29..cc56d3b1b4 100644 --- a/src/mem/ruby/protocol/RubySlicc_Types.sm +++ b/src/mem/ruby/protocol/RubySlicc_Types.sm @@ -157,6 +157,9 @@ structure (Sequencer, external = "yes") { void llscClearLocalMonitor(); void evictionCallback(Addr); + + void invL1Callback(); + void recordRequestType(SequencerRequestType); bool checkResourceAvailable(CacheResourceType, Addr); } diff --git a/src/mem/ruby/system/Sequencer.cc b/src/mem/ruby/system/Sequencer.cc index 48054febef..0a37c64adf 100644 --- a/src/mem/ruby/system/Sequencer.cc +++ b/src/mem/ruby/system/Sequencer.cc @@ -85,6 +85,8 @@ Sequencer::Sequencer(const Params &p) m_runningGarnetStandalone = p.garnet_standalone; + m_num_pending_invs = 0; + m_cache_inv_pkt = nullptr; // These statistical variables are not for display. // The profiler will collate these across different @@ -348,6 +350,10 @@ Sequencer::insertRequest(PacketPtr pkt, RubyRequestType primary_type, return RequestStatus_Ready; } + if (pkt->cmd == MemCmd::MemSyncReq) { + return RequestStatus_Aliased; + } + Addr line_addr = makeLineAddress(pkt->getAddr()); // Check if there is any outstanding request for the same cache line. 
auto &seq_req_list = m_RequestTable[line_addr]; @@ -576,7 +582,8 @@ Sequencer::readCallback(Addr address, DataBlock& data, } if ((seq_req.m_type != RubyRequestType_LD) && (seq_req.m_type != RubyRequestType_Load_Linked) && - (seq_req.m_type != RubyRequestType_IFETCH)) { + (seq_req.m_type != RubyRequestType_IFETCH) && + (seq_req.m_type != RubyRequestType_REPLACEMENT)) { // Write request: reissue request to the cache hierarchy issueRequest(seq_req.pkt, seq_req.m_second_type); break; @@ -811,6 +818,86 @@ Sequencer::unaddressedCallback(Addr unaddressedReqId, } } +void +Sequencer::completeHitCallback(std::vector<PacketPtr> & mylist) +{ + for (auto& pkt : mylist) { + // When Ruby is in warmup or cooldown phase, the requests come + // from the cache recorder. They do not track which port to use + // and do not need to send the response back + if (!RubySystem::getWarmupEnabled() + && !RubySystem::getCooldownEnabled()) { + RubyPort::SenderState *ss = + safe_cast<RubyPort::SenderState *>(pkt->senderState); + MemResponsePort *port = ss->port; + assert(port != NULL); + + pkt->senderState = ss->predecessor; + + if (pkt->cmd != MemCmd::WriteReq) { + // for WriteReq, we keep the original senderState until + // writeCompleteCallback + delete ss; + } + + port->hitCallback(pkt); + trySendRetries(); + } + } + + RubySystem *rs = m_ruby_system; + if (RubySystem::getWarmupEnabled()) { + rs->m_cache_recorder->enqueueNextFetchRequest(); + } else if (RubySystem::getCooldownEnabled()) { + rs->m_cache_recorder->enqueueNextFlushRequest(); + } else { + testDrainComplete(); + } +} + +void +Sequencer::invL1Callback() +{ + // Since L1 invalidate is currently done with paddr = 0 + assert(m_cache_inv_pkt && m_num_pending_invs > 0); + + m_num_pending_invs--; + + if (m_num_pending_invs == 0) { + std::vector<PacketPtr> pkt_list { m_cache_inv_pkt }; + m_cache_inv_pkt = nullptr; + completeHitCallback(pkt_list); + } +} + +void +Sequencer::invL1() +{ + int size = m_dataCache_ptr->getNumBlocks(); + DPRINTF(RubySequencer, + "There are %d Invalidations 
outstanding before Cache Walk\n", + m_num_pending_invs); + // Walk the cache + for (int i = 0; i < size; i++) { + Addr addr = m_dataCache_ptr->getAddressAtIdx(i); + // Evict Read-only data + RubyRequestType request_type = RubyRequestType_REPLACEMENT; + std::shared_ptr<RubyRequest> msg = std::make_shared<RubyRequest>( + clockEdge(), addr, 0, 0, + request_type, RubyAccessMode_Supervisor, + nullptr); + DPRINTF(RubySequencer, "Evicting addr 0x%x\n", addr); + assert(m_mandatory_q_ptr != NULL); + Tick latency = cyclesToTicks( + m_controller->mandatoryQueueLatency(request_type)); + m_mandatory_q_ptr->enqueue(msg, clockEdge(), latency); + m_num_pending_invs++; + } + DPRINTF(RubySequencer, + "There are %d Invalidations outstanding after Cache Walk\n", + m_num_pending_invs); +} + bool Sequencer::empty() const { @@ -915,6 +1002,11 @@ Sequencer::makeRequest(PacketPtr pkt) } } else if (pkt->isFlush()) { primary_type = secondary_type = RubyRequestType_FLUSH; + } else if (pkt->cmd == MemCmd::MemSyncReq) { + primary_type = secondary_type = RubyRequestType_REPLACEMENT; + assert(!m_cache_inv_pkt); + m_cache_inv_pkt = pkt; + invL1(); } else { panic("Unsupported ruby packet type\n"); } diff --git a/src/mem/ruby/system/Sequencer.hh b/src/mem/ruby/system/Sequencer.hh index 8f736da6d5..3dc61ab4fa 100644 --- a/src/mem/ruby/system/Sequencer.hh +++ b/src/mem/ruby/system/Sequencer.hh @@ -141,6 +141,10 @@ class Sequencer : public RubyPort const Cycles forwardRequestTime = Cycles(0), const Cycles firstResponseTime = Cycles(0)); + void completeHitCallback(std::vector<PacketPtr>& list); + void invL1Callback(); + void invL1(); + RequestStatus makeRequest(PacketPtr pkt) override; virtual bool empty() const; int outstandingCount() const override { return m_outstanding_count; } @@ -243,6 +247,10 @@ class Sequencer : public RubyPort private: int m_max_outstanding_requests; + int m_num_pending_invs; + + PacketPtr m_cache_inv_pkt; + CacheMemory* m_dataCache_ptr; // The cache access latency for top-level caches (L0/L1). These are