diff --git a/src/mem/ruby/system/GPUCoalescer.cc b/src/mem/ruby/system/GPUCoalescer.cc index beb8da3f9c..a70af07467 100644 --- a/src/mem/ruby/system/GPUCoalescer.cc +++ b/src/mem/ruby/system/GPUCoalescer.cc @@ -73,6 +73,14 @@ UncoalescedTable::insertPacket(PacketPtr pkt) pkt->getAddr(), seqNum, instMap.size(), instMap[seqNum].size()); } +void +UncoalescedTable::insertReqType(PacketPtr pkt, RubyRequestType type) +{ + uint64_t seqNum = pkt->req->getReqInstSeqNum(); + + reqTypeMap[seqNum] = type; +} + bool UncoalescedTable::packetAvailable() { @@ -128,9 +136,21 @@ UncoalescedTable::updateResources() instMap.erase(iter++); instPktsRemaining.erase(seq_num); - // Release the token - DPRINTF(GPUCoalescer, "Returning token seqNum %d\n", seq_num); - coalescer->getGMTokenPort().sendTokens(1); + // Release the token if the Ruby system is not in cooldown + // or warmup phases. When in these phases, the RubyPorts + // are accessed directly using the makeRequest() command + // instead of accessing through the port. This makes + // sending tokens through the port unnecessary + if (!RubySystem::getWarmupEnabled() + && !RubySystem::getCooldownEnabled()) { + if (reqTypeMap[seq_num] != RubyRequestType_FLUSH) { + DPRINTF(GPUCoalescer, + "Returning token seqNum %d\n", seq_num); + coalescer->getGMTokenPort().sendTokens(1); + } + } + + reqTypeMap.erase(seq_num); } else { ++iter; } @@ -565,6 +585,14 @@ GPUCoalescer::hitCallback(CoalescedRequest* crequest, for (auto& pkt : pktList) { offset = getOffset(pkt->getAddr()); pkt_size = pkt->getSize(); + request_address = pkt->getAddr(); + + // When the Ruby system is in cooldown phase, the requests come from + // the cache recorder. These requests do not get coalesced and + // do not return valid data.
+ if (RubySystem::getCooldownEnabled()) + continue; + if (pkt->getPtr()) { switch(type) { // Store and AtomicNoReturns follow the same path, as the @@ -627,7 +655,6 @@ GPUCoalescer::getRequestType(PacketPtr pkt) assert(!pkt->req->isLLSC()); assert(!pkt->req->isLockedRMW()); assert(!pkt->req->isInstFetch()); - assert(!pkt->isFlush()); if (pkt->req->isAtomicReturn()) { req_type = RubyRequestType_ATOMIC_RETURN; @@ -637,6 +664,8 @@ GPUCoalescer::getRequestType(PacketPtr pkt) req_type = RubyRequestType_LD; } else if (pkt->isWrite()) { req_type = RubyRequestType_ST; + } else if (pkt->isFlush()) { + req_type = RubyRequestType_FLUSH; } else { panic("Unsupported ruby packet type\n"); } @@ -658,7 +687,7 @@ GPUCoalescer::makeRequest(PacketPtr pkt) issueMemSyncRequest(pkt); } else { // otherwise, this must be either read or write command - assert(pkt->isRead() || pkt->isWrite()); + assert(pkt->isRead() || pkt->isWrite() || pkt->isFlush()); InstSeqNum seq_num = pkt->req->getReqInstSeqNum(); @@ -667,10 +696,17 @@ GPUCoalescer::makeRequest(PacketPtr pkt) // number of lanes actives for that vmem request (i.e., the popcnt // of the exec_mask. int num_packets = 1; - if (!m_usingRubyTester) { - num_packets = 0; - for (int i = 0; i < TheGpuISA::NumVecElemPerVecReg; i++) { - num_packets += getDynInst(pkt)->getLaneStatus(i); + + // When Ruby is in warmup or cooldown phase, the requests come from + // the cache recorder. There is no dynamic instruction associated + // with these requests either + if (!RubySystem::getWarmupEnabled() + && !RubySystem::getCooldownEnabled()) { + if (!m_usingRubyTester) { + num_packets = 0; + for (int i = 0; i < TheGpuISA::NumVecElemPerVecReg; i++) { + num_packets += getDynInst(pkt)->getLaneStatus(i); + } } } @@ -679,6 +715,7 @@ GPUCoalescer::makeRequest(PacketPtr pkt) // future cycle. Packets remaining is set to the number of excepted // requests from the instruction based on its exec_mask. 
uncoalescedTable.insertPacket(pkt); + uncoalescedTable.insertReqType(pkt, getRequestType(pkt)); uncoalescedTable.initPacketsRemaining(seq_num, num_packets); DPRINTF(GPUCoalescer, "Put pkt with addr 0x%X to uncoalescedTable\n", pkt->getAddr()); @@ -945,21 +982,27 @@ void GPUCoalescer::completeHitCallback(std::vector & mylist) { for (auto& pkt : mylist) { - RubyPort::SenderState *ss = - safe_cast(pkt->senderState); - MemResponsePort *port = ss->port; - assert(port != NULL); + // When Ruby is in warmup or cooldown phase, the requests come + // from the cache recorder. They do not track which port to use + // and do not need to send the response back + if (!RubySystem::getWarmupEnabled() + && !RubySystem::getCooldownEnabled()) { + RubyPort::SenderState *ss = + safe_cast(pkt->senderState); + MemResponsePort *port = ss->port; + assert(port != NULL); - pkt->senderState = ss->predecessor; + pkt->senderState = ss->predecessor; - if (pkt->cmd != MemCmd::WriteReq) { - // for WriteReq, we keep the original senderState until - // writeCompleteCallback - delete ss; + if (pkt->cmd != MemCmd::WriteReq) { + // for WriteReq, we keep the original senderState until + // writeCompleteCallback + delete ss; + } + + port->hitCallback(pkt); + trySendRetries(); } - - port->hitCallback(pkt); - trySendRetries(); } // We schedule an event in the same tick as hitCallback (similar to @@ -971,7 +1014,14 @@ GPUCoalescer::completeHitCallback(std::vector & mylist) schedule(issueEvent, curTick()); } - testDrainComplete(); + RubySystem *rs = m_ruby_system; + if (RubySystem::getWarmupEnabled()) { + rs->m_cache_recorder->enqueueNextFetchRequest(); + } else if (RubySystem::getCooldownEnabled()) { + rs->m_cache_recorder->enqueueNextFlushRequest(); + } else { + testDrainComplete(); + } } void diff --git a/src/mem/ruby/system/GPUCoalescer.hh b/src/mem/ruby/system/GPUCoalescer.hh index dd28855547..d6db5c00ba 100644 --- a/src/mem/ruby/system/GPUCoalescer.hh +++ b/src/mem/ruby/system/GPUCoalescer.hh @@ -71,6 
+71,7 @@ class UncoalescedTable ~UncoalescedTable() {} void insertPacket(PacketPtr pkt); + void insertReqType(PacketPtr pkt, RubyRequestType type); bool packetAvailable(); void printRequestTable(std::stringstream& ss); @@ -101,6 +102,8 @@ class UncoalescedTable std::map instMap; std::map instPktsRemaining; + + std::map reqTypeMap; }; class CoalescedRequest