/*
 * Copyright (c) 2013-2015 Advanced Micro Devices, Inc.
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are met:
 *
 * 1. Redistributions of source code must retain the above copyright notice,
 * this list of conditions and the following disclaimer.
 *
 * 2. Redistributions in binary form must reproduce the above copyright notice,
 * this list of conditions and the following disclaimer in the documentation
 * and/or other materials provided with the distribution.
 *
 * 3. Neither the name of the copyright holder nor the names of its
 * contributors may be used to endorse or promote products derived from this
 * software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */

#include "mem/ruby/system/VIPERCoalescer.hh"

#include "base/logging.hh"
#include "base/str.hh"
#include "cpu/testers/rubytest/RubyTester.hh"
#include "debug/GPUCoalescer.hh"
#include "debug/MemoryAccess.hh"
#include "debug/ProtocolTrace.hh"
#include "mem/packet.hh"
#include "mem/ruby/common/SubBlock.hh"
#include "mem/ruby/network/MessageBuffer.hh"
#include "mem/ruby/profiler/Profiler.hh"
#include "mem/ruby/slicc_interface/AbstractController.hh"
#include "mem/ruby/slicc_interface/RubyRequest.hh"
#include "mem/ruby/structures/CacheMemory.hh"
#include "mem/ruby/system/GPUCoalescer.hh"
#include "mem/ruby/system/RubySystem.hh"
#include "params/VIPERCoalescer.hh"

namespace gem5
{

namespace ruby
{

VIPERCoalescer::VIPERCoalescer(const Params &p)
    : GPUCoalescer(p),
      m_cache_inv_pkt(nullptr),
      m_num_pending_invs(0)
{
}

VIPERCoalescer::~VIPERCoalescer()
{
}

// Places an uncoalesced packet in uncoalescedTable. If the packet is a
// special type (MemFence, scoping, etc), it is issued immediately.
RequestStatus
VIPERCoalescer::makeRequest(PacketPtr pkt)
{
    // VIPER only supports the following memory request types:
    //    MemSyncReq & INV_L1 : TCP cache invalidation
    //    ReadReq             : cache read
    //    WriteReq            : cache write
    //    AtomicOp            : cache atomic
    //
    // VIPER does not expect MemSyncReq & Release since, in GCN3, a compute
    // unit does not specify an equivalent type of memory request.
    assert((pkt->cmd == MemCmd::MemSyncReq && pkt->req->isInvL1()) ||
            pkt->cmd == MemCmd::ReadReq ||
            pkt->cmd == MemCmd::WriteReq ||
            pkt->isAtomicOp());

    if (pkt->req->isInvL1() && m_cache_inv_pkt) {
        // In the VIPER protocol, the coalescer is not able to handle two or
        // more cache invalidation requests at a time. Cache invalidation
        // requests must be serialized to ensure that all stale data in the
        // TCP are invalidated correctly. If there is already a pending cache
        // invalidation request, this request must be retried later.
        return RequestStatus_Aliased;
    }

    GPUCoalescer::makeRequest(pkt);

    if (pkt->req->isInvL1()) {
        // In the VIPER protocol, a compute unit sends a MemSyncReq with the
        // INV_L1 flag to invalidate the TCP. Upon receiving a request of
        // this type, VIPERCoalescer starts a cache walk to invalidate all
        // valid entries in the TCP. The request is completed once all
        // entries are invalidated.
        assert(!m_cache_inv_pkt);
        m_cache_inv_pkt = pkt;
        invTCP();
    }

    return RequestStatus_Issued;
}
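
// Illustrative example of the coalescing below: assuming the common 64-byte
// Ruby block size (the actual value comes from RubySystem::getBlockSizeBytes()
// and is configurable), two 4-byte WriteReqs to the same line at offsets 0
// and 8 are folded into a single line-sized RubyRequest whose accessMask
// marks bytes 0-3 and 8-11 and whose DataBlock carries the written bytes at
// those offsets. Bytes outside the mask remain untouched, which is what
// permits partial writes and partial reads of those writes. Atomic packets
// contribute an (offset, AtomicOpFunctor*) pair instead of data bytes.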
void
VIPERCoalescer::issueRequest(CoalescedRequest* crequest)
{
    PacketPtr pkt = crequest->getFirstPkt();

    int proc_id = -1;
    if (pkt != NULL && pkt->req->hasContextId()) {
        proc_id = pkt->req->contextId();
    }

    // If valid, copy the pc to the ruby request
    Addr pc = 0;
    if (pkt->req->hasPC()) {
        pc = pkt->req->getPC();
    }

    Addr line_addr = makeLineAddress(pkt->getAddr());

    // Create a WriteMask that records written bytes and atomic operations.
    // This enables partial writes and partial reads of those writes.
    DataBlock dataBlock;
    dataBlock.clear();
    uint32_t blockSize = RubySystem::getBlockSizeBytes();
    std::vector<bool> accessMask(blockSize, false);
    std::vector<std::pair<int, AtomicOpFunctor*>> atomicOps;
    uint32_t tableSize = crequest->getPackets().size();
    for (int i = 0; i < tableSize; i++) {
        PacketPtr tmpPkt = crequest->getPackets()[i];
        uint32_t tmpOffset = (tmpPkt->getAddr()) - line_addr;
        uint32_t tmpSize = tmpPkt->getSize();
        if (tmpPkt->isAtomicOp()) {
            std::pair<int, AtomicOpFunctor*> tmpAtomicOp(tmpOffset,
                                                    tmpPkt->getAtomicOp());
            atomicOps.push_back(tmpAtomicOp);
        } else if (tmpPkt->isWrite()) {
            dataBlock.setData(tmpPkt->getPtr<uint8_t>(),
                              tmpOffset, tmpSize);
        }
        for (int j = 0; j < tmpSize; j++) {
            accessMask[tmpOffset + j] = true;
        }
    }

    std::shared_ptr<RubyRequest> msg;
    if (pkt->isAtomicOp()) {
        msg = std::make_shared<RubyRequest>(clockEdge(), pkt->getAddr(),
                              pkt->getSize(), pc, crequest->getRubyType(),
                              RubyAccessMode_Supervisor, pkt,
                              PrefetchBit_No, proc_id, 100,
                              blockSize, accessMask,
                              dataBlock, atomicOps, crequest->getSeqNum());
    } else {
        msg = std::make_shared<RubyRequest>(clockEdge(), pkt->getAddr(),
                              pkt->getSize(), pc, crequest->getRubyType(),
                              RubyAccessMode_Supervisor, pkt,
                              PrefetchBit_No, proc_id, 100,
                              blockSize, accessMask,
                              dataBlock, crequest->getSeqNum());
    }

    if (pkt->cmd == MemCmd::WriteReq) {
        makeWriteCompletePkts(crequest);
    }

    DPRINTFR(ProtocolTrace, "%15s %3s %10s%20s %6s>%-6s %s %s\n",
             curTick(), m_version, "Coal", "Begin", "", "",
             printAddress(msg->getPhysicalAddress()),
             RubyRequestType_to_string(crequest->getRubyType()));

    fatal_if(crequest->getRubyType() == RubyRequestType_IFETCH,
             "there should not be any I-Fetch requests in the GPU Coalescer");

    if (!deadlockCheckEvent.scheduled()) {
        schedule(deadlockCheckEvent,
                 m_deadlock_threshold * clockPeriod() + curTick());
    }

    assert(m_mandatory_q_ptr);
    Tick latency = cyclesToTicks(
        m_controller->mandatoryQueueLatency(crequest->getRubyType()));
    m_mandatory_q_ptr->enqueue(msg, clockEdge(), latency);
}

void
VIPERCoalescer::makeWriteCompletePkts(CoalescedRequest* crequest)
{
    // In the VIPER protocol, for each write request, the downstream caches
    // return two responses: writeCallback and writeCompleteCallback. We need
    // to prepare a writeCompletePkt for each write request so that when
    // writeCompleteCallback is called, we can respond to the requesting
    // wavefront right away. writeCompletePkt inherits the request and
    // senderState of the original write request packet, so that we can find
    // the original requestor later. This assumes that the request and
    // senderState are not deleted before writeCompleteCallback is called.
    auto key = crequest->getSeqNum();
    std::vector<PacketPtr>& req_pkts = crequest->getPackets();
    for (auto pkt : req_pkts) {
        DPRINTF(GPUCoalescer, "makeWriteCompletePkts: instSeqNum %d\n",
                key);
        assert(pkt->cmd == MemCmd::WriteReq);

        PacketPtr writeCompletePkt =
            new Packet(pkt->req, MemCmd::WriteCompleteResp);
        writeCompletePkt->setAddr(pkt->getAddr());
        writeCompletePkt->senderState = pkt->senderState;
        m_writeCompletePktMap[key].push_back(writeCompletePkt);
    }
}

void
VIPERCoalescer::writeCompleteCallback(Addr addr, uint64_t instSeqNum)
{
    DPRINTF(GPUCoalescer, "writeCompleteCallback: instSeqNum %d addr 0x%x\n",
            instSeqNum, addr);

    auto key = instSeqNum;
    assert(m_writeCompletePktMap.count(key) == 1 &&
           !m_writeCompletePktMap[key].empty());

    // Use the erase-remove idiom: respond with every pending
    // writeCompletePkt that belongs to the completed line and drop it from
    // the per-instruction list in one pass.
    m_writeCompletePktMap[key].erase(
        std::remove_if(
            m_writeCompletePktMap[key].begin(),
            m_writeCompletePktMap[key].end(),
            [addr](PacketPtr writeCompletePkt) -> bool {
                if (makeLineAddress(writeCompletePkt->getAddr()) == addr) {
                    RubyPort::SenderState *ss =
                        safe_cast<RubyPort::SenderState *>
                            (writeCompletePkt->senderState);
                    MemResponsePort *port = ss->port;
                    assert(port != NULL);

                    // Restore the packet's original senderState before
                    // handing it back to the requesting port.
                    writeCompletePkt->senderState = ss->predecessor;
                    delete ss;
                    port->hitCallback(writeCompletePkt);
                    return true;
                }
                return false;
            }
        ),
        m_writeCompletePktMap[key].end()
    );

    trySendRetries();

    if (m_writeCompletePktMap[key].empty())
        m_writeCompletePktMap.erase(key);
}
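
// Sketch of the invalidation handshake implemented by the two functions
// below: invTCP() enqueues one REPLACEMENT request per TCP block and counts
// each in m_num_pending_invs; the protocol answers each with an
// invTCPCallback(), which decrements the counter. Only when the counter
// returns to zero is the original MemSyncReq (held in m_cache_inv_pkt)
// responded to. For instance (illustrative numbers), a 16 KiB TCP with
// 64-byte lines would enqueue up to 256 evictions before the sync request
// can complete.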
void
VIPERCoalescer::invTCPCallback(Addr addr)
{
    assert(m_cache_inv_pkt && m_num_pending_invs > 0);

    m_num_pending_invs--;

    if (m_num_pending_invs == 0) {
        std::vector<PacketPtr> pkt_list { m_cache_inv_pkt };
        m_cache_inv_pkt = nullptr;
        completeHitCallback(pkt_list);
    }
}

/**
 * Invalidate TCP
 */
void
VIPERCoalescer::invTCP()
{
    int size = m_dataCache_ptr->getNumBlocks();
    DPRINTF(GPUCoalescer,
            "There are %d Invalidations outstanding before Cache Walk\n",
            m_num_pending_invs);
    // Walk the cache
    for (int i = 0; i < size; i++) {
        Addr addr = m_dataCache_ptr->getAddressAtIdx(i);
        // Evict Read-only data
        RubyRequestType request_type = RubyRequestType_REPLACEMENT;
        std::shared_ptr<RubyRequest> msg = std::make_shared<RubyRequest>(
            clockEdge(), addr, 0, 0, request_type,
            RubyAccessMode_Supervisor, nullptr);
        DPRINTF(GPUCoalescer, "Evicting addr 0x%x\n", addr);
        assert(m_mandatory_q_ptr != NULL);
        Tick latency = cyclesToTicks(
            m_controller->mandatoryQueueLatency(request_type));
        m_mandatory_q_ptr->enqueue(msg, clockEdge(), latency);
        m_num_pending_invs++;
    }
    DPRINTF(GPUCoalescer,
            "There are %d Invalidations outstanding after Cache Walk\n",
            m_num_pending_invs);
}

} // namespace ruby
} // namespace gem5