/*
 * Copyright (c) 2020 Advanced Micro Devices, Inc.
 * Copyright (c) 2020 Inria
 * Copyright (c) 2016 Georgia Institute of Technology
 * Copyright (c) 2008 Princeton University
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are
 * met: redistributions of source code must retain the above copyright
 * notice, this list of conditions and the following disclaimer;
 * redistributions in binary form must reproduce the above copyright
 * notice, this list of conditions and the following disclaimer in the
 * documentation and/or other materials provided with the distribution;
 * neither the name of the copyright holders nor the names of its
 * contributors may be used to endorse or promote products derived from
 * this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

#include "mem/ruby/network/garnet/NetworkInterface.hh"

#include <cassert>
#include <cmath>
#include <sstream>
#include <vector>

#include "base/cast.hh"
#include "debug/RubyNetwork.hh"
#include "mem/ruby/network/MessageBuffer.hh"
#include "mem/ruby/network/garnet/Credit.hh"
#include "mem/ruby/network/garnet/flitBuffer.hh"
#include "mem/ruby/slicc_interface/Message.hh"

namespace gem5
{

namespace ruby
{

namespace garnet
{

/**
 * Build a network interface from its parameters.
 *
 * The number of VCs per vnet is not known yet: it is left at 0 and the
 * output VC buffers are left empty until the first output port is added
 * (see addOutPort()).
 */
NetworkInterface::NetworkInterface(const Params &p)
  : ClockedObject(p), Consumer(this), m_id(p.id),
    m_virtual_networks(p.virt_nets), m_vc_per_vnet(0),
    m_vc_allocator(m_virtual_networks, 0),
    m_deadlock_threshold(p.garnet_deadlock_threshold),
    vc_busy_counter(m_virtual_networks, 0)
{
    m_stall_count.resize(m_virtual_networks);
    niOutVcs.resize(0);
}

/**
 * Attach an incoming flit link and the credit link used to return
 * credits for it.
 *
 * If the VCs-per-vnet value is already known it is pushed to both links
 * here; otherwise addOutPort() does so once the value is established.
 *
 * @param in_link Link on which flits arrive at this NI.
 * @param credit_link Link on which this NI sends credits back.
 */
void
NetworkInterface::addInPort(NetworkLink *in_link,
                            CreditLink *credit_link)
{
    InputPort *newInPort = new InputPort(in_link, credit_link);
    inPorts.push_back(newInPort);
    DPRINTF(RubyNetwork, "Adding input port:%s with vnets %s\n",
            in_link->name(), newInPort->printVnets());

    in_link->setLinkConsumer(this);
    credit_link->setSourceQueue(newInPort->outCreditQueue(), this);
    if (m_vc_per_vnet != 0) {
        in_link->setVcsPerVnet(m_vc_per_vnet);
        credit_link->setVcsPerVnet(m_vc_per_vnet);
    }
}

/**
 * Attach an outgoing flit link and the credit link on which credits for
 * it are received.
 *
 * The first call fixes the number of VCs per vnet for the whole NI and
 * allocates the per-VC output state. Later calls must request the same
 * number of VCs; a mismatch is fatal.
 *
 * @param out_link Link on which this NI sends flits.
 * @param credit_link Link on which credits for out_link arrive.
 * @param router_id ID of the router this port connects to.
 * @param consumerVcs VCs per vnet requested by the consumer; must be > 0.
 */
void
NetworkInterface::addOutPort(NetworkLink *out_link,
                             CreditLink *credit_link,
                             SwitchID router_id, uint32_t consumerVcs)
{
    OutputPort *newOutPort = new OutputPort(out_link, credit_link, router_id);
    outPorts.push_back(newOutPort);

    assert(consumerVcs > 0);
    // Different physical links are not allowed to have different numbers
    // of VCs. If the Network Interface ever has to support a different VC
    // count per connected physical link, the logic within the outport and
    // inport needs to change.
    if (niOutVcs.size() == 0) {
        m_vc_per_vnet = consumerVcs;
        int num_vcs = consumerVcs * m_virtual_networks;
        niOutVcs.resize(num_vcs);
        outVcState.reserve(num_vcs);
        m_ni_out_vcs_enqueue_time.resize(num_vcs);
        // instantiating the NI flit buffers
        for (int i = 0; i < num_vcs; i++) {
            m_ni_out_vcs_enqueue_time[i] = Tick(INFINITE_);
            outVcState.emplace_back(i, m_net_ptr, consumerVcs);
        }

        // Reset VC Per VNET for input links already instantiated.
        // Input ports added before the first output port were created
        // while m_vc_per_vnet was still 0, so both their flit link and
        // their own credit link have to be configured now.
        for (auto &iPort: inPorts) {
            iPort->inNetLink()->setVcsPerVnet(m_vc_per_vnet);
            iPort->outCreditLink()->setVcsPerVnet(m_vc_per_vnet);
        }
    } else {
        fatal_if(consumerVcs != m_vc_per_vnet,
        "%s: Connected Physical links have different vc requests: %d and %d\n",
        name(), consumerVcs, m_vc_per_vnet);
    }

    DPRINTF(RubyNetwork, "OutputPort:%s Vnet: %s\n",
            out_link->name(), newOutPort->printVnets());

    out_link->setSourceQueue(newOutPort->outFlitQueue(), this);
    out_link->setVcsPerVnet(m_vc_per_vnet);
    credit_link->setLinkConsumer(this);
    credit_link->setVcsPerVnet(m_vc_per_vnet);
}

/**
 * Record the protocol message buffers this NI reads from and writes to.
 *
 * This NI is registered as the consumer of every non-null input buffer.
 *
 * @param in Buffers carrying messages from the protocol; entries may be
 *           null.
 * @param out Buffers into which received messages are enqueued.
 */
void
NetworkInterface::addNode(std::vector<MessageBuffer *>& in,
                          std::vector<MessageBuffer *>& out)
{
    inNode_ptr = in;
    outNode_ptr = out;

    for (auto& it : in) {
        if (it != nullptr) {
            it->setConsumer(this);
        }
    }
}

/**
 * Callback registered on output MessageBuffers that hold up stalled
 * flits (see wakeup()).
 */
void
NetworkInterface::dequeueCallback()
{
    // An output MessageBuffer has dequeued something this cycle and there
    // is now space to enqueue a stalled message. However, we cannot wake
    // on the same cycle as the dequeue. Schedule a wake at the soonest
    // possible time (next cycle).
    scheduleEventAbsolute(clockEdge(Cycles(1)));
}

/**
 * Update the network-wide statistics for a flit that is being ejected.
 *
 * Flit counters and latencies are always updated; packet counters and
 * latencies are updated only for TAIL_ and HEAD_TAIL_ flits.
 *
 * @param t_flit The flit being ejected; its dequeue time must be set.
 */
void
NetworkInterface::incrementStats(flit *t_flit)
{
    int vnet = t_flit->get_vnet();

    // Latency
    m_net_ptr->increment_received_flits(vnet);
    Tick network_delay =
        t_flit->get_dequeue_time() -
        t_flit->get_enqueue_time() - cyclesToTicks(Cycles(1));
    Tick src_queueing_delay = t_flit->get_src_delay();
    Tick dest_queueing_delay = (curTick() - t_flit->get_dequeue_time());
    Tick queueing_delay = src_queueing_delay + dest_queueing_delay;

    m_net_ptr->increment_flit_network_latency(network_delay, vnet);
    m_net_ptr->increment_flit_queueing_latency(queueing_delay, vnet);

    if (t_flit->get_type() == TAIL_ || t_flit->get_type() == HEAD_TAIL_) {
        m_net_ptr->increment_received_packets(vnet);
        m_net_ptr->increment_packet_network_latency(network_delay, vnet);
        m_net_ptr->increment_packet_queueing_latency(queueing_delay, vnet);
    }

    // Hops
    m_net_ptr->increment_total_hops(t_flit->get_route().hops_traversed);
}

/*
 * The NI wakeup checks whether there are any ready messages in the protocol
 * buffer. If yes, it picks that up, flitisizes it into a number of flits and
 * puts it into an output buffer and schedules the output link. On a wakeup
 * it also checks whether there are flits in the input link. If yes, it picks
 * them up and if the flit is a tail, the NI inserts the corresponding message
 * into the protocol buffer. It also checks for credits being sent by the
 * downstream router.
 */
void
NetworkInterface::wakeup()
{
    std::ostringstream oss;
    for (auto &oPort: outPorts) {
        oss << oPort->routerID() << "[" << oPort->printVnets() << "] ";
    }
    DPRINTF(RubyNetwork, "Network Interface %d connected to router:%s "
            "woke up. Period: %ld\n", m_id, oss.str(), clockPeriod());

    assert(curTick() == clockEdge());
    MsgPtr msg_ptr;
    Tick curTime = clockEdge();

    // Checking for messages coming from the protocol
    // can pick up a message/cycle for each virtual net
    const int num_in_buffers = static_cast<int>(inNode_ptr.size());
    for (int vnet = 0; vnet < num_in_buffers; ++vnet) {
        MessageBuffer *b = inNode_ptr[vnet];
        if (b == nullptr) {
            continue;
        }

        if (b->isReady(curTime)) { // Is there a message waiting
            msg_ptr = b->peekMsgPtr();
            // Only dequeue once every destination of the message has
            // been turned into flits.
            if (flitisizeMessage(msg_ptr, vnet)) {
                b->dequeue(curTime);
            }
        }
    }

    scheduleOutputLink();

    // Check if there are flits stalling a virtual channel. Track if a
    // message is enqueued to restrict ejection to one message per cycle.
    checkStallQueue();

    /*********** Check the incoming flit link **********/
    DPRINTF(RubyNetwork, "Number of input ports: %d\n", inPorts.size());
    for (auto &iPort: inPorts) {
        NetworkLink *inNetLink = iPort->inNetLink();
        if (inNetLink->isReady(curTick())) {
            flit *t_flit = inNetLink->consumeLink();
            DPRINTF(RubyNetwork, "Recieved flit:%s\n", *t_flit);
            assert(t_flit->m_width == iPort->bitWidth());

            int vnet = t_flit->get_vnet();
            t_flit->set_dequeue_time(curTick());

            // If a tail flit is received, enqueue into the protocol buffers
            // if space is available. Otherwise, exchange non-tail flits for
            // credits.
            if (t_flit->get_type() == TAIL_ ||
                t_flit->get_type() == HEAD_TAIL_) {
                if (!iPort->messageEnqueuedThisCycle &&
                    outNode_ptr[vnet]->areNSlotsAvailable(1, curTime)) {
                    // Space is available. Enqueue to protocol buffer.
                    outNode_ptr[vnet]->enqueue(t_flit->get_msg_ptr(), curTime,
                                               cyclesToTicks(Cycles(1)));

                    // Simply send a credit back since we are not buffering
                    // this flit in the NI
                    Credit *cFlit = new Credit(t_flit->get_vc(),
                                               true, curTick());
                    iPort->sendCredit(cFlit);
                    // Update stats and delete flit pointer
                    incrementStats(t_flit);
                    delete t_flit;
                } else {
                    // No space available- Place tail flit in stall queue and
                    // set up a callback for when protocol buffer is dequeued.
                    // Stat update and flit pointer deletion will occur upon
                    // unstall.
                    iPort->m_stall_queue.push_back(t_flit);
                    m_stall_count[vnet]++;

                    outNode_ptr[vnet]->registerDequeueCallback([this]() {
                        dequeueCallback(); });
                }
            } else {
                // Non-tail flit. Send back a credit but not VC free signal.
                Credit *cFlit = new Credit(t_flit->get_vc(), false,
                                           curTick());
                // Simply send a credit back since we are not buffering
                // this flit in the NI
                iPort->sendCredit(cFlit);

                // Update stats and delete flit pointer.
                incrementStats(t_flit);
                delete t_flit;
            }
        }
    }

    /****************** Check the incoming credit link *******/

    for (auto &oPort: outPorts) {
        CreditLink *inCreditLink = oPort->inCreditLink();
        if (inCreditLink->isReady(curTick())) {
            Credit *t_credit =
                static_cast<Credit *>(inCreditLink->consumeLink());
            outVcState[t_credit->get_vc()].increment_credit();
            if (t_credit->is_free_signal()) {
                outVcState[t_credit->get_vc()].setState(IDLE_,
                    curTick());
            }
            delete t_credit;
        }
    }

    // It is possible to enqueue multiple outgoing credit flits if a message
    // was unstalled in the same cycle as a new message arrives. In this
    // case, we should schedule another wakeup to ensure the credit is sent
    // back.
    for (auto &iPort: inPorts) {
        if (iPort->outCreditQueue()->getSize() > 0) {
            DPRINTF(RubyNetwork, "Sending a credit %s via %s at %ld\n",
            *(iPort->outCreditQueue()->peekTopFlit()),
            iPort->outCreditLink()->name(), clockEdge(Cycles(1)));
            iPort->outCreditLink()->
                scheduleEventAbsolute(clockEdge(Cycles(1)));
        }
    }
    checkReschedule();
}

/**
 * Try to eject flits that were stalled because their destination
 * protocol buffer had no free slot.
 *
 * For each input port, at most one stalled flit is ejected per call;
 * messageEnqueuedThisCycle records whether that happened for the port.
 */
void
NetworkInterface::checkStallQueue()
{
    const Tick curTime = clockEdge();

    // Check all stall queues.
    // There is one stall queue for each input link
    for (auto &iPort: inPorts) {
        iPort->messageEnqueuedThisCycle = false;

        if (iPort->m_stall_queue.empty()) {
            continue;
        }

        for (auto stallIter = iPort->m_stall_queue.begin();
             stallIter != iPort->m_stall_queue.end(); ) {
            flit *stallFlit = *stallIter;
            int vnet = stallFlit->get_vnet();

            // If we can now eject to the protocol buffer,
            // send back credits
            if (outNode_ptr[vnet]->areNSlotsAvailable(1, curTime)) {
                outNode_ptr[vnet]->enqueue(stallFlit->get_msg_ptr(),
                    curTime, cyclesToTicks(Cycles(1)));

                // Send back a credit with free signal now that the
                // VC is no longer stalled.
                Credit *cFlit = new Credit(stallFlit->get_vc(), true,
                                           curTick());
                iPort->sendCredit(cFlit);

                // Update Stats
                incrementStats(stallFlit);

                // Flit can now safely be deleted and removed from stall
                // queue. The iterator is invalidated by erase(), which
                // is fine because the loop is left right below.
                delete stallFlit;
                iPort->m_stall_queue.erase(stallIter);
                m_stall_count[vnet]--;

                // If there are no more stalled messages for this vnet, the
                // callback on its MessageBuffer is not needed.
                if (m_stall_count[vnet] == 0) {
                    outNode_ptr[vnet]->unregisterDequeueCallback();
                }

                iPort->messageEnqueuedThisCycle = true;
                break;
            } else {
                ++stallIter;
            }
        }
    }
}

/**
 * Embed the protocol message into flits.
 *
 * A message with several destinations is converted into one unicast
 * packet per destination. Each packet needs a free output VC; if none is
 * available the function returns early, having removed the destinations
 * already handled from the original message so they are not resent.
 *
 * @param msg_ptr Message taken from the protocol buffer.
 * @param vnet Virtual network the message belongs to.
 * @return true if every destination was flitisized, false if an output
 *         VC could not be obtained.
 */
bool
NetworkInterface::flitisizeMessage(MsgPtr msg_ptr, int vnet)
{
    Message *net_msg_ptr = msg_ptr.get();
    NetDest net_msg_dest = net_msg_ptr->getDestination();

    // gets all the destinations associated with this message.
    std::vector<NodeID> dest_nodes = net_msg_dest.getAllDest();

    // Number of flits is dependent on the link bandwidth available.
    // This is expressed in terms of bytes/cycle or the flit size
    OutputPort *oPort = getOutportForVnet(vnet);
    assert(oPort);

    const auto msg_size =
        m_net_ptr->MessageSizeType_to_int(net_msg_ptr->getMessageSize());
    int num_flits = static_cast<int>(divCeil(
        static_cast<float>(msg_size),
        static_cast<float>(oPort->bitWidth())));

    DPRINTF(RubyNetwork, "Message Size:%d vnet:%d bitWidth:%d\n",
        msg_size, vnet, oPort->bitWidth());

    const bool is_multicast = dest_nodes.size() > 1;

    // loop to convert all multicast messages into unicast messages
    for (const NodeID destID : dest_nodes) {

        // this will return a free output virtual channel
        int vc = calculateVC(vnet);

        if (vc == -1) {
            return false;
        }
        MsgPtr new_msg_ptr = msg_ptr->clone();

        Message *new_net_msg_ptr = new_msg_ptr.get();
        if (is_multicast) {
            NetDest personal_dest;
            for (int m = 0; m < (int) MachineType_NUM; m++) {
                if ((destID >= MachineType_base_number((MachineType) m)) &&
                    destID < MachineType_base_number((MachineType) (m+1))) {
                    // calculating the NetDest associated with this destID
                    personal_dest.clear();
                    personal_dest.add((MachineID) {(MachineType) m, (destID -
                        MachineType_base_number((MachineType) m))});
                    new_net_msg_ptr->getDestination() = personal_dest;
                    break;
                }
            }
            net_msg_dest.removeNetDest(personal_dest);
            // removing the destination from the original message to reflect
            // that a message with this particular destination has been
            // flitisized and an output vc is acquired
            net_msg_ptr->getDestination().removeNetDest(personal_dest);
        }

        // Embed Route into the flits
        // NetDest format is used by the routing table
        // Custom routing algorithms just need destID

        RouteInfo route;
        route.vnet = vnet;
        route.net_dest = new_net_msg_ptr->getDestination();
        route.src_ni = m_id;
        route.src_router = oPort->routerID();
        route.dest_ni = destID;
        route.dest_router = m_net_ptr->get_router_id(destID, vnet);

        // initialize hops_traversed to -1
        // so that the first router increments it to 0
        route.hops_traversed = -1;

        m_net_ptr->increment_injected_packets(vnet);
        m_net_ptr->update_traffic_distribution(route);
        int packet_id = m_net_ptr->getNextPacketID();
        for (int i = 0; i < num_flits; i++) {
            m_net_ptr->increment_injected_flits(vnet);
            flit *fl = new flit(packet_id,
                i, vc, vnet, route, num_flits, new_msg_ptr,
                msg_size,
                oPort->bitWidth(), curTick());

            fl->set_src_delay(curTick() - msg_ptr->getTime());
            niOutVcs[vc].insert(fl);
        }

        m_ni_out_vcs_enqueue_time[vc] = curTick();
        outVcState[vc].setState(ACTIVE_, curTick());
    }
    return true;
}

/**
 * Look for a free (IDLE_) output VC in the given vnet.
 *
 * The search starts at the vnet's allocator position and visits each of
 * the vnet's VCs once, advancing the allocator as it goes. Each failed
 * call bumps a per-vnet busy counter; exceeding the deadlock threshold
 * triggers a panic.
 *
 * @param vnet Virtual network to allocate a VC in.
 * @return The index of a free VC, or -1 if none is free.
 */
int
NetworkInterface::calculateVC(int vnet)
{
    for (int i = 0; i < m_vc_per_vnet; i++) {
        int delta = m_vc_allocator[vnet];
        m_vc_allocator[vnet]++;
        if (m_vc_allocator[vnet] == m_vc_per_vnet) {
            m_vc_allocator[vnet] = 0;
        }

        if (outVcState[(vnet*m_vc_per_vnet) + delta].isInState(
                    IDLE_, curTick())) {
            vc_busy_counter[vnet] = 0;
            return ((vnet*m_vc_per_vnet) + delta);
        }
    }

    vc_busy_counter[vnet] += 1;
    panic_if(vc_busy_counter[vnet] > m_deadlock_threshold,
        "%s: Possible network deadlock in vnet: %d at time: %llu \n",
        name(), vnet, curTick());

    return -1;
}

/**
 * Pick at most one flit to send on the given output port.
 *
 * VCs are scanned starting after the port's round-robin position. A VC
 * is eligible when its vnet is supported by the port, its buffer has a
 * ready flit and it has a credit. For ordered vnets, a VC is skipped if
 * another VC of the same vnet holds a ready flit that was enqueued
 * earlier.
 *
 * @param oPort Output port to schedule.
 */
void
NetworkInterface::scheduleOutputPort(OutputPort *oPort)
{
    const int num_vcs = static_cast<int>(niOutVcs.size());
    int vc = oPort->vcRoundRobin();

    for (int i = 0; i < num_vcs; i++) {
        vc++;
        if (vc == num_vcs) {
            vc = 0;
        }

        int t_vnet = get_vnet(vc);
        if (!oPort->isVnetSupported(t_vnet)) {
            continue;
        }

        // model buffer backpressure
        if (!(niOutVcs[vc].isReady(curTick()) &&
              outVcState[vc].has_credit())) {
            continue;
        }

        bool is_candidate_vc = true;
        int vc_base = t_vnet * m_vc_per_vnet;

        if (m_net_ptr->isVNetOrdered(t_vnet)) {
            for (int vc_offset = 0; vc_offset < m_vc_per_vnet;
                 vc_offset++) {
                int t_vc = vc_base + vc_offset;
                if (niOutVcs[t_vc].isReady(curTick())) {
                    if (m_ni_out_vcs_enqueue_time[t_vc] <
                        m_ni_out_vcs_enqueue_time[vc]) {
                        is_candidate_vc = false;
                        break;
                    }
                }
            }
        }
        if (!is_candidate_vc) {
            continue;
        }

        // Update the round robin arbiter
        oPort->vcRoundRobin(vc);

        outVcState[vc].decrement_credit();

        // Just removing the top flit
        flit *t_flit = niOutVcs[vc].getTopFlit();
        t_flit->set_time(clockEdge(Cycles(1)));

        // Scheduling the flit
        scheduleFlit(t_flit);

        if (t_flit->get_type() == TAIL_ ||
            t_flit->get_type() == HEAD_TAIL_) {
            m_ni_out_vcs_enqueue_time[vc] = Tick(INFINITE_);
        }

        // Done with this port, continue to schedule
        // other ports
        return;
    }
}

/** This function looks at the NI buffers
 *  if some buffer has flits which are ready to traverse the link in the next
 *  cycle, and the downstream output vc associated with this flit has buffers
 *  left, the link is scheduled for the next cycle
 */
void
NetworkInterface::scheduleOutputLink()
{
    // Schedule each output link
    for (auto &oPort: outPorts) {
        scheduleOutputPort(oPort);
    }
}

/**
 * Return the first input port that supports the given vnet.
 *
 * @param vnet Virtual network to look up.
 * @return The matching input port, or nullptr if there is none.
 */
NetworkInterface::InputPort *
NetworkInterface::getInportForVnet(int vnet)
{
    for (auto &iPort : inPorts) {
        if (iPort->isVnetSupported(vnet)) {
            return iPort;
        }
    }

    return nullptr;
}

/*
 * This function returns the outport which supports the given vnet.
 * Currently, HeteroGarnet does not support multiple outports to
 * support same vnet. Thus, this function returns the first-and
 * only outport which supports the vnet.
 */
NetworkInterface::OutputPort *
NetworkInterface::getOutportForVnet(int vnet)
{
    for (auto &oPort : outPorts) {
        if (oPort->isVnetSupported(vnet)) {
            return oPort;
        }
    }

    return nullptr;
}

/**
 * Queue a flit on the output port serving its vnet and schedule that
 * port's link for the next cycle.
 *
 * Panics if no output port supports the flit's vnet.
 *
 * @param t_flit Flit to send.
 */
void
NetworkInterface::scheduleFlit(flit *t_flit)
{
    OutputPort *oPort = getOutportForVnet(t_flit->get_vnet());

    if (!oPort) {
        panic("No output port found for vnet:%d\n", t_flit->get_vnet());
    }

    DPRINTF(RubyNetwork, "Scheduling at %s time:%ld flit:%s Message:%s\n",
    oPort->outNetLink()->name(), clockEdge(Cycles(1)),
    *t_flit, *(t_flit->get_msg_ptr()));
    oPort->outFlitQueue()->insert(t_flit);
    oPort->outNetLink()->scheduleEventAbsolute(clockEdge(Cycles(1)));
}

/**
 * Map a VC index to the vnet that owns it.
 *
 * VCs are laid out contiguously, m_vc_per_vnet per vnet. It is fatal if
 * the index does not fall inside any vnet's range.
 *
 * @param vc Global VC index.
 * @return The vnet the VC belongs to.
 */
int
NetworkInterface::get_vnet(int vc)
{
    for (int i = 0; i < m_virtual_networks; i++) {
        if (vc >= (i*m_vc_per_vnet) && vc < ((i+1)*m_vc_per_vnet)) {
            return i;
        }
    }
    fatal("Could not determine vc");
}

// Wakeup the NI in the next cycle if there are waiting
// messages in the protocol buffer, or waiting flits in the
// output VC buffer.
// Also check if we have to reschedule because of a clock period
// difference.
void
NetworkInterface::checkReschedule()
{
    for (const auto& it : inNode_ptr) {
        if (it == nullptr) {
            continue;
        }

        if (it->isReady(clockEdge())) { // Is there a message waiting
            scheduleEvent(Cycles(1));
            return;
        }
    }

    for (auto& ni_out_vc : niOutVcs) {
        if (ni_out_vc.isReady(clockEdge(Cycles(1)))) {
            scheduleEvent(Cycles(1));
            return;
        }
    }

    // Check if any input links have flits to be popped.
    // This can happen if the links are operating at
    // a higher frequency.
    for (auto &iPort : inPorts) {
        NetworkLink *inNetLink = iPort->inNetLink();
        if (inNetLink->isReady(curTick())) {
            scheduleEvent(Cycles(1));
            return;
        }
    }

    for (auto &oPort : outPorts) {
        CreditLink *inCreditLink = oPort->inCreditLink();
        if (inCreditLink->isReady(curTick())) {
            scheduleEvent(Cycles(1));
            return;
        }
    }
}

/** Write a short identifying tag for this object to the stream. */
void
NetworkInterface::print(std::ostream& out) const
{
    out << "[Network Interface]";
}

/**
 * Forward a functional read to every NI output VC buffer and every
 * output port flit queue.
 *
 * All buffers are visited even after one of them reports a hit.
 *
 * @return true if at least one buffer reported a successful read.
 */
bool
NetworkInterface::functionalRead(Packet *pkt, WriteMask &mask)
{
    bool read = false;
    for (auto& ni_out_vc : niOutVcs) {
        if (ni_out_vc.functionalRead(pkt, mask)) {
            read = true;
        }
    }

    for (auto &oPort: outPorts) {
        if (oPort->outFlitQueue()->functionalRead(pkt, mask)) {
            read = true;
        }
    }

    return read;
}

/**
 * Forward a functional write to every NI output VC buffer and every
 * output port flit queue.
 *
 * @return The sum of the write counts reported by the buffers.
 */
uint32_t
NetworkInterface::functionalWrite(Packet *pkt)
{
    uint32_t num_functional_writes = 0;
    for (auto& ni_out_vc : niOutVcs) {
        num_functional_writes += ni_out_vc.functionalWrite(pkt);
    }

    for (auto &oPort: outPorts) {
        num_functional_writes += oPort->outFlitQueue()->functionalWrite(pkt);
    }
    return num_functional_writes;
}

} // namespace garnet
} // namespace ruby
} // namespace gem5