mem: Add crossbar latencies
This patch introduces latencies in the crossbar that were neglected before. In particular, it adds three parameters to the crossbar model: frontend_latency, forward_latency, and response_latency. Along with these parameters, three corresponding members are added: frontendLatency, forwardLatency, and responseLatency. The coherent crossbar has an additional snoop_response_latency. The latency of the request path through the xbar is set as --> frontendLatency + forwardLatency. If the snoop filter is enabled, the request path latency also includes the look-up latency of the snoop filter: --> frontendLatency + SF(lookupLatency) + forwardLatency. The latency of the response path through the xbar is instead set as --> responseLatency. For a snoop response, if the response is treated as a normal response, the associated latency is again --> responseLatency; if it is instead forwarded as a snoop response, an additional member, snoopResponseLatency, is used and the associated latency is --> snoopResponseLatency. Furthermore, this patch lets the crossbar progress on the next clock edge after an unused retry, changing the time the crossbar considers itself busy after sending a retry that was not acted upon.
This commit is contained in:
@@ -1,4 +1,4 @@
|
||||
# Copyright (c) 2012 ARM Limited
|
||||
# Copyright (c) 2012, 2015 ARM Limited
|
||||
# All rights reserved.
|
||||
#
|
||||
# The license below extends only to copyright in the software and shall
|
||||
@@ -49,10 +49,29 @@ class BaseXBar(MemObject):
|
||||
type = 'BaseXBar'
|
||||
abstract = True
|
||||
cxx_header = "mem/xbar.hh"
|
||||
slave = VectorSlavePort("vector port for connecting masters")
|
||||
master = VectorMasterPort("vector port for connecting slaves")
|
||||
header_cycles = Param.Cycles(1, "cycles of overhead per transaction")
|
||||
width = Param.Unsigned(8, "xbar width (bytes)")
|
||||
|
||||
slave = VectorSlavePort("Vector port for connecting masters")
|
||||
master = VectorMasterPort("Vector port for connecting slaves")
|
||||
|
||||
# Latencies governing the time taken for the various paths a
|
||||
# packet has through the crossbar. Note that the crossbar itself
|
||||
# does not add the latency due to assumptions in the coherency
|
||||
# mechanism. Instead the latency is annotated on the packet and
|
||||
# left to the neighbouring modules.
|
||||
#
|
||||
# A request incurs the frontend latency, possibly snoop filter
|
||||
# lookup latency, and forward latency. A response incurs the
|
||||
# response latency. Frontend latency encompasses arbitration and
|
||||
# deciding what to do when a request arrives. The forward latency
|
||||
# is the latency involved once a decision is made to forward the
|
||||
# request. The response latency is similar to the forward
|
||||
# latency, but for responses rather than requests.
|
||||
frontend_latency = Param.Cycles(3, "Frontend latency")
|
||||
forward_latency = Param.Cycles(4, "Forward latency")
|
||||
response_latency = Param.Cycles(2, "Response latency")
|
||||
|
||||
# Width governing the throughput of the crossbar
|
||||
width = Param.Unsigned(8, "Datapath width per port (bytes)")
|
||||
|
||||
# The default port can be left unconnected, or be used to connect
|
||||
# a default slave port
|
||||
@@ -74,12 +93,21 @@ class CoherentXBar(BaseXBar):
|
||||
type = 'CoherentXBar'
|
||||
cxx_header = "mem/coherent_xbar.hh"
|
||||
|
||||
# The coherent crossbar additionally has snoop responses that are
|
||||
# forwarded after a specific latency.
|
||||
snoop_response_latency = Param.Cycles(4, "Snoop response latency")
|
||||
|
||||
# An optional snoop filter
|
||||
snoop_filter = Param.SnoopFilter(NULL, "Selected snoop filter")
|
||||
|
||||
system = Param.System(Parent.any, "System that the crossbar belongs to.")
|
||||
snoop_filter = Param.SnoopFilter(NULL, "Selected snoop filter.")
|
||||
|
||||
class SnoopFilter(SimObject):
|
||||
type = 'SnoopFilter'
|
||||
cxx_header = "mem/snoop_filter.hh"
|
||||
lookup_latency = Param.Cycles(3, "lookup latency (cycles)")
|
||||
|
||||
# Lookup latency of the snoop filter, added to requests that pass
|
||||
# through a coherent crossbar.
|
||||
lookup_latency = Param.Cycles(1, "Lookup latency")
|
||||
|
||||
system = Param.System(Parent.any, "System that the crossbar belongs to.")
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (c) 2011-2014 ARM Limited
|
||||
* Copyright (c) 2011-2015 ARM Limited
|
||||
* All rights reserved
|
||||
*
|
||||
* The license below extends only to copyright in the software and shall
|
||||
@@ -55,7 +55,8 @@
|
||||
#include "sim/system.hh"
|
||||
|
||||
CoherentXBar::CoherentXBar(const CoherentXBarParams *p)
|
||||
: BaseXBar(p), system(p->system), snoopFilter(p->snoop_filter)
|
||||
: BaseXBar(p), system(p->system), snoopFilter(p->snoop_filter),
|
||||
snoopResponseLatency(p->snoop_response_latency)
|
||||
{
|
||||
// create the ports based on the size of the master and slave
|
||||
// vector ports, and the presence of the default port, the ports
|
||||
@@ -167,8 +168,17 @@ CoherentXBar::recvTimingReq(PacketPtr pkt, PortID slave_port_id)
|
||||
unsigned int pkt_size = pkt->hasData() ? pkt->getSize() : 0;
|
||||
unsigned int pkt_cmd = pkt->cmdToIndex();
|
||||
|
||||
calcPacketTiming(pkt);
|
||||
Tick packetFinishTime = curTick() + pkt->payloadDelay;
|
||||
// store the old header delay so we can restore it if needed
|
||||
Tick old_header_delay = pkt->headerDelay;
|
||||
|
||||
// a request sees the frontend and forward latency
|
||||
Tick xbar_delay = (frontendLatency + forwardLatency) * clockPeriod();
|
||||
|
||||
// set the packet header and payload delay
|
||||
calcPacketTiming(pkt, xbar_delay);
|
||||
|
||||
// determine how long the crossbar layer is busy
|
||||
Tick packetFinishTime = clockEdge(Cycles(1)) + pkt->payloadDelay;
|
||||
|
||||
// uncacheable requests need never be snooped
|
||||
if (!pkt->req->isUncacheable() && !system->bypassCaches()) {
|
||||
@@ -177,6 +187,10 @@ CoherentXBar::recvTimingReq(PacketPtr pkt, PortID slave_port_id)
|
||||
if (snoopFilter) {
|
||||
// check with the snoop filter where to forward this packet
|
||||
auto sf_res = snoopFilter->lookupRequest(pkt, *src_port);
|
||||
// If SnoopFilter is enabled, the total time required by a packet
|
||||
// to be delivered through the xbar has to be charged also with
|
||||
// the lookup latency of the snoop filter (sf_res.second).
|
||||
pkt->headerDelay += sf_res.second * clockPeriod();
|
||||
packetFinishTime += sf_res.second * clockPeriod();
|
||||
DPRINTF(CoherentXBar, "recvTimingReq: src %s %s 0x%x"\
|
||||
" SF size: %i lat: %i\n", src_port->name(),
|
||||
@@ -221,15 +235,15 @@ CoherentXBar::recvTimingReq(PacketPtr pkt, PortID slave_port_id)
|
||||
assert(!is_express_snoop);
|
||||
assert(!pkt->memInhibitAsserted());
|
||||
|
||||
// undo the calculation so we can check for 0 again
|
||||
pkt->headerDelay = pkt->payloadDelay = 0;
|
||||
// restore the header delay
|
||||
pkt->headerDelay = old_header_delay;
|
||||
|
||||
DPRINTF(CoherentXBar, "recvTimingReq: src %s %s 0x%x RETRY\n",
|
||||
src_port->name(), pkt->cmdString(), pkt->getAddr());
|
||||
|
||||
// update the layer state and schedule an idle event
|
||||
reqLayers[master_port_id]->failedTiming(src_port,
|
||||
clockEdge(headerCycles));
|
||||
clockEdge(Cycles(1)));
|
||||
} else {
|
||||
// express snoops currently bypass the crossbar state entirely
|
||||
if (!is_express_snoop) {
|
||||
@@ -300,8 +314,14 @@ CoherentXBar::recvTimingResp(PacketPtr pkt, PortID master_port_id)
|
||||
unsigned int pkt_size = pkt->hasData() ? pkt->getSize() : 0;
|
||||
unsigned int pkt_cmd = pkt->cmdToIndex();
|
||||
|
||||
calcPacketTiming(pkt);
|
||||
Tick packetFinishTime = curTick() + pkt->payloadDelay;
|
||||
// a response sees the response latency
|
||||
Tick xbar_delay = responseLatency * clockPeriod();
|
||||
|
||||
// set the packet header and payload delay
|
||||
calcPacketTiming(pkt, xbar_delay);
|
||||
|
||||
// determine how long the crossbar layer is busy
|
||||
Tick packetFinishTime = clockEdge(Cycles(1)) + pkt->payloadDelay;
|
||||
|
||||
if (snoopFilter && !pkt->req->isUncacheable() && !system->bypassCaches()) {
|
||||
// let the snoop filter inspect the response and update its state
|
||||
@@ -426,8 +446,17 @@ CoherentXBar::recvTimingSnoopResp(PacketPtr pkt, PortID slave_port_id)
|
||||
// responses are never express snoops
|
||||
assert(!pkt->isExpressSnoop());
|
||||
|
||||
calcPacketTiming(pkt);
|
||||
Tick packetFinishTime = curTick() + pkt->payloadDelay;
|
||||
// a snoop response sees the snoop response latency, and if it is
|
||||
// forwarded as a normal response, the response latency
|
||||
Tick xbar_delay =
|
||||
(forwardAsSnoop ? snoopResponseLatency : responseLatency) *
|
||||
clockPeriod();
|
||||
|
||||
// set the packet header and payload delay
|
||||
calcPacketTiming(pkt, xbar_delay);
|
||||
|
||||
// determine how long the crossbar layer is busy
|
||||
Tick packetFinishTime = clockEdge(Cycles(1)) + pkt->payloadDelay;
|
||||
|
||||
// forward it either as a snoop response or a normal response
|
||||
if (forwardAsSnoop) {
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (c) 2011-2014 ARM Limited
|
||||
* Copyright (c) 2011-2015 ARM Limited
|
||||
* All rights reserved
|
||||
*
|
||||
* The license below extends only to copyright in the software and shall
|
||||
@@ -272,6 +272,9 @@ class CoherentXBar : public BaseXBar
|
||||
* broadcast needed for probes. NULL denotes an absent filter. */
|
||||
SnoopFilter *snoopFilter;
|
||||
|
||||
/** Cycles of snoop response latency.*/
|
||||
const Cycles snoopResponseLatency;
|
||||
|
||||
/** Function called by the port when the crossbar is receiving a Timing
|
||||
request packet.*/
|
||||
bool recvTimingReq(PacketPtr pkt, PortID slave_port_id);
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (c) 2011-2014 ARM Limited
|
||||
* Copyright (c) 2011-2015 ARM Limited
|
||||
* All rights reserved
|
||||
*
|
||||
* The license below extends only to copyright in the software and shall
|
||||
@@ -127,8 +127,17 @@ NoncoherentXBar::recvTimingReq(PacketPtr pkt, PortID slave_port_id)
|
||||
unsigned int pkt_size = pkt->hasData() ? pkt->getSize() : 0;
|
||||
unsigned int pkt_cmd = pkt->cmdToIndex();
|
||||
|
||||
calcPacketTiming(pkt);
|
||||
Tick packetFinishTime = curTick() + pkt->payloadDelay;
|
||||
// store the old header delay so we can restore it if needed
|
||||
Tick old_header_delay = pkt->headerDelay;
|
||||
|
||||
// a request sees the frontend and forward latency
|
||||
Tick xbar_delay = (frontendLatency + forwardLatency) * clockPeriod();
|
||||
|
||||
// set the packet header and payload delay
|
||||
calcPacketTiming(pkt, xbar_delay);
|
||||
|
||||
// determine how long the crossbar layer is busy
|
||||
Tick packetFinishTime = clockEdge(Cycles(1)) + pkt->payloadDelay;
|
||||
|
||||
// before forwarding the packet (and possibly altering it),
|
||||
// remember if we are expecting a response
|
||||
@@ -145,12 +154,12 @@ NoncoherentXBar::recvTimingReq(PacketPtr pkt, PortID slave_port_id)
|
||||
DPRINTF(NoncoherentXBar, "recvTimingReq: src %s %s 0x%x RETRY\n",
|
||||
src_port->name(), pkt->cmdString(), pkt->getAddr());
|
||||
|
||||
// undo the calculation so we can check for 0 again
|
||||
pkt->headerDelay = pkt->payloadDelay = 0;
|
||||
// restore the header delay as it is additive
|
||||
pkt->headerDelay = old_header_delay;
|
||||
|
||||
// occupy until the header is sent
|
||||
reqLayers[master_port_id]->failedTiming(src_port,
|
||||
clockEdge(headerCycles));
|
||||
clockEdge(Cycles(1)));
|
||||
|
||||
return false;
|
||||
}
|
||||
@@ -200,8 +209,14 @@ NoncoherentXBar::recvTimingResp(PacketPtr pkt, PortID master_port_id)
|
||||
unsigned int pkt_size = pkt->hasData() ? pkt->getSize() : 0;
|
||||
unsigned int pkt_cmd = pkt->cmdToIndex();
|
||||
|
||||
calcPacketTiming(pkt);
|
||||
Tick packetFinishTime = curTick() + pkt->payloadDelay;
|
||||
// a response sees the response latency
|
||||
Tick xbar_delay = responseLatency * clockPeriod();
|
||||
|
||||
// set the packet header and payload delay
|
||||
calcPacketTiming(pkt, xbar_delay);
|
||||
|
||||
// determine how long the crossbar layer is busy
|
||||
Tick packetFinishTime = clockEdge(Cycles(1)) + pkt->payloadDelay;
|
||||
|
||||
// send the packet through the destination slave port
|
||||
bool success M5_VAR_USED = slavePorts[slave_port_id]->sendTimingResp(pkt);
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (c) 2011-2014 ARM Limited
|
||||
* Copyright (c) 2011-2015 ARM Limited
|
||||
* All rights reserved
|
||||
*
|
||||
* The license below extends only to copyright in the software and shall
|
||||
|
||||
@@ -56,7 +56,10 @@
|
||||
|
||||
BaseXBar::BaseXBar(const BaseXBarParams *p)
|
||||
: MemObject(p),
|
||||
headerCycles(p->header_cycles), width(p->width),
|
||||
frontendLatency(p->frontend_latency),
|
||||
forwardLatency(p->forward_latency),
|
||||
responseLatency(p->response_latency),
|
||||
width(p->width),
|
||||
gotAddrRanges(p->port_default_connection_count +
|
||||
p->port_master_connection_count, false),
|
||||
gotAllAddrRanges(false), defaultPortID(InvalidPortID),
|
||||
@@ -102,34 +105,41 @@ BaseXBar::getSlavePort(const std::string &if_name, PortID idx)
|
||||
}
|
||||
|
||||
void
|
||||
BaseXBar::calcPacketTiming(PacketPtr pkt)
|
||||
BaseXBar::calcPacketTiming(PacketPtr pkt, Tick header_delay)
|
||||
{
|
||||
// the crossbar will be called at a time that is not necessarily
|
||||
// coinciding with its own clock, so start by determining how long
|
||||
// until the next clock edge (could be zero)
|
||||
Tick offset = clockEdge() - curTick();
|
||||
|
||||
// Determine how many cycles are needed to send the data
|
||||
// If the packet has no data we take into account just the cycle to send
|
||||
// the header.
|
||||
unsigned dataCycles = pkt->hasData() ? divCeil(pkt->getSize(), width) : 0;
|
||||
// the header delay depends on the path through the crossbar, and
|
||||
// we therefore rely on the caller to provide the actual
|
||||
// value
|
||||
pkt->headerDelay += offset + header_delay;
|
||||
|
||||
// before setting the bus delay fields of the packet, ensure that
|
||||
// the delay from any previous crossbar has been accounted for
|
||||
if (pkt->headerDelay != 0 || pkt->payloadDelay != 0)
|
||||
panic("Packet %s already has delay (%d, %d) that should be "
|
||||
"accounted for.\n", pkt->cmdString(), pkt->headerDelay,
|
||||
pkt->payloadDelay);
|
||||
// note that we add the header delay to the existing value, and
|
||||
// align it to the crossbar clock
|
||||
|
||||
// The headerDelay takes into account the relative time to deliver the
|
||||
// header of the packet. It will be charged of the additional delay of
|
||||
// the xbar if the packet goes through it.
|
||||
pkt->headerDelay = (headerCycles + 1) * clockPeriod() + offset;
|
||||
// do a quick sanity check to ensure the timings are not being
|
||||
// ignored, note that this specific value may cause problems for
|
||||
// slower interconnects
|
||||
panic_if(pkt->headerDelay > SimClock::Int::us,
|
||||
"Encountered header delay exceeding 1 us\n");
|
||||
|
||||
// The payloadDelay takes into account the relative time to deliver the
|
||||
// payload of the packet. If the packet has no data its value is just one
|
||||
// tick (due to header) plus the offset value.
|
||||
pkt->payloadDelay = (headerCycles + dataCycles) * clockPeriod() + offset;
|
||||
if (pkt->hasData()) {
|
||||
// the payloadDelay takes into account the relative time to
|
||||
// deliver the payload of the packet, after the header delay,
|
||||
// we take the maximum since the payload delay could already
|
||||
// be longer than what this particular crossbar enforces.
|
||||
pkt->payloadDelay = std::max<Tick>(pkt->payloadDelay,
|
||||
divCeil(pkt->getSize(), width) *
|
||||
clockPeriod());
|
||||
}
|
||||
|
||||
// the payload delay is not paying for the clock offset as that is
|
||||
// already done using the header delay, and the payload delay is
|
||||
// also used to determine how long the crossbar layer is busy and
|
||||
// thus regulates throughput
|
||||
}
|
||||
|
||||
template <typename SrcType, typename DstType>
|
||||
@@ -274,14 +284,15 @@ BaseXBar::Layer<SrcType,DstType>::retryWaiting()
|
||||
sendRetry(retryingPort);
|
||||
|
||||
// If the layer is still in the retry state, sendTiming wasn't
|
||||
// called in zero time (e.g. the cache does this), burn a cycle
|
||||
// called in zero time (e.g. the cache does this when a writeback
|
||||
// is squashed)
|
||||
if (state == RETRY) {
|
||||
// update the state to busy and reset the retrying port, we
|
||||
// have done our bit and sent the retry
|
||||
state = BUSY;
|
||||
|
||||
// occupy the crossbar layer until the next cycle ends
|
||||
occupyLayer(xbar.clockEdge(Cycles(1)));
|
||||
// occupy the crossbar layer until the next clock edge
|
||||
occupyLayer(xbar.clockEdge());
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -309,8 +309,15 @@ class BaseXBar : public MemObject
|
||||
{ retry_port->sendRetrySnoopResp(); }
|
||||
};
|
||||
|
||||
/** cycles of overhead per transaction */
|
||||
const Cycles headerCycles;
|
||||
/**
|
||||
* Cycles of front-end pipeline including the delay to accept the request
|
||||
* and to decode the address.
|
||||
*/
|
||||
const Cycles frontendLatency;
|
||||
/** Cycles of forward latency */
|
||||
const Cycles forwardLatency;
|
||||
/** Cycles of response latency */
|
||||
const Cycles responseLatency;
|
||||
/** the width of the xbar in bytes */
|
||||
const uint32_t width;
|
||||
|
||||
@@ -404,8 +411,11 @@ class BaseXBar : public MemObject
|
||||
* headerDelay and payloadDelay fields of the packet
|
||||
* object with the relative number of ticks required to transmit
|
||||
* the header and the payload, respectively.
|
||||
*
|
||||
* @param pkt Packet to populate with timings
|
||||
* @param header_delay Header delay to be added
|
||||
*/
|
||||
void calcPacketTiming(PacketPtr pkt);
|
||||
void calcPacketTiming(PacketPtr pkt, Tick header_delay);
|
||||
|
||||
/**
|
||||
* Remember for each of the master ports of the crossbar if we got
|
||||
|
||||
Reference in New Issue
Block a user