mem: Add crossbar latencies

This patch introduces latencies in the crossbar that were neglected
before. In particular, it adds three parameters to the crossbar model:
frontend_latency, forward_latency, and response_latency. Along with
these parameters, three corresponding members are added:
frontendLatency, forwardLatency, and responseLatency. The coherent
crossbar has an additional snoop_response_latency.

The latency of the request path through the xbar is set to
--> frontendLatency + forwardLatency

If the snoop filter is enabled, the request path latency is additionally
charged with the lookup latency of the snoop filter:
--> frontendLatency + SF(lookupLatency) + forwardLatency.

The latency of the response path through the xbar is set instead as
--> responseLatency.

In the case of a snoop response, if the response is treated as a normal
response, the associated latency is again
--> responseLatency;

If it is instead forwarded as a snoop response, we add an additional variable
+ snoopResponseLatency
and the associated latency is
--> snoopResponseLatency;

Furthermore, this patch lets the crossbar progress on the next clock
edge after an unused retry, changing the time the crossbar considers
itself busy after sending a retry that was not acted upon.
This commit is contained in:
Marco Balboni
2015-03-02 04:00:46 -05:00
parent 7be9d4eb67
commit d35dd71ab4
7 changed files with 150 additions and 54 deletions

View File

@@ -1,4 +1,4 @@
# Copyright (c) 2012 ARM Limited
# Copyright (c) 2012, 2015 ARM Limited
# All rights reserved.
#
# The license below extends only to copyright in the software and shall
@@ -49,10 +49,29 @@ class BaseXBar(MemObject):
type = 'BaseXBar'
abstract = True
cxx_header = "mem/xbar.hh"
slave = VectorSlavePort("vector port for connecting masters")
master = VectorMasterPort("vector port for connecting slaves")
header_cycles = Param.Cycles(1, "cycles of overhead per transaction")
width = Param.Unsigned(8, "xbar width (bytes)")
slave = VectorSlavePort("Vector port for connecting masters")
master = VectorMasterPort("Vector port for connecting slaves")
# Latencies governing the time taken for the various paths a
# packet has through the crossbar. Note that the crossbar itself
# does not add the latency due to assumptions in the coherency
# mechanism. Instead the latency is annotated on the packet and
# left to the neighbouring modules.
#
# A request incurs the frontend latency, possibly snoop filter
# lookup latency, and forward latency. A response incurs the
# response latency. Frontend latency encompasses arbitration and
# deciding what to do when a request arrives. The forward latency
# is the latency involved once a decision is made to forward the
# request. The response latency is similar to the forward
# latency, but for responses rather than requests.
frontend_latency = Param.Cycles(3, "Frontend latency")
forward_latency = Param.Cycles(4, "Forward latency")
response_latency = Param.Cycles(2, "Response latency")
# Width governing the throughput of the crossbar
width = Param.Unsigned(8, "Datapath width per port (bytes)")
# The default port can be left unconnected, or be used to connect
# a default slave port
@@ -74,12 +93,21 @@ class CoherentXBar(BaseXBar):
type = 'CoherentXBar'
cxx_header = "mem/coherent_xbar.hh"
# The coherent crossbar additionally has snoop responses that are
# forwarded after a specific latency.
snoop_response_latency = Param.Cycles(4, "Snoop response latency")
# An optional snoop filter
snoop_filter = Param.SnoopFilter(NULL, "Selected snoop filter")
system = Param.System(Parent.any, "System that the crossbar belongs to.")
snoop_filter = Param.SnoopFilter(NULL, "Selected snoop filter.")
class SnoopFilter(SimObject):
type = 'SnoopFilter'
cxx_header = "mem/snoop_filter.hh"
lookup_latency = Param.Cycles(3, "lookup latency (cycles)")
# Lookup latency of the snoop filter, added to requests that pass
# through a coherent crossbar.
lookup_latency = Param.Cycles(1, "Lookup latency")
system = Param.System(Parent.any, "System that the crossbar belongs to.")

View File

@@ -1,5 +1,5 @@
/*
* Copyright (c) 2011-2014 ARM Limited
* Copyright (c) 2011-2015 ARM Limited
* All rights reserved
*
* The license below extends only to copyright in the software and shall
@@ -55,7 +55,8 @@
#include "sim/system.hh"
CoherentXBar::CoherentXBar(const CoherentXBarParams *p)
: BaseXBar(p), system(p->system), snoopFilter(p->snoop_filter)
: BaseXBar(p), system(p->system), snoopFilter(p->snoop_filter),
snoopResponseLatency(p->snoop_response_latency)
{
// create the ports based on the size of the master and slave
// vector ports, and the presence of the default port, the ports
@@ -167,8 +168,17 @@ CoherentXBar::recvTimingReq(PacketPtr pkt, PortID slave_port_id)
unsigned int pkt_size = pkt->hasData() ? pkt->getSize() : 0;
unsigned int pkt_cmd = pkt->cmdToIndex();
calcPacketTiming(pkt);
Tick packetFinishTime = curTick() + pkt->payloadDelay;
// store the old header delay so we can restore it if needed
Tick old_header_delay = pkt->headerDelay;
// a request sees the frontend and forward latency
Tick xbar_delay = (frontendLatency + forwardLatency) * clockPeriod();
// set the packet header and payload delay
calcPacketTiming(pkt, xbar_delay);
// determine how long the crossbar layer is busy
Tick packetFinishTime = clockEdge(Cycles(1)) + pkt->payloadDelay;
// uncacheable requests need never be snooped
if (!pkt->req->isUncacheable() && !system->bypassCaches()) {
@@ -177,6 +187,10 @@ CoherentXBar::recvTimingReq(PacketPtr pkt, PortID slave_port_id)
if (snoopFilter) {
// check with the snoop filter where to forward this packet
auto sf_res = snoopFilter->lookupRequest(pkt, *src_port);
// If the snoop filter is enabled, the total time required by a packet
// to be delivered through the xbar also has to be charged with
// the lookup latency of the snoop filter (sf_res.second).
pkt->headerDelay += sf_res.second * clockPeriod();
packetFinishTime += sf_res.second * clockPeriod();
DPRINTF(CoherentXBar, "recvTimingReq: src %s %s 0x%x"\
" SF size: %i lat: %i\n", src_port->name(),
@@ -221,15 +235,15 @@ CoherentXBar::recvTimingReq(PacketPtr pkt, PortID slave_port_id)
assert(!is_express_snoop);
assert(!pkt->memInhibitAsserted());
// undo the calculation so we can check for 0 again
pkt->headerDelay = pkt->payloadDelay = 0;
// restore the header delay
pkt->headerDelay = old_header_delay;
DPRINTF(CoherentXBar, "recvTimingReq: src %s %s 0x%x RETRY\n",
src_port->name(), pkt->cmdString(), pkt->getAddr());
// update the layer state and schedule an idle event
reqLayers[master_port_id]->failedTiming(src_port,
clockEdge(headerCycles));
clockEdge(Cycles(1)));
} else {
// express snoops currently bypass the crossbar state entirely
if (!is_express_snoop) {
@@ -300,8 +314,14 @@ CoherentXBar::recvTimingResp(PacketPtr pkt, PortID master_port_id)
unsigned int pkt_size = pkt->hasData() ? pkt->getSize() : 0;
unsigned int pkt_cmd = pkt->cmdToIndex();
calcPacketTiming(pkt);
Tick packetFinishTime = curTick() + pkt->payloadDelay;
// a response sees the response latency
Tick xbar_delay = responseLatency * clockPeriod();
// set the packet header and payload delay
calcPacketTiming(pkt, xbar_delay);
// determine how long the crossbar layer is busy
Tick packetFinishTime = clockEdge(Cycles(1)) + pkt->payloadDelay;
if (snoopFilter && !pkt->req->isUncacheable() && !system->bypassCaches()) {
// let the snoop filter inspect the response and update its state
@@ -426,8 +446,17 @@ CoherentXBar::recvTimingSnoopResp(PacketPtr pkt, PortID slave_port_id)
// responses are never express snoops
assert(!pkt->isExpressSnoop());
calcPacketTiming(pkt);
Tick packetFinishTime = curTick() + pkt->payloadDelay;
// a snoop response sees the snoop response latency, and if it is
// forwarded as a normal response, the response latency
Tick xbar_delay =
(forwardAsSnoop ? snoopResponseLatency : responseLatency) *
clockPeriod();
// set the packet header and payload delay
calcPacketTiming(pkt, xbar_delay);
// determine how long the crossbar layer is busy
Tick packetFinishTime = clockEdge(Cycles(1)) + pkt->payloadDelay;
// forward it either as a snoop response or a normal response
if (forwardAsSnoop) {

View File

@@ -1,5 +1,5 @@
/*
* Copyright (c) 2011-2014 ARM Limited
* Copyright (c) 2011-2015 ARM Limited
* All rights reserved
*
* The license below extends only to copyright in the software and shall
@@ -272,6 +272,9 @@ class CoherentXBar : public BaseXBar
* broadcast needed for probes. NULL denotes an absent filter. */
SnoopFilter *snoopFilter;
/** Cycles of snoop response latency.*/
const Cycles snoopResponseLatency;
/** Function called by the port when the crossbar is receiving a Timing
request packet.*/
bool recvTimingReq(PacketPtr pkt, PortID slave_port_id);

View File

@@ -1,5 +1,5 @@
/*
* Copyright (c) 2011-2014 ARM Limited
* Copyright (c) 2011-2015 ARM Limited
* All rights reserved
*
* The license below extends only to copyright in the software and shall
@@ -127,8 +127,17 @@ NoncoherentXBar::recvTimingReq(PacketPtr pkt, PortID slave_port_id)
unsigned int pkt_size = pkt->hasData() ? pkt->getSize() : 0;
unsigned int pkt_cmd = pkt->cmdToIndex();
calcPacketTiming(pkt);
Tick packetFinishTime = curTick() + pkt->payloadDelay;
// store the old header delay so we can restore it if needed
Tick old_header_delay = pkt->headerDelay;
// a request sees the frontend and forward latency
Tick xbar_delay = (frontendLatency + forwardLatency) * clockPeriod();
// set the packet header and payload delay
calcPacketTiming(pkt, xbar_delay);
// determine how long the crossbar layer is busy
Tick packetFinishTime = clockEdge(Cycles(1)) + pkt->payloadDelay;
// before forwarding the packet (and possibly altering it),
// remember if we are expecting a response
@@ -145,12 +154,12 @@ NoncoherentXBar::recvTimingReq(PacketPtr pkt, PortID slave_port_id)
DPRINTF(NoncoherentXBar, "recvTimingReq: src %s %s 0x%x RETRY\n",
src_port->name(), pkt->cmdString(), pkt->getAddr());
// undo the calculation so we can check for 0 again
pkt->headerDelay = pkt->payloadDelay = 0;
// restore the header delay as it is additive
pkt->headerDelay = old_header_delay;
// occupy until the header is sent
reqLayers[master_port_id]->failedTiming(src_port,
clockEdge(headerCycles));
clockEdge(Cycles(1)));
return false;
}
@@ -200,8 +209,14 @@ NoncoherentXBar::recvTimingResp(PacketPtr pkt, PortID master_port_id)
unsigned int pkt_size = pkt->hasData() ? pkt->getSize() : 0;
unsigned int pkt_cmd = pkt->cmdToIndex();
calcPacketTiming(pkt);
Tick packetFinishTime = curTick() + pkt->payloadDelay;
// a response sees the response latency
Tick xbar_delay = responseLatency * clockPeriod();
// set the packet header and payload delay
calcPacketTiming(pkt, xbar_delay);
// determine how long the crossbar layer is busy
Tick packetFinishTime = clockEdge(Cycles(1)) + pkt->payloadDelay;
// send the packet through the destination slave port
bool success M5_VAR_USED = slavePorts[slave_port_id]->sendTimingResp(pkt);

View File

@@ -1,5 +1,5 @@
/*
* Copyright (c) 2011-2014 ARM Limited
* Copyright (c) 2011-2015 ARM Limited
* All rights reserved
*
* The license below extends only to copyright in the software and shall

View File

@@ -56,7 +56,10 @@
BaseXBar::BaseXBar(const BaseXBarParams *p)
: MemObject(p),
headerCycles(p->header_cycles), width(p->width),
frontendLatency(p->frontend_latency),
forwardLatency(p->forward_latency),
responseLatency(p->response_latency),
width(p->width),
gotAddrRanges(p->port_default_connection_count +
p->port_master_connection_count, false),
gotAllAddrRanges(false), defaultPortID(InvalidPortID),
@@ -102,34 +105,41 @@ BaseXBar::getSlavePort(const std::string &if_name, PortID idx)
}
void
BaseXBar::calcPacketTiming(PacketPtr pkt)
BaseXBar::calcPacketTiming(PacketPtr pkt, Tick header_delay)
{
// the crossbar will be called at a time that is not necessarily
// coinciding with its own clock, so start by determining how long
// until the next clock edge (could be zero)
Tick offset = clockEdge() - curTick();
// Determine how many cycles are needed to send the data
// If the packet has no data we take into account just the cycle to send
// the header.
unsigned dataCycles = pkt->hasData() ? divCeil(pkt->getSize(), width) : 0;
// the header delay depends on the path through the crossbar, and
// we therefore rely on the caller to provide the actual
// value
pkt->headerDelay += offset + header_delay;
// before setting the bus delay fields of the packet, ensure that
// the delay from any previous crossbar has been accounted for
if (pkt->headerDelay != 0 || pkt->payloadDelay != 0)
panic("Packet %s already has delay (%d, %d) that should be "
"accounted for.\n", pkt->cmdString(), pkt->headerDelay,
pkt->payloadDelay);
// note that we add the header delay to the existing value, and
// align it to the crossbar clock
// The headerDelay takes into account the relative time to deliver the
// header of the packet. It will be charged of the additional delay of
// the xbar if the packet goes through it.
pkt->headerDelay = (headerCycles + 1) * clockPeriod() + offset;
// do a quick sanity check to ensure the timings are not being
// ignored, note that this specific value may cause problems for
// slower interconnects
panic_if(pkt->headerDelay > SimClock::Int::us,
"Encountered header delay exceeding 1 us\n");
// The payloadDelay takes into account the relative time to deliver the
// payload of the packet. If the packet has no data its value is just one
// tick (due to header) plus the offset value.
pkt->payloadDelay = (headerCycles + dataCycles) * clockPeriod() + offset;
if (pkt->hasData()) {
// the payloadDelay takes into account the relative time to
// deliver the payload of the packet, after the header delay;
// we take the maximum since the payload delay could already
// be longer than what this particular crossbar enforces.
pkt->payloadDelay = std::max<Tick>(pkt->payloadDelay,
divCeil(pkt->getSize(), width) *
clockPeriod());
}
// the payload delay is not paying for the clock offset as that is
// already done using the header delay, and the payload delay is
// also used to determine how long the crossbar layer is busy and
// thus regulates throughput
}
template <typename SrcType, typename DstType>
@@ -274,14 +284,15 @@ BaseXBar::Layer<SrcType,DstType>::retryWaiting()
sendRetry(retryingPort);
// If the layer is still in the retry state, sendTiming wasn't
// called in zero time (e.g. the cache does this), burn a cycle
// called in zero time (e.g. the cache does this when a writeback
// is squashed)
if (state == RETRY) {
// update the state to busy and reset the retrying port, we
// have done our bit and sent the retry
state = BUSY;
// occupy the crossbar layer until the next cycle ends
occupyLayer(xbar.clockEdge(Cycles(1)));
// occupy the crossbar layer until the next clock edge
occupyLayer(xbar.clockEdge());
}
}

View File

@@ -309,8 +309,15 @@ class BaseXBar : public MemObject
{ retry_port->sendRetrySnoopResp(); }
};
/** cycles of overhead per transaction */
const Cycles headerCycles;
/**
* Cycles of front-end pipeline including the delay to accept the request
* and to decode the address.
*/
const Cycles frontendLatency;
/** Cycles of forward latency */
const Cycles forwardLatency;
/** Cycles of response latency */
const Cycles responseLatency;
/** the width of the xbar in bytes */
const uint32_t width;
@@ -404,8 +411,11 @@ class BaseXBar : public MemObject
* headerDelay and payloadDelay fields of the packet
* object with the relative number of ticks required to transmit
* the header and the payload, respectively.
*
* @param pkt Packet to populate with timings
* @param header_delay Header delay to be added
*/
void calcPacketTiming(PacketPtr pkt);
void calcPacketTiming(PacketPtr pkt, Tick header_delay);
/**
* Remember for each of the master ports of the crossbar if we got