mem: Make packet bus-related time accounting relative

This patch changes the bus-related time accounting done in the packet
to be relative. Besides making it easier to align the cache timing to
cache clock cycles, it also makes it possible to connect a Last-Level
Cache (LLC) directly to a memory controller without a bus in between.

The bus is unique in that it does not ever make the packets wait to
reflect the time spent forwarding them. Instead, the cache is
currently responsible for making the packets wait. Thus, the bus
annotates the packets with the time needed for the first word to
appear, and also the last word. The cache then delays the packets in
its queues before passing them on. It is worth noting that every
object attached to a bus (devices, memories, bridges, etc.) should be
doing this if we opt for keeping this way of accounting for the bus
timing.
This commit is contained in:
Andreas Hansson
2013-02-19 05:56:06 -05:00
parent 362160c8ae
commit b3fc8839c4
6 changed files with 74 additions and 49 deletions

View File

@@ -129,30 +129,24 @@ BaseBus::getSlavePort(const std::string &if_name, PortID idx)
}
}
Tick
void
BaseBus::calcPacketTiming(PacketPtr pkt)
{
// determine the header time rounded to the closest following
// clock edge
Tick headerTime = clockEdge(headerCycles);
// the bus will be called at a time that is not necessarily
// coinciding with its own clock, so start by determining how long
// until the next clock edge (could be zero)
Tick offset = nextCycle() - curTick();
// The packet will be sent. Figure out how long it occupies the bus, and
// how much of that time is for the first "word", aka bus width.
Cycles numCycles(0);
if (pkt->hasData()) {
// If a packet has data, it needs ceil(size/width) cycles to send it
unsigned dataSize = pkt->getSize();
numCycles = Cycles(divCeil(dataSize, width));
}
// determine how many cycles are needed to send the data
unsigned dataCycles = pkt->hasData() ? divCeil(pkt->getSize(), width) : 0;
// The first word will be delivered on the cycle after the header.
pkt->firstWordTime = headerTime + clockPeriod();
pkt->busFirstWordDelay = (headerCycles + 1) * clockPeriod() + offset;
// Note that currently finishTime can be smaller than
// firstWordTime if the packet has no data
pkt->finishTime = headerTime + numCycles * clockPeriod();
return headerTime;
// Note that currently busLastWordDelay can be smaller than
// busFirstWordDelay if the packet has no data
pkt->busLastWordDelay = (headerCycles + dataCycles) * clockPeriod() +
offset;
}
template <typename PortClass>

View File

@@ -302,12 +302,13 @@ class BaseBus : public MemObject
*/
AddrRangeList getAddrRanges() const;
/** Calculate the timing parameters for the packet. Updates the
* firstWordTime and finishTime fields of the packet object.
* Returns the tick at which the packet header is completed (which
* will be all that is sent if the target rejects the packet).
/**
* Calculate the timing parameters for the packet. Updates the
* busFirstWordDelay and busLastWordDelay fields of the packet
* object with the relative number of ticks required to transmit
* the header and the first word, and the last word, respectively.
*/
Tick calcPacketTiming(PacketPtr pkt);
void calcPacketTiming(PacketPtr pkt);
/**
* Ask everyone on the bus what their size is and determine the

View File

@@ -898,8 +898,9 @@ Cache<TagStore>::handleResponse(PacketPtr pkt)
// responseLatency is the latency of the return path
// from lower level caches/memory to an upper level cache or
// the core.
completion_time = responseLatency * clockPeriod() +
(transfer_offset ? pkt->finishTime : pkt->firstWordTime);
completion_time = curTick() + responseLatency * clockPeriod() +
(transfer_offset ? pkt->busLastWordDelay :
pkt->busFirstWordDelay);
assert(!target->pkt->req->isUncacheable());
@@ -914,15 +915,15 @@ Cache<TagStore>::handleResponse(PacketPtr pkt)
// responseLatency is the latency of the return path
// from lower level caches/memory to an upper level cache or
// the core.
completion_time = responseLatency * clockPeriod() +
pkt->finishTime;
completion_time = curTick() + responseLatency * clockPeriod() +
pkt->busLastWordDelay;
target->pkt->req->setExtraData(0);
} else {
// not a cache fill, just forwarding response
// responseLatency is the latency of the return path
// from lower level caches/memory to the core.
completion_time = responseLatency * clockPeriod() +
pkt->finishTime;
completion_time = curTick() + responseLatency * clockPeriod() +
pkt->busLastWordDelay;
if (pkt->isRead() && !is_error) {
target->pkt->setData(pkt->getPtr<uint8_t>());
}
@@ -984,7 +985,8 @@ Cache<TagStore>::handleResponse(PacketPtr pkt)
}
MSHRQueue *mq = mshr->queue;
mq->markPending(mshr);
requestMemSideBus((RequestCause)mq->index, pkt->finishTime);
requestMemSideBus((RequestCause)mq->index, curTick() +
pkt->busLastWordDelay);
} else {
mq->deallocate(mshr);
if (wasFull && !mq->isFull()) {
@@ -1217,7 +1219,7 @@ Cache<TagStore>::handleFill(PacketPtr pkt, BlkType *blk,
std::memcpy(blk->data, pkt->getPtr<uint8_t>(), blkSize);
}
blk->whenReady = pkt->finishTime;
blk->whenReady = curTick() + pkt->busLastWordDelay;
return blk;
}
@@ -1575,7 +1577,7 @@ Cache<TagStore>::getTimingPacket()
pkt = new Packet(tgt_pkt);
pkt->cmd = MemCmd::UpgradeFailResp;
pkt->senderState = mshr;
pkt->firstWordTime = pkt->finishTime = curTick();
pkt->busFirstWordDelay = pkt->busLastWordDelay = 0;
handleResponse(pkt);
return NULL;
} else if (mshr->isForwardNoResponse()) {

View File

@@ -135,8 +135,8 @@ CoherentBus::recvTimingReq(PacketPtr pkt, PortID slave_port_id)
// set the source port for routing of the response
pkt->setSrc(slave_port_id);
Tick headerFinishTime = is_express_snoop ? 0 : calcPacketTiming(pkt);
Tick packetFinishTime = is_express_snoop ? 0 : pkt->finishTime;
calcPacketTiming(pkt);
Tick packetFinishTime = pkt->busLastWordDelay + curTick();
// uncacheable requests need never be snooped
if (!pkt->req->isUncacheable() && !system->bypassCaches()) {
@@ -183,7 +183,7 @@ CoherentBus::recvTimingReq(PacketPtr pkt, PortID slave_port_id)
src_port->name(), pkt->cmdString(), pkt->getAddr());
// update the bus state and schedule an idle event
reqLayer.failedTiming(src_port, headerFinishTime);
reqLayer.failedTiming(src_port, clockEdge(Cycles(headerCycles)));
} else {
// update the bus state and schedule an idle event
reqLayer.succeededTiming(packetFinishTime);
@@ -211,7 +211,7 @@ CoherentBus::recvTimingResp(PacketPtr pkt, PortID master_port_id)
src_port->name(), pkt->cmdString(), pkt->getAddr());
calcPacketTiming(pkt);
Tick packetFinishTime = pkt->finishTime;
Tick packetFinishTime = pkt->busLastWordDelay + curTick();
// the packet is a normal response to a request that we should
// have seen passing through the bus
@@ -281,7 +281,7 @@ CoherentBus::recvTimingSnoopResp(PacketPtr pkt, PortID slave_port_id)
assert(!pkt->isExpressSnoop());
calcPacketTiming(pkt);
Tick packetFinishTime = pkt->finishTime;
Tick packetFinishTime = pkt->busLastWordDelay + curTick();
// determine if the response is from a snoop request we
// created as the result of a normal request (in which case it
@@ -385,7 +385,8 @@ CoherentBus::recvAtomic(PacketPtr pkt, PortID slave_port_id)
response_latency = snoop_response_latency;
}
pkt->finishTime = curTick() + response_latency;
// @todo: Not setting first-word time
pkt->busLastWordDelay = response_latency;
return response_latency;
}
@@ -405,7 +406,8 @@ CoherentBus::recvAtomicSnoop(PacketPtr pkt, PortID master_port_id)
if (snoop_response_cmd != MemCmd::InvalidCmd)
pkt->cmd = snoop_response_cmd;
pkt->finishTime = curTick() + snoop_response_latency;
// @todo: Not setting first-word time
pkt->busLastWordDelay = snoop_response_latency;
return snoop_response_latency;
}

View File

@@ -110,8 +110,8 @@ NoncoherentBus::recvTimingReq(PacketPtr pkt, PortID slave_port_id)
// set the source port for routing of the response
pkt->setSrc(slave_port_id);
Tick headerFinishTime = calcPacketTiming(pkt);
Tick packetFinishTime = pkt->finishTime;
calcPacketTiming(pkt);
Tick packetFinishTime = pkt->busLastWordDelay + curTick();
// since it is a normal request, determine the destination
// based on the address and attempt to send the packet
@@ -124,7 +124,8 @@ NoncoherentBus::recvTimingReq(PacketPtr pkt, PortID slave_port_id)
DPRINTF(NoncoherentBus, "recvTimingReq: src %s %s 0x%x RETRY\n",
src_port->name(), pkt->cmdString(), pkt->getAddr());
reqLayer.failedTiming(src_port, headerFinishTime);
// occupy until the header is sent
reqLayer.failedTiming(src_port, clockEdge(Cycles(headerCycles)));
return false;
}
@@ -152,7 +153,7 @@ NoncoherentBus::recvTimingResp(PacketPtr pkt, PortID master_port_id)
src_port->name(), pkt->cmdString(), pkt->getAddr());
calcPacketTiming(pkt);
Tick packetFinishTime = pkt->finishTime;
Tick packetFinishTime = pkt->busLastWordDelay + curTick();
// send the packet to the destination through one of our slave
// ports, as determined by the destination field
@@ -189,7 +190,8 @@ NoncoherentBus::recvAtomic(PacketPtr pkt, PortID slave_port_id)
// forward the request to the appropriate destination
Tick response_latency = masterPorts[dest_id]->sendAtomic(pkt);
pkt->finishTime = curTick() + response_latency;
// @todo: Not setting first-word time
pkt->busLastWordDelay = response_latency;
return response_latency;
}

View File

@@ -330,11 +330,23 @@ class Packet : public Printable
public:
/// The time at which the packet will be fully transmitted
Tick finishTime;
/**
* The extra delay from seeing the packet until the first word is
* transmitted by the bus that provided it (if any). This delay is
* used to communicate the bus waiting time to the neighbouring
* object (e.g. a cache) that actually makes the packet wait. As
* the delay is relative, a 32-bit unsigned should be sufficient.
*/
uint32_t busFirstWordDelay;
/// The time at which the first chunk of the packet will be transmitted
Tick firstWordTime;
/**
* The extra delay from seeing the packet until the last word is
* transmitted by the bus that provided it (if any). Similar to
* the first word delay, this is used to make up for the fact that
* the bus does not make the packet wait. As the delay is relative,
* a 32-bit unsigned should be sufficient.
*/
uint32_t busLastWordDelay;
/**
* A virtual base opaque structure used to hold state associated
@@ -583,6 +595,7 @@ class Packet : public Printable
: cmd(_cmd), req(_req), data(NULL),
src(InvalidPortID), dest(InvalidPortID),
bytesValidStart(0), bytesValidEnd(0),
busFirstWordDelay(0), busLastWordDelay(0),
senderState(NULL)
{
if (req->hasPaddr()) {
@@ -604,6 +617,7 @@ class Packet : public Printable
: cmd(_cmd), req(_req), data(NULL),
src(InvalidPortID), dest(InvalidPortID),
bytesValidStart(0), bytesValidEnd(0),
busFirstWordDelay(0), busLastWordDelay(0),
senderState(NULL)
{
if (req->hasPaddr()) {
@@ -625,7 +639,10 @@ class Packet : public Printable
: cmd(pkt->cmd), req(pkt->req),
data(pkt->flags.isSet(STATIC_DATA) ? pkt->data : NULL),
addr(pkt->addr), size(pkt->size), src(pkt->src), dest(pkt->dest),
bytesValidStart(pkt->bytesValidStart), bytesValidEnd(pkt->bytesValidEnd),
bytesValidStart(pkt->bytesValidStart),
bytesValidEnd(pkt->bytesValidEnd),
busFirstWordDelay(pkt->busFirstWordDelay),
busLastWordDelay(pkt->busLastWordDelay),
senderState(pkt->senderState)
{
if (!clearFlags)
@@ -664,6 +681,13 @@ class Packet : public Printable
addr = req->getPaddr();
size = req->getSize();
src = InvalidPortID;
dest = InvalidPortID;
bytesValidStart = 0;
bytesValidEnd = 0;
busFirstWordDelay = 0;
busLastWordDelay = 0;
flags.set(VALID_ADDR|VALID_SIZE);
deleteData();
}