mem: Perform write merging in the DRAM write queue

This patch implements basic write merging in the DRAM write queue to
avoid redundant bursts. When a new access is added to the queue it is
compared against the existing entries, and if it either intersects or
immediately succeeds/precedes an existing item, the two are merged.

No attempt is currently made to avoid iterating over the existing
items when determining whether merging is possible.
This commit is contained in:
Andreas Hansson
2013-08-19 03:52:31 -04:00
parent 243f135e5f
commit ac42db8134
2 changed files with 81 additions and 15 deletions

View File

@@ -1,5 +1,5 @@
/*
* Copyright (c) 2010-2012 ARM Limited
* Copyright (c) 2010-2013 ARM Limited
* All rights reserved
*
* The license below extends only to copyright in the software and shall
@@ -312,7 +312,10 @@ SimpleDRAM::addToReadQueue(PacketPtr pkt, unsigned int pktCount)
bool foundInWrQ = false;
list<DRAMPacket*>::const_iterator i;
for (i = writeQueue.begin(); i != writeQueue.end(); ++i) {
if ((*i)->addr == addr && (*i)->size >= size){
// check if the read is subsumed in the write entry we are
// looking at
if ((*i)->addr <= addr &&
(addr + size) <= ((*i)->addr + (*i)->size)) {
foundInWrQ = true;
servicedByWrQ++;
pktsServicedByWrQ++;
@@ -394,6 +397,10 @@ SimpleDRAM::processWriteEvent()
chooseNextWrite();
DRAMPacket* dram_pkt = writeQueue.front();
// sanity check
assert(dram_pkt->size <= burstSize);
// What's the earliest the request can be put on the bus
Tick schedTime = std::max(curTick(), busBusyUntil);
@@ -513,23 +520,79 @@ SimpleDRAM::addToWriteQueue(PacketPtr pkt, unsigned int pktCount)
writePktSize[ceilLog2(size)]++;
writeBursts++;
DRAMPacket* dram_pkt = decodeAddr(pkt, addr, size);
// see if we can merge with an existing item in the write
// queue and keep track of whether we have merged or not, as
// there is only ever one item to merge with
bool merged = false;
auto w = writeQueue.begin();
assert(writeQueue.size() < writeBufferSize);
wrQLenPdf[writeQueue.size()]++;
while(!merged && w != writeQueue.end()) {
// either of the two could be first, if they are the same
// it does not matter which way we go
if ((*w)->addr >= addr) {
if ((addr + size) >= ((*w)->addr + (*w)->size)) {
// check if the existing one is completely
// subsumed in the new one
DPRINTF(DRAM, "Merging write covering existing burst\n");
merged = true;
// update both the address and the size
(*w)->addr = addr;
(*w)->size = size;
} else if ((addr + size) >= (*w)->addr &&
((*w)->addr + (*w)->size - addr) <= burstSize) {
// the new one is just before or partially
// overlapping with the existing one, and together
// they fit within a burst
DPRINTF(DRAM, "Merging write before existing burst\n");
merged = true;
// the existing queue item needs to be adjusted with
// respect to both address and size
(*w)->addr = addr;
(*w)->size = (*w)->addr + (*w)->size - addr;
}
} else {
if (((*w)->addr + (*w)->size) >= (addr + size)) {
// check if the new one is completely subsumed in the
// existing one
DPRINTF(DRAM, "Merging write into existing burst\n");
merged = true;
// no adjustments necessary
} else if (((*w)->addr + (*w)->size) >= addr &&
(addr + size - (*w)->addr) <= burstSize) {
// the existing one is just before or partially
// overlapping with the new one, and together
// they fit within a burst
DPRINTF(DRAM, "Merging write after existing burst\n");
merged = true;
// the address is right, and only the size has
// to be adjusted
(*w)->size = addr + size - (*w)->addr;
}
}
++w;
}
DPRINTF(DRAM, "Adding to write queue\n");
// if the item was not merged we need to create a new write
// and enqueue it
if (!merged) {
DRAMPacket* dram_pkt = decodeAddr(pkt, addr, size);
writeQueue.push_back(dram_pkt);
assert(writeQueue.size() < writeBufferSize);
wrQLenPdf[writeQueue.size()]++;
// Update stats
uint32_t bank_id = banksPerRank * dram_pkt->rank + dram_pkt->bank;
assert(bank_id < ranksPerChannel * banksPerRank);
perBankWrReqs[bank_id]++;
DPRINTF(DRAM, "Adding to write queue\n");
avgWrQLen = writeQueue.size();
writeQueue.push_back(dram_pkt);
bytesConsumedWr += dram_pkt->size;
// Update stats
uint32_t bank_id = banksPerRank * dram_pkt->rank + dram_pkt->bank;
assert(bank_id < ranksPerChannel * banksPerRank);
perBankWrReqs[bank_id]++;
avgWrQLen = writeQueue.size();
}
bytesConsumedWr += size;
bytesWritten += burstSize;
// Starting address of next dram pkt (aligned to burstSize boundary)
@@ -1077,6 +1140,9 @@ SimpleDRAM::moveToRespQ()
DRAMPacket* dram_pkt = readQueue.front();
readQueue.pop_front();
// sanity check
assert(dram_pkt->size <= burstSize);
// Insert into response queue sorted by readyTime
// It will be sent back to the requestor at its
// readyTime

View File

@@ -209,13 +209,13 @@ class SimpleDRAM : public AbstractMemory
* reason is to keep the address offset so we can accurately check
* incoming read packets with packets in the write queue.
*/
const Addr addr;
Addr addr;
/**
* The size of this dram packet in bytes
* It is always equal or smaller than DRAM burst size
*/
const unsigned int size;
unsigned int size;
/**
* A pointer to the BurstHelper if this DRAMPacket is a split packet