cpu: Make the NonCachingSimpleCPU use a back door for fetch.

If the memory system can provide a back door to memory, store that, and
use it for subsequent accesses to the range it covers. For now, this
covers only fetch. That's because fetch will generally happen more than
loads and stores, and because it's relatively simple to implement since
we can ignore atomic operations, etc.

Some limited benchmarking suggests that this speeds up x86 linux boot
by about 20%, although my modifications to the config to remove caching
(which blocks the back door mechanism) also made gem5 crash, so it's
hard to say for sure if that's a valid result. The crash happened in the
same way before and after, so it's probably at least relatively
representative.

While this gives a pretty substantial performance boost, it will prevent
statistics from being collected at the memory, or on intermediate objects
in the interconnect like the bus. That is to be expected with this
memory mode, however.

Change-Id: I73f73017e454300fd4d61f58462eb4ec719b8d85
Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/36979
Reviewed-by: Jason Lowe-Power <power.jg@gmail.com>
Reviewed-by: Andreas Sandberg <andreas.sandberg@arm.com>
Maintainer: Jason Lowe-Power <power.jg@gmail.com>
Tested-by: kokoro <noreply+kokoro@google.com>
This commit is contained in:
Gabe Black
2020-11-04 01:03:25 -08:00
parent 56a549080f
commit b12422c79b
4 changed files with 54 additions and 15 deletions

View File

@@ -674,15 +674,7 @@ AtomicSimpleCPU::tick()
//if (decoder.needMoreBytes())
//{
icache_access = true;
Packet ifetch_pkt = Packet(ifetch_req, MemCmd::ReadReq);
ifetch_pkt.dataStatic(&inst);
icache_latency = sendPacket(icachePort, &ifetch_pkt);
assert(!ifetch_pkt.isError());
// ifetch_req is initialized to read the instruction
// directly into the CPU object's inst field.
icache_latency = fetchInstMem();
//}
}
@@ -747,6 +739,21 @@ AtomicSimpleCPU::tick()
reschedule(tickEvent, curTick() + latency, true);
}
Tick
AtomicSimpleCPU::fetchInstMem()
{
    // Wrap the fetch request in a read packet. Its payload is pointed at
    // the CPU object's inst field, so the fetched bytes are written
    // straight into it.
    Packet fetch_pkt(ifetch_req, MemCmd::ReadReq);
    fetch_pkt.dataStatic(&inst);

    // Issue the access through the instruction port and keep the latency
    // that sendPacket() reports for it.
    const Tick fetch_latency = sendPacket(icachePort, &fetch_pkt);
    assert(!fetch_pkt.isError());

    return fetch_latency;
}
void
AtomicSimpleCPU::regProbePoints()
{

View File

@@ -57,7 +57,6 @@ class AtomicSimpleCPU : public BaseSimpleCPU
void init() override;
protected:
EventFunctionWrapper tickEvent;
const int width;
@@ -102,6 +101,7 @@ class AtomicSimpleCPU : public BaseSimpleCPU
bool tryCompleteDrain();
virtual Tick sendPacket(RequestPort &port, const PacketPtr &pkt);
virtual Tick fetchInstMem();
/**
* An AtomicCPUPort overrides the default behaviour of the

View File

@@ -59,10 +59,37 @@ NonCachingSimpleCPU::verifyMemoryMode() const
// Send pkt out through port with the backdoor-aware atomic call, record any
// memory backdoor the target hands back, and return the latency reported for
// the access.
Tick
NonCachingSimpleCPU::sendPacket(RequestPort &port, const PacketPtr &pkt)
{
// NOTE(review): the if/else that follows (direct physical memory access vs.
// port.sendAtomic) is not brace-balanced with the rest of this body. It looks
// like the pre-change implementation left in by the diff rendering, superseded
// by the backdoor path below -- confirm against the real file.
if (system->isMemAddr(pkt->getAddr())) {
system->getPhysMem().access(pkt);
return 0;
} else {
return port.sendAtomic(pkt);
// bd starts out null and is passed to sendAtomicBackdoor() alongside the
// packet; it is only acted on below if it is non-null afterwards.
MemBackdoorPtr bd = nullptr;
Tick latency = port.sendAtomicBackdoor(pkt, bd);
// If the target gave us a backdoor for next time and we didn't
// already have it, record it.
// The backdoor is keyed in memBackdoors by the address range it reports;
// presumably insert() yields end() when the range could not be added --
// TODO confirm against AddrRangeMap.
if (bd && memBackdoors.insert(bd->range(), bd) != memBackdoors.end()) {
// Install a callback to erase this backdoor if it goes away.
// The lambda captures this so it can reach this CPU's memBackdoors.
auto callback = [this](const MemBackdoor &backdoor) {
// Linear scan for the entry whose stored pointer is the backdoor being
// invalidated; erase it and stop at the first match.
for (auto it = memBackdoors.begin();
it != memBackdoors.end(); it++) {
if (it->second == &backdoor) {
memBackdoors.erase(it);
return;
}
}
// Reached only when no recorded entry matched the invalidated backdoor.
panic("Got invalidation for unknown memory backdoor.");
};
bd->addInvalidationCallback(callback);
}
return latency;
}
Tick
NonCachingSimpleCPU::fetchInstMem()
{
    // Look up a recorded backdoor by the fetch's physical address.
    const Addr fetch_paddr = ifetch_req->getPaddr();
    auto entry = memBackdoors.contains(fetch_paddr);

    if (entry != memBackdoors.end()) {
        // Backdoor found: copy the instruction bytes straight out of its
        // memory, at the fetch address's offset from the start of the
        // backdoor's range, into the CPU's inst field. No packet is sent
        // and zero latency is returned.
        auto *backdoor = entry->second;
        const Addr offset = fetch_paddr - backdoor->range().start();
        memcpy(&inst, backdoor->ptr() + offset, ifetch_req->getSize());
        return 0;
    }

    // No backdoor on record for this address: use the regular
    // packet-based fetch of the base class.
    return AtomicSimpleCPU::fetchInstMem();
}

View File

@@ -38,7 +38,9 @@
#ifndef __CPU_SIMPLE_NONCACHING_HH__
#define __CPU_SIMPLE_NONCACHING_HH__
#include "base/addr_range_map.hh"
#include "cpu/simple/atomic.hh"
#include "mem/backdoor.hh"
#include "params/NonCachingSimpleCPU.hh"
/**
@@ -53,7 +55,10 @@ class NonCachingSimpleCPU : public AtomicSimpleCPU
void verifyMemoryMode() const override;
protected:
AddrRangeMap<MemBackdoorPtr, 1> memBackdoors;
Tick sendPacket(RequestPort &port, const PacketPtr &pkt) override;
Tick fetchInstMem() override;
};
#endif // __CPU_SIMPLE_NONCACHING_HH__