cpu: Make the NonCachingSimpleCPU use a back door for fetch.
If the memory system can provide a back door to memory, store that, and use it for subsequent accesses to the range it covers. For now, this covers only fetch. That's because fetch will generally happen more often than loads and stores, and because it's relatively simple to implement since we can ignore atomic operations, etc. Some limited benchmarking suggests that this speeds up x86 linux boot by about 20%, although my modifications to the config to remove caching (which blocks the back door mechanism) also made gem5 crash, so it's hard to say for sure if that's a valid result. The crash happened in the same way before and after, so it's probably at least relatively representative. While this gives a pretty substantial performance boost, it will prevent statistics from being collected at the memory, or on intermediate objects in the interconnect like the bus. That is to be expected with this memory mode, however. Change-Id: I73f73017e454300fd4d61f58462eb4ec719b8d85 Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/36979 Reviewed-by: Jason Lowe-Power <power.jg@gmail.com> Reviewed-by: Andreas Sandberg <andreas.sandberg@arm.com> Maintainer: Jason Lowe-Power <power.jg@gmail.com> Tested-by: kokoro <noreply+kokoro@google.com>
This commit is contained in:
@@ -674,15 +674,7 @@ AtomicSimpleCPU::tick()
|
||||
//if (decoder.needMoreBytes())
|
||||
//{
|
||||
icache_access = true;
|
||||
Packet ifetch_pkt = Packet(ifetch_req, MemCmd::ReadReq);
|
||||
ifetch_pkt.dataStatic(&inst);
|
||||
|
||||
icache_latency = sendPacket(icachePort, &ifetch_pkt);
|
||||
|
||||
assert(!ifetch_pkt.isError());
|
||||
|
||||
// ifetch_req is initialized to read the instruction
|
||||
// directly into the CPU object's inst field.
|
||||
icache_latency = fetchInstMem();
|
||||
//}
|
||||
}
|
||||
|
||||
@@ -747,6 +739,21 @@ AtomicSimpleCPU::tick()
|
||||
reschedule(tickEvent, curTick() + latency, true);
|
||||
}
|
||||
|
||||
Tick
AtomicSimpleCPU::fetchInstMem()
{
    // Build a read packet around the already-prepared fetch request.
    Packet ifetch_pkt(ifetch_req, MemCmd::ReadReq);

    // ifetch_req is initialized to read the instruction directly into
    // the CPU object's inst field, so point the packet's data there.
    ifetch_pkt.dataStatic(&inst);

    // Send the fetch atomically through the icache port and make sure
    // it completed without error.
    const Tick lat = sendPacket(icachePort, &ifetch_pkt);
    assert(!ifetch_pkt.isError());

    return lat;
}
|
||||
|
||||
void
|
||||
AtomicSimpleCPU::regProbePoints()
|
||||
{
|
||||
|
||||
@@ -57,7 +57,6 @@ class AtomicSimpleCPU : public BaseSimpleCPU
|
||||
void init() override;
|
||||
|
||||
protected:
|
||||
|
||||
EventFunctionWrapper tickEvent;
|
||||
|
||||
const int width;
|
||||
@@ -102,6 +101,7 @@ class AtomicSimpleCPU : public BaseSimpleCPU
|
||||
bool tryCompleteDrain();
|
||||
|
||||
virtual Tick sendPacket(RequestPort &port, const PacketPtr &pkt);
|
||||
virtual Tick fetchInstMem();
|
||||
|
||||
/**
|
||||
* An AtomicCPUPort overrides the default behaviour of the
|
||||
|
||||
@@ -59,10 +59,37 @@ NonCachingSimpleCPU::verifyMemoryMode() const
|
||||
Tick
NonCachingSimpleCPU::sendPacket(RequestPort &port, const PacketPtr &pkt)
{
    // Fast path: if the address is backed by plain physical memory,
    // access it directly and charge no latency.
    if (system->isMemAddr(pkt->getAddr())) {
        system->getPhysMem().access(pkt);
        return 0;
    } else {
        // BUG FIX: the original had an unconditional
        // "return port.sendAtomic(pkt);" here, which made all of the
        // backdoor handling below unreachable dead code. Use the
        // backdoor-aware atomic access instead.
        MemBackdoorPtr bd = nullptr;
        Tick latency = port.sendAtomicBackdoor(pkt, bd);

        // If the target gave us a backdoor for next time and we didn't
        // already have it, record it.
        if (bd && memBackdoors.insert(bd->range(), bd) != memBackdoors.end()) {
            // Install a callback to erase this backdoor if it goes away.
            auto callback = [this](const MemBackdoor &backdoor) {
                for (auto it = memBackdoors.begin();
                        it != memBackdoors.end(); it++) {
                    if (it->second == &backdoor) {
                        memBackdoors.erase(it);
                        return;
                    }
                }
                panic("Got invalidation for unknown memory backdoor.");
            };
            bd->addInvalidationCallback(callback);
        }
        return latency;
    }
}
|
||||
|
||||
Tick
NonCachingSimpleCPU::fetchInstMem()
{
    // Physical address this fetch request targets.
    const Addr paddr = ifetch_req->getPaddr();

    // See whether a previously recorded backdoor covers this address.
    auto entry = memBackdoors.contains(paddr);
    if (entry != memBackdoors.end()) {
        // Copy the instruction bytes straight out of host memory into
        // the CPU object's inst field, bypassing the packet machinery.
        auto *backdoor = entry->second;
        memcpy(&inst, backdoor->ptr() + (paddr - backdoor->range().start()),
                ifetch_req->getSize());
        return 0;
    }

    // No backdoor for this range; fall back to a normal atomic fetch.
    return AtomicSimpleCPU::fetchInstMem();
}
|
||||
|
||||
@@ -38,7 +38,9 @@
|
||||
#ifndef __CPU_SIMPLE_NONCACHING_HH__
|
||||
#define __CPU_SIMPLE_NONCACHING_HH__
|
||||
|
||||
#include "base/addr_range_map.hh"
|
||||
#include "cpu/simple/atomic.hh"
|
||||
#include "mem/backdoor.hh"
|
||||
#include "params/NonCachingSimpleCPU.hh"
|
||||
|
||||
/**
|
||||
@@ -53,7 +55,10 @@ class NonCachingSimpleCPU : public AtomicSimpleCPU
|
||||
void verifyMemoryMode() const override;
|
||||
|
||||
protected:
|
||||
AddrRangeMap<MemBackdoorPtr, 1> memBackdoors;
|
||||
|
||||
Tick sendPacket(RequestPort &port, const PacketPtr &pkt) override;
|
||||
Tick fetchInstMem() override;
|
||||
};
|
||||
|
||||
#endif // __CPU_SIMPLE_NONCACHING_HH__
|
||||
|
||||
Reference in New Issue
Block a user