arch-riscv: squash walks with tlb hits in startWalkWrapper (#672)

Because each vector load is fragmented into 64-byte, cache-aligned
chunks, and one page-table walk is issued per fragment on a TLB miss,
walks accumulate in a pending queue, which is processed in a
blocking way (no pending walk can be issued while another one is being
processed). This adds noticeable latency to vector loads when VLEN is
sufficiently large.

This commit fixes the issue by allowing a walk to be squashed if a TLB
lookup hits just before the walk is started in `startWalkWrapper`. This
idea was taken from the ARM walker.
This commit is contained in:
Bobby R. Bruce
2023-12-13 12:45:40 -08:00
committed by GitHub
2 changed files with 38 additions and 10 deletions

View File

@@ -200,8 +200,19 @@ Walker::startWalkWrapper()
{
unsigned num_squashed = 0;
WalkerState *currState = currStates.front();
// check if we get a tlb hit to skip the walk
Addr vaddr = Addr(sext<VADDR_BITS>(currState->req->getVaddr()));
TlbEntry *e = tlb->lookup(vaddr, currState->satp.asid, currState->mode,
true);
Fault fault = NoFault;
if (e) {
fault = tlb->checkPermissions(currState->status, currState->pmode,
vaddr, currState->mode, e->pte);
}
while ((num_squashed < numSquashable) && currState &&
currState->translation->squashed()) {
(currState->translation->squashed() || (e && fault == NoFault))) {
currStates.pop_front();
num_squashed++;
@@ -209,9 +220,14 @@ Walker::startWalkWrapper()
currState->req->getVaddr());
// finish the translation which will delete the translation object
currState->translation->finish(
std::make_shared<UnimpFault>("Squashed Inst"),
currState->req, currState->tc, currState->mode);
if (currState->translation->squashed()) {
currState->translation->finish(
std::make_shared<UnimpFault>("Squashed Inst"),
currState->req, currState->tc, currState->mode);
} else {
tlb->translateTiming(currState->req, currState->tc,
currState->translation, currState->mode);
}
// delete the current request if there are no inflight packets.
// if there is something in flight, delete when the packets are
@@ -223,13 +239,26 @@ Walker::startWalkWrapper()
}
// check the next translation request, if it exists
if (currStates.size())
if (currStates.size()) {
currState = currStates.front();
else
vaddr = Addr(sext<VADDR_BITS>(currState->req->getVaddr()));
e = tlb->lookup(vaddr, currState->satp.asid, currState->mode,
true);
if (e) {
fault = tlb->checkPermissions(currState->status,
currState->pmode, vaddr,
currState->mode, e->pte);
}
} else {
currState = NULL;
}
}
if (currState && !currState->wasStarted()) {
if (!e || fault != NoFault)
currState->startWalk();
else
schedule(startWalkWrapperEvent, clockEdge(Cycles(1)));
}
if (currState && !currState->wasStarted())
currState->startWalk();
}
Fault

View File

@@ -134,12 +134,11 @@ class TLB : public BaseTLB
BaseMMU::Mode mode) override;
Fault finalizePhysical(const RequestPtr &req, ThreadContext *tc,
BaseMMU::Mode mode) const override;
TlbEntry *lookup(Addr vpn, uint16_t asid, BaseMMU::Mode mode, bool hidden);
private:
uint64_t nextSeq() { return ++lruSeq; }
TlbEntry *lookup(Addr vpn, uint16_t asid, BaseMMU::Mode mode, bool hidden);
void evictLRU();
void remove(size_t idx);