O3: Fix itstate prediction and recovery.

Any change of control flow now resets the itstate to 0 mask and 0 condition,
except where the control flow alteration writes into the cpsr register. These
cases, for example a return from an interrupt, require the predecoder to recover
the itstate.

As there is a window of opportunity between the return from an interrupt
changing the control flow at the head of the pipe and the commit of the update
to the CPSR, the predecoder needs to be able to grab the ITstate early. This
is now handled by setting the forcedItState inside a PCstate for the control
flow altering instruction.

That instruction will have the correct mask/cond, but will not have a valid
itstate until advancePC is called (note this happens to advance the execution).
When the new PCstate is copy constructed it gets the itstate cond/mask, and
upon advancing the PC the itstate becomes valid.

Subsequent advancing invalidates the state and zeroes the cond/mask. This is
handled in isolation for the ARM ISA and should have no impact on other ISAs.

Refer to arch/arm/types.hh and arch/arm/predecoder.cc for the details.
This commit is contained in:
Matt Horsnell
2011-01-18 16:30:05 -06:00
parent b13a79ee71
commit 77853b9f52
8 changed files with 67 additions and 8 deletions

View File

@@ -241,6 +241,8 @@ let {{
CondCodes = CondCodesMask & newCpsr;
NextThumb = ((CPSR)newCpsr).t;
NextJazelle = ((CPSR)newCpsr).j;
ForcedItState = ((((CPSR)newCpsr).it2 << 2) & 0xFC)
| (((CPSR)newCpsr).it1 & 0x3);
'''
buildImmDataInst(mnem + 's', code, flagType,
suffix = "ImmPclr", buildCc = False,

View File

@@ -107,13 +107,15 @@ let {{
SCTLR sctlr = Sctlr;
// Use the version of NPC that gets set before NextThumb
pNPC = cSwap<uint32_t>(Mem.ud, cpsr.e);
uint32_t tempSpsr = cSwap<uint32_t>(Mem.ud >> 32, cpsr.e);
uint32_t newCpsr =
cpsrWriteByInstr(cpsr | CondCodes,
cSwap<uint32_t>(Mem.ud >> 32, cpsr.e),
cpsrWriteByInstr(cpsr | CondCodes, tempSpsr,
0xF, true, sctlr.nmfi);
Cpsr = ~CondCodesMask & newCpsr;
NextThumb = ((CPSR)newCpsr).t;
NextJazelle = ((CPSR)newCpsr).j;
ForcedItState = ((((CPSR)tempSpsr).it2 << 2) & 0xFC)
| (((CPSR)tempSpsr).it1 & 0x3);
CondCodes = CondCodesMask & newCpsr;
'''
self.codeBlobs["memacc_code"] = accCode

View File

@@ -94,6 +94,8 @@ let {{
Cpsr = ~CondCodesMask & newCpsr;
CondCodes = CondCodesMask & newCpsr;
IWNPC = cSwap(Mem.uw, cpsr.e) | ((Spsr & 0x20) ? 1 : 0);
ForcedItState = ((((CPSR)Spsr).it2 << 2) & 0xFC)
| (((CPSR)Spsr).it1 & 0x3);
'''
microLdrRetUopIop = InstObjParams('ldr_ret_uop', 'MicroLdrRetUop',
'MicroMemOp',

View File

@@ -85,6 +85,8 @@ let {{
Cpsr = ~CondCodesMask & newCpsr;
NextThumb = ((CPSR)newCpsr).t;
NextJazelle = ((CPSR)newCpsr).j;
ForcedItState = ((((CPSR)Op1).it2 << 2) & 0xFC)
| (((CPSR)Op1).it1 & 0x3);
CondCodes = CondCodesMask & newCpsr;
'''
msrCpsrRegIop = InstObjParams("msr", "MsrCpsrReg", "MsrRegOp",
@@ -111,6 +113,8 @@ let {{
Cpsr = ~CondCodesMask & newCpsr;
NextThumb = ((CPSR)newCpsr).t;
NextJazelle = ((CPSR)newCpsr).j;
ForcedItState = ((((CPSR)imm).it2 << 2) & 0xFC)
| (((CPSR)imm).it1 & 0x3);
CondCodes = CondCodesMask & newCpsr;
'''
msrCpsrImmIop = InstObjParams("msr", "MsrCpsrImm", "MsrImmOp",

View File

@@ -245,6 +245,7 @@ def operands {{
'Thumb': pcStateReg('thumb', srtPC),
'NextThumb': pcStateReg('nextThumb', srtMode),
'NextJazelle': pcStateReg('nextJazelle', srtMode),
'ForcedItState': pcStateReg('forcedItState', srtMode),
#Register operands depending on a field in the instruction encoding. These
#should be avoided since they may not be portable across different

View File

@@ -162,9 +162,17 @@ Predecoder::moreBytes(const PCState &pc, Addr fetchPC, MachInst inst)
FPSCR fpscr = tc->readMiscReg(MISCREG_FPSCR);
emi.fpscrLen = fpscr.len;
emi.fpscrStride = fpscr.stride;
CPSR cpsr = tc->readMiscReg(MISCREG_CPSR);
itstate.top6 = cpsr.it2;
itstate.bottom2 = cpsr.it1;
if (pc.forcedItStateIsValid()) {
// returns from exceptions/interrupts force the it state.
itstate = pc.forcedItState();
DPRINTF(Predecoder, "Predecoder, itstate forced = %08x.\n", pc.forcedItState());
} else if (predAddrValid && (pc.instAddr() != predAddr)) {
// Control flow changes necessitate a 0 itstate.
itstate.top6 = 0;
itstate.bottom2 = 0;
}
outOfBytes = false;
process();
}

View File

@@ -67,6 +67,8 @@ namespace ArmISA
bool outOfBytes;
int offset;
ITSTATE itstate;
Addr predAddr;
bool predAddrValid;
public:
void reset()
@@ -76,6 +78,9 @@ namespace ArmISA
emi = 0;
emiReady = false;
outOfBytes = true;
itstate = 0;
predAddr = 0;
predAddrValid = false;
}
Predecoder(ThreadContext * _tc) :
@@ -138,6 +143,8 @@ namespace ArmISA
assert(emiReady);
ExtMachInst thisEmi = emi;
pc.npc(pc.pc() + getInstSize());
predAddrValid = true;
predAddr = pc.pc() + getInstSize();
emi = 0;
emiReady = false;
return thisEmi;

View File

@@ -202,9 +202,10 @@ namespace ArmISA
};
uint8_t flags;
uint8_t nextFlags;
uint8_t forcedItStateValue;
bool forcedItStateValid;
public:
PCState() : flags(0), nextFlags(0)
PCState() : flags(0), nextFlags(0), forcedItStateValue(0), forcedItStateValid(false)
{}
void
@@ -214,7 +215,7 @@ namespace ArmISA
npc(val + (thumb() ? 2 : 4));
}
PCState(Addr val) : flags(0), nextFlags(0)
PCState(Addr val) : flags(0), nextFlags(0), forcedItStateValue(0), forcedItStateValid(false)
{ set(val); }
bool
@@ -277,12 +278,40 @@ namespace ArmISA
nextFlags &= ~JazelleBit;
}
// Returns the stored forced IT-state byte as-is. This accessor does not
// consult the valid flag; callers that care should check
// forcedItStateIsValid() first.
uint8_t
forcedItState() const
{
return forcedItStateValue;
}
// Records a forced IT-state byte on this PCState. The value is stored but
// deliberately left marked as not valid; advance() is what promotes a
// pending non-zero value to valid.
void
forcedItState(uint8_t value)
{
    // Always start out invalid, whatever the previous state was.
    forcedItStateValid = false;
    forcedItStateValue = value;
}
// Reports whether the stored forced IT-state value is currently marked
// valid. The flag is set in advance() and cleared by the
// forcedItState(uint8_t) setter and by a subsequent advance().
bool
forcedItStateIsValid() const
{
return forcedItStateValid;
}
// Steps this PCState forward: advances the base state, recomputes npc from
// the current instruction size (2 bytes when thumb() is true, else 4),
// latches nextFlags into flags, and then updates the forced IT-state
// bookkeeping.
void
advance()
{
    Base::advance();
    npc(pc() + (thumb() ? 2 : 4));
    flags = nextFlags;

    // A non-zero forced value that has not been validated yet becomes valid
    // on this advance. In every other case (already valid, or zero) the
    // forced state is dropped: the flag is cleared and the value zeroed.
    const bool promotePending =
        (forcedItStateValue != 0) && !forcedItStateValid;
    forcedItStateValid = promotePending;
    if (!promotePending)
        forcedItStateValue = 0;
}
void
@@ -366,6 +395,8 @@ namespace ArmISA
Base::serialize(os);
SERIALIZE_SCALAR(flags);
SERIALIZE_SCALAR(nextFlags);
SERIALIZE_SCALAR(forcedItStateValue);
SERIALIZE_SCALAR(forcedItStateValid);
}
void
@@ -374,6 +405,8 @@ namespace ArmISA
Base::unserialize(cp, section);
UNSERIALIZE_SCALAR(flags);
UNSERIALIZE_SCALAR(nextFlags);
UNSERIALIZE_SCALAR(forcedItStateValue);
UNSERIALIZE_SCALAR(forcedItStateValid);
}
};