arch-x86,cpu: Fix bpred by annotating branch instructions in x86
Original Creator: Adria Armejach. Branch instructions needed to be annotated in x86 as direct/indirect and conditional/unconditional. These annotations where not present causing the branch predictor to misbehave, not using the BTB. In addition, logic to determine the real branch target at decode needed to be added as it was also missing. Change-Id: I91e707452c1825b9bb4ae75c3f599da489ae5b9a Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/29154 Reviewed-by: Alexandru Duțu <alexandru.dutu@amd.com> Maintainer: Jason Lowe-Power <power.jg@gmail.com> Tested-by: kokoro <noreply+kokoro@google.com>
This commit is contained in:
committed by
Juan Manuel Cebrián González
parent
1009c9878c
commit
9b8cc2f5c3
@@ -39,6 +39,7 @@ def macroop CALL_NEAR_I
|
||||
# Make the default data size of calls 64 bits in 64 bit mode
|
||||
.adjust_env oszIn64Override
|
||||
.function_call
|
||||
.control_direct
|
||||
|
||||
limm t1, imm
|
||||
rdip t7
|
||||
@@ -53,6 +54,7 @@ def macroop CALL_NEAR_R
|
||||
# Make the default data size of calls 64 bits in 64 bit mode
|
||||
.adjust_env oszIn64Override
|
||||
.function_call
|
||||
.control_indirect
|
||||
|
||||
rdip t1
|
||||
# Check target of call
|
||||
@@ -66,6 +68,7 @@ def macroop CALL_NEAR_M
|
||||
# Make the default data size of calls 64 bits in 64 bit mode
|
||||
.adjust_env oszIn64Override
|
||||
.function_call
|
||||
.control_indirect
|
||||
|
||||
rdip t7
|
||||
ld t1, seg, sib, disp
|
||||
@@ -80,6 +83,7 @@ def macroop CALL_NEAR_P
|
||||
# Make the default data size of calls 64 bits in 64 bit mode
|
||||
.adjust_env oszIn64Override
|
||||
.function_call
|
||||
.control_indirect
|
||||
|
||||
rdip t7
|
||||
ld t1, seg, riprel, disp
|
||||
|
||||
@@ -38,6 +38,7 @@ def macroop JZ_I
|
||||
{
|
||||
# Make the defualt data size of jumps 64 bits in 64 bit mode
|
||||
.adjust_env oszIn64Override
|
||||
.control_direct
|
||||
|
||||
rdip t1
|
||||
limm t2, imm
|
||||
@@ -48,6 +49,7 @@ def macroop JNZ_I
|
||||
{
|
||||
# Make the defualt data size of jumps 64 bits in 64 bit mode
|
||||
.adjust_env oszIn64Override
|
||||
.control_direct
|
||||
|
||||
rdip t1
|
||||
limm t2, imm
|
||||
@@ -58,6 +60,7 @@ def macroop JB_I
|
||||
{
|
||||
# Make the default data size of jumps 64 bits in 64 bit mode
|
||||
.adjust_env oszIn64Override
|
||||
.control_direct
|
||||
|
||||
rdip t1
|
||||
limm t2, imm
|
||||
@@ -68,6 +71,7 @@ def macroop JNB_I
|
||||
{
|
||||
# Make the default data size of jumps 64 bits in 64 bit mode
|
||||
.adjust_env oszIn64Override
|
||||
.control_direct
|
||||
|
||||
rdip t1
|
||||
limm t2, imm
|
||||
@@ -78,6 +82,7 @@ def macroop JBE_I
|
||||
{
|
||||
# Make the default data size of jumps 64 bits in 64 bit mode
|
||||
.adjust_env oszIn64Override
|
||||
.control_direct
|
||||
|
||||
rdip t1
|
||||
limm t2, imm
|
||||
@@ -88,6 +93,7 @@ def macroop JNBE_I
|
||||
{
|
||||
# Make the default data size of jumps 64 bits in 64 bit mode
|
||||
.adjust_env oszIn64Override
|
||||
.control_direct
|
||||
|
||||
rdip t1
|
||||
limm t2, imm
|
||||
@@ -98,6 +104,7 @@ def macroop JS_I
|
||||
{
|
||||
# Make the default data size of jumps 64 bits in 64 bit mode
|
||||
.adjust_env oszIn64Override
|
||||
.control_direct
|
||||
|
||||
rdip t1
|
||||
limm t2, imm
|
||||
@@ -108,6 +115,7 @@ def macroop JNS_I
|
||||
{
|
||||
# Make the default data size of jumps 64 bits in 64 bit mode
|
||||
.adjust_env oszIn64Override
|
||||
.control_direct
|
||||
|
||||
rdip t1
|
||||
limm t2, imm
|
||||
@@ -118,6 +126,7 @@ def macroop JP_I
|
||||
{
|
||||
# Make the default data size of jumps 64 bits in 64 bit mode
|
||||
.adjust_env oszIn64Override
|
||||
.control_direct
|
||||
|
||||
rdip t1
|
||||
limm t2, imm
|
||||
@@ -128,6 +137,7 @@ def macroop JNP_I
|
||||
{
|
||||
# Make the default data size of jumps 64 bits in 64 bit mode
|
||||
.adjust_env oszIn64Override
|
||||
.control_direct
|
||||
|
||||
rdip t1
|
||||
limm t2, imm
|
||||
@@ -138,6 +148,7 @@ def macroop JL_I
|
||||
{
|
||||
# Make the default data size of jumps 64 bits in 64 bit mode
|
||||
.adjust_env oszIn64Override
|
||||
.control_direct
|
||||
|
||||
rdip t1
|
||||
limm t2, imm
|
||||
@@ -148,6 +159,7 @@ def macroop JNL_I
|
||||
{
|
||||
# Make the default data size of jumps 64 bits in 64 bit mode
|
||||
.adjust_env oszIn64Override
|
||||
.control_direct
|
||||
|
||||
rdip t1
|
||||
limm t2, imm
|
||||
@@ -158,6 +170,7 @@ def macroop JLE_I
|
||||
{
|
||||
# Make the default data size of jumps 64 bits in 64 bit mode
|
||||
.adjust_env oszIn64Override
|
||||
.control_direct
|
||||
|
||||
rdip t1
|
||||
limm t2, imm
|
||||
@@ -168,6 +181,7 @@ def macroop JNLE_I
|
||||
{
|
||||
# Make the default data size of jumps 64 bits in 64 bit mode
|
||||
.adjust_env oszIn64Override
|
||||
.control_direct
|
||||
|
||||
rdip t1
|
||||
limm t2, imm
|
||||
@@ -178,6 +192,7 @@ def macroop JO_I
|
||||
{
|
||||
# Make the default data size of jumps 64 bits in 64 bit mode
|
||||
.adjust_env oszIn64Override
|
||||
.control_direct
|
||||
|
||||
rdip t1
|
||||
limm t2, imm
|
||||
@@ -188,6 +203,7 @@ def macroop JNO_I
|
||||
{
|
||||
# Make the default data size of jumps 64 bits in 64 bit mode
|
||||
.adjust_env oszIn64Override
|
||||
.control_direct
|
||||
|
||||
rdip t1
|
||||
limm t2, imm
|
||||
@@ -196,6 +212,8 @@ def macroop JNO_I
|
||||
|
||||
def macroop JRCX_I
|
||||
{
|
||||
.control_direct
|
||||
|
||||
rdip t1
|
||||
add t0, t0, rcx, flags=(EZF,), dataSize=asz
|
||||
wripi t1, imm, flags=(CEZF,)
|
||||
|
||||
@@ -39,6 +39,7 @@ def macroop JMP_I
|
||||
{
|
||||
# Make the default data size of jumps 64 bits in 64 bit mode
|
||||
.adjust_env oszIn64Override
|
||||
.control_direct
|
||||
|
||||
rdip t1
|
||||
limm t2, imm
|
||||
@@ -49,6 +50,7 @@ def macroop JMP_R
|
||||
{
|
||||
# Make the default data size of jumps 64 bits in 64 bit mode
|
||||
.adjust_env oszIn64Override
|
||||
.control_indirect
|
||||
|
||||
wripi reg, 0
|
||||
};
|
||||
@@ -57,6 +59,7 @@ def macroop JMP_M
|
||||
{
|
||||
# Make the default data size of jumps 64 bits in 64 bit mode
|
||||
.adjust_env oszIn64Override
|
||||
.control_indirect
|
||||
|
||||
ld t1, seg, sib, disp
|
||||
wripi t1, 0
|
||||
@@ -66,6 +69,7 @@ def macroop JMP_P
|
||||
{
|
||||
# Make the default data size of jumps 64 bits in 64 bit mode
|
||||
.adjust_env oszIn64Override
|
||||
.control_indirect
|
||||
|
||||
rdip t7
|
||||
ld t1, seg, riprel, disp
|
||||
@@ -74,6 +78,8 @@ def macroop JMP_P
|
||||
|
||||
def macroop JMP_FAR_M
|
||||
{
|
||||
.control_indirect
|
||||
|
||||
limm t1, 0, dataSize=8
|
||||
limm t2, 0, dataSize=8
|
||||
lea t1, seg, sib, disp, dataSize=asz
|
||||
@@ -84,6 +90,8 @@ def macroop JMP_FAR_M
|
||||
|
||||
def macroop JMP_FAR_P
|
||||
{
|
||||
.control_indirect
|
||||
|
||||
limm t1, 0, dataSize=8
|
||||
limm t2, 0, dataSize=8
|
||||
rdip t7, dataSize=asz
|
||||
@@ -95,6 +103,8 @@ def macroop JMP_FAR_P
|
||||
|
||||
def macroop JMP_FAR_I
|
||||
{
|
||||
.control_indirect
|
||||
|
||||
# Put the whole far pointer into a register.
|
||||
limm t2, imm, dataSize=8
|
||||
# Figure out the width of the offset.
|
||||
@@ -138,6 +148,8 @@ farJmpSystemDescriptor:
|
||||
|
||||
def macroop JMP_FAR_REAL_M
|
||||
{
|
||||
.control_indirect
|
||||
|
||||
lea t1, seg, sib, disp, dataSize=asz
|
||||
ld t2, seg, [1, t0, t1], dsz
|
||||
ld t1, seg, [1, t0, t1]
|
||||
@@ -150,11 +162,14 @@ def macroop JMP_FAR_REAL_M
|
||||
|
||||
def macroop JMP_FAR_REAL_P
|
||||
{
|
||||
.control_indirect
|
||||
panic "Real mode far jump executed in 64 bit mode!"
|
||||
};
|
||||
|
||||
def macroop JMP_FAR_REAL_I
|
||||
{
|
||||
.control_indirect
|
||||
|
||||
# Put the whole far pointer into a register.
|
||||
limm t2, imm, dataSize=8
|
||||
# Figure out the width of the offset.
|
||||
|
||||
@@ -35,6 +35,8 @@
|
||||
|
||||
microcode = '''
|
||||
def macroop LOOP_I {
|
||||
.control_direct
|
||||
|
||||
# Make the default data size of pops 64 bits in 64 bit mode
|
||||
.adjust_env oszIn64Override
|
||||
rdip t1
|
||||
@@ -43,6 +45,8 @@ def macroop LOOP_I {
|
||||
};
|
||||
|
||||
def macroop LOOPNE_I {
|
||||
.control_direct
|
||||
|
||||
# Make the default data size of pops 64 bits in 64 bit mode
|
||||
.adjust_env oszIn64Override
|
||||
rdip t1
|
||||
@@ -51,6 +55,8 @@ def macroop LOOPNE_I {
|
||||
};
|
||||
|
||||
def macroop LOOPE_I {
|
||||
.control_direct
|
||||
|
||||
# Make the default data size of pops 64 bits in 64 bit mode
|
||||
.adjust_env oszIn64Override
|
||||
rdip t1
|
||||
|
||||
@@ -39,6 +39,7 @@ def macroop RET_NEAR
|
||||
# Make the default data size of rets 64 bits in 64 bit mode
|
||||
.adjust_env oszIn64Override
|
||||
.function_return
|
||||
.control_indirect
|
||||
|
||||
ld t1, ss, [1, t0, rsp]
|
||||
# Check address of return
|
||||
@@ -51,6 +52,7 @@ def macroop RET_NEAR_I
|
||||
# Make the default data size of rets 64 bits in 64 bit mode
|
||||
.adjust_env oszIn64Override
|
||||
.function_return
|
||||
.control_indirect
|
||||
|
||||
limm t2, imm
|
||||
ld t1, ss, [1, t0, rsp]
|
||||
@@ -63,6 +65,7 @@ def macroop RET_NEAR_I
|
||||
def macroop RET_FAR {
|
||||
.adjust_env oszIn64Override
|
||||
.function_return
|
||||
.control_indirect
|
||||
|
||||
# Get the return RIP
|
||||
ld t1, ss, [1, t0, rsp]
|
||||
|
||||
@@ -150,6 +150,10 @@ let {{
|
||||
self.function_call = True
|
||||
def function_return(self):
|
||||
self.function_return = True
|
||||
def control_direct(self):
|
||||
self.control_direct = True
|
||||
def control_indirect(self):
|
||||
self.control_indirect = True
|
||||
|
||||
def __init__(self, name):
|
||||
super(X86Macroop, self).__init__(name)
|
||||
@@ -160,7 +164,9 @@ let {{
|
||||
"serialize_before" : self.serializeBefore,
|
||||
"serialize_after" : self.serializeAfter,
|
||||
"function_call" : self.function_call,
|
||||
"function_return" : self.function_return
|
||||
"function_return" : self.function_return,
|
||||
"control_direct" : self.control_direct,
|
||||
"control_indirect" : self.control_indirect
|
||||
}
|
||||
self.declared = False
|
||||
self.adjust_env = ""
|
||||
@@ -179,6 +185,8 @@ let {{
|
||||
self.serialize_after = False
|
||||
self.function_call = False
|
||||
self.function_return = False
|
||||
self.control_direct = False
|
||||
self.control_indirect = False
|
||||
|
||||
def getAllocator(self, env):
|
||||
return "new X86Macroop::%s(machInst, %s)" % \
|
||||
@@ -227,6 +235,10 @@ let {{
|
||||
if self.function_return:
|
||||
flags.append("IsReturn")
|
||||
flags.append("IsUncondControl")
|
||||
if self.control_direct:
|
||||
flags.append("IsDirectControl")
|
||||
if self.control_indirect:
|
||||
flags.append("IsIndirectControl")
|
||||
else:
|
||||
flags.append("IsDelayedCommit")
|
||||
|
||||
|
||||
@@ -110,6 +110,12 @@ def template MicroRegOpDeclare {{
|
||||
uint8_t _dataSize, uint16_t _ext);
|
||||
|
||||
Fault execute(ExecContext *, Trace::InstRecord *) const;
|
||||
|
||||
X86ISA::PCState branchTarget(const X86ISA::PCState &branchPC) const
|
||||
override;
|
||||
|
||||
/// Explicitly import the otherwise hidden branchTarget
|
||||
using StaticInst::branchTarget;
|
||||
};
|
||||
}};
|
||||
|
||||
@@ -124,6 +130,12 @@ def template MicroRegOpImmDeclare {{
|
||||
uint8_t _dataSize, uint16_t _ext);
|
||||
|
||||
Fault execute(ExecContext *, Trace::InstRecord *) const;
|
||||
|
||||
X86ISA::PCState branchTarget(const X86ISA::PCState &branchPC) const
|
||||
override;
|
||||
|
||||
/// Explicitly import the otherwise hidden branchTarget
|
||||
using StaticInst::branchTarget;
|
||||
};
|
||||
}};
|
||||
|
||||
@@ -139,6 +151,17 @@ def template MicroRegOpConstructor {{
|
||||
%(constructor)s;
|
||||
%(cond_control_flag_init)s;
|
||||
}
|
||||
|
||||
X86ISA::PCState
|
||||
%(class_name)s::branchTarget(const X86ISA::PCState &branchPC) const
|
||||
{
|
||||
X86ISA::PCState pcs = branchPC;
|
||||
DPRINTF(X86, "branchTarget PC info: %s, Immediate: %lx\n",
|
||||
pcs, (int64_t) this->machInst.immediate);
|
||||
pcs.npc(pcs.npc() + (int64_t) this->machInst.immediate);
|
||||
pcs.uEnd();
|
||||
return pcs;
|
||||
}
|
||||
}};
|
||||
|
||||
def template MicroRegOpImmConstructor {{
|
||||
@@ -153,6 +176,17 @@ def template MicroRegOpImmConstructor {{
|
||||
%(constructor)s;
|
||||
%(cond_control_flag_init)s;
|
||||
}
|
||||
|
||||
X86ISA::PCState
|
||||
%(class_name)s::branchTarget(const X86ISA::PCState &branchPC) const
|
||||
{
|
||||
X86ISA::PCState pcs = branchPC;
|
||||
DPRINTF(X86, "branchTarget PC info: %s, Immediate (imm8): %lx\n",
|
||||
pcs, (int8_t)imm8);
|
||||
pcs.npc(pcs.npc() + (int8_t)imm8);
|
||||
pcs.uEnd();
|
||||
return pcs;
|
||||
}
|
||||
}};
|
||||
|
||||
output header {{
|
||||
@@ -273,7 +307,8 @@ let {{
|
||||
# a version without it and fix up this version to use it.
|
||||
if flag_code != "" or cond_check != "true":
|
||||
self.buildCppClasses(name, Name, suffix,
|
||||
code, big_code, "", "true", else_code, "", op_class)
|
||||
code, big_code, "", "true", else_code,
|
||||
"flags[IsUncondControl] = flags[IsControl];", op_class)
|
||||
suffix = "Flags" + suffix
|
||||
|
||||
# If psrc1 or psrc2 is used, we need to actually insert code to
|
||||
|
||||
@@ -62,6 +62,12 @@ def template SeqOpDeclare {{
|
||||
uint64_t setFlags, uint16_t _target, uint8_t _cc);
|
||||
|
||||
Fault execute(ExecContext *, Trace::InstRecord *) const;
|
||||
|
||||
X86ISA::PCState branchTarget(const X86ISA::PCState &branchPC) const
|
||||
override;
|
||||
|
||||
/// Explicitly import the otherwise hidden branchTarget
|
||||
using StaticInst::branchTarget;
|
||||
};
|
||||
}};
|
||||
|
||||
@@ -101,6 +107,17 @@ def template SeqOpConstructor {{
|
||||
%(constructor)s;
|
||||
%(cond_control_flag_init)s;
|
||||
}
|
||||
|
||||
X86ISA::PCState
|
||||
%(class_name)s::branchTarget(const X86ISA::PCState &branchPC) const
|
||||
{
|
||||
X86ISA::PCState pcs = branchPC;
|
||||
DPRINTF(X86, "Br branchTarget PC info: %s, Target: %d\n",
|
||||
pcs, (int16_t)target);
|
||||
pcs.nupc(target);
|
||||
pcs.uAdvance();
|
||||
return pcs;
|
||||
}
|
||||
}};
|
||||
|
||||
output decoder {{
|
||||
@@ -173,7 +190,8 @@ let {{
|
||||
"else_code": "nuIP = nuIP;",
|
||||
"cond_test": "checkCondition(ccFlagBits | cfofBits | dfBit | \
|
||||
ecfBit | ezfBit, cc)",
|
||||
"cond_control_flag_init": "flags[IsCondControl] = true"})
|
||||
"cond_control_flag_init": "flags[IsCondControl] = true; \
|
||||
flags[IsDirectControl] = true;"})
|
||||
exec_output += SeqOpExecute.subst(iop)
|
||||
header_output += SeqOpDeclare.subst(iop)
|
||||
decoder_output += SeqOpConstructor.subst(iop)
|
||||
@@ -191,7 +209,8 @@ let {{
|
||||
{"code": "", "else_code": "",
|
||||
"cond_test": "checkCondition(ccFlagBits | cfofBits | dfBit | \
|
||||
ecfBit | ezfBit, cc)",
|
||||
"cond_control_flag_init": ""})
|
||||
"cond_control_flag_init": "flags[IsUncondControl] = true;\
|
||||
flags[IsDirectControl] = true;"})
|
||||
exec_output += SeqOpExecute.subst(iop)
|
||||
header_output += SeqOpDeclare.subst(iop)
|
||||
decoder_output += SeqOpConstructor.subst(iop)
|
||||
|
||||
@@ -747,8 +747,9 @@ DefaultDecode<Impl>::decodeInsts(ThreadID tid)
|
||||
|
||||
DPRINTF(Decode,
|
||||
"[tid:%i] [sn:%llu] "
|
||||
"Updating predictions: PredPC: %s\n",
|
||||
tid, inst->seqNum, target);
|
||||
"Updating predictions: Wrong predicted target: %s \
|
||||
PredPC: %s\n",
|
||||
tid, inst->seqNum, inst->readPredTarg(), target);
|
||||
//The micro pc after an instruction level branch should be 0
|
||||
inst->setPredTarg(target);
|
||||
break;
|
||||
|
||||
Reference in New Issue
Block a user