X86: Rework the multiplication microops so that they work like they would in the patent.
--HG-- extra : convert_revision : 6fcf5dee440288d8bf92f6c5c2f97ef019975536
This commit is contained in:
@@ -105,7 +105,8 @@ namespace X86ISA
|
||||
|
||||
const int NumIntArchRegs = NUM_INTREGS;
|
||||
const int NumIntRegs =
|
||||
NumIntArchRegs + NumMicroIntRegs + NumPseudoIntRegs;
|
||||
NumIntArchRegs + NumMicroIntRegs +
|
||||
NumPseudoIntRegs + NumImplicitIntRegs;
|
||||
|
||||
class IntRegFile
|
||||
{
|
||||
|
||||
@@ -58,6 +58,7 @@
|
||||
#ifndef __ARCH_X86_INTREGS_HH__
|
||||
#define __ARCH_X86_INTREGS_HH__
|
||||
|
||||
#include "arch/x86/x86_traits.hh"
|
||||
#include "base/bitunion.hh"
|
||||
|
||||
namespace X86ISA
|
||||
@@ -163,6 +164,31 @@ namespace X86ISA
|
||||
|
||||
NUM_INTREGS
|
||||
};
|
||||
|
||||
// Translate microcode register number `index` into a flat integer register
// index. The result is `index` offset by NUM_INTREGS, i.e. it lands just
// past the enumerators that precede NUM_INTREGS in the IntRegIndex enum.
inline static IntRegIndex
INTREG_MICRO(int index)
{
    const int flatIndex = NUM_INTREGS + index;
    return (IntRegIndex)(flatIndex);
}
|
||||
|
||||
// Translate pseudo register number `index` into a flat integer register
// index. Pseudo registers are offset by NUM_INTREGS plus NumMicroIntRegs,
// so they are numbered after the range handed out by INTREG_MICRO.
inline static IntRegIndex
INTREG_PSEUDO(int index)
{
    const int pseudoBase = NUM_INTREGS + NumMicroIntRegs;
    return (IntRegIndex)(pseudoBase + index);
}
|
||||
|
||||
// Translate implicit register number `index` into a flat integer register
// index. The offset is NUM_INTREGS + NumMicroIntRegs + NumPseudoIntRegs, so
// implicit registers are numbered after the micro and pseudo ranges.
inline static IntRegIndex
INTREG_IMPLICIT(int index)
{
    const int implicitBase =
        NUM_INTREGS + NumMicroIntRegs + NumPseudoIntRegs;
    return (IntRegIndex)(implicitBase + index);
}
|
||||
|
||||
// Apply a fold bit to a register index.
//
// When (index & 0x1C) == 4 -- bit 2 set and bits 3 and 4 clear, so the low
// five bits are in the range 4..7 -- `foldBit` is OR-ed into the index.
// Every other index is returned unchanged.
inline static IntRegIndex
INTREG_FOLDED(int index, int foldBit)
{
    const bool takesFoldBit = (index & 0x1C) == 4;
    if (takesFoldBit)
        index |= foldBit;
    return (IntRegIndex)(index);
}
|
||||
};
|
||||
|
||||
#endif // __ARCH_X86_INTREGS_HH__
|
||||
|
||||
@@ -61,20 +61,29 @@ microcode = '''
|
||||
|
||||
def macroop MUL_B_R
|
||||
{
|
||||
mul1u rax, rax, reg, dataSize="2"
|
||||
mul1u rax, reg
|
||||
mulel rax
|
||||
# Really ah
|
||||
muleh rsi, flags=(OF,CF)
|
||||
};
|
||||
|
||||
def macroop MUL_B_M
|
||||
{
|
||||
ld t1, seg, sib, disp
|
||||
mul1u rax, rax, t1, dataSize="2"
|
||||
mul1u rax, t1
|
||||
mulel rax
|
||||
# Really ah
|
||||
muleh rsi, flags=(OF,CF)
|
||||
};
|
||||
|
||||
def macroop MUL_B_P
|
||||
{
|
||||
rdip t7
|
||||
ld t1, seg, riprel, disp
|
||||
mul1u rax, rax, t1, dataSize="2"
|
||||
mul1u rax, t1
|
||||
mulel rax
|
||||
# Really ah
|
||||
muleh rsi, flags=(OF,CF)
|
||||
};
|
||||
|
||||
#
|
||||
@@ -83,27 +92,26 @@ def macroop MUL_B_P
|
||||
|
||||
def macroop MUL_R
|
||||
{
|
||||
# We need to store the result of the multiplication in a temporary
|
||||
# and then move it later because reg may be rdx. If it is, we can't
|
||||
# clobber its old value yet.
|
||||
muleh t1, rax, reg
|
||||
mulel rax, rax, reg
|
||||
mov rdx, rdx, t1
|
||||
mul1u rax, reg
|
||||
mulel rax
|
||||
muleh rdx, flags=(OF,CF)
|
||||
};
|
||||
|
||||
def macroop MUL_M
|
||||
{
|
||||
ld t1, seg, sib, disp
|
||||
muleh rdx, rax, t1
|
||||
mulel rax, rax, t1
|
||||
mul1u rax, t1
|
||||
mulel rax
|
||||
muleh rdx, flags=(OF,CF)
|
||||
};
|
||||
|
||||
def macroop MUL_P
|
||||
{
|
||||
rdip t7
|
||||
ld t1, seg, riprel, disp
|
||||
muleh rdx, rax, t1
|
||||
mulel rax, rax, t1
|
||||
mul1u rax, t1
|
||||
mulel rax
|
||||
muleh rdx, flags=(OF,CF)
|
||||
};
|
||||
|
||||
#
|
||||
@@ -112,20 +120,29 @@ def macroop MUL_P
|
||||
|
||||
def macroop IMUL_B_R
|
||||
{
|
||||
mul1s rax, rax, reg, dataSize="2"
|
||||
mul1s rax, reg
|
||||
mulel rax
|
||||
# Really ah
|
||||
muleh rsi, flags=(OF,CF)
|
||||
};
|
||||
|
||||
def macroop IMUL_B_M
|
||||
{
|
||||
ld t1, seg, sib, disp
|
||||
mul1s rax, rax, t1, dataSize="2"
|
||||
mul1s rax, t1
|
||||
mulel rax
|
||||
# Really ah
|
||||
muleh rsi, flags=(OF,CF)
|
||||
};
|
||||
|
||||
def macroop IMUL_B_P
|
||||
{
|
||||
rdip t7
|
||||
ld t1, seg, riprel, disp
|
||||
mul1s rax, rax, t1, dataSize="2"
|
||||
mul1s rax, t1
|
||||
mulel rax
|
||||
# Really ah
|
||||
muleh rsi, flags=(OF,CF)
|
||||
};
|
||||
|
||||
#
|
||||
@@ -134,47 +151,50 @@ def macroop IMUL_B_P
|
||||
|
||||
def macroop IMUL_R
|
||||
{
|
||||
mulehs t1, rax, reg
|
||||
mulel rax, rax, reg
|
||||
mov rdx, rdx, t1
|
||||
mul1s rax, reg
|
||||
mulel rax
|
||||
muleh rdx, flags=(OF,CF)
|
||||
};
|
||||
|
||||
def macroop IMUL_M
|
||||
{
|
||||
ld t1, seg, sib, disp
|
||||
mulehs rdx, rax, t1
|
||||
mulel rax, rax, t1
|
||||
mul1s rax, t1
|
||||
mulel rax
|
||||
muleh rdx, flags=(OF,CF)
|
||||
};
|
||||
|
||||
def macroop IMUL_P
|
||||
{
|
||||
rdip t7
|
||||
ld t1, seg, riprel, disp
|
||||
mulehs rdx, rax, t1
|
||||
mulel rax, rax, t1
|
||||
mul1s rax, t1
|
||||
mulel rax
|
||||
muleh rdx, flags=(OF,CF)
|
||||
};
|
||||
|
||||
#
|
||||
# Two operand signed multiply. These should set the CF and OF flags if the
|
||||
# result is too large for the destination register
|
||||
#
|
||||
|
||||
def macroop IMUL_R_R
|
||||
{
|
||||
mulel reg, reg, regm
|
||||
mul1s reg, regm
|
||||
mulel reg
|
||||
muleh t0, flags=(CF,OF)
|
||||
};
|
||||
|
||||
def macroop IMUL_R_M
|
||||
{
|
||||
ld t1, seg, sib, disp
|
||||
mulel reg, reg, t1
|
||||
mul1s reg, t1
|
||||
mulel reg
|
||||
muleh t0, flags=(CF,OF)
|
||||
};
|
||||
|
||||
def macroop IMUL_R_P
|
||||
{
|
||||
rdip t7
|
||||
ld t1, seg, riprel, disp
|
||||
mulel reg, reg, t1
|
||||
mul1s reg, t1
|
||||
mulel reg
|
||||
muleh t0, flags=(CF,OF)
|
||||
};
|
||||
|
||||
#
|
||||
@@ -184,14 +204,18 @@ def macroop IMUL_R_P
|
||||
def macroop IMUL_R_R_I
|
||||
{
|
||||
limm t1, imm
|
||||
mulel reg, regm, t1
|
||||
mul1s regm, t1
|
||||
mulel reg
|
||||
muleh t0, flags=(OF,CF)
|
||||
};
|
||||
|
||||
def macroop IMUL_R_M_I
|
||||
{
|
||||
limm t1, imm
|
||||
ld t2, seg, sib, disp
|
||||
mulel reg, t2, t1
|
||||
mul1s t2, t1
|
||||
mulel reg
|
||||
muleh t0, flags=(OF,CF)
|
||||
};
|
||||
|
||||
def macroop IMUL_R_P_I
|
||||
@@ -199,7 +223,9 @@ def macroop IMUL_R_P_I
|
||||
rdip t7
|
||||
limm t1, imm
|
||||
ld t2, seg, riprel
|
||||
mulel reg, t2, t1
|
||||
mul1s t2, t1
|
||||
mulel reg
|
||||
muleh t0, flags=(OF,CF)
|
||||
};
|
||||
|
||||
#
|
||||
|
||||
@@ -421,6 +421,18 @@ let {{
|
||||
abstract = True
|
||||
cond_check = "checkCondition(ccFlagBits)"
|
||||
|
||||
class RdRegOp(RegOp):
    # Abstract base for register microops constructed from a destination and
    # at most one explicit source register.
    abstract = True

    def __init__(self, dest, src1=None, dataSize="env.dataSize"):
        # When no source is supplied, the destination is reused as the first
        # source. The second source slot is always filled with the
        # "NUM_INTREGS" placeholder and no flags are passed on.
        firstSource = src1 if src1 else dest
        super(RdRegOp, self).__init__(
            dest, firstSource, "NUM_INTREGS", None, dataSize)
|
||||
|
||||
class WrRegOp(RegOp):
    # Abstract base for register microops constructed from two sources and
    # optional flags, without an explicit destination operand.
    abstract = True

    def __init__(self, src1, src2, flags=None, dataSize="env.dataSize"):
        # The destination slot is filled with the "NUM_INTREGS" placeholder;
        # sources, flags and data size are forwarded unchanged.
        placeholderDest = "NUM_INTREGS"
        super(WrRegOp, self).__init__(
            placeholderDest, src1, src2, flags, dataSize)
|
||||
|
||||
class Add(FlagRegOp):
|
||||
code = 'DestReg = merge(DestReg, psrc1 + op2, dataSize);'
|
||||
|
||||
@@ -448,57 +460,52 @@ let {{
|
||||
class Xor(LogicRegOp):
|
||||
code = 'DestReg = merge(DestReg, psrc1 ^ op2, dataSize)'
|
||||
|
||||
class Mul1s(FlagRegOp):
|
||||
code = '''
|
||||
int signPos = (dataSize * 8) / 2 - 1;
|
||||
IntReg srcVal1 = psrc1 | (-bits(psrc1, signPos) << signPos);
|
||||
IntReg srcVal2 = op2 | (-bits(psrc1, signPos) << signPos);
|
||||
DestReg = merge(DestReg, srcVal1 * srcVal2, dataSize)
|
||||
'''
|
||||
|
||||
class Mul1u(FlagRegOp):
|
||||
class Mul1s(WrRegOp):
|
||||
code = '''
|
||||
ProdLow = psrc1 * op2;
|
||||
int halfSize = (dataSize * 8) / 2;
|
||||
IntReg srcVal1 = psrc1 & mask(halfSize);
|
||||
IntReg srcVal2 = op2 & mask(halfSize);
|
||||
DestReg = merge(DestReg, srcVal1 * srcVal2, dataSize)
|
||||
int64_t spsrc1_h = spsrc1 >> halfSize;
|
||||
int64_t spsrc1_l = spsrc1 & mask(halfSize);
|
||||
int64_t spsrc2_h = sop2 >> halfSize;
|
||||
int64_t spsrc2_l = sop2 & mask(halfSize);
|
||||
ProdHi = ((spsrc1_l * spsrc2_h + spsrc1_h * spsrc2_l +
|
||||
((spsrc1_l * spsrc2_l) >> halfSize)) >> halfSize) +
|
||||
spsrc1_h * spsrc2_h;
|
||||
'''
|
||||
|
||||
class Mulel(FlagRegOp):
|
||||
code = 'DestReg = merge(DestReg, psrc1 * op2, dataSize);'
|
||||
class Mul1u(WrRegOp):
    # Unsigned multiply, first step: computes the full double-width product
    # of psrc1 and op2 and leaves it in the implicit ProdLow / ProdHi
    # registers.
    #
    # ProdLow is the plain (truncating) product. ProdHi is built from the
    # four half-width partial products. The two cross products are added
    # into ProdHi one half at a time: their upper halves go straight into
    # the result, and only their lower halves plus the carry out of the
    # low partial product are summed to find the remaining carry. Summing
    # the whole cross products in a single uint64_t, as was done before,
    # wraps when dataSize is 8 (each cross product can be close to 2^64),
    # which silently drops a carry from the high half of the product.
    # For smaller data sizes the result is unchanged.
    code = '''
        ProdLow = psrc1 * op2;
        int halfSize = (dataSize * 8) / 2;
        uint64_t psrc1_h = psrc1 >> halfSize;
        uint64_t psrc1_l = psrc1 & mask(halfSize);
        uint64_t psrc2_h = op2 >> halfSize;
        uint64_t psrc2_l = op2 & mask(halfSize);
        uint64_t cross1 = psrc1_l * psrc2_h;
        uint64_t cross2 = psrc1_h * psrc2_l;
        uint64_t lowCarry = (psrc1_l * psrc2_l) >> halfSize;
        uint64_t midCarry =
            ((cross1 & mask(halfSize)) + (cross2 & mask(halfSize)) +
             lowCarry) >> halfSize;
        ProdHi = psrc1_h * psrc2_h +
            (cross1 >> halfSize) + (cross2 >> halfSize) + midCarry;
    '''
|
||||
|
||||
# Multiply, extract low: the code string assigns DestReg the result of
# merge(SrcReg1, ProdLow, dataSize), where ProdLow is the implicit register
# written by the mul1s / mul1u microops.
# NOTE(review): merge() is defined elsewhere; presumably it overlays the low
# dataSize bytes of ProdLow onto SrcReg1 -- confirm against its definition.
class Mulel(RdRegOp):
|
||||
code = 'DestReg = merge(SrcReg1, ProdLow, dataSize);'
|
||||
|
||||
# These assume that right shifting a signed or unsigned value does sign or
|
||||
# zero extension respectively. The C standard guarantees zero fill when
|
||||
# right shifting an unsigned value, but the result of right shifting a
|
||||
# negative signed value is implementation-defined. On x86 and under likely
|
||||
# most compilers the "right thing" happens, but this isn't a guarantee.
|
||||
class Muleh(FlagRegOp):
|
||||
code = '''
|
||||
int halfSize = (dataSize * 8) / 2;
|
||||
uint64_t psrc1_h = psrc1 >> halfSize;
|
||||
uint64_t psrc1_l = psrc1 & mask(halfSize);
|
||||
uint64_t psrc2_h = op2 >> halfSize;
|
||||
uint64_t psrc2_l = op2 & mask(halfSize);
|
||||
uint64_t result =
|
||||
((psrc1_l * psrc2_h + psrc1_h * psrc2_l +
|
||||
((psrc1_l * psrc2_l) >> halfSize)) >> halfSize) +
|
||||
psrc1_h * psrc2_h;
|
||||
DestReg = merge(DestReg, result, dataSize);
|
||||
'''
|
||||
|
||||
class Mulehs(FlagRegOp):
|
||||
code = '''
|
||||
int halfSize = (dataSize * 8) / 2;
|
||||
int64_t spsrc1_h = spsrc1 >> halfSize;
|
||||
int64_t spsrc1_l = spsrc1 & mask(halfSize);
|
||||
int64_t spsrc2_h = sop2 >> halfSize;
|
||||
int64_t spsrc2_l = sop2 & mask(halfSize);
|
||||
int64_t result =
|
||||
((spsrc1_l * spsrc2_h + spsrc1_h * spsrc2_l +
|
||||
((spsrc1_l * spsrc2_l) >> halfSize)) >> halfSize) +
|
||||
spsrc1_h * spsrc2_h;
|
||||
DestReg = merge(DestReg, result, dataSize);
|
||||
'''
|
||||
# Multiply, extract high: the code string assigns DestReg the result of
# merge(SrcReg1, ProdHi, dataSize), where ProdHi is the implicit register
# written by the mul1s / mul1u microops. Unlike the other RdRegOp microops
# in this file it also accepts a flags argument.
class Muleh(RdRegOp):
|
||||
# dest:     destination register.
# src1:     first source register; falls back to dest when not given.
# flags:    flags argument forwarded to the base initializer.
# dataSize: operand size expression, "env.dataSize" by default.
def __init__(self, dest, src1=None, flags=None, dataSize="env.dataSize"):
|
||||
if not src1:
|
||||
src1 = dest
|
||||
# super(RdRegOp, self) starts the method lookup *after* RdRegOp on purpose:
# RdRegOp.__init__ has no flags parameter, so the five-argument initializer
# further up the hierarchy is called directly.
super(RdRegOp, self).__init__(dest, src1, "NUM_INTREGS", flags, dataSize)
|
||||
code = 'DestReg = merge(SrcReg1, ProdHi, dataSize);'
|
||||
# Flag update: when ProdHi is non-zero, the bits of CFBit | OFBit | ECFBit
# selected by ext are set in ccFlagBits; otherwise those same bits are
# cleared.
# NOTE(review): after a signed multiply, a product that fits in the low half
# can still have a non-zero ProdHi (pure sign extension). Confirm that this
# non-zero test is the intended overflow condition for the IMUL macroops.
flag_code = '''
|
||||
if (ProdHi)
|
||||
ccFlagBits = ccFlagBits | (ext & (CFBit | OFBit | ECFBit));
|
||||
else
|
||||
ccFlagBits = ccFlagBits & ~(ext & (CFBit | OFBit | ECFBit));
|
||||
'''
|
||||
|
||||
class Div1(FlagRegOp):
|
||||
code = '''
|
||||
@@ -613,11 +620,6 @@ let {{
|
||||
DestReg = DestReg;
|
||||
'''
|
||||
|
||||
class WrRegOp(RegOp):
|
||||
abstract = True
|
||||
def __init__(self, src1, src2, flags=None, dataSize="env.dataSize"):
|
||||
super(WrRegOp, self).__init__("NUM_INTREGS", src1, src2, flags, dataSize)
|
||||
|
||||
class Wrip(WrRegOp, CondRegOp):
|
||||
code = 'RIP = psrc1 + op2'
|
||||
else_code="RIP = RIP;"
|
||||
@@ -629,11 +631,6 @@ let {{
|
||||
class Wruflags(WrRegOp):
|
||||
code = 'ccFlagBits = psrc1 ^ op2'
|
||||
|
||||
class RdRegOp(RegOp):
|
||||
abstract = True
|
||||
def __init__(self, dest, src1 = "NUM_INTREGS", dataSize="env.dataSize"):
|
||||
super(RdRegOp, self).__init__(dest, src1, "NUM_INTREGS", None, dataSize)
|
||||
|
||||
class Rdip(RdRegOp):
|
||||
code = 'DestReg = RIP'
|
||||
|
||||
|
||||
@@ -95,13 +95,17 @@ def operand_types {{
|
||||
}};
|
||||
|
||||
def operands {{
|
||||
'SrcReg1': ('IntReg', 'uqw', '(((src1 & 0x1C) == 4 ? foldOBit : 0) | src1)', 'IsInteger', 1),
|
||||
'SrcReg2': ('IntReg', 'uqw', '(((src2 & 0x1C) == 4 ? foldOBit : 0) | src2)', 'IsInteger', 2),
|
||||
'Index': ('IntReg', 'uqw', '(((index & 0x1C) == 4 ? foldABit : 0) | index)', 'IsInteger', 3),
|
||||
'Base': ('IntReg', 'uqw', '(((base & 0x1C) == 4 ? foldABit : 0) | base)', 'IsInteger', 4),
|
||||
'DestReg': ('IntReg', 'uqw', '(((dest & 0x1C) == 4 ? foldOBit : 0) | dest)', 'IsInteger', 5),
|
||||
'Data': ('IntReg', 'uqw', '(((data & 0x1C) == 4 ? foldOBit : 0) | data)', 'IsInteger', 6),
|
||||
'rax': ('IntReg', 'uqw', '(INTREG_RAX)', 'IsInteger', 7),
|
||||
'SrcReg1': ('IntReg', 'uqw', 'INTREG_FOLDED(src1, foldOBit)', 'IsInteger', 1),
|
||||
'SrcReg2': ('IntReg', 'uqw', 'INTREG_FOLDED(src2, foldOBit)', 'IsInteger', 2),
|
||||
'Index': ('IntReg', 'uqw', 'INTREG_FOLDED(index, foldABit)', 'IsInteger', 3),
|
||||
'Base': ('IntReg', 'uqw', 'INTREG_FOLDED(base, foldABit)', 'IsInteger', 4),
|
||||
'DestReg': ('IntReg', 'uqw', 'INTREG_FOLDED(dest, foldOBit)', 'IsInteger', 5),
|
||||
'Data': ('IntReg', 'uqw', 'INTREG_FOLDED(data, foldOBit)', 'IsInteger', 6),
|
||||
'ProdLow': ('IntReg', 'uqw', 'INTREG_IMPLICIT(0)', 'IsInteger', 7),
|
||||
'ProdHi': ('IntReg', 'uqw', 'INTREG_IMPLICIT(1)', 'IsInteger', 8),
|
||||
'Quotient': ('IntReg', 'uqw', 'INTREG_IMPLICIT(2)', 'IsInteger', 9),
|
||||
'Remainder': ('IntReg', 'uqw', 'INTREG_IMPLICIT(3)', 'IsInteger', 10),
|
||||
'rax': ('IntReg', 'uqw', '(INTREG_RAX)', 'IsInteger', 11),
|
||||
'FpSrcReg1': ('FloatReg', 'df', 'src1', 'IsFloating', 20),
|
||||
'FpSrcReg2': ('FloatReg', 'df', 'src2', 'IsFloating', 21),
|
||||
'FpDestReg': ('FloatReg', 'df', 'dest', 'IsFloating', 22),
|
||||
@@ -109,7 +113,7 @@ def operands {{
|
||||
'RIP': ('NPC', 'uqw', None, (None, None, 'IsControl'), 50),
|
||||
'uIP': ('UPC', 'uqw', None, (None, None, 'IsControl'), 51),
|
||||
'nuIP': ('NUPC', 'uqw', None, (None, None, 'IsControl'), 52),
|
||||
'ccFlagBits': ('IntReg', 'uqw', 'NUM_INTREGS + NumMicroIntRegs', None, 60),
|
||||
'ccFlagBits': ('IntReg', 'uqw', 'INTREG_PSEUDO(0)', None, 60),
|
||||
'SegBase': ('ControlReg', 'uqw', 'MISCREG_SEG_BASE(segment)', (None, None, ['IsSerializeAfter','IsSerializing','IsNonSpeculative']), 70),
|
||||
'Mem': ('Mem', 'uqw', None, ('IsMemRef', 'IsLoad', 'IsStore'), 100)
|
||||
}};
|
||||
|
||||
@@ -81,8 +81,9 @@ namespace X86ISA
|
||||
|
||||
// These enumerate all the registers for dependence tracking.
|
||||
enum DependenceTags {
|
||||
//There are 16 microcode registers at the moment
|
||||
FP_Base_DepTag = 1 << 7,
|
||||
//There are 16 microcode registers at the moment. This is an
|
||||
//unusually large constant to make sure there isn't overflow.
|
||||
FP_Base_DepTag = 128,
|
||||
Ctrl_Base_DepTag =
|
||||
FP_Base_DepTag +
|
||||
//mmx/x87 registers
|
||||
|
||||
@@ -61,7 +61,14 @@
|
||||
namespace X86ISA
|
||||
{
|
||||
const int NumMicroIntRegs = 16;
|
||||
|
||||
const int NumPseudoIntRegs = 1;
|
||||
//1. The condition code bits of the rflags register.
|
||||
const int NumImplicitIntRegs = 4;
|
||||
//1. The lower part of the result of multiplication.
|
||||
//2. The upper part of the result of multiplication.
|
||||
//3. The quotient from division
|
||||
//4. The remainder from division
|
||||
|
||||
const int NumMMXRegs = 8;
|
||||
const int NumXMMRegs = 16;
|
||||
|
||||
Reference in New Issue
Block a user