ARM: Implement conversion to/from half precision.

This commit is contained in:
Gabe Black
2010-06-02 12:58:16 -05:00
parent 04e196f422
commit 237c0617a0
3 changed files with 303 additions and 2 deletions

View File

@@ -396,6 +396,223 @@ fixFpSFpDDest(FPSCR fpscr, float val)
return mid;
}
/**
 * Convert a single precision value to half precision and insert the
 * 16 bit result into one half of an existing 32 bit register value.
 *
 * @param fpscr Floating point status and control register. The ahp, dn,
 *              ufe and rMode fields are read (fz is only asserted on);
 *              the ioc, ufc, ofc and ixc flags are set as required by
 *              the conversion.
 * @param op    Single precision value to convert.
 * @param dest  Current destination register contents. The half that is
 *              not selected by top is passed through unchanged.
 * @param top   If true the result replaces bits 31:16 of dest, otherwise
 *              it replaces bits 15:0.
 * @return dest with the selected half replaced by the converted value.
 */
static inline float
vcvtFpSFpH(FPSCR &fpscr, float op, float dest, bool top)
{
    float junk = 0.0;
    uint32_t destBits = fpToBits(dest);
    uint32_t opBits = fpToBits(op);
    // Extract the operand.
    bool neg = bits(opBits, 31);
    uint32_t exponent = bits(opBits, 30, 23);
    uint32_t oldMantissa = bits(opBits, 22, 0);
    // Keep the top 10 fraction bits; the 13 dropped bits go in "extra".
    uint32_t mantissa = oldMantissa >> (23 - 10);
    // Do the conversion.
    uint32_t extra = oldMantissa & mask(23 - 10);
    if (exponent == 0xff) {
        if (oldMantissa != 0) {
            // Nans.
            if (bits(mantissa, 9) == 0) {
                // Signalling nan.
                fpscr.ioc = 1;
            }
            if (fpscr.ahp) {
                // The alternative half precision format has no nans;
                // produce zero and flag an invalid operation.
                mantissa = 0;
                exponent = 0;
                fpscr.ioc = 1;
            } else if (fpscr.dn) {
                // Default nan: positive, quiet, empty payload.
                mantissa = (1 << 9);
                exponent = 0x1f;
                neg = false;
            } else {
                // Keep the truncated payload but make the nan quiet.
                exponent = 0x1f;
                mantissa |= (1 << 9);
            }
        } else {
            // Infinities.
            exponent = 0x1F;
            if (fpscr.ahp) {
                // No infinities in the alternative format; saturate to
                // the largest magnitude and flag an invalid operation.
                fpscr.ioc = 1;
                mantissa = 0x3ff;
            } else {
                mantissa = 0;
            }
        }
    } else if (exponent == 0 && oldMantissa == 0) {
        // Zero, don't need to do anything.
    } else {
        // Normalized or denormalized numbers.
        bool inexact = (extra != 0);
        if (exponent == 0) {
            // Denormalized.
            // If flush to zero is on, this shouldn't happen.
            assert(fpscr.fz == 0);
            // A single precision denormal is smaller than 2^-126, far
            // below the smallest half precision denormal (2^-24). Its
            // fraction bits therefore cannot be reused as a half
            // precision fraction: the result is always inexact and is
            // either zero or, when rounding away from zero, the
            // smallest denormal.
            inexact = true;
            fpscr.ufc = 1;
            unsigned mode = fpscr.rMode;
            if ((mode == VfpRoundUpward && !neg) ||
                (mode == VfpRoundDown && neg)) {
                mantissa = 1;
            } else {
                mantissa = 0;
            }
        } else {
            // Normalized.
            // We need to track the dropped bits differently since
            // more can be dropped by denormalizing.
            bool topOne = bits(extra, 12);
            bool restZeros = bits(extra, 11, 0) == 0;
            if (exponent <= (127 - 15)) {
                // The result is too small. Denormalize.
                mantissa |= (1 << 10);
                while (mantissa && exponent <= (127 - 15)) {
                    restZeros = restZeros && !topOne;
                    topOne = bits(mantissa, 0);
                    mantissa = mantissa >> 1;
                    exponent++;
                }
                if (exponent <= (127 - 15)) {
                    // The loop ran out of mantissa bits before it ran
                    // out of shifts, so the value is below half of the
                    // smallest denormal. The last bit shifted out is
                    // only a sticky bit, not the round bit; otherwise
                    // round to nearest would round tiny values up.
                    restZeros = restZeros && !topOne;
                    topOne = false;
                }
                if (topOne || !restZeros)
                    inexact = true;
                exponent = 0;
            } else {
                // Change bias.
                exponent -= (127 - 15);
            }
            if (exponent == 0 && (inexact || fpscr.ufe)) {
                // Underflow
                fpscr.ufc = 1;
            }
            // Handle rounding.
            unsigned mode = fpscr.rMode;
            bool nonZero = topOne || !restZeros;
            if ((mode == VfpRoundUpward && !neg && nonZero) ||
                (mode == VfpRoundDown && neg && nonZero) ||
                (mode == VfpRoundNearest && topOne &&
                 (!restZeros || bits(mantissa, 0)))) {
                mantissa++;
            }
            // See if we rounded up and need to bump the exponent.
            if (mantissa == (1 << 10)) {
                mantissa = 0;
                exponent++;
            }
            // Deal with overflow
            if (fpscr.ahp) {
                if (exponent >= 0x20) {
                    exponent = 0x1f;
                    mantissa = 0x3ff;
                    fpscr.ioc = 1;
                    // Suppress inexact exception.
                    inexact = false;
                }
            } else {
                if (exponent >= 0x1f) {
                    if ((mode == VfpRoundNearest) ||
                        (mode == VfpRoundUpward && !neg) ||
                        (mode == VfpRoundDown && neg)) {
                        // Overflow to infinity.
                        exponent = 0x1f;
                        mantissa = 0;
                    } else {
                        // Overflow to max normal.
                        exponent = 0x1e;
                        mantissa = 0x3ff;
                    }
                    fpscr.ofc = 1;
                    inexact = true;
                }
            }
        }
        if (inexact) {
            fpscr.ixc = 1;
        }
    }
    // Reassemble and install the result.
    uint32_t result = bits(mantissa, 9, 0);
    replaceBits(result, 14, 10, exponent);
    if (neg)
        result |= (1 << 15);
    if (top)
        replaceBits(destBits, 31, 16, result);
    else
        replaceBits(destBits, 15, 0, result);
    return bitsToFp(destBits, junk);
}
/**
 * Convert one half precision value packed in a 32 bit register value to
 * single precision.
 *
 * @param fpscr Floating point status and control register. The ahp and
 *              dn fields are read; ioc is set when a signalling nan is
 *              converted.
 * @param op    Register value holding two packed 16 bit halves.
 * @param top   If true bits 31:16 of op are converted, otherwise
 *              bits 15:0.
 * @return The selected half precision value as a single precision float.
 */
static inline float
vcvtFpHFpS(FPSCR &fpscr, float op, bool top)
{
    float junk = 0.0;
    // Pick out the requested 16 bit half of the source.
    uint32_t half = fpToBits(op);
    if (top)
        half = bits(half, 31, 16);
    else
        half = bits(half, 15, 0);
    // Split it into sign, exponent and fraction.
    bool neg = bits(half, 15);
    uint32_t exponent = bits(half, 14, 10);
    uint32_t mantissa = bits(half, 9, 0);

    if (exponent == 0x1f && !fpscr.ahp) {
        // Infinities and nans; these only exist when the alternative
        // half precision format is not selected.
        exponent = 0xff;
        if (mantissa != 0) {
            // Nans: move the payload to the top of the wider fraction.
            mantissa <<= (23 - 10);
            if (bits(mantissa, 22) == 0) {
                // Signalling nan: flag it and make it quiet.
                fpscr.ioc = 1;
                mantissa |= (1 << 22);
            }
            if (fpscr.dn) {
                // Default nan: drop the payload and the sign.
                mantissa &= ~mask(22);
                neg = false;
            }
        }
    } else if (exponent != 0) {
        // Normal numbers only need rebiasing and a wider fraction.
        exponent += (127 - 15);
        mantissa <<= (23 - 10);
    } else {
        // Zeros and denormals.
        if (mantissa != 0) {
            // Shift the leading one up into the implicit bit position,
            // lowering the exponent by one for every step.
            exponent = (127 - 15) + 1;
            for (; mantissa < (1 << 10); mantissa <<= 1)
                exponent--;
        }
        mantissa <<= (23 - 10);
    }

    // Reassemble the result; the implicit bit (if any) is masked off.
    uint32_t result = bits(mantissa, 22, 0);
    replaceBits(result, 30, 23, exponent);
    if (neg)
        result |= (1 << 31);
    return bitsToFp(result, junk);
}
static inline double
makeDouble(uint32_t low, uint32_t high)
{

View File

@@ -655,8 +655,23 @@ let {{
}
case 0x2:
case 0x3:
// Between half and single precision.
return new WarnUnimplemented("vcvtb, vcvtt", machInst);
{
    // Bit 16 selects the direction (set: single to half) and bit 7
    // selects which half of the packed register is used (set: top).
    const bool singleToHalf = bits(machInst, 16);
    const bool useTop = bits(machInst, 7);
    if (useTop && singleToHalf)
        return new VcvtFpSFpHT(machInst, vd, vm);
    if (useTop)
        return new VcvtFpHTFpS(machInst, vd, vm);
    if (singleToHalf)
        return new VcvtFpSFpHB(machInst, vd, vm);
    return new VcvtFpHBFpS(machInst, vd, vm);
}
case 0x4:
if (single) {
if (e) {

View File

@@ -912,6 +912,75 @@ let {{
decoder_output += FpRegRegOpConstructor.subst(vcvtFpDFpSIop);
exec_output += PredOpExecute.subst(vcvtFpDFpSIop);
# vcvtt, half to single: the execute code calls vcvtFpHFpS with top = true,
# i.e. it converts the half precision value in bits 31:16 of FpOp1.
# NOTE(review): this variant calls vfpFlushToZero on the packed FpOp1
# register while the vcvtb half to single variant (vcvtFpHBFpSCode) does
# not - confirm which of the two is intended.
vcvtFpHTFpSCode = '''
FPSCR fpscr = Fpscr;
vfpFlushToZero(fpscr, FpOp1);
VfpSavedState state = prepFpState(fpscr.rMode);
__asm__ __volatile__("" : "=m" (FpOp1) : "m" (FpOp1));
FpDest = vcvtFpHFpS(fpscr, FpOp1, true);
__asm__ __volatile__("" :: "m" (FpDest));
finishVfp(fpscr, state);
Fpscr = fpscr;
'''
# Describe the instruction (mnemonic "vcvtt", class VcvtFpHTFpS, base class
# FpRegRegOp) and append its declaration, constructor and execute method
# to the generated output.
vcvtFpHTFpSIop = InstObjParams("vcvtt", "VcvtFpHTFpS", "FpRegRegOp",
{ "code": vcvtFpHTFpSCode,
"predicate_test": predicateTest }, [])
header_output += FpRegRegOpDeclare.subst(vcvtFpHTFpSIop);
decoder_output += FpRegRegOpConstructor.subst(vcvtFpHTFpSIop);
exec_output += PredOpExecute.subst(vcvtFpHTFpSIop);
# vcvtb, half to single: the execute code calls vcvtFpHFpS with top = false,
# i.e. it converts the half precision value in bits 15:0 of FpOp1.
# NOTE(review): unlike the vcvtt half to single variant (vcvtFpHTFpSCode)
# there is no vfpFlushToZero call here - confirm the difference is
# intentional.
vcvtFpHBFpSCode = '''
FPSCR fpscr = Fpscr;
VfpSavedState state = prepFpState(fpscr.rMode);
__asm__ __volatile__("" : "=m" (FpOp1) : "m" (FpOp1));
FpDest = vcvtFpHFpS(fpscr, FpOp1, false);
__asm__ __volatile__("" :: "m" (FpDest));
finishVfp(fpscr, state);
Fpscr = fpscr;
'''
# Describe the instruction (mnemonic "vcvtb", class VcvtFpHBFpS, base class
# FpRegRegOp) and append its declaration, constructor and execute method
# to the generated output.
vcvtFpHBFpSIop = InstObjParams("vcvtb", "VcvtFpHBFpS", "FpRegRegOp",
{ "code": vcvtFpHBFpSCode,
"predicate_test": predicateTest }, [])
header_output += FpRegRegOpDeclare.subst(vcvtFpHBFpSIop);
decoder_output += FpRegRegOpConstructor.subst(vcvtFpHBFpSIop);
exec_output += PredOpExecute.subst(vcvtFpHBFpSIop);
# vcvtt, single to half: the execute code calls vcvtFpSFpH with top = true,
# so the converted value is written to bits 31:16 of FpDest. FpDest is also
# an input because vcvtFpSFpH passes the other half through unchanged.
vcvtFpSFpHTCode = '''
FPSCR fpscr = Fpscr;
vfpFlushToZero(fpscr, FpOp1);
VfpSavedState state = prepFpState(fpscr.rMode);
__asm__ __volatile__("" : "=m" (FpOp1), "=m" (FpDest)
: "m" (FpOp1), "m" (FpDest));
FpDest = vcvtFpSFpH(fpscr, FpOp1, FpDest, true);
__asm__ __volatile__("" :: "m" (FpDest));
finishVfp(fpscr, state);
Fpscr = fpscr;
'''
# Describe the instruction (mnemonic "vcvtt", class VcvtFpSFpHT, base class
# FpRegRegOp) and append its declaration, constructor and execute method
# to the generated output. The "code" entry must be the single to half
# snippet defined just above, not the half to single vcvtFpHTFpSCode.
vcvtFpSFpHTIop = InstObjParams("vcvtt", "VcvtFpSFpHT", "FpRegRegOp",
{ "code": vcvtFpSFpHTCode,
"predicate_test": predicateTest }, [])
header_output += FpRegRegOpDeclare.subst(vcvtFpSFpHTIop);
decoder_output += FpRegRegOpConstructor.subst(vcvtFpSFpHTIop);
exec_output += PredOpExecute.subst(vcvtFpSFpHTIop);
# vcvtb, single to half: the execute code calls vcvtFpSFpH with top = false,
# so the converted value is written to bits 15:0 of FpDest. FpDest is also
# an input because vcvtFpSFpH passes the other half through unchanged.
vcvtFpSFpHBCode = '''
FPSCR fpscr = Fpscr;
vfpFlushToZero(fpscr, FpOp1);
VfpSavedState state = prepFpState(fpscr.rMode);
__asm__ __volatile__("" : "=m" (FpOp1), "=m" (FpDest)
: "m" (FpOp1), "m" (FpDest));
FpDest = vcvtFpSFpH(fpscr, FpOp1, FpDest, false);
__asm__ __volatile__("" :: "m" (FpDest));
finishVfp(fpscr, state);
Fpscr = fpscr;
'''
# Describe the instruction (mnemonic "vcvtb", class VcvtFpSFpHB, base class
# FpRegRegOp) and append its declaration, constructor and execute method
# to the generated output.
vcvtFpSFpHBIop = InstObjParams("vcvtb", "VcvtFpSFpHB", "FpRegRegOp",
{ "code": vcvtFpSFpHBCode,
"predicate_test": predicateTest }, [])
header_output += FpRegRegOpDeclare.subst(vcvtFpSFpHBIop);
decoder_output += FpRegRegOpConstructor.subst(vcvtFpSFpHBIop);
exec_output += PredOpExecute.subst(vcvtFpSFpHBIop);
vcmpSCode = '''
FPSCR fpscr = Fpscr;
vfpFlushToZero(fpscr, FpDest, FpOp1);