arch-arm: Add support for AArch32 VRINTN/X/A/Z/M/P instructions.
Add decoding and execution support for the AArch32 VRINTN, VRINTX, VRINTA, VRINTZ, VRINTM, and VRINTP (Advanced SIMD) instructions. Both the 16-bit and 32-bit variants are supported. Add vfpFpRint in vfp.hh to implement the round-to-integral behavior. Only the A32 encoding is supported. Change-Id: Icb9b6f71edf16ea14a439e15c480351cd8e1eb88
This commit is contained in:
committed by
Giacomo Travaglini
parent
1c8ab47a54
commit
7df35187a0
@@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (c) 2010-2013, 2019 ARM Limited
|
||||
* Copyright (c) 2010-2013, 2019, 2024 Arm Limited
|
||||
* All rights reserved
|
||||
*
|
||||
* The license below extends only to copyright in the software and shall
|
||||
@@ -435,6 +435,119 @@ vfpFpToFixed(T val, bool isSigned, uint8_t width, uint8_t imm, bool
|
||||
};
|
||||
|
||||
|
||||
/**
 * Round a floating-point value to an integral value (kept in floating-point
 * format) by calling the host rint() under a chosen host rounding mode, and
 * hand the collected exception flags to setFPExceptions().
 *
 * @param val Value to round.
 * @param exact When false, FeInexact is removed from the flags read back
 *     after rint(); when true, FeInexact is additionally set whenever
 *     rint() returned a value that is neither NaN nor subnormal and that
 *     differs from the input.
 * @param defaultNan When true, every NaN result is replaced by the quiet NaN
 *     pattern 0x7fc00000 (float-sized T) or 0x7ff8000000000000 (otherwise).
 * @param useRmode When true, roundMode selects the host rounding mode; when
 *     false, the mode currently reported by fegetround() is used.
 * @param roundMode VFP rounding mode to apply; only read when useRmode is
 *     true. An unrecognised value triggers panic().
 * @return The rounded value. A zero result takes its sign bit from the input.
 *
 * Note: the host rounding mode installed with fesetround() is not restored
 * before returning.
 */
template <typename T>
|
||||
T
|
||||
vfpFpRint(T val, bool exact, bool defaultNan, bool useRmode = true,
|
||||
VfpRoundingMode roundMode = VfpRoundZero)
|
||||
{
|
||||
// Host (fenv) rounding mode that rint() will run under.
int rmode;
|
||||
// Set when VfpRoundAway is emulated on top of FeRoundDown.
bool roundAwayFix = false;
|
||||
|
||||
// No specific mode requested: keep the host's current rounding mode.
if (!useRmode) {
|
||||
rmode = fegetround();
|
||||
} else {
|
||||
// Map the VFP rounding mode onto the corresponding Fe* host mode.
switch (roundMode)
|
||||
{
|
||||
case VfpRoundNearest:
|
||||
rmode = FeRoundNearest;
|
||||
break;
|
||||
case VfpRoundUpward:
|
||||
rmode = FeRoundUpward;
|
||||
break;
|
||||
case VfpRoundDown:
|
||||
rmode = FeRoundDown;
|
||||
break;
|
||||
case VfpRoundZero:
|
||||
rmode = FeRoundZero;
|
||||
break;
|
||||
case VfpRoundAway:
|
||||
// There is no equivalent rounding mode, use round down and we'll
|
||||
// fix it later
|
||||
rmode = FeRoundDown;
|
||||
roundAwayFix = true;
|
||||
break;
|
||||
default:
|
||||
panic("Unsupported roundMode %d\n", roundMode);
|
||||
}
|
||||
}
|
||||
// NOTE(review): the empty asm statements with "m" constraints look like
// compiler barriers that keep the floating-point work from being reordered
// around the fenv calls -- confirm.
__asm__ __volatile__("" : "=m" (rmode) : "m" (rmode));
|
||||
__asm__ __volatile__("" : "=m" (val) : "m" (val));
|
||||
fesetround(rmode);
|
||||
// Clear the host flags so fetestexcept() below only reports what was
// raised after this point.
feclearexcept(FeAllExceptions);
|
||||
__asm__ __volatile__("" : "=m" (val) : "m" (val));
|
||||
// Remember the input for the fix-ups and the sign handling below.
T origVal = val;
|
||||
val = rint(val);
|
||||
__asm__ __volatile__("" : "=m" (val) : "m" (val));
|
||||
|
||||
// Collect the host exception flags raised since feclearexcept().
int exceptions = fetestexcept(FeAllExceptions);
|
||||
// Non-exact variants do not report Inexact.
if (!exact) {
|
||||
exceptions &= ~FeInexact;
|
||||
}
|
||||
|
||||
int fpType = std::fpclassify(val);
|
||||
if (fpType == FP_SUBNORMAL || fpType == FP_NAN) {
|
||||
if (fpType == FP_NAN) {
|
||||
// NOTE(review): isSnan() is applied to the value returned by rint(),
// not to origVal -- confirm the host rint() leaves signaling NaNs
// signaling, otherwise this branch cannot be taken.
if (isSnan(val)) {
|
||||
exceptions |= FeInvalid;
|
||||
}
|
||||
// Substitute the quiet NaN pattern unless the value is a signaling NaN
// and defaultNan is clear.
if (defaultNan || !isSnan(val)) {
|
||||
bool single = (sizeof(T) == sizeof(float));
|
||||
uint64_t qnan = single ? 0x7fc00000 : 0x7ff8000000000000ULL;
|
||||
val = bitsToFp(qnan, (T)0.0);
|
||||
}
|
||||
} else {
|
||||
// Subnormal result: replace it with zero (its sign is set further down).
val = 0.0;
|
||||
}
|
||||
} else if (origVal != val) {
|
||||
// rint() changed the value: adjust by one where the result lies on the
// wrong side for the selected rounding mode. There is no case for
// FeRoundZero.
switch (rmode) {
|
||||
case FeRoundNearest:
|
||||
// Step back towards origVal if the result is more than 0.5 away from it.
if (origVal - val > 0.5)
|
||||
val += 1.0;
|
||||
else if (val - origVal > 0.5)
|
||||
val -= 1.0;
|
||||
break;
|
||||
case FeRoundDown:
|
||||
if (roundAwayFix) {
|
||||
// The ordering on the subtraction looks a bit odd in that we
|
||||
// don't do the obvious origVal - val, instead we do
|
||||
// -(val - origVal). This is required to get the correct bit
|
||||
// exact behaviour when very close to the 0.5 threshold.
|
||||
volatile T error = val;
|
||||
error -= origVal;
|
||||
error = -error;
|
||||
// error now holds origVal - val. Bump the round-down result by one when
// that difference exceeds 0.5, or on an exact tie when val is
// non-negative.
if ( (error > 0.5) ||
|
||||
((error == 0.5) && (val >= 0)) )
|
||||
val += 1.0;
|
||||
} else {
|
||||
// Plain round-down: the result must not exceed the input.
if (origVal < val)
|
||||
val -= 1.0;
|
||||
}
|
||||
break;
|
||||
case FeRoundUpward:
|
||||
// Round-up: the result must not be below the input.
if (origVal > val)
|
||||
val += 1.0;
|
||||
break;
|
||||
}
|
||||
// The value changed, so an exact variant reports Inexact.
if (exact) {
|
||||
exceptions |= FeInexact;
|
||||
}
|
||||
}
|
||||
// Fix sign of zero: a zero result takes the sign bit of the input.
|
||||
fpType = std::fpclassify(val);
|
||||
if (fpType == FP_ZERO) {
|
||||
bool single = (sizeof(T) == sizeof(float));
|
||||
uint64_t mask = single ? 0x80000000 : 0x8000000000000000ULL;
|
||||
val = bitsToFp((fpToBits(val) & (~mask)) | (fpToBits(origVal) & mask),
|
||||
(T)0.0);
|
||||
}
|
||||
|
||||
// __asm__ __volatile__("" : "=m" (val) : "m" (val));
|
||||
// Hand the accumulated exception flags to setFPExceptions().
setFPExceptions(exceptions);
|
||||
|
||||
return val;
|
||||
};
|
||||
|
||||
|
||||
float vfpUFixedToFpS(bool flush, bool defaultNan,
|
||||
uint64_t val, uint8_t width, uint8_t imm);
|
||||
float vfpSFixedToFpS(bool flush, bool defaultNan,
|
||||
|
||||
@@ -1804,6 +1804,108 @@ let {{
|
||||
} else {
|
||||
return new SHA1SU1(machInst, vd, vm);
|
||||
}
|
||||
case 0x8:
|
||||
switch (size) {
|
||||
case 0b01:
|
||||
if (q) {
|
||||
return new NVrintnhpQ<uint16_t>(machInst, vd, vm);
|
||||
} else {
|
||||
return new NVrintnhpD<uint16_t>(machInst, vd, vm);
|
||||
}
|
||||
case 0b10:
|
||||
if (q) {
|
||||
return new NVrintnspQ<uint32_t>(machInst, vd, vm);
|
||||
} else {
|
||||
return new NVrintnspD<uint32_t>(machInst, vd, vm);
|
||||
}
|
||||
default:
|
||||
return new Unknown64(machInst);
|
||||
}
|
||||
case 0x9:
|
||||
switch (size) {
|
||||
case 0b01:
|
||||
if (q) {
|
||||
return new NVrintxhpQ<uint16_t>(machInst, vd, vm);
|
||||
} else {
|
||||
return new NVrintxhpD<uint16_t>(machInst, vd, vm);
|
||||
}
|
||||
case 0b10:
|
||||
if (q) {
|
||||
return new NVrintxspQ<uint32_t>(machInst, vd, vm);
|
||||
} else {
|
||||
return new NVrintxspD<uint32_t>(machInst, vd, vm);
|
||||
}
|
||||
default:
|
||||
return new Unknown64(machInst);
|
||||
}
|
||||
case 0xa:
|
||||
switch (size) {
|
||||
case 0b01:
|
||||
if (q) {
|
||||
return new NVrintahpQ<uint16_t>(machInst, vd, vm);
|
||||
} else {
|
||||
return new NVrintahpD<uint16_t>(machInst, vd, vm);
|
||||
}
|
||||
case 0b10:
|
||||
if (q) {
|
||||
return new NVrintaspQ<uint32_t>(machInst, vd, vm);
|
||||
} else {
|
||||
return new NVrintaspD<uint32_t>(machInst, vd, vm);
|
||||
}
|
||||
default:
|
||||
return new Unknown64(machInst);
|
||||
}
|
||||
case 0xb:
|
||||
switch (size) {
|
||||
case 0b01:
|
||||
if (q) {
|
||||
return new NVrintzhpQ<uint16_t>(machInst, vd, vm);
|
||||
} else {
|
||||
return new NVrintzhpD<uint16_t>(machInst, vd, vm);
|
||||
}
|
||||
case 0b10:
|
||||
if (q) {
|
||||
return new NVrintzspQ<uint32_t>(machInst, vd, vm);
|
||||
} else {
|
||||
return new NVrintzspD<uint32_t>(machInst, vd, vm);
|
||||
}
|
||||
default:
|
||||
return new Unknown64(machInst);
|
||||
}
|
||||
case 0xd:
|
||||
switch (size) {
|
||||
case 0b01:
|
||||
if (q) {
|
||||
return new NVrintmhpQ<uint16_t>(machInst, vd, vm);
|
||||
} else {
|
||||
return new NVrintmhpD<uint16_t>(machInst, vd, vm);
|
||||
}
|
||||
case 0b10:
|
||||
if (q) {
|
||||
return new NVrintmspQ<uint32_t>(machInst, vd, vm);
|
||||
} else {
|
||||
return new NVrintmspD<uint32_t>(machInst, vd, vm);
|
||||
}
|
||||
default:
|
||||
return new Unknown64(machInst);
|
||||
}
|
||||
case 0xf:
|
||||
switch (size) {
|
||||
case 0b01:
|
||||
if (q) {
|
||||
return new NVrintphpQ<uint16_t>(machInst, vd, vm);
|
||||
} else {
|
||||
return new NVrintphpD<uint16_t>(machInst, vd, vm);
|
||||
}
|
||||
case 0b10:
|
||||
if (q) {
|
||||
return new NVrintpspQ<uint32_t>(machInst, vd, vm);
|
||||
} else {
|
||||
return new NVrintpspD<uint32_t>(machInst, vd, vm);
|
||||
}
|
||||
default:
|
||||
return new Unknown64(machInst);
|
||||
}
|
||||
case 0xc:
|
||||
case 0xe:
|
||||
if (b == 0x18) {
|
||||
|
||||
@@ -3701,6 +3701,102 @@ let {{
|
||||
twoRegMiscInst("vcvtm.s32.f32", "NVcvt2ssMQ", "SimdCvtOp",
|
||||
("int32_t",), 4, vcvtmsp2ssCode)
|
||||
|
||||
# Per-element code template for the half-precision (f16) VRINT* instructions
# registered below. Its three %s slots are, in order: the `exact` argument of
# vfpFpRint, the rounding mode passed to vfpFpRint, and the rounding mode
# passed to vcvtFpSFpH. The source element goes through vcvtFpHFpS to a
# float, is rounded by vfpFpRint<float>, and is converted back with
# vcvtFpSFpH.
# Each instruction is registered twice; the D and Q classes differ only in
# the class name and the integer argument (2 vs 4) -- presumably the register
# count, TODO confirm against twoRegMiscInst.
vrinthpCode = '''
|
||||
FPSCR fpscr = (FPSCR) FpscrExc;
|
||||
VfpSavedState state = prepFpState(fpscr.rMode);
|
||||
__asm__ __volatile__("" : "=m" (srcElem1) : "m" (srcElem1));
|
||||
float mid = vcvtFpHFpS(fpscr, fpscr.dn, fpscr.ahp, srcElem1);
|
||||
if (flushToZero(mid))
|
||||
fpscr.idc = 1;
|
||||
float mid2 = vfpFpRint<float>(mid, %s, fpscr.dn, true, %s);
|
||||
destElem = vcvtFpSFpH(fpscr, fpscr.fz, fpscr.dn, %s, fpscr.ahp, mid2);
|
||||
__asm__ __volatile__("" :: "m" (destElem));
|
||||
finishVfp(fpscr, state, true);
|
||||
FpscrExc = fpscr;
|
||||
'''
|
||||
# vrintn.f16: exact=false, VfpRoundNearest for both rounding slots.
vrintnhpCode = vrinthpCode % ("false",
|
||||
"VfpRoundNearest", "VfpRoundNearest")
|
||||
twoRegMiscInst("vrintn.f16", "NVrintnhpD", "SimdCvtOp",
|
||||
("uint16_t",), 2, vrintnhpCode)
|
||||
twoRegMiscInst("vrintn.f16", "NVrintnhpQ", "SimdCvtOp",
|
||||
("uint16_t",), 4, vrintnhpCode)
|
||||
# vrintx.f16: same rounding modes as vrintn.f16 but with exact=true.
vrintxhpCode = vrinthpCode % ("true",
|
||||
"VfpRoundNearest", "VfpRoundNearest")
|
||||
twoRegMiscInst("vrintx.f16", "NVrintxhpD", "SimdCvtOp",
|
||||
("uint16_t",), 2, vrintxhpCode)
|
||||
twoRegMiscInst("vrintx.f16", "NVrintxhpQ", "SimdCvtOp",
|
||||
("uint16_t",), 4, vrintxhpCode)
|
||||
# vrinta.f16: exact=false, VfpRoundAway.
vrintahpCode = vrinthpCode % ("false", "VfpRoundAway", "VfpRoundAway")
|
||||
twoRegMiscInst("vrinta.f16", "NVrintahpD", "SimdCvtOp",
|
||||
("uint16_t",), 2, vrintahpCode)
|
||||
twoRegMiscInst("vrinta.f16", "NVrintahpQ", "SimdCvtOp",
|
||||
("uint16_t",), 4, vrintahpCode)
|
||||
# vrintz.f16: exact=false, VfpRoundZero.
vrintzhpCode = vrinthpCode % ("false", "VfpRoundZero", "VfpRoundZero")
|
||||
twoRegMiscInst("vrintz.f16", "NVrintzhpD", "SimdCvtOp",
|
||||
("uint16_t",), 2, vrintzhpCode)
|
||||
twoRegMiscInst("vrintz.f16", "NVrintzhpQ", "SimdCvtOp",
|
||||
("uint16_t",), 4, vrintzhpCode)
|
||||
# vrintm.f16: exact=false, VfpRoundDown.
vrintmhpCode = vrinthpCode % ("false", "VfpRoundDown", "VfpRoundDown")
|
||||
twoRegMiscInst("vrintm.f16", "NVrintmhpD", "SimdCvtOp",
|
||||
("uint16_t",), 2, vrintmhpCode)
|
||||
twoRegMiscInst("vrintm.f16", "NVrintmhpQ", "SimdCvtOp",
|
||||
("uint16_t",), 4, vrintmhpCode)
|
||||
# vrintp.f16: exact=false, VfpRoundUpward.
vrintphpCode = vrinthpCode % ("false", "VfpRoundUpward", "VfpRoundUpward")
|
||||
twoRegMiscInst("vrintp.f16", "NVrintphpD", "SimdCvtOp",
|
||||
("uint16_t",), 2, vrintphpCode)
|
||||
twoRegMiscInst("vrintp.f16", "NVrintphpQ", "SimdCvtOp",
|
||||
("uint16_t",), 4, vrintphpCode)
|
||||
|
||||
# Per-element code template for the single-precision (f32) VRINT*
# instructions registered below. Its two %s slots are, in order: the `exact`
# argument of vfpFpRint and the rounding mode passed to vfpFpRint. The source
# element's bits are turned into a float with bitsToFp, rounded by
# vfpFpRint<float>, and written back with fpToBits.
# Each instruction is registered twice; the D and Q classes differ only in
# the class name and the integer argument (2 vs 4) -- presumably the register
# count, TODO confirm against twoRegMiscInst.
vrintspCode = '''
|
||||
FPSCR fpscr = (FPSCR) FpscrExc;
|
||||
VfpSavedState state = prepFpState(fpscr.rMode);
|
||||
__asm__ __volatile__("" : "=m" (srcElem1) : "m" (srcElem1));
|
||||
float mid = bitsToFp(srcElem1, (float)0.0);
|
||||
if (flushToZero(mid))
|
||||
fpscr.idc = 1;
|
||||
float mid2 = vfpFpRint<float>(mid, %s, fpscr.dn, true, %s);
|
||||
destElem = fpToBits(mid2);
|
||||
__asm__ __volatile__("" :: "m" (destElem));
|
||||
finishVfp(fpscr, state, true);
|
||||
FpscrExc = fpscr;
|
||||
'''
|
||||
|
||||
# vrintn.f32: exact=false, VfpRoundNearest.
vrintnspCode = vrintspCode % ("false", "VfpRoundNearest")
|
||||
twoRegMiscInst("vrintn.f32", "NVrintnspD", "SimdCvtOp",
|
||||
("uint32_t",), 2, vrintnspCode)
|
||||
twoRegMiscInst("vrintn.f32", "NVrintnspQ", "SimdCvtOp",
|
||||
("uint32_t",), 4, vrintnspCode)
|
||||
|
||||
# vrintx.f32: same rounding mode as vrintn.f32 but with exact=true.
vrintxspCode = vrintspCode % ("true", "VfpRoundNearest")
|
||||
twoRegMiscInst("vrintx.f32", "NVrintxspD", "SimdCvtOp",
|
||||
("uint32_t",), 2, vrintxspCode)
|
||||
twoRegMiscInst("vrintx.f32", "NVrintxspQ", "SimdCvtOp",
|
||||
("uint32_t",), 4, vrintxspCode)
|
||||
|
||||
# vrinta.f32: exact=false, VfpRoundAway.
vrintaspCode = vrintspCode % ("false", "VfpRoundAway")
|
||||
twoRegMiscInst("vrinta.f32", "NVrintaspD", "SimdCvtOp",
|
||||
("uint32_t",), 2, vrintaspCode)
|
||||
twoRegMiscInst("vrinta.f32", "NVrintaspQ", "SimdCvtOp",
|
||||
("uint32_t",), 4, vrintaspCode)
|
||||
|
||||
# vrintz.f32: exact=false, VfpRoundZero.
vrintzspCode = vrintspCode % ("false", "VfpRoundZero")
|
||||
twoRegMiscInst("vrintz.f32", "NVrintzspD", "SimdCvtOp",
|
||||
("uint32_t",), 2, vrintzspCode)
|
||||
twoRegMiscInst("vrintz.f32", "NVrintzspQ", "SimdCvtOp",
|
||||
("uint32_t",), 4, vrintzspCode)
|
||||
|
||||
# vrintm.f32: exact=false, VfpRoundDown.
vrintmspCode = vrintspCode % ("false", "VfpRoundDown")
|
||||
twoRegMiscInst("vrintm.f32", "NVrintmspD", "SimdCvtOp",
|
||||
("uint32_t",), 2, vrintmspCode)
|
||||
twoRegMiscInst("vrintm.f32", "NVrintmspQ", "SimdCvtOp",
|
||||
("uint32_t",), 4, vrintmspCode)
|
||||
|
||||
# vrintp.f32: exact=false, VfpRoundUpward.
vrintpspCode = vrintspCode % ("false", "VfpRoundUpward")
|
||||
twoRegMiscInst("vrintp.f32", "NVrintpspD", "SimdCvtOp",
|
||||
("uint32_t",), 2, vrintpspCode)
|
||||
twoRegMiscInst("vrintp.f32", "NVrintpspQ", "SimdCvtOp",
|
||||
("uint32_t",), 4, vrintpspCode)
|
||||
|
||||
vrsqrteCode = '''
|
||||
destElem = unsignedRSqrtEstimate(srcElem1);
|
||||
'''
|
||||
|
||||
Reference in New Issue
Block a user