arch-arm: Add support for AArch32 VRINTN/X/A/Z/M/P instructions.

Add the decoder entries and the implementation of the AArch32 VRINTN,
VRINTX, VRINTA, VRINTZ, VRINTM, and VRINTP (Advanced SIMD) instructions.
Both the 16-bit and 32-bit variants are supported.

Add vfpFpRint in vfp.hh to perform the round-to-integer operation.

Only the A32 encoding is supported.

Change-Id: Icb9b6f71edf16ea14a439e15c480351cd8e1eb88
This commit is contained in:
Junshi Wang
2024-08-17 14:10:13 +08:00
committed by Giacomo Travaglini
parent 1c8ab47a54
commit 7df35187a0
3 changed files with 312 additions and 1 deletions

View File

@@ -1,5 +1,5 @@
/*
* Copyright (c) 2010-2013, 2019 ARM Limited
* Copyright (c) 2010-2013, 2019, 2024 Arm Limited
* All rights reserved
*
* The license below extends only to copyright in the software and shall
@@ -435,6 +435,119 @@ vfpFpToFixed(T val, bool isSigned, uint8_t width, uint8_t imm, bool
};
/**
 * Round a floating point value to an integral value, keeping the result in
 * floating point format.
 *
 * The rounding itself is delegated to the host rint() under a host rounding
 * mode chosen from the arguments; the result and the collected exception
 * flags are then adjusted in software and the flags are handed to
 * setFPExceptions(). The host rounding mode selected here is not restored by
 * this function.
 *
 * @param val Value to round.
 * @param exact If false, FeInexact is removed from the reported exceptions.
 *              If true, it is kept and additionally forced whenever the
 *              rounded value differs from the input.
 * @param defaultNan If true, a NaN result is always replaced by the quiet
 *                   NaN bit pattern (0x7fc00000 / 0x7ff8000000000000).
 * @param useRmode If false, the rounding mode currently active on the host
 *                 (fegetround()) is used and roundMode is ignored.
 * @param roundMode Rounding mode to apply when useRmode is true.
 *                  VfpRoundAway has no host equivalent and is emulated with
 *                  round-down plus a fix-up.
 * @return The rounded value.
 */
template <typename T>
T
vfpFpRint(T val, bool exact, bool defaultNan, bool useRmode = true,
VfpRoundingMode roundMode = VfpRoundZero)
{
// Host rounding mode that rint() will run under.
int rmode;
// Set when round-to-nearest-ties-away has to be emulated after rint().
bool roundAwayFix = false;
if (!useRmode) {
rmode = fegetround();
} else {
switch (roundMode)
{
case VfpRoundNearest:
rmode = FeRoundNearest;
break;
case VfpRoundUpward:
rmode = FeRoundUpward;
break;
case VfpRoundDown:
rmode = FeRoundDown;
break;
case VfpRoundZero:
rmode = FeRoundZero;
break;
case VfpRoundAway:
// There is no equivalent rounding mode, use round down and we'll
// fix it later
rmode = FeRoundDown;
roundAwayFix = true;
break;
default:
panic("Unsupported roundMode %d\n", roundMode);
}
}
// Empty asm statements with memory operands; presumably these stop the
// compiler from moving the floating point work across the calls that
// change the rounding mode and clear/test the exception flags.
__asm__ __volatile__("" : "=m" (rmode) : "m" (rmode));
__asm__ __volatile__("" : "=m" (val) : "m" (val));
fesetround(rmode);
feclearexcept(FeAllExceptions);
__asm__ __volatile__("" : "=m" (val) : "m" (val));
// Keep the input around: it is needed for the fix-ups and for the sign of
// a zero result.
T origVal = val;
val = rint(val);
__asm__ __volatile__("" : "=m" (val) : "m" (val));
// Exceptions raised by the host while rounding.
int exceptions = fetestexcept(FeAllExceptions);
if (!exact) {
exceptions &= ~FeInexact;
}
int fpType = std::fpclassify(val);
if (fpType == FP_SUBNORMAL || fpType == FP_NAN) {
if (fpType == FP_NAN) {
// NOTE(review): the signalling-NaN checks below look at the value
// returned by rint(), not at origVal. If the host rint() already
// quiets a signalling NaN, FeInvalid is never added here and the
// result is always the default NaN -- confirm this is intended.
if (isSnan(val)) {
exceptions |= FeInvalid;
}
if (defaultNan || !isSnan(val)) {
// Replace the result with the quiet NaN pattern of matching width.
bool single = (sizeof(T) == sizeof(float));
uint64_t qnan = single ? 0x7fc00000 : 0x7ff8000000000000ULL;
val = bitsToFp(qnan, (T)0.0);
}
} else {
// A subnormal result is replaced by zero; its sign is fixed below.
val = 0.0;
}
} else if (origVal != val) {
// The value changed while rounding: adjust the host result where needed
// for the requested mode.
switch (rmode) {
case FeRoundNearest:
if (origVal - val > 0.5)
val += 1.0;
else if (val - origVal > 0.5)
val -= 1.0;
break;
case FeRoundDown:
if (roundAwayFix) {
// The ordering on the subtraction looks a bit odd in that we
// don't do the obvious origVal - val, instead we do
// -(val - origVal). This is required to get the correct bit
// exact behaviour when very close to the 0.5 threshold.
volatile T error = val;
error -= origVal;
error = -error;
// val was rounded down, so error is how far the input sits above
// it. Move up when past the midpoint, or exactly on the midpoint
// with a non-negative val (for a negative val the rounded-down
// value is already the one further from zero).
if ( (error > 0.5) ||
((error == 0.5) && (val >= 0)) )
val += 1.0;
} else {
if (origVal < val)
val -= 1.0;
}
break;
case FeRoundUpward:
if (origVal > val)
val += 1.0;
break;
}
if (exact) {
exceptions |= FeInexact;
}
}
// Fix sign of zero: a zero result takes the sign bit of the input.
fpType = std::fpclassify(val);
if (fpType == FP_ZERO) {
bool single = (sizeof(T) == sizeof(float));
uint64_t mask = single ? 0x80000000 : 0x8000000000000000ULL;
val = bitsToFp((fpToBits(val) & (~mask)) | (fpToBits(origVal) & mask),
(T)0.0);
}
// __asm__ __volatile__("" : "=m" (val) : "m" (val));
// Publish the collected exception flags.
setFPExceptions(exceptions);
return val;
};
float vfpUFixedToFpS(bool flush, bool defaultNan,
uint64_t val, uint8_t width, uint8_t imm);
float vfpSFixedToFpS(bool flush, bool defaultNan,

View File

@@ -1804,6 +1804,108 @@ let {{
} else {
return new SHA1SU1(machInst, vd, vm);
}
// VRINT{N,X,A,Z,M,P} decode. In every case below size 0b01 selects the
// uint16_t (half precision, "hp") instruction classes and size 0b10 the
// uint32_t (single precision, "sp") ones; q selects the Q-suffixed class
// over the D-suffixed one. All other sizes are not decoded as VRINT.
// NOTE(review): confirm that Unknown64 is the intended undefined-instruction
// class for this decode path.
// VRINTN
case 0x8:
switch (size) {
case 0b01:
if (q) {
return new NVrintnhpQ<uint16_t>(machInst, vd, vm);
} else {
return new NVrintnhpD<uint16_t>(machInst, vd, vm);
}
case 0b10:
if (q) {
return new NVrintnspQ<uint32_t>(machInst, vd, vm);
} else {
return new NVrintnspD<uint32_t>(machInst, vd, vm);
}
default:
return new Unknown64(machInst);
}
// VRINTX
case 0x9:
switch (size) {
case 0b01:
if (q) {
return new NVrintxhpQ<uint16_t>(machInst, vd, vm);
} else {
return new NVrintxhpD<uint16_t>(machInst, vd, vm);
}
case 0b10:
if (q) {
return new NVrintxspQ<uint32_t>(machInst, vd, vm);
} else {
return new NVrintxspD<uint32_t>(machInst, vd, vm);
}
default:
return new Unknown64(machInst);
}
// VRINTA
case 0xa:
switch (size) {
case 0b01:
if (q) {
return new NVrintahpQ<uint16_t>(machInst, vd, vm);
} else {
return new NVrintahpD<uint16_t>(machInst, vd, vm);
}
case 0b10:
if (q) {
return new NVrintaspQ<uint32_t>(machInst, vd, vm);
} else {
return new NVrintaspD<uint32_t>(machInst, vd, vm);
}
default:
return new Unknown64(machInst);
}
// VRINTZ
case 0xb:
switch (size) {
case 0b01:
if (q) {
return new NVrintzhpQ<uint16_t>(machInst, vd, vm);
} else {
return new NVrintzhpD<uint16_t>(machInst, vd, vm);
}
case 0b10:
if (q) {
return new NVrintzspQ<uint32_t>(machInst, vd, vm);
} else {
return new NVrintzspD<uint32_t>(machInst, vd, vm);
}
default:
return new Unknown64(machInst);
}
// VRINTM
case 0xd:
switch (size) {
case 0b01:
if (q) {
return new NVrintmhpQ<uint16_t>(machInst, vd, vm);
} else {
return new NVrintmhpD<uint16_t>(machInst, vd, vm);
}
case 0b10:
if (q) {
return new NVrintmspQ<uint32_t>(machInst, vd, vm);
} else {
return new NVrintmspD<uint32_t>(machInst, vd, vm);
}
default:
return new Unknown64(machInst);
}
// VRINTP
case 0xf:
switch (size) {
case 0b01:
if (q) {
return new NVrintphpQ<uint16_t>(machInst, vd, vm);
} else {
return new NVrintphpD<uint16_t>(machInst, vd, vm);
}
case 0b10:
if (q) {
return new NVrintpspQ<uint32_t>(machInst, vd, vm);
} else {
return new NVrintpspD<uint32_t>(machInst, vd, vm);
}
default:
return new Unknown64(machInst);
}
case 0xc:
case 0xe:
if (b == 0x18) {

View File

@@ -3701,6 +3701,102 @@ let {{
twoRegMiscInst("vcvtm.s32.f32", "NVcvt2ssMQ", "SimdCvtOp",
("int32_t",), 4, vcvtmsp2ssCode)
# Execute-code template shared by the half precision VRINT* variants. The
# element is widened to float, rounded with vfpFpRint and narrowed again.
# Placeholders, in order: the "exact" argument of vfpFpRint, the rounding
# mode given to vfpFpRint, and the rounding mode given to vcvtFpSFpH.
vrinthpCode = '''
FPSCR fpscr = (FPSCR) FpscrExc;
VfpSavedState state = prepFpState(fpscr.rMode);
__asm__ __volatile__("" : "=m" (srcElem1) : "m" (srcElem1));
float mid = vcvtFpHFpS(fpscr, fpscr.dn, fpscr.ahp, srcElem1);
if (flushToZero(mid))
fpscr.idc = 1;
float mid2 = vfpFpRint<float>(mid, %s, fpscr.dn, true, %s);
destElem = vcvtFpSFpH(fpscr, fpscr.fz, fpscr.dn, %s, fpscr.ahp, mid2);
__asm__ __volatile__("" :: "m" (destElem));
finishVfp(fpscr, state, true);
FpscrExc = fpscr;
'''
# Execute-code template shared by the single precision VRINT* variants.
# Placeholders, in order: the "exact" argument of vfpFpRint and its
# rounding mode.
vrintspCode = '''
FPSCR fpscr = (FPSCR) FpscrExc;
VfpSavedState state = prepFpState(fpscr.rMode);
__asm__ __volatile__("" : "=m" (srcElem1) : "m" (srcElem1));
float mid = bitsToFp(srcElem1, (float)0.0);
if (flushToZero(mid))
fpscr.idc = 1;
float mid2 = vfpFpRint<float>(mid, %s, fpscr.dn, true, %s);
destElem = fpToBits(mid2);
__asm__ __volatile__("" :: "m" (destElem));
finishVfp(fpscr, state, true);
FpscrExc = fpscr;
'''
# One row per VRINT flavour, in registration order:
# (mnemonic letter, "exact" argument, rounding mode).
vrintFlavours = (
    ("n", "false", "VfpRoundNearest"),
    ("x", "true", "VfpRoundNearest"),
    ("a", "false", "VfpRoundAway"),
    ("z", "false", "VfpRoundZero"),
    ("m", "false", "VfpRoundDown"),
    ("p", "false", "VfpRoundUpward"),
)
# One row per precision: (mnemonic suffix, class name infix, element type,
# code template, number of rounding mode placeholders in the template).
vrintPrecisions = (
    ("f16", "hp", "uint16_t", vrinthpCode, 2),
    ("f32", "sp", "uint32_t", vrintspCode, 1),
)
# All half precision instructions are registered first, then the single
# precision ones; each flavour gets a D (rCount 2) and a Q (rCount 4) form.
for vrintFmt, vrintInfix, vrintElem, vrintTmpl, vrintModes in vrintPrecisions:
    for vrintLetter, vrintExact, vrintRmode in vrintFlavours:
        vrintCode = vrintTmpl % ((vrintExact,) + (vrintRmode,) * vrintModes)
        for vrintReg, vrintCount in (("D", 2), ("Q", 4)):
            twoRegMiscInst("vrint%s.%s" % (vrintLetter, vrintFmt),
                           "NVrint%s%s%s" % (vrintLetter, vrintInfix,
                                             vrintReg),
                           "SimdCvtOp", (vrintElem,), vrintCount, vrintCode)
vrsqrteCode = '''
destElem = unsignedRSqrtEstimate(srcElem1);
'''