arch-arm: implement floating point aarch32 VCVTA family

These instructions round floating point to integer, and were added to
aarch32 as an extension to ARMv7.

Change-Id: I62d1705badc95a4e8954a5ad62b2b6bc9e4ffe00
Reviewed-on: https://gem5-review.googlesource.com/c/16788
Reviewed-by: Giacomo Travaglini <giacomo.travaglini@arm.com>
Maintainer: Andreas Sandberg <andreas.sandberg@arm.com>
This commit is contained in:
Ciro Santilli
2019-02-18 18:06:45 +00:00
parent b48e4a90bf
commit 2c242d665f
2 changed files with 200 additions and 105 deletions

View File

@@ -1,6 +1,6 @@
// -*- mode:c++ -*-
// Copyright (c) 2010-2011, 2016-2018 ARM Limited
// Copyright (c) 2010-2011, 2016-2019 ARM Limited
// All rights reserved
//
// The license below extends only to copyright in the software and shall
@@ -2001,6 +2001,26 @@ let {{
decodeShortFpTransfer(ExtMachInst machInst);
'''
decoder_output = '''
// Build the destination register index from bit 22 and bits 15:12 of
// the instruction word.
//
// machInst: instruction word being decoded.
// size:     size field of the instruction; the value 3 (which the decoder
//           routes to the *D instruction classes) selects the layout where
//           bit 22 becomes bit 5 of the index.
// isInt:    when true, bit 22 is always used as bit 0 of the index,
//           whatever the value of size.
//
// In both layouts bits 15:12 are placed at bits 4:1 of the result.
IntRegIndex decodeFpVd(ExtMachInst machInst, uint32_t size, bool isInt)
{
    const bool wideLayout = (size == 3) && !isInt;
    // Position of instruction bit 22 inside the resulting index.
    const int highBitShift = wideLayout ? 5 : 0;
    return (IntRegIndex)((bits(machInst, 22) << highBitShift) |
                         (bits(machInst, 15, 12) << 1));
}
// Build the source register index from bit 5 and bits 3:0 of the
// instruction word.
//
// machInst: instruction word being decoded.
// size:     size field of the instruction; the value 3 (which the decoder
//           routes to the *D instruction classes) selects the layout where
//           bit 5 of the instruction becomes bit 5 of the index.
// isInt:    when true, instruction bit 5 is always used as bit 0 of the
//           index, whatever the value of size.
//
// In both layouts bits 3:0 are placed at bits 4:1 of the result.
IntRegIndex decodeFpVm(ExtMachInst machInst, uint32_t size, bool isInt)
{
    const bool wideLayout = (size == 3) && !isInt;
    // Position of instruction bit 5 inside the resulting index.
    const int highBitShift = wideLayout ? 5 : 0;
    return (IntRegIndex)((bits(machInst, 5) << highBitShift) |
                         (bits(machInst, 3, 0) << 1));
}
StaticInstPtr
decodeShortFpTransfer(ExtMachInst machInst)
{
@@ -2008,67 +2028,143 @@ let {{
const uint32_t c = bits(machInst, 8);
const uint32_t a = bits(machInst, 23, 21);
const uint32_t b = bits(machInst, 6, 5);
const uint32_t o1 = bits(machInst, 18);
if ((machInst.thumb == 1 && bits(machInst, 28) == 1) ||
(machInst.thumb == 0 && machInst.condCode == 0xf)) {
// Determine if this is backported aarch64 FP instruction
const bool b31_b24 = bits(machInst, 31, 24) == 0xFE;
const bool b23 = bits(machInst, 23);
const bool b21_b18 = bits(machInst, 21, 18) == 0xE;
const bool b21_b19 = bits(machInst, 21, 19) == 0x7;
const bool b11_b9 = bits(machInst, 11, 9) == 0x5;
const bool sz = bits(machInst, 8);
const bool b7_b6 = bits(machInst, 7, 6) == 0x1;
const bool b6 = bits(machInst, 6) == 0x0;
const uint32_t size = bits(machInst, 9, 8);
const bool op3 = bits(machInst, 6);
const bool b4 = bits(machInst, 4) == 0x0;
if (b31_b24 && b23 && b21_b18 && b11_b9 && b7_b6 && b4) {
// VINT* Integer Rounding Instructon
const uint32_t rm = bits(machInst, 17, 16);
if (sz) {
const IntRegIndex vd =
(IntRegIndex)((bits(machInst, 22) << 5) |
(bits(machInst, 15, 12) << 1));
const IntRegIndex vm =
(IntRegIndex)((bits(machInst, 5) << 5) |
(bits(machInst, 3, 0) << 1));
switch(rm) {
case 0x0:
return decodeVfpRegRegOp<VRIntAD>(machInst, vd, vm,
true);
case 0x1:
return decodeVfpRegRegOp<VRIntND>(machInst, vd, vm,
true);
case 0x2:
return decodeVfpRegRegOp<VRIntPD>(machInst, vd, vm,
true);
case 0x3:
return decodeVfpRegRegOp<VRIntMD>(machInst, vd, vm,
true);
default: return new Unknown(machInst);
}
} else {
const IntRegIndex vd =
(IntRegIndex)(bits(machInst, 22) |
(bits(machInst, 15, 12) << 1));
const IntRegIndex vm =
(IntRegIndex)(bits(machInst, 5) |
(bits(machInst, 3, 0) << 1));
switch(rm) {
case 0x0:
return decodeVfpRegRegOp<VRIntAS>(machInst, vd, vm,
false);
case 0x1:
return decodeVfpRegRegOp<VRIntNS>(machInst, vd, vm,
false);
case 0x2:
return decodeVfpRegRegOp<VRIntPS>(machInst, vd, vm,
false);
case 0x3:
return decodeVfpRegRegOp<VRIntMS>(machInst, vd, vm,
false);
default: return new Unknown(machInst);
}
}
} else if (b31_b24 && !b23 && b11_b9 && b6 && b4){
const uint32_t rm = bits(machInst, 17, 16);
IntRegIndex vd = decodeFpVd(machInst, size, false);
IntRegIndex vm = decodeFpVm(machInst, size, false);
IntRegIndex vdInt = decodeFpVd(machInst, size, true);
if (b31_b24 && b23 && b21_b19 && b11_b9 && op3 && b4) {
if (o1 == 0) {
// VINT* Integer Rounding Instruction
if (size == 3) {
switch(rm) {
case 0x0:
return decodeVfpRegRegOp<VRIntAD>(machInst, vd, vm,
true);
case 0x1:
return decodeVfpRegRegOp<VRIntND>(machInst, vd, vm,
true);
case 0x2:
return decodeVfpRegRegOp<VRIntPD>(machInst, vd, vm,
true);
case 0x3:
return decodeVfpRegRegOp<VRIntMD>(machInst, vd, vm,
true);
default: return new Unknown(machInst);
}
} else {
switch(rm) {
case 0x0:
return decodeVfpRegRegOp<VRIntAS>(machInst, vd, vm,
false);
case 0x1:
return decodeVfpRegRegOp<VRIntNS>(machInst, vd, vm,
false);
case 0x2:
return decodeVfpRegRegOp<VRIntPS>(machInst, vd, vm,
false);
case 0x3:
return decodeVfpRegRegOp<VRIntMS>(machInst, vd, vm,
false);
default: return new Unknown(machInst);
}
}
} else {
const bool op = bits(machInst, 7);
switch(rm) {
case 0x0:
switch(size) {
case 0x0:
return new Unknown(machInst);
case 0x1:
return new FailUnimplemented(
"vcvta.u32.f16", machInst);
case 0x2:
if (op) {
return new VcvtaFpSIntS(machInst, vdInt, vm);
} else {
return new VcvtaFpUIntS(machInst, vdInt, vm);
}
case 0x3:
if (op) {
return new VcvtaFpSIntD(machInst, vdInt, vm);
} else {
return new VcvtaFpUIntD(machInst, vdInt, vm);
}
}
case 0x1:
switch(size) {
case 0x0:
return new Unknown(machInst);
case 0x1:
return new FailUnimplemented(
"vcvtn.u32.f16", machInst);
case 0x2:
if (op) {
return new VcvtnFpSIntS(machInst, vdInt, vm);
} else {
return new VcvtnFpUIntS(machInst, vdInt, vm);
}
case 0x3:
if (op) {
return new VcvtnFpSIntD(machInst, vdInt, vm);
} else {
return new VcvtnFpUIntD(machInst, vdInt, vm);
}
}
case 0x2:
switch(size) {
case 0x0:
return new Unknown(machInst);
case 0x1:
return new FailUnimplemented(
"vcvtp.u32.f16", machInst);
case 0x2:
if (op) {
return new VcvtpFpSIntS(machInst, vdInt, vm);
} else {
return new VcvtpFpUIntS(machInst, vdInt, vm);
}
case 0x3:
if (op) {
return new VcvtpFpSIntD(machInst, vdInt, vm);
} else {
return new VcvtpFpUIntD(machInst, vdInt, vm);
}
}
case 0x3:
switch(size) {
case 0x0:
return new Unknown(machInst);
case 0x1:
return new FailUnimplemented(
"vcvtm.u32.f16", machInst);
case 0x2:
if (op) {
return new VcvtmFpSIntS(machInst, vdInt, vm);
} else {
return new VcvtmFpUIntS(machInst, vdInt, vm);
}
case 0x3:
if (op) {
return new VcvtmFpSIntD(machInst, vdInt, vm);
} else {
return new VcvtmFpUIntD(machInst, vdInt, vm);
}
}
}
}
} else if (b31_b24 && !b23 && b11_b9 && !op3 && b4){
// VSEL* floating point conditional select
ConditionCode cond;
@@ -2079,24 +2175,12 @@ let {{
case 0x3: cond = COND_GT; break;
}
if (sz) {
const IntRegIndex vd =
(IntRegIndex)((bits(machInst, 22) << 5) |
(bits(machInst, 15, 12) << 1));
const IntRegIndex vm =
(IntRegIndex)((bits(machInst, 5) << 5) |
(bits(machInst, 3, 0) << 1));
if (size == 3) {
const IntRegIndex vn =
(IntRegIndex)((bits(machInst, 7) << 5) |
(bits(machInst, 19, 16) << 1));
return new VselD(machInst, vd, vn, vm, cond);
} else {
const IntRegIndex vd =
(IntRegIndex)(bits(machInst, 22) |
(bits(machInst, 15, 12) << 1));
const IntRegIndex vm =
(IntRegIndex)(bits(machInst, 5) |
(bits(machInst, 3, 0) << 1));
const IntRegIndex vn =
(IntRegIndex)((bits(machInst, 19, 16) << 1) |
bits(machInst, 7));

View File

@@ -1,6 +1,6 @@
// -*- mode:c++ -*-
// Copyright (c) 2010-2013,2016 ARM Limited
// Copyright (c) 2010-2013,2016,2018-2019 ARM Limited
// All rights reserved
//
// The license below extends only to copyright in the software and shall
@@ -993,85 +993,96 @@ let {{
decoder_output += FpRegRegOpConstructor.subst(vcvtFpSIntDRIop);
exec_output += PredOpExecute.subst(vcvtFpSIntDRIop);
vcvtFpUIntSCode = vfpEnabledCheckCode + '''
# Maps the suffix appended to "vcvt" in the mnemonic (and inserted into the
# class-name template) to the name that buildVcvt() substitutes for the
# {round_mode} placeholder of its code template. The empty suffix yields
# the plain "vcvt" mnemonic.
round_mode_suffix_to_mode = {
'': 'VfpRoundZero',
'a': 'VfpRoundAway',
'm': 'VfpRoundDown',
'n': 'VfpRoundNearest',
'p': 'VfpRoundUpward',
}
def buildVcvt(code, className, roundModeSuffix):
    # Generate one FpRegRegOp conversion instruction for a rounding mode.
    #
    # code            -- C++ snippet template holding a {round_mode}
    #                    placeholder; vfpEnabledCheckCode is prepended to it.
    # className       -- class-name template with a single {} placeholder
    #                    that receives roundModeSuffix.
    # roundModeSuffix -- key into round_mode_suffix_to_mode; also appended
    #                    to "vcvt" to form the mnemonic.
    #
    # The declaration, constructor and execute text of the new instruction
    # are appended to header_output, decoder_output and exec_output.
    global header_output, decoder_output, exec_output, \
        vfpEnabledCheckCode, round_mode_suffix_to_mode
    rounding = round_mode_suffix_to_mode[roundModeSuffix]
    body = vfpEnabledCheckCode + code.format(round_mode=rounding)
    mnemonic = "vcvt{}".format(roundModeSuffix)
    cls = className.format(roundModeSuffix)
    snippets = {
        "code": body,
        "predicate_test": predicateTest,
        "op_class": "SimdFloatCvtOp",
    }
    iop = InstObjParams(mnemonic, cls, "FpRegRegOp", snippets, [])
    header_output += FpRegRegOpDeclare.subst(iop)
    decoder_output += FpRegRegOpConstructor.subst(iop)
    exec_output += PredOpExecute.subst(iop)
code = '''
FPSCR fpscr = (FPSCR) FpscrExc;
vfpFlushToZero(fpscr, FpOp1);
VfpSavedState state = prepFpState(fpscr.rMode);
fesetround(FeRoundZero);
__asm__ __volatile__("" : "=m" (FpOp1) : "m" (FpOp1));
FpDest_uw = vfpFpToFixed<float>(FpOp1, false, 32, 0);
FpDest_uw = vfpFpToFixed<float>(
FpOp1, false, 32, 0, true, {round_mode});
__asm__ __volatile__("" :: "m" (FpDest_uw));
finishVfp(fpscr, state, fpscr.fz);
FpscrExc = fpscr;
'''
vcvtFpUIntSIop = InstObjParams("vcvt", "VcvtFpUIntS", "FpRegRegOp",
{ "code": vcvtFpUIntSCode,
"predicate_test": predicateTest,
"op_class": "SimdFloatCvtOp" }, [])
header_output += FpRegRegOpDeclare.subst(vcvtFpUIntSIop);
decoder_output += FpRegRegOpConstructor.subst(vcvtFpUIntSIop);
exec_output += PredOpExecute.subst(vcvtFpUIntSIop);
for round_mode_suffix in round_mode_suffix_to_mode:
buildVcvt(code, "Vcvt{}FpUIntS", round_mode_suffix)
vcvtFpUIntDCode = vfpEnabledCheckCode + '''
code = '''
FPSCR fpscr = (FPSCR) FpscrExc;
double cOp1 = dbl(FpOp1P0_uw, FpOp1P1_uw);
vfpFlushToZero(fpscr, cOp1);
VfpSavedState state = prepFpState(fpscr.rMode);
fesetround(FeRoundZero);
__asm__ __volatile__("" : "=m" (cOp1) : "m" (cOp1));
uint64_t result = vfpFpToFixed<double>(cOp1, false, 32, 0);
uint64_t result = vfpFpToFixed<double>(
cOp1, false, 32, 0, true, {round_mode});
__asm__ __volatile__("" :: "m" (result));
finishVfp(fpscr, state, fpscr.fz);
FpDestP0_uw = result;
FpscrExc = fpscr;
'''
vcvtFpUIntDIop = InstObjParams("vcvt", "VcvtFpUIntD", "FpRegRegOp",
{ "code": vcvtFpUIntDCode,
"predicate_test": predicateTest,
"op_class": "SimdFloatCvtOp" }, [])
header_output += FpRegRegOpDeclare.subst(vcvtFpUIntDIop);
decoder_output += FpRegRegOpConstructor.subst(vcvtFpUIntDIop);
exec_output += PredOpExecute.subst(vcvtFpUIntDIop);
for round_mode_suffix in round_mode_suffix_to_mode:
buildVcvt(code, "Vcvt{}FpUIntD", round_mode_suffix)
vcvtFpSIntSCode = vfpEnabledCheckCode + '''
code = '''
FPSCR fpscr = (FPSCR) FpscrExc;
vfpFlushToZero(fpscr, FpOp1);
VfpSavedState state = prepFpState(fpscr.rMode);
fesetround(FeRoundZero);
__asm__ __volatile__("" : "=m" (FpOp1) : "m" (FpOp1));
FpDest_sw = vfpFpToFixed<float>(FpOp1, true, 32, 0);
FpDest_sw = vfpFpToFixed<float>(
FpOp1, true, 32, 0, true, {round_mode});
__asm__ __volatile__("" :: "m" (FpDest_sw));
finishVfp(fpscr, state, fpscr.fz);
FpscrExc = fpscr;
'''
vcvtFpSIntSIop = InstObjParams("vcvt", "VcvtFpSIntS", "FpRegRegOp",
{ "code": vcvtFpSIntSCode,
"predicate_test": predicateTest,
"op_class": "SimdFloatCvtOp" }, [])
header_output += FpRegRegOpDeclare.subst(vcvtFpSIntSIop);
decoder_output += FpRegRegOpConstructor.subst(vcvtFpSIntSIop);
exec_output += PredOpExecute.subst(vcvtFpSIntSIop);
for round_mode_suffix in round_mode_suffix_to_mode:
buildVcvt(code, "Vcvt{}FpSIntS", round_mode_suffix)
vcvtFpSIntDCode = vfpEnabledCheckCode + '''
code = '''
FPSCR fpscr = (FPSCR) FpscrExc;
double cOp1 = dbl(FpOp1P0_uw, FpOp1P1_uw);
vfpFlushToZero(fpscr, cOp1);
VfpSavedState state = prepFpState(fpscr.rMode);
fesetround(FeRoundZero);
__asm__ __volatile__("" : "=m" (cOp1) : "m" (cOp1));
int64_t result = vfpFpToFixed<double>(cOp1, true, 32, 0);
int64_t result = vfpFpToFixed<double>(
cOp1, true, 32, 0, true, {round_mode});
__asm__ __volatile__("" :: "m" (result));
finishVfp(fpscr, state, fpscr.fz);
FpDestP0_uw = result;
FpscrExc = fpscr;
'''
vcvtFpSIntDIop = InstObjParams("vcvt", "VcvtFpSIntD", "FpRegRegOp",
{ "code": vcvtFpSIntDCode,
"predicate_test": predicateTest,
"op_class": "SimdFloatCvtOp" }, [])
header_output += FpRegRegOpDeclare.subst(vcvtFpSIntDIop);
decoder_output += FpRegRegOpConstructor.subst(vcvtFpSIntDIop);
exec_output += PredOpExecute.subst(vcvtFpSIntDIop);
for round_mode_suffix in round_mode_suffix_to_mode:
buildVcvt(code, "Vcvt{}FpSIntD", round_mode_suffix)
vcvtFpSFpDCode = vfpEnabledCheckCode + '''
FPSCR fpscr = (FPSCR) FpscrExc;