From bf61bd127f88eea200ae306fb20caf02c03332aa Mon Sep 17 00:00:00 2001
From: Junshi Wang
Date: Fri, 16 Aug 2024 21:03:10 +0800
Subject: [PATCH] arch-arm: Add support of AArch32 VCVTA/P/N/M instructions.

Add decoder and function of AArch32 VCVTA, VCVTP, VCVTN and VCVTM
instructions. Support both 16-bit and 32-bit variants. Only support A32
encoding.

Change-Id: I6ece0e1b779f9a7cc9d709894a49a7fdcda28373
Reviewed-by: Giacomo Travaglini
---
Reviewer notes (free-form text between the "---" separator and the first
"diff --git" header; it is neither commit message nor patch content).

Decoder (fp.isa):
  * Four branches are added to an existing else-if chain, keyed on
    (b & 0x1c) == 0x00 / 0x04 / 0x08 / 0x0c. They construct the
    NVcvt2..A, NVcvt2..N, NVcvt2..P and NVcvt2..M classes respectively.
  * Inside each branch, bits(b, 1) set selects the NVcvt2u* classes and
    clear selects the NVcvt2s* classes.
  * size 0b01 selects the "h" classes and size 0b10 the "s" classes; any
    other size decodes to Unknown.
  * q selects the ...Q class, otherwise the ...D class.
  * All eight switch statements use the same "switch (size)" /
    "case 0b01:" / "case 0b10:" spelling.

Instruction definitions (neon.isa):
  * Two code templates are added. vcvthp2hCode converts the source element
    with vcvtFpHFpS() and calls vfpFpToFixed() with width 16.
    vcvtsp2sCode reinterprets the source element with bitsToFp() and calls
    vfpFpToFixed() with width 32.
  * Each template takes two "%s" substitutions. The first is "false" for
    the .u16/.u32 mnemonics and "true" for the .s16/.s32 mnemonics. The
    second is the rounding constant: VfpRoundAway (vcvta), VfpRoundNearest
    (vcvtn), VfpRoundUpward (vcvtp), VfpRoundDown (vcvtm).
  * Every specialisation is registered twice through twoRegMiscInst(): the
    ...D class passes 2 and the ...Q class passes 4 as the fifth argument.

NOTE(review): this copy of the patch had its line breaks and indentation
collapsed. The leading whitespace and the wrapping of context lines below
are reconstructed and must be confirmed against the target files before
applying.
NOTE(review): text in angle brackets (author/reviewer e-mail addresses and
possibly C++ template arguments on the "new NVcvt2...(...)" and
"vfpFpToFixed(...)" expressions) appears to have been stripped from this
copy -- TODO confirm against the original commit.

 src/arch/arm/isa/formats/fp.isa | 146 +++++++++++++++++++++++++++++++-
 src/arch/arm/isa/insts/neon.isa | 124 ++++++++++++++++++++++++++-
 2 files changed, 268 insertions(+), 2 deletions(-)

diff --git a/src/arch/arm/isa/formats/fp.isa b/src/arch/arm/isa/formats/fp.isa
index c8508e16e1..45b0985838 100644
--- a/src/arch/arm/isa/formats/fp.isa
+++ b/src/arch/arm/isa/formats/fp.isa
@@ -1,6 +1,6 @@
 // -*- mode:c++ -*-
 
-// Copyright (c) 2010-2011, 2016-2019 ARM Limited
+// Copyright (c) 2010-2011, 2016-2019, 2024 ARM Limited
 // All rights reserved
 //
 // The license below extends only to copyright in the software and shall
@@ -1891,6 +1891,150 @@ let {{
                         return new NVrsqrteD(machInst, vd, vm);
                     }
                 }
+            } else if ((b & 0x1c) == 0x00) {
+                if (bits(b, 1)) {
+                    switch (size) {
+                      case 0b01:
+                        if (q) {
+                            return new NVcvt2uhAQ(machInst, vd, vm);
+                        } else {
+                            return new NVcvt2uhAD(machInst, vd, vm);
+                        }
+                      case 0b10:
+                        if (q) {
+                            return new NVcvt2usAQ(machInst, vd, vm);
+                        } else {
+                            return new NVcvt2usAD(machInst, vd, vm);
+                        }
+                      default:
+                        return new Unknown(machInst);
+                    }
+                } else {
+                    switch (size) {
+                      case 0b01:
+                        if (q) {
+                            return new NVcvt2shAQ(machInst, vd, vm);
+                        } else {
+                            return new NVcvt2shAD(machInst, vd, vm);
+                        }
+                      case 0b10:
+                        if (q) {
+                            return new NVcvt2ssAQ(machInst, vd, vm);
+                        } else {
+                            return new NVcvt2ssAD(machInst, vd, vm);
+                        }
+                      default:
+                        return new Unknown(machInst);
+                    }
+                }
+            } else if ((b & 0x1c) == 0x04) {
+                if (bits(b, 1)) {
+                    switch (size) {
+                      case 0b01:
+                        if (q) {
+                            return new NVcvt2uhNQ(machInst, vd, vm);
+                        } else {
+                            return new NVcvt2uhND(machInst, vd, vm);
+                        }
+                      case 0b10:
+                        if (q) {
+                            return new NVcvt2usNQ(machInst, vd, vm);
+                        } else {
+                            return new NVcvt2usND(machInst, vd, vm);
+                        }
+                      default:
+                        return new Unknown(machInst);
+                    }
+                } else {
+                    switch (size) {
+                      case 0b01:
+                        if (q) {
+                            return new NVcvt2shNQ(machInst, vd, vm);
+                        } else {
+                            return new NVcvt2shND(machInst, vd, vm);
+                        }
+                      case 0b10:
+                        if (q) {
+                            return new NVcvt2ssNQ(machInst, vd, vm);
+                        } else {
+                            return new NVcvt2ssND(machInst, vd, vm);
+                        }
+                      default:
+                        return new Unknown(machInst);
+                    }
+                }
+            } else if ((b & 0x1c) == 0x08) {
+                if (bits(b, 1)) {
+                    switch (size) {
+                      case 0b01:
+                        if (q) {
+                            return new NVcvt2uhPQ(machInst, vd, vm);
+                        } else {
+                            return new NVcvt2uhPD(machInst, vd, vm);
+                        }
+                      case 0b10:
+                        if (q) {
+                            return new NVcvt2usPQ(machInst, vd, vm);
+                        } else {
+                            return new NVcvt2usPD(machInst, vd, vm);
+                        }
+                      default:
+                        return new Unknown(machInst);
+                    }
+                } else {
+                    switch (size) {
+                      case 0b01:
+                        if (q) {
+                            return new NVcvt2shPQ(machInst, vd, vm);
+                        } else {
+                            return new NVcvt2shPD(machInst, vd, vm);
+                        }
+                      case 0b10:
+                        if (q) {
+                            return new NVcvt2ssPQ(machInst, vd, vm);
+                        } else {
+                            return new NVcvt2ssPD(machInst, vd, vm);
+                        }
+                      default:
+                        return new Unknown(machInst);
+                    }
+                }
+            } else if ((b & 0x1c) == 0x0c) {
+                if (bits(b, 1)) {
+                    switch (size) {
+                      case 0b01:
+                        if (q) {
+                            return new NVcvt2uhMQ(machInst, vd, vm);
+                        } else {
+                            return new NVcvt2uhMD(machInst, vd, vm);
+                        }
+                      case 0b10:
+                        if (q) {
+                            return new NVcvt2usMQ(machInst, vd, vm);
+                        } else {
+                            return new NVcvt2usMD(machInst, vd, vm);
+                        }
+                      default:
+                        return new Unknown(machInst);
+                    }
+                } else {
+                    switch (size) {
+                      case 0b01:
+                        if (q) {
+                            return new NVcvt2shMQ(machInst, vd, vm);
+                        } else {
+                            return new NVcvt2shMD(machInst, vd, vm);
+                        }
+                      case 0b10:
+                        if (q) {
+                            return new NVcvt2ssMQ(machInst, vd, vm);
+                        } else {
+                            return new NVcvt2ssMD(machInst, vd, vm);
+                        }
+                      default:
+                        return new Unknown(machInst);
+                    }
+                }
             } else {
                 return new Unknown(machInst);
             }
diff --git a/src/arch/arm/isa/insts/neon.isa b/src/arch/arm/isa/insts/neon.isa
index 5f39e48cce..04d6929ae0 100644
--- a/src/arch/arm/isa/insts/neon.isa
+++ b/src/arch/arm/isa/insts/neon.isa
@@ -1,6 +1,6 @@
 // -*- mode:c++ -*-
 
-// Copyright (c) 2010-2011, 2015, 2019 ARM Limited
+// Copyright (c) 2010-2011, 2015, 2019, 2024 ARM Limited
 // All rights reserved
 //
 // The license below extends only to copyright in the software and shall
@@ -3579,6 +3579,128 @@ let {{
     '''
     twoRegLongMiscInst("vcvt", "NVcvth2s", "SimdCvtOp", ("uint16_t",), vcvth2sCode)
 
+    vcvthp2hCode = '''
+        FPSCR fpscr = (FPSCR) FpscrExc;
+        VfpSavedState state = prepFpState(fpscr.rMode);
+        __asm__ __volatile__("" : "=m" (srcElem1) : "m" (srcElem1));
+        float mid = vcvtFpHFpS(fpscr, fpscr.dn, fpscr.ahp, srcElem1);
+        if (flushToZero(mid))
+            fpscr.idc = 1;
+        destElem = vfpFpToFixed(mid, %s, 16, 0, true, %s);
+        __asm__ __volatile__("" :: "m" (destElem));
+        finishVfp(fpscr, state, true);
+        FpscrExc = fpscr;
+    '''
+
+    vcvtahp2uhCode = vcvthp2hCode % ("false", "VfpRoundAway")
+    twoRegMiscInst("vcvta.u16.f16", "NVcvt2uhAD", "SimdCvtOp",
+                   ("uint16_t",), 2, vcvtahp2uhCode)
+    twoRegMiscInst("vcvta.u16.f16", "NVcvt2uhAQ", "SimdCvtOp",
+                   ("uint16_t",), 4, vcvtahp2uhCode)
+
+    vcvtnhp2uhCode = vcvthp2hCode % ("false", "VfpRoundNearest")
+    twoRegMiscInst("vcvtn.u16.f16", "NVcvt2uhND", "SimdCvtOp",
+                   ("uint16_t",), 2, vcvtnhp2uhCode)
+    twoRegMiscInst("vcvtn.u16.f16", "NVcvt2uhNQ", "SimdCvtOp",
+                   ("uint16_t",), 4, vcvtnhp2uhCode)
+
+    vcvtphp2uhCode = vcvthp2hCode % ("false", "VfpRoundUpward")
+    twoRegMiscInst("vcvtp.u16.f16", "NVcvt2uhPD", "SimdCvtOp",
+                   ("uint16_t",), 2, vcvtphp2uhCode)
+    twoRegMiscInst("vcvtp.u16.f16", "NVcvt2uhPQ", "SimdCvtOp",
+                   ("uint16_t",), 4, vcvtphp2uhCode)
+
+    vcvtmhp2uhCode = vcvthp2hCode % ("false", "VfpRoundDown")
+    twoRegMiscInst("vcvtm.u16.f16", "NVcvt2uhMD", "SimdCvtOp",
+                   ("uint16_t",), 2, vcvtmhp2uhCode)
+    twoRegMiscInst("vcvtm.u16.f16", "NVcvt2uhMQ", "SimdCvtOp",
+                   ("uint16_t",), 4, vcvtmhp2uhCode)
+
+    vcvtahp2shCode = vcvthp2hCode % ("true", "VfpRoundAway")
+    twoRegMiscInst("vcvta.s16.f16", "NVcvt2shAD", "SimdCvtOp",
+                   ("int16_t",), 2, vcvtahp2shCode)
+    twoRegMiscInst("vcvta.s16.f16", "NVcvt2shAQ", "SimdCvtOp",
+                   ("int16_t",), 4, vcvtahp2shCode)
+
+    vcvtnhp2shCode = vcvthp2hCode % ("true", "VfpRoundNearest")
+    twoRegMiscInst("vcvtn.s16.f16", "NVcvt2shND", "SimdCvtOp",
+                   ("int16_t",), 2, vcvtnhp2shCode)
+    twoRegMiscInst("vcvtn.s16.f16", "NVcvt2shNQ", "SimdCvtOp",
+                   ("int16_t",), 4, vcvtnhp2shCode)
+
+    vcvtphp2shCode = vcvthp2hCode % ("true", "VfpRoundUpward")
+    twoRegMiscInst("vcvtp.s16.f16", "NVcvt2shPD", "SimdCvtOp",
+                   ("int16_t",), 2, vcvtphp2shCode)
+    twoRegMiscInst("vcvtp.s16.f16", "NVcvt2shPQ", "SimdCvtOp",
+                   ("int16_t",), 4, vcvtphp2shCode)
+
+    vcvtmhp2shCode = vcvthp2hCode % ("true", "VfpRoundDown")
+    twoRegMiscInst("vcvtm.s16.f16", "NVcvt2shMD", "SimdCvtOp",
+                   ("int16_t",), 2, vcvtmhp2shCode)
+    twoRegMiscInst("vcvtm.s16.f16", "NVcvt2shMQ", "SimdCvtOp",
+                   ("int16_t",), 4, vcvtmhp2shCode)
+
+    vcvtsp2sCode = '''
+        FPSCR fpscr = (FPSCR) FpscrExc;
+        VfpSavedState state = prepFpState(fpscr.rMode);
+        __asm__ __volatile__("" : "=m" (srcElem1) : "m" (srcElem1));
+        float mid = bitsToFp(srcElem1, (float)0.0);
+        if (flushToZero(mid))
+            fpscr.idc = 1;
+        destElem = vfpFpToFixed(mid, %s, 32, 0, true, %s);
+        __asm__ __volatile__("" :: "m" (destElem));
+        finishVfp(fpscr, state, true);
+        FpscrExc = fpscr;
+    '''
+
+    vcvtasp2usCode = vcvtsp2sCode % ("false", "VfpRoundAway")
+    twoRegMiscInst("vcvta.u32.f32", "NVcvt2usAD", "SimdCvtOp",
+                   ("uint32_t",), 2, vcvtasp2usCode)
+    twoRegMiscInst("vcvta.u32.f32", "NVcvt2usAQ", "SimdCvtOp",
+                   ("uint32_t",), 4, vcvtasp2usCode)
+
+    vcvtnsp2usCode = vcvtsp2sCode % ("false", "VfpRoundNearest")
+    twoRegMiscInst("vcvtn.u32.f32", "NVcvt2usND", "SimdCvtOp",
+                   ("uint32_t",), 2, vcvtnsp2usCode)
+    twoRegMiscInst("vcvtn.u32.f32", "NVcvt2usNQ", "SimdCvtOp",
+                   ("uint32_t",), 4, vcvtnsp2usCode)
+
+    vcvtpsp2usCode = vcvtsp2sCode % ("false", "VfpRoundUpward")
+    twoRegMiscInst("vcvtp.u32.f32", "NVcvt2usPD", "SimdCvtOp",
+                   ("uint32_t",), 2, vcvtpsp2usCode)
+    twoRegMiscInst("vcvtp.u32.f32", "NVcvt2usPQ", "SimdCvtOp",
+                   ("uint32_t",), 4, vcvtpsp2usCode)
+
+    vcvtmsp2usCode = vcvtsp2sCode % ("false", "VfpRoundDown")
+    twoRegMiscInst("vcvtm.u32.f32", "NVcvt2usMD", "SimdCvtOp",
+                   ("uint32_t",), 2, vcvtmsp2usCode)
+    twoRegMiscInst("vcvtm.u32.f32", "NVcvt2usMQ", "SimdCvtOp",
+                   ("uint32_t",), 4, vcvtmsp2usCode)
+
+    vcvtasp2ssCode = vcvtsp2sCode % ("true", "VfpRoundAway")
+    twoRegMiscInst("vcvta.s32.f32", "NVcvt2ssAD", "SimdCvtOp",
+                   ("int32_t",), 2, vcvtasp2ssCode)
+    twoRegMiscInst("vcvta.s32.f32", "NVcvt2ssAQ", "SimdCvtOp",
+                   ("int32_t",), 4, vcvtasp2ssCode)
+
+    vcvtnsp2ssCode = vcvtsp2sCode % ("true", "VfpRoundNearest")
+    twoRegMiscInst("vcvtn.s32.f32", "NVcvt2ssND", "SimdCvtOp",
+                   ("int32_t",), 2, vcvtnsp2ssCode)
+    twoRegMiscInst("vcvtn.s32.f32", "NVcvt2ssNQ", "SimdCvtOp",
+                   ("int32_t",), 4, vcvtnsp2ssCode)
+
+    vcvtpsp2ssCode = vcvtsp2sCode % ("true", "VfpRoundUpward")
+    twoRegMiscInst("vcvtp.s32.f32", "NVcvt2ssPD", "SimdCvtOp",
+                   ("int32_t",), 2, vcvtpsp2ssCode)
+    twoRegMiscInst("vcvtp.s32.f32", "NVcvt2ssPQ", "SimdCvtOp",
+                   ("int32_t",), 4, vcvtpsp2ssCode)
+
+    vcvtmsp2ssCode = vcvtsp2sCode % ("true", "VfpRoundDown")
+    twoRegMiscInst("vcvtm.s32.f32", "NVcvt2ssMD", "SimdCvtOp",
+                   ("int32_t",), 2, vcvtmsp2ssCode)
+    twoRegMiscInst("vcvtm.s32.f32", "NVcvt2ssMQ", "SimdCvtOp",
+                   ("int32_t",), 4, vcvtmsp2ssCode)
+
     vrsqrteCode = '''
         destElem = unsignedRSqrtEstimate(srcElem1);
     '''