arch-arm: implement VMINNM and VMAXNM SIMD version

These instructions are backported from aarch64.

In order to use the existing fplibMinNum and fplibMaxNum backends, we first
move VMIN, VMAX, VPMIN and VPMAX to use fplib. Adding VMINNM and VMAXNM is
then trivial.

Change-Id: I404daabeb6079f60e51a648a06d5b3e54f1c24a9
Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/18689
Reviewed-by: Andreas Sandberg <andreas.sandberg@arm.com>
Maintainer: Andreas Sandberg <andreas.sandberg@arm.com>
Tested-by: kokoro <noreply+kokoro@google.com>
This commit is contained in:
Ciro Santilli
2019-04-10 14:34:03 +01:00
parent 396a07e34d
commit 0dee5c3d1b
4 changed files with 87 additions and 51 deletions

View File

@@ -1,5 +1,5 @@
/*
* Copyright (c) 2010-2013 ARM Limited
* Copyright (c) 2010-2013, 2019 ARM Limited
* All rights reserved
*
* The license below extends only to copyright in the software and shall
@@ -891,6 +891,17 @@ unsignedRecipEstimate(uint32_t op)
}
}
/**
 * Derive a "standard" FPSCR value from the current FPSCR.
 *
 * The result is built from an FPSCR constructed with 0; only the AHP and
 * FZ16 fields are carried over from the input, while DN and FZ are forced
 * to 1. No other field of the input is consulted.
 *
 * NOTE(review): presumably mirrors the StandardFPSCRValue() pseudocode of
 * the Arm Architecture Reference Manual -- confirm against the manual.
 *
 * @param fpscr The current FPSCR; read only, never modified.
 * @return A new FPSCR holding the standard value described above.
 */
FPSCR
fpStandardFPSCRValue(const FPSCR &fpscr)
{
    FPSCR new_fpscr(0);
    // Fields inherited from the caller's FPSCR.
    new_fpscr.ahp = fpscr.ahp;
    new_fpscr.fz16 = fpscr.fz16;
    // Fields forced on regardless of the caller's FPSCR.
    new_fpscr.dn = 1;
    new_fpscr.fz = 1;
    return new_fpscr;
}
template <class fpType>
fpType
FpOp::processNans(FPSCR &fpscr, bool &done, bool defaultNan,

View File

@@ -1,5 +1,5 @@
/*
* Copyright (c) 2010-2013 ARM Limited
* Copyright (c) 2010-2013, 2019 ARM Limited
* All rights reserved
*
* The license below extends only to copyright in the software and shall
@@ -447,6 +447,9 @@ uint32_t unsignedRSqrtEstimate(uint32_t op);
float fpRecipEstimate(FPSCR &fpscr, float op);
uint32_t unsignedRecipEstimate(uint32_t op);
/**
 * Build a "standard" FPSCR value from the given FPSCR.
 *
 * The returned register starts from an FPSCR constructed with 0, copies
 * the AHP and FZ16 fields from @p fpscr, and sets DN and FZ to 1.
 *
 * @param fpscr The current FPSCR (not modified).
 * @return The derived FPSCR value.
 */
FPSCR
fpStandardFPSCRValue(const FPSCR &fpscr);
class VfpMacroOp : public PredMacroOp
{
public:

View File

@@ -742,7 +742,23 @@ let {{
case 0xf:
if (o1) {
if (u) {
return new Unknown(machInst);
if (bits(size, 1) == 0) {
if (q) {
return new VmaxnmQFp<uint32_t>(
machInst, vd, vn, vm);
} else {
return new VmaxnmDFp<uint32_t>(
machInst, vd, vn, vm);
}
} else {
if (q) {
return new VminnmQFp<uint32_t>(
machInst, vd, vn, vm);
} else {
return new VminnmDFp<uint32_t>(
machInst, vd, vn, vm);
}
}
} else {
if (bits(size, 1) == 0) {
if (q) {
@@ -762,29 +778,37 @@ let {{
if (u) {
if (bits(size, 1) == 0) {
if (q) {
return new VpmaxQFp<float>(machInst, vd, vn, vm);
return new VpmaxQFp<uint32_t>(
machInst, vd, vn, vm);
} else {
return new VpmaxDFp<float>(machInst, vd, vn, vm);
return new VpmaxDFp<uint32_t>(
machInst, vd, vn, vm);
}
} else {
if (q) {
return new VpminQFp<float>(machInst, vd, vn, vm);
return new VpminQFp<uint32_t>(
machInst, vd, vn, vm);
} else {
return new VpminDFp<float>(machInst, vd, vn, vm);
return new VpminDFp<uint32_t>(
machInst, vd, vn, vm);
}
}
} else {
if (bits(size, 1) == 0) {
if (q) {
return new VmaxQFp<float>(machInst, vd, vn, vm);
return new VmaxQFp<uint32_t>(
machInst, vd, vn, vm);
} else {
return new VmaxDFp<float>(machInst, vd, vn, vm);
return new VmaxDFp<uint32_t>(
machInst, vd, vn, vm);
}
} else {
if (q) {
return new VminQFp<float>(machInst, vd, vn, vm);
return new VminQFp<uint32_t>(
machInst, vd, vn, vm);
} else {
return new VminDFp<float>(machInst, vd, vn, vm);
return new VminDFp<uint32_t>(
machInst, vd, vn, vm);
}
}
}

View File

@@ -1,6 +1,6 @@
// -*- mode:c++ -*-
// Copyright (c) 2010-2011, 2015 ARM Limited
// Copyright (c) 2010-2011, 2015, 2019 ARM Limited
// All rights reserved
//
// The license below extends only to copyright in the software and shall
@@ -1132,7 +1132,8 @@ let {{
allTypes = unsignedTypes + signedTypes
def threeEqualRegInst(name, Name, opClass, types, rCount, op,
readDest=False, pairwise=False):
readDest=False, pairwise=False,
standardFpcsr=False):
global header_output, exec_output
eWalkCode = simdEnabledCheckCode + '''
RegVect srcReg1, srcReg2, destReg;
@@ -1147,6 +1148,10 @@ let {{
destReg.regs[%(reg)d] = htog(FpDestP%(reg)d_uw);
''' % { "reg" : reg }
readDestCode = ''
if standardFpcsr:
eWalkCode += '''
FPSCR fpscr = fpStandardFPSCRValue((FPSCR)FpscrExc);
'''
if readDest:
readDestCode = 'destElem = gtoh(destReg.elements[i]);'
if pairwise:
@@ -1175,6 +1180,10 @@ let {{
destReg.elements[i] = htog(destElem);
}
''' % { "op" : op, "readDest" : readDestCode }
if standardFpcsr:
eWalkCode += '''
FpscrExc = fpscr;
'''
for reg in range(rCount):
eWalkCode += '''
FpDestP%(reg)d_uw = gtoh(destReg.regs[%(reg)d]);
@@ -2649,45 +2658,34 @@ let {{
threeEqualRegInst("vqrdmulh", "VqrdmulhQ",
"SimdMultOp", smallSignedTypes, 4, vqrdmulhCode)
vmaxfpCode = '''
FPSCR fpscr = (FPSCR) FpscrExc;
bool done;
destReg = processNans(fpscr, done, true, srcReg1, srcReg2);
if (!done) {
destReg = binaryOp(fpscr, srcReg1, srcReg2, fpMax<float>,
true, true, VfpRoundNearest);
} else if (flushToZero(srcReg1, srcReg2)) {
fpscr.idc = 1;
}
FpscrExc = fpscr;
vMinMaxFpCode = '''
destElem = fplib%s<Element>(srcElem1, srcElem2, fpscr);
'''
threeEqualRegInstFp("vmax", "VmaxDFp", "SimdFloatCmpOp", ("float",), 2, vmaxfpCode)
threeEqualRegInstFp("vmax", "VmaxQFp", "SimdFloatCmpOp", ("float",), 4, vmaxfpCode)
vminfpCode = '''
FPSCR fpscr = (FPSCR) FpscrExc;
bool done;
destReg = processNans(fpscr, done, true, srcReg1, srcReg2);
if (!done) {
destReg = binaryOp(fpscr, srcReg1, srcReg2, fpMin<float>,
true, true, VfpRoundNearest);
} else if (flushToZero(srcReg1, srcReg2)) {
fpscr.idc = 1;
}
FpscrExc = fpscr;
'''
threeEqualRegInstFp("vmin", "VminDFp", "SimdFloatCmpOp", ("float",), 2, vminfpCode)
threeEqualRegInstFp("vmin", "VminQFp", "SimdFloatCmpOp", ("float",), 4, vminfpCode)
threeEqualRegInstFp("vpmax", "VpmaxDFp", "SimdFloatCmpOp", ("float",),
2, vmaxfpCode, pairwise=True)
threeEqualRegInstFp("vpmax", "VpmaxQFp", "SimdFloatCmpOp", ("float",),
4, vmaxfpCode, pairwise=True)
threeEqualRegInstFp("vpmin", "VpminDFp", "SimdFloatCmpOp", ("float",),
2, vminfpCode, pairwise=True)
threeEqualRegInstFp("vpmin", "VpminQFp", "SimdFloatCmpOp", ("float",),
4, vminfpCode, pairwise=True)
# Table of the floating-point SIMD min/max instructions to generate.
# Each entry is (mnemonic, generated class name, rCount, fplib operation
# suffix, pairwise flag). The suffix is substituted into vMinMaxFpCode, so
# "Max" yields a call to fplibMax<Element>, "MinNum" to fplibMinNum<Element>,
# and so on.
vMinMaxInsts = [
("vmax", "VmaxDFp", 2, "Max", False, ),
("vmax", "VmaxQFp", 4, "Max", False, ),
("vmaxnm", "VmaxnmDFp", 2, "MaxNum", False, ),
("vmaxnm", "VmaxnmQFp", 4, "MaxNum", False, ),
("vpmax", "VpmaxDFp", 2, "Max", True, ),
("vpmax", "VpmaxQFp", 4, "Max", True, ),
("vmin", "VminDFp", 2, "Min", False, ),
("vmin", "VminQFp", 4, "Min", False, ),
("vminnm", "VminnmDFp", 2, "MinNum", False, ),
("vminnm", "VminnmQFp", 4, "MinNum", False, ),
("vpmin", "VpminDFp", 2, "Min", True, ),
("vpmin", "VpminQFp", 4, "Min", True, ),
]
# Register every table entry through threeEqualRegInst. All entries share the
# "SimdFloatCmpOp" op class and a single ("uint32_t",) element type; the
# per-element code is vMinMaxFpCode with the entry's fplib suffix filled in.
# NOTE(review): "standardFpcsr" (sic) is the keyword as declared by
# threeEqualRegInst; when set, that helper derives the local fpscr via
# fpStandardFPSCRValue and writes it back to FpscrExc after the element loop.
for name, Name, rCount, op, pairwise in vMinMaxInsts:
threeEqualRegInst(
name,
Name,
"SimdFloatCmpOp",
("uint32_t",),
rCount,
vMinMaxFpCode % op,
pairwise=pairwise,
standardFpcsr=True,
)
vaddfpCode = '''
FPSCR fpscr = (FPSCR) FpscrExc;