arch-arm: Add initial support for SVE gather/scatter loads/stores

Change-Id: I891623015b47a39f61ed616f8896f32a7134c8e2
Signed-off-by: Giacomo Gabrielli <giacomo.gabrielli@arm.com>
Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/13521
Reviewed-by: Andreas Sandberg <andreas.sandberg@arm.com>
Maintainer: Andreas Sandberg <andreas.sandberg@arm.com>
Tested-by: kokoro <noreply+kokoro@google.com>
commit 3cf4a04fce
parent f26f3e22b3
Author: Giacomo Gabrielli
Date:   2018-10-23 13:57:05 +01:00

7 changed files with 1403 additions and 100 deletions


@@ -0,0 +1,224 @@
/*
* Copyright (c) 2018 ARM Limited
* All rights reserved
*
* The license below extends only to copyright in the software and shall
* not be construed as granting a license to any other intellectual
* property including but not limited to intellectual property relating
* to a hardware implementation of the functionality of the software
* licensed hereunder. You may use the software subject to the license
* terms below provided that you ensure that this notice is replicated
* unmodified and in its entirety in all distributions of the software,
* modified or unmodified, in source code or in binary form.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are
* met: redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer;
* redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution;
* neither the name of the copyright holders nor the names of its
* contributors may be used to endorse or promote products derived from
* this software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
* OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*
* Authors: Giacomo Gabrielli
*/
#ifndef __ARCH_ARM_SVE_MACROMEM_HH__
#define __ARCH_ARM_SVE_MACROMEM_HH__
#include "arch/arm/generated/decoder.hh"
#include "arch/arm/insts/pred_inst.hh"
namespace ArmISA {
template <typename RegElemType, typename MemElemType,
template <typename, typename> class MicroopType>
class SveIndexedMemVI : public PredMacroOp
{
protected:
IntRegIndex dest;
IntRegIndex gp;
IntRegIndex base;
uint64_t imm;
public:
SveIndexedMemVI(const char *mnem, ExtMachInst machInst, OpClass __opClass,
IntRegIndex _dest, IntRegIndex _gp, IntRegIndex _base,
uint64_t _imm)
: PredMacroOp(mnem, machInst, __opClass),
dest(_dest), gp(_gp), base(_base), imm(_imm)
{
bool isLoad = (__opClass == MemReadOp);
int num_elems = ((machInst.sveLen + 1) * 16) / sizeof(RegElemType);
numMicroops = num_elems;
if (isLoad) {
numMicroops++;
}
microOps = new StaticInstPtr[numMicroops];
StaticInstPtr *uop = microOps;
if (isLoad) {
// The first microop of a gather load copies the source vector
// register used for address calculation to an auxiliary register,
// with all subsequent microops reading from the latter. This is
// needed to properly handle cases where the source vector
// register is the same as the destination register
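// (e.g. "ld1w {z0.s}, p0/z, [z0.s, #0]", where the transfer microops
// progressively overwrite z0 while still needing its original
// elements as offsets)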
*uop = new ArmISAInst::SveGatherLoadCpySrcVecMicroop(
mnem, machInst, _base, this);
uop++;
}
for (int i = 0; i < num_elems; i++, uop++) {
*uop = new MicroopType<RegElemType, MemElemType>(
mnem, machInst, __opClass, _dest, _gp,
isLoad ? (IntRegIndex) VECREG_UREG0 : _base, _imm, i,
num_elems);
}
--uop;
(*uop)->setLastMicroop();
microOps[0]->setFirstMicroop();
for (StaticInstPtr *uop = microOps; !(*uop)->isLastMicroop(); uop++) {
(*uop)->setDelayedCommit();
}
}
Fault
execute(ExecContext *, Trace::InstRecord *) const
{
panic("Execute method called when it shouldn't!");
return NoFault;
}
std::string
generateDisassembly(Addr pc, const SymbolTable *symtab) const
{
// TODO: add suffix to transfer and base registers
std::stringstream ss;
printMnemonic(ss, "", false);
ccprintf(ss, "{");
printVecReg(ss, dest, true);
ccprintf(ss, "}, ");
printVecPredReg(ss, gp);
ccprintf(ss, "/z, [");
printVecReg(ss, base, true);
if (imm != 0) {
ccprintf(ss, ", #%d", imm * sizeof(MemElemType));
}
ccprintf(ss, "]");
return ss.str();
}
};
template <typename RegElemType, typename MemElemType,
template <typename, typename> class MicroopType>
class SveIndexedMemSV : public PredMacroOp
{
protected:
IntRegIndex dest;
IntRegIndex gp;
IntRegIndex base;
IntRegIndex offset;
bool offsetIs32;
bool offsetIsSigned;
bool offsetIsScaled;
public:
SveIndexedMemSV(const char *mnem, ExtMachInst machInst, OpClass __opClass,
IntRegIndex _dest, IntRegIndex _gp, IntRegIndex _base,
IntRegIndex _offset, bool _offsetIs32,
bool _offsetIsSigned, bool _offsetIsScaled)
: PredMacroOp(mnem, machInst, __opClass),
dest(_dest), gp(_gp), base(_base), offset(_offset),
offsetIs32(_offsetIs32), offsetIsSigned(_offsetIsSigned),
offsetIsScaled(_offsetIsScaled)
{
bool isLoad = (__opClass == MemReadOp);
int num_elems = ((machInst.sveLen + 1) * 16) / sizeof(RegElemType);
numMicroops = num_elems;
if (isLoad) {
numMicroops++;
}
microOps = new StaticInstPtr[numMicroops];
StaticInstPtr *uop = microOps;
if (isLoad) {
// The first microop of a gather load copies the source vector
// register used for address calculation to an auxiliary register,
// with all subsequent microops reading from the latter. This is
// needed to properly handle cases where the source vector
// register is the same as the destination register
*uop = new ArmISAInst::SveGatherLoadCpySrcVecMicroop(
mnem, machInst, _offset, this);
uop++;
}
for (int i = 0; i < num_elems; i++, uop++) {
*uop = new MicroopType<RegElemType, MemElemType>(
mnem, machInst, __opClass, _dest, _gp, _base,
isLoad ? (IntRegIndex) VECREG_UREG0 : _offset, _offsetIs32,
_offsetIsSigned, _offsetIsScaled, i, num_elems);
}
--uop;
(*uop)->setLastMicroop();
microOps[0]->setFirstMicroop();
for (StaticInstPtr *uop = microOps; !(*uop)->isLastMicroop(); uop++) {
(*uop)->setDelayedCommit();
}
}
Fault
execute(ExecContext *, Trace::InstRecord *) const
{
panic("Execute method called when it shouldn't!");
return NoFault;
}
std::string
generateDisassembly(Addr pc, const SymbolTable *symtab) const
{
// TODO: add suffix to transfer and base registers
std::stringstream ss;
printMnemonic(ss, "", false);
ccprintf(ss, "{");
printVecReg(ss, dest, true);
ccprintf(ss, "}, ");
printVecPredReg(ss, gp);
ccprintf(ss, "/z, [");
printIntReg(ss, base);
ccprintf(ss, ", ");
printVecReg(ss, offset, true);
ccprintf(ss, "]");
return ss.str();
}
};
} // namespace ArmISA
#endif // __ARCH_ARM_SVE_MACROMEM_HH__
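As a quick sanity check on the constructors above, here is a hypothetical
standalone sketch (not part of the patch) of the resulting microop count;
machInst.sveLen encodes the vector length in 128-bit quadwords minus one:

#include <cassert>
#include <cstddef>
#include <cstdint>

// Illustrative helper: microops generated for one gather load, per the
// SveIndexedMemVI/SveIndexedMemSV constructors above.
int numGatherLoadMicroops(unsigned sve_len, std::size_t reg_elem_size)
{
    int num_elems = ((sve_len + 1) * 16) / reg_elem_size;
    return num_elems + 1;  // one transfer microop per element, plus the
                           // initial source-vector copy microop
}

int main()
{
    // 256-bit vectors (sveLen == 1) with 32-bit register elements:
    // 8 transfer microops + 1 copy microop.
    assert(numGatherLoadMicroops(1, sizeof(uint32_t)) == 9);
    return 0;
}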


@@ -2896,34 +2896,153 @@ namespace Aarch64
StaticInstPtr
decodeSveMemGather32(ExtMachInst machInst)
{
- // TODO: for now only LDR and LD1R are implemented
- if (bits(machInst, 22) && bits(machInst, 15)) {
- IntRegIndex zt = (IntRegIndex) (uint8_t) bits(machInst, 4, 0);
- IntRegIndex rn = makeSP(
- (IntRegIndex) (uint8_t) bits(machInst, 9, 5));
- uint64_t imm = bits(machInst, 21, 16);
- IntRegIndex pg = (IntRegIndex) (uint8_t) bits(machInst, 12, 10);
- uint8_t dtype = (bits(machInst, 24, 23) << 2) |
- bits(machInst, 14, 13);
- return decodeSveContigLoadSIInsts<SveLoadAndRepl>(
- dtype, machInst, zt, pg, rn, imm, false, true);
- } else if (bits(machInst, 24, 22) == 0x6 &&
- bits(machInst, 15, 13) == 0x0 &&
- bits(machInst, 4) == 0x0) {
- IntRegIndex pt = (IntRegIndex) (uint8_t) bits(machInst, 3, 0);
- IntRegIndex rn = makeSP(
- (IntRegIndex) (uint8_t) bits(machInst, 9, 5));
- uint64_t imm = sext<9>((bits(machInst, 21, 16) << 3) |
- bits(machInst, 12, 10));
- return new SveLdrPred(machInst, pt, rn, imm);
- } else if (bits(machInst, 24, 22) == 0x6 &&
- bits(machInst, 15, 13) == 0x2) {
- IntRegIndex zt = (IntRegIndex) (uint8_t) bits(machInst, 4, 0);
- IntRegIndex rn = makeSP(
- (IntRegIndex) (uint8_t) bits(machInst, 9, 5));
- uint64_t imm = sext<9>((bits(machInst, 21, 16) << 3) |
- bits(machInst, 12, 10));
- return new SveLdrVec(machInst, zt, rn, imm);
if (bits(machInst, 15)) {
if (bits(machInst, 22)) {
// SVE load and broadcast element
IntRegIndex zt = (IntRegIndex) (uint8_t) bits(machInst, 4, 0);
IntRegIndex rn = (IntRegIndex) (uint8_t) bits(machInst, 9, 5);
uint64_t imm = bits(machInst, 21, 16);
IntRegIndex pg = (IntRegIndex) (uint8_t)
bits(machInst, 12, 10);
uint8_t dtype = (bits(machInst, 24, 23) << 2) |
bits(machInst, 14, 13);
return decodeSveContigLoadSIInsts<SveLoadAndRepl>(
dtype, machInst, zt, pg, rn, imm, false, true);
} else {
if (bits(machInst, 21)) {
// SVE 32-bit gather load (vector plus immediate)
IntRegIndex zt = (IntRegIndex) (uint8_t)
bits(machInst, 4, 0);
IntRegIndex zn = (IntRegIndex) (uint8_t)
bits(machInst, 9, 5);
uint64_t imm = bits(machInst, 20, 16);
IntRegIndex pg = (IntRegIndex) (uint8_t)
bits(machInst, 12, 10);
uint8_t dtype = (bits(machInst, 24, 23) << 1) |
bits(machInst, 14);
uint8_t ff = bits(machInst, 13);
if (ff) {
return new Unknown64(machInst);
}
return decodeSveGatherLoadVIInsts(
dtype, machInst, zt, pg, zn, imm, true, ff);
} else {
uint8_t b14_13 = bits(machInst, 14, 13);
if (b14_13 == 0x2 && bits(machInst, 4) == 0) {
// TODO: SVE contiguous prefetch (scalar plus scalar)
return new Unknown64(machInst);
} else if (b14_13 == 0x3 && bits(machInst, 4) == 0) {
// TODO: SVE 32-bit gather prefetch (vector plus
// immediate)
return new Unknown64(machInst);
}
}
}
} else {
uint8_t b24_23 = bits(machInst, 24, 23);
if (b24_23 != 0x3 && bits(machInst, 21) == 0) {
// SVE 32-bit gather load (scalar plus 32-bit unscaled offsets)
IntRegIndex zt = (IntRegIndex) (uint8_t) bits(machInst, 4, 0);
IntRegIndex rn = (IntRegIndex) (uint8_t) bits(machInst, 9, 5);
IntRegIndex zm = (IntRegIndex) (uint8_t)
bits(machInst, 20, 16);
IntRegIndex pg = (IntRegIndex) (uint8_t)
bits(machInst, 12, 10);
uint8_t dtype = (bits(machInst, 24, 23) << 1) |
bits(machInst, 14);
uint8_t xs = bits(machInst, 22);
uint8_t ff = bits(machInst, 13);
if (ff) {
return new Unknown64(machInst);
}
return decodeSveGatherLoadSVInsts(
dtype, machInst, zt, pg, rn, zm,
true, true, xs, false, ff);
}
switch (b24_23) {
case 0x0:
if (bits(machInst, 21) && bits(machInst, 4) == 0) {
// TODO: SVE 32-bit gather prefetch (vector plus immediate)
break;
}
break;
case 0x1:
if (bits(machInst, 21)) {
// SVE 32-bit gather load halfwords (scalar plus 32-bit
// scaled offsets)
IntRegIndex zt = (IntRegIndex) (uint8_t)
bits(machInst, 4, 0);
IntRegIndex rn = (IntRegIndex) (uint8_t)
bits(machInst, 9, 5);
IntRegIndex zm = (IntRegIndex) (uint8_t)
bits(machInst, 20, 16);
IntRegIndex pg = (IntRegIndex) (uint8_t)
bits(machInst, 12, 10);
uint8_t xs = bits(machInst, 22);
uint8_t ff = bits(machInst, 13);
if (ff) {
return new Unknown64(machInst);
}
if (bits(machInst, 14)) {
return new SveIndexedMemSV<uint32_t, uint16_t,
SveGatherLoadSVMicroop>(
"ld1", machInst, MemReadOp, zt, pg, rn, zm,
true, xs, true);
} else {
return new SveIndexedMemSV<int32_t, int16_t,
SveGatherLoadSVMicroop>(
"ld1", machInst, MemReadOp, zt, pg, rn, zm,
true, xs, true);
}
}
break;
case 0x2:
if (bits(machInst, 21)) {
// SVE 32-bit gather load words (scalar plus 32-bit scaled
// offsets)
IntRegIndex zt = (IntRegIndex) (uint8_t)
bits(machInst, 4, 0);
IntRegIndex rn = (IntRegIndex) (uint8_t)
bits(machInst, 9, 5);
IntRegIndex zm = (IntRegIndex) (uint8_t)
bits(machInst, 20, 16);
IntRegIndex pg = (IntRegIndex) (uint8_t)
bits(machInst, 12, 10);
uint8_t xs = bits(machInst, 22);
uint8_t ff = bits(machInst, 13);
if (ff) {
return new Unknown64(machInst);
}
return new SveIndexedMemSV<uint32_t, uint32_t,
SveGatherLoadSVMicroop>(
"ld1", machInst, MemReadOp, zt, pg, rn, zm,
true, xs, true);
}
break;
case 0x3:
if (bits(machInst, 22) == 0 && bits(machInst, 14, 13) == 0x0 &&
bits(machInst, 4) == 0) {
// SVE load predicate register
IntRegIndex pt = (IntRegIndex) (uint8_t)
bits(machInst, 3, 0);
IntRegIndex rn = (IntRegIndex) (uint8_t)
bits(machInst, 9, 5);
uint64_t imm = sext<9>((bits(machInst, 21, 16) << 3) |
bits(machInst, 12, 10));
return new SveLdrPred(machInst, pt, rn, imm);
} else if (bits(machInst, 22) == 0 &&
bits(machInst, 14, 13) == 0x2) {
// SVE load vector register
IntRegIndex zt = (IntRegIndex) (uint8_t)
bits(machInst, 4, 0);
IntRegIndex rn = (IntRegIndex) (uint8_t)
bits(machInst, 9, 5);
uint64_t imm = sext<9>((bits(machInst, 21, 16) << 3) |
bits(machInst, 12, 10));
return new SveLdrVec(machInst, zt, rn, imm);
}
break;
}
}
return new Unknown64(machInst);
} // decodeSveMemGather32
@@ -3048,6 +3167,124 @@ namespace Aarch64
StaticInstPtr
decodeSveMemGather64(ExtMachInst machInst)
{
switch ((bits(machInst, 21) << 1) | bits(machInst, 15)) {
case 0x0:
{
// SVE 64-bit gather load (scalar plus unpacked 32-bit unscaled
// offsets)
IntRegIndex zt = (IntRegIndex) (uint8_t) bits(machInst, 4, 0);
IntRegIndex rn = (IntRegIndex) (uint8_t) bits(machInst, 9, 5);
IntRegIndex zm = (IntRegIndex) (uint8_t)
bits(machInst, 20, 16);
IntRegIndex pg = (IntRegIndex) (uint8_t)
bits(machInst, 12, 10);
uint8_t dtype = (bits(machInst, 24, 23) << 1) |
bits(machInst, 14);
uint8_t xs = bits(machInst, 22);
uint8_t ff = bits(machInst, 13);
if (ff) {
return new Unknown64(machInst);
}
return decodeSveGatherLoadSVInsts(
dtype, machInst, zt, pg, rn, zm,
false, true, xs, false, ff);
}
case 0x1:
if (bits(machInst, 22)) {
// SVE 64-bit gather load (scalar plus 64-bit unscaled offsets)
IntRegIndex zt = (IntRegIndex) (uint8_t) bits(machInst, 4, 0);
IntRegIndex rn = (IntRegIndex) (uint8_t) bits(machInst, 9, 5);
IntRegIndex zm = (IntRegIndex) (uint8_t)
bits(machInst, 20, 16);
IntRegIndex pg = (IntRegIndex) (uint8_t)
bits(machInst, 12, 10);
uint8_t dtype = (bits(machInst, 24, 23) << 1) |
bits(machInst, 14);
uint8_t ff = bits(machInst, 13);
if (ff) {
return new Unknown64(machInst);
}
return decodeSveGatherLoadSVInsts(
dtype, machInst, zt, pg, rn, zm,
false, false, false, false, ff);
} else {
if (bits(machInst, 14, 13) == 0x3 && bits(machInst, 4) == 0) {
// TODO: SVE 64-bit gather prefetch (vector plus immediate)
break;
}
}
break;
case 0x2:
if (bits(machInst, 24, 23) != 0x0) {
// SVE 64-bit gather load (scalar plus unpacked 32-bit scaled
// offsets)
IntRegIndex zt = (IntRegIndex) (uint8_t) bits(machInst, 4, 0);
IntRegIndex rn = (IntRegIndex) (uint8_t) bits(machInst, 9, 5);
IntRegIndex zm = (IntRegIndex) (uint8_t)
bits(machInst, 20, 16);
IntRegIndex pg = (IntRegIndex) (uint8_t)
bits(machInst, 12, 10);
uint8_t dtype = (bits(machInst, 24, 23) << 1) |
bits(machInst, 14);
uint8_t xs = bits(machInst, 22);
uint8_t ff = bits(machInst, 13);
if (ff) {
return new Unknown64(machInst);
}
return decodeSveGatherLoadSVInsts(
dtype, machInst, zt, pg, rn, zm,
false, true, xs, true, ff);
} else if (bits(machInst, 4) == 0) {
// TODO: SVE 64-bit gather prefetch (scalar plus unpacked
// 32-bit scaled offsets)
return new Unknown64(machInst);
}
break;
case 0x3:
if (bits(machInst, 22) == 0) {
// SVE 64-bit gather load (vector plus immediate)
IntRegIndex zt = (IntRegIndex) (uint8_t) bits(machInst, 4, 0);
IntRegIndex zn = (IntRegIndex) (uint8_t) bits(machInst, 9, 5);
uint64_t imm = bits(machInst, 20, 16);
IntRegIndex pg = (IntRegIndex) (uint8_t)
bits(machInst, 12, 10);
uint8_t dtype = (bits(machInst, 24, 23) << 1) |
bits(machInst, 14);
uint8_t ff = bits(machInst, 13);
if (ff) {
return new Unknown64(machInst);
}
return decodeSveGatherLoadVIInsts(
dtype, machInst, zt, pg, zn, imm, false, ff);
} else {
if (bits(machInst, 24, 23) != 0x0) {
// SVE 64-bit gather load (scalar plus 64-bit scaled
// offsets)
IntRegIndex zt = (IntRegIndex) (uint8_t)
bits(machInst, 4, 0);
IntRegIndex rn = (IntRegIndex) (uint8_t)
bits(machInst, 9, 5);
IntRegIndex zm = (IntRegIndex) (uint8_t)
bits(machInst, 20, 16);
IntRegIndex pg = (IntRegIndex) (uint8_t)
bits(machInst, 12, 10);
uint8_t dtype = (bits(machInst, 24, 23) << 1) |
bits(machInst, 14);
uint8_t ff = bits(machInst, 13);
if (ff) {
return new Unknown64(machInst);
}
return decodeSveGatherLoadSVInsts(
dtype, machInst, zt, pg, rn, zm,
false, false, false, true, ff);
} else if (bits(machInst, 4) == 0) {
// TODO: SVE 64-bit gather prefetch (scalar plus 64-bit
// scaled offsets)
break;
}
}
break;
}
return new Unknown64(machInst);
} // decodeSveMemGather64
@@ -3086,36 +3323,12 @@ namespace Aarch64
return new Unknown64(machInst);
} // decodeSveContigNTStoreSS
StaticInstPtr
decodeSveScatterStore64SV32U(ExtMachInst machInst)
{
return new Unknown64(machInst);
} // decodeSveScatterStore64SV32U
StaticInstPtr
decodeSveScatterStore64SV64U(ExtMachInst machInst)
{
return new Unknown64(machInst);
} // decodeSveScatterStore64SV64U
StaticInstPtr
decodeSveContigNTStoreSI(ExtMachInst machInst)
{
return new Unknown64(machInst);
} // decodeSveContigNTStoreSI
StaticInstPtr
decodeSveScatterStore64VI(ExtMachInst machInst)
{
return new Unknown64(machInst);
} // decodeSveScatterStore64VI
StaticInstPtr
decodeSveScatterStore32SV32S(ExtMachInst machInst)
{
return new Unknown64(machInst);
} // decodeSveScatterStore32SV32S
StaticInstPtr
decodeSveStoreStructsSS(ExtMachInst machInst)
{
@@ -3128,30 +3341,6 @@ namespace Aarch64
return new Unknown64(machInst);
} // decodeSveStoreStructsSI
StaticInstPtr
decodeSveScatterStore32SV32U(ExtMachInst machInst)
{
return new Unknown64(machInst);
} // decodeSveScatterStore32SV32U
StaticInstPtr
decodeSveScatterStore32VI(ExtMachInst machInst)
{
return new Unknown64(machInst);
} // decodeSveScatterStore32VI
StaticInstPtr
decodeSveScatterStore64SV32S(ExtMachInst machInst)
{
return new Unknown64(machInst);
} // decodeSveScatterStore64SV32S
StaticInstPtr
decodeSveScatterStore64SV64S(ExtMachInst machInst)
{
return new Unknown64(machInst);
} // decodeSveScatterStore64SV64S
StaticInstPtr
decodeSveMemStore(ExtMachInst machInst)
{
@@ -3186,37 +3375,118 @@ namespace Aarch64
}
case 0x4:
case 0x6:
- switch (bits(machInst, 22, 21)) {
- case 0x0:
- return decodeSveScatterStore64SV32U(machInst);
- case 0x1:
- if (bits(machInst, 24, 23) != 0x0) {
- return decodeSveScatterStore64SV32S(machInst);
{
IntRegIndex zt = (IntRegIndex) (uint8_t) bits(machInst, 4, 0);
IntRegIndex rn = (IntRegIndex) (uint8_t) bits(machInst, 9, 5);
IntRegIndex zm = (IntRegIndex) (uint8_t)
bits(machInst, 20, 16);
IntRegIndex pg = (IntRegIndex) (uint8_t)
bits(machInst, 12, 10);
uint8_t msz = bits(machInst, 24, 23);
uint8_t xs = bits(machInst, 22);
switch (bits(machInst, 22, 21)) {
case 0x0:
// SVE 64-bit scatter store (scalar plus unpacked 32-bit
// unscaled offsets)
return decodeSveScatterStoreSVInsts(
msz, machInst, zt, pg, rn, zm,
false, true, xs, false);
case 0x1:
if (bits(machInst, 24, 23) != 0x0) {
// SVE 64-bit scatter store (scalar plus unpacked
// 32-bit scaled offsets)
return decodeSveScatterStoreSVInsts(
msz, machInst, zt, pg, rn, zm,
false, true, xs, true);
}
break;
case 0x2:
if (bits(machInst, 24, 23) != 0x3) {
// SVE 32-bit scatter store (scalar plus 32-bit
// unscaled offsets)
return decodeSveScatterStoreSVInsts(
msz, machInst, zt, pg, rn, zm,
true, true, xs, false);
}
break;
case 0x3:
// SVE 32-bit scatter store (scalar plus 32-bit scaled
// offsets)
return decodeSveScatterStoreSVInsts(
msz, machInst, zt, pg, rn, zm,
true, true, xs, true);
}
break;
- case 0x2:
- if (bits(machInst, 24, 23) != 0x3) {
- return decodeSveScatterStore32SV32U(machInst);
- }
- break;
- case 0x3:
- return decodeSveScatterStore32SV32S(machInst);
}
- break;
case 0x5:
switch (bits(machInst, 22, 21)) {
case 0x0:
- return decodeSveScatterStore64SV64U(machInst);
{
// SVE 64-bit scatter store (scalar plus 64-bit unscaled
// offsets)
IntRegIndex zt = (IntRegIndex) (uint8_t)
bits(machInst, 4, 0);
IntRegIndex rn = (IntRegIndex) (uint8_t)
bits(machInst, 9, 5);
IntRegIndex zm = (IntRegIndex) (uint8_t)
bits(machInst, 20, 16);
IntRegIndex pg = (IntRegIndex) (uint8_t)
bits(machInst, 12, 10);
uint8_t msz = bits(machInst, 24, 23);
return decodeSveScatterStoreSVInsts(
msz, machInst, zt, pg, rn, zm,
false, false, false, false);
}
case 0x1:
if (bits(machInst, 24, 23) != 0x0) {
- return decodeSveScatterStore64SV64S(machInst);
// SVE 64-bit scatter store (scalar plus 64-bit scaled
// offsets)
IntRegIndex zt = (IntRegIndex) (uint8_t)
bits(machInst, 4, 0);
IntRegIndex rn = (IntRegIndex) (uint8_t)
bits(machInst, 9, 5);
IntRegIndex zm = (IntRegIndex) (uint8_t)
bits(machInst, 20, 16);
IntRegIndex pg = (IntRegIndex) (uint8_t)
bits(machInst, 12, 10);
uint8_t msz = bits(machInst, 24, 23);
return decodeSveScatterStoreSVInsts(
msz, machInst, zt, pg, rn, zm,
false, false, false, true);
}
break;
case 0x2:
- return decodeSveScatterStore64VI(machInst);
{
// SVE 64-bit scatter store (vector plus immediate)
IntRegIndex zt = (IntRegIndex) (uint8_t)
bits(machInst, 4, 0);
IntRegIndex zn = (IntRegIndex) (uint8_t)
bits(machInst, 9, 5);
uint64_t imm = bits(machInst, 20, 16);
IntRegIndex pg = (IntRegIndex) (uint8_t)
bits(machInst, 12, 10);
uint8_t msz = bits(machInst, 24, 23);
return decodeSveScatterStoreVIInsts(
msz, machInst, zt, pg, zn, imm, false);
}
case 0x3:
if (bits(machInst, 24, 23) != 0x3) {
- return decodeSveScatterStore64VI(machInst);
// SVE 32-bit scatter store (vector plus immediate)
IntRegIndex zt = (IntRegIndex) (uint8_t)
bits(machInst, 4, 0);
IntRegIndex zn = (IntRegIndex) (uint8_t)
bits(machInst, 9, 5);
uint64_t imm = bits(machInst, 20, 16);
IntRegIndex pg = (IntRegIndex) (uint8_t)
bits(machInst, 12, 10);
uint8_t msz = bits(machInst, 24, 23);
return decodeSveScatterStoreVIInsts(
msz, machInst, zt, pg, zn, imm, true);
}
break;
}


@@ -1,6 +1,6 @@
// -*- mode:c++ -*-
- // Copyright (c) 2010, 2012, 2017 ARM Limited
// Copyright (c) 2010, 2012, 2017-2018 ARM Limited
// All rights reserved
//
// The license below extends only to copyright in the software and shall
@@ -68,9 +68,10 @@ output header {{
#include "arch/arm/insts/sve_mem.hh"
#include "arch/arm/insts/vfp.hh"
#include "arch/arm/isa_traits.hh"
#include "enums/DecoderFlavour.hh"
#include "mem/packet.hh"
#include "sim/faults.hh"
#include "enums/DecoderFlavour.hh"
}};
output decoder {{
@@ -80,11 +81,12 @@ output decoder {{
#include "arch/arm/decoder.hh"
#include "arch/arm/faults.hh"
#include "arch/arm/insts/sve_macromem.hh"
#include "arch/arm/intregs.hh"
#include "arch/arm/isa_traits.hh"
#include "arch/arm/utility.hh"
#include "base/loader/symtab.hh"
#include "base/cprintf.hh"
#include "base/loader/symtab.hh"
#include "cpu/thread_context.hh"
using namespace ArmISA;
@@ -102,8 +104,10 @@ output exec {{
#include "base/crc.hh"
#include "cpu/base.hh"
#include "sim/pseudo_inst.hh"
#if defined(linux)
#include <fenv.h>
#endif
#include "base/cp_annotate.hh"


@@ -1,4 +1,4 @@
- // Copyright (c) 2017 ARM Limited
// Copyright (c) 2017-2018 ARM Limited
// All rights reserved
//
// The license below extends only to copyright in the software and shall
@@ -204,6 +204,288 @@ output header {{
}};
output decoder {{
StaticInstPtr
decodeSveGatherLoadVIInsts(uint8_t dtype, ExtMachInst machInst,
IntRegIndex zt, IntRegIndex pg, IntRegIndex zn,
uint64_t imm, bool esizeIs32,
bool firstFaulting)
{
const char* mn = firstFaulting ? "ldff1" : "ld1";
switch (dtype) {
case 0x0:
if (esizeIs32) {
return new SveIndexedMemVI<int32_t, int8_t,
SveGatherLoadVIMicroop>(
mn, machInst, MemReadOp, zt, pg, zn, imm);
} else {
return new SveIndexedMemVI<int64_t, int8_t,
SveGatherLoadVIMicroop>(
mn, machInst, MemReadOp, zt, pg, zn, imm);
}
case 0x1:
if (esizeIs32) {
return new SveIndexedMemVI<uint32_t, uint8_t,
SveGatherLoadVIMicroop>(
mn, machInst, MemReadOp, zt, pg, zn, imm);
} else {
return new SveIndexedMemVI<uint64_t, uint8_t,
SveGatherLoadVIMicroop>(
mn, machInst, MemReadOp, zt, pg, zn, imm);
}
case 0x2:
if (esizeIs32) {
return new SveIndexedMemVI<int32_t, int16_t,
SveGatherLoadVIMicroop>(
mn, machInst, MemReadOp, zt, pg, zn, imm);
} else {
return new SveIndexedMemVI<int64_t, int16_t,
SveGatherLoadVIMicroop>(
mn, machInst, MemReadOp, zt, pg, zn, imm);
}
case 0x3:
if (esizeIs32) {
return new SveIndexedMemVI<uint32_t, uint16_t,
SveGatherLoadVIMicroop>(
mn, machInst, MemReadOp, zt, pg, zn, imm);
} else {
return new SveIndexedMemVI<uint64_t, uint16_t,
SveGatherLoadVIMicroop>(
mn, machInst, MemReadOp, zt, pg, zn, imm);
}
case 0x4:
if (esizeIs32) {
break;
} else {
return new SveIndexedMemVI<int64_t, int32_t,
SveGatherLoadVIMicroop>(
mn, machInst, MemReadOp, zt, pg, zn, imm);
}
case 0x5:
if (esizeIs32) {
return new SveIndexedMemVI<uint32_t, uint32_t,
SveGatherLoadVIMicroop>(
mn, machInst, MemReadOp, zt, pg, zn, imm);
} else {
return new SveIndexedMemVI<uint64_t, uint32_t,
SveGatherLoadVIMicroop>(
mn, machInst, MemReadOp, zt, pg, zn, imm);
}
case 0x7:
if (esizeIs32) {
break;
} else {
return new SveIndexedMemVI<uint64_t, uint64_t,
SveGatherLoadVIMicroop>(
mn, machInst, MemReadOp, zt, pg, zn, imm);
}
}
return new Unknown64(machInst);
}
StaticInstPtr
decodeSveGatherLoadSVInsts(uint8_t dtype, ExtMachInst machInst,
IntRegIndex zt, IntRegIndex pg, IntRegIndex rn,
IntRegIndex zm, bool esizeIs32, bool offsetIs32,
bool offsetIsSigned, bool offsetIsScaled,
bool firstFaulting)
{
const char* mn = firstFaulting ? "ldff1" : "ld1";
switch (dtype) {
case 0x0:
if (esizeIs32) {
return new SveIndexedMemSV<int32_t, int8_t,
SveGatherLoadSVMicroop>(
mn, machInst, MemReadOp, zt, pg, rn, zm,
offsetIs32, offsetIsSigned, offsetIsScaled);
} else {
return new SveIndexedMemSV<int64_t, int8_t,
SveGatherLoadSVMicroop>(
mn, machInst, MemReadOp, zt, pg, rn, zm,
offsetIs32, offsetIsSigned, offsetIsScaled);
}
case 0x1:
if (esizeIs32) {
return new SveIndexedMemSV<uint32_t, uint8_t,
SveGatherLoadSVMicroop>(
mn, machInst, MemReadOp, zt, pg, rn, zm,
offsetIs32, offsetIsSigned, offsetIsScaled);
} else {
return new SveIndexedMemSV<uint64_t, uint8_t,
SveGatherLoadSVMicroop>(
mn, machInst, MemReadOp, zt, pg, rn, zm,
offsetIs32, offsetIsSigned, offsetIsScaled);
}
case 0x2:
if (esizeIs32) {
return new SveIndexedMemSV<int32_t, int16_t,
SveGatherLoadSVMicroop>(
mn, machInst, MemReadOp, zt, pg, rn, zm,
offsetIs32, offsetIsSigned, offsetIsScaled);
} else {
return new SveIndexedMemSV<int64_t, int16_t,
SveGatherLoadSVMicroop>(
mn, machInst, MemReadOp, zt, pg, rn, zm,
offsetIs32, offsetIsSigned, offsetIsScaled);
}
case 0x3:
if (esizeIs32) {
return new SveIndexedMemSV<uint32_t, uint16_t,
SveGatherLoadSVMicroop>(
mn, machInst, MemReadOp, zt, pg, rn, zm,
offsetIs32, offsetIsSigned, offsetIsScaled);
} else {
return new SveIndexedMemSV<uint64_t, uint16_t,
SveGatherLoadSVMicroop>(
mn, machInst, MemReadOp, zt, pg, rn, zm,
offsetIs32, offsetIsSigned, offsetIsScaled);
}
case 0x4:
if (esizeIs32) {
break;
} else {
return new SveIndexedMemSV<int64_t, int32_t,
SveGatherLoadSVMicroop>(
mn, machInst, MemReadOp, zt, pg, rn, zm,
offsetIs32, offsetIsSigned, offsetIsScaled);
}
case 0x5:
if (esizeIs32) {
return new SveIndexedMemSV<uint32_t, uint32_t,
SveGatherLoadSVMicroop>(
mn, machInst, MemReadOp, zt, pg, rn, zm,
offsetIs32, offsetIsSigned, offsetIsScaled);
} else {
return new SveIndexedMemSV<uint64_t, uint32_t,
SveGatherLoadSVMicroop>(
mn, machInst, MemReadOp, zt, pg, rn, zm,
offsetIs32, offsetIsSigned, offsetIsScaled);
}
case 0x7:
if (esizeIs32) {
break;
} else {
return new SveIndexedMemSV<uint64_t, uint64_t,
SveGatherLoadSVMicroop>(
mn, machInst, MemReadOp, zt, pg, rn, zm,
offsetIs32, offsetIsSigned, offsetIsScaled);
}
}
return new Unknown64(machInst);
}
StaticInstPtr
decodeSveScatterStoreVIInsts(uint8_t msz, ExtMachInst machInst,
IntRegIndex zt, IntRegIndex pg,
IntRegIndex zn, uint64_t imm,
bool esizeIs32)
{
const char* mn = "st1";
switch (msz) {
case 0x0:
if (esizeIs32) {
return new SveIndexedMemVI<uint32_t, uint8_t,
SveScatterStoreVIMicroop>(
mn, machInst, MemWriteOp, zt, pg, zn, imm);
} else {
return new SveIndexedMemVI<uint64_t, uint8_t,
SveScatterStoreVIMicroop>(
mn, machInst, MemWriteOp, zt, pg, zn, imm);
}
case 0x1:
if (esizeIs32) {
return new SveIndexedMemVI<uint32_t, uint16_t,
SveScatterStoreVIMicroop>(
mn, machInst, MemWriteOp, zt, pg, zn, imm);
} else {
return new SveIndexedMemVI<uint64_t, uint16_t,
SveScatterStoreVIMicroop>(
mn, machInst, MemWriteOp, zt, pg, zn, imm);
}
case 0x2:
if (esizeIs32) {
return new SveIndexedMemVI<uint32_t, uint32_t,
SveScatterStoreVIMicroop>(
mn, machInst, MemWriteOp, zt, pg, zn, imm);
} else {
return new SveIndexedMemVI<uint64_t, uint32_t,
SveScatterStoreVIMicroop>(
mn, machInst, MemWriteOp, zt, pg, zn, imm);
}
case 0x3:
if (esizeIs32) {
break;
} else {
return new SveIndexedMemVI<uint64_t, uint64_t,
SveScatterStoreVIMicroop>(
mn, machInst, MemWriteOp, zt, pg, zn, imm);
}
}
return new Unknown64(machInst);
}
StaticInstPtr
decodeSveScatterStoreSVInsts(uint8_t msz, ExtMachInst machInst,
IntRegIndex zt, IntRegIndex pg,
IntRegIndex rn, IntRegIndex zm,
bool esizeIs32, bool offsetIs32,
bool offsetIsSigned, bool offsetIsScaled)
{
const char* mn = "st1";
switch (msz) {
case 0x0:
if (esizeIs32) {
return new SveIndexedMemSV<uint32_t, uint8_t,
SveScatterStoreSVMicroop>(
mn, machInst, MemWriteOp, zt, pg, rn, zm,
offsetIs32, offsetIsSigned, offsetIsScaled);
} else {
return new SveIndexedMemSV<uint64_t, uint8_t,
SveScatterStoreSVMicroop>(
mn, machInst, MemWriteOp, zt, pg, rn, zm,
offsetIs32, offsetIsSigned, offsetIsScaled);
}
case 0x1:
if (esizeIs32) {
return new SveIndexedMemSV<uint32_t, uint16_t,
SveScatterStoreSVMicroop>(
mn, machInst, MemWriteOp, zt, pg, rn, zm,
offsetIs32, offsetIsSigned, offsetIsScaled);
} else {
return new SveIndexedMemSV<uint64_t, uint16_t,
SveScatterStoreSVMicroop>(
mn, machInst, MemWriteOp, zt, pg, rn, zm,
offsetIs32, offsetIsSigned, offsetIsScaled);
}
case 0x2:
if (esizeIs32) {
return new SveIndexedMemSV<uint32_t, uint32_t,
SveScatterStoreSVMicroop>(
mn, machInst, MemWriteOp, zt, pg, rn, zm,
offsetIs32, offsetIsSigned, offsetIsScaled);
} else {
return new SveIndexedMemSV<uint64_t, uint32_t,
SveScatterStoreSVMicroop>(
mn, machInst, MemWriteOp, zt, pg, rn, zm,
offsetIs32, offsetIsSigned, offsetIsScaled);
}
case 0x3:
if (esizeIs32) {
break;
} else {
return new SveIndexedMemSV<uint64_t, uint64_t,
SveScatterStoreSVMicroop>(
mn, machInst, MemWriteOp, zt, pg, rn, zm,
offsetIs32, offsetIsSigned, offsetIsScaled);
}
}
return new Unknown64(machInst);
}
}};
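For reference, the dtype switches above implement the following mapping from
the instruction's dtype field to (register element, memory element) pairs; a
summary distilled from the code above, not itself part of the patch:

// dtype | esizeIs32 (32-bit elems)   | !esizeIs32 (64-bit elems)
// ------+----------------------------+---------------------------
//  0x0  | int32_t  <- int8_t         | int64_t  <- int8_t
//  0x1  | uint32_t <- uint8_t        | uint64_t <- uint8_t
//  0x2  | int32_t  <- int16_t        | int64_t  <- int16_t
//  0x3  | uint32_t <- uint16_t       | uint64_t <- uint16_t
//  0x4  | (undefined)                | int64_t  <- int32_t
//  0x5  | uint32_t <- uint32_t       | uint64_t <- uint32_t
//  0x6  | (undefined)                | (undefined)
//  0x7  | (undefined)                | uint64_t <- uint64_t
// Any undefined combination decodes to Unknown64.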
let {{
header_output = ''
@@ -323,6 +605,31 @@ let {{
('uint64_t', 'uint64_t'),
)
gatherLoadTplArgs = (
('int32_t', 'int8_t'),
('int64_t', 'int8_t'),
('uint32_t', 'uint8_t'),
('uint64_t', 'uint8_t'),
('int32_t', 'int16_t'),
('int64_t', 'int16_t'),
('uint32_t', 'uint16_t'),
('uint64_t', 'uint16_t'),
('int64_t', 'int32_t'),
('uint32_t', 'uint32_t'),
('uint64_t', 'uint32_t'),
('uint64_t', 'uint64_t'),
)
scatterStoreTplArgs = (
('uint32_t', 'uint8_t'),
('uint64_t', 'uint8_t'),
('uint32_t', 'uint16_t'),
('uint64_t', 'uint16_t'),
('uint32_t', 'uint32_t'),
('uint64_t', 'uint32_t'),
('uint64_t', 'uint64_t'),
)
# Generates definitions for SVE contiguous loads
def emitSveContigMemInsts(offsetIsImm):
global header_output, exec_output, decoders
@@ -437,9 +744,124 @@ let {{
'class_name': 'SveLoadAndRepl'}
exec_output += SveContigMemExecDeclare.subst(substDict)
class IndexedAddrForm:
VEC_PLUS_IMM = 0
SCA_PLUS_VEC = 1
# Generates definitions for the transfer microops of SVE indexed memory
# operations (gather loads, scatter stores)
def emitSveIndexedMemMicroops(indexed_addr_form):
assert indexed_addr_form in (IndexedAddrForm.VEC_PLUS_IMM,
IndexedAddrForm.SCA_PLUS_VEC)
global header_output, exec_output, decoders
tplHeader = 'template <class RegElemType, class MemElemType>'
tplArgs = '<RegElemType, MemElemType>'
if indexed_addr_form == IndexedAddrForm.VEC_PLUS_IMM:
eaCode = '''
EA = AA64FpBase_x[elemIndex] + imm * sizeof(MemElemType)'''
else:
eaCode = '''
uint64_t offset = AA64FpOffset_x[elemIndex];
if (offsetIs32) {
offset &= (1ULL << 32) - 1;
}
if (offsetIsSigned) {
offset = sext<32>(offset);
}
if (offsetIsScaled) {
offset *= sizeof(MemElemType);
}
EA = XBase + offset'''
loadMemAccCode = '''
if (GpOp_x[elemIndex]) {
AA64FpDest_x[elemIndex] = memData;
} else {
AA64FpDest_x[elemIndex] = 0;
}
'''
storeMemAccCode = '''
memData = AA64FpDest_x[elemIndex];
'''
predCheckCode = 'GpOp_x[elemIndex]'
loadIop = InstObjParams('ld1',
('SveGatherLoadVIMicroop'
if indexed_addr_form == IndexedAddrForm.VEC_PLUS_IMM
else 'SveGatherLoadSVMicroop'),
'MicroOp',
{'tpl_header': tplHeader,
'tpl_args': tplArgs,
'memacc_code': loadMemAccCode,
'ea_code' : sveEnabledCheckCode + eaCode,
'pred_check_code' : predCheckCode,
'fa_code' : ''},
['IsMicroop', 'IsMemRef', 'IsLoad'])
storeIop = InstObjParams('st1',
('SveScatterStoreVIMicroop'
if indexed_addr_form == IndexedAddrForm.VEC_PLUS_IMM
else 'SveScatterStoreSVMicroop'),
'MicroOp',
{'tpl_header': tplHeader,
'tpl_args': tplArgs,
'memacc_code': storeMemAccCode,
'ea_code' : sveEnabledCheckCode + eaCode,
'pred_check_code' : predCheckCode,
'fa_code' : ''},
['IsMicroop', 'IsMemRef', 'IsStore'])
if indexed_addr_form == IndexedAddrForm.VEC_PLUS_IMM:
header_output += SveIndexedMemVIMicroopDeclare.subst(loadIop)
header_output += SveIndexedMemVIMicroopDeclare.subst(storeIop)
else:
header_output += SveIndexedMemSVMicroopDeclare.subst(loadIop)
header_output += SveIndexedMemSVMicroopDeclare.subst(storeIop)
exec_output += (
SveGatherLoadMicroopExecute.subst(loadIop) +
SveGatherLoadMicroopInitiateAcc.subst(loadIop) +
SveGatherLoadMicroopCompleteAcc.subst(loadIop) +
SveScatterStoreMicroopExecute.subst(storeIop) +
SveScatterStoreMicroopInitiateAcc.subst(storeIop) +
SveScatterStoreMicroopCompleteAcc.subst(storeIop))
for args in gatherLoadTplArgs:
substDict = {'tpl_args': '<%s>' % ', '.join(args),
'class_name': (
'SveGatherLoadVIMicroop'
if indexed_addr_form == \
IndexedAddrForm.VEC_PLUS_IMM
else 'SveGatherLoadSVMicroop')}
# TODO: this should become SveMemExecDeclare
exec_output += SveContigMemExecDeclare.subst(substDict)
for args in scatterStoreTplArgs:
substDict = {'tpl_args': '<%s>' % ', '.join(args),
'class_name': (
'SveScatterStoreVIMicroop'
if indexed_addr_form == \
IndexedAddrForm.VEC_PLUS_IMM
else 'SveScatterStoreSVMicroop')}
# TODO: this should become SveMemExecDeclare
exec_output += SveContigMemExecDeclare.subst(substDict)
# Generates definitions for the first microop of SVE gather loads, required
# to propagate the source vector register to the transfer microops
def emitSveGatherLoadCpySrcVecMicroop():
global header_output, exec_output, decoders
code = sveEnabledCheckCode + '''
unsigned eCount = ArmStaticInst::getCurSveVecLen<uint8_t>(
xc->tcBase());
for (unsigned i = 0; i < eCount; i++) {
AA64FpUreg0_ub[i] = AA64FpOp1_ub[i];
}'''
iop = InstObjParams('ld1',
'SveGatherLoadCpySrcVecMicroop',
'MicroOp',
{'code': code},
['IsMicroop'])
header_output += SveGatherLoadCpySrcVecMicroopDeclare.subst(iop)
exec_output += SveGatherLoadCpySrcVecMicroopExecute.subst(iop)
# LD1[S]{B,H,W,D} (scalar plus immediate)
# ST1[S]{B,H,W,D} (scalar plus immediate)
emitSveContigMemInsts(True)
# LD1[S]{B,H,W,D} (scalar plus scalar)
# ST1[S]{B,H,W,D} (scalar plus scalar)
emitSveContigMemInsts(False)
# LD1R[S]{B,H,W,D}
@@ -450,4 +872,14 @@ let {{
# LDR (vector), STR (vector)
emitSveMemFillSpill(False)
# LD1[S]{B,H,W,D} (vector plus immediate)
# ST1[S]{B,H,W,D} (vector plus immediate)
emitSveIndexedMemMicroops(IndexedAddrForm.VEC_PLUS_IMM)
# LD1[S]{B,H,W,D} (scalar plus vector)
# ST1[S]{B,H,W,D} (scalar plus vector)
emitSveIndexedMemMicroops(IndexedAddrForm.SCA_PLUS_VEC)
# Source vector copy microop for gather loads
emitSveGatherLoadCpySrcVecMicroop()
}};
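The ea_code strings above expand into per-element address computations in the
generated microops. As a hedged, standalone rendering of the scalar-plus-vector
case (illustrative names, not code from the patch):

#include <cstdint>

// Sketch of the effective address for one element of a scalar-plus-vector
// gather/scatter, mirroring the generated ea_code above.
uint64_t
scaPlusVecEA(uint64_t x_base, uint64_t offset_elem, bool offset_is_32,
             bool offset_is_signed, bool offset_is_scaled,
             uint64_t mem_elem_size)
{
    uint64_t offset = offset_elem;
    if (offset_is_32)
        offset &= (1ULL << 32) - 1;                   // truncate to 32 bits
    if (offset_is_signed)
        offset = (uint64_t)(int64_t)(int32_t)offset;  // sext<32>(offset)
    if (offset_is_scaled)
        offset *= mem_elem_size;                      // scale by element size
    return x_base + offset;
}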


@@ -1,5 +1,5 @@
// -*- mode:c++ -*-
- // Copyright (c) 2010-2014, 2016 ARM Limited
// Copyright (c) 2010-2014, 2016-2018 ARM Limited
// All rights reserved
//
// The license below extends only to copyright in the software and shall
@@ -541,6 +541,39 @@ def operands {{
'AA64FpDestMergeQ': vectorRegElem('0', 'tud', zeroing = True)
}),
'AA64FpBase': vectorReg('base',
{
'AA64FpBaseP0': vectorRegElem('0'),
'AA64FpBaseP1': vectorRegElem('1'),
'AA64FpBaseP2': vectorRegElem('2'),
'AA64FpBaseP3': vectorRegElem('3'),
'AA64FpBaseS': vectorRegElem('0', 'sf', zeroing = True),
'AA64FpBaseD': vectorRegElem('0', 'df', zeroing = True),
'AA64FpBaseQ': vectorRegElem('0', 'tud', zeroing = True)
}),
'AA64FpOffset': vectorReg('offset',
{
'AA64FpOffsetP0': vectorRegElem('0'),
'AA64FpOffsetP1': vectorRegElem('1'),
'AA64FpOffsetP2': vectorRegElem('2'),
'AA64FpOffsetP3': vectorRegElem('3'),
'AA64FpOffsetS': vectorRegElem('0', 'sf', zeroing = True),
'AA64FpOffsetD': vectorRegElem('0', 'df', zeroing = True),
'AA64FpOffsetQ': vectorRegElem('0', 'tud', zeroing = True)
}),
'AA64FpUreg0': vectorReg('VECREG_UREG0',
{
'AA64FpUreg0P0': vectorRegElem('0'),
'AA64FpUreg0P1': vectorRegElem('1'),
'AA64FpUreg0P2': vectorRegElem('2'),
'AA64FpUreg0P3': vectorRegElem('3'),
'AA64FpUreg0S': vectorRegElem('0', 'sf', zeroing = True),
'AA64FpUreg0D': vectorRegElem('0', 'df', zeroing = True),
'AA64FpUreg0Q': vectorRegElem('0', 'tud', zeroing = True)
}),
# Predicate register operands
'GpOp': vecPredReg('gp'),
'POp1': vecPredReg('op1'),


@@ -1,4 +1,4 @@
- // Copyright (c) 2017 ARM Limited
// Copyright (c) 2017-2018 ARM Limited
// All rights reserved
//
// The license below extends only to copyright in the software and shall
@@ -384,3 +384,342 @@ def template SveLoadAndReplCompleteAcc {{
}
}};
def template SveIndexedMemVIMicroopDeclare {{
%(tpl_header)s
class %(class_name)s : public %(base_class)s
{
protected:
typedef RegElemType TPElem;
IntRegIndex dest;
IntRegIndex gp;
IntRegIndex base;
uint64_t imm;
int elemIndex;
int numElems;
unsigned memAccessFlags;
public:
%(class_name)s(const char* mnem, ExtMachInst machInst,
OpClass __opClass, IntRegIndex _dest, IntRegIndex _gp,
IntRegIndex _base, uint64_t _imm, int _elemIndex, int _numElems)
: %(base_class)s(mnem, machInst, %(op_class)s),
dest(_dest), gp(_gp), base(_base), imm(_imm),
elemIndex(_elemIndex), numElems(_numElems),
memAccessFlags(ArmISA::TLB::AllowUnaligned |
ArmISA::TLB::MustBeOne)
{
%(constructor)s;
if (_opClass == MemReadOp && elemIndex == 0) {
// The first micro-op is responsible for pinning the
// destination register
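// (pinned writes keep the same physical destination register across
// the remaining element microops, so partial element updates
// accumulate in one register instead of being renamed away)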
_destRegIdx[0].setNumPinnedWrites(numElems - 1);
}
}
Fault execute(ExecContext *, Trace::InstRecord *) const;
Fault initiateAcc(ExecContext *, Trace::InstRecord *) const;
Fault completeAcc(PacketPtr, ExecContext *, Trace::InstRecord *) const;
virtual void
annotateFault(ArmFault *fault)
{
%(fa_code)s
}
std::string
generateDisassembly(Addr pc, const SymbolTable *symtab) const
{
// TODO: add suffix to transfer register
std::stringstream ss;
printMnemonic(ss, "", false);
ccprintf(ss, "{");
printVecReg(ss, dest, true);
ccprintf(ss, "}, ");
printVecPredReg(ss, gp);
if (_opClass == MemReadOp) {
ccprintf(ss, "/z");
}
ccprintf(ss, ", [");
printVecReg(ss, base, true);
if (imm != 0) {
ccprintf(ss, ", #%d", imm * sizeof(MemElemType));
}
ccprintf(ss, "] (uop elem %d tfer)", elemIndex);
return ss.str();
}
};
}};
def template SveIndexedMemSVMicroopDeclare {{
%(tpl_header)s
class %(class_name)s : public %(base_class)s
{
protected:
typedef RegElemType TPElem;
IntRegIndex dest;
IntRegIndex gp;
IntRegIndex base;
IntRegIndex offset;
bool offsetIs32;
bool offsetIsSigned;
bool offsetIsScaled;
int elemIndex;
int numElems;
unsigned memAccessFlags;
public:
%(class_name)s(const char* mnem, ExtMachInst machInst,
OpClass __opClass, IntRegIndex _dest, IntRegIndex _gp,
IntRegIndex _base, IntRegIndex _offset, bool _offsetIs32,
bool _offsetIsSigned, bool _offsetIsScaled, int _elemIndex,
int _numElems)
: %(base_class)s(mnem, machInst, %(op_class)s),
dest(_dest), gp(_gp), base(_base), offset(_offset),
offsetIs32(_offsetIs32), offsetIsSigned(_offsetIsSigned),
offsetIsScaled(_offsetIsScaled), elemIndex(_elemIndex),
numElems(_numElems),
memAccessFlags(ArmISA::TLB::AllowUnaligned |
ArmISA::TLB::MustBeOne)
{
%(constructor)s;
if (_opClass == MemReadOp && elemIndex == 0) {
// The first micro-op is responsible for pinning the
// destination register
_destRegIdx[0].setNumPinnedWrites(numElems - 1);
}
}
Fault execute(ExecContext *, Trace::InstRecord *) const;
Fault initiateAcc(ExecContext *, Trace::InstRecord *) const;
Fault completeAcc(PacketPtr, ExecContext *, Trace::InstRecord *) const;
virtual void
annotateFault(ArmFault *fault)
{
%(fa_code)s
}
std::string
generateDisassembly(Addr pc, const SymbolTable *symtab) const
{
// TODO: add suffix to transfer and base registers
std::stringstream ss;
printMnemonic(ss, "", false);
ccprintf(ss, "{");
printVecReg(ss, dest, true);
ccprintf(ss, "}, ");
printVecPredReg(ss, gp);
if (_opClass == MemReadOp) {
ccprintf(ss, "/z");
}
ccprintf(ss, ", [");
printIntReg(ss, base);
ccprintf(ss, ", ");
printVecReg(ss, offset, true);
ccprintf(ss, "] (uop elem %d tfer)", elemIndex);
return ss.str();
}
};
}};
def template SveGatherLoadMicroopExecute {{
%(tpl_header)s
Fault %(class_name)s%(tpl_args)s::execute(ExecContext *xc,
Trace::InstRecord *traceData) const
{
Addr EA;
Fault fault = NoFault;
bool aarch64 M5_VAR_USED = true;
%(op_decl)s;
%(op_rd)s;
%(ea_code)s;
MemElemType memData;
if (%(pred_check_code)s) {
fault = readMemAtomic(xc, traceData, EA, memData,
this->memAccessFlags);
}
if (fault == NoFault) {
%(memacc_code)s;
%(op_wb)s;
}
return fault;
}
}};
def template SveGatherLoadMicroopInitiateAcc {{
%(tpl_header)s
Fault %(class_name)s%(tpl_args)s::initiateAcc(ExecContext *xc,
Trace::InstRecord *traceData) const
{
Addr EA;
Fault fault = NoFault;
bool aarch64 M5_VAR_USED = true;
%(op_src_decl)s;
%(op_rd)s;
%(ea_code)s;
MemElemType memData;
if (%(pred_check_code)s) {
fault = initiateMemRead(xc, traceData, EA, memData,
this->memAccessFlags);
} else {
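// element predicated off: no memory request will be sent, so
// flag the access as not performed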
xc->setMemAccPredicate(false);
}
return fault;
}
}};
def template SveGatherLoadMicroopCompleteAcc {{
%(tpl_header)s
Fault %(class_name)s%(tpl_args)s::completeAcc(PacketPtr pkt,
ExecContext *xc, Trace::InstRecord *traceData) const
{
Fault fault = NoFault;
bool aarch64 M5_VAR_USED = true;
%(op_decl)s;
%(op_rd)s;
MemElemType memData = 0;
if (%(pred_check_code)s) {
getMem(pkt, memData, traceData);
}
if (fault == NoFault) {
%(memacc_code)s;
}
if (fault == NoFault) {
%(op_wb)s;
}
return fault;
}
}};
def template SveScatterStoreMicroopExecute {{
%(tpl_header)s
Fault %(class_name)s%(tpl_args)s::execute(ExecContext *xc,
Trace::InstRecord *traceData) const
{
Addr EA;
Fault fault = NoFault;
bool aarch64 M5_VAR_USED = true;
%(op_decl)s;
%(op_rd)s;
%(ea_code)s;
MemElemType memData;
%(memacc_code)s;
if (%(pred_check_code)s) {
fault = writeMemAtomic(xc, traceData, memData, EA,
this->memAccessFlags, NULL);
}
if (fault == NoFault) {
%(op_wb)s;
}
return fault;
}
}};
def template SveScatterStoreMicroopInitiateAcc {{
%(tpl_header)s
Fault %(class_name)s%(tpl_args)s::initiateAcc(ExecContext *xc,
Trace::InstRecord *traceData) const
{
Addr EA;
Fault fault = NoFault;
bool aarch64 M5_VAR_USED = true;
%(op_decl)s;
%(op_rd)s;
%(ea_code)s;
MemElemType memData;
%(memacc_code)s;
if (%(pred_check_code)s) {
fault = writeMemTiming(xc, traceData, memData, EA,
this->memAccessFlags, NULL);
} else {
xc->setPredicate(false);
}
return fault;
}
}};
def template SveScatterStoreMicroopCompleteAcc {{
%(tpl_header)s
Fault %(class_name)s%(tpl_args)s::completeAcc(PacketPtr pkt,
ExecContext *xc, Trace::InstRecord *traceData) const
{
return NoFault;
}
}};
def template SveGatherLoadCpySrcVecMicroopDeclare {{
class SveGatherLoadCpySrcVecMicroop : public MicroOp
{
protected:
IntRegIndex op1;
StaticInst *macroOp;
public:
SveGatherLoadCpySrcVecMicroop(const char* mnem, ExtMachInst machInst,
IntRegIndex _op1, StaticInst *_macroOp)
: MicroOp(mnem, machInst, SimdAluOp), op1(_op1), macroOp(_macroOp)
{
%(constructor)s;
}
Fault execute(ExecContext *, Trace::InstRecord *) const;
std::string
generateDisassembly(Addr pc, const SymbolTable *symtab) const
{
std::stringstream ss;
ccprintf(ss, "%s", macroOp->disassemble(pc, symtab));
ccprintf(ss, " (uop src vec cpy)");
return ss.str();
}
};
}};
def template SveGatherLoadCpySrcVecMicroopExecute {{
Fault SveGatherLoadCpySrcVecMicroop::execute(ExecContext *xc,
Trace::InstRecord *traceData) const
{
Fault fault = NoFault;
%(op_decl)s;
%(op_rd)s;
%(code)s;
if (fault == NoFault)
{
%(op_wb)s;
}
return fault;
}
}};


@@ -88,6 +88,7 @@ const int NumVecSpecialRegs = 8;
const int NumIntRegs = NUM_INTREGS;
const int NumFloatRegs = NumFloatV8ArchRegs + NumFloatSpecialRegs;
const int NumVecRegs = NumVecV8ArchRegs + NumVecSpecialRegs;
const int VECREG_UREG0 = 32;
const int NumVecPredRegs = 17; // P0-P15, FFR
const int PREDREG_FFR = 16;
const int NumCCRegs = NUM_CCREGS;
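For context, a small hypothetical check of the new constant (assuming
NumVecV8ArchRegs == 32, whose definition is not shown in this hunk):

#include <cassert>

const int NumVecV8ArchRegs = 32;  // assumption: value from registers.hh
const int NumVecSpecialRegs = 8;  // from the context above

// VECREG_UREG0 names the first special vector register, located right
// after the 32 architectural Z registers in the vector register file.
const int VECREG_UREG0 = 32;

int main()
{
    assert(VECREG_UREG0 == NumVecV8ArchRegs);
    assert(VECREG_UREG0 < NumVecV8ArchRegs + NumVecSpecialRegs);
    return 0;
}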